ggml-opencl-dequant.cl 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  /*
   * Turn the macro arguments into a single string literal.
   * Being variadic, it accepts text containing commas; the preprocessor
   * collapses each run of whitespace between tokens to one space and strips
   * comments before stringifying.
   */
  #define MULTILINE_QUOTE(...) #__VA_ARGS__
  2. const char * clblast_dequant = MULTILINE_QUOTE(
  /*
   * One quantized block as read by dequantize_row_q4_0.
   *   d  - per-block scale multiplied into every decoded value
   *   qs - 16 bytes, each packing two 4-bit values: low nibble first, high nibble second
   */
  struct block_q4_0 {
      float d;
      uchar qs[16];
  };
  /*
   * Expand the 4-bit values stored in block_q4_0 records into floats.
   *
   * blocks: input array of block_q4_0
   * result: output array of floats
   *
   * A work-item selects block get_global_id(0) / 32 and byte get_local_id(0)
   * inside that block, then writes two consecutive floats starting at
   * block * 32 + byte * 2. A nibble n decodes to (n - 8) * d.
   *
   * NOTE(review): the local id indexes qs[16] directly, so the host presumably
   * launches with a work-group size of at most 16 - confirm against the caller.
   */
  __kernel void dequantize_row_q4_0(__global struct block_q4_0* blocks, __global float* result) {
      const uint block_idx = get_global_id(0) / 32;
      const uint byte_idx  = get_local_id(0);

      const float scale  = blocks[block_idx].d;
      const uchar packed = blocks[block_idx].qs[byte_idx];

      /* Split the byte into its two 4-bit halves. */
      const int lo = packed & 0xf;
      const int hi = packed >> 4;

      const uint out = block_idx*32 + byte_idx*2;
      result[out + 0] = (lo - 8)*scale;
      result[out + 1] = (hi - 8)*scale;
  }
  /*
   * One quantized block as read by dequantize_row_q4_1.
   *   d  - per-block scale multiplied into every decoded value
   *   m  - per-block offset added after scaling
   *   qs - 16 bytes, each packing two 4-bit values: low nibble first, high nibble second
   */
  struct block_q4_1 {
      float d;
      float m;
      uchar qs[16];
  };
  /*
   * Expand the 4-bit values stored in block_q4_1 records into floats.
   *
   * blocks: input array of block_q4_1
   * result: output array of floats
   *
   * A work-item selects block get_global_id(0) / 32 and byte get_local_id(0)
   * inside that block, then writes two consecutive floats starting at
   * block * 32 + byte * 2. A nibble n decodes to n * d + m.
   *
   * NOTE(review): the local id indexes qs[16] directly, so the host presumably
   * launches with a work-group size of at most 16 - confirm against the caller.
   */
  __kernel void dequantize_row_q4_1(__global struct block_q4_1* blocks, __global float* result) {
      const uint block_idx = get_global_id(0) / 32;
      const uint byte_idx  = get_local_id(0);

      const float scale  = blocks[block_idx].d;
      const float offset = blocks[block_idx].m;
      const uchar packed = blocks[block_idx].qs[byte_idx];

      /* Split the byte into its two 4-bit halves. */
      const int lo = packed & 0xf;
      const int hi = packed >> 4;

      const uint out = block_idx*32 + byte_idx*2;
      result[out + 0] = lo * scale + offset;
      result[out + 1] = hi * scale + offset;
  }
  /*
   * One quantized block as read by dequantize_row_q4_2.
   *   d  - 16 raw bits of the per-block scale; the kernel reads them as a
   *        half-precision value through vload_half
   *   qs - 8 bytes, each packing two 4-bit values: low nibble first, high nibble second
   */
  struct block_q4_2 {
      ushort d;
      uchar qs[8];
  };
  /*
   * Expand the 4-bit values stored in block_q4_2 records into floats.
   *
   * blocks: input array of block_q4_2
   * result: output array of floats
   *
   * A work-item selects block get_global_id(0) / 16 and byte get_local_id(0)
   * inside that block, then writes two consecutive floats starting at
   * block * 16 + byte * 2. A nibble n decodes to (n - 8) * d, where d is the
   * block scale converted from its 16-bit half representation to float.
   *
   * NOTE(review): the local id indexes qs[8] directly, so the host presumably
   * launches with a work-group size of at most 8 - confirm against the caller.
   */
  __kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global float* result) {
      const uint i = get_global_id(0) / 16;
      const uint l = get_local_id(0);

      /* The scale is stored as ushort bits; vload_half converts it to float. */
      const float d = vload_half(0, (__global half*) &blocks[i].d);

      const uchar vi = blocks[i].qs[l];

      const uint index = i*16 + l*2;
      result[index + 0] = ((vi & 0xf) - 8)*d;
      result[index + 1] = ((vi >> 4) - 8)*d;
  }
  47. );