/* test-quantize.c - checks ggml_quantize_q4_0() and ggml_quantize_q4_1() against hand-computed values. */
  #include "ggml.h"

  /* Keep assert() active even in release builds; must precede <assert.h>. */
  #undef NDEBUG
  #include <assert.h>
  #include <math.h>
  #include <string.h>
  /*
   * Copies one float out of a byte buffer, starting at byte `offset`.
   * memcpy is used instead of a (float *) cast so the read is valid regardless
   * of the buffer's alignment and does not break the effective-type rules.
   */
  static float load_f32(const uint8_t *buf, size_t offset) {
      float value;
      memcpy(&value, buf + offset, sizeof value);
      return value;
  }

  /*
   * Returns the 4-bit code at position `i` of a packed nibble array:
   * even positions live in the low nibble of byte i/2, odd ones in the high nibble.
   */
  static uint8_t nibble_at(const uint8_t *packed, int i) {
      const uint8_t byte = packed[i / 2];
      return (i % 2) ? (uint8_t)(byte >> 4) : (uint8_t)(byte & 0xF);
  }

  /*
   * Quantizes a single block holding the ramp 1, 2, ..., QK with both 4-bit
   * quantizers and checks the bytes written to `dst`.
   *
   * Layout this test expects in `dst`:
   *   Q4_0: one float scale, then QK/2 bytes of packed 4-bit codes.
   *   Q4_1: one float delta, one float minimum, then QK/2 bytes of packed codes.
   *
   * Returns 0 on success; any mismatch aborts through assert().
   */
  int main(void) {
      enum {
          QK        = 32,                               /* values per quantization block */
          HIST_BINS = 1 << 4,                           /* one bin per 4-bit code */
          Q4_0_SIZE = (int)sizeof(float) + QK / 2,      /* scale + packed codes */
          Q4_1_SIZE = 2 * (int)sizeof(float) + QK / 2   /* delta + min + packed codes */
      };

      float   src[QK];
      uint8_t dst[Q4_1_SIZE];          /* sized for the larger of the two formats */
      /* Zeroed up front: presumably the quantizers accumulate into the histogram
       * (confirm against ggml.h), so it must not start out indeterminate. */
      int64_t hist[HIST_BINS] = {0};

      for (int i = 0; i < QK; i++) {
          src[i] = (float)(i + 1);
      }

      /* ---- Q4_0 ---- */
      size_t size = ggml_quantize_q4_0(src, dst, QK, QK, hist);
      assert(size == (size_t)Q4_0_SIZE);

      /* The largest input is the last element of the ramp; the expected scale
       * maps it onto 7, the largest value of (1 << 3) - 1. */
      const float max_result   = load_f32(dst, 0);
      const float max_expected = src[QK - 1] / ((1 << 3) - 1);
      assert(max_result == max_expected);

      for (int i = 0; i < QK; i++) {
          const uint8_t q4_result   = nibble_at(dst + sizeof(float), i);
          /* Expected codes are stored with an offset of 8. */
          const uint8_t q4_expected = (uint8_t)(roundf(src[i] / max_expected) + 8);
          assert(q4_result == q4_expected);
      }

      /* ---- Q4_1 ---- */
      size = ggml_quantize_q4_1(src, dst, QK, QK, hist);
      assert(size == (size_t)Q4_1_SIZE);

      /* The ramp's range (last minus first element) is spread over 15 steps. */
      const float delta_result   = load_f32(dst, 0);
      const float delta_expected = (src[QK - 1] - src[0]) / ((1 << 4) - 1);
      assert(delta_result == delta_expected);

      const float min_result   = load_f32(dst, sizeof(float));
      const float min_expected = src[0];
      assert(min_result == min_expected);

      for (int i = 0; i < QK; i++) {
          const uint8_t q4_result   = nibble_at(dst + 2 * sizeof(float), i);
          const uint8_t q4_expected = (uint8_t)roundf((src[i] - min_expected) / delta_expected);
          assert(q4_result == q4_expected);
      }

      return 0;
  }