unit_quant_test.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. package tests
  2. import (
  3. "math"
  4. "testing"
  5. "makarna/pkg/tensor"
  6. )
  // float32NearlyEqual reports whether a and b differ by at most epsilon.
  //
  // Operands that compare exactly equal are always treated as nearly equal.
  // This matters for infinities: for two infinities of the same sign a-b is
  // NaN, so the tolerance comparison alone would wrongly report a mismatch.
  // If either operand is NaN the result is false.
  func float32NearlyEqual(a, b, epsilon float32) bool {
  	// Exact match (covers +Inf == +Inf and -Inf == -Inf).
  	if a == b {
  		return true
  	}
  	return float32(math.Abs(float64(a-b))) <= epsilon
  }
  10. func TestDequantizeQ4_K(t *testing.T) {
  11. // Create a BlockQ4_K with known parameters
  12. // D = 1.0 (FP16: 0x3C00)
  13. // DMin = 0.0 (FP16: 0x0000)
  14. // Scales: all 1s (ls=1, lm=0)
  15. // QS: all 8s (0x88) -> q=8
  16. // Expected: w = 8 * (1.0 * 1.0) - 0 = 8.0
  17. // Create scales array
  18. // j < 4: scales[j] = ls, scales[j+4] = lm
  19. // ls = 1, lm = 0
  20. // scales[0..3] = 1, scales[4..7] = 0
  21. // j >= 4 (packed):
  22. // scales[j+4] (8..11) = (ls & 0xF) | ((lm & 0xF) << 4) = 1 | 0 = 1
  23. // scales[j-4] (0..3) |= ((ls >> 4) << 6) = 0
  24. // scales[j] (4..7) |= ((lm >> 4) << 6) = 0
  25. scales := [12]uint8{
  26. 1, 1, 1, 1, // ls (0..3)
  27. 0, 0, 0, 0, // lm (0..3) initially
  28. 1, 1, 1, 1, // ls/lm (4..7) packed: (1 | 0<<4) = 1
  29. }
  30. qs := [128]uint8{}
  31. for i := range qs {
  32. qs[i] = 0x88 // q=8 for both low and high nibbles
  33. }
  34. block := &tensor.BlockQ4_K{
  35. D: 0x3C00, // 1.0
  36. DMin: 0x0000, // 0.0
  37. Scales: scales,
  38. QS: qs,
  39. }
  40. out := make([]float32, 256)
  41. tensor.DequantizeQ4_K(block, out)
  42. for i, v := range out {
  43. if !float32NearlyEqual(v, 8.0, 1e-4) {
  44. t.Errorf("DequantizeQ4_K[%d] = %f, expected 8.0", i, v)
  45. }
  46. }
  47. }
  48. func TestFP16ToFP32(t *testing.T) {
  49. tests := []struct {
  50. in uint16
  51. out float32
  52. }{
  53. {0x3C00, 1.0},
  54. {0x0000, 0.0},
  55. {0xC000, -2.0},
  56. {0x7C00, float32(math.Inf(1))},
  57. }
  58. for _, tc := range tests {
  59. if res := tensor.FP16ToFP32(tc.in); res != tc.out {
  60. t.Errorf("FP16ToFP32(0x%X) = %f, expected %f", tc.in, res, tc.out)
  61. }
  62. }
  63. }
  64. func TestDequantizeQ8_K(t *testing.T) {
  65. // Create a BlockQ8_K with known parameters
  66. // D = 2.0, QS = [1, 2, 3, ..., 127, -127, -126, ..., -1, 0, ...]
  67. // Expected output: D * qs[i]
  68. d := float32(0.5)
  69. var qs [256]int8
  70. for i := 0; i < 256; i++ {
  71. qs[i] = int8((i % 255) - 127) // Range: -127 to 127
  72. }
  73. var bsums [16]int16 // Not used in dequantization
  74. block := &tensor.BlockQ8_K{
  75. D: d,
  76. QS: qs,
  77. BSums: bsums,
  78. }
  79. out := make([]float32, 256)
  80. tensor.DequantizeQ8_K(block, out)
  81. for i := 0; i < 256; i++ {
  82. expected := d * float32(qs[i])
  83. if !float32NearlyEqual(out[i], expected, 1e-6) {
  84. t.Errorf("DequantizeQ8_K[%d] = %f, expected %f", i, out[i], expected)
  85. }
  86. }
  87. }