package quant import ( "math/rand" "testing" "time" ) func BenchmarkQuantizeQ8K(b *testing.B) { // 1M floats (typical small tensor) data := make([]float32, 1024*1024) r := rand.New(rand.NewSource(42)) for i := range data { data[i] = r.Float32()*2 - 1 // [-1, 1] } b.ResetTimer() for i := 0; i < b.N; i++ { _ = QuantizeQ8K(data) } } func BenchmarkQuantizeQ6K(b *testing.B) { data := make([]float32, 1024*1024) r := rand.New(rand.NewSource(42)) for i := range data { data[i] = r.Float32()*2 - 1 } b.ResetTimer() for i := 0; i < b.N; i++ { _ = QuantizeQ6K(data) } } func BenchmarkQuantizeQ4K(b *testing.B) { data := make([]float32, 1024*1024) r := rand.New(rand.NewSource(42)) for i := range data { data[i] = r.Float32()*2 - 1 } b.ResetTimer() for i := 0; i < b.N; i++ { _ = QuantizeQ4K(data) } } func TestQuantizeQ8K_Basic(t *testing.T) { // Simple test: 256 elements data := make([]float32, 256) for i := range data { data[i] = float32(i-128) / 128.0 // [-1, ~1] } start := time.Now() result := QuantizeQ8K(data) elapsed := time.Since(start) // Expect 292 bytes (1 block) if len(result) != 292 { t.Errorf("Expected 292 bytes, got %d", len(result)) } t.Logf("Q8K: 256 floats -> %d bytes in %v", len(result), elapsed) } func TestQuantizeQ6K_Basic(t *testing.T) { data := make([]float32, 256) for i := range data { data[i] = float32(i-128) / 128.0 } start := time.Now() result := QuantizeQ6K(data) elapsed := time.Since(start) // Expect 210 bytes (1 block) if len(result) != 210 { t.Errorf("Expected 210 bytes, got %d", len(result)) } t.Logf("Q6K: 256 floats -> %d bytes in %v", len(result), elapsed) } func TestQuantizeQ4K_Basic(t *testing.T) { data := make([]float32, 256) for i := range data { data[i] = float32(i-128) / 128.0 } start := time.Now() result := QuantizeQ4K(data) elapsed := time.Since(start) // Expect 144 bytes (1 block) if len(result) != 144 { t.Errorf("Expected 144 bytes, got %d", len(result)) } t.Logf("Q4K: 256 floats -> %d bytes in %v", len(result), elapsed) } func TestLargeQuantization(t *testing.T) { // Test with 4M elements (typical large weight matrix) size := 4 * 1024 * 1024 data := make([]float32, size) r := rand.New(rand.NewSource(42)) for i := range data { data[i] = r.Float32()*2 - 1 } t.Run("Q8K_4M", func(t *testing.T) { start := time.Now() result := QuantizeQ8K(data) elapsed := time.Since(start) mbps := float64(size*4) / elapsed.Seconds() / (1024 * 1024) t.Logf("Q8K: %d floats (%.1f MB) -> %d bytes in %v (%.1f MB/s)", size, float64(size*4)/(1024*1024), len(result), elapsed, mbps) }) t.Run("Q6K_4M", func(t *testing.T) { start := time.Now() result := QuantizeQ6K(data) elapsed := time.Since(start) mbps := float64(size*4) / elapsed.Seconds() / (1024 * 1024) t.Logf("Q6K: %d floats (%.1f MB) -> %d bytes in %v (%.1f MB/s)", size, float64(size*4)/(1024*1024), len(result), elapsed, mbps) }) t.Run("Q4K_4M", func(t *testing.T) { start := time.Now() result := QuantizeQ4K(data) elapsed := time.Since(start) mbps := float64(size*4) / elapsed.Seconds() / (1024 * 1024) t.Logf("Q4K: %d floats (%.1f MB) -> %d bytes in %v (%.1f MB/s)", size, float64(size*4)/(1024*1024), len(result), elapsed, mbps) }) }