| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- package quant
- import (
- "math/rand"
- "testing"
- "time"
- )
- func BenchmarkQuantizeQ8K(b *testing.B) {
- // 1M floats (typical small tensor)
- data := make([]float32, 1024*1024)
- r := rand.New(rand.NewSource(42))
- for i := range data {
- data[i] = r.Float32()*2 - 1 // [-1, 1]
- }
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _ = QuantizeQ8K(data)
- }
- }
- func BenchmarkQuantizeQ6K(b *testing.B) {
- data := make([]float32, 1024*1024)
- r := rand.New(rand.NewSource(42))
- for i := range data {
- data[i] = r.Float32()*2 - 1
- }
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _ = QuantizeQ6K(data)
- }
- }
- func BenchmarkQuantizeQ4K(b *testing.B) {
- data := make([]float32, 1024*1024)
- r := rand.New(rand.NewSource(42))
- for i := range data {
- data[i] = r.Float32()*2 - 1
- }
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- _ = QuantizeQ4K(data)
- }
- }
- func TestQuantizeQ8K_Basic(t *testing.T) {
- // Simple test: 256 elements
- data := make([]float32, 256)
- for i := range data {
- data[i] = float32(i-128) / 128.0 // [-1, ~1]
- }
- start := time.Now()
- result := QuantizeQ8K(data)
- elapsed := time.Since(start)
- // Expect 292 bytes (1 block)
- if len(result) != 292 {
- t.Errorf("Expected 292 bytes, got %d", len(result))
- }
- t.Logf("Q8K: 256 floats -> %d bytes in %v", len(result), elapsed)
- }
- func TestQuantizeQ6K_Basic(t *testing.T) {
- data := make([]float32, 256)
- for i := range data {
- data[i] = float32(i-128) / 128.0
- }
- start := time.Now()
- result := QuantizeQ6K(data)
- elapsed := time.Since(start)
- // Expect 210 bytes (1 block)
- if len(result) != 210 {
- t.Errorf("Expected 210 bytes, got %d", len(result))
- }
- t.Logf("Q6K: 256 floats -> %d bytes in %v", len(result), elapsed)
- }
- func TestQuantizeQ4K_Basic(t *testing.T) {
- data := make([]float32, 256)
- for i := range data {
- data[i] = float32(i-128) / 128.0
- }
- start := time.Now()
- result := QuantizeQ4K(data)
- elapsed := time.Since(start)
- // Expect 144 bytes (1 block)
- if len(result) != 144 {
- t.Errorf("Expected 144 bytes, got %d", len(result))
- }
- t.Logf("Q4K: 256 floats -> %d bytes in %v", len(result), elapsed)
- }
- func TestLargeQuantization(t *testing.T) {
- // Test with 4M elements (typical large weight matrix)
- size := 4 * 1024 * 1024
- data := make([]float32, size)
- r := rand.New(rand.NewSource(42))
- for i := range data {
- data[i] = r.Float32()*2 - 1
- }
- t.Run("Q8K_4M", func(t *testing.T) {
- start := time.Now()
- result := QuantizeQ8K(data)
- elapsed := time.Since(start)
- mbps := float64(size*4) / elapsed.Seconds() / (1024 * 1024)
- t.Logf("Q8K: %d floats (%.1f MB) -> %d bytes in %v (%.1f MB/s)",
- size, float64(size*4)/(1024*1024), len(result), elapsed, mbps)
- })
- t.Run("Q6K_4M", func(t *testing.T) {
- start := time.Now()
- result := QuantizeQ6K(data)
- elapsed := time.Since(start)
- mbps := float64(size*4) / elapsed.Seconds() / (1024 * 1024)
- t.Logf("Q6K: %d floats (%.1f MB) -> %d bytes in %v (%.1f MB/s)",
- size, float64(size*4)/(1024*1024), len(result), elapsed, mbps)
- })
- t.Run("Q4K_4M", func(t *testing.T) {
- start := time.Now()
- result := QuantizeQ4K(data)
- elapsed := time.Since(start)
- mbps := float64(size*4) / elapsed.Seconds() / (1024 * 1024)
- t.Logf("Q4K: %d floats (%.1f MB) -> %d bytes in %v (%.1f MB/s)",
- size, float64(size*4)/(1024*1024), len(result), elapsed, mbps)
- })
- }
|