// Package quant provides quantization configuration and functions package quant import ( "path/filepath" "strings" "makarna/pkg/loader" ) // QuantType represents a quantization type type QuantType string const ( TypeF32 QuantType = "f32" TypeF16 QuantType = "f16" TypeQ2K QuantType = "q2_k" TypeQ3K QuantType = "q3_k" TypeQ4K QuantType = "q4_k" TypeQ5K QuantType = "q5_k" TypeQ6K QuantType = "q6_k" TypeQ8K QuantType = "q8_k" ) // Rule defines a pattern-based quantization rule type Rule struct { Pattern string QuantType QuantType } // ApplyRules determines the quant type for a tensor based on an ordered rule list. // First match wins; if no match, returns baseQuant. func ApplyRules(tensorName string, baseQuant QuantType, rules []Rule) QuantType { for _, rule := range rules { if matchPattern(tensorName, rule.Pattern) { return rule.QuantType } } return baseQuant } // ToDType converts QuantType to loader.DType func (q QuantType) ToDType() loader.DType { switch q { case TypeF32: return loader.F32 case TypeF16: return loader.F16 case TypeQ2K: return loader.Q2_K case TypeQ3K: return loader.Q3_K case TypeQ4K: return loader.Q4_K case TypeQ5K: return loader.Q5_K case TypeQ6K: return loader.Q6_K case TypeQ8K: return loader.Q8_K default: return loader.F32 } } // matchPattern matches a tensor name against a glob-like pattern func matchPattern(name, pattern string) bool { pattern = strings.ToLower(pattern) name = strings.ToLower(name) // Handle *something* pattern (contains) if strings.HasPrefix(pattern, "*") && strings.HasSuffix(pattern, "*") { inner := pattern[1 : len(pattern)-1] return strings.Contains(name, inner) } // Try filepath.Match if matched, _ := filepath.Match(pattern, name); matched { return true } // Fallback: check if pattern (without wildcards) is contained cleanPattern := strings.ReplaceAll(pattern, "*", "") if cleanPattern != "" && strings.Contains(name, cleanPattern) { return true } return false }