// Package qwen3 wires the Qwen3-specific quantization policy into the
// model converter.
package qwen3

import (
	"strings"

	"makarna/pkg/convert"
	"makarna/pkg/quant"
)

// convertPlugin implements the converter plugin hook for Qwen3 models.
type convertPlugin struct{}

// Apply wraps spec.ResolveQuant with Qwen3-specific policy: per-base-quant
// mix-mode rule tables, plus unconditional quality floors for norm,
// embedding, and LM-head tensors. Any previously installed resolver is
// chained and consulted first.
func (convertPlugin) Apply(spec *convert.Spec) {
	fallback := spec.ResolveQuant

	// Rule tables keyed by the requested base quant; consulted only when
	// MixMode is enabled.
	rulesByBase := map[quant.QuantType][]quant.Rule{
		quant.TypeQ4K: {
			{Pattern: "*embed_tokens*", QuantType: quant.TypeQ6K},
			{Pattern: "*norm*", QuantType: quant.TypeF32},
		},
		quant.TypeQ3K: {
			{Pattern: "*embed_tokens*", QuantType: quant.TypeQ6K},
			{Pattern: "*lm_head*", QuantType: quant.TypeQ6K},
			{Pattern: "*norm*", QuantType: quant.TypeF32},
		},
		quant.TypeQ6K: {
			{Pattern: "*embed_tokens*", QuantType: quant.TypeQ8K},
		},
		quant.TypeQ2K: {
			{Pattern: "*embed_tokens*", QuantType: quant.TypeQ6K},
			{Pattern: "*lm_head*", QuantType: quant.TypeQ6K},
			{Pattern: "*v_proj*", QuantType: quant.TypeQ3K},
			{Pattern: "*o_proj*", QuantType: quant.TypeQ3K},
			{Pattern: "*down_proj*", QuantType: quant.TypeQ3K},
		},
	}

	spec.ResolveQuant = func(name string, baseQuant quant.QuantType) quant.QuantType {
		resolved := baseQuant
		if fallback != nil {
			resolved = fallback(name, baseQuant)
		}

		// In mix mode a matching rule table replaces whatever the chained
		// resolver picked; note the rules are applied against baseQuant,
		// not against the chained result.
		if spec.MixMode {
			if rules, ok := rulesByBase[baseQuant]; ok {
				resolved = quant.ApplyRules(name, baseQuant, rules)
			}
		}

		lower := strings.ToLower(name)

		// Keep norms in F32 when requested (safe even if tensor is not quantizable).
		if strings.Contains(lower, "norm") {
			return quant.TypeF32
		}

		// Enforce higher quality for embeddings and head even when mix mode is off.
		// This is a model-specific policy. (Taking only the first matching
		// branch is equivalent to the independent checks: the lm_head base
		// set is a subset of the embed_tokens base set.)
		switch {
		case strings.Contains(lower, "embed_tokens"):
			switch baseQuant {
			case quant.TypeQ6K:
				return quant.TypeQ8K
			case quant.TypeQ2K, quant.TypeQ3K, quant.TypeQ4K:
				return quant.TypeQ6K
			}
		case strings.Contains(lower, "lm_head"):
			switch baseQuant {
			case quant.TypeQ2K, quant.TypeQ3K, quant.TypeQ4K:
				return quant.TypeQ6K
			}
		}

		return resolved
	}
}

// init registers this plugin with the converter under the "qwen3" key.
func init() {
	convert.Register("qwen3", convertPlugin{})
}