| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- package qwen3
- import (
- "strings"
- "makarna/pkg/convert"
- "makarna/pkg/quant"
- )
- type convertPlugin struct{}
- func (convertPlugin) Apply(spec *convert.Spec) {
- prev := spec.ResolveQuant
- mixRules := map[quant.QuantType][]quant.Rule{
- quant.TypeQ4K: {
- {Pattern: "*embed_tokens*", QuantType: quant.TypeQ6K},
- {Pattern: "*norm*", QuantType: quant.TypeF32},
- },
- quant.TypeQ3K: {
- {Pattern: "*embed_tokens*", QuantType: quant.TypeQ6K},
- {Pattern: "*lm_head*", QuantType: quant.TypeQ6K},
- {Pattern: "*norm*", QuantType: quant.TypeF32},
- },
- quant.TypeQ6K: {
- {Pattern: "*embed_tokens*", QuantType: quant.TypeQ8K},
- },
- quant.TypeQ2K: {
- {Pattern: "*embed_tokens*", QuantType: quant.TypeQ6K},
- {Pattern: "*lm_head*", QuantType: quant.TypeQ6K},
- {Pattern: "*v_proj*", QuantType: quant.TypeQ3K},
- {Pattern: "*o_proj*", QuantType: quant.TypeQ3K},
- {Pattern: "*down_proj*", QuantType: quant.TypeQ3K},
- },
- }
- spec.ResolveQuant = func(name string, baseQuant quant.QuantType) quant.QuantType {
- qt := baseQuant
- if prev != nil {
- qt = prev(name, baseQuant)
- }
- if spec.MixMode {
- if rules, ok := mixRules[baseQuant]; ok {
- qt = quant.ApplyRules(name, baseQuant, rules)
- }
- }
- lname := strings.ToLower(name)
- // Keep norms in F32 when requested (safe even if tensor is not quantizable).
- if strings.Contains(lname, "norm") {
- return quant.TypeF32
- }
- // Enforce higher quality for embeddings and head even when mix mode is off.
- // This is a model-specific policy.
- if strings.Contains(lname, "embed_tokens") {
- switch baseQuant {
- case quant.TypeQ6K:
- return quant.TypeQ8K
- case quant.TypeQ2K, quant.TypeQ3K, quant.TypeQ4K:
- return quant.TypeQ6K
- }
- }
- if strings.Contains(lname, "lm_head") {
- switch baseQuant {
- case quant.TypeQ2K, quant.TypeQ3K, quant.TypeQ4K:
- return quant.TypeQ6K
- }
- }
- return qt
- }
- }
- func init() {
- convert.Register("qwen3", convertPlugin{})
- }
|