// Package qwen3 implements the Qwen3 model family with device-agnostic execution. // Supports: Qwen3-0.6B, Qwen3-1.7B, Qwen3-4B, Qwen3-8B, Qwen3-14B, Qwen3-32B // The model works with both CPU and GPU placement - the compute package handles dispatching. package qwen3 import ( "fmt" "makarna/pkg/backend/cpu" "makarna/pkg/model" "makarna/pkg/tensor" ) // Model implements the Qwen3 architecture type Model struct { config *model.Config tokenEmb tensor.Tensor layers []*Layer norm tensor.Tensor output tensor.Tensor } // Layer represents a single Qwen3 transformer layer type Layer struct { idx int attnNorm tensor.Tensor wq, wk, wv, wo tensor.Tensor qNorm, kNorm tensor.Tensor mlpNorm tensor.Tensor wGate, wUp, wDown tensor.Tensor } // New creates a new Qwen3 model func New(cfg *model.Config) (model.Model, error) { m := &Model{config: cfg, layers: make([]*Layer, cfg.NumLayers)} for i := range m.layers { m.layers[i] = &Layer{idx: i} } return m, nil } func (m *Model) Config() *model.Config { return m.config } func (m *Model) Close() error { return nil } func (m *Model) SetTensor(name string, t tensor.Tensor) error { switch name { case "model.embed_tokens.weight": m.tokenEmb = t case "model.norm.weight": m.norm = t case "lm_head.weight": m.output = t default: var idx int var suffix string if _, err := fmt.Sscanf(name, "model.layers.%d.%s", &idx, &suffix); err == nil && idx < len(m.layers) { m.layers[idx].setTensor(suffix, t) } } return nil } func (l *Layer) setTensor(name string, t tensor.Tensor) { switch name { case "input_layernorm.weight": l.attnNorm = t case "self_attn.q_proj.weight": l.wq = t case "self_attn.k_proj.weight": l.wk = t case "self_attn.v_proj.weight": l.wv = t case "self_attn.o_proj.weight": l.wo = t case "self_attn.q_norm.weight": l.qNorm = t case "self_attn.k_norm.weight": l.kNorm = t case "post_attention_layernorm.weight": l.mlpNorm = t case "mlp.gate_proj.weight": l.wGate = t case "mlp.up_proj.weight": l.wUp = t case "mlp.down_proj.weight": l.wDown = t } } // asCPU safely converts a tensor to *cpu.Tensor // This is a transitional helper - eventually all ops will be device-aware func asCPU(t tensor.Tensor) *cpu.Tensor { if ct, ok := t.(*cpu.Tensor); ok { return ct } panic(fmt.Sprintf("expected *cpu.Tensor, got %T", t)) } // Forward is implemented in forward_device.go to use device-aware operations. // This allows the same code to work with both CPU and GPU without duplication. func init() { model.Register("Qwen3ForCausalLM", New) }