// Package qwen3 implements the Qwen3 model family with device-agnostic execution.
// Supports: Qwen3-0.6B, Qwen3-1.7B, Qwen3-4B, Qwen3-8B, Qwen3-14B, Qwen3-32B
// The model works with both CPU and GPU placement - the compute package handles dispatching.
package qwen3
import (
	"fmt"
	"strconv"
	"strings"

	"makarna/pkg/backend/cpu"
	"makarna/pkg/model"
	"makarna/pkg/tensor"
)
// Model implements the Qwen3 architecture.
//
// Weights are populated incrementally via SetTensor during checkpoint
// loading; any field may be nil until the corresponding tensor arrives.
type Model struct {
	config   *model.Config // model hyperparameters supplied to New
	tokenEmb tensor.Tensor // "model.embed_tokens.weight"
	layers   []*Layer      // one per transformer layer, indexed by layer number
	norm     tensor.Tensor // "model.norm.weight" (final norm before the LM head)
	output   tensor.Tensor // "lm_head.weight" (output projection)
}
// Layer represents a single Qwen3 transformer layer.
//
// Fields are filled in by setTensor from checkpoint tensors named
// "model.layers.<idx>.<suffix>"; each comment below gives the suffix.
type Layer struct {
	idx               int           // layer index within Model.layers
	attnNorm          tensor.Tensor // "input_layernorm.weight"
	wq, wk, wv, wo    tensor.Tensor // "self_attn.{q,k,v,o}_proj.weight"
	qNorm, kNorm      tensor.Tensor // "self_attn.{q,k}_norm.weight"
	mlpNorm           tensor.Tensor // "post_attention_layernorm.weight"
	wGate, wUp, wDown tensor.Tensor // "mlp.{gate,up,down}_proj.weight"
}
- // New creates a new Qwen3 model
- func New(cfg *model.Config) (model.Model, error) {
- m := &Model{config: cfg, layers: make([]*Layer, cfg.NumLayers)}
- for i := range m.layers {
- m.layers[i] = &Layer{idx: i}
- }
- return m, nil
- }
- func (m *Model) Config() *model.Config { return m.config }
- func (m *Model) Close() error { return nil }
- func (m *Model) SetTensor(name string, t tensor.Tensor) error {
- switch name {
- case "model.embed_tokens.weight":
- m.tokenEmb = t
- case "model.norm.weight":
- m.norm = t
- case "lm_head.weight":
- m.output = t
- default:
- var idx int
- var suffix string
- if _, err := fmt.Sscanf(name, "model.layers.%d.%s", &idx, &suffix); err == nil && idx < len(m.layers) {
- m.layers[idx].setTensor(suffix, t)
- }
- }
- return nil
- }
- func (l *Layer) setTensor(name string, t tensor.Tensor) {
- switch name {
- case "input_layernorm.weight":
- l.attnNorm = t
- case "self_attn.q_proj.weight":
- l.wq = t
- case "self_attn.k_proj.weight":
- l.wk = t
- case "self_attn.v_proj.weight":
- l.wv = t
- case "self_attn.o_proj.weight":
- l.wo = t
- case "self_attn.q_norm.weight":
- l.qNorm = t
- case "self_attn.k_norm.weight":
- l.kNorm = t
- case "post_attention_layernorm.weight":
- l.mlpNorm = t
- case "mlp.gate_proj.weight":
- l.wGate = t
- case "mlp.up_proj.weight":
- l.wUp = t
- case "mlp.down_proj.weight":
- l.wDown = t
- }
- }
- // asCPU safely converts a tensor to *cpu.Tensor
- // This is a transitional helper - eventually all ops will be device-aware
- func asCPU(t tensor.Tensor) *cpu.Tensor {
- if ct, ok := t.(*cpu.Tensor); ok {
- return ct
- }
- panic(fmt.Sprintf("expected *cpu.Tensor, got %T", t))
- }
// Forward is implemented in forward_device.go to use device-aware operations.
// This allows the same code to work with both CPU and GPU without duplication.

// init registers the Qwen3 constructor under its architecture identifier
// so the model loader can instantiate it by name from checkpoint metadata.
func init() {
	model.Register("Qwen3ForCausalLM", New)
}