// Package qwen3 implements the Qwen3 model family with device-agnostic execution.
// Supports: Qwen3-0.6B, Qwen3-1.7B, Qwen3-4B, Qwen3-8B, Qwen3-14B, Qwen3-32B
// The model works with both CPU and GPU placement - the compute package handles dispatching.
package qwen3

import (
	"fmt"

	"makarna/pkg/backend/cpu"
	"makarna/pkg/model"
	"makarna/pkg/tensor"
)

// Model implements the Qwen3 architecture.
//
// It holds the configuration plus the weight tensors that SetTensor assigns
// by checkpoint name. Tensor fields stay nil until the matching name is set.
type Model struct {
	config   *model.Config // configuration passed to New; returned by Config
	tokenEmb tensor.Tensor // set from "model.embed_tokens.weight"
	layers   []*Layer      // allocated by New, one entry per cfg.NumLayers
	norm     tensor.Tensor // set from "model.norm.weight"
	output   tensor.Tensor // set from "lm_head.weight"
}

// Layer holds the weight tensors of a single Qwen3 transformer layer.
// Every tensor field is nil until setTensor assigns it.
type Layer struct {
	// Index of this layer within Model.layers.
	idx int

	// Set from "input_layernorm.weight".
	attnNorm tensor.Tensor

	// Set from "self_attn.{q,k,v,o}_proj.weight".
	wq tensor.Tensor
	wk tensor.Tensor
	wv tensor.Tensor
	wo tensor.Tensor

	// Set from "self_attn.{q,k}_norm.weight".
	qNorm tensor.Tensor
	kNorm tensor.Tensor

	// Set from "post_attention_layernorm.weight".
	mlpNorm tensor.Tensor

	// Set from "mlp.{gate,up,down}_proj.weight".
	wGate tensor.Tensor
	wUp   tensor.Tensor
	wDown tensor.Tensor
}

// New creates a Qwen3 model for cfg with cfg.NumLayers empty layers, each
// tagged with its index. Weights are attached afterwards through SetTensor.
//
// It returns an error when cfg is nil.
func New(cfg *model.Config) (model.Model, error) {
	// Guard the dereference below: a nil config should surface as an error
	// from the constructor, not as a nil-pointer panic.
	if cfg == nil {
		return nil, fmt.Errorf("qwen3: nil config")
	}

	layers := make([]*Layer, cfg.NumLayers)
	for i := range layers {
		layers[i] = &Layer{idx: i}
	}
	return &Model{config: cfg, layers: layers}, nil
}

// Config returns the configuration the model was created with.
func (m *Model) Config() *model.Config {
	return m.config
}

// Close always returns nil.
func (m *Model) Close() error {
	return nil
}

// SetTensor stores the weight tensor t in the slot identified by its
// checkpoint name.
//
// The names "model.embed_tokens.weight", "model.norm.weight" and
// "lm_head.weight" are assigned to the model itself. Names of the form
// "model.layers.<idx>.<suffix>" are forwarded to the layer at index idx.
// Names that do not match that form, that address a layer outside
// [0, len(m.layers)), or that carry an unrecognized suffix are ignored.
//
// The returned error is always nil.
func (m *Model) SetTensor(name string, t tensor.Tensor) error {
	switch name {
	case "model.embed_tokens.weight":
		m.tokenEmb = t
	case "model.norm.weight":
		m.norm = t
	case "lm_head.weight":
		m.output = t
	default:
		var idx int
		var suffix string
		if _, err := fmt.Sscanf(name, "model.layers.%d.%s", &idx, &suffix); err != nil {
			return nil
		}
		// %d accepts a leading sign, so a name such as "model.layers.-1.x"
		// scans successfully. Check both bounds so such a name is ignored
		// instead of panicking on the slice index below.
		if idx < 0 || idx >= len(m.layers) {
			return nil
		}
		m.layers[idx].setTensor(suffix, t)
	}
	return nil
}

// setTensor assigns t to the layer field that corresponds to name, where
// name is the layer-relative weight name (the part that follows
// "model.layers.<idx>." in SetTensor). Unrecognized names are ignored.
func (l *Layer) setTensor(name string, t tensor.Tensor) {
	// Pick the destination field first, then perform a single assignment.
	var slot *tensor.Tensor
	switch name {
	case "input_layernorm.weight":
		slot = &l.attnNorm
	case "self_attn.q_proj.weight":
		slot = &l.wq
	case "self_attn.k_proj.weight":
		slot = &l.wk
	case "self_attn.v_proj.weight":
		slot = &l.wv
	case "self_attn.o_proj.weight":
		slot = &l.wo
	case "self_attn.q_norm.weight":
		slot = &l.qNorm
	case "self_attn.k_norm.weight":
		slot = &l.kNorm
	case "post_attention_layernorm.weight":
		slot = &l.mlpNorm
	case "mlp.gate_proj.weight":
		slot = &l.wGate
	case "mlp.up_proj.weight":
		slot = &l.wUp
	case "mlp.down_proj.weight":
		slot = &l.wDown
	default:
		return
	}
	*slot = t
}

// asCPU returns t as a *cpu.Tensor. It panics, naming t's dynamic type, when
// t is anything else (including a nil interface value).
//
// This is a transitional helper - eventually all ops will be device-aware.
func asCPU(t tensor.Tensor) *cpu.Tensor {
	cpuTensor, isCPU := t.(*cpu.Tensor)
	if !isCPU {
		panic(fmt.Sprintf("expected *cpu.Tensor, got %T", t))
	}
	return cpuTensor
}

// Forward is implemented in forward_device.go to use device-aware operations.
// This allows the same code to work with both CPU and GPU without duplication.

// init passes the New constructor to model.Register under the name
// "Qwen3ForCausalLM" when this package is imported.
// NOTE(review): presumably this lets a loader build the model from an
// architecture name - confirm against model.Register.
func init() {
	model.Register("Qwen3ForCausalLM", New)
}