package nn

import (
	"fmt"
	"math"

	"makarna/pkg/backend/cpu"
)

// RoPE applies Rotary Positional Embeddings to x in place.
//
// Parameters:
//   - x: tensor whose first two dimensions are [seqLen, numHeads * headDim];
//     its float32 data is modified in place.
//   - positions: position of each token in the sequence; must contain at
//     least seqLen entries (extra entries are ignored).
//   - headDim: dimension of each attention head; must be positive and must
//     divide the row width evenly.
//   - theta: RoPE base frequency (typically 10000 for Llama, 1000000 for Qwen3).
//
// This uses the split-half rotation format (HuggingFace standard):
//   - Split head into [first_half, second_half]
//   - new_first  = first * cos - second * sin
//   - new_second = second * cos + first * sin
//
// If headDim is odd, the last element of every head is left unrotated
// because it has no partner in the split-half pairing.
//
// An error is returned, and x is left unmodified, when the arguments are
// inconsistent: nil tensor, non-positive headDim, fewer than two dimensions,
// a row width that is not a multiple of headDim, too few positions, or a
// data buffer shorter than seqLen * rowWidth.
func RoPE(x *cpu.Tensor, positions []int, headDim int, theta float32) error {
	if x == nil {
		return fmt.Errorf("rope: nil tensor")
	}
	// A non-positive headDim would make the per-head loop below never advance.
	if headDim <= 0 {
		return fmt.Errorf("rope: headDim must be positive, got %d", headDim)
	}

	shape := x.Shape()
	if len(shape) < 2 {
		return fmt.Errorf("rope: expected at least 2 dimensions, got %d", len(shape))
	}
	seqLen, totalDim := shape[0], shape[1] // totalDim = numHeads * headDim
	if seqLen <= 0 || totalDim <= 0 {
		return nil // nothing to rotate
	}
	// A trailing partial head would read and write past the end of the row.
	if totalDim%headDim != 0 {
		return fmt.Errorf("rope: row width %d is not a multiple of headDim %d", totalDim, headDim)
	}
	if len(positions) < seqLen {
		return fmt.Errorf("rope: got %d positions for %d tokens", len(positions), seqLen)
	}

	data := x.DataFloat32()
	if len(data) < seqLen*totalDim {
		return fmt.Errorf("rope: tensor data has %d elements, need %d", len(data), seqLen*totalDim)
	}

	halfDim := headDim / 2
	if halfDim == 0 {
		return nil // headDim == 1: no pairs to rotate
	}

	// Inverse frequencies depend only on the pair index, so compute them once
	// per call: invFreq[j] = theta^(-2j / headDim).
	invFreqs := make([]float64, halfDim)
	for j := range invFreqs {
		invFreqs[j] = 1.0 / math.Pow(float64(theta), float64(2*j)/float64(headDim))
	}

	// The rotation angles depend on (position, j) but not on the head, so the
	// sin/cos tables are filled once per token and shared by all of its heads.
	sinTab := make([]float32, halfDim)
	cosTab := make([]float32, halfDim)

	for seq := 0; seq < seqLen; seq++ {
		pos := float64(positions[seq])
		for j, inv := range invFreqs {
			s, c := math.Sincos(pos * inv)
			sinTab[j], cosTab[j] = float32(s), float32(c)
		}

		row := data[seq*totalDim : (seq+1)*totalDim]
		for headStart := 0; headStart < totalDim; headStart += headDim {
			// Split-half indexing: element j of the first half pairs with
			// element j of the second half.
			first := row[headStart : headStart+halfDim]
			second := row[headStart+halfDim : headStart+2*halfDim]
			for j := range first {
				v0, v1 := first[j], second[j]
				first[j] = v0*cosTab[j] - v1*sinTab[j]
				second[j] = v1*cosTab[j] + v0*sinTab[j]
			}
		}
	}
	return nil
}

// RoPESingle applies RoPE to x in place using consecutive positions that
// start at pos: row i of x is rotated with position pos + i. For the usual
// single-token generation step x has one row, which is rotated at pos.
//
// headDim and theta have the same meaning as in RoPE, and any error reported
// by RoPE is returned unchanged.
func RoPESingle(x *cpu.Tensor, pos, headDim int, theta float32) error {
	if x == nil {
		return fmt.Errorf("rope: nil tensor")
	}
	shape := x.Shape()
	if len(shape) == 0 {
		return fmt.Errorf("rope: tensor has no dimensions")
	}

	positions := make([]int, shape[0])
	for i := range positions {
		positions[i] = pos + i
	}
	return RoPE(x, positions, headDim, theta)
}