rope.go 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. package nn
import (
	"fmt"
	"math"

	"makarna/pkg/backend/cpu"
)
  6. // RoPE applies Rotary Positional Embeddings in-place
  7. // x: [seqLen, numHeads * headDim]
  8. // positions: position for each token in sequence (len = seqLen)
  9. // headDim: dimension of each attention head
  10. // theta: RoPE base frequency (typically 10000 for Llama, 1000000 for Qwen3)
  11. //
  12. // This uses the split-half rotation format (HuggingFace standard):
  13. // - Split head into [first_half, second_half]
  14. // - new_first = first * cos - second * sin
  15. // - new_second = second * cos + first * sin
  16. func RoPE(x *cpu.Tensor, positions []int, headDim int, theta float32) error {
  17. data := x.DataFloat32()
  18. shape := x.Shape()
  19. seqLen := shape[0]
  20. totalDim := shape[1] // numHeads * headDim
  21. halfDim := headDim / 2
  22. // Precompute inverse frequencies for the half-dimension once per call
  23. invFreqs := make([]float64, halfDim)
  24. for j := 0; j < halfDim; j++ {
  25. invFreqs[j] = 1.0 / math.Pow(float64(theta), float64(2*j)/float64(headDim))
  26. }
  27. for seq := 0; seq < seqLen; seq++ {
  28. pos := positions[seq]
  29. rowStart := seq * totalDim
  30. // Apply RoPE to each head
  31. for headStart := 0; headStart < totalDim; headStart += headDim {
  32. for j := 0; j < halfDim; j++ {
  33. // Compute frequency: precomputed invFreq * position
  34. freq := float64(pos) * invFreqs[j]
  35. sin, cos := math.Sincos(freq)
  36. // Split-half indexing: pair (j, j + halfDim)
  37. idx0 := rowStart + headStart + j // First half element
  38. idx1 := rowStart + headStart + j + halfDim // Second half element
  39. v0 := data[idx0] // first half value
  40. v1 := data[idx1] // second half value
  41. // Rotation:
  42. // new_first = first * cos - second * sin
  43. // new_second = second * cos + first * sin
  44. data[idx0] = v0*float32(cos) - v1*float32(sin)
  45. data[idx1] = v1*float32(cos) + v0*float32(sin)
  46. }
  47. }
  48. }
  49. return nil
  50. }
  51. // RoPESingle applies RoPE for a single position (for single token generation)
  52. func RoPESingle(x *cpu.Tensor, pos, headDim int, theta float32) error {
  53. seqLen := x.Shape()[0]
  54. positions := make([]int, seqLen)
  55. for i := range positions {
  56. positions[i] = pos + i
  57. }
  58. return RoPE(x, positions, headDim, theta)
  59. }