conv1d.go

package nn

import (
	"fmt"
	"math"

	"makarna/pkg/backend/cpu"
)
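
// ActivationKind selects the elementwise activation applied to the
// convolution output.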
type ActivationKind uint8

const (
	ActivationNone ActivationKind = iota
	ActivationSiLU
	ActivationTanh
	ActivationReLU
)
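
// applyActivation applies the activation selected by act to a single value.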
func applyActivation(x float32, act ActivationKind) float32 {
	switch act {
	case ActivationNone:
		return x
	case ActivationSiLU:
		return x * Sigmoid(x)
	case ActivationReLU:
		if x < 0 {
			return 0
		}
		return x
	case ActivationTanh:
		return float32(math.Tanh(float64(x)))
	default:
		return x
	}
}
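
// FlattenConvWeights flattens the depthwise convolution weights in w into a
// slice of length projSize*kernel, laid out channel-major: entry d*kernel+j is
// the j-th tap of channel d. Weights stored as [projSize, kernel] or
// [projSize, 1, kernel] are returned as-is; [kernel, projSize] and
// [kernel, 1, projSize] layouts are transposed. Any other shape with enough
// elements is assumed to be flat already.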
func FlattenConvWeights(w *cpu.Tensor, projSize int, kernel int) ([]float32, error) {
	if w == nil {
		return nil, fmt.Errorf("missing conv weights")
	}
	data := w.DataFloat32()
	expected := projSize * kernel
	shape := w.Shape()
	if shape.NumElements() < expected || len(data) < expected {
		return nil, fmt.Errorf("unexpected conv weight size %d", len(data))
	}
	if len(shape) == 2 {
		if shape[0] == projSize && shape[1] == kernel {
			return data[:expected], nil
		}
		if shape[0] == kernel && shape[1] == projSize {
			// Transpose [kernel, projSize] -> [projSize, kernel].
			out := make([]float32, expected)
			for d := 0; d < projSize; d++ {
				for j := 0; j < kernel; j++ {
					out[d*kernel+j] = data[j*projSize+d]
				}
			}
			return out, nil
		}
	}
	if len(shape) == 3 {
		if shape[0] == projSize && shape[1] == 1 && shape[2] == kernel {
			return data[:expected], nil
		}
		if shape[0] == kernel && shape[1] == 1 && shape[2] == projSize {
			// Transpose [kernel, 1, projSize] -> [projSize, kernel].
			out := make([]float32, expected)
			for d := 0; d < projSize; d++ {
				for j := 0; j < kernel; j++ {
					out[d*kernel+j] = data[j*projSize+d]
				}
			}
			return out, nil
		}
	}
	// Unknown layout but enough data: assume it is already flat.
	if len(data) >= expected {
		return data[:expected], nil
	}
	return nil, fmt.Errorf("unexpected conv weight size %d", len(data))
}
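
// CausalShortConv1DInplaceAct applies a depthwise causal 1-D convolution of
// width kernel over xFlat and then the activation act, writing the result back
// into xFlat. xFlat holds tokens*projSize values, token-major; state holds the
// previous kernel-1 inputs per channel as a [projSize, kernel-1] tensor and is
// updated in place, so successive calls process a stream of tokens causally.
//
// A minimal usage sketch (convState, convWeight, and nTokens are placeholder
// names owned by the caller, not part of this package):
//
//	if err := CausalShortConv1DInplaceAct(xFlat, convState, convWeight,
//		nTokens, projSize, kernel, ActivationSiLU); err != nil {
//		return err
//	}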
func CausalShortConv1DInplaceAct(xFlat []float32, state *cpu.Tensor, w *cpu.Tensor, tokens int, projSize int, kernel int, act ActivationKind) error {
	if kernel <= 1 {
		// Degenerate kernel: the convolution is the identity, only the
		// activation applies.
		for i := range xFlat {
			xFlat[i] = applyActivation(xFlat[i], act)
		}
		return nil
	}
	convLen := kernel - 1
	if state == nil {
		return fmt.Errorf("nil conv state")
	}
	if state.Shape().NumElements() != projSize*convLen {
		return fmt.Errorf("conv state shape mismatch %v", state.Shape())
	}
	weights, err := FlattenConvWeights(w, projSize, kernel)
	if err != nil {
		return err
	}
	st := state.DataFloat32()
	out := make([]float32, len(xFlat))
	for t := 0; t < tokens; t++ {
		base := t * projSize
		for d := 0; d < projSize; d++ {
			// Convolve the kernel-1 cached inputs plus the current input.
			acc := float32(0)
			wBase := d * kernel
			for j := 0; j < convLen; j++ {
				acc += weights[wBase+j] * st[d*convLen+j]
			}
			acc += weights[wBase+convLen] * xFlat[base+d]
			out[base+d] = applyActivation(acc, act)
		}
		// Shift the per-channel state left by one and append the current input.
		if convLen > 0 {
			for d := 0; d < projSize; d++ {
				off := d * convLen
				copy(st[off:off+convLen-1], st[off+1:off+convLen])
				st[off+convLen-1] = xFlat[base+d]
			}
		}
	}
	copy(xFlat, out)
	return nil
}

// CausalShortConv1DInplace is the backward-compatible API: it applies the
// causal short conv1d followed by SiLU.
func CausalShortConv1DInplace(xFlat []float32, state *cpu.Tensor, w *cpu.Tensor, tokens int, projSize int, kernel int) error {
	return CausalShortConv1DInplaceAct(xFlat, state, w, tokens, projSize, kernel, ActivationSiLU)
}