embedding.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. package nn
  2. import (
  3. "fmt"
  4. "makarna/pkg/backend/cpu"
  5. "makarna/pkg/tensor"
  6. )
  7. // Embedding looks up token embeddings
  8. // ids: token IDs
  9. // weight: [vocab_size, dim]
  10. // out: [seq_len, dim]
  11. func Embedding(ids []int, weight, out *cpu.Tensor) error {
  12. inShape := weight.Shape()
  13. if len(inShape) != 2 {
  14. return fmt.Errorf("embedding: expected 2D weight, got %v", inShape)
  15. }
  16. vocabSize := inShape[0]
  17. dim := inShape[1]
  18. oData := out.DataFloat32()
  19. // Validate output shape
  20. outShape := out.Shape()
  21. if outShape[0] != len(ids) || outShape[1] != dim {
  22. return fmt.Errorf("embedding: output shape mismatch: expected [%d, %d], got %v", len(ids), dim, outShape)
  23. }
  24. switch weight.DType() {
  25. case tensor.Float32:
  26. wData := weight.DataFloat32()
  27. for i, id := range ids {
  28. if id < 0 || id >= vocabSize {
  29. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  30. }
  31. src := wData[id*dim : (id+1)*dim]
  32. dst := oData[i*dim : (i+1)*dim]
  33. copy(dst, src)
  34. }
  35. case tensor.Float16:
  36. wData := weight.DataUint16()
  37. for i, id := range ids {
  38. if id < 0 || id >= vocabSize {
  39. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  40. }
  41. src := wData[id*dim : (id+1)*dim]
  42. dst := oData[i*dim : (i+1)*dim]
  43. for j := 0; j < dim; j++ {
  44. dst[j] = float16BitsToFloat32(src[j])
  45. }
  46. }
  47. case tensor.BFloat16:
  48. wData := weight.DataUint16()
  49. for i, id := range ids {
  50. if id < 0 || id >= vocabSize {
  51. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  52. }
  53. src := wData[id*dim : (id+1)*dim]
  54. dst := oData[i*dim : (i+1)*dim]
  55. for j := 0; j < dim; j++ {
  56. dst[j] = bfloat16BitsToFloat32(src[j])
  57. }
  58. }
  59. case tensor.Q4_K:
  60. const blockSize = 256
  61. if dim%blockSize != 0 {
  62. return fmt.Errorf("embedding: Q4_K dim %d must be multiple of %d", dim, blockSize)
  63. }
  64. wData := weight.DataQ4_K()
  65. blocksPerDim := dim / blockSize
  66. var deqBuf [256]float32
  67. for i, id := range ids {
  68. if id < 0 || id >= vocabSize {
  69. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  70. }
  71. dst := oData[i*dim : (i+1)*dim]
  72. blockStart := id * blocksPerDim
  73. for b := 0; b < blocksPerDim; b++ {
  74. block := &wData[blockStart+b]
  75. tensor.DequantizeQ4_K(block, deqBuf[:])
  76. copy(dst[b*blockSize:], deqBuf[:])
  77. }
  78. }
  79. case tensor.Q8_K:
  80. const blockSize = 256
  81. if dim%blockSize != 0 {
  82. return fmt.Errorf("embedding: Q8_K dim %d must be multiple of %d", dim, blockSize)
  83. }
  84. wData := weight.DataQ8_K()
  85. blocksPerDim := dim / blockSize
  86. var deqBuf [256]float32
  87. for i, id := range ids {
  88. if id < 0 || id >= vocabSize {
  89. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  90. }
  91. dst := oData[i*dim : (i+1)*dim]
  92. blockStart := id * blocksPerDim
  93. for b := 0; b < blocksPerDim; b++ {
  94. block := &wData[blockStart+b]
  95. tensor.DequantizeQ8_K(block, deqBuf[:])
  96. copy(dst[b*blockSize:], deqBuf[:])
  97. }
  98. }
  99. case tensor.Q3_K:
  100. const blockSize = 256
  101. if dim%blockSize != 0 {
  102. return fmt.Errorf("embedding: Q3_K dim %d must be multiple of %d", dim, blockSize)
  103. }
  104. wData := weight.DataQ3_K()
  105. blocksPerDim := dim / blockSize
  106. var deqBuf [256]float32
  107. for i, id := range ids {
  108. if id < 0 || id >= vocabSize {
  109. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  110. }
  111. dst := oData[i*dim : (i+1)*dim]
  112. blockStart := id * blocksPerDim
  113. for b := 0; b < blocksPerDim; b++ {
  114. block := &wData[blockStart+b]
  115. tensor.DequantizeQ3_K(block, deqBuf[:])
  116. copy(dst[b*blockSize:], deqBuf[:])
  117. }
  118. }
  119. case tensor.Q5_K:
  120. const blockSize = 256
  121. if dim%blockSize != 0 {
  122. return fmt.Errorf("embedding: Q5_K dim %d must be multiple of %d", dim, blockSize)
  123. }
  124. wData := weight.DataQ5_K()
  125. blocksPerDim := dim / blockSize
  126. var deqBuf [256]float32
  127. for i, id := range ids {
  128. if id < 0 || id >= vocabSize {
  129. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  130. }
  131. dst := oData[i*dim : (i+1)*dim]
  132. blockStart := id * blocksPerDim
  133. for b := 0; b < blocksPerDim; b++ {
  134. block := &wData[blockStart+b]
  135. tensor.DequantizeQ5_K(block, deqBuf[:])
  136. copy(dst[b*blockSize:], deqBuf[:])
  137. }
  138. }
  139. case tensor.Q6_K:
  140. const blockSize = 256
  141. if dim%blockSize != 0 {
  142. return fmt.Errorf("embedding: Q6_K dim %d must be multiple of %d", dim, blockSize)
  143. }
  144. wData := weight.DataQ6_K()
  145. blocksPerDim := dim / blockSize
  146. for i, id := range ids {
  147. if id < 0 || id >= vocabSize {
  148. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  149. }
  150. dst := oData[i*dim : (i+1)*dim]
  151. blockStart := id * blocksPerDim
  152. for b := 0; b < blocksPerDim; b++ {
  153. block := &wData[blockStart+b]
  154. seg := dst[b*blockSize : (b+1)*blockSize]
  155. tensor.DequantizeQ6_K(block, seg)
  156. }
  157. }
  158. case tensor.Q2_K:
  159. const blockSize = 256
  160. if dim%blockSize != 0 {
  161. return fmt.Errorf("embedding: Q2_K dim %d must be multiple of %d", dim, blockSize)
  162. }
  163. wData := weight.DataQ2_K()
  164. blocksPerDim := dim / blockSize
  165. var deqBuf [256]float32
  166. for i, id := range ids {
  167. if id < 0 || id >= vocabSize {
  168. return fmt.Errorf("embedding: id %d out of range [0, %d)", id, vocabSize)
  169. }
  170. dst := oData[i*dim : (i+1)*dim]
  171. blockStart := id * blocksPerDim
  172. for b := 0; b < blocksPerDim; b++ {
  173. block := &wData[blockStart+b]
  174. tensor.DequantizeQ2_K(block, deqBuf[:])
  175. copy(dst[b*blockSize:], deqBuf[:])
  176. }
  177. }
  178. default:
  179. return fmt.Errorf("embedding: unsupported weight dtype %v", weight.DType())
  180. }
  181. return nil
  182. }