cuda_stub.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. //go:build !cuda
  2. package cuda
  3. import (
  4. "errors"
  5. "unsafe"
  6. "makarna/pkg/tensor"
  7. )
  8. var ErrCUDANotAvailable = errors.New("CUDA support not compiled in - build with -tags=cuda")
  9. // MemoryInfo returns (total, free) bytes for the current CUDA device.
  10. // In non-CUDA builds this always returns ErrCUDANotAvailable.
  11. func MemoryInfo() (total uint64, free uint64, err error) {
  12. return 0, 0, ErrCUDANotAvailable
  13. }
  14. func MemoryInfoDevice(gpu int) (total uint64, free uint64, err error) {
  15. return 0, 0, ErrCUDANotAvailable
  16. }
  17. func DeviceCount() (int, error) {
  18. return 0, ErrCUDANotAvailable
  19. }
  20. // Tensor is a stub when CUDA is not available
  21. type Tensor struct {
  22. shape tensor.Shape
  23. dtype tensor.DType
  24. gpu int
  25. }
  26. func NewTensor(shape tensor.Shape, dtype tensor.DType, gpu int) (*Tensor, error) {
  27. return nil, ErrCUDANotAvailable
  28. }
  29. func (t *Tensor) Shape() tensor.Shape { return nil }
  30. func (t *Tensor) DType() tensor.DType { return 0 }
  31. func (t *Tensor) Device() tensor.DeviceType { return tensor.CPU }
  32. func (t *Tensor) GPU() int { return -1 }
  33. func (t *Tensor) Placement() tensor.DevicePlacement {
  34. return tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
  35. }
  36. func (t *Tensor) Data() interface{} { return nil }
  37. func (t *Tensor) Free() {}
  38. func (t *Tensor) Add(other tensor.Tensor) error { return ErrCUDANotAvailable }
  39. func (t *Tensor) Mul(other tensor.Tensor) error { return ErrCUDANotAvailable }
  40. func (t *Tensor) MatMul(other, out tensor.Tensor) error { return ErrCUDANotAvailable }
  41. func (t *Tensor) Reshape(shape tensor.Shape) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable }
  42. func (t *Tensor) View(shape tensor.Shape) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable }
  43. func (t *Tensor) ViewAt(shape tensor.Shape, offsetBytes uintptr) (*Tensor, error) {
  44. return nil, ErrCUDANotAvailable
  45. }
  46. func (t *Tensor) ToDevice(device tensor.DeviceType) (tensor.Tensor, error) {
  47. return nil, ErrCUDANotAvailable
  48. }
  49. func (t *Tensor) CopyFrom(data interface{}) error { return ErrCUDANotAvailable }
  50. func (t *Tensor) CopyToHost(dst []float32) error { return ErrCUDANotAvailable }
  51. func (t *Tensor) CopyPartialFrom(dstOffset int, src []float32) error { return ErrCUDANotAvailable }
  52. func (t *Tensor) CopyPartialFromDevice(dstOffset int, src *Tensor, srcOffset int, length int) error {
  53. return ErrCUDANotAvailable
  54. }
  55. func MemcpyH2D(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable }
  56. func MemcpyD2H(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable }
  57. func MemcpyD2D(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable }
  58. func CastF32ToF16(srcF32, dstF16 unsafe.Pointer, n int, gpu int) error { return ErrCUDANotAvailable }
  59. func KDACausalShortConv1D(x, state, w unsafe.Pointer, tokens, projSize, kernel int, gpu int) error {
  60. return ErrCUDANotAvailable
  61. }
  62. func L2NormHeads(q, k unsafe.Pointer, tokens, numHeads, headDim int, eps float32, gpu int) error {
  63. return ErrCUDANotAvailable
  64. }
  65. func KDAGate(g, aLog, dtBias, out unsafe.Pointer, tokens, numHeads, headDim int, gpu int) error {
  66. return ErrCUDANotAvailable
  67. }
  68. func KDARecurrent(q, k, v, g, beta, state unsafe.Pointer, tokens, numHeads, headDim int, gpu int) error {
  69. return ErrCUDANotAvailable
  70. }
  71. func RMSNormGated(out, g, weight unsafe.Pointer, n, headDim int, eps float32, gpu int) error {
  72. return ErrCUDANotAvailable
  73. }
  74. func Sigmoid(x unsafe.Pointer, n int, gpu int) error {
  75. return ErrCUDANotAvailable
  76. }
  77. func SoftmaxRows(x unsafe.Pointer, rows, cols int, gpu int) error {
  78. return ErrCUDANotAvailable
  79. }
  80. func TopKPerRow(scores unsafe.Pointer, indices unsafe.Pointer, values unsafe.Pointer, rows, cols, k int, gpu int) error {
  81. return ErrCUDANotAvailable
  82. }
  83. func PagedAttention(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, gpu int) error {
  84. return ErrCUDANotAvailable
  85. }
  86. func PagedAttentionBatch(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, gpu int) error {
  87. return ErrCUDANotAvailable
  88. }
  89. func PagedAttentionF32F16KV(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, gpu int) error {
  90. return ErrCUDANotAvailable
  91. }
  92. func PagedAttentionBatchF32F16KV(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, gpu int) error {
  93. return ErrCUDANotAvailable
  94. }
  95. func PagedAttentionRoPEF32F16KV(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, theta float32, gpu int) error {
  96. return ErrCUDANotAvailable
  97. }
  98. func PagedAttentionBatchRoPEF32F16KV(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, theta float32, gpu int) error {
  99. return ErrCUDANotAvailable
  100. }
  101. func AllocAndCopyInt32(data []int32, gpu int) (unsafe.Pointer, error) {
  102. return nil, ErrCUDANotAvailable
  103. }
  104. func TopKLogitsF32(logits unsafe.Pointer, vocab int, repIDs []int32, repPenalty float32, k int, gpu int) ([]int32, []float32, int, error) {
  105. return nil, nil, 0, ErrCUDANotAvailable
  106. }
  107. func DequantQ8K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
  108. return ErrCUDANotAvailable
  109. }
  110. func DequantQ4K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
  111. return ErrCUDANotAvailable
  112. }
  113. func DequantQ5K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
  114. return ErrCUDANotAvailable
  115. }
  116. func DequantQ6K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
  117. return ErrCUDANotAvailable
  118. }
  119. func DequantQ3K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
  120. return ErrCUDANotAvailable
  121. }
  122. func DequantQ2K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
  123. return ErrCUDANotAvailable
  124. }
  125. func MatMulQ2K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  126. return ErrCUDANotAvailable
  127. }
  128. func MatMulQ4K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  129. return ErrCUDANotAvailable
  130. }
  131. func MatMulQ5K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  132. return ErrCUDANotAvailable
  133. }
  134. func MatMulQ3K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  135. return ErrCUDANotAvailable
  136. }
  137. func MatMulQ6K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  138. return ErrCUDANotAvailable
  139. }
  140. func MatMulQ8K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  141. return ErrCUDANotAvailable
  142. }
  143. func MatMulF16Q8K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  144. return ErrCUDANotAvailable
  145. }
  146. func MatMulF16Q4K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  147. return ErrCUDANotAvailable
  148. }
  149. func MatMulF16Q5K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  150. return ErrCUDANotAvailable
  151. }
  152. func MatMulF16Q2K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  153. return ErrCUDANotAvailable
  154. }
  155. func MatMulF16Q3K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  156. return ErrCUDANotAvailable
  157. }
  158. func MatMulF16Q6K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
  159. return ErrCUDANotAvailable
  160. }
  161. func FreeDevicePtr(ptr unsafe.Pointer) {}
  162. func Free(ptr unsafe.Pointer) {}
  163. func AllocAndCopyPtrTable(ptrs []uintptr, gpu int) (unsafe.Pointer, error) {
  164. return nil, ErrCUDANotAvailable
  165. }
  166. // Available returns whether CUDA is available
  167. func Available() bool {
  168. return false
  169. }