//go:build !cuda package cuda import ( "errors" "unsafe" "makarna/pkg/tensor" ) var ErrCUDANotAvailable = errors.New("CUDA support not compiled in - build with -tags=cuda") // MemoryInfo returns (total, free) bytes for the current CUDA device. // In non-CUDA builds this always returns ErrCUDANotAvailable. func MemoryInfo() (total uint64, free uint64, err error) { return 0, 0, ErrCUDANotAvailable } func MemoryInfoDevice(gpu int) (total uint64, free uint64, err error) { return 0, 0, ErrCUDANotAvailable } func DeviceCount() (int, error) { return 0, ErrCUDANotAvailable } // Tensor is a stub when CUDA is not available type Tensor struct { shape tensor.Shape dtype tensor.DType gpu int } func NewTensor(shape tensor.Shape, dtype tensor.DType, gpu int) (*Tensor, error) { return nil, ErrCUDANotAvailable } func (t *Tensor) Shape() tensor.Shape { return nil } func (t *Tensor) DType() tensor.DType { return 0 } func (t *Tensor) Device() tensor.DeviceType { return tensor.CPU } func (t *Tensor) GPU() int { return -1 } func (t *Tensor) Placement() tensor.DevicePlacement { return tensor.DevicePlacement{Type: tensor.CPU, GPU: -1} } func (t *Tensor) Data() interface{} { return nil } func (t *Tensor) Free() {} func (t *Tensor) Add(other tensor.Tensor) error { return ErrCUDANotAvailable } func (t *Tensor) Mul(other tensor.Tensor) error { return ErrCUDANotAvailable } func (t *Tensor) MatMul(other, out tensor.Tensor) error { return ErrCUDANotAvailable } func (t *Tensor) Reshape(shape tensor.Shape) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable } func (t *Tensor) View(shape tensor.Shape) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable } func (t *Tensor) ViewAt(shape tensor.Shape, offsetBytes uintptr) (*Tensor, error) { return nil, ErrCUDANotAvailable } func (t *Tensor) ToDevice(device tensor.DeviceType) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable } func (t *Tensor) CopyFrom(data interface{}) error { return ErrCUDANotAvailable } func (t *Tensor) CopyToHost(dst []float32) error { return ErrCUDANotAvailable } func (t *Tensor) CopyPartialFrom(dstOffset int, src []float32) error { return ErrCUDANotAvailable } func (t *Tensor) CopyPartialFromDevice(dstOffset int, src *Tensor, srcOffset int, length int) error { return ErrCUDANotAvailable } func MemcpyH2D(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable } func MemcpyD2H(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable } func MemcpyD2D(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable } func CastF32ToF16(srcF32, dstF16 unsafe.Pointer, n int, gpu int) error { return ErrCUDANotAvailable } func KDACausalShortConv1D(x, state, w unsafe.Pointer, tokens, projSize, kernel int, gpu int) error { return ErrCUDANotAvailable } func L2NormHeads(q, k unsafe.Pointer, tokens, numHeads, headDim int, eps float32, gpu int) error { return ErrCUDANotAvailable } func KDAGate(g, aLog, dtBias, out unsafe.Pointer, tokens, numHeads, headDim int, gpu int) error { return ErrCUDANotAvailable } func KDARecurrent(q, k, v, g, beta, state unsafe.Pointer, tokens, numHeads, headDim int, gpu int) error { return ErrCUDANotAvailable } func RMSNormGated(out, g, weight unsafe.Pointer, n, headDim int, eps float32, gpu int) error { return ErrCUDANotAvailable } func Sigmoid(x unsafe.Pointer, n int, gpu int) error { return ErrCUDANotAvailable } func SoftmaxRows(x unsafe.Pointer, rows, cols int, gpu int) error { return ErrCUDANotAvailable } func TopKPerRow(scores unsafe.Pointer, indices unsafe.Pointer, values unsafe.Pointer, rows, cols, k int, gpu int) error { return ErrCUDANotAvailable } func PagedAttention(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, gpu int) error { return ErrCUDANotAvailable } func PagedAttentionBatch(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, gpu int) error { return ErrCUDANotAvailable } func PagedAttentionF32F16KV(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, gpu int) error { return ErrCUDANotAvailable } func PagedAttentionBatchF32F16KV(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, gpu int) error { return ErrCUDANotAvailable } func PagedAttentionRoPEF32F16KV(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, theta float32, gpu int) error { return ErrCUDANotAvailable } func PagedAttentionBatchRoPEF32F16KV(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, theta float32, gpu int) error { return ErrCUDANotAvailable } func AllocAndCopyInt32(data []int32, gpu int) (unsafe.Pointer, error) { return nil, ErrCUDANotAvailable } func TopKLogitsF32(logits unsafe.Pointer, vocab int, repIDs []int32, repPenalty float32, k int, gpu int) ([]int32, []float32, int, error) { return nil, nil, 0, ErrCUDANotAvailable } func DequantQ8K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error { return ErrCUDANotAvailable } func DequantQ4K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error { return ErrCUDANotAvailable } func DequantQ5K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error { return ErrCUDANotAvailable } func DequantQ6K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error { return ErrCUDANotAvailable } func DequantQ3K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error { return ErrCUDANotAvailable } func DequantQ2K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error { return ErrCUDANotAvailable } func MatMulQ2K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulQ4K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulQ5K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulQ3K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulQ6K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulQ8K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulF16Q8K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulF16Q4K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulF16Q5K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulF16Q2K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulF16Q3K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func MatMulF16Q6K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error { return ErrCUDANotAvailable } func FreeDevicePtr(ptr unsafe.Pointer) {} func Free(ptr unsafe.Pointer) {} func AllocAndCopyPtrTable(ptrs []uintptr, gpu int) (unsafe.Pointer, error) { return nil, ErrCUDANotAvailable } // Available returns whether CUDA is available func Available() bool { return false }