| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- //go:build !cuda
- package cuda
- import (
- "errors"
- "unsafe"
- "makarna/pkg/tensor"
- )
// ErrCUDANotAvailable is the sentinel error returned by every fallible
// function in this file, which is only compiled when the cuda build tag is
// absent. Compare against it with errors.Is.
var ErrCUDANotAvailable = errors.New("CUDA support not compiled in - build with -tags=cuda")
- // MemoryInfo returns (total, free) bytes for the current CUDA device.
- // In non-CUDA builds this always returns ErrCUDANotAvailable.
- func MemoryInfo() (total uint64, free uint64, err error) {
- return 0, 0, ErrCUDANotAvailable
- }
- func MemoryInfoDevice(gpu int) (total uint64, free uint64, err error) {
- return 0, 0, ErrCUDANotAvailable
- }
- func DeviceCount() (int, error) {
- return 0, ErrCUDANotAvailable
- }
- // Tensor is a stub when CUDA is not available
- type Tensor struct {
- shape tensor.Shape
- dtype tensor.DType
- gpu int
- }
- func NewTensor(shape tensor.Shape, dtype tensor.DType, gpu int) (*Tensor, error) {
- return nil, ErrCUDANotAvailable
- }
- func (t *Tensor) Shape() tensor.Shape { return nil }
- func (t *Tensor) DType() tensor.DType { return 0 }
- func (t *Tensor) Device() tensor.DeviceType { return tensor.CPU }
- func (t *Tensor) GPU() int { return -1 }
- func (t *Tensor) Placement() tensor.DevicePlacement {
- return tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
- }
- func (t *Tensor) Data() interface{} { return nil }
- func (t *Tensor) Free() {}
- func (t *Tensor) Add(other tensor.Tensor) error { return ErrCUDANotAvailable }
- func (t *Tensor) Mul(other tensor.Tensor) error { return ErrCUDANotAvailable }
- func (t *Tensor) MatMul(other, out tensor.Tensor) error { return ErrCUDANotAvailable }
- func (t *Tensor) Reshape(shape tensor.Shape) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable }
- func (t *Tensor) View(shape tensor.Shape) (tensor.Tensor, error) { return nil, ErrCUDANotAvailable }
- func (t *Tensor) ViewAt(shape tensor.Shape, offsetBytes uintptr) (*Tensor, error) {
- return nil, ErrCUDANotAvailable
- }
- func (t *Tensor) ToDevice(device tensor.DeviceType) (tensor.Tensor, error) {
- return nil, ErrCUDANotAvailable
- }
- func (t *Tensor) CopyFrom(data interface{}) error { return ErrCUDANotAvailable }
- func (t *Tensor) CopyToHost(dst []float32) error { return ErrCUDANotAvailable }
- func (t *Tensor) CopyPartialFrom(dstOffset int, src []float32) error { return ErrCUDANotAvailable }
- func (t *Tensor) CopyPartialFromDevice(dstOffset int, src *Tensor, srcOffset int, length int) error {
- return ErrCUDANotAvailable
- }
- func MemcpyH2D(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable }
- func MemcpyD2H(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable }
- func MemcpyD2D(dst, src unsafe.Pointer, size uintptr, gpu int) error { return ErrCUDANotAvailable }
- func CastF32ToF16(srcF32, dstF16 unsafe.Pointer, n int, gpu int) error { return ErrCUDANotAvailable }
- func KDACausalShortConv1D(x, state, w unsafe.Pointer, tokens, projSize, kernel int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func L2NormHeads(q, k unsafe.Pointer, tokens, numHeads, headDim int, eps float32, gpu int) error {
- return ErrCUDANotAvailable
- }
- func KDAGate(g, aLog, dtBias, out unsafe.Pointer, tokens, numHeads, headDim int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func KDARecurrent(q, k, v, g, beta, state unsafe.Pointer, tokens, numHeads, headDim int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func RMSNormGated(out, g, weight unsafe.Pointer, n, headDim int, eps float32, gpu int) error {
- return ErrCUDANotAvailable
- }
- func Sigmoid(x unsafe.Pointer, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func SoftmaxRows(x unsafe.Pointer, rows, cols int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func TopKPerRow(scores unsafe.Pointer, indices unsafe.Pointer, values unsafe.Pointer, rows, cols, k int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func PagedAttention(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func PagedAttentionBatch(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func PagedAttentionF32F16KV(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func PagedAttentionBatchF32F16KV(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func PagedAttentionRoPEF32F16KV(Q, kBlocksDev, vBlocksDev, out unsafe.Pointer, seqLen, kvLen, numHeads, numKVHeads, headDim, blockSize int, scale float32, startPos int, theta float32, gpu int) error {
- return ErrCUDANotAvailable
- }
- func PagedAttentionBatchRoPEF32F16KV(Q, kBlocksFlatDev, vBlocksFlatDev, blockOffsetsDev, kvLensDev, queryPosDev, out unsafe.Pointer, numTokens, numHeads, numKVHeads, headDim, blockSize int, scale float32, maxKvLen int, theta float32, gpu int) error {
- return ErrCUDANotAvailable
- }
- func AllocAndCopyInt32(data []int32, gpu int) (unsafe.Pointer, error) {
- return nil, ErrCUDANotAvailable
- }
- func TopKLogitsF32(logits unsafe.Pointer, vocab int, repIDs []int32, repPenalty float32, k int, gpu int) ([]int32, []float32, int, error) {
- return nil, nil, 0, ErrCUDANotAvailable
- }
- func DequantQ8K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func DequantQ4K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func DequantQ5K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func DequantQ6K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func DequantQ3K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func DequantQ2K(blocks unsafe.Pointer, out unsafe.Pointer, numBlocks int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulQ2K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulQ4K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulQ5K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulQ3K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulQ6K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulQ8K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulF16Q8K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulF16Q4K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulF16Q5K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulF16Q2K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulF16Q3K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
- func MatMulF16Q6K(aPtr, bPtr, cPtr unsafe.Pointer, m, k, n int, gpu int) error {
- return ErrCUDANotAvailable
- }
// FreeDevicePtr is a no-op in non-CUDA builds; ptr is ignored.
func FreeDevicePtr(ptr unsafe.Pointer) {}
// Free is a no-op in non-CUDA builds; ptr is ignored.
func Free(ptr unsafe.Pointer) {}
- func AllocAndCopyPtrTable(ptrs []uintptr, gpu int) (unsafe.Pointer, error) {
- return nil, ErrCUDANotAvailable
- }
// Available reports whether CUDA is available. This file is only compiled
// without the cuda build tag, so the answer here is always false.
func Available() bool {
	return false
}
|