| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- package cpu
- import (
- "fmt"
- "unsafe"
- "makarna/pkg/tensor"
- )
- // Tensor implements tensor.Tensor for CPU
- type Tensor struct {
- shape tensor.Shape
- dtype tensor.DType
- dataFloat32 []float32
- dataUint16 []uint16
- dataQ4_K []tensor.BlockQ4_K
- dataQ3_K []tensor.BlockQ3_K
- dataQ5_K []tensor.BlockQ5_K
- dataQ6_K []tensor.BlockQ6_K
- dataQ8_K []tensor.BlockQ8_K
- dataQ2_K []tensor.BlockQ2_K
- }
- func (t *Tensor) Placement() tensor.DevicePlacement {
- return tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
- }
- // NewTensor creates a new Float32 tensor
- func NewTensor(shape tensor.Shape, data []float32) *Tensor {
- if data == nil {
- data = make([]float32, shape.NumElements())
- }
- return &Tensor{
- shape: shape,
- dtype: tensor.Float32,
- dataFloat32: data,
- }
- }
- func NewTensorU16(shape tensor.Shape, dtype tensor.DType, data []uint16) (*Tensor, error) {
- if dtype != tensor.Float16 && dtype != tensor.BFloat16 {
- return nil, fmt.Errorf("unsupported u16 tensor dtype: %v", dtype)
- }
- if data == nil {
- data = make([]uint16, shape.NumElements())
- }
- if len(data) != shape.NumElements() {
- return nil, fmt.Errorf("size mismatch for u16 tensor: expected %d, got %d", shape.NumElements(), len(data))
- }
- return &Tensor{shape: shape, dtype: dtype, dataUint16: data}, nil
- }
- // NewTensorFromBytes creates a tensor from raw bytes (zero-copy mmap)
- func NewTensorFromBytes(shape tensor.Shape, dtype tensor.DType, data []byte) (*Tensor, error) {
- if len(data) == 0 {
- return nil, fmt.Errorf("empty data for tensor")
- }
- ptr := unsafe.Pointer(&data[0])
- t := &Tensor{
- shape: shape,
- dtype: dtype,
- }
- switch dtype {
- case tensor.Float32:
- expectedSize := shape.NumElements() * 4
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for F32 tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- // Zero-copy cast
- t.dataFloat32 = unsafe.Slice((*float32)(ptr), shape.NumElements())
- case tensor.Float16, tensor.BFloat16:
- expectedSize := shape.NumElements() * 2
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for %v tensor: expected %d bytes, got %d", dtype, expectedSize, len(data))
- }
- t.dataUint16 = unsafe.Slice((*uint16)(ptr), shape.NumElements())
- case tensor.Q4_K:
- const blockSize = 256
- const blockBytes = 144 // 2(D)+2(DMin)+12(scales)+128(qs)
- elemCount := shape.NumElements()
- if elemCount%blockSize != 0 {
- return nil, fmt.Errorf("Q4_K tensor elements must be multiple of %d", blockSize)
- }
- numBlocks := elemCount / blockSize
- expectedSize := numBlocks * blockBytes
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for Q4_K tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- t.dataQ4_K = unsafe.Slice((*tensor.BlockQ4_K)(ptr), numBlocks)
- case tensor.Q8_K:
- const blockSize = 256
- const blockBytes = 292 // 4(D)+256(qs)+32(bsums)
- elemCount := shape.NumElements()
- if elemCount%blockSize != 0 {
- return nil, fmt.Errorf("Q8_K tensor elements must be multiple of %d", blockSize)
- }
- numBlocks := elemCount / blockSize
- expectedSize := numBlocks * blockBytes
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for Q8_K tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- t.dataQ8_K = unsafe.Slice((*tensor.BlockQ8_K)(ptr), numBlocks)
- case tensor.Q3_K:
- const blockSize = 256
- const blockBytes = 110 // 32(hmask)+64(qs)+12(scales)+2(D)
- elemCount := shape.NumElements()
- if elemCount%blockSize != 0 {
- return nil, fmt.Errorf("Q3_K tensor elements must be multiple of %d", blockSize)
- }
- numBlocks := elemCount / blockSize
- expectedSize := numBlocks * blockBytes
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for Q3_K tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- t.dataQ3_K = unsafe.Slice((*tensor.BlockQ3_K)(ptr), numBlocks)
- case tensor.Q5_K:
- const blockSize = 256
- const blockBytes = 176
- elemCount := shape.NumElements()
- if elemCount%blockSize != 0 {
- return nil, fmt.Errorf("Q5_K tensor elements must be multiple of %d", blockSize)
- }
- numBlocks := elemCount / blockSize
- expectedSize := numBlocks * blockBytes
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for Q5_K tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- t.dataQ5_K = unsafe.Slice((*tensor.BlockQ5_K)(ptr), numBlocks)
- case tensor.Q6_K:
- const blockSize = 256
- const blockBytes = 210 // 128(ql)+64(qh)+16(scales)+2(D)
- elemCount := shape.NumElements()
- if elemCount%blockSize != 0 {
- return nil, fmt.Errorf("Q6_K tensor elements must be multiple of %d", blockSize)
- }
- numBlocks := elemCount / blockSize
- expectedSize := numBlocks * blockBytes
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for Q6_K tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- t.dataQ6_K = unsafe.Slice((*tensor.BlockQ6_K)(ptr), numBlocks)
- case tensor.Q2_K:
- const blockSize = 256
- const blockBytes = 84 // 16(scales)+64(qs)+2(D)+2(DMin)
- elemCount := shape.NumElements()
- if elemCount%blockSize != 0 {
- return nil, fmt.Errorf("Q2_K tensor elements must be multiple of %d", blockSize)
- }
- numBlocks := elemCount / blockSize
- expectedSize := numBlocks * blockBytes
- if len(data) != expectedSize {
- return nil, fmt.Errorf("size mismatch for Q2_K tensor: expected %d bytes, got %d", expectedSize, len(data))
- }
- t.dataQ2_K = unsafe.Slice((*tensor.BlockQ2_K)(ptr), numBlocks)
- default:
- return nil, fmt.Errorf("unsupported tensor dtype: %v", dtype)
- }
- return t, nil
- }
- // Shape returns tensor dimensions
- func (t *Tensor) Shape() tensor.Shape {
- return t.shape
- }
- // DType returns data type
- func (t *Tensor) DType() tensor.DType {
- return t.dtype
- }
- // Device returns CPU
- func (t *Tensor) Device() tensor.DeviceType {
- return tensor.CPU
- }
- // Data returns raw data pointer.
- // Use DataFloat32/DataQ4_K etc for type-safe access.
- func (t *Tensor) Data() interface{} {
- switch t.dtype {
- case tensor.Float32:
- return unsafe.Pointer(&t.dataFloat32[0])
- case tensor.Float16, tensor.BFloat16:
- return unsafe.Pointer(&t.dataUint16[0])
- case tensor.Q4_K:
- return unsafe.Pointer(&t.dataQ4_K[0])
- case tensor.Q3_K:
- return unsafe.Pointer(&t.dataQ3_K[0])
- case tensor.Q5_K:
- return unsafe.Pointer(&t.dataQ5_K[0])
- case tensor.Q6_K:
- return unsafe.Pointer(&t.dataQ6_K[0])
- case tensor.Q8_K:
- return unsafe.Pointer(&t.dataQ8_K[0])
- case tensor.Q2_K:
- return unsafe.Pointer(&t.dataQ2_K[0])
- default:
- panic(fmt.Sprintf("internal error: unsupported dtype %v in Data()", t.dtype))
- }
- }
- // DataFloat32 returns the underlying float32 slice directly
- func (t *Tensor) DataFloat32() []float32 {
- return t.dataFloat32
- }
- func (t *Tensor) DataUint16() []uint16 {
- return t.dataUint16
- }
- // DataQ4_K returns the underlying Q4_K block slice
- func (t *Tensor) DataQ4_K() []tensor.BlockQ4_K {
- return t.dataQ4_K
- }
- // DataQ3_K returns the underlying Q3_K block slice
- func (t *Tensor) DataQ3_K() []tensor.BlockQ3_K {
- return t.dataQ3_K
- }
- func (t *Tensor) DataQ5_K() []tensor.BlockQ5_K {
- return t.dataQ5_K
- }
- // DataQ6_K returns the underlying Q6_K block slice
- func (t *Tensor) DataQ6_K() []tensor.BlockQ6_K {
- return t.dataQ6_K
- }
- // DataQ8_K returns the underlying Q8_K block slice
- func (t *Tensor) DataQ8_K() []tensor.BlockQ8_K {
- return t.dataQ8_K
- }
- // DataQ2_K returns the underlying Q2_K block slice
- func (t *Tensor) DataQ2_K() []tensor.BlockQ2_K {
- return t.dataQ2_K
- }
|