// Package compute provides device-agnostic computation with hybrid CPU/GPU support. package compute import ( "fmt" "makarna/pkg/backend/cpu" "makarna/pkg/backend/cuda" "makarna/pkg/backend/device" "makarna/pkg/tensor" ) // Activation wraps a tensor with device tracking. // It enables efficient hybrid execution where transfers only happen // when crossing device boundaries. type Activation struct { tensor tensor.Tensor placement tensor.DevicePlacement } // NewActivation creates an activation on the specified device. func NewActivation(shape tensor.Shape, placement tensor.DevicePlacement) (*Activation, error) { var t tensor.Tensor var err error if placement.Type == tensor.CUDA && device.CUDAAvailable() { t, err = cuda.NewTensor(shape, tensor.Float32, placement.GPU) if err != nil { // Fallback to CPU t = cpu.NewTensor(shape, nil) placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1} } } else { t = cpu.NewTensor(shape, nil) placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1} } return &Activation{tensor: t, placement: placement.Normalize()}, err } // NewActivationFrom wraps an existing tensor. func NewActivationFrom(t tensor.Tensor) *Activation { var placement tensor.DevicePlacement if ct, ok := t.(*cuda.Tensor); ok { placement = tensor.DevicePlacement{Type: tensor.CUDA, GPU: ct.GPU()} } else { placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1} } return &Activation{tensor: t, placement: placement.Normalize()} } // Tensor returns the underlying tensor. func (a *Activation) Tensor() tensor.Tensor { return a.tensor } // Placement returns the current device placement. func (a *Activation) Placement() tensor.DevicePlacement { return a.placement } // IsGPU returns true if the activation is on GPU. func (a *Activation) IsGPU() bool { return a.placement.Type == tensor.CUDA } // Shape returns the tensor shape. func (a *Activation) Shape() tensor.Shape { return a.tensor.Shape() } // EnsureOn moves the activation to the target device if needed. // Returns true if a transfer occurred. func (a *Activation) EnsureOn(target tensor.DevicePlacement) (transferred bool, err error) { target = target.Normalize() // Already on target device if a.placement == target { return false, nil } // Transfer needed newTensor, err := device.EnsureOn(a.tensor, target) if err != nil { return false, fmt.Errorf("activation transfer %v -> %v: %w", a.placement, target, err) } // Free old GPU tensor to prevent memory leak if oldCT, ok := a.tensor.(*cuda.Tensor); ok && oldCT != nil { oldCT.Free() } a.tensor = newTensor a.placement = target return true, nil } // AsCPU returns the tensor as *cpu.Tensor, transferring if needed. func (a *Activation) AsCPU() (*cpu.Tensor, error) { if _, err := a.EnsureOn(tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}); err != nil { return nil, err } return a.tensor.(*cpu.Tensor), nil } // AsCUDA returns the tensor as *cuda.Tensor, transferring if needed. func (a *Activation) AsCUDA(gpu int) (*cuda.Tensor, error) { if _, err := a.EnsureOn(tensor.DevicePlacement{Type: tensor.CUDA, GPU: gpu}); err != nil { return nil, err } return a.tensor.(*cuda.Tensor), nil } // ReplaceWith replaces the underlying tensor and updates placement. func (a *Activation) ReplaceWith(t tensor.Tensor) { if a.tensor != nil { if oldCT, ok := a.tensor.(*cuda.Tensor); ok { if newCT, ok2 := t.(*cuda.Tensor); ok2 { if oldCT != newCT { oldCT.Free() } } else { oldCT.Free() } } } a.tensor = t if ct, ok := t.(*cuda.Tensor); ok { a.placement = tensor.DevicePlacement{Type: tensor.CUDA, GPU: ct.GPU()} } else { a.placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1} } } // Clone creates a deep copy of the activation on the same device. func (a *Activation) Clone() (*Activation, error) { if a.IsGPU() { ct := a.tensor.(*cuda.Tensor) newT, err := cuda.NewTensor(ct.Shape(), ct.DType(), ct.GPU()) if err != nil { return nil, err } // Copy GPU to GPU using CopyToHost then CopyFrom (simple path) tempBuf := make([]float32, ct.Shape().NumElements()) if err := ct.CopyToHost(tempBuf); err != nil { return nil, err } if err := newT.CopyFrom(tempBuf); err != nil { return nil, err } return &Activation{tensor: newT, placement: a.placement}, nil } // CPU clone src := a.tensor.(*cpu.Tensor) dst := cpu.NewTensor(src.Shape(), nil) copy(dst.DataFloat32(), src.DataFloat32()) return &Activation{tensor: dst, placement: a.placement}, nil } // CopyFrom copies data from a CPU tensor to this activation func (a *Activation) CopyFrom(t *cpu.Tensor) error { if a.IsGPU() { return a.tensor.(*cuda.Tensor).CopyFrom(t.DataFloat32()) } src := t.DataFloat32() dst := a.tensor.(*cpu.Tensor).DataFloat32() copy(dst, src) return nil } // FreeActivation frees GPU memory if the activation is on GPU. // Safe to call on nil or CPU activations. func FreeActivation(a *Activation) { if a == nil { return } if ct, ok := a.tensor.(*cuda.Tensor); ok && ct != nil { ct.Free() } }