// Package compute provides device-agnostic computation with hybrid CPU/GPU support.
package compute

import (
	"fmt"

	"makarna/pkg/backend/cpu"
	"makarna/pkg/backend/cuda"
	"makarna/pkg/backend/device"
	"makarna/pkg/tensor"
)

// Activation wraps a tensor with device tracking. It enables efficient hybrid
// execution where transfers only happen when crossing device boundaries.
type Activation struct {
	tensor    tensor.Tensor
	placement tensor.DevicePlacement
}

// NewActivation creates an activation on the specified device, falling back
// to CPU if CUDA is unavailable or the GPU allocation fails.
func NewActivation(shape tensor.Shape, placement tensor.DevicePlacement) (*Activation, error) {
	if placement.Type == tensor.CUDA && device.CUDAAvailable() {
		ct, err := cuda.NewTensor(shape, tensor.Float32, placement.GPU)
		if err == nil {
			return &Activation{tensor: ct, placement: placement.Normalize()}, nil
		}
		// GPU allocation failed; fall through to the CPU path. The error is
		// deliberately discarded so callers receive a usable activation.
	}
	t := cpu.NewTensor(shape, nil)
	placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
	return &Activation{tensor: t, placement: placement.Normalize()}, nil
}

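// exampleNewActivation is a hedged usage sketch, not part of the API: it
// requests GPU residency and then branches on the placement that was actually
// granted, since NewActivation silently falls back to CPU. The shape and gpu
// arguments are caller-supplied placeholders.
func exampleNewActivation(shape tensor.Shape, gpu int) (*Activation, error) {
	act, err := NewActivation(shape, tensor.DevicePlacement{Type: tensor.CUDA, GPU: gpu})
	if err != nil {
		return nil, err
	}
	if !act.IsGPU() {
		fmt.Println("CUDA unavailable, running on CPU")
	}
	return act, nil
}
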
// NewActivationFrom wraps an existing tensor, deriving the placement from its
// concrete type.
func NewActivationFrom(t tensor.Tensor) *Activation {
	var placement tensor.DevicePlacement
	if ct, ok := t.(*cuda.Tensor); ok {
		placement = tensor.DevicePlacement{Type: tensor.CUDA, GPU: ct.GPU()}
	} else {
		placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
	}
	return &Activation{tensor: t, placement: placement.Normalize()}
}

// Tensor returns the underlying tensor.
func (a *Activation) Tensor() tensor.Tensor {
	return a.tensor
}

// Placement returns the current device placement.
func (a *Activation) Placement() tensor.DevicePlacement {
	return a.placement
}

// IsGPU returns true if the activation is on GPU.
func (a *Activation) IsGPU() bool {
	return a.placement.Type == tensor.CUDA
}

// Shape returns the tensor shape.
func (a *Activation) Shape() tensor.Shape {
	return a.tensor.Shape()
}

// EnsureOn moves the activation to the target device if needed.
// Returns true if a transfer occurred.
func (a *Activation) EnsureOn(target tensor.DevicePlacement) (transferred bool, err error) {
	target = target.Normalize()
	// Already on the target device: nothing to do.
	if a.placement == target {
		return false, nil
	}
	newTensor, err := device.EnsureOn(a.tensor, target)
	if err != nil {
		return false, fmt.Errorf("activation transfer %v -> %v: %w", a.placement, target, err)
	}
	// Free the old GPU tensor to prevent a device memory leak, but never the
	// tensor we just received back.
	if oldCT, ok := a.tensor.(*cuda.Tensor); ok && oldCT != nil && a.tensor != newTensor {
		oldCT.Free()
	}
	a.tensor = newTensor
	a.placement = target
	return true, nil
}

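// exampleEnsureOn is a hedged sketch of the transfer discipline this type is
// built for: hop to the GPU for device-side work, then back to the CPU, with
// the transferred flag used only for instrumentation. The gpu index is a
// caller-supplied placeholder.
func exampleEnsureOn(act *Activation, gpu int) error {
	moved, err := act.EnsureOn(tensor.DevicePlacement{Type: tensor.CUDA, GPU: gpu})
	if err != nil {
		return err
	}
	if moved {
		fmt.Println("host -> device copy occurred")
	}
	// ... run GPU ops against act.Tensor() here ...
	_, err = act.EnsureOn(tensor.DevicePlacement{Type: tensor.CPU, GPU: -1})
	return err
}
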
// AsCPU returns the tensor as *cpu.Tensor, transferring if needed.
func (a *Activation) AsCPU() (*cpu.Tensor, error) {
	if _, err := a.EnsureOn(tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}); err != nil {
		return nil, err
	}
	return a.tensor.(*cpu.Tensor), nil
}

// AsCUDA returns the tensor as *cuda.Tensor, transferring if needed.
func (a *Activation) AsCUDA(gpu int) (*cuda.Tensor, error) {
	if _, err := a.EnsureOn(tensor.DevicePlacement{Type: tensor.CUDA, GPU: gpu}); err != nil {
		return nil, err
	}
	return a.tensor.(*cuda.Tensor), nil
}

// ReplaceWith replaces the underlying tensor and updates placement.
func (a *Activation) ReplaceWith(t tensor.Tensor) {
	// Free the previous CUDA buffer unless the caller passed the same tensor
	// back in. (If t is not a CUDA tensor, newCT is nil and the old buffer is
	// always freed.)
	if oldCT, ok := a.tensor.(*cuda.Tensor); ok && oldCT != nil {
		if newCT, _ := t.(*cuda.Tensor); oldCT != newCT {
			oldCT.Free()
		}
	}
	a.tensor = t
	if ct, ok := t.(*cuda.Tensor); ok {
		a.placement = tensor.DevicePlacement{Type: tensor.CUDA, GPU: ct.GPU()}
	} else {
		a.placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
	}
}

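// exampleReplaceWith is a hedged sketch: an op allocates its own output
// tensor and ReplaceWith swaps it in, freeing the old GPU buffer and
// re-deriving placement so no manual bookkeeping is needed. opOutput stands
// in for any op result; it is not a value produced by this package.
func exampleReplaceWith(act *Activation, opOutput tensor.Tensor) {
	act.ReplaceWith(opOutput)
	fmt.Println("now on:", act.Placement())
}
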
// Clone creates a deep copy of the activation on the same device.
func (a *Activation) Clone() (*Activation, error) {
	if a.IsGPU() {
		ct := a.tensor.(*cuda.Tensor)
		newT, err := cuda.NewTensor(ct.Shape(), ct.DType(), ct.GPU())
		if err != nil {
			return nil, err
		}
		// Simple GPU-to-GPU path: stage through a host buffer via CopyToHost
		// then CopyFrom. This assumes Float32 element data, matching the
		// []float32 host-copy API.
		tempBuf := make([]float32, ct.Shape().NumElements())
		if err := ct.CopyToHost(tempBuf); err != nil {
			newT.Free()
			return nil, err
		}
		if err := newT.CopyFrom(tempBuf); err != nil {
			newT.Free()
			return nil, err
		}
		return &Activation{tensor: newT, placement: a.placement}, nil
	}
	// CPU clone: allocate a fresh tensor and copy the backing slice.
	src := a.tensor.(*cpu.Tensor)
	dst := cpu.NewTensor(src.Shape(), nil)
	copy(dst.DataFloat32(), src.DataFloat32())
	return &Activation{tensor: dst, placement: a.placement}, nil
}

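// exampleSnapshot is a hedged sketch: Clone takes a same-device deep copy so
// a destructive in-place op can later be compared against or rolled back; the
// copy must be freed explicitly when it lives on the GPU.
func exampleSnapshot(act *Activation) error {
	snapshot, err := act.Clone()
	if err != nil {
		return err
	}
	defer FreeActivation(snapshot)
	// ... mutate act in place, then diff against snapshot ...
	return nil
}
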
// CopyFrom copies data from a CPU tensor into this activation, uploading to
// the GPU when the activation is device-resident.
func (a *Activation) CopyFrom(t *cpu.Tensor) error {
	if a.IsGPU() {
		return a.tensor.(*cuda.Tensor).CopyFrom(t.DataFloat32())
	}
	src := t.DataFloat32()
	dst := a.tensor.(*cpu.Tensor).DataFloat32()
	copy(dst, src)
	return nil
}

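// exampleReuse is a hedged sketch: CopyFrom refills an existing activation
// from host data without reallocating, so one long-lived GPU buffer can be
// reused across steps instead of paying an alloc/free per step. batches is a
// caller-supplied slice of staging tensors with matching shapes.
func exampleReuse(act *Activation, batches []*cpu.Tensor) error {
	for _, b := range batches {
		if err := act.CopyFrom(b); err != nil {
			return err
		}
		// ... run the step against act ...
	}
	return nil
}
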
// FreeActivation frees GPU memory if the activation is on GPU.
// Safe to call on nil or CPU activations.
func FreeActivation(a *Activation) {
	if a == nil {
		return
	}
	if ct, ok := a.tensor.(*cuda.Tensor); ok && ct != nil {
		ct.Free()
		// Drop the reference so a second FreeActivation call is a no-op
		// rather than a double free.
		a.tensor = nil
	}
}