// Package compute provides device-agnostic computation dispatching.
// Operations automatically route to the appropriate backend (CPU/CUDA)
// based on tensor placement, eliminating manual device management in model code.
package compute

import (
	"fmt"

	"makarna/pkg/backend/cpu"
	"makarna/pkg/backend/cuda"
	"makarna/pkg/backend/device"
	"makarna/pkg/tensor"
)
// Context holds computation state for a forward pass.
type Context struct {
	Dispatcher *device.DeviceDispatcher
	LayerIdx   int
	Scratch    *ScratchSpace
	CPUMoE     bool // Keep MoE expert weights on CPU
}

// NewContext creates a computation context for the given layer.
func NewContext(dispatcher *device.DeviceDispatcher, layerIdx int) *Context {
	return &Context{
		Dispatcher: dispatcher,
		LayerIdx:   layerIdx,
	}
}
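
// A typical forward pass builds one Context per layer and threads it through
// that layer's ops. Illustrative sketch only (dispatcher and numLayers are
// assumed to be configured elsewhere; they are not part of this file):
//
//	for i := 0; i < numLayers; i++ {
//		ctx := compute.NewContext(dispatcher, i)
//		// ... run layer i's ops through ctx ...
//	}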
// Placement returns the current layer's device placement. With a nil
// Dispatcher everything defaults to CPU, so model code runs unchanged
// without any device configuration.
func (c *Context) Placement() tensor.DevicePlacement {
	if c.Dispatcher == nil {
		return tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
	}
	return c.Dispatcher.LayerPlacement(c.LayerIdx)
}

// IsGPU reports whether the current layer is placed on a GPU.
func (c *Context) IsGPU() bool {
	return c.Placement().Type == tensor.CUDA
}
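
// Branching on IsGPU lets model code stay backend-agnostic. Sketch (the two
// path functions are hypothetical, for illustration only):
//
//	if ctx.IsGPU() {
//		runFusedKernel(ctx, x)
//	} else {
//		runBLASPath(ctx, x)
//	}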
// EnsureWeight ensures a weight tensor is on the correct device, caching the
// transferred copy so later forward passes do not re-upload it.
func (c *Context) EnsureWeight(t tensor.Tensor, name string) (tensor.Tensor, error) {
	if c.Dispatcher == nil {
		return t, nil
	}
	placement := c.Placement()
	if placement.Type == tensor.CPU {
		return t, nil
	}
	cache := c.Dispatcher.GetWeightCache(placement.GPU)
	// Scope the cache key by layer so identically named weights in different
	// layers do not collide.
	key := fmt.Sprintf("%d:%s", c.LayerIdx, name)
	return device.EnsureOnCached(t, placement, cache, key)
}
// EnsureActivation ensures an activation tensor is on the correct device.
// Unlike weights, activations change on every forward pass, so they are
// transferred directly and never cached.
func (c *Context) EnsureActivation(t tensor.Tensor) (tensor.Tensor, error) {
	if c.Dispatcher == nil {
		return t, nil
	}
	return device.EnsureOn(t, c.Placement())
}
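
// Together, EnsureWeight and EnsureActivation form the usual preamble of a
// layer's forward pass: weights are moved once and cached, activations are
// moved fresh each pass. Sketch (field and variable names are hypothetical):
//
//	w, err := ctx.EnsureWeight(l.Wq, "wq")
//	if err != nil {
//		return err
//	}
//	x, err := ctx.EnsureActivation(hidden)
//	if err != nil {
//		return err
//	}
//	// w and x now live on the same device; dispatch the op on their type.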
// Zeros creates a zero tensor on the context's device. If CUDA is unavailable
// or the allocation fails, it falls back to a CPU tensor, so callers always
// get a usable tensor.
func Zeros(ctx *Context, shape tensor.Shape) tensor.Tensor {
	if ctx == nil || !ctx.IsGPU() || !device.CUDAAvailable() {
		return cpu.NewTensor(shape, nil)
	}
	t, err := cuda.NewTensor(shape, tensor.Float32, ctx.Placement().GPU)
	if err != nil {
		// CUDA allocation failed (e.g. out of device memory); fall back to CPU.
		return cpu.NewTensor(shape, nil)
	}
	return t
}
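
// Zeros is the usual allocator for intermediates, so they land on the same
// device as the layer's weights. Sketch (the Shape literal is illustrative):
//
//	scores := compute.Zeros(ctx, tensor.Shape{seqLen, seqLen})
//	// On GPU layers this is a CUDA tensor when allocation succeeds;
//	// otherwise (or on CPU layers) it is a CPU tensor.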
// ZerosCPU always creates a CPU tensor (for inputs/outputs).
func ZerosCPU(shape tensor.Shape) *cpu.Tensor {
	return cpu.NewTensor(shape, nil)
}
// ToCPU copies a tensor to CPU if needed.
func ToCPU(t tensor.Tensor) (*cpu.Tensor, error) {
	if cpuT, ok := t.(*cpu.Tensor); ok {
		return cpuT, nil
	}
	result, err := device.EnsureOn(t, tensor.DevicePlacement{Type: tensor.CPU, GPU: -1})
	if err != nil {
		return nil, err
	}
	// Guard the assertion rather than panicking if a backend ever returns an
	// unexpected tensor type.
	cpuT, ok := result.(*cpu.Tensor)
	if !ok {
		return nil, fmt.Errorf("expected *cpu.Tensor after transfer, got %T", result)
	}
	return cpuT, nil
}
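
// ToCPU is typically the last step before reading model outputs back into Go
// slices. Sketch (softmax is a hypothetical helper):
//
//	logitsCPU, err := compute.ToCPU(logits)
//	if err != nil {
//		return err
//	}
//	probs := softmax(logitsCPU.DataFloat32())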
// Copy copies data between tensors, handling the device combinations that
// are currently supported.
func Copy(dst, src tensor.Tensor) error {
	// CPU to CPU: a plain slice copy.
	if dstCPU, ok := dst.(*cpu.Tensor); ok {
		if srcCPU, ok := src.(*cpu.Tensor); ok {
			copy(dstCPU.DataFloat32(), srcCPU.DataFloat32())
			return nil
		}
	}
	if dstCUDA, ok := dst.(*cuda.Tensor); ok {
		if srcCUDA, ok := src.(*cuda.Tensor); ok {
			// TODO: CUDA-to-CUDA copy kernel
			_ = dstCUDA
			_ = srcCUDA
			return fmt.Errorf("CUDA-to-CUDA copy not implemented")
		}
	}
	// Cross-device (CPU<->CUDA): callers must stage through an explicit
	// transfer; see the sketch below.
	return fmt.Errorf("cross-device copy requires explicit conversion")
}
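
// Until cross-device Copy is supported, callers can stage through the CPU
// using the primitives above. Sketch (assumes dst is a *cpu.Tensor):
//
//	srcCPU, err := compute.ToCPU(src)
//	if err != nil {
//		return err
//	}
//	if err := compute.Copy(dst, srcCPU); err != nil {
//		return err
//	}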