  1. // Package compute provides device-agnostic computation with hybrid CPU/GPU support.
  2. package compute
  3. import (
  4. "fmt"
  5. "makarna/pkg/backend/cpu"
  6. "makarna/pkg/backend/cuda"
  7. "makarna/pkg/backend/device"
  8. "makarna/pkg/tensor"
  9. )
// Activation wraps a tensor with device tracking.
// It enables efficient hybrid execution where transfers only happen
// when crossing device boundaries.
//
// The zero value is not usable; construct with NewActivation or
// NewActivationFrom so that placement matches the backing tensor.
type Activation struct {
	tensor    tensor.Tensor          // backing data; *cuda.Tensor when on GPU, otherwise *cpu.Tensor
	placement tensor.DevicePlacement // device the tensor currently lives on (kept normalized)
}
  17. // NewActivation creates an activation on the specified device.
  18. func NewActivation(shape tensor.Shape, placement tensor.DevicePlacement) (*Activation, error) {
  19. var t tensor.Tensor
  20. var err error
  21. if placement.Type == tensor.CUDA && device.CUDAAvailable() {
  22. t, err = cuda.NewTensor(shape, tensor.Float32, placement.GPU)
  23. if err != nil {
  24. // Fallback to CPU
  25. t = cpu.NewTensor(shape, nil)
  26. placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
  27. }
  28. } else {
  29. t = cpu.NewTensor(shape, nil)
  30. placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
  31. }
  32. return &Activation{tensor: t, placement: placement.Normalize()}, err
  33. }
  34. // NewActivationFrom wraps an existing tensor.
  35. func NewActivationFrom(t tensor.Tensor) *Activation {
  36. var placement tensor.DevicePlacement
  37. if ct, ok := t.(*cuda.Tensor); ok {
  38. placement = tensor.DevicePlacement{Type: tensor.CUDA, GPU: ct.GPU()}
  39. } else {
  40. placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
  41. }
  42. return &Activation{tensor: t, placement: placement.Normalize()}
  43. }
  44. // Tensor returns the underlying tensor.
  45. func (a *Activation) Tensor() tensor.Tensor {
  46. return a.tensor
  47. }
  48. // Placement returns the current device placement.
  49. func (a *Activation) Placement() tensor.DevicePlacement {
  50. return a.placement
  51. }
  52. // IsGPU returns true if the activation is on GPU.
  53. func (a *Activation) IsGPU() bool {
  54. return a.placement.Type == tensor.CUDA
  55. }
  56. // Shape returns the tensor shape.
  57. func (a *Activation) Shape() tensor.Shape {
  58. return a.tensor.Shape()
  59. }
  60. // EnsureOn moves the activation to the target device if needed.
  61. // Returns true if a transfer occurred.
  62. func (a *Activation) EnsureOn(target tensor.DevicePlacement) (transferred bool, err error) {
  63. target = target.Normalize()
  64. // Already on target device
  65. if a.placement == target {
  66. return false, nil
  67. }
  68. // Transfer needed
  69. newTensor, err := device.EnsureOn(a.tensor, target)
  70. if err != nil {
  71. return false, fmt.Errorf("activation transfer %v -> %v: %w", a.placement, target, err)
  72. }
  73. // Free old GPU tensor to prevent memory leak
  74. if oldCT, ok := a.tensor.(*cuda.Tensor); ok && oldCT != nil {
  75. oldCT.Free()
  76. }
  77. a.tensor = newTensor
  78. a.placement = target
  79. return true, nil
  80. }
  81. // AsCPU returns the tensor as *cpu.Tensor, transferring if needed.
  82. func (a *Activation) AsCPU() (*cpu.Tensor, error) {
  83. if _, err := a.EnsureOn(tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}); err != nil {
  84. return nil, err
  85. }
  86. return a.tensor.(*cpu.Tensor), nil
  87. }
  88. // AsCUDA returns the tensor as *cuda.Tensor, transferring if needed.
  89. func (a *Activation) AsCUDA(gpu int) (*cuda.Tensor, error) {
  90. if _, err := a.EnsureOn(tensor.DevicePlacement{Type: tensor.CUDA, GPU: gpu}); err != nil {
  91. return nil, err
  92. }
  93. return a.tensor.(*cuda.Tensor), nil
  94. }
  95. // ReplaceWith replaces the underlying tensor and updates placement.
  96. func (a *Activation) ReplaceWith(t tensor.Tensor) {
  97. if a.tensor != nil {
  98. if oldCT, ok := a.tensor.(*cuda.Tensor); ok {
  99. if newCT, ok2 := t.(*cuda.Tensor); ok2 {
  100. if oldCT != newCT {
  101. oldCT.Free()
  102. }
  103. } else {
  104. oldCT.Free()
  105. }
  106. }
  107. }
  108. a.tensor = t
  109. if ct, ok := t.(*cuda.Tensor); ok {
  110. a.placement = tensor.DevicePlacement{Type: tensor.CUDA, GPU: ct.GPU()}
  111. } else {
  112. a.placement = tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
  113. }
  114. }
  115. // Clone creates a deep copy of the activation on the same device.
  116. func (a *Activation) Clone() (*Activation, error) {
  117. if a.IsGPU() {
  118. ct := a.tensor.(*cuda.Tensor)
  119. newT, err := cuda.NewTensor(ct.Shape(), ct.DType(), ct.GPU())
  120. if err != nil {
  121. return nil, err
  122. }
  123. // Copy GPU to GPU using CopyToHost then CopyFrom (simple path)
  124. tempBuf := make([]float32, ct.Shape().NumElements())
  125. if err := ct.CopyToHost(tempBuf); err != nil {
  126. return nil, err
  127. }
  128. if err := newT.CopyFrom(tempBuf); err != nil {
  129. return nil, err
  130. }
  131. return &Activation{tensor: newT, placement: a.placement}, nil
  132. }
  133. // CPU clone
  134. src := a.tensor.(*cpu.Tensor)
  135. dst := cpu.NewTensor(src.Shape(), nil)
  136. copy(dst.DataFloat32(), src.DataFloat32())
  137. return &Activation{tensor: dst, placement: a.placement}, nil
  138. }
  139. // CopyFrom copies data from a CPU tensor to this activation
  140. func (a *Activation) CopyFrom(t *cpu.Tensor) error {
  141. if a.IsGPU() {
  142. return a.tensor.(*cuda.Tensor).CopyFrom(t.DataFloat32())
  143. }
  144. src := t.DataFloat32()
  145. dst := a.tensor.(*cpu.Tensor).DataFloat32()
  146. copy(dst, src)
  147. return nil
  148. }
  149. // FreeActivation frees GPU memory if the activation is on GPU.
  150. // Safe to call on nil or CPU activations.
  151. func FreeActivation(a *Activation) {
  152. if a == nil {
  153. return
  154. }
  155. if ct, ok := a.tensor.(*cuda.Tensor); ok && ct != nil {
  156. ct.Free()
  157. }
  158. }