1
0

placement_test.go 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. package engine
  2. import (
  3. "testing"
  4. "makarna/pkg/backend/cuda"
  5. "makarna/pkg/loader"
  6. "makarna/pkg/tensor"
  7. )
  8. func TestPlanLayerDevicesFallsBackToCPUWhenNoCUDA(t *testing.T) {
  9. cudaDeviceCountFn = func() (int, error) { return 0, nil }
  10. defer func() { cudaDeviceCountFn = cuda.DeviceCount }()
  11. cfg := &loader.ModelConfig{
  12. Params: map[string]any{
  13. "num_hidden_layers": float64(4),
  14. "num_attention_heads": float64(8),
  15. "num_key_value_heads": float64(8),
  16. "head_dim": float64(128),
  17. "max_position_embeddings": float64(2048),
  18. },
  19. }
  20. md := &loader.ModelData{}
  21. placements := PlanLayerDevices(md, cfg, 0.9, nil)
  22. if len(placements) != 4 {
  23. t.Fatalf("expected 4 placements, got %d", len(placements))
  24. }
  25. for i, p := range placements {
  26. if p.Type != tensor.CPU {
  27. t.Fatalf("layer %d expected CPU, got %v", i, p.Type)
  28. }
  29. }
  30. }
  31. func TestPlanLayerDevicesFitsInBudget(t *testing.T) {
  32. cudaDeviceCountFn = func() (int, error) { return 1, nil }
  33. cudaMemoryInfoDeviceFn = func(gpu int) (uint64, uint64, error) {
  34. return 8 * 1024 * 1024 * 1024, 8 * 1024 * 1024 * 1024, nil
  35. }
  36. defer func() { cudaDeviceCountFn = cuda.DeviceCount }()
  37. defer func() { cudaMemoryInfoDeviceFn = cuda.MemoryInfoDevice }()
  38. cfg := &loader.ModelConfig{
  39. Params: map[string]any{
  40. "num_hidden_layers": float64(2),
  41. "num_attention_heads": float64(8),
  42. "num_key_value_heads": float64(8),
  43. "head_dim": float64(128),
  44. "max_position_embeddings": float64(128),
  45. },
  46. }
  47. md := &loader.ModelData{
  48. Metadata: loader.Metadata{
  49. Tensors: map[string]loader.TensorEntry{
  50. "w": {Shape: []uint64{1024, 1024}, DType: loader.F32},
  51. },
  52. ModelConfig: *cfg,
  53. },
  54. }
  55. placements := PlanLayerDevices(md, cfg, 0.5, nil)
  56. for i, p := range placements {
  57. if p.Type != tensor.CUDA {
  58. t.Fatalf("layer %d expected CUDA, got %v", i, p.Type)
  59. }
  60. }
  61. }