package engine

import (
	"testing"

	"makarna/pkg/backend/cuda"
	"makarna/pkg/loader"
	"makarna/pkg/tensor"
)

// TestPlanLayerDevicesFallsBackToCPUWhenNoCUDA verifies that when the CUDA
// device count probe reports zero devices, PlanLayerDevices places every
// layer on the CPU.
func TestPlanLayerDevicesFallsBackToCPUWhenNoCUDA(t *testing.T) {
	// Stub out the device probe; restore the real one when the test ends.
	cudaDeviceCountFn = func() (int, error) { return 0, nil }
	defer func() { cudaDeviceCountFn = cuda.DeviceCount }()

	cfg := &loader.ModelConfig{
		Params: map[string]any{
			"num_hidden_layers":       float64(4),
			"num_attention_heads":     float64(8),
			"num_key_value_heads":     float64(8),
			"head_dim":                float64(128),
			"max_position_embeddings": float64(2048),
		},
	}
	md := &loader.ModelData{}

	placements := PlanLayerDevices(md, cfg, 0.9, nil)
	if len(placements) != 4 {
		t.Fatalf("expected 4 placements, got %d", len(placements))
	}
	for i, p := range placements {
		if p.Type != tensor.CPU {
			t.Fatalf("layer %d expected CPU, got %v", i, p.Type)
		}
	}
}

// TestPlanLayerDevicesFitsInBudget verifies that when one CUDA device with
// ample free memory is reported, PlanLayerDevices places every layer on
// the GPU.
func TestPlanLayerDevicesFitsInBudget(t *testing.T) {
	// Stub both probes: one GPU, 8 GiB free of 8 GiB total.
	cudaDeviceCountFn = func() (int, error) { return 1, nil }
	cudaMemoryInfoDeviceFn = func(gpu int) (uint64, uint64, error) {
		return 8 * 1024 * 1024 * 1024, 8 * 1024 * 1024 * 1024, nil
	}
	defer func() {
		cudaDeviceCountFn = cuda.DeviceCount
		cudaMemoryInfoDeviceFn = cuda.MemoryInfoDevice
	}()

	cfg := &loader.ModelConfig{
		Params: map[string]any{
			"num_hidden_layers":       float64(2),
			"num_attention_heads":     float64(8),
			"num_key_value_heads":     float64(8),
			"head_dim":                float64(128),
			"max_position_embeddings": float64(128),
		},
	}
	md := &loader.ModelData{
		Metadata: loader.Metadata{
			Tensors: map[string]loader.TensorEntry{
				// Single 1024x1024 f32 tensor (~4 MiB) — far under budget.
				"w": {Shape: []uint64{1024, 1024}, DType: loader.F32},
			},
			ModelConfig: *cfg,
		},
	}

	placements := PlanLayerDevices(md, cfg, 0.5, nil)
	// Guard against a vacuous pass: the loop below proves nothing if the
	// planner returns an empty slice.
	if len(placements) != 2 {
		t.Fatalf("expected 2 placements, got %d", len(placements))
	}
	for i, p := range placements {
		if p.Type != tensor.CUDA {
			t.Fatalf("layer %d expected CUDA, got %v", i, p.Type)
		}
	}
}