package engine

import (
	"testing"

	"makarna/pkg/backend/cuda"
	"makarna/pkg/loader"
	"makarna/pkg/tensor"
)
- func TestPlanLayerDevicesFallsBackToCPUWhenNoCUDA(t *testing.T) {
- cudaDeviceCountFn = func() (int, error) { return 0, nil }
- defer func() { cudaDeviceCountFn = cuda.DeviceCount }()
- cfg := &loader.ModelConfig{
- Params: map[string]any{
- "num_hidden_layers": float64(4),
- "num_attention_heads": float64(8),
- "num_key_value_heads": float64(8),
- "head_dim": float64(128),
- "max_position_embeddings": float64(2048),
- },
- }
- md := &loader.ModelData{}
- placements := PlanLayerDevices(md, cfg, 0.9, nil)
- if len(placements) != 4 {
- t.Fatalf("expected 4 placements, got %d", len(placements))
- }
- for i, p := range placements {
- if p.Type != tensor.CPU {
- t.Fatalf("layer %d expected CPU, got %v", i, p.Type)
- }
- }
- }
- func TestPlanLayerDevicesFitsInBudget(t *testing.T) {
- cudaDeviceCountFn = func() (int, error) { return 1, nil }
- cudaMemoryInfoDeviceFn = func(gpu int) (uint64, uint64, error) {
- return 8 * 1024 * 1024 * 1024, 8 * 1024 * 1024 * 1024, nil
- }
- defer func() { cudaDeviceCountFn = cuda.DeviceCount }()
- defer func() { cudaMemoryInfoDeviceFn = cuda.MemoryInfoDevice }()
- cfg := &loader.ModelConfig{
- Params: map[string]any{
- "num_hidden_layers": float64(2),
- "num_attention_heads": float64(8),
- "num_key_value_heads": float64(8),
- "head_dim": float64(128),
- "max_position_embeddings": float64(128),
- },
- }
- md := &loader.ModelData{
- Metadata: loader.Metadata{
- Tensors: map[string]loader.TensorEntry{
- "w": {Shape: []uint64{1024, 1024}, DType: loader.F32},
- },
- ModelConfig: *cfg,
- },
- }
- placements := PlanLayerDevices(md, cfg, 0.5, nil)
- for i, p := range placements {
- if p.Type != tensor.CUDA {
- t.Fatalf("layer %d expected CUDA, got %v", i, p.Type)
- }
- }
- }