- package engine
- import (
- "context"
- "testing"
- "makarna/pkg/backend/cpu"
- "makarna/pkg/backend/cpu/nn"
- "makarna/pkg/graph"
- "makarna/pkg/kvcache"
- "makarna/pkg/model"
- "makarna/pkg/tensor"
- )
- func TestSchedulerOffsetsPositionsAndContext(t *testing.T) {
- cfg := &model.Config{
- VocabSize: 5,
- NumLayers: 1,
- NumKVHeads: 1,
- HeadDim: 2,
- }
- mock := &mockModel{cfg: cfg}
- engine := &Engine{model: mock}
- plan := graph.BuildPlan(graph.RequestSpec{
- ID: "req-1",
- MaxContext: 8,
- BlockSize: 4,
- NumLayers: 1,
- UseAttention: true,
- LayerDevices: []tensor.DevicePlacement{{Type: tensor.CUDA, GPU: 0}},
- })
- pool, err := kvcache.NewBlockPool(kvcache.BlockPoolConfig{
- NumLayers: 1,
- NumKVHeads: 1,
- HeadDim: 2,
- BlockSize: 4,
- NumBlocks: 8,
- Device: tensor.CPU,
- GPU: 0,
- })
- if err != nil {
- t.Fatalf("NewBlockPool: %v", err)
- }
- cache := kvcache.NewPagedKVCache(pool, kvcache.PagedCacheConfig{
- NumLayers: 1,
- NumKVHeads: 1,
- HeadDim: 2,
- BlockSize: 4,
- MaxSeqLen: 8,
- Device: tensor.CPU,
- GPU: 0,
- }, "sched")
- defer cache.Free()
- sched := engine.NewScheduler(plan, cache)
- if _, err := sched.Prefill([]int{1, 2, 3}); err != nil {
- t.Fatalf("prefill failed: %v", err)
- }
- if _, err := sched.Decode([]int{4, 5}); err != nil {
- t.Fatalf("decode failed: %v", err)
- }
- if cache.SeqLen() != 5 {
- t.Fatalf("expected seqLen 5, got %d", cache.SeqLen())
- }
- if remaining := sched.RemainingContext(); remaining != 3 {
- t.Fatalf("expected remaining context 3, got %d", remaining)
- }
- if len(mock.positions) != 2 {
- t.Fatalf("expected 2 forward calls, got %d", len(mock.positions))
- }
- expectSlice(t, []int{0, 1, 2}, mock.positions[0])
- expectSlice(t, []int{3, 4}, mock.positions[1])
- if plan.Layers[0].Device.Type != tensor.CUDA || plan.Layers[0].Device.GPU != 0 {
- t.Fatalf("expected layer device cuda:0, got %+v", plan.Layers[0].Device)
- }
- }
- type mockModel struct {
- cfg *model.Config
- positions [][]int
- }
- func (m *mockModel) Forward(ctx context.Context, input tensor.Tensor, positions tensor.Tensor, kv model.KVCache) (tensor.Tensor, error) {
- seq := input.Shape()[0]
- pos := nn.ParsePositions(positions, seq)
- m.positions = append(m.positions, pos)
- return cpu.NewTensor(tensor.Shape{seq, m.cfg.VocabSize}, nil), nil
- }
- func (m *mockModel) Config() *model.Config { return m.cfg }
- func (m *mockModel) Close() error { return nil }
- func (m *mockModel) SetTensor(string, tensor.Tensor) error {
- return nil
- }
// expectSlice fails the test when got differs from want in length or in
// any element position.
func expectSlice(t *testing.T, want, got []int) {
	// Mark this as a helper so Fatalf failures are attributed to the
	// caller's line rather than to this function.
	t.Helper()
	if len(want) != len(got) {
		t.Fatalf("length mismatch want=%v got=%v", want, got)
	}
	for i := range want {
		if want[i] != got[i] {
			t.Fatalf("slice mismatch at %d: want=%d got=%d", i, want[i], got[i])
		}
	}
}