| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- package graph
- import "makarna/pkg/tensor"
- // ExecutionPlan describes the static computation graph for a request.
- // It is intentionally lightweight so the plan can be reused across
- // decode steps without rebuilding the structure.
- type ExecutionPlan struct {
- // RequestID links the plan to a running session.
- RequestID string
- // MaxContext tokens reserved for this request. The KV cache manager
- // must have already reserved enough blocks to satisfy this budget.
- MaxContext int
- // BlockSize controls how many tokens are packed in each KV block.
- BlockSize int
- // Layers lists per-layer stage information (prefill/decode flags).
- Layers []LayerPlan
- }
- // LayerPlan captures per-layer execution intent. The current engine only
- // needs to distinguish whether a layer participates in decode.
- type LayerPlan struct {
- Index int
- HasAttention bool
- HasMLP bool
- SupportsDecode bool
- Device tensor.DevicePlacement
- }
- // RequestSpec declares what a caller wants to run. The scheduler converts
- // this into an ExecutionPlan and hands it to the runtime.
- type RequestSpec struct {
- ID string
- MaxContext int
- BlockSize int
- NumLayers int
- UseAttention bool
- LayerDevices []tensor.DevicePlacement
- }
- // BuildPlan produces a minimal ExecutionPlan suitable for single-GPU decode.
- // The plan stays constant while the scheduler feeds new token batches.
- func BuildPlan(spec RequestSpec) ExecutionPlan {
- plan := ExecutionPlan{
- RequestID: spec.ID,
- MaxContext: spec.MaxContext,
- BlockSize: spec.BlockSize,
- Layers: make([]LayerPlan, spec.NumLayers),
- }
- for i := 0; i < spec.NumLayers; i++ {
- device := tensor.DevicePlacement{Type: tensor.CPU, GPU: -1}
- if i < len(spec.LayerDevices) {
- device = spec.LayerDevices[i].Normalize()
- }
- plan.Layers[i] = LayerPlan{
- Index: i,
- HasAttention: spec.UseAttention,
- HasMLP: true,
- SupportsDecode: true,
- Device: device,
- }
- }
- return plan
- }
|