// Package kvcache defines the unified KV cache interface used by models. package kvcache import "makarna/pkg/tensor" type View struct { K tensor.Tensor V tensor.Tensor Start int Length int Device tensor.DeviceType GPU int } // PackedView exposes a head-major packed view of KV data for CPU attention. // Layout for K/V: // [kvHead][tokenWithinBlock][headDim] // Stored contiguously as a flat slice. // This is an optional fast-path and is not part of KVCacheInterface. type PackedView struct { K []float32 V []float32 Start int // inclusive Length int BlockSize int HeadDim int NumKVHeads int } // PackedViewsProvider is an optional interface implemented by caches that can // expose head-major packed KV views for fast CPU attention kernels. type PackedViewsProvider interface { ViewsPacked(layer int) []PackedView } // KVCacheInterface is the unified interface for KV cache implementations. // Both the legacy Cache and new PagedKVCache implement this interface. type KVCacheInterface interface { // SeqLen returns the current sequence length (number of computed tokens). SeqLen() int // Commit advances the sequence length after processing new tokens. Commit(newTokens int) // Append writes new K/V tokens into the cache for a layer. // Returns views of the appended data and the start position. Append(layer int, k, v tensor.Tensor) ([]View, int, error) // ContiguousKV returns a contiguous view of K/V for attention. // Returns (k, v, ok, error) where ok indicates if contiguous mode is available. ContiguousKV(layer, kvLen, kvDim int) (tensor.Tensor, tensor.Tensor, bool, error) // Views returns the live KV block views for a layer. Views(layer int) []View // IsOnGPU returns true if the cache is on GPU. IsOnGPU() bool // LayerDevice returns the device placement for a layer. LayerDevice(layer int) tensor.DevicePlacement // MaxSeqLen returns the maximum sequence length. MaxSeqLen() int // Truncate rewinds the cache to a specific sequence length. Truncate(seqLen int) // Free releases all resources. Free() } // Verify that Cache and PagedKVCache implement KVCacheInterface var _ KVCacheInterface = (*PagedKVCache)(nil)