|
|
@@ -382,7 +382,9 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
|
|
|
// if all tokens are output, split by sequence
|
|
|
ubatch = balloc.split_seq(n_ubatch);
|
|
|
} else {
|
|
|
- ubatch = balloc.split_equal(n_ubatch, false);
|
|
|
+ // TODO: non-sequential equal split can be done if using unified KV cache
|
|
|
+ // for simplicity, we always use sequential equal split for now
|
|
|
+ ubatch = balloc.split_equal(n_ubatch, true);
|
|
|
}
|
|
|
|
|
|
if (ubatch.n_tokens == 0) {
|