@@ -181,7 +181,7 @@ llama_context::llama_context(

     // graph outputs buffer
     {
         // resized during inference when a batch uses more outputs
-        if ((uint32_t) output_reserve(params.n_seq_max) < params.n_seq_max) {
+        if (output_reserve(params.n_seq_max) < params.n_seq_max) {
             throw std::runtime_error("failed to reserve initial output buffer");
         }