@@ -181,7 +181,7 @@ llama_context::llama_context(

     // graph outputs buffer
     {
         // resized during inference when a batch uses more outputs
-        if ((uint32_t) output_reserve(params.n_seq_max) < params.n_seq_max) {
+        if (output_reserve(params.n_seq_max) < params.n_seq_max) {
             throw std::runtime_error("failed to reserve initial output buffer");
         }