The return type of 'output_reserve' is already 'uint32_t', so the explicit cast is unnecessary and can be removed.
@@ -181,7 +181,7 @@ llama_context::llama_context(
// graph outputs buffer
{
// resized during inference when a batch uses more outputs
- if ((uint32_t) output_reserve(params.n_seq_max) < params.n_seq_max) {
+ if (output_reserve(params.n_seq_max) < params.n_seq_max) {
throw std::runtime_error("failed to reserve initial output buffer");
}