Parcourir la source

context : fix reorder logic (#13267)

ggml-ci
Georgi Gerganov il y a 8 mois
Parent
commit
a75cb30dc9
1 fichier modifié avec 3 ajouts et 3 suppressions
  1. 3 3
      src/llama-context.cpp

+ 3 - 3
src/llama-context.cpp

@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     // finalize the batch processing
     kv_guard.commit();
 
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
     // set output mappings
     {
         bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
         }
     }
 
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
     // wait for the computation to finish (automatically done when obtaining the model output)
     //synchronize();