@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     // finalize the batch processing
     kv_guard.commit();
 
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
     // set output mappings
     {
         bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
         }
     }
 
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
     // wait for the computation to finish (automatically done when obtaining the model output)
     //synchronize();
 