@@ -1050,6 +1050,9 @@ int llama_context::decode(llama_batch & inp_batch) {
     // finalize the batch processing
     kv_guard.commit();
 
+    // set to total number of outputs in the batch, for use in llama_get_logits_ith
+    n_outputs = n_outputs_all;
+
     // set output mappings
     {
         bool sorted_output = true;
@@ -1103,9 +1106,6 @@ int llama_context::decode(llama_batch & inp_batch) {
         }
     }
 
-    // set to total number of outputs in the batch, for use in llama_get_logits_ith
-    n_outputs = n_outputs_all;
-
     // wait for the computation to finish (automatically done when obtaining the model output)
     //synchronize();
 