|
|
@@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
|
|
|
GGML_ASSERT(!original_logits.empty());
|
|
|
}
|
|
|
llama_token id = 0;
|
|
|
- // Get a pointer to the logits
|
|
|
- float * logits = llama_get_logits_ith(ctx_main, idx);
|
|
|
|
|
|
if (temp < 0.0) {
|
|
|
// greedy sampling, with probs
|
|
|
@@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
|
|
|
}
|
|
|
|
|
|
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
|
|
+ // Get a pointer to the logits
|
|
|
+ float * logits = llama_get_logits_ith(ctx_main, idx);
|
|
|
+
|
|
|
// Create an array with a single token data element for the sampled id
|
|
|
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
|
|
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|