@@ -378,7 +378,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
     if (ctx_sampling->grammar != NULL && !apply_grammar) {
         GGML_ASSERT(original_logits != NULL);
         // Only make a copy of the original logits if we are not applying grammar checks, not sure if I actually have to do this.
-        *original_logits = {logits, logits + llama_n_vocab(llama_get_model(ctx_main))};
+        *original_logits = {logits, logits + n_vocab};
     }
 
     // apply params.logit_bias map
@@ -391,10 +391,10 @@ static llama_token_data_array llama_sampling_prepare_impl(
         llama_sample_apply_guidance(ctx_main, logits, logits_guidance, params.cfg_scale);
     }
 
-    cur.clear();
+    cur.resize(n_vocab);
 
     for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-        cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
+        cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
     }
 
     llama_token_data_array cur_p = { cur.data(), cur.size(), false };
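
Notes on the two hunks (editorial commentary, not part of the patch): the first hunk reuses the n_vocab value already in scope in llama_sampling_prepare_impl instead of re-deriving it through llama_n_vocab(llama_get_model(ctx_main)) at the copy site; the second replaces clear() plus per-token emplace_back with a single resize(n_vocab) followed by indexed writes, which removes the size/capacity branch that emplace_back performs on every iteration of this hot loop. Below is a minimal standalone sketch of the before/after fill patterns; the token_data struct and N are illustrative stand-ins for llama_token_data and the real vocabulary size, not code from the patch.

#include <cstdio>
#include <vector>

// Illustrative stand-in for llama_token_data (same shape: id, logit, p).
struct token_data {
    int   id;
    float logit;
    float p;
};

int main() {
    const int N = 32000; // stand-in for n_vocab
    std::vector<token_data> cur;

    // Before: capacity survives clear(), but each emplace_back still
    // checks size against capacity and bumps the size counter.
    cur.clear();
    for (int i = 0; i < N; i++) {
        cur.emplace_back(token_data{i, 0.0f, 0.0f});
    }

    // After: size the vector once, then write by index; the loop body
    // becomes plain stores into contiguous memory with no branching.
    cur.resize(N);
    for (int i = 0; i < N; i++) {
        cur[i] = token_data{i, 0.0f, 0.0f};
    }

    std::printf("filled %zu entries\n", cur.size());
    return 0;
}

One caveat: resize(N) value-initializes the newly created elements before the loop overwrites them, so the gain comes from dropping the per-element branch, not from skipping initialization entirely.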