|
|
@@ -252,11 +252,6 @@ llama_tokens common_speculative_gen_draft(
|
|
|
// add drafted token for each sequence
|
|
|
const llama_token id = cur_p->data[0].id;
|
|
|
|
|
|
- // only collect very high-confidence draft tokens
|
|
|
- if (cur_p->data[0].p < params.p_min) {
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
common_sampler_accept(smpl, id, true);
|
|
|
|
|
|
result.push_back(id);
|
|
|
@@ -265,6 +260,11 @@ llama_tokens common_speculative_gen_draft(
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
+ // stop drafting once confidence drops below p_min (this token was already added to the result above)
|
|
|
+ if (cur_p->data[0].p < params.p_min) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
common_batch_add(batch, id, n_past + i + 1, { 0 }, true);
|
|
|
|
|
|
// evaluate the drafted tokens on the draft model
|