|
|
@@ -3608,7 +3608,7 @@ struct server_context {
|
|
|
slot.task->params.sampling.preserved_tokens.find(token) != slot.task->params.sampling.preserved_tokens.end();
|
|
|
};
|
|
|
|
|
|
- // frist, add sampled tokens from any ongoing sequences
|
|
|
+ // first, add sampled tokens from any ongoing sequences
|
|
|
for (auto & slot : slots) {
|
|
|
if (slot.state != SLOT_STATE_GENERATING) {
|
|
|
continue;
|
|
|
@@ -3950,7 +3950,7 @@ struct server_context {
|
|
|
// If using an alora, there may be uncached tokens that come
|
|
|
// before the invocation sequence. When this happens, the
|
|
|
// tokens before the invocation sequence need to be
|
|
|
- // processed without the adpter in a separate batch, then
|
|
|
+ // processed without the adapter in a separate batch, then
|
|
|
// the adapter needs to be enabled for the remaining tokens.
|
|
|
if (lora_all_alora(slot.lora) && slot.alora_invocation_start - 1 > slot.prompt.n_tokens()) {
|
|
|
SLT_DBG(slot, "processing pre-alora tokens without the adapter (n_tokens = %d, alora_invocation_start = %d)\n", slot.prompt.n_tokens(), slot.alora_invocation_start);
|