hai 2 meses · 0f715b4e75
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -3608,7 +3608,7 @@ struct server_context {
 
				                 slot.task->params.sampling.preserved_tokens.find(token) != slot.task->params.sampling.preserved_tokens.end();
			
 
				         };
			
 
				 
			
 
				-        // frist, add sampled tokens from any ongoing sequences
			
 
				+        // first, add sampled tokens from any ongoing sequences
			
 
				         for (auto & slot : slots) {
			
 
				             if (slot.state != SLOT_STATE_GENERATING) {
			
 
				                 continue;
			
@@ -3950,7 +3950,7 @@ struct server_context {
 
				                     // If using an alora, there may be uncached tokens that come
			
 
				                     // before the invocation sequence. When this happens, the
			
 
				                     // tokens before the invocation sequence need to be
			
 
				-                    // processed without the adpter in a separate batch, then
			
 
				+                    // processed without the adapter in a separate batch, then
			
 
				                     // the adapter needs to be enabled for the remaining tokens.
			
 
				                     if (lora_all_alora(slot.lora) && slot.alora_invocation_start - 1 > slot.prompt.n_tokens()) {
			
 
				                         SLT_DBG(slot, "processing pre-alora tokens without the adapter (n_tokens = %d, alora_invocation_start = %d)\n", slot.prompt.n_tokens(), slot.alora_invocation_start);