2 года назад · 2e17dfd80a
--- a/main.cpp
+++ b/main.cpp
@@ -258,6 +258,9 @@ int main(int argc, char ** argv) {
 
				         params.interactive = true;
			
 
				     }
			
 
				 
			
 
				+    // determine newline token
			
 
				+    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
			
 
				+
			
 
				     fprintf(stderr, "\n");
			
 
				     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
			
 
				     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
			
@@ -359,6 +362,16 @@ int main(int argc, char ** argv) {
 
				                 last_n_tokens.push_back(id);
			
 
				             }
			
 
				 
			
 
				+            // replace end of text token with newline token when in interactive mode
			
 
				+            if (id == llama_token_eos() && params.interactive) {
			
 
				+                id = llama_token_newline.front();
			
 
				+                if (params.antiprompt.size() != 0) {
			
 
				+                    // tokenize and inject first reverse prompt
			
 
				+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
			
 
				+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				             // add it to the context
			
 
				             embd.push_back(id);
			
 
				 
			
@@ -451,12 +464,8 @@ int main(int argc, char ** argv) {
 
				 
			
 
				         // end of text token
			
 
				         if (embd.back() == llama_token_eos()) {
			
 
				-            if (params.interactive) {
			
 
				-                is_interacting = true;
			
 
				-            } else {
			
 
				-                fprintf(stderr, " [end of text]\n");
			
 
				-                break;
			
 
				-            }
			
 
				+            fprintf(stderr, " [end of text]\n");
			
 
				+            break;
			
 
				         }
			
 
				 
			
 
				         // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.