|
|
@@ -258,6 +258,9 @@ int main(int argc, char ** argv) {
|
|
|
params.interactive = true;
|
|
|
}
|
|
|
|
|
|
+ // determine newline token
|
|
|
+ auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
|
|
|
+
|
|
|
fprintf(stderr, "\n");
|
|
|
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
|
|
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
|
|
@@ -359,6 +362,16 @@ int main(int argc, char ** argv) {
|
|
|
last_n_tokens.push_back(id);
|
|
|
}
|
|
|
|
|
|
+ // replace end of text token with newline token when in interactive mode
|
|
|
+ if (id == llama_token_eos() && params.interactive) {
|
|
|
+ id = llama_token_newline.front();
|
|
|
+ if (params.antiprompt.size() != 0) {
|
|
|
+ // tokenize and inject first reverse prompt
|
|
|
+ const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
|
|
|
+ embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
// add it to the context
|
|
|
embd.push_back(id);
|
|
|
|
|
|
@@ -451,12 +464,8 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
// end of text token
|
|
|
if (embd.back() == llama_token_eos()) {
|
|
|
- if (params.interactive) {
|
|
|
- is_interacting = true;
|
|
|
- } else {
|
|
|
- fprintf(stderr, " [end of text]\n");
|
|
|
- break;
|
|
|
- }
|
|
|
+ fprintf(stderr, " [end of text]\n");
|
|
|
+ break;
|
|
|
}
|
|
|
|
|
|
// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
|