|
@@ -798,7 +798,8 @@ int main(int argc, char ** argv) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
|
|
// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
|
|
|
- if (params.interactive && n_remain <= 0 && params.n_predict != -1) {
|
|
|
|
|
|
|
+ // We skip this logic when n_predict == -1 (infinite) or -2 (stop at context size).
|
|
|
|
|
+ if (params.interactive && n_remain <= 0 && params.n_predict >= 0) {
|
|
|
n_remain = params.n_predict;
|
|
n_remain = params.n_predict;
|
|
|
is_interacting = true;
|
|
is_interacting = true;
|
|
|
}
|
|
}
|