|
@@ -331,6 +331,13 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
|
|
std::vector<llama_token> embd;
|
|
std::vector<llama_token> embd;
|
|
|
|
|
|
|
|
|
|
+ // do one empty run to warm up the model
|
|
|
|
|
+ {
|
|
|
|
|
+ const std::vector<llama_token> tmp = { llama_token_bos(), };
|
|
|
|
|
+ llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
|
|
|
|
|
+ llama_reset_timings(ctx);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
|
|
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
|
|
|
// predict
|
|
// predict
|
|
|
if (embd.size() > 0) {
|
|
if (embd.size() > 0) {
|