Просмотр исходного кода

Fix prompt cache saving and chat-persistent rollover (#1678)

* Fix prompt cache saving and chat-persistent rollover (fixes #1670)

* clang-tidy

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Evan Jones 2 года назад
Родитель
Коммит
136476e898
1 изменённый файл: 7 добавлено и 6 удалено
  1. 7 6
      examples/main/main.cpp

+ 7 - 6
examples/main/main.cpp

@@ -202,6 +202,13 @@ int main(int argc, char ** argv) {
         }
     }
 
+    // if we will use the cache for the full prompt without reaching the end of the cache, force
+    // reevaluation of the last token to recalculate the cached logits
+    if (!embd_inp.empty() && n_matching_session_tokens == embd_inp.size() &&
+            session_tokens.size() > embd_inp.size()) {
+        session_tokens.resize(embd_inp.size() - 1);
+    }
+
     // number of tokens to keep when resetting context
     if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct) {
         params.n_keep = (int)embd_inp.size();
@@ -360,12 +367,6 @@ int main(int argc, char ** argv) {
                     }
                 }
                 if (i > 0) {
-                    // check if we've used up all the prompt but not all cached tokens
-                    if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
-                        // force revaluation of the last token to recalculate logits
-                        i--;
-                        n_past--;
-                    }
                     embd.erase(embd.begin(), embd.begin() + i);
                 }
             }