Ver código fonte

llama_context: synchronize before reallocating output buffer (#17974)

Jeff Bolz 1 mês atrás
pai
commit
5266379bca
1 arquivo alterado com 1 adição e 0 exclusões
  1. 1 0
      src/llama-context.cpp

+ 1 - 0
src/llama-context.cpp

@@ -1318,6 +1318,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
             // This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
             LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
+            synchronize();
             buf_output = nullptr;
             logits = nullptr;
             embd = nullptr;