
llama : fix kv_cache `n` init (close #1903)

Georgi Gerganov 2 years ago
parent revision 051e1b0e6a
3 changed files with 4 additions and 0 deletions
  1. .gitignore (+1, -0)
  2. examples/CMakeLists.txt (+1, -0)
  3. llama.cpp (+2, -0)
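
The substance of the commit is the two-line change to llama.cpp: `kv_cache_init` allocated the cache buffers but never initialized `cache.n`, the counter for how many tokens the cache currently holds, so it started with an indeterminate value. A minimal sketch of the pattern being fixed follows; it is hypothetical and heavily trimmed, and the struct and function names are illustrative, not the real llama.cpp definitions:

    // Minimal sketch, assuming `n` counts the tokens currently stored in
    // the kv cache. All names here are illustrative.
    #include <vector>

    struct kv_cache_sketch {
        std::vector<float> buf; // stand-in for the k/v tensor storage
        int n;                  // token count; note: no default initializer
    };

    static bool kv_cache_init_sketch(kv_cache_sketch & cache, size_t n_elements) {
        cache.buf.resize(2u * n_elements);
        cache.n = 0; // the fix: without this line, `n` holds an indeterminate
                     // value and the first use of the cache reads garbage
        return true;
    }

The .gitignore and examples/CMakeLists.txt hunks are housekeeping for the `simple` example: registering it with the build and ignoring its binary.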

.gitignore  +1 -0

@@ -34,6 +34,7 @@ models/*
 /perplexity
 /embedding
 /train-text-from-scratch
+/simple
 /benchmark-matmult
 /vdot
 /server

examples/CMakeLists.txt  +1 -0

@@ -38,6 +38,7 @@ else()
     add_subdirectory(benchmark)
     add_subdirectory(baby-llama)
     add_subdirectory(train-text-from-scratch)
+    add_subdirectory(simple)
     if (LLAMA_METAL)
         add_subdirectory(metal)
     endif()

llama.cpp  +2 -0

@@ -886,6 +886,7 @@ static bool kv_cache_init(
     const int64_t n_elements = n_embd*n_mem;
 
     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    cache.n = 0;
 
     struct ggml_init_params params;
     params.mem_size   = cache.buf.size;
@@ -904,6 +905,7 @@ static bool kv_cache_init(
     ggml_set_name(cache.k, "cache_k");
     ggml_set_name(cache.v, "cache_v");
 
+    (void) n_gpu_layers;
 #ifdef GGML_USE_CUBLAS
     if (n_gpu_layers > n_layer + 1) {
         ggml_cuda_assign_buffers_no_scratch(cache.v);
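
The second llama.cpp hunk is warning hygiene: `n_gpu_layers` is only referenced inside the `GGML_USE_CUBLAS` block, so in builds without cuBLAS the parameter would be unused and trip `-Wunused-parameter`. The `(void)` cast marks it as intentionally used in every configuration. A minimal sketch of the idiom (the function name is made up):

    // Sketch of the (void)-cast idiom; `demo_init` is a hypothetical name.
    static bool demo_init(int n_gpu_layers) {
        (void) n_gpu_layers; // no-op; silences -Wunused-parameter when the
                             // #ifdef below is compiled out
    #ifdef GGML_USE_CUBLAS
        if (n_gpu_layers > 0) {
            // GPU buffer assignment would go here
        }
    #endif
        return true;
    }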