Преглед на файлове

llama : fix compatibility with old 2 expert models (#6735)

slaren преди 1 година
родител
ревизия
c71bfd736e
променен е 1 файл, в който е добавен 1 ред и е изтрит 1 ред
  1. llama.cpp  +1  -1

+ 1 - 1
llama.cpp

@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 
 
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
 
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {