Quellcode durchsuchen

model : detect GigaChat3-10-A1.8B as deepseek lite (#17420)

* Detect GigaChat3-10-A1.8B as deepseek lite

Hardcodes checking number of layers to detect if lite version of deepseek.

* Add commnent identifying deepseek lite variants

deepseek lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
ubergarm vor 1 Monat
Ursprung
Commit
23bc779a6e
2 geänderte Dateien mit 6 neuen und 3 gelöschten Zeilen
  1. 4 2
      src/llama-model.cpp
  2. 2 1
      src/models/deepseek2.cpp

+ 4 - 2
src/llama-model.cpp

@@ -1593,7 +1593,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_DEEPSEEK2:
             {
-                bool is_lite = (hparams.n_layer == 27);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT,   hparams.n_layer_dense_lead);
                 if (!is_lite) {
@@ -4581,7 +4582,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_DEEPSEEK2:
                 {
-                    const bool is_lite = (hparams.n_layer == 27);
+                    // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                    const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
 
                     const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
 

+ 2 - 1
src/models/deepseek2.cpp

@@ -4,7 +4,8 @@
 
 llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
     llm_graph_context(params) {
-    bool is_lite = (hparams.n_layer == 27);
+    // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+    bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
 
     const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);