@@ -1593,7 +1593,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_DEEPSEEK2:
             {
-                bool is_lite = (hparams.n_layer == 27);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
                 if (!is_lite) {
@@ -4581,7 +4582,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_DEEPSEEK2:
             {
-                const bool is_lite = (hparams.n_layer == 27);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
 
                 const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
 