2 月之前 · 73a48c9790
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -742,6 +742,12 @@ class TextModel(ModelBase):
 
				         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
			
 
				             self.gguf_writer.add_expert_used_count(n_experts_used)
			
 
				             logger.info(f"gguf: experts used count = {n_experts_used}")
			
 
				+        if (n_expert_groups := self.hparams.get("n_group")) is not None:
			
 
				+            self.gguf_writer.add_expert_group_count(n_expert_groups)
			
 
				+            logger.info(f"gguf: expert groups count = {n_expert_groups}")
			
 
				+        if (n_group_used := self.hparams.get("topk_group")) is not None:
			
 
				+            self.gguf_writer.add_expert_group_used_count(n_group_used)
			
 
				+            logger.info(f"gguf: expert groups used count = {n_group_used}")
			
 
				 
			
 
				         if (head_dim := self.hparams.get("head_dim")) is not None:
			
 
				             self.gguf_writer.add_key_length(head_dim)
			
@@ -8233,8 +8239,6 @@ class BailingMoeV2Model(TextModel):
 
				         self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
			
 
				         self.gguf_writer.add_expert_count(hparams["num_experts"])
			
 
				         self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"])
			
 
				-        self.gguf_writer.add_expert_group_count(hparams["n_group"])
			
 
				-        self.gguf_writer.add_expert_group_used_count(hparams["topk_group"])
			
 
				         self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
			
 
				 
			
 
				         if hparams["score_function"] == "sigmoid":
			
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -6369,6 +6369,8 @@ void llama_model::print_info() const {
 
				         LLAMA_LOG_INFO("%s: n_ff             = %s\n",     __func__, print_f([&](uint32_t il) { return hparams.n_ff(il); }, hparams.n_layer).c_str());
			
 
				         LLAMA_LOG_INFO("%s: n_expert         = %u\n",     __func__, hparams.n_expert);
			
 
				         LLAMA_LOG_INFO("%s: n_expert_used    = %u\n",     __func__, hparams.n_expert_used);
			
 
				+        LLAMA_LOG_INFO("%s: n_expert_groups  = %d\n",     __func__, hparams.n_expert_groups);
			
 
				+        LLAMA_LOG_INFO("%s: n_group_used     = %d\n",     __func__, hparams.n_group_used);
			
 
				         LLAMA_LOG_INFO("%s: causal attn      = %d\n",     __func__, hparams.causal_attn);
			
 
				         LLAMA_LOG_INFO("%s: pooling type     = %d\n",     __func__, hparams.pooling_type);
			
 
				         LLAMA_LOG_INFO("%s: rope type        = %d\n",     __func__, hparams.rope_type);
			
@@ -6469,8 +6471,6 @@ void llama_model::print_info() const {
 
				         LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n",     __func__, hparams.n_ff_exp);
			
 
				         LLAMA_LOG_INFO("%s: n_ff_shexp           = %d\n",     __func__, hparams.n_ff_shexp);
			
 
				         LLAMA_LOG_INFO("%s: n_expert_shared      = %d\n",     __func__, hparams.n_expert_shared);
			
 
				-        LLAMA_LOG_INFO("%s: n_expert_groups      = %d\n",     __func__, hparams.n_expert_groups);
			
 
				-        LLAMA_LOG_INFO("%s: n_group_used         = %d\n",     __func__, hparams.n_group_used);
			
 
				         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n",   __func__, hparams.expert_weights_scale);
			
 
				         LLAMA_LOG_INFO("%s: expert_weights_norm  = %d\n",     __func__, hparams.expert_weights_norm);
			
 
				         LLAMA_LOG_INFO("%s: expert_gating_func   = %s\n",     __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));