Piotr Wilkin 3 months ago
parent
commit ce87b7d78e
2 changed files with 3 additions and 2 deletions
  1. convert_hf_to_gguf.py (+2 -1)
  2. src/llama-model.cpp (+1 -1)

convert_hf_to_gguf.py (+2 -1)

@@ -3760,7 +3760,8 @@ class Qwen3NextModel(Qwen3MoeModel):
         self.gguf_writer.add_ssm_group_count(self.find_hparam(["linear_num_key_heads"]))
         self.gguf_writer.add_ssm_time_step_rank(self.find_hparam(["linear_num_value_heads"]))
         self.gguf_writer.add_ssm_inner_size(self.find_hparam(['linear_value_head_dim']) * self.find_hparam(['linear_num_value_heads']))
-        rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+        if (rope_dim := self.hparams.get("head_dim")) is None:
+            rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.25)))
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
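
Note: the converter change above prefers an explicit head_dim from the model config and only falls back to deriving it from hidden_size / num_attention_heads when that key is absent. A minimal sketch of the same fallback pattern, using a hypothetical hparams dict with made-up values:

    # Hypothetical config without an explicit head_dim (illustrative values only).
    hparams = {"hidden_size": 2048, "num_attention_heads": 16}

    if (rope_dim := hparams.get("head_dim")) is None:
        # Older configs omit head_dim; derive it from the model width instead.
        rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]

    # Same computation the converter feeds to add_rope_dimension_count.
    rope_dimension_count = int(rope_dim * hparams.get("partial_rotary_factor", 0.25))
    print(rope_dim, rope_dimension_count)  # 128 32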

src/llama-model.cpp (+1 -1)

@@ -7134,7 +7134,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_ARCEE:
         case LLM_ARCH_ERNIE4_5:
         case LLM_ARCH_ERNIE4_5_MOE:
-        case LLM_ARCH_QWEN3NEXT:
             return LLAMA_ROPE_TYPE_NORM;
 
         // the pairs of head values are offset by n_rot/2
@@ -7154,6 +7153,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_QWEN2MOE:
         case LLM_ARCH_QWEN3:
         case LLM_ARCH_QWEN3MOE:
+        case LLM_ARCH_QWEN3NEXT:
         case LLM_ARCH_LLADA_MOE:
         case LLM_ARCH_OLMO2:
         case LLM_ARCH_OLMOE:
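
Note: moving LLM_ARCH_QWEN3NEXT from the first group into the second changes the rope type llama.cpp reports for this architecture; per the in-file comment, the second group rotates pairs of head values offset by n_rot/2 (NEOX-style) instead of adjacent pairs (NORM-style). A hedged sketch of the index-pairing difference, in Python rather than the C++ above, with a hypothetical n_rot:

    # Illustrative only: which element indices get paired for rotation.
    n_rot = 8  # hypothetical rotary dimension
    norm_pairs = [(2 * i, 2 * i + 1) for i in range(n_rot // 2)]  # adjacent pairs
    neox_pairs = [(i, i + n_rot // 2) for i in range(n_rot // 2)]  # offset by n_rot/2
    print(norm_pairs)  # [(0, 1), (2, 3), (4, 5), (6, 7)]
    print(neox_pairs)  # [(0, 4), (1, 5), (2, 6), (3, 7)]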