|
|
@@ -3760,7 +3760,8 @@ class Qwen3NextModel(Qwen3MoeModel):
|
|
|
self.gguf_writer.add_ssm_group_count(self.find_hparam(["linear_num_key_heads"]))
|
|
|
self.gguf_writer.add_ssm_time_step_rank(self.find_hparam(["linear_num_value_heads"]))
|
|
|
self.gguf_writer.add_ssm_inner_size(self.find_hparam(['linear_value_head_dim']) * self.find_hparam(['linear_num_value_heads']))
|
|
|
- rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
|
|
|
+ if (rope_dim := self.hparams.get("head_dim")) is None:
|
|
|
+ rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
|
|
|
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.25)))
|
|
|
|
|
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|