Просмотр исходного кода

convert : fix Gemma3N, GraniteMoe and Ernie4.5Moe (#19084)

* fix Gemma3N and Ernie4.5Moe

* fix GraniteMoe
Sigbjørn Skjæret 3 дня назад
Родитель
Commit
70d860824a
1 измененный файл: 12 добавлений и 12 удалений
  1. 12 12
      convert_hf_to_gguf.py

+ 12 - 12
convert_hf_to_gguf.py

@@ -3799,7 +3799,7 @@ class Ernie4_5MoeModel(Ernie4_5Model):
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
                     yield from super().modify_tensors(data_torch, merged_name, bid)
                     yield from super().modify_tensors(data_torch, merged_name, bid)
         else:
         else:
-            yield from super().modify_tensors(data_torch, name, bid)
+            yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
 
     def prepare_tensors(self):
     def prepare_tensors(self):
         super().prepare_tensors()
         super().prepare_tensors()
@@ -6153,7 +6153,7 @@ class Gemma3nVisionAudioModel(ConformerAudioModel):
         if new_name.endswith("conv_stem.conv.bias") or new_name.endswith("layer_scale.gamma"):
         if new_name.endswith("conv_stem.conv.bias") or new_name.endswith("layer_scale.gamma"):
             data_torch = data_torch.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) # [1, C, 1, 1]
             data_torch = data_torch.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) # [1, C, 1, 1]
 
 
-        yield from super().modify_tensors(data_torch, new_name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
 
 
 
 
 @ModelBase.register("Gemma3nForCausalLM", "Gemma3nForConditionalGeneration")
 @ModelBase.register("Gemma3nForCausalLM", "Gemma3nForConditionalGeneration")
@@ -6253,7 +6253,7 @@ class Gemma3NModel(Gemma3Model):
 
 
             # Continue with normal processing
             # Continue with normal processing
             name = name.replace("language_model.", "")
             name = name.replace("language_model.", "")
-            yield from super().modify_tensors(data_torch, name, bid)
+            yield from ModelBase.modify_tensors(self, data_torch, name, bid)
             return
             return
 
 
         if "altup_unembed_projections" in name:
         if "altup_unembed_projections" in name:
@@ -6270,7 +6270,7 @@ class Gemma3NModel(Gemma3Model):
                 raise ValueError(f"Unknown name: {name}")
                 raise ValueError(f"Unknown name: {name}")
             out = self._stack_matrices(self._altup_unembd)
             out = self._stack_matrices(self._altup_unembd)
             if out is not None:
             if out is not None:
-                yield from super().modify_tensors(out, "model.altup_unembed_projections.weight", bid)
+                yield from ModelBase.modify_tensors(self, out, "model.altup_unembed_projections.weight", bid)
                 return
                 return
             else:
             else:
                 return
                 return
@@ -6287,7 +6287,7 @@ class Gemma3NModel(Gemma3Model):
                 raise ValueError(f"Unknown name: {name}")
                 raise ValueError(f"Unknown name: {name}")
             out = self._stack_matrices(self._altup_proj)
             out = self._stack_matrices(self._altup_proj)
             if out is not None:
             if out is not None:
-                yield from super().modify_tensors(out, "model.altup_projections.weight", bid)
+                yield from ModelBase.modify_tensors(self, out, "model.altup_projections.weight", bid)
                 return
                 return
             else:
             else:
                 return
                 return
@@ -8803,8 +8803,8 @@ class GraniteMoeModel(GraniteModel):
             ffn_dim = self.hparams["intermediate_size"]
             ffn_dim = self.hparams["intermediate_size"]
             assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size"
             assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * intermediate_size"
             gate, up = data_torch.split(ffn_dim, dim=-2)
             gate, up = data_torch.split(ffn_dim, dim=-2)
-            yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid)
-            yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid)
+            yield from ModelBase.modify_tensors(self, gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_EXP, bid), bid)
+            yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), bid)
 
 
         has_experts = bool(self.hparams.get('num_local_experts'))
         has_experts = bool(self.hparams.get('num_local_experts'))
 
 
@@ -8813,15 +8813,15 @@ class GraniteMoeModel(GraniteModel):
             assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
             assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size"
             gate, up = data_torch.split(ffn_dim, dim=-2)
             gate, up = data_torch.split(ffn_dim, dim=-2)
             if has_experts:
             if has_experts:
-                yield from super().modify_tensors(gate,self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid)
-                yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid)
+                yield from ModelBase.modify_tensors(self, gate,self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), bid)
+                yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), bid)
                 return
                 return
-            yield from super().modify_tensors(gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid)
-            yield from super().modify_tensors(up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid)
+            yield from ModelBase.modify_tensors(self, gate, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), bid)
+            yield from ModelBase.modify_tensors(self, up, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), bid)
             return
             return
 
 
         if not has_experts and name.endswith("shared_mlp.output_linear.weight"):
         if not has_experts and name.endswith("shared_mlp.output_linear.weight"):
-            yield from super().modify_tensors(data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid)
+            yield from ModelBase.modify_tensors(self, data_torch, self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), bid)
             return
             return
 
 
         yield from super().modify_tensors(data_torch, name, bid)
         yield from super().modify_tensors(data_torch, name, bid)