Browse Source

convert : fix conversion for inheriting models that were bypassing modify_tensors (#19064)

* Add undo_permute = False where needed

* Replace super().modify_tensors with ModelBase.modify_tensors

* Add one more ModelBase.modify_tensors

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Bartowski 3 days ago
parent
commit
9981c30130
1 changed file with 11 additions and 11 deletions
  1. 11 11
      convert_hf_to_gguf.py

+ 11 - 11
convert_hf_to_gguf.py

@@ -2736,7 +2736,7 @@ class AfmoeModel(LlamaModel):
 
                     data_torch = torch.stack(datas, dim=0)
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
-                    yield from super().modify_tensors(data_torch, merged_name, bid)
+                    yield from ModelBase.modify_tensors(self, data_torch, merged_name, bid)
 
                 return
             else:
@@ -2745,7 +2745,7 @@ class AfmoeModel(LlamaModel):
         if name.endswith(".expert_bias"):
             name = name.replace(".expert_bias", ".expert_bias.bias")
 
-        yield from super().modify_tensors(data_torch, name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
 
 @ModelBase.register(
@@ -8918,7 +8918,7 @@ class GraniteHybridModel(Mamba2Model, GraniteMoeModel):
             return Mamba2Model.modify_tensors(self, data_torch, name, bid)
         elif bid in self._attn_layers:
             return GraniteMoeModel.modify_tensors(self, data_torch, name, bid)
-        yield from super().modify_tensors(data_torch, name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
     def set_gguf_parameters(self):
         """This method merges params from both parents and some that are
@@ -9050,33 +9050,33 @@ class NemotronHModel(GraniteHybridModel):
         if self.is_moe and bid is not None:
             if name.endswith("mixer.gate.e_score_correction_bias"):
                 new_name = name.replace("e_score_correction_bias", "e_score_correction.bias")
-                yield from super().modify_tensors(data_torch, new_name, bid)
+                yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
                 return
 
             if name.endswith("mixer.dt_bias"):
                 new_name = name.replace("dt_bias", "dt.bias")
-                yield from super().modify_tensors(data_torch, new_name, bid)
+                yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
                 return
 
             if name.endswith("mixer.conv1d.weight"):
                 squeezed_data = data_torch.squeeze()
-                yield from super().modify_tensors(squeezed_data, name, bid)
+                yield from ModelBase.modify_tensors(self, squeezed_data, name, bid)
                 return
 
             if name.endswith("mixer.A_log"):
                 transformed_data = -torch.exp(data_torch)
                 reshaped_data = transformed_data.squeeze().reshape(-1, 1)
-                yield from super().modify_tensors(reshaped_data, name, bid)
+                yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
                 return
 
             if name.endswith("mixer.D"):
                 reshaped_data = data_torch.squeeze().reshape(-1, 1)
-                yield from super().modify_tensors(reshaped_data, name, bid)
+                yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
                 return
 
             if name.endswith("mixer.norm.weight"):
                 reshaped_data = data_torch.reshape(self.n_group, -1)
-                yield from super().modify_tensors(reshaped_data, name, bid)
+                yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
                 return
 
             if name.find("mixer.experts") != -1:
@@ -9101,7 +9101,7 @@ class NemotronHModel(GraniteHybridModel):
                         data_torch = torch.stack(datas, dim=0)
                         merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
 
-                        yield from super().modify_tensors(data_torch, merged_name, bid)
+                        yield from ModelBase.modify_tensors(self, data_torch, merged_name, bid)
                     return
                 else:
                     return
@@ -10731,7 +10731,7 @@ class CogVLMModel(LlamaModel):
         if name.startswith("model.vision."):
             return
 
-        yield from super().modify_tensors(data_torch, name, bid)
+        yield from ModelBase.modify_tensors(self, data_torch, name, bid)
 
 
 @ModelBase.register("JanusForConditionalGeneration")