|
@@ -2736,7 +2736,7 @@ class AfmoeModel(LlamaModel):
|
|
|
|
|
|
|
|
data_torch = torch.stack(datas, dim=0)
|
|
data_torch = torch.stack(datas, dim=0)
|
|
|
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
|
|
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
|
|
|
- yield from super().modify_tensors(data_torch, merged_name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, merged_name, bid)
|
|
|
|
|
|
|
|
return
|
|
return
|
|
|
else:
|
|
else:
|
|
@@ -2745,7 +2745,7 @@ class AfmoeModel(LlamaModel):
|
|
|
if name.endswith(".expert_bias"):
|
|
if name.endswith(".expert_bias"):
|
|
|
name = name.replace(".expert_bias", ".expert_bias.bias")
|
|
name = name.replace(".expert_bias", ".expert_bias.bias")
|
|
|
|
|
|
|
|
- yield from super().modify_tensors(data_torch, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
|
|
|
|
|
|
|
|
|
|
|
|
|
@ModelBase.register(
|
|
@ModelBase.register(
|
|
@@ -8918,7 +8918,7 @@ class GraniteHybridModel(Mamba2Model, GraniteMoeModel):
|
|
|
return Mamba2Model.modify_tensors(self, data_torch, name, bid)
|
|
return Mamba2Model.modify_tensors(self, data_torch, name, bid)
|
|
|
elif bid in self._attn_layers:
|
|
elif bid in self._attn_layers:
|
|
|
return GraniteMoeModel.modify_tensors(self, data_torch, name, bid)
|
|
return GraniteMoeModel.modify_tensors(self, data_torch, name, bid)
|
|
|
- yield from super().modify_tensors(data_torch, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
|
|
|
|
|
|
|
|
def set_gguf_parameters(self):
|
|
def set_gguf_parameters(self):
|
|
|
"""This method merges params from both parents and some that are
|
|
"""This method merges params from both parents and some that are
|
|
@@ -9050,33 +9050,33 @@ class NemotronHModel(GraniteHybridModel):
|
|
|
if self.is_moe and bid is not None:
|
|
if self.is_moe and bid is not None:
|
|
|
if name.endswith("mixer.gate.e_score_correction_bias"):
|
|
if name.endswith("mixer.gate.e_score_correction_bias"):
|
|
|
new_name = name.replace("e_score_correction_bias", "e_score_correction.bias")
|
|
new_name = name.replace("e_score_correction_bias", "e_score_correction.bias")
|
|
|
- yield from super().modify_tensors(data_torch, new_name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
if name.endswith("mixer.dt_bias"):
|
|
if name.endswith("mixer.dt_bias"):
|
|
|
new_name = name.replace("dt_bias", "dt.bias")
|
|
new_name = name.replace("dt_bias", "dt.bias")
|
|
|
- yield from super().modify_tensors(data_torch, new_name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, new_name, bid)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
if name.endswith("mixer.conv1d.weight"):
|
|
if name.endswith("mixer.conv1d.weight"):
|
|
|
squeezed_data = data_torch.squeeze()
|
|
squeezed_data = data_torch.squeeze()
|
|
|
- yield from super().modify_tensors(squeezed_data, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, squeezed_data, name, bid)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
if name.endswith("mixer.A_log"):
|
|
if name.endswith("mixer.A_log"):
|
|
|
transformed_data = -torch.exp(data_torch)
|
|
transformed_data = -torch.exp(data_torch)
|
|
|
reshaped_data = transformed_data.squeeze().reshape(-1, 1)
|
|
reshaped_data = transformed_data.squeeze().reshape(-1, 1)
|
|
|
- yield from super().modify_tensors(reshaped_data, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
if name.endswith("mixer.D"):
|
|
if name.endswith("mixer.D"):
|
|
|
reshaped_data = data_torch.squeeze().reshape(-1, 1)
|
|
reshaped_data = data_torch.squeeze().reshape(-1, 1)
|
|
|
- yield from super().modify_tensors(reshaped_data, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
if name.endswith("mixer.norm.weight"):
|
|
if name.endswith("mixer.norm.weight"):
|
|
|
reshaped_data = data_torch.reshape(self.n_group, -1)
|
|
reshaped_data = data_torch.reshape(self.n_group, -1)
|
|
|
- yield from super().modify_tensors(reshaped_data, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, reshaped_data, name, bid)
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
if name.find("mixer.experts") != -1:
|
|
if name.find("mixer.experts") != -1:
|
|
@@ -9101,7 +9101,7 @@ class NemotronHModel(GraniteHybridModel):
|
|
|
data_torch = torch.stack(datas, dim=0)
|
|
data_torch = torch.stack(datas, dim=0)
|
|
|
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
|
|
merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
|
|
|
|
|
|
|
|
- yield from super().modify_tensors(data_torch, merged_name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, merged_name, bid)
|
|
|
return
|
|
return
|
|
|
else:
|
|
else:
|
|
|
return
|
|
return
|
|
@@ -10731,7 +10731,7 @@ class CogVLMModel(LlamaModel):
|
|
|
if name.startswith("model.vision."):
|
|
if name.startswith("model.vision."):
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
- yield from super().modify_tensors(data_torch, name, bid)
|
|
|
|
|
|
|
+ yield from ModelBase.modify_tensors(self, data_torch, name, bid)
|
|
|
|
|
|
|
|
|
|
|
|
|
@ModelBase.register("JanusForConditionalGeneration")
|
|
@ModelBase.register("JanusForConditionalGeneration")
|