|
@@ -9212,7 +9212,7 @@ class NemotronHModel(GraniteHybridModel):
|
|
|
return [(mapped_name, reshaped_data)]
|
|
return [(mapped_name, reshaped_data)]
|
|
|
|
|
|
|
|
if name.endswith("mixer.norm.weight"):
|
|
if name.endswith("mixer.norm.weight"):
|
|
|
- reshaped_data = data_torch.reshape(8, 512)
|
|
|
|
|
|
|
+ reshaped_data = data_torch.reshape(self.n_group, -1)
|
|
|
mapped_name = self.map_tensor_name(name)
|
|
mapped_name = self.map_tensor_name(name)
|
|
|
return [(mapped_name, reshaped_data)]
|
|
return [(mapped_name, reshaped_data)]
|
|
|
|
|
|