
convert : move rope_parameters to TextModel class (#18061)

* make sure to search text_config for rope parameters

* move rope_parameters to TextModel class
Sigbjørn Skjæret 1 month ago
parent commit d6a1e18c65
1 changed file with 9 additions and 8 deletions
  1. convert_hf_to_gguf.py  +9 -8

convert_hf_to_gguf.py  +9 -8

@@ -136,19 +136,11 @@ class ModelBase:
         self.remote_hf_model_id = remote_hf_model_id
         self.sentence_transformers_dense_modules = sentence_transformers_dense_modules
         self.hparams = ModelBase.load_hparams(self.dir_model, self.is_mistral_format) if hparams is None else hparams
-        self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
         self.model_tensors = self.index_tensors(remote_hf_model_id=remote_hf_model_id)
         self.metadata_override = metadata_override
         self.model_name = model_name
         self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py
 
-        # Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
-        if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
-            if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
-                self.rope_parameters["rope_theta"] = rope_theta
-            if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
-                self.rope_parameters["rope_type"] = rope_type
-
         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
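
For context on the deletion above: multimodal checkpoints typically keep their language-model hyperparameters under a nested `text_config` key, so a lookup against the top-level config (as in the removed `ModelBase.__init__` line) can come back empty. A minimal sketch of that failure mode, using a hypothetical config dict rather than the converter's real data:

```python
# Hypothetical HF-style config for a multimodal checkpoint (illustrative only):
# the rope settings live under "text_config", not at the top level.
hparams = {
    "architectures": ["SomeMultimodalModel"],
    "text_config": {
        "rope_theta": 1000000.0,
        "rope_scaling": {"rope_type": "linear", "factor": 8.0},
    },
}

# The top-level lookup removed above never sees the nested values.
rope_parameters = hparams.get("rope_parameters", hparams.get("rope_scaling")) or {}
print(rope_parameters)  # {}
```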
@@ -765,6 +757,15 @@ class TextModel(ModelBase):
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
 
+        self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
+
+        # Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
+        if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
+            if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
+                self.rope_parameters["rope_theta"] = rope_theta
+            if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
+                self.rope_parameters["rope_type"] = rope_type
+
     @classmethod
     def __init_subclass__(cls):
         # can't use an abstract property, because overriding it without type errors
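
Per the commit message, relocating this block to `TextModel.__init__` means rope parameters are now resolved where `text_config` is taken into account. The standalone helper below only approximates that combined behaviour (text_config-aware lookup plus the `rope_theta`/`rope_type` mirroring); `resolve_rope_parameters` is a hypothetical name for illustration, not a function in the converter.

```python
from typing import Any


def resolve_rope_parameters(config: dict[str, Any]) -> dict[str, Any]:
    # Prefer the nested text_config used by multimodal checkpoints, if present.
    text_cfg = config.get("text_config", config)

    # Newer configs use "rope_parameters"; older ones use "rope_scaling".
    rope = dict(text_cfg.get("rope_parameters", text_cfg.get("rope_scaling")) or {})

    # Mirror "rope_theta" and "rope_type" so later code has a single place to look,
    # unless per-attention-type sections are already present.
    if "full_attention" not in rope and "sliding_attention" not in rope:
        if "rope_theta" not in rope and (theta := text_cfg.get("rope_theta")) is not None:
            rope["rope_theta"] = theta
        if "rope_type" not in rope and (rtype := rope.get("type")) is not None:
            rope["rope_type"] = rtype
    return rope


if __name__ == "__main__":
    cfg = {"text_config": {"rope_theta": 1000000.0, "rope_scaling": {"type": "yarn", "factor": 4.0}}}
    print(resolve_rope_parameters(cfg))
    # {'type': 'yarn', 'factor': 4.0, 'rope_theta': 1000000.0, 'rope_type': 'yarn'}
```

Unlike the real `find_hparam(..., optional=True)` call in the diff, this sketch only checks `rope_theta` and skips the `global_rope_theta`/`rotary_emb_base` fallbacks.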