@@ -78,7 +78,7 @@ class ModelBase:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH

-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
                  split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
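
The new `*` in the signature makes every option after `fname_out` keyword-only, so a stray positional boolean can no longer bind to the wrong flag. A minimal sketch of the effect, using a toy stand-in function (names here are illustrative, not from the patch):

```python
from pathlib import Path

# toy stand-in for the __init__ signature above
def init(dir_model: Path, ftype: int, fname_out: Path, *, is_big_endian: bool = False, eager: bool = False):
    return is_big_endian, eager

init(Path("model"), 1, Path("model.gguf"), eager=True)       # ok: flags passed by name
try:
    init(Path("model"), 1, Path("model.gguf"), False, True)  # flags can no longer bind positionally
except TypeError as e:
    print(e)  # init() takes 3 positional arguments but 5 were given
```
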
@@ -454,13 +454,6 @@ class ModelBase:


 class TextModel(ModelBase):
-    @classmethod
-    def __init_subclass__(cls):
-        # can't use an abstract property, because overriding it without type errors
-        # would require using decorated functions instead of simply defining the property
-        if "model_arch" not in cls.__dict__:
-            raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
-
     def set_vocab(self):
         self._set_vocab_gpt2()
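
This hunk drops the class-creation-time check that every `TextModel` subclass must define `model_arch` in its class body, presumably because `NomicBertModel` later in this patch picks its architecture per instance inside `__init__`, which the old hook would have rejected at import time. A simplified sketch of that conflict (toy names, not the patch's code):

```python
# the removed hook rejected subclasses at class-definition time
class TextModel:
    def __init_subclass__(cls):
        if "model_arch" not in cls.__dict__:
            raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")

try:
    class NomicBertModel(TextModel):  # no class-level model_arch: chosen per instance
        def __init__(self, is_moe: bool) -> None:
            self.model_arch = "NOMIC_BERT_MOE" if is_moe else "NOMIC_BERT"
except TypeError as e:
    print(e)  # raised before any __init__ could ever run
```
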
@@ -3373,14 +3366,7 @@ class BertModel(TextModel):

         return [(self.map_tensor_name(name), data_torch)]

-
-@ModelBase.register("RobertaModel")
-class RobertaModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
+    def _xlmroberta_tokenizer_init(self) -> None:
         # we need the pad_token_id to know how to chop down position_embd matrix
         if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
             self._position_offset = 1 + pad_token_id
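
For reference, the offset arithmetic this shared helper performs, worked through with XLM-RoBERTa's usual config values (a sketch, not part of the patch): `pad_token_id` is 1, so positions 0 and 1 are reserved and real positions start at 2, shrinking the advertised 514 position embeddings to the familiar 512.

```python
# worked example of the position-offset arithmetic, assuming typical
# XLM-RoBERTa hparams (pad_token_id=1, max_position_embeddings=514)
hparams = {"pad_token_id": 1, "max_position_embeddings": 514}

position_offset = 1 + hparams["pad_token_id"]  # 2
usable = hparams["max_position_embeddings"] - position_offset
print(position_offset, usable)  # 2 512
```
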
@@ -3389,82 +3375,7 @@ class RobertaModel(BertModel):
         else:
             self._position_offset = None

-    def set_vocab(self):
-        """Support BPE tokenizers for roberta models"""
-        bpe_tok_path = self.dir_model / "tokenizer.json"
-        if bpe_tok_path.exists():
-            self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
-
-            # we need this to validate the size of the token_type embeddings
-            # though currently we are passing all zeros to the token_type embeddings
-            # "Sequence A" or "Sequence B"
-            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
-
-        else:
-            return super().set_vocab()
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        # if name starts with "roberta.", remove the prefix
-        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
-        if name.startswith("roberta."):
-            name = name[8:]
-
-        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
-        if name == "embeddings.position_embeddings.weight":
-            if self._position_offset is not None:
-                data_torch = data_torch[self._position_offset:,:]
-
-        return super().modify_tensors(data_torch, name, bid)
-
-
-@ModelBase.register("NomicBertModel")
-class NomicBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.NOMIC_BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.hparams["n_ctx"] = 2048
-
-        # SwigLU activation
-        assert self.hparams["activation_function"] == "swiglu"
-        # this doesn't do anything in the HF version
-        assert self.hparams["causal"] is False
-        # no bias tensors
-        assert self.hparams["qkv_proj_bias"] is False
-        assert self.hparams["mlp_fc1_bias"] is False
-        assert self.hparams["mlp_fc2_bias"] is False
-        # norm at end of layer
-        assert self.hparams["prenorm"] is False
-        # standard RoPE
-        assert self.hparams["rotary_emb_fraction"] == 1.0
-        assert self.hparams["rotary_emb_interleaved"] is False
-        assert self.hparams["rotary_emb_scale_base"] is None
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
-
-
-@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
-class XLMRobertaModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # we need the pad_token_id to know how to chop down position_embd matrix
-        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
-            self._position_offset = 1 + pad_token_id
-            if "max_position_embeddings" in self.hparams:
-                self.hparams["max_position_embeddings"] -= self._position_offset
-        else:
-            self._position_offset = None
-
-    def set_vocab(self):
+    def _xlmroberta_set_vocab(self) -> None:
         # to avoid TypeError: Descriptors cannot be created directly
         # exception when importing sentencepiece_model_pb2
         os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
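
The environment variable in the trailing context forces protobuf's pure-Python backend; it only helps if it is set before the first protobuf import, which is why the method sets it before touching `sentencepiece_model_pb2`. A sketch of the required ordering (assuming the `sentencepiece` package layout this script relies on):

```python
import os

# must run before any protobuf-generated module is imported, otherwise newer
# protobuf releases raise "TypeError: Descriptors cannot be created directly"
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

from sentencepiece import sentencepiece_model_pb2  # noqa: E402  safe only after the env var
```
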
@@ -3546,6 +3457,138 @@ class XLMRobertaModel(BertModel):
         self.gguf_writer.add_add_bos_token(True)
         self.gguf_writer.add_add_eos_token(True)

+
+@ModelBase.register("RobertaModel")
+class RobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # we need the pad_token_id to know how to chop down position_embd matrix
+        if (pad_token_id := self.hparams.get("pad_token_id")) is not None:
+            self._position_offset = 1 + pad_token_id
+            if "max_position_embeddings" in self.hparams:
+                self.hparams["max_position_embeddings"] -= self._position_offset
+        else:
+            self._position_offset = None
+
+    def set_vocab(self):
+        """Support BPE tokenizers for roberta models"""
+        bpe_tok_path = self.dir_model / "tokenizer.json"
+        if bpe_tok_path.exists():
+            self._set_vocab_gpt2()
+            self.gguf_writer.add_add_bos_token(True)
+            self.gguf_writer.add_add_eos_token(True)
+
+            # we need this to validate the size of the token_type embeddings
+            # though currently we are passing all zeros to the token_type embeddings
+            # "Sequence A" or "Sequence B"
+            self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
+
+        else:
+            return super().set_vocab()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # if name starts with "roberta.", remove the prefix
+        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
+        if name.startswith("roberta."):
+            name = name[8:]
+
+        # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
+        if name == "embeddings.position_embeddings.weight":
+            if self._position_offset is not None:
+                data_torch = data_torch[self._position_offset:,:]
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
+@ModelBase.register("NomicBertModel")
+class NomicBertModel(BertModel):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            hparams = ModelBase.load_hparams(dir_model)
+
+        self.is_moe = bool(hparams.get("moe_every_n_layers"))
+        self.model_arch = gguf.MODEL_ARCH.NOMIC_BERT_MOE if self.is_moe else gguf.MODEL_ARCH.NOMIC_BERT
+
+        super().__init__(dir_model, ftype, fname_out, hparams=hparams, **kwargs)
+
+        self._tokenizer_is_xlmroberta = self._is_tokenizer_xlmroberta()
+        if self._tokenizer_is_xlmroberta:
+            self._xlmroberta_tokenizer_init()
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        # GELU activation for the MoE variant, SwiGLU otherwise
+        assert self.hparams["activation_function"] == ("gelu" if self.is_moe else "swiglu")
+
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors unless MoE
+        assert self.hparams["qkv_proj_bias"] == self.is_moe
+        assert self.hparams["mlp_fc1_bias"] == self.is_moe
+        assert self.hparams["mlp_fc2_bias"] == self.is_moe
+
+        # norm at end of layer
+        assert self.hparams["prenorm"] is False
+        # standard RoPE
+        assert self.hparams["rotary_emb_fraction"] == 1.0
+        assert self.hparams["rotary_emb_interleaved"] is False
+        assert self.hparams["rotary_emb_scale_base"] is None
+
+    def set_vocab(self) -> None:
+        if self._tokenizer_is_xlmroberta:
+            return self._xlmroberta_set_vocab()
+        return super().set_vocab()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # skip the experts bias tensors
+        if "mlp.experts.bias" in name:
+            return []
+
+        if "mlp.experts.mlp.w1" in name:
+            data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
+            name += ".weight"
+
+        if "mlp.experts.mlp.w2" in name:
+            data_torch = data_torch.view(self.hparams["num_experts"], self.hparams["n_inner"], self.hparams["n_embd"])
+            data_torch = data_torch.transpose(1, 2)
+            name += ".weight"
+
+        return [(self.map_tensor_name(name), data_torch)]
+
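
A shape walkthrough of the expert reshapes above, assuming the checkpoint stores each expert weight flattened along the first axis (toy sizes, not values from the patch): `w1` is viewed into one 2-D matrix per expert, and `w2` is additionally transposed so each expert's down-projection lands as `(n_embd, n_inner)`.

```python
import torch

# toy stand-ins: 8 experts, FFN width 3072, hidden size 768
num_experts, n_inner, n_embd = 8, 3072, 768

w1 = torch.empty(num_experts * n_inner, n_embd)  # flattened checkpoint layout (assumed)
w1 = w1.view(num_experts, n_inner, n_embd)
print(w1.shape)  # torch.Size([8, 3072, 768])

w2 = torch.empty(num_experts * n_inner, n_embd)
w2 = w2.view(num_experts, n_inner, n_embd).transpose(1, 2)
print(w2.shape)  # torch.Size([8, 768, 3072])
```
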
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
+        if self.is_moe:
+            self.gguf_writer.add_moe_every_n_layers(self.hparams["moe_every_n_layers"])
+            self.gguf_writer.add_expert_count(self.hparams["num_experts"])
+            self.gguf_writer.add_expert_used_count(self.hparams["moe_top_k"])
+
+    def _is_tokenizer_xlmroberta(self) -> bool:
+        with open(self.dir_model / "tokenizer.json") as f:
+            tokenizer_json = json.load(f)
+        toktyp = tokenizer_json["model"]["type"]
+        if toktyp == "Unigram":
+            return True
+        if toktyp == "WordPiece":
+            return False
+        raise ValueError(f"unknown tokenizer: {toktyp}")
+
+
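
`_is_tokenizer_xlmroberta` keys off the `model.type` field of `tokenizer.json`: Unigram marks the sentencepiece-derived XLM-RoBERTa tokenizer, WordPiece the classic BERT one, and anything else (e.g. BPE) is rejected. Truncated sketches of the two accepted shapes (real files also carry vocabularies and normalizers):

```python
unigram_like = {"model": {"type": "Unigram"}}      # XLM-RoBERTa style -> True
wordpiece_like = {"model": {"type": "WordPiece"}}  # classic BERT -> False

for tokenizer_json in (unigram_like, wordpiece_like):
    toktyp = tokenizer_json["model"]["type"]
    print(toktyp, toktyp == "Unigram")
```
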
+@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
+class XLMRobertaModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._xlmroberta_tokenizer_init()
+
+    def set_vocab(self):
+        self._xlmroberta_set_vocab()
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # if name starts with "roberta.", remove the prefix
         # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main