@@ -3322,7 +3322,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     layer.attn_norm_2_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
 
                     layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, TENSOR_NOT_REQUIRED);
-                    layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, layer.ffn_gate ? n_ff : n_ff * 2}, 0);
+
+                    const auto tn_ffn_up_weight = tn(LLM_TENSOR_FFN_UP, "weight", i);
+                    ggml_tensor * t_ffn_up = ml.get_tensor_meta(tn_ffn_up_weight.str().c_str());
+                    const int64_t n_ffn_up = t_ffn_up ? t_ffn_up->ne[1] : n_ff;
+
+                    GGML_ASSERT(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
+                    layer.ffn_up = create_tensor(tn_ffn_up_weight, {n_embd, n_ffn_up}, 0);
+                    layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ffn_up}, TENSOR_NOT_REQUIRED);
 
                     layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                     layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0);
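
The hunk replaces the gate-based guess (`layer.ffn_gate ? n_ff : n_ff * 2`) with a probe of the tensor metadata: the FFN up-projection width is read from the file itself, falls back to `n_ff` if the tensor is absent, and is asserted to be either `n_ff` or `2 * n_ff`. A minimal, self-contained sketch of that probe-then-assert pattern is below; the loader and tensor types are hypothetical stand-ins for `llama_model_loader` and `ggml_tensor`, not the real API.

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

// Hypothetical stand-in for the shape information stored in GGUF metadata.
struct tensor_meta {
    int64_t ne[2]; // tensor dimensions
};

// Hypothetical stand-in for llama_model_loader: look up a tensor by name.
struct model_loader_stub {
    std::map<std::string, tensor_meta> tensors;

    // Returns the metadata entry if the tensor exists in the file, else nullptr,
    // mirroring how get_tensor_meta() is used in the patch.
    const tensor_meta * get_tensor_meta(const std::string & name) const {
        auto it = tensors.find(name);
        return it == tensors.end() ? nullptr : &it->second;
    }
};

// Decide the FFN up-projection width from the file contents rather than from
// whether a gate tensor happens to be present.
int64_t resolve_n_ffn_up(const model_loader_stub & ml, const std::string & name, int64_t n_ff) {
    const tensor_meta * t = ml.get_tensor_meta(name);
    const int64_t n_ffn_up = t ? t->ne[1] : n_ff;

    // Only a plain up projection (n_ff) or a fused up+gate projection (2 * n_ff) is accepted.
    assert(n_ffn_up == n_ff || n_ffn_up == n_ff * 2);
    return n_ffn_up;
}

int main() {
    const int64_t n_embd = 1024, n_ff = 4096;

    model_loader_stub ml;
    ml.tensors["blk.0.ffn_up.weight"] = {{n_embd, n_ff * 2}}; // fused up+gate variant

    const int64_t n_ffn_up = resolve_n_ffn_up(ml, "blk.0.ffn_up.weight", n_ff);
    return n_ffn_up == n_ff * 2 ? 0 : 1;
}
```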