
server : check that the prompt fits in the slot's context (#10030)

ggml-ci
Georgi Gerganov 1 year ago
Commit bc5ba007b2
3 changed files with 10 additions and 1 deletion
  1. convert_hf_to_gguf.py (+3 -0)
  2. convert_hf_to_gguf_update.py (+1 -0)
  3. examples/server/server.cpp (+6 -1)

+ 3 - 0
convert_hf_to_gguf.py

@@ -573,6 +573,9 @@ class Model:
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
+        if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
+            # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
+            res = "bert-bge-large"
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/mosaicml/mpt-7b
             res = "mpt"

+ 1 - 0
convert_hf_to_gguf_update.py

@@ -72,6 +72,7 @@ models = [
     {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
     {"name": "falcon",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
     {"name": "bert-bge",       "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
     {"name": "mpt",            "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
     {"name": "starcoder",      "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
     {"name": "gpt-2",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },

+ 6 - 1
examples/server/server.cpp

@@ -1882,12 +1882,17 @@ struct server_context {
                         }
 
                         if (slot.inf_type == SERVER_TASK_INF_TYPE_EMBEDDING || slot.inf_type == SERVER_TASK_INF_TYPE_RERANK) {
-                            // this prompt is too large to process - discard it
                             if (slot.n_prompt_tokens > n_ubatch) {
                                 slot.release();
                                 send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
                                 continue;
                             }
+
+                            if (slot.n_prompt_tokens > slot.n_ctx) {
+                                slot.release();
+                                send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_SERVER);
+                                continue;
+                            }
                         } else {
                             if (!params.ctx_shift) {
                                 // if context shift is disabled, we make sure prompt size is smaller than KV size
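
The server-side check added above makes embedding and rerank requests fail fast when the prompt cannot fit in the slot's context, instead of being processed partially. A client can avoid triggering the error by checking the prompt length itself first. The following is a minimal sketch, assuming a llama-server listening on localhost:8080 with the /props, /tokenize and /embedding endpoints; the exact /props field that exposes the slot context size is an assumption and may differ between server versions.

# Minimal client-side guard: check that the prompt fits in the slot's context
# before requesting an embedding, so the server does not have to reject it
# with "input is larger than the max context size".
import requests

BASE = "http://localhost:8080"  # assumed llama-server address

def embed_if_it_fits(text: str):
    # Ask the server for its context size (assumed field layout of /props).
    props = requests.get(f"{BASE}/props").json()
    n_ctx = props["default_generation_settings"]["n_ctx"]

    # Tokenize the prompt with the server's own tokenizer.
    tokens = requests.post(f"{BASE}/tokenize", json={"content": text}).json()["tokens"]

    if len(tokens) > n_ctx:
        raise ValueError(f"prompt has {len(tokens)} tokens but the slot context is {n_ctx}")

    return requests.post(f"{BASE}/embedding", json={"content": text}).json()

if __name__ == "__main__":
    print(embed_if_it_fits("hello world"))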