4 mesiacov pred · 5a6bc6b1a6
--- a/examples/model-conversion/Makefile
+++ b/examples/model-conversion/Makefile
@@ -144,6 +144,15 @@ perplexity-run:
 
				 hf-create-model:
			
 
				 	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}"
			
 
				 
			
 
				+hf-create-model-dry-run:
			
 
				+	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -d
			
 
				+
			
 
				+hf-create-model-embedding:
			
 
				+	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e
			
 
				+
			
 
				+hf-create-model-embedding-dry-run:
			
 
				+	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e -d
			
 
				+
			
 
				 hf-create-model-private:
			
 
				 	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -p
			
 
				 
			
--- a/examples/model-conversion/README.md
+++ b/examples/model-conversion/README.md
@@ -285,13 +285,21 @@ For the following targets a `HF_TOKEN` environment variable is required.
 
				 This will create a new model repository on Hugging Face with the specified
			
 
				 model name.
			
 
				 ```console
			
 
				-(venv) $ make hf-create-model MODEL_NAME='TestModel' NAMESPACE="danbev"
			
 
				+(venv) $ make hf-create-model MODEL_NAME='TestModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
			
 
				 Repository ID:  danbev/TestModel-GGUF
			
 
				 Repository created: https://huggingface.co/danbev/TestModel-GGUF
			
 
				 ```
			
 
				 Note that we append a `-GGUF` suffix to the model name to ensure a consistent
			
 
				 naming convention for GGUF models.
			
 
				 
			
 
				+An embedding model can be created using the following command:
			
 
				+```console
			
 
				+(venv) $ make hf-create-model-embedding MODEL_NAME='TestEmbeddingModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
			
 
				+```
			
 
				+The model card for an embedding model differs from the causal one only
			
 
				+with regard to the llama-server command and also how to access/call the embedding
			
 
				+endpoint.
			
 
				+
			
 
				 ### Upload a GGUF model to model repository
			
 
				 The following target uploads a model to an existing Hugging Face model repository.
			
 
				 ```console
			
--- a/examples/model-conversion/scripts/causal/modelcard.template
+++ b/examples/model-conversion/scripts/causal/modelcard.template
--- a/examples/model-conversion/scripts/embedding/modelcard.template
+++ b/examples/model-conversion/scripts/embedding/modelcard.template
@@ -0,0 +1,48 @@
 
				+---
			
 
				+base_model:
			
 
				+- {base_model}
			
 
				+---
			
 
				+# {model_name} GGUF
			
 
				+
			
 
				+Recommended way to run this model:
			
 
				+
			
 
				+```sh
			
 
				+llama-server -hf {namespace}/{model_name}-GGUF
			
 
				+```
			
 
				+
			
 
				+Then the endpoint can be accessed at http://localhost:8080/embedding, for
			
 
				+example using `curl`:
			
 
				+```console
			
 
				+curl --request POST \
			
 
				+    --url http://localhost:8080/embedding \
			
 
				+    --header "Content-Type: application/json" \
			
 
				+    --data '{{"input": "Hello embeddings"}}' \
			
 
				+    --silent
			
 
				+```
			
 
				+
			
 
				+Alternatively, the `llama-embedding` command line tool can be used:
			
 
				+```sh
			
 
				+llama-embedding -hf {namespace}/{model_name}-GGUF --verbose-prompt -p "Hello embeddings"
			
 
				+```
			
 
				+
			
 
				+#### embd_normalize
			
 
				+When a model uses pooling, or the pooling method is specified using `--pooling`,
			
 
				+the normalization can be controlled by the `embd_normalize` parameter.
			
 
				+
			
 
				+The default value is `2` which means that the embeddings are normalized using
			
 
				+the Euclidean norm (L2). Other options are:
			
 
				+* -1 No normalization
			
 
				+*  0 Max absolute
			
 
				+*  1 Taxicab
			
 
				+*  2 Euclidean/L2
			
 
				+* \>2 P-Norm
			
 
				+
			
 
				+This can be passed in the request body to `llama-server`, for example:
			
 
				+```sh
			
 
				+    --data '{{"input": "Hello embeddings", "embd_normalize": -1}}' \
			
 
				+```
			
 
				+
			
 
				+And for `llama-embedding`, by passing `--embd-normalize <value>`, for example:
			
 
				+```sh
			
 
				+llama-embedding -hf {namespace}/{model_name}-GGUF --embd-normalize -1 -p "Hello embeddings"
			
 
				+```
			
--- a/examples/model-conversion/scripts/utils/hf-create-model.py
+++ b/examples/model-conversion/scripts/utils/hf-create-model.py
@@ -26,21 +26,31 @@ parser.add_argument('--namespace', '-ns', help='Namespace to add the model to',
 
				 parser.add_argument('--org-base-model', '-b', help='Original Base model name', default="")
			
 
				 parser.add_argument('--no-card', action='store_true', help='Skip creating model card')
			
 
				 parser.add_argument('--private', '-p', action='store_true', help='Create private model')
			
 
				+parser.add_argument('--embedding', '-e', action='store_true', help='Use embedding model card template')
			
 
				+parser.add_argument('--dry-run', '-d', action='store_true', help='Print repository info and template without creating repository')
			
 
				 
			
 
				 args = parser.parse_args()
			
 
				 
			
 
				 repo_id = f"{args.namespace}/{args.model_name}-GGUF"
			
 
				 print("Repository ID: ", repo_id)
			
 
				 
			
 
				-repo_url = api.create_repo(
			
 
				-    repo_id=repo_id,
			
 
				-    repo_type="model",
			
 
				-    private=args.private,
			
 
				-    exist_ok=False
			
 
				-)
			
 
				+repo_url = None
			
 
				+if not args.dry_run:
			
 
				+    repo_url = api.create_repo(
			
 
				+        repo_id=repo_id,
			
 
				+        repo_type="model",
			
 
				+        private=args.private,
			
 
				+        exist_ok=False
			
 
				+    )
			
 
				 
			
 
				 if not args.no_card:
			
 
				-    template_path = "scripts/readme.md.template"
			
 
				+    if args.embedding:
			
 
				+        template_path = "scripts/embedding/modelcard.template"
			
 
				+    else:
			
 
				+        template_path = "scripts/causal/modelcard.template"
			
 
				+
			
 
				+    print("Template path: ", template_path)
			
 
				+
			
 
				     model_card_content = load_template_and_substitute(
			
 
				         template_path,
			
 
				         model_name=args.model_name,
			
@@ -48,16 +58,21 @@ if not args.no_card:
 
				         base_model=args.org_base_model,
			
 
				     )
			
 
				 
			
 
				-    if model_card_content:
			
 
				-        api.upload_file(
			
 
				-            path_or_fileobj=model_card_content.encode('utf-8'),
			
 
				-            path_in_repo="README.md",
			
 
				-            repo_id=repo_id
			
 
				-        )
			
 
				-        print("Model card created successfully.")
			
 
				+    if args.dry_run:
			
 
				+        print("\nTemplate Content:\n")
			
 
				+        print(model_card_content)
			
 
				     else:
			
 
				-        print("Failed to create model card.")
			
 
				+        if model_card_content:
			
 
				+            api.upload_file(
			
 
				+                path_or_fileobj=model_card_content.encode('utf-8'),
			
 
				+                path_in_repo="README.md",
			
 
				+                repo_id=repo_id
			
 
				+            )
			
 
				+            print("Model card created successfully.")
			
 
				+        else:
			
 
				+            print("Failed to create model card.")
			
 
				 
			
 
				-print(f"Repository created: {repo_url}")
			
 
				+if not args.dry_run and repo_url:
			
 
				+    print(f"Repository created: {repo_url}")