Browse Source

model-conversion : remove -st targets for converted model (#18689)

This commit removes the `-st` make target for running the converted
embedding model.

The motivation for this is that the pooling type is now part of the
.gguf metadata of the model and this is used by llama-debug when running
the model. So there is no need to specify the pooling type separately
any more.

The commit also adds an option to specify the type of normalization
applied to the output embeddings when running the converted model.

And the readme documentation has been updated to reflect these changes.
Daniel Bevenius 2 weeks ago
parent
commit
df7fb92170

+ 2 - 5
examples/model-conversion/Makefile

@@ -138,16 +138,13 @@ embedding-run-original-model-st: embedding-run-original-model
 embedding-run-converted-model:
 	@./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \
 	$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") \
-	$(if $(USE_POOLING),--pooling)
-
-embedding-run-converted-model-st: USE_POOLING=1
-embedding-run-converted-model-st: embedding-run-converted-model
+	$(if $(EMBD_NORMALIZE),--embd-normalize "$(EMBD_NORMALIZE)")
 
 embedding-verify-logits: embedding-run-original-model embedding-run-converted-model
 	@./scripts/embedding/compare-embeddings-logits.sh \
 	$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
 
-embedding-verify-logits-st: embedding-run-original-model-st embedding-run-converted-model-st
+embedding-verify-logits-st: embedding-run-original-model-st embedding-run-converted-model
 	@./scripts/embedding/compare-embeddings-logits.sh \
 	$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
 

+ 14 - 4
examples/model-conversion/README.md

@@ -198,14 +198,13 @@ model, and the other is a text file which allows for manual visual inspection.
 
 #### Using SentenceTransformer with numbered layers
 For models that have numbered SentenceTransformer layers (01_Pooling, 02_Dense,
-03_Dense, 04_Normalize), use the `-st` targets to apply all these layers:
+03_Dense, 04_Normalize), these will be applied automatically when running the
+converted model but currently there is a separate target to run the original
+version:
 
 ```console
 # Run original model with SentenceTransformer (applies all numbered layers)
 (venv) $ make embedding-run-original-model-st
-
-# Run converted model with pooling enabled
-(venv) $ make embedding-run-converted-model-st
 ```
 
 This will use the SentenceTransformer library to load and run the model, which
@@ -213,6 +212,17 @@ automatically applies all the numbered layers in the correct order. This is
 particularly useful when comparing with models that should include these
 additional transformation layers beyond just the base model output.
 
+The type of normalization can be specified for the converted model but is not
+strictly necessary as the verification uses cosine similarity and the magnitude
+of the output vectors does not affect this. But the normalization type can be
+specified as an argument to the target which might be useful for manual
+inspection:
+```console
+(venv) $ make embedding-verify-logits-st EMBD_NORMALIZE=1
+```
+The original model will apply the normalization according to the normalization
+layer specified in the modules.json configuration file.
+
 ### Model conversion
 After updates have been made to [gguf-py](../../gguf-py) to add support for the
 new model the model can be converted to GGUF format using the following command:

+ 5 - 9
examples/model-conversion/scripts/embedding/run-converted-model.sh

@@ -5,7 +5,7 @@ set -e
 # Parse command line arguments
 CONVERTED_MODEL=""
 PROMPTS_FILE=""
-USE_POOLING=""
+EMBD_NORMALIZE="2"
 
 while [[ $# -gt 0 ]]; do
     case $1 in
@@ -13,9 +13,9 @@ while [[ $# -gt 0 ]]; do
             PROMPTS_FILE="$2"
             shift 2
             ;;
-        --pooling)
-            USE_POOLING="1"
-            shift
+        --embd-normalize)
+            EMBD_NORMALIZE="$2"
+            shift 2
             ;;
         *)
             if [ -z "$CONVERTED_MODEL" ]; then
@@ -51,8 +51,4 @@ fi
 echo $CONVERTED_MODEL
 
 cmake --build ../../build --target llama-debug -j8
-if [ -n "$USE_POOLING" ]; then
-    ../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding --pooling mean -p "$PROMPT" --save-logits
-else
-    ../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding --pooling none -p "$PROMPT" --save-logits
-fi
+../../build/bin/llama-debug -m "$CONVERTED_MODEL" --embedding -p "$PROMPT" --save-logits --embd-normalize $EMBD_NORMALIZE