# Makefile — model conversion, verification, quantization and HF upload helpers.
# Suppress "Entering/Leaving directory" noise from any sub-make invocations.
MAKEFLAGS += --no-print-directory
  2. define validate_model_path
  3. @if [ -z "$(MODEL_PATH)" ]; then \
  4. echo "Error: MODEL_PATH must be provided either as:"; \
  5. echo " 1. Environment variable: export MODEL_PATH=/path/to/model"; \
  6. echo " 2. Command line argument: make $(1) MODEL_PATH=/path/to/model"; \
  7. exit 1; \
  8. fi
  9. endef
  10. define validate_embedding_model_path
  11. @if [ -z "$(EMBEDDING_MODEL_PATH)" ]; then \
  12. echo "Error: EMBEDDING_MODEL_PATH must be provided either as:"; \
  13. echo " 1. Environment variable: export EMBEDDING_MODEL_PATH=/path/to/model"; \
  14. echo " 2. Command line argument: make $(1) EMBEDDING_MODEL_PATH=/path/to/model"; \
  15. exit 1; \
  16. fi
  17. endef
# Quantize a converted GGUF model via scripts/utils/quantize.sh.
#   $(1): path of the converted model to quantize.
#   $(2): name of the environment variable the user should export afterwards
#         to point at the quantized model (the final echo reminds them).
# NOTE(review): the same values are passed both as environment variables and
# as positional arguments — presumably the script accepts either form; confirm
# against scripts/utils/quantize.sh before simplifying.
define quantize_model
@CONVERTED_MODEL="$(1)" QUANTIZED_TYPE="$(QUANTIZED_TYPE)" \
TOKEN_EMBD_TYPE="$(TOKEN_EMBD_TYPE)" OUTPUT_TYPE="$(OUTPUT_TYPE)" \
./scripts/utils/quantize.sh "$(1)" "$(QUANTIZED_TYPE)" "$(TOKEN_EMBD_TYPE)" "$(OUTPUT_TYPE)"
@echo "Export the quantized model path to $(2) variable in your environment"
endef
# Device passed to run-org-model.py via --device; "auto" lets the script choose.
DEVICE ?= auto
###
### Causal Model targets/recipes
###
  28. causal-convert-model-bf16: OUTTYPE=bf16
  29. causal-convert-model-bf16: causal-convert-model
  30. causal-convert-model:
  31. $(call validate_model_path,causal-convert-model)
  32. @MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(MODEL_PATH)" \
  33. METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
  34. ./scripts/causal/convert-model.sh
  35. causal-convert-mm-model-bf16: OUTTYPE=bf16
  36. causal-convert-mm-model-bf16: MM_OUTTYPE=f16
  37. causal-convert-mm-model-bf16: causal-convert-mm-model
  38. causal-convert-mm-model:
  39. $(call validate_model_path,causal-convert-mm-model)
  40. @MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(MODEL_PATH)" \
  41. METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
  42. ./scripts/causal/convert-model.sh
  43. @MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(MM_OUTTYPE)" MODEL_PATH="$(MODEL_PATH)" \
  44. METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
  45. ./scripts/causal/convert-model.sh --mmproj
  46. causal-run-original-model:
  47. $(call validate_model_path,causal-run-original-model)
  48. @MODEL_PATH="$(MODEL_PATH)" ./scripts/causal/run-org-model.py --device "$(DEVICE)"
  49. causal-run-converted-model:
  50. @CONVERTED_MODEL="$(CONVERTED_MODEL)" ./scripts/causal/run-converted-model.sh
  51. causal-verify-logits: causal-run-original-model causal-run-converted-model
  52. @./scripts/causal/compare-logits.py
  53. @MODEL_PATH="$(MODEL_PATH)" ./scripts/utils/check-nmse.py -m ${MODEL_PATH}
  54. causal-run-original-embeddings:
  55. @./scripts/causal/run-casual-gen-embeddings-org.py
  56. causal-run-converted-embeddings:
  57. @./scripts/causal/run-converted-model-embeddings-logits.sh
  58. causal-verify-embeddings: causal-run-original-embeddings causal-run-converted-embeddings
  59. @./scripts/causal/compare-embeddings-logits.sh
  60. causal-inspect-original-model:
  61. @./scripts/utils/inspect-org-model.py
  62. causal-inspect-converted-model:
  63. @./scripts/utils/inspect-converted-model.sh
  64. causal-start-embedding-server:
  65. @./scripts/utils/run-embedding-server.sh ${CONVERTED_MODEL}
  66. causal-curl-embedding-endpoint: causal-run-original-embeddings
  67. @./scripts/utils/curl-embedding-server.sh | ./scripts/causal/compare-embeddings-logits.sh
  68. causal-quantize-Q8_0: QUANTIZED_TYPE = Q8_0
  69. causal-quantize-Q8_0: causal-quantize-model
  70. causal-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
  71. causal-quantize-Q4_0: causal-quantize-model
  72. # For Quantization Aware Trained (QAT) models in Q4_0 we explicitly set the
  73. # token embedding and output types to Q8_0 instead of the default Q6_K.
  74. causal-quantize-qat-Q4_0: QUANTIZED_TYPE = Q4_0
  75. causal-quantize-qat-Q4_0: TOKEN_EMBD_TYPE = Q8_0
  76. causal-quantize-qat-Q4_0: OUTPUT_TYPE = Q8_0
  77. causal-quantize-qat-Q4_0: causal-quantize-model
  78. causal-quantize-model:
  79. $(call quantize_model,$(CONVERTED_MODEL),QUANTIZED_MODEL)
  80. causal-run-quantized-model:
  81. @QUANTIZED_MODEL="$(QUANTIZED_MODEL)" ./scripts/causal/run-converted-model.sh ${QUANTIZED_MODEL}
  82. ###
  83. ### Embedding Model targets/recipes
  84. ###
  85. embedding-convert-model-bf16: OUTTYPE=bf16
  86. embedding-convert-model-bf16: embedding-convert-model
  87. embedding-convert-model:
  88. $(call validate_embedding_model_path,embedding-convert-model)
  89. @MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
  90. METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
  91. ./scripts/embedding/convert-model.sh
  92. embedding-convert-model-st:
  93. $(call validate_embedding_model_path,embedding-convert-model-st)
  94. @MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
  95. METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
  96. ./scripts/embedding/convert-model.sh -st
  97. embedding-run-original-model:
  98. $(call validate_embedding_model_path,embedding-run-original-model)
  99. @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
  100. USE_SENTENCE_TRANSFORMERS="$(USE_SENTENCE_TRANSFORMERS)" \
  101. ./scripts/embedding/run-original-model.py \
  102. $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") \
  103. $(if $(USE_SENTENCE_TRANSFORMERS),--use-sentence-transformers)
  104. embedding-run-original-model-st: USE_SENTENCE_TRANSFORMERS=1
  105. embedding-run-original-model-st: embedding-run-original-model
  106. embedding-run-converted-model:
  107. @./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \
  108. $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") \
  109. $(if $(USE_POOLING),--pooling)
  110. embedding-run-converted-model-st: USE_POOLING=1
  111. embedding-run-converted-model-st: embedding-run-converted-model
  112. embedding-verify-logits: embedding-run-original-model embedding-run-converted-model
  113. @./scripts/embedding/compare-embeddings-logits.sh \
  114. $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
  115. embedding-verify-logits-st: embedding-run-original-model-st embedding-run-converted-model-st
  116. @./scripts/embedding/compare-embeddings-logits.sh \
  117. $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
  118. embedding-inspect-original-model:
  119. $(call validate_embedding_model_path,embedding-inspect-original-model)
  120. @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/utils/inspect-org-model.py -m ${EMBEDDING_MODEL_PATH}
  121. embedding-inspect-converted-model:
  122. @CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/utils/inspect-converted-model.sh ${CONVERTED_EMBEDDING_MODEL}
  123. embedding-start-embedding-server:
  124. @./scripts/utils/run-embedding-server.sh ${CONVERTED_EMBEDDING_MODEL}
  125. embedding-curl-embedding-endpoint:
  126. @./scripts/utils/curl-embedding-server.sh | ./scripts/embedding/compare-embeddings-logits.sh
  127. embedding-quantize-Q8_0: QUANTIZED_TYPE = Q8_0
  128. embedding-quantize-Q8_0: embedding-quantize-model
  129. embedding-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
  130. embedding-quantize-Q4_0: embedding-quantize-model
  131. # For Quantization Aware Trained (QAT) models in Q4_0 we explicitly set the
  132. # token embedding and output types to Q8_0 instead of the default Q6_K.
  133. embedding-quantize-qat-Q4_0: QUANTIZED_TYPE = Q4_0
  134. embedding-quantize-qat-Q4_0: TOKEN_EMBD_TYPE = Q8_0
  135. embedding-quantize-qat-Q4_0: OUTPUT_TYPE = Q8_0
  136. embedding-quantize-qat-Q4_0: embedding-quantize-model
  137. embedding-quantize-model:
  138. $(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL)
  139. embedding-run-quantized-model:
  140. @./scripts/embedding/run-converted-model.sh $(QUANTIZED_EMBEDDING_MODEL) \
  141. $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
  142. ###
  143. ### Perplexity targets/recipes
  144. ###
  145. perplexity-data-gen:
  146. CONVERTED_MODEL="$(CONVERTED_MODEL)" ./scripts/utils/perplexity-gen.sh
  147. perplexity-run-full:
  148. QUANTIZED_MODEL="$(QUANTIZED_MODEL)" LOOGITS_FILE="$(LOGITS_FILE)" \
  149. ./scripts/utils/perplexity-run.sh
  150. perplexity-run:
  151. QUANTIZED_MODEL="$(QUANTIZED_MODEL)" ./scripts/utils/perplexity-run-simple.sh
  152. ###
  153. ### HuggingFace targets/recipes
  154. ###
  155. hf-create-model:
  156. @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}"
  157. hf-create-model-dry-run:
  158. @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -d
  159. hf-create-model-embedding:
  160. @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e
  161. hf-create-model-embedding-dry-run:
  162. @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e -d
  163. hf-create-model-private:
  164. @./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -p
  165. hf-upload-gguf-to-model:
  166. @./scripts/utils/hf-upload-gguf-model.py -m "${MODEL_PATH}" -r "${REPO_ID}" -o "${NAME_IN_REPO}"
  167. hf-create-collection:
  168. @./scripts/utils/hf-create-collection.py -n "${NAME}" -d "${DESCRIPTION}" -ns "${NAMESPACE}"
  169. hf-add-model-to-collection:
  170. @./scripts/utils/hf-add-model-to-collection.py -c "${COLLECTION}" -m "${MODEL}"
.PHONY: clean
# Remove the generated data directory and the cached conversion/model-name
# marker files produced by the convert targets.
clean:
	@${RM} -rf data .converted_embedding_model.txt .converted_model.txt .embedding_model_name.txt .model_name.txt