1
0

quantize.sh 935 B

12345678910111213141516171819202122232425262728293031323334
  #!/bin/bash
  #
  # Quantize a converted GGUF model with llama-quantize.
  #
  # Usage:
  #   quantize.sh [CONVERTED_MODEL [QUANTIZED_TYPE]]
  #
  # Inputs (a positional argument wins over the environment variable):
  #   $1 / CONVERTED_MODEL  path to the input model; must end in .gguf
  #   $2 / QUANTIZED_TYPE   quantization type handed to llama-quantize
  #
  # The output is written next to the input as <base>-<QUANTIZED_TYPE>.gguf.
  # Exits with status 1 (message on stderr) if an input is missing or the
  # model path does not end in .gguf.
  set -euo pipefail

  # The ${VAR:-} fallbacks keep `set -u` from aborting when the environment
  # variables are not set; the explicit checks below report the problem.
  CONVERTED_MODEL="${1:-${CONVERTED_MODEL:-}}"
  QUANTIZED_TYPE="${2:-${QUANTIZED_TYPE:-}}"
  # The output name is derived from the input path further down.
  QUANTIZED_MODEL=$CONVERTED_MODEL

  # Final check if we have a model path
  if [[ -z "$CONVERTED_MODEL" ]]; then
    echo "Error: Model path must be provided either as:" >&2
    echo " 1. Command line argument" >&2
    echo " 2. CONVERTED_MODEL environment variable" >&2
    exit 1
  fi

  # Fail before the build if there is no quantization type: an empty type
  # would otherwise produce "<base>-.gguf" and an incomplete llama-quantize
  # command line.
  if [[ -z "$QUANTIZED_TYPE" ]]; then
    echo "Error: Quantization type must be provided either as:" >&2
    echo " 1. Second command line argument" >&2
    echo " 2. QUANTIZED_TYPE environment variable" >&2
    exit 1
  fi

  printf '%s\n' "$CONVERTED_MODEL"

  # Process the quantized model filename. QUANTIZED_MODEL still holds the
  # input path here, so this check effectively validates the input's extension.
  if [[ "$QUANTIZED_MODEL" != *.gguf ]]; then
    echo "Error: QUANTIZED_MODEL must end with .gguf extension" >&2
    exit 1
  fi
  # Remove .gguf suffix, add quantized type, then add .gguf back
  QUANTIZED_MODEL="${QUANTIZED_MODEL%.gguf}-${QUANTIZED_TYPE}.gguf"

  # Build paths are relative to the current working directory.
  # NOTE(review): presumably the script is run from a directory two levels
  # below the build root -- confirm against the caller.
  cmake --build ../../build --target llama-quantize -j8

  # Quote every argument so paths containing spaces or glob characters reach
  # llama-quantize as single, unmodified words.
  ../../build/bin/llama-quantize "$CONVERTED_MODEL" "$QUANTIZED_MODEL" "$QUANTIZED_TYPE"

  echo "Quantized model saved to: $QUANTIZED_MODEL"