#!/usr/bin/env bash
#
# quantize.sh — quantize a converted GGUF model with llama-quantize.
#
# Usage:
#   quantize.sh [CONVERTED_MODEL] [QUANTIZED_TYPE] [TOKEN_EMBD_TYPE] [OUTPUT_TYPE]
#
# Each positional argument may instead be supplied via the environment
# variable of the same name; command-line arguments take precedence.
# CONVERTED_MODEL and QUANTIZED_TYPE are required; the token-embedding and
# output-tensor type overrides are optional and only forwarded when set.

set -euo pipefail

# ${VAR:-} inner defaults keep `set -u` from aborting when the env vars
# are simply unset (the explicit checks below report that case properly).
CONVERTED_MODEL="${1:-"${CONVERTED_MODEL:-}"}"
QUANTIZED_TYPE="${2:-"${QUANTIZED_TYPE:-}"}"
TOKEN_EMBD_TYPE="${3:-"${TOKEN_EMBD_TYPE:-}"}"
OUTPUT_TYPE="${4:-"${OUTPUT_TYPE:-}"}"

# Final check if we have a model path
if [[ -z "$CONVERTED_MODEL" ]]; then
    echo "Error: Model path must be provided either as:" >&2
    echo " 1. Command line argument" >&2
    echo " 2. CONVERTED_MODEL environment variable" >&2
    exit 1
fi

if [[ -z "$QUANTIZED_TYPE" ]]; then
    echo "Error: QUANTIZED_TYPE is required" >&2
    exit 1
fi

printf '%s\n' "$CONVERTED_MODEL"

# Derive the quantized model filename: insert the quantization type before
# the .gguf extension (model.gguf -> model-<TYPE>.gguf).
QUANTIZED_MODEL=$CONVERTED_MODEL
if [[ "$QUANTIZED_MODEL" == *.gguf ]]; then
    BASE_NAME="${QUANTIZED_MODEL%.gguf}"
    QUANTIZED_MODEL="${BASE_NAME}-${QUANTIZED_TYPE}.gguf"
else
    echo "Error: QUANTIZED_MODEL must end with .gguf extension" >&2
    exit 1
fi

# Make sure the quantize tool is up to date before running it.
cmake --build ../../build --target llama-quantize -j8

printf '%s\n' "$TOKEN_EMBD_TYPE"
printf '%s\n' "$OUTPUT_TYPE"

# Build the command as an array so optional flags and paths containing
# spaces are passed through as intact words.
CMD_ARGS=("../../build/bin/llama-quantize")
[[ -n "$TOKEN_EMBD_TYPE" ]] && CMD_ARGS+=("--token-embedding-type" "$TOKEN_EMBD_TYPE")
[[ -n "$OUTPUT_TYPE" ]] && CMD_ARGS+=("--output-tensor-type" "$OUTPUT_TYPE")
CMD_ARGS+=("$CONVERTED_MODEL" "$QUANTIZED_MODEL" "$QUANTIZED_TYPE")

"${CMD_ARGS[@]}"

echo "Quantized model saved to: $QUANTIZED_MODEL"