quantize.sh 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  #!/usr/bin/env bash
  #
  # Quantize a converted GGUF model with llama-quantize.
  #
  # Usage:
  #   quantize.sh [CONVERTED_MODEL [QUANTIZED_TYPE [TOKEN_EMBD_TYPE
  #               [OUTPUT_TYPE [BUILD_DIR]]]]]
  #
  # Every positional argument falls back to the environment variable of the
  # same name when it is omitted or empty:
  #   CONVERTED_MODEL  (required) path of the input model; must end in .gguf
  #   QUANTIZED_TYPE   (required) quantization type handed to llama-quantize
  #   TOKEN_EMBD_TYPE  (optional) forwarded as --token-embedding-type
  #   OUTPUT_TYPE      (optional) forwarded as --output-tensor-type
  #   BUILD_DIR        (optional) cmake build directory; default ../../build
  #
  # The output file is written next to the input as
  # <input-without-.gguf>-<QUANTIZED_TYPE>.gguf.
  # Exits 1 when a required value is missing or the input is not a .gguf path.
  set -euo pipefail

  # Positional arguments win over the environment. The inner ${VAR:-} keeps
  # `set -u` from aborting on unset variables so the checks below can report
  # missing values with their own messages.
  CONVERTED_MODEL="${1:-${CONVERTED_MODEL:-}}"
  QUANTIZED_TYPE="${2:-${QUANTIZED_TYPE:-}}"
  TOKEN_EMBD_TYPE="${3:-${TOKEN_EMBD_TYPE:-}}"
  OUTPUT_TYPE="${4:-${OUTPUT_TYPE:-}}"
  BUILD_DIR="${5:-${BUILD_DIR:-../../build}}"

  # Final check if we have a model path
  if [[ -z "$CONVERTED_MODEL" ]]; then
    echo "Error: Model path must be provided either as:" >&2
    echo " 1. Command line argument" >&2
    echo " 2. CONVERTED_MODEL environment variable" >&2
    exit 1
  fi

  if [[ -z "$QUANTIZED_TYPE" ]]; then
    echo "Error: QUANTIZED_TYPE is required" >&2
    exit 1
  fi

  # printf instead of echo: the value is printed verbatim even if it starts
  # with a dash or contains whitespace/glob characters.
  printf '%s\n' "$CONVERTED_MODEL"

  # Derive the output filename: strip the .gguf suffix, append the
  # quantization type, then put the suffix back.
  if [[ "$CONVERTED_MODEL" != *.gguf ]]; then
    echo "Error: CONVERTED_MODEL must end with .gguf extension" >&2
    exit 1
  fi
  QUANTIZED_MODEL="${CONVERTED_MODEL%.gguf}-${QUANTIZED_TYPE}.gguf"

  # Build (or refresh) the quantize tool before running it.
  cmake --build "$BUILD_DIR" --target llama-quantize -j8

  printf '%s\n' "$TOKEN_EMBD_TYPE"
  printf '%s\n' "$OUTPUT_TYPE"

  # Assemble the command as an array so every argument stays a single word
  # regardless of spaces in paths.
  CMD_ARGS=("${BUILD_DIR}/bin/llama-quantize")
  if [[ -n "$TOKEN_EMBD_TYPE" ]]; then
    CMD_ARGS+=("--token-embedding-type" "$TOKEN_EMBD_TYPE")
  fi
  if [[ -n "$OUTPUT_TYPE" ]]; then
    CMD_ARGS+=("--output-tensor-type" "$OUTPUT_TYPE")
  fi
  CMD_ARGS+=("$CONVERTED_MODEL" "$QUANTIZED_MODEL" "$QUANTIZED_TYPE")

  "${CMD_ARGS[@]}"

  echo "Quantized model saved to: $QUANTIZED_MODEL"