1
0

tests.sh 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. #!/usr/bin/env bash
  2. set -eu
  3. if [ $# -lt 1 ]
  4. then
  5. echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
  6. echo "example: $0 ../../build/bin ../../tmp"
  7. exit 1
  8. fi
  9. if [ $# -gt 1 ]
  10. then
  11. TMP_DIR=$2
  12. else
  13. TMP_DIR=/tmp
  14. fi
  15. set -x
  16. SPLIT=$1/llama-gguf-split
  17. QUANTIZE=$1/llama-quantize
  18. MAIN=$1/llama-completion
  19. WORK_PATH=$TMP_DIR/quantize
  20. ROOT_DIR=$(realpath $(dirname $0)/../../)
  21. mkdir -p "$WORK_PATH"
  22. # Clean up in case of previously failed test
  23. rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
  24. # 1. Get a model
  25. (
  26. cd $WORK_PATH
  27. "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
  28. )
  29. echo PASS
  30. # 2. Split model
  31. $SPLIT --split-max-tensors 28 $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split
  32. echo PASS
  33. echo
  34. # 3. Requant model with '--keep-split'
  35. $QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
  36. echo PASS
  37. echo
  38. # 3a. Test the requanted model is loading properly
  39. $MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32
  40. echo PASS
  41. echo
  42. # 4. Requant mode without '--keep-split'
  43. $QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
  44. echo PASS
  45. echo
  46. # 4b. Test the requanted model is loading properly
  47. $MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf -p "I believe the meaning of life is" --n-predict 32
  48. echo PASS
  49. echo
  50. # Clean up
  51. rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf