1
0

tests.sh 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. #!/usr/bin/env bash
  2. set -eu
  3. if [ $# -lt 1 ]
  4. then
  5. echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
  6. echo "example: $0 ../../build/bin ../../tmp"
  7. exit 1
  8. fi
  9. if [ $# -gt 1 ]
  10. then
  11. TMP_DIR=$2
  12. else
  13. TMP_DIR=/tmp
  14. fi
  15. set -x
  16. SPLIT=$1/llama-gguf-split
  17. MAIN=$1/llama-cli
  18. WORK_PATH=$TMP_DIR/gguf-split
  19. ROOT_DIR=$(realpath $(dirname $0)/../../)
  20. mkdir -p "$WORK_PATH"
  21. # Clean up in case of previously failed test
  22. rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf
  23. # 1. Get a model
  24. (
  25. cd $WORK_PATH
  26. "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
  27. )
  28. echo PASS
  29. # 2. Split with max tensors strategy
  30. $SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
  31. echo PASS
  32. echo
  33. # 2b. Test the sharded model is loading properly
  34. $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
  35. echo PASS
  36. echo
  37. # 3. Merge
  38. $SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf
  39. echo PASS
  40. echo
  41. # 3b. Test the merged model is loading properly
  42. $MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
  43. echo PASS
  44. echo
  45. # 4. Split with no tensors in the first split
  46. $SPLIT --split-max-tensors 32 --no-tensor-first-split $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
  47. echo PASS
  48. echo
  49. # 4b. Test the sharded model is loading properly
  50. $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
  51. echo PASS
  52. echo
  53. # 5. Merge
  54. #$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
  55. #echo PASS
  56. #echo
  57. # 5b. Test the merged model is loading properly
  58. #$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge-2.gguf --n-predict 32
  59. #echo PASS
  60. #echo
  61. # 6. Split with size strategy
  62. $SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G
  63. echo PASS
  64. echo
  65. # 6b. Test the sharded model is loading properly
  66. $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
  67. echo PASS
  68. echo
  69. # Clean up
  70. rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf