test-tokenizers-repo.sh 893 B

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. #!/usr/bin/env bash
  2. if [ $# -lt 2 ]; then
  3. printf "Usage: $0 <git-repo> <target-folder> [<test-exe>]\n"
  4. exit 1
  5. fi
  6. if [ $# -eq 3 ]; then
  7. toktest=$3
  8. else
  9. toktest="./test-tokenizer-0"
  10. fi
  11. if [ ! -x $toktest ]; then
  12. printf "Test executable \"$toktest\" not found!\n"
  13. exit 1
  14. fi
  15. repo=$1
  16. folder=$2
  17. if [ -d $folder ] && [ -d $folder/.git ]; then
  18. (cd $folder; git pull)
  19. else
  20. git clone $repo $folder
  21. # byteswap models if on big endian
  22. if [ "$(uname -m)" = s390x ]; then
  23. for f in $folder/*/*.gguf; do
  24. echo YES | python3 "$(dirname $0)/../gguf-py/gguf/scripts/gguf_convert_endian.py" $f big
  25. done
  26. fi
  27. fi
  28. shopt -s globstar
  29. for gguf in $folder/**/*.gguf; do
  30. if [ -f $gguf.inp ] && [ -f $gguf.out ]; then
  31. $toktest $gguf
  32. else
  33. printf "Found \"$gguf\" without matching inp/out files, ignoring...\n"
  34. fi
  35. done