test_bench.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. import pytest
  2. import subprocess
  3. import sys
  4. tmp_path='/data/local/tmp'
  5. pkg_path=f'{tmp_path}/llama.cpp'
  6. lib_path=f'{pkg_path}/lib'
  7. bin_path=f'{pkg_path}/bin'
  8. model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
  9. cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'
  10. def run_cmd(cmd):
  11. p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
  12. sys.stdout.write(p.stdout)
  13. assert(p.returncode == 0)
  14. @pytest.mark.dependency()
  15. def test_install():
  16. run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}'])
  17. run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*'])
  18. ## Basic cli tests
  19. def run_llama_cli(dev, opts):
  20. prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
  21. opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts
  22. run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"'])
  23. @pytest.mark.dependency(depends=['test_install'])
  24. def test_llama_cli_cpu():
  25. run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on')
  26. @pytest.mark.dependency(depends=['test_install'])
  27. def test_llama_cli_gpu():
  28. run_llama_cli('GPUOpenCL', '-fa on')
  29. @pytest.mark.dependency(depends=['test_install'])
  30. def test_llama_cli_npu():
  31. run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on')
  32. ## Basic bench tests
  33. def run_llama_bench(dev):
  34. run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32'])
  35. @pytest.mark.dependency(depends=['test_install'])
  36. def test_llama_bench_cpu():
  37. run_llama_bench('none')
  38. def test_llama_bench_gpu():
  39. run_llama_bench('GPUOpenCL')
  40. def test_llama_bench_npu():
  41. run_llama_bench('HTP0')