test_basic.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import pytest
  2. from utils import *
  # Shared server handle used by every test in this module. It is rebound by
  # the ``create_server`` fixture below, so tests always read the current value.
  server = ServerPreset.tinyllama2()


  @pytest.fixture(autouse=True)
  def create_server():
      """Rebind the module-level ``server`` to a fresh tinyllama2 preset.

      The fixture is function-scoped (pytest's default) on purpose: some tests
      in this module mutate attributes of the shared preset (for example
      ``model_hf_repo`` / ``model_hf_file`` / ``model_alias``), and a
      module-scoped fixture would let those changes leak into whichever test
      runs next. Rebuilding the preset per test keeps tests order-independent.

      NOTE(review): this fixture only creates the preset; starting the server
      is done by each test, and stopping it is presumably handled elsewhere
      (e.g. a conftest teardown) -- confirm.
      """
      global server
      server = ServerPreset.tinyllama2()
  def test_server_start_simple():
      """Start the shared server and expect ``GET /health`` to answer 200."""
      # ``server`` is only read here, so no ``global`` declaration is required.
      server.start()
      health = server.make_request("GET", "/health")
      assert health.status_code == 200
  def test_server_props():
      """Check that ``GET /props`` succeeds and reports the configured slots.

      The ``total_slots`` field of the response body must equal
      ``server.n_slots``.
      """
      server.start()
      props = server.make_request("GET", "/props")
      assert props.status_code == 200
      reported_slots = props.body["total_slots"]
      assert reported_slots == server.n_slots
  def test_server_models():
      """Check that ``GET /models`` lists exactly one model with the right id.

      The single entry under ``data`` must carry ``server.model_alias`` as
      its ``id``.
      """
      server.start()
      listing = server.make_request("GET", "/models")
      assert listing.status_code == 200
      models = listing.body["data"]
      assert len(models) == 1
      assert models[0]["id"] == server.model_alias
  def test_load_split_model():
      """Start the server on a split GGUF model and run a short completion.

      The shared preset is reconfigured in place before start-up, then a
      16-token completion for the prompt "Hello" must return 200 and produce
      text matching ``(little|girl)+``.
      """
      # Reconfigure the shared preset; the file name suggests this is shard 1
      # of a 3-part split model (presumably the loader finds the rest --
      # confirm against the server docs).
      server.model_hf_repo = "ggml-org/models"
      server.model_hf_file = "tinyllamas/split/stories15M-q8_0-00001-of-00003.gguf"
      server.model_alias = "tinyllama-split"
      server.start()

      payload = {
          "n_predict": 16,
          "prompt": "Hello",
          "temperature": 0.0,  # presumably chosen for reproducible output
      }
      completion = server.make_request("POST", "/completion", data=payload)
      assert completion.status_code == 200
      assert match_regex("(little|girl)+", completion.body["content"])