test_router.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import pytest
  2. from utils import *
  3. server: ServerProcess
  4. @pytest.fixture(autouse=True)
  5. def create_server():
  6. global server
  7. server = ServerPreset.router()
  8. @pytest.mark.parametrize(
  9. "model,success",
  10. [
  11. ("ggml-org/tinygemma3-GGUF:Q8_0", True),
  12. ("non-existent/model", False),
  13. ]
  14. )
  15. def test_router_chat_completion_stream(model: str, success: bool):
  16. # TODO: make sure the model is in cache (ie. ServerProcess.load_all()) before starting the router server
  17. global server
  18. server.start()
  19. content = ""
  20. ex: ServerError | None = None
  21. try:
  22. res = server.make_stream_request("POST", "/chat/completions", data={
  23. "model": model,
  24. "max_tokens": 16,
  25. "messages": [
  26. {"role": "user", "content": "hello"},
  27. ],
  28. "stream": True,
  29. })
  30. for data in res:
  31. if data["choices"]:
  32. choice = data["choices"][0]
  33. if choice["finish_reason"] in ["stop", "length"]:
  34. assert "content" not in choice["delta"]
  35. else:
  36. assert choice["finish_reason"] is None
  37. content += choice["delta"]["content"] or ''
  38. except ServerError as e:
  39. ex = e
  40. if success:
  41. assert ex is None
  42. assert len(content) > 0
  43. else:
  44. assert ex is not None
  45. assert content == ""