| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import pytest
- from utils import *
# Module-level handle to the server under test. It is only declared here;
# the autouse fixture below binds it before each test function runs.
server: ServerProcess


@pytest.fixture(autouse=True)
def create_server():
    """Bind the module-level ``server`` to the router preset.

    Declared with ``autouse=True``, so pytest invokes it for every test in
    this module without the test having to request it. The fixture only
    assigns the object returned by ``ServerPreset.router()``; it does not
    call ``start()`` on it -- each test does that itself.
    """
    global server
    router_preset = ServerPreset.router()
    server = router_preset
@pytest.mark.parametrize(
    "model,success",
    [
        ("ggml-org/tinygemma3-GGUF:Q8_0", True),
        ("non-existent/model", False),
    ],
)
def test_router_chat_completion_stream(model: str, success: bool):
    """Stream a chat completion via the router and check the outcome.

    Args:
        model: Value sent as the ``"model"`` field of the request body.
        success: Whether the request is expected to complete without a
            ``ServerError``.

    When ``success`` is true, the stream must finish without a
    ``ServerError`` and yield some non-empty content. Otherwise a
    ``ServerError`` must be raised and no content collected.
    """
    # TODO: make sure the model is in cache (ie. ServerProcess.load_all()) before starting the router server
    global server
    server.start()

    payload = {
        "model": model,
        "max_tokens": 16,
        "messages": [
            {"role": "user", "content": "hello"},
        ],
        "stream": True,
    }

    content = ""
    error: ServerError | None = None
    try:
        stream = server.make_stream_request("POST", "/chat/completions", data=payload)
        for chunk in stream:
            choices = chunk["choices"]
            if not choices:
                # Chunks with an empty "choices" list carry nothing to check.
                continue

            first = choices[0]
            finish_reason = first["finish_reason"]
            if finish_reason in ("stop", "length"):
                # The terminating chunk must not carry any content.
                assert "content" not in first["delta"]
                continue

            # Any non-terminating chunk must have a null finish_reason; its
            # delta content may be None, which is treated as an empty string.
            assert finish_reason is None
            content += first["delta"]["content"] or ''
    except ServerError as err:
        # Only ServerError is recorded; assertion failures above propagate.
        error = err

    if not success:
        assert error is not None
        assert content == ""
        return

    assert error is None
    assert len(content) > 0
|