|
@@ -13,28 +13,28 @@ def test_infill_without_input_extra():
|
|
|
global server
|
|
global server
|
|
|
server.start()
|
|
server.start()
|
|
|
res = server.make_request("POST", "/infill", data={
|
|
res = server.make_request("POST", "/infill", data={
|
|
|
- "prompt": "Complete this",
|
|
|
|
|
- "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",
|
|
|
|
|
|
|
+ "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
|
|
|
|
+ "prompt": " int n_threads = llama_",
|
|
|
"input_suffix": "}\n",
|
|
"input_suffix": "}\n",
|
|
|
})
|
|
})
|
|
|
assert res.status_code == 200
|
|
assert res.status_code == 200
|
|
|
- assert match_regex("(One|day|she|saw|big|scary|bird)+", res.body["content"])
|
|
|
|
|
|
|
+ assert match_regex("(Ann|small|shiny)+", res.body["content"])
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_infill_with_input_extra():
|
|
def test_infill_with_input_extra():
|
|
|
global server
|
|
global server
|
|
|
server.start()
|
|
server.start()
|
|
|
res = server.make_request("POST", "/infill", data={
|
|
res = server.make_request("POST", "/infill", data={
|
|
|
- "prompt": "Complete this",
|
|
|
|
|
"input_extra": [{
|
|
"input_extra": [{
|
|
|
"filename": "llama.h",
|
|
"filename": "llama.h",
|
|
|
"text": "LLAMA_API int32_t llama_n_threads();\n"
|
|
"text": "LLAMA_API int32_t llama_n_threads();\n"
|
|
|
}],
|
|
}],
|
|
|
- "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",
|
|
|
|
|
|
|
+ "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
|
|
|
|
+ "prompt": " int n_threads = llama_",
|
|
|
"input_suffix": "}\n",
|
|
"input_suffix": "}\n",
|
|
|
})
|
|
})
|
|
|
assert res.status_code == 200
|
|
assert res.status_code == 200
|
|
|
- assert match_regex("(cuts|Jimmy|mom|came|into|the|room)+", res.body["content"])
|
|
|
|
|
|
|
+ assert match_regex("(Dad|excited|park)+", res.body["content"])
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("input_extra", [
|
|
@pytest.mark.parametrize("input_extra", [
|
|
@@ -48,10 +48,30 @@ def test_invalid_input_extra_req(input_extra):
|
|
|
global server
|
|
global server
|
|
|
server.start()
|
|
server.start()
|
|
|
res = server.make_request("POST", "/infill", data={
|
|
res = server.make_request("POST", "/infill", data={
|
|
|
- "prompt": "Complete this",
|
|
|
|
|
"input_extra": [input_extra],
|
|
"input_extra": [input_extra],
|
|
|
- "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",
|
|
|
|
|
|
|
+ "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
|
|
|
|
+ "prompt": " int n_threads = llama_",
|
|
|
"input_suffix": "}\n",
|
|
"input_suffix": "}\n",
|
|
|
})
|
|
})
|
|
|
assert res.status_code == 400
|
|
assert res.status_code == 400
|
|
|
assert "error" in res.body
|
|
assert "error" in res.body
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+@pytest.mark.skipif(not is_slow_test_allowed(), reason="skipping slow test")
|
|
|
|
|
+def test_with_qwen_model():
|
|
|
|
|
+ global server
|
|
|
|
|
+ server.model_file = None
|
|
|
|
|
+ server.model_hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-IQ3_XXS-GGUF"
|
|
|
|
|
+ server.model_hf_file = "qwen2.5-coder-1.5b-iq3_xxs-imat.gguf"
|
|
|
|
|
+ server.start(timeout_seconds=600)
|
|
|
|
|
+ res = server.make_request("POST", "/infill", data={
|
|
|
|
|
+ "input_extra": [{
|
|
|
|
|
+ "filename": "llama.h",
|
|
|
|
|
+ "text": "LLAMA_API int32_t llama_n_threads();\n"
|
|
|
|
|
+ }],
|
|
|
|
|
+ "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n",
|
|
|
|
|
+ "prompt": " int n_threads = llama_",
|
|
|
|
|
+ "input_suffix": "}\n",
|
|
|
|
|
+ })
|
|
|
|
|
+ assert res.status_code == 200
|
|
|
|
|
+ assert res.body["content"] == "n_threads();\n printf(\"Number of threads: %d\\n\", n_threads);\n return 0;\n"
|