
server: /v1/responses (partial) (#18486)

* from previous PR

* Make instructions (system) the first message

* Convert [input_message] (text/image/file)

* Rename convert_responses_to_chatcmpl parameter body -> response_body

* Initial tool call support

* Erase instructions field from chatcmpl body

* Feed reasoning texts to chat template

* Use std::vector instead of opaque json array

* Make output_item.added events consistent

* Move `server_task_result_cmpl_partial::update` from header to source

* Match ID of output_item.added and .done events

* Add function_call only if there is no "fc_" prefix

* Add function call output to the non-streaming API

* Test if ID is persistent

* Add doc

* Fix style - use trailing comma

* Rewrite state management

* catch up with upstream/master

* Fix style - "type" is the first item of SSE data

* Explicitly check "instructions" from response_body

* Make lambdas static

* Check if reasoning content exists

* Add `oai_resp_id` to task_result_state (also initialized in the ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final

* Reject `input_file` since it is not supported by chatcmpl

* Add "fc_" prefix to non-streaming function call id as coderabbit pointed out

---------

Co-authored-by: openingnow <>
손희준 · 1 week ago
commit fbbf3ad190

+ 1 - 1
requirements/requirements-tool_bench.txt

@@ -3,7 +3,7 @@ pytest~=8.3.3
 huggingface_hub>=0.34.0,<1.0
 matplotlib~=3.10.0
 numpy~=1.26.4
-openai~=1.55.3
+openai~=2.14.0
 pandas~=2.2.3
 prometheus-client~=0.20.0
 requests~=2.32.3

+ 44 - 1
tools/server/README.md

@@ -6,7 +6,7 @@ Set of LLM REST APIs and a web UI to interact with llama.cpp.
 
 **Features:**
  * LLM inference of F16 and quantized models on GPU and CPU
- * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes
+ * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions, responses, and embeddings routes
  * [Anthropic Messages API](https://docs.anthropic.com/en/api/messages) compatible chat completions
  * Reranking endpoint (https://github.com/ggml-org/llama.cpp/pull/9510)
  * Parallel decoding with multi-user support
@@ -1267,6 +1267,49 @@ This provides information on the performance of the server. It also allows calcu
 
 The total number of tokens in context is equal to `prompt_n + cache_n + predicted_n`
 
+### POST `/v1/responses`: OpenAI-compatible Responses API
+
+*Options:*
+
+See [OpenAI Responses API documentation](https://platform.openai.com/docs/api-reference/responses).
+
+*Examples:*
+
+You can either use the Python `openai` library:
+
+```python
+import openai
+
+client = openai.OpenAI(
+    base_url="http://localhost:8080/v1", # "http://<Your api-server IP>:port"
+    api_key = "sk-no-key-required"
+)
+
+response = client.responses.create(
+  model="gpt-4.1",
+  instructions="You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
+  input="Write a limerick about python exceptions"
+)
+
+print(response.output_text)
+```
+
+... or raw HTTP requests:
+
+```shell
+curl http://localhost:8080/v1/responses \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer no-key" \
+-d '{
+"model": "gpt-4.1",
+"instructions": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests.",
+"input": "Write a limerick about python exceptions"
+}'
+```
+
+This endpoint works by converting the Responses API request into a Chat Completions request.
+
+
 ### POST `/v1/embeddings`: OpenAI-compatible embeddings API
 
 This endpoint requires that the model uses a pooling different than type `none`. The embeddings are normalized using the Eucledian norm.
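
The README examples above are non-streaming. As an illustrative sketch (not part of this commit; the base URL and model name are placeholders), the same endpoint can be consumed with `stream=True`, using the event types this patch emits (`response.created`, `response.output_item.added`, `response.output_text.delta`, `response.completed`) in the same way as `tools/server/tests/unit/test_compat_oai_responses.py` below:

```python
from openai import OpenAI

# Assumes a llama.cpp server listening on localhost:8080 with no API key configured;
# the model name is not used for routing by llama.cpp.
client = OpenAI(api_key="sk-no-key-required", base_url="http://localhost:8080/v1")

stream = client.responses.create(
    model="gpt-4.1",
    input="Write a limerick about python exceptions",
    stream=True,
)

text = ""
for event in stream:
    if event.type == "response.output_text.delta":
        text += event.delta                         # incremental text chunks
    elif event.type == "response.completed":
        assert text == event.response.output_text  # final event carries the full response

print(text)
```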

+ 295 - 0
tools/server/server-common.cpp

@@ -1069,6 +1069,283 @@ json oaicompat_chat_params_parse(
     return llama_params;
 }
 
+json convert_responses_to_chatcmpl(const json & response_body) {
+    if (!response_body.contains("input")) {
+        throw std::invalid_argument("'input' is required");
+    }
+    if (!json_value(response_body, "previous_response_id", std::string{}).empty()) {
+        throw std::invalid_argument("llama.cpp does not support 'previous_response_id'.");
+    }
+
+    const json input_value = response_body.at("input");
+    json chatcmpl_body = response_body;
+    chatcmpl_body.erase("input");
+    std::vector<json> chatcmpl_messages;
+
+    if (response_body.contains("instructions")) {
+        chatcmpl_messages.push_back({
+            {"role",    "system"},
+            {"content", json_value(response_body, "instructions", std::string())},
+        });
+        chatcmpl_body.erase("instructions");
+    }
+
+    if (input_value.is_string()) {
+        // #responses_create-input-text_input
+        chatcmpl_messages.push_back({
+            {"role",    "user"},
+            {"content", input_value},
+        });
+    } else if (input_value.is_array()) {
+        // #responses_create-input-input_item_list
+
+        static auto exists_and_is_array = [](const json & j, const char * key) -> bool {
+            return j.contains(key) && j.at(key).is_array();
+        };
+        static auto exists_and_is_string = [](const json & j, const char * key) -> bool {
+            return j.contains(key) && j.at(key).is_string();
+        };
+
+        for (json item : input_value) {
+            if (exists_and_is_string(item, "content")) {
+                // #responses_create-input-input_item_list-input_message-content-text_input
+                // Only "Input message" contains item["content"]::string
+                // After converting item["content"]::string to item["content"]::array,
+                // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message"
+                item["content"] = json::array({
+                    json {
+                        {"text", item.at("content")},
+                        {"type", "input_text"}
+                    }
+                });
+            }
+
+            if (exists_and_is_array(item, "content") &&
+                exists_and_is_string(item, "role") &&
+                (item.at("role") == "user" ||
+                    item.at("role") == "system" ||
+                    item.at("role") == "developer")
+            ) {
+                // #responses_create-input-input_item_list-item-input_message
+                std::vector<json> chatcmpl_content;
+
+                for (const json & input_item : item.at("content")) {
+                    const std::string type = json_value(input_item, "type", std::string());
+
+                    if (type == "input_text") {
+                        if (!input_item.contains("text")) {
+                            throw std::invalid_argument("'Input text' requires 'text'");
+                        }
+                        chatcmpl_content.push_back({
+                            {"text", input_item.at("text")},
+                            {"type", "text"},
+                        });
+                    } else if (type == "input_image") {
+                        // While `detail` is marked as required,
+                        // it has default value("auto") and can be omitted.
+
+                        if (!input_item.contains("image_url")) {
+                            throw std::invalid_argument("'image_url' is required");
+                        }
+                        chatcmpl_content.push_back({
+                            {"image_url", json {
+                                {"url", input_item.at("image_url")}
+                            }},
+                            {"type", "image_url"},
+                        });
+                    } else if (type == "input_file") {
+                        throw std::invalid_argument("'input_file' is not supported by llamacpp at this moment");
+                        // if (input_item.contains("file_url")) {
+                        //     // chat completion API does not support file_url
+                        //     throw std::invalid_argument("'file_url' is not supported");
+                        // }
+                        // if (!input_item.contains("file_data") || !input_item.contains("filename")) {
+                        //     throw std::invalid_argument("Both 'file_data' and 'filename' are required");
+                        // }
+                        // chatcmpl_content.push_back({
+                        //     {"file", json {
+                        //         {"file_data", input_item.at("file_data")},
+                        //         {"filename",  input_item.at("filename")},
+                        //     }},
+                        //     {"type", "file"},
+                        // });
+                    } else {
+                        throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'");
+                    }
+                }
+
+                if (item.contains("type")) {
+                    item.erase("type");
+                }
+                if (item.contains("status")) {
+                    item.erase("status");
+                }
+                item["content"] = chatcmpl_content;
+
+                chatcmpl_messages.push_back(item);
+            } else if (exists_and_is_array(item, "content") &&
+                exists_and_is_string(item, "role") &&
+                item.at("role") == "assistant" &&
+                // exists_and_is_string(item, "status") &&
+                // (item.at("status") == "in_progress" ||
+                //     item.at("status") == "completed" ||
+                //     item.at("status") == "incomplete") &&
+                // item["status"] not sent by codex-cli
+                exists_and_is_string(item, "type") &&
+                item.at("type") == "message"
+            ) {
+                // #responses_create-input-input_item_list-item-output_message
+                std::vector<json> chatcmpl_content;
+
+                for (const auto & output_text : item.at("content")) {
+                    const std::string type = json_value(output_text, "type", std::string());
+                    if (type != "output_text") {
+                        throw std::invalid_argument("'type' must be 'output_text'");
+                    }
+                    if (!exists_and_is_string(output_text, "text")) {
+                        throw std::invalid_argument("'Output text' requires 'text'");
+                    }
+                    // Ignore annotations and logprobs for now
+                    chatcmpl_content.push_back({
+                        {"text", output_text.at("text")},
+                        {"type", "text"},
+                    });
+                }
+
+                item.erase("status");
+                item.erase("type");
+                item["content"] = chatcmpl_content;
+                chatcmpl_messages.push_back(item);
+            } else if (exists_and_is_string(item, "arguments") &&
+                exists_and_is_string(item, "call_id") &&
+                exists_and_is_string(item, "name") &&
+                exists_and_is_string(item, "type") &&
+                item.at("type") == "function_call"
+            ) {
+                // #responses_create-input-input_item_list-item-function_tool_call
+                json msg = json {
+                    {"role", "assistant"},
+                    {"tool_calls", json::array({ json {
+                        {"function", json {
+                            {"arguments", item.at("arguments")},
+                            {"name",      item.at("name")},
+                        }},
+                        {"id",   item.at("call_id")},
+                        {"type", "function"},
+                    }})},
+                };
+
+                if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) {
+                    // Move reasoning content from dummy message to tool call message
+                    msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content");
+                    chatcmpl_messages.pop_back();
+                }
+                chatcmpl_messages.push_back(msg);
+            } else if (exists_and_is_string(item, "call_id") &&
+                (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) &&
+                exists_and_is_string(item, "type") &&
+                item.at("type") == "function_call_output"
+            ) {
+                // #responses_create-input-input_item_list-item-function_tool_call_output
+                if (item.at("output").is_string()) {
+                    chatcmpl_messages.push_back(json {
+                        {"content",      item.at("output")},
+                        {"role",         "tool"},
+                        {"tool_call_id", item.at("call_id")},
+                    });
+                } else {
+                    json chatcmpl_outputs = item.at("output");
+                    for (json & chatcmpl_output : chatcmpl_outputs) {
+                        if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") {
+                            throw std::invalid_argument("Output of tool call should be 'Input text'");
+                        }
+                        chatcmpl_output["type"] = "text";
+                    }
+                    chatcmpl_messages.push_back(json {
+                        {"content",      chatcmpl_outputs},
+                        {"role",         "tool"},
+                        {"tool_call_id", item.at("call_id")},
+                    });
+                }
+            } else if (// exists_and_is_string(item, "id") &&
+                // item["id"] not sent by codex-cli
+                exists_and_is_array(item, "summary") &&
+                exists_and_is_string(item, "type") &&
+                item.at("type") == "reasoning") {
+                // #responses_create-input-input_item_list-item-reasoning
+
+                if (!exists_and_is_array(item, "content")) {
+                    throw std::invalid_argument("item['content'] is not an array");
+                }
+                if (item.at("content").empty()) {
+                    throw std::invalid_argument("item['content'] is empty");
+                }
+                if (!exists_and_is_string(item.at("content")[0], "text")) {
+                    throw std::invalid_argument("item['content']['text'] is not a string");
+                }
+
+                // Pack reasoning content in dummy message
+                chatcmpl_messages.push_back(json {
+                    {"role", "assistant"},
+                    {"content", json::array()},
+                    {"reasoning_content", item.at("content")[0].at("text")},
+                });
+            } else {
+                throw std::invalid_argument("Cannot determine type of 'item'");
+            }
+        }
+    } else {
+        throw std::invalid_argument("'input' must be a string or array of objects");
+    }
+
+    // Remove unused dummy message which contains
+    // reasoning content not followed by tool call
+    chatcmpl_messages.erase(std::remove_if(
+        chatcmpl_messages.begin(),
+        chatcmpl_messages.end(),
+        [](const json & x){ return x.contains("role") &&
+            x.at("role") == "assistant" &&
+            x.contains("content") &&
+            x.at("content") == json::array() &&
+            x.contains("reasoning_content");
+        }),
+        chatcmpl_messages.end()
+    );
+
+    chatcmpl_body["messages"] = chatcmpl_messages;
+
+    if (response_body.contains("tools")) {
+        if (!response_body.at("tools").is_array()) {
+            throw std::invalid_argument("'tools' must be an array of objects");
+        }
+        std::vector<json> chatcmpl_tools;
+        for (json resp_tool : response_body.at("tools")) {
+            json chatcmpl_tool;
+
+            if (json_value(resp_tool, "type", std::string()) != "function") {
+                throw std::invalid_argument("'type' of tool must be 'function'");
+            }
+            resp_tool.erase("type");
+            chatcmpl_tool["type"] = "function";
+
+            if (!resp_tool.contains("strict")) {
+                resp_tool["strict"] = true;
+            }
+            chatcmpl_tool["function"] = resp_tool;
+            chatcmpl_tools.push_back(chatcmpl_tool);
+        }
+        chatcmpl_body.erase("tools");
+        chatcmpl_body["tools"] = chatcmpl_tools;
+    }
+
+    if (response_body.contains("max_output_tokens")) {
+        chatcmpl_body.erase("max_output_tokens");
+        chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
+    }
+
+    return chatcmpl_body;
+}
+
 json convert_anthropic_to_oai(const json & body) {
     json oai_body;
 
@@ -1482,6 +1759,24 @@ std::string format_oai_sse(const json & data) {
     return ss.str();
 }
 
+std::string format_oai_resp_sse(const json & data) {
+    std::ostringstream ss;
+    auto send_single = [&ss](const json & event_obj) {
+        ss << "event: " << event_obj.at("event").get<std::string>() << "\n";
+        ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n";
+    };
+
+    if (data.is_array()) {
+        for (const auto & item : data) {
+            send_single(item);
+        }
+    } else {
+        send_single(data);
+    }
+
+    return ss.str();
+}
+
 std::string format_anthropic_sse(const json & data) {
     std::ostringstream ss;
 

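For orientation, here is a minimal sketch (editor's illustration, not part of the patch; field values are made up) of the mapping `convert_responses_to_chatcmpl` performs: `instructions` becomes a leading `system` message, a string `input` becomes a `user` message, `max_output_tokens` is renamed to `max_tokens`, and Responses-style tools are rewrapped as Chat Completions `function` tools with `strict` defaulting to true.

```python
# Responses API request body (illustrative values)
responses_body = {
    "model": "gpt-4.1",
    "instructions": "You are a helpful assistant.",
    "input": "Write a limerick about python exceptions",
    "max_output_tokens": 64,
    "tools": [{"type": "function", "name": "get_weather", "parameters": {"type": "object"}}],
}

# Roughly equivalent Chat Completions body produced by convert_responses_to_chatcmpl()
chatcmpl_body = {
    "model": "gpt-4.1",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a limerick about python exceptions"},
    ],
    "max_tokens": 64,
    "tools": [{
        "type": "function",
        "function": {"name": "get_weather", "parameters": {"type": "object"}, "strict": True},
    }],
}
```
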
+ 5 - 0
tools/server/server-common.h

@@ -294,6 +294,9 @@ json oaicompat_chat_params_parse(
     const server_chat_params & opt,
     std::vector<raw_buffer> & out_files);
 
+// convert OpenAI Responses API format to OpenAI Chat Completions API format
+json convert_responses_to_chatcmpl(const json & body);
+
 // convert Anthropic Messages API format to OpenAI Chat Completions API format
 json convert_anthropic_to_oai(const json & body);
 
@@ -331,6 +334,8 @@ std::string tokens_to_output_formatted_string(const llama_context * ctx, const l
 // note: if data is a json array, it will be sent as multiple events, one per item
 std::string format_oai_sse(const json & data);
 
+std::string format_oai_resp_sse(const json & data);
+
 // format Anthropic-style SSE with event types
 std::string format_anthropic_sse(const json & data);
 

+ 30 - 7
tools/server/server-context.cpp

@@ -3073,6 +3073,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
         json first_result_json = first_result->to_json();
         if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
             res->data = format_anthropic_sse(first_result_json);
+        } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
+            res->data = format_oai_resp_sse(first_result_json);
         } else {
             res->data = format_oai_sse(first_result_json);
         }
@@ -3107,13 +3109,16 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
 
                 // check if there is more data
                 if (!rd.has_next()) {
-                    if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
-                        // Anthropic doesn't send [DONE], message_stop was already sent
-                        output = "";
-                    } else if (res_type != TASK_RESPONSE_TYPE_NONE) {
-                        output = "data: [DONE]\n\n";
-                    } else {
-                        output = "";
+                    switch (res_type) {
+                        case TASK_RESPONSE_TYPE_NONE:
+                        case TASK_RESPONSE_TYPE_OAI_RESP:
+                        case TASK_RESPONSE_TYPE_ANTHROPIC:
+                            output = "";
+                            break;
+
+                        default:
+                            output = "data: [DONE]\n\n";
+                            break;
                     }
                     SRV_DBG("%s", "all results received, terminating stream\n");
                     return false; // no more data, terminate
@@ -3141,6 +3146,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
                     json res_json = result->to_json();
                     if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
                         output = format_anthropic_sse(res_json);
+                    } else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
+                        output = format_oai_resp_sse(res_json);
                     } else {
                         output = format_oai_sse(res_json);
                     }
@@ -3575,6 +3582,22 @@ void server_routes::init_routes() {
             TASK_RESPONSE_TYPE_OAI_CHAT);
     };
 
+    this->post_responses_oai = [this](const server_http_req & req) {
+        auto res = create_response();
+        std::vector<raw_buffer> files;
+        json body = convert_responses_to_chatcmpl(json::parse(req.body));
+        json body_parsed = oaicompat_chat_params_parse(
+            body,
+            meta->chat_params,
+            files);
+        return handle_completions_impl(
+            req,
+            SERVER_TASK_TYPE_COMPLETION,
+            body_parsed,
+            files,
+            TASK_RESPONSE_TYPE_OAI_RESP);
+    };
+
     this->post_anthropic_messages = [this](const server_http_req & req) {
         auto res = create_response();
         std::vector<raw_buffer> files;

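On the wire, the new route streams named SSE events (an `event:` line followed by a `data:` line, per `format_oai_resp_sse`), and per the switch above the stream ends without a `data: [DONE]` sentinel. A rough client-side sketch using plain `requests` (URL and payload are placeholders; assumes no API key is configured):

```python
import json
import requests

# Assumes a llama.cpp server on localhost:8080.
with requests.post(
    "http://localhost:8080/v1/responses",
    json={"model": "gpt-4.1", "input": "Write a limerick about python exceptions", "stream": True},
    stream=True,
) as r:
    for line in r.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip "event: ..." lines and blank separators
        payload = json.loads(line[len("data: "):])
        if payload["type"] == "response.output_text.delta":
            print(payload["delta"], end="", flush=True)
        elif payload["type"] == "response.completed":
            print()
```
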
+ 1 - 0
tools/server/server-context.h

@@ -94,6 +94,7 @@ struct server_routes {
     server_http_context::handler_t post_completions;
     server_http_context::handler_t post_completions_oai;
     server_http_context::handler_t post_chat_completions;
+    server_http_context::handler_t post_responses_oai;
     server_http_context::handler_t post_anthropic_messages;
     server_http_context::handler_t post_anthropic_count_tokens;
     server_http_context::handler_t post_apply_template;

+ 342 - 2
tools/server/server-task.cpp

@@ -584,6 +584,8 @@ json server_task_result_cmpl_final::to_json() {
             return to_json_oaicompat();
         case TASK_RESPONSE_TYPE_OAI_CHAT:
             return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
+        case TASK_RESPONSE_TYPE_OAI_RESP:
+            return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
         case TASK_RESPONSE_TYPE_ANTHROPIC:
             return stream ? to_json_anthropic_stream() : to_json_anthropic();
         default:
@@ -801,6 +803,186 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
     return deltas;
 }
 
+json server_task_result_cmpl_final::to_json_oaicompat_resp() {
+    common_chat_msg msg;
+    if (!oaicompat_msg.empty()) {
+        msg = oaicompat_msg;
+    } else {
+        msg.role = "assistant";
+        msg.content = content;
+    }
+
+    std::vector<json> output;
+
+    if (msg.reasoning_content != "") {
+        output.push_back(json {
+            {"id",      "rs_" + random_string()},
+            {"summary", json::array()},
+            {"type",    "reasoning"},
+            {"content", json::array({ json {
+                {"text", msg.reasoning_content},
+                {"type", "reasoning_text"},
+            }})},
+            {"encrypted_content", ""},
+            {"status",            "completed"},
+        });
+    }
+
+    if (msg.content != "") {
+        output.push_back(json {
+            {"content", json::array({ json {
+                {"type",        "output_text"},
+                {"annotations", json::array()},
+                {"logprobs",    json::array()},
+                {"text",        msg.content},
+            }})},
+            {"id",     "msg_" + random_string()},
+            {"role",   msg.role},
+            {"status", "completed"},
+            {"type",   "message"},
+        });
+    }
+
+    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+        output.push_back(json {
+            {"type",      "function_call"},
+            {"status",    "completed"},
+            {"arguments", tool_call.arguments},
+            {"call_id",   "fc_" + tool_call.id},
+            {"name",      tool_call.name},
+        });
+    }
+
+    std::time_t t = std::time(0);
+    json res = {
+        {"completed_at", t},
+        {"created_at",   t},
+        {"id",           oai_resp_id},
+        {"model",        oaicompat_model},
+        {"object",       "response"},
+        {"output",       output},
+        {"status",       "completed"},
+        {"usage",        json {
+            {"input_tokens",  n_prompt_tokens},
+            {"output_tokens", n_decoded},
+            {"total_tokens",  n_decoded + n_prompt_tokens},
+        }},
+    };
+
+    return res;
+}
+
+json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
+    std::vector<json> server_sent_events;
+    std::vector<json> output;
+
+    if (oaicompat_msg.reasoning_content != "") {
+        const json output_item = json {
+            {"id",      oai_resp_reasoning_id},
+            {"summary", json::array()},
+            {"type",    "reasoning"},
+            {"content", json::array({ json {
+                {"text", oaicompat_msg.reasoning_content},
+                {"type", "reasoning_text"},
+            }})},
+            {"encrypted_content", ""},
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", output_item}
+            }}
+        });
+        output.push_back(output_item);
+    }
+
+    if (oaicompat_msg.content != "") {
+        server_sent_events.push_back(json {
+            {"event", "response.output_text.done"},
+            {"data", json {
+                {"type",    "response.output_text.done"},
+                {"item_id", oai_resp_message_id},
+                {"text",    oaicompat_msg.content}
+            }}
+        });
+
+        const json content_part = {
+            {"type",        "output_text"},
+            {"annotations", json::array()},
+            {"logprobs",    json::array()},
+            {"text",        oaicompat_msg.content}
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.content_part.done"},
+            {"data", json {
+                {"type",    "response.content_part.done"},
+                {"item_id", oai_resp_message_id},
+                {"part",    content_part}
+            }}
+        });
+        const json output_item = {
+            {"type",    "message"},
+            {"status",  "completed"},
+            {"id",      oai_resp_message_id},
+            {"content", json::array({content_part})},
+            {"role",    "assistant"}
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", output_item}
+            }}
+        });
+        output.push_back(output_item);
+    }
+
+    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+        const json output_item = {
+            {"type",      "function_call"},
+            {"status",    "completed"},
+            {"arguments", tool_call.arguments},
+            {"call_id",   "fc_" + tool_call.id},
+            {"name",      tool_call.name}
+        };
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", output_item}
+            }}
+        });
+        output.push_back(output_item);
+    }
+
+    std::time_t t = std::time(0);
+    server_sent_events.push_back(json {
+        {"event", "response.completed"},
+        {"data", json {
+            {"type", "response.completed"},
+            {"response", json {
+                {"id",         oai_resp_id},
+                {"object",     "response"},
+                {"created_at", t},
+                {"status",     "completed"},
+                {"model",      oaicompat_model},
+                {"output",     output},
+                {"usage",      json {
+                    {"input_tokens",  n_prompt_tokens},
+                    {"output_tokens", n_decoded},
+                    {"total_tokens",  n_decoded + n_prompt_tokens}
+                }}
+            }},
+        }}
+    });
+
+    return server_sent_events;
+}
+
 json server_task_result_cmpl_final::to_json_anthropic() {
     std::string stop_reason = "max_tokens";
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
@@ -1057,6 +1239,36 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
 //
 // server_task_result_cmpl_partial
 //
+void server_task_result_cmpl_partial::update(task_result_state & state) {
+    is_updated = true;
+    state.update_chat_msg(content, true, oaicompat_msg_diffs);
+
+    // Copy current state for use in to_json_*() (reflects state BEFORE this chunk)
+    thinking_block_started = state.thinking_block_started;
+    text_block_started     = state.text_block_started;
+
+    oai_resp_id            = state.oai_resp_id;
+    oai_resp_reasoning_id  = state.oai_resp_reasoning_id;
+    oai_resp_message_id    = state.oai_resp_message_id;
+    oai_resp_fc_id         = state.oai_resp_fc_id;
+
+    // track if the accumulated message has any reasoning content
+    anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
+
+    // Pre-compute state updates based on diffs (for next chunk)
+    for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
+        if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
+            state.thinking_block_started = true;
+        }
+        if (!diff.content_delta.empty() && !state.text_block_started) {
+            state.text_block_started = true;
+        }
+        if (!diff.tool_call_delta.name.empty()) {
+            state.oai_resp_fc_id = diff.tool_call_delta.id;
+        }
+    }
+}
+
 json server_task_result_cmpl_partial::to_json() {
     GGML_ASSERT(is_updated && "update() must be called before to_json()");
     switch (res_type) {
@@ -1066,6 +1278,8 @@ json server_task_result_cmpl_partial::to_json() {
             return to_json_oaicompat();
         case TASK_RESPONSE_TYPE_OAI_CHAT:
             return to_json_oaicompat_chat();
+        case TASK_RESPONSE_TYPE_OAI_RESP:
+            return to_json_oaicompat_resp();
         case TASK_RESPONSE_TYPE_ANTHROPIC:
             return to_json_anthropic();
         default:
@@ -1190,6 +1404,132 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
     return deltas;
 }
 
+json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
+    std::vector<json> events;
+
+    if (n_decoded == 1) {
+        events.push_back(json {
+            {"event", "response.created"},
+            {"data", json {
+                {"type", "response.created"},
+                {"response", json {
+                    {"id",     oai_resp_id},
+                    {"object", "response"},
+                    {"status", "in_progress"},
+                }},
+            }},
+        });
+        events.push_back(json {
+            {"event", "response.in_progress"},
+            {"data", json {
+                {"type", "response.in_progress"},
+                {"response", json {
+                    {"id",     oai_resp_id},
+                    {"object", "response"},
+                    {"status", "in_progress"},
+                }},
+            }},
+        });
+    }
+
+    for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
+        if (!diff.reasoning_content_delta.empty()) {
+            if (!thinking_block_started) {
+                events.push_back(json {
+                    {"event", "response.output_item.added"},
+                    {"data", json {
+                        {"type", "response.output_item.added"},
+                        {"item", json {
+                            {"id",                oai_resp_reasoning_id},
+                            {"summary",           json::array()},
+                            {"type",              "reasoning"},
+                            {"content",           json::array()},
+                            {"encrypted_content", ""},
+                            {"status",            "in_progress"},
+                        }},
+                    }},
+                });
+                thinking_block_started = true;
+            }
+            events.push_back(json {
+                {"event", "response.reasoning_text.delta"},
+                {"data", json {
+                    {"type",    "response.reasoning_text.delta"},
+                    {"delta",   diff.reasoning_content_delta},
+                    {"item_id", oai_resp_reasoning_id},
+                }},
+            });
+        }
+
+        if (!diff.content_delta.empty()) {
+            if (!text_block_started) {
+                events.push_back(json {
+                    {"event", "response.output_item.added"},
+                    {"data", json {
+                        {"type", "response.output_item.added"},
+                        {"item", json {
+                            {"content", json::array()},
+                            {"id",      oai_resp_message_id},
+                            {"role",    "assistant"},
+                            {"status",  "in_progress"},
+                            {"type",    "message"},
+                        }},
+                    }},
+                });
+                events.push_back(json {
+                    {"event", "response.content_part.added"},
+                    {"data", json {
+                        {"type",    "response.content_part.added"},
+                        {"item_id", oai_resp_message_id},
+                        {"part", json {
+                            {"type", "output_text"},
+                            {"text", ""},
+                        }},
+                    }},
+                });
+                text_block_started = true;
+            }
+            events.push_back(json {
+                {"event", "response.output_text.delta"},
+                {"data", json {
+                    {"type",    "response.output_text.delta"},
+                    {"item_id", oai_resp_message_id},
+                    {"delta",   diff.content_delta},
+                }},
+            });
+        }
+
+        if (!diff.tool_call_delta.name.empty()) {
+            events.push_back(json {
+                {"event", "response.output_item.added"},
+                {"data", json {
+                    {"type",  "response.output_item.added"},
+                    {"item", json {
+                        {"arguments", ""},
+                        {"call_id",   "fc_" + diff.tool_call_delta.id},
+                        {"name",      diff.tool_call_delta.name},
+                        {"type",      "function_call"},
+                        {"status",    "in_progress"},
+                    }},
+                }},
+            });
+            oai_resp_fc_id = diff.tool_call_delta.id;
+        }
+
+        if (!diff.tool_call_delta.arguments.empty()) {
+            events.push_back(json {
+                {"event", "response.function_call_arguments.delta"},
+                {"data", json {
+                    {"type",    "response.function_call_arguments.delta"},
+                    {"delta",   diff.tool_call_delta.arguments},
+                    {"item_id", "fc_" + oai_resp_fc_id},
+                }},
+            });
+        }
+    }
+    return events;
+}
+
 //
 // server_task_result_embd
 //
@@ -1260,8 +1600,8 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
 
     // use local copies of streaming state (copied from task_result_state in update())
     // these reflect the state BEFORE this chunk was processed
-    bool thinking_started = anthropic_thinking_block_started;
-    bool text_started     = anthropic_text_block_started;
+    bool thinking_started = thinking_block_started;
+    bool text_started     = text_block_started;
 
     for (const auto & diff : oaicompat_msg_diffs) {
         // handle thinking/reasoning content

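For reference, the non-streaming `to_json_oaicompat_resp()` above produces roughly the following shape (editor's sketch with placeholder values; the `reasoning` and `function_call` items appear only when the parsed message contains them):

```python
# Illustrative non-streaming /v1/responses result (placeholder values)
example_response = {
    "id": "resp_abc123",
    "object": "response",
    "created_at": 1761000000,
    "completed_at": 1761000000,
    "model": "gpt-4.1",
    "status": "completed",
    "output": [
        {   # emitted only when reasoning content was parsed
            "type": "reasoning", "id": "rs_abc123", "status": "completed",
            "summary": [], "encrypted_content": "",
            "content": [{"type": "reasoning_text", "text": "..."}],
        },
        {   # the assistant message
            "type": "message", "id": "msg_abc123", "role": "assistant", "status": "completed",
            "content": [{"type": "output_text", "annotations": [], "logprobs": [], "text": "..."}],
        },
        {   # one item per parsed tool call
            "type": "function_call", "status": "completed",
            "call_id": "fc_123", "name": "get_weather", "arguments": "{}",
        },
    ],
    "usage": {"input_tokens": 8, "output_tokens": 8, "total_tokens": 16},
}
```
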
+ 42 - 28
tools/server/server-task.h

@@ -33,6 +33,7 @@ enum task_response_type {
     TASK_RESPONSE_TYPE_NONE, // llama.cpp native format
     TASK_RESPONSE_TYPE_OAI_CHAT,
     TASK_RESPONSE_TYPE_OAI_CMPL,
+    TASK_RESPONSE_TYPE_OAI_RESP,
     TASK_RESPONSE_TYPE_OAI_EMBD,
     TASK_RESPONSE_TYPE_ANTHROPIC,
 };
@@ -98,12 +99,22 @@ struct task_result_state {
     std::string generated_text; // append new chunks of generated text here
     std::vector<std::string> generated_tool_call_ids;
 
-    // for Anthropic API streaming: track content block state across chunks
-    bool anthropic_thinking_block_started = false;
-    bool anthropic_text_block_started = false;
+    // for OpenAI Responses and Anthropic streaming API:
+    // track output item / content block state across chunks
+    bool thinking_block_started = false;
+    bool text_block_started = false;
+
+    // for OpenAI Responses streaming API
+    const std::string oai_resp_id;
+    const std::string oai_resp_reasoning_id;
+    const std::string oai_resp_message_id;
+    std::string oai_resp_fc_id; // function call ID for current args delta
 
     task_result_state(const common_chat_parser_params & chat_parser_params)
-        : chat_parser_params(chat_parser_params) {}
+        : chat_parser_params(chat_parser_params)
+        , oai_resp_id("resp_" + random_string())
+        , oai_resp_reasoning_id("rs_" + random_string())
+        , oai_resp_message_id("msg_" + random_string()) {}
 
     // parse partial tool calls and update the internal state
     common_chat_msg update_chat_msg(
@@ -352,6 +363,11 @@ struct server_task_result_cmpl_final : server_task_result {
     std::vector<common_chat_msg_diff> oaicompat_msg_diffs; // to be populated by update()
     bool is_updated = false;
 
+    // for OpenAI Responses API
+    std::string oai_resp_id;
+    std::string oai_resp_reasoning_id;
+    std::string oai_resp_message_id;
+
     virtual bool is_stop() override {
         return true; // in stream mode, final responses are considered stop
     }
@@ -361,6 +377,10 @@ struct server_task_result_cmpl_final : server_task_result {
     virtual void update(task_result_state & state) override {
         is_updated = true;
         oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs);
+
+        oai_resp_id = state.oai_resp_id;
+        oai_resp_reasoning_id = state.oai_resp_reasoning_id;
+        oai_resp_message_id = state.oai_resp_message_id;
     }
 
     json to_json_non_oaicompat();
@@ -371,6 +391,10 @@ struct server_task_result_cmpl_final : server_task_result {
 
     json to_json_oaicompat_chat_stream();
 
+    json to_json_oaicompat_resp();
+
+    json to_json_oaicompat_resp_stream();
+
     json to_json_anthropic();
 
     json to_json_anthropic_stream();
@@ -397,38 +421,26 @@ struct server_task_result_cmpl_partial : server_task_result {
     std::vector<common_chat_msg_diff> oaicompat_msg_diffs; // to be populated by update()
     bool is_updated = false;
 
+    // Streaming state copied from task_result_state for this chunk
+    bool thinking_block_started = false;
+    bool text_block_started     = false;
+
+    // for OpenAI Responses API
+    std::string oai_resp_id;
+    std::string oai_resp_reasoning_id;
+    std::string oai_resp_message_id;
+    std::string oai_resp_fc_id;
+
     // for Anthropic API: track if any reasoning content has been generated
     bool anthropic_has_reasoning = false;
-    // Streaming state copied from task_result_state for this chunk
-    bool anthropic_thinking_block_started = false;
-    bool anthropic_text_block_started = false;
 
     virtual bool is_stop() override {
         return false; // in stream mode, partial responses are not considered stop
     }
 
-    virtual json to_json() override;
+    virtual void update(task_result_state & state) override;
 
-    virtual void update(task_result_state & state) override {
-        is_updated = true;
-        state.update_chat_msg(content, true, oaicompat_msg_diffs);
-        // track if the accumulated message has any reasoning content
-        anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
-
-        // Copy current state for use in to_json_anthropic() (reflects state BEFORE this chunk)
-        anthropic_thinking_block_started = state.anthropic_thinking_block_started;
-        anthropic_text_block_started = state.anthropic_text_block_started;
-
-        // Pre-compute state updates based on diffs (for next chunk)
-        for (const auto & diff : oaicompat_msg_diffs) {
-            if (!diff.reasoning_content_delta.empty() && !state.anthropic_thinking_block_started) {
-                state.anthropic_thinking_block_started = true;
-            }
-            if (!diff.content_delta.empty() && !state.anthropic_text_block_started) {
-                state.anthropic_text_block_started = true;
-            }
-        }
-    }
+    virtual json to_json() override;
 
     json to_json_non_oaicompat();
 
@@ -436,6 +448,8 @@ struct server_task_result_cmpl_partial : server_task_result {
 
     json to_json_oaicompat_chat();
 
+    json to_json_oaicompat_resp();
+
     json to_json_anthropic();
 };
 

+ 2 - 0
tools/server/server.cpp

@@ -140,6 +140,7 @@ int main(int argc, char ** argv) {
         routes.post_completions            = models_routes->proxy_post;
         routes.post_completions_oai        = models_routes->proxy_post;
         routes.post_chat_completions       = models_routes->proxy_post;
+        routes.post_responses_oai          = models_routes->proxy_post;
         routes.post_anthropic_messages     = models_routes->proxy_post;
         routes.post_anthropic_count_tokens = models_routes->proxy_post;
         routes.post_infill                 = models_routes->proxy_post;
@@ -176,6 +177,7 @@ int main(int argc, char ** argv) {
     ctx_http.post("/chat/completions",    ex_wrapper(routes.post_chat_completions));
     ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
     ctx_http.post("/api/chat",            ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
+    ctx_http.post("/v1/responses",        ex_wrapper(routes.post_responses_oai));
     ctx_http.post("/v1/messages",         ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
     ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
     ctx_http.post("/infill",              ex_wrapper(routes.post_infill));

+ 1 - 1
tools/server/tests/requirements.txt

@@ -2,7 +2,7 @@ aiohttp~=3.9.3
 pytest~=8.3.3
 huggingface_hub>=0.34.0,<1.0
 numpy~=1.26.4
-openai~=1.55.3
+openai~=2.14.0
 prometheus-client~=0.20.0
 requests~=2.32.3
 wget~=3.2

+ 73 - 0
tools/server/tests/unit/test_compat_oai_responses.py

@@ -0,0 +1,73 @@
+import pytest
+from openai import OpenAI
+from utils import *
+
+server: ServerProcess
+
+@pytest.fixture(autouse=True)
+def create_server():
+    global server
+    server = ServerPreset.tinyllama2()
+
+def test_responses_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    res = client.responses.create(
+        model="gpt-4.1",
+        input=[
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ],
+        max_output_tokens=8,
+        temperature=0.8,
+    )
+    assert res.id.startswith("resp_")
+    assert res.output[0].id is not None
+    assert res.output[0].id.startswith("msg_")
+    assert match_regex("(Suddenly)+", res.output_text)
+
+def test_responses_stream_with_openai_library():
+    global server
+    server.start()
+    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
+    stream = client.responses.create(
+        model="gpt-4.1",
+        input=[
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ],
+        max_output_tokens=8,
+        temperature=0.8,
+        stream=True,
+    )
+
+    gathered_text = ''
+    resp_id = ''
+    msg_id = ''
+    for r in stream:
+        if r.type == "response.created":
+            assert r.response.id.startswith("resp_")
+            resp_id = r.response.id
+        if r.type == "response.in_progress":
+            assert r.response.id == resp_id
+        if r.type == "response.output_item.added":
+            assert r.item.id is not None
+            assert r.item.id.startswith("msg_")
+            msg_id = r.item.id
+        if (r.type == "response.content_part.added" or
+            r.type == "response.output_text.delta" or
+            r.type == "response.output_text.done" or
+            r.type == "response.content_part.done"):
+            assert r.item_id == msg_id
+        if r.type == "response.output_item.done":
+            assert r.item.id == msg_id
+
+        if r.type == "response.output_text.delta":
+            gathered_text += r.delta
+        if r.type == "response.completed":
+            assert r.response.id.startswith("resp_")
+            assert r.response.output[0].id is not None
+            assert r.response.output[0].id.startswith("msg_")
+            assert gathered_text == r.response.output_text
+            assert match_regex("(Suddenly)+", r.response.output_text)