@@ -584,6 +584,8 @@ json server_task_result_cmpl_final::to_json() {
             return to_json_oaicompat();
         case TASK_RESPONSE_TYPE_OAI_CHAT:
             return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
+        case TASK_RESPONSE_TYPE_OAI_RESP:
+            return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
         case TASK_RESPONSE_TYPE_ANTHROPIC:
             return stream ? to_json_anthropic_stream() : to_json_anthropic();
         default:
@@ -801,6 +803,186 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
     return deltas;
 }
 
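+// build the complete, non-streaming Responses API result: one output item per
+// reasoning block, assistant message and tool call, plus the usage counters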
+json server_task_result_cmpl_final::to_json_oaicompat_resp() {
+    common_chat_msg msg;
+    if (!oaicompat_msg.empty()) {
+        msg = oaicompat_msg;
+    } else {
+        msg.role = "assistant";
+        msg.content = content;
+    }
+
+    std::vector<json> output;
+
+    if (msg.reasoning_content != "") {
+        output.push_back(json {
+            {"id", "rs_" + random_string()},
+            {"summary", json::array()},
+            {"type", "reasoning"},
+            {"content", json::array({ json {
+                {"text", msg.reasoning_content},
+                {"type", "reasoning_text"},
+            }})},
+            {"encrypted_content", ""},
+            {"status", "completed"},
+        });
+    }
+
+    if (msg.content != "") {
+        output.push_back(json {
+            {"content", json::array({ json {
+                {"type", "output_text"},
+                {"annotations", json::array()},
+                {"logprobs", json::array()},
+                {"text", msg.content},
+            }})},
+            {"id", "msg_" + random_string()},
+            {"role", msg.role},
+            {"status", "completed"},
+            {"type", "message"},
+        });
+    }
+
+    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+        output.push_back(json {
+            {"type", "function_call"},
+            {"status", "completed"},
+            {"arguments", tool_call.arguments},
+            {"call_id", "fc_" + tool_call.id},
+            {"name", tool_call.name},
+        });
+    }
+
+    std::time_t t = std::time(0);
+    json res = {
+        {"completed_at", t},
+        {"created_at", t},
+        {"id", oai_resp_id},
+        {"model", oaicompat_model},
+        {"object", "response"},
+        {"output", output},
+        {"status", "completed"},
+        {"usage", json {
+            {"input_tokens", n_prompt_tokens},
+            {"output_tokens", n_decoded},
+            {"total_tokens", n_decoded + n_prompt_tokens},
+        }},
+    };
+
+    return res;
+}
+
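+// emit the terminal SSE events for a streamed request: a *.done event for every
+// finished output item, followed by a single response.completed event that
+// carries the aggregated response object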
+json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
+    std::vector<json> server_sent_events;
+    std::vector<json> output;
+
+    if (oaicompat_msg.reasoning_content != "") {
+        const json output_item = json {
+            {"id", oai_resp_reasoning_id},
+            {"summary", json::array()},
+            {"type", "reasoning"},
+            {"content", json::array({ json {
+                {"text", oaicompat_msg.reasoning_content},
+                {"type", "reasoning_text"},
+            }})},
+            {"encrypted_content", ""},
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", output_item}
+            }}
+        });
+        output.push_back(output_item);
+    }
+
+    if (oaicompat_msg.content != "") {
+        server_sent_events.push_back(json {
+            {"event", "response.output_text.done"},
+            {"data", json {
+                {"type", "response.output_text.done"},
+                {"item_id", oai_resp_message_id},
+                {"text", oaicompat_msg.content}
+            }}
+        });
+
+        const json content_part = {
+            {"type", "output_text"},
+            {"annotations", json::array()},
+            {"logprobs", json::array()},
+            {"text", oaicompat_msg.content}
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.content_part.done"},
+            {"data", json {
+                {"type", "response.content_part.done"},
+                {"item_id", oai_resp_message_id},
+                {"part", content_part}
+            }}
+        });
+        const json output_item = {
+            {"type", "message"},
+            {"status", "completed"},
+            {"id", oai_resp_message_id},
+            {"content", json::array({content_part})},
+            {"role", "assistant"}
+        };
+
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", output_item}
+            }}
+        });
+        output.push_back(output_item);
+    }
+
+    for (const common_chat_tool_call & tool_call : oaicompat_msg.tool_calls) {
+        const json output_item = {
+            {"type", "function_call"},
+            {"status", "completed"},
+            {"arguments", tool_call.arguments},
+            {"call_id", "fc_" + tool_call.id},
+            {"name", tool_call.name}
+        };
+        server_sent_events.push_back(json {
+            {"event", "response.output_item.done"},
+            {"data", json {
+                {"type", "response.output_item.done"},
+                {"item", output_item}
+            }}
+        });
+        output.push_back(output_item);
+    }
+
+    std::time_t t = std::time(0);
+    server_sent_events.push_back(json {
+        {"event", "response.completed"},
+        {"data", json {
+            {"type", "response.completed"},
+            {"response", json {
+                {"id", oai_resp_id},
+                {"object", "response"},
+                {"created_at", t},
+                {"status", "completed"},
+                {"model", oaicompat_model},
+                {"output", output},
+                {"usage", json {
+                    {"input_tokens", n_prompt_tokens},
+                    {"output_tokens", n_decoded},
+                    {"total_tokens", n_decoded + n_prompt_tokens}
+                }}
+            }},
+        }}
+    });
+
+    return server_sent_events;
+}
+
 json server_task_result_cmpl_final::to_json_anthropic() {
     std::string stop_reason = "max_tokens";
     if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
@@ -1057,6 +1239,36 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
 //
 // server_task_result_cmpl_partial
 //
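+// snapshot the shared streaming state into this result (so to_json_*() sees the
+// state from BEFORE this chunk) and advance it for the next chunk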
+void server_task_result_cmpl_partial::update(task_result_state & state) {
+    is_updated = true;
+    state.update_chat_msg(content, true, oaicompat_msg_diffs);
+
+    // Copy current state for use in to_json_*() (reflects state BEFORE this chunk)
+    thinking_block_started = state.thinking_block_started;
+    text_block_started = state.text_block_started;
+
+    oai_resp_id = state.oai_resp_id;
+    oai_resp_reasoning_id = state.oai_resp_reasoning_id;
+    oai_resp_message_id = state.oai_resp_message_id;
+    oai_resp_fc_id = state.oai_resp_fc_id;
+
+    // track if the accumulated message has any reasoning content
+    anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty();
+
+    // Pre-compute state updates based on diffs (for next chunk)
+    for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
+        if (!diff.reasoning_content_delta.empty() && !state.thinking_block_started) {
+            state.thinking_block_started = true;
+        }
+        if (!diff.content_delta.empty() && !state.text_block_started) {
+            state.text_block_started = true;
+        }
+        if (!diff.tool_call_delta.name.empty()) {
+            state.oai_resp_fc_id = diff.tool_call_delta.id;
+        }
+    }
+}
+
 json server_task_result_cmpl_partial::to_json() {
     GGML_ASSERT(is_updated && "update() must be called before to_json()");
     switch (res_type) {
@@ -1066,6 +1278,8 @@ json server_task_result_cmpl_partial::to_json() {
             return to_json_oaicompat();
         case TASK_RESPONSE_TYPE_OAI_CHAT:
             return to_json_oaicompat_chat();
+        case TASK_RESPONSE_TYPE_OAI_RESP:
+            return to_json_oaicompat_resp();
         case TASK_RESPONSE_TYPE_ANTHROPIC:
             return to_json_anthropic();
         default:
@@ -1190,6 +1404,132 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
     return deltas;
 }
 
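+// map the per-chunk message diffs onto incremental Responses API SSE events:
+// response.created/in_progress on the first decoded token, *.added when a new
+// output block starts, and *.delta events for new text, reasoning or arguments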
+json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
+    std::vector<json> events;
+
+    if (n_decoded == 1) {
+        events.push_back(json {
+            {"event", "response.created"},
+            {"data", json {
+                {"type", "response.created"},
+                {"response", json {
+                    {"id", oai_resp_id},
+                    {"object", "response"},
+                    {"status", "in_progress"},
+                }},
+            }},
+        });
+        events.push_back(json {
+            {"event", "response.in_progress"},
+            {"data", json {
+                {"type", "response.in_progress"},
+                {"response", json {
+                    {"id", oai_resp_id},
+                    {"object", "response"},
+                    {"status", "in_progress"},
+                }},
+            }},
+        });
+    }
+
+    for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) {
+        if (!diff.reasoning_content_delta.empty()) {
+            if (!thinking_block_started) {
+                events.push_back(json {
+                    {"event", "response.output_item.added"},
+                    {"data", json {
+                        {"type", "response.output_item.added"},
+                        {"item", json {
+                            {"id", oai_resp_reasoning_id},
+                            {"summary", json::array()},
+                            {"type", "reasoning"},
+                            {"content", json::array()},
+                            {"encrypted_content", ""},
+                            {"status", "in_progress"},
+                        }},
+                    }},
+                });
+                thinking_block_started = true;
+            }
+            events.push_back(json {
+                {"event", "response.reasoning_text.delta"},
+                {"data", json {
+                    {"type", "response.reasoning_text.delta"},
+                    {"delta", diff.reasoning_content_delta},
+                    {"item_id", oai_resp_reasoning_id},
+                }},
+            });
+        }
+
+        if (!diff.content_delta.empty()) {
+            if (!text_block_started) {
+                events.push_back(json {
+                    {"event", "response.output_item.added"},
+                    {"data", json {
+                        {"type", "response.output_item.added"},
+                        {"item", json {
+                            {"content", json::array()},
+                            {"id", oai_resp_message_id},
+                            {"role", "assistant"},
+                            {"status", "in_progress"},
+                            {"type", "message"},
+                        }},
+                    }},
+                });
+                events.push_back(json {
+                    {"event", "response.content_part.added"},
+                    {"data", json {
+                        {"type", "response.content_part.added"},
+                        {"item_id", oai_resp_message_id},
+                        {"part", json {
+                            {"type", "output_text"},
+                            {"text", ""},
+                        }},
+                    }},
+                });
+                text_block_started = true;
+            }
+            events.push_back(json {
+                {"event", "response.output_text.delta"},
+                {"data", json {
+                    {"type", "response.output_text.delta"},
+                    {"item_id", oai_resp_message_id},
+                    {"delta", diff.content_delta},
+                }},
+            });
+        }
+
+        if (!diff.tool_call_delta.name.empty()) {
+            events.push_back(json {
+                {"event", "response.output_item.added"},
+                {"data", json {
+                    {"type", "response.output_item.added"},
+                    {"item", json {
+                        {"arguments", ""},
+                        {"call_id", "fc_" + diff.tool_call_delta.id},
+                        {"name", diff.tool_call_delta.name},
+                        {"type", "function_call"},
+                        {"status", "in_progress"},
+                    }},
+                }},
+            });
+            oai_resp_fc_id = diff.tool_call_delta.id;
+        }
+
+        if (!diff.tool_call_delta.arguments.empty()) {
+            events.push_back(json {
+                {"event", "response.function_call_arguments.delta"},
+                {"data", json {
+                    {"type", "response.function_call_arguments.delta"},
+                    {"delta", diff.tool_call_delta.arguments},
+                    {"item_id", "fc_" + oai_resp_fc_id},
+                }},
+            });
+        }
+    }
+    return events;
+}
+
 //
 // server_task_result_embd
 //
@@ -1260,8 +1600,8 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
     // use local copies of streaming state (copied from task_result_state in update())
     // these reflect the state BEFORE this chunk was processed
-    bool thinking_started = anthropic_thinking_block_started;
-    bool text_started = anthropic_text_block_started;
+    bool thinking_started = thinking_block_started;
+    bool text_started = text_block_started;
 
     for (const auto & diff : oaicompat_msg_diffs) {
         // handle thinking/reasoning content