
refactor: centralize CoT parsing in backend for streaming mode (#16394)

* refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing

- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages

* refactor: implement streaming-aware universal reasoning parser

Remove the streaming mode limitation from --reasoning-format by refactoring
try_parse_reasoning() to handle incremental parsing of <think> tags across
all formats.

- Rework try_parse_reasoning() to track whitespace, partial tags, and
  multiple reasoning segments, allowing proper separation of reasoning_content
  and content in streaming mode
- Parse reasoning tags before tool call handling in content-only and Llama 3.x
  formats to ensure inline <think> blocks are captured correctly
- Change default reasoning_format from 'auto' to 'deepseek' for consistent
  behavior
- Add 'deepseek-legacy' option to preserve old inline behavior when needed
- Update CLI help and documentation to reflect streaming support
- Add parser tests for inline <think>...</think> segments

The parser now keeps processing content after the closing </think> tag instead of
stopping, enabling proper separation of message.reasoning_content and
message.content in both streaming and non-streaming modes.

Fixes the issue where streaming responses would dump everything (including
post-thinking content) into reasoning_content while leaving content empty.
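
To make the new separation concrete, here is a minimal sketch of driving the parser directly, modeled on the cases added to tests/test-chat-parser.cpp in this change; the "chat.h" include and the field order of common_chat_syntax are taken from that test diff, and the helper name is only illustrative.

```cpp
// Minimal sketch (not part of this change): exercises the updated parser the
// same way the new tests in tests/test-chat-parser.cpp do.
#include <cassert>

#include "chat.h"  // assumed to declare common_chat_parse / common_chat_syntax

static void sketch_reasoning_separation() {
    common_chat_syntax syntax = {
        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
        /* .reasoning_in_content = */ false,
        /* .thinking_forced_open = */ false,
        /* .parse_tool_calls = */ false,
    };

    // Complete message: the <think> block lands in reasoning_content and the
    // text after </think> stays in content.
    auto full = common_chat_parse("<think>Plan the answer</think>Final answer",
                                  /* is_partial */ false, syntax);
    assert(full.reasoning_content == "Plan the answer");
    assert(full.content == "Final answer");

    // Streaming prefix: only part of the reasoning has arrived, so it is still
    // surfaced as reasoning_content instead of being dumped into content.
    auto partial = common_chat_parse("<think>Plan the ans",
                                     /* is_partial */ true, syntax);
    assert(partial.reasoning_content == "Plan the ans");
    assert(partial.content.empty());
}
```

This mirrors the content_only_inline_think test added below and the streaming fix described above.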

* refactor: address review feedback from allozaur

- Passed the assistant message content directly to ChatMessageAssistant to drop the redundant derived state in the chat message component
- Simplified chat streaming updates by removing unused partial-thinking handling and persisting partial responses straight from currentResponse
- Refreshed the ChatMessage stories to cover standard and reasoning scenarios without the old THINK-tag parsing examples

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* refactor: restore forced reasoning prefix to pass test-chat ([chat] All tests passed)

- Store the exact sequence seen on input when 'thinking_forced_open' enforces a reasoning block
- Inject this prefix before the first accumulated segment in 'reasoning_content', then clear it to avoid duplication
- Repeat the capture on every new 'start_think' detection to handle partial/streaming flows correctly (see the sketch after this list)
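
The forced-open path can be illustrated as follows; this is a hedged sketch, not code from this change, reusing the syntax fields from tests/test-chat-parser.cpp with thinking_forced_open flipped on.

```cpp
// Hedged sketch of the forced-open prefix handling described above; field
// order follows the tests in this change, with thinking_forced_open enabled.
common_chat_syntax syntax = {
    /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
    /* .reasoning_in_content = */ false,
    /* .thinking_forced_open = */ true,  // template already opened a reasoning block
    /* .parse_tool_calls = */ false,
};

// When the input nonetheless re-emits the literal "<think>" opener, the parser
// records that exact sequence and injects it ahead of the first reasoning
// segment, so the original prefix is preserved for callers such as test-chat.
auto msg = common_chat_parse("<think>Plan first</think>Answer",
                             /* is_partial */ false, syntax);
// Expected shape per the bullets above: msg.reasoning_content begins with the
// captured "<think>" prefix followed by the reasoning text, while msg.content
// receives only the text after </think> ("Answer").
```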

* refactor: address review feedback from ngxson

* debug: say goodbye to curl -N, hello one-click raw stream

- adds a new checkbox in the WebUI to display raw LLM output without backend parsing or frontend Markdown rendering

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* webui: add Storybook example for raw LLM output and scope reasoning format toggle per story

- Added a Storybook example that showcases the chat message component in raw LLM output mode with the provided trace sample
- Updated every ChatMessage story to toggle the disableReasoningFormat setting so the raw-output rendering remains scoped to its own example

* npm run format

* chat-parser: address review feedback from ngxson

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Pascal 3 months ago
Parent
Commit
12bbc3fa50

+ 2 - 1
common/arg.cpp

@@ -3432,7 +3432,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--reasoning-format"}, "FORMAT",
         "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
         "- none: leaves thoughts unparsed in `message.content`\n"
-        "- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
+        "- deepseek: puts thoughts in `message.reasoning_content`\n"
+        "- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`\n"
         "(default: auto)",
         [](common_params & params, const std::string & value) {
             params.reasoning_format = common_reasoning_format_from_name(value);

+ 125 - 13
common/chat-parser.cpp

@@ -3,9 +3,12 @@
 #include "log.h"
 #include "regex-partial.h"
 
+#include <algorithm>
+#include <cctype>
 #include <optional>
 #include <stdexcept>
 #include <string>
+#include <string_view>
 #include <vector>
 
 using json = nlohmann::ordered_json;
@@ -166,6 +169,27 @@ void common_chat_msg_parser::consume_literal(const std::string & literal) {
 }
 
 bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
+    std::string pending_reasoning_prefix;
+
+    if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
+        return false;
+    }
+
+    auto set_reasoning_prefix = [&](size_t prefix_pos) {
+        if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
+            return;
+        }
+        if (prefix_pos + start_think.size() > input_.size()) {
+            pending_reasoning_prefix.clear();
+            return;
+        }
+        // Capture the exact literal that opened the reasoning section so we can
+        // surface it back to callers. This ensures formats that force the
+        // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
+        // instead of dropping it during parsing.
+        pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
+    };
+
     auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
         auto stripped_reasoning = string_strip(reasoning);
         if (stripped_reasoning.empty()) {
@@ -178,28 +202,116 @@ bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think
                 add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
             }
         } else {
+            if (!pending_reasoning_prefix.empty()) {
+                add_reasoning_content(pending_reasoning_prefix);
+                pending_reasoning_prefix.clear();
+            }
             add_reasoning_content(stripped_reasoning);
         }
     };
-    if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) {
-        if (syntax_.thinking_forced_open || try_consume_literal(start_think)) {
-            if (auto res = try_find_literal(end_think)) {
-                handle_reasoning(res->prelude, /* closed */ true);
-                consume_spaces();
-                return true;
-            }
-            auto rest = consume_rest();
+
+    const size_t saved_pos = pos_;
+    const size_t saved_content_size = result_.content.size();
+    const size_t saved_reasoning_size = result_.reasoning_content.size();
+
+    auto restore_state = [&]() {
+        move_to(saved_pos);
+        result_.content.resize(saved_content_size);
+        result_.reasoning_content.resize(saved_reasoning_size);
+    };
+
+    // Allow leading whitespace to be preserved as content when reasoning is present at the start
+    size_t cursor = pos_;
+    size_t whitespace_end = cursor;
+    while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
+        ++whitespace_end;
+    }
+
+    if (whitespace_end >= input_.size()) {
+        restore_state();
+        if (syntax_.thinking_forced_open) {
+            auto rest = input_.substr(saved_pos);
             if (!rest.empty()) {
                 handle_reasoning(rest, /* closed */ !is_partial());
             }
-            // Allow unclosed thinking tags, for now (https://github.com/ggml-org/llama.cpp/issues/13812, https://github.com/ggml-org/llama.cpp/issues/13877)
-            // if (!syntax_.thinking_forced_open) {
-            //     throw common_chat_msg_partial_exception(end_think);
-            // }
+            move_to(input_.size());
             return true;
         }
+        return false;
+    }
+
+    cursor = whitespace_end;
+    const size_t remaining = input_.size() - cursor;
+    const size_t start_prefix = std::min(start_think.size(), remaining);
+    const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
+
+    if (has_start_tag && start_prefix < start_think.size()) {
+        move_to(input_.size());
+        return true;
+    }
+
+    if (has_start_tag) {
+        if (whitespace_end > pos_) {
+            add_content(input_.substr(pos_, whitespace_end - pos_));
+        }
+        set_reasoning_prefix(cursor);
+        cursor += start_think.size();
+    } else if (syntax_.thinking_forced_open) {
+        cursor = whitespace_end;
+    } else {
+        restore_state();
+        return false;
+    }
+    while (true) {
+        if (cursor >= input_.size()) {
+            move_to(input_.size());
+            return true;
+        }
+
+        size_t end_pos = input_.find(end_think, cursor);
+        if (end_pos == std::string::npos) {
+            std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
+            size_t partial_off = string_find_partial_stop(remaining_view, end_think);
+            size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
+            if (reasoning_end > cursor) {
+                handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
+            }
+            move_to(input_.size());
+            return true;
+        }
+
+        if (end_pos > cursor) {
+            handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
+        } else {
+            handle_reasoning("", /* closed */ true);
+        }
+
+        cursor = end_pos + end_think.size();
+
+        while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
+            ++cursor;
+        }
+
+        const size_t next_remaining = input_.size() - cursor;
+        if (next_remaining == 0) {
+            move_to(cursor);
+            return true;
+        }
+
+        const size_t next_prefix = std::min(start_think.size(), next_remaining);
+        if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
+            if (next_prefix < start_think.size()) {
+                move_to(input_.size());
+                return true;
+            }
+            set_reasoning_prefix(cursor);
+            cursor += start_think.size();
+            continue;
+        }
+
+        move_to(cursor);
+        return true;
     }
-    return false;
 }
 
 std::string common_chat_msg_parser::consume_rest() {

+ 3 - 0
common/chat.cpp

@@ -1408,6 +1408,8 @@ static common_chat_params common_chat_params_init_apertus(const common_chat_temp
     return data;
 }
 static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
+    builder.try_parse_reasoning("<think>", "</think>");
+
     if (!builder.syntax().parse_tool_calls) {
         builder.add_content(builder.consume_rest());
         return;
@@ -2862,6 +2864,7 @@ common_chat_params common_chat_templates_apply(
 }
 
 static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
+    builder.try_parse_reasoning("<think>", "</think>");
     builder.add_content(builder.consume_rest());
 }
 

+ 1 - 1
common/common.h

@@ -433,7 +433,7 @@ struct common_params {
     std::string chat_template = "";                                                                         // NOLINT
     bool use_jinja = false;                                                                                 // NOLINT
     bool enable_chat_template = true;
-    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
     int reasoning_budget = -1;
     bool prefill_assistant = true;                                                                          // if true, any trailing assistant message will be prefilled into the response
 

+ 28 - 0
tests/test-chat-parser.cpp

@@ -106,6 +106,34 @@ static void test_reasoning() {
     assert_equals("<think>Cogito</think>", builder.result().content);
     assert_equals("Ergo sum", builder.consume_rest());
   }
+  {
+    const std::string variant("content_only_inline_think");
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ false,
+        /* .parse_tool_calls = */ false,
+    };
+    const std::string input = "<think>Pense</think>Bonjour";
+    auto msg = common_chat_parse(input, false, syntax);
+    assert_equals(variant, std::string("Pense"), msg.reasoning_content);
+    assert_equals(variant, std::string("Bonjour"), msg.content);
+  }
+  {
+    const std::string variant("llama_3_inline_think");
+    common_chat_syntax syntax = {
+        /* .format = */ COMMON_CHAT_FORMAT_LLAMA_3_X,
+        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
+        /* .reasoning_in_content = */ false,
+        /* .thinking_forced_open = */ false,
+        /* .parse_tool_calls = */ false,
+    };
+    const std::string input = "<think>Plan</think>Réponse";
+    auto msg = common_chat_parse(input, false, syntax);
+    assert_equals(variant, std::string("Plan"), msg.reasoning_content);
+    assert_equals(variant, std::string("Réponse"), msg.content);
+  }
   // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
   {
     common_chat_syntax syntax = {

+ 1 - 1
tools/server/README.md

@@ -190,7 +190,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--no-slots` | disables slots monitoring endpoint<br/>(env: LLAMA_ARG_NO_ENDPOINT_SLOTS) |
 | `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |
 | `--jinja` | use jinja template for chat (default: disabled)<br/>(env: LLAMA_ARG_JINJA) |
-| `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:<br/>- none: leaves thoughts unparsed in `message.content`<br/>- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)<br/>(default: auto)<br/>(env: LLAMA_ARG_THINK) |
+| `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:<br/>- none: leaves thoughts unparsed in `message.content`<br/>- deepseek: puts thoughts in `message.reasoning_content`<br/>- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`<br/>(default: deepseek)<br/>(env: LLAMA_ARG_THINK) |
 | `--reasoning-budget N` | controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)<br/>(env: LLAMA_ARG_THINK_BUDGET) |
 | `--chat-template JINJA_TEMPLATE` | set custom jinja chat template (default: template taken from model's metadata)<br/>if suffix/prefix are specified, template will be disabled<br/>only commonly used templates are accepted (unless --jinja is set before this flag):<br/>list of built-in templates:<br/>bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, seed_oss, smolvlm, vicuna, vicuna-orca, yandex, zephyr<br/>(env: LLAMA_ARG_CHAT_TEMPLATE) |
 | `--chat-template-file JINJA_TEMPLATE_FILE` | set custom jinja chat template file (default: template taken from model's metadata)<br/>if suffix/prefix are specified, template will be disabled<br/>only commonly used templates are accepted (unless --jinja is set before this flag):<br/>list of built-in templates:<br/>bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, seed_oss, smolvlm, vicuna, vicuna-orca, yandex, zephyr<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_FILE) |

+ 3 - 17
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

@@ -1,7 +1,6 @@
 <script lang="ts">
 	import { getDeletionInfo } from '$lib/stores/chat.svelte';
 	import { copyToClipboard } from '$lib/utils/copy';
-	import { parseThinkingContent } from '$lib/utils/thinking';
 	import ChatMessageAssistant from './ChatMessageAssistant.svelte';
 	import ChatMessageUser from './ChatMessageUser.svelte';
 
@@ -47,26 +46,13 @@
 
 	let thinkingContent = $derived.by(() => {
 		if (message.role === 'assistant') {
-			if (message.thinking) {
-				return message.thinking;
-			}
-
-			const parsed = parseThinkingContent(message.content);
+			const trimmedThinking = message.thinking?.trim();
 
-			return parsed.thinking;
+			return trimmedThinking ? trimmedThinking : null;
 		}
 		return null;
 	});
 
-	let messageContent = $derived.by(() => {
-		if (message.role === 'assistant') {
-			const parsed = parseThinkingContent(message.content);
-			return parsed.cleanContent?.replace('<|channel|>analysis', '');
-		}
-
-		return message.content?.replace('<|channel|>analysis', '');
-	});
-
 	function handleCancelEdit() {
 		isEditing = false;
 		editedContent = message.content;
@@ -165,7 +151,7 @@
 		{editedContent}
 		{isEditing}
 		{message}
-		{messageContent}
+		messageContent={message.content}
 		onCancelEdit={handleCancelEdit}
 		onConfirmDelete={handleConfirmDelete}
 		onCopy={handleCopy}

+ 22 - 1
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

@@ -131,7 +131,11 @@
 			</div>
 		</div>
 	{:else if message.role === 'assistant'}
-		<MarkdownContent content={messageContent || ''} />
+		{#if config().disableReasoningFormat}
+			<pre class="raw-output">{messageContent || ''}</pre>
+		{:else}
+			<MarkdownContent content={messageContent || ''} />
+		{/if}
 	{:else}
 		<div class="text-sm whitespace-pre-wrap">
 			{messageContent}
@@ -203,4 +207,21 @@
 			background-position: -200% 0;
 		}
 	}
+
+	.raw-output {
+		width: 100%;
+		max-width: 48rem;
+		margin-top: 1.5rem;
+		padding: 1rem 1.25rem;
+		border-radius: 1rem;
+		background: hsl(var(--muted) / 0.3);
+		color: var(--foreground);
+		font-family:
+			ui-monospace, SFMono-Regular, 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas,
+			'Liberation Mono', Menlo, monospace;
+		font-size: 0.875rem;
+		line-height: 1.6;
+		white-space: pre-wrap;
+		word-break: break-word;
+	}
 </style>

+ 6 - 0
tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte

@@ -148,6 +148,12 @@
 					key: 'showThoughtInProgress',
 					label: 'Show thought in progress',
 					type: 'checkbox'
+				},
+				{
+					key: 'disableReasoningFormat',
+					label:
+						'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
+					type: 'checkbox'
 				}
 			]
 		},

+ 3 - 0
tools/server/webui/src/lib/constants/settings-config.ts

@@ -6,6 +6,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
 	theme: 'system',
 	showTokensPerSecond: false,
 	showThoughtInProgress: false,
+	disableReasoningFormat: false,
 	keepStatsVisible: false,
 	askForTitleConfirmation: false,
 	pasteLongTextToFileLen: 2500,
@@ -76,6 +77,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
 	custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.',
 	showTokensPerSecond: 'Display generation speed in tokens per second during streaming.',
 	showThoughtInProgress: 'Expand thought process by default when generating messages.',
+	disableReasoningFormat:
+		'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
 	keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
 	askForTitleConfirmation:
 		'Ask for confirmation before automatically changing conversation title when editing the first message.',

+ 9 - 74
tools/server/webui/src/lib/services/chat.ts

@@ -78,6 +78,8 @@ export class ChatService {
 			timings_per_token
 		} = options;
 
+		const currentConfig = config();
+
 		// Cancel any ongoing request and create a new abort controller
 		this.abort();
 		this.abortController = new AbortController();
@@ -117,7 +119,7 @@ export class ChatService {
 			stream
 		};
 
-		requestBody.reasoning_format = 'auto';
+		requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';
 
 		if (temperature !== undefined) requestBody.temperature = temperature;
 		// Set max_tokens to -1 (infinite) if not provided or empty
@@ -161,7 +163,6 @@ export class ChatService {
 		}
 
 		try {
-			const currentConfig = config();
 			const apiKey = currentConfig.apiKey?.toString().trim();
 
 			const response = await fetch(`./v1/chat/completions`, {
@@ -256,10 +257,8 @@ export class ChatService {
 		}
 
 		const decoder = new TextDecoder();
-		let fullResponse = '';
+		let aggregatedContent = '';
 		let fullReasoningContent = '';
-		let regularContent = '';
-		let insideThinkTag = false;
 		let hasReceivedData = false;
 		let lastTimings: ChatMessageTimings | undefined;
 
@@ -277,7 +276,7 @@ export class ChatService {
 					if (line.startsWith('data: ')) {
 						const data = line.slice(6);
 						if (data === '[DONE]') {
-							if (!hasReceivedData && fullResponse.length === 0) {
+							if (!hasReceivedData && aggregatedContent.length === 0) {
 								const contextError = new Error(
 									'The request exceeds the available context size. Try increasing the context size or enable context shift.'
 								);
@@ -286,7 +285,7 @@ export class ChatService {
 								return;
 							}
 
-							onComplete?.(regularContent, fullReasoningContent || undefined, lastTimings);
+							onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
 
 							return;
 						}
@@ -310,27 +309,8 @@ export class ChatService {
 
 							if (content) {
 								hasReceivedData = true;
-								fullResponse += content;
-
-								// Track the regular content before processing this chunk
-								const regularContentBefore = regularContent;
-
-								// Process content character by character to handle think tags
-								insideThinkTag = this.processContentForThinkTags(
-									content,
-									insideThinkTag,
-									() => {
-										// Think content is ignored - we don't include it in API requests
-									},
-									(regularChunk) => {
-										regularContent += regularChunk;
-									}
-								);
-
-								const newRegularContent = regularContent.slice(regularContentBefore.length);
-								if (newRegularContent) {
-									onChunk?.(newRegularContent);
-								}
+								aggregatedContent += content;
+								onChunk?.(content);
 							}
 
 							if (reasoningContent) {
@@ -345,7 +325,7 @@ export class ChatService {
 				}
 			}
 
-			if (!hasReceivedData && fullResponse.length === 0) {
+			if (!hasReceivedData && aggregatedContent.length === 0) {
 				const contextError = new Error(
 					'The request exceeds the available context size. Try increasing the context size or enable context shift.'
 				);
@@ -552,51 +532,6 @@ export class ChatService {
 		}
 	}
 
-	/**
-	 * Processes content to separate thinking tags from regular content.
-	 * Parses <think> and </think> tags to route content to appropriate handlers.
-	 *
-	 * @param content - The content string to process
-	 * @param currentInsideThinkTag - Current state of whether we're inside a think tag
-	 * @param addThinkContent - Callback to handle content inside think tags
-	 * @param addRegularContent - Callback to handle regular content outside think tags
-	 * @returns Boolean indicating if we're still inside a think tag after processing
-	 * @private
-	 */
-	private processContentForThinkTags(
-		content: string,
-		currentInsideThinkTag: boolean,
-		addThinkContent: (chunk: string) => void,
-		addRegularContent: (chunk: string) => void
-	): boolean {
-		let i = 0;
-		let insideThinkTag = currentInsideThinkTag;
-
-		while (i < content.length) {
-			if (!insideThinkTag && content.substring(i, i + 7) === '<think>') {
-				insideThinkTag = true;
-				i += 7; // Skip the <think> tag
-				continue;
-			}
-
-			if (insideThinkTag && content.substring(i, i + 8) === '</think>') {
-				insideThinkTag = false;
-				i += 8; // Skip the </think> tag
-				continue;
-			}
-
-			if (insideThinkTag) {
-				addThinkContent(content[i]);
-			} else {
-				addRegularContent(content[i]);
-			}
-
-			i++;
-		}
-
-		return insideThinkTag;
-	}
-
 	/**
 	 * Aborts any ongoing chat completion request.
 	 * Cancels the current request and cleans up the abort controller.

+ 8 - 10
tools/server/webui/src/lib/stores/chat.svelte.ts

@@ -5,7 +5,6 @@ import { config } from '$lib/stores/settings.svelte';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
 import { goto } from '$app/navigation';
-import { extractPartialThinking } from '$lib/utils/thinking';
 import { toast } from 'svelte-sonner';
 import type { ExportedConversations } from '$lib/types/database';
 
@@ -344,11 +343,9 @@ class ChatStore {
 				this.currentResponse = streamedContent;
 
 				captureModelIfNeeded();
-
-				const partialThinking = extractPartialThinking(streamedContent);
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
 				this.updateMessageAtIndex(messageIndex, {
-					content: partialThinking.remainingContent || streamedContent
+					content: streamedContent
 				});
 			},
 
@@ -696,18 +693,16 @@ class ChatStore {
 
 		if (lastMessage && lastMessage.role === 'assistant') {
 			try {
-				const partialThinking = extractPartialThinking(this.currentResponse);
-
 				const updateData: {
 					content: string;
 					thinking?: string;
 					timings?: ChatMessageTimings;
 				} = {
-					content: partialThinking.remainingContent || this.currentResponse
+					content: this.currentResponse
 				};
 
-				if (partialThinking.thinking) {
-					updateData.thinking = partialThinking.thinking;
+				if (lastMessage.thinking?.trim()) {
+					updateData.thinking = lastMessage.thinking;
 				}
 
 				const lastKnownState = await slotsService.getCurrentState();
@@ -727,7 +722,10 @@ class ChatStore {
 
 				await DatabaseStore.updateMessage(lastMessage.id, updateData);
 
-				lastMessage.content = partialThinking.remainingContent || this.currentResponse;
+				lastMessage.content = this.currentResponse;
+				if (updateData.thinking !== undefined) {
+					lastMessage.thinking = updateData.thinking;
+				}
 				if (updateData.timings) {
 					lastMessage.timings = updateData.timings;
 				}

+ 0 - 143
tools/server/webui/src/lib/utils/thinking.ts

@@ -1,143 +0,0 @@
-/**
- * Parses thinking content from a message that may contain <think> tags or [THINK] tags
- * Returns an object with thinking content and cleaned message content
- * Handles both complete blocks and incomplete blocks (streaming)
- * Supports formats: <think>...</think> and [THINK]...[/THINK]
- * @param content - The message content to parse
- * @returns An object containing the extracted thinking content and the cleaned message content
- */
-export function parseThinkingContent(content: string): {
-	thinking: string | null;
-	cleanContent: string;
-} {
-	const incompleteThinkMatch = content.includes('<think>') && !content.includes('</think>');
-	const incompleteThinkBracketMatch = content.includes('[THINK]') && !content.includes('[/THINK]');
-
-	if (incompleteThinkMatch) {
-		const cleanContent = content.split('</think>')?.[1]?.trim();
-		const thinkingContent = content.split('<think>')?.[1]?.trim();
-
-		return {
-			cleanContent,
-			thinking: thinkingContent
-		};
-	}
-
-	if (incompleteThinkBracketMatch) {
-		const cleanContent = content.split('[/THINK]')?.[1]?.trim();
-		const thinkingContent = content.split('[THINK]')?.[1]?.trim();
-
-		return {
-			cleanContent,
-			thinking: thinkingContent
-		};
-	}
-
-	const completeThinkMatch = content.match(/<think>([\s\S]*?)<\/think>/);
-	const completeThinkBracketMatch = content.match(/\[THINK\]([\s\S]*?)\[\/THINK\]/);
-
-	if (completeThinkMatch) {
-		const thinkingContent = completeThinkMatch[1]?.trim() ?? '';
-		const cleanContent = `${content.slice(0, completeThinkMatch.index ?? 0)}${content.slice(
-			(completeThinkMatch.index ?? 0) + completeThinkMatch[0].length
-		)}`.trim();
-
-		return {
-			thinking: thinkingContent,
-			cleanContent
-		};
-	}
-
-	if (completeThinkBracketMatch) {
-		const thinkingContent = completeThinkBracketMatch[1]?.trim() ?? '';
-		const cleanContent = `${content.slice(0, completeThinkBracketMatch.index ?? 0)}${content.slice(
-			(completeThinkBracketMatch.index ?? 0) + completeThinkBracketMatch[0].length
-		)}`.trim();
-
-		return {
-			thinking: thinkingContent,
-			cleanContent
-		};
-	}
-
-	return {
-		thinking: null,
-		cleanContent: content
-	};
-}
-
-/**
- * Checks if content contains an opening thinking tag (for streaming)
- * Supports both <think> and [THINK] formats
- * @param content - The message content to check
- * @returns True if the content contains an opening thinking tag
- */
-export function hasThinkingStart(content: string): boolean {
-	return (
-		content.includes('<think>') ||
-		content.includes('[THINK]') ||
-		content.includes('<|channel|>analysis')
-	);
-}
-
-/**
- * Checks if content contains a closing thinking tag (for streaming)
- * Supports both </think> and [/THINK] formats
- * @param content - The message content to check
- * @returns True if the content contains a closing thinking tag
- */
-export function hasThinkingEnd(content: string): boolean {
-	return content.includes('</think>') || content.includes('[/THINK]');
-}
-
-/**
- * Extracts partial thinking content during streaming
- * Supports both <think> and [THINK] formats
- * Used when we have opening tag but not yet closing tag
- * @param content - The message content to extract partial thinking from
- * @returns An object containing the extracted partial thinking content and the remaining content
- */
-export function extractPartialThinking(content: string): {
-	thinking: string | null;
-	remainingContent: string;
-} {
-	const thinkStartIndex = content.indexOf('<think>');
-	const thinkEndIndex = content.indexOf('</think>');
-
-	const bracketStartIndex = content.indexOf('[THINK]');
-	const bracketEndIndex = content.indexOf('[/THINK]');
-
-	const useThinkFormat =
-		thinkStartIndex !== -1 && (bracketStartIndex === -1 || thinkStartIndex < bracketStartIndex);
-	const useBracketFormat =
-		bracketStartIndex !== -1 && (thinkStartIndex === -1 || bracketStartIndex < thinkStartIndex);
-
-	if (useThinkFormat) {
-		if (thinkEndIndex === -1) {
-			const thinkingStart = thinkStartIndex + '<think>'.length;
-
-			return {
-				thinking: content.substring(thinkingStart),
-				remainingContent: content.substring(0, thinkStartIndex)
-			};
-		}
-	} else if (useBracketFormat) {
-		if (bracketEndIndex === -1) {
-			const thinkingStart = bracketStartIndex + '[THINK]'.length;
-
-			return {
-				thinking: content.substring(thinkingStart),
-				remainingContent: content.substring(0, bracketStartIndex)
-			};
-		}
-	} else {
-		return { thinking: null, remainingContent: content };
-	}
-
-	const parsed = parseThinkingContent(content);
-
-	return {
-		thinking: parsed.thinking,
-		remainingContent: parsed.cleanContent
-	};
-}

+ 54 - 159
tools/server/webui/src/stories/ChatMessage.stories.svelte

@@ -36,64 +36,36 @@
 		children: []
 	};
 
-	let processingMessage = $state({
-		id: '4',
-		convId: 'conv-1',
-		type: 'message',
-		timestamp: 0, // No timestamp = processing
-		role: 'assistant',
-		content: '',
-		parent: '1',
-		thinking: '',
-		children: []
-	});
-
-	let streamingMessage = $state({
-		id: '5',
-		convId: 'conv-1',
-		type: 'message',
-		timestamp: 0, // No timestamp = streaming
-		role: 'assistant',
-		content: '',
-		parent: '1',
-		thinking: '',
-		children: []
-	});
-
-	// Message with <think> format thinking content
-	const thinkTagMessage: DatabaseMessage = {
-		id: '6',
+	const assistantWithReasoning: DatabaseMessage = {
+		id: '3',
 		convId: 'conv-1',
 		type: 'message',
 		timestamp: Date.now() - 1000 * 60 * 2,
 		role: 'assistant',
-		content:
-			"<think>\nLet me analyze this step by step:\n\n1. The user is asking about thinking formats\n2. I need to demonstrate the &lt;think&gt; tag format\n3. This content should be displayed in the thinking section\n4. The main response should be separate\n\nThis is a good example of reasoning content.\n</think>\n\nHere's my response after thinking through the problem. The thinking content above should be displayed separately from this main response content.",
+		content: "Here's the concise answer, now that I've thought it through carefully for you.",
 		parent: '1',
-		thinking: '',
+		thinking:
+			"Let's consider the user's question step by step:\\n\\n1. Identify the core problem\\n2. Evaluate relevant information\\n3. Formulate a clear answer\\n\\nFollowing this process ensures the final response stays focused and accurate.",
 		children: []
 	};
-
-	// Message with [THINK] format thinking content
-	const thinkBracketMessage: DatabaseMessage = {
-		id: '7',
+	const rawOutputMessage: DatabaseMessage = {
+		id: '6',
 		convId: 'conv-1',
 		type: 'message',
-		timestamp: Date.now() - 1000 * 60 * 1,
+		timestamp: Date.now() - 1000 * 60,
 		role: 'assistant',
 		content:
-			'[THINK]\nThis is the DeepSeek-style thinking format:\n\n- Using square brackets instead of angle brackets\n- Should work identically to the &lt;think&gt; format\n- Content parsing should extract this reasoning\n- Display should be the same as &lt;think&gt; format\n\nBoth formats should be supported seamlessly.\n[/THINK]\n\nThis is the main response content that comes after the [THINK] block. The reasoning above should be parsed and displayed in the thinking section.',
+			'<|channel|>analysis<|message|>User greeted me. Initiating overcomplicated analysis: Is this a trap? No, just a normal hello. Respond calmly, act like a helpful assistant, and do not start explaining quantum physics again. Confidence 0.73. Engaging socially acceptable greeting protocol...<|end|>Hello there! How can I help you today?',
 		parent: '1',
 		thinking: '',
 		children: []
 	};
 
-	// Streaming message for <think> format
-	let streamingThinkMessage = $state({
-		id: '8',
+	let processingMessage = $state({
+		id: '4',
 		convId: 'conv-1',
 		type: 'message',
-		timestamp: 0, // No timestamp = streaming
+		timestamp: 0, // No timestamp = processing
 		role: 'assistant',
 		content: '',
 		parent: '1',
@@ -101,9 +73,8 @@
 		children: []
 	});
 
-	// Streaming message for [THINK] format
-	let streamingBracketMessage = $state({
-		id: '9',
+	let streamingMessage = $state({
+		id: '5',
 		convId: 'conv-1',
 		type: 'message',
 		timestamp: 0, // No timestamp = streaming
@@ -120,6 +91,10 @@
 	args={{
 		message: userMessage
 	}}
+	play={async () => {
+		const { updateConfig } = await import('$lib/stores/settings.svelte');
+		updateConfig('disableReasoningFormat', false);
+	}}
 />
 
 <Story
@@ -128,15 +103,45 @@
 		class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
 		message: assistantMessage
 	}}
+	play={async () => {
+		const { updateConfig } = await import('$lib/stores/settings.svelte');
+		updateConfig('disableReasoningFormat', false);
+	}}
+/>
+
+<Story
+	name="AssistantWithReasoning"
+	args={{
+		class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
+		message: assistantWithReasoning
+	}}
+	play={async () => {
+		const { updateConfig } = await import('$lib/stores/settings.svelte');
+		updateConfig('disableReasoningFormat', false);
+	}}
+/>
+
+<Story
+	name="RawLlmOutput"
+	args={{
+		class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
+		message: rawOutputMessage
+	}}
+	play={async () => {
+		const { updateConfig } = await import('$lib/stores/settings.svelte');
+		updateConfig('disableReasoningFormat', true);
+	}}
 />
 
 <Story
-	name="WithThinkingBlock"
+	name="WithReasoningContent"
 	args={{
 		message: streamingMessage
 	}}
 	asChild
 	play={async () => {
+		const { updateConfig } = await import('$lib/stores/settings.svelte');
+		updateConfig('disableReasoningFormat', false);
 		// Phase 1: Stream reasoning content in chunks
 		let reasoningText =
 			'I need to think about this carefully. Let me break down the problem:\n\n1. The user is asking for help with something complex\n2. I should provide a thorough and helpful response\n3. I need to consider multiple approaches\n4. The best solution would be to explain step by step\n\nThis approach will ensure clarity and understanding.';
@@ -187,126 +192,16 @@
 		message: processingMessage
 	}}
 	play={async () => {
+		const { updateConfig } = await import('$lib/stores/settings.svelte');
+		updateConfig('disableReasoningFormat', false);
 		// Import the chat store to simulate loading state
 		const { chatStore } = await import('$lib/stores/chat.svelte');
-		
+
 		// Set loading state to true to trigger the processing UI
 		chatStore.isLoading = true;
-		
+
 		// Simulate the processing state hook behavior
 		// This will show the "Generating..." text and parameter details
-		await new Promise(resolve => setTimeout(resolve, 100));
-	}}
-/>
-
-<Story
-	name="ThinkTagFormat"
-	args={{
-		class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
-		message: thinkTagMessage
+		await new Promise((resolve) => setTimeout(resolve, 100));
 	}}
 />
-
-<Story
-	name="ThinkBracketFormat"
-	args={{
-		class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
-		message: thinkBracketMessage
-	}}
-/>
-
-<Story
-	name="StreamingThinkTag"
-	args={{
-		message: streamingThinkMessage
-	}}
-	parameters={{
-		test: {
-			timeout: 30000
-		}
-	}}
-	asChild
-	play={async () => {
-		// Phase 1: Stream <think> reasoning content
-		const thinkingContent =
-			'Let me work through this problem systematically:\n\n1. First, I need to understand what the user is asking\n2. Then I should consider different approaches\n3. I need to evaluate the pros and cons\n4. Finally, I should provide a clear recommendation\n\nThis step-by-step approach will ensure accuracy.';
-
-		let currentContent = '<think>\n';
-		streamingThinkMessage.content = currentContent;
-
-		for (let i = 0; i < thinkingContent.length; i++) {
-			currentContent += thinkingContent[i];
-			streamingThinkMessage.content = currentContent;
-			await new Promise((resolve) => setTimeout(resolve, 5));
-		}
-
-		// Close the thinking block
-		currentContent += '\n</think>\n\n';
-		streamingThinkMessage.content = currentContent;
-		await new Promise((resolve) => setTimeout(resolve, 200));
-
-		// Phase 2: Stream main response content
-		const responseContent =
-			"Based on my analysis above, here's the solution:\n\n**Key Points:**\n- The approach should be systematic\n- We need to consider all factors\n- Implementation should be step-by-step\n\nThis ensures the best possible outcome.";
-
-		for (let i = 0; i < responseContent.length; i++) {
-			currentContent += responseContent[i];
-			streamingThinkMessage.content = currentContent;
-			await new Promise((resolve) => setTimeout(resolve, 10));
-		}
-
-		streamingThinkMessage.timestamp = Date.now();
-	}}
->
-	<div class="w-[56rem]">
-		<ChatMessage message={streamingThinkMessage} />
-	</div>
-</Story>
-
-<Story
-	name="StreamingThinkBracket"
-	args={{
-		message: streamingBracketMessage
-	}}
-	parameters={{
-		test: {
-			timeout: 30000
-		}
-	}}
-	asChild
-	play={async () => {
-		// Phase 1: Stream [THINK] reasoning content
-		const thinkingContent =
-			'Using the DeepSeek format now:\n\n- This demonstrates the &#91;THINK&#93; bracket format\n- Should parse identically to &lt;think&gt; tags\n- The UI should display this in the thinking section\n- Main content should be separate\n\nBoth formats provide the same functionality.';
-
-		let currentContent = '[THINK]\n';
-		streamingBracketMessage.content = currentContent;
-
-		for (let i = 0; i < thinkingContent.length; i++) {
-			currentContent += thinkingContent[i];
-			streamingBracketMessage.content = currentContent;
-			await new Promise((resolve) => setTimeout(resolve, 5));
-		}
-
-		// Close the thinking block
-		currentContent += '\n[/THINK]\n\n';
-		streamingBracketMessage.content = currentContent;
-		await new Promise((resolve) => setTimeout(resolve, 200));
-
-		// Phase 2: Stream main response content
-		const responseContent =
-			"Here's my response after using the &#91;THINK&#93; format:\n\n**Observations:**\n- Both &lt;think&gt; and &#91;THINK&#93; formats work seamlessly\n- The parsing logic handles both cases\n- UI display is consistent across formats\n\nThis demonstrates the enhanced thinking content support.";
-
-		for (let i = 0; i < responseContent.length; i++) {
-			currentContent += responseContent[i];
-			streamingBracketMessage.content = currentContent;
-			await new Promise((resolve) => setTimeout(resolve, 10));
-		}
-
-		streamingBracketMessage.timestamp = Date.now();
-	}}
->
-	<div class="w-[56rem]">
-		<ChatMessage message={streamingBracketMessage} />
-	</div>
-</Story>