// Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
//
// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
// e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
//
//   cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
//
// NOTE(review): main() in this file takes no arguments and the command above runs the
// `test-chat` binary, so the CLI description looks inherited from another test - confirm.
#include <exception>
#include <functional>
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

#include "chat-parser.h"
#include "common.h"
#include "log.h"
#include "regex-partial.h"
  15. template <class T>
  16. static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
  17. if (expected != actual) {
  18. std::cerr << label << std::endl;
  19. std::cerr << "Expected: " << expected << std::endl;
  20. std::cerr << "Actual: " << actual << std::endl;
  21. std::cerr << std::flush;
  22. throw std::runtime_error("Test failed");
  23. }
  24. }
  25. template <class T>
  26. static void assert_equals(const T & expected, const T & actual) {
  27. assert_equals("", expected, actual);
  28. }
  29. static void assert_equals(const char * expected, const std::string & actual) {
  30. return assert_equals<std::string>(expected, actual);
  31. }
  32. static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
  33. try {
  34. fn();
  35. } catch (const std::exception & e) {
  36. if (expected_exception_pattern.empty()) {
  37. return;
  38. }
  39. std::regex expected_exception_regex(expected_exception_pattern);
  40. std::string actual_message = e.what();
  41. if (std::regex_search(actual_message, expected_exception_regex)) {
  42. return;
  43. }
  44. throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
  45. throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
  46. }
  47. throw std::runtime_error("Exception was expected but not thrown");
  48. }
  49. static void test_reasoning() {
  50. //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
  51. {
  52. common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
  53. /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
  54. /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
  55. /* .reasoning_in_content = */ false,
  56. /* .thinking_forced_open = */ false,
  57. });
  58. assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
  59. assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
  60. }
  61. {
  62. common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
  63. /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
  64. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  65. /* .reasoning_in_content = */ false,
  66. /* .thinking_forced_open = */ false,
  67. });
  68. assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
  69. assert_equals(std::string("Cogito"), builder.result().reasoning_content);
  70. assert_equals("Ergo sum", builder.consume_rest());
  71. }
  72. {
  73. common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
  74. /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
  75. /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
  76. /* .reasoning_in_content = */ false,
  77. /* .thinking_forced_open = */ false,
  78. });
  79. assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
  80. assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
  81. }
  82. {
  83. common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
  84. /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
  85. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  86. /* .reasoning_in_content = */ false,
  87. /* .thinking_forced_open = */ true,
  88. });
  89. assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
  90. assert_equals(std::string("Cogito"), builder.result().reasoning_content);
  91. assert_equals("Ergo sum", builder.consume_rest());
  92. }
  93. {
  94. common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
  95. /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
  96. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  97. /* .reasoning_in_content = */ true,
  98. /* .thinking_forced_open = */ true,
  99. });
  100. assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
  101. assert_equals("<think>Cogito</think>", builder.result().content);
  102. assert_equals("Ergo sum", builder.consume_rest());
  103. }
  104. // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
  105. {
  106. common_chat_syntax syntax = {
  107. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  108. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  109. /* .reasoning_in_content = */ false,
  110. /* .thinking_forced_open = */ true,
  111. /* .parse_tool_calls = */ true,
  112. };
  113. const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
  114. common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, syntax);
  115. assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
  116. assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
  117. assert_equals(variant, std::string("ok"), builder.consume_rest());
  118. }
  119. // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
  120. {
  121. common_chat_syntax syntax = {
  122. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  123. /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
  124. /* .reasoning_in_content = */ false,
  125. /* .thinking_forced_open = */ true,
  126. /* .parse_tool_calls = */ true,
  127. };
  128. const std::string variant("deepseek_v3_1_reasoning_format_none");
  129. const std::string input = "REASONING</think>ok";
  130. auto msg = common_chat_parse(input, false, syntax);
  131. assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
  132. assert_equals(variant, std::string(""), msg.reasoning_content);
  133. }
  134. }
  135. static void test_regex() {
  136. auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
  137. common_chat_msg_parser builder(input, /* is_partial= */ false, {});
  138. assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
  139. };
  140. test_throws("Hello, world!", "abc", "^abc$");
  141. test_throws("Hello, world!", "e", "^e$");
  142. {
  143. common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
  144. builder.consume_regex(common_regex("Hello"));
  145. assert_equals(", world!", builder.consume_rest());
  146. }
  147. {
  148. // When in non partial mode, we can say whether the regex was consumed or not.
  149. common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
  150. assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
  151. }
  152. {
  153. common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
  154. auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
  155. assert_equals(true, res.has_value());
  156. // Verify captures
  157. assert_equals<size_t>(2, res->groups.size());
  158. assert_equals("Hell", builder.str(res->groups[0]));
  159. assert_equals("el", builder.str(res->groups[1]));
  160. // Verify position is after the match
  161. assert_equals<size_t>(4, builder.pos());
  162. assert_equals("o,", builder.consume_rest());
  163. }
  164. {
  165. // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
  166. common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
  167. assert_throws([&]() {
  168. builder.try_consume_regex(common_regex("Hello, world!"));
  169. }, "^Hello, world!$");
  170. }
  171. // Now regardless of the mode, we can tell these aren't a match.
  172. for (const auto is_partial : {false, true}) {
  173. common_chat_msg_parser builder("Hello,", is_partial, {});
  174. assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
  175. }
  176. for (const auto is_partial : {false, true}) {
  177. common_chat_msg_parser builder("Hello,", is_partial, {});
  178. assert_equals(false, builder.try_consume_literal("Oh"));
  179. }
  180. }
  181. const std::vector<std::string> barely_healable_jsons = {
  182. "{",
  183. "{\"",
  184. "{\"\\",
  185. "{\"n",
  186. "{\"name\"",
  187. "{\"name\":",
  188. "{\"name\":\"",
  189. "{\"name\":\"\\",
  190. "{\"name\":\"python",
  191. "{\"name\":\"python\\",
  192. "{\",",
  193. "{\":",
  194. "{\"[",
  195. "{\"]",
  196. "{\"{",
  197. "{\"}",
  198. "{\"1",
  199. "{\"name\":\",",
  200. "{\"name\":\":",
  201. "{\"name\":\"[",
  202. "{\"name\":\"]",
  203. "{\"name\":\"{",
  204. "{\"name\":\"}",
  205. "{\"name\":\"1",
  206. };
  207. static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
  208. common_chat_msg_parser builder(input, is_partial, {});
  209. auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
  210. assert_equals(true, js.has_value());
  211. assert_equals(is_partial, js->is_partial);
  212. assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
  213. }
  214. static void test_deepseek_v3_1_tool_calls() {
  215. //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
  216. // variant: happy path for when it works as the model card says it should
  217. const std::string variant("simple");
  218. common_chat_syntax syntax = {
  219. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  220. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  221. /* .reasoning_in_content = */ false,
  222. /* .thinking_forced_open = */ false,
  223. /* .parse_tool_calls = */ true,
  224. };
  225. const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
  226. auto msg = common_chat_parse(input, false, syntax);
  227. assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
  228. assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
  229. // JSON arguments are dumped without spaces
  230. assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
  231. assert_equals(variant, std::string(""), msg.content);
  232. assert_equals(variant, std::string(""), msg.reasoning_content);
  233. // variant: simple + thinking open
  234. {
  235. common_chat_syntax syntax = {
  236. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  237. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  238. /* .reasoning_in_content = */ false,
  239. /* .thinking_forced_open = */ true,
  240. /* .parse_tool_calls = */ true,
  241. };
  242. const std::string variant("simple_thinking");
  243. const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
  244. auto m = common_chat_parse(in, false, syntax);
  245. assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
  246. assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
  247. assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
  248. assert_equals(variant, std::string(""), m.content);
  249. assert_equals(variant, std::string("REASONING"), m.reasoning_content);
  250. }
  251. // variant: simple + multiple tool calls
  252. {
  253. common_chat_syntax syntax = {
  254. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  255. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  256. /* .reasoning_in_content = */ false,
  257. /* .thinking_forced_open = */ false,
  258. /* .parse_tool_calls = */ true,
  259. };
  260. const std::string variant("simple_multiple_tool_calls");
  261. const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
  262. auto m = common_chat_parse(in, false, syntax);
  263. assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
  264. assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
  265. assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
  266. assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
  267. assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
  268. assert_equals(variant, std::string("CONTENT"), m.content);
  269. assert_equals(variant, std::string(""), m.reasoning_content);
  270. }
  271. // variant: thinking forced open + tool call in reasoning content
  272. {
  273. common_chat_syntax syntax = {
  274. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  275. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  276. /* .reasoning_in_content = */ false,
  277. /* .thinking_forced_open = */ true,
  278. /* .parse_tool_calls = */ true,
  279. };
  280. const std::string variant("thinking_forced_open_tool_call_in_reasoning");
  281. const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
  282. auto m = common_chat_parse(in, false, syntax);
  283. assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
  284. assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
  285. assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
  286. assert_equals(variant, std::string(""), m.content);
  287. assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
  288. }
  289. // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
  290. // This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
  291. // to make tool calls in reasoning content according to the model card, but it does sometimes, so
  292. // add the reasoning content as regular content and parse the tool calls.
  293. {
  294. common_chat_syntax syntax = {
  295. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  296. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  297. /* .reasoning_in_content = */ false,
  298. /* .thinking_forced_open = */ true,
  299. /* .parse_tool_calls = */ true,
  300. };
  301. const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
  302. const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
  303. auto m = common_chat_parse(in, false, syntax);
  304. assert_equals(variant, std::string("REASONING"), m.content);
  305. assert_equals(variant, std::string(""), m.reasoning_content);
  306. assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
  307. assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
  308. assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
  309. }
  310. // variant: thinking forced open + tool call in reasoning content + no closing think + partial
  311. {
  312. common_chat_syntax syntax = {
  313. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  314. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  315. /* .reasoning_in_content = */ false,
  316. /* .thinking_forced_open = */ true,
  317. /* .parse_tool_calls = */ true,
  318. };
  319. const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
  320. const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
  321. auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
  322. assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
  323. assert_equals(variant, std::string(""), m.content);
  324. assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
  325. }
  326. // variant: thinking not forced open + reasoning + regular content + no tool calls
  327. {
  328. common_chat_syntax syntax = {
  329. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  330. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  331. /* .reasoning_in_content = */ false,
  332. /* .thinking_forced_open = */ true,
  333. /* .parse_tool_calls = */ true,
  334. };
  335. const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
  336. const std::string in = "REASONING</think>CONTENT";
  337. auto m = common_chat_parse(in, false, syntax);
  338. assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
  339. assert_equals(variant, std::string("CONTENT"), m.content);
  340. assert_equals(variant, std::string("REASONING"), m.reasoning_content);
  341. }
  342. // variant: thinking not forced open + missing reasoning + no tool calls
  343. {
  344. common_chat_syntax syntax = {
  345. /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
  346. /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
  347. /* .reasoning_in_content = */ false,
  348. /* .thinking_forced_open = */ false,
  349. /* .parse_tool_calls = */ true,
  350. };
  351. const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
  352. const std::string in = "CONTENT";
  353. auto m = common_chat_parse(in, false, syntax);
  354. assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
  355. assert_equals(variant, std::string("CONTENT"), m.content);
  356. assert_equals(variant, std::string(""), m.reasoning_content);
  357. }
  358. }
  359. static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
  360. common_chat_msg_parser builder(input, parse_as_partial, {});
  361. auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
  362. assert_equals(true, js.has_value());
  363. assert_equals(is_partial, js->is_partial);
  364. assert_equals(expected, js->value.dump());
  365. }
  366. static void test_json_with_dumped_args_no_args() {
  367. // Normal JSON, nothing to heal, nothing to dump
  368. test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
  369. // Full json is args
  370. test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
  371. // If the arguments are further down, don't heal partial content.
  372. for (const auto & src : barely_healable_jsons) {
  373. test(src, true, {{"arguments"}}, {}, "{}");
  374. }
  375. // But heal content that isn't partial.
  376. test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
  377. }
  378. static void test_json_with_dumped_args() {
  379. // Partial content.
  380. test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
  381. test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
  382. test("{\"content\": ", true, {}, {{"content"}}, "{}");
  383. // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
  384. test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
  385. for (const auto & src : barely_healable_jsons) {
  386. test(src, true, {{}}, {}, src);
  387. }
  388. // Full JSON w/ args
  389. for (auto parse_as_partial : {true, false}) {
  390. test_with_args(
  391. R"({"name": "python", "args": {"arg1": 1}})",
  392. R"({"name":"python","args":"{\"arg1\":1}"})",
  393. parse_as_partial,
  394. /* is_partial= */ false
  395. );
  396. }
  397. // Partial JSON w/ partial args
  398. test_with_args(
  399. R"({"foo": "bar", "args": {")",
  400. R"({"foo":"bar","args":"{\""})"
  401. );
  402. // Partial args broken in object key
  403. test_with_args(
  404. R"({"foo": "bar", "args": {"ar)",
  405. R"({"foo":"bar","args":"{\"ar"})"
  406. );
  407. // Partial args broken after object key
  408. test_with_args(
  409. R"({"foo": "bar", "args": {"arg1")",
  410. R"({"foo":"bar","args":"{\"arg1\""})"
  411. );
  412. // Partial args broken before object value
  413. test_with_args(
  414. R"({"foo": "bar", "args": {"arg1":)",
  415. R"({"foo":"bar","args":"{\"arg1\":"})"
  416. );
  417. // Partial args broken before object value (space)
  418. test_with_args(
  419. R"({"foo": "bar", "args": {"arg1": )",
  420. R"({"foo":"bar","args":"{\"arg1\":"})"
  421. );
  422. // Partial args broken in object value that may not be complete (int)
  423. test_with_args(
  424. R"({"foo": "bar", "args": {"arg1": 1)",
  425. R"({"foo":"bar","args":"{\"arg1\":"})"
  426. );
  427. // Partial args broken in object value that is complete (int)
  428. test_with_args(
  429. R"({"foo": "bar", "args": {"arg1": 1 )",
  430. R"({"foo":"bar","args":"{\"arg1\":1"})"
  431. );
  432. // Partial args broken in object value that is incomplete (string)
  433. test_with_args(
  434. R"({"foo": "bar", "args": {"arg1": ")",
  435. R"({"foo":"bar","args":"{\"arg1\":\""})"
  436. );
  437. // Partial args broken in object value that is complete (string)
  438. test_with_args(
  439. R"({"foo": "bar", "args": {"arg1": "1")",
  440. R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
  441. );
  442. // Partial args broken on array opening
  443. test_with_args(
  444. R"({"foo": "bar", "args": [)",
  445. R"({"foo":"bar","args":"["})"
  446. );
  447. // Partial args broken on array value that is incomplete (int)
  448. test_with_args(
  449. R"({"foo": "bar", "args": [1)",
  450. R"({"foo":"bar","args":"["})"
  451. );
  452. // Partial args broken on array value that is complete (int)
  453. test_with_args(
  454. R"({"foo": "bar", "args": [1 )",
  455. R"({"foo":"bar","args":"[1"})"
  456. );
  457. // Partial args broken on array value that is complete (string)
  458. test_with_args(
  459. R"({"foo": "bar", "args": ["1")",
  460. R"({"foo":"bar","args":"[\"1\""})"
  461. );
  462. // Partial args broken after array value
  463. test_with_args(
  464. R"({"foo": "bar", "args": [1,)",
  465. R"({"foo":"bar","args":"[1,"})"
  466. );
  467. // Partial args broken on nested array
  468. test_with_args(
  469. R"({"foo": "bar", "args": {"arg1": [)",
  470. R"({"foo":"bar","args":"{\"arg1\":["})"
  471. );
  472. }
  473. static void test_positions() {
  474. {
  475. common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
  476. assert_equals<size_t>(0, builder.pos());
  477. assert_throws([&]() { builder.move_to(100); });
  478. assert_equals<size_t>(0, builder.pos());
  479. assert_throws([&]() { builder.move_back(1); });
  480. assert_equals<size_t>(0, builder.pos());
  481. builder.move_to(8);
  482. assert_equals<size_t>(8, builder.pos());
  483. builder.move_back(1);
  484. assert_equals<size_t>(7, builder.pos());
  485. assert_equals("world!", builder.consume_rest());
  486. builder.move_to(0);
  487. assert_equals<size_t>(0, builder.pos());
  488. assert_throws([&]() { builder.finish(); });
  489. assert_equals<size_t>(0, builder.pos());
  490. builder.move_to(builder.input().size());
  491. builder.finish();
  492. }
  493. {
  494. common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
  495. builder.move_to(builder.input().size());
  496. assert_equals<size_t>(builder.input().size(), builder.pos());
  497. builder.finish();
  498. }
  499. }
  500. int main() {
  501. test_positions();
  502. test_json_with_dumped_args_no_args();
  503. test_json_with_dumped_args();
  504. test_reasoning();
  505. test_regex();
  506. test_deepseek_v3_1_tool_calls();
  507. std::cout << "All tests passed!\n";
  508. return 0;
  509. }