// test-basic.cpp

  1. #include "tests.h"
  2. void test_basic(testing & t) {
  3. t.test("chars", [](testing & t) {
  4. // Test common escape sequences - newline
  5. t.test("escape_sequence_newline", [](testing &t) {
  6. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[\\n\\t\\\\]"); });
  7. common_peg_parse_context ctx;
  8. common_peg_parse_result result;
  9. ctx = common_peg_parse_context("\n");
  10. result = common_chat_combinator_parser.parse(ctx);
  11. t.assert_equal("escape_sequence_newline", true, result.success());
  12. });
  13. // Test common escape sequences - tab
  14. t.test("escape_sequence_tab", [](testing &t) {
  15. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[\\n\\t\\\\]"); });
  16. common_peg_parse_context ctx;
  17. common_peg_parse_result result;
  18. ctx = common_peg_parse_context("\t");
  19. result = common_chat_combinator_parser.parse(ctx);
  20. t.assert_equal("escape_sequence_tab", true, result.success());
  21. });
  22. // Test common escape sequences - backslash
  23. t.test("escape_sequence_backslash", [](testing &t) {
  24. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[\\n\\t\\\\]"); });
  25. common_peg_parse_context ctx;
  26. common_peg_parse_result result;
  27. ctx = common_peg_parse_context("\\");
  28. result = common_chat_combinator_parser.parse(ctx);
  29. t.assert_equal("escape_sequence_backslash", true, result.success());
  30. });
  31. // Test common escape sequences - space (should ())
  32. t.test("escape_sequence_space_fail", [](testing &t) {
  33. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[\\n\\t\\\\]"); });
  34. common_peg_parse_context ctx;
  35. common_peg_parse_result result;
  36. ctx = common_peg_parse_context(" ");
  37. result = common_chat_combinator_parser.parse(ctx);
  38. t.assert_equal("escape_sequence_space_fail", true, result.fail());
  39. });
  40. // Test escaped dash - 'a' should succeed
  41. t.test("escaped_dash_a", [](testing &t) {
  42. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[a\\-z]"); });
  43. common_peg_parse_context ctx;
  44. common_peg_parse_result result;
  45. ctx = common_peg_parse_context("a");
  46. result = common_chat_combinator_parser.parse(ctx);
  47. t.assert_equal("escaped_dash_a", true, result.success());
  48. });
  49. // Test escaped dash - '-' should succeed (literal dash)
  50. t.test("escaped_dash_literal", [](testing &t) {
  51. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[a\\-z]"); });
  52. common_peg_parse_context ctx;
  53. common_peg_parse_result result;
  54. ctx = common_peg_parse_context("-");
  55. result = common_chat_combinator_parser.parse(ctx);
  56. t.assert_equal("escaped_dash_literal", true, result.success());
  57. });
  58. // Test escaped dash - 'z' should succeed
  59. t.test("escaped_dash_z", [](testing &t) {
  60. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[a\\-z]"); });
  61. common_peg_parse_context ctx;
  62. common_peg_parse_result result;
  63. ctx = common_peg_parse_context("z");
  64. result = common_chat_combinator_parser.parse(ctx);
  65. t.assert_equal("escaped_dash_z", true, result.success());
  66. });
  67. // Test escaped dash - 'b' should NOT match (since \- is literal dash, not range)
  68. t.test("escaped_dash_b_fail", [](testing &t) {
  69. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("[a\\-z]"); });
  70. common_peg_parse_context ctx;
  71. common_peg_parse_result result;
  72. ctx = common_peg_parse_context("b");
  73. result = common_chat_combinator_parser.parse(ctx);
  74. t.assert_equal("escaped_dash_b_fail", true, result.fail());
  75. });
  76. });
  77. t.test("optional", [](testing & t) {
  78. // Full match with optional part present
  79. t.test("optional_present", [](testing &t) {
  80. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  81. return p.literal("hello") + p.optional(p.literal(" world"));
  82. });
  83. auto ctx = common_peg_parse_context("hello world");
  84. auto result = parser.parse(ctx);
  85. t.assert_equal("optional_present", true, result.success());
  86. t.assert_equal("optional_present_end", 11u, result.end);
  87. });
  88. // Full match with optional part absent
  89. t.test("optional_absent", [](testing &t) {
  90. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  91. return p.literal("hello") + p.optional(p.literal(" world"));
  92. });
  93. auto ctx = common_peg_parse_context("hello", false);
  94. auto result = parser.parse(ctx);
  95. t.assert_equal("optional_absent", true, result.success());
  96. t.assert_equal("optional_absent_end", 5u, result.end);
  97. });
  98. // Partial match - waiting for more input to determine if optional matches
  99. t.test("partial_match_need_more", [](testing &t) {
  100. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  101. return p.literal("hello") + p.optional(p.literal(" world"));
  102. });
  103. auto ctx = common_peg_parse_context("hello ", true);
  104. auto result = parser.parse(ctx);
  105. t.assert_equal("partial_match_need_more", true, result.need_more_input());
  106. });
  107. });
  108. t.test("partial parsing", [](testing & t) {
  109. // Literals - Basic Success
  110. t.test("literal_success", [&](testing & t) {
  111. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("hello"); });
  112. common_peg_parse_context ctx;
  113. common_peg_parse_result result;
  114. ctx = common_peg_parse_context("hello");
  115. result = parser.parse(ctx);
  116. t.assert_equal("literal_success", true, result.success());
  117. });
  118. // Char Classes - Basic Lowercase Success
  119. t.test("char_class_lowercase_success", [&](testing & t) {
  120. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("a-z"); });
  121. common_peg_parse_context ctx;
  122. common_peg_parse_result result;
  123. ctx = common_peg_parse_context("a");
  124. result = parser.parse(ctx);
  125. t.assert_equal("char_class_lowercase_success", true, result.success());
  126. });
  127. // Char Classes - Uppercase Fail
  128. t.test("char_class_uppercase_fail", [&](testing & t) {
  129. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("a-z"); });
  130. common_peg_parse_context ctx;
  131. common_peg_parse_result result;
  132. ctx = common_peg_parse_context("A");
  133. result = parser.parse(ctx);
  134. t.assert_equal("char_class_uppercase_fail", true, result.fail());
  135. });
  136. // Char Classes with Dash - Lowercase Success
  137. t.test("char_class_with_dash_lowercase", [&](testing & t) {
  138. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("a-z-"); });
  139. common_peg_parse_context ctx;
  140. common_peg_parse_result result;
  141. ctx = common_peg_parse_context("f");
  142. result = parser.parse(ctx);
  143. t.assert_equal("char_class_with_dash_lowercase", true, result.success());
  144. });
  145. // Char Classes with Dash - Literal Dash Success
  146. t.test("char_class_with_dash_literal_dash", [&](testing & t) {
  147. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("a-z-"); });
  148. common_peg_parse_context ctx;
  149. common_peg_parse_result result;
  150. ctx = common_peg_parse_context("-");
  151. result = parser.parse(ctx);
  152. t.assert_equal("char_class_with_dash_literal_dash", true, result.success());
  153. });
  154. // Char Classes with Dash - Uppercase Fail
  155. t.test("char_class_with_dash_uppercase_fail", [&](testing & t) {
  156. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.chars("a-z-"); });
  157. common_peg_parse_context ctx;
  158. common_peg_parse_result result;
  159. ctx = common_peg_parse_context("A");
  160. result = parser.parse(ctx);
  161. t.assert_equal("char_class_with_dash_uppercase_fail", true, result.fail());
  162. });
  163. // Sequences - Partial Match 1
  164. t.test("sequence_partial_match_1", [&](testing & t) {
  165. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
  166. auto ctx = common_peg_parse_context("<thi", true);
  167. auto result = parser.parse(ctx);
  168. t.assert_equal("sequence_partial_match_1", true, result.need_more_input());
  169. });
  170. // Sequences - Partial Match 2
  171. t.test("sequence_partial_match_2", [&](testing & t) {
  172. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("begin") + p.literal("end"); });
  173. auto ctx = common_peg_parse_context("begin", true);
  174. auto result = parser.parse(ctx);
  175. t.assert_equal("sequence_partial_match_2", true, result.need_more_input());
  176. });
  177. // Sequences - Partial Match 3
  178. t.test("sequence_partial_match_3", [&](testing & t) {
  179. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
  180. auto ctx = common_peg_parse_context("<think></", true);
  181. auto result = parser.parse(ctx);
  182. t.assert_equal("sequence_partial_match_3", true, result.need_more_input());
  183. });
  184. // Sequences - Full Match
  185. t.test("sequence_full_match", [&](testing & t) {
  186. auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("hello") + p.literal("world"); });
  187. auto ctx = common_peg_parse_context("helloworld", false);
  188. auto result = common_chat_combinator_parser.parse(ctx);
  189. t.assert_equal("sequence_full_match", true, result.success());
  190. });
  191. // Sequences - No Match
  192. t.test("sequence_no_match", [&](testing & t) {
  193. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
  194. auto ctx = common_peg_parse_context("<think>I am common_chat_combinator_parser", true);
  195. auto result = parser.parse(ctx);
  196. t.assert_equal("sequence_no_match", true, result.fail());
  197. });
  198. // Choices - Partial Match 1
  199. t.test("choices_partial_match_1", [&](testing & t) {
  200. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("option1") | p.literal("option2"); });
  201. auto ctx = common_peg_parse_context("opt", true);
  202. auto result = parser.parse(ctx);
  203. t.assert_equal("choices_partial_match_1", true, result.need_more_input());
  204. });
  205. // Choices - Partial Match 2
  206. t.test("choices_partial_match_2", [&](testing & t) {
  207. auto parser =
  208. build_peg_parser([](common_peg_parser_builder & p) { return p.literal("choice_a") | p.literal("choice_b"); });
  209. auto ctx = common_peg_parse_context("choice", true);
  210. auto result = parser.parse(ctx);
  211. t.assert_equal("choices_partial_match_2", true, result.need_more_input());
  212. });
  213. // Choices - Full Match 1
  214. t.test("choices_full_match_1", [&](testing & t) {
  215. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("first") | p.literal("second"); });
  216. auto ctx = common_peg_parse_context("first", false);
  217. auto result = parser.parse(ctx);
  218. t.assert_equal("choices_full_match_1", true, result.success());
  219. });
  220. // Choices - Full Match 2
  221. t.test("choices_full_match_2", [&](testing & t) {
  222. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("alpha") | p.literal("beta"); });
  223. auto ctx = common_peg_parse_context("beta", false);
  224. auto result = parser.parse(ctx);
  225. t.assert_equal("choices_full_match_2", true, result.success());
  226. });
  227. // Choices - No Match
  228. t.test("choices_no_match", [&](testing & t) {
  229. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("good") | p.literal("better"); });
  230. auto ctx = common_peg_parse_context("best", false);
  231. auto result = parser.parse(ctx);
  232. t.assert_equal("choices_no_match", true, result.fail());
  233. });
  234. // Zero or More - Partial Match 1
  235. t.test("zero_or_more_partial_match_1", [&](testing & t) {
  236. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("ab")); });
  237. auto ctx = common_peg_parse_context("a", true);
  238. auto result = parser.parse(ctx);
  239. t.assert_equal("zero_or_more_partial_match_1", true, result.need_more_input());
  240. });
  241. // Zero or More - Partial Match 2
  242. t.test("zero_or_more_partial_match_2", [&](testing & t) {
  243. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("xy")); });
  244. auto ctx = common_peg_parse_context("xyx", true);
  245. auto result = parser.parse(ctx);
  246. t.assert_equal("zero_or_more_partial_match_2", true, result.need_more_input());
  247. });
  248. // Zero or More - Full Match
  249. t.test("zero_or_more_full_match", [&](testing & t) {
  250. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("test")); });
  251. auto ctx = common_peg_parse_context("test", false);
  252. auto result = parser.parse(ctx);
  253. t.assert_equal("zero_or_more_full_match", true, result.success());
  254. });
  255. // One or More - Partial Match 1
  256. t.test("one_or_more_partial_match_1", [&](testing & t) {
  257. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("repeat")); });
  258. auto ctx = common_peg_parse_context("rep", true);
  259. auto result = parser.parse(ctx);
  260. t.assert_equal("one_or_more_partial_match_1", true, result.need_more_input());
  261. });
  262. // One or More - Partial Match 2
  263. t.test("one_or_more_partial_match_2", [&](testing & t) {
  264. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("ab")); });
  265. auto ctx = common_peg_parse_context("aba", true);
  266. auto result = parser.parse(ctx);
  267. t.assert_equal("one_or_more_partial_match_2", true, result.need_more_input());
  268. });
  269. // One or More - Full Match
  270. t.test("one_or_more_full_match", [&](testing & t) {
  271. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("single")); });
  272. auto ctx = common_peg_parse_context("single", false);
  273. auto result = parser.parse(ctx);
  274. t.assert_equal("one_or_more_full_match", true, result.success());
  275. });
  276. // One or More - No Match
  277. t.test("one_or_more_no_match", [&](testing & t) {
  278. auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("()")); });
  279. auto ctx = common_peg_parse_context("success", false);
  280. auto result = parser.parse(ctx);
  281. t.assert_equal("one_or_more_no_match", true, result.fail());
  282. });
  283. });
  284. t.test("recursive rules", [](testing &t) {
  285. // Test simple number
  286. t.test("simple_number", [](testing &t) {
  287. auto value_parser = build_peg_parser([](common_peg_parser_builder & p) {
  288. p.rule("number", p.chars("0-9"));
  289. p.rule("list", p.literal("[") + p.ref("value") + p.literal("]"));
  290. return p.rule("value", p.ref("number") | p.ref("list"));
  291. });
  292. common_peg_parse_context ctx("1", false);
  293. auto result = value_parser.parse(ctx);
  294. t.assert_equal("result_is_success", true, result.success());
  295. });
  296. // Test simple list
  297. t.test("simple_list", [](testing &t) {
  298. auto value_parser = build_peg_parser([](common_peg_parser_builder & p) {
  299. p.rule("number", p.chars("0-9"));
  300. p.rule("list", p.literal("[") + p.ref("value") + p.literal("]"));
  301. return p.rule("value", p.ref("number") | p.ref("list"));
  302. });
  303. common_peg_parse_context ctx("[1]", false);
  304. auto result = value_parser.parse(ctx);
  305. t.assert_equal("result_is_success", true, result.success());
  306. });
  307. // Test nested list
  308. t.test("nested_list", [](testing &t) {
  309. auto value_parser = build_peg_parser([](common_peg_parser_builder & p) {
  310. p.rule("number", p.chars("0-9"));
  311. p.rule("list", p.literal("[") + p.ref("value") + p.literal("]"));
  312. return p.rule("value", p.ref("number") | p.ref("list"));
  313. });
  314. common_peg_parse_context ctx("[[2]]", false);
  315. auto result = value_parser.parse(ctx);
  316. t.assert_equal("result_is_success", true, result.success());
  317. });
  318. // Test deeply nested list
  319. t.test("deeply_nested_list", [](testing &t) {
  320. auto value_parser = build_peg_parser([](common_peg_parser_builder & p) {
  321. p.rule("number", p.chars("0-9"));
  322. p.rule("list", p.literal("[") + p.ref("value") + p.literal("]"));
  323. return p.rule("value", p.ref("number") | p.ref("list"));
  324. });
  325. common_peg_parse_context ctx("[[[3]]]", false);
  326. auto result = value_parser.parse(ctx);
  327. t.assert_equal("result_is_success", true, result.success());
  328. });
  329. // Test need_more_input match
  330. t.test("need_more_input_match", [](testing &t) {
  331. auto value_parser = build_peg_parser([](common_peg_parser_builder & p) {
  332. p.rule("number", p.chars("0-9"));
  333. p.rule("list", p.literal("[") + p.ref("value") + p.literal("]"));
  334. return p.rule("value", p.ref("number") | p.ref("list"));
  335. });
  336. common_peg_parse_context ctx("[[", true);
  337. auto result = value_parser.parse(ctx);
  338. t.assert_equal("result_is_need_more_input", true, result.need_more_input());
  339. });
  340. // Test no match
  341. t.test("no_match", [](testing &t) {
  342. auto value_parser = build_peg_parser([](common_peg_parser_builder & p) {
  343. p.rule("number", p.chars("0-9"));
  344. p.rule("list", p.literal("[") + p.ref("value") + p.literal("]"));
  345. return p.rule("value", p.ref("number") | p.ref("list"));
  346. });
  347. common_peg_parse_context ctx("[a]", false);
  348. auto result = value_parser.parse(ctx);
  349. t.assert_equal("result_is_fail", true, result.fail());
  350. });
  351. });
  352. }