1
0

test-gbnf-generation.cpp 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. #include "tests.h"
  2. #include "json-schema-to-grammar.h"
  3. #include <regex>
  // Returns a copy of `s` in which the whitespace run at the very start of the
  // string, and the whitespace run following every newline, has been removed.
  // Because `\s` also matches newlines, consecutive blank lines after a newline
  // collapse into that single newline.
  static std::string trim_leading_space(const std::string & s) {
      // Compiled once on first use and shared by all later calls.
      static const std::regex indent_re(R"((^|\n)\s+)");

      // "$1" re-emits the captured anchor (empty at string start, "\n" otherwise),
      // so only the whitespace that follows the anchor is dropped.
      std::string stripped = std::regex_replace(s, indent_re, "$1");
      return stripped;
  }
  8. static void assert_gbnf_equal(testing & t, const std::string & expected, const std::string & actual) {
  9. t.assert_equal("gbnf are equal", trim_leading_space(expected), trim_leading_space(actual));
  10. }
  11. void test_gbnf_generation(testing &t) {
  12. t.test("literal grammar generation", [](testing &t) {
  13. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  14. return p.literal("hello");
  15. });
  16. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  17. parser.build_grammar(builder);
  18. });
  19. assert_gbnf_equal(t, R"""(
  20. root ::= "hello"
  21. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  22. )""", gbnf);
  23. });
  24. t.test("char class grammar", [](testing &t) {
  25. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  26. return p.chars("[a-z]", 1, 1);
  27. });
  28. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  29. parser.build_grammar(builder);
  30. });
  31. assert_gbnf_equal(t, R"""(
  32. root ::= [a-z]
  33. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  34. )""", gbnf);
  35. });
  36. t.test("sequence grammar", [](testing &t) {
  37. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  38. return p.literal("hello") + p.literal(" ") + p.literal("world");
  39. });
  40. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  41. parser.build_grammar(builder);
  42. });
  43. assert_gbnf_equal(t, R"""(
  44. root ::= "hello" " " "world"
  45. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  46. )""", gbnf);
  47. });
  48. t.test("choice grammar", [](testing &t) {
  49. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  50. return p.literal("cat") | p.literal("dog");
  51. });
  52. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  53. parser.build_grammar(builder);
  54. });
  55. assert_gbnf_equal(t, R"""(
  56. root ::= "cat" | "dog"
  57. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  58. )""", gbnf);
  59. });
  60. t.test("one_or_more grammar", [](testing &t) {
  61. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  62. return p.one_or_more(p.literal("a"));
  63. });
  64. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  65. parser.build_grammar(builder);
  66. });
  67. assert_gbnf_equal(t, R"""(
  68. root ::= "a"+
  69. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  70. )""", gbnf);
  71. });
  72. t.test("zero_or_more grammar", [](testing &t) {
  73. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  74. return p.zero_or_more(p.literal("a"));
  75. });
  76. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  77. parser.build_grammar(builder);
  78. });
  79. assert_gbnf_equal(t, R"""(
  80. root ::= "a"*
  81. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  82. )""", gbnf);
  83. });
  84. t.test("optional grammar", [](testing &t) {
  85. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  86. return p.literal("hello") + p.optional(p.literal(" world"));
  87. });
  88. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  89. parser.build_grammar(builder);
  90. });
  91. assert_gbnf_equal(t, R"""(
  92. root ::= "hello" " world"?
  93. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  94. )""", gbnf);
  95. });
  96. t.test("until grammar", [](testing &t) {
  97. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  98. return p.until("</tag>");
  99. });
  100. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  101. parser.build_grammar(builder);
  102. });
  103. assert_gbnf_equal(t, R"""(
  104. root ::= ([^<] | "<" [^/] | "</" [^t] | "</t" [^a] | "</ta" [^g] | "</tag" [^>])*
  105. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  106. )""", gbnf);
  107. });
  108. t.test("complex expressions with parentheses", [](testing &t) {
  109. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  110. return p.one_or_more(p.literal("a") | p.literal("b"));
  111. });
  112. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  113. parser.build_grammar(builder);
  114. });
  115. assert_gbnf_equal(t, R"""(
  116. root ::= ("a" | "b")+
  117. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  118. )""", gbnf);
  119. });
  120. t.test("rule references", [](testing &t) {
  121. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  122. auto digit = p.rule("digit", p.chars("[0-9]", 1, 1));
  123. return p.one_or_more(digit);
  124. });
  125. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  126. parser.build_grammar(builder);
  127. });
  128. assert_gbnf_equal(t, R"""(
  129. digit ::= [0-9]
  130. root ::= digit+
  131. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  132. )""", gbnf);
  133. });
  134. t.test("escaping in literals", [](testing &t) {
  135. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  136. return p.literal("hello\nworld\n!");
  137. });
  138. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  139. parser.build_grammar(builder);
  140. });
  141. assert_gbnf_equal(t, R"""(
  142. root ::= "hello\nworld\n!"
  143. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  144. )""", gbnf);
  145. });
  146. t.test("operator<< (whitespace insertion)", [](testing &t) {
  147. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  148. return p.literal("hello") << p.literal("world");
  149. });
  150. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  151. parser.build_grammar(builder);
  152. });
  153. assert_gbnf_equal(t, R"""(
  154. root ::= "hello" space "world"
  155. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  156. )""", gbnf);
  157. });
  158. t.test("emit only reachable rules", [](testing &t) {
  159. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  160. p.rule("orphan", p.literal("orphan"));
  161. return p.literal("hello") + p.rule("child", p.literal(" world"));
  162. });
  163. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  164. parser.build_grammar(builder);
  165. });
  166. assert_gbnf_equal(t, R"""(
  167. child ::= " world"
  168. root ::= "hello" child
  169. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  170. )""", gbnf);
  171. });
  172. t.test("emit only trigger rules (and references)", [](testing &t) {
  173. auto parser = build_peg_parser([](common_peg_parser_builder & p) {
  174. auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2"));
  175. p.rule("rule-2", p.literal("b") + p.ref("rule-3"), true);
  176. p.rule("rule-3", p.literal("c") + p.ref("rule-4"));
  177. p.rule("rule-4", p.literal("d"), true);
  178. return rule1;
  179. });
  180. auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
  181. parser.build_grammar(builder);
  182. });
  183. assert_gbnf_equal(t, R"""(
  184. root ::= rule-1
  185. rule-1 ::= "a" rule-2
  186. rule-2 ::= "b" rule-3
  187. rule-3 ::= "c" rule-4
  188. rule-4 ::= "d"
  189. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  190. )""", gbnf);
  191. auto gbnf_lazy = build_grammar([&](const common_grammar_builder & builder) {
  192. parser.build_grammar(builder, true);
  193. });
  194. assert_gbnf_equal(t, R"""(
  195. root ::= rule-2 | rule-4
  196. rule-2 ::= "b" rule-3
  197. rule-3 ::= "c" rule-4
  198. rule-4 ::= "d"
  199. space ::= | " " | "\n"{1,2} [ \t]{0,20}
  200. )""", gbnf_lazy);
  201. });
  202. }