1
0

test-grammar-parser.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "llama.h"
  5. // TODO: should not include libllama sources
  6. #include "../src/llama-grammar.h"
  7. #include <cassert>
  8. static const char * type_str(llama_gretype type) {
  9. switch (type) {
  10. case LLAMA_GRETYPE_CHAR: return "LLAMA_GRETYPE_CHAR";
  11. case LLAMA_GRETYPE_CHAR_NOT: return "LLAMA_GRETYPE_CHAR_NOT";
  12. case LLAMA_GRETYPE_CHAR_ALT: return "LLAMA_GRETYPE_CHAR_ALT";
  13. case LLAMA_GRETYPE_CHAR_RNG_UPPER: return "LLAMA_GRETYPE_CHAR_RNG_UPPER";
  14. case LLAMA_GRETYPE_RULE_REF: return "LLAMA_GRETYPE_RULE_REF";
  15. case LLAMA_GRETYPE_ALT: return "LLAMA_GRETYPE_ALT";
  16. case LLAMA_GRETYPE_END: return "LLAMA_GRETYPE_END";
  17. default: return "?";
  18. }
  19. }
  20. static void verify_parsing(const char *grammar_bytes, const std::vector<std::pair<std::string, uint32_t>> expected, const std::vector<llama_grammar_element> &expected_rules) {
  21. uint32_t index = 0;
  22. llama_grammar_parser parsed_grammar;
  23. parsed_grammar.parse(grammar_bytes);
  24. std::map<uint32_t, std::string> symbol_names;
  25. for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) {
  26. symbol_names[it->second] = it->first;
  27. }
  28. auto print_all = [&]() {
  29. fprintf(stderr, " verify_parsing(R\"\"\"(%s)\"\"\", {\n", grammar_bytes);
  30. for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) {
  31. fprintf(stderr, " {\"%s\", %u},\n", it->first.c_str(), it->second);
  32. }
  33. fprintf(stderr, " }, {\n");
  34. for (size_t i_rule = 0; i_rule < parsed_grammar.rules.size(); i_rule++) {
  35. fprintf(stderr, " // %s (index %zu)\n", symbol_names[i_rule].c_str(), i_rule);
  36. auto & rule = parsed_grammar.rules[i_rule];
  37. for (uint32_t i = 0; i < rule.size(); i++) {
  38. std::string rule_str;
  39. fprintf(stderr, " {%s, ", type_str(rule[i].type));
  40. if (rule[i].type == LLAMA_GRETYPE_CHAR || rule[i].type == LLAMA_GRETYPE_CHAR_ALT ||
  41. rule[i].type == LLAMA_GRETYPE_CHAR_NOT || rule[i].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
  42. char c = rule[i].value;
  43. if (c == '\n') {
  44. fprintf(stderr, "'\\n'");
  45. } else if (c == '\t') {
  46. fprintf(stderr, "'\\t'");
  47. } else if (c == '\r') {
  48. fprintf(stderr, "'\\r'");
  49. } else if (c == '\0') {
  50. fprintf(stderr, "'\\0'");
  51. } else {
  52. fprintf(stderr, "'%c'", c);
  53. }
  54. } else if (rule[i].type == LLAMA_GRETYPE_RULE_REF) {
  55. fprintf(stderr, "/* %s */ %u", symbol_names[rule[i].value].c_str(), rule[i].value);
  56. } else {
  57. fprintf(stderr, "%u", rule[i].value);
  58. }
  59. fprintf(stderr, "},\n");
  60. }
  61. }
  62. fprintf(stderr, " });\n");
  63. };
  64. if (getenv("TEST_GRAMMAR_PARSER_PRINT_ALL")) {
  65. print_all();
  66. fprintf(stderr, "\n");
  67. return;
  68. }
  69. fprintf(stderr, "Testing grammar:%s\n", grammar_bytes);
  70. if (parsed_grammar.symbol_ids.size() != expected.size()) {
  71. fprintf(stderr, "Code to update expectation (set TEST_GRAMMAR_PARSER_PRINT_ALL=1 to print all):\n");
  72. print_all();
  73. assert(parsed_grammar.symbol_ids.size() == expected.size());
  74. }
  75. for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
  76. {
  77. std::string key = it->first;
  78. uint32_t value = it->second;
  79. std::pair<std::string, uint32_t> expected_pair = expected[index];
  80. // pretty print error message before asserting
  81. if (expected_pair.first != key || expected_pair.second != value)
  82. {
  83. fprintf(stderr, "index: %u\n", index);
  84. fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
  85. fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
  86. fprintf(stderr, "expected_pair != actual_pair\n");
  87. fprintf(stderr, "Code to update expectation (set TEST_GRAMMAR_PARSER_PRINT_ALL=1 to print all):\n");
  88. print_all();
  89. }
  90. assert(expected_pair.first == key && expected_pair.second == value);
  91. index++;
  92. }
  93. index = 0;
  94. for (auto rule : parsed_grammar.rules)
  95. {
  96. // compare rule to expected rule
  97. for (uint32_t i = 0; i < rule.size(); i++)
  98. {
  99. llama_grammar_element element = rule[i];
  100. llama_grammar_element expected_element = expected_rules[index];
  101. // pretty print error message before asserting
  102. if (expected_element.type != element.type || expected_element.value != element.value)
  103. {
  104. fprintf(stderr, "index: %u\n", index);
  105. fprintf(stderr, "expected_element: %s, %u\n", type_str(expected_element.type), expected_element.value);
  106. fprintf(stderr, "actual_element: %s, %u\n", type_str(element.type), element.value);
  107. fprintf(stderr, "expected_element != actual_element\n");
  108. fprintf(stderr, "all elements:\n");
  109. fprintf(stderr, "Code to update expectation (set TEST_GRAMMAR_PARSER_PRINT_ALL=1 to print all):\n");
  110. print_all();
  111. }
  112. assert(expected_element.type == element.type && expected_element.value == element.value);
  113. index++;
  114. }
  115. }
  116. }
  117. static void verify_failure(const char * grammar_bytes) {
  118. fprintf(stderr, "Testing expected failure:%s\n", grammar_bytes);
  119. llama_grammar_parser result;
  120. result.parse(grammar_bytes);
  121. assert(result.rules.empty() && "should have failed");
  122. }
  123. int main()
  124. {
  125. verify_failure(R"""(
  126. root ::= "a"{,}"
  127. )""");
  128. verify_failure(R"""(
  129. root ::= "a"{,10}"
  130. )""");
  131. verify_parsing(R"""(
  132. root ::= "a"
  133. )""", {
  134. {"root", 0},
  135. }, {
  136. // root (index 0)
  137. {LLAMA_GRETYPE_CHAR, 'a'},
  138. {LLAMA_GRETYPE_END, 0},
  139. });
  140. verify_parsing(R"""(
  141. root ::= "a" | [bdx-z] | [^1-3]
  142. )""", {
  143. {"root", 0},
  144. }, {
  145. // root (index 0)
  146. {LLAMA_GRETYPE_CHAR, 'a'},
  147. {LLAMA_GRETYPE_ALT, 0},
  148. {LLAMA_GRETYPE_CHAR, 'b'},
  149. {LLAMA_GRETYPE_CHAR_ALT, 'd'},
  150. {LLAMA_GRETYPE_CHAR_ALT, 'x'},
  151. {LLAMA_GRETYPE_CHAR_RNG_UPPER, 'z'},
  152. {LLAMA_GRETYPE_ALT, 0},
  153. {LLAMA_GRETYPE_CHAR_NOT, '1'},
  154. {LLAMA_GRETYPE_CHAR_RNG_UPPER, '3'},
  155. {LLAMA_GRETYPE_END, 0},
  156. });
  157. verify_parsing(R"""(
  158. root ::= a+
  159. a ::= "a"
  160. )""", {
  161. {"a", 1},
  162. {"root", 0},
  163. {"root_2", 2},
  164. }, {
  165. // root (index 0)
  166. {LLAMA_GRETYPE_RULE_REF, /* a */ 1},
  167. {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
  168. {LLAMA_GRETYPE_END, 0},
  169. // a (index 1)
  170. {LLAMA_GRETYPE_CHAR, 'a'},
  171. {LLAMA_GRETYPE_END, 0},
  172. // root_2 (index 2)
  173. {LLAMA_GRETYPE_RULE_REF, /* a */ 1},
  174. {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
  175. {LLAMA_GRETYPE_ALT, 0},
  176. {LLAMA_GRETYPE_END, 0},
  177. });
  178. verify_parsing(R"""(
  179. root ::= "a"+
  180. )""", {
  181. {"root", 0},
  182. {"root_1", 1},
  183. }, {
  184. // root (index 0)
  185. {LLAMA_GRETYPE_CHAR, 'a'},
  186. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  187. {LLAMA_GRETYPE_END, 0},
  188. // root_1 (index 1)
  189. {LLAMA_GRETYPE_CHAR, 'a'},
  190. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  191. {LLAMA_GRETYPE_ALT, 0},
  192. {LLAMA_GRETYPE_END, 0},
  193. });
  194. verify_parsing(R"""(
  195. root ::= a?
  196. a ::= "a"
  197. )""", {
  198. {"a", 1},
  199. {"root", 0},
  200. {"root_2", 2},
  201. }, {
  202. // root (index 0)
  203. {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
  204. {LLAMA_GRETYPE_END, 0},
  205. // a (index 1)
  206. {LLAMA_GRETYPE_CHAR, 'a'},
  207. {LLAMA_GRETYPE_END, 0},
  208. // root_2 (index 2)
  209. {LLAMA_GRETYPE_RULE_REF, /* a */ 1},
  210. {LLAMA_GRETYPE_ALT, 0},
  211. {LLAMA_GRETYPE_END, 0},
  212. });
  213. verify_parsing(R"""(
  214. root ::= "a"?
  215. )""", {
  216. {"root", 0},
  217. {"root_1", 1},
  218. }, {
  219. // root (index 0)
  220. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  221. {LLAMA_GRETYPE_END, 0},
  222. // root_1 (index 1)
  223. {LLAMA_GRETYPE_CHAR, 'a'},
  224. {LLAMA_GRETYPE_ALT, 0},
  225. {LLAMA_GRETYPE_END, 0},
  226. });
  227. verify_parsing(R"""(
  228. root ::= a*
  229. a ::= "a"
  230. )""", {
  231. {"a", 1},
  232. {"root", 0},
  233. {"root_2", 2},
  234. }, {
  235. // root (index 0)
  236. {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
  237. {LLAMA_GRETYPE_END, 0},
  238. // a (index 1)
  239. {LLAMA_GRETYPE_CHAR, 'a'},
  240. {LLAMA_GRETYPE_END, 0},
  241. // root_2 (index 2)
  242. {LLAMA_GRETYPE_RULE_REF, /* a */ 1},
  243. {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
  244. {LLAMA_GRETYPE_ALT, 0},
  245. {LLAMA_GRETYPE_END, 0},
  246. });
  247. verify_parsing(R"""(
  248. root ::= "a"*
  249. )""", {
  250. {"root", 0},
  251. {"root_1", 1},
  252. }, {
  253. // root (index 0)
  254. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  255. {LLAMA_GRETYPE_END, 0},
  256. // root_1 (index 1)
  257. {LLAMA_GRETYPE_CHAR, 'a'},
  258. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  259. {LLAMA_GRETYPE_ALT, 0},
  260. {LLAMA_GRETYPE_END, 0},
  261. });
  262. verify_parsing(R"""(
  263. root ::= "a"{2}
  264. )""", {
  265. {"root", 0},
  266. }, {
  267. // root (index 0)
  268. {LLAMA_GRETYPE_CHAR, 'a'},
  269. {LLAMA_GRETYPE_CHAR, 'a'},
  270. {LLAMA_GRETYPE_END, 0},
  271. });
  272. verify_parsing(R"""(
  273. root ::= "a"{2,}
  274. )""", {
  275. {"root", 0},
  276. {"root_1", 1},
  277. }, {
  278. // root (index 0)
  279. {LLAMA_GRETYPE_CHAR, 'a'},
  280. {LLAMA_GRETYPE_CHAR, 'a'},
  281. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  282. {LLAMA_GRETYPE_END, 0},
  283. // root_1 (index 1)
  284. {LLAMA_GRETYPE_CHAR, 'a'},
  285. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  286. {LLAMA_GRETYPE_ALT, 0},
  287. {LLAMA_GRETYPE_END, 0},
  288. });
  289. verify_parsing(R"""(
  290. root ::= "a"{ 4}
  291. )""", {
  292. {"root", 0},
  293. }, {
  294. // root (index 0)
  295. {LLAMA_GRETYPE_CHAR, 'a'},
  296. {LLAMA_GRETYPE_CHAR, 'a'},
  297. {LLAMA_GRETYPE_CHAR, 'a'},
  298. {LLAMA_GRETYPE_CHAR, 'a'},
  299. {LLAMA_GRETYPE_END, 0},
  300. });
  301. verify_parsing(R"""(
  302. root ::= "a"{2,4}
  303. )""", {
  304. {"root", 0},
  305. {"root_1", 1},
  306. {"root_2", 2},
  307. }, {
  308. // root (index 0)
  309. {LLAMA_GRETYPE_CHAR, 'a'},
  310. {LLAMA_GRETYPE_CHAR, 'a'},
  311. {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
  312. {LLAMA_GRETYPE_END, 0},
  313. // root_1 (index 1)
  314. {LLAMA_GRETYPE_CHAR, 'a'},
  315. {LLAMA_GRETYPE_ALT, 0},
  316. {LLAMA_GRETYPE_END, 0},
  317. // root_2 (index 2)
  318. {LLAMA_GRETYPE_CHAR, 'a'},
  319. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  320. {LLAMA_GRETYPE_ALT, 0},
  321. {LLAMA_GRETYPE_END, 0},
  322. });
  323. verify_parsing(R"""(
  324. root ::= (expr "=" term "\n")+
  325. expr ::= term ([-+*/] term)*
  326. term ::= [0-9]+
  327. )""", {
  328. {"expr", 2},
  329. {"expr_5", 5},
  330. {"expr_6", 6},
  331. {"root", 0},
  332. {"root_1", 1},
  333. {"root_4", 4},
  334. {"term", 3},
  335. {"term_7", 7},
  336. }, {
  337. // root (index 0)
  338. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  339. {LLAMA_GRETYPE_RULE_REF, /* root_4 */ 4},
  340. {LLAMA_GRETYPE_END, 0},
  341. // root_1 (index 1)
  342. {LLAMA_GRETYPE_RULE_REF, /* expr */ 2},
  343. {LLAMA_GRETYPE_CHAR, '='},
  344. {LLAMA_GRETYPE_RULE_REF, /* term */ 3},
  345. {LLAMA_GRETYPE_CHAR, '\n'},
  346. {LLAMA_GRETYPE_END, 0},
  347. // expr (index 2)
  348. {LLAMA_GRETYPE_RULE_REF, /* term */ 3},
  349. {LLAMA_GRETYPE_RULE_REF, /* expr_6 */ 6},
  350. {LLAMA_GRETYPE_END, 0},
  351. // term (index 3)
  352. {LLAMA_GRETYPE_CHAR, '0'},
  353. {LLAMA_GRETYPE_CHAR_RNG_UPPER, '9'},
  354. {LLAMA_GRETYPE_RULE_REF, /* term_7 */ 7},
  355. {LLAMA_GRETYPE_END, 0},
  356. // root_4 (index 4)
  357. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  358. {LLAMA_GRETYPE_RULE_REF, /* root_4 */ 4},
  359. {LLAMA_GRETYPE_ALT, 0},
  360. {LLAMA_GRETYPE_END, 0},
  361. // expr_5 (index 5)
  362. {LLAMA_GRETYPE_CHAR, '-'},
  363. {LLAMA_GRETYPE_CHAR_ALT, '+'},
  364. {LLAMA_GRETYPE_CHAR_ALT, '*'},
  365. {LLAMA_GRETYPE_CHAR_ALT, '/'},
  366. {LLAMA_GRETYPE_RULE_REF, /* term */ 3},
  367. {LLAMA_GRETYPE_END, 0},
  368. // expr_6 (index 6)
  369. {LLAMA_GRETYPE_RULE_REF, /* expr_5 */ 5},
  370. {LLAMA_GRETYPE_RULE_REF, /* expr_6 */ 6},
  371. {LLAMA_GRETYPE_ALT, 0},
  372. {LLAMA_GRETYPE_END, 0},
  373. // term_7 (index 7)
  374. {LLAMA_GRETYPE_CHAR, '0'},
  375. {LLAMA_GRETYPE_CHAR_RNG_UPPER, '9'},
  376. {LLAMA_GRETYPE_RULE_REF, /* term_7 */ 7},
  377. {LLAMA_GRETYPE_ALT, 0},
  378. {LLAMA_GRETYPE_END, 0},
  379. });
  380. verify_parsing(R"""(
  381. root ::= (expr "=" ws term "\n")+
  382. expr ::= term ([-+*/] term)*
  383. term ::= ident | num | "(" ws expr ")" ws
  384. ident ::= [a-z] [a-z0-9_]* ws
  385. num ::= [0-9]+ ws
  386. ws ::= [ \t\n]*
  387. )""", {
  388. {"expr", 2},
  389. {"expr_6", 6},
  390. {"expr_7", 7},
  391. {"ident", 8},
  392. {"ident_10", 10},
  393. {"num", 9},
  394. {"num_11", 11},
  395. {"root", 0},
  396. {"root_1", 1},
  397. {"root_5", 5},
  398. {"term", 4},
  399. {"ws", 3},
  400. {"ws_12", 12},
  401. }, {
  402. // root (index 0)
  403. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  404. {LLAMA_GRETYPE_RULE_REF, /* root_5 */ 5},
  405. {LLAMA_GRETYPE_END, 0},
  406. // root_1 (index 1)
  407. {LLAMA_GRETYPE_RULE_REF, /* expr */ 2},
  408. {LLAMA_GRETYPE_CHAR, '='},
  409. {LLAMA_GRETYPE_RULE_REF, /* ws */ 3},
  410. {LLAMA_GRETYPE_RULE_REF, /* term */ 4},
  411. {LLAMA_GRETYPE_CHAR, '\n'},
  412. {LLAMA_GRETYPE_END, 0},
  413. // expr (index 2)
  414. {LLAMA_GRETYPE_RULE_REF, /* term */ 4},
  415. {LLAMA_GRETYPE_RULE_REF, /* expr_7 */ 7},
  416. {LLAMA_GRETYPE_END, 0},
  417. // ws (index 3)
  418. {LLAMA_GRETYPE_RULE_REF, /* ws_12 */ 12},
  419. {LLAMA_GRETYPE_END, 0},
  420. // term (index 4)
  421. {LLAMA_GRETYPE_RULE_REF, /* ident */ 8},
  422. {LLAMA_GRETYPE_ALT, 0},
  423. {LLAMA_GRETYPE_RULE_REF, /* num */ 9},
  424. {LLAMA_GRETYPE_ALT, 0},
  425. {LLAMA_GRETYPE_CHAR, '('},
  426. {LLAMA_GRETYPE_RULE_REF, /* ws */ 3},
  427. {LLAMA_GRETYPE_RULE_REF, /* expr */ 2},
  428. {LLAMA_GRETYPE_CHAR, ')'},
  429. {LLAMA_GRETYPE_RULE_REF, /* ws */ 3},
  430. {LLAMA_GRETYPE_END, 0},
  431. // root_5 (index 5)
  432. {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
  433. {LLAMA_GRETYPE_RULE_REF, /* root_5 */ 5},
  434. {LLAMA_GRETYPE_ALT, 0},
  435. {LLAMA_GRETYPE_END, 0},
  436. // expr_6 (index 6)
  437. {LLAMA_GRETYPE_CHAR, '-'},
  438. {LLAMA_GRETYPE_CHAR_ALT, '+'},
  439. {LLAMA_GRETYPE_CHAR_ALT, '*'},
  440. {LLAMA_GRETYPE_CHAR_ALT, '/'},
  441. {LLAMA_GRETYPE_RULE_REF, /* term */ 4},
  442. {LLAMA_GRETYPE_END, 0},
  443. // expr_7 (index 7)
  444. {LLAMA_GRETYPE_RULE_REF, /* expr_6 */ 6},
  445. {LLAMA_GRETYPE_RULE_REF, /* expr_7 */ 7},
  446. {LLAMA_GRETYPE_ALT, 0},
  447. {LLAMA_GRETYPE_END, 0},
  448. // ident (index 8)
  449. {LLAMA_GRETYPE_CHAR, 'a'},
  450. {LLAMA_GRETYPE_CHAR_RNG_UPPER, 'z'},
  451. {LLAMA_GRETYPE_RULE_REF, /* ident_10 */ 10},
  452. {LLAMA_GRETYPE_RULE_REF, /* ws */ 3},
  453. {LLAMA_GRETYPE_END, 0},
  454. // num (index 9)
  455. {LLAMA_GRETYPE_CHAR, '0'},
  456. {LLAMA_GRETYPE_CHAR_RNG_UPPER, '9'},
  457. {LLAMA_GRETYPE_RULE_REF, /* num_11 */ 11},
  458. {LLAMA_GRETYPE_RULE_REF, /* ws */ 3},
  459. {LLAMA_GRETYPE_END, 0},
  460. // ident_10 (index 10)
  461. {LLAMA_GRETYPE_CHAR, 'a'},
  462. {LLAMA_GRETYPE_CHAR_RNG_UPPER, 'z'},
  463. {LLAMA_GRETYPE_CHAR_ALT, '0'},
  464. {LLAMA_GRETYPE_CHAR_RNG_UPPER, '9'},
  465. {LLAMA_GRETYPE_CHAR_ALT, '_'},
  466. {LLAMA_GRETYPE_RULE_REF, /* ident_10 */ 10},
  467. {LLAMA_GRETYPE_ALT, 0},
  468. {LLAMA_GRETYPE_END, 0},
  469. // num_11 (index 11)
  470. {LLAMA_GRETYPE_CHAR, '0'},
  471. {LLAMA_GRETYPE_CHAR_RNG_UPPER, '9'},
  472. {LLAMA_GRETYPE_RULE_REF, /* num_11 */ 11},
  473. {LLAMA_GRETYPE_ALT, 0},
  474. {LLAMA_GRETYPE_END, 0},
  475. // ws_12 (index 12)
  476. {LLAMA_GRETYPE_CHAR, ' '},
  477. {LLAMA_GRETYPE_CHAR_ALT, '\t'},
  478. {LLAMA_GRETYPE_CHAR_ALT, '\n'},
  479. {LLAMA_GRETYPE_RULE_REF, /* ws_12 */ 12},
  480. {LLAMA_GRETYPE_ALT, 0},
  481. {LLAMA_GRETYPE_END, 0},
  482. });
  483. // <[1000]> = "<think>"
  484. // <[1001]> = "</think>"
  485. verify_parsing(R"""(
  486. root ::= <[1000]> !<[1001]> <[1001]>
  487. )""", {
  488. {"root", 0}
  489. }, {
  490. // root (index 0)
  491. {LLAMA_GRETYPE_TOKEN, 1000},
  492. {LLAMA_GRETYPE_TOKEN_NOT, 1001},
  493. {LLAMA_GRETYPE_TOKEN, 1001},
  494. {LLAMA_GRETYPE_END, 0},
  495. });
  496. return 0;
  497. }