test-grammar-integration.cpp 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #define LLAMA_API_INTERNAL
  5. #include "ggml.h"
  6. #include "llama.h"
  7. #include "grammar-parser.h"
  8. #include "json-schema-to-grammar.h"
  9. #include "unicode.h"
  10. #include <cassert>
  11. #include <string>
  12. #include <vector>
  13. using json = nlohmann::ordered_json;
  14. //#define INCLUDE_FAILING_TESTS 1
  15. static llama_grammar* build_grammar(const std::string & grammar_str) {
  16. auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  17. // Ensure we parsed correctly
  18. assert(!parsed_grammar.rules.empty());
  19. // Ensure we have a root node
  20. assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
  21. std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
  22. llama_grammar* grammar = llama_grammar_init(
  23. grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
  24. return grammar;
  25. }
  26. static bool test_build_grammar_fails(const std::string & grammar_str) {
  27. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  28. bool grammar_fails = false;
  29. llama_grammar * grammar = build_grammar(grammar_str);
  30. if (grammar != nullptr) {
  31. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  32. } else {
  33. grammar_fails = true;
  34. fprintf(stdout, " ✅︎\n");
  35. }
  36. return grammar_fails;
  37. }
  38. static bool match_string(const std::string & input, llama_grammar* grammar) {
  39. auto decoded = decode_utf8(input, {});
  40. const auto & code_points = decoded.first;
  41. for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
  42. auto prev_stacks = grammar->stacks;
  43. llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
  44. if (grammar->stacks.empty()) {
  45. // no stacks means that the grammar failed to match at this point
  46. return false;
  47. }
  48. }
  49. for (const auto & stack : grammar->stacks) {
  50. if (stack.empty()) {
  51. // An empty stack means that the grammar has been completed
  52. return true;
  53. }
  54. }
  55. return false;
  56. }
  57. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  58. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  59. fflush(stderr);
  60. auto grammar = build_grammar(grammar_str);
  61. // Save the original grammar stacks so that we can reset after every new string we want to test
  62. auto original_stacks = grammar->stacks;
  63. fprintf(stderr, " 🔵 Valid strings:\n");
  64. // Passing strings
  65. for (const auto & test_string : passing_strings) {
  66. fprintf(stderr, " \"%s\" ", test_string.c_str());
  67. fflush(stderr);
  68. bool matched = match_string(test_string, grammar);
  69. if (!matched) {
  70. fprintf(stderr, "❌ (failed to match)\n");
  71. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  72. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  73. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  74. if (grammar_file) {
  75. fprintf(grammar_file, "%s", grammar_str.c_str());
  76. fclose(grammar_file);
  77. }
  78. // DEBUG: Write the test string to test-grammar-integration.string.txt
  79. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  80. if (string_file) {
  81. fprintf(string_file, "%s", test_string.c_str());
  82. fclose(string_file);
  83. }
  84. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  85. } else {
  86. fprintf(stdout, "✅︎\n");
  87. }
  88. assert(matched);
  89. // Reset the grammar stacks
  90. grammar->stacks = original_stacks;
  91. }
  92. fprintf(stderr, " 🟠 Invalid strings:\n");
  93. // Failing strings
  94. for (const auto & test_string : failing_strings) {
  95. fprintf(stderr, " \"%s\" ", test_string.c_str());
  96. fflush(stderr);
  97. bool matched = match_string(test_string, grammar);
  98. if (matched) {
  99. fprintf(stderr, "❌ (incorrectly matched)\n");
  100. } else {
  101. fprintf(stdout, "✅︎\n");
  102. }
  103. assert(!matched);
  104. // Reset the grammar stacks
  105. grammar->stacks = original_stacks;
  106. }
  107. // Clean up allocated memory
  108. llama_grammar_free(grammar);
  109. }
  110. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  111. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  112. }
  113. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  114. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  115. }
  116. static void test_simple_grammar() {
  117. test_schema(
  118. "min 0",
  119. R"""({
  120. "type": "integer",
  121. "minimum": 0
  122. })""",
  123. // Passing strings
  124. {
  125. "0",
  126. "10",
  127. "12",
  128. "10000",
  129. },
  130. // Failing strings
  131. {
  132. "-1",
  133. "-10",
  134. "-10000",
  135. "-100000000000000000000000000000000",
  136. "100000000000000000000000000000000",
  137. "00",
  138. "01",
  139. "-0",
  140. }
  141. );
  142. test_schema(
  143. "min 2",
  144. // Schema
  145. R"""({
  146. "type": "integer",
  147. "minimum": 2
  148. })""",
  149. // Passing strings
  150. {
  151. "2",
  152. "3",
  153. "4",
  154. "10",
  155. "20",
  156. "1234567890000000",
  157. },
  158. // Failing strings
  159. {
  160. "0",
  161. "1",
  162. "-1",
  163. "-100",
  164. "0",
  165. "1",
  166. "01",
  167. "02",
  168. "12345678900000000",
  169. }
  170. );
  171. test_schema(
  172. "min 456",
  173. R"""({
  174. "type": "integer",
  175. "minimum": 456
  176. })""",
  177. // Passing strings
  178. {
  179. "456",
  180. "4560",
  181. "457",
  182. "460",
  183. "500",
  184. },
  185. // Failing strings
  186. {
  187. "455",
  188. "356",
  189. "50",
  190. "050",
  191. "-1",
  192. "-456",
  193. }
  194. );
  195. test_schema(
  196. "min -123",
  197. R"""({
  198. "type": "integer",
  199. "minimum": -123
  200. })""",
  201. // Passing strings
  202. {
  203. "-123",
  204. "-122",
  205. "-11",
  206. "-1",
  207. "0",
  208. "1",
  209. "123",
  210. "1234",
  211. "2345",
  212. },
  213. // Failing strings
  214. {
  215. "-1234",
  216. "-124",
  217. }
  218. );
  219. test_schema(
  220. "max 9999",
  221. // Schema
  222. R"""({
  223. "type": "integer",
  224. "maximum": 9999
  225. })""",
  226. // Passing strings
  227. {
  228. "-99999",
  229. "0",
  230. "9999",
  231. },
  232. // Failing strings
  233. {
  234. "10000",
  235. "99991",
  236. }
  237. );
  238. test_schema(
  239. "max -9999",
  240. // Schema
  241. R"""({
  242. "type": "integer",
  243. "maximum": -9999
  244. })""",
  245. // Passing strings
  246. {
  247. "-10000",
  248. "-9999",
  249. },
  250. // Failing strings
  251. {
  252. "-9998",
  253. "0",
  254. "9999",
  255. }
  256. );
  257. test_schema(
  258. "min 5 max 30",
  259. // Schema
  260. R"""({
  261. "type": "integer",
  262. "minimum": 5,
  263. "maximum": 30
  264. })""",
  265. // Passing strings
  266. {
  267. "5",
  268. "10",
  269. "30",
  270. },
  271. // Failing strings
  272. {
  273. "05",
  274. "4",
  275. "-1",
  276. "31",
  277. "123",
  278. "0123",
  279. }
  280. );
  281. test_schema(
  282. "min -1 max 1",
  283. R"""({
  284. "type": "integer",
  285. "minimum": -1,
  286. "maximum": 1
  287. })""",
  288. // Passing strings
  289. {
  290. "-1",
  291. "0",
  292. "1",
  293. },
  294. // Failing strings
  295. {
  296. "-11",
  297. "-10",
  298. "-2",
  299. "2",
  300. "10",
  301. "11",
  302. }
  303. );
  304. test_schema(
  305. "min -123 max 42",
  306. R"""({
  307. "type": "integer",
  308. "minimum": -123,
  309. "maximum": 42
  310. })""",
  311. // Passing strings
  312. {
  313. "-123",
  314. "-122",
  315. "-13",
  316. "-11",
  317. "-2",
  318. "-1",
  319. "0",
  320. "1",
  321. "5",
  322. "10",
  323. "39",
  324. "40",
  325. "42",
  326. },
  327. // Failing strings
  328. {
  329. "-0123",
  330. "-124",
  331. "-1123",
  332. "-200",
  333. "43",
  334. "123",
  335. "0123",
  336. }
  337. );
  338. test_schema(
  339. "exclusive min / max",
  340. // Schema
  341. R"""({
  342. "type": "integer",
  343. "exclusiveMinimum": 0,
  344. "exclusiveMaximum": 10000
  345. })""",
  346. // Passing strings
  347. {
  348. "1",
  349. "9999",
  350. },
  351. // Failing strings
  352. {
  353. "0",
  354. "01",
  355. "10000",
  356. "99999",
  357. }
  358. );
  359. // Test case for a simple grammar
  360. test_grammar(
  361. "simple grammar",
  362. R"""(
  363. root ::= expr
  364. expr ::= term ("+" term)*
  365. term ::= number
  366. number ::= [0-9]+)""",
  367. // Passing strings
  368. {
  369. "42",
  370. "1+2+3+4+5",
  371. "123+456",
  372. },
  373. // Failing strings
  374. {
  375. "+",
  376. "/ 3",
  377. "1+2+3+4+5+",
  378. "12a45",
  379. }
  380. );
  381. }
  382. static void test_complex_grammar() {
  383. // Test case for a more complex grammar, with both failure strings and success strings
  384. test_grammar(
  385. "medium complexity grammar",
  386. // Grammar
  387. R"""(
  388. root ::= expression
  389. expression ::= term ws (("+"|"-") ws term)*
  390. term ::= factor ws (("*"|"/") ws factor)*
  391. factor ::= number | variable | "(" expression ")" | function-call
  392. number ::= [0-9]+
  393. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  394. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  395. ws ::= [ \t\n\r]?)""",
  396. // Passing strings
  397. {
  398. "42",
  399. "1*2*3*4*5",
  400. "x",
  401. "x+10",
  402. "x1+y2",
  403. "(a+b)*(c-d)",
  404. "func()",
  405. "func(x,y+2)",
  406. "a*(b+c)-d/e",
  407. "f(g(x),h(y,z))",
  408. "x + 10",
  409. "x1 + y2",
  410. "(a + b) * (c - d)",
  411. "func()",
  412. "func(x, y + 2)",
  413. "a * (b + c) - d / e",
  414. "f(g(x), h(y, z))",
  415. "123+456",
  416. "123*456*789-123/456+789*123",
  417. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  418. },
  419. // Failing strings
  420. {
  421. "+",
  422. "/ 3x",
  423. "x + + y",
  424. "a * / b",
  425. "func(,)",
  426. "func(x y)",
  427. "(a + b",
  428. "x + y)",
  429. "a + b * (c - d",
  430. "42 +",
  431. "x +",
  432. "x + 10 +",
  433. "(a + b) * (c - d",
  434. "func(",
  435. "func(x, y + 2",
  436. "a * (b + c) - d /",
  437. "f(g(x), h(y, z)",
  438. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  439. }
  440. );
  441. }
  442. static void test_special_chars() {
  443. // A collection of tests to exercise special characters such as "."
  444. test_grammar(
  445. "special characters",
  446. // Grammar
  447. R"""(
  448. root ::= ... "abc" ...
  449. )""",
  450. // Passing strings
  451. {
  452. "abcabcabc",
  453. "aaaabcccc",
  454. // NOTE: Also ensures that multi-byte characters still count as a single character
  455. "🔵🟠✅abc❌🟠🔵"
  456. },
  457. // Failing strings
  458. {
  459. "aaabcccc",
  460. "aaaaabcccc",
  461. "aaaabccc",
  462. "aaaabccccc",
  463. "🔵🟠✅❌abc❌✅🟠🔵"
  464. "🔵🟠abc🟠🔵"
  465. }
  466. );
  467. }
  468. static void test_quantifiers() {
  469. // A collection of tests to exercise * + and ? quantifiers
  470. test_grammar(
  471. "* quantifier",
  472. // Grammar
  473. R"""(root ::= "a"*)""",
  474. // Passing strings
  475. {
  476. "",
  477. "a",
  478. "aaaaa",
  479. "aaaaaaaaaaaaaaaaaa",
  480. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  481. },
  482. // Failing strings
  483. {
  484. "b",
  485. "ab",
  486. "aab",
  487. "ba",
  488. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  489. }
  490. );
  491. test_grammar(
  492. "+ quantifier",
  493. // Grammar
  494. R"""(root ::= "a"+)""",
  495. // Passing strings
  496. {
  497. "a",
  498. "aaaaa",
  499. "aaaaaaaaaaaaaaaaaa",
  500. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  501. },
  502. // Failing strings
  503. {
  504. "",
  505. "b",
  506. "ab",
  507. "aab",
  508. "ba",
  509. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  510. }
  511. );
  512. test_grammar(
  513. "? quantifier",
  514. // Grammar
  515. R"""(root ::= "a"?)""",
  516. // Passing strings
  517. {
  518. "",
  519. "a"
  520. },
  521. // Failing strings
  522. {
  523. "b",
  524. "ab",
  525. "aa",
  526. "ba",
  527. }
  528. );
  529. test_grammar(
  530. "mixed quantifiers",
  531. // Grammar
  532. R"""(
  533. root ::= cons+ vowel* cons? (vowel cons)*
  534. vowel ::= [aeiouy]
  535. cons ::= [bcdfghjklmnpqrstvwxyz]
  536. )""",
  537. // Passing strings
  538. {
  539. "yes",
  540. "no",
  541. "noyes",
  542. "crwth",
  543. "four",
  544. "bryyyy",
  545. },
  546. // Failing strings
  547. {
  548. "yess",
  549. "yesno",
  550. "forty",
  551. "catyyy",
  552. }
  553. );
  554. test_grammar(
  555. "simple exact repetition",
  556. // Grammar
  557. R"""(
  558. root ::= [ab]{4}
  559. )""",
  560. // Passing strings
  561. {
  562. "aaaa",
  563. "bbbb",
  564. "abab",
  565. },
  566. // Failing strings
  567. {
  568. "a",
  569. "b",
  570. "aaaaa",
  571. }
  572. );
  573. test_grammar(
  574. "simple min repetition",
  575. // Grammar
  576. R"""(
  577. root ::= [ab]{4,}
  578. )""",
  579. // Passing strings
  580. {
  581. "aaaa",
  582. "aaaaab",
  583. "bbbb",
  584. "ababab",
  585. },
  586. // Failing strings
  587. {
  588. "",
  589. "aba",
  590. }
  591. );
  592. test_grammar(
  593. "simple max repetition",
  594. // Grammar
  595. R"""(
  596. root ::= [ab]{0,4}
  597. )""",
  598. // Passing strings
  599. {
  600. "",
  601. "a",
  602. "aa",
  603. "aaa",
  604. "aaab",
  605. },
  606. // Failing strings
  607. {
  608. "aaaaa",
  609. }
  610. );
  611. test_grammar(
  612. "min / max repetition",
  613. // Grammar
  614. R"""(
  615. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  616. )""",
  617. // Passing strings
  618. {
  619. "0xFF 0x12 0xAB",
  620. "0xFF 0x12 0xAB 0x00 0x00",
  621. },
  622. // Failing strings
  623. {
  624. "",
  625. "0xFF",
  626. "0xFF 0x12",
  627. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  628. }
  629. );
  630. }
  631. static void test_failure_missing_root() {
  632. fprintf(stderr, "⚫ Testing missing root node:\n");
  633. // Test case for a grammar that is missing a root rule
  634. const std::string grammar_str = R"""(
  635. rot ::= expr
  636. expr ::= term ("+" term)*
  637. term ::= number
  638. number ::= [0-9]+)""";
  639. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  640. // Ensure we parsed correctly
  641. assert(!parsed_grammar.rules.empty());
  642. // Ensure we do NOT have a root node
  643. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  644. fprintf(stderr, " ✅︎ Passed\n");
  645. }
  646. static void test_failure_missing_reference() {
  647. fprintf(stderr, "⚫ Testing missing reference node:\n");
  648. // Test case for a grammar that is missing a referenced rule
  649. const std::string grammar_str =
  650. R"""(root ::= expr
  651. expr ::= term ("+" term)*
  652. term ::= numero
  653. number ::= [0-9]+)""";
  654. fprintf(stderr, " Expected error: ");
  655. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  656. // Ensure we did NOT parsed correctly
  657. assert(parsed_grammar.rules.empty());
  658. fprintf(stderr, " End of expected error.\n");
  659. fprintf(stderr, " ✅︎ Passed\n");
  660. }
  661. static void test_failure_left_recursion() {
  662. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  663. // Test simple left recursion detection
  664. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  665. assert(test_build_grammar_fails(simple_str));
  666. // Test more complicated left recursion detection
  667. const std::string medium_str = R"""(
  668. root ::= asdf
  669. asdf ::= "a" | asdf "a"
  670. )""";
  671. assert(test_build_grammar_fails(medium_str));
  672. // Test even more complicated left recursion detection
  673. const std::string hard_str = R"""(
  674. root ::= asdf
  675. asdf ::= "a" | foo "b"
  676. foo ::= "c" | asdf "d" | "e")""";
  677. assert(test_build_grammar_fails(hard_str));
  678. // Test yet even more complicated left recursion detection
  679. const std::string hardest_str = R"""(
  680. root ::= asdf
  681. asdf ::= "a" | foo "b"
  682. foo ::= "c" | empty asdf "d" | "e"
  683. empty ::= "blah" | )""";
  684. assert(test_build_grammar_fails(hardest_str));
  685. fprintf(stderr, " ✅︎ Passed\n");
  686. }
  687. static void test_json_schema() {
  688. // Note that this is similar to the regular grammar tests,
  689. // but we convert each json schema to a grammar before parsing.
  690. // Otherwise, this test structure is the same.
  691. test_schema(
  692. "empty schema (object)",
  693. // Schema
  694. R"""(
  695. {}
  696. )""",
  697. // Passing strings
  698. {
  699. "{}",
  700. R"""({"foo": "bar"})""",
  701. },
  702. // Failing strings
  703. {
  704. "",
  705. "[]",
  706. "null",
  707. "\"\"",
  708. "true",
  709. }
  710. );
  711. test_schema(
  712. "exotic formats (list)",
  713. // Schema
  714. R"""(
  715. {
  716. "items": [
  717. { "format": "date" },
  718. { "format": "uuid" },
  719. { "format": "time" },
  720. { "format": "date-time" }
  721. ]
  722. }
  723. )""",
  724. // Passing strings
  725. {
  726. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  727. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  728. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  729. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  730. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  731. },
  732. // Failing strings
  733. {
  734. R"""(["foo", "bar"])""",
  735. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  736. }
  737. );
  738. test_schema(
  739. "string",
  740. // Schema
  741. R"""(
  742. {
  743. "type": "string"
  744. }
  745. )""",
  746. // Passing strings
  747. {
  748. "\"foo\"",
  749. "\"bar\"",
  750. "\"\"",
  751. },
  752. // Failing strings
  753. {
  754. "{}",
  755. "\"foo\": \"bar\"",
  756. }
  757. );
  758. test_schema(
  759. "string w/ min length 1",
  760. // Schema
  761. R"""(
  762. {
  763. "type": "string",
  764. "minLength": 1
  765. }
  766. )""",
  767. // Passing strings
  768. {
  769. "\"foo\"",
  770. "\"bar\"",
  771. },
  772. // Failing strings
  773. {
  774. "\"\"",
  775. "{}",
  776. "\"foo\": \"bar\"",
  777. }
  778. );
  779. test_schema(
  780. "string w/ min length 3",
  781. // Schema
  782. R"""(
  783. {
  784. "type": "string",
  785. "minLength": 3
  786. }
  787. )""",
  788. // Passing strings
  789. {
  790. "\"foo\"",
  791. "\"bar\"",
  792. "\"foobar\"",
  793. },
  794. // Failing strings
  795. {
  796. "\"\"",
  797. "\"f\"",
  798. "\"fo\"",
  799. }
  800. );
  801. test_schema(
  802. "string w/ max length",
  803. // Schema
  804. R"""(
  805. {
  806. "type": "string",
  807. "maxLength": 3
  808. }
  809. )""",
  810. // Passing strings
  811. {
  812. "\"foo\"",
  813. "\"bar\"",
  814. "\"\"",
  815. "\"f\"",
  816. "\"fo\"",
  817. },
  818. // Failing strings
  819. {
  820. "\"foobar\"",
  821. }
  822. );
  823. test_schema(
  824. "string w/ min & max length",
  825. // Schema
  826. R"""(
  827. {
  828. "type": "string",
  829. "minLength": 1,
  830. "maxLength": 4
  831. }
  832. )""",
  833. // Passing strings
  834. {
  835. "\"foo\"",
  836. "\"bar\"",
  837. "\"f\"",
  838. "\"barf\"",
  839. },
  840. // Failing strings
  841. {
  842. "\"\"",
  843. "\"barfo\"",
  844. "\"foobar\"",
  845. }
  846. );
  847. test_schema(
  848. "boolean",
  849. // Schema
  850. R"""(
  851. {
  852. "type": "boolean"
  853. }
  854. )""",
  855. // Passing strings
  856. {
  857. "true",
  858. "false",
  859. },
  860. // Failing strings
  861. {
  862. "\"\"",
  863. "\"true\"",
  864. "True",
  865. "FALSE",
  866. }
  867. );
  868. test_schema(
  869. "integer",
  870. // Schema
  871. R"""(
  872. {
  873. "type": "integer"
  874. }
  875. )""",
  876. // Passing strings
  877. {
  878. "0",
  879. "12345",
  880. "1234567890123456"
  881. },
  882. // Failing strings
  883. {
  884. "",
  885. "01",
  886. "007",
  887. "12345678901234567"
  888. }
  889. );
  890. test_schema(
  891. "string const",
  892. // Schema
  893. R"""(
  894. {
  895. "const": "foo"
  896. }
  897. )""",
  898. // Passing strings
  899. {
  900. "\"foo\"",
  901. },
  902. // Failing strings
  903. {
  904. "foo",
  905. "\"bar\"",
  906. }
  907. );
  908. test_schema(
  909. "non-string const",
  910. // Schema
  911. R"""(
  912. {
  913. "const": true
  914. }
  915. )""",
  916. // Passing strings
  917. {
  918. "true",
  919. },
  920. // Failing strings
  921. {
  922. "",
  923. "foo",
  924. "\"true\"",
  925. }
  926. );
  927. test_schema(
  928. "non-string const",
  929. // Schema
  930. R"""(
  931. {
  932. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  933. }
  934. )""",
  935. // Passing strings
  936. {
  937. "\"red\"",
  938. "null",
  939. "42",
  940. "[\"foo\"]",
  941. },
  942. // Failing strings
  943. {
  944. "",
  945. "420",
  946. "true",
  947. "foo",
  948. }
  949. );
  950. test_schema(
  951. "min+max items",
  952. // Schema
  953. R"""(
  954. {
  955. "items": {
  956. "type": ["number", "integer"]
  957. },
  958. "minItems": 3,
  959. "maxItems": 5
  960. }
  961. )""",
  962. // Passing strings
  963. {
  964. "[1, 2, 3]",
  965. "[1, 2, 3, 4]",
  966. "[1, 2, 3, 4, 5]",
  967. },
  968. // Failing strings
  969. {
  970. "[1, 2]",
  971. "[1, 2, 3, 4, 5, 6]",
  972. "1"
  973. }
  974. );
  975. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  976. test_schema(
  977. "object properties",
  978. // Schema
  979. R"""(
  980. {
  981. "type": "object",
  982. "properties": {
  983. "number": { "type": "number" },
  984. "street_name": { "type": "string" },
  985. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  986. }
  987. }
  988. )""",
  989. // Passing strings
  990. {
  991. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  992. // "By default, leaving out properties is valid"
  993. R"""({ "street_name": "Pennsylvania" })""",
  994. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  995. // "By extension, even an empty object is valid"
  996. R"""({})""",
  997. // "By default, providing additional properties is valid"
  998. #ifdef INCLUDE_FAILING_TESTS
  999. // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
  1000. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1001. // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
  1002. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1003. #endif
  1004. },
  1005. // Failing strings
  1006. {
  1007. // Change datatype from number to string
  1008. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1009. // Reorder properties
  1010. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1011. // Reorder properties
  1012. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1013. }
  1014. );
  1015. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1016. test_schema(
  1017. "object properties, additionalProperties: true",
  1018. // Schema
  1019. R"""(
  1020. {
  1021. "type": "object",
  1022. "properties": {
  1023. "number": { "type": "number" },
  1024. "street_name": { "type": "string" },
  1025. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1026. },
  1027. "additionalProperties": true
  1028. }
  1029. )""",
  1030. // Passing strings
  1031. {
  1032. // "By extension, even an empty object is valid"
  1033. R"""({})""",
  1034. #ifdef INCLUDE_FAILING_TESTS
  1035. // TODO: Following line should pass and doesn't
  1036. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1037. // "By default, leaving out properties is valid"
  1038. // TODO: Following line should pass and doesn't
  1039. R"""({ "street_name": "Pennsylvania" })""",
  1040. // TODO: Following line should pass and doesn't
  1041. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1042. // "By default, providing additional properties is valid"
  1043. // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
  1044. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1045. // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
  1046. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1047. #endif
  1048. },
  1049. // Failing strings
  1050. {
  1051. // Change datatype from number to string
  1052. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1053. // Reorder properties
  1054. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1055. }
  1056. );
  1057. // Additional properties: false
  1058. test_schema(
  1059. "required + optional props each in original order",
  1060. // Schema
  1061. R"""(
  1062. {
  1063. "type": "object",
  1064. "properties": {
  1065. "number": { "type": "number" },
  1066. "street_name": { "type": "string" },
  1067. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1068. },
  1069. "additionalProperties": false
  1070. }
  1071. )""",
  1072. // Passing strings
  1073. {
  1074. R"""({ "street_name": "Pennsylvania" })""",
  1075. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1076. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1077. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1078. #ifdef INCLUDE_FAILING_TESTS
  1079. // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
  1080. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1081. #endif
  1082. },
  1083. // Failing strings
  1084. {
  1085. // Reorder properties
  1086. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1087. // Add "direction"
  1088. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1089. }
  1090. );
  1091. test_schema(
  1092. "required + optional props each in original order",
  1093. // Schema
  1094. R"""(
  1095. {
  1096. "properties": {
  1097. "b": {"type": "string"},
  1098. "a": {"type": "string"},
  1099. "d": {"type": "string"},
  1100. "c": {"type": "string"}
  1101. },
  1102. "required": ["a", "b"],
  1103. "additionalProperties": false
  1104. }
  1105. )""",
  1106. // Passing strings
  1107. {
  1108. R"""({"b": "foo", "a": "bar"})""",
  1109. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1110. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1111. },
  1112. // Failing strings
  1113. {
  1114. R"""({"a": "foo", "b": "bar"})""",
  1115. R"""({"b": "bar"})""",
  1116. R"""({"a": "foo", "c": "baz"})""",
  1117. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1118. }
  1119. );
  1120. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1121. test_schema(
  1122. "required props",
  1123. // Schema
  1124. R"""(
  1125. {
  1126. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1127. "$id": "https://example.com/product.schema.json",
  1128. "title": "Product",
  1129. "description": "A product from Acme's catalog",
  1130. "type": "object",
  1131. "properties": {
  1132. "productId": {
  1133. "description": "The unique identifier for a product",
  1134. "type": "integer"
  1135. },
  1136. "productName": {
  1137. "description": "Name of the product",
  1138. "type": "string"
  1139. },
  1140. "price": {
  1141. "description": "The price of the product",
  1142. "type": "number",
  1143. "exclusiveMinimum": 0
  1144. },
  1145. "tags": {
  1146. "description": "Tags for the product",
  1147. "type": "array",
  1148. "items": {
  1149. "type": "string"
  1150. },
  1151. "minItems": 1,
  1152. "uniqueItems": true
  1153. },
  1154. "dimensions": {
  1155. "type": "object",
  1156. "properties": {
  1157. "length": {
  1158. "type": "number"
  1159. },
  1160. "width": {
  1161. "type": "number"
  1162. },
  1163. "height": {
  1164. "type": "number"
  1165. }
  1166. },
  1167. "required": [ "length", "width", "height" ]
  1168. }
  1169. },
  1170. "required": [ "productId", "productName", "price" ]
  1171. }
  1172. )""",
  1173. // Passing strings
  1174. {
  1175. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1176. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1177. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1178. },
  1179. // Failing strings
  1180. {
  1181. R"""({})""", // Missing all required properties
  1182. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1183. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1184. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1185. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1186. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1187. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1188. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1189. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1190. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1191. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1192. }
  1193. );
  1194. }
  1195. int main() {
  1196. fprintf(stdout, "Running grammar integration tests...\n");
  1197. test_simple_grammar();
  1198. test_complex_grammar();
  1199. test_special_chars();
  1200. test_quantifiers();
  1201. test_failure_missing_root();
  1202. test_failure_missing_reference();
  1203. test_failure_left_recursion();
  1204. test_json_schema();
  1205. fprintf(stdout, "All tests passed.\n");
  1206. return 0;
  1207. }