test-grammar-integration.cpp 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "json-schema-to-grammar.h"
  5. #include "../src/unicode.h"
  6. #include "../src/llama-grammar.h"
  7. #include <cassert>
  8. #include <string>
  9. #include <vector>
  10. using json = nlohmann::ordered_json;
  11. static llama_grammar * build_grammar(const std::string & grammar_str) {
  12. return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0);
  13. }
  14. static bool test_build_grammar_fails(const std::string & grammar_str) {
  15. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  16. bool grammar_fails = false;
  17. llama_grammar * grammar = build_grammar(grammar_str);
  18. if (grammar != nullptr) {
  19. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  20. } else {
  21. grammar_fails = true;
  22. fprintf(stdout, " ✅︎\n");
  23. }
  24. return grammar_fails;
  25. }
  26. static bool match_string(const std::string & input, llama_grammar * grammar) {
  27. const auto cpts = unicode_cpts_from_utf8(input);
  28. auto & stacks_cur = llama_grammar_get_stacks(grammar);
  29. for (const auto & cpt : cpts) {
  30. llama_grammar_accept(grammar, cpt);
  31. if (stacks_cur.empty()) {
  32. // no stacks means that the grammar failed to match at this point
  33. return false;
  34. }
  35. }
  36. for (const auto & stack : stacks_cur) {
  37. if (stack.empty()) {
  38. // An empty stack means that the grammar has been completed
  39. return true;
  40. }
  41. }
  42. return false;
  43. }
  44. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  45. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  46. fflush(stderr);
  47. auto * grammar = build_grammar(grammar_str);
  48. // Save the original grammar stacks so that we can reset after every new string we want to test
  49. const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
  50. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  51. fprintf(stderr, " 🔵 Valid strings:\n");
  52. // Passing strings
  53. for (const auto & test_string : passing_strings) {
  54. fprintf(stderr, " \"%s\" ", test_string.c_str());
  55. fflush(stderr);
  56. bool matched = match_string(test_string, grammar);
  57. if (!matched) {
  58. fprintf(stderr, "❌ (failed to match)\n");
  59. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  60. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  61. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  62. if (grammar_file) {
  63. fprintf(grammar_file, "%s", grammar_str.c_str());
  64. fclose(grammar_file);
  65. }
  66. // DEBUG: Write the test string to test-grammar-integration.string.txt
  67. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  68. if (string_file) {
  69. fprintf(string_file, "%s", test_string.c_str());
  70. fclose(string_file);
  71. }
  72. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  73. } else {
  74. fprintf(stdout, "✅︎\n");
  75. }
  76. assert(matched);
  77. // Reset the grammar stacks
  78. stacks_cur = stacks_org;
  79. }
  80. fprintf(stderr, " 🟠 Invalid strings:\n");
  81. // Failing strings
  82. for (const auto & test_string : failing_strings) {
  83. fprintf(stderr, " \"%s\" ", test_string.c_str());
  84. fflush(stderr);
  85. bool matched = match_string(test_string, grammar);
  86. if (matched) {
  87. fprintf(stderr, "❌ (incorrectly matched)\n");
  88. } else {
  89. fprintf(stdout, "✅︎\n");
  90. }
  91. assert(!matched);
  92. // Reset the grammar stacks
  93. stacks_cur = stacks_org;
  94. }
  95. // Clean up allocated memory
  96. llama_grammar_free_impl(grammar);
  97. }
  98. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  99. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  100. }
  101. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  102. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str), true), passing_strings, failing_strings);
  103. }
  104. static void test_simple_grammar() {
  105. test_schema(
  106. "min 0",
  107. R"""({
  108. "type": "integer",
  109. "minimum": 0
  110. })""",
  111. // Passing strings
  112. {
  113. "0",
  114. "10",
  115. "12",
  116. "10000",
  117. },
  118. // Failing strings
  119. {
  120. "-1",
  121. "-10",
  122. "-10000",
  123. "-100000000000000000000000000000000",
  124. "100000000000000000000000000000000",
  125. "00",
  126. "01",
  127. "-0",
  128. }
  129. );
  130. test_schema(
  131. "min 2",
  132. // Schema
  133. R"""({
  134. "type": "integer",
  135. "minimum": 2
  136. })""",
  137. // Passing strings
  138. {
  139. "2",
  140. "3",
  141. "4",
  142. "10",
  143. "20",
  144. "1234567890000000",
  145. },
  146. // Failing strings
  147. {
  148. "0",
  149. "1",
  150. "-1",
  151. "-100",
  152. "0",
  153. "1",
  154. "01",
  155. "02",
  156. "12345678900000000",
  157. }
  158. );
  159. test_schema(
  160. "min 456",
  161. R"""({
  162. "type": "integer",
  163. "minimum": 456
  164. })""",
  165. // Passing strings
  166. {
  167. "456",
  168. "4560",
  169. "457",
  170. "460",
  171. "500",
  172. },
  173. // Failing strings
  174. {
  175. "455",
  176. "356",
  177. "50",
  178. "050",
  179. "-1",
  180. "-456",
  181. }
  182. );
  183. test_schema(
  184. "min -123",
  185. R"""({
  186. "type": "integer",
  187. "minimum": -123
  188. })""",
  189. // Passing strings
  190. {
  191. "-123",
  192. "-122",
  193. "-11",
  194. "-1",
  195. "0",
  196. "1",
  197. "123",
  198. "1234",
  199. "2345",
  200. },
  201. // Failing strings
  202. {
  203. "-1234",
  204. "-124",
  205. }
  206. );
  207. test_schema(
  208. "max 9999",
  209. // Schema
  210. R"""({
  211. "type": "integer",
  212. "maximum": 9999
  213. })""",
  214. // Passing strings
  215. {
  216. "-99999",
  217. "0",
  218. "9999",
  219. },
  220. // Failing strings
  221. {
  222. "10000",
  223. "99991",
  224. }
  225. );
  226. test_schema(
  227. "max -9999",
  228. // Schema
  229. R"""({
  230. "type": "integer",
  231. "maximum": -9999
  232. })""",
  233. // Passing strings
  234. {
  235. "-10000",
  236. "-9999",
  237. },
  238. // Failing strings
  239. {
  240. "-9998",
  241. "0",
  242. "9999",
  243. }
  244. );
  245. test_schema(
  246. "min 5 max 30",
  247. // Schema
  248. R"""({
  249. "type": "integer",
  250. "minimum": 5,
  251. "maximum": 30
  252. })""",
  253. // Passing strings
  254. {
  255. "5",
  256. "10",
  257. "30",
  258. },
  259. // Failing strings
  260. {
  261. "05",
  262. "4",
  263. "-1",
  264. "31",
  265. "123",
  266. "0123",
  267. }
  268. );
  269. test_schema(
  270. "min -1 max 1",
  271. R"""({
  272. "type": "integer",
  273. "minimum": -1,
  274. "maximum": 1
  275. })""",
  276. // Passing strings
  277. {
  278. "-1",
  279. "0",
  280. "1",
  281. },
  282. // Failing strings
  283. {
  284. "-11",
  285. "-10",
  286. "-2",
  287. "2",
  288. "10",
  289. "11",
  290. }
  291. );
  292. test_schema(
  293. "min -123 max 42",
  294. R"""({
  295. "type": "integer",
  296. "minimum": -123,
  297. "maximum": 42
  298. })""",
  299. // Passing strings
  300. {
  301. "-123",
  302. "-122",
  303. "-13",
  304. "-11",
  305. "-2",
  306. "-1",
  307. "0",
  308. "1",
  309. "5",
  310. "10",
  311. "39",
  312. "40",
  313. "42",
  314. },
  315. // Failing strings
  316. {
  317. "-0123",
  318. "-124",
  319. "-1123",
  320. "-200",
  321. "43",
  322. "123",
  323. "0123",
  324. }
  325. );
  326. test_schema(
  327. "exclusive min / max",
  328. // Schema
  329. R"""({
  330. "type": "integer",
  331. "exclusiveMinimum": 0,
  332. "exclusiveMaximum": 10000
  333. })""",
  334. // Passing strings
  335. {
  336. "1",
  337. "9999",
  338. },
  339. // Failing strings
  340. {
  341. "0",
  342. "01",
  343. "10000",
  344. "99999",
  345. }
  346. );
  347. // Test case for a simple grammar
  348. test_grammar(
  349. "simple grammar",
  350. R"""(
  351. root ::= expr
  352. expr ::= term ("+" term)*
  353. term ::= number
  354. number ::= [0-9]+)""",
  355. // Passing strings
  356. {
  357. "42",
  358. "1+2+3+4+5",
  359. "123+456",
  360. },
  361. // Failing strings
  362. {
  363. "+",
  364. "/ 3",
  365. "1+2+3+4+5+",
  366. "12a45",
  367. }
  368. );
  369. }
  370. static void test_complex_grammar() {
  371. // Test case for a more complex grammar, with both failure strings and success strings
  372. test_grammar(
  373. "medium complexity grammar",
  374. // Grammar
  375. R"""(
  376. root ::= expression
  377. expression ::= term ws (("+"|"-") ws term)*
  378. term ::= factor ws (("*"|"/") ws factor)*
  379. factor ::= number | variable | "(" expression ")" | function-call
  380. number ::= [0-9]+
  381. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  382. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  383. ws ::= [ \t\n\r]?)""",
  384. // Passing strings
  385. {
  386. "42",
  387. "1*2*3*4*5",
  388. "x",
  389. "x+10",
  390. "x1+y2",
  391. "(a+b)*(c-d)",
  392. "func()",
  393. "func(x,y+2)",
  394. "a*(b+c)-d/e",
  395. "f(g(x),h(y,z))",
  396. "x + 10",
  397. "x1 + y2",
  398. "(a + b) * (c - d)",
  399. "func()",
  400. "func(x, y + 2)",
  401. "a * (b + c) - d / e",
  402. "f(g(x), h(y, z))",
  403. "123+456",
  404. "123*456*789-123/456+789*123",
  405. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  406. },
  407. // Failing strings
  408. {
  409. "+",
  410. "/ 3x",
  411. "x + + y",
  412. "a * / b",
  413. "func(,)",
  414. "func(x y)",
  415. "(a + b",
  416. "x + y)",
  417. "a + b * (c - d",
  418. "42 +",
  419. "x +",
  420. "x + 10 +",
  421. "(a + b) * (c - d",
  422. "func(",
  423. "func(x, y + 2",
  424. "a * (b + c) - d /",
  425. "f(g(x), h(y, z)",
  426. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  427. }
  428. );
  429. }
  430. static void test_special_chars() {
  431. // A collection of tests to exercise special characters such as "."
  432. test_grammar(
  433. "special characters",
  434. // Grammar
  435. R"""(
  436. root ::= ... "abc" ...
  437. )""",
  438. // Passing strings
  439. {
  440. "abcabcabc",
  441. "aaaabcccc",
  442. // NOTE: Also ensures that multi-byte characters still count as a single character
  443. "🔵🟠✅abc❌🟠🔵"
  444. },
  445. // Failing strings
  446. {
  447. "aaabcccc",
  448. "aaaaabcccc",
  449. "aaaabccc",
  450. "aaaabccccc",
  451. "🔵🟠✅❌abc❌✅🟠🔵",
  452. "🔵🟠abc🟠🔵"
  453. }
  454. );
  455. }
  456. static void test_quantifiers() {
  457. // A collection of tests to exercise * + and ? quantifiers
  458. test_grammar(
  459. "* quantifier",
  460. // Grammar
  461. R"""(root ::= "a"*)""",
  462. // Passing strings
  463. {
  464. "",
  465. "a",
  466. "aaaaa",
  467. "aaaaaaaaaaaaaaaaaa",
  468. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  469. },
  470. // Failing strings
  471. {
  472. "b",
  473. "ab",
  474. "aab",
  475. "ba",
  476. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  477. }
  478. );
  479. test_grammar(
  480. "+ quantifier",
  481. // Grammar
  482. R"""(root ::= "a"+)""",
  483. // Passing strings
  484. {
  485. "a",
  486. "aaaaa",
  487. "aaaaaaaaaaaaaaaaaa",
  488. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  489. },
  490. // Failing strings
  491. {
  492. "",
  493. "b",
  494. "ab",
  495. "aab",
  496. "ba",
  497. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  498. }
  499. );
  500. test_grammar(
  501. "? quantifier",
  502. // Grammar
  503. R"""(root ::= "a"?)""",
  504. // Passing strings
  505. {
  506. "",
  507. "a"
  508. },
  509. // Failing strings
  510. {
  511. "b",
  512. "ab",
  513. "aa",
  514. "ba",
  515. }
  516. );
  517. test_grammar(
  518. "mixed quantifiers",
  519. // Grammar
  520. R"""(
  521. root ::= cons+ vowel* cons? (vowel cons)*
  522. vowel ::= [aeiouy]
  523. cons ::= [bcdfghjklmnpqrstvwxyz]
  524. )""",
  525. // Passing strings
  526. {
  527. "yes",
  528. "no",
  529. "noyes",
  530. "crwth",
  531. "four",
  532. "bryyyy",
  533. },
  534. // Failing strings
  535. {
  536. "yess",
  537. "yesno",
  538. "forty",
  539. "catyyy",
  540. }
  541. );
  542. test_grammar(
  543. "simple exact repetition",
  544. // Grammar
  545. R"""(
  546. root ::= [ab]{4}
  547. )""",
  548. // Passing strings
  549. {
  550. "aaaa",
  551. "bbbb",
  552. "abab",
  553. },
  554. // Failing strings
  555. {
  556. "a",
  557. "b",
  558. "aaaaa",
  559. }
  560. );
  561. test_grammar(
  562. "simple min repetition",
  563. // Grammar
  564. R"""(
  565. root ::= [ab]{4,}
  566. )""",
  567. // Passing strings
  568. {
  569. "aaaa",
  570. "aaaaab",
  571. "bbbb",
  572. "ababab",
  573. },
  574. // Failing strings
  575. {
  576. "",
  577. "aba",
  578. }
  579. );
  580. test_grammar(
  581. "simple max repetition",
  582. // Grammar
  583. R"""(
  584. root ::= [ab]{0,4}
  585. )""",
  586. // Passing strings
  587. {
  588. "",
  589. "a",
  590. "aa",
  591. "aaa",
  592. "aaab",
  593. },
  594. // Failing strings
  595. {
  596. "aaaaa",
  597. }
  598. );
  599. test_grammar(
  600. "min / max repetition",
  601. // Grammar
  602. R"""(
  603. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  604. )""",
  605. // Passing strings
  606. {
  607. "0xFF 0x12 0xAB",
  608. "0xFF 0x12 0xAB 0x00 0x00",
  609. },
  610. // Failing strings
  611. {
  612. "",
  613. "0xFF",
  614. "0xFF 0x12",
  615. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  616. }
  617. );
  618. }
  619. static void test_failure_missing_root() {
  620. fprintf(stderr, "⚫ Testing missing root node:\n");
  621. // Test case for a grammar that is missing a root rule
  622. const std::string grammar_str = R"""(
  623. rot ::= expr
  624. expr ::= term ("+" term)*
  625. term ::= number
  626. number ::= [0-9]+)""";
  627. llama_grammar_parser parsed_grammar;
  628. parsed_grammar.parse(grammar_str.c_str());
  629. // Ensure we parsed correctly
  630. assert(!parsed_grammar.rules.empty());
  631. // Ensure we do NOT have a root node
  632. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  633. fprintf(stderr, " ✅︎ Passed\n");
  634. }
  635. static void test_failure_missing_reference() {
  636. fprintf(stderr, "⚫ Testing missing reference node:\n");
  637. // Test case for a grammar that is missing a referenced rule
  638. const std::string grammar_str =
  639. R"""(root ::= expr
  640. expr ::= term ("+" term)*
  641. term ::= numero
  642. number ::= [0-9]+)""";
  643. fprintf(stderr, " Expected error: ");
  644. llama_grammar_parser parsed_grammar;
  645. parsed_grammar.parse(grammar_str.c_str());
  646. // Ensure we did NOT parsed correctly
  647. assert(parsed_grammar.rules.empty());
  648. fprintf(stderr, " End of expected error.\n");
  649. fprintf(stderr, " ✅︎ Passed\n");
  650. }
  651. static void test_failure_left_recursion() {
  652. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  653. // Test simple left recursion detection
  654. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  655. assert(test_build_grammar_fails(simple_str));
  656. // Test more complicated left recursion detection
  657. const std::string medium_str = R"""(
  658. root ::= asdf
  659. asdf ::= "a" | asdf "a"
  660. )""";
  661. assert(test_build_grammar_fails(medium_str));
  662. // Test even more complicated left recursion detection
  663. const std::string hard_str = R"""(
  664. root ::= asdf
  665. asdf ::= "a" | foo "b"
  666. foo ::= "c" | asdf "d" | "e")""";
  667. assert(test_build_grammar_fails(hard_str));
  668. // Test yet even more complicated left recursion detection
  669. const std::string hardest_str = R"""(
  670. root ::= asdf
  671. asdf ::= "a" | foo "b"
  672. foo ::= "c" | empty asdf "d" | "e"
  673. empty ::= "blah" | )""";
  674. assert(test_build_grammar_fails(hardest_str));
  675. fprintf(stderr, " ✅︎ Passed\n");
  676. }
  677. static void test_json_schema() {
  678. // Note that this is similar to the regular grammar tests,
  679. // but we convert each json schema to a grammar before parsing.
  680. // Otherwise, this test structure is the same.
  681. test_schema(
  682. "empty schema (object)",
  683. // Schema
  684. R"""(
  685. {}
  686. )""",
  687. // Passing strings
  688. {
  689. R"""({})""",
  690. R"""({"foo": "bar"})""",
  691. },
  692. // Failing strings
  693. {
  694. "",
  695. "[]",
  696. "null",
  697. R"""("")""",
  698. "true",
  699. }
  700. );
  701. test_schema(
  702. "exotic formats (list)",
  703. // Schema
  704. R"""({
  705. "items": [
  706. { "format": "date" },
  707. { "format": "uuid" },
  708. { "format": "time" },
  709. { "format": "date-time" }
  710. ]
  711. })""",
  712. // Passing strings
  713. {
  714. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  715. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  716. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  717. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  718. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  719. },
  720. // Failing strings
  721. {
  722. R"""(["foo", "bar"])""",
  723. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  724. }
  725. );
  726. test_schema(
  727. "string",
  728. // Schema
  729. R"""({
  730. "type": "string"
  731. })""",
  732. // Passing strings
  733. {
  734. R"""("foo")""",
  735. R"""("bar")""",
  736. R"""("")""",
  737. },
  738. // Failing strings
  739. {
  740. R"""({})""",
  741. R"""("foo": "bar")""",
  742. }
  743. );
  744. test_schema(
  745. "string w/ min length 1",
  746. // Schema
  747. R"""({
  748. "type": "string",
  749. "minLength": 1
  750. })""",
  751. // Passing strings
  752. {
  753. R"""("foo")""",
  754. R"""("bar")""",
  755. },
  756. // Failing strings
  757. {
  758. R"""("")""",
  759. R"""({})""",
  760. R"""("foo": "bar")""",
  761. }
  762. );
  763. test_schema(
  764. "string w/ min length 3",
  765. // Schema
  766. R"""({
  767. "type": "string",
  768. "minLength": 3
  769. })""",
  770. // Passing strings
  771. {
  772. R"""("foo")""",
  773. R"""("bar")""",
  774. R"""("foobar")""",
  775. },
  776. // Failing strings
  777. {
  778. R"""("")""",
  779. R"""("f")""",
  780. R"""("fo")""",
  781. }
  782. );
  783. test_schema(
  784. "string w/ max length",
  785. // Schema
  786. R"""({
  787. "type": "string",
  788. "maxLength": 3
  789. })""",
  790. // Passing strings
  791. {
  792. R"""("foo")""",
  793. R"""("bar")""",
  794. R"""("")""",
  795. R"""("f")""",
  796. R"""("fo")""",
  797. },
  798. // Failing strings
  799. {
  800. R"""("foobar")""",
  801. }
  802. );
  803. test_schema(
  804. "string w/ min & max length",
  805. // Schema
  806. R"""({
  807. "type": "string",
  808. "minLength": 1,
  809. "maxLength": 4
  810. })""",
  811. // Passing strings
  812. {
  813. R"""("foo")""",
  814. R"""("bar")""",
  815. R"""("f")""",
  816. R"""("barf")""",
  817. },
  818. // Failing strings
  819. {
  820. R"""("")""",
  821. R"""("barfo")""",
  822. R"""("foobar")""",
  823. }
  824. );
  825. test_schema(
  826. "boolean",
  827. // Schema
  828. R"""({
  829. "type": "boolean"
  830. })""",
  831. // Passing strings
  832. {
  833. "true",
  834. "false",
  835. },
  836. // Failing strings
  837. {
  838. R"""("")""",
  839. R"""("true")""",
  840. R"""(True)""",
  841. R"""(FALSE)""",
  842. }
  843. );
  844. test_schema(
  845. "integer",
  846. // Schema
  847. R"""({
  848. "type": "integer"
  849. })""",
  850. // Passing strings
  851. {
  852. R"""(0)""",
  853. R"""(12345)""",
  854. R"""(1234567890123456)""",
  855. },
  856. // Failing strings
  857. {
  858. R"""()""",
  859. R"""(01)""",
  860. R"""(007)""",
  861. R"""(12345678901234567 )""",
  862. }
  863. );
  864. test_schema(
  865. "string const",
  866. // Schema
  867. R"""({
  868. "const": "foo"
  869. })""",
  870. // Passing strings
  871. {
  872. R"""("foo")""",
  873. },
  874. // Failing strings
  875. {
  876. R"""(foo)""",
  877. R"""("bar")""",
  878. }
  879. );
  880. test_schema(
  881. "non-string const",
  882. // Schema
  883. R"""({
  884. "const": true
  885. })""",
  886. // Passing strings
  887. {
  888. R"""(true)""",
  889. },
  890. // Failing strings
  891. {
  892. R"""()""",
  893. R"""(foo)""",
  894. R"""("true")""",
  895. }
  896. );
  897. test_schema(
  898. "non-string const",
  899. // Schema
  900. R"""({
  901. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  902. })""",
  903. // Passing strings
  904. {
  905. R"""("red")""",
  906. R"""(null)""",
  907. R"""(42)""",
  908. R"""(["foo"])""",
  909. },
  910. // Failing strings
  911. {
  912. R"""()""",
  913. R"""(420)""",
  914. R"""(true)""",
  915. R"""(foo)""",
  916. }
  917. );
  918. test_schema(
  919. "simple pattern",
  920. // Schema
  921. R"""({
  922. "pattern": "^[a-zA-Z0-9_-]*$"
  923. })""",
  924. // Passing strings
  925. {
  926. R"""("")""",
  927. R"""("He_llo-12")""",
  928. },
  929. // Failing strings
  930. {
  931. R"""("!")""",
  932. R"""("Hello World")""",
  933. }
  934. );
  935. test_schema(
  936. "pattern with escapes",
  937. // Schema
  938. R"""({
  939. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  940. })""",
  941. // Passing strings
  942. {
  943. R"""("a^$.[]()|{}*+?b")""",
  944. },
  945. // Failing strings
  946. {
  947. R"""("ab")""",
  948. }
  949. );
  950. test_schema(
  951. "",
  952. // Schema
  953. R"""(
  954. {
  955. "type": ["array", "null"],
  956. "items": { "type": "string" }
  957. }
  958. )""",
  959. // Passing strings
  960. {
  961. "null",
  962. "[]",
  963. "[\"123\"]",
  964. "[\"foo\", \"bar\"]",
  965. },
  966. // Failing strings
  967. {
  968. "",
  969. "[123]",
  970. "\"foo\"",
  971. "[\"foo\", 42]",
  972. }
  973. );
  974. test_schema(
  975. "min+max items",
  976. // Schema
  977. R"""({
  978. "items": {
  979. "type": ["number", "integer"]
  980. },
  981. "minItems": 3,
  982. "maxItems": 5
  983. })""",
  984. // Passing strings
  985. {
  986. R"""([1, 2, 3])""",
  987. R"""([1, 2, 3, 4])""",
  988. R"""([1, 2, 3, 4, 5])""",
  989. },
  990. // Failing strings
  991. {
  992. R"""([1, 2])""",
  993. R"""([1, 2, 3, 4, 5, 6])""",
  994. R"""(1)""",
  995. }
  996. );
  997. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  998. test_schema(
  999. "object properties",
  1000. // Schema
  1001. R"""({
  1002. "type": "object",
  1003. "properties": {
  1004. "number": { "type": "number" },
  1005. "street_name": { "type": "string" },
  1006. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1007. }
  1008. })""",
  1009. // Passing strings
  1010. {
  1011. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1012. // "By default, leaving out properties is valid"
  1013. R"""({ "street_name": "Pennsylvania" })""",
  1014. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1015. // "By extension, even an empty object is valid"
  1016. R"""({})""",
  1017. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1018. },
  1019. // Failing strings
  1020. {
  1021. // Change datatype from number to string
  1022. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1023. // Reorder properties
  1024. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1025. // Reorder properties
  1026. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1027. // "Additional properties default to false for generation, even though the spec says true.
  1028. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1029. }
  1030. );
  1031. test_schema(
  1032. "additional properties can't override other properties",
  1033. R"""({
  1034. "properties": {
  1035. "a": {"type": "integer"},
  1036. "b": {"type": "integer"}
  1037. },
  1038. "additionalProperties": true
  1039. })""",
  1040. // Passing strings
  1041. {
  1042. R"""({"a": 42})""",
  1043. R"""({"c": ""})""",
  1044. R"""({"a": 42, "c": ""})""",
  1045. R"""({"a_": ""})""",
  1046. },
  1047. // Failing strings
  1048. {
  1049. R"""()""",
  1050. R"""({"a": ""})""",
  1051. R"""({"a": "", "b": ""})""",
  1052. }
  1053. );
  1054. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1055. test_schema(
  1056. "object properties, additionalProperties: true",
  1057. // Schema
  1058. R"""({
  1059. "type": "object",
  1060. "properties": {
  1061. "number": { "type": "number" },
  1062. "street_name": { "type": "string" },
  1063. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1064. },
  1065. "additionalProperties": true
  1066. })""",
  1067. // Passing strings
  1068. {
  1069. // "By extension, even an empty object is valid"
  1070. R"""({})""",
  1071. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1072. // "By default, leaving out properties is valid"
  1073. R"""({ "street_name": "Pennsylvania" })""",
  1074. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1075. // "By default, providing additional properties is valid"
  1076. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1077. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1078. },
  1079. // Failing strings
  1080. {
  1081. // Change datatype from number to string
  1082. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1083. // Reorder properties
  1084. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1085. }
  1086. );
  1087. // Additional properties: false
  1088. test_schema(
  1089. "required + optional props each in original order",
  1090. // Schema
  1091. R"""({
  1092. "type": "object",
  1093. "properties": {
  1094. "number": { "type": "number" },
  1095. "street_name": { "type": "string" },
  1096. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1097. },
  1098. "additionalProperties": false
  1099. })""",
  1100. // Passing strings
  1101. {
  1102. R"""({ "street_name": "Pennsylvania" })""",
  1103. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1104. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1105. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1106. // Spaces are permitted around enum values
  1107. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1108. },
  1109. // Failing strings
  1110. {
  1111. // Reorder properties
  1112. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1113. // Add "direction"
  1114. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1115. }
  1116. );
  1117. test_schema(
  1118. "required + optional props each in original order",
  1119. // Schema
  1120. R"""({
  1121. "properties": {
  1122. "b": {"type": "string"},
  1123. "a": {"type": "string"},
  1124. "d": {"type": "string"},
  1125. "c": {"type": "string"}
  1126. },
  1127. "required": ["a", "b"],
  1128. "additionalProperties": false
  1129. })""",
  1130. // Passing strings
  1131. {
  1132. R"""({"b": "foo", "a": "bar"})""",
  1133. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1134. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1135. },
  1136. // Failing strings
  1137. {
  1138. R"""({"a": "foo", "b": "bar"})""",
  1139. R"""({"b": "bar"})""",
  1140. R"""({"a": "foo", "c": "baz"})""",
  1141. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1142. }
  1143. );
  1144. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1145. test_schema(
  1146. "required props",
  1147. // Schema
  1148. R"""({
  1149. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1150. "$id": "https://example.com/product.schema.json",
  1151. "title": "Product",
  1152. "description": "A product from Acme's catalog",
  1153. "type": "object",
  1154. "properties": {
  1155. "productId": {
  1156. "description": "The unique identifier for a product",
  1157. "type": "integer"
  1158. },
  1159. "productName": {
  1160. "description": "Name of the product",
  1161. "type": "string"
  1162. },
  1163. "price": {
  1164. "description": "The price of the product",
  1165. "type": "number",
  1166. "exclusiveMinimum": 0
  1167. },
  1168. "tags": {
  1169. "description": "Tags for the product",
  1170. "type": "array",
  1171. "items": {
  1172. "type": "string"
  1173. },
  1174. "minItems": 1,
  1175. "uniqueItems": true
  1176. },
  1177. "dimensions": {
  1178. "type": "object",
  1179. "properties": {
  1180. "length": {
  1181. "type": "number"
  1182. },
  1183. "width": {
  1184. "type": "number"
  1185. },
  1186. "height": {
  1187. "type": "number"
  1188. }
  1189. },
  1190. "required": [ "length", "width", "height" ]
  1191. }
  1192. },
  1193. "required": [ "productId", "productName", "price" ]
  1194. })""",
  1195. // Passing strings
  1196. {
  1197. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1198. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1199. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1200. },
  1201. // Failing strings
  1202. {
  1203. R"""({})""", // Missing all required properties
  1204. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1205. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1206. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1207. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1208. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1209. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1210. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1211. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1212. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1213. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1214. }
  1215. );
  1216. }
  1217. int main() {
  1218. fprintf(stdout, "Running grammar integration tests...\n");
  1219. test_simple_grammar();
  1220. test_complex_grammar();
  1221. test_special_chars();
  1222. test_quantifiers();
  1223. test_failure_missing_root();
  1224. test_failure_missing_reference();
  1225. test_failure_left_recursion();
  1226. test_json_schema();
  1227. fprintf(stdout, "All tests passed.\n");
  1228. return 0;
  1229. }