test-grammar-integration.cpp 34 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #define LLAMA_API_INTERNAL
  5. #include "ggml.h"
  6. #include "llama.h"
  7. #include "grammar-parser.h"
  8. #include "json-schema-to-grammar.h"
  9. #include "unicode.h"
  10. #include <cassert>
  11. #include <string>
  12. #include <vector>
  13. using json = nlohmann::ordered_json;
  14. static llama_grammar* build_grammar(const std::string & grammar_str) {
  15. auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  16. // Ensure we parsed correctly
  17. assert(!parsed_grammar.rules.empty());
  18. // Ensure we have a root node
  19. assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
  20. std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
  21. llama_grammar* grammar = llama_grammar_init(
  22. grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
  23. return grammar;
  24. }
  25. static bool test_build_grammar_fails(const std::string & grammar_str) {
  26. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  27. bool grammar_fails = false;
  28. llama_grammar * grammar = build_grammar(grammar_str);
  29. if (grammar != nullptr) {
  30. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  31. } else {
  32. grammar_fails = true;
  33. fprintf(stdout, " ✅︎\n");
  34. }
  35. return grammar_fails;
  36. }
  37. static bool match_string(const std::string & input, llama_grammar* grammar) {
  38. auto decoded = decode_utf8(input, {});
  39. const auto & code_points = decoded.first;
  40. for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
  41. auto prev_stacks = grammar->stacks;
  42. llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
  43. if (grammar->stacks.empty()) {
  44. // no stacks means that the grammar failed to match at this point
  45. return false;
  46. }
  47. }
  48. for (const auto & stack : grammar->stacks) {
  49. if (stack.empty()) {
  50. // An empty stack means that the grammar has been completed
  51. return true;
  52. }
  53. }
  54. return false;
  55. }
  56. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  57. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  58. fflush(stderr);
  59. auto grammar = build_grammar(grammar_str);
  60. // Save the original grammar stacks so that we can reset after every new string we want to test
  61. auto original_stacks = grammar->stacks;
  62. fprintf(stderr, " 🔵 Valid strings:\n");
  63. // Passing strings
  64. for (const auto & test_string : passing_strings) {
  65. fprintf(stderr, " \"%s\" ", test_string.c_str());
  66. fflush(stderr);
  67. bool matched = match_string(test_string, grammar);
  68. if (!matched) {
  69. fprintf(stderr, "❌ (failed to match)\n");
  70. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  71. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  72. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  73. if (grammar_file) {
  74. fprintf(grammar_file, "%s", grammar_str.c_str());
  75. fclose(grammar_file);
  76. }
  77. // DEBUG: Write the test string to test-grammar-integration.string.txt
  78. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  79. if (string_file) {
  80. fprintf(string_file, "%s", test_string.c_str());
  81. fclose(string_file);
  82. }
  83. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  84. } else {
  85. fprintf(stdout, "✅︎\n");
  86. }
  87. assert(matched);
  88. // Reset the grammar stacks
  89. grammar->stacks = original_stacks;
  90. }
  91. fprintf(stderr, " 🟠 Invalid strings:\n");
  92. // Failing strings
  93. for (const auto & test_string : failing_strings) {
  94. fprintf(stderr, " \"%s\" ", test_string.c_str());
  95. fflush(stderr);
  96. bool matched = match_string(test_string, grammar);
  97. if (matched) {
  98. fprintf(stderr, "❌ (incorrectly matched)\n");
  99. } else {
  100. fprintf(stdout, "✅︎\n");
  101. }
  102. assert(!matched);
  103. // Reset the grammar stacks
  104. grammar->stacks = original_stacks;
  105. }
  106. // Clean up allocated memory
  107. llama_grammar_free(grammar);
  108. }
  109. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  110. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  111. }
  112. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  113. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  114. }
  115. static void test_simple_grammar() {
  116. test_schema(
  117. "min 0",
  118. R"""({
  119. "type": "integer",
  120. "minimum": 0
  121. })""",
  122. // Passing strings
  123. {
  124. "0",
  125. "10",
  126. "12",
  127. "10000",
  128. },
  129. // Failing strings
  130. {
  131. "-1",
  132. "-10",
  133. "-10000",
  134. "-100000000000000000000000000000000",
  135. "100000000000000000000000000000000",
  136. "00",
  137. "01",
  138. "-0",
  139. }
  140. );
  141. test_schema(
  142. "min 2",
  143. // Schema
  144. R"""({
  145. "type": "integer",
  146. "minimum": 2
  147. })""",
  148. // Passing strings
  149. {
  150. "2",
  151. "3",
  152. "4",
  153. "10",
  154. "20",
  155. "1234567890000000",
  156. },
  157. // Failing strings
  158. {
  159. "0",
  160. "1",
  161. "-1",
  162. "-100",
  163. "0",
  164. "1",
  165. "01",
  166. "02",
  167. "12345678900000000",
  168. }
  169. );
  170. test_schema(
  171. "min 456",
  172. R"""({
  173. "type": "integer",
  174. "minimum": 456
  175. })""",
  176. // Passing strings
  177. {
  178. "456",
  179. "4560",
  180. "457",
  181. "460",
  182. "500",
  183. },
  184. // Failing strings
  185. {
  186. "455",
  187. "356",
  188. "50",
  189. "050",
  190. "-1",
  191. "-456",
  192. }
  193. );
  194. test_schema(
  195. "min -123",
  196. R"""({
  197. "type": "integer",
  198. "minimum": -123
  199. })""",
  200. // Passing strings
  201. {
  202. "-123",
  203. "-122",
  204. "-11",
  205. "-1",
  206. "0",
  207. "1",
  208. "123",
  209. "1234",
  210. "2345",
  211. },
  212. // Failing strings
  213. {
  214. "-1234",
  215. "-124",
  216. }
  217. );
  218. test_schema(
  219. "max 9999",
  220. // Schema
  221. R"""({
  222. "type": "integer",
  223. "maximum": 9999
  224. })""",
  225. // Passing strings
  226. {
  227. "-99999",
  228. "0",
  229. "9999",
  230. },
  231. // Failing strings
  232. {
  233. "10000",
  234. "99991",
  235. }
  236. );
  237. test_schema(
  238. "max -9999",
  239. // Schema
  240. R"""({
  241. "type": "integer",
  242. "maximum": -9999
  243. })""",
  244. // Passing strings
  245. {
  246. "-10000",
  247. "-9999",
  248. },
  249. // Failing strings
  250. {
  251. "-9998",
  252. "0",
  253. "9999",
  254. }
  255. );
  256. test_schema(
  257. "min 5 max 30",
  258. // Schema
  259. R"""({
  260. "type": "integer",
  261. "minimum": 5,
  262. "maximum": 30
  263. })""",
  264. // Passing strings
  265. {
  266. "5",
  267. "10",
  268. "30",
  269. },
  270. // Failing strings
  271. {
  272. "05",
  273. "4",
  274. "-1",
  275. "31",
  276. "123",
  277. "0123",
  278. }
  279. );
  280. test_schema(
  281. "min -1 max 1",
  282. R"""({
  283. "type": "integer",
  284. "minimum": -1,
  285. "maximum": 1
  286. })""",
  287. // Passing strings
  288. {
  289. "-1",
  290. "0",
  291. "1",
  292. },
  293. // Failing strings
  294. {
  295. "-11",
  296. "-10",
  297. "-2",
  298. "2",
  299. "10",
  300. "11",
  301. }
  302. );
  303. test_schema(
  304. "min -123 max 42",
  305. R"""({
  306. "type": "integer",
  307. "minimum": -123,
  308. "maximum": 42
  309. })""",
  310. // Passing strings
  311. {
  312. "-123",
  313. "-122",
  314. "-13",
  315. "-11",
  316. "-2",
  317. "-1",
  318. "0",
  319. "1",
  320. "5",
  321. "10",
  322. "39",
  323. "40",
  324. "42",
  325. },
  326. // Failing strings
  327. {
  328. "-0123",
  329. "-124",
  330. "-1123",
  331. "-200",
  332. "43",
  333. "123",
  334. "0123",
  335. }
  336. );
  337. test_schema(
  338. "exclusive min / max",
  339. // Schema
  340. R"""({
  341. "type": "integer",
  342. "exclusiveMinimum": 0,
  343. "exclusiveMaximum": 10000
  344. })""",
  345. // Passing strings
  346. {
  347. "1",
  348. "9999",
  349. },
  350. // Failing strings
  351. {
  352. "0",
  353. "01",
  354. "10000",
  355. "99999",
  356. }
  357. );
  358. // Test case for a simple grammar
  359. test_grammar(
  360. "simple grammar",
  361. R"""(
  362. root ::= expr
  363. expr ::= term ("+" term)*
  364. term ::= number
  365. number ::= [0-9]+)""",
  366. // Passing strings
  367. {
  368. "42",
  369. "1+2+3+4+5",
  370. "123+456",
  371. },
  372. // Failing strings
  373. {
  374. "+",
  375. "/ 3",
  376. "1+2+3+4+5+",
  377. "12a45",
  378. }
  379. );
  380. }
  381. static void test_complex_grammar() {
  382. // Test case for a more complex grammar, with both failure strings and success strings
  383. test_grammar(
  384. "medium complexity grammar",
  385. // Grammar
  386. R"""(
  387. root ::= expression
  388. expression ::= term ws (("+"|"-") ws term)*
  389. term ::= factor ws (("*"|"/") ws factor)*
  390. factor ::= number | variable | "(" expression ")" | function-call
  391. number ::= [0-9]+
  392. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  393. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  394. ws ::= [ \t\n\r]?)""",
  395. // Passing strings
  396. {
  397. "42",
  398. "1*2*3*4*5",
  399. "x",
  400. "x+10",
  401. "x1+y2",
  402. "(a+b)*(c-d)",
  403. "func()",
  404. "func(x,y+2)",
  405. "a*(b+c)-d/e",
  406. "f(g(x),h(y,z))",
  407. "x + 10",
  408. "x1 + y2",
  409. "(a + b) * (c - d)",
  410. "func()",
  411. "func(x, y + 2)",
  412. "a * (b + c) - d / e",
  413. "f(g(x), h(y, z))",
  414. "123+456",
  415. "123*456*789-123/456+789*123",
  416. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  417. },
  418. // Failing strings
  419. {
  420. "+",
  421. "/ 3x",
  422. "x + + y",
  423. "a * / b",
  424. "func(,)",
  425. "func(x y)",
  426. "(a + b",
  427. "x + y)",
  428. "a + b * (c - d",
  429. "42 +",
  430. "x +",
  431. "x + 10 +",
  432. "(a + b) * (c - d",
  433. "func(",
  434. "func(x, y + 2",
  435. "a * (b + c) - d /",
  436. "f(g(x), h(y, z)",
  437. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  438. }
  439. );
  440. }
  441. static void test_special_chars() {
  442. // A collection of tests to exercise special characters such as "."
  443. test_grammar(
  444. "special characters",
  445. // Grammar
  446. R"""(
  447. root ::= ... "abc" ...
  448. )""",
  449. // Passing strings
  450. {
  451. "abcabcabc",
  452. "aaaabcccc",
  453. // NOTE: Also ensures that multi-byte characters still count as a single character
  454. "🔵🟠✅abc❌🟠🔵"
  455. },
  456. // Failing strings
  457. {
  458. "aaabcccc",
  459. "aaaaabcccc",
  460. "aaaabccc",
  461. "aaaabccccc",
  462. "🔵🟠✅❌abc❌✅🟠🔵"
  463. "🔵🟠abc🟠🔵"
  464. }
  465. );
  466. }
  467. static void test_quantifiers() {
  468. // A collection of tests to exercise * + and ? quantifiers
  469. test_grammar(
  470. "* quantifier",
  471. // Grammar
  472. R"""(root ::= "a"*)""",
  473. // Passing strings
  474. {
  475. "",
  476. "a",
  477. "aaaaa",
  478. "aaaaaaaaaaaaaaaaaa",
  479. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  480. },
  481. // Failing strings
  482. {
  483. "b",
  484. "ab",
  485. "aab",
  486. "ba",
  487. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  488. }
  489. );
  490. test_grammar(
  491. "+ quantifier",
  492. // Grammar
  493. R"""(root ::= "a"+)""",
  494. // Passing strings
  495. {
  496. "a",
  497. "aaaaa",
  498. "aaaaaaaaaaaaaaaaaa",
  499. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  500. },
  501. // Failing strings
  502. {
  503. "",
  504. "b",
  505. "ab",
  506. "aab",
  507. "ba",
  508. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  509. }
  510. );
  511. test_grammar(
  512. "? quantifier",
  513. // Grammar
  514. R"""(root ::= "a"?)""",
  515. // Passing strings
  516. {
  517. "",
  518. "a"
  519. },
  520. // Failing strings
  521. {
  522. "b",
  523. "ab",
  524. "aa",
  525. "ba",
  526. }
  527. );
  528. test_grammar(
  529. "mixed quantifiers",
  530. // Grammar
  531. R"""(
  532. root ::= cons+ vowel* cons? (vowel cons)*
  533. vowel ::= [aeiouy]
  534. cons ::= [bcdfghjklmnpqrstvwxyz]
  535. )""",
  536. // Passing strings
  537. {
  538. "yes",
  539. "no",
  540. "noyes",
  541. "crwth",
  542. "four",
  543. "bryyyy",
  544. },
  545. // Failing strings
  546. {
  547. "yess",
  548. "yesno",
  549. "forty",
  550. "catyyy",
  551. }
  552. );
  553. test_grammar(
  554. "simple exact repetition",
  555. // Grammar
  556. R"""(
  557. root ::= [ab]{4}
  558. )""",
  559. // Passing strings
  560. {
  561. "aaaa",
  562. "bbbb",
  563. "abab",
  564. },
  565. // Failing strings
  566. {
  567. "a",
  568. "b",
  569. "aaaaa",
  570. }
  571. );
  572. test_grammar(
  573. "simple min repetition",
  574. // Grammar
  575. R"""(
  576. root ::= [ab]{4,}
  577. )""",
  578. // Passing strings
  579. {
  580. "aaaa",
  581. "aaaaab",
  582. "bbbb",
  583. "ababab",
  584. },
  585. // Failing strings
  586. {
  587. "",
  588. "aba",
  589. }
  590. );
  591. test_grammar(
  592. "simple max repetition",
  593. // Grammar
  594. R"""(
  595. root ::= [ab]{0,4}
  596. )""",
  597. // Passing strings
  598. {
  599. "",
  600. "a",
  601. "aa",
  602. "aaa",
  603. "aaab",
  604. },
  605. // Failing strings
  606. {
  607. "aaaaa",
  608. }
  609. );
  610. test_grammar(
  611. "min / max repetition",
  612. // Grammar
  613. R"""(
  614. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  615. )""",
  616. // Passing strings
  617. {
  618. "0xFF 0x12 0xAB",
  619. "0xFF 0x12 0xAB 0x00 0x00",
  620. },
  621. // Failing strings
  622. {
  623. "",
  624. "0xFF",
  625. "0xFF 0x12",
  626. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  627. }
  628. );
  629. }
  630. static void test_failure_missing_root() {
  631. fprintf(stderr, "⚫ Testing missing root node:\n");
  632. // Test case for a grammar that is missing a root rule
  633. const std::string grammar_str = R"""(
  634. rot ::= expr
  635. expr ::= term ("+" term)*
  636. term ::= number
  637. number ::= [0-9]+)""";
  638. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  639. // Ensure we parsed correctly
  640. assert(!parsed_grammar.rules.empty());
  641. // Ensure we do NOT have a root node
  642. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  643. fprintf(stderr, " ✅︎ Passed\n");
  644. }
  645. static void test_failure_missing_reference() {
  646. fprintf(stderr, "⚫ Testing missing reference node:\n");
  647. // Test case for a grammar that is missing a referenced rule
  648. const std::string grammar_str =
  649. R"""(root ::= expr
  650. expr ::= term ("+" term)*
  651. term ::= numero
  652. number ::= [0-9]+)""";
  653. fprintf(stderr, " Expected error: ");
  654. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  655. // Ensure we did NOT parsed correctly
  656. assert(parsed_grammar.rules.empty());
  657. fprintf(stderr, " End of expected error.\n");
  658. fprintf(stderr, " ✅︎ Passed\n");
  659. }
  660. static void test_failure_left_recursion() {
  661. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  662. // Test simple left recursion detection
  663. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  664. assert(test_build_grammar_fails(simple_str));
  665. // Test more complicated left recursion detection
  666. const std::string medium_str = R"""(
  667. root ::= asdf
  668. asdf ::= "a" | asdf "a"
  669. )""";
  670. assert(test_build_grammar_fails(medium_str));
  671. // Test even more complicated left recursion detection
  672. const std::string hard_str = R"""(
  673. root ::= asdf
  674. asdf ::= "a" | foo "b"
  675. foo ::= "c" | asdf "d" | "e")""";
  676. assert(test_build_grammar_fails(hard_str));
  677. // Test yet even more complicated left recursion detection
  678. const std::string hardest_str = R"""(
  679. root ::= asdf
  680. asdf ::= "a" | foo "b"
  681. foo ::= "c" | empty asdf "d" | "e"
  682. empty ::= "blah" | )""";
  683. assert(test_build_grammar_fails(hardest_str));
  684. fprintf(stderr, " ✅︎ Passed\n");
  685. }
  686. static void test_json_schema() {
  687. // Note that this is similar to the regular grammar tests,
  688. // but we convert each json schema to a grammar before parsing.
  689. // Otherwise, this test structure is the same.
  690. test_schema(
  691. "empty schema (object)",
  692. // Schema
  693. R"""(
  694. {}
  695. )""",
  696. // Passing strings
  697. {
  698. R"""({})""",
  699. R"""({"foo": "bar"})""",
  700. },
  701. // Failing strings
  702. {
  703. "",
  704. "[]",
  705. "null",
  706. R"""("")""",
  707. "true",
  708. }
  709. );
  710. test_schema(
  711. "exotic formats (list)",
  712. // Schema
  713. R"""({
  714. "items": [
  715. { "format": "date" },
  716. { "format": "uuid" },
  717. { "format": "time" },
  718. { "format": "date-time" }
  719. ]
  720. })""",
  721. // Passing strings
  722. {
  723. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  724. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  725. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  726. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  727. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  728. },
  729. // Failing strings
  730. {
  731. R"""(["foo", "bar"])""",
  732. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  733. }
  734. );
  735. test_schema(
  736. "string",
  737. // Schema
  738. R"""({
  739. "type": "string"
  740. })""",
  741. // Passing strings
  742. {
  743. R"""("foo")""",
  744. R"""("bar")""",
  745. R"""("")""",
  746. },
  747. // Failing strings
  748. {
  749. R"""({})""",
  750. R"""("foo": "bar")""",
  751. }
  752. );
  753. test_schema(
  754. "string w/ min length 1",
  755. // Schema
  756. R"""({
  757. "type": "string",
  758. "minLength": 1
  759. })""",
  760. // Passing strings
  761. {
  762. R"""("foo")""",
  763. R"""("bar")""",
  764. },
  765. // Failing strings
  766. {
  767. R"""("")""",
  768. R"""({})""",
  769. R"""("foo": "bar")""",
  770. }
  771. );
  772. test_schema(
  773. "string w/ min length 3",
  774. // Schema
  775. R"""({
  776. "type": "string",
  777. "minLength": 3
  778. })""",
  779. // Passing strings
  780. {
  781. R"""("foo")""",
  782. R"""("bar")""",
  783. R"""("foobar")""",
  784. },
  785. // Failing strings
  786. {
  787. R"""("")""",
  788. R"""("f")""",
  789. R"""("fo")""",
  790. }
  791. );
  792. test_schema(
  793. "string w/ max length",
  794. // Schema
  795. R"""({
  796. "type": "string",
  797. "maxLength": 3
  798. })""",
  799. // Passing strings
  800. {
  801. R"""("foo")""",
  802. R"""("bar")""",
  803. R"""("")""",
  804. R"""("f")""",
  805. R"""("fo")""",
  806. },
  807. // Failing strings
  808. {
  809. R"""("foobar")""",
  810. }
  811. );
  812. test_schema(
  813. "string w/ min & max length",
  814. // Schema
  815. R"""({
  816. "type": "string",
  817. "minLength": 1,
  818. "maxLength": 4
  819. })""",
  820. // Passing strings
  821. {
  822. R"""("foo")""",
  823. R"""("bar")""",
  824. R"""("f")""",
  825. R"""("barf")""",
  826. },
  827. // Failing strings
  828. {
  829. R"""("")""",
  830. R"""("barfo")""",
  831. R"""("foobar")""",
  832. }
  833. );
  834. test_schema(
  835. "boolean",
  836. // Schema
  837. R"""({
  838. "type": "boolean"
  839. })""",
  840. // Passing strings
  841. {
  842. "true",
  843. "false",
  844. },
  845. // Failing strings
  846. {
  847. R"""("")""",
  848. R"""("true")""",
  849. R"""(True)""",
  850. R"""(FALSE)""",
  851. }
  852. );
  853. test_schema(
  854. "integer",
  855. // Schema
  856. R"""({
  857. "type": "integer"
  858. })""",
  859. // Passing strings
  860. {
  861. R"""(0)""",
  862. R"""(12345)""",
  863. R"""(1234567890123456)""",
  864. },
  865. // Failing strings
  866. {
  867. R"""()""",
  868. R"""(01)""",
  869. R"""(007)""",
  870. R"""(12345678901234567 )""",
  871. }
  872. );
  873. test_schema(
  874. "string const",
  875. // Schema
  876. R"""({
  877. "const": "foo"
  878. })""",
  879. // Passing strings
  880. {
  881. R"""("foo")""",
  882. },
  883. // Failing strings
  884. {
  885. R"""(foo)""",
  886. R"""("bar")""",
  887. }
  888. );
  889. test_schema(
  890. "non-string const",
  891. // Schema
  892. R"""({
  893. "const": true
  894. })""",
  895. // Passing strings
  896. {
  897. R"""(true)""",
  898. },
  899. // Failing strings
  900. {
  901. R"""()""",
  902. R"""(foo)""",
  903. R"""("true")""",
  904. }
  905. );
  906. test_schema(
  907. "non-string const",
  908. // Schema
  909. R"""({
  910. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  911. })""",
  912. // Passing strings
  913. {
  914. R"""("red")""",
  915. R"""(null)""",
  916. R"""(42)""",
  917. R"""(["foo"])""",
  918. },
  919. // Failing strings
  920. {
  921. R"""()""",
  922. R"""(420)""",
  923. R"""(true)""",
  924. R"""(foo)""",
  925. }
  926. );
  927. test_schema(
  928. "min+max items",
  929. // Schema
  930. R"""({
  931. "items": {
  932. "type": ["number", "integer"]
  933. },
  934. "minItems": 3,
  935. "maxItems": 5
  936. })""",
  937. // Passing strings
  938. {
  939. R"""([1, 2, 3])""",
  940. R"""([1, 2, 3, 4])""",
  941. R"""([1, 2, 3, 4, 5])""",
  942. },
  943. // Failing strings
  944. {
  945. R"""([1, 2])""",
  946. R"""([1, 2, 3, 4, 5, 6])""",
  947. R"""(1)""",
  948. }
  949. );
  950. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  951. test_schema(
  952. "object properties",
  953. // Schema
  954. R"""({
  955. "type": "object",
  956. "properties": {
  957. "number": { "type": "number" },
  958. "street_name": { "type": "string" },
  959. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  960. }
  961. })""",
  962. // Passing strings
  963. {
  964. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  965. // "By default, leaving out properties is valid"
  966. R"""({ "street_name": "Pennsylvania" })""",
  967. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  968. // "By extension, even an empty object is valid"
  969. R"""({})""",
  970. // "By default, providing additional properties is valid"
  971. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  972. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  973. },
  974. // Failing strings
  975. {
  976. // Change datatype from number to string
  977. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  978. // Reorder properties
  979. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  980. // Reorder properties
  981. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  982. }
  983. );
  984. test_schema(
  985. "additional properties can't override other properties",
  986. R"""({
  987. "properties": {
  988. "a": {"type": "integer"},
  989. "b": {"type": "integer"}
  990. },
  991. "additionalProperties": true
  992. })""",
  993. // Passing strings
  994. {
  995. R"""({"a": 42})""",
  996. R"""({"c": ""})""",
  997. R"""({"a": 42, "c": ""})""",
  998. R"""({"a_": ""})""",
  999. },
  1000. // Failing strings
  1001. {
  1002. R"""()""",
  1003. R"""({"a": ""})""",
  1004. R"""({"a": "", "b": ""})""",
  1005. }
  1006. );
  1007. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1008. test_schema(
  1009. "object properties, additionalProperties: true",
  1010. // Schema
  1011. R"""({
  1012. "type": "object",
  1013. "properties": {
  1014. "number": { "type": "number" },
  1015. "street_name": { "type": "string" },
  1016. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1017. },
  1018. "additionalProperties": true
  1019. })""",
  1020. // Passing strings
  1021. {
  1022. // "By extension, even an empty object is valid"
  1023. R"""({})""",
  1024. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1025. // "By default, leaving out properties is valid"
  1026. R"""({ "street_name": "Pennsylvania" })""",
  1027. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1028. // "By default, providing additional properties is valid"
  1029. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1030. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1031. },
  1032. // Failing strings
  1033. {
  1034. // Change datatype from number to string
  1035. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1036. // Reorder properties
  1037. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1038. }
  1039. );
  1040. // Additional properties: false
  1041. test_schema(
  1042. "required + optional props each in original order",
  1043. // Schema
  1044. R"""({
  1045. "type": "object",
  1046. "properties": {
  1047. "number": { "type": "number" },
  1048. "street_name": { "type": "string" },
  1049. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1050. },
  1051. "additionalProperties": false
  1052. })""",
  1053. // Passing strings
  1054. {
  1055. R"""({ "street_name": "Pennsylvania" })""",
  1056. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1057. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1058. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1059. // Spaces are permitted around enum values
  1060. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1061. },
  1062. // Failing strings
  1063. {
  1064. // Reorder properties
  1065. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1066. // Add "direction"
  1067. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1068. }
  1069. );
  1070. test_schema(
  1071. "required + optional props each in original order",
  1072. // Schema
  1073. R"""({
  1074. "properties": {
  1075. "b": {"type": "string"},
  1076. "a": {"type": "string"},
  1077. "d": {"type": "string"},
  1078. "c": {"type": "string"}
  1079. },
  1080. "required": ["a", "b"],
  1081. "additionalProperties": false
  1082. })""",
  1083. // Passing strings
  1084. {
  1085. R"""({"b": "foo", "a": "bar"})""",
  1086. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1087. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1088. },
  1089. // Failing strings
  1090. {
  1091. R"""({"a": "foo", "b": "bar"})""",
  1092. R"""({"b": "bar"})""",
  1093. R"""({"a": "foo", "c": "baz"})""",
  1094. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1095. }
  1096. );
  1097. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1098. test_schema(
  1099. "required props",
  1100. // Schema
  1101. R"""({
  1102. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1103. "$id": "https://example.com/product.schema.json",
  1104. "title": "Product",
  1105. "description": "A product from Acme's catalog",
  1106. "type": "object",
  1107. "properties": {
  1108. "productId": {
  1109. "description": "The unique identifier for a product",
  1110. "type": "integer"
  1111. },
  1112. "productName": {
  1113. "description": "Name of the product",
  1114. "type": "string"
  1115. },
  1116. "price": {
  1117. "description": "The price of the product",
  1118. "type": "number",
  1119. "exclusiveMinimum": 0
  1120. },
  1121. "tags": {
  1122. "description": "Tags for the product",
  1123. "type": "array",
  1124. "items": {
  1125. "type": "string"
  1126. },
  1127. "minItems": 1,
  1128. "uniqueItems": true
  1129. },
  1130. "dimensions": {
  1131. "type": "object",
  1132. "properties": {
  1133. "length": {
  1134. "type": "number"
  1135. },
  1136. "width": {
  1137. "type": "number"
  1138. },
  1139. "height": {
  1140. "type": "number"
  1141. }
  1142. },
  1143. "required": [ "length", "width", "height" ]
  1144. }
  1145. },
  1146. "required": [ "productId", "productName", "price" ]
  1147. })""",
  1148. // Passing strings
  1149. {
  1150. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1151. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1152. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1153. },
  1154. // Failing strings
  1155. {
  1156. R"""({})""", // Missing all required properties
  1157. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1158. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1159. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1160. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1161. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1162. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1163. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1164. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1165. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1166. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1167. }
  1168. );
  1169. }
  1170. int main() {
  1171. fprintf(stdout, "Running grammar integration tests...\n");
  1172. test_simple_grammar();
  1173. test_complex_grammar();
  1174. test_special_chars();
  1175. test_quantifiers();
  1176. test_failure_missing_root();
  1177. test_failure_missing_reference();
  1178. test_failure_left_recursion();
  1179. test_json_schema();
  1180. fprintf(stdout, "All tests passed.\n");
  1181. return 0;
  1182. }