test-grammar-integration.cpp 35 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #define LLAMA_API_INTERNAL
  5. #include "ggml.h"
  6. #include "llama.h"
  7. #include "grammar-parser.h"
  8. #include "json-schema-to-grammar.h"
  9. #include "unicode.h"
  10. #include <cassert>
  11. #include <string>
  12. #include <vector>
  13. using json = nlohmann::ordered_json;
  14. static llama_grammar* build_grammar(const std::string & grammar_str) {
  15. auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  16. // Ensure we parsed correctly
  17. assert(!parsed_grammar.rules.empty());
  18. // Ensure we have a root node
  19. assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
  20. std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
  21. llama_grammar* grammar = llama_grammar_init(
  22. grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
  23. return grammar;
  24. }
  25. static bool test_build_grammar_fails(const std::string & grammar_str) {
  26. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  27. bool grammar_fails = false;
  28. llama_grammar * grammar = build_grammar(grammar_str);
  29. if (grammar != nullptr) {
  30. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  31. } else {
  32. grammar_fails = true;
  33. fprintf(stdout, " ✅︎\n");
  34. }
  35. return grammar_fails;
  36. }
  37. static bool match_string(const std::string & input, llama_grammar* grammar) {
  38. auto decoded = decode_utf8(input, {});
  39. const auto & code_points = decoded.first;
  40. for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
  41. auto prev_stacks = grammar->stacks;
  42. llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
  43. if (grammar->stacks.empty()) {
  44. // no stacks means that the grammar failed to match at this point
  45. return false;
  46. }
  47. }
  48. for (const auto & stack : grammar->stacks) {
  49. if (stack.empty()) {
  50. // An empty stack means that the grammar has been completed
  51. return true;
  52. }
  53. }
  54. return false;
  55. }
  56. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  57. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  58. fflush(stderr);
  59. auto grammar = build_grammar(grammar_str);
  60. // Save the original grammar stacks so that we can reset after every new string we want to test
  61. auto original_stacks = grammar->stacks;
  62. fprintf(stderr, " 🔵 Valid strings:\n");
  63. // Passing strings
  64. for (const auto & test_string : passing_strings) {
  65. fprintf(stderr, " \"%s\" ", test_string.c_str());
  66. fflush(stderr);
  67. bool matched = match_string(test_string, grammar);
  68. if (!matched) {
  69. fprintf(stderr, "❌ (failed to match)\n");
  70. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  71. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  72. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  73. if (grammar_file) {
  74. fprintf(grammar_file, "%s", grammar_str.c_str());
  75. fclose(grammar_file);
  76. }
  77. // DEBUG: Write the test string to test-grammar-integration.string.txt
  78. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  79. if (string_file) {
  80. fprintf(string_file, "%s", test_string.c_str());
  81. fclose(string_file);
  82. }
  83. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  84. } else {
  85. fprintf(stdout, "✅︎\n");
  86. }
  87. assert(matched);
  88. // Reset the grammar stacks
  89. grammar->stacks = original_stacks;
  90. }
  91. fprintf(stderr, " 🟠 Invalid strings:\n");
  92. // Failing strings
  93. for (const auto & test_string : failing_strings) {
  94. fprintf(stderr, " \"%s\" ", test_string.c_str());
  95. fflush(stderr);
  96. bool matched = match_string(test_string, grammar);
  97. if (matched) {
  98. fprintf(stderr, "❌ (incorrectly matched)\n");
  99. } else {
  100. fprintf(stdout, "✅︎\n");
  101. }
  102. assert(!matched);
  103. // Reset the grammar stacks
  104. grammar->stacks = original_stacks;
  105. }
  106. // Clean up allocated memory
  107. llama_grammar_free(grammar);
  108. }
  109. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  110. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  111. }
  112. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  113. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  114. }
  115. static void test_simple_grammar() {
  116. test_schema(
  117. "min 0",
  118. R"""({
  119. "type": "integer",
  120. "minimum": 0
  121. })""",
  122. // Passing strings
  123. {
  124. "0",
  125. "10",
  126. "12",
  127. "10000",
  128. },
  129. // Failing strings
  130. {
  131. "-1",
  132. "-10",
  133. "-10000",
  134. "-100000000000000000000000000000000",
  135. "100000000000000000000000000000000",
  136. "00",
  137. "01",
  138. "-0",
  139. }
  140. );
  141. test_schema(
  142. "min 2",
  143. // Schema
  144. R"""({
  145. "type": "integer",
  146. "minimum": 2
  147. })""",
  148. // Passing strings
  149. {
  150. "2",
  151. "3",
  152. "4",
  153. "10",
  154. "20",
  155. "1234567890000000",
  156. },
  157. // Failing strings
  158. {
  159. "0",
  160. "1",
  161. "-1",
  162. "-100",
  163. "0",
  164. "1",
  165. "01",
  166. "02",
  167. "12345678900000000",
  168. }
  169. );
  170. test_schema(
  171. "min 456",
  172. R"""({
  173. "type": "integer",
  174. "minimum": 456
  175. })""",
  176. // Passing strings
  177. {
  178. "456",
  179. "4560",
  180. "457",
  181. "460",
  182. "500",
  183. },
  184. // Failing strings
  185. {
  186. "455",
  187. "356",
  188. "50",
  189. "050",
  190. "-1",
  191. "-456",
  192. }
  193. );
  194. test_schema(
  195. "min -123",
  196. R"""({
  197. "type": "integer",
  198. "minimum": -123
  199. })""",
  200. // Passing strings
  201. {
  202. "-123",
  203. "-122",
  204. "-11",
  205. "-1",
  206. "0",
  207. "1",
  208. "123",
  209. "1234",
  210. "2345",
  211. },
  212. // Failing strings
  213. {
  214. "-1234",
  215. "-124",
  216. }
  217. );
  218. test_schema(
  219. "max 9999",
  220. // Schema
  221. R"""({
  222. "type": "integer",
  223. "maximum": 9999
  224. })""",
  225. // Passing strings
  226. {
  227. "-99999",
  228. "0",
  229. "9999",
  230. },
  231. // Failing strings
  232. {
  233. "10000",
  234. "99991",
  235. }
  236. );
  237. test_schema(
  238. "max -9999",
  239. // Schema
  240. R"""({
  241. "type": "integer",
  242. "maximum": -9999
  243. })""",
  244. // Passing strings
  245. {
  246. "-10000",
  247. "-9999",
  248. },
  249. // Failing strings
  250. {
  251. "-9998",
  252. "0",
  253. "9999",
  254. }
  255. );
  256. test_schema(
  257. "min 5 max 30",
  258. // Schema
  259. R"""({
  260. "type": "integer",
  261. "minimum": 5,
  262. "maximum": 30
  263. })""",
  264. // Passing strings
  265. {
  266. "5",
  267. "10",
  268. "30",
  269. },
  270. // Failing strings
  271. {
  272. "05",
  273. "4",
  274. "-1",
  275. "31",
  276. "123",
  277. "0123",
  278. }
  279. );
  280. test_schema(
  281. "min -1 max 1",
  282. R"""({
  283. "type": "integer",
  284. "minimum": -1,
  285. "maximum": 1
  286. })""",
  287. // Passing strings
  288. {
  289. "-1",
  290. "0",
  291. "1",
  292. },
  293. // Failing strings
  294. {
  295. "-11",
  296. "-10",
  297. "-2",
  298. "2",
  299. "10",
  300. "11",
  301. }
  302. );
  303. test_schema(
  304. "min -123 max 42",
  305. R"""({
  306. "type": "integer",
  307. "minimum": -123,
  308. "maximum": 42
  309. })""",
  310. // Passing strings
  311. {
  312. "-123",
  313. "-122",
  314. "-13",
  315. "-11",
  316. "-2",
  317. "-1",
  318. "0",
  319. "1",
  320. "5",
  321. "10",
  322. "39",
  323. "40",
  324. "42",
  325. },
  326. // Failing strings
  327. {
  328. "-0123",
  329. "-124",
  330. "-1123",
  331. "-200",
  332. "43",
  333. "123",
  334. "0123",
  335. }
  336. );
  337. test_schema(
  338. "exclusive min / max",
  339. // Schema
  340. R"""({
  341. "type": "integer",
  342. "exclusiveMinimum": 0,
  343. "exclusiveMaximum": 10000
  344. })""",
  345. // Passing strings
  346. {
  347. "1",
  348. "9999",
  349. },
  350. // Failing strings
  351. {
  352. "0",
  353. "01",
  354. "10000",
  355. "99999",
  356. }
  357. );
  358. // Test case for a simple grammar
  359. test_grammar(
  360. "simple grammar",
  361. R"""(
  362. root ::= expr
  363. expr ::= term ("+" term)*
  364. term ::= number
  365. number ::= [0-9]+)""",
  366. // Passing strings
  367. {
  368. "42",
  369. "1+2+3+4+5",
  370. "123+456",
  371. },
  372. // Failing strings
  373. {
  374. "+",
  375. "/ 3",
  376. "1+2+3+4+5+",
  377. "12a45",
  378. }
  379. );
  380. }
  381. static void test_complex_grammar() {
  382. // Test case for a more complex grammar, with both failure strings and success strings
  383. test_grammar(
  384. "medium complexity grammar",
  385. // Grammar
  386. R"""(
  387. root ::= expression
  388. expression ::= term ws (("+"|"-") ws term)*
  389. term ::= factor ws (("*"|"/") ws factor)*
  390. factor ::= number | variable | "(" expression ")" | function-call
  391. number ::= [0-9]+
  392. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  393. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  394. ws ::= [ \t\n\r]?)""",
  395. // Passing strings
  396. {
  397. "42",
  398. "1*2*3*4*5",
  399. "x",
  400. "x+10",
  401. "x1+y2",
  402. "(a+b)*(c-d)",
  403. "func()",
  404. "func(x,y+2)",
  405. "a*(b+c)-d/e",
  406. "f(g(x),h(y,z))",
  407. "x + 10",
  408. "x1 + y2",
  409. "(a + b) * (c - d)",
  410. "func()",
  411. "func(x, y + 2)",
  412. "a * (b + c) - d / e",
  413. "f(g(x), h(y, z))",
  414. "123+456",
  415. "123*456*789-123/456+789*123",
  416. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  417. },
  418. // Failing strings
  419. {
  420. "+",
  421. "/ 3x",
  422. "x + + y",
  423. "a * / b",
  424. "func(,)",
  425. "func(x y)",
  426. "(a + b",
  427. "x + y)",
  428. "a + b * (c - d",
  429. "42 +",
  430. "x +",
  431. "x + 10 +",
  432. "(a + b) * (c - d",
  433. "func(",
  434. "func(x, y + 2",
  435. "a * (b + c) - d /",
  436. "f(g(x), h(y, z)",
  437. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  438. }
  439. );
  440. }
  441. static void test_special_chars() {
  442. // A collection of tests to exercise special characters such as "."
  443. test_grammar(
  444. "special characters",
  445. // Grammar
  446. R"""(
  447. root ::= ... "abc" ...
  448. )""",
  449. // Passing strings
  450. {
  451. "abcabcabc",
  452. "aaaabcccc",
  453. // NOTE: Also ensures that multi-byte characters still count as a single character
  454. "🔵🟠✅abc❌🟠🔵"
  455. },
  456. // Failing strings
  457. {
  458. "aaabcccc",
  459. "aaaaabcccc",
  460. "aaaabccc",
  461. "aaaabccccc",
  462. "🔵🟠✅❌abc❌✅🟠🔵"
  463. "🔵🟠abc🟠🔵"
  464. }
  465. );
  466. }
  467. static void test_quantifiers() {
  468. // A collection of tests to exercise * + and ? quantifiers
  469. test_grammar(
  470. "* quantifier",
  471. // Grammar
  472. R"""(root ::= "a"*)""",
  473. // Passing strings
  474. {
  475. "",
  476. "a",
  477. "aaaaa",
  478. "aaaaaaaaaaaaaaaaaa",
  479. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  480. },
  481. // Failing strings
  482. {
  483. "b",
  484. "ab",
  485. "aab",
  486. "ba",
  487. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  488. }
  489. );
  490. test_grammar(
  491. "+ quantifier",
  492. // Grammar
  493. R"""(root ::= "a"+)""",
  494. // Passing strings
  495. {
  496. "a",
  497. "aaaaa",
  498. "aaaaaaaaaaaaaaaaaa",
  499. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  500. },
  501. // Failing strings
  502. {
  503. "",
  504. "b",
  505. "ab",
  506. "aab",
  507. "ba",
  508. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  509. }
  510. );
  511. test_grammar(
  512. "? quantifier",
  513. // Grammar
  514. R"""(root ::= "a"?)""",
  515. // Passing strings
  516. {
  517. "",
  518. "a"
  519. },
  520. // Failing strings
  521. {
  522. "b",
  523. "ab",
  524. "aa",
  525. "ba",
  526. }
  527. );
  528. test_grammar(
  529. "mixed quantifiers",
  530. // Grammar
  531. R"""(
  532. root ::= cons+ vowel* cons? (vowel cons)*
  533. vowel ::= [aeiouy]
  534. cons ::= [bcdfghjklmnpqrstvwxyz]
  535. )""",
  536. // Passing strings
  537. {
  538. "yes",
  539. "no",
  540. "noyes",
  541. "crwth",
  542. "four",
  543. "bryyyy",
  544. },
  545. // Failing strings
  546. {
  547. "yess",
  548. "yesno",
  549. "forty",
  550. "catyyy",
  551. }
  552. );
  553. test_grammar(
  554. "simple exact repetition",
  555. // Grammar
  556. R"""(
  557. root ::= [ab]{4}
  558. )""",
  559. // Passing strings
  560. {
  561. "aaaa",
  562. "bbbb",
  563. "abab",
  564. },
  565. // Failing strings
  566. {
  567. "a",
  568. "b",
  569. "aaaaa",
  570. }
  571. );
  572. test_grammar(
  573. "simple min repetition",
  574. // Grammar
  575. R"""(
  576. root ::= [ab]{4,}
  577. )""",
  578. // Passing strings
  579. {
  580. "aaaa",
  581. "aaaaab",
  582. "bbbb",
  583. "ababab",
  584. },
  585. // Failing strings
  586. {
  587. "",
  588. "aba",
  589. }
  590. );
  591. test_grammar(
  592. "simple max repetition",
  593. // Grammar
  594. R"""(
  595. root ::= [ab]{0,4}
  596. )""",
  597. // Passing strings
  598. {
  599. "",
  600. "a",
  601. "aa",
  602. "aaa",
  603. "aaab",
  604. },
  605. // Failing strings
  606. {
  607. "aaaaa",
  608. }
  609. );
  610. test_grammar(
  611. "min / max repetition",
  612. // Grammar
  613. R"""(
  614. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  615. )""",
  616. // Passing strings
  617. {
  618. "0xFF 0x12 0xAB",
  619. "0xFF 0x12 0xAB 0x00 0x00",
  620. },
  621. // Failing strings
  622. {
  623. "",
  624. "0xFF",
  625. "0xFF 0x12",
  626. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  627. }
  628. );
  629. }
  630. static void test_failure_missing_root() {
  631. fprintf(stderr, "⚫ Testing missing root node:\n");
  632. // Test case for a grammar that is missing a root rule
  633. const std::string grammar_str = R"""(
  634. rot ::= expr
  635. expr ::= term ("+" term)*
  636. term ::= number
  637. number ::= [0-9]+)""";
  638. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  639. // Ensure we parsed correctly
  640. assert(!parsed_grammar.rules.empty());
  641. // Ensure we do NOT have a root node
  642. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  643. fprintf(stderr, " ✅︎ Passed\n");
  644. }
  645. static void test_failure_missing_reference() {
  646. fprintf(stderr, "⚫ Testing missing reference node:\n");
  647. // Test case for a grammar that is missing a referenced rule
  648. const std::string grammar_str =
  649. R"""(root ::= expr
  650. expr ::= term ("+" term)*
  651. term ::= numero
  652. number ::= [0-9]+)""";
  653. fprintf(stderr, " Expected error: ");
  654. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  655. // Ensure we did NOT parsed correctly
  656. assert(parsed_grammar.rules.empty());
  657. fprintf(stderr, " End of expected error.\n");
  658. fprintf(stderr, " ✅︎ Passed\n");
  659. }
  660. static void test_failure_left_recursion() {
  661. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  662. // Test simple left recursion detection
  663. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  664. assert(test_build_grammar_fails(simple_str));
  665. // Test more complicated left recursion detection
  666. const std::string medium_str = R"""(
  667. root ::= asdf
  668. asdf ::= "a" | asdf "a"
  669. )""";
  670. assert(test_build_grammar_fails(medium_str));
  671. // Test even more complicated left recursion detection
  672. const std::string hard_str = R"""(
  673. root ::= asdf
  674. asdf ::= "a" | foo "b"
  675. foo ::= "c" | asdf "d" | "e")""";
  676. assert(test_build_grammar_fails(hard_str));
  677. // Test yet even more complicated left recursion detection
  678. const std::string hardest_str = R"""(
  679. root ::= asdf
  680. asdf ::= "a" | foo "b"
  681. foo ::= "c" | empty asdf "d" | "e"
  682. empty ::= "blah" | )""";
  683. assert(test_build_grammar_fails(hardest_str));
  684. fprintf(stderr, " ✅︎ Passed\n");
  685. }
  686. static void test_json_schema() {
  687. // Note that this is similar to the regular grammar tests,
  688. // but we convert each json schema to a grammar before parsing.
  689. // Otherwise, this test structure is the same.
  690. test_schema(
  691. "empty schema (object)",
  692. // Schema
  693. R"""(
  694. {}
  695. )""",
  696. // Passing strings
  697. {
  698. R"""({})""",
  699. R"""({"foo": "bar"})""",
  700. },
  701. // Failing strings
  702. {
  703. "",
  704. "[]",
  705. "null",
  706. R"""("")""",
  707. "true",
  708. }
  709. );
  710. test_schema(
  711. "exotic formats (list)",
  712. // Schema
  713. R"""({
  714. "items": [
  715. { "format": "date" },
  716. { "format": "uuid" },
  717. { "format": "time" },
  718. { "format": "date-time" }
  719. ]
  720. })""",
  721. // Passing strings
  722. {
  723. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  724. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  725. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  726. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  727. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  728. },
  729. // Failing strings
  730. {
  731. R"""(["foo", "bar"])""",
  732. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  733. }
  734. );
  735. test_schema(
  736. "string",
  737. // Schema
  738. R"""({
  739. "type": "string"
  740. })""",
  741. // Passing strings
  742. {
  743. R"""("foo")""",
  744. R"""("bar")""",
  745. R"""("")""",
  746. },
  747. // Failing strings
  748. {
  749. R"""({})""",
  750. R"""("foo": "bar")""",
  751. }
  752. );
  753. test_schema(
  754. "string w/ min length 1",
  755. // Schema
  756. R"""({
  757. "type": "string",
  758. "minLength": 1
  759. })""",
  760. // Passing strings
  761. {
  762. R"""("foo")""",
  763. R"""("bar")""",
  764. },
  765. // Failing strings
  766. {
  767. R"""("")""",
  768. R"""({})""",
  769. R"""("foo": "bar")""",
  770. }
  771. );
  772. test_schema(
  773. "string w/ min length 3",
  774. // Schema
  775. R"""({
  776. "type": "string",
  777. "minLength": 3
  778. })""",
  779. // Passing strings
  780. {
  781. R"""("foo")""",
  782. R"""("bar")""",
  783. R"""("foobar")""",
  784. },
  785. // Failing strings
  786. {
  787. R"""("")""",
  788. R"""("f")""",
  789. R"""("fo")""",
  790. }
  791. );
  792. test_schema(
  793. "string w/ max length",
  794. // Schema
  795. R"""({
  796. "type": "string",
  797. "maxLength": 3
  798. })""",
  799. // Passing strings
  800. {
  801. R"""("foo")""",
  802. R"""("bar")""",
  803. R"""("")""",
  804. R"""("f")""",
  805. R"""("fo")""",
  806. },
  807. // Failing strings
  808. {
  809. R"""("foobar")""",
  810. }
  811. );
  812. test_schema(
  813. "string w/ min & max length",
  814. // Schema
  815. R"""({
  816. "type": "string",
  817. "minLength": 1,
  818. "maxLength": 4
  819. })""",
  820. // Passing strings
  821. {
  822. R"""("foo")""",
  823. R"""("bar")""",
  824. R"""("f")""",
  825. R"""("barf")""",
  826. },
  827. // Failing strings
  828. {
  829. R"""("")""",
  830. R"""("barfo")""",
  831. R"""("foobar")""",
  832. }
  833. );
  834. test_schema(
  835. "boolean",
  836. // Schema
  837. R"""({
  838. "type": "boolean"
  839. })""",
  840. // Passing strings
  841. {
  842. "true",
  843. "false",
  844. },
  845. // Failing strings
  846. {
  847. R"""("")""",
  848. R"""("true")""",
  849. R"""(True)""",
  850. R"""(FALSE)""",
  851. }
  852. );
  853. test_schema(
  854. "integer",
  855. // Schema
  856. R"""({
  857. "type": "integer"
  858. })""",
  859. // Passing strings
  860. {
  861. R"""(0)""",
  862. R"""(12345)""",
  863. R"""(1234567890123456)""",
  864. },
  865. // Failing strings
  866. {
  867. R"""()""",
  868. R"""(01)""",
  869. R"""(007)""",
  870. R"""(12345678901234567 )""",
  871. }
  872. );
  873. test_schema(
  874. "string const",
  875. // Schema
  876. R"""({
  877. "const": "foo"
  878. })""",
  879. // Passing strings
  880. {
  881. R"""("foo")""",
  882. },
  883. // Failing strings
  884. {
  885. R"""(foo)""",
  886. R"""("bar")""",
  887. }
  888. );
  889. test_schema(
  890. "non-string const",
  891. // Schema
  892. R"""({
  893. "const": true
  894. })""",
  895. // Passing strings
  896. {
  897. R"""(true)""",
  898. },
  899. // Failing strings
  900. {
  901. R"""()""",
  902. R"""(foo)""",
  903. R"""("true")""",
  904. }
  905. );
  906. test_schema(
  907. "non-string const",
  908. // Schema
  909. R"""({
  910. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  911. })""",
  912. // Passing strings
  913. {
  914. R"""("red")""",
  915. R"""(null)""",
  916. R"""(42)""",
  917. R"""(["foo"])""",
  918. },
  919. // Failing strings
  920. {
  921. R"""()""",
  922. R"""(420)""",
  923. R"""(true)""",
  924. R"""(foo)""",
  925. }
  926. );
  927. test_schema(
  928. "simple pattern",
  929. // Schema
  930. R"""({
  931. "pattern": "^[a-zA-Z0-9_-]*$"
  932. })""",
  933. // Passing strings
  934. {
  935. R"""("")""",
  936. R"""("He_llo-12")""",
  937. },
  938. // Failing strings
  939. {
  940. R"""("!")""",
  941. R"""("Hello World")""",
  942. }
  943. );
  944. test_schema(
  945. "pattern with escapes",
  946. // Schema
  947. R"""({
  948. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  949. })""",
  950. // Passing strings
  951. {
  952. R"""("a^$.[]()|{}*+?b")""",
  953. },
  954. // Failing strings
  955. {
  956. R"""("ab")""",
  957. }
  958. );
  959. test_schema(
  960. "",
  961. // Schema
  962. R"""(
  963. {
  964. "type": ["array", "null"],
  965. "items": { "type": "string" }
  966. }
  967. )""",
  968. // Passing strings
  969. {
  970. "null",
  971. "[]",
  972. "[\"123\"]",
  973. "[\"foo\", \"bar\"]",
  974. },
  975. // Failing strings
  976. {
  977. "",
  978. "[123]",
  979. "\"foo\"",
  980. "[\"foo\", 42]",
  981. }
  982. );
  983. test_schema(
  984. "min+max items",
  985. // Schema
  986. R"""({
  987. "items": {
  988. "type": ["number", "integer"]
  989. },
  990. "minItems": 3,
  991. "maxItems": 5
  992. })""",
  993. // Passing strings
  994. {
  995. R"""([1, 2, 3])""",
  996. R"""([1, 2, 3, 4])""",
  997. R"""([1, 2, 3, 4, 5])""",
  998. },
  999. // Failing strings
  1000. {
  1001. R"""([1, 2])""",
  1002. R"""([1, 2, 3, 4, 5, 6])""",
  1003. R"""(1)""",
  1004. }
  1005. );
  1006. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1007. test_schema(
  1008. "object properties",
  1009. // Schema
  1010. R"""({
  1011. "type": "object",
  1012. "properties": {
  1013. "number": { "type": "number" },
  1014. "street_name": { "type": "string" },
  1015. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1016. }
  1017. })""",
  1018. // Passing strings
  1019. {
  1020. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1021. // "By default, leaving out properties is valid"
  1022. R"""({ "street_name": "Pennsylvania" })""",
  1023. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1024. // "By extension, even an empty object is valid"
  1025. R"""({})""",
  1026. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1027. },
  1028. // Failing strings
  1029. {
  1030. // Change datatype from number to string
  1031. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1032. // Reorder properties
  1033. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1034. // Reorder properties
  1035. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1036. // "Additional properties default to false for generation, even though the spec says true.
  1037. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1038. }
  1039. );
  1040. test_schema(
  1041. "additional properties can't override other properties",
  1042. R"""({
  1043. "properties": {
  1044. "a": {"type": "integer"},
  1045. "b": {"type": "integer"}
  1046. },
  1047. "additionalProperties": true
  1048. })""",
  1049. // Passing strings
  1050. {
  1051. R"""({"a": 42})""",
  1052. R"""({"c": ""})""",
  1053. R"""({"a": 42, "c": ""})""",
  1054. R"""({"a_": ""})""",
  1055. },
  1056. // Failing strings
  1057. {
  1058. R"""()""",
  1059. R"""({"a": ""})""",
  1060. R"""({"a": "", "b": ""})""",
  1061. }
  1062. );
  1063. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1064. test_schema(
  1065. "object properties, additionalProperties: true",
  1066. // Schema
  1067. R"""({
  1068. "type": "object",
  1069. "properties": {
  1070. "number": { "type": "number" },
  1071. "street_name": { "type": "string" },
  1072. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1073. },
  1074. "additionalProperties": true
  1075. })""",
  1076. // Passing strings
  1077. {
  1078. // "By extension, even an empty object is valid"
  1079. R"""({})""",
  1080. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1081. // "By default, leaving out properties is valid"
  1082. R"""({ "street_name": "Pennsylvania" })""",
  1083. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1084. // "By default, providing additional properties is valid"
  1085. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1086. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1087. },
  1088. // Failing strings
  1089. {
  1090. // Change datatype from number to string
  1091. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1092. // Reorder properties
  1093. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1094. }
  1095. );
  1096. // Additional properties: false
  1097. test_schema(
  1098. "required + optional props each in original order",
  1099. // Schema
  1100. R"""({
  1101. "type": "object",
  1102. "properties": {
  1103. "number": { "type": "number" },
  1104. "street_name": { "type": "string" },
  1105. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1106. },
  1107. "additionalProperties": false
  1108. })""",
  1109. // Passing strings
  1110. {
  1111. R"""({ "street_name": "Pennsylvania" })""",
  1112. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1113. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1114. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1115. // Spaces are permitted around enum values
  1116. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1117. },
  1118. // Failing strings
  1119. {
  1120. // Reorder properties
  1121. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1122. // Add "direction"
  1123. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1124. }
  1125. );
  1126. test_schema(
  1127. "required + optional props each in original order",
  1128. // Schema
  1129. R"""({
  1130. "properties": {
  1131. "b": {"type": "string"},
  1132. "a": {"type": "string"},
  1133. "d": {"type": "string"},
  1134. "c": {"type": "string"}
  1135. },
  1136. "required": ["a", "b"],
  1137. "additionalProperties": false
  1138. })""",
  1139. // Passing strings
  1140. {
  1141. R"""({"b": "foo", "a": "bar"})""",
  1142. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1143. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1144. },
  1145. // Failing strings
  1146. {
  1147. R"""({"a": "foo", "b": "bar"})""",
  1148. R"""({"b": "bar"})""",
  1149. R"""({"a": "foo", "c": "baz"})""",
  1150. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1151. }
  1152. );
  1153. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1154. test_schema(
  1155. "required props",
  1156. // Schema
  1157. R"""({
  1158. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1159. "$id": "https://example.com/product.schema.json",
  1160. "title": "Product",
  1161. "description": "A product from Acme's catalog",
  1162. "type": "object",
  1163. "properties": {
  1164. "productId": {
  1165. "description": "The unique identifier for a product",
  1166. "type": "integer"
  1167. },
  1168. "productName": {
  1169. "description": "Name of the product",
  1170. "type": "string"
  1171. },
  1172. "price": {
  1173. "description": "The price of the product",
  1174. "type": "number",
  1175. "exclusiveMinimum": 0
  1176. },
  1177. "tags": {
  1178. "description": "Tags for the product",
  1179. "type": "array",
  1180. "items": {
  1181. "type": "string"
  1182. },
  1183. "minItems": 1,
  1184. "uniqueItems": true
  1185. },
  1186. "dimensions": {
  1187. "type": "object",
  1188. "properties": {
  1189. "length": {
  1190. "type": "number"
  1191. },
  1192. "width": {
  1193. "type": "number"
  1194. },
  1195. "height": {
  1196. "type": "number"
  1197. }
  1198. },
  1199. "required": [ "length", "width", "height" ]
  1200. }
  1201. },
  1202. "required": [ "productId", "productName", "price" ]
  1203. })""",
  1204. // Passing strings
  1205. {
  1206. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1207. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1208. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1209. },
  1210. // Failing strings
  1211. {
  1212. R"""({})""", // Missing all required properties
  1213. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1214. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1215. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1216. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1217. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1218. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1219. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1220. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1221. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1222. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1223. }
  1224. );
  1225. }
  1226. int main() {
  1227. fprintf(stdout, "Running grammar integration tests...\n");
  1228. test_simple_grammar();
  1229. test_complex_grammar();
  1230. test_special_chars();
  1231. test_quantifiers();
  1232. test_failure_missing_root();
  1233. test_failure_missing_reference();
  1234. test_failure_left_recursion();
  1235. test_json_schema();
  1236. fprintf(stdout, "All tests passed.\n");
  1237. return 0;
  1238. }