test-grammar-integration.cpp 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #define LLAMA_API_INTERNAL
  5. #include "ggml.h"
  6. #include "llama.h"
  7. #include "grammar-parser.h"
  8. #include "json-schema-to-grammar.h"
  9. #include "unicode.h"
  10. #include <cassert>
  11. #include <string>
  12. #include <vector>
  13. using json = nlohmann::ordered_json;
  14. //#define INCLUDE_FAILING_TESTS 1
  15. static llama_grammar* build_grammar(const std::string & grammar_str) {
  16. auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  17. // Ensure we parsed correctly
  18. assert(!parsed_grammar.rules.empty());
  19. // Ensure we have a root node
  20. assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
  21. std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
  22. llama_grammar* grammar = llama_grammar_init(
  23. grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
  24. return grammar;
  25. }
  26. static bool test_build_grammar_fails(const std::string & grammar_str) {
  27. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  28. bool grammar_fails = false;
  29. try {
  30. build_grammar(grammar_str);
  31. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  32. } catch (const std::exception & err) {
  33. grammar_fails = true;
  34. fprintf(stdout, " ✅︎\n");
  35. }
  36. return grammar_fails;
  37. }
  38. static bool match_string(const std::string & input, llama_grammar* grammar) {
  39. auto decoded = decode_utf8(input, {});
  40. const auto & code_points = decoded.first;
  41. for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
  42. auto prev_stacks = grammar->stacks;
  43. llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
  44. if (grammar->stacks.empty()) {
  45. // no stacks means that the grammar failed to match at this point
  46. return false;
  47. }
  48. }
  49. for (const auto & stack : grammar->stacks) {
  50. if (stack.empty()) {
  51. // An empty stack means that the grammar has been completed
  52. return true;
  53. }
  54. }
  55. return false;
  56. }
  57. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  58. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  59. fflush(stderr);
  60. auto grammar = build_grammar(grammar_str);
  61. // Save the original grammar stacks so that we can reset after every new string we want to test
  62. auto original_stacks = grammar->stacks;
  63. fprintf(stderr, " 🔵 Valid strings:\n");
  64. // Passing strings
  65. for (const auto & test_string : passing_strings) {
  66. fprintf(stderr, " \"%s\" ", test_string.c_str());
  67. fflush(stderr);
  68. bool matched = match_string(test_string, grammar);
  69. if (!matched) {
  70. fprintf(stderr, "❌ (failed to match)\n");
  71. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  72. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  73. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  74. if (grammar_file) {
  75. fprintf(grammar_file, "%s", grammar_str.c_str());
  76. fclose(grammar_file);
  77. }
  78. // DEBUG: Write the test string to test-grammar-integration.string.txt
  79. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  80. if (string_file) {
  81. fprintf(string_file, "%s", test_string.c_str());
  82. fclose(string_file);
  83. }
  84. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  85. } else {
  86. fprintf(stdout, "✅︎\n");
  87. }
  88. assert(matched);
  89. // Reset the grammar stacks
  90. grammar->stacks = original_stacks;
  91. }
  92. fprintf(stderr, " 🟠 Invalid strings:\n");
  93. // Failing strings
  94. for (const auto & test_string : failing_strings) {
  95. fprintf(stderr, " \"%s\" ", test_string.c_str());
  96. fflush(stderr);
  97. bool matched = match_string(test_string, grammar);
  98. if (matched) {
  99. fprintf(stderr, "❌ (incorrectly matched)\n");
  100. } else {
  101. fprintf(stdout, "✅︎\n");
  102. }
  103. assert(!matched);
  104. // Reset the grammar stacks
  105. grammar->stacks = original_stacks;
  106. }
  107. // Clean up allocated memory
  108. llama_grammar_free(grammar);
  109. }
  110. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  111. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  112. }
  113. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  114. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  115. }
  116. static void test_simple_grammar() {
  117. // Test case for a simple grammar
  118. test_grammar(
  119. "simple grammar",
  120. R"""(
  121. root ::= expr
  122. expr ::= term ("+" term)*
  123. term ::= number
  124. number ::= [0-9]+)""",
  125. // Passing strings
  126. {
  127. "42",
  128. "1+2+3+4+5",
  129. "123+456",
  130. },
  131. // Failing strings
  132. {
  133. "+",
  134. "/ 3",
  135. "1+2+3+4+5+",
  136. "12a45",
  137. }
  138. );
  139. }
  140. static void test_complex_grammar() {
  141. // Test case for a more complex grammar, with both failure strings and success strings
  142. test_grammar(
  143. "medium complexity grammar",
  144. // Grammar
  145. R"""(
  146. root ::= expression
  147. expression ::= term ws (("+"|"-") ws term)*
  148. term ::= factor ws (("*"|"/") ws factor)*
  149. factor ::= number | variable | "(" expression ")" | function-call
  150. number ::= [0-9]+
  151. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  152. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  153. ws ::= [ \t\n\r]?)""",
  154. // Passing strings
  155. {
  156. "42",
  157. "1*2*3*4*5",
  158. "x",
  159. "x+10",
  160. "x1+y2",
  161. "(a+b)*(c-d)",
  162. "func()",
  163. "func(x,y+2)",
  164. "a*(b+c)-d/e",
  165. "f(g(x),h(y,z))",
  166. "x + 10",
  167. "x1 + y2",
  168. "(a + b) * (c - d)",
  169. "func()",
  170. "func(x, y + 2)",
  171. "a * (b + c) - d / e",
  172. "f(g(x), h(y, z))",
  173. "123+456",
  174. "123*456*789-123/456+789*123",
  175. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  176. },
  177. // Failing strings
  178. {
  179. "+",
  180. "/ 3x",
  181. "x + + y",
  182. "a * / b",
  183. "func(,)",
  184. "func(x y)",
  185. "(a + b",
  186. "x + y)",
  187. "a + b * (c - d",
  188. "42 +",
  189. "x +",
  190. "x + 10 +",
  191. "(a + b) * (c - d",
  192. "func(",
  193. "func(x, y + 2",
  194. "a * (b + c) - d /",
  195. "f(g(x), h(y, z)",
  196. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  197. }
  198. );
  199. }
  200. static void test_special_chars() {
  201. // A collection of tests to exercise special characters such as "."
  202. test_grammar(
  203. "special characters",
  204. // Grammar
  205. R"""(
  206. root ::= ... "abc" ...
  207. )""",
  208. // Passing strings
  209. {
  210. "abcabcabc",
  211. "aaaabcccc",
  212. // NOTE: Also ensures that multi-byte characters still count as a single character
  213. "🔵🟠✅abc❌🟠🔵"
  214. },
  215. // Failing strings
  216. {
  217. "aaabcccc",
  218. "aaaaabcccc",
  219. "aaaabccc",
  220. "aaaabccccc",
  221. "🔵🟠✅❌abc❌✅🟠🔵"
  222. "🔵🟠abc🟠🔵"
  223. }
  224. );
  225. }
  226. static void test_quantifiers() {
  227. // A collection of tests to exercise * + and ? quantifiers
  228. test_grammar(
  229. "* quantifier",
  230. // Grammar
  231. R"""(root ::= "a"*)""",
  232. // Passing strings
  233. {
  234. "",
  235. "a",
  236. "aaaaa",
  237. "aaaaaaaaaaaaaaaaaa",
  238. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  239. },
  240. // Failing strings
  241. {
  242. "b",
  243. "ab",
  244. "aab",
  245. "ba",
  246. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  247. }
  248. );
  249. test_grammar(
  250. "+ quantifier",
  251. // Grammar
  252. R"""(root ::= "a"+)""",
  253. // Passing strings
  254. {
  255. "a",
  256. "aaaaa",
  257. "aaaaaaaaaaaaaaaaaa",
  258. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  259. },
  260. // Failing strings
  261. {
  262. "",
  263. "b",
  264. "ab",
  265. "aab",
  266. "ba",
  267. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  268. }
  269. );
  270. test_grammar(
  271. "? quantifier",
  272. // Grammar
  273. R"""(root ::= "a"?)""",
  274. // Passing strings
  275. {
  276. "",
  277. "a"
  278. },
  279. // Failing strings
  280. {
  281. "b",
  282. "ab",
  283. "aa",
  284. "ba",
  285. }
  286. );
  287. test_grammar(
  288. "mixed quantifiers",
  289. // Grammar
  290. R"""(
  291. root ::= cons+ vowel* cons? (vowel cons)*
  292. vowel ::= [aeiouy]
  293. cons ::= [bcdfghjklmnpqrstvwxyz]
  294. )""",
  295. // Passing strings
  296. {
  297. "yes",
  298. "no",
  299. "noyes",
  300. "crwth",
  301. "four",
  302. "bryyyy",
  303. },
  304. // Failing strings
  305. {
  306. "yess",
  307. "yesno",
  308. "forty",
  309. "catyyy",
  310. }
  311. );
  312. test_grammar(
  313. "simple exact repetition",
  314. // Grammar
  315. R"""(
  316. root ::= [ab]{4}
  317. )""",
  318. // Passing strings
  319. {
  320. "aaaa",
  321. "bbbb",
  322. "abab",
  323. },
  324. // Failing strings
  325. {
  326. "a",
  327. "b",
  328. "aaaaa",
  329. }
  330. );
  331. test_grammar(
  332. "simple min repetition",
  333. // Grammar
  334. R"""(
  335. root ::= [ab]{4,}
  336. )""",
  337. // Passing strings
  338. {
  339. "aaaa",
  340. "aaaaab",
  341. "bbbb",
  342. "ababab",
  343. },
  344. // Failing strings
  345. {
  346. "",
  347. "aba",
  348. }
  349. );
  350. test_grammar(
  351. "simple max repetition",
  352. // Grammar
  353. R"""(
  354. root ::= [ab]{0,4}
  355. )""",
  356. // Passing strings
  357. {
  358. "",
  359. "a",
  360. "aa",
  361. "aaa",
  362. "aaab",
  363. },
  364. // Failing strings
  365. {
  366. "aaaaa",
  367. }
  368. );
  369. test_grammar(
  370. "min / max repetition",
  371. // Grammar
  372. R"""(
  373. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  374. )""",
  375. // Passing strings
  376. {
  377. "0xFF 0x12 0xAB",
  378. "0xFF 0x12 0xAB 0x00 0x00",
  379. },
  380. // Failing strings
  381. {
  382. "",
  383. "0xFF",
  384. "0xFF 0x12",
  385. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  386. }
  387. );
  388. }
  389. static void test_failure_missing_root() {
  390. fprintf(stderr, "⚫ Testing missing root node:\n");
  391. // Test case for a grammar that is missing a root rule
  392. const std::string grammar_str = R"""(
  393. rot ::= expr
  394. expr ::= term ("+" term)*
  395. term ::= number
  396. number ::= [0-9]+)""";
  397. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  398. // Ensure we parsed correctly
  399. assert(!parsed_grammar.rules.empty());
  400. // Ensure we do NOT have a root node
  401. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  402. fprintf(stderr, " ✅︎ Passed\n");
  403. }
  404. static void test_failure_missing_reference() {
  405. fprintf(stderr, "⚫ Testing missing reference node:\n");
  406. // Test case for a grammar that is missing a referenced rule
  407. const std::string grammar_str =
  408. R"""(root ::= expr
  409. expr ::= term ("+" term)*
  410. term ::= numero
  411. number ::= [0-9]+)""";
  412. fprintf(stderr, " Expected error: ");
  413. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  414. // Ensure we did NOT parsed correctly
  415. assert(parsed_grammar.rules.empty());
  416. fprintf(stderr, " End of expected error.\n");
  417. fprintf(stderr, " ✅︎ Passed\n");
  418. }
  419. static void test_failure_left_recursion() {
  420. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  421. // Test simple left recursion detection
  422. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  423. assert(test_build_grammar_fails(simple_str));
  424. // Test more complicated left recursion detection
  425. const std::string medium_str = R"""(
  426. root ::= asdf
  427. asdf ::= "a" | asdf "a"
  428. )""";
  429. assert(test_build_grammar_fails(medium_str));
  430. // Test even more complicated left recursion detection
  431. const std::string hard_str = R"""(
  432. root ::= asdf
  433. asdf ::= "a" | foo "b"
  434. foo ::= "c" | asdf "d" | "e")""";
  435. assert(test_build_grammar_fails(hard_str));
  436. // Test yet even more complicated left recursion detection
  437. const std::string hardest_str = R"""(
  438. root ::= asdf
  439. asdf ::= "a" | foo "b"
  440. foo ::= "c" | empty asdf "d" | "e"
  441. empty ::= "blah" | )""";
  442. assert(test_build_grammar_fails(hardest_str));
  443. fprintf(stderr, " ✅︎ Passed\n");
  444. }
  445. static void test_json_schema() {
  446. // Note that this is similar to the regular grammar tests,
  447. // but we convert each json schema to a grammar before parsing.
  448. // Otherwise, this test structure is the same.
  449. test_schema(
  450. "empty schema (object)",
  451. // Schema
  452. R"""(
  453. {}
  454. )""",
  455. // Passing strings
  456. {
  457. "{}",
  458. R"""({"foo": "bar"})""",
  459. },
  460. // Failing strings
  461. {
  462. "",
  463. "[]",
  464. "null",
  465. "\"\"",
  466. "true",
  467. }
  468. );
  469. test_schema(
  470. "exotic formats (list)",
  471. // Schema
  472. R"""(
  473. {
  474. "items": [
  475. { "format": "date" },
  476. { "format": "uuid" },
  477. { "format": "time" },
  478. { "format": "date-time" }
  479. ]
  480. }
  481. )""",
  482. // Passing strings
  483. {
  484. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  485. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  486. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  487. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  488. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  489. },
  490. // Failing strings
  491. {
  492. R"""(["foo", "bar"])""",
  493. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  494. }
  495. );
  496. test_schema(
  497. "string",
  498. // Schema
  499. R"""(
  500. {
  501. "type": "string"
  502. }
  503. )""",
  504. // Passing strings
  505. {
  506. "\"foo\"",
  507. "\"bar\"",
  508. "\"\"",
  509. },
  510. // Failing strings
  511. {
  512. "{}",
  513. "\"foo\": \"bar\"",
  514. }
  515. );
  516. test_schema(
  517. "string w/ min length 1",
  518. // Schema
  519. R"""(
  520. {
  521. "type": "string",
  522. "minLength": 1
  523. }
  524. )""",
  525. // Passing strings
  526. {
  527. "\"foo\"",
  528. "\"bar\"",
  529. },
  530. // Failing strings
  531. {
  532. "\"\"",
  533. "{}",
  534. "\"foo\": \"bar\"",
  535. }
  536. );
  537. test_schema(
  538. "string w/ min length 3",
  539. // Schema
  540. R"""(
  541. {
  542. "type": "string",
  543. "minLength": 3
  544. }
  545. )""",
  546. // Passing strings
  547. {
  548. "\"foo\"",
  549. "\"bar\"",
  550. "\"foobar\"",
  551. },
  552. // Failing strings
  553. {
  554. "\"\"",
  555. "\"f\"",
  556. "\"fo\"",
  557. }
  558. );
  559. test_schema(
  560. "string w/ max length",
  561. // Schema
  562. R"""(
  563. {
  564. "type": "string",
  565. "maxLength": 3
  566. }
  567. )""",
  568. // Passing strings
  569. {
  570. "\"foo\"",
  571. "\"bar\"",
  572. "\"\"",
  573. "\"f\"",
  574. "\"fo\"",
  575. },
  576. // Failing strings
  577. {
  578. "\"foobar\"",
  579. }
  580. );
  581. test_schema(
  582. "string w/ min & max length",
  583. // Schema
  584. R"""(
  585. {
  586. "type": "string",
  587. "minLength": 1,
  588. "maxLength": 4
  589. }
  590. )""",
  591. // Passing strings
  592. {
  593. "\"foo\"",
  594. "\"bar\"",
  595. "\"f\"",
  596. "\"barf\"",
  597. },
  598. // Failing strings
  599. {
  600. "\"\"",
  601. "\"barfo\"",
  602. "\"foobar\"",
  603. }
  604. );
  605. test_schema(
  606. "boolean",
  607. // Schema
  608. R"""(
  609. {
  610. "type": "boolean"
  611. }
  612. )""",
  613. // Passing strings
  614. {
  615. "true",
  616. "false",
  617. },
  618. // Failing strings
  619. {
  620. "\"\"",
  621. "\"true\"",
  622. "True",
  623. "FALSE",
  624. }
  625. );
  626. test_schema(
  627. "integer",
  628. // Schema
  629. R"""(
  630. {
  631. "type": "integer"
  632. }
  633. )""",
  634. // Passing strings
  635. {
  636. "0",
  637. "12345",
  638. "1234567890123456"
  639. },
  640. // Failing strings
  641. {
  642. "",
  643. "01",
  644. "007",
  645. "12345678901234567"
  646. }
  647. );
  648. test_schema(
  649. "string const",
  650. // Schema
  651. R"""(
  652. {
  653. "const": "foo"
  654. }
  655. )""",
  656. // Passing strings
  657. {
  658. "\"foo\"",
  659. },
  660. // Failing strings
  661. {
  662. "foo",
  663. "\"bar\"",
  664. }
  665. );
  666. test_schema(
  667. "non-string const",
  668. // Schema
  669. R"""(
  670. {
  671. "const": true
  672. }
  673. )""",
  674. // Passing strings
  675. {
  676. "true",
  677. },
  678. // Failing strings
  679. {
  680. "",
  681. "foo",
  682. "\"true\"",
  683. }
  684. );
  685. test_schema(
  686. "non-string const",
  687. // Schema
  688. R"""(
  689. {
  690. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  691. }
  692. )""",
  693. // Passing strings
  694. {
  695. "\"red\"",
  696. "null",
  697. "42",
  698. "[\"foo\"]",
  699. },
  700. // Failing strings
  701. {
  702. "",
  703. "420",
  704. "true",
  705. "foo",
  706. }
  707. );
  708. test_schema(
  709. "min+max items",
  710. // Schema
  711. R"""(
  712. {
  713. "items": {
  714. "type": ["number", "integer"]
  715. },
  716. "minItems": 3,
  717. "maxItems": 5
  718. }
  719. )""",
  720. // Passing strings
  721. {
  722. "[1, 2, 3]",
  723. "[1, 2, 3, 4]",
  724. "[1, 2, 3, 4, 5]",
  725. },
  726. // Failing strings
  727. {
  728. "[1, 2]",
  729. "[1, 2, 3, 4, 5, 6]",
  730. "1"
  731. }
  732. );
  733. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  734. test_schema(
  735. "object properties",
  736. // Schema
  737. R"""(
  738. {
  739. "type": "object",
  740. "properties": {
  741. "number": { "type": "number" },
  742. "street_name": { "type": "string" },
  743. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  744. }
  745. }
  746. )""",
  747. // Passing strings
  748. {
  749. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  750. // "By default, leaving out properties is valid"
  751. R"""({ "street_name": "Pennsylvania" })""",
  752. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  753. // "By extension, even an empty object is valid"
  754. R"""({})""",
  755. // "By default, providing additional properties is valid"
  756. #ifdef INCLUDE_FAILING_TESTS
  757. // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
  758. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  759. // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
  760. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  761. #endif
  762. },
  763. // Failing strings
  764. {
  765. // Change datatype from number to string
  766. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  767. // Reorder properties
  768. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  769. // Reorder properties
  770. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  771. }
  772. );
  773. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  774. test_schema(
  775. "object properties, additionalProperties: true",
  776. // Schema
  777. R"""(
  778. {
  779. "type": "object",
  780. "properties": {
  781. "number": { "type": "number" },
  782. "street_name": { "type": "string" },
  783. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  784. },
  785. "additionalProperties": true
  786. }
  787. )""",
  788. // Passing strings
  789. {
  790. // "By extension, even an empty object is valid"
  791. R"""({})""",
  792. #ifdef INCLUDE_FAILING_TESTS
  793. // TODO: Following line should pass and doesn't
  794. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  795. // "By default, leaving out properties is valid"
  796. // TODO: Following line should pass and doesn't
  797. R"""({ "street_name": "Pennsylvania" })""",
  798. // TODO: Following line should pass and doesn't
  799. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  800. // "By default, providing additional properties is valid"
  801. // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
  802. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  803. // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
  804. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  805. #endif
  806. },
  807. // Failing strings
  808. {
  809. // Change datatype from number to string
  810. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  811. // Reorder properties
  812. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  813. }
  814. );
  815. // Additional properties: false
  816. test_schema(
  817. "required + optional props each in original order",
  818. // Schema
  819. R"""(
  820. {
  821. "type": "object",
  822. "properties": {
  823. "number": { "type": "number" },
  824. "street_name": { "type": "string" },
  825. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  826. },
  827. "additionalProperties": false
  828. }
  829. )""",
  830. // Passing strings
  831. {
  832. R"""({ "street_name": "Pennsylvania" })""",
  833. R"""({ "number": 1600, "street_type":"Avenue"})""",
  834. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  835. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  836. #ifdef INCLUDE_FAILING_TESTS
  837. // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
  838. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  839. #endif
  840. },
  841. // Failing strings
  842. {
  843. // Reorder properties
  844. R"""({ "street_type": "Avenue", "number": 1600 })""",
  845. // Add "direction"
  846. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  847. }
  848. );
  849. test_schema(
  850. "required + optional props each in original order",
  851. // Schema
  852. R"""(
  853. {
  854. "properties": {
  855. "b": {"type": "string"},
  856. "a": {"type": "string"},
  857. "d": {"type": "string"},
  858. "c": {"type": "string"}
  859. },
  860. "required": ["a", "b"],
  861. "additionalProperties": false
  862. }
  863. )""",
  864. // Passing strings
  865. {
  866. R"""({"b": "foo", "a": "bar"})""",
  867. R"""({"b":"foo","a":"bar","d":"qux"})""",
  868. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  869. },
  870. // Failing strings
  871. {
  872. R"""({"a": "foo", "b": "bar"})""",
  873. R"""({"b": "bar"})""",
  874. R"""({"a": "foo", "c": "baz"})""",
  875. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  876. }
  877. );
  878. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  879. test_schema(
  880. "required props",
  881. // Schema
  882. R"""(
  883. {
  884. "$schema": "https://json-schema.org/draft/2020-12/schema",
  885. "$id": "https://example.com/product.schema.json",
  886. "title": "Product",
  887. "description": "A product from Acme's catalog",
  888. "type": "object",
  889. "properties": {
  890. "productId": {
  891. "description": "The unique identifier for a product",
  892. "type": "integer"
  893. },
  894. "productName": {
  895. "description": "Name of the product",
  896. "type": "string"
  897. },
  898. "price": {
  899. "description": "The price of the product",
  900. "type": "number",
  901. "exclusiveMinimum": 0
  902. },
  903. "tags": {
  904. "description": "Tags for the product",
  905. "type": "array",
  906. "items": {
  907. "type": "string"
  908. },
  909. "minItems": 1,
  910. "uniqueItems": true
  911. },
  912. "dimensions": {
  913. "type": "object",
  914. "properties": {
  915. "length": {
  916. "type": "number"
  917. },
  918. "width": {
  919. "type": "number"
  920. },
  921. "height": {
  922. "type": "number"
  923. }
  924. },
  925. "required": [ "length", "width", "height" ]
  926. }
  927. },
  928. "required": [ "productId", "productName", "price" ]
  929. }
  930. )""",
  931. // Passing strings
  932. {
  933. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  934. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  935. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  936. },
  937. // Failing strings
  938. {
  939. R"""({})""", // Missing all required properties
  940. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  941. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  942. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  943. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  944. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  945. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  946. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  947. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  948. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  949. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  950. }
  951. );
  952. }
  953. int main() {
  954. fprintf(stdout, "Running grammar integration tests...\n");
  955. test_simple_grammar();
  956. test_complex_grammar();
  957. test_special_chars();
  958. test_quantifiers();
  959. test_failure_missing_root();
  960. test_failure_missing_reference();
  961. test_failure_left_recursion();
  962. test_json_schema();
  963. fprintf(stdout, "All tests passed.\n");
  964. return 0;
  965. }