test-grammar-integration.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "unicode.h"
  5. #include "llama-grammar.h"
  6. #include "json-schema-to-grammar.h"
  7. #include <cassert>
  8. #include <string>
  9. #include <vector>
  10. using json = nlohmann::ordered_json;
  11. static llama_grammar * build_grammar(const std::string & grammar_str) {
  12. return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
  13. }
  14. static bool test_build_grammar_fails(const std::string & grammar_str) {
  15. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  16. bool grammar_fails = false;
  17. llama_grammar * grammar = build_grammar(grammar_str);
  18. if (grammar != nullptr) {
  19. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  20. } else {
  21. grammar_fails = true;
  22. fprintf(stdout, " ✅︎\n");
  23. }
  24. return grammar_fails;
  25. }
  26. static bool match_string(const std::string & input, llama_grammar * grammar) {
  27. const auto cpts = unicode_cpts_from_utf8(input);
  28. const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
  29. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  30. for (const auto & cpt : cpts) {
  31. const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
  32. llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
  33. if (stacks_cur.empty()) {
  34. // no stacks means that the grammar failed to match at this point
  35. return false;
  36. }
  37. }
  38. for (const auto & stack : stacks_cur) {
  39. if (stack.empty()) {
  40. // An empty stack means that the grammar has been completed
  41. return true;
  42. }
  43. }
  44. return false;
  45. }
  46. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  47. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  48. fflush(stderr);
  49. auto * grammar = build_grammar(grammar_str);
  50. // Save the original grammar stacks so that we can reset after every new string we want to test
  51. const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar);
  52. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  53. fprintf(stderr, " 🔵 Valid strings:\n");
  54. // Passing strings
  55. for (const auto & test_string : passing_strings) {
  56. fprintf(stderr, " \"%s\" ", test_string.c_str());
  57. fflush(stderr);
  58. bool matched = match_string(test_string, grammar);
  59. if (!matched) {
  60. fprintf(stderr, "❌ (failed to match)\n");
  61. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  62. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  63. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  64. if (grammar_file) {
  65. fprintf(grammar_file, "%s", grammar_str.c_str());
  66. fclose(grammar_file);
  67. }
  68. // DEBUG: Write the test string to test-grammar-integration.string.txt
  69. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  70. if (string_file) {
  71. fprintf(string_file, "%s", test_string.c_str());
  72. fclose(string_file);
  73. }
  74. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  75. } else {
  76. fprintf(stdout, "✅︎\n");
  77. }
  78. assert(matched);
  79. // Reset the grammar stacks
  80. stacks_cur = stacks_org;
  81. }
  82. fprintf(stderr, " 🟠 Invalid strings:\n");
  83. // Failing strings
  84. for (const auto & test_string : failing_strings) {
  85. fprintf(stderr, " \"%s\" ", test_string.c_str());
  86. fflush(stderr);
  87. bool matched = match_string(test_string, grammar);
  88. if (matched) {
  89. fprintf(stderr, "❌ (incorrectly matched)\n");
  90. } else {
  91. fprintf(stdout, "✅︎\n");
  92. }
  93. assert(!matched);
  94. // Reset the grammar stacks
  95. stacks_cur = stacks_org;
  96. }
  97. // Clean up allocated memory
  98. llama_grammar_free_impl(grammar);
  99. }
  100. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  101. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  102. }
  103. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  104. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  105. }
  106. static void test_simple_grammar() {
  107. test_schema(
  108. "min 0",
  109. R"""({
  110. "type": "integer",
  111. "minimum": 0
  112. })""",
  113. // Passing strings
  114. {
  115. "0",
  116. "10",
  117. "12",
  118. "10000",
  119. },
  120. // Failing strings
  121. {
  122. "-1",
  123. "-10",
  124. "-10000",
  125. "-100000000000000000000000000000000",
  126. "100000000000000000000000000000000",
  127. "00",
  128. "01",
  129. "-0",
  130. }
  131. );
  132. test_schema(
  133. "min 2",
  134. // Schema
  135. R"""({
  136. "type": "integer",
  137. "minimum": 2
  138. })""",
  139. // Passing strings
  140. {
  141. "2",
  142. "3",
  143. "4",
  144. "10",
  145. "20",
  146. "1234567890000000",
  147. },
  148. // Failing strings
  149. {
  150. "0",
  151. "1",
  152. "-1",
  153. "-100",
  154. "0",
  155. "1",
  156. "01",
  157. "02",
  158. "12345678900000000",
  159. }
  160. );
  161. test_schema(
  162. "min 456",
  163. R"""({
  164. "type": "integer",
  165. "minimum": 456
  166. })""",
  167. // Passing strings
  168. {
  169. "456",
  170. "4560",
  171. "457",
  172. "460",
  173. "500",
  174. },
  175. // Failing strings
  176. {
  177. "455",
  178. "356",
  179. "50",
  180. "050",
  181. "-1",
  182. "-456",
  183. }
  184. );
  185. test_schema(
  186. "min -123",
  187. R"""({
  188. "type": "integer",
  189. "minimum": -123
  190. })""",
  191. // Passing strings
  192. {
  193. "-123",
  194. "-122",
  195. "-11",
  196. "-1",
  197. "0",
  198. "1",
  199. "123",
  200. "1234",
  201. "2345",
  202. },
  203. // Failing strings
  204. {
  205. "-1234",
  206. "-124",
  207. }
  208. );
  209. test_schema(
  210. "max 9999",
  211. // Schema
  212. R"""({
  213. "type": "integer",
  214. "maximum": 9999
  215. })""",
  216. // Passing strings
  217. {
  218. "-99999",
  219. "0",
  220. "9999",
  221. },
  222. // Failing strings
  223. {
  224. "10000",
  225. "99991",
  226. }
  227. );
  228. test_schema(
  229. "max -9999",
  230. // Schema
  231. R"""({
  232. "type": "integer",
  233. "maximum": -9999
  234. })""",
  235. // Passing strings
  236. {
  237. "-10000",
  238. "-9999",
  239. },
  240. // Failing strings
  241. {
  242. "-9998",
  243. "0",
  244. "9999",
  245. }
  246. );
  247. test_schema(
  248. "min 5 max 30",
  249. // Schema
  250. R"""({
  251. "type": "integer",
  252. "minimum": 5,
  253. "maximum": 30
  254. })""",
  255. // Passing strings
  256. {
  257. "5",
  258. "10",
  259. "30",
  260. },
  261. // Failing strings
  262. {
  263. "05",
  264. "4",
  265. "-1",
  266. "31",
  267. "123",
  268. "0123",
  269. }
  270. );
  271. test_schema(
  272. "min -1 max 1",
  273. R"""({
  274. "type": "integer",
  275. "minimum": -1,
  276. "maximum": 1
  277. })""",
  278. // Passing strings
  279. {
  280. "-1",
  281. "0",
  282. "1",
  283. },
  284. // Failing strings
  285. {
  286. "-11",
  287. "-10",
  288. "-2",
  289. "2",
  290. "10",
  291. "11",
  292. }
  293. );
  294. test_schema(
  295. "min -123 max 42",
  296. R"""({
  297. "type": "integer",
  298. "minimum": -123,
  299. "maximum": 42
  300. })""",
  301. // Passing strings
  302. {
  303. "-123",
  304. "-122",
  305. "-13",
  306. "-11",
  307. "-2",
  308. "-1",
  309. "0",
  310. "1",
  311. "5",
  312. "10",
  313. "39",
  314. "40",
  315. "42",
  316. },
  317. // Failing strings
  318. {
  319. "-0123",
  320. "-124",
  321. "-1123",
  322. "-200",
  323. "43",
  324. "123",
  325. "0123",
  326. }
  327. );
  328. test_schema(
  329. "exclusive min / max",
  330. // Schema
  331. R"""({
  332. "type": "integer",
  333. "exclusiveMinimum": 0,
  334. "exclusiveMaximum": 10000
  335. })""",
  336. // Passing strings
  337. {
  338. "1",
  339. "9999",
  340. },
  341. // Failing strings
  342. {
  343. "0",
  344. "01",
  345. "10000",
  346. "99999",
  347. }
  348. );
  349. // Test case for a simple grammar
  350. test_grammar(
  351. "simple grammar",
  352. R"""(
  353. root ::= expr
  354. expr ::= term ("+" term)*
  355. term ::= number
  356. number ::= [0-9]+)""",
  357. // Passing strings
  358. {
  359. "42",
  360. "1+2+3+4+5",
  361. "123+456",
  362. },
  363. // Failing strings
  364. {
  365. "+",
  366. "/ 3",
  367. "1+2+3+4+5+",
  368. "12a45",
  369. }
  370. );
  371. }
  372. static void test_complex_grammar() {
  373. // Test case for a more complex grammar, with both failure strings and success strings
  374. test_grammar(
  375. "medium complexity grammar",
  376. // Grammar
  377. R"""(
  378. root ::= expression
  379. expression ::= term ws (("+"|"-") ws term)*
  380. term ::= factor ws (("*"|"/") ws factor)*
  381. factor ::= number | variable | "(" expression ")" | function-call
  382. number ::= [0-9]+
  383. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  384. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  385. ws ::= [ \t\n\r]?)""",
  386. // Passing strings
  387. {
  388. "42",
  389. "1*2*3*4*5",
  390. "x",
  391. "x+10",
  392. "x1+y2",
  393. "(a+b)*(c-d)",
  394. "func()",
  395. "func(x,y+2)",
  396. "a*(b+c)-d/e",
  397. "f(g(x),h(y,z))",
  398. "x + 10",
  399. "x1 + y2",
  400. "(a + b) * (c - d)",
  401. "func()",
  402. "func(x, y + 2)",
  403. "a * (b + c) - d / e",
  404. "f(g(x), h(y, z))",
  405. "123+456",
  406. "123*456*789-123/456+789*123",
  407. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  408. },
  409. // Failing strings
  410. {
  411. "+",
  412. "/ 3x",
  413. "x + + y",
  414. "a * / b",
  415. "func(,)",
  416. "func(x y)",
  417. "(a + b",
  418. "x + y)",
  419. "a + b * (c - d",
  420. "42 +",
  421. "x +",
  422. "x + 10 +",
  423. "(a + b) * (c - d",
  424. "func(",
  425. "func(x, y + 2",
  426. "a * (b + c) - d /",
  427. "f(g(x), h(y, z)",
  428. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  429. }
  430. );
  431. }
  432. static void test_special_chars() {
  433. // A collection of tests to exercise special characters such as "."
  434. test_grammar(
  435. "special characters",
  436. // Grammar
  437. R"""(
  438. root ::= ... "abc" ...
  439. )""",
  440. // Passing strings
  441. {
  442. "abcabcabc",
  443. "aaaabcccc",
  444. // NOTE: Also ensures that multi-byte characters still count as a single character
  445. "🔵🟠✅abc❌🟠🔵"
  446. },
  447. // Failing strings
  448. {
  449. "aaabcccc",
  450. "aaaaabcccc",
  451. "aaaabccc",
  452. "aaaabccccc",
  453. "🔵🟠✅❌abc❌✅🟠🔵",
  454. "🔵🟠abc🟠🔵"
  455. }
  456. );
  457. }
  458. static void test_quantifiers() {
  459. // A collection of tests to exercise * + and ? quantifiers
  460. test_grammar(
  461. "* quantifier",
  462. // Grammar
  463. R"""(root ::= "a"*)""",
  464. // Passing strings
  465. {
  466. "",
  467. "a",
  468. "aaaaa",
  469. "aaaaaaaaaaaaaaaaaa",
  470. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  471. },
  472. // Failing strings
  473. {
  474. "b",
  475. "ab",
  476. "aab",
  477. "ba",
  478. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  479. }
  480. );
  481. test_grammar(
  482. "+ quantifier",
  483. // Grammar
  484. R"""(root ::= "a"+)""",
  485. // Passing strings
  486. {
  487. "a",
  488. "aaaaa",
  489. "aaaaaaaaaaaaaaaaaa",
  490. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  491. },
  492. // Failing strings
  493. {
  494. "",
  495. "b",
  496. "ab",
  497. "aab",
  498. "ba",
  499. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  500. }
  501. );
  502. test_grammar(
  503. "? quantifier",
  504. // Grammar
  505. R"""(root ::= "a"?)""",
  506. // Passing strings
  507. {
  508. "",
  509. "a"
  510. },
  511. // Failing strings
  512. {
  513. "b",
  514. "ab",
  515. "aa",
  516. "ba",
  517. }
  518. );
  519. test_grammar(
  520. "mixed quantifiers",
  521. // Grammar
  522. R"""(
  523. root ::= cons+ vowel* cons? (vowel cons)*
  524. vowel ::= [aeiouy]
  525. cons ::= [bcdfghjklmnpqrstvwxyz]
  526. )""",
  527. // Passing strings
  528. {
  529. "yes",
  530. "no",
  531. "noyes",
  532. "crwth",
  533. "four",
  534. "bryyyy",
  535. },
  536. // Failing strings
  537. {
  538. "yess",
  539. "yesno",
  540. "forty",
  541. "catyyy",
  542. }
  543. );
  544. test_grammar(
  545. "simple exact repetition",
  546. // Grammar
  547. R"""(
  548. root ::= [ab]{4}
  549. )""",
  550. // Passing strings
  551. {
  552. "aaaa",
  553. "bbbb",
  554. "abab",
  555. },
  556. // Failing strings
  557. {
  558. "a",
  559. "b",
  560. "aaaaa",
  561. }
  562. );
  563. test_grammar(
  564. "simple min repetition",
  565. // Grammar
  566. R"""(
  567. root ::= [ab]{4,}
  568. )""",
  569. // Passing strings
  570. {
  571. "aaaa",
  572. "aaaaab",
  573. "bbbb",
  574. "ababab",
  575. },
  576. // Failing strings
  577. {
  578. "",
  579. "aba",
  580. }
  581. );
  582. test_grammar(
  583. "simple max repetition",
  584. // Grammar
  585. R"""(
  586. root ::= [ab]{0,4}
  587. )""",
  588. // Passing strings
  589. {
  590. "",
  591. "a",
  592. "aa",
  593. "aaa",
  594. "aaab",
  595. },
  596. // Failing strings
  597. {
  598. "aaaaa",
  599. }
  600. );
  601. test_grammar(
  602. "min / max repetition",
  603. // Grammar
  604. R"""(
  605. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  606. )""",
  607. // Passing strings
  608. {
  609. "0xFF 0x12 0xAB",
  610. "0xFF 0x12 0xAB 0x00 0x00",
  611. },
  612. // Failing strings
  613. {
  614. "",
  615. "0xFF",
  616. "0xFF 0x12",
  617. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  618. }
  619. );
  620. }
  621. static void test_failure_missing_root() {
  622. fprintf(stderr, "⚫ Testing missing root node:\n");
  623. // Test case for a grammar that is missing a root rule
  624. const std::string grammar_str = R"""(
  625. rot ::= expr
  626. expr ::= term ("+" term)*
  627. term ::= number
  628. number ::= [0-9]+)""";
  629. llama_grammar_parser parsed_grammar;
  630. parsed_grammar.parse(grammar_str.c_str());
  631. // Ensure we parsed correctly
  632. assert(!parsed_grammar.rules.empty());
  633. // Ensure we do NOT have a root node
  634. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  635. fprintf(stderr, " ✅︎ Passed\n");
  636. }
  637. static void test_failure_missing_reference() {
  638. fprintf(stderr, "⚫ Testing missing reference node:\n");
  639. // Test case for a grammar that is missing a referenced rule
  640. const std::string grammar_str =
  641. R"""(root ::= expr
  642. expr ::= term ("+" term)*
  643. term ::= numero
  644. number ::= [0-9]+)""";
  645. fprintf(stderr, " Expected error: ");
  646. llama_grammar_parser parsed_grammar;
  647. parsed_grammar.parse(grammar_str.c_str());
  648. // Ensure we did NOT parsed correctly
  649. assert(parsed_grammar.rules.empty());
  650. fprintf(stderr, " End of expected error.\n");
  651. fprintf(stderr, " ✅︎ Passed\n");
  652. }
  653. static void test_failure_left_recursion() {
  654. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  655. // Test simple left recursion detection
  656. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  657. assert(test_build_grammar_fails(simple_str));
  658. // Test more complicated left recursion detection
  659. const std::string medium_str = R"""(
  660. root ::= asdf
  661. asdf ::= "a" | asdf "a"
  662. )""";
  663. assert(test_build_grammar_fails(medium_str));
  664. // Test even more complicated left recursion detection
  665. const std::string hard_str = R"""(
  666. root ::= asdf
  667. asdf ::= "a" | foo "b"
  668. foo ::= "c" | asdf "d" | "e")""";
  669. assert(test_build_grammar_fails(hard_str));
  670. // Test yet even more complicated left recursion detection
  671. const std::string hardest_str = R"""(
  672. root ::= asdf
  673. asdf ::= "a" | foo "b"
  674. foo ::= "c" | empty asdf "d" | "e"
  675. empty ::= "blah" | )""";
  676. assert(test_build_grammar_fails(hardest_str));
  677. fprintf(stderr, " ✅︎ Passed\n");
  678. }
  679. static void test_json_schema() {
  680. // Note that this is similar to the regular grammar tests,
  681. // but we convert each json schema to a grammar before parsing.
  682. // Otherwise, this test structure is the same.
  683. test_schema(
  684. "empty schema (object)",
  685. // Schema
  686. R"""(
  687. {}
  688. )""",
  689. // Passing strings
  690. {
  691. R"""({})""",
  692. R"""({"foo": "bar"})""",
  693. },
  694. // Failing strings
  695. {
  696. "",
  697. "[]",
  698. "null",
  699. R"""("")""",
  700. "true",
  701. }
  702. );
  703. test_schema(
  704. "exotic formats (list)",
  705. // Schema
  706. R"""({
  707. "items": [
  708. { "format": "date" },
  709. { "format": "uuid" },
  710. { "format": "time" },
  711. { "format": "date-time" }
  712. ]
  713. })""",
  714. // Passing strings
  715. {
  716. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  717. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  718. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  719. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  720. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  721. },
  722. // Failing strings
  723. {
  724. R"""(["foo", "bar"])""",
  725. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  726. }
  727. );
  728. test_schema(
  729. "string",
  730. // Schema
  731. R"""({
  732. "type": "string"
  733. })""",
  734. // Passing strings
  735. {
  736. R"""("foo")""",
  737. R"""("bar")""",
  738. R"""("")""",
  739. },
  740. // Failing strings
  741. {
  742. R"""({})""",
  743. R"""("foo": "bar")""",
  744. }
  745. );
  746. test_schema(
  747. "string w/ min length 1",
  748. // Schema
  749. R"""({
  750. "type": "string",
  751. "minLength": 1
  752. })""",
  753. // Passing strings
  754. {
  755. R"""("foo")""",
  756. R"""("bar")""",
  757. },
  758. // Failing strings
  759. {
  760. R"""("")""",
  761. R"""({})""",
  762. R"""("foo": "bar")""",
  763. }
  764. );
  765. test_schema(
  766. "string w/ min length 3",
  767. // Schema
  768. R"""({
  769. "type": "string",
  770. "minLength": 3
  771. })""",
  772. // Passing strings
  773. {
  774. R"""("foo")""",
  775. R"""("bar")""",
  776. R"""("foobar")""",
  777. },
  778. // Failing strings
  779. {
  780. R"""("")""",
  781. R"""("f")""",
  782. R"""("fo")""",
  783. }
  784. );
  785. test_schema(
  786. "string w/ max length",
  787. // Schema
  788. R"""({
  789. "type": "string",
  790. "maxLength": 3
  791. })""",
  792. // Passing strings
  793. {
  794. R"""("foo")""",
  795. R"""("bar")""",
  796. R"""("")""",
  797. R"""("f")""",
  798. R"""("fo")""",
  799. },
  800. // Failing strings
  801. {
  802. R"""("foobar")""",
  803. }
  804. );
  805. test_schema(
  806. "string w/ min & max length",
  807. // Schema
  808. R"""({
  809. "type": "string",
  810. "minLength": 1,
  811. "maxLength": 4
  812. })""",
  813. // Passing strings
  814. {
  815. R"""("foo")""",
  816. R"""("bar")""",
  817. R"""("f")""",
  818. R"""("barf")""",
  819. },
  820. // Failing strings
  821. {
  822. R"""("")""",
  823. R"""("barfo")""",
  824. R"""("foobar")""",
  825. }
  826. );
  827. test_schema(
  828. "boolean",
  829. // Schema
  830. R"""({
  831. "type": "boolean"
  832. })""",
  833. // Passing strings
  834. {
  835. "true",
  836. "false",
  837. },
  838. // Failing strings
  839. {
  840. R"""("")""",
  841. R"""("true")""",
  842. R"""(True)""",
  843. R"""(FALSE)""",
  844. }
  845. );
  846. test_schema(
  847. "integer",
  848. // Schema
  849. R"""({
  850. "type": "integer"
  851. })""",
  852. // Passing strings
  853. {
  854. R"""(0)""",
  855. R"""(12345)""",
  856. R"""(1234567890123456)""",
  857. },
  858. // Failing strings
  859. {
  860. R"""()""",
  861. R"""(01)""",
  862. R"""(007)""",
  863. R"""(12345678901234567 )""",
  864. }
  865. );
  866. test_schema(
  867. "string const",
  868. // Schema
  869. R"""({
  870. "const": "foo"
  871. })""",
  872. // Passing strings
  873. {
  874. R"""("foo")""",
  875. },
  876. // Failing strings
  877. {
  878. R"""(foo)""",
  879. R"""("bar")""",
  880. }
  881. );
  882. test_schema(
  883. "non-string const",
  884. // Schema
  885. R"""({
  886. "const": true
  887. })""",
  888. // Passing strings
  889. {
  890. R"""(true)""",
  891. },
  892. // Failing strings
  893. {
  894. R"""()""",
  895. R"""(foo)""",
  896. R"""("true")""",
  897. }
  898. );
  899. test_schema(
  900. "non-string const",
  901. // Schema
  902. R"""({
  903. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  904. })""",
  905. // Passing strings
  906. {
  907. R"""("red")""",
  908. R"""(null)""",
  909. R"""(42)""",
  910. R"""(["foo"])""",
  911. },
  912. // Failing strings
  913. {
  914. R"""()""",
  915. R"""(420)""",
  916. R"""(true)""",
  917. R"""(foo)""",
  918. }
  919. );
  920. test_schema(
  921. "simple pattern",
  922. // Schema
  923. R"""({
  924. "pattern": "^[a-zA-Z0-9_-]*$"
  925. })""",
  926. // Passing strings
  927. {
  928. R"""("")""",
  929. R"""("He_llo-12")""",
  930. },
  931. // Failing strings
  932. {
  933. R"""("!")""",
  934. R"""("Hello World")""",
  935. }
  936. );
  937. test_schema(
  938. "pattern with escapes",
  939. // Schema
  940. R"""({
  941. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  942. })""",
  943. // Passing strings
  944. {
  945. R"""("a^$.[]()|{}*+?b")""",
  946. },
  947. // Failing strings
  948. {
  949. R"""("ab")""",
  950. }
  951. );
  952. test_schema(
  953. "",
  954. // Schema
  955. R"""(
  956. {
  957. "type": ["array", "null"],
  958. "items": { "type": "string" }
  959. }
  960. )""",
  961. // Passing strings
  962. {
  963. "null",
  964. "[]",
  965. "[\"123\"]",
  966. "[\"foo\", \"bar\"]",
  967. },
  968. // Failing strings
  969. {
  970. "",
  971. "[123]",
  972. "\"foo\"",
  973. "[\"foo\", 42]",
  974. }
  975. );
  976. test_schema(
  977. "min+max items",
  978. // Schema
  979. R"""({
  980. "items": {
  981. "type": ["number", "integer"]
  982. },
  983. "minItems": 3,
  984. "maxItems": 5
  985. })""",
  986. // Passing strings
  987. {
  988. R"""([1, 2, 3])""",
  989. R"""([1, 2, 3, 4])""",
  990. R"""([1, 2, 3, 4, 5])""",
  991. },
  992. // Failing strings
  993. {
  994. R"""([1, 2])""",
  995. R"""([1, 2, 3, 4, 5, 6])""",
  996. R"""(1)""",
  997. }
  998. );
  999. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1000. test_schema(
  1001. "object properties",
  1002. // Schema
  1003. R"""({
  1004. "type": "object",
  1005. "properties": {
  1006. "number": { "type": "number" },
  1007. "street_name": { "type": "string" },
  1008. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1009. }
  1010. })""",
  1011. // Passing strings
  1012. {
  1013. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1014. // "By default, leaving out properties is valid"
  1015. R"""({ "street_name": "Pennsylvania" })""",
  1016. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1017. // "By extension, even an empty object is valid"
  1018. R"""({})""",
  1019. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1020. },
  1021. // Failing strings
  1022. {
  1023. // Change datatype from number to string
  1024. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1025. // Reorder properties
  1026. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1027. // Reorder properties
  1028. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1029. // "Additional properties default to false for generation, even though the spec says true.
  1030. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1031. }
  1032. );
  1033. test_schema(
  1034. "additional properties can't override other properties",
  1035. R"""({
  1036. "properties": {
  1037. "a": {"type": "integer"},
  1038. "b": {"type": "integer"}
  1039. },
  1040. "additionalProperties": true
  1041. })""",
  1042. // Passing strings
  1043. {
  1044. R"""({"a": 42})""",
  1045. R"""({"c": ""})""",
  1046. R"""({"a": 42, "c": ""})""",
  1047. R"""({"a_": ""})""",
  1048. },
  1049. // Failing strings
  1050. {
  1051. R"""()""",
  1052. R"""({"a": ""})""",
  1053. R"""({"a": "", "b": ""})""",
  1054. }
  1055. );
  1056. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1057. test_schema(
  1058. "object properties, additionalProperties: true",
  1059. // Schema
  1060. R"""({
  1061. "type": "object",
  1062. "properties": {
  1063. "number": { "type": "number" },
  1064. "street_name": { "type": "string" },
  1065. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1066. },
  1067. "additionalProperties": true
  1068. })""",
  1069. // Passing strings
  1070. {
  1071. // "By extension, even an empty object is valid"
  1072. R"""({})""",
  1073. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1074. // "By default, leaving out properties is valid"
  1075. R"""({ "street_name": "Pennsylvania" })""",
  1076. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1077. // "By default, providing additional properties is valid"
  1078. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1079. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1080. },
  1081. // Failing strings
  1082. {
  1083. // Change datatype from number to string
  1084. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1085. // Reorder properties
  1086. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1087. }
  1088. );
  1089. // Additional properties: false
  1090. test_schema(
  1091. "required + optional props each in original order",
  1092. // Schema
  1093. R"""({
  1094. "type": "object",
  1095. "properties": {
  1096. "number": { "type": "number" },
  1097. "street_name": { "type": "string" },
  1098. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1099. },
  1100. "additionalProperties": false
  1101. })""",
  1102. // Passing strings
  1103. {
  1104. R"""({ "street_name": "Pennsylvania" })""",
  1105. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1106. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1107. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1108. // Spaces are permitted around enum values
  1109. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1110. },
  1111. // Failing strings
  1112. {
  1113. // Reorder properties
  1114. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1115. // Add "direction"
  1116. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1117. }
  1118. );
  1119. test_schema(
  1120. "required + optional props each in original order",
  1121. // Schema
  1122. R"""({
  1123. "properties": {
  1124. "b": {"type": "string"},
  1125. "a": {"type": "string"},
  1126. "d": {"type": "string"},
  1127. "c": {"type": "string"}
  1128. },
  1129. "required": ["a", "b"],
  1130. "additionalProperties": false
  1131. })""",
  1132. // Passing strings
  1133. {
  1134. R"""({"b": "foo", "a": "bar"})""",
  1135. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1136. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1137. },
  1138. // Failing strings
  1139. {
  1140. R"""({"a": "foo", "b": "bar"})""",
  1141. R"""({"b": "bar"})""",
  1142. R"""({"a": "foo", "c": "baz"})""",
  1143. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1144. }
  1145. );
  1146. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1147. test_schema(
  1148. "required props",
  1149. // Schema
  1150. R"""({
  1151. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1152. "$id": "https://example.com/product.schema.json",
  1153. "title": "Product",
  1154. "description": "A product from Acme's catalog",
  1155. "type": "object",
  1156. "properties": {
  1157. "productId": {
  1158. "description": "The unique identifier for a product",
  1159. "type": "integer"
  1160. },
  1161. "productName": {
  1162. "description": "Name of the product",
  1163. "type": "string"
  1164. },
  1165. "price": {
  1166. "description": "The price of the product",
  1167. "type": "number",
  1168. "exclusiveMinimum": 0
  1169. },
  1170. "tags": {
  1171. "description": "Tags for the product",
  1172. "type": "array",
  1173. "items": {
  1174. "type": "string"
  1175. },
  1176. "minItems": 1,
  1177. "uniqueItems": true
  1178. },
  1179. "dimensions": {
  1180. "type": "object",
  1181. "properties": {
  1182. "length": {
  1183. "type": "number"
  1184. },
  1185. "width": {
  1186. "type": "number"
  1187. },
  1188. "height": {
  1189. "type": "number"
  1190. }
  1191. },
  1192. "required": [ "length", "width", "height" ]
  1193. }
  1194. },
  1195. "required": [ "productId", "productName", "price" ]
  1196. })""",
  1197. // Passing strings
  1198. {
  1199. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1200. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1201. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1202. },
  1203. // Failing strings
  1204. {
  1205. R"""({})""", // Missing all required properties
  1206. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1207. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1208. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1209. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1210. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1211. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1212. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1213. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1214. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1215. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1216. }
  1217. );
  1218. }
  1219. int main() {
  1220. fprintf(stdout, "Running grammar integration tests...\n");
  1221. test_simple_grammar();
  1222. test_complex_grammar();
  1223. test_special_chars();
  1224. test_quantifiers();
  1225. test_failure_missing_root();
  1226. test_failure_missing_reference();
  1227. test_failure_left_recursion();
  1228. test_json_schema();
  1229. fprintf(stdout, "All tests passed.\n");
  1230. return 0;
  1231. }