1
0

test-grammar-integration.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "json-schema-to-grammar.h"
  5. #include "../src/unicode.h"
  6. #include "../src/llama-grammar.h"
  7. #include <nlohmann/json.hpp>
  8. #include <cassert>
  9. #include <string>
  10. #include <vector>
  11. using json = nlohmann::ordered_json;
  12. static llama_grammar * build_grammar(const std::string & grammar_str) {
  13. return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0);
  14. }
  15. static bool test_build_grammar_fails(const std::string & grammar_str) {
  16. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  17. bool grammar_fails = false;
  18. llama_grammar * grammar = build_grammar(grammar_str);
  19. if (grammar != nullptr) {
  20. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  21. } else {
  22. grammar_fails = true;
  23. fprintf(stdout, " ✅︎\n");
  24. }
  25. return grammar_fails;
  26. }
  27. static bool match_string(const std::string & input, llama_grammar * grammar) {
  28. const auto cpts = unicode_cpts_from_utf8(input);
  29. auto & stacks_cur = llama_grammar_get_stacks(grammar);
  30. for (const auto & cpt : cpts) {
  31. llama_grammar_accept(grammar, cpt);
  32. if (stacks_cur.empty()) {
  33. // no stacks means that the grammar failed to match at this point
  34. return false;
  35. }
  36. }
  37. for (const auto & stack : stacks_cur) {
  38. if (stack.empty()) {
  39. // An empty stack means that the grammar has been completed
  40. return true;
  41. }
  42. }
  43. return false;
  44. }
  45. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  46. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  47. fflush(stderr);
  48. auto * grammar = build_grammar(grammar_str);
  49. // Save the original grammar stacks so that we can reset after every new string we want to test
  50. const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
  51. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  52. fprintf(stderr, " 🔵 Valid strings:\n");
  53. // Passing strings
  54. for (const auto & test_string : passing_strings) {
  55. fprintf(stderr, " \"%s\" ", test_string.c_str());
  56. fflush(stderr);
  57. bool matched = match_string(test_string, grammar);
  58. if (!matched) {
  59. fprintf(stderr, "❌ (failed to match)\n");
  60. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  61. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  62. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  63. if (grammar_file) {
  64. fprintf(grammar_file, "%s", grammar_str.c_str());
  65. fclose(grammar_file);
  66. }
  67. // DEBUG: Write the test string to test-grammar-integration.string.txt
  68. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  69. if (string_file) {
  70. fprintf(string_file, "%s", test_string.c_str());
  71. fclose(string_file);
  72. }
  73. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  74. } else {
  75. fprintf(stdout, "✅︎\n");
  76. }
  77. assert(matched);
  78. // Reset the grammar stacks
  79. stacks_cur = stacks_org;
  80. }
  81. fprintf(stderr, " 🟠 Invalid strings:\n");
  82. // Failing strings
  83. for (const auto & test_string : failing_strings) {
  84. fprintf(stderr, " \"%s\" ", test_string.c_str());
  85. fflush(stderr);
  86. bool matched = match_string(test_string, grammar);
  87. if (matched) {
  88. fprintf(stderr, "❌ (incorrectly matched)\n");
  89. } else {
  90. fprintf(stdout, "✅︎\n");
  91. }
  92. assert(!matched);
  93. // Reset the grammar stacks
  94. stacks_cur = stacks_org;
  95. }
  96. // Clean up allocated memory
  97. llama_grammar_free_impl(grammar);
  98. }
  99. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  100. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  101. }
  102. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  103. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str), true), passing_strings, failing_strings);
  104. }
  105. static void test_simple_grammar() {
  106. test_schema(
  107. "min 0",
  108. R"""({
  109. "type": "integer",
  110. "minimum": 0
  111. })""",
  112. // Passing strings
  113. {
  114. "0",
  115. "10",
  116. "12",
  117. "10000",
  118. },
  119. // Failing strings
  120. {
  121. "-1",
  122. "-10",
  123. "-10000",
  124. "-100000000000000000000000000000000",
  125. "100000000000000000000000000000000",
  126. "00",
  127. "01",
  128. "-0",
  129. }
  130. );
  131. test_schema(
  132. "min 2",
  133. // Schema
  134. R"""({
  135. "type": "integer",
  136. "minimum": 2
  137. })""",
  138. // Passing strings
  139. {
  140. "2",
  141. "3",
  142. "4",
  143. "10",
  144. "20",
  145. "1234567890000000",
  146. },
  147. // Failing strings
  148. {
  149. "0",
  150. "1",
  151. "-1",
  152. "-100",
  153. "0",
  154. "1",
  155. "01",
  156. "02",
  157. "12345678900000000",
  158. }
  159. );
  160. test_schema(
  161. "min 456",
  162. R"""({
  163. "type": "integer",
  164. "minimum": 456
  165. })""",
  166. // Passing strings
  167. {
  168. "456",
  169. "4560",
  170. "457",
  171. "460",
  172. "500",
  173. },
  174. // Failing strings
  175. {
  176. "455",
  177. "356",
  178. "50",
  179. "050",
  180. "-1",
  181. "-456",
  182. }
  183. );
  184. test_schema(
  185. "min -123",
  186. R"""({
  187. "type": "integer",
  188. "minimum": -123
  189. })""",
  190. // Passing strings
  191. {
  192. "-123",
  193. "-122",
  194. "-11",
  195. "-1",
  196. "0",
  197. "1",
  198. "123",
  199. "1234",
  200. "2345",
  201. },
  202. // Failing strings
  203. {
  204. "-1234",
  205. "-124",
  206. }
  207. );
  208. test_schema(
  209. "max 9999",
  210. // Schema
  211. R"""({
  212. "type": "integer",
  213. "maximum": 9999
  214. })""",
  215. // Passing strings
  216. {
  217. "-99999",
  218. "0",
  219. "9999",
  220. },
  221. // Failing strings
  222. {
  223. "10000",
  224. "99991",
  225. }
  226. );
  227. test_schema(
  228. "max -9999",
  229. // Schema
  230. R"""({
  231. "type": "integer",
  232. "maximum": -9999
  233. })""",
  234. // Passing strings
  235. {
  236. "-10000",
  237. "-9999",
  238. },
  239. // Failing strings
  240. {
  241. "-9998",
  242. "0",
  243. "9999",
  244. }
  245. );
  246. test_schema(
  247. "min 5 max 30",
  248. // Schema
  249. R"""({
  250. "type": "integer",
  251. "minimum": 5,
  252. "maximum": 30
  253. })""",
  254. // Passing strings
  255. {
  256. "5",
  257. "10",
  258. "30",
  259. },
  260. // Failing strings
  261. {
  262. "05",
  263. "4",
  264. "-1",
  265. "31",
  266. "123",
  267. "0123",
  268. }
  269. );
  270. test_schema(
  271. "min -1 max 1",
  272. R"""({
  273. "type": "integer",
  274. "minimum": -1,
  275. "maximum": 1
  276. })""",
  277. // Passing strings
  278. {
  279. "-1",
  280. "0",
  281. "1",
  282. },
  283. // Failing strings
  284. {
  285. "-11",
  286. "-10",
  287. "-2",
  288. "2",
  289. "10",
  290. "11",
  291. }
  292. );
  293. test_schema(
  294. "min -123 max 42",
  295. R"""({
  296. "type": "integer",
  297. "minimum": -123,
  298. "maximum": 42
  299. })""",
  300. // Passing strings
  301. {
  302. "-123",
  303. "-122",
  304. "-13",
  305. "-11",
  306. "-2",
  307. "-1",
  308. "0",
  309. "1",
  310. "5",
  311. "10",
  312. "39",
  313. "40",
  314. "42",
  315. },
  316. // Failing strings
  317. {
  318. "-0123",
  319. "-124",
  320. "-1123",
  321. "-200",
  322. "43",
  323. "123",
  324. "0123",
  325. }
  326. );
  327. test_schema(
  328. "exclusive min / max",
  329. // Schema
  330. R"""({
  331. "type": "integer",
  332. "exclusiveMinimum": 0,
  333. "exclusiveMaximum": 10000
  334. })""",
  335. // Passing strings
  336. {
  337. "1",
  338. "9999",
  339. },
  340. // Failing strings
  341. {
  342. "0",
  343. "01",
  344. "10000",
  345. "99999",
  346. }
  347. );
  348. // Test case for a simple grammar
  349. test_grammar(
  350. "simple grammar",
  351. R"""(
  352. root ::= expr
  353. expr ::= term ("+" term)*
  354. term ::= number
  355. number ::= [0-9]+)""",
  356. // Passing strings
  357. {
  358. "42",
  359. "1+2+3+4+5",
  360. "123+456",
  361. },
  362. // Failing strings
  363. {
  364. "+",
  365. "/ 3",
  366. "1+2+3+4+5+",
  367. "12a45",
  368. }
  369. );
  370. }
  371. static void test_complex_grammar() {
  372. // Test case for a more complex grammar, with both failure strings and success strings
  373. test_grammar(
  374. "medium complexity grammar",
  375. // Grammar
  376. R"""(
  377. root ::= expression
  378. expression ::= term ws (("+"|"-") ws term)*
  379. term ::= factor ws (("*"|"/") ws factor)*
  380. factor ::= number | variable | "(" expression ")" | function-call
  381. number ::= [0-9]+
  382. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  383. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  384. ws ::= [ \t\n\r]?)""",
  385. // Passing strings
  386. {
  387. "42",
  388. "1*2*3*4*5",
  389. "x",
  390. "x+10",
  391. "x1+y2",
  392. "(a+b)*(c-d)",
  393. "func()",
  394. "func(x,y+2)",
  395. "a*(b+c)-d/e",
  396. "f(g(x),h(y,z))",
  397. "x + 10",
  398. "x1 + y2",
  399. "(a + b) * (c - d)",
  400. "func()",
  401. "func(x, y + 2)",
  402. "a * (b + c) - d / e",
  403. "f(g(x), h(y, z))",
  404. "123+456",
  405. "123*456*789-123/456+789*123",
  406. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  407. },
  408. // Failing strings
  409. {
  410. "+",
  411. "/ 3x",
  412. "x + + y",
  413. "a * / b",
  414. "func(,)",
  415. "func(x y)",
  416. "(a + b",
  417. "x + y)",
  418. "a + b * (c - d",
  419. "42 +",
  420. "x +",
  421. "x + 10 +",
  422. "(a + b) * (c - d",
  423. "func(",
  424. "func(x, y + 2",
  425. "a * (b + c) - d /",
  426. "f(g(x), h(y, z)",
  427. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  428. }
  429. );
  430. }
  431. static void test_special_chars() {
  432. // A collection of tests to exercise special characters such as "."
  433. test_grammar(
  434. "special characters",
  435. // Grammar
  436. R"""(
  437. root ::= ... "abc" ...
  438. )""",
  439. // Passing strings
  440. {
  441. "abcabcabc",
  442. "aaaabcccc",
  443. // NOTE: Also ensures that multi-byte characters still count as a single character
  444. "🔵🟠✅abc❌🟠🔵"
  445. },
  446. // Failing strings
  447. {
  448. "aaabcccc",
  449. "aaaaabcccc",
  450. "aaaabccc",
  451. "aaaabccccc",
  452. "🔵🟠✅❌abc❌✅🟠🔵",
  453. "🔵🟠abc🟠🔵"
  454. }
  455. );
  456. }
  457. static void test_quantifiers() {
  458. // A collection of tests to exercise * + and ? quantifiers
  459. test_grammar(
  460. "* quantifier",
  461. // Grammar
  462. R"""(root ::= "a"*)""",
  463. // Passing strings
  464. {
  465. "",
  466. "a",
  467. "aaaaa",
  468. "aaaaaaaaaaaaaaaaaa",
  469. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  470. },
  471. // Failing strings
  472. {
  473. "b",
  474. "ab",
  475. "aab",
  476. "ba",
  477. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  478. }
  479. );
  480. test_grammar(
  481. "+ quantifier",
  482. // Grammar
  483. R"""(root ::= "a"+)""",
  484. // Passing strings
  485. {
  486. "a",
  487. "aaaaa",
  488. "aaaaaaaaaaaaaaaaaa",
  489. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  490. },
  491. // Failing strings
  492. {
  493. "",
  494. "b",
  495. "ab",
  496. "aab",
  497. "ba",
  498. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  499. }
  500. );
  501. test_grammar(
  502. "? quantifier",
  503. // Grammar
  504. R"""(root ::= "a"?)""",
  505. // Passing strings
  506. {
  507. "",
  508. "a"
  509. },
  510. // Failing strings
  511. {
  512. "b",
  513. "ab",
  514. "aa",
  515. "ba",
  516. }
  517. );
  518. test_grammar(
  519. "mixed quantifiers",
  520. // Grammar
  521. R"""(
  522. root ::= cons+ vowel* cons? (vowel cons)*
  523. vowel ::= [aeiouy]
  524. cons ::= [bcdfghjklmnpqrstvwxyz]
  525. )""",
  526. // Passing strings
  527. {
  528. "yes",
  529. "no",
  530. "noyes",
  531. "crwth",
  532. "four",
  533. "bryyyy",
  534. },
  535. // Failing strings
  536. {
  537. "yess",
  538. "yesno",
  539. "forty",
  540. "catyyy",
  541. }
  542. );
  543. test_grammar(
  544. "simple exact repetition",
  545. // Grammar
  546. R"""(
  547. root ::= [ab]{4}
  548. )""",
  549. // Passing strings
  550. {
  551. "aaaa",
  552. "bbbb",
  553. "abab",
  554. },
  555. // Failing strings
  556. {
  557. "a",
  558. "b",
  559. "aaaaa",
  560. }
  561. );
  562. test_grammar(
  563. "simple min repetition",
  564. // Grammar
  565. R"""(
  566. root ::= [ab]{4,}
  567. )""",
  568. // Passing strings
  569. {
  570. "aaaa",
  571. "aaaaab",
  572. "bbbb",
  573. "ababab",
  574. },
  575. // Failing strings
  576. {
  577. "",
  578. "aba",
  579. }
  580. );
  581. test_grammar(
  582. "simple max repetition",
  583. // Grammar
  584. R"""(
  585. root ::= [ab]{0,4}
  586. )""",
  587. // Passing strings
  588. {
  589. "",
  590. "a",
  591. "aa",
  592. "aaa",
  593. "aaab",
  594. },
  595. // Failing strings
  596. {
  597. "aaaaa",
  598. }
  599. );
  600. test_grammar(
  601. "min / max repetition",
  602. // Grammar
  603. R"""(
  604. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  605. )""",
  606. // Passing strings
  607. {
  608. "0xFF 0x12 0xAB",
  609. "0xFF 0x12 0xAB 0x00 0x00",
  610. },
  611. // Failing strings
  612. {
  613. "",
  614. "0xFF",
  615. "0xFF 0x12",
  616. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  617. }
  618. );
  619. }
  620. static void test_failure_missing_root() {
  621. fprintf(stderr, "⚫ Testing missing root node:\n");
  622. // Test case for a grammar that is missing a root rule
  623. const std::string grammar_str = R"""(
  624. rot ::= expr
  625. expr ::= term ("+" term)*
  626. term ::= number
  627. number ::= [0-9]+)""";
  628. llama_grammar_parser parsed_grammar;
  629. parsed_grammar.parse(grammar_str.c_str());
  630. // Ensure we parsed correctly
  631. assert(!parsed_grammar.rules.empty());
  632. // Ensure we do NOT have a root node
  633. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  634. fprintf(stderr, " ✅︎ Passed\n");
  635. }
  636. static void test_failure_missing_reference() {
  637. fprintf(stderr, "⚫ Testing missing reference node:\n");
  638. // Test case for a grammar that is missing a referenced rule
  639. const std::string grammar_str =
  640. R"""(root ::= expr
  641. expr ::= term ("+" term)*
  642. term ::= numero
  643. number ::= [0-9]+)""";
  644. fprintf(stderr, " Expected error: ");
  645. llama_grammar_parser parsed_grammar;
  646. parsed_grammar.parse(grammar_str.c_str());
  647. // Ensure we did NOT parsed correctly
  648. assert(parsed_grammar.rules.empty());
  649. fprintf(stderr, " End of expected error.\n");
  650. fprintf(stderr, " ✅︎ Passed\n");
  651. }
  652. static void test_failure_left_recursion() {
  653. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  654. // Test simple left recursion detection
  655. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  656. assert(test_build_grammar_fails(simple_str));
  657. // Test more complicated left recursion detection
  658. const std::string medium_str = R"""(
  659. root ::= asdf
  660. asdf ::= "a" | asdf "a"
  661. )""";
  662. assert(test_build_grammar_fails(medium_str));
  663. // Test even more complicated left recursion detection
  664. const std::string hard_str = R"""(
  665. root ::= asdf
  666. asdf ::= "a" | foo "b"
  667. foo ::= "c" | asdf "d" | "e")""";
  668. assert(test_build_grammar_fails(hard_str));
  669. // Test yet even more complicated left recursion detection
  670. const std::string hardest_str = R"""(
  671. root ::= asdf
  672. asdf ::= "a" | foo "b"
  673. foo ::= "c" | empty asdf "d" | "e"
  674. empty ::= "blah" | )""";
  675. assert(test_build_grammar_fails(hardest_str));
  676. fprintf(stderr, " ✅︎ Passed\n");
  677. }
  678. static void test_json_schema() {
  679. // Note that this is similar to the regular grammar tests,
  680. // but we convert each json schema to a grammar before parsing.
  681. // Otherwise, this test structure is the same.
  682. test_schema(
  683. "empty schema (object)",
  684. // Schema
  685. R"""(
  686. {}
  687. )""",
  688. // Passing strings
  689. {
  690. R"""({})""",
  691. R"""({"foo": "bar"})""",
  692. },
  693. // Failing strings
  694. {
  695. "",
  696. "[]",
  697. "null",
  698. R"""("")""",
  699. "true",
  700. }
  701. );
  702. test_schema(
  703. "exotic formats (list)",
  704. // Schema
  705. R"""({
  706. "items": [
  707. { "format": "date" },
  708. { "format": "uuid" },
  709. { "format": "time" },
  710. { "format": "date-time" }
  711. ]
  712. })""",
  713. // Passing strings
  714. {
  715. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  716. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  717. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  718. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  719. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  720. },
  721. // Failing strings
  722. {
  723. R"""(["foo", "bar"])""",
  724. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  725. }
  726. );
  727. test_schema(
  728. "string",
  729. // Schema
  730. R"""({
  731. "type": "string"
  732. })""",
  733. // Passing strings
  734. {
  735. R"""("foo")""",
  736. R"""("bar")""",
  737. R"""("")""",
  738. },
  739. // Failing strings
  740. {
  741. R"""({})""",
  742. R"""("foo": "bar")""",
  743. }
  744. );
  745. test_schema(
  746. "string w/ min length 1",
  747. // Schema
  748. R"""({
  749. "type": "string",
  750. "minLength": 1
  751. })""",
  752. // Passing strings
  753. {
  754. R"""("foo")""",
  755. R"""("bar")""",
  756. },
  757. // Failing strings
  758. {
  759. R"""("")""",
  760. R"""({})""",
  761. R"""("foo": "bar")""",
  762. }
  763. );
  764. test_schema(
  765. "string w/ min length 3",
  766. // Schema
  767. R"""({
  768. "type": "string",
  769. "minLength": 3
  770. })""",
  771. // Passing strings
  772. {
  773. R"""("foo")""",
  774. R"""("bar")""",
  775. R"""("foobar")""",
  776. },
  777. // Failing strings
  778. {
  779. R"""("")""",
  780. R"""("f")""",
  781. R"""("fo")""",
  782. }
  783. );
  784. test_schema(
  785. "string w/ max length",
  786. // Schema
  787. R"""({
  788. "type": "string",
  789. "maxLength": 3
  790. })""",
  791. // Passing strings
  792. {
  793. R"""("foo")""",
  794. R"""("bar")""",
  795. R"""("")""",
  796. R"""("f")""",
  797. R"""("fo")""",
  798. },
  799. // Failing strings
  800. {
  801. R"""("foobar")""",
  802. }
  803. );
  804. test_schema(
  805. "string w/ min & max length",
  806. // Schema
  807. R"""({
  808. "type": "string",
  809. "minLength": 1,
  810. "maxLength": 4
  811. })""",
  812. // Passing strings
  813. {
  814. R"""("foo")""",
  815. R"""("bar")""",
  816. R"""("f")""",
  817. R"""("barf")""",
  818. },
  819. // Failing strings
  820. {
  821. R"""("")""",
  822. R"""("barfo")""",
  823. R"""("foobar")""",
  824. }
  825. );
  826. test_schema(
  827. "boolean",
  828. // Schema
  829. R"""({
  830. "type": "boolean"
  831. })""",
  832. // Passing strings
  833. {
  834. "true",
  835. "false",
  836. },
  837. // Failing strings
  838. {
  839. R"""("")""",
  840. R"""("true")""",
  841. R"""(True)""",
  842. R"""(FALSE)""",
  843. }
  844. );
  845. test_schema(
  846. "integer",
  847. // Schema
  848. R"""({
  849. "type": "integer"
  850. })""",
  851. // Passing strings
  852. {
  853. R"""(0)""",
  854. R"""(12345)""",
  855. R"""(1234567890123456)""",
  856. },
  857. // Failing strings
  858. {
  859. R"""()""",
  860. R"""(01)""",
  861. R"""(007)""",
  862. R"""(12345678901234567 )""",
  863. }
  864. );
  865. test_schema(
  866. "string const",
  867. // Schema
  868. R"""({
  869. "const": "foo"
  870. })""",
  871. // Passing strings
  872. {
  873. R"""("foo")""",
  874. },
  875. // Failing strings
  876. {
  877. R"""(foo)""",
  878. R"""("bar")""",
  879. }
  880. );
  881. test_schema(
  882. "non-string const",
  883. // Schema
  884. R"""({
  885. "const": true
  886. })""",
  887. // Passing strings
  888. {
  889. R"""(true)""",
  890. },
  891. // Failing strings
  892. {
  893. R"""()""",
  894. R"""(foo)""",
  895. R"""("true")""",
  896. }
  897. );
  898. test_schema(
  899. "non-string const",
  900. // Schema
  901. R"""({
  902. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  903. })""",
  904. // Passing strings
  905. {
  906. R"""("red")""",
  907. R"""(null)""",
  908. R"""(42)""",
  909. R"""(["foo"])""",
  910. },
  911. // Failing strings
  912. {
  913. R"""()""",
  914. R"""(420)""",
  915. R"""(true)""",
  916. R"""(foo)""",
  917. }
  918. );
  919. test_schema(
  920. "simple pattern",
  921. // Schema
  922. R"""({
  923. "pattern": "^[a-zA-Z0-9_-]*$"
  924. })""",
  925. // Passing strings
  926. {
  927. R"""("")""",
  928. R"""("He_llo-12")""",
  929. },
  930. // Failing strings
  931. {
  932. R"""("!")""",
  933. R"""("Hello World")""",
  934. }
  935. );
  936. test_schema(
  937. "pattern with escapes",
  938. // Schema
  939. R"""({
  940. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  941. })""",
  942. // Passing strings
  943. {
  944. R"""("a^$.[]()|{}*+?b")""",
  945. },
  946. // Failing strings
  947. {
  948. R"""("ab")""",
  949. }
  950. );
  951. test_schema(
  952. "",
  953. // Schema
  954. R"""(
  955. {
  956. "type": ["array", "null"],
  957. "items": { "type": "string" }
  958. }
  959. )""",
  960. // Passing strings
  961. {
  962. "null",
  963. "[]",
  964. "[\"123\"]",
  965. "[\"foo\", \"bar\"]",
  966. },
  967. // Failing strings
  968. {
  969. "",
  970. "[123]",
  971. "\"foo\"",
  972. "[\"foo\", 42]",
  973. }
  974. );
  975. test_schema(
  976. "min+max items",
  977. // Schema
  978. R"""({
  979. "items": {
  980. "type": ["number", "integer"]
  981. },
  982. "minItems": 3,
  983. "maxItems": 5
  984. })""",
  985. // Passing strings
  986. {
  987. R"""([1, 2, 3])""",
  988. R"""([1, 2, 3, 4])""",
  989. R"""([1, 2, 3, 4, 5])""",
  990. },
  991. // Failing strings
  992. {
  993. R"""([1, 2])""",
  994. R"""([1, 2, 3, 4, 5, 6])""",
  995. R"""(1)""",
  996. }
  997. );
  998. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  999. test_schema(
  1000. "object properties",
  1001. // Schema
  1002. R"""({
  1003. "type": "object",
  1004. "properties": {
  1005. "number": { "type": "number" },
  1006. "street_name": { "type": "string" },
  1007. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1008. }
  1009. })""",
  1010. // Passing strings
  1011. {
  1012. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1013. // "By default, leaving out properties is valid"
  1014. R"""({ "street_name": "Pennsylvania" })""",
  1015. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1016. // "By extension, even an empty object is valid"
  1017. R"""({})""",
  1018. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1019. },
  1020. // Failing strings
  1021. {
  1022. // Change datatype from number to string
  1023. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1024. // Reorder properties
  1025. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1026. // Reorder properties
  1027. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1028. // "Additional properties default to false for generation, even though the spec says true.
  1029. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1030. }
  1031. );
  1032. test_schema(
  1033. "additional properties can't override other properties",
  1034. R"""({
  1035. "properties": {
  1036. "a": {"type": "integer"},
  1037. "b": {"type": "integer"}
  1038. },
  1039. "additionalProperties": true
  1040. })""",
  1041. // Passing strings
  1042. {
  1043. R"""({"a": 42})""",
  1044. R"""({"c": ""})""",
  1045. R"""({"a": 42, "c": ""})""",
  1046. R"""({"a_": ""})""",
  1047. },
  1048. // Failing strings
  1049. {
  1050. R"""()""",
  1051. R"""({"a": ""})""",
  1052. R"""({"a": "", "b": ""})""",
  1053. }
  1054. );
  1055. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1056. test_schema(
  1057. "object properties, additionalProperties: true",
  1058. // Schema
  1059. R"""({
  1060. "type": "object",
  1061. "properties": {
  1062. "number": { "type": "number" },
  1063. "street_name": { "type": "string" },
  1064. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1065. },
  1066. "additionalProperties": true
  1067. })""",
  1068. // Passing strings
  1069. {
  1070. // "By extension, even an empty object is valid"
  1071. R"""({})""",
  1072. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1073. // "By default, leaving out properties is valid"
  1074. R"""({ "street_name": "Pennsylvania" })""",
  1075. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1076. // "By default, providing additional properties is valid"
  1077. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1078. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1079. },
  1080. // Failing strings
  1081. {
  1082. // Change datatype from number to string
  1083. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1084. // Reorder properties
  1085. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1086. }
  1087. );
  1088. // Additional properties: false
  1089. test_schema(
  1090. "required + optional props each in original order",
  1091. // Schema
  1092. R"""({
  1093. "type": "object",
  1094. "properties": {
  1095. "number": { "type": "number" },
  1096. "street_name": { "type": "string" },
  1097. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1098. },
  1099. "additionalProperties": false
  1100. })""",
  1101. // Passing strings
  1102. {
  1103. R"""({ "street_name": "Pennsylvania" })""",
  1104. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1105. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1106. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1107. // Spaces are permitted around enum values
  1108. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1109. },
  1110. // Failing strings
  1111. {
  1112. // Reorder properties
  1113. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1114. // Add "direction"
  1115. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1116. }
  1117. );
  1118. test_schema(
  1119. "required + optional props each in original order",
  1120. // Schema
  1121. R"""({
  1122. "properties": {
  1123. "b": {"type": "string"},
  1124. "a": {"type": "string"},
  1125. "d": {"type": "string"},
  1126. "c": {"type": "string"}
  1127. },
  1128. "required": ["a", "b"],
  1129. "additionalProperties": false
  1130. })""",
  1131. // Passing strings
  1132. {
  1133. R"""({"b": "foo", "a": "bar"})""",
  1134. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1135. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1136. },
  1137. // Failing strings
  1138. {
  1139. R"""({"a": "foo", "b": "bar"})""",
  1140. R"""({"b": "bar"})""",
  1141. R"""({"a": "foo", "c": "baz"})""",
  1142. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1143. }
  1144. );
  1145. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1146. test_schema(
  1147. "required props",
  1148. // Schema
  1149. R"""({
  1150. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1151. "$id": "https://example.com/product.schema.json",
  1152. "title": "Product",
  1153. "description": "A product from Acme's catalog",
  1154. "type": "object",
  1155. "properties": {
  1156. "productId": {
  1157. "description": "The unique identifier for a product",
  1158. "type": "integer"
  1159. },
  1160. "productName": {
  1161. "description": "Name of the product",
  1162. "type": "string"
  1163. },
  1164. "price": {
  1165. "description": "The price of the product",
  1166. "type": "number",
  1167. "exclusiveMinimum": 0
  1168. },
  1169. "tags": {
  1170. "description": "Tags for the product",
  1171. "type": "array",
  1172. "items": {
  1173. "type": "string"
  1174. },
  1175. "minItems": 1,
  1176. "uniqueItems": true
  1177. },
  1178. "dimensions": {
  1179. "type": "object",
  1180. "properties": {
  1181. "length": {
  1182. "type": "number"
  1183. },
  1184. "width": {
  1185. "type": "number"
  1186. },
  1187. "height": {
  1188. "type": "number"
  1189. }
  1190. },
  1191. "required": [ "length", "width", "height" ]
  1192. }
  1193. },
  1194. "required": [ "productId", "productName", "price" ]
  1195. })""",
  1196. // Passing strings
  1197. {
  1198. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1199. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1200. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1201. },
  1202. // Failing strings
  1203. {
  1204. R"""({})""", // Missing all required properties
  1205. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1206. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1207. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1208. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1209. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1210. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1211. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1212. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1213. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1214. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1215. }
  1216. );
  1217. }
  1218. int main() {
  1219. fprintf(stdout, "Running grammar integration tests...\n");
  1220. test_simple_grammar();
  1221. test_complex_grammar();
  1222. test_special_chars();
  1223. test_quantifiers();
  1224. test_failure_missing_root();
  1225. test_failure_missing_reference();
  1226. test_failure_left_recursion();
  1227. test_json_schema();
  1228. fprintf(stdout, "All tests passed.\n");
  1229. return 0;
  1230. }