test-grammar-integration.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "unicode.h"
  5. #include "llama-grammar.h"
  6. #include "json-schema-to-grammar.h"
  7. #include <cassert>
  8. #include <string>
  9. #include <vector>
  10. using json = nlohmann::ordered_json;
  11. static llama_grammar * build_grammar(const std::string & grammar_str) {
  12. return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
  13. }
  14. static bool test_build_grammar_fails(const std::string & grammar_str) {
  15. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  16. bool grammar_fails = false;
  17. llama_grammar * grammar = build_grammar(grammar_str);
  18. if (grammar != nullptr) {
  19. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  20. } else {
  21. grammar_fails = true;
  22. fprintf(stdout, " ✅︎\n");
  23. }
  24. return grammar_fails;
  25. }
  26. static bool match_string(const std::string & input, llama_grammar * grammar) {
  27. const auto cpts = unicode_cpts_from_utf8(input);
  28. auto & stacks_cur = llama_grammar_get_stacks(grammar);
  29. for (const auto & cpt : cpts) {
  30. llama_grammar_accept(grammar, cpt);
  31. if (stacks_cur.empty()) {
  32. // no stacks means that the grammar failed to match at this point
  33. return false;
  34. }
  35. }
  36. for (const auto & stack : stacks_cur) {
  37. if (stack.empty()) {
  38. // An empty stack means that the grammar has been completed
  39. return true;
  40. }
  41. }
  42. return false;
  43. }
  44. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  45. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  46. fflush(stderr);
  47. auto * grammar = build_grammar(grammar_str);
  48. // Save the original grammar stacks so that we can reset after every new string we want to test
  49. const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
  50. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  51. fprintf(stderr, " 🔵 Valid strings:\n");
  52. // Passing strings
  53. for (const auto & test_string : passing_strings) {
  54. fprintf(stderr, " \"%s\" ", test_string.c_str());
  55. fflush(stderr);
  56. bool matched = match_string(test_string, grammar);
  57. if (!matched) {
  58. fprintf(stderr, "❌ (failed to match)\n");
  59. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  60. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  61. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  62. if (grammar_file) {
  63. fprintf(grammar_file, "%s", grammar_str.c_str());
  64. fclose(grammar_file);
  65. }
  66. // DEBUG: Write the test string to test-grammar-integration.string.txt
  67. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  68. if (string_file) {
  69. fprintf(string_file, "%s", test_string.c_str());
  70. fclose(string_file);
  71. }
  72. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  73. } else {
  74. fprintf(stdout, "✅︎\n");
  75. }
  76. assert(matched);
  77. // Reset the grammar stacks
  78. stacks_cur = stacks_org;
  79. }
  80. fprintf(stderr, " 🟠 Invalid strings:\n");
  81. // Failing strings
  82. for (const auto & test_string : failing_strings) {
  83. fprintf(stderr, " \"%s\" ", test_string.c_str());
  84. fflush(stderr);
  85. bool matched = match_string(test_string, grammar);
  86. if (matched) {
  87. fprintf(stderr, "❌ (incorrectly matched)\n");
  88. } else {
  89. fprintf(stdout, "✅︎\n");
  90. }
  91. assert(!matched);
  92. // Reset the grammar stacks
  93. stacks_cur = stacks_org;
  94. }
  95. // Clean up allocated memory
  96. llama_grammar_free_impl(grammar);
  97. }
  98. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  99. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  100. }
  101. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  102. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  103. }
  104. static void test_simple_grammar() {
  105. test_schema(
  106. "min 0",
  107. R"""({
  108. "type": "integer",
  109. "minimum": 0
  110. })""",
  111. // Passing strings
  112. {
  113. "0",
  114. "10",
  115. "12",
  116. "10000",
  117. },
  118. // Failing strings
  119. {
  120. "-1",
  121. "-10",
  122. "-10000",
  123. "-100000000000000000000000000000000",
  124. "100000000000000000000000000000000",
  125. "00",
  126. "01",
  127. "-0",
  128. }
  129. );
  130. test_schema(
  131. "min 2",
  132. // Schema
  133. R"""({
  134. "type": "integer",
  135. "minimum": 2
  136. })""",
  137. // Passing strings
  138. {
  139. "2",
  140. "3",
  141. "4",
  142. "10",
  143. "20",
  144. "1234567890000000",
  145. },
  146. // Failing strings
  147. {
  148. "0",
  149. "1",
  150. "-1",
  151. "-100",
  152. "0",
  153. "1",
  154. "01",
  155. "02",
  156. "12345678900000000",
  157. }
  158. );
  159. test_schema(
  160. "min 456",
  161. R"""({
  162. "type": "integer",
  163. "minimum": 456
  164. })""",
  165. // Passing strings
  166. {
  167. "456",
  168. "4560",
  169. "457",
  170. "460",
  171. "500",
  172. },
  173. // Failing strings
  174. {
  175. "455",
  176. "356",
  177. "50",
  178. "050",
  179. "-1",
  180. "-456",
  181. }
  182. );
  183. test_schema(
  184. "min -123",
  185. R"""({
  186. "type": "integer",
  187. "minimum": -123
  188. })""",
  189. // Passing strings
  190. {
  191. "-123",
  192. "-122",
  193. "-11",
  194. "-1",
  195. "0",
  196. "1",
  197. "123",
  198. "1234",
  199. "2345",
  200. },
  201. // Failing strings
  202. {
  203. "-1234",
  204. "-124",
  205. }
  206. );
  207. test_schema(
  208. "max 9999",
  209. // Schema
  210. R"""({
  211. "type": "integer",
  212. "maximum": 9999
  213. })""",
  214. // Passing strings
  215. {
  216. "-99999",
  217. "0",
  218. "9999",
  219. },
  220. // Failing strings
  221. {
  222. "10000",
  223. "99991",
  224. }
  225. );
  226. test_schema(
  227. "max -9999",
  228. // Schema
  229. R"""({
  230. "type": "integer",
  231. "maximum": -9999
  232. })""",
  233. // Passing strings
  234. {
  235. "-10000",
  236. "-9999",
  237. },
  238. // Failing strings
  239. {
  240. "-9998",
  241. "0",
  242. "9999",
  243. }
  244. );
  245. test_schema(
  246. "min 5 max 30",
  247. // Schema
  248. R"""({
  249. "type": "integer",
  250. "minimum": 5,
  251. "maximum": 30
  252. })""",
  253. // Passing strings
  254. {
  255. "5",
  256. "10",
  257. "30",
  258. },
  259. // Failing strings
  260. {
  261. "05",
  262. "4",
  263. "-1",
  264. "31",
  265. "123",
  266. "0123",
  267. }
  268. );
  269. test_schema(
  270. "min -1 max 1",
  271. R"""({
  272. "type": "integer",
  273. "minimum": -1,
  274. "maximum": 1
  275. })""",
  276. // Passing strings
  277. {
  278. "-1",
  279. "0",
  280. "1",
  281. },
  282. // Failing strings
  283. {
  284. "-11",
  285. "-10",
  286. "-2",
  287. "2",
  288. "10",
  289. "11",
  290. }
  291. );
  292. test_schema(
  293. "min -123 max 42",
  294. R"""({
  295. "type": "integer",
  296. "minimum": -123,
  297. "maximum": 42
  298. })""",
  299. // Passing strings
  300. {
  301. "-123",
  302. "-122",
  303. "-13",
  304. "-11",
  305. "-2",
  306. "-1",
  307. "0",
  308. "1",
  309. "5",
  310. "10",
  311. "39",
  312. "40",
  313. "42",
  314. },
  315. // Failing strings
  316. {
  317. "-0123",
  318. "-124",
  319. "-1123",
  320. "-200",
  321. "43",
  322. "123",
  323. "0123",
  324. }
  325. );
  326. test_schema(
  327. "exclusive min / max",
  328. // Schema
  329. R"""({
  330. "type": "integer",
  331. "exclusiveMinimum": 0,
  332. "exclusiveMaximum": 10000
  333. })""",
  334. // Passing strings
  335. {
  336. "1",
  337. "9999",
  338. },
  339. // Failing strings
  340. {
  341. "0",
  342. "01",
  343. "10000",
  344. "99999",
  345. }
  346. );
  347. // Test case for a simple grammar
  348. test_grammar(
  349. "simple grammar",
  350. R"""(
  351. root ::= expr
  352. expr ::= term ("+" term)*
  353. term ::= number
  354. number ::= [0-9]+)""",
  355. // Passing strings
  356. {
  357. "42",
  358. "1+2+3+4+5",
  359. "123+456",
  360. },
  361. // Failing strings
  362. {
  363. "+",
  364. "/ 3",
  365. "1+2+3+4+5+",
  366. "12a45",
  367. }
  368. );
  369. }
  370. static void test_complex_grammar() {
  371. // Test case for a more complex grammar, with both failure strings and success strings
  372. test_grammar(
  373. "medium complexity grammar",
  374. // Grammar
  375. R"""(
  376. root ::= expression
  377. expression ::= term ws (("+"|"-") ws term)*
  378. term ::= factor ws (("*"|"/") ws factor)*
  379. factor ::= number | variable | "(" expression ")" | function-call
  380. number ::= [0-9]+
  381. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  382. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  383. ws ::= [ \t\n\r]?)""",
  384. // Passing strings
  385. {
  386. "42",
  387. "1*2*3*4*5",
  388. "x",
  389. "x+10",
  390. "x1+y2",
  391. "(a+b)*(c-d)",
  392. "func()",
  393. "func(x,y+2)",
  394. "a*(b+c)-d/e",
  395. "f(g(x),h(y,z))",
  396. "x + 10",
  397. "x1 + y2",
  398. "(a + b) * (c - d)",
  399. "func()",
  400. "func(x, y + 2)",
  401. "a * (b + c) - d / e",
  402. "f(g(x), h(y, z))",
  403. "123+456",
  404. "123*456*789-123/456+789*123",
  405. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  406. },
  407. // Failing strings
  408. {
  409. "+",
  410. "/ 3x",
  411. "x + + y",
  412. "a * / b",
  413. "func(,)",
  414. "func(x y)",
  415. "(a + b",
  416. "x + y)",
  417. "a + b * (c - d",
  418. "42 +",
  419. "x +",
  420. "x + 10 +",
  421. "(a + b) * (c - d",
  422. "func(",
  423. "func(x, y + 2",
  424. "a * (b + c) - d /",
  425. "f(g(x), h(y, z)",
  426. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  427. }
  428. );
  429. }
  430. static void test_special_chars() {
  431. // A collection of tests to exercise special characters such as "."
  432. test_grammar(
  433. "special characters",
  434. // Grammar
  435. R"""(
  436. root ::= ... "abc" ...
  437. )""",
  438. // Passing strings
  439. {
  440. "abcabcabc",
  441. "aaaabcccc",
  442. // NOTE: Also ensures that multi-byte characters still count as a single character
  443. "🔵🟠✅abc❌🟠🔵"
  444. },
  445. // Failing strings
  446. {
  447. "aaabcccc",
  448. "aaaaabcccc",
  449. "aaaabccc",
  450. "aaaabccccc",
  451. "🔵🟠✅❌abc❌✅🟠🔵",
  452. "🔵🟠abc🟠🔵"
  453. }
  454. );
  455. }
  456. static void test_quantifiers() {
  457. // A collection of tests to exercise * + and ? quantifiers
  458. test_grammar(
  459. "* quantifier",
  460. // Grammar
  461. R"""(root ::= "a"*)""",
  462. // Passing strings
  463. {
  464. "",
  465. "a",
  466. "aaaaa",
  467. "aaaaaaaaaaaaaaaaaa",
  468. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  469. },
  470. // Failing strings
  471. {
  472. "b",
  473. "ab",
  474. "aab",
  475. "ba",
  476. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  477. }
  478. );
  479. test_grammar(
  480. "+ quantifier",
  481. // Grammar
  482. R"""(root ::= "a"+)""",
  483. // Passing strings
  484. {
  485. "a",
  486. "aaaaa",
  487. "aaaaaaaaaaaaaaaaaa",
  488. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  489. },
  490. // Failing strings
  491. {
  492. "",
  493. "b",
  494. "ab",
  495. "aab",
  496. "ba",
  497. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  498. }
  499. );
  500. test_grammar(
  501. "? quantifier",
  502. // Grammar
  503. R"""(root ::= "a"?)""",
  504. // Passing strings
  505. {
  506. "",
  507. "a"
  508. },
  509. // Failing strings
  510. {
  511. "b",
  512. "ab",
  513. "aa",
  514. "ba",
  515. }
  516. );
  517. test_grammar(
  518. "mixed quantifiers",
  519. // Grammar
  520. R"""(
  521. root ::= cons+ vowel* cons? (vowel cons)*
  522. vowel ::= [aeiouy]
  523. cons ::= [bcdfghjklmnpqrstvwxyz]
  524. )""",
  525. // Passing strings
  526. {
  527. "yes",
  528. "no",
  529. "noyes",
  530. "crwth",
  531. "four",
  532. "bryyyy",
  533. },
  534. // Failing strings
  535. {
  536. "yess",
  537. "yesno",
  538. "forty",
  539. "catyyy",
  540. }
  541. );
  542. test_grammar(
  543. "simple exact repetition",
  544. // Grammar
  545. R"""(
  546. root ::= [ab]{4}
  547. )""",
  548. // Passing strings
  549. {
  550. "aaaa",
  551. "bbbb",
  552. "abab",
  553. },
  554. // Failing strings
  555. {
  556. "a",
  557. "b",
  558. "aaaaa",
  559. }
  560. );
  561. test_grammar(
  562. "simple min repetition",
  563. // Grammar
  564. R"""(
  565. root ::= [ab]{4,}
  566. )""",
  567. // Passing strings
  568. {
  569. "aaaa",
  570. "aaaaab",
  571. "bbbb",
  572. "ababab",
  573. },
  574. // Failing strings
  575. {
  576. "",
  577. "aba",
  578. }
  579. );
  580. test_grammar(
  581. "simple max repetition",
  582. // Grammar
  583. R"""(
  584. root ::= [ab]{0,4}
  585. )""",
  586. // Passing strings
  587. {
  588. "",
  589. "a",
  590. "aa",
  591. "aaa",
  592. "aaab",
  593. },
  594. // Failing strings
  595. {
  596. "aaaaa",
  597. }
  598. );
  599. test_grammar(
  600. "min / max repetition",
  601. // Grammar
  602. R"""(
  603. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  604. )""",
  605. // Passing strings
  606. {
  607. "0xFF 0x12 0xAB",
  608. "0xFF 0x12 0xAB 0x00 0x00",
  609. },
  610. // Failing strings
  611. {
  612. "",
  613. "0xFF",
  614. "0xFF 0x12",
  615. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  616. }
  617. );
  618. }
  619. static void test_failure_missing_root() {
  620. fprintf(stderr, "⚫ Testing missing root node:\n");
  621. // Test case for a grammar that is missing a root rule
  622. const std::string grammar_str = R"""(
  623. rot ::= expr
  624. expr ::= term ("+" term)*
  625. term ::= number
  626. number ::= [0-9]+)""";
  627. llama_grammar_parser parsed_grammar;
  628. parsed_grammar.parse(grammar_str.c_str());
  629. // Ensure we parsed correctly
  630. assert(!parsed_grammar.rules.empty());
  631. // Ensure we do NOT have a root node
  632. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  633. fprintf(stderr, " ✅︎ Passed\n");
  634. }
  635. static void test_failure_missing_reference() {
  636. fprintf(stderr, "⚫ Testing missing reference node:\n");
  637. // Test case for a grammar that is missing a referenced rule
  638. const std::string grammar_str =
  639. R"""(root ::= expr
  640. expr ::= term ("+" term)*
  641. term ::= numero
  642. number ::= [0-9]+)""";
  643. fprintf(stderr, " Expected error: ");
  644. llama_grammar_parser parsed_grammar;
  645. parsed_grammar.parse(grammar_str.c_str());
  646. // Ensure we did NOT parsed correctly
  647. assert(parsed_grammar.rules.empty());
  648. fprintf(stderr, " End of expected error.\n");
  649. fprintf(stderr, " ✅︎ Passed\n");
  650. }
  651. static void test_failure_left_recursion() {
  652. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  653. // Test simple left recursion detection
  654. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  655. assert(test_build_grammar_fails(simple_str));
  656. // Test more complicated left recursion detection
  657. const std::string medium_str = R"""(
  658. root ::= asdf
  659. asdf ::= "a" | asdf "a"
  660. )""";
  661. assert(test_build_grammar_fails(medium_str));
  662. // Test even more complicated left recursion detection
  663. const std::string hard_str = R"""(
  664. root ::= asdf
  665. asdf ::= "a" | foo "b"
  666. foo ::= "c" | asdf "d" | "e")""";
  667. assert(test_build_grammar_fails(hard_str));
  668. // Test yet even more complicated left recursion detection
  669. const std::string hardest_str = R"""(
  670. root ::= asdf
  671. asdf ::= "a" | foo "b"
  672. foo ::= "c" | empty asdf "d" | "e"
  673. empty ::= "blah" | )""";
  674. assert(test_build_grammar_fails(hardest_str));
  675. fprintf(stderr, " ✅︎ Passed\n");
  676. }
  677. static void test_json_schema() {
  678. // Note that this is similar to the regular grammar tests,
  679. // but we convert each json schema to a grammar before parsing.
  680. // Otherwise, this test structure is the same.
  681. test_schema(
  682. "empty schema (object)",
  683. // Schema
  684. R"""(
  685. {}
  686. )""",
  687. // Passing strings
  688. {
  689. R"""({})""",
  690. R"""({"foo": "bar"})""",
  691. },
  692. // Failing strings
  693. {
  694. "",
  695. "[]",
  696. "null",
  697. R"""("")""",
  698. "true",
  699. }
  700. );
  701. test_schema(
  702. "exotic formats (list)",
  703. // Schema
  704. R"""({
  705. "items": [
  706. { "format": "date" },
  707. { "format": "uuid" },
  708. { "format": "time" },
  709. { "format": "date-time" }
  710. ]
  711. })""",
  712. // Passing strings
  713. {
  714. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  715. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  716. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  717. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  718. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  719. },
  720. // Failing strings
  721. {
  722. R"""(["foo", "bar"])""",
  723. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  724. }
  725. );
  726. test_schema(
  727. "string",
  728. // Schema
  729. R"""({
  730. "type": "string"
  731. })""",
  732. // Passing strings
  733. {
  734. R"""("foo")""",
  735. R"""("bar")""",
  736. R"""("")""",
  737. },
  738. // Failing strings
  739. {
  740. R"""({})""",
  741. R"""("foo": "bar")""",
  742. }
  743. );
  744. test_schema(
  745. "string w/ min length 1",
  746. // Schema
  747. R"""({
  748. "type": "string",
  749. "minLength": 1
  750. })""",
  751. // Passing strings
  752. {
  753. R"""("foo")""",
  754. R"""("bar")""",
  755. },
  756. // Failing strings
  757. {
  758. R"""("")""",
  759. R"""({})""",
  760. R"""("foo": "bar")""",
  761. }
  762. );
  763. test_schema(
  764. "string w/ min length 3",
  765. // Schema
  766. R"""({
  767. "type": "string",
  768. "minLength": 3
  769. })""",
  770. // Passing strings
  771. {
  772. R"""("foo")""",
  773. R"""("bar")""",
  774. R"""("foobar")""",
  775. },
  776. // Failing strings
  777. {
  778. R"""("")""",
  779. R"""("f")""",
  780. R"""("fo")""",
  781. }
  782. );
  783. test_schema(
  784. "string w/ max length",
  785. // Schema
  786. R"""({
  787. "type": "string",
  788. "maxLength": 3
  789. })""",
  790. // Passing strings
  791. {
  792. R"""("foo")""",
  793. R"""("bar")""",
  794. R"""("")""",
  795. R"""("f")""",
  796. R"""("fo")""",
  797. },
  798. // Failing strings
  799. {
  800. R"""("foobar")""",
  801. }
  802. );
  803. test_schema(
  804. "string w/ min & max length",
  805. // Schema
  806. R"""({
  807. "type": "string",
  808. "minLength": 1,
  809. "maxLength": 4
  810. })""",
  811. // Passing strings
  812. {
  813. R"""("foo")""",
  814. R"""("bar")""",
  815. R"""("f")""",
  816. R"""("barf")""",
  817. },
  818. // Failing strings
  819. {
  820. R"""("")""",
  821. R"""("barfo")""",
  822. R"""("foobar")""",
  823. }
  824. );
  825. test_schema(
  826. "boolean",
  827. // Schema
  828. R"""({
  829. "type": "boolean"
  830. })""",
  831. // Passing strings
  832. {
  833. "true",
  834. "false",
  835. },
  836. // Failing strings
  837. {
  838. R"""("")""",
  839. R"""("true")""",
  840. R"""(True)""",
  841. R"""(FALSE)""",
  842. }
  843. );
  844. test_schema(
  845. "integer",
  846. // Schema
  847. R"""({
  848. "type": "integer"
  849. })""",
  850. // Passing strings
  851. {
  852. R"""(0)""",
  853. R"""(12345)""",
  854. R"""(1234567890123456)""",
  855. },
  856. // Failing strings
  857. {
  858. R"""()""",
  859. R"""(01)""",
  860. R"""(007)""",
  861. R"""(12345678901234567 )""",
  862. }
  863. );
  864. test_schema(
  865. "string const",
  866. // Schema
  867. R"""({
  868. "const": "foo"
  869. })""",
  870. // Passing strings
  871. {
  872. R"""("foo")""",
  873. },
  874. // Failing strings
  875. {
  876. R"""(foo)""",
  877. R"""("bar")""",
  878. }
  879. );
  880. test_schema(
  881. "non-string const",
  882. // Schema
  883. R"""({
  884. "const": true
  885. })""",
  886. // Passing strings
  887. {
  888. R"""(true)""",
  889. },
  890. // Failing strings
  891. {
  892. R"""()""",
  893. R"""(foo)""",
  894. R"""("true")""",
  895. }
  896. );
  897. test_schema(
  898. "non-string const",
  899. // Schema
  900. R"""({
  901. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  902. })""",
  903. // Passing strings
  904. {
  905. R"""("red")""",
  906. R"""(null)""",
  907. R"""(42)""",
  908. R"""(["foo"])""",
  909. },
  910. // Failing strings
  911. {
  912. R"""()""",
  913. R"""(420)""",
  914. R"""(true)""",
  915. R"""(foo)""",
  916. }
  917. );
  918. test_schema(
  919. "simple pattern",
  920. // Schema
  921. R"""({
  922. "pattern": "^[a-zA-Z0-9_-]*$"
  923. })""",
  924. // Passing strings
  925. {
  926. R"""("")""",
  927. R"""("He_llo-12")""",
  928. },
  929. // Failing strings
  930. {
  931. R"""("!")""",
  932. R"""("Hello World")""",
  933. }
  934. );
  935. test_schema(
  936. "pattern with escapes",
  937. // Schema
  938. R"""({
  939. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  940. })""",
  941. // Passing strings
  942. {
  943. R"""("a^$.[]()|{}*+?b")""",
  944. },
  945. // Failing strings
  946. {
  947. R"""("ab")""",
  948. }
  949. );
  950. test_schema(
  951. "",
  952. // Schema
  953. R"""(
  954. {
  955. "type": ["array", "null"],
  956. "items": { "type": "string" }
  957. }
  958. )""",
  959. // Passing strings
  960. {
  961. "null",
  962. "[]",
  963. "[\"123\"]",
  964. "[\"foo\", \"bar\"]",
  965. },
  966. // Failing strings
  967. {
  968. "",
  969. "[123]",
  970. "\"foo\"",
  971. "[\"foo\", 42]",
  972. }
  973. );
  974. test_schema(
  975. "min+max items",
  976. // Schema
  977. R"""({
  978. "items": {
  979. "type": ["number", "integer"]
  980. },
  981. "minItems": 3,
  982. "maxItems": 5
  983. })""",
  984. // Passing strings
  985. {
  986. R"""([1, 2, 3])""",
  987. R"""([1, 2, 3, 4])""",
  988. R"""([1, 2, 3, 4, 5])""",
  989. },
  990. // Failing strings
  991. {
  992. R"""([1, 2])""",
  993. R"""([1, 2, 3, 4, 5, 6])""",
  994. R"""(1)""",
  995. }
  996. );
  997. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  998. test_schema(
  999. "object properties",
  1000. // Schema
  1001. R"""({
  1002. "type": "object",
  1003. "properties": {
  1004. "number": { "type": "number" },
  1005. "street_name": { "type": "string" },
  1006. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1007. }
  1008. })""",
  1009. // Passing strings
  1010. {
  1011. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1012. // "By default, leaving out properties is valid"
  1013. R"""({ "street_name": "Pennsylvania" })""",
  1014. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1015. // "By extension, even an empty object is valid"
  1016. R"""({})""",
  1017. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1018. },
  1019. // Failing strings
  1020. {
  1021. // Change datatype from number to string
  1022. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1023. // Reorder properties
  1024. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1025. // Reorder properties
  1026. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1027. // "Additional properties default to false for generation, even though the spec says true.
  1028. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1029. }
  1030. );
  1031. test_schema(
  1032. "additional properties can't override other properties",
  1033. R"""({
  1034. "properties": {
  1035. "a": {"type": "integer"},
  1036. "b": {"type": "integer"}
  1037. },
  1038. "additionalProperties": true
  1039. })""",
  1040. // Passing strings
  1041. {
  1042. R"""({"a": 42})""",
  1043. R"""({"c": ""})""",
  1044. R"""({"a": 42, "c": ""})""",
  1045. R"""({"a_": ""})""",
  1046. },
  1047. // Failing strings
  1048. {
  1049. R"""()""",
  1050. R"""({"a": ""})""",
  1051. R"""({"a": "", "b": ""})""",
  1052. }
  1053. );
  1054. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1055. test_schema(
  1056. "object properties, additionalProperties: true",
  1057. // Schema
  1058. R"""({
  1059. "type": "object",
  1060. "properties": {
  1061. "number": { "type": "number" },
  1062. "street_name": { "type": "string" },
  1063. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1064. },
  1065. "additionalProperties": true
  1066. })""",
  1067. // Passing strings
  1068. {
  1069. // "By extension, even an empty object is valid"
  1070. R"""({})""",
  1071. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1072. // "By default, leaving out properties is valid"
  1073. R"""({ "street_name": "Pennsylvania" })""",
  1074. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1075. // "By default, providing additional properties is valid"
  1076. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1077. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1078. },
  1079. // Failing strings
  1080. {
  1081. // Change datatype from number to string
  1082. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1083. // Reorder properties
  1084. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1085. }
  1086. );
  1087. // Additional properties: false
  1088. test_schema(
  1089. "required + optional props each in original order",
  1090. // Schema
  1091. R"""({
  1092. "type": "object",
  1093. "properties": {
  1094. "number": { "type": "number" },
  1095. "street_name": { "type": "string" },
  1096. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1097. },
  1098. "additionalProperties": false
  1099. })""",
  1100. // Passing strings
  1101. {
  1102. R"""({ "street_name": "Pennsylvania" })""",
  1103. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1104. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1105. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1106. // Spaces are permitted around enum values
  1107. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1108. },
  1109. // Failing strings
  1110. {
  1111. // Reorder properties
  1112. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1113. // Add "direction"
  1114. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1115. }
  1116. );
  1117. test_schema(
  1118. "required + optional props each in original order",
  1119. // Schema
  1120. R"""({
  1121. "properties": {
  1122. "b": {"type": "string"},
  1123. "a": {"type": "string"},
  1124. "d": {"type": "string"},
  1125. "c": {"type": "string"}
  1126. },
  1127. "required": ["a", "b"],
  1128. "additionalProperties": false
  1129. })""",
  1130. // Passing strings
  1131. {
  1132. R"""({"b": "foo", "a": "bar"})""",
  1133. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1134. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1135. },
  1136. // Failing strings
  1137. {
  1138. R"""({"a": "foo", "b": "bar"})""",
  1139. R"""({"b": "bar"})""",
  1140. R"""({"a": "foo", "c": "baz"})""",
  1141. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1142. }
  1143. );
  1144. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1145. test_schema(
  1146. "required props",
  1147. // Schema
  1148. R"""({
  1149. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1150. "$id": "https://example.com/product.schema.json",
  1151. "title": "Product",
  1152. "description": "A product from Acme's catalog",
  1153. "type": "object",
  1154. "properties": {
  1155. "productId": {
  1156. "description": "The unique identifier for a product",
  1157. "type": "integer"
  1158. },
  1159. "productName": {
  1160. "description": "Name of the product",
  1161. "type": "string"
  1162. },
  1163. "price": {
  1164. "description": "The price of the product",
  1165. "type": "number",
  1166. "exclusiveMinimum": 0
  1167. },
  1168. "tags": {
  1169. "description": "Tags for the product",
  1170. "type": "array",
  1171. "items": {
  1172. "type": "string"
  1173. },
  1174. "minItems": 1,
  1175. "uniqueItems": true
  1176. },
  1177. "dimensions": {
  1178. "type": "object",
  1179. "properties": {
  1180. "length": {
  1181. "type": "number"
  1182. },
  1183. "width": {
  1184. "type": "number"
  1185. },
  1186. "height": {
  1187. "type": "number"
  1188. }
  1189. },
  1190. "required": [ "length", "width", "height" ]
  1191. }
  1192. },
  1193. "required": [ "productId", "productName", "price" ]
  1194. })""",
  1195. // Passing strings
  1196. {
  1197. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1198. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1199. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1200. },
  1201. // Failing strings
  1202. {
  1203. R"""({})""", // Missing all required properties
  1204. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1205. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1206. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1207. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1208. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1209. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1210. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1211. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1212. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1213. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1214. }
  1215. );
  1216. }
  1217. int main() {
  1218. fprintf(stdout, "Running grammar integration tests...\n");
  1219. test_simple_grammar();
  1220. test_complex_grammar();
  1221. test_special_chars();
  1222. test_quantifiers();
  1223. test_failure_missing_root();
  1224. test_failure_missing_reference();
  1225. test_failure_left_recursion();
  1226. test_json_schema();
  1227. fprintf(stdout, "All tests passed.\n");
  1228. return 0;
  1229. }