1
0

test-grammar-integration.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "json-schema-to-grammar.h"
  5. #include "../src/unicode.h"
  6. #include "../src/llama-grammar.h"
  7. #include <nlohmann/json.hpp>
  8. #include <cassert>
  9. #include <string>
  10. #include <vector>
  11. using json = nlohmann::ordered_json;
  12. static llama_grammar * build_grammar(const std::string & grammar_str) {
  13. return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0);
  14. }
  15. static bool test_build_grammar_fails(const std::string & grammar_str) {
  16. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  17. bool grammar_fails = false;
  18. llama_grammar * grammar = build_grammar(grammar_str);
  19. if (grammar != nullptr) {
  20. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  21. } else {
  22. grammar_fails = true;
  23. fprintf(stdout, " ✅︎\n");
  24. }
  25. return grammar_fails;
  26. }
  27. static bool match_string(const std::string & input, llama_grammar * grammar) {
  28. const auto cpts = unicode_cpts_from_utf8(input);
  29. auto & stacks_cur = llama_grammar_get_stacks(grammar);
  30. for (const auto & cpt : cpts) {
  31. llama_grammar_accept(grammar, cpt);
  32. if (stacks_cur.empty()) {
  33. // no stacks means that the grammar failed to match at this point
  34. return false;
  35. }
  36. }
  37. for (const auto & stack : stacks_cur) {
  38. if (stack.empty()) {
  39. // An empty stack means that the grammar has been completed
  40. return true;
  41. }
  42. }
  43. return false;
  44. }
  45. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  46. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  47. fflush(stderr);
  48. auto * grammar = build_grammar(grammar_str);
  49. // Save the original grammar stacks so that we can reset after every new string we want to test
  50. const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
  51. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  52. fprintf(stderr, " 🔵 Valid strings:\n");
  53. // Passing strings
  54. for (const auto & test_string : passing_strings) {
  55. fprintf(stderr, " \"%s\" ", test_string.c_str());
  56. fflush(stderr);
  57. bool matched = match_string(test_string, grammar);
  58. if (!matched) {
  59. fprintf(stderr, "❌ (failed to match)\n");
  60. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  61. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  62. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  63. if (grammar_file) {
  64. fprintf(grammar_file, "%s", grammar_str.c_str());
  65. fclose(grammar_file);
  66. }
  67. // DEBUG: Write the test string to test-grammar-integration.string.txt
  68. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  69. if (string_file) {
  70. fprintf(string_file, "%s", test_string.c_str());
  71. fclose(string_file);
  72. }
  73. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  74. } else {
  75. fprintf(stdout, "✅︎\n");
  76. }
  77. assert(matched);
  78. // Reset the grammar stacks
  79. stacks_cur = stacks_org;
  80. }
  81. fprintf(stderr, " 🟠 Invalid strings:\n");
  82. // Failing strings
  83. for (const auto & test_string : failing_strings) {
  84. fprintf(stderr, " \"%s\" ", test_string.c_str());
  85. fflush(stderr);
  86. bool matched = match_string(test_string, grammar);
  87. if (matched) {
  88. fprintf(stderr, "❌ (incorrectly matched)\n");
  89. } else {
  90. fprintf(stdout, "✅︎\n");
  91. }
  92. assert(!matched);
  93. // Reset the grammar stacks
  94. stacks_cur = stacks_org;
  95. }
  96. // Clean up allocated memory
  97. llama_grammar_free_impl(grammar);
  98. }
  99. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  100. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  101. }
  102. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  103. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str), true), passing_strings, failing_strings);
  104. }
  105. static void test_simple_grammar() {
  106. test_schema(
  107. "min 0",
  108. R"""({
  109. "type": "integer",
  110. "minimum": 0
  111. })""",
  112. // Passing strings
  113. {
  114. "0",
  115. "10",
  116. "12",
  117. "10000",
  118. },
  119. // Failing strings
  120. {
  121. "-1",
  122. "-10",
  123. "-10000",
  124. "-100000000000000000000000000000000",
  125. "100000000000000000000000000000000",
  126. "00",
  127. "01",
  128. "-0",
  129. }
  130. );
  131. test_schema(
  132. "min 2",
  133. // Schema
  134. R"""({
  135. "type": "integer",
  136. "minimum": 2
  137. })""",
  138. // Passing strings
  139. {
  140. "2",
  141. "3",
  142. "4",
  143. "10",
  144. "20",
  145. "1234567890000000",
  146. },
  147. // Failing strings
  148. {
  149. "0",
  150. "1",
  151. "-1",
  152. "-100",
  153. "0",
  154. "1",
  155. "01",
  156. "02",
  157. "12345678900000000",
  158. }
  159. );
  160. test_schema(
  161. "min 456",
  162. R"""({
  163. "type": "integer",
  164. "minimum": 456
  165. })""",
  166. // Passing strings
  167. {
  168. "456",
  169. "4560",
  170. "457",
  171. "460",
  172. "500",
  173. },
  174. // Failing strings
  175. {
  176. "455",
  177. "356",
  178. "50",
  179. "050",
  180. "-1",
  181. "-456",
  182. }
  183. );
  184. test_schema(
  185. "min -123",
  186. R"""({
  187. "type": "integer",
  188. "minimum": -123
  189. })""",
  190. // Passing strings
  191. {
  192. "-123",
  193. "-122",
  194. "-11",
  195. "-1",
  196. "0",
  197. "1",
  198. "123",
  199. "1234",
  200. "2345",
  201. },
  202. // Failing strings
  203. {
  204. "-1234",
  205. "-124",
  206. }
  207. );
  208. test_schema(
  209. "max 9999",
  210. // Schema
  211. R"""({
  212. "type": "integer",
  213. "maximum": 9999
  214. })""",
  215. // Passing strings
  216. {
  217. "-99999",
  218. "0",
  219. "9999",
  220. },
  221. // Failing strings
  222. {
  223. "10000",
  224. "99991",
  225. }
  226. );
  227. test_schema(
  228. "max -9999",
  229. // Schema
  230. R"""({
  231. "type": "integer",
  232. "maximum": -9999
  233. })""",
  234. // Passing strings
  235. {
  236. "-10000",
  237. "-9999",
  238. },
  239. // Failing strings
  240. {
  241. "-9998",
  242. "0",
  243. "9999",
  244. }
  245. );
  246. test_schema(
  247. "min 5 max 30",
  248. // Schema
  249. R"""({
  250. "type": "integer",
  251. "minimum": 5,
  252. "maximum": 30
  253. })""",
  254. // Passing strings
  255. {
  256. "5",
  257. "10",
  258. "30",
  259. },
  260. // Failing strings
  261. {
  262. "05",
  263. "4",
  264. "-1",
  265. "31",
  266. "123",
  267. "0123",
  268. }
  269. );
  270. test_schema(
  271. "min 1 max 900719925474091",
  272. // Schema
  273. R"""({
  274. "type": "integer",
  275. "exclusiveMinimum": 0,
  276. "maximum": 900719925474091
  277. })""",
  278. // Passing strings
  279. {
  280. "1",
  281. "2",
  282. "10",
  283. "900719925474090",
  284. "900719925474091",
  285. },
  286. // Failing strings
  287. {
  288. "0",
  289. "01",
  290. "900719925474092",
  291. "9007199254740910",
  292. }
  293. );
  294. test_schema(
  295. "min -1 max 1",
  296. R"""({
  297. "type": "integer",
  298. "minimum": -1,
  299. "maximum": 1
  300. })""",
  301. // Passing strings
  302. {
  303. "-1",
  304. "0",
  305. "1",
  306. },
  307. // Failing strings
  308. {
  309. "-11",
  310. "-10",
  311. "-2",
  312. "2",
  313. "10",
  314. "11",
  315. }
  316. );
  317. test_schema(
  318. "min -123 max 42",
  319. R"""({
  320. "type": "integer",
  321. "minimum": -123,
  322. "maximum": 42
  323. })""",
  324. // Passing strings
  325. {
  326. "-123",
  327. "-122",
  328. "-13",
  329. "-11",
  330. "-2",
  331. "-1",
  332. "0",
  333. "1",
  334. "5",
  335. "10",
  336. "39",
  337. "40",
  338. "42",
  339. },
  340. // Failing strings
  341. {
  342. "-0123",
  343. "-124",
  344. "-1123",
  345. "-200",
  346. "43",
  347. "123",
  348. "0123",
  349. }
  350. );
  351. test_schema(
  352. "exclusive min / max",
  353. // Schema
  354. R"""({
  355. "type": "integer",
  356. "exclusiveMinimum": 0,
  357. "exclusiveMaximum": 10000
  358. })""",
  359. // Passing strings
  360. {
  361. "1",
  362. "9999",
  363. },
  364. // Failing strings
  365. {
  366. "0",
  367. "01",
  368. "10000",
  369. "99999",
  370. }
  371. );
  372. // Test case for a simple grammar
  373. test_grammar(
  374. "simple grammar",
  375. R"""(
  376. root ::= expr
  377. expr ::= term ("+" term)*
  378. term ::= number
  379. number ::= [0-9]+)""",
  380. // Passing strings
  381. {
  382. "42",
  383. "1+2+3+4+5",
  384. "123+456",
  385. },
  386. // Failing strings
  387. {
  388. "+",
  389. "/ 3",
  390. "1+2+3+4+5+",
  391. "12a45",
  392. }
  393. );
  394. }
  395. static void test_complex_grammar() {
  396. // Test case for a more complex grammar, with both failure strings and success strings
  397. test_grammar(
  398. "medium complexity grammar",
  399. // Grammar
  400. R"""(
  401. root ::= expression
  402. expression ::= term ws (("+"|"-") ws term)*
  403. term ::= factor ws (("*"|"/") ws factor)*
  404. factor ::= number | variable | "(" expression ")" | function-call
  405. number ::= [0-9]+
  406. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  407. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  408. ws ::= [ \t\n\r]?)""",
  409. // Passing strings
  410. {
  411. "42",
  412. "1*2*3*4*5",
  413. "x",
  414. "x+10",
  415. "x1+y2",
  416. "(a+b)*(c-d)",
  417. "func()",
  418. "func(x,y+2)",
  419. "a*(b+c)-d/e",
  420. "f(g(x),h(y,z))",
  421. "x + 10",
  422. "x1 + y2",
  423. "(a + b) * (c - d)",
  424. "func()",
  425. "func(x, y + 2)",
  426. "a * (b + c) - d / e",
  427. "f(g(x), h(y, z))",
  428. "123+456",
  429. "123*456*789-123/456+789*123",
  430. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  431. },
  432. // Failing strings
  433. {
  434. "+",
  435. "/ 3x",
  436. "x + + y",
  437. "a * / b",
  438. "func(,)",
  439. "func(x y)",
  440. "(a + b",
  441. "x + y)",
  442. "a + b * (c - d",
  443. "42 +",
  444. "x +",
  445. "x + 10 +",
  446. "(a + b) * (c - d",
  447. "func(",
  448. "func(x, y + 2",
  449. "a * (b + c) - d /",
  450. "f(g(x), h(y, z)",
  451. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  452. }
  453. );
  454. }
  455. static void test_special_chars() {
  456. // A collection of tests to exercise special characters such as "."
  457. test_grammar(
  458. "special characters",
  459. // Grammar
  460. R"""(
  461. root ::= ... "abc" ...
  462. )""",
  463. // Passing strings
  464. {
  465. "abcabcabc",
  466. "aaaabcccc",
  467. // NOTE: Also ensures that multi-byte characters still count as a single character
  468. "🔵🟠✅abc❌🟠🔵"
  469. },
  470. // Failing strings
  471. {
  472. "aaabcccc",
  473. "aaaaabcccc",
  474. "aaaabccc",
  475. "aaaabccccc",
  476. "🔵🟠✅❌abc❌✅🟠🔵",
  477. "🔵🟠abc🟠🔵"
  478. }
  479. );
  480. }
  481. static void test_quantifiers() {
  482. // A collection of tests to exercise * + and ? quantifiers
  483. test_grammar(
  484. "* quantifier",
  485. // Grammar
  486. R"""(root ::= "a"*)""",
  487. // Passing strings
  488. {
  489. "",
  490. "a",
  491. "aaaaa",
  492. "aaaaaaaaaaaaaaaaaa",
  493. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  494. },
  495. // Failing strings
  496. {
  497. "b",
  498. "ab",
  499. "aab",
  500. "ba",
  501. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  502. }
  503. );
  504. test_grammar(
  505. "+ quantifier",
  506. // Grammar
  507. R"""(root ::= "a"+)""",
  508. // Passing strings
  509. {
  510. "a",
  511. "aaaaa",
  512. "aaaaaaaaaaaaaaaaaa",
  513. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  514. },
  515. // Failing strings
  516. {
  517. "",
  518. "b",
  519. "ab",
  520. "aab",
  521. "ba",
  522. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  523. }
  524. );
  525. test_grammar(
  526. "? quantifier",
  527. // Grammar
  528. R"""(root ::= "a"?)""",
  529. // Passing strings
  530. {
  531. "",
  532. "a"
  533. },
  534. // Failing strings
  535. {
  536. "b",
  537. "ab",
  538. "aa",
  539. "ba",
  540. }
  541. );
  542. test_grammar(
  543. "mixed quantifiers",
  544. // Grammar
  545. R"""(
  546. root ::= cons+ vowel* cons? (vowel cons)*
  547. vowel ::= [aeiouy]
  548. cons ::= [bcdfghjklmnpqrstvwxyz]
  549. )""",
  550. // Passing strings
  551. {
  552. "yes",
  553. "no",
  554. "noyes",
  555. "crwth",
  556. "four",
  557. "bryyyy",
  558. },
  559. // Failing strings
  560. {
  561. "yess",
  562. "yesno",
  563. "forty",
  564. "catyyy",
  565. }
  566. );
  567. test_grammar(
  568. "simple exact repetition",
  569. // Grammar
  570. R"""(
  571. root ::= [ab]{4}
  572. )""",
  573. // Passing strings
  574. {
  575. "aaaa",
  576. "bbbb",
  577. "abab",
  578. },
  579. // Failing strings
  580. {
  581. "a",
  582. "b",
  583. "aaaaa",
  584. }
  585. );
  586. test_grammar(
  587. "simple min repetition",
  588. // Grammar
  589. R"""(
  590. root ::= [ab]{4,}
  591. )""",
  592. // Passing strings
  593. {
  594. "aaaa",
  595. "aaaaab",
  596. "bbbb",
  597. "ababab",
  598. },
  599. // Failing strings
  600. {
  601. "",
  602. "aba",
  603. }
  604. );
  605. test_grammar(
  606. "simple max repetition",
  607. // Grammar
  608. R"""(
  609. root ::= [ab]{0,4}
  610. )""",
  611. // Passing strings
  612. {
  613. "",
  614. "a",
  615. "aa",
  616. "aaa",
  617. "aaab",
  618. },
  619. // Failing strings
  620. {
  621. "aaaaa",
  622. }
  623. );
  624. test_grammar(
  625. "min / max repetition",
  626. // Grammar
  627. R"""(
  628. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  629. )""",
  630. // Passing strings
  631. {
  632. "0xFF 0x12 0xAB",
  633. "0xFF 0x12 0xAB 0x00 0x00",
  634. },
  635. // Failing strings
  636. {
  637. "",
  638. "0xFF",
  639. "0xFF 0x12",
  640. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  641. }
  642. );
  643. }
  644. static void test_failure_missing_root() {
  645. fprintf(stderr, "⚫ Testing missing root node:\n");
  646. // Test case for a grammar that is missing a root rule
  647. const std::string grammar_str = R"""(
  648. rot ::= expr
  649. expr ::= term ("+" term)*
  650. term ::= number
  651. number ::= [0-9]+)""";
  652. llama_grammar_parser parsed_grammar;
  653. parsed_grammar.parse(grammar_str.c_str());
  654. // Ensure we parsed correctly
  655. assert(!parsed_grammar.rules.empty());
  656. // Ensure we do NOT have a root node
  657. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  658. fprintf(stderr, " ✅︎ Passed\n");
  659. }
  660. static void test_failure_missing_reference() {
  661. fprintf(stderr, "⚫ Testing missing reference node:\n");
  662. // Test case for a grammar that is missing a referenced rule
  663. const std::string grammar_str =
  664. R"""(root ::= expr
  665. expr ::= term ("+" term)*
  666. term ::= numero
  667. number ::= [0-9]+)""";
  668. fprintf(stderr, " Expected error: ");
  669. llama_grammar_parser parsed_grammar;
  670. parsed_grammar.parse(grammar_str.c_str());
  671. // Ensure we did NOT parsed correctly
  672. assert(parsed_grammar.rules.empty());
  673. fprintf(stderr, " End of expected error.\n");
  674. fprintf(stderr, " ✅︎ Passed\n");
  675. }
  676. static void test_failure_left_recursion() {
  677. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  678. // Test simple left recursion detection
  679. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  680. assert(test_build_grammar_fails(simple_str));
  681. // Test more complicated left recursion detection
  682. const std::string medium_str = R"""(
  683. root ::= asdf
  684. asdf ::= "a" | asdf "a"
  685. )""";
  686. assert(test_build_grammar_fails(medium_str));
  687. // Test even more complicated left recursion detection
  688. const std::string hard_str = R"""(
  689. root ::= asdf
  690. asdf ::= "a" | foo "b"
  691. foo ::= "c" | asdf "d" | "e")""";
  692. assert(test_build_grammar_fails(hard_str));
  693. // Test yet even more complicated left recursion detection
  694. const std::string hardest_str = R"""(
  695. root ::= asdf
  696. asdf ::= "a" | foo "b"
  697. foo ::= "c" | empty asdf "d" | "e"
  698. empty ::= "blah" | )""";
  699. assert(test_build_grammar_fails(hardest_str));
  700. fprintf(stderr, " ✅︎ Passed\n");
  701. }
  702. static void test_json_schema() {
  703. // Note that this is similar to the regular grammar tests,
  704. // but we convert each json schema to a grammar before parsing.
  705. // Otherwise, this test structure is the same.
  706. test_schema(
  707. "empty schema (object)",
  708. // Schema
  709. R"""(
  710. {}
  711. )""",
  712. // Passing strings
  713. {
  714. R"""({})""",
  715. R"""({"foo": "bar"})""",
  716. },
  717. // Failing strings
  718. {
  719. "",
  720. "[]",
  721. "null",
  722. R"""("")""",
  723. "true",
  724. }
  725. );
  726. test_schema(
  727. "exotic formats (list)",
  728. // Schema
  729. R"""({
  730. "items": [
  731. { "format": "date" },
  732. { "format": "uuid" },
  733. { "format": "time" },
  734. { "format": "date-time" }
  735. ]
  736. })""",
  737. // Passing strings
  738. {
  739. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  740. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  741. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  742. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  743. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  744. },
  745. // Failing strings
  746. {
  747. R"""(["foo", "bar"])""",
  748. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  749. }
  750. );
  751. test_schema(
  752. "string",
  753. // Schema
  754. R"""({
  755. "type": "string"
  756. })""",
  757. // Passing strings
  758. {
  759. R"""("foo")""",
  760. R"""("bar")""",
  761. R"""("")""",
  762. },
  763. // Failing strings
  764. {
  765. R"""({})""",
  766. R"""("foo": "bar")""",
  767. }
  768. );
  769. test_schema(
  770. "string w/ min length 1",
  771. // Schema
  772. R"""({
  773. "type": "string",
  774. "minLength": 1
  775. })""",
  776. // Passing strings
  777. {
  778. R"""("foo")""",
  779. R"""("bar")""",
  780. },
  781. // Failing strings
  782. {
  783. R"""("")""",
  784. R"""({})""",
  785. R"""("foo": "bar")""",
  786. }
  787. );
  788. test_schema(
  789. "string w/ min length 3",
  790. // Schema
  791. R"""({
  792. "type": "string",
  793. "minLength": 3
  794. })""",
  795. // Passing strings
  796. {
  797. R"""("foo")""",
  798. R"""("bar")""",
  799. R"""("foobar")""",
  800. },
  801. // Failing strings
  802. {
  803. R"""("")""",
  804. R"""("f")""",
  805. R"""("fo")""",
  806. }
  807. );
  808. test_schema(
  809. "string w/ max length",
  810. // Schema
  811. R"""({
  812. "type": "string",
  813. "maxLength": 3
  814. })""",
  815. // Passing strings
  816. {
  817. R"""("foo")""",
  818. R"""("bar")""",
  819. R"""("")""",
  820. R"""("f")""",
  821. R"""("fo")""",
  822. },
  823. // Failing strings
  824. {
  825. R"""("foobar")""",
  826. }
  827. );
  828. test_schema(
  829. "string w/ min & max length",
  830. // Schema
  831. R"""({
  832. "type": "string",
  833. "minLength": 1,
  834. "maxLength": 4
  835. })""",
  836. // Passing strings
  837. {
  838. R"""("foo")""",
  839. R"""("bar")""",
  840. R"""("f")""",
  841. R"""("barf")""",
  842. },
  843. // Failing strings
  844. {
  845. R"""("")""",
  846. R"""("barfo")""",
  847. R"""("foobar")""",
  848. }
  849. );
  850. test_schema(
  851. "boolean",
  852. // Schema
  853. R"""({
  854. "type": "boolean"
  855. })""",
  856. // Passing strings
  857. {
  858. "true",
  859. "false",
  860. },
  861. // Failing strings
  862. {
  863. R"""("")""",
  864. R"""("true")""",
  865. R"""(True)""",
  866. R"""(FALSE)""",
  867. }
  868. );
  869. test_schema(
  870. "integer",
  871. // Schema
  872. R"""({
  873. "type": "integer"
  874. })""",
  875. // Passing strings
  876. {
  877. R"""(0)""",
  878. R"""(12345)""",
  879. R"""(1234567890123456)""",
  880. },
  881. // Failing strings
  882. {
  883. R"""()""",
  884. R"""(01)""",
  885. R"""(007)""",
  886. R"""(12345678901234567 )""",
  887. }
  888. );
  889. test_schema(
  890. "string const",
  891. // Schema
  892. R"""({
  893. "const": "foo"
  894. })""",
  895. // Passing strings
  896. {
  897. R"""("foo")""",
  898. },
  899. // Failing strings
  900. {
  901. R"""(foo)""",
  902. R"""("bar")""",
  903. }
  904. );
  905. test_schema(
  906. "non-string const",
  907. // Schema
  908. R"""({
  909. "const": true
  910. })""",
  911. // Passing strings
  912. {
  913. R"""(true)""",
  914. },
  915. // Failing strings
  916. {
  917. R"""()""",
  918. R"""(foo)""",
  919. R"""("true")""",
  920. }
  921. );
  922. test_schema(
  923. "non-string const",
  924. // Schema
  925. R"""({
  926. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  927. })""",
  928. // Passing strings
  929. {
  930. R"""("red")""",
  931. R"""(null)""",
  932. R"""(42)""",
  933. R"""(["foo"])""",
  934. },
  935. // Failing strings
  936. {
  937. R"""()""",
  938. R"""(420)""",
  939. R"""(true)""",
  940. R"""(foo)""",
  941. }
  942. );
  943. test_schema(
  944. "simple pattern",
  945. // Schema
  946. R"""({
  947. "pattern": "^[a-zA-Z0-9_-]*$"
  948. })""",
  949. // Passing strings
  950. {
  951. R"""("")""",
  952. R"""("He_llo-12")""",
  953. },
  954. // Failing strings
  955. {
  956. R"""("!")""",
  957. R"""("Hello World")""",
  958. }
  959. );
  960. test_schema(
  961. "pattern with escapes",
  962. // Schema
  963. R"""({
  964. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  965. })""",
  966. // Passing strings
  967. {
  968. R"""("a^$.[]()|{}*+?b")""",
  969. },
  970. // Failing strings
  971. {
  972. R"""("ab")""",
  973. }
  974. );
  975. test_schema(
  976. "",
  977. // Schema
  978. R"""(
  979. {
  980. "type": ["array", "null"],
  981. "items": { "type": "string" }
  982. }
  983. )""",
  984. // Passing strings
  985. {
  986. "null",
  987. "[]",
  988. "[\"123\"]",
  989. "[\"foo\", \"bar\"]",
  990. },
  991. // Failing strings
  992. {
  993. "",
  994. "[123]",
  995. "\"foo\"",
  996. "[\"foo\", 42]",
  997. }
  998. );
  999. test_schema(
  1000. "min+max items",
  1001. // Schema
  1002. R"""({
  1003. "items": {
  1004. "type": ["number", "integer"]
  1005. },
  1006. "minItems": 3,
  1007. "maxItems": 5
  1008. })""",
  1009. // Passing strings
  1010. {
  1011. R"""([1, 2, 3])""",
  1012. R"""([1, 2, 3, 4])""",
  1013. R"""([1, 2, 3, 4, 5])""",
  1014. },
  1015. // Failing strings
  1016. {
  1017. R"""([1, 2])""",
  1018. R"""([1, 2, 3, 4, 5, 6])""",
  1019. R"""(1)""",
  1020. }
  1021. );
  1022. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1023. test_schema(
  1024. "object properties",
  1025. // Schema
  1026. R"""({
  1027. "type": "object",
  1028. "properties": {
  1029. "number": { "type": "number" },
  1030. "street_name": { "type": "string" },
  1031. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1032. }
  1033. })""",
  1034. // Passing strings
  1035. {
  1036. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1037. // "By default, leaving out properties is valid"
  1038. R"""({ "street_name": "Pennsylvania" })""",
  1039. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1040. // "By extension, even an empty object is valid"
  1041. R"""({})""",
  1042. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1043. },
  1044. // Failing strings
  1045. {
  1046. // Change datatype from number to string
  1047. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1048. // Reorder properties
  1049. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1050. // Reorder properties
  1051. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1052. // "Additional properties default to false for generation, even though the spec says true.
  1053. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1054. }
  1055. );
  1056. test_schema(
  1057. "additional properties can't override other properties",
  1058. R"""({
  1059. "properties": {
  1060. "a": {"type": "integer"},
  1061. "b": {"type": "integer"}
  1062. },
  1063. "additionalProperties": true
  1064. })""",
  1065. // Passing strings
  1066. {
  1067. R"""({"a": 42})""",
  1068. R"""({"c": ""})""",
  1069. R"""({"a": 42, "c": ""})""",
  1070. R"""({"a_": ""})""",
  1071. },
  1072. // Failing strings
  1073. {
  1074. R"""()""",
  1075. R"""({"a": ""})""",
  1076. R"""({"a": "", "b": ""})""",
  1077. }
  1078. );
  1079. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1080. test_schema(
  1081. "object properties, additionalProperties: true",
  1082. // Schema
  1083. R"""({
  1084. "type": "object",
  1085. "properties": {
  1086. "number": { "type": "number" },
  1087. "street_name": { "type": "string" },
  1088. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1089. },
  1090. "additionalProperties": true
  1091. })""",
  1092. // Passing strings
  1093. {
  1094. // "By extension, even an empty object is valid"
  1095. R"""({})""",
  1096. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1097. // "By default, leaving out properties is valid"
  1098. R"""({ "street_name": "Pennsylvania" })""",
  1099. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1100. // "By default, providing additional properties is valid"
  1101. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1102. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1103. },
  1104. // Failing strings
  1105. {
  1106. // Change datatype from number to string
  1107. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1108. // Reorder properties
  1109. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1110. }
  1111. );
  1112. // Additional properties: false
  1113. test_schema(
  1114. "required + optional props each in original order",
  1115. // Schema
  1116. R"""({
  1117. "type": "object",
  1118. "properties": {
  1119. "number": { "type": "number" },
  1120. "street_name": { "type": "string" },
  1121. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1122. },
  1123. "additionalProperties": false
  1124. })""",
  1125. // Passing strings
  1126. {
  1127. R"""({ "street_name": "Pennsylvania" })""",
  1128. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1129. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1130. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1131. // Spaces are permitted around enum values
  1132. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1133. },
  1134. // Failing strings
  1135. {
  1136. // Reorder properties
  1137. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1138. // Add "direction"
  1139. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1140. }
  1141. );
  1142. test_schema(
  1143. "required + optional props each in original order",
  1144. // Schema
  1145. R"""({
  1146. "properties": {
  1147. "b": {"type": "string"},
  1148. "a": {"type": "string"},
  1149. "d": {"type": "string"},
  1150. "c": {"type": "string"}
  1151. },
  1152. "required": ["a", "b"],
  1153. "additionalProperties": false
  1154. })""",
  1155. // Passing strings
  1156. {
  1157. R"""({"b": "foo", "a": "bar"})""",
  1158. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1159. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1160. },
  1161. // Failing strings
  1162. {
  1163. R"""({"a": "foo", "b": "bar"})""",
  1164. R"""({"b": "bar"})""",
  1165. R"""({"a": "foo", "c": "baz"})""",
  1166. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1167. }
  1168. );
  1169. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1170. test_schema(
  1171. "required props",
  1172. // Schema
  1173. R"""({
  1174. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1175. "$id": "https://example.com/product.schema.json",
  1176. "title": "Product",
  1177. "description": "A product from Acme's catalog",
  1178. "type": "object",
  1179. "properties": {
  1180. "productId": {
  1181. "description": "The unique identifier for a product",
  1182. "type": "integer"
  1183. },
  1184. "productName": {
  1185. "description": "Name of the product",
  1186. "type": "string"
  1187. },
  1188. "price": {
  1189. "description": "The price of the product",
  1190. "type": "number",
  1191. "exclusiveMinimum": 0
  1192. },
  1193. "tags": {
  1194. "description": "Tags for the product",
  1195. "type": "array",
  1196. "items": {
  1197. "type": "string"
  1198. },
  1199. "minItems": 1,
  1200. "uniqueItems": true
  1201. },
  1202. "dimensions": {
  1203. "type": "object",
  1204. "properties": {
  1205. "length": {
  1206. "type": "number"
  1207. },
  1208. "width": {
  1209. "type": "number"
  1210. },
  1211. "height": {
  1212. "type": "number"
  1213. }
  1214. },
  1215. "required": [ "length", "width", "height" ]
  1216. }
  1217. },
  1218. "required": [ "productId", "productName", "price" ]
  1219. })""",
  1220. // Passing strings
  1221. {
  1222. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1223. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1224. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1225. },
  1226. // Failing strings
  1227. {
  1228. R"""({})""", // Missing all required properties
  1229. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1230. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1231. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1232. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1233. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1234. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1235. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1236. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1237. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1238. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1239. }
  1240. );
  1241. }
  1242. int main() {
  1243. fprintf(stdout, "Running grammar integration tests...\n");
  1244. test_simple_grammar();
  1245. test_complex_grammar();
  1246. test_special_chars();
  1247. test_quantifiers();
  1248. test_failure_missing_root();
  1249. test_failure_missing_reference();
  1250. test_failure_left_recursion();
  1251. test_json_schema();
  1252. fprintf(stdout, "All tests passed.\n");
  1253. return 0;
  1254. }