// test-grammar-integration.cpp
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #define LLAMA_API_INTERNAL
  5. #include "ggml.h"
  6. #include "llama.h"
  7. #include "grammar-parser.h"
  8. #include "json-schema-to-grammar.h"
  9. #include "unicode.h"
  10. #include <cassert>
  11. #include <string>
  12. #include <vector>
  13. using json = nlohmann::ordered_json;
  14. static llama_grammar* build_grammar(const std::string & grammar_str) {
  15. auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  16. // Ensure we parsed correctly
  17. assert(!parsed_grammar.rules.empty());
  18. // Ensure we have a root node
  19. assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
  20. std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
  21. llama_grammar* grammar = llama_grammar_init(
  22. grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
  23. return grammar;
  24. }
  25. static bool test_build_grammar_fails(const std::string & grammar_str) {
  26. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  27. bool grammar_fails = false;
  28. llama_grammar * grammar = build_grammar(grammar_str);
  29. if (grammar != nullptr) {
  30. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  31. } else {
  32. grammar_fails = true;
  33. fprintf(stdout, " ✅︎\n");
  34. }
  35. return grammar_fails;
  36. }
  37. static bool match_string(const std::string & input, llama_grammar* grammar) {
  38. auto decoded = decode_utf8(input, {});
  39. const auto & code_points = decoded.first;
  40. for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
  41. auto prev_stacks = grammar->stacks;
  42. llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
  43. if (grammar->stacks.empty()) {
  44. // no stacks means that the grammar failed to match at this point
  45. return false;
  46. }
  47. }
  48. for (const auto & stack : grammar->stacks) {
  49. if (stack.empty()) {
  50. // An empty stack means that the grammar has been completed
  51. return true;
  52. }
  53. }
  54. return false;
  55. }
  56. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  57. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  58. fflush(stderr);
  59. auto grammar = build_grammar(grammar_str);
  60. // Save the original grammar stacks so that we can reset after every new string we want to test
  61. auto original_stacks = grammar->stacks;
  62. fprintf(stderr, " 🔵 Valid strings:\n");
  63. // Passing strings
  64. for (const auto & test_string : passing_strings) {
  65. fprintf(stderr, " \"%s\" ", test_string.c_str());
  66. fflush(stderr);
  67. bool matched = match_string(test_string, grammar);
  68. if (!matched) {
  69. fprintf(stderr, "❌ (failed to match)\n");
  70. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  71. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  72. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  73. if (grammar_file) {
  74. fprintf(grammar_file, "%s", grammar_str.c_str());
  75. fclose(grammar_file);
  76. }
  77. // DEBUG: Write the test string to test-grammar-integration.string.txt
  78. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  79. if (string_file) {
  80. fprintf(string_file, "%s", test_string.c_str());
  81. fclose(string_file);
  82. }
  83. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  84. } else {
  85. fprintf(stdout, "✅︎\n");
  86. }
  87. assert(matched);
  88. // Reset the grammar stacks
  89. grammar->stacks = original_stacks;
  90. }
  91. fprintf(stderr, " 🟠 Invalid strings:\n");
  92. // Failing strings
  93. for (const auto & test_string : failing_strings) {
  94. fprintf(stderr, " \"%s\" ", test_string.c_str());
  95. fflush(stderr);
  96. bool matched = match_string(test_string, grammar);
  97. if (matched) {
  98. fprintf(stderr, "❌ (incorrectly matched)\n");
  99. } else {
  100. fprintf(stdout, "✅︎\n");
  101. }
  102. assert(!matched);
  103. // Reset the grammar stacks
  104. grammar->stacks = original_stacks;
  105. }
  106. // Clean up allocated memory
  107. llama_grammar_free(grammar);
  108. }
  109. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  110. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  111. }
  112. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  113. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  114. }
  115. static void test_simple_grammar() {
  116. test_schema(
  117. "min 0",
  118. R"""({
  119. "type": "integer",
  120. "minimum": 0
  121. })""",
  122. // Passing strings
  123. {
  124. "0",
  125. "10",
  126. "12",
  127. "10000",
  128. },
  129. // Failing strings
  130. {
  131. "-1",
  132. "-10",
  133. "-10000",
  134. "-100000000000000000000000000000000",
  135. "100000000000000000000000000000000",
  136. "00",
  137. "01",
  138. "-0",
  139. }
  140. );
  141. test_schema(
  142. "min 2",
  143. // Schema
  144. R"""({
  145. "type": "integer",
  146. "minimum": 2
  147. })""",
  148. // Passing strings
  149. {
  150. "2",
  151. "3",
  152. "4",
  153. "10",
  154. "20",
  155. "1234567890000000",
  156. },
  157. // Failing strings
  158. {
  159. "0",
  160. "1",
  161. "-1",
  162. "-100",
  163. "0",
  164. "1",
  165. "01",
  166. "02",
  167. "12345678900000000",
  168. }
  169. );
  170. test_schema(
  171. "min 456",
  172. R"""({
  173. "type": "integer",
  174. "minimum": 456
  175. })""",
  176. // Passing strings
  177. {
  178. "456",
  179. "4560",
  180. "457",
  181. "460",
  182. "500",
  183. },
  184. // Failing strings
  185. {
  186. "455",
  187. "356",
  188. "50",
  189. "050",
  190. "-1",
  191. "-456",
  192. }
  193. );
  194. test_schema(
  195. "min -123",
  196. R"""({
  197. "type": "integer",
  198. "minimum": -123
  199. })""",
  200. // Passing strings
  201. {
  202. "-123",
  203. "-122",
  204. "-11",
  205. "-1",
  206. "0",
  207. "1",
  208. "123",
  209. "1234",
  210. "2345",
  211. },
  212. // Failing strings
  213. {
  214. "-1234",
  215. "-124",
  216. }
  217. );
  218. test_schema(
  219. "max 9999",
  220. // Schema
  221. R"""({
  222. "type": "integer",
  223. "maximum": 9999
  224. })""",
  225. // Passing strings
  226. {
  227. "-99999",
  228. "0",
  229. "9999",
  230. },
  231. // Failing strings
  232. {
  233. "10000",
  234. "99991",
  235. }
  236. );
  237. test_schema(
  238. "max -9999",
  239. // Schema
  240. R"""({
  241. "type": "integer",
  242. "maximum": -9999
  243. })""",
  244. // Passing strings
  245. {
  246. "-10000",
  247. "-9999",
  248. },
  249. // Failing strings
  250. {
  251. "-9998",
  252. "0",
  253. "9999",
  254. }
  255. );
  256. test_schema(
  257. "min 5 max 30",
  258. // Schema
  259. R"""({
  260. "type": "integer",
  261. "minimum": 5,
  262. "maximum": 30
  263. })""",
  264. // Passing strings
  265. {
  266. "5",
  267. "10",
  268. "30",
  269. },
  270. // Failing strings
  271. {
  272. "05",
  273. "4",
  274. "-1",
  275. "31",
  276. "123",
  277. "0123",
  278. }
  279. );
  280. test_schema(
  281. "min -1 max 1",
  282. R"""({
  283. "type": "integer",
  284. "minimum": -1,
  285. "maximum": 1
  286. })""",
  287. // Passing strings
  288. {
  289. "-1",
  290. "0",
  291. "1",
  292. },
  293. // Failing strings
  294. {
  295. "-11",
  296. "-10",
  297. "-2",
  298. "2",
  299. "10",
  300. "11",
  301. }
  302. );
  303. test_schema(
  304. "min -123 max 42",
  305. R"""({
  306. "type": "integer",
  307. "minimum": -123,
  308. "maximum": 42
  309. })""",
  310. // Passing strings
  311. {
  312. "-123",
  313. "-122",
  314. "-13",
  315. "-11",
  316. "-2",
  317. "-1",
  318. "0",
  319. "1",
  320. "5",
  321. "10",
  322. "39",
  323. "40",
  324. "42",
  325. },
  326. // Failing strings
  327. {
  328. "-0123",
  329. "-124",
  330. "-1123",
  331. "-200",
  332. "43",
  333. "123",
  334. "0123",
  335. }
  336. );
  337. test_schema(
  338. "exclusive min / max",
  339. // Schema
  340. R"""({
  341. "type": "integer",
  342. "exclusiveMinimum": 0,
  343. "exclusiveMaximum": 10000
  344. })""",
  345. // Passing strings
  346. {
  347. "1",
  348. "9999",
  349. },
  350. // Failing strings
  351. {
  352. "0",
  353. "01",
  354. "10000",
  355. "99999",
  356. }
  357. );
  358. // Test case for a simple grammar
  359. test_grammar(
  360. "simple grammar",
  361. R"""(
  362. root ::= expr
  363. expr ::= term ("+" term)*
  364. term ::= number
  365. number ::= [0-9]+)""",
  366. // Passing strings
  367. {
  368. "42",
  369. "1+2+3+4+5",
  370. "123+456",
  371. },
  372. // Failing strings
  373. {
  374. "+",
  375. "/ 3",
  376. "1+2+3+4+5+",
  377. "12a45",
  378. }
  379. );
  380. }
  381. static void test_complex_grammar() {
  382. // Test case for a more complex grammar, with both failure strings and success strings
  383. test_grammar(
  384. "medium complexity grammar",
  385. // Grammar
  386. R"""(
  387. root ::= expression
  388. expression ::= term ws (("+"|"-") ws term)*
  389. term ::= factor ws (("*"|"/") ws factor)*
  390. factor ::= number | variable | "(" expression ")" | function-call
  391. number ::= [0-9]+
  392. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  393. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  394. ws ::= [ \t\n\r]?)""",
  395. // Passing strings
  396. {
  397. "42",
  398. "1*2*3*4*5",
  399. "x",
  400. "x+10",
  401. "x1+y2",
  402. "(a+b)*(c-d)",
  403. "func()",
  404. "func(x,y+2)",
  405. "a*(b+c)-d/e",
  406. "f(g(x),h(y,z))",
  407. "x + 10",
  408. "x1 + y2",
  409. "(a + b) * (c - d)",
  410. "func()",
  411. "func(x, y + 2)",
  412. "a * (b + c) - d / e",
  413. "f(g(x), h(y, z))",
  414. "123+456",
  415. "123*456*789-123/456+789*123",
  416. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  417. },
  418. // Failing strings
  419. {
  420. "+",
  421. "/ 3x",
  422. "x + + y",
  423. "a * / b",
  424. "func(,)",
  425. "func(x y)",
  426. "(a + b",
  427. "x + y)",
  428. "a + b * (c - d",
  429. "42 +",
  430. "x +",
  431. "x + 10 +",
  432. "(a + b) * (c - d",
  433. "func(",
  434. "func(x, y + 2",
  435. "a * (b + c) - d /",
  436. "f(g(x), h(y, z)",
  437. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  438. }
  439. );
  440. }
  441. static void test_special_chars() {
  442. // A collection of tests to exercise special characters such as "."
  443. test_grammar(
  444. "special characters",
  445. // Grammar
  446. R"""(
  447. root ::= ... "abc" ...
  448. )""",
  449. // Passing strings
  450. {
  451. "abcabcabc",
  452. "aaaabcccc",
  453. // NOTE: Also ensures that multi-byte characters still count as a single character
  454. "🔵🟠✅abc❌🟠🔵"
  455. },
  456. // Failing strings
  457. {
  458. "aaabcccc",
  459. "aaaaabcccc",
  460. "aaaabccc",
  461. "aaaabccccc",
  462. "🔵🟠✅❌abc❌✅🟠🔵"
  463. "🔵🟠abc🟠🔵"
  464. }
  465. );
  466. }
  467. static void test_quantifiers() {
  468. // A collection of tests to exercise * + and ? quantifiers
  469. test_grammar(
  470. "* quantifier",
  471. // Grammar
  472. R"""(root ::= "a"*)""",
  473. // Passing strings
  474. {
  475. "",
  476. "a",
  477. "aaaaa",
  478. "aaaaaaaaaaaaaaaaaa",
  479. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  480. },
  481. // Failing strings
  482. {
  483. "b",
  484. "ab",
  485. "aab",
  486. "ba",
  487. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  488. }
  489. );
  490. test_grammar(
  491. "+ quantifier",
  492. // Grammar
  493. R"""(root ::= "a"+)""",
  494. // Passing strings
  495. {
  496. "a",
  497. "aaaaa",
  498. "aaaaaaaaaaaaaaaaaa",
  499. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  500. },
  501. // Failing strings
  502. {
  503. "",
  504. "b",
  505. "ab",
  506. "aab",
  507. "ba",
  508. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  509. }
  510. );
  511. test_grammar(
  512. "? quantifier",
  513. // Grammar
  514. R"""(root ::= "a"?)""",
  515. // Passing strings
  516. {
  517. "",
  518. "a"
  519. },
  520. // Failing strings
  521. {
  522. "b",
  523. "ab",
  524. "aa",
  525. "ba",
  526. }
  527. );
  528. test_grammar(
  529. "mixed quantifiers",
  530. // Grammar
  531. R"""(
  532. root ::= cons+ vowel* cons? (vowel cons)*
  533. vowel ::= [aeiouy]
  534. cons ::= [bcdfghjklmnpqrstvwxyz]
  535. )""",
  536. // Passing strings
  537. {
  538. "yes",
  539. "no",
  540. "noyes",
  541. "crwth",
  542. "four",
  543. "bryyyy",
  544. },
  545. // Failing strings
  546. {
  547. "yess",
  548. "yesno",
  549. "forty",
  550. "catyyy",
  551. }
  552. );
  553. test_grammar(
  554. "simple exact repetition",
  555. // Grammar
  556. R"""(
  557. root ::= [ab]{4}
  558. )""",
  559. // Passing strings
  560. {
  561. "aaaa",
  562. "bbbb",
  563. "abab",
  564. },
  565. // Failing strings
  566. {
  567. "a",
  568. "b",
  569. "aaaaa",
  570. }
  571. );
  572. test_grammar(
  573. "simple min repetition",
  574. // Grammar
  575. R"""(
  576. root ::= [ab]{4,}
  577. )""",
  578. // Passing strings
  579. {
  580. "aaaa",
  581. "aaaaab",
  582. "bbbb",
  583. "ababab",
  584. },
  585. // Failing strings
  586. {
  587. "",
  588. "aba",
  589. }
  590. );
  591. test_grammar(
  592. "simple max repetition",
  593. // Grammar
  594. R"""(
  595. root ::= [ab]{0,4}
  596. )""",
  597. // Passing strings
  598. {
  599. "",
  600. "a",
  601. "aa",
  602. "aaa",
  603. "aaab",
  604. },
  605. // Failing strings
  606. {
  607. "aaaaa",
  608. }
  609. );
  610. test_grammar(
  611. "min / max repetition",
  612. // Grammar
  613. R"""(
  614. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  615. )""",
  616. // Passing strings
  617. {
  618. "0xFF 0x12 0xAB",
  619. "0xFF 0x12 0xAB 0x00 0x00",
  620. },
  621. // Failing strings
  622. {
  623. "",
  624. "0xFF",
  625. "0xFF 0x12",
  626. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  627. }
  628. );
  629. }
  630. static void test_failure_missing_root() {
  631. fprintf(stderr, "⚫ Testing missing root node:\n");
  632. // Test case for a grammar that is missing a root rule
  633. const std::string grammar_str = R"""(
  634. rot ::= expr
  635. expr ::= term ("+" term)*
  636. term ::= number
  637. number ::= [0-9]+)""";
  638. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  639. // Ensure we parsed correctly
  640. assert(!parsed_grammar.rules.empty());
  641. // Ensure we do NOT have a root node
  642. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  643. fprintf(stderr, " ✅︎ Passed\n");
  644. }
  645. static void test_failure_missing_reference() {
  646. fprintf(stderr, "⚫ Testing missing reference node:\n");
  647. // Test case for a grammar that is missing a referenced rule
  648. const std::string grammar_str =
  649. R"""(root ::= expr
  650. expr ::= term ("+" term)*
  651. term ::= numero
  652. number ::= [0-9]+)""";
  653. fprintf(stderr, " Expected error: ");
  654. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  655. // Ensure we did NOT parsed correctly
  656. assert(parsed_grammar.rules.empty());
  657. fprintf(stderr, " End of expected error.\n");
  658. fprintf(stderr, " ✅︎ Passed\n");
  659. }
  660. static void test_failure_left_recursion() {
  661. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  662. // Test simple left recursion detection
  663. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  664. assert(test_build_grammar_fails(simple_str));
  665. // Test more complicated left recursion detection
  666. const std::string medium_str = R"""(
  667. root ::= asdf
  668. asdf ::= "a" | asdf "a"
  669. )""";
  670. assert(test_build_grammar_fails(medium_str));
  671. // Test even more complicated left recursion detection
  672. const std::string hard_str = R"""(
  673. root ::= asdf
  674. asdf ::= "a" | foo "b"
  675. foo ::= "c" | asdf "d" | "e")""";
  676. assert(test_build_grammar_fails(hard_str));
  677. // Test yet even more complicated left recursion detection
  678. const std::string hardest_str = R"""(
  679. root ::= asdf
  680. asdf ::= "a" | foo "b"
  681. foo ::= "c" | empty asdf "d" | "e"
  682. empty ::= "blah" | )""";
  683. assert(test_build_grammar_fails(hardest_str));
  684. fprintf(stderr, " ✅︎ Passed\n");
  685. }
  686. static void test_json_schema() {
  687. // Note that this is similar to the regular grammar tests,
  688. // but we convert each json schema to a grammar before parsing.
  689. // Otherwise, this test structure is the same.
  690. test_schema(
  691. "empty schema (object)",
  692. // Schema
  693. R"""(
  694. {}
  695. )""",
  696. // Passing strings
  697. {
  698. R"""({})""",
  699. R"""({"foo": "bar"})""",
  700. },
  701. // Failing strings
  702. {
  703. "",
  704. "[]",
  705. "null",
  706. R"""("")""",
  707. "true",
  708. }
  709. );
  710. test_schema(
  711. "exotic formats (list)",
  712. // Schema
  713. R"""({
  714. "items": [
  715. { "format": "date" },
  716. { "format": "uuid" },
  717. { "format": "time" },
  718. { "format": "date-time" }
  719. ]
  720. })""",
  721. // Passing strings
  722. {
  723. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  724. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  725. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  726. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  727. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  728. },
  729. // Failing strings
  730. {
  731. R"""(["foo", "bar"])""",
  732. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  733. }
  734. );
  735. test_schema(
  736. "string",
  737. // Schema
  738. R"""({
  739. "type": "string"
  740. })""",
  741. // Passing strings
  742. {
  743. R"""("foo")""",
  744. R"""("bar")""",
  745. R"""("")""",
  746. },
  747. // Failing strings
  748. {
  749. R"""({})""",
  750. R"""("foo": "bar")""",
  751. }
  752. );
  753. test_schema(
  754. "string w/ min length 1",
  755. // Schema
  756. R"""({
  757. "type": "string",
  758. "minLength": 1
  759. })""",
  760. // Passing strings
  761. {
  762. R"""("foo")""",
  763. R"""("bar")""",
  764. },
  765. // Failing strings
  766. {
  767. R"""("")""",
  768. R"""({})""",
  769. R"""("foo": "bar")""",
  770. }
  771. );
  772. test_schema(
  773. "string w/ min length 3",
  774. // Schema
  775. R"""({
  776. "type": "string",
  777. "minLength": 3
  778. })""",
  779. // Passing strings
  780. {
  781. R"""("foo")""",
  782. R"""("bar")""",
  783. R"""("foobar")""",
  784. },
  785. // Failing strings
  786. {
  787. R"""("")""",
  788. R"""("f")""",
  789. R"""("fo")""",
  790. }
  791. );
  792. test_schema(
  793. "string w/ max length",
  794. // Schema
  795. R"""({
  796. "type": "string",
  797. "maxLength": 3
  798. })""",
  799. // Passing strings
  800. {
  801. R"""("foo")""",
  802. R"""("bar")""",
  803. R"""("")""",
  804. R"""("f")""",
  805. R"""("fo")""",
  806. },
  807. // Failing strings
  808. {
  809. R"""("foobar")""",
  810. }
  811. );
  812. test_schema(
  813. "string w/ min & max length",
  814. // Schema
  815. R"""({
  816. "type": "string",
  817. "minLength": 1,
  818. "maxLength": 4
  819. })""",
  820. // Passing strings
  821. {
  822. R"""("foo")""",
  823. R"""("bar")""",
  824. R"""("f")""",
  825. R"""("barf")""",
  826. },
  827. // Failing strings
  828. {
  829. R"""("")""",
  830. R"""("barfo")""",
  831. R"""("foobar")""",
  832. }
  833. );
  834. test_schema(
  835. "boolean",
  836. // Schema
  837. R"""({
  838. "type": "boolean"
  839. })""",
  840. // Passing strings
  841. {
  842. "true",
  843. "false",
  844. },
  845. // Failing strings
  846. {
  847. R"""("")""",
  848. R"""("true")""",
  849. R"""(True)""",
  850. R"""(FALSE)""",
  851. }
  852. );
  853. test_schema(
  854. "integer",
  855. // Schema
  856. R"""({
  857. "type": "integer"
  858. })""",
  859. // Passing strings
  860. {
  861. R"""(0)""",
  862. R"""(12345)""",
  863. R"""(1234567890123456)""",
  864. },
  865. // Failing strings
  866. {
  867. R"""()""",
  868. R"""(01)""",
  869. R"""(007)""",
  870. R"""(12345678901234567 )""",
  871. }
  872. );
  873. test_schema(
  874. "string const",
  875. // Schema
  876. R"""({
  877. "const": "foo"
  878. })""",
  879. // Passing strings
  880. {
  881. R"""("foo")""",
  882. },
  883. // Failing strings
  884. {
  885. R"""(foo)""",
  886. R"""("bar")""",
  887. }
  888. );
  889. test_schema(
  890. "non-string const",
  891. // Schema
  892. R"""({
  893. "const": true
  894. })""",
  895. // Passing strings
  896. {
  897. R"""(true)""",
  898. },
  899. // Failing strings
  900. {
  901. R"""()""",
  902. R"""(foo)""",
  903. R"""("true")""",
  904. }
  905. );
  906. test_schema(
  907. "non-string const",
  908. // Schema
  909. R"""({
  910. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  911. })""",
  912. // Passing strings
  913. {
  914. R"""("red")""",
  915. R"""(null)""",
  916. R"""(42)""",
  917. R"""(["foo"])""",
  918. },
  919. // Failing strings
  920. {
  921. R"""()""",
  922. R"""(420)""",
  923. R"""(true)""",
  924. R"""(foo)""",
  925. }
  926. );
  927. test_schema(
  928. "",
  929. // Schema
  930. R"""(
  931. {
  932. "type": ["array", "null"],
  933. "items": { "type": "string" }
  934. }
  935. )""",
  936. // Passing strings
  937. {
  938. "null",
  939. "[]",
  940. "[\"123\"]",
  941. "[\"foo\", \"bar\"]",
  942. },
  943. // Failing strings
  944. {
  945. "",
  946. "[123]",
  947. "\"foo\"",
  948. "[\"foo\", 42]",
  949. }
  950. );
  951. test_schema(
  952. "min+max items",
  953. // Schema
  954. R"""({
  955. "items": {
  956. "type": ["number", "integer"]
  957. },
  958. "minItems": 3,
  959. "maxItems": 5
  960. })""",
  961. // Passing strings
  962. {
  963. R"""([1, 2, 3])""",
  964. R"""([1, 2, 3, 4])""",
  965. R"""([1, 2, 3, 4, 5])""",
  966. },
  967. // Failing strings
  968. {
  969. R"""([1, 2])""",
  970. R"""([1, 2, 3, 4, 5, 6])""",
  971. R"""(1)""",
  972. }
  973. );
  974. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  975. test_schema(
  976. "object properties",
  977. // Schema
  978. R"""({
  979. "type": "object",
  980. "properties": {
  981. "number": { "type": "number" },
  982. "street_name": { "type": "string" },
  983. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  984. }
  985. })""",
  986. // Passing strings
  987. {
  988. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  989. // "By default, leaving out properties is valid"
  990. R"""({ "street_name": "Pennsylvania" })""",
  991. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  992. // "By extension, even an empty object is valid"
  993. R"""({})""",
  994. // "By default, providing additional properties is valid"
  995. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  996. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  997. },
  998. // Failing strings
  999. {
  1000. // Change datatype from number to string
  1001. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1002. // Reorder properties
  1003. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1004. // Reorder properties
  1005. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1006. }
  1007. );
  1008. test_schema(
  1009. "additional properties can't override other properties",
  1010. R"""({
  1011. "properties": {
  1012. "a": {"type": "integer"},
  1013. "b": {"type": "integer"}
  1014. },
  1015. "additionalProperties": true
  1016. })""",
  1017. // Passing strings
  1018. {
  1019. R"""({"a": 42})""",
  1020. R"""({"c": ""})""",
  1021. R"""({"a": 42, "c": ""})""",
  1022. R"""({"a_": ""})""",
  1023. },
  1024. // Failing strings
  1025. {
  1026. R"""()""",
  1027. R"""({"a": ""})""",
  1028. R"""({"a": "", "b": ""})""",
  1029. }
  1030. );
  1031. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1032. test_schema(
  1033. "object properties, additionalProperties: true",
  1034. // Schema
  1035. R"""({
  1036. "type": "object",
  1037. "properties": {
  1038. "number": { "type": "number" },
  1039. "street_name": { "type": "string" },
  1040. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1041. },
  1042. "additionalProperties": true
  1043. })""",
  1044. // Passing strings
  1045. {
  1046. // "By extension, even an empty object is valid"
  1047. R"""({})""",
  1048. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1049. // "By default, leaving out properties is valid"
  1050. R"""({ "street_name": "Pennsylvania" })""",
  1051. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1052. // "By default, providing additional properties is valid"
  1053. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1054. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1055. },
  1056. // Failing strings
  1057. {
  1058. // Change datatype from number to string
  1059. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1060. // Reorder properties
  1061. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1062. }
  1063. );
  1064. // Additional properties: false
  1065. test_schema(
  1066. "required + optional props each in original order",
  1067. // Schema
  1068. R"""({
  1069. "type": "object",
  1070. "properties": {
  1071. "number": { "type": "number" },
  1072. "street_name": { "type": "string" },
  1073. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1074. },
  1075. "additionalProperties": false
  1076. })""",
  1077. // Passing strings
  1078. {
  1079. R"""({ "street_name": "Pennsylvania" })""",
  1080. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1081. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1082. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1083. // Spaces are permitted around enum values
  1084. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1085. },
  1086. // Failing strings
  1087. {
  1088. // Reorder properties
  1089. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1090. // Add "direction"
  1091. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1092. }
  1093. );
  1094. test_schema(
  1095. "required + optional props each in original order",
  1096. // Schema
  1097. R"""({
  1098. "properties": {
  1099. "b": {"type": "string"},
  1100. "a": {"type": "string"},
  1101. "d": {"type": "string"},
  1102. "c": {"type": "string"}
  1103. },
  1104. "required": ["a", "b"],
  1105. "additionalProperties": false
  1106. })""",
  1107. // Passing strings
  1108. {
  1109. R"""({"b": "foo", "a": "bar"})""",
  1110. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1111. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1112. },
  1113. // Failing strings
  1114. {
  1115. R"""({"a": "foo", "b": "bar"})""",
  1116. R"""({"b": "bar"})""",
  1117. R"""({"a": "foo", "c": "baz"})""",
  1118. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1119. }
  1120. );
  1121. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1122. test_schema(
  1123. "required props",
  1124. // Schema
  1125. R"""({
  1126. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1127. "$id": "https://example.com/product.schema.json",
  1128. "title": "Product",
  1129. "description": "A product from Acme's catalog",
  1130. "type": "object",
  1131. "properties": {
  1132. "productId": {
  1133. "description": "The unique identifier for a product",
  1134. "type": "integer"
  1135. },
  1136. "productName": {
  1137. "description": "Name of the product",
  1138. "type": "string"
  1139. },
  1140. "price": {
  1141. "description": "The price of the product",
  1142. "type": "number",
  1143. "exclusiveMinimum": 0
  1144. },
  1145. "tags": {
  1146. "description": "Tags for the product",
  1147. "type": "array",
  1148. "items": {
  1149. "type": "string"
  1150. },
  1151. "minItems": 1,
  1152. "uniqueItems": true
  1153. },
  1154. "dimensions": {
  1155. "type": "object",
  1156. "properties": {
  1157. "length": {
  1158. "type": "number"
  1159. },
  1160. "width": {
  1161. "type": "number"
  1162. },
  1163. "height": {
  1164. "type": "number"
  1165. }
  1166. },
  1167. "required": [ "length", "width", "height" ]
  1168. }
  1169. },
  1170. "required": [ "productId", "productName", "price" ]
  1171. })""",
  1172. // Passing strings
  1173. {
  1174. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1175. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1176. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1177. },
  1178. // Failing strings
  1179. {
  1180. R"""({})""", // Missing all required properties
  1181. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1182. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1183. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1184. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1185. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1186. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1187. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1188. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1189. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1190. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1191. }
  1192. );
  1193. }
  1194. int main() {
  1195. fprintf(stdout, "Running grammar integration tests...\n");
  1196. test_simple_grammar();
  1197. test_complex_grammar();
  1198. test_special_chars();
  1199. test_quantifiers();
  1200. test_failure_missing_root();
  1201. test_failure_missing_reference();
  1202. test_failure_left_recursion();
  1203. test_json_schema();
  1204. fprintf(stdout, "All tests passed.\n");
  1205. return 0;
  1206. }