test-grammar-integration.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #define LLAMA_API_INTERNAL
  5. #include "ggml.h"
  6. #include "llama.h"
  7. #include "grammar-parser.h"
  8. #include "json-schema-to-grammar.h"
  9. #include "unicode.h"
  10. #include <cassert>
  11. #include <string>
  12. #include <vector>
  13. using json = nlohmann::ordered_json;
  14. static llama_grammar* build_grammar(const std::string & grammar_str) {
  15. auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  16. // Ensure we parsed correctly
  17. assert(!parsed_grammar.rules.empty());
  18. // Ensure we have a root node
  19. assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
  20. std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
  21. llama_grammar* grammar = llama_grammar_init(
  22. grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
  23. return grammar;
  24. }
  25. static bool test_build_grammar_fails(const std::string & grammar_str) {
  26. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  27. bool grammar_fails = false;
  28. llama_grammar * grammar = build_grammar(grammar_str);
  29. if (grammar != nullptr) {
  30. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  31. } else {
  32. grammar_fails = true;
  33. fprintf(stdout, " ✅︎\n");
  34. }
  35. return grammar_fails;
  36. }
  37. static bool match_string(const std::string & input, llama_grammar * grammar) {
  38. auto decoded = decode_utf8(input, {});
  39. const auto & code_points = decoded.first;
  40. const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
  41. llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
  42. for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
  43. const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
  44. llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
  45. if (cur_stacks.empty()) {
  46. // no stacks means that the grammar failed to match at this point
  47. return false;
  48. }
  49. }
  50. for (const auto & stack : cur_stacks) {
  51. if (stack.empty()) {
  52. // An empty stack means that the grammar has been completed
  53. return true;
  54. }
  55. }
  56. return false;
  57. }
  58. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  59. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  60. fflush(stderr);
  61. auto grammar = build_grammar(grammar_str);
  62. // Save the original grammar stacks so that we can reset after every new string we want to test
  63. const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
  64. llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
  65. fprintf(stderr, " 🔵 Valid strings:\n");
  66. // Passing strings
  67. for (const auto & test_string : passing_strings) {
  68. fprintf(stderr, " \"%s\" ", test_string.c_str());
  69. fflush(stderr);
  70. bool matched = match_string(test_string, grammar);
  71. if (!matched) {
  72. fprintf(stderr, "❌ (failed to match)\n");
  73. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  74. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  75. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  76. if (grammar_file) {
  77. fprintf(grammar_file, "%s", grammar_str.c_str());
  78. fclose(grammar_file);
  79. }
  80. // DEBUG: Write the test string to test-grammar-integration.string.txt
  81. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  82. if (string_file) {
  83. fprintf(string_file, "%s", test_string.c_str());
  84. fclose(string_file);
  85. }
  86. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  87. } else {
  88. fprintf(stdout, "✅︎\n");
  89. }
  90. assert(matched);
  91. // Reset the grammar stacks
  92. cur_stacks = original_stacks;
  93. }
  94. fprintf(stderr, " 🟠 Invalid strings:\n");
  95. // Failing strings
  96. for (const auto & test_string : failing_strings) {
  97. fprintf(stderr, " \"%s\" ", test_string.c_str());
  98. fflush(stderr);
  99. bool matched = match_string(test_string, grammar);
  100. if (matched) {
  101. fprintf(stderr, "❌ (incorrectly matched)\n");
  102. } else {
  103. fprintf(stdout, "✅︎\n");
  104. }
  105. assert(!matched);
  106. // Reset the grammar stacks
  107. cur_stacks = original_stacks;
  108. }
  109. // Clean up allocated memory
  110. llama_grammar_free(grammar);
  111. }
  112. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  113. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  114. }
  115. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  116. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
  117. }
  118. static void test_simple_grammar() {
  119. test_schema(
  120. "min 0",
  121. R"""({
  122. "type": "integer",
  123. "minimum": 0
  124. })""",
  125. // Passing strings
  126. {
  127. "0",
  128. "10",
  129. "12",
  130. "10000",
  131. },
  132. // Failing strings
  133. {
  134. "-1",
  135. "-10",
  136. "-10000",
  137. "-100000000000000000000000000000000",
  138. "100000000000000000000000000000000",
  139. "00",
  140. "01",
  141. "-0",
  142. }
  143. );
  144. test_schema(
  145. "min 2",
  146. // Schema
  147. R"""({
  148. "type": "integer",
  149. "minimum": 2
  150. })""",
  151. // Passing strings
  152. {
  153. "2",
  154. "3",
  155. "4",
  156. "10",
  157. "20",
  158. "1234567890000000",
  159. },
  160. // Failing strings
  161. {
  162. "0",
  163. "1",
  164. "-1",
  165. "-100",
  166. "0",
  167. "1",
  168. "01",
  169. "02",
  170. "12345678900000000",
  171. }
  172. );
  173. test_schema(
  174. "min 456",
  175. R"""({
  176. "type": "integer",
  177. "minimum": 456
  178. })""",
  179. // Passing strings
  180. {
  181. "456",
  182. "4560",
  183. "457",
  184. "460",
  185. "500",
  186. },
  187. // Failing strings
  188. {
  189. "455",
  190. "356",
  191. "50",
  192. "050",
  193. "-1",
  194. "-456",
  195. }
  196. );
  197. test_schema(
  198. "min -123",
  199. R"""({
  200. "type": "integer",
  201. "minimum": -123
  202. })""",
  203. // Passing strings
  204. {
  205. "-123",
  206. "-122",
  207. "-11",
  208. "-1",
  209. "0",
  210. "1",
  211. "123",
  212. "1234",
  213. "2345",
  214. },
  215. // Failing strings
  216. {
  217. "-1234",
  218. "-124",
  219. }
  220. );
  221. test_schema(
  222. "max 9999",
  223. // Schema
  224. R"""({
  225. "type": "integer",
  226. "maximum": 9999
  227. })""",
  228. // Passing strings
  229. {
  230. "-99999",
  231. "0",
  232. "9999",
  233. },
  234. // Failing strings
  235. {
  236. "10000",
  237. "99991",
  238. }
  239. );
  240. test_schema(
  241. "max -9999",
  242. // Schema
  243. R"""({
  244. "type": "integer",
  245. "maximum": -9999
  246. })""",
  247. // Passing strings
  248. {
  249. "-10000",
  250. "-9999",
  251. },
  252. // Failing strings
  253. {
  254. "-9998",
  255. "0",
  256. "9999",
  257. }
  258. );
  259. test_schema(
  260. "min 5 max 30",
  261. // Schema
  262. R"""({
  263. "type": "integer",
  264. "minimum": 5,
  265. "maximum": 30
  266. })""",
  267. // Passing strings
  268. {
  269. "5",
  270. "10",
  271. "30",
  272. },
  273. // Failing strings
  274. {
  275. "05",
  276. "4",
  277. "-1",
  278. "31",
  279. "123",
  280. "0123",
  281. }
  282. );
  283. test_schema(
  284. "min -1 max 1",
  285. R"""({
  286. "type": "integer",
  287. "minimum": -1,
  288. "maximum": 1
  289. })""",
  290. // Passing strings
  291. {
  292. "-1",
  293. "0",
  294. "1",
  295. },
  296. // Failing strings
  297. {
  298. "-11",
  299. "-10",
  300. "-2",
  301. "2",
  302. "10",
  303. "11",
  304. }
  305. );
  306. test_schema(
  307. "min -123 max 42",
  308. R"""({
  309. "type": "integer",
  310. "minimum": -123,
  311. "maximum": 42
  312. })""",
  313. // Passing strings
  314. {
  315. "-123",
  316. "-122",
  317. "-13",
  318. "-11",
  319. "-2",
  320. "-1",
  321. "0",
  322. "1",
  323. "5",
  324. "10",
  325. "39",
  326. "40",
  327. "42",
  328. },
  329. // Failing strings
  330. {
  331. "-0123",
  332. "-124",
  333. "-1123",
  334. "-200",
  335. "43",
  336. "123",
  337. "0123",
  338. }
  339. );
  340. test_schema(
  341. "exclusive min / max",
  342. // Schema
  343. R"""({
  344. "type": "integer",
  345. "exclusiveMinimum": 0,
  346. "exclusiveMaximum": 10000
  347. })""",
  348. // Passing strings
  349. {
  350. "1",
  351. "9999",
  352. },
  353. // Failing strings
  354. {
  355. "0",
  356. "01",
  357. "10000",
  358. "99999",
  359. }
  360. );
  361. // Test case for a simple grammar
  362. test_grammar(
  363. "simple grammar",
  364. R"""(
  365. root ::= expr
  366. expr ::= term ("+" term)*
  367. term ::= number
  368. number ::= [0-9]+)""",
  369. // Passing strings
  370. {
  371. "42",
  372. "1+2+3+4+5",
  373. "123+456",
  374. },
  375. // Failing strings
  376. {
  377. "+",
  378. "/ 3",
  379. "1+2+3+4+5+",
  380. "12a45",
  381. }
  382. );
  383. }
  384. static void test_complex_grammar() {
  385. // Test case for a more complex grammar, with both failure strings and success strings
  386. test_grammar(
  387. "medium complexity grammar",
  388. // Grammar
  389. R"""(
  390. root ::= expression
  391. expression ::= term ws (("+"|"-") ws term)*
  392. term ::= factor ws (("*"|"/") ws factor)*
  393. factor ::= number | variable | "(" expression ")" | function-call
  394. number ::= [0-9]+
  395. variable ::= [a-zA-Z_][a-zA-Z0-9_]*
  396. function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
  397. ws ::= [ \t\n\r]?)""",
  398. // Passing strings
  399. {
  400. "42",
  401. "1*2*3*4*5",
  402. "x",
  403. "x+10",
  404. "x1+y2",
  405. "(a+b)*(c-d)",
  406. "func()",
  407. "func(x,y+2)",
  408. "a*(b+c)-d/e",
  409. "f(g(x),h(y,z))",
  410. "x + 10",
  411. "x1 + y2",
  412. "(a + b) * (c - d)",
  413. "func()",
  414. "func(x, y + 2)",
  415. "a * (b + c) - d / e",
  416. "f(g(x), h(y, z))",
  417. "123+456",
  418. "123*456*789-123/456+789*123",
  419. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
  420. },
  421. // Failing strings
  422. {
  423. "+",
  424. "/ 3x",
  425. "x + + y",
  426. "a * / b",
  427. "func(,)",
  428. "func(x y)",
  429. "(a + b",
  430. "x + y)",
  431. "a + b * (c - d",
  432. "42 +",
  433. "x +",
  434. "x + 10 +",
  435. "(a + b) * (c - d",
  436. "func(",
  437. "func(x, y + 2",
  438. "a * (b + c) - d /",
  439. "f(g(x), h(y, z)",
  440. "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
  441. }
  442. );
  443. }
  444. static void test_special_chars() {
  445. // A collection of tests to exercise special characters such as "."
  446. test_grammar(
  447. "special characters",
  448. // Grammar
  449. R"""(
  450. root ::= ... "abc" ...
  451. )""",
  452. // Passing strings
  453. {
  454. "abcabcabc",
  455. "aaaabcccc",
  456. // NOTE: Also ensures that multi-byte characters still count as a single character
  457. "🔵🟠✅abc❌🟠🔵"
  458. },
  459. // Failing strings
  460. {
  461. "aaabcccc",
  462. "aaaaabcccc",
  463. "aaaabccc",
  464. "aaaabccccc",
  465. "🔵🟠✅❌abc❌✅🟠🔵",
  466. "🔵🟠abc🟠🔵"
  467. }
  468. );
  469. }
  470. static void test_quantifiers() {
  471. // A collection of tests to exercise * + and ? quantifiers
  472. test_grammar(
  473. "* quantifier",
  474. // Grammar
  475. R"""(root ::= "a"*)""",
  476. // Passing strings
  477. {
  478. "",
  479. "a",
  480. "aaaaa",
  481. "aaaaaaaaaaaaaaaaaa",
  482. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  483. },
  484. // Failing strings
  485. {
  486. "b",
  487. "ab",
  488. "aab",
  489. "ba",
  490. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  491. }
  492. );
  493. test_grammar(
  494. "+ quantifier",
  495. // Grammar
  496. R"""(root ::= "a"+)""",
  497. // Passing strings
  498. {
  499. "a",
  500. "aaaaa",
  501. "aaaaaaaaaaaaaaaaaa",
  502. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
  503. },
  504. // Failing strings
  505. {
  506. "",
  507. "b",
  508. "ab",
  509. "aab",
  510. "ba",
  511. "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
  512. }
  513. );
  514. test_grammar(
  515. "? quantifier",
  516. // Grammar
  517. R"""(root ::= "a"?)""",
  518. // Passing strings
  519. {
  520. "",
  521. "a"
  522. },
  523. // Failing strings
  524. {
  525. "b",
  526. "ab",
  527. "aa",
  528. "ba",
  529. }
  530. );
  531. test_grammar(
  532. "mixed quantifiers",
  533. // Grammar
  534. R"""(
  535. root ::= cons+ vowel* cons? (vowel cons)*
  536. vowel ::= [aeiouy]
  537. cons ::= [bcdfghjklmnpqrstvwxyz]
  538. )""",
  539. // Passing strings
  540. {
  541. "yes",
  542. "no",
  543. "noyes",
  544. "crwth",
  545. "four",
  546. "bryyyy",
  547. },
  548. // Failing strings
  549. {
  550. "yess",
  551. "yesno",
  552. "forty",
  553. "catyyy",
  554. }
  555. );
  556. test_grammar(
  557. "simple exact repetition",
  558. // Grammar
  559. R"""(
  560. root ::= [ab]{4}
  561. )""",
  562. // Passing strings
  563. {
  564. "aaaa",
  565. "bbbb",
  566. "abab",
  567. },
  568. // Failing strings
  569. {
  570. "a",
  571. "b",
  572. "aaaaa",
  573. }
  574. );
  575. test_grammar(
  576. "simple min repetition",
  577. // Grammar
  578. R"""(
  579. root ::= [ab]{4,}
  580. )""",
  581. // Passing strings
  582. {
  583. "aaaa",
  584. "aaaaab",
  585. "bbbb",
  586. "ababab",
  587. },
  588. // Failing strings
  589. {
  590. "",
  591. "aba",
  592. }
  593. );
  594. test_grammar(
  595. "simple max repetition",
  596. // Grammar
  597. R"""(
  598. root ::= [ab]{0,4}
  599. )""",
  600. // Passing strings
  601. {
  602. "",
  603. "a",
  604. "aa",
  605. "aaa",
  606. "aaab",
  607. },
  608. // Failing strings
  609. {
  610. "aaaaa",
  611. }
  612. );
  613. test_grammar(
  614. "min / max repetition",
  615. // Grammar
  616. R"""(
  617. root ::= ("0x" [A-F0-9]{2} " "?){3,5}
  618. )""",
  619. // Passing strings
  620. {
  621. "0xFF 0x12 0xAB",
  622. "0xFF 0x12 0xAB 0x00 0x00",
  623. },
  624. // Failing strings
  625. {
  626. "",
  627. "0xFF",
  628. "0xFF 0x12",
  629. "0xFF 0x12 0xAB 0x00 0x00 0x00",
  630. }
  631. );
  632. }
  633. static void test_failure_missing_root() {
  634. fprintf(stderr, "⚫ Testing missing root node:\n");
  635. // Test case for a grammar that is missing a root rule
  636. const std::string grammar_str = R"""(
  637. rot ::= expr
  638. expr ::= term ("+" term)*
  639. term ::= number
  640. number ::= [0-9]+)""";
  641. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  642. // Ensure we parsed correctly
  643. assert(!parsed_grammar.rules.empty());
  644. // Ensure we do NOT have a root node
  645. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  646. fprintf(stderr, " ✅︎ Passed\n");
  647. }
  648. static void test_failure_missing_reference() {
  649. fprintf(stderr, "⚫ Testing missing reference node:\n");
  650. // Test case for a grammar that is missing a referenced rule
  651. const std::string grammar_str =
  652. R"""(root ::= expr
  653. expr ::= term ("+" term)*
  654. term ::= numero
  655. number ::= [0-9]+)""";
  656. fprintf(stderr, " Expected error: ");
  657. grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
  658. // Ensure we did NOT parsed correctly
  659. assert(parsed_grammar.rules.empty());
  660. fprintf(stderr, " End of expected error.\n");
  661. fprintf(stderr, " ✅︎ Passed\n");
  662. }
  663. static void test_failure_left_recursion() {
  664. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  665. // Test simple left recursion detection
  666. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  667. assert(test_build_grammar_fails(simple_str));
  668. // Test more complicated left recursion detection
  669. const std::string medium_str = R"""(
  670. root ::= asdf
  671. asdf ::= "a" | asdf "a"
  672. )""";
  673. assert(test_build_grammar_fails(medium_str));
  674. // Test even more complicated left recursion detection
  675. const std::string hard_str = R"""(
  676. root ::= asdf
  677. asdf ::= "a" | foo "b"
  678. foo ::= "c" | asdf "d" | "e")""";
  679. assert(test_build_grammar_fails(hard_str));
  680. // Test yet even more complicated left recursion detection
  681. const std::string hardest_str = R"""(
  682. root ::= asdf
  683. asdf ::= "a" | foo "b"
  684. foo ::= "c" | empty asdf "d" | "e"
  685. empty ::= "blah" | )""";
  686. assert(test_build_grammar_fails(hardest_str));
  687. fprintf(stderr, " ✅︎ Passed\n");
  688. }
  689. static void test_json_schema() {
  690. // Note that this is similar to the regular grammar tests,
  691. // but we convert each json schema to a grammar before parsing.
  692. // Otherwise, this test structure is the same.
  693. test_schema(
  694. "empty schema (object)",
  695. // Schema
  696. R"""(
  697. {}
  698. )""",
  699. // Passing strings
  700. {
  701. R"""({})""",
  702. R"""({"foo": "bar"})""",
  703. },
  704. // Failing strings
  705. {
  706. "",
  707. "[]",
  708. "null",
  709. R"""("")""",
  710. "true",
  711. }
  712. );
  713. test_schema(
  714. "exotic formats (list)",
  715. // Schema
  716. R"""({
  717. "items": [
  718. { "format": "date" },
  719. { "format": "uuid" },
  720. { "format": "time" },
  721. { "format": "date-time" }
  722. ]
  723. })""",
  724. // Passing strings
  725. {
  726. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  727. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  728. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  729. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  730. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  731. },
  732. // Failing strings
  733. {
  734. R"""(["foo", "bar"])""",
  735. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  736. }
  737. );
  738. test_schema(
  739. "string",
  740. // Schema
  741. R"""({
  742. "type": "string"
  743. })""",
  744. // Passing strings
  745. {
  746. R"""("foo")""",
  747. R"""("bar")""",
  748. R"""("")""",
  749. },
  750. // Failing strings
  751. {
  752. R"""({})""",
  753. R"""("foo": "bar")""",
  754. }
  755. );
  756. test_schema(
  757. "string w/ min length 1",
  758. // Schema
  759. R"""({
  760. "type": "string",
  761. "minLength": 1
  762. })""",
  763. // Passing strings
  764. {
  765. R"""("foo")""",
  766. R"""("bar")""",
  767. },
  768. // Failing strings
  769. {
  770. R"""("")""",
  771. R"""({})""",
  772. R"""("foo": "bar")""",
  773. }
  774. );
  775. test_schema(
  776. "string w/ min length 3",
  777. // Schema
  778. R"""({
  779. "type": "string",
  780. "minLength": 3
  781. })""",
  782. // Passing strings
  783. {
  784. R"""("foo")""",
  785. R"""("bar")""",
  786. R"""("foobar")""",
  787. },
  788. // Failing strings
  789. {
  790. R"""("")""",
  791. R"""("f")""",
  792. R"""("fo")""",
  793. }
  794. );
  795. test_schema(
  796. "string w/ max length",
  797. // Schema
  798. R"""({
  799. "type": "string",
  800. "maxLength": 3
  801. })""",
  802. // Passing strings
  803. {
  804. R"""("foo")""",
  805. R"""("bar")""",
  806. R"""("")""",
  807. R"""("f")""",
  808. R"""("fo")""",
  809. },
  810. // Failing strings
  811. {
  812. R"""("foobar")""",
  813. }
  814. );
  815. test_schema(
  816. "string w/ min & max length",
  817. // Schema
  818. R"""({
  819. "type": "string",
  820. "minLength": 1,
  821. "maxLength": 4
  822. })""",
  823. // Passing strings
  824. {
  825. R"""("foo")""",
  826. R"""("bar")""",
  827. R"""("f")""",
  828. R"""("barf")""",
  829. },
  830. // Failing strings
  831. {
  832. R"""("")""",
  833. R"""("barfo")""",
  834. R"""("foobar")""",
  835. }
  836. );
  837. test_schema(
  838. "boolean",
  839. // Schema
  840. R"""({
  841. "type": "boolean"
  842. })""",
  843. // Passing strings
  844. {
  845. "true",
  846. "false",
  847. },
  848. // Failing strings
  849. {
  850. R"""("")""",
  851. R"""("true")""",
  852. R"""(True)""",
  853. R"""(FALSE)""",
  854. }
  855. );
  856. test_schema(
  857. "integer",
  858. // Schema
  859. R"""({
  860. "type": "integer"
  861. })""",
  862. // Passing strings
  863. {
  864. R"""(0)""",
  865. R"""(12345)""",
  866. R"""(1234567890123456)""",
  867. },
  868. // Failing strings
  869. {
  870. R"""()""",
  871. R"""(01)""",
  872. R"""(007)""",
  873. R"""(12345678901234567 )""",
  874. }
  875. );
  876. test_schema(
  877. "string const",
  878. // Schema
  879. R"""({
  880. "const": "foo"
  881. })""",
  882. // Passing strings
  883. {
  884. R"""("foo")""",
  885. },
  886. // Failing strings
  887. {
  888. R"""(foo)""",
  889. R"""("bar")""",
  890. }
  891. );
  892. test_schema(
  893. "non-string const",
  894. // Schema
  895. R"""({
  896. "const": true
  897. })""",
  898. // Passing strings
  899. {
  900. R"""(true)""",
  901. },
  902. // Failing strings
  903. {
  904. R"""()""",
  905. R"""(foo)""",
  906. R"""("true")""",
  907. }
  908. );
  909. test_schema(
  910. "non-string const",
  911. // Schema
  912. R"""({
  913. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  914. })""",
  915. // Passing strings
  916. {
  917. R"""("red")""",
  918. R"""(null)""",
  919. R"""(42)""",
  920. R"""(["foo"])""",
  921. },
  922. // Failing strings
  923. {
  924. R"""()""",
  925. R"""(420)""",
  926. R"""(true)""",
  927. R"""(foo)""",
  928. }
  929. );
  930. test_schema(
  931. "simple pattern",
  932. // Schema
  933. R"""({
  934. "pattern": "^[a-zA-Z0-9_-]*$"
  935. })""",
  936. // Passing strings
  937. {
  938. R"""("")""",
  939. R"""("He_llo-12")""",
  940. },
  941. // Failing strings
  942. {
  943. R"""("!")""",
  944. R"""("Hello World")""",
  945. }
  946. );
  947. test_schema(
  948. "pattern with escapes",
  949. // Schema
  950. R"""({
  951. "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
  952. })""",
  953. // Passing strings
  954. {
  955. R"""("a^$.[]()|{}*+?b")""",
  956. },
  957. // Failing strings
  958. {
  959. R"""("ab")""",
  960. }
  961. );
  962. test_schema(
  963. "",
  964. // Schema
  965. R"""(
  966. {
  967. "type": ["array", "null"],
  968. "items": { "type": "string" }
  969. }
  970. )""",
  971. // Passing strings
  972. {
  973. "null",
  974. "[]",
  975. "[\"123\"]",
  976. "[\"foo\", \"bar\"]",
  977. },
  978. // Failing strings
  979. {
  980. "",
  981. "[123]",
  982. "\"foo\"",
  983. "[\"foo\", 42]",
  984. }
  985. );
  986. test_schema(
  987. "min+max items",
  988. // Schema
  989. R"""({
  990. "items": {
  991. "type": ["number", "integer"]
  992. },
  993. "minItems": 3,
  994. "maxItems": 5
  995. })""",
  996. // Passing strings
  997. {
  998. R"""([1, 2, 3])""",
  999. R"""([1, 2, 3, 4])""",
  1000. R"""([1, 2, 3, 4, 5])""",
  1001. },
  1002. // Failing strings
  1003. {
  1004. R"""([1, 2])""",
  1005. R"""([1, 2, 3, 4, 5, 6])""",
  1006. R"""(1)""",
  1007. }
  1008. );
  1009. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1010. test_schema(
  1011. "object properties",
  1012. // Schema
  1013. R"""({
  1014. "type": "object",
  1015. "properties": {
  1016. "number": { "type": "number" },
  1017. "street_name": { "type": "string" },
  1018. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1019. }
  1020. })""",
  1021. // Passing strings
  1022. {
  1023. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1024. // "By default, leaving out properties is valid"
  1025. R"""({ "street_name": "Pennsylvania" })""",
  1026. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1027. // "By extension, even an empty object is valid"
  1028. R"""({})""",
  1029. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1030. },
  1031. // Failing strings
  1032. {
  1033. // Change datatype from number to string
  1034. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1035. // Reorder properties
  1036. R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
  1037. // Reorder properties
  1038. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1039. // "Additional properties default to false for generation, even though the spec says true.
  1040. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1041. }
  1042. );
  1043. test_schema(
  1044. "additional properties can't override other properties",
  1045. R"""({
  1046. "properties": {
  1047. "a": {"type": "integer"},
  1048. "b": {"type": "integer"}
  1049. },
  1050. "additionalProperties": true
  1051. })""",
  1052. // Passing strings
  1053. {
  1054. R"""({"a": 42})""",
  1055. R"""({"c": ""})""",
  1056. R"""({"a": 42, "c": ""})""",
  1057. R"""({"a_": ""})""",
  1058. },
  1059. // Failing strings
  1060. {
  1061. R"""()""",
  1062. R"""({"a": ""})""",
  1063. R"""({"a": "", "b": ""})""",
  1064. }
  1065. );
  1066. // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
  1067. test_schema(
  1068. "object properties, additionalProperties: true",
  1069. // Schema
  1070. R"""({
  1071. "type": "object",
  1072. "properties": {
  1073. "number": { "type": "number" },
  1074. "street_name": { "type": "string" },
  1075. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1076. },
  1077. "additionalProperties": true
  1078. })""",
  1079. // Passing strings
  1080. {
  1081. // "By extension, even an empty object is valid"
  1082. R"""({})""",
  1083. R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
  1084. // "By default, leaving out properties is valid"
  1085. R"""({ "street_name": "Pennsylvania" })""",
  1086. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1087. // "By default, providing additional properties is valid"
  1088. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
  1089. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1090. },
  1091. // Failing strings
  1092. {
  1093. // Change datatype from number to string
  1094. R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1095. // Reorder properties
  1096. R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
  1097. }
  1098. );
  1099. // Additional properties: false
  1100. test_schema(
  1101. "required + optional props each in original order",
  1102. // Schema
  1103. R"""({
  1104. "type": "object",
  1105. "properties": {
  1106. "number": { "type": "number" },
  1107. "street_name": { "type": "string" },
  1108. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1109. },
  1110. "additionalProperties": false
  1111. })""",
  1112. // Passing strings
  1113. {
  1114. R"""({ "street_name": "Pennsylvania" })""",
  1115. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1116. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1117. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1118. // Spaces are permitted around enum values
  1119. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1120. },
  1121. // Failing strings
  1122. {
  1123. // Reorder properties
  1124. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1125. // Add "direction"
  1126. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1127. }
  1128. );
  1129. test_schema(
  1130. "required + optional props each in original order",
  1131. // Schema
  1132. R"""({
  1133. "properties": {
  1134. "b": {"type": "string"},
  1135. "a": {"type": "string"},
  1136. "d": {"type": "string"},
  1137. "c": {"type": "string"}
  1138. },
  1139. "required": ["a", "b"],
  1140. "additionalProperties": false
  1141. })""",
  1142. // Passing strings
  1143. {
  1144. R"""({"b": "foo", "a": "bar"})""",
  1145. R"""({"b":"foo","a":"bar","d":"qux"})""",
  1146. R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
  1147. },
  1148. // Failing strings
  1149. {
  1150. R"""({"a": "foo", "b": "bar"})""",
  1151. R"""({"b": "bar"})""",
  1152. R"""({"a": "foo", "c": "baz"})""",
  1153. R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
  1154. }
  1155. );
  1156. // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
  1157. test_schema(
  1158. "required props",
  1159. // Schema
  1160. R"""({
  1161. "$schema": "https://json-schema.org/draft/2020-12/schema",
  1162. "$id": "https://example.com/product.schema.json",
  1163. "title": "Product",
  1164. "description": "A product from Acme's catalog",
  1165. "type": "object",
  1166. "properties": {
  1167. "productId": {
  1168. "description": "The unique identifier for a product",
  1169. "type": "integer"
  1170. },
  1171. "productName": {
  1172. "description": "Name of the product",
  1173. "type": "string"
  1174. },
  1175. "price": {
  1176. "description": "The price of the product",
  1177. "type": "number",
  1178. "exclusiveMinimum": 0
  1179. },
  1180. "tags": {
  1181. "description": "Tags for the product",
  1182. "type": "array",
  1183. "items": {
  1184. "type": "string"
  1185. },
  1186. "minItems": 1,
  1187. "uniqueItems": true
  1188. },
  1189. "dimensions": {
  1190. "type": "object",
  1191. "properties": {
  1192. "length": {
  1193. "type": "number"
  1194. },
  1195. "width": {
  1196. "type": "number"
  1197. },
  1198. "height": {
  1199. "type": "number"
  1200. }
  1201. },
  1202. "required": [ "length", "width", "height" ]
  1203. }
  1204. },
  1205. "required": [ "productId", "productName", "price" ]
  1206. })""",
  1207. // Passing strings
  1208. {
  1209. R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
  1210. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
  1211. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
  1212. },
  1213. // Failing strings
  1214. {
  1215. R"""({})""", // Missing all required properties
  1216. R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
  1217. // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
  1218. // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
  1219. // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
  1220. R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
  1221. R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
  1222. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
  1223. R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
  1224. // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
  1225. // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
  1226. }
  1227. );
  1228. }
  1229. int main() {
  1230. fprintf(stdout, "Running grammar integration tests...\n");
  1231. test_simple_grammar();
  1232. test_complex_grammar();
  1233. test_special_chars();
  1234. test_quantifiers();
  1235. test_failure_missing_root();
  1236. test_failure_missing_reference();
  1237. test_failure_left_recursion();
  1238. test_json_schema();
  1239. fprintf(stdout, "All tests passed.\n");
  1240. return 0;
  1241. }