// test-grammar-integration.cpp — integration tests for GBNF grammar parsing and matching.
// (Line-number gutter residue from the paste removed.)
  1. #ifdef NDEBUG
  2. #undef NDEBUG
  3. #endif
  4. #include "json-schema-to-grammar.h"
  5. #include "../src/unicode.h"
  6. #include "../src/llama-grammar.h"
  7. #include <nlohmann/json.hpp>
  8. #include <cassert>
  9. #include <string>
  10. #include <vector>
  11. using json = nlohmann::ordered_json;
  12. static llama_grammar * build_grammar(const std::string & grammar_str) {
  13. return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0);
  14. }
  15. static bool test_build_grammar_fails(const std::string & grammar_str) {
  16. fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
  17. bool grammar_fails = false;
  18. llama_grammar * grammar = build_grammar(grammar_str);
  19. if (grammar != nullptr) {
  20. fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
  21. } else {
  22. grammar_fails = true;
  23. fprintf(stdout, " ✅︎\n");
  24. }
  25. return grammar_fails;
  26. }
// One element of a parsed test input: a token id plus its text piece.
// parse_tokens() below fills this with either a real id and a synthetic
// "<[id]>" piece, or token == 0 with a single UTF-8 character as the piece.
struct token_and_piece {
    llama_token token;
    std::string piece;
};
  31. // token() encodes a 32-bit ID as 5 bytes: a 0xff marker followed by the ID in big-endian order.
  32. static std::string token(llama_token id) {
  33. return std::string{
  34. static_cast<char>(0xff),
  35. static_cast<char>((id >> 24) & 0xff),
  36. static_cast<char>((id >> 16) & 0xff),
  37. static_cast<char>((id >> 8) & 0xff),
  38. static_cast<char>(id & 0xff)
  39. };
  40. }
// parse_tokens() parses the token encoding above and UTF-8 text.
// Produces one entry per 0xff-marked token (real id + synthetic "<[id]>" piece)
// and one entry per decoded UTF-8 codepoint (token 0 + the codepoint's bytes).
// A truncated token encoding throws std::runtime_error, which is deliberately
// NOT caught below and so aborts the test.
static std::vector<token_and_piece> parse_tokens(const std::string & input) {
    std::vector<token_and_piece> result;
    result.reserve(input.size()); // upper bound: at most one entry per input byte
    size_t offset = 0;
    while (offset < input.size()) {
        try {
            if (static_cast<unsigned char>(input[offset]) == 0xff) {
                // token marker: the next 4 bytes hold the id in big-endian order
                if (offset + 5 > input.size()) {
                    throw std::runtime_error("not enough bytes for token id");
                }
                uint32_t val =
                    (static_cast<unsigned char>(input[offset + 1]) << 24) |
                    (static_cast<unsigned char>(input[offset + 2]) << 16) |
                    (static_cast<unsigned char>(input[offset + 3]) << 8) |
                    (static_cast<unsigned char>(input[offset + 4]));
                auto piece = "<[" + std::to_string(val) + "]>";
                result.push_back({static_cast<llama_token>(val), piece});
                offset += 5;
            } else {
                // NOTE(review): relies on unicode_cpt_from_utf8 advancing `offset`
                // past the consumed bytes — otherwise this loop would not
                // terminate; confirm against unicode.h
                uint32_t cpt = unicode_cpt_from_utf8(input, offset);
                result.push_back({0, unicode_cpt_to_utf8(cpt)});
            }
        } catch (const std::invalid_argument & /*ex*/) {
            // Silently ignore invalid UTF-8 input to avoid leaking the exception beyond llama_tokenize;
            // skip the bad byte and substitute U+FFFD
            ++offset;
            result.push_back({0, unicode_cpt_to_utf8(0xFFFD)}); // replacement character
        }
    }
    return result;
}
  72. static bool match_string(const std::string & input, llama_grammar * grammar) {
  73. const auto parsed = parse_tokens(input);
  74. auto & stacks_cur = llama_grammar_get_stacks(grammar);
  75. for (const auto & in : parsed) {
  76. try {
  77. llama_grammar_accept_token(*grammar, in.token, in.piece);
  78. } catch (const std::runtime_error & /*e*/) {
  79. // normally this shouldn't get hit because of llama_grammar_apply
  80. return false;
  81. }
  82. if (stacks_cur.empty()) {
  83. // no stacks means that the grammar failed to match at this point
  84. return false;
  85. }
  86. }
  87. for (const auto & stack : stacks_cur) {
  88. if (stack.empty()) {
  89. // An empty stack means that the grammar has been completed
  90. return true;
  91. }
  92. }
  93. return false;
  94. }
  95. static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  96. fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
  97. fflush(stderr);
  98. auto * grammar = build_grammar(grammar_str);
  99. // Save the original grammar stacks so that we can reset after every new string we want to test
  100. const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
  101. llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
  102. fprintf(stderr, " 🔵 Valid strings:\n");
  103. // Passing strings
  104. for (const auto & test_string : passing_strings) {
  105. fprintf(stderr, " \"%s\" ", test_string.c_str());
  106. fflush(stderr);
  107. bool matched = match_string(test_string, grammar);
  108. if (!matched) {
  109. fprintf(stderr, "❌ (failed to match)\n");
  110. // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
  111. // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
  112. FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
  113. if (grammar_file) {
  114. fprintf(grammar_file, "%s", grammar_str.c_str());
  115. fclose(grammar_file);
  116. }
  117. // DEBUG: Write the test string to test-grammar-integration.string.txt
  118. FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
  119. if (string_file) {
  120. fprintf(string_file, "%s", test_string.c_str());
  121. fclose(string_file);
  122. }
  123. fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
  124. } else {
  125. fprintf(stdout, "✅︎\n");
  126. }
  127. assert(matched);
  128. // Reset the grammar stacks
  129. stacks_cur = stacks_org;
  130. }
  131. fprintf(stderr, " 🟠 Invalid strings:\n");
  132. // Failing strings
  133. for (const auto & test_string : failing_strings) {
  134. fprintf(stderr, " \"%s\" ", test_string.c_str());
  135. fflush(stderr);
  136. bool matched = match_string(test_string, grammar);
  137. if (matched) {
  138. fprintf(stderr, "❌ (incorrectly matched)\n");
  139. } else {
  140. fprintf(stdout, "✅︎\n");
  141. }
  142. assert(!matched);
  143. // Reset the grammar stacks
  144. stacks_cur = stacks_org;
  145. }
  146. // Clean up allocated memory
  147. llama_grammar_free_impl(grammar);
  148. }
  149. static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  150. test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
  151. }
  152. static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
  153. test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str), true), passing_strings, failing_strings);
  154. }
  155. static void test_simple_grammar() {
  156. test_schema(
  157. "min 0",
  158. R"""({
  159. "type": "integer",
  160. "minimum": 0
  161. })""",
  162. // Passing strings
  163. {
  164. "0",
  165. "10",
  166. "12",
  167. "10000",
  168. },
  169. // Failing strings
  170. {
  171. "-1",
  172. "-10",
  173. "-10000",
  174. "-100000000000000000000000000000000",
  175. "100000000000000000000000000000000",
  176. "00",
  177. "01",
  178. "-0",
  179. }
  180. );
  181. test_schema(
  182. "min 2",
  183. // Schema
  184. R"""({
  185. "type": "integer",
  186. "minimum": 2
  187. })""",
  188. // Passing strings
  189. {
  190. "2",
  191. "3",
  192. "4",
  193. "10",
  194. "20",
  195. "1234567890000000",
  196. },
  197. // Failing strings
  198. {
  199. "0",
  200. "1",
  201. "-1",
  202. "-100",
  203. "0",
  204. "1",
  205. "01",
  206. "02",
  207. "12345678900000000",
  208. }
  209. );
  210. test_schema(
  211. "min 456",
  212. R"""({
  213. "type": "integer",
  214. "minimum": 456
  215. })""",
  216. // Passing strings
  217. {
  218. "456",
  219. "4560",
  220. "457",
  221. "460",
  222. "500",
  223. },
  224. // Failing strings
  225. {
  226. "455",
  227. "356",
  228. "50",
  229. "050",
  230. "-1",
  231. "-456",
  232. }
  233. );
  234. test_schema(
  235. "min -123",
  236. R"""({
  237. "type": "integer",
  238. "minimum": -123
  239. })""",
  240. // Passing strings
  241. {
  242. "-123",
  243. "-122",
  244. "-11",
  245. "-1",
  246. "0",
  247. "1",
  248. "123",
  249. "1234",
  250. "2345",
  251. },
  252. // Failing strings
  253. {
  254. "-1234",
  255. "-124",
  256. }
  257. );
  258. test_schema(
  259. "max 9999",
  260. // Schema
  261. R"""({
  262. "type": "integer",
  263. "maximum": 9999
  264. })""",
  265. // Passing strings
  266. {
  267. "-99999",
  268. "0",
  269. "9999",
  270. },
  271. // Failing strings
  272. {
  273. "10000",
  274. "99991",
  275. }
  276. );
  277. test_schema(
  278. "max -9999",
  279. // Schema
  280. R"""({
  281. "type": "integer",
  282. "maximum": -9999
  283. })""",
  284. // Passing strings
  285. {
  286. "-10000",
  287. "-9999",
  288. },
  289. // Failing strings
  290. {
  291. "-9998",
  292. "0",
  293. "9999",
  294. }
  295. );
  296. test_schema(
  297. "min 5 max 30",
  298. // Schema
  299. R"""({
  300. "type": "integer",
  301. "minimum": 5,
  302. "maximum": 30
  303. })""",
  304. // Passing strings
  305. {
  306. "5",
  307. "10",
  308. "30",
  309. },
  310. // Failing strings
  311. {
  312. "05",
  313. "4",
  314. "-1",
  315. "31",
  316. "123",
  317. "0123",
  318. }
  319. );
  320. test_schema(
  321. "min 1 max 900719925474091",
  322. // Schema
  323. R"""({
  324. "type": "integer",
  325. "exclusiveMinimum": 0,
  326. "maximum": 900719925474091
  327. })""",
  328. // Passing strings
  329. {
  330. "1",
  331. "2",
  332. "10",
  333. "900719925474090",
  334. "900719925474091",
  335. },
  336. // Failing strings
  337. {
  338. "0",
  339. "01",
  340. "900719925474092",
  341. "9007199254740910",
  342. }
  343. );
  344. test_schema(
  345. "min -1 max 1",
  346. R"""({
  347. "type": "integer",
  348. "minimum": -1,
  349. "maximum": 1
  350. })""",
  351. // Passing strings
  352. {
  353. "-1",
  354. "0",
  355. "1",
  356. },
  357. // Failing strings
  358. {
  359. "-11",
  360. "-10",
  361. "-2",
  362. "2",
  363. "10",
  364. "11",
  365. }
  366. );
  367. test_schema(
  368. "min -123 max 42",
  369. R"""({
  370. "type": "integer",
  371. "minimum": -123,
  372. "maximum": 42
  373. })""",
  374. // Passing strings
  375. {
  376. "-123",
  377. "-122",
  378. "-13",
  379. "-11",
  380. "-2",
  381. "-1",
  382. "0",
  383. "1",
  384. "5",
  385. "10",
  386. "39",
  387. "40",
  388. "42",
  389. },
  390. // Failing strings
  391. {
  392. "-0123",
  393. "-124",
  394. "-1123",
  395. "-200",
  396. "43",
  397. "123",
  398. "0123",
  399. }
  400. );
  401. test_schema(
  402. "exclusive min / max",
  403. // Schema
  404. R"""({
  405. "type": "integer",
  406. "exclusiveMinimum": 0,
  407. "exclusiveMaximum": 10000
  408. })""",
  409. // Passing strings
  410. {
  411. "1",
  412. "9999",
  413. },
  414. // Failing strings
  415. {
  416. "0",
  417. "01",
  418. "10000",
  419. "99999",
  420. }
  421. );
  422. // Test case for a simple grammar
  423. test_grammar(
  424. "simple grammar",
  425. R"""(
  426. root ::= expr
  427. expr ::= term ("+" term)*
  428. term ::= number
  429. number ::= [0-9]+)""",
  430. // Passing strings
  431. {
  432. "42",
  433. "1+2+3+4+5",
  434. "123+456",
  435. },
  436. // Failing strings
  437. {
  438. "+",
  439. "/ 3",
  440. "1+2+3+4+5+",
  441. "12a45",
  442. }
  443. );
  444. // Test case for a simple grammar with tokens
  445. test_grammar(
  446. "simple grammar with tokens",
  447. R"""(
  448. root ::= <[10]> content <[11]>
  449. content ::= (!<[11]>)*)""",
  450. // Passing strings
  451. {
  452. token(10) + "hello world" + token(11),
  453. token(10) + "text with " + token(12) + " other tokens " + token(13) + " mixed in" + token(11),
  454. token(10) + token(11),
  455. token(10) + token(12) + token(13) + token(14) + token(15) + token(11),
  456. token(10) + "a" + token(11),
  457. },
  458. // Failing strings
  459. {
  460. token(10) + "missing end token",
  461. token(10),
  462. "missing start token" + token(11),
  463. token(10) + token(11) + token(11), // double end token
  464. token(11) + "wrong order" + token(10),
  465. }
  466. );
  467. }
// Exercises a small arithmetic-expression grammar and a token-delimited
// reasoning/content/tool-call grammar with many accept/reject cases.
static void test_complex_grammar() {
    // Test case for a more complex grammar, with both failure strings and success strings
    test_grammar(
        "medium complexity grammar",
        // Grammar
        R"""(
root ::= expression
expression ::= term ws (("+"|"-") ws term)*
term ::= factor ws (("*"|"/") ws factor)*
factor ::= number | variable | "(" expression ")" | function-call
number ::= [0-9]+
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
ws ::= [ \t\n\r]?)""",
        // Passing strings
        {
            "42",
            "1*2*3*4*5",
            "x",
            "x+10",
            "x1+y2",
            "(a+b)*(c-d)",
            "func()",
            "func(x,y+2)",
            "a*(b+c)-d/e",
            "f(g(x),h(y,z))",
            "x + 10",
            "x1 + y2",
            "(a + b) * (c - d)",
            "func()",
            "func(x, y + 2)",
            "a * (b + c) - d / e",
            "f(g(x), h(y, z))",
            "123+456",
            "123*456*789-123/456+789*123",
            "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
        },
        // Failing strings (malformed operators, unbalanced parens, dangling operators)
        {
            "+",
            "/ 3x",
            "x + + y",
            "a * / b",
            "func(,)",
            "func(x y)",
            "(a + b",
            "x + y)",
            "a + b * (c - d",
            "42 +",
            "x +",
            "x + 10 +",
            "(a + b) * (c - d",
            "func(",
            "func(x, y + 2",
            "a * (b + c) - d /",
            "f(g(x), h(y, z)",
            "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
        }
    );
    // Test case for a more complex grammar with tokens
    test_grammar(
        "complex grammar with tokens",
        R"""(
root ::= reasoning+ content tool-call*
reasoning ::= <[10]> (!<[11]>)* <[11]>
content ::= <[20]> (!<[21]>)* <[21]>
tool-call ::= <[12]> name <[13]> args <[14]>
name ::= (!<[13]>)+
args ::= (!<[14]>)*)""",
        // Passing strings: one or more reasoning blocks, then content, then zero or more tool calls
        {
            token(10) + "I am thinking" + token(11) + token(20) + "hello world!" + token(21) + token(12) + "search" + token(13) + "query=test" + token(14),
            token(10) + "reasoning 1" + token(11) + token(10) + "reasoning 2" + token(11) + token(20) + token(21) + token(12) + "tool" + token(13) + token(14),
            token(10) + token(11) + token(20) + "content" + token(21),
            token(10) + "think" + token(12) + " nested" + token(11) + token(20) + token(10) + "more content" + token(21) + token(12) + "fn" + token(13) + "x=1,y=2" + token(14) + token(12) + "fn2" + token(13) + token(14),
            token(10) + "reasoning" + token(11) + token(10) + "more" + token(11) + token(10) + "even more" + token(11) + token(20) + "text" + token(21) + token(12) + "a" + token(13) + "b" + token(14) + token(12) + "c" + token(13) + "d" + token(14),
        },
        // Failing strings: missing/unterminated sections or wrong token ordering
        {
            token(20) + "content only" + token(21),
            token(10) + "no closing reasoning",
            token(10) + token(11) + token(20) + "no closing content",
            token(10) + token(11) + token(20) + token(21) + token(12) + "incomplete tool",
            token(10) + token(11) + token(11) + token(20) + token(21),
        }
    );
}
static void test_special_chars() {
    // A collection of tests to exercise special characters such as "."
    // (root matches exactly three arbitrary characters, "abc", then three more)
    test_grammar(
        "special characters",
        // Grammar
        R"""(
root ::= ... "abc" ...
)""",
        // Passing strings
        {
            "abcabcabc",
            "aaaabcccc",
            // NOTE: Also ensures that multi-byte characters still count as a single character
            "🔵🟠✅abc❌🟠🔵"
        },
        // Failing strings (wrong number of surrounding characters)
        {
            "aaabcccc",
            "aaaaabcccc",
            "aaaabccc",
            "aaaabccccc",
            "🔵🟠✅❌abc❌✅🟠🔵",
            "🔵🟠abc🟠🔵"
        }
    );
}
static void test_quantifiers() {
    // A collection of tests to exercise * + and ? quantifiers,
    // as well as the {n}, {n,}, {0,n} and {n,m} repetition forms
    test_grammar(
        "* quantifier",
        // Grammar: zero or more "a"
        R"""(root ::= "a"*)""",
        // Passing strings
        {
            "",
            "a",
            "aaaaa",
            "aaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
        },
        // Failing strings
        {
            "b",
            "ab",
            "aab",
            "ba",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
        }
    );
    test_grammar(
        "+ quantifier",
        // Grammar: one or more "a"
        R"""(root ::= "a"+)""",
        // Passing strings
        {
            "a",
            "aaaaa",
            "aaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
        },
        // Failing strings (note: the empty string now fails)
        {
            "",
            "b",
            "ab",
            "aab",
            "ba",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
        }
    );
    test_grammar(
        "? quantifier",
        // Grammar: zero or one "a"
        R"""(root ::= "a"?)""",
        // Passing strings
        {
            "",
            "a"
        },
        // Failing strings
        {
            "b",
            "ab",
            "aa",
            "ba",
        }
    );
    test_grammar(
        "mixed quantifiers",
        // Grammar
        R"""(
root ::= cons+ vowel* cons? (vowel cons)*
vowel ::= [aeiouy]
cons ::= [bcdfghjklmnpqrstvwxyz]
)""",
        // Passing strings
        {
            "yes",
            "no",
            "noyes",
            "crwth",
            "four",
            "bryyyy",
        },
        // Failing strings
        {
            "yess",
            "yesno",
            "forty",
            "catyyy",
        }
    );
    test_grammar(
        "simple exact repetition",
        // Grammar: exactly 4 characters from [ab]
        R"""(
root ::= [ab]{4}
)""",
        // Passing strings
        {
            "aaaa",
            "bbbb",
            "abab",
        },
        // Failing strings
        {
            "a",
            "b",
            "aaaaa",
        }
    );
    test_grammar(
        "simple min repetition",
        // Grammar: at least 4 characters from [ab]
        R"""(
root ::= [ab]{4,}
)""",
        // Passing strings
        {
            "aaaa",
            "aaaaab",
            "bbbb",
            "ababab",
        },
        // Failing strings
        {
            "",
            "aba",
        }
    );
    test_grammar(
        "simple max repetition",
        // Grammar: at most 4 characters from [ab]
        R"""(
root ::= [ab]{0,4}
)""",
        // Passing strings
        {
            "",
            "a",
            "aa",
            "aaa",
            "aaab",
        },
        // Failing strings
        {
            "aaaaa",
        }
    );
    test_grammar(
        "min / max repetition",
        // Grammar: 3 to 5 hex bytes, optionally space-separated
        R"""(
root ::= ("0x" [A-F0-9]{2} " "?){3,5}
)""",
        // Passing strings
        {
            "0xFF 0x12 0xAB",
            "0xFF 0x12 0xAB 0x00 0x00",
        },
        // Failing strings
        {
            "",
            "0xFF",
            "0xFF 0x12",
            "0xFF 0x12 0xAB 0x00 0x00 0x00",
        }
    );
}
  744. static void test_failure_missing_root() {
  745. fprintf(stderr, "⚫ Testing missing root node:\n");
  746. // Test case for a grammar that is missing a root rule
  747. const std::string grammar_str = R"""(
  748. rot ::= expr
  749. expr ::= term ("+" term)*
  750. term ::= number
  751. number ::= [0-9]+)""";
  752. llama_grammar_parser parsed_grammar;
  753. parsed_grammar.parse(grammar_str.c_str());
  754. // Ensure we parsed correctly
  755. assert(!parsed_grammar.rules.empty());
  756. // Ensure we do NOT have a root node
  757. assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
  758. fprintf(stderr, " ✅︎ Passed\n");
  759. }
  760. static void test_failure_missing_reference() {
  761. fprintf(stderr, "⚫ Testing missing reference node:\n");
  762. // Test case for a grammar that is missing a referenced rule
  763. const std::string grammar_str =
  764. R"""(root ::= expr
  765. expr ::= term ("+" term)*
  766. term ::= numero
  767. number ::= [0-9]+)""";
  768. fprintf(stderr, " Expected error: ");
  769. llama_grammar_parser parsed_grammar;
  770. parsed_grammar.parse(grammar_str.c_str());
  771. // Ensure we did NOT parsed correctly
  772. assert(parsed_grammar.rules.empty());
  773. fprintf(stderr, " End of expected error.\n");
  774. fprintf(stderr, " ✅︎ Passed\n");
  775. }
  776. static void test_failure_left_recursion() {
  777. fprintf(stderr, "⚫ Testing left recursion detection:\n");
  778. // Test simple left recursion detection
  779. const std::string simple_str = R"""(root ::= "a" | root "a")""";
  780. assert(test_build_grammar_fails(simple_str));
  781. // Test more complicated left recursion detection
  782. const std::string medium_str = R"""(
  783. root ::= asdf
  784. asdf ::= "a" | asdf "a"
  785. )""";
  786. assert(test_build_grammar_fails(medium_str));
  787. // Test even more complicated left recursion detection
  788. const std::string hard_str = R"""(
  789. root ::= asdf
  790. asdf ::= "a" | foo "b"
  791. foo ::= "c" | asdf "d" | "e")""";
  792. assert(test_build_grammar_fails(hard_str));
  793. // Test yet even more complicated left recursion detection
  794. const std::string hardest_str = R"""(
  795. root ::= asdf
  796. asdf ::= "a" | foo "b"
  797. foo ::= "c" | empty asdf "d" | "e"
  798. empty ::= "blah" | )""";
  799. assert(test_build_grammar_fails(hardest_str));
  800. fprintf(stderr, " ✅︎ Passed\n");
  801. }
  802. static void test_json_schema() {
  803. // Note that this is similar to the regular grammar tests,
  804. // but we convert each json schema to a grammar before parsing.
  805. // Otherwise, this test structure is the same.
  806. test_schema(
  807. "empty schema (object)",
  808. // Schema
  809. R"""(
  810. {}
  811. )""",
  812. // Passing strings
  813. {
  814. R"""({})""",
  815. R"""({"foo": "bar"})""",
  816. },
  817. // Failing strings
  818. {
  819. "",
  820. "[]",
  821. "null",
  822. R"""("")""",
  823. "true",
  824. }
  825. );
  826. test_schema(
  827. "exotic formats (list)",
  828. // Schema
  829. R"""({
  830. "items": [
  831. { "format": "date" },
  832. { "format": "uuid" },
  833. { "format": "time" },
  834. { "format": "date-time" }
  835. ]
  836. })""",
  837. // Passing strings
  838. {
  839. // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  840. // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  841. R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
  842. //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  843. //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
  844. },
  845. // Failing strings
  846. {
  847. R"""(["foo", "bar"])""",
  848. R"""(["12345678-1234-1234-1234-1234567890ab"])""",
  849. }
  850. );
  851. test_schema(
  852. "string",
  853. // Schema
  854. R"""({
  855. "type": "string"
  856. })""",
  857. // Passing strings
  858. {
  859. R"""("foo")""",
  860. R"""("bar")""",
  861. R"""("")""",
  862. },
  863. // Failing strings
  864. {
  865. R"""({})""",
  866. R"""("foo": "bar")""",
  867. }
  868. );
  869. test_schema(
  870. "string w/ min length 1",
  871. // Schema
  872. R"""({
  873. "type": "string",
  874. "minLength": 1
  875. })""",
  876. // Passing strings
  877. {
  878. R"""("foo")""",
  879. R"""("bar")""",
  880. },
  881. // Failing strings
  882. {
  883. R"""("")""",
  884. R"""({})""",
  885. R"""("foo": "bar")""",
  886. }
  887. );
  888. test_schema(
  889. "string w/ min length 3",
  890. // Schema
  891. R"""({
  892. "type": "string",
  893. "minLength": 3
  894. })""",
  895. // Passing strings
  896. {
  897. R"""("foo")""",
  898. R"""("bar")""",
  899. R"""("foobar")""",
  900. },
  901. // Failing strings
  902. {
  903. R"""("")""",
  904. R"""("f")""",
  905. R"""("fo")""",
  906. }
  907. );
  908. test_schema(
  909. "string w/ max length",
  910. // Schema
  911. R"""({
  912. "type": "string",
  913. "maxLength": 3
  914. })""",
  915. // Passing strings
  916. {
  917. R"""("foo")""",
  918. R"""("bar")""",
  919. R"""("")""",
  920. R"""("f")""",
  921. R"""("fo")""",
  922. },
  923. // Failing strings
  924. {
  925. R"""("foobar")""",
  926. }
  927. );
  928. test_schema(
  929. "string w/ min & max length",
  930. // Schema
  931. R"""({
  932. "type": "string",
  933. "minLength": 1,
  934. "maxLength": 4
  935. })""",
  936. // Passing strings
  937. {
  938. R"""("foo")""",
  939. R"""("bar")""",
  940. R"""("f")""",
  941. R"""("barf")""",
  942. },
  943. // Failing strings
  944. {
  945. R"""("")""",
  946. R"""("barfo")""",
  947. R"""("foobar")""",
  948. }
  949. );
  950. test_schema(
  951. "boolean",
  952. // Schema
  953. R"""({
  954. "type": "boolean"
  955. })""",
  956. // Passing strings
  957. {
  958. "true",
  959. "false",
  960. },
  961. // Failing strings
  962. {
  963. R"""("")""",
  964. R"""("true")""",
  965. R"""(True)""",
  966. R"""(FALSE)""",
  967. }
  968. );
  969. test_schema(
  970. "integer",
  971. // Schema
  972. R"""({
  973. "type": "integer"
  974. })""",
  975. // Passing strings
  976. {
  977. R"""(0)""",
  978. R"""(12345)""",
  979. R"""(1234567890123456)""",
  980. },
  981. // Failing strings
  982. {
  983. R"""()""",
  984. R"""(01)""",
  985. R"""(007)""",
  986. R"""(12345678901234567 )""",
  987. }
  988. );
  989. test_schema(
  990. "string const",
  991. // Schema
  992. R"""({
  993. "const": "foo"
  994. })""",
  995. // Passing strings
  996. {
  997. R"""("foo")""",
  998. },
  999. // Failing strings
  1000. {
  1001. R"""(foo)""",
  1002. R"""("bar")""",
  1003. }
  1004. );
  1005. test_schema(
  1006. "non-string const",
  1007. // Schema
  1008. R"""({
  1009. "const": true
  1010. })""",
  1011. // Passing strings
  1012. {
  1013. R"""(true)""",
  1014. },
  1015. // Failing strings
  1016. {
  1017. R"""()""",
  1018. R"""(foo)""",
  1019. R"""("true")""",
  1020. }
  1021. );
  1022. test_schema(
  1023. "non-string const",
  1024. // Schema
  1025. R"""({
  1026. "enum": ["red", "amber", "green", null, 42, ["foo"]]
  1027. })""",
  1028. // Passing strings
  1029. {
  1030. R"""("red")""",
  1031. R"""(null)""",
  1032. R"""(42)""",
  1033. R"""(["foo"])""",
  1034. },
  1035. // Failing strings
  1036. {
  1037. R"""()""",
  1038. R"""(420)""",
  1039. R"""(true)""",
  1040. R"""(foo)""",
  1041. }
  1042. );
// "pattern": anchored character-class regex — the empty string is valid for a
// `*` quantifier; characters outside the class (including space) must fail.
test_schema(
"simple pattern",
// Schema
R"""({
"pattern": "^[a-zA-Z0-9_-]*$"
})""",
// Passing strings
{
R"""("")""",
R"""("He_llo-12")""",
},
// Failing strings
{
R"""("!")""",
R"""("Hello World")""",
}
);
// "pattern" with every regex metacharacter backslash-escaped (doubly escaped
// for JSON): the escapes must be treated as literal characters, so "ab" —
// which would match if the escapes were interpreted as metacharacters — fails.
test_schema(
"pattern with escapes",
// Schema
R"""({
"pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
})""",
// Passing strings
{
R"""("a^$.[]()|{}*+?b")""",
},
// Failing strings
{
R"""("ab")""",
}
);
  1075. test_schema(
  1076. "",
  1077. // Schema
  1078. R"""(
  1079. {
  1080. "type": ["array", "null"],
  1081. "items": { "type": "string" }
  1082. }
  1083. )""",
  1084. // Passing strings
  1085. {
  1086. "null",
  1087. "[]",
  1088. "[\"123\"]",
  1089. "[\"foo\", \"bar\"]",
  1090. },
  1091. // Failing strings
  1092. {
  1093. "",
  1094. "[123]",
  1095. "\"foo\"",
  1096. "[\"foo\", 42]",
  1097. }
  1098. );
// minItems/maxItems: arrays of 3..5 numbers pass; shorter/longer arrays and a
// bare scalar fail. Note there is no explicit "type": "array" — the presence
// of "items" is enough to constrain the value to an array here.
test_schema(
"min+max items",
// Schema
R"""({
"items": {
"type": ["number", "integer"]
},
"minItems": 3,
"maxItems": 5
})""",
// Passing strings
{
R"""([1, 2, 3])""",
R"""([1, 2, 3, 4])""",
R"""([1, 2, 3, 4, 5])""",
},
// Failing strings
{
R"""([1, 2])""",
R"""([1, 2, 3, 4, 5, 6])""",
R"""(1)""",
}
);
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
// Plain "properties" object: individual properties may be omitted, but the
// generated grammar emits them in declaration order only — reordered objects
// fail even though the JSON Schema spec would accept them.
test_schema(
"object properties",
// Schema
R"""({
"type": "object",
"properties": {
"number": { "type": "number" },
"street_name": { "type": "string" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
}
})""",
// Passing strings
{
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
// "By default, leaving out properties is valid"
R"""({ "street_name": "Pennsylvania" })""",
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
// "By extension, even an empty object is valid"
R"""({})""",
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
},
// Failing strings
{
// Change datatype from number to string
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
// Reorder properties
R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
// Reorder properties
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
// Additional properties default to false for generation, even though the spec says true.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
}
);
// additionalProperties: true must not relax the declared properties — "a" and
// "b" still have to be integers, while genuinely new keys (including "a_",
// which merely shares a prefix with "a") may hold any type.
test_schema(
"additional properties can't override other properties",
R"""({
"properties": {
"a": {"type": "integer"},
"b": {"type": "integer"}
},
"additionalProperties": true
})""",
// Passing strings
{
R"""({"a": 42})""",
R"""({"c": ""})""",
R"""({"a": 42, "c": ""})""",
R"""({"a_": ""})""",
},
// Failing strings
{
R"""()""",
R"""({"a": ""})""",
R"""({"a": "", "b": ""})""",
}
);
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
// Same object as above but with explicit "additionalProperties": true —
// unknown keys such as "direction" now pass, though declared properties must
// still appear in declaration order.
test_schema(
"object properties, additionalProperties: true",
// Schema
R"""({
"type": "object",
"properties": {
"number": { "type": "number" },
"street_name": { "type": "string" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
},
"additionalProperties": true
})""",
// Passing strings
{
// "By extension, even an empty object is valid"
R"""({})""",
R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
// "By default, leaving out properties is valid"
R"""({ "street_name": "Pennsylvania" })""",
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
// "By default, providing additional properties is valid"
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
},
// Failing strings
{
// Change datatype from number to string
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
// Reorder properties
R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
}
);
  1212. // Additional properties: false
  1213. test_schema(
  1214. "required + optional props each in original order",
  1215. // Schema
  1216. R"""({
  1217. "type": "object",
  1218. "properties": {
  1219. "number": { "type": "number" },
  1220. "street_name": { "type": "string" },
  1221. "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
  1222. },
  1223. "additionalProperties": false
  1224. })""",
  1225. // Passing strings
  1226. {
  1227. R"""({ "street_name": "Pennsylvania" })""",
  1228. R"""({ "number": 1600, "street_type":"Avenue"})""",
  1229. R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
  1230. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
  1231. // Spaces are permitted around enum values
  1232. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
  1233. },
  1234. // Failing strings
  1235. {
  1236. // Reorder properties
  1237. R"""({ "street_type": "Avenue", "number": 1600 })""",
  1238. // Add "direction"
  1239. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
  1240. }
  1241. );
// Required vs. optional properties: required ("a", "b") must all be present
// and optional ("d", "c") may follow, but each group must keep its original
// declaration order — hence {"a", "b"} reordered or optionals out of order
// fail, and omitting a required key fails.
test_schema(
"required + optional props each in original order",
// Schema
R"""({
"properties": {
"b": {"type": "string"},
"a": {"type": "string"},
"d": {"type": "string"},
"c": {"type": "string"}
},
"required": ["a", "b"],
"additionalProperties": false
})""",
// Passing strings
{
R"""({"b": "foo", "a": "bar"})""",
R"""({"b":"foo","a":"bar","d":"qux"})""",
R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
},
// Failing strings
{
R"""({"a": "foo", "b": "bar"})""",
R"""({"b": "bar"})""",
R"""({"a": "foo", "c": "baz"})""",
R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
}
);
// NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
// Realistic nested schema: top-level required props, a nested object with its
// own required list, and array constraints (minItems). Unsupported keywords
// (exclusiveMinimum, uniqueItems) are ignored by the generator — see the
// commented-out failing cases below.
test_schema(
"required props",
// Schema
R"""({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://example.com/product.schema.json",
"title": "Product",
"description": "A product from Acme's catalog",
"type": "object",
"properties": {
"productId": {
"description": "The unique identifier for a product",
"type": "integer"
},
"productName": {
"description": "Name of the product",
"type": "string"
},
"price": {
"description": "The price of the product",
"type": "number",
"exclusiveMinimum": 0
},
"tags": {
"description": "Tags for the product",
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": true
},
"dimensions": {
"type": "object",
"properties": {
"length": {
"type": "number"
},
"width": {
"type": "number"
},
"height": {
"type": "number"
}
},
"required": [ "length", "width", "height" ]
}
},
"required": [ "productId", "productName", "price" ]
})""",
// Passing strings
{
R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
},
// Failing strings
{
R"""({})""", // Missing all required properties
R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
// TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
// Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
// R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
// TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
// R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
}
);
  1341. }
  1342. int main() {
  1343. fprintf(stdout, "Running grammar integration tests...\n");
  1344. test_simple_grammar();
  1345. test_complex_grammar();
  1346. test_special_chars();
  1347. test_quantifiers();
  1348. test_failure_missing_root();
  1349. test_failure_missing_reference();
  1350. test_failure_left_recursion();
  1351. test_json_schema();
  1352. fprintf(stdout, "All tests passed.\n");
  1353. return 0;
  1354. }