| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325 |
- #ifdef NDEBUG
- #undef NDEBUG
- #endif
- #define LLAMA_API_INTERNAL
- #include "ggml.h"
- #include "llama.h"
- #include "grammar-parser.h"
- #include "json-schema-to-grammar.h"
- #include "unicode.h"
- #include <cassert>
- #include <string>
- #include <vector>
- using json = nlohmann::ordered_json;
- static llama_grammar* build_grammar(const std::string & grammar_str) {
- auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
- // Ensure we parsed correctly
- assert(!parsed_grammar.rules.empty());
- // Ensure we have a root node
- assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
- std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
- llama_grammar* grammar = llama_grammar_init(
- grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
- return grammar;
- }
- static bool test_build_grammar_fails(const std::string & grammar_str) {
- fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
- bool grammar_fails = false;
- llama_grammar * grammar = build_grammar(grammar_str);
- if (grammar != nullptr) {
- fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
- } else {
- grammar_fails = true;
- fprintf(stdout, " ✅︎\n");
- }
- return grammar_fails;
- }
- static bool match_string(const std::string & input, llama_grammar * grammar) {
- auto decoded = decode_utf8(input, {});
- const auto & code_points = decoded.first;
- const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
- llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
- for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
- const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
- llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
- if (cur_stacks.empty()) {
- // no stacks means that the grammar failed to match at this point
- return false;
- }
- }
- for (const auto & stack : cur_stacks) {
- if (stack.empty()) {
- // An empty stack means that the grammar has been completed
- return true;
- }
- }
- return false;
- }
- static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
- fflush(stderr);
- auto grammar = build_grammar(grammar_str);
- // Save the original grammar stacks so that we can reset after every new string we want to test
- const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
- llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
- fprintf(stderr, " 🔵 Valid strings:\n");
- // Passing strings
- for (const auto & test_string : passing_strings) {
- fprintf(stderr, " \"%s\" ", test_string.c_str());
- fflush(stderr);
- bool matched = match_string(test_string, grammar);
- if (!matched) {
- fprintf(stderr, "❌ (failed to match)\n");
- // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
- // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
- FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
- if (grammar_file) {
- fprintf(grammar_file, "%s", grammar_str.c_str());
- fclose(grammar_file);
- }
- // DEBUG: Write the test string to test-grammar-integration.string.txt
- FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
- if (string_file) {
- fprintf(string_file, "%s", test_string.c_str());
- fclose(string_file);
- }
- fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
- } else {
- fprintf(stdout, "✅︎\n");
- }
- assert(matched);
- // Reset the grammar stacks
- cur_stacks = original_stacks;
- }
- fprintf(stderr, " 🟠 Invalid strings:\n");
- // Failing strings
- for (const auto & test_string : failing_strings) {
- fprintf(stderr, " \"%s\" ", test_string.c_str());
- fflush(stderr);
- bool matched = match_string(test_string, grammar);
- if (matched) {
- fprintf(stderr, "❌ (incorrectly matched)\n");
- } else {
- fprintf(stdout, "✅︎\n");
- }
- assert(!matched);
- // Reset the grammar stacks
- cur_stacks = original_stacks;
- }
- // Clean up allocated memory
- llama_grammar_free(grammar);
- }
- static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
- }
- static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
- }
- static void test_simple_grammar() {
- test_schema(
- "min 0",
- R"""({
- "type": "integer",
- "minimum": 0
- })""",
- // Passing strings
- {
- "0",
- "10",
- "12",
- "10000",
- },
- // Failing strings
- {
- "-1",
- "-10",
- "-10000",
- "-100000000000000000000000000000000",
- "100000000000000000000000000000000",
- "00",
- "01",
- "-0",
- }
- );
- test_schema(
- "min 2",
- // Schema
- R"""({
- "type": "integer",
- "minimum": 2
- })""",
- // Passing strings
- {
- "2",
- "3",
- "4",
- "10",
- "20",
- "1234567890000000",
- },
- // Failing strings
- {
- "0",
- "1",
- "-1",
- "-100",
- "0",
- "1",
- "01",
- "02",
- "12345678900000000",
- }
- );
- test_schema(
- "min 456",
- R"""({
- "type": "integer",
- "minimum": 456
- })""",
- // Passing strings
- {
- "456",
- "4560",
- "457",
- "460",
- "500",
- },
- // Failing strings
- {
- "455",
- "356",
- "50",
- "050",
- "-1",
- "-456",
- }
- );
- test_schema(
- "min -123",
- R"""({
- "type": "integer",
- "minimum": -123
- })""",
- // Passing strings
- {
- "-123",
- "-122",
- "-11",
- "-1",
- "0",
- "1",
- "123",
- "1234",
- "2345",
- },
- // Failing strings
- {
- "-1234",
- "-124",
- }
- );
- test_schema(
- "max 9999",
- // Schema
- R"""({
- "type": "integer",
- "maximum": 9999
- })""",
- // Passing strings
- {
- "-99999",
- "0",
- "9999",
- },
- // Failing strings
- {
- "10000",
- "99991",
- }
- );
- test_schema(
- "max -9999",
- // Schema
- R"""({
- "type": "integer",
- "maximum": -9999
- })""",
- // Passing strings
- {
- "-10000",
- "-9999",
- },
- // Failing strings
- {
- "-9998",
- "0",
- "9999",
- }
- );
- test_schema(
- "min 5 max 30",
- // Schema
- R"""({
- "type": "integer",
- "minimum": 5,
- "maximum": 30
- })""",
- // Passing strings
- {
- "5",
- "10",
- "30",
- },
- // Failing strings
- {
- "05",
- "4",
- "-1",
- "31",
- "123",
- "0123",
- }
- );
- test_schema(
- "min -1 max 1",
- R"""({
- "type": "integer",
- "minimum": -1,
- "maximum": 1
- })""",
- // Passing strings
- {
- "-1",
- "0",
- "1",
- },
- // Failing strings
- {
- "-11",
- "-10",
- "-2",
- "2",
- "10",
- "11",
- }
- );
- test_schema(
- "min -123 max 42",
- R"""({
- "type": "integer",
- "minimum": -123,
- "maximum": 42
- })""",
- // Passing strings
- {
- "-123",
- "-122",
- "-13",
- "-11",
- "-2",
- "-1",
- "0",
- "1",
- "5",
- "10",
- "39",
- "40",
- "42",
- },
- // Failing strings
- {
- "-0123",
- "-124",
- "-1123",
- "-200",
- "43",
- "123",
- "0123",
- }
- );
- test_schema(
- "exclusive min / max",
- // Schema
- R"""({
- "type": "integer",
- "exclusiveMinimum": 0,
- "exclusiveMaximum": 10000
- })""",
- // Passing strings
- {
- "1",
- "9999",
- },
- // Failing strings
- {
- "0",
- "01",
- "10000",
- "99999",
- }
- );
- // Test case for a simple grammar
- test_grammar(
- "simple grammar",
- R"""(
- root ::= expr
- expr ::= term ("+" term)*
- term ::= number
- number ::= [0-9]+)""",
- // Passing strings
- {
- "42",
- "1+2+3+4+5",
- "123+456",
- },
- // Failing strings
- {
- "+",
- "/ 3",
- "1+2+3+4+5+",
- "12a45",
- }
- );
- }
- static void test_complex_grammar() {
- // Test case for a more complex grammar, with both failure strings and success strings
- test_grammar(
- "medium complexity grammar",
- // Grammar
- R"""(
- root ::= expression
- expression ::= term ws (("+"|"-") ws term)*
- term ::= factor ws (("*"|"/") ws factor)*
- factor ::= number | variable | "(" expression ")" | function-call
- number ::= [0-9]+
- variable ::= [a-zA-Z_][a-zA-Z0-9_]*
- function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
- ws ::= [ \t\n\r]?)""",
- // Passing strings
- {
- "42",
- "1*2*3*4*5",
- "x",
- "x+10",
- "x1+y2",
- "(a+b)*(c-d)",
- "func()",
- "func(x,y+2)",
- "a*(b+c)-d/e",
- "f(g(x),h(y,z))",
- "x + 10",
- "x1 + y2",
- "(a + b) * (c - d)",
- "func()",
- "func(x, y + 2)",
- "a * (b + c) - d / e",
- "f(g(x), h(y, z))",
- "123+456",
- "123*456*789-123/456+789*123",
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
- },
- // Failing strings
- {
- "+",
- "/ 3x",
- "x + + y",
- "a * / b",
- "func(,)",
- "func(x y)",
- "(a + b",
- "x + y)",
- "a + b * (c - d",
- "42 +",
- "x +",
- "x + 10 +",
- "(a + b) * (c - d",
- "func(",
- "func(x, y + 2",
- "a * (b + c) - d /",
- "f(g(x), h(y, z)",
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
- }
- );
- }
- static void test_special_chars() {
- // A collection of tests to exercise special characters such as "."
- test_grammar(
- "special characters",
- // Grammar
- R"""(
- root ::= ... "abc" ...
- )""",
- // Passing strings
- {
- "abcabcabc",
- "aaaabcccc",
- // NOTE: Also ensures that multi-byte characters still count as a single character
- "🔵🟠✅abc❌🟠🔵"
- },
- // Failing strings
- {
- "aaabcccc",
- "aaaaabcccc",
- "aaaabccc",
- "aaaabccccc",
- "🔵🟠✅❌abc❌✅🟠🔵",
- "🔵🟠abc🟠🔵"
- }
- );
- }
- static void test_quantifiers() {
- // A collection of tests to exercise * + and ? quantifiers
- test_grammar(
- "* quantifier",
- // Grammar
- R"""(root ::= "a"*)""",
- // Passing strings
- {
- "",
- "a",
- "aaaaa",
- "aaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- },
- // Failing strings
- {
- "b",
- "ab",
- "aab",
- "ba",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
- }
- );
- test_grammar(
- "+ quantifier",
- // Grammar
- R"""(root ::= "a"+)""",
- // Passing strings
- {
- "a",
- "aaaaa",
- "aaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- },
- // Failing strings
- {
- "",
- "b",
- "ab",
- "aab",
- "ba",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
- }
- );
- test_grammar(
- "? quantifier",
- // Grammar
- R"""(root ::= "a"?)""",
- // Passing strings
- {
- "",
- "a"
- },
- // Failing strings
- {
- "b",
- "ab",
- "aa",
- "ba",
- }
- );
- test_grammar(
- "mixed quantifiers",
- // Grammar
- R"""(
- root ::= cons+ vowel* cons? (vowel cons)*
- vowel ::= [aeiouy]
- cons ::= [bcdfghjklmnpqrstvwxyz]
- )""",
- // Passing strings
- {
- "yes",
- "no",
- "noyes",
- "crwth",
- "four",
- "bryyyy",
- },
- // Failing strings
- {
- "yess",
- "yesno",
- "forty",
- "catyyy",
- }
- );
- test_grammar(
- "simple exact repetition",
- // Grammar
- R"""(
- root ::= [ab]{4}
- )""",
- // Passing strings
- {
- "aaaa",
- "bbbb",
- "abab",
- },
- // Failing strings
- {
- "a",
- "b",
- "aaaaa",
- }
- );
- test_grammar(
- "simple min repetition",
- // Grammar
- R"""(
- root ::= [ab]{4,}
- )""",
- // Passing strings
- {
- "aaaa",
- "aaaaab",
- "bbbb",
- "ababab",
- },
- // Failing strings
- {
- "",
- "aba",
- }
- );
- test_grammar(
- "simple max repetition",
- // Grammar
- R"""(
- root ::= [ab]{0,4}
- )""",
- // Passing strings
- {
- "",
- "a",
- "aa",
- "aaa",
- "aaab",
- },
- // Failing strings
- {
- "aaaaa",
- }
- );
- test_grammar(
- "min / max repetition",
- // Grammar
- R"""(
- root ::= ("0x" [A-F0-9]{2} " "?){3,5}
- )""",
- // Passing strings
- {
- "0xFF 0x12 0xAB",
- "0xFF 0x12 0xAB 0x00 0x00",
- },
- // Failing strings
- {
- "",
- "0xFF",
- "0xFF 0x12",
- "0xFF 0x12 0xAB 0x00 0x00 0x00",
- }
- );
- }
- static void test_failure_missing_root() {
- fprintf(stderr, "⚫ Testing missing root node:\n");
- // Test case for a grammar that is missing a root rule
- const std::string grammar_str = R"""(
- rot ::= expr
- expr ::= term ("+" term)*
- term ::= number
- number ::= [0-9]+)""";
- grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
- // Ensure we parsed correctly
- assert(!parsed_grammar.rules.empty());
- // Ensure we do NOT have a root node
- assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
- fprintf(stderr, " ✅︎ Passed\n");
- }
- static void test_failure_missing_reference() {
- fprintf(stderr, "⚫ Testing missing reference node:\n");
- // Test case for a grammar that is missing a referenced rule
- const std::string grammar_str =
- R"""(root ::= expr
- expr ::= term ("+" term)*
- term ::= numero
- number ::= [0-9]+)""";
- fprintf(stderr, " Expected error: ");
- grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
- // Ensure we did NOT parsed correctly
- assert(parsed_grammar.rules.empty());
- fprintf(stderr, " End of expected error.\n");
- fprintf(stderr, " ✅︎ Passed\n");
- }
- static void test_failure_left_recursion() {
- fprintf(stderr, "⚫ Testing left recursion detection:\n");
- // Test simple left recursion detection
- const std::string simple_str = R"""(root ::= "a" | root "a")""";
- assert(test_build_grammar_fails(simple_str));
- // Test more complicated left recursion detection
- const std::string medium_str = R"""(
- root ::= asdf
- asdf ::= "a" | asdf "a"
- )""";
- assert(test_build_grammar_fails(medium_str));
- // Test even more complicated left recursion detection
- const std::string hard_str = R"""(
- root ::= asdf
- asdf ::= "a" | foo "b"
- foo ::= "c" | asdf "d" | "e")""";
- assert(test_build_grammar_fails(hard_str));
- // Test yet even more complicated left recursion detection
- const std::string hardest_str = R"""(
- root ::= asdf
- asdf ::= "a" | foo "b"
- foo ::= "c" | empty asdf "d" | "e"
- empty ::= "blah" | )""";
- assert(test_build_grammar_fails(hardest_str));
- fprintf(stderr, " ✅︎ Passed\n");
- }
- static void test_json_schema() {
- // Note that this is similar to the regular grammar tests,
- // but we convert each json schema to a grammar before parsing.
- // Otherwise, this test structure is the same.
- test_schema(
- "empty schema (object)",
- // Schema
- R"""(
- {}
- )""",
- // Passing strings
- {
- R"""({})""",
- R"""({"foo": "bar"})""",
- },
- // Failing strings
- {
- "",
- "[]",
- "null",
- R"""("")""",
- "true",
- }
- );
- test_schema(
- "exotic formats (list)",
- // Schema
- R"""({
- "items": [
- { "format": "date" },
- { "format": "uuid" },
- { "format": "time" },
- { "format": "date-time" }
- ]
- })""",
- // Passing strings
- {
- // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
- //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- },
- // Failing strings
- {
- R"""(["foo", "bar"])""",
- R"""(["12345678-1234-1234-1234-1234567890ab"])""",
- }
- );
- test_schema(
- "string",
- // Schema
- R"""({
- "type": "string"
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("")""",
- },
- // Failing strings
- {
- R"""({})""",
- R"""("foo": "bar")""",
- }
- );
- test_schema(
- "string w/ min length 1",
- // Schema
- R"""({
- "type": "string",
- "minLength": 1
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""({})""",
- R"""("foo": "bar")""",
- }
- );
- test_schema(
- "string w/ min length 3",
- // Schema
- R"""({
- "type": "string",
- "minLength": 3
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("foobar")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("f")""",
- R"""("fo")""",
- }
- );
- test_schema(
- "string w/ max length",
- // Schema
- R"""({
- "type": "string",
- "maxLength": 3
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("")""",
- R"""("f")""",
- R"""("fo")""",
- },
- // Failing strings
- {
- R"""("foobar")""",
- }
- );
- test_schema(
- "string w/ min & max length",
- // Schema
- R"""({
- "type": "string",
- "minLength": 1,
- "maxLength": 4
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("f")""",
- R"""("barf")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("barfo")""",
- R"""("foobar")""",
- }
- );
- test_schema(
- "boolean",
- // Schema
- R"""({
- "type": "boolean"
- })""",
- // Passing strings
- {
- "true",
- "false",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("true")""",
- R"""(True)""",
- R"""(FALSE)""",
- }
- );
- test_schema(
- "integer",
- // Schema
- R"""({
- "type": "integer"
- })""",
- // Passing strings
- {
- R"""(0)""",
- R"""(12345)""",
- R"""(1234567890123456)""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(01)""",
- R"""(007)""",
- R"""(12345678901234567 )""",
- }
- );
- test_schema(
- "string const",
- // Schema
- R"""({
- "const": "foo"
- })""",
- // Passing strings
- {
- R"""("foo")""",
- },
- // Failing strings
- {
- R"""(foo)""",
- R"""("bar")""",
- }
- );
- test_schema(
- "non-string const",
- // Schema
- R"""({
- "const": true
- })""",
- // Passing strings
- {
- R"""(true)""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(foo)""",
- R"""("true")""",
- }
- );
- test_schema(
- "non-string const",
- // Schema
- R"""({
- "enum": ["red", "amber", "green", null, 42, ["foo"]]
- })""",
- // Passing strings
- {
- R"""("red")""",
- R"""(null)""",
- R"""(42)""",
- R"""(["foo"])""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(420)""",
- R"""(true)""",
- R"""(foo)""",
- }
- );
- test_schema(
- "simple pattern",
- // Schema
- R"""({
- "pattern": "^[a-zA-Z0-9_-]*$"
- })""",
- // Passing strings
- {
- R"""("")""",
- R"""("He_llo-12")""",
- },
- // Failing strings
- {
- R"""("!")""",
- R"""("Hello World")""",
- }
- );
- test_schema(
- "pattern with escapes",
- // Schema
- R"""({
- "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
- })""",
- // Passing strings
- {
- R"""("a^$.[]()|{}*+?b")""",
- },
- // Failing strings
- {
- R"""("ab")""",
- }
- );
- test_schema(
- "",
- // Schema
- R"""(
- {
- "type": ["array", "null"],
- "items": { "type": "string" }
- }
- )""",
- // Passing strings
- {
- "null",
- "[]",
- "[\"123\"]",
- "[\"foo\", \"bar\"]",
- },
- // Failing strings
- {
- "",
- "[123]",
- "\"foo\"",
- "[\"foo\", 42]",
- }
- );
- test_schema(
- "min+max items",
- // Schema
- R"""({
- "items": {
- "type": ["number", "integer"]
- },
- "minItems": 3,
- "maxItems": 5
- })""",
- // Passing strings
- {
- R"""([1, 2, 3])""",
- R"""([1, 2, 3, 4])""",
- R"""([1, 2, 3, 4, 5])""",
- },
- // Failing strings
- {
- R"""([1, 2])""",
- R"""([1, 2, 3, 4, 5, 6])""",
- R"""(1)""",
- }
- );
- // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
- test_schema(
- "object properties",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- }
- })""",
- // Passing strings
- {
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // "By default, leaving out properties is valid"
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- // "By extension, even an empty object is valid"
- R"""({})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Change datatype from number to string
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Reorder properties
- R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
- // Reorder properties
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // "Additional properties default to false for generation, even though the spec says true.
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- }
- );
- test_schema(
- "additional properties can't override other properties",
- R"""({
- "properties": {
- "a": {"type": "integer"},
- "b": {"type": "integer"}
- },
- "additionalProperties": true
- })""",
- // Passing strings
- {
- R"""({"a": 42})""",
- R"""({"c": ""})""",
- R"""({"a": 42, "c": ""})""",
- R"""({"a_": ""})""",
- },
- // Failing strings
- {
- R"""()""",
- R"""({"a": ""})""",
- R"""({"a": "", "b": ""})""",
- }
- );
- // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
- test_schema(
- "object properties, additionalProperties: true",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": true
- })""",
- // Passing strings
- {
- // "By extension, even an empty object is valid"
- R"""({})""",
- R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
- // "By default, leaving out properties is valid"
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- // "By default, providing additional properties is valid"
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Change datatype from number to string
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Reorder properties
- R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
- }
- );
- // Additional properties: false
- test_schema(
- "required + optional props each in original order",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": false
- })""",
- // Passing strings
- {
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_type":"Avenue"})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Spaces are permitted around enum values
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Reorder properties
- R"""({ "street_type": "Avenue", "number": 1600 })""",
- // Add "direction"
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
- }
- );
- test_schema(
- "required + optional props each in original order",
- // Schema
- R"""({
- "properties": {
- "b": {"type": "string"},
- "a": {"type": "string"},
- "d": {"type": "string"},
- "c": {"type": "string"}
- },
- "required": ["a", "b"],
- "additionalProperties": false
- })""",
- // Passing strings
- {
- R"""({"b": "foo", "a": "bar"})""",
- R"""({"b":"foo","a":"bar","d":"qux"})""",
- R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
- },
- // Failing strings
- {
- R"""({"a": "foo", "b": "bar"})""",
- R"""({"b": "bar"})""",
- R"""({"a": "foo", "c": "baz"})""",
- R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
- }
- );
- // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
- test_schema(
- "required props",
- // Schema
- R"""({
- "$schema": "https://json-schema.org/draft/2020-12/schema",
- "$id": "https://example.com/product.schema.json",
- "title": "Product",
- "description": "A product from Acme's catalog",
- "type": "object",
- "properties": {
- "productId": {
- "description": "The unique identifier for a product",
- "type": "integer"
- },
- "productName": {
- "description": "Name of the product",
- "type": "string"
- },
- "price": {
- "description": "The price of the product",
- "type": "number",
- "exclusiveMinimum": 0
- },
- "tags": {
- "description": "Tags for the product",
- "type": "array",
- "items": {
- "type": "string"
- },
- "minItems": 1,
- "uniqueItems": true
- },
- "dimensions": {
- "type": "object",
- "properties": {
- "length": {
- "type": "number"
- },
- "width": {
- "type": "number"
- },
- "height": {
- "type": "number"
- }
- },
- "required": [ "length", "width", "height" ]
- }
- },
- "required": [ "productId", "productName", "price" ]
- })""",
- // Passing strings
- {
- R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
- },
- // Failing strings
- {
- R"""({})""", // Missing all required properties
- R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
- // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
- // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
- // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
- R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
- R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
- // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
- // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
- }
- );
- }
- int main() {
- fprintf(stdout, "Running grammar integration tests...\n");
- test_simple_grammar();
- test_complex_grammar();
- test_special_chars();
- test_quantifiers();
- test_failure_missing_root();
- test_failure_missing_reference();
- test_failure_left_recursion();
- test_json_schema();
- fprintf(stdout, "All tests passed.\n");
- return 0;
- }
|