| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318 |
- #ifdef NDEBUG
- #undef NDEBUG
- #endif
- #define LLAMA_API_INTERNAL
- #include "ggml.h"
- #include "llama.h"
- #include "grammar-parser.h"
- #include "json-schema-to-grammar.h"
- #include "unicode.h"
- #include <cassert>
- #include <string>
- #include <vector>
- using json = nlohmann::ordered_json;
- static llama_grammar* build_grammar(const std::string & grammar_str) {
- auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
- // Ensure we parsed correctly
- assert(!parsed_grammar.rules.empty());
- // Ensure we have a root node
- assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
- std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
- llama_grammar* grammar = llama_grammar_init(
- grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
- return grammar;
- }
- static bool test_build_grammar_fails(const std::string & grammar_str) {
- fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
- bool grammar_fails = false;
- llama_grammar * grammar = build_grammar(grammar_str);
- if (grammar != nullptr) {
- fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
- } else {
- grammar_fails = true;
- fprintf(stdout, " ✅︎\n");
- }
- return grammar_fails;
- }
- static bool match_string(const std::string & input, llama_grammar* grammar) {
- auto decoded = decode_utf8(input, {});
- const auto & code_points = decoded.first;
- for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
- auto prev_stacks = grammar->stacks;
- llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
- if (grammar->stacks.empty()) {
- // no stacks means that the grammar failed to match at this point
- return false;
- }
- }
- for (const auto & stack : grammar->stacks) {
- if (stack.empty()) {
- // An empty stack means that the grammar has been completed
- return true;
- }
- }
- return false;
- }
- static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
- fflush(stderr);
- auto grammar = build_grammar(grammar_str);
- // Save the original grammar stacks so that we can reset after every new string we want to test
- auto original_stacks = grammar->stacks;
- fprintf(stderr, " 🔵 Valid strings:\n");
- // Passing strings
- for (const auto & test_string : passing_strings) {
- fprintf(stderr, " \"%s\" ", test_string.c_str());
- fflush(stderr);
- bool matched = match_string(test_string, grammar);
- if (!matched) {
- fprintf(stderr, "❌ (failed to match)\n");
- // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
- // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
- FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
- if (grammar_file) {
- fprintf(grammar_file, "%s", grammar_str.c_str());
- fclose(grammar_file);
- }
- // DEBUG: Write the test string to test-grammar-integration.string.txt
- FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
- if (string_file) {
- fprintf(string_file, "%s", test_string.c_str());
- fclose(string_file);
- }
- fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
- } else {
- fprintf(stdout, "✅︎\n");
- }
- assert(matched);
- // Reset the grammar stacks
- grammar->stacks = original_stacks;
- }
- fprintf(stderr, " 🟠 Invalid strings:\n");
- // Failing strings
- for (const auto & test_string : failing_strings) {
- fprintf(stderr, " \"%s\" ", test_string.c_str());
- fflush(stderr);
- bool matched = match_string(test_string, grammar);
- if (matched) {
- fprintf(stderr, "❌ (incorrectly matched)\n");
- } else {
- fprintf(stdout, "✅︎\n");
- }
- assert(!matched);
- // Reset the grammar stacks
- grammar->stacks = original_stacks;
- }
- // Clean up allocated memory
- llama_grammar_free(grammar);
- }
- static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
- }
- static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
- }
- static void test_simple_grammar() {
- test_schema(
- "min 0",
- R"""({
- "type": "integer",
- "minimum": 0
- })""",
- // Passing strings
- {
- "0",
- "10",
- "12",
- "10000",
- },
- // Failing strings
- {
- "-1",
- "-10",
- "-10000",
- "-100000000000000000000000000000000",
- "100000000000000000000000000000000",
- "00",
- "01",
- "-0",
- }
- );
- test_schema(
- "min 2",
- // Schema
- R"""({
- "type": "integer",
- "minimum": 2
- })""",
- // Passing strings
- {
- "2",
- "3",
- "4",
- "10",
- "20",
- "1234567890000000",
- },
- // Failing strings
- {
- "0",
- "1",
- "-1",
- "-100",
- "0",
- "1",
- "01",
- "02",
- "12345678900000000",
- }
- );
- test_schema(
- "min 456",
- R"""({
- "type": "integer",
- "minimum": 456
- })""",
- // Passing strings
- {
- "456",
- "4560",
- "457",
- "460",
- "500",
- },
- // Failing strings
- {
- "455",
- "356",
- "50",
- "050",
- "-1",
- "-456",
- }
- );
- test_schema(
- "min -123",
- R"""({
- "type": "integer",
- "minimum": -123
- })""",
- // Passing strings
- {
- "-123",
- "-122",
- "-11",
- "-1",
- "0",
- "1",
- "123",
- "1234",
- "2345",
- },
- // Failing strings
- {
- "-1234",
- "-124",
- }
- );
- test_schema(
- "max 9999",
- // Schema
- R"""({
- "type": "integer",
- "maximum": 9999
- })""",
- // Passing strings
- {
- "-99999",
- "0",
- "9999",
- },
- // Failing strings
- {
- "10000",
- "99991",
- }
- );
- test_schema(
- "max -9999",
- // Schema
- R"""({
- "type": "integer",
- "maximum": -9999
- })""",
- // Passing strings
- {
- "-10000",
- "-9999",
- },
- // Failing strings
- {
- "-9998",
- "0",
- "9999",
- }
- );
- test_schema(
- "min 5 max 30",
- // Schema
- R"""({
- "type": "integer",
- "minimum": 5,
- "maximum": 30
- })""",
- // Passing strings
- {
- "5",
- "10",
- "30",
- },
- // Failing strings
- {
- "05",
- "4",
- "-1",
- "31",
- "123",
- "0123",
- }
- );
- test_schema(
- "min -1 max 1",
- R"""({
- "type": "integer",
- "minimum": -1,
- "maximum": 1
- })""",
- // Passing strings
- {
- "-1",
- "0",
- "1",
- },
- // Failing strings
- {
- "-11",
- "-10",
- "-2",
- "2",
- "10",
- "11",
- }
- );
- test_schema(
- "min -123 max 42",
- R"""({
- "type": "integer",
- "minimum": -123,
- "maximum": 42
- })""",
- // Passing strings
- {
- "-123",
- "-122",
- "-13",
- "-11",
- "-2",
- "-1",
- "0",
- "1",
- "5",
- "10",
- "39",
- "40",
- "42",
- },
- // Failing strings
- {
- "-0123",
- "-124",
- "-1123",
- "-200",
- "43",
- "123",
- "0123",
- }
- );
- test_schema(
- "exclusive min / max",
- // Schema
- R"""({
- "type": "integer",
- "exclusiveMinimum": 0,
- "exclusiveMaximum": 10000
- })""",
- // Passing strings
- {
- "1",
- "9999",
- },
- // Failing strings
- {
- "0",
- "01",
- "10000",
- "99999",
- }
- );
- // Test case for a simple grammar
- test_grammar(
- "simple grammar",
- R"""(
- root ::= expr
- expr ::= term ("+" term)*
- term ::= number
- number ::= [0-9]+)""",
- // Passing strings
- {
- "42",
- "1+2+3+4+5",
- "123+456",
- },
- // Failing strings
- {
- "+",
- "/ 3",
- "1+2+3+4+5+",
- "12a45",
- }
- );
- }
- static void test_complex_grammar() {
- // Test case for a more complex grammar, with both failure strings and success strings
- test_grammar(
- "medium complexity grammar",
- // Grammar
- R"""(
- root ::= expression
- expression ::= term ws (("+"|"-") ws term)*
- term ::= factor ws (("*"|"/") ws factor)*
- factor ::= number | variable | "(" expression ")" | function-call
- number ::= [0-9]+
- variable ::= [a-zA-Z_][a-zA-Z0-9_]*
- function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
- ws ::= [ \t\n\r]?)""",
- // Passing strings
- {
- "42",
- "1*2*3*4*5",
- "x",
- "x+10",
- "x1+y2",
- "(a+b)*(c-d)",
- "func()",
- "func(x,y+2)",
- "a*(b+c)-d/e",
- "f(g(x),h(y,z))",
- "x + 10",
- "x1 + y2",
- "(a + b) * (c - d)",
- "func()",
- "func(x, y + 2)",
- "a * (b + c) - d / e",
- "f(g(x), h(y, z))",
- "123+456",
- "123*456*789-123/456+789*123",
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
- },
- // Failing strings
- {
- "+",
- "/ 3x",
- "x + + y",
- "a * / b",
- "func(,)",
- "func(x y)",
- "(a + b",
- "x + y)",
- "a + b * (c - d",
- "42 +",
- "x +",
- "x + 10 +",
- "(a + b) * (c - d",
- "func(",
- "func(x, y + 2",
- "a * (b + c) - d /",
- "f(g(x), h(y, z)",
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
- }
- );
- }
- static void test_special_chars() {
- // A collection of tests to exercise special characters such as "."
- test_grammar(
- "special characters",
- // Grammar
- R"""(
- root ::= ... "abc" ...
- )""",
- // Passing strings
- {
- "abcabcabc",
- "aaaabcccc",
- // NOTE: Also ensures that multi-byte characters still count as a single character
- "🔵🟠✅abc❌🟠🔵"
- },
- // Failing strings
- {
- "aaabcccc",
- "aaaaabcccc",
- "aaaabccc",
- "aaaabccccc",
- "🔵🟠✅❌abc❌✅🟠🔵"
- "🔵🟠abc🟠🔵"
- }
- );
- }
- static void test_quantifiers() {
- // A collection of tests to exercise * + and ? quantifiers
- test_grammar(
- "* quantifier",
- // Grammar
- R"""(root ::= "a"*)""",
- // Passing strings
- {
- "",
- "a",
- "aaaaa",
- "aaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- },
- // Failing strings
- {
- "b",
- "ab",
- "aab",
- "ba",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
- }
- );
- test_grammar(
- "+ quantifier",
- // Grammar
- R"""(root ::= "a"+)""",
- // Passing strings
- {
- "a",
- "aaaaa",
- "aaaaaaaaaaaaaaaaaa",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
- },
- // Failing strings
- {
- "",
- "b",
- "ab",
- "aab",
- "ba",
- "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
- }
- );
- test_grammar(
- "? quantifier",
- // Grammar
- R"""(root ::= "a"?)""",
- // Passing strings
- {
- "",
- "a"
- },
- // Failing strings
- {
- "b",
- "ab",
- "aa",
- "ba",
- }
- );
- test_grammar(
- "mixed quantifiers",
- // Grammar
- R"""(
- root ::= cons+ vowel* cons? (vowel cons)*
- vowel ::= [aeiouy]
- cons ::= [bcdfghjklmnpqrstvwxyz]
- )""",
- // Passing strings
- {
- "yes",
- "no",
- "noyes",
- "crwth",
- "four",
- "bryyyy",
- },
- // Failing strings
- {
- "yess",
- "yesno",
- "forty",
- "catyyy",
- }
- );
- test_grammar(
- "simple exact repetition",
- // Grammar
- R"""(
- root ::= [ab]{4}
- )""",
- // Passing strings
- {
- "aaaa",
- "bbbb",
- "abab",
- },
- // Failing strings
- {
- "a",
- "b",
- "aaaaa",
- }
- );
- test_grammar(
- "simple min repetition",
- // Grammar
- R"""(
- root ::= [ab]{4,}
- )""",
- // Passing strings
- {
- "aaaa",
- "aaaaab",
- "bbbb",
- "ababab",
- },
- // Failing strings
- {
- "",
- "aba",
- }
- );
- test_grammar(
- "simple max repetition",
- // Grammar
- R"""(
- root ::= [ab]{0,4}
- )""",
- // Passing strings
- {
- "",
- "a",
- "aa",
- "aaa",
- "aaab",
- },
- // Failing strings
- {
- "aaaaa",
- }
- );
- test_grammar(
- "min / max repetition",
- // Grammar
- R"""(
- root ::= ("0x" [A-F0-9]{2} " "?){3,5}
- )""",
- // Passing strings
- {
- "0xFF 0x12 0xAB",
- "0xFF 0x12 0xAB 0x00 0x00",
- },
- // Failing strings
- {
- "",
- "0xFF",
- "0xFF 0x12",
- "0xFF 0x12 0xAB 0x00 0x00 0x00",
- }
- );
- }
- static void test_failure_missing_root() {
- fprintf(stderr, "⚫ Testing missing root node:\n");
- // Test case for a grammar that is missing a root rule
- const std::string grammar_str = R"""(
- rot ::= expr
- expr ::= term ("+" term)*
- term ::= number
- number ::= [0-9]+)""";
- grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
- // Ensure we parsed correctly
- assert(!parsed_grammar.rules.empty());
- // Ensure we do NOT have a root node
- assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
- fprintf(stderr, " ✅︎ Passed\n");
- }
- static void test_failure_missing_reference() {
- fprintf(stderr, "⚫ Testing missing reference node:\n");
- // Test case for a grammar that is missing a referenced rule
- const std::string grammar_str =
- R"""(root ::= expr
- expr ::= term ("+" term)*
- term ::= numero
- number ::= [0-9]+)""";
- fprintf(stderr, " Expected error: ");
- grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
- // Ensure we did NOT parsed correctly
- assert(parsed_grammar.rules.empty());
- fprintf(stderr, " End of expected error.\n");
- fprintf(stderr, " ✅︎ Passed\n");
- }
- static void test_failure_left_recursion() {
- fprintf(stderr, "⚫ Testing left recursion detection:\n");
- // Test simple left recursion detection
- const std::string simple_str = R"""(root ::= "a" | root "a")""";
- assert(test_build_grammar_fails(simple_str));
- // Test more complicated left recursion detection
- const std::string medium_str = R"""(
- root ::= asdf
- asdf ::= "a" | asdf "a"
- )""";
- assert(test_build_grammar_fails(medium_str));
- // Test even more complicated left recursion detection
- const std::string hard_str = R"""(
- root ::= asdf
- asdf ::= "a" | foo "b"
- foo ::= "c" | asdf "d" | "e")""";
- assert(test_build_grammar_fails(hard_str));
- // Test yet even more complicated left recursion detection
- const std::string hardest_str = R"""(
- root ::= asdf
- asdf ::= "a" | foo "b"
- foo ::= "c" | empty asdf "d" | "e"
- empty ::= "blah" | )""";
- assert(test_build_grammar_fails(hardest_str));
- fprintf(stderr, " ✅︎ Passed\n");
- }
- static void test_json_schema() {
- // Note that this is similar to the regular grammar tests,
- // but we convert each json schema to a grammar before parsing.
- // Otherwise, this test structure is the same.
- test_schema(
- "empty schema (object)",
- // Schema
- R"""(
- {}
- )""",
- // Passing strings
- {
- R"""({})""",
- R"""({"foo": "bar"})""",
- },
- // Failing strings
- {
- "",
- "[]",
- "null",
- R"""("")""",
- "true",
- }
- );
- test_schema(
- "exotic formats (list)",
- // Schema
- R"""({
- "items": [
- { "format": "date" },
- { "format": "uuid" },
- { "format": "time" },
- { "format": "date-time" }
- ]
- })""",
- // Passing strings
- {
- // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
- //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- },
- // Failing strings
- {
- R"""(["foo", "bar"])""",
- R"""(["12345678-1234-1234-1234-1234567890ab"])""",
- }
- );
- test_schema(
- "string",
- // Schema
- R"""({
- "type": "string"
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("")""",
- },
- // Failing strings
- {
- R"""({})""",
- R"""("foo": "bar")""",
- }
- );
- test_schema(
- "string w/ min length 1",
- // Schema
- R"""({
- "type": "string",
- "minLength": 1
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""({})""",
- R"""("foo": "bar")""",
- }
- );
- test_schema(
- "string w/ min length 3",
- // Schema
- R"""({
- "type": "string",
- "minLength": 3
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("foobar")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("f")""",
- R"""("fo")""",
- }
- );
- test_schema(
- "string w/ max length",
- // Schema
- R"""({
- "type": "string",
- "maxLength": 3
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("")""",
- R"""("f")""",
- R"""("fo")""",
- },
- // Failing strings
- {
- R"""("foobar")""",
- }
- );
- test_schema(
- "string w/ min & max length",
- // Schema
- R"""({
- "type": "string",
- "minLength": 1,
- "maxLength": 4
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("f")""",
- R"""("barf")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("barfo")""",
- R"""("foobar")""",
- }
- );
- test_schema(
- "boolean",
- // Schema
- R"""({
- "type": "boolean"
- })""",
- // Passing strings
- {
- "true",
- "false",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("true")""",
- R"""(True)""",
- R"""(FALSE)""",
- }
- );
- test_schema(
- "integer",
- // Schema
- R"""({
- "type": "integer"
- })""",
- // Passing strings
- {
- R"""(0)""",
- R"""(12345)""",
- R"""(1234567890123456)""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(01)""",
- R"""(007)""",
- R"""(12345678901234567 )""",
- }
- );
- test_schema(
- "string const",
- // Schema
- R"""({
- "const": "foo"
- })""",
- // Passing strings
- {
- R"""("foo")""",
- },
- // Failing strings
- {
- R"""(foo)""",
- R"""("bar")""",
- }
- );
- test_schema(
- "non-string const",
- // Schema
- R"""({
- "const": true
- })""",
- // Passing strings
- {
- R"""(true)""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(foo)""",
- R"""("true")""",
- }
- );
- test_schema(
- "non-string const",
- // Schema
- R"""({
- "enum": ["red", "amber", "green", null, 42, ["foo"]]
- })""",
- // Passing strings
- {
- R"""("red")""",
- R"""(null)""",
- R"""(42)""",
- R"""(["foo"])""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(420)""",
- R"""(true)""",
- R"""(foo)""",
- }
- );
- test_schema(
- "simple pattern",
- // Schema
- R"""({
- "pattern": "^[a-zA-Z0-9_-]*$"
- })""",
- // Passing strings
- {
- R"""("")""",
- R"""("He_llo-12")""",
- },
- // Failing strings
- {
- R"""("!")""",
- R"""("Hello World")""",
- }
- );
- test_schema(
- "pattern with escapes",
- // Schema
- R"""({
- "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
- })""",
- // Passing strings
- {
- R"""("a^$.[]()|{}*+?b")""",
- },
- // Failing strings
- {
- R"""("ab")""",
- }
- );
- test_schema(
- "",
- // Schema
- R"""(
- {
- "type": ["array", "null"],
- "items": { "type": "string" }
- }
- )""",
- // Passing strings
- {
- "null",
- "[]",
- "[\"123\"]",
- "[\"foo\", \"bar\"]",
- },
- // Failing strings
- {
- "",
- "[123]",
- "\"foo\"",
- "[\"foo\", 42]",
- }
- );
- test_schema(
- "min+max items",
- // Schema
- R"""({
- "items": {
- "type": ["number", "integer"]
- },
- "minItems": 3,
- "maxItems": 5
- })""",
- // Passing strings
- {
- R"""([1, 2, 3])""",
- R"""([1, 2, 3, 4])""",
- R"""([1, 2, 3, 4, 5])""",
- },
- // Failing strings
- {
- R"""([1, 2])""",
- R"""([1, 2, 3, 4, 5, 6])""",
- R"""(1)""",
- }
- );
- // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
- test_schema(
- "object properties",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- }
- })""",
- // Passing strings
- {
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // "By default, leaving out properties is valid"
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- // "By extension, even an empty object is valid"
- R"""({})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Change datatype from number to string
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Reorder properties
- R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
- // Reorder properties
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // "Additional properties default to false for generation, even though the spec says true.
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- }
- );
- test_schema(
- "additional properties can't override other properties",
- R"""({
- "properties": {
- "a": {"type": "integer"},
- "b": {"type": "integer"}
- },
- "additionalProperties": true
- })""",
- // Passing strings
- {
- R"""({"a": 42})""",
- R"""({"c": ""})""",
- R"""({"a": 42, "c": ""})""",
- R"""({"a_": ""})""",
- },
- // Failing strings
- {
- R"""()""",
- R"""({"a": ""})""",
- R"""({"a": "", "b": ""})""",
- }
- );
- // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
- test_schema(
- "object properties, additionalProperties: true",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": true
- })""",
- // Passing strings
- {
- // "By extension, even an empty object is valid"
- R"""({})""",
- R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
- // "By default, leaving out properties is valid"
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- // "By default, providing additional properties is valid"
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Change datatype from number to string
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Reorder properties
- R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
- }
- );
- // Additional properties: false
- test_schema(
- "required + optional props each in original order",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": false
- })""",
- // Passing strings
- {
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_type":"Avenue"})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Spaces are permitted around enum values
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Reorder properties
- R"""({ "street_type": "Avenue", "number": 1600 })""",
- // Add "direction"
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
- }
- );
- test_schema(
- "required + optional props each in original order",
- // Schema
- R"""({
- "properties": {
- "b": {"type": "string"},
- "a": {"type": "string"},
- "d": {"type": "string"},
- "c": {"type": "string"}
- },
- "required": ["a", "b"],
- "additionalProperties": false
- })""",
- // Passing strings
- {
- R"""({"b": "foo", "a": "bar"})""",
- R"""({"b":"foo","a":"bar","d":"qux"})""",
- R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
- },
- // Failing strings
- {
- R"""({"a": "foo", "b": "bar"})""",
- R"""({"b": "bar"})""",
- R"""({"a": "foo", "c": "baz"})""",
- R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
- }
- );
- // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
- test_schema(
- "required props",
- // Schema
- R"""({
- "$schema": "https://json-schema.org/draft/2020-12/schema",
- "$id": "https://example.com/product.schema.json",
- "title": "Product",
- "description": "A product from Acme's catalog",
- "type": "object",
- "properties": {
- "productId": {
- "description": "The unique identifier for a product",
- "type": "integer"
- },
- "productName": {
- "description": "Name of the product",
- "type": "string"
- },
- "price": {
- "description": "The price of the product",
- "type": "number",
- "exclusiveMinimum": 0
- },
- "tags": {
- "description": "Tags for the product",
- "type": "array",
- "items": {
- "type": "string"
- },
- "minItems": 1,
- "uniqueItems": true
- },
- "dimensions": {
- "type": "object",
- "properties": {
- "length": {
- "type": "number"
- },
- "width": {
- "type": "number"
- },
- "height": {
- "type": "number"
- }
- },
- "required": [ "length", "width", "height" ]
- }
- },
- "required": [ "productId", "productName", "price" ]
- })""",
- // Passing strings
- {
- R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
- },
- // Failing strings
- {
- R"""({})""", // Missing all required properties
- R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
- // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
- // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
- // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
- R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
- R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
- // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
- // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
- }
- );
- }
- int main() {
- fprintf(stdout, "Running grammar integration tests...\n");
- test_simple_grammar();
- test_complex_grammar();
- test_special_chars();
- test_quantifiers();
- test_failure_missing_root();
- test_failure_missing_reference();
- test_failure_left_recursion();
- test_json_schema();
- fprintf(stdout, "All tests passed.\n");
- return 0;
- }
|