| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140 |
- #ifdef NDEBUG
- # undef NDEBUG
- #endif
- #include "unicode.h"
- #include "sampling.h"
- #include <cassert>
- #include <string>
- #include <vector>
- static const llama_vocab * vocab; // Shared vocabulary: assigned once in main() from the loaded model, then read by match_string() and test().
- static bool match_string(const std::string & input, llama_sampler * grammar) {
- llama_sampler_reset(grammar);
- auto tokens = common_tokenize(vocab, input, false, false);
- auto n_vocab = llama_vocab_n_tokens(vocab);
- std::vector<llama_token_data> cur;
- cur.reserve(n_vocab);
- for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
- cur.emplace_back(llama_token_data{ token_id, 0.0f, 0.0f });
- }
- auto tok_arr = llama_token_data_array{ cur.data(), cur.size(), -1, false };
- for (const auto token : tokens) {
- for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
- cur[token_id].logit = 0.0f;
- }
- llama_sampler_apply(grammar, &tok_arr);
- if (cur[token].logit < 0.0f) {
- return false;
- }
- llama_sampler_accept(grammar, token);
- }
- // do we allow EOS at the end? if so the grammar is accepting
- auto tok_eos = llama_vocab_eot(vocab);
- if (tok_eos == LLAMA_TOKEN_NULL) {
- tok_eos = llama_vocab_eos(vocab);
- }
- cur[tok_eos].logit = 0.0f;
- llama_sampler_apply(grammar, &tok_arr);
- return cur[tok_eos].logit >= 0.0f;
- }
- static void test(const std::string & test_desc, const std::string & grammar_str,
- const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
- fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
- fflush(stderr);
- auto * grammar = llama_sampler_init_llg(vocab, "lark", grammar_str.c_str());
- fprintf(stderr, " 🔵 Valid strings:\n");
- // Passing strings
- for (const auto & test_string : passing_strings) {
- fprintf(stderr, " \"%s\" ", test_string.c_str());
- fflush(stderr);
- bool matched = match_string(test_string, grammar);
- if (!matched) {
- fprintf(stderr, "❌ (failed to match)\n");
- // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
- // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
- FILE * grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
- if (grammar_file) {
- fprintf(grammar_file, "%s", grammar_str.c_str());
- fclose(grammar_file);
- }
- // DEBUG: Write the test string to test-grammar-integration.string.txt
- FILE * string_file = fopen("test-grammar-integration.string.txt", "w");
- if (string_file) {
- fprintf(string_file, "%s", test_string.c_str());
- fclose(string_file);
- }
- fprintf(stderr,
- "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following "
- "command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf "
- "test-grammar-integration.string.txt\n\n");
- } else {
- fprintf(stdout, "✅︎\n");
- }
- assert(matched);
- }
- fprintf(stderr, " 🟠 Invalid strings:\n");
- // Failing strings
- for (const auto & test_string : failing_strings) {
- fprintf(stderr, " \"%s\" ", test_string.c_str());
- fflush(stderr);
- bool matched = match_string(test_string, grammar);
- if (matched) {
- fprintf(stderr, "❌ (incorrectly matched)\n");
- } else {
- fprintf(stdout, "✅︎\n");
- }
- assert(!matched);
- }
- llama_sampler_free(grammar);
- }
- static void test_grammar(const std::string & test_desc, const std::string & grammar_str,
- const std::vector<std::string> & passing_strings,
- const std::vector<std::string> & failing_strings) {
- test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
- }
- static void test_schema(const std::string & test_desc, const std::string & schema_str,
- const std::vector<std::string> & passing_strings,
- const std::vector<std::string> & failing_strings) {
- test(test_desc + ". Schema: " + schema_str, "%llguidance {}\nstart: %json " + schema_str, passing_strings,
- failing_strings);
- }
- static void test_simple_grammar() { // Integer range schemas ("minimum", "maximum", exclusive bounds), then one small Lark arithmetic grammar.
- test_schema("min 0", // non-negative integers only; leading zeros and "-0" must be rejected
- R"""({
- "type": "integer",
- "minimum": 0
- })""",
- // Passing strings
- {
- "0",
- "10",
- "12",
- "10000",
- },
- // Failing strings
- {
- "-1",
- "-10",
- "-10000",
- "-100000000000000000000000000000000",
- // "100000000000000000000000000000000",
- "00",
- "01",
- "-0",
- });
- test_schema("min 2",
- // Schema
- R"""({
- "type": "integer",
- "minimum": 2
- })""",
- // Passing strings
- {
- "2",
- "3",
- "4",
- "10",
- "20",
- "1234567890000000",
- },
- // Failing strings
- {
- "0", "1", "-1", "-100", "0", "1", "01", "02", // NOTE(review): "0" and "1" each appear twice in this list
- // "12345678900000000",
- });
- test_schema("min 456",
- R"""({
- "type": "integer",
- "minimum": 456
- })""",
- // Passing strings
- {
- "456",
- "4560",
- "457",
- "460",
- "500",
- },
- // Failing strings
- {
- "455",
- "356",
- "50",
- "050",
- "-1",
- "-456",
- });
- test_schema("min -123",
- R"""({
- "type": "integer",
- "minimum": -123
- })""",
- // Passing strings
- {
- "-123",
- "-122",
- "-11",
- "-1",
- "0",
- "1",
- "123",
- "1234",
- "2345",
- },
- // Failing strings
- {
- "-1234",
- "-124",
- });
- test_schema("max 9999",
- // Schema
- R"""({
- "type": "integer",
- "maximum": 9999
- })""",
- // Passing strings
- {
- "-99999",
- "0",
- "9999",
- },
- // Failing strings
- {
- "10000",
- "99991",
- });
- test_schema("max -9999",
- // Schema
- R"""({
- "type": "integer",
- "maximum": -9999
- })""",
- // Passing strings
- {
- "-10000",
- "-9999",
- },
- // Failing strings
- {
- "-9998",
- "0",
- "9999",
- });
- test_schema("min 5 max 30",
- // Schema
- R"""({
- "type": "integer",
- "minimum": 5,
- "maximum": 30
- })""",
- // Passing strings
- {
- "5",
- "10",
- "30",
- },
- // Failing strings
- {
- "05",
- "4",
- "-1",
- "31",
- "123",
- "0123",
- });
- test_schema("min -1 max 1",
- R"""({
- "type": "integer",
- "minimum": -1,
- "maximum": 1
- })""",
- // Passing strings
- {
- "-1",
- "0",
- "1",
- },
- // Failing strings
- {
- "-11",
- "-10",
- "-2",
- "2",
- "10",
- "11",
- });
- test_schema("min -123 max 42",
- R"""({
- "type": "integer",
- "minimum": -123,
- "maximum": 42
- })""",
- // Passing strings
- {
- "-123",
- "-122",
- "-13",
- "-11",
- "-2",
- "-1",
- "0",
- "1",
- "5",
- "10",
- "39",
- "40",
- "42",
- },
- // Failing strings
- {
- "-0123",
- "-124",
- "-1123",
- "-200",
- "43",
- "123",
- "0123",
- });
- test_schema("exclusive min / max", // both bounds are excluded: 0 and 10000 themselves must fail
- // Schema
- R"""({
- "type": "integer",
- "exclusiveMinimum": 0,
- "exclusiveMaximum": 10000
- })""",
- // Passing strings
- {
- "1",
- "9999",
- },
- // Failing strings
- {
- "0",
- "01",
- "10000",
- "99999",
- });
- // Lark grammar case: digit runs joined by "+", with no whitespace or other characters allowed
- test_grammar("simple grammar",
- R"""(
- start: expr
- expr: term ("+" term)*
- term: number
- number: /[0-9]+/ )""",
- // Passing strings
- {
- "42",
- "1+2+3+4+5",
- "123+456",
- },
- // Failing strings
- {
- "+",
- "/ 3",
- "1+2+3+4+5+",
- "12a45",
- });
- }
- static void test_complex_grammar() { // Arithmetic expression grammar with variables, parentheses and function calls.
- // One grammar checked against both accepted and rejected expressions
- test_grammar("medium complexity grammar",
- // Grammar
- R"""(
- start: expression
- expression: term ws (("+"|"-") ws term)*
- term: factor ws (("*"|"/") ws factor)*
- factor: number | variable | "(" expression ")" | function-call
- number: /[0-9]+/
- variable: /[a-zA-Z_][a-zA-Z0-9_]*/
- function-call: variable ws "(" (expression ("," ws expression)*)? ")"
- ws: /[ \t\n\r]?/ )""",
- // Passing strings: the same expressions written without and with single spaces
- { "42",
- "1*2*3*4*5",
- "x",
- "x+10",
- "x1+y2",
- "(a+b)*(c-d)",
- "func()",
- "func(x,y+2)",
- "a*(b+c)-d/e",
- "f(g(x),h(y,z))",
- "x + 10",
- "x1 + y2",
- "(a + b) * (c - d)",
- "func()", // NOTE(review): duplicate of an earlier entry
- "func(x, y + 2)",
- "a * (b + c) - d / e",
- "f(g(x), h(y, z))",
- "123+456",
- "123*456*789-123/456+789*123",
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456" },
- // Failing strings: dangling operators, unbalanced parentheses, malformed argument lists
- {
- "+",
- "/ 3x",
- "x + + y",
- "a * / b",
- "func(,)",
- "func(x y)",
- "(a + b",
- "x + y)",
- "a + b * (c - d",
- "42 +",
- "x +",
- "x + 10 +",
- "(a + b) * (c - d",
- "func(",
- "func(x, y + 2",
- "a * (b + c) - d /",
- "f(g(x), h(y, z)",
- "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
- });
- }
- static void test_special_chars() { // Checks that each "." in a regex consumes exactly one character.
- // A collection of tests to exercise special characters such as "."
- test_grammar("special characters",
- // Grammar: three arbitrary characters, the literal "abc", three arbitrary characters
- R"""(
- start: /.../ "abc" /.../
- )""",
- // Passing strings
- { "abcabcabc", "aaaabcccc",
- // NOTE: Also ensures that multi-byte characters still count as a single character
- "🔵🟠✅abc❌🟠🔵" },
- // Failing strings: too few or too many characters on either side of "abc"
- { "aaabcccc", "aaaaabcccc", "aaaabccc", "aaaabccccc", "🔵🟠✅❌abc❌✅🟠🔵", "🔵🟠abc🟠🔵" });
- }
- static void test_quantifiers() { // Covers *, + and ? on literals and rules, plus {n}, {n,} and {0,n} regex repetition.
- // A collection of tests to exercise * + and ? quantifiers, followed by counted repetition
- test_grammar(
- "* quantifier",
- // Grammar
- R"""(start: "a"*)""",
- // Passing strings
- { "", "a", "aaaaa", "aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" },
- // Failing strings
- { "b", "ab", "aab", "ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" });
- test_grammar(
- "+ quantifier",
- // Grammar
- R"""(start: "a"+)""",
- // Passing strings
- { "a", "aaaaa", "aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" },
- // Failing strings: unlike "*", the empty string must be rejected
- { "", "b", "ab", "aab", "ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" });
- test_grammar("? quantifier",
- // Grammar
- R"""(start: "a"?)""",
- // Passing strings
- { "", "a" },
- // Failing strings
- {
- "b",
- "ab",
- "aa",
- "ba",
- });
- test_grammar("mixed quantifiers",
- // Grammar
- R"""(
- start: cons+ vowel* cons? (vowel cons)*
- vowel: /[aeiouy]/
- cons: /[bcdfghjklmnpqrstvwxyz]/
- )""",
- // Passing strings
- {
- "yes",
- "no",
- "noyes",
- "crwth",
- "four",
- "bryyyy",
- },
- // Failing strings
- {
- "yess",
- "yesno",
- "forty",
- "catyyy",
- });
- test_grammar("simple exact repetition",
- // Grammar
- R"""(
- start: /[ab]{4}/
- )""",
- // Passing strings
- {
- "aaaa",
- "bbbb",
- "abab",
- },
- // Failing strings
- {
- "a",
- "b",
- "aaaaa",
- });
- test_grammar("simple min repetition",
- // Grammar
- R"""(
- start: /[ab]{4,}/
- )""",
- // Passing strings
- {
- "aaaa",
- "aaaaab",
- "bbbb",
- "ababab",
- },
- // Failing strings
- {
- "",
- "aba",
- });
- test_grammar("simple max repetition",
- // Grammar
- R"""(
- start: /[ab]{0,4}/
- )""",
- // Passing strings
- {
- "",
- "a",
- "aa",
- "aaa",
- "aaab",
- },
- // Failing strings
- {
- "aaaaa",
- });
- // test_grammar("min / max repetition",
- // // Grammar
- // R"""(
- // start: ("0x" /[A-F0-9]{2}/ " "?){3,5}
- // )""",
- // // Passing strings
- // {
- // "0xFF 0x12 0xAB",
- // "0xFF 0x12 0xAB 0x00 0x00",
- // },
- // // Failing strings
- // {
- // "",
- // "0xFF",
- // "0xFF 0x12",
- // "0xFF 0x12 0xAB 0x00 0x00 0x00",
- // });
- }
- static void test_json_schema() { // JSON-schema cases covering objects, strings, numbers, const/enum, patterns, arrays and property ordering.
- // Same structure as the grammar tests above, except that each case is a
- // JSON schema: test_schema() embeds it in a Lark grammar through the %json
- // directive instead of spelling out grammar rules.
- test_schema("empty schema (object)",
- // Schema
- R"""(
- {"type":"object"}
- )""",
- // Passing strings
- {
- R"""({})""",
- R"""({"foo": "bar"})""",
- },
- // Failing strings
- {
- "",
- "[]",
- "null",
- R"""("")""",
- "true",
- });
- test_schema(
- "exotic formats (list)",
- // Schema
- R"""({
- "items": [
- { "format": "date" },
- { "format": "uuid" },
- { "format": "time" },
- { "format": "date-time" }
- ]
- })""",
- // Passing strings
- {
- // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
- //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
- },
- // Failing strings
- {
- R"""(["foo", "bar"])""",
- R"""(["12345678-1234-1234-1234-1234567890ab"])""",
- });
- test_schema("string",
- // Schema
- R"""({
- "type": "string"
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("")""",
- },
- // Failing strings
- {
- R"""({})""",
- R"""("foo": "bar")""",
- });
- test_schema("string w/ min length 1",
- // Schema
- R"""({
- "type": "string",
- "minLength": 1
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""({})""",
- R"""("foo": "bar")""",
- });
- test_schema("string w/ min length 3",
- // Schema
- R"""({
- "type": "string",
- "minLength": 3
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("foobar")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("f")""",
- R"""("fo")""",
- });
- test_schema("string w/ max length",
- // Schema
- R"""({
- "type": "string",
- "maxLength": 3
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("")""",
- R"""("f")""",
- R"""("fo")""",
- },
- // Failing strings
- {
- R"""("foobar")""",
- });
- test_schema("string w/ min & max length",
- // Schema
- R"""({
- "type": "string",
- "minLength": 1,
- "maxLength": 4
- })""",
- // Passing strings
- {
- R"""("foo")""",
- R"""("bar")""",
- R"""("f")""",
- R"""("barf")""",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("barfo")""",
- R"""("foobar")""",
- });
- test_schema("boolean",
- // Schema
- R"""({
- "type": "boolean"
- })""",
- // Passing strings
- {
- "true",
- "false",
- },
- // Failing strings
- {
- R"""("")""",
- R"""("true")""",
- R"""(True)""",
- R"""(FALSE)""",
- });
- test_schema("integer",
- // Schema
- R"""({
- "type": "integer"
- })""",
- // Passing strings
- {
- R"""(0)""",
- R"""(12345)""",
- R"""(1234567890123456)""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(01)""",
- R"""(007)""",
- R"""(12345678901234567 )""",
- });
- test_schema("string const",
- // Schema
- R"""({
- "const": "foo"
- })""",
- // Passing strings
- {
- R"""("foo")""",
- },
- // Failing strings
- {
- R"""(foo)""",
- R"""("bar")""",
- });
- test_schema("non-string const",
- // Schema
- R"""({
- "const": true
- })""",
- // Passing strings
- {
- R"""(true)""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(foo)""",
- R"""("true")""",
- });
- test_schema("non-string const", // NOTE(review): same label as the previous case, but this schema uses "enum"
- // Schema
- R"""({
- "enum": ["red", "amber", "green", null, 42, ["foo"]]
- })""",
- // Passing strings
- {
- R"""("red")""",
- R"""(null)""",
- R"""(42)""",
- R"""(["foo"])""",
- },
- // Failing strings
- {
- R"""()""",
- R"""(420)""",
- R"""(true)""",
- R"""(foo)""",
- });
- test_schema("simple pattern",
- // Schema
- R"""({
- "pattern": "^[a-zA-Z0-9_-]*$"
- })""",
- // Passing strings
- {
- R"""("")""",
- R"""("He_llo-12")""",
- },
- // Failing strings
- {
- R"""("!")""",
- R"""("Hello World")""",
- });
- test_schema("pattern with escapes",
- // Schema
- R"""({
- "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
- })""",
- // Passing strings
- {
- R"""("a^$.[]()|{}*+?b")""",
- },
- // Failing strings
- {
- R"""("ab")""",
- });
- test_schema("", // NOTE(review): empty description; the schema below is an array of strings that may also be null
- // Schema
- R"""(
- {
- "type": ["array", "null"],
- "items": { "type": "string" }
- }
- )""",
- // Passing strings
- {
- "null",
- "[]",
- "[\"123\"]",
- "[\"foo\", \"bar\"]",
- },
- // Failing strings
- {
- "",
- "[123]",
- "\"foo\"",
- "[\"foo\", 42]",
- });
- test_schema("min+max items",
- // Schema
- R"""({
- "items": {
- "type": ["number", "integer"]
- },
- "minItems": 3,
- "maxItems": 5
- })""",
- // Passing strings
- {
- R"""([1, 2, 3])""",
- R"""([1, 2, 3, 4])""",
- R"""([1, 2, 3, 4, 5])""",
- // this is in fact correct; keywords do not apply if the type is wrong
- R"""(1)""",
- },
- // Failing strings
- {
- R"""([1, 2])""",
- R"""([1, 2, 3, 4, 5, 6])""",
- });
- // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
- test_schema("object properties",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": false
- })""",
- // Passing strings
- {
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // "By default, leaving out properties is valid"
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- // "By extension, even an empty object is valid"
- R"""({})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Change datatype from number to string
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Reorder properties
- R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
- // NOTE(review): originally labelled "Reorder properties", but this string repeats the datatype case above
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Additional properties set to false
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- });
- test_schema("additional properties can't override other properties",
- R"""({
- "properties": {
- "a": {"type": "integer"},
- "b": {"type": "integer"}
- },
- "additionalProperties": true
- })""",
- // Passing strings
- {
- R"""({"a": 42})""",
- R"""({"c": ""})""",
- R"""({"a": 42, "c": ""})""",
- R"""({"a_": ""})""",
- },
- // Failing strings
- {
- R"""()""",
- R"""({"a": ""})""",
- R"""({"a": "", "b": ""})""",
- });
- // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
- test_schema("object properties, additionalProperties: true",
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": true
- })""",
- // Passing strings
- {
- // "By extension, even an empty object is valid"
- R"""({})""",
- R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
- // "By default, leaving out properties is valid"
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- // "By default, providing additional properties is valid"
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Change datatype from number to string
- R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Reorder properties
- R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
- });
- // Additional properties: false
- test_schema(
- "required + optional props each in original order", // NOTE(review): this schema declares no "required" list; the label is reused by the next case
- // Schema
- R"""({
- "type": "object",
- "properties": {
- "number": { "type": "number" },
- "street_name": { "type": "string" },
- "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
- },
- "additionalProperties": false
- })""",
- // Passing strings
- {
- R"""({ "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_type":"Avenue"})""",
- R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
- // Spaces are permitted around enum values
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
- },
- // Failing strings
- {
- // Reorder properties
- R"""({ "street_type": "Avenue", "number": 1600 })""",
- // Add "direction"
- R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
- });
- test_schema("required + optional props each in original order",
- // Schema
- R"""({
- "properties": {
- "b": {"type": "string"},
- "a": {"type": "string"},
- "d": {"type": "string"},
- "c": {"type": "string"}
- },
- "required": ["a", "b"],
- "additionalProperties": false
- })""",
- // Passing strings: properties appear in the order they are declared in the schema (b, a, d, c)
- {
- R"""({"b": "foo", "a": "bar"})""",
- R"""({"b":"foo","a":"bar","d":"qux"})""",
- R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
- },
- // Failing strings
- {
- R"""({"a": "foo", "b": "bar"})""",
- R"""({"b": "bar"})""",
- R"""({"a": "foo", "c": "baz"})""",
- R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
- });
- // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
- test_schema(
- "required props",
- // Schema
- R"""({
- "$schema": "https://json-schema.org/draft/2020-12/schema",
- "$id": "https://example.com/product.schema.json",
- "title": "Product",
- "description": "A product from Acme's catalog",
- "type": "object",
- "properties": {
- "productId": {
- "description": "The unique identifier for a product",
- "type": "integer"
- },
- "productName": {
- "description": "Name of the product",
- "type": "string"
- },
- "price": {
- "description": "The price of the product",
- "type": "number",
- "exclusiveMinimum": 0
- },
- "tags": {
- "description": "Tags for the product",
- "type": "array",
- "items": {
- "type": "string"
- },
- "minItems": 1,
- "DISABLED_uniqueItems": true
- },
- "dimensions": {
- "type": "object",
- "properties": {
- "length": {
- "type": "number"
- },
- "width": {
- "type": "number"
- },
- "height": {
- "type": "number"
- }
- },
- "required": [ "length", "width", "height" ]
- }
- },
- "required": [ "productId", "productName", "price" ]
- })""",
- // Passing strings
- {
- R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
- },
- // Failing strings
- {
- R"""({})""", // Missing all required properties
- R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
- // `exclusiveMinimum` is OK for llg
- R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
- R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
- R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
- R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
- // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
- // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
- });
- }
- int main(int argc, const char ** argv) {
- fprintf(stdout, "Running llguidance integration tests...\n");
- if (argc != 2) {
- fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
- return 1;
- }
- const char * vocab_file = argv[1];
- fprintf(stderr, "reading vocab from: '%s'\n", vocab_file);
- llama_model * model;
- llama_context * ctx;
- llama_backend_init();
- // load the vocab
- {
- auto mparams = llama_model_default_params();
- mparams.vocab_only = true;
- model = llama_model_load_from_file(vocab_file, mparams);
- if (model == NULL) {
- fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, vocab_file);
- return 1;
- }
- // needed?
- auto cparams = llama_context_default_params();
- ctx = llama_init_from_model(model, cparams);
- if (ctx == NULL) {
- fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, vocab_file);
- llama_model_free(model);
- return 1;
- }
- }
- vocab = llama_model_get_vocab(model);
- test_simple_grammar();
- test_complex_grammar();
- test_special_chars();
- test_quantifiers();
- test_json_schema();
- fprintf(stdout, "All tests passed.\n");
- return 0;
- }
|