|
|
@@ -0,0 +1,1140 @@
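+// Integration tests for the llguidance-based grammar sampler: each test case
+// builds a sampler from a Lark grammar or a JSON schema and checks which
+// strings it accepts or rejects, using a real vocab loaded from a GGUF file.
+
+// Keep assert() active even in release builds; the tests below rely on it.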
|
|
|
+#ifdef NDEBUG
|
|
|
+# undef NDEBUG
|
|
|
+#endif
|
|
|
+
|
|
|
+#include "unicode.h"
|
|
|
+#include "sampling.h"
|
|
|
+
|
|
|
+#include <cassert>
|
|
|
+#include <string>
|
|
|
+#include <vector>
|
|
|
+
|
|
|
+static const llama_vocab * vocab;
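+// Feeds `input` token-by-token through the grammar sampler and checks that
+// the sampler never masks the current token (masked tokens end up with a
+// negative logit, typically -inf). The string only matches if the grammar
+// also accepts EOS/EOT after the final token.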
|
|
|
+
|
|
|
+static bool match_string(const std::string & input, llama_sampler * grammar) {
|
|
|
+ llama_sampler_reset(grammar);
|
|
|
+ auto tokens = common_tokenize(vocab, input, false, false);
|
|
|
+
|
|
|
+ auto n_vocab = llama_vocab_n_tokens(vocab);
|
|
|
+
|
|
|
+ std::vector<llama_token_data> cur;
|
|
|
+ cur.reserve(n_vocab);
|
|
|
+ for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
|
|
|
+ cur.emplace_back(llama_token_data{ token_id, 0.0f, 0.0f });
|
|
|
+ }
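+ // candidate array over the full vocab; the trailing -1/false initializers
+ // are the `selected` and `sorted` fields of llama_token_data_array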
|
|
|
+ auto tok_arr = llama_token_data_array{ cur.data(), cur.size(), -1, false };
|
|
|
+
|
|
|
+ for (const auto token : tokens) {
|
|
|
+ for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) {
|
|
|
+ cur[token_id].logit = 0.0f;
|
|
|
+ }
|
|
|
+ llama_sampler_apply(grammar, &tok_arr);
|
|
|
+ if (cur[token].logit < 0.0f) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ llama_sampler_accept(grammar, token);
|
|
|
+ }
|
|
|
+
|
|
|
+ // the string is accepted only if the grammar also allows EOS (or EOT) at this point
|
|
|
+
|
|
|
+ auto tok_eos = llama_vocab_eot(vocab);
|
|
|
+ if (tok_eos == LLAMA_TOKEN_NULL) {
|
|
|
+ tok_eos = llama_vocab_eos(vocab);
|
|
|
+ }
|
|
|
+
|
|
|
+ cur[tok_eos].logit = 0.0f;
|
|
|
+ llama_sampler_apply(grammar, &tok_arr);
|
|
|
+
|
|
|
+ return cur[tok_eos].logit >= 0.0f;
|
|
|
+}
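+// Runs one grammar against a list of strings that must match and a list that
+// must not. On an unexpected result the grammar and the offending string are
+// dumped to files so the failure can be replayed offline, and the assert
+// aborts the test run.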
|
|
|
+
|
|
|
+static void test(const std::string & test_desc, const std::string & grammar_str,
|
|
|
+ const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
|
|
|
+ fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
|
|
|
+ fflush(stderr);
|
|
|
+
|
|
|
+ auto * grammar = llama_sampler_init_llg(vocab, "lark", grammar_str.c_str());
|
|
|
+
|
|
|
+ fprintf(stderr, " 🔵 Valid strings:\n");
|
|
|
+
|
|
|
+ // Passing strings
|
|
|
+ for (const auto & test_string : passing_strings) {
|
|
|
+ fprintf(stderr, " \"%s\" ", test_string.c_str());
|
|
|
+ fflush(stderr);
|
|
|
+
|
|
|
+ bool matched = match_string(test_string, grammar);
|
|
|
+
|
|
|
+ if (!matched) {
|
|
|
+ fprintf(stderr, "❌ (failed to match)\n");
|
|
|
+
|
|
|
+ // DEBUG: Write the grammar and the test string to files so the failure can be analyzed in detail with the gbnf-validator program.
|
|
|
+ // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
|
|
|
+ FILE * grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
|
|
|
+ if (grammar_file) {
|
|
|
+ fprintf(grammar_file, "%s", grammar_str.c_str());
|
|
|
+ fclose(grammar_file);
|
|
|
+ }
|
|
|
+
|
|
|
+ // DEBUG: Write the test string to test-grammar-integration.string.txt
|
|
|
+ FILE * string_file = fopen("test-grammar-integration.string.txt", "w");
|
|
|
+ if (string_file) {
|
|
|
+ fprintf(string_file, "%s", test_string.c_str());
|
|
|
+ fclose(string_file);
|
|
|
+ }
|
|
|
+
|
|
|
+ fprintf(stderr,
|
|
|
+ "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following "
|
|
|
+ "command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf "
|
|
|
+ "test-grammar-integration.string.txt\n\n");
|
|
|
+ } else {
|
|
|
+ fprintf(stderr, "✅︎\n");
|
|
|
+ }
|
|
|
+
|
|
|
+ assert(matched);
|
|
|
+ }
|
|
|
+
|
|
|
+ fprintf(stderr, " 🟠 Invalid strings:\n");
|
|
|
+
|
|
|
+ // Failing strings
|
|
|
+ for (const auto & test_string : failing_strings) {
|
|
|
+ fprintf(stderr, " \"%s\" ", test_string.c_str());
|
|
|
+ fflush(stderr);
|
|
|
+
|
|
|
+ bool matched = match_string(test_string, grammar);
|
|
|
+
|
|
|
+ if (matched) {
|
|
|
+ fprintf(stderr, "❌ (incorrectly matched)\n");
|
|
|
+ } else {
|
|
|
+ fprintf(stderr, "✅︎\n");
|
|
|
+ }
|
|
|
+ assert(!matched);
|
|
|
+ }
|
|
|
+
|
|
|
+ llama_sampler_free(grammar);
|
|
|
+}
|
|
|
+
|
|
|
+static void test_grammar(const std::string & test_desc, const std::string & grammar_str,
|
|
|
+ const std::vector<std::string> & passing_strings,
|
|
|
+ const std::vector<std::string> & failing_strings) {
|
|
|
+ test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
|
|
|
+}
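+// Wraps a JSON schema into an llguidance Lark grammar: the start rule matches
+// documents conforming to the schema via the %json directive, and the
+// "%llguidance {}" header carries llguidance-specific options (empty here).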
|
|
|
+
|
|
|
+static void test_schema(const std::string & test_desc, const std::string & schema_str,
|
|
|
+ const std::vector<std::string> & passing_strings,
|
|
|
+ const std::vector<std::string> & failing_strings) {
|
|
|
+ test(test_desc + ". Schema: " + schema_str, "%llguidance {}\nstart: %json " + schema_str, passing_strings,
|
|
|
+ failing_strings);
|
|
|
+}
|
|
|
+
|
|
|
+static void test_simple_grammar() {
|
|
|
+ test_schema("min 0",
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": 0
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "0",
|
|
|
+ "10",
|
|
|
+ "12",
|
|
|
+ "10000",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "-1",
|
|
|
+ "-10",
|
|
|
+ "-10000",
|
|
|
+ "-100000000000000000000000000000000",
|
|
|
+ // "100000000000000000000000000000000",
|
|
|
+ "00",
|
|
|
+ "01",
|
|
|
+ "-0",
|
|
|
+ });
|
|
|
+ test_schema("min 2",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": 2
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "2",
|
|
|
+ "3",
|
|
|
+ "4",
|
|
|
+ "10",
|
|
|
+ "20",
|
|
|
+ "1234567890000000",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "0", "1", "-1", "-100", "0", "1", "01", "02",
|
|
|
+ // "12345678900000000",
|
|
|
+ });
|
|
|
+ test_schema("min 456",
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": 456
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "456",
|
|
|
+ "4560",
|
|
|
+ "457",
|
|
|
+ "460",
|
|
|
+ "500",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "455",
|
|
|
+ "356",
|
|
|
+ "50",
|
|
|
+ "050",
|
|
|
+ "-1",
|
|
|
+ "-456",
|
|
|
+ });
|
|
|
+ test_schema("min -123",
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": -123
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "-123",
|
|
|
+ "-122",
|
|
|
+ "-11",
|
|
|
+ "-1",
|
|
|
+ "0",
|
|
|
+ "1",
|
|
|
+ "123",
|
|
|
+ "1234",
|
|
|
+ "2345",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "-1234",
|
|
|
+ "-124",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("max 9999",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "maximum": 9999
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "-99999",
|
|
|
+ "0",
|
|
|
+ "9999",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "10000",
|
|
|
+ "99991",
|
|
|
+ });
|
|
|
+ test_schema("max -9999",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "maximum": -9999
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "-10000",
|
|
|
+ "-9999",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "-9998",
|
|
|
+ "0",
|
|
|
+ "9999",
|
|
|
+ });
|
|
|
+ test_schema("min 5 max 30",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": 5,
|
|
|
+ "maximum": 30
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "5",
|
|
|
+ "10",
|
|
|
+ "30",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "05",
|
|
|
+ "4",
|
|
|
+ "-1",
|
|
|
+ "31",
|
|
|
+ "123",
|
|
|
+ "0123",
|
|
|
+ });
|
|
|
+ test_schema("min -1 max 1",
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": -1,
|
|
|
+ "maximum": 1
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "-1",
|
|
|
+ "0",
|
|
|
+ "1",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "-11",
|
|
|
+ "-10",
|
|
|
+ "-2",
|
|
|
+ "2",
|
|
|
+ "10",
|
|
|
+ "11",
|
|
|
+ });
|
|
|
+ test_schema("min -123 max 42",
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "minimum": -123,
|
|
|
+ "maximum": 42
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "-123",
|
|
|
+ "-122",
|
|
|
+ "-13",
|
|
|
+ "-11",
|
|
|
+ "-2",
|
|
|
+ "-1",
|
|
|
+ "0",
|
|
|
+ "1",
|
|
|
+ "5",
|
|
|
+ "10",
|
|
|
+ "39",
|
|
|
+ "40",
|
|
|
+ "42",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "-0123",
|
|
|
+ "-124",
|
|
|
+ "-1123",
|
|
|
+ "-200",
|
|
|
+ "43",
|
|
|
+ "123",
|
|
|
+ "0123",
|
|
|
+ });
|
|
|
+ test_schema("exclusive min / max",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "integer",
|
|
|
+ "exclusiveMinimum": 0,
|
|
|
+ "exclusiveMaximum": 10000
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "1",
|
|
|
+ "9999",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "0",
|
|
|
+ "01",
|
|
|
+ "10000",
|
|
|
+ "99999",
|
|
|
+ });
|
|
|
+
|
|
|
+ // Test case for a simple grammar
|
|
|
+ test_grammar("simple grammar",
|
|
|
+ R"""(
|
|
|
+ start: expr
|
|
|
+ expr: term ("+" term)*
|
|
|
+ term: number
|
|
|
+ number: /[0-9]+/ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "42",
|
|
|
+ "1+2+3+4+5",
|
|
|
+ "123+456",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "+",
|
|
|
+ "/ 3",
|
|
|
+ "1+2+3+4+5+",
|
|
|
+ "12a45",
|
|
|
+ });
|
|
|
+}
|
|
|
+
|
|
|
+static void test_complex_grammar() {
|
|
|
+ // Test case for a more complex grammar, with both failure strings and success strings
|
|
|
+ test_grammar("medium complexity grammar",
|
|
|
+ // Grammar
|
|
|
+ R"""(
|
|
|
+ start: expression
|
|
|
+ expression: term ws (("+"|"-") ws term)*
|
|
|
+ term: factor ws (("*"|"/") ws factor)*
|
|
|
+ factor: number | variable | "(" expression ")" | function-call
|
|
|
+ number: /[0-9]+/
|
|
|
+ variable: /[a-zA-Z_][a-zA-Z0-9_]*/
|
|
|
+ function-call: variable ws "(" (expression ("," ws expression)*)? ")"
|
|
|
+ ws: /[ \t\n\r]?/ )""",
|
|
|
+ // Passing strings
|
|
|
+ { "42",
|
|
|
+ "1*2*3*4*5",
|
|
|
+ "x",
|
|
|
+ "x+10",
|
|
|
+ "x1+y2",
|
|
|
+ "(a+b)*(c-d)",
|
|
|
+ "func()",
|
|
|
+ "func(x,y+2)",
|
|
|
+ "a*(b+c)-d/e",
|
|
|
+ "f(g(x),h(y,z))",
|
|
|
+ "x + 10",
|
|
|
+ "x1 + y2",
|
|
|
+ "(a + b) * (c - d)",
|
|
|
+ "func()",
|
|
|
+ "func(x, y + 2)",
|
|
|
+ "a * (b + c) - d / e",
|
|
|
+ "f(g(x), h(y, z))",
|
|
|
+ "123+456",
|
|
|
+ "123*456*789-123/456+789*123",
|
|
|
+ "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456" },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "+",
|
|
|
+ "/ 3x",
|
|
|
+ "x + + y",
|
|
|
+ "a * / b",
|
|
|
+ "func(,)",
|
|
|
+ "func(x y)",
|
|
|
+ "(a + b",
|
|
|
+ "x + y)",
|
|
|
+ "a + b * (c - d",
|
|
|
+ "42 +",
|
|
|
+ "x +",
|
|
|
+ "x + 10 +",
|
|
|
+ "(a + b) * (c - d",
|
|
|
+ "func(",
|
|
|
+ "func(x, y + 2",
|
|
|
+ "a * (b + c) - d /",
|
|
|
+ "f(g(x), h(y, z)",
|
|
|
+ "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
|
|
|
+ });
|
|
|
+}
|
|
|
+
|
|
|
+static void test_special_chars() {
|
|
|
+ // A collection of tests to exercise special characters such as "."
|
|
|
+ test_grammar("special characters",
|
|
|
+ // Grammar
|
|
|
+ R"""(
|
|
|
+ start: /.../ "abc" /.../
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ { "abcabcabc", "aaaabcccc",
|
|
|
+ // NOTE: Also ensures that multi-byte characters still count as a single character
|
|
|
+ "🔵🟠✅abc❌🟠🔵" },
|
|
|
+ // Failing strings
|
|
|
+ { "aaabcccc", "aaaaabcccc", "aaaabccc", "aaaabccccc", "🔵🟠✅❌abc❌✅🟠🔵", "🔵🟠abc🟠🔵" });
|
|
|
+}
|
|
|
+
|
|
|
+static void test_quantifiers() {
|
|
|
+ // A collection of tests to exercise * + and ? quantifiers
|
|
|
+
|
|
|
+ test_grammar(
|
|
|
+ "* quantifier",
|
|
|
+ // Grammar
|
|
|
+ R"""(start: "a"*)""",
|
|
|
+ // Passing strings
|
|
|
+ { "", "a", "aaaaa", "aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" },
|
|
|
+ // Failing strings
|
|
|
+ { "b", "ab", "aab", "ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" });
|
|
|
+ test_grammar(
|
|
|
+ "+ quantifier",
|
|
|
+ // Grammar
|
|
|
+ R"""(start: "a"+)""",
|
|
|
+ // Passing strings
|
|
|
+ { "a", "aaaaa", "aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" },
|
|
|
+ // Failing strings
|
|
|
+ { "", "b", "ab", "aab", "ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" });
|
|
|
+ test_grammar("? quantifier",
|
|
|
+ // Grammar
|
|
|
+ R"""(start: "a"?)""",
|
|
|
+ // Passing strings
|
|
|
+ { "", "a" },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "b",
|
|
|
+ "ab",
|
|
|
+ "aa",
|
|
|
+ "ba",
|
|
|
+ });
|
|
|
+ test_grammar("mixed quantifiers",
|
|
|
+ // Grammar
|
|
|
+ R"""(
|
|
|
+ start: cons+ vowel* cons? (vowel cons)*
|
|
|
+ vowel: /[aeiouy]/
|
|
|
+ cons: /[bcdfghjklmnpqrstvwxyz]/
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "yes",
|
|
|
+ "no",
|
|
|
+ "noyes",
|
|
|
+ "crwth",
|
|
|
+ "four",
|
|
|
+ "bryyyy",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "yess",
|
|
|
+ "yesno",
|
|
|
+ "forty",
|
|
|
+ "catyyy",
|
|
|
+ });
|
|
|
+ test_grammar("simple exact repetition",
|
|
|
+ // Grammar
|
|
|
+ R"""(
|
|
|
+ start: /[ab]{4}/
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "aaaa",
|
|
|
+ "bbbb",
|
|
|
+ "abab",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "a",
|
|
|
+ "b",
|
|
|
+ "aaaaa",
|
|
|
+ });
|
|
|
+ test_grammar("simple min repetition",
|
|
|
+ // Grammar
|
|
|
+ R"""(
|
|
|
+ start: /[ab]{4,}/
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "aaaa",
|
|
|
+ "aaaaab",
|
|
|
+ "bbbb",
|
|
|
+ "ababab",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "",
|
|
|
+ "aba",
|
|
|
+ });
|
|
|
+ test_grammar("simple max repetition",
|
|
|
+ // Grammar
|
|
|
+ R"""(
|
|
|
+ start: /[ab]{0,4}/
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "",
|
|
|
+ "a",
|
|
|
+ "aa",
|
|
|
+ "aaa",
|
|
|
+ "aaab",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "aaaaa",
|
|
|
+ });
|
|
|
+ // test_grammar("min / max repetition",
|
|
|
+ // // Grammar
|
|
|
+ // R"""(
|
|
|
+ // start: ("0x" /[A-F0-9]{2}/ " "?){3,5}
|
|
|
+ // )""",
|
|
|
+ // // Passing strings
|
|
|
+ // {
|
|
|
+ // "0xFF 0x12 0xAB",
|
|
|
+ // "0xFF 0x12 0xAB 0x00 0x00",
|
|
|
+ // },
|
|
|
+ // // Failing strings
|
|
|
+ // {
|
|
|
+ // "",
|
|
|
+ // "0xFF",
|
|
|
+ // "0xFF 0x12",
|
|
|
+ // "0xFF 0x12 0xAB 0x00 0x00 0x00",
|
|
|
+ // });
|
|
|
+}
|
|
|
+
|
|
|
+static void test_json_schema() {
|
|
|
+ // Note that this is similar to the regular grammar tests,
|
|
|
+ // but we convert each json schema to a grammar before parsing.
|
|
|
+ // Otherwise, this test structure is the same.
|
|
|
+
|
|
|
+ test_schema("empty schema (object)",
|
|
|
+ // Schema
|
|
|
+ R"""(
|
|
|
+ {"type":"object"}
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""({})""",
|
|
|
+ R"""({"foo": "bar"})""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "",
|
|
|
+ "[]",
|
|
|
+ "null",
|
|
|
+ R"""("")""",
|
|
|
+ "true",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema(
|
|
|
+ "exotic formats (list)",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "items": [
|
|
|
+ { "format": "date" },
|
|
|
+ { "format": "uuid" },
|
|
|
+ { "format": "time" },
|
|
|
+ { "format": "date-time" }
|
|
|
+ ]
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
|
+ // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
|
+ R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
|
|
|
+ //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
|
+ //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""(["foo", "bar"])""",
|
|
|
+ R"""(["12345678-1234-1234-1234-1234567890ab"])""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("string",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "string"
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("foo")""",
|
|
|
+ R"""("bar")""",
|
|
|
+ R"""("")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""({})""",
|
|
|
+ R"""("foo": "bar")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("string w/ min length 1",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "string",
|
|
|
+ "minLength": 1
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("foo")""",
|
|
|
+ R"""("bar")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("")""",
|
|
|
+ R"""({})""",
|
|
|
+ R"""("foo": "bar")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("string w/ min length 3",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "string",
|
|
|
+ "minLength": 3
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("foo")""",
|
|
|
+ R"""("bar")""",
|
|
|
+ R"""("foobar")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("")""",
|
|
|
+ R"""("f")""",
|
|
|
+ R"""("fo")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("string w/ max length",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "string",
|
|
|
+ "maxLength": 3
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("foo")""",
|
|
|
+ R"""("bar")""",
|
|
|
+ R"""("")""",
|
|
|
+ R"""("f")""",
|
|
|
+ R"""("fo")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("foobar")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("string w/ min & max length",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "string",
|
|
|
+ "minLength": 1,
|
|
|
+ "maxLength": 4
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("foo")""",
|
|
|
+ R"""("bar")""",
|
|
|
+ R"""("f")""",
|
|
|
+ R"""("barf")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("")""",
|
|
|
+ R"""("barfo")""",
|
|
|
+ R"""("foobar")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("boolean",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "boolean"
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "true",
|
|
|
+ "false",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("")""",
|
|
|
+ R"""("true")""",
|
|
|
+ R"""(True)""",
|
|
|
+ R"""(FALSE)""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("integer",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "integer"
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""(0)""",
|
|
|
+ R"""(12345)""",
|
|
|
+ R"""(1234567890123456)""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""()""",
|
|
|
+ R"""(01)""",
|
|
|
+ R"""(007)""",
|
|
|
+ R"""(12345678901234567 )""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("string const",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "const": "foo"
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("foo")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""(foo)""",
|
|
|
+ R"""("bar")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("non-string const",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "const": true
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""(true)""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""()""",
|
|
|
+ R"""(foo)""",
|
|
|
+ R"""("true")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("non-string const",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "enum": ["red", "amber", "green", null, 42, ["foo"]]
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("red")""",
|
|
|
+ R"""(null)""",
|
|
|
+ R"""(42)""",
|
|
|
+ R"""(["foo"])""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""()""",
|
|
|
+ R"""(420)""",
|
|
|
+ R"""(true)""",
|
|
|
+ R"""(foo)""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("simple pattern",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "pattern": "^[a-zA-Z0-9_-]*$"
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("")""",
|
|
|
+ R"""("He_llo-12")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("!")""",
|
|
|
+ R"""("Hello World")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("pattern with escapes",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""("a^$.[]()|{}*+?b")""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""("ab")""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("",
|
|
|
+ // Schema
|
|
|
+ R"""(
|
|
|
+ {
|
|
|
+ "type": ["array", "null"],
|
|
|
+ "items": { "type": "string" }
|
|
|
+ }
|
|
|
+ )""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ "null",
|
|
|
+ "[]",
|
|
|
+ "[\"123\"]",
|
|
|
+ "[\"foo\", \"bar\"]",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ "",
|
|
|
+ "[123]",
|
|
|
+ "\"foo\"",
|
|
|
+ "[\"foo\", 42]",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("min+max items",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "items": {
|
|
|
+ "type": ["number", "integer"]
|
|
|
+ },
|
|
|
+ "minItems": 3,
|
|
|
+ "maxItems": 5
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""([1, 2, 3])""",
|
|
|
+ R"""([1, 2, 3, 4])""",
|
|
|
+ R"""([1, 2, 3, 4, 5])""",
|
|
|
+ // this is in fact correct; array keywords do not apply when the value is not an array
|
|
|
+ R"""(1)""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""([1, 2])""",
|
|
|
+ R"""([1, 2, 3, 4, 5, 6])""",
|
|
|
+ });
|
|
|
+
|
|
|
+ // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
|
|
|
+ test_schema("object properties",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "object",
|
|
|
+ "properties": {
|
|
|
+ "number": { "type": "number" },
|
|
|
+ "street_name": { "type": "string" },
|
|
|
+ "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
|
+ },
|
|
|
+ "additionalProperties": false
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
|
+ // "By default, leaving out properties is valid"
|
|
|
+ R"""({ "street_name": "Pennsylvania" })""",
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
|
+ // "By extension, even an empty object is valid"
|
|
|
+ R"""({})""",
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ // Change datatype from number to string
|
|
|
+ R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
|
+ // Reorder properties
|
|
|
+ R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
|
|
|
|
|
|
+ // Additional properties set to false
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
|
|
|
+
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("additional properties can't override other properties",
|
|
|
+ R"""({
|
|
|
+ "properties": {
|
|
|
+ "a": {"type": "integer"},
|
|
|
+ "b": {"type": "integer"}
|
|
|
+ },
|
|
|
+ "additionalProperties": true
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""({"a": 42})""",
|
|
|
+ R"""({"c": ""})""",
|
|
|
+ R"""({"a": 42, "c": ""})""",
|
|
|
+ R"""({"a_": ""})""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""()""",
|
|
|
+ R"""({"a": ""})""",
|
|
|
+ R"""({"a": "", "b": ""})""",
|
|
|
+ });
|
|
|
+
|
|
|
+ // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
|
|
|
+ test_schema("object properties, additionalProperties: true",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "object",
|
|
|
+ "properties": {
|
|
|
+ "number": { "type": "number" },
|
|
|
+ "street_name": { "type": "string" },
|
|
|
+ "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
|
+ },
|
|
|
+ "additionalProperties": true
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ // "By extension, even an empty object is valid"
|
|
|
+ R"""({})""",
|
|
|
+ R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
|
|
|
+ // "By default, leaving out properties is valid"
|
|
|
+ R"""({ "street_name": "Pennsylvania" })""",
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
|
+ // "By default, providing additional properties is valid"
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ // Change datatype from number to string
|
|
|
+ R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
|
+ // Reorder properties
|
|
|
+ R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
|
|
|
+ });
|
|
|
+
|
|
|
+ // Additional properties: false
|
|
|
+ test_schema(
|
|
|
+ "required + optional props each in original order",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "type": "object",
|
|
|
+ "properties": {
|
|
|
+ "number": { "type": "number" },
|
|
|
+ "street_name": { "type": "string" },
|
|
|
+ "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
|
+ },
|
|
|
+ "additionalProperties": false
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""({ "street_name": "Pennsylvania" })""",
|
|
|
+ R"""({ "number": 1600, "street_type":"Avenue"})""",
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
|
+ // Spaces are permitted around enum values
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ // Reorder properties
|
|
|
+ R"""({ "street_type": "Avenue", "number": 1600 })""",
|
|
|
+ // Add "direction"
|
|
|
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
|
|
|
+ });
|
|
|
+
|
|
|
+ test_schema("required + optional props each in original order",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "properties": {
|
|
|
+ "b": {"type": "string"},
|
|
|
+ "a": {"type": "string"},
|
|
|
+ "d": {"type": "string"},
|
|
|
+ "c": {"type": "string"}
|
|
|
+ },
|
|
|
+ "required": ["a", "b"],
|
|
|
+ "additionalProperties": false
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""({"b": "foo", "a": "bar"})""",
|
|
|
+ R"""({"b":"foo","a":"bar","d":"qux"})""",
|
|
|
+ R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""({"a": "foo", "b": "bar"})""",
|
|
|
+ R"""({"b": "bar"})""",
|
|
|
+ R"""({"a": "foo", "c": "baz"})""",
|
|
|
+ R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
|
|
|
+ });
|
|
|
+
|
|
|
+ // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
|
|
|
+ test_schema(
|
|
|
+ "required props",
|
|
|
+ // Schema
|
|
|
+ R"""({
|
|
|
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
|
+ "$id": "https://example.com/product.schema.json",
|
|
|
+ "title": "Product",
|
|
|
+ "description": "A product from Acme's catalog",
|
|
|
+ "type": "object",
|
|
|
+ "properties": {
|
|
|
+ "productId": {
|
|
|
+ "description": "The unique identifier for a product",
|
|
|
+ "type": "integer"
|
|
|
+ },
|
|
|
+ "productName": {
|
|
|
+ "description": "Name of the product",
|
|
|
+ "type": "string"
|
|
|
+ },
|
|
|
+ "price": {
|
|
|
+ "description": "The price of the product",
|
|
|
+ "type": "number",
|
|
|
+ "exclusiveMinimum": 0
|
|
|
+ },
|
|
|
+ "tags": {
|
|
|
+ "description": "Tags for the product",
|
|
|
+ "type": "array",
|
|
|
+ "items": {
|
|
|
+ "type": "string"
|
|
|
+ },
|
|
|
+ "minItems": 1,
|
|
|
+ "DISABLED_uniqueItems": true
|
|
|
+ },
|
|
|
+ "dimensions": {
|
|
|
+ "type": "object",
|
|
|
+ "properties": {
|
|
|
+ "length": {
|
|
|
+ "type": "number"
|
|
|
+ },
|
|
|
+ "width": {
|
|
|
+ "type": "number"
|
|
|
+ },
|
|
|
+ "height": {
|
|
|
+ "type": "number"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "required": [ "length", "width", "height" ]
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "required": [ "productId", "productName", "price" ]
|
|
|
+ })""",
|
|
|
+ // Passing strings
|
|
|
+ {
|
|
|
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
|
|
|
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
|
|
|
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
|
|
|
+ },
|
|
|
+ // Failing strings
|
|
|
+ {
|
|
|
+ R"""({})""", // Missing all required properties
|
|
|
+ R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
|
|
|
+ // `exclusiveMinimum` is supported by llguidance, so a non-positive price must be rejected
|
|
|
+ R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
|
|
|
+ R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
|
|
|
+ R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
|
|
|
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
|
|
|
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
|
|
|
+ // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
|
|
|
+ // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
|
|
|
+ });
|
|
|
+}
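+// Expects a vocab-only GGUF file as the single argument, e.g. one of the
+// ggml-vocab-*.gguf files shipped under models/ in the llama.cpp repository.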
|
|
|
+
|
|
|
+int main(int argc, const char ** argv) {
|
|
|
+ fprintf(stdout, "Running llguidance integration tests...\n");
|
|
|
+
|
|
|
+ if (argc != 2) {
|
|
|
+ fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ const char * vocab_file = argv[1];
|
|
|
+
|
|
|
+ fprintf(stderr, "reading vocab from: '%s'\n", vocab_file);
|
|
|
+
|
|
|
+ llama_model * model;
|
|
|
+ llama_context * ctx;
|
|
|
+
|
|
|
+ llama_backend_init();
|
|
|
+
|
|
|
+ // load the vocab
|
|
|
+ {
|
|
|
+ auto mparams = llama_model_default_params();
|
|
|
+
|
|
|
+ mparams.vocab_only = true;
|
|
|
+
|
|
|
+ model = llama_model_load_from_file(vocab_file, mparams);
|
|
|
+
|
|
|
+ if (model == NULL) {
|
|
|
+ fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, vocab_file);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ // needed?
|
|
|
+ auto cparams = llama_context_default_params();
|
|
|
+
|
|
|
+ ctx = llama_init_from_model(model, cparams);
|
|
|
+
|
|
|
+ if (ctx == NULL) {
|
|
|
+ fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, vocab_file);
|
|
|
+ llama_model_free(model);
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ vocab = llama_model_get_vocab(model);
|
|
|
+
|
|
|
+ test_simple_grammar();
|
|
|
+ test_complex_grammar();
|
|
|
+ test_special_chars();
|
|
|
+ test_quantifiers();
|
|
|
+ test_json_schema();
|
|
|
+ fprintf(stdout, "All tests passed.\n");
|
|
|
+ return 0;
|
|
|
+}
|