json-schema-to-grammar.mjs 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856
  1. // WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
  2. const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}';
  3. function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
  4. if (maxItems == 0) {
  5. return '';
  6. }
  7. if (minItems === 0 && maxItems === 1) {
  8. return `${itemRule}?`;
  9. }
  10. const separatorRule = opts.separatorRule ?? '';
  11. const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false
  12. if (separatorRule === '') {
  13. if (minItems === 1 && maxItems === undefined) {
  14. return `${itemRule}+`;
  15. } else if (minItems === 0 && maxItems === undefined) {
  16. return `${itemRule}*`;
  17. } else {
  18. return `${itemRule}{${minItems},${maxItems !== undefined ? maxItems : ''}}`;
  19. }
  20. }
  21. const result = itemRule + ' ' + _buildRepetition(`(${separatorRule} ${itemRule})`, minItems > 0 ? minItems - 1 : 0, maxItems !== undefined ? maxItems - 1 : undefined);
  22. return minItems === 0 ? `(${result})?` : result;
  23. }
  24. function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
  25. const hasMin = minValue !== null;
  26. const hasMax = maxValue !== null;
  27. function digitRange(fromChar, toChar) {
  28. out.push("[");
  29. if (fromChar === toChar) {
  30. out.push(fromChar);
  31. } else {
  32. out.push(fromChar);
  33. out.push("-");
  34. out.push(toChar);
  35. }
  36. out.push("]");
  37. }
  38. function moreDigits(minDigits, maxDigits) {
  39. out.push("[0-9]");
  40. if (minDigits === maxDigits && minDigits === 1) {
  41. return;
  42. }
  43. out.push("{");
  44. out.push(minDigits.toString());
  45. if (maxDigits !== minDigits) {
  46. out.push(",");
  47. if (maxDigits !== Number.MAX_SAFE_INTEGER) {
  48. out.push(maxDigits.toString());
  49. }
  50. }
  51. out.push("}");
  52. }
  53. function uniformRange(fromStr, toStr) {
  54. let i = 0;
  55. while (i < fromStr.length && fromStr[i] === toStr[i]) {
  56. i++;
  57. }
  58. if (i > 0) {
  59. out.push("\"");
  60. out.push(fromStr.slice(0, i));
  61. out.push("\"");
  62. }
  63. if (i < fromStr.length) {
  64. if (i > 0) {
  65. out.push(" ");
  66. }
  67. const subLen = fromStr.length - i - 1;
  68. if (subLen > 0) {
  69. const fromSub = fromStr.slice(i + 1);
  70. const toSub = toStr.slice(i + 1);
  71. const subZeros = "0".repeat(subLen);
  72. const subNines = "9".repeat(subLen);
  73. let toReached = false;
  74. out.push("(");
  75. if (fromSub === subZeros) {
  76. digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1));
  77. out.push(" ");
  78. moreDigits(subLen, subLen);
  79. } else {
  80. out.push("[");
  81. out.push(fromStr[i]);
  82. out.push("] ");
  83. out.push("(");
  84. uniformRange(fromSub, subNines);
  85. out.push(")");
  86. if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) {
  87. out.push(" | ");
  88. if (toSub === subNines) {
  89. digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]);
  90. toReached = true;
  91. } else {
  92. digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1));
  93. }
  94. out.push(" ");
  95. moreDigits(subLen, subLen);
  96. }
  97. }
  98. if (!toReached) {
  99. out.push(" | ");
  100. digitRange(toStr[i], toStr[i]);
  101. out.push(" ");
  102. uniformRange(subZeros, toSub);
  103. }
  104. out.push(")");
  105. } else {
  106. out.push("[");
  107. out.push(fromStr[i]);
  108. out.push("-");
  109. out.push(toStr[i]);
  110. out.push("]");
  111. }
  112. }
  113. }
  114. if (hasMin && hasMax) {
  115. if (minValue < 0 && maxValue < 0) {
  116. out.push("\"-\" (");
  117. _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
  118. out.push(")");
  119. return;
  120. }
  121. if (minValue < 0) {
  122. out.push("\"-\" (");
  123. _generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
  124. out.push(") | ");
  125. minValue = 0;
  126. }
  127. let minS = minValue.toString();
  128. const maxS = maxValue.toString();
  129. const minDigits = minS.length;
  130. const maxDigits = maxS.length;
  131. for (let digits = minDigits; digits < maxDigits; digits++) {
  132. uniformRange(minS, "9".repeat(digits));
  133. minS = "1" + "0".repeat(digits);
  134. out.push(" | ");
  135. }
  136. uniformRange(minS, maxS);
  137. return;
  138. }
  139. const lessDecimals = Math.max(decimalsLeft - 1, 1);
  140. if (hasMin) {
  141. if (minValue < 0) {
  142. out.push("\"-\" (");
  143. _generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
  144. out.push(") | [0] | [1-9] ");
  145. moreDigits(0, decimalsLeft - 1);
  146. } else if (minValue === 0) {
  147. if (topLevel) {
  148. out.push("[0] | [1-9] ");
  149. moreDigits(0, lessDecimals);
  150. } else {
  151. moreDigits(1, decimalsLeft);
  152. }
  153. } else if (minValue <= 9) {
  154. const c = minValue.toString();
  155. const range_start = topLevel ? '1' : '0';
  156. if (c > range_start) {
  157. digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1));
  158. out.push(" ");
  159. moreDigits(1, lessDecimals);
  160. out.push(" | ");
  161. }
  162. digitRange(c, "9");
  163. out.push(" ");
  164. moreDigits(0, lessDecimals);
  165. } else {
  166. const minS = minValue.toString();
  167. const length = minS.length;
  168. const c = minS[0];
  169. if (c > "1") {
  170. digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1));
  171. out.push(" ");
  172. moreDigits(length, lessDecimals);
  173. out.push(" | ");
  174. }
  175. digitRange(c, c);
  176. out.push(" (");
  177. _generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false);
  178. out.push(")");
  179. if (c < "9") {
  180. out.push(" | ");
  181. digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9");
  182. out.push(" ");
  183. moreDigits(length - 1, lessDecimals);
  184. }
  185. }
  186. return;
  187. }
  188. if (hasMax) {
  189. if (maxValue >= 0) {
  190. if (topLevel) {
  191. out.push("\"-\" [1-9] ");
  192. moreDigits(0, lessDecimals);
  193. out.push(" | ");
  194. }
  195. _generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
  196. } else {
  197. out.push("\"-\" (");
  198. _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
  199. out.push(")");
  200. }
  201. return;
  202. }
  203. throw new Error("At least one of minValue or maxValue must be set");
  204. }
  205. class BuiltinRule {
  206. constructor(content, deps) {
  207. this.content = content;
  208. this.deps = deps || [];
  209. }
  210. }
  211. const PRIMITIVE_RULES = {
  212. boolean : new BuiltinRule('("true" | "false") space', []),
  213. 'decimal-part' : new BuiltinRule('[0-9]{1,16}', []),
  214. 'integral-part': new BuiltinRule('[0] | [1-9] [0-9]{0,15}', []),
  215. number : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
  216. integer : new BuiltinRule('("-"? integral-part) space', ['integral-part']),
  217. value : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
  218. object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
  219. array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
  220. uuid : new BuiltinRule('"\\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\\"" space', []),
  221. char : new BuiltinRule(`[^"\\\\\\x7F\\x00-\\x1F] | [\\\\] (["\\\\bfnrt] | "u" [0-9a-fA-F]{4})`, []),
  222. string : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
  223. null : new BuiltinRule('"null" space', []),
  224. };
  225. // TODO: support "uri", "email" string formats
  226. const STRING_FORMAT_RULES = {
  227. 'date' : new BuiltinRule('[0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
  228. 'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
  229. 'date-time' : new BuiltinRule('date "T" time', ['date', 'time']),
  230. 'date-string' : new BuiltinRule('"\\"" date "\\"" space', ['date']),
  231. 'time-string' : new BuiltinRule('"\\"" time "\\"" space', ['time']),
  232. 'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
  233. }
  234. const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};
  235. const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
  236. const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"\\]/g;
  237. const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
  238. const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]', '\\': '\\\\' };
  239. const NON_LITERAL_SET = new Set('|.()[]{}*+?');
  240. const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('^$.[]()|{}*+?');
  241. export class SchemaConverter {
  242. constructor(options) {
  243. this._propOrder = options.prop_order || {};
  244. this._allowFetch = options.allow_fetch || false;
  245. this._dotall = options.dotall || false;
  246. this._rules = {'space': SPACE_RULE};
  247. this._refs = {};
  248. this._refsBeingResolved = new Set();
  249. }
  250. _formatLiteral(literal) {
  251. const escaped = literal.replace(
  252. GRAMMAR_LITERAL_ESCAPE_RE,
  253. m => GRAMMAR_LITERAL_ESCAPES[m]
  254. );
  255. return `"${escaped}"`;
  256. }
  257. _formatRangeChar(literal) {
  258. return JSON.stringify(literal).slice(1, -1).replace(
  259. GRAMMAR_RANGE_LITERAL_ESCAPE_RE,
  260. m => GRAMMAR_LITERAL_ESCAPES[m]
  261. );
  262. }
  263. _addRule(name, rule) {
  264. let escName = name.replace(INVALID_RULE_CHARS_RE, '-');
  265. let key = escName;
  266. if (escName in this._rules) {
  267. if (this._rules[escName] === rule) {
  268. return key;
  269. }
  270. let i = 0;
  271. while ((`${escName}${i}` in this._rules) && (this._rules[`${escName}${i}`] !== rule)) {
  272. i += 1;
  273. }
  274. key = `${escName}${i}`;
  275. }
  276. this._rules[key] = rule;
  277. return key;
  278. }
  279. async resolveRefs(schema, url) {
  280. const visit = async (n) => {
  281. if (Array.isArray(n)) {
  282. return Promise.all(n.map(visit));
  283. } else if (typeof n === 'object' && n !== null) {
  284. let ref = n.$ref;
  285. let target;
  286. if (ref !== undefined && !this._refs[ref]) {
  287. if (ref.startsWith('https://')) {
  288. if (!this._allowFetch) {
  289. throw new Error('Fetching remote schemas is not allowed (use --allow-fetch for force)');
  290. }
  291. const fetch = (await import('node-fetch')).default;
  292. const fragSplit = ref.split('#');
  293. const baseUrl = fragSplit[0];
  294. target = this._refs[baseUrl];
  295. if (!target) {
  296. target = await this.resolveRefs(await fetch(ref).then(res => res.json()), baseUrl);
  297. this._refs[baseUrl] = target;
  298. }
  299. if (fragSplit.length === 1 || fragSplit[fragSplit.length - 1] === '') {
  300. return target;
  301. }
  302. } else if (ref.startsWith('#/')) {
  303. target = schema;
  304. ref = `${url}${ref}`;
  305. n.$ref = ref;
  306. } else {
  307. throw new Error(`Unsupported ref ${ref}`);
  308. }
  309. const selectors = ref.split('#')[1].split('/').slice(1);
  310. for (const sel of selectors) {
  311. const selIndex = parseInt(sel, 10);
  312. if (target && sel in target) {
  313. target = target[sel];
  314. } else if (target && selIndex in target) {
  315. target = target[selIndex];
  316. } else {
  317. throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
  318. }
  319. }
  320. this._refs[ref] = target;
  321. } else {
  322. await Promise.all(Object.values(n).map(visit));
  323. }
  324. }
  325. return n;
  326. };
  327. return visit(schema);
  328. }
  329. _generateUnionRule(name, altSchemas) {
  330. return altSchemas
  331. .map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
  332. .join(' | ');
  333. }
  334. _visitPattern(pattern, name) {
  335. if (!pattern.startsWith('^') || !pattern.endsWith('$')) {
  336. throw new Error('Pattern must start with "^" and end with "$"');
  337. }
  338. pattern = pattern.slice(1, -1);
  339. const subRuleIds = {};
  340. let i = 0;
  341. const length = pattern.length;
  342. const getDot = () => {
  343. let rule;
  344. if (this._dotall) {
  345. rule = '[\\U00000000-\\U0010FFFF]';
  346. } else {
  347. // Accept any character... except \n and \r line break chars (\x0A and \xOD)
  348. rule = '[^\\x0A\\x0D]';
  349. }
  350. return this._addRule('dot', rule);
  351. };
  352. const toRule = ([s, isLiteral]) => isLiteral ? "\"" + s + "\"" : s;
  353. const transform = () => {
  354. const start = i;
  355. // For each component of this sequence, store its string representation and whether it's a literal.
  356. // We only need a flat structure here to apply repetition operators to the last item, and
  357. // to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
  358. // (GBNF's syntax is luckily very close to regular expressions!)
  359. const seq = [];
  360. const joinSeq = () => {
  361. const ret = [];
  362. for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
  363. if (isLiteral) {
  364. ret.push([[...g].map(x => x[0]).join(''), true]);
  365. } else {
  366. ret.push(...g);
  367. }
  368. }
  369. if (ret.length === 1) {
  370. return ret[0];
  371. }
  372. return [ret.map(x => toRule(x)).join(' '), false];
  373. };
  374. while (i < length) {
  375. const c = pattern[i];
  376. if (c === '.') {
  377. seq.push([getDot(), false]);
  378. i += 1;
  379. } else if (c === '(') {
  380. i += 1;
  381. if (i < length) {
  382. if (pattern[i] === '?') {
  383. throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
  384. }
  385. }
  386. seq.push([`(${toRule(transform())})`, false]);
  387. } else if (c === ')') {
  388. i += 1;
  389. if (start <= 0 || pattern[start - 1] !== '(') {
  390. throw new Error(`Unbalanced parentheses; start = ${start}, i = ${i}, pattern = ${pattern}`);
  391. }
  392. return joinSeq();
  393. } else if (c === '[') {
  394. let squareBrackets = c;
  395. i += 1;
  396. while (i < length && pattern[i] !== ']') {
  397. if (pattern[i] === '\\') {
  398. squareBrackets += pattern.slice(i, i + 2);
  399. i += 2;
  400. } else {
  401. squareBrackets += pattern[i];
  402. i += 1;
  403. }
  404. }
  405. if (i >= length) {
  406. throw new Error(`Unbalanced square brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
  407. }
  408. squareBrackets += ']';
  409. i += 1;
  410. seq.push([squareBrackets, false]);
  411. } else if (c === '|') {
  412. seq.push(['|', false]);
  413. i += 1;
  414. } else if (c === '*' || c === '+' || c === '?') {
  415. seq[seq.length - 1] = [toRule(seq[seq.length - 1]) + c, false];
  416. i += 1;
  417. } else if (c === '{') {
  418. let curlyBrackets = c;
  419. i += 1;
  420. while (i < length && pattern[i] !== '}') {
  421. curlyBrackets += pattern[i];
  422. i += 1;
  423. }
  424. if (i >= length) {
  425. throw new Error(`Unbalanced curly brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
  426. }
  427. curlyBrackets += '}';
  428. i += 1;
  429. const nums = curlyBrackets.slice(1, -1).split(',').map(s => s.trim());
  430. let minTimes, maxTimes;
  431. if (nums.length === 1) {
  432. minTimes = parseInt(nums[0], 10);
  433. maxTimes = minTimes;
  434. } else {
  435. if (nums.length !== 2) {
  436. throw new Error(`Invalid quantifier ${curlyBrackets}`);
  437. }
  438. minTimes = nums[0] ? parseInt(nums[0], 10) : 0;
  439. maxTimes = nums[1] ? parseInt(nums[1], 10) : Infinity;
  440. }
  441. let [sub, subIsLiteral] = seq[seq.length - 1];
  442. if (!subIsLiteral) {
  443. let id = subRuleIds[sub];
  444. if (id === undefined) {
  445. id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub);
  446. subRuleIds[sub] = id;
  447. }
  448. sub = id;
  449. }
  450. seq[seq.length - 1] = [
  451. _buildRepetition(subIsLiteral ? `"${sub}"` : sub, minTimes, maxTimes, {itemRuleIsLiteral: subIsLiteral}),
  452. false
  453. ];
  454. } else {
  455. let literal = '';
  456. while (i < length) {
  457. if (pattern[i] === '\\' && i < length - 1) {
  458. const next = pattern[i + 1];
  459. if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next)) {
  460. i += 1;
  461. literal += pattern[i];
  462. i += 1;
  463. } else {
  464. literal += pattern.slice(i, i + 2);
  465. i += 2;
  466. }
  467. } else if (pattern[i] === '"') {
  468. literal += '\\"';
  469. i += 1;
  470. } else if (!NON_LITERAL_SET.has(pattern[i]) &&
  471. (i === length - 1 || literal === '' || pattern[i + 1] === '.' || !NON_LITERAL_SET.has(pattern[i+1]))) {
  472. literal += pattern[i];
  473. i += 1;
  474. } else {
  475. break;
  476. }
  477. }
  478. if (literal !== '') {
  479. seq.push([literal, true]);
  480. }
  481. }
  482. }
  483. return joinSeq();
  484. };
  485. return this._addRule(name, "\"\\\"\" (" + toRule(transform()) + ") \"\\\"\" space")
  486. }
  487. _notStrings(strings) {
  488. class TrieNode {
  489. constructor() {
  490. this.children = {};
  491. this.isEndOfString = false;
  492. }
  493. insert(str) {
  494. let node = this;
  495. for (const c of str) {
  496. node = node.children[c] = node.children[c] || new TrieNode();
  497. }
  498. node.isEndOfString = true;
  499. }
  500. }
  501. const trie = new TrieNode();
  502. for (const s of strings) {
  503. trie.insert(s);
  504. }
  505. const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
  506. const out = ['["] ( '];
  507. const visit = (node) => {
  508. const rejects = [];
  509. let first = true;
  510. for (const c of Object.keys(node.children).sort()) {
  511. const child = node.children[c];
  512. rejects.push(c);
  513. if (first) {
  514. first = false;
  515. } else {
  516. out.push(' | ');
  517. }
  518. out.push(`[${c}]`);
  519. if (Object.keys(child.children).length > 0) {
  520. out.push(' (');
  521. visit(child);
  522. out.push(')');
  523. } else if (child.isEndOfString) {
  524. out.push(` ${charRuleName}+`);
  525. }
  526. }
  527. if (Object.keys(node.children).length > 0) {
  528. if (!first) {
  529. out.push(' | ');
  530. }
  531. out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
  532. }
  533. };
  534. visit(trie);
  535. out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
  536. return out.join('');
  537. }
  538. _resolveRef(ref) {
  539. let refFragment = ref.split('#').pop();
  540. let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-');
  541. if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
  542. this._refsBeingResolved.add(ref);
  543. const resolved = this._refs[ref];
  544. refName = this.visit(resolved, refName);
  545. this._refsBeingResolved.delete(ref);
  546. }
  547. return refName;
  548. }
  549. _generateConstantRule(value) {
  550. return this._formatLiteral(JSON.stringify(value));
  551. }
  552. visit(schema, name) {
  553. const schemaType = schema.type;
  554. const schemaFormat = schema.format;
  555. const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
  556. const ref = schema.$ref;
  557. if (ref !== undefined) {
  558. return this._addRule(ruleName, this._resolveRef(ref));
  559. } else if (schema.oneOf || schema.anyOf) {
  560. return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
  561. } else if (Array.isArray(schemaType)) {
  562. return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
  563. } else if ('const' in schema) {
  564. return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
  565. } else if ('enum' in schema) {
  566. const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
  567. return this._addRule(ruleName, rule);
  568. } else if ((schemaType === undefined || schemaType === 'object') &&
  569. ('properties' in schema ||
  570. ('additionalProperties' in schema && schema.additionalProperties !== true))) {
  571. const required = new Set(schema.required || []);
  572. const properties = Object.entries(schema.properties ?? {});
  573. return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
  574. } else if ((schemaType === undefined || schemaType === 'object' || schemaType === 'string') && 'allOf' in schema) {
  575. const required = new Set();
  576. const properties = [];
  577. const enumSets = [];
  578. const addComponent = (compSchema, isRequired) => {
  579. const ref = compSchema.$ref;
  580. if (ref !== undefined) {
  581. compSchema = this._refs[ref];
  582. }
  583. if ('properties' in compSchema) {
  584. for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
  585. properties.push([propName, propSchema]);
  586. if (isRequired) {
  587. required.add(propName);
  588. }
  589. }
  590. }
  591. if ('enum' in compSchema) {
  592. enumSets.push(new Set(compSchema.enum || []));
  593. }
  594. };
  595. for (const t of schema.allOf) {
  596. if ('anyOf' in t) {
  597. for (const tt of t.anyOf) {
  598. addComponent(tt, false);
  599. }
  600. } else {
  601. addComponent(t, true);
  602. }
  603. }
  604. if (enumSets.length > 0) {
  605. const enumIntersection = new Set([...enumSets[0]].filter(v => enumSets.every(s => s.has(v))));
  606. if (enumIntersection.size > 0) {
  607. const sortedEnums = [...enumIntersection].sort((a, b) => a.localeCompare(b));
  608. const rule = '(' + sortedEnums.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
  609. return this._addRule(ruleName, rule);
  610. }
  611. }
  612. return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
  613. } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
  614. const items = schema.items ?? schema.prefixItems;
  615. if (Array.isArray(items)) {
  616. return this._addRule(
  617. ruleName,
  618. '"[" space ' +
  619. items.map((item, i) => this.visit(item, `${name ?? ''}${name ? '-' : ''}tuple-${i}`)).join(' "," space ') +
  620. ' "]" space'
  621. );
  622. } else {
  623. const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
  624. const minItems = schema.minItems || 0;
  625. const maxItems = schema.maxItems;
  626. return this._addRule(ruleName, '"[" space ' + _buildRepetition(itemRuleName, minItems, maxItems, {separatorRule: '"," space'}) + ' "]" space');
  627. }
  628. } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
  629. return this._visitPattern(schema.pattern, ruleName);
  630. } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
  631. return this._addPrimitive(
  632. ruleName === 'root' ? 'root' : schemaFormat,
  633. PRIMITIVE_RULES['uuid']
  634. );
  635. } else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
  636. const primName = `${schema.format}-string`
  637. return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
  638. } else if (schemaType === 'string' && ('minLength' in schema || 'maxLength' in schema)) {
  639. const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
  640. const minLen = schema.minLength || 0;
  641. const maxLen = schema.maxLength;
  642. return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
  643. } else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
  644. let minValue = null;
  645. let maxValue = null;
  646. if ('minimum' in schema) {
  647. minValue = schema.minimum;
  648. } else if ('exclusiveMinimum' in schema) {
  649. minValue = schema.exclusiveMinimum + 1;
  650. }
  651. if ('maximum' in schema) {
  652. maxValue = schema.maximum;
  653. } else if ('exclusiveMaximum' in schema) {
  654. maxValue = schema.exclusiveMaximum - 1;
  655. }
  656. const out = ["("];
  657. _generateMinMaxInt(minValue, maxValue, out);
  658. out.push(") space");
  659. return this._addRule(ruleName, out.join(''));
  660. } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
  661. return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
  662. } else {
  663. if (!(schemaType in PRIMITIVE_RULES)) {
  664. throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
  665. }
  666. // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
  667. return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
  668. }
  669. }
  670. _addPrimitive(name, rule) {
  671. let n = this._addRule(name, rule.content);
  672. for (const dep of rule.deps) {
  673. const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
  674. if (!depRule) {
  675. throw new Error(`Rule ${dep} not known`);
  676. }
  677. if (!(dep in this._rules)) {
  678. this._addPrimitive(dep, depRule);
  679. }
  680. }
  681. return n;
  682. }
  683. _buildObjectRule(properties, required, name, additionalProperties) {
  684. const propOrder = this._propOrder;
  685. // sort by position in prop_order (if specified) then by original order
  686. const sortedProps = properties.map(([k]) => k).sort((a, b) => {
  687. const orderA = propOrder[a] || Infinity;
  688. const orderB = propOrder[b] || Infinity;
  689. return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
  690. });
  691. const propKvRuleNames = {};
  692. for (const [propName, propSchema] of properties) {
  693. const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
  694. propKvRuleNames[propName] = this._addRule(
  695. `${name ?? ''}${name ? '-' : ''}${propName}-kv`,
  696. `${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
  697. );
  698. }
  699. const requiredProps = sortedProps.filter(k => required.has(k));
  700. const optionalProps = sortedProps.filter(k => !required.has(k));
  701. if (additionalProperties) {
  702. const subName = `${name ?? ''}${name ? '-' : ''}additional`;
  703. const valueRule =
  704. additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
  705. : this._addPrimitive('value', PRIMITIVE_RULES['value']);
  706. const key_rule =
  707. sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
  708. : this._addRule(`${subName}-k`, this._notStrings(sortedProps));
  709. propKvRuleNames['*'] = this._addRule(
  710. `${subName}-kv`,
  711. `${key_rule} ":" space ${valueRule}`);
  712. optionalProps.push('*');
  713. }
  714. let rule = '"{" space ';
  715. rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
  716. if (optionalProps.length > 0) {
  717. rule += ' (';
  718. if (requiredProps.length > 0) {
  719. rule += ' "," space ( ';
  720. }
  721. const getRecursiveRefs = (ks, firstIsOptional) => {
  722. const [k, ...rest] = ks;
  723. const kvRuleName = propKvRuleNames[k];
  724. let res;
  725. const commaRef = `( "," space ${kvRuleName} )`;
  726. if (firstIsOptional) {
  727. res = commaRef + (k === '*' ? '*' : '?');
  728. } else {
  729. res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
  730. }
  731. if (rest.length > 0) {
  732. res += ' ' + this._addRule(
  733. `${name ?? ''}${name ? '-' : ''}${k}-rest`,
  734. getRecursiveRefs(rest, true)
  735. );
  736. }
  737. return res;
  738. };
  739. rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
  740. if (requiredProps.length > 0) {
  741. rule += ' )';
  742. }
  743. rule += ' )?';
  744. }
  745. rule += ' "}" space';
  746. return rule;
  747. }
  748. formatGrammar() {
  749. let grammar = '';
  750. for (const [name, rule] of Object.entries(this._rules).sort(([a], [b]) => a.localeCompare(b))) {
  751. grammar += `${name} ::= ${rule}\n`;
  752. }
  753. return grammar;
  754. }
  755. }
  756. // Helper function to group elements by a key function
  757. function* groupBy(iterable, keyFn) {
  758. let lastKey = null;
  759. let group = [];
  760. for (const element of iterable) {
  761. const key = keyFn(element);
  762. if (lastKey !== null && key !== lastKey) {
  763. yield [lastKey, group];
  764. group = [];
  765. }
  766. group.push(element);
  767. lastKey = key;
  768. }
  769. if (group.length > 0) {
  770. yield [lastKey, group];
  771. }
  772. }