1
0

latex-protection.test.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /* eslint-disable no-irregular-whitespace */
  2. import { describe, it, expect, test } from 'vitest';
  3. import { maskInlineLaTeX, preprocessLaTeX } from '$lib/utils/latex-protection';
  4. describe('maskInlineLaTeX', () => {
  5. it('should protect LaTeX $x + y$ but not money $3.99', () => {
  6. const latexExpressions: string[] = [];
  7. const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.';
  8. const output = maskInlineLaTeX(input, latexExpressions);
  9. expect(output).toBe('I have $10, $3.99 and <<LATEX_0>> and <<LATEX_1>>. The amount is $2,000.');
  10. expect(latexExpressions).toEqual(['$x + y$', '$100x$']);
  11. });
  12. it('should ignore money like $5 and $12.99', () => {
  13. const latexExpressions: string[] = [];
  14. const input = 'Prices are $12.99 and $5. Tax?';
  15. const output = maskInlineLaTeX(input, latexExpressions);
  16. expect(output).toBe('Prices are $12.99 and $5. Tax?');
  17. expect(latexExpressions).toEqual([]);
  18. });
  19. it('should protect inline math $a^2 + b^2$ even after text', () => {
  20. const latexExpressions: string[] = [];
  21. const input = 'Pythagorean: $a^2 + b^2 = c^2$.';
  22. const output = maskInlineLaTeX(input, latexExpressions);
  23. expect(output).toBe('Pythagorean: <<LATEX_0>>.');
  24. expect(latexExpressions).toEqual(['$a^2 + b^2 = c^2$']);
  25. });
  26. it('should not protect math that has letter after closing $ (e.g. units)', () => {
  27. const latexExpressions: string[] = [];
  28. const input = 'The cost is $99 and change.';
  29. const output = maskInlineLaTeX(input, latexExpressions);
  30. expect(output).toBe('The cost is $99 and change.');
  31. expect(latexExpressions).toEqual([]);
  32. });
  33. it('should allow $x$ followed by punctuation', () => {
  34. const latexExpressions: string[] = [];
  35. const input = 'We know $x$, right?';
  36. const output = maskInlineLaTeX(input, latexExpressions);
  37. expect(output).toBe('We know <<LATEX_0>>, right?');
  38. expect(latexExpressions).toEqual(['$x$']);
  39. });
  40. it('should work across multiple lines', () => {
  41. const latexExpressions: string[] = [];
  42. const input = `Emma buys cupcakes for $3 each.\nHow much is $x + y$?`;
  43. const output = maskInlineLaTeX(input, latexExpressions);
  44. expect(output).toBe(`Emma buys cupcakes for $3 each.\nHow much is <<LATEX_0>>?`);
  45. expect(latexExpressions).toEqual(['$x + y$']);
  46. });
  47. it('should not protect $100 but protect $matrix$', () => {
  48. const latexExpressions: string[] = [];
  49. const input = '$100 and $\\mathrm{GL}_2(\\mathbb{F}_7)$ are different.';
  50. const output = maskInlineLaTeX(input, latexExpressions);
  51. expect(output).toBe('$100 and <<LATEX_0>> are different.');
  52. expect(latexExpressions).toEqual(['$\\mathrm{GL}_2(\\mathbb{F}_7)$']);
  53. });
  54. it('should skip if $ is followed by digit and alphanumeric after close (money)', () => {
  55. const latexExpressions: string[] = [];
  56. const input = 'I paid $5 quickly.';
  57. const output = maskInlineLaTeX(input, latexExpressions);
  58. expect(output).toBe('I paid $5 quickly.');
  59. expect(latexExpressions).toEqual([]);
  60. });
  61. it('should protect LaTeX even with special chars inside', () => {
  62. const latexExpressions: string[] = [];
  63. const input = 'Consider $\\alpha_1 + \\beta_2$ now.';
  64. const output = maskInlineLaTeX(input, latexExpressions);
  65. expect(output).toBe('Consider <<LATEX_0>> now.');
  66. expect(latexExpressions).toEqual(['$\\alpha_1 + \\beta_2$']);
  67. });
  68. it('short text', () => {
  69. const latexExpressions: string[] = ['$0$'];
  70. const input = '$a$\n$a$ and $b$';
  71. const output = maskInlineLaTeX(input, latexExpressions);
  72. expect(output).toBe('<<LATEX_1>>\n<<LATEX_2>> and <<LATEX_3>>');
  73. expect(latexExpressions).toEqual(['$0$', '$a$', '$a$', '$b$']);
  74. });
  75. it('empty text', () => {
  76. const latexExpressions: string[] = [];
  77. const input = '$\n$$\n';
  78. const output = maskInlineLaTeX(input, latexExpressions);
  79. expect(output).toBe('$\n$$\n');
  80. expect(latexExpressions).toEqual([]);
  81. });
  82. it('LaTeX-spacer preceded by backslash', () => {
  83. const latexExpressions: string[] = [];
  84. const input = `\\[
  85. \\boxed{
  86. \\begin{aligned}
  87. N_{\\text{att}}^{\\text{(MHA)}} &=
  88. h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr] && (\\text{Q,K,V の重み})\\\\
  89. &\\quad+ h(d_{k}+d_{k}+d_{v}) && (\\text{バイアス Q,K,V)}\\\\[4pt]
  90. &\\quad+ (h d_{v})\\, d_{\\text{model}} && (\\text{出力射影 }W^{O})\\\\
  91. &\\quad+ d_{\\text{model}} && (\\text{バイアス }b^{O})
  92. \\end{aligned}}
  93. \\]`;
  94. const output = maskInlineLaTeX(input, latexExpressions);
  95. expect(output).toBe(input);
  96. expect(latexExpressions).toEqual([]);
  97. });
  98. });
  99. describe('preprocessLaTeX', () => {
  100. test('converts inline \\( ... \\) to $...$', () => {
  101. const input =
  102. '\\( \\mathrm{GL}_2(\\mathbb{F}_7) \\): Group of invertible matrices with entries in \\(\\mathbb{F}_7\\).';
  103. const output = preprocessLaTeX(input);
  104. expect(output).toBe(
  105. '$ \\mathrm{GL}_2(\\mathbb{F}_7) $: Group of invertible matrices with entries in $\\mathbb{F}_7$.'
  106. );
  107. });
  108. test("don't inline \\\\( ... \\) to $...$", () => {
  109. const input =
  110. 'Chapter 20 of The TeXbook, in source "Definitions\\\\(also called Macros)", containst the formula \\((x_1,\\ldots,x_n)\\).';
  111. const output = preprocessLaTeX(input);
  112. expect(output).toBe(
  113. 'Chapter 20 of The TeXbook, in source "Definitions\\\\(also called Macros)", containst the formula $(x_1,\\ldots,x_n)$.'
  114. );
  115. });
  116. test('preserves display math \\[ ... \\] and protects adjacent text', () => {
  117. const input = `Some kernel of \\(\\mathrm{SL}_2(\\mathbb{F}_7)\\):
  118. \\[
  119. \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\}
  120. \\]`;
  121. const output = preprocessLaTeX(input);
  122. expect(output).toBe(`Some kernel of $\\mathrm{SL}_2(\\mathbb{F}_7)$:
  123. $$
  124. \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\}
  125. $$`);
  126. });
  127. test('handles standalone display math equation', () => {
  128. const input = `Algebra:
  129. \\[
  130. x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}
  131. \\]`;
  132. const output = preprocessLaTeX(input);
  133. expect(output).toBe(`Algebra:
  134. $$
  135. x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}
  136. $$`);
  137. });
  138. test('does not interpret currency values as LaTeX', () => {
  139. const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.';
  140. const output = preprocessLaTeX(input);
  141. expect(output).toBe('I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.');
  142. });
  143. test('ignores dollar signs followed by digits (money), but keeps valid math $x + y$', () => {
  144. const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.';
  145. const output = preprocessLaTeX(input);
  146. expect(output).toBe('I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.');
  147. });
  148. test('handles real-world word problems with amounts and no math delimiters', () => {
  149. const input =
  150. 'Emma buys 2 cupcakes for $3 each and 1 cookie for $1.50. How much money does she spend in total?';
  151. const output = preprocessLaTeX(input);
  152. expect(output).toBe(
  153. 'Emma buys 2 cupcakes for \\$3 each and 1 cookie for \\$1.50. How much money does she spend in total?'
  154. );
  155. });
  156. test('handles decimal amounts in word problem correctly', () => {
  157. const input =
  158. 'Maria has $20. She buys a notebook for $4.75 and a pack of pencils for $3.25. How much change does she receive?';
  159. const output = preprocessLaTeX(input);
  160. expect(output).toBe(
  161. 'Maria has \\$20. She buys a notebook for \\$4.75 and a pack of pencils for \\$3.25. How much change does she receive?'
  162. );
  163. });
  164. test('preserves display math with surrounding non-ASCII text', () => {
  165. const input = `1 kg の質量は
  166. \\[
  167. E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J}
  168. \\]
  169. というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。`;
  170. const output = preprocessLaTeX(input);
  171. expect(output).toBe(
  172. `1 kg の質量は
  173. $$
  174. E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J}
  175. $$
  176. というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。`
  177. );
  178. });
  179. test('LaTeX-spacer preceded by backslash', () => {
  180. const input = `\\[
  181. \\boxed{
  182. \\begin{aligned}
  183. N_{\\text{att}}^{\\text{(MHA)}} &=
  184. h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr] && (\\text{Q,K,V の重み})\\\\
  185. &\\quad+ h(d_{k}+d_{k}+d_{v}) && (\\text{バイアス Q,K,V)}\\\\[4pt]
  186. &\\quad+ (h d_{v})\\, d_{\\text{model}} && (\\text{出力射影 }W^{O})\\\\
  187. &\\quad+ d_{\\text{model}} && (\\text{バイアス }b^{O})
  188. \\end{aligned}}
  189. \\]`;
  190. const output = preprocessLaTeX(input);
  191. expect(output).toBe(
  192. `$$
  193. \\boxed{
  194. \\begin{aligned}
  195. N_{\\text{att}}^{\\text{(MHA)}} &=
  196. h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr] && (\\text{Q,K,V の重み})\\\\
  197. &\\quad+ h(d_{k}+d_{k}+d_{v}) && (\\text{バイアス Q,K,V)}\\\\[4pt]
  198. &\\quad+ (h d_{v})\\, d_{\\text{model}} && (\\text{出力射影 }W^{O})\\\\
  199. &\\quad+ d_{\\text{model}} && (\\text{バイアス }b^{O})
  200. \\end{aligned}}
  201. $$`
  202. );
  203. });
  204. test('converts \\[ ... \\] even when preceded by text without space', () => {
  205. const input = 'Some line ...\nAlgebra: \\[x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}\\]';
  206. const output = preprocessLaTeX(input);
  207. expect(output).toBe(
  208. 'Some line ...\nAlgebra: \n$$x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$$\n'
  209. );
  210. });
  211. test('converts \\[ ... \\] in table-cells', () => {
  212. const input = `| ID | Expression |\n| #1 | \\[
  213. x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}
  214. \\] |`;
  215. const output = preprocessLaTeX(input);
  216. expect(output).toBe(
  217. '| ID | Expression |\n| #1 | $x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$ |'
  218. );
  219. });
  220. test('escapes isolated $ before digits ($5 → \\$5), but not valid math', () => {
  221. const input = 'This costs $5 and this is math $x^2$. $100 is money.';
  222. const output = preprocessLaTeX(input);
  223. expect(output).toBe('This costs \\$5 and this is math $x^2$. \\$100 is money.');
  224. // Note: Since $x^2$ is detected as valid LaTeX, it's preserved.
  225. // $5 becomes \$5 only *after* real math is masked — but here it's correct because the masking logic avoids treating $5 as math.
  226. });
  227. test('display with LaTeX-line-breaks', () => {
  228. const input = String.raw`- Algebraic topology, Homotopy Groups of $\mathbb{S}^3$:
  229. $$\pi_n(\mathbb{S}^3) = \begin{cases}
  230. \mathbb{Z} & n = 3 \\
  231. 0 & n > 3, n \neq 4 \\
  232. \mathbb{Z}_2 & n = 4 \\
  233. \end{cases}$$`;
  234. const output = preprocessLaTeX(input);
  235. // If the formula contains '\\' the $$-delimiters should be in their own line.
  236. expect(output).toBe(`- Algebraic topology, Homotopy Groups of $\\mathbb{S}^3$:
  237. $$\n\\pi_n(\\mathbb{S}^3) = \\begin{cases}
  238. \\mathbb{Z} & n = 3 \\\\
  239. 0 & n > 3, n \\neq 4 \\\\
  240. \\mathbb{Z}_2 & n = 4 \\\\
  241. \\end{cases}\n$$`);
  242. });
  243. test('handles mhchem notation safely if present', () => {
  244. const input = 'Chemical reaction: \\( \\ce{H2O} \\) and $\\ce{CO2}$';
  245. const output = preprocessLaTeX(input);
  246. expect(output).toBe('Chemical reaction: $ \\ce{H2O} $ and $\\ce{CO2}$');
  247. });
  248. test('preserves code blocks', () => {
  249. const input = 'Inline code: `sum $total` and block:\n```\ndollar $amount\n```\nEnd.';
  250. const output = preprocessLaTeX(input);
  251. expect(output).toBe(input); // Code blocks prevent misinterpretation
  252. });
  253. test('preserves backslash parentheses in code blocks (GitHub issue)', () => {
  254. const input = '```python\nfoo = "\\(bar\\)"\n```';
  255. const output = preprocessLaTeX(input);
  256. expect(output).toBe(input); // Code blocks should not have LaTeX conversion applied
  257. });
  258. test('preserves backslash brackets in code blocks', () => {
  259. const input = '```python\nfoo = "\\[bar\\]"\n```';
  260. const output = preprocessLaTeX(input);
  261. expect(output).toBe(input); // Code blocks should not have LaTeX conversion applied
  262. });
  263. test('preserves backslash parentheses in inline code', () => {
  264. const input = 'Use `foo = "\\(bar\\)"` in your code.';
  265. const output = preprocessLaTeX(input);
  266. expect(output).toBe(input);
  267. });
  268. test('escape backslash in mchem ce', () => {
  269. const input = 'mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$';
  270. const output = preprocessLaTeX(input);
  271. // mhchem-escape would insert a backslash here.
  272. expect(output).toBe('mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$');
  273. });
  274. test('escape backslash in mchem pu', () => {
  275. const input = 'mchem pu:\n$\\pu{-572 kJ mol^{-1}}$';
  276. const output = preprocessLaTeX(input);
  277. // mhchem-escape would insert a backslash here.
  278. expect(output).toBe('mchem pu:\n$\\pu{-572 kJ mol^{-1}}$');
  279. });
  280. test('LaTeX in blockquotes with display math', () => {
  281. const input =
  282. '> **Definition (limit):** \n> \\[\n> \\lim_{x\\to a} f(x) = L\n> \\]\n> means that as \\(x\\) gets close to \\(a\\).';
  283. const output = preprocessLaTeX(input);
  284. // Blockquote markers should be preserved, LaTeX should be converted
  285. expect(output).toContain('> **Definition (limit):**');
  286. expect(output).toContain('$$');
  287. expect(output).toContain('$x$');
  288. expect(output).not.toContain('\\[');
  289. expect(output).not.toContain('\\]');
  290. expect(output).not.toContain('\\(');
  291. expect(output).not.toContain('\\)');
  292. });
  293. test('LaTeX in blockquotes with inline math', () => {
  294. const input =
  295. "> The derivative \\(f'(x)\\) at point \\(x=a\\) measures slope.\n> Formula: \\(f'(a)=\\lim_{h\\to 0}\\frac{f(a+h)-f(a)}{h}\\)";
  296. const output = preprocessLaTeX(input);
  297. // Blockquote markers should be preserved, inline LaTeX converted to $...$
  298. expect(output).toContain("> The derivative $f'(x)$ at point $x=a$ measures slope.");
  299. expect(output).toContain("> Formula: $f'(a)=\\lim_{h\\to 0}\\frac{f(a+h)-f(a)}{h}$");
  300. });
  301. test('Mixed content with blockquotes and regular text', () => {
  302. const input =
  303. 'Regular text with \\(x^2\\).\n\n> Quote with \\(y^2\\).\n\nMore text with \\(z^2\\).';
  304. const output = preprocessLaTeX(input);
  305. // All LaTeX should be converted, blockquote markers preserved
  306. expect(output).toBe('Regular text with $x^2$.\n\n> Quote with $y^2$.\n\nMore text with $z^2$.');
  307. });
  308. });