latex-protection.ts 1.4 KB

1234567891011121314151617181920212223242526272829303132333435
  /**
   * Global pattern that locates Markdown code spans so that later passes
   * (e.g. LaTeX handling) can leave them untouched.
   *
   * Capture group 1 holds the whole span, which is one of:
   * - a fenced block: ``` followed by the shortest run of any characters
   *   (newlines included) up to the next ```;
   * - an inline span: one backtick, one or more characters that are neither
   *   a backtick nor a newline, then one closing backtick.
   *
   * Limitations: nested backticks and multi-backtick delimiters are not
   * recognised as a single span, e.g.
   * `` `code with `backticks` `` or \\``...\\``
   */
  export const CODE_BLOCK_REGEXP: RegExp = /(```[\s\S]*?```|`[^`\n]+`)/g;
  /**
   * Global pattern that finds unescaped LaTeX math delimiters, \(...\) and \[...\],
   * and additionally consumes code blocks (```...``` and `...`) so that callers
   * can recognise and skip them.
   *
   * Each opening math delimiter is guarded by the negative lookbehind `(?<!\\)`,
   * so it is ignored when a backslash comes directly before it. That keeps
   * text such as `Definitions\\(also called macros)` (title of chapter 20 in
   * The TeXbook) or the LaTeX line break `\\[4pt]` from being read as math.
   *
   * Capture groups (at most one is defined per match):
   * - group 1: the code block, delimiters included
   * - group 2: text between \[ and \] (may span lines; at least one character,
   *   and the last character is not a backslash)
   * - group 3: text between \( and \) (no newlines, since `.` is used without
   *   the `s` flag; may be empty)
   */
  export const LATEX_MATH_AND_CODE_PATTERN: RegExp =
    /(```[\S\s]*?```|`.*?`)|(?<!\\)\\\[([\S\s]*?[^\\])\\]|(?<!\\)\\\((.*?)\\\)/g;
  /**
   * Captures the content of a `$$...$$` display formula that contains a LaTeX
   * line break (two consecutive backslashes).
   *
   * Literally: `$$`, the shortest run of any characters (newlines included) up
   * to a double backslash, then the shortest run of any characters up to the
   * next `$$`. Capture group 1 is everything between the outer dollar pairs.
   * Because the leading run may be any characters, it can itself contain `$$`.
   *
   * The pattern has no `g` flag, so it reports the first match only.
   */
  export const LATEX_LINEBREAK_REGEXP: RegExp = /\$\$([\s\S]*?\\\\[\s\S]*?)\$\$/;
  /**
   * Ordered list of `[pattern, replacement]` pairs for mhchem commands, meant
   * to be applied with `String.prototype.replace`.
   *
   * Each pattern matches one whitespace character followed by `$\ce{` (or
   * `$\pu{`). The replacement re-emits that whitespace and the `$`, then
   * writes the command with its backslash doubled (`$\\ce{`, `$\\pu{`).
   * Because the leading whitespace is required, a command at the very start
   * of the input is not rewritten.
   *
   * The explicit type annotation is the single source of truth for the shape.
   * No `as const` is applied, because it would make the inner tuples readonly
   * and conflict with the declared mutable tuple element type.
   */
  export const MHCHEM_PATTERN_MAP: readonly [RegExp, string][] = [
    [/(\s)\$\\ce{/g, '$1$\\\\ce{'],
    [/(\s)\$\\pu{/g, '$1$\\\\pu{']
  ];