MarkdownDisplay.tsx 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. import React, { useMemo, useState } from 'react';
  2. import Markdown, { ExtraProps } from 'react-markdown';
  3. import remarkGfm from 'remark-gfm';
  4. import rehypeHightlight from 'rehype-highlight';
  5. import rehypeKatex from 'rehype-katex';
  6. import remarkMath from 'remark-math';
  7. import remarkBreaks from 'remark-breaks';
  8. import 'katex/dist/katex.min.css';
  9. import { classNames, copyStr } from '../utils/misc';
  10. import { ElementContent, Root } from 'hast';
  11. import { visit } from 'unist-util-visit';
  12. import { useAppContext } from '../utils/app.context';
  13. import { CanvasType } from '../utils/types';
  /**
   * Renders a chat message as Markdown.
   *
   * The text is first normalised by `preprocessLaTeX`, then rendered with the
   * GFM, math and line-break remark plugins and the highlight, KaTeX and
   * copy-button rehype plugins.
   *
   * @param content Raw message text.
   * @param isGenerating Passed through to the code-block toolbar so it knows
   *   whether the message is still being streamed.
   */
  export default function MarkdownDisplay({
    content,
    isGenerating,
  }: {
    content: string;
    isGenerating?: boolean;
  }) {
    // Recompute the normalised source only when the raw text changes.
    const markdownSource = useMemo(() => preprocessLaTeX(content), [content]);

    return (
      <Markdown
        remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
        rehypePlugins={[rehypeHightlight, rehypeKatex, rehypeCustomCopyButton]}
        components={{
          // NOTE: only the injected "button" placeholder is overridden here.
          // Overriding "pre", "p" or other basic HTML elements makes the node
          // re-render while the message is being generated (this looks like a
          // react-markdown bug; no known fix).
          button: (buttonProps) => (
            <CodeBlockButtons
              {...buttonProps}
              origContent={markdownSource}
              isGenerating={isGenerating}
            />
          ),
        }}
      >
        {markdownSource}
      </Markdown>
    );
  }
  /**
   * Toolbar rendered for the placeholder `button` element of a code block.
   *
   * It shows a copy button and, for Python code when the interpreter is
   * enabled and the message is no longer generating, a run button.
   *
   * The code text is recovered by slicing `origContent` with the source
   * offsets stored on `node.position`, then stripping the opening and closing
   * ``` fence lines.
   *
   * @param node hast node of the placeholder; its `position` gives the offsets
   *   of the code block inside `origContent`.
   * @param origContent The Markdown source that was handed to the renderer.
   * @param isGenerating When true the run button is not offered.
   */
  const CodeBlockButtons: React.ElementType<
    React.ClassAttributes<HTMLButtonElement> &
      React.HTMLAttributes<HTMLButtonElement> &
      ExtraProps & { origContent: string; isGenerating?: boolean }
  > = ({ node, origContent, isGenerating }) => {
    const { config } = useAppContext();
    const startOffset = node?.position?.start.offset ?? 0;
    const endOffset = node?.position?.end.offset ?? 0;

    const copiedContent = useMemo(
      () =>
        origContent
          .substring(startOffset, endOffset)
          // `[^\n]*` rather than `[^\n]+`: the info string after the opening
          // fence is optional, so a bare "```" line must be stripped as well.
          .replace(/^```[^\n]*\n/, '')
          .replace(/```$/, ''),
      [origContent, startOffset, endOffset]
    );

    const codeLanguage = useMemo(() => {
      // Read the whole first line instead of a fixed-size window so that info
      // strings of any length (e.g. "python3") are recognised.
      const firstLineEnd = origContent.indexOf('\n', startOffset);
      if (firstLineEnd === -1) {
        return '';
      }
      const infoString = origContent
        .substring(startOffset, firstLineEnd)
        .match(/^```([^\n]+)$/)?.[1];
      // Surrounding whitespace is not part of the language name.
      return infoString?.trim() ?? '';
    }, [origContent, startOffset]);

    const canRunCode =
      !isGenerating &&
      config.pyIntepreterEnabled &&
      codeLanguage.startsWith('py');

    return (
      <div
        className={classNames({
          'text-right sticky top-[7em] mb-2 mr-2 h-0': true,
          // Applied when the node carries no source position.
          // NOTE(review): assumes `display-none` is defined in the project
          // stylesheet - confirm.
          'display-none': !node?.position,
        })}
      >
        <CopyButton className="badge btn-mini" content={copiedContent} />
        {canRunCode && (
          <RunPyCodeButton
            className="badge btn-mini ml-2"
            content={copiedContent}
          />
        )}
      </div>
    );
  };
  /**
   * Button that copies `content` via `copyStr` and shows "Copied!" until the
   * pointer leaves the button.
   *
   * @param content Text passed to `copyStr` on click.
   * @param className Optional class names applied to the button element.
   */
  export const CopyButton = ({
    content,
    className,
  }: {
    content: string;
    className?: string;
  }) => {
    const [copied, setCopied] = useState(false);

    const handleClick = () => {
      copyStr(content);
      setCopied(true);
    };
    // The confirmation label is reset once the mouse leaves the button.
    const handleMouseLeave = () => setCopied(false);

    const label = copied ? 'Copied!' : '📋 Copy';

    return (
      <button
        className={className}
        onClick={handleClick}
        onMouseLeave={handleMouseLeave}
      >
        {label}
      </button>
    );
  };
  /**
   * Button that sends `content` to the canvas as Python-interpreter data.
   *
   * @param content Code stored in the canvas data on click.
   * @param className Optional class names applied to the button element.
   */
  export const RunPyCodeButton = ({
    content,
    className,
  }: {
    content: string;
    className?: string;
  }) => {
    const { setCanvasData } = useAppContext();

    const openInInterpreter = () =>
      setCanvasData({
        type: CanvasType.PY_INTERPRETER,
        content,
      });

    return (
      <>
        <button className={className} onClick={openInInterpreter}>
          ▶️ Run
        </button>
      </>
    );
  };
  /**
   * Rehype plugin that wraps every "pre" element in a "div" and puts an empty
   * "button" element in front of it.
   * The button is later swapped for a React component by MarkdownDisplay.
   * The "pre" node itself is not replaced through the component map because
   * that makes the node re-render, which causes this bug:
   * https://github.com/ggerganov/llama.cpp/issues/9608
   */
  function rehypeCustomCopyButton() {
    return (tree: Root) => {
      visit(tree, 'element', (node) => {
        // Skip everything except "pre" elements that were not wrapped yet.
        if (node.tagName !== 'pre' || node.properties.visited) {
          return;
        }

        // Mark first, then take a shallow copy: the copy keeps this properties
        // object (including the marker), so the traversal ignores it when it
        // descends into the new children.
        node.properties.visited = 'true';
        const originalPre = { ...node };

        // Placeholder button; it carries the source position of the code block.
        const placeholderButton: ElementContent = {
          type: 'element',
          tagName: 'button',
          properties: {},
          children: [],
          position: node.position,
        };

        // Turn the visited node itself into the wrapping "div".
        node.tagName = 'div';
        node.properties = {};
        node.children = [placeholderButton, originalPre];
      });
    };
  }
  160. /**
  161. * The part below is copied and adapted from:
  162. * https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
  163. * (MIT License)
  164. */
  // Cheap pre-check: does the text contain anything that looks like LaTeX?
  // Matches \( ... \), \[ ... \], $ ... $ and \begin{equation} ... \end{equation}.
  // The display forms (\[ ... \] and the equation environment) use [\s\S] so
  // they are detected when they span several lines, in line with `blockLatex`
  // below, which also matches across newlines. The inline forms stay
  // single-line.
  const containsLatexRegex =
    /\\\(.*?\\\)|\\\[[\s\S]*?\\\]|\$.*?\$|\\begin\{equation\}[\s\S]*?\\end\{equation\}/;
  // Inline LaTeX: \( ... \) with at least one character, on a single line.
  const inlineLatex = new RegExp(/\\\((.+?)\\\)/, 'g');
  // Block LaTeX: \[ ... \], may span lines; the character right before the
  // closing delimiter must not be a backslash.
  const blockLatex = new RegExp(/\\\[(.*?[^\\])\\\]/, 'gs');
  /**
   * Puts protected code spans back into the text.
   *
   * Every `<<CODE_BLOCK_n>>` placeholder is replaced by `codeBlocks[n]`. A
   * placeholder whose index has no stored block is left untouched instead of
   * being turned into the text "undefined".
   *
   * @param content Text containing `<<CODE_BLOCK_n>>` placeholders.
   * @param codeBlocks Original code spans, indexed by placeholder number.
   * @returns The text with known placeholders expanded.
   */
  const restoreCodeBlocks = (content: string, codeBlocks: string[]) => {
    return content.replace(
      /<<CODE_BLOCK_(\d+)>>/g,
      (placeholder: string, index: string) =>
        codeBlocks[Number(index)] ?? placeholder
    );
  };
  // Matches fenced code blocks (``` ... ```) and single-line inline code (` ... `).
  const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;

  /**
   * Converts `\( ... \)` and `\[ ... \]` LaTeX delimiters to `$ ... $` and
   * `$$ ... $$`, leaving code spans untouched.
   *
   * Dollar signs directly followed by a digit (optionally after one whitespace
   * character) are escaped first. The delimiter conversion only runs when
   * `containsLatexRegex` finds a candidate in the escaped text.
   *
   * @param _content Raw text.
   * @returns The converted text with all code spans restored.
   */
  export const processLaTeX = (_content: string) => {
    // Swap code spans for numbered placeholders so they are not rewritten.
    const codeBlocks: string[] = [];
    const withoutCode = _content.replace(codeBlockRegex, (match) => {
      codeBlocks.push(match);
      return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
    });

    // Escape dollar signs followed by a digit or space and digit
    const escaped = withoutCode.replace(/(\$)(?=\s?\d)/g, '\\$');

    // Nothing that looks like LaTeX: only the code spans need restoring.
    if (!containsLatexRegex.test(escaped)) {
      return restoreCodeBlocks(escaped, codeBlocks);
    }

    // Inline expressions first, then block expressions.
    const withInlineMath = escaped.replace(
      inlineLatex,
      (_: string, equation: string) => `$${equation}$`
    );
    const withBlockMath = withInlineMath.replace(
      blockLatex,
      (_: string, equation: string) => `$$${equation}$$`
    );

    return restoreCodeBlocks(withBlockMath, codeBlocks);
  };
  /**
   * Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
   *
   * Code spans and existing LaTeX expressions are swapped for placeholders,
   * dollar signs directly followed by a digit are escaped as currency, the
   * placeholders are expanded again, and finally `escapeBrackets` and
   * `escapeMhchem` are applied.
   *
   * @param content The input string containing LaTeX expressions.
   * @returns The processed string with replaced delimiters and escaped characters.
   */
  export function preprocessLaTeX(content: string): string {
    // Step 1: Protect code blocks
    const codeBlocks: string[] = [];
    content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (_, code) => {
      codeBlocks.push(code);
      return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
    });

    // Step 2: Protect existing LaTeX expressions
    const latexExpressions: string[] = [];

    // Protect block math ($$...$$), \[...\], and \(...\).
    content = content.replace(
      /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
      (match) => {
        latexExpressions.push(match);
        return `<<LATEX_${latexExpressions.length - 1}>>`;
      }
    );

    // Protect inline math ($...$) only if it does NOT match a currency pattern.
    // A currency pattern is one where the inner content is purely numeric
    // (with optional decimals).
    content = content.replace(/\$([^$]+)\$/g, (match, inner) => {
      if (/^\s*\d+(?:\.\d+)?\s*$/.test(inner)) {
        // This looks like a currency value (e.g. "$123" or "$12.34"),
        // so don't protect it.
        return match;
      }
      // Otherwise, treat it as a LaTeX expression.
      latexExpressions.push(match);
      return `<<LATEX_${latexExpressions.length - 1}>>`;
    });

    // Step 3: Escape dollar signs that are likely currency indicators.
    // (Inline math is protected, so only unprotected dollars are escaped.)
    content = content.replace(/\$(?=\d)/g, '\\$');

    // Step 4: Restore LaTeX expressions.
    // An inline `$...$` match can contain a placeholder created by the block
    // pass just above, and a single replace() pass does not rescan the text it
    // inserts. Two passes cover that one level of nesting. Placeholders with
    // no stored expression are kept verbatim rather than becoming "undefined".
    const restoreLatex = (text: string): string =>
      text.replace(
        /<<LATEX_(\d+)>>/g,
        (placeholder: string, index: string) =>
          latexExpressions[parseInt(index, 10)] ?? placeholder
      );
    content = restoreLatex(restoreLatex(content));

    // Step 5: Restore code blocks (unknown placeholders are kept verbatim).
    content = content.replace(
      /<<CODE_BLOCK_(\d+)>>/g,
      (placeholder: string, index: string) =>
        codeBlocks[parseInt(index, 10)] ?? placeholder
    );

    // Step 6: Apply additional escaping functions
    content = escapeBrackets(content);
    content = escapeMhchem(content);
    return content;
  }
  /**
   * Rewrites `\[ ... \]` as `$$ ... $$` and `\( ... \)` as `$ ... $`.
   *
   * Fenced code blocks and inline code are matched by the same pattern and
   * returned unchanged, so delimiters inside code are not rewritten.
   *
   * @param text Text to convert.
   * @returns The text with bracket delimiters replaced by dollar delimiters.
   */
  export function escapeBrackets(text: string): string {
    const delimiterPattern =
      /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

    const rewrite = (
      match: string,
      codeBlock: string | undefined,
      squareBracket: string | undefined,
      roundBracket: string | undefined
    ): string => {
      // Exactly one capture group is set per match; an empty capture still
      // counts, hence the explicit null checks instead of truthiness.
      if (codeBlock != null) {
        return codeBlock;
      }
      if (squareBracket != null) {
        return '$$' + squareBracket + '$$';
      }
      if (roundBracket != null) {
        return '$' + roundBracket + '$';
      }
      return match;
    };

    return text.replace(delimiterPattern, rewrite);
  }
  /**
   * Doubles the backslash of `\ce{` and `\pu{` when they directly follow a
   * dollar sign.
   *
   * @param text Text to process.
   * @returns The text with every `$\ce{` / `$\pu{` rewritten to `$\\ce{` / `$\\pu{`.
   */
  export function escapeMhchem(text: string) {
    const withCeEscaped = text.split('$\\ce{').join('$\\\\ce{');
    return withCeEscaped.split('$\\pu{').join('$\\\\pu{');
  }