MarkdownDisplay.tsx 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. import React, { useMemo, useState } from 'react';
  2. import Markdown, { ExtraProps } from 'react-markdown';
  3. import remarkGfm from 'remark-gfm';
  4. import rehypeHightlight from 'rehype-highlight';
  5. import rehypeKatex from 'rehype-katex';
  6. import remarkMath from 'remark-math';
  7. import remarkBreaks from 'remark-breaks';
  8. import 'katex/dist/katex.min.css';
  9. import { classNames, copyStr } from '../utils/misc';
  10. import { ElementContent, Root } from 'hast';
  11. import { visit } from 'unist-util-visit';
  12. import { useAppContext } from '../utils/app.context';
  13. import { CanvasType } from '../utils/types';
  14. import { BtnWithTooltips } from '../utils/common';
  15. import { DocumentDuplicateIcon, PlayIcon } from '@heroicons/react/24/outline';
  /**
   * Renders a message string through react-markdown.
   *
   * The text is first passed through `preprocessLaTeX`; the result is both the
   * markdown source and the `origContent` handed to `CodeBlockButtons`, so the
   * node offsets reported by the parser index into the same string.
   *
   * @param content      Raw message text.
   * @param isGenerating Forwarded to `CodeBlockButtons`.
   */
  export default function MarkdownDisplay({
    content,
    isGenerating,
  }: {
    content: string;
    isGenerating?: boolean;
  }) {
    // Recomputed only when the message text itself changes.
    const markdownSource = useMemo(() => preprocessLaTeX(content), [content]);

    return (
      <Markdown
        remarkPlugins={[remarkGfm, remarkMath, remarkBreaks]}
        rehypePlugins={[rehypeHightlight, rehypeKatex, rehypeCustomCopyButton]}
        components={{
          // The "button" elements are the placeholders injected by
          // rehypeCustomCopyButton; each one is swapped for the toolbar.
          button: (buttonProps) => (
            <CodeBlockButtons
              {...buttonProps}
              isGenerating={isGenerating}
              origContent={markdownSource}
            />
          ),
          // note: do not override "pre", "p" or other basic html elements here;
          // doing so causes the node to re-render while the message is being
          // generated (this appears to be a react-markdown bug, no known fix)
        }}
      >
        {markdownSource}
      </Markdown>
    );
  }
  /**
   * Toolbar (copy / run) rendered in place of the placeholder `button` element
   * that `rehypeCustomCopyButton` puts in front of every code block.
   *
   * The code to copy is recovered from `origContent` using the source offsets
   * stored on the hast node, then the surrounding ``` fence is stripped.
   *
   * @param node         hast node of the placeholder; its `position` is the
   *                     position of the original `pre` element.
   * @param origContent  The exact markdown string that was parsed.
   * @param isGenerating When true the "run" button is not offered.
   */
  const CodeBlockButtons: React.ElementType<
    React.ClassAttributes<HTMLButtonElement> &
      React.HTMLAttributes<HTMLButtonElement> &
      ExtraProps & { origContent: string; isGenerating?: boolean }
  > = ({ node, origContent, isGenerating }) => {
    const { config } = useAppContext();
    const startOffset = node?.position?.start.offset ?? 0;
    const endOffset = node?.position?.end.offset ?? 0;

    // Raw markdown of the block, fence lines included.
    const rawBlock = useMemo(
      () => origContent.substring(startOffset, endOffset),
      [origContent, startOffset, endOffset]
    );

    const copiedContent = useMemo(
      () =>
        rawBlock
          // `[^\n]*` (not `+`): a fence without a language tag must be
          // stripped as well, otherwise "```" ends up in the clipboard text.
          .replace(/^```[^\n]*\n/, '')
          .replace(/```$/, ''),
      [rawBlock]
    );

    const codeLanguage = useMemo(
      // Read the whole info string up to the first newline instead of a fixed
      // 10-character window, so tags longer than six characters (for example
      // "python3") are still recognised.
      () => rawBlock.match(/^```([^\n]*)\n/)?.[1]?.trim() ?? '',
      [rawBlock]
    );

    const canRunCode =
      !isGenerating &&
      config.pyIntepreterEnabled &&
      codeLanguage.startsWith('py');

    return (
      <div
        className={classNames({
          'text-right sticky top-[7em] mb-2 mr-2 h-0': true,
          // no source position means there is nothing meaningful to copy
          'display-none': !node?.position,
        })}
      >
        <CopyButton
          className="badge btn-mini btn-soft shadow-sm"
          content={copiedContent}
        />
        {canRunCode && (
          <RunPyCodeButton
            className="badge btn-mini shadow-sm ml-2"
            content={copiedContent}
          />
        )}
      </div>
    );
  };
  /**
   * Icon button that hands `content` to `copyStr` when clicked.
   *
   * The tooltip reads "Copy" until the button is clicked, then "Copied!" until
   * the pointer leaves the button.
   *
   * @param content   Text passed to `copyStr`.
   * @param className Optional class names forwarded to the button.
   */
  export const CopyButton = ({
    content,
    className,
  }: {
    content: string;
    className?: string;
  }) => {
    const [copied, setCopied] = useState(false);

    const handleClick = () => {
      copyStr(content);
      setCopied(true);
    };
    const resetCopied = () => setCopied(false);
    const tooltip = copied ? 'Copied!' : 'Copy';

    return (
      <BtnWithTooltips
        className={className}
        onClick={handleClick}
        onMouseLeave={resetCopied}
        tooltipsContent={tooltip}
      >
        <DocumentDuplicateIcon className="h-4 w-4" />
      </BtnWithTooltips>
    );
  };
  /**
   * Icon button that sends `content` to the canvas as a
   * `CanvasType.PY_INTERPRETER` entry via `setCanvasData` from the app context.
   *
   * @param content   Source code placed in the canvas data.
   * @param className Optional class names forwarded to the button.
   */
  export const RunPyCodeButton = ({
    content,
    className,
  }: {
    content: string;
    className?: string;
  }) => {
    const { setCanvasData } = useAppContext();

    const openInInterpreter = () =>
      setCanvasData({
        type: CanvasType.PY_INTERPRETER,
        content,
      });

    return (
      <BtnWithTooltips
        className={className}
        onClick={openInInterpreter}
        tooltipsContent="Run code"
      >
        <PlayIcon className="h-4 w-4" />
      </BtnWithTooltips>
    );
  };
  /**
   * Rehype plugin: wraps every "pre" element in a "div" whose first child is an
   * empty "button" element and whose second child is the original "pre".
   *
   * The "button" is only a placeholder; MarkdownDisplay maps it to a React
   * component. The "pre" node is wrapped rather than replaced because replacing
   * it causes the node to re-render, which triggers this bug:
   * https://github.com/ggerganov/llama.cpp/issues/9608
   */
  function rehypeCustomCopyButton() {
    return (tree: Root) => {
      visit(tree, 'element', (node) => {
        const isUnprocessedPre =
          node.tagName === 'pre' && !node.properties.visited;
        if (!isUnprocessedPre) {
          return;
        }

        // Shallow copy that keeps the original "pre" (it shares the original
        // properties object). Marking it as visited stops the traversal from
        // wrapping it again when it descends into the new children.
        const originalPre = { ...node };
        originalPre.properties.visited = 'true';

        // The placeholder carries the position of the code block so the
        // React component can locate the block in the source text.
        const placeholder: ElementContent = {
          type: 'element',
          tagName: 'button',
          properties: {},
          children: [],
          position: node.position,
        };

        // Turn the visited node itself into the wrapping "div".
        node.tagName = 'div';
        node.properties = {};
        node.children = [placeholder, originalPre];
      });
    };
  }
  167. /**
  168. * The part below is copied and adapted from:
  169. * https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts
  170. * (MIT License)
  171. */
  // Regex to check if the processed content contains any potential LaTeX patterns.
  // The `s` (dotAll) flag keeps detection consistent with `blockLatex` below:
  // without it a `\[ ... \]` block spanning several lines is never detected and
  // therefore never converted.
  const containsLatexRegex =
    /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/s;

  // Regex for inline `\( ... \)` and block `\[ ... \]` LaTeX expressions
  const inlineLatex = /\\\((.+?)\\\)/g;
  const blockLatex = /\\\[(.*?[^\\])\\\]/gs;

  /**
   * Puts the protected code spans back in place of their placeholders.
   *
   * A placeholder whose index has no stored block (for example the literal text
   * `<<CODE_BLOCK_7>>` typed by a user) is left untouched instead of being
   * replaced by the string "undefined".
   */
  const restoreCodeBlocks = (content: string, codeBlocks: string[]): string =>
    content.replace(
      /<<CODE_BLOCK_(\d+)>>/g,
      (placeholder: string, index: string) =>
        codeBlocks[Number(index)] ?? placeholder
    );

  // Regex to identify code blocks and inline code
  const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;

  /**
   * Converts `\( ... \)` / `\[ ... \]` delimiters to `$ ... $` / `$$ ... $$`
   * and escapes dollar signs that are followed by a digit (optionally after one
   * whitespace character). Fenced code blocks and inline code are left as-is.
   *
   * @param _content The markdown text to process.
   * @returns The processed text.
   */
  export const processLaTeX = (_content: string): string => {
    // Temporarily replace code blocks and inline code with placeholders
    const codeBlocks: string[] = [];
    const withoutCode = _content.replace(codeBlockRegex, (match: string) => {
      codeBlocks.push(match);
      return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
    });

    // Escape dollar signs followed by a digit or space and digit
    const escaped = withoutCode.replace(/(\$)(?=\s?\d)/g, '\\$');

    // Nothing that looks like LaTeX: only the escaping above applies
    if (!containsLatexRegex.test(escaped)) {
      return restoreCodeBlocks(escaped, codeBlocks);
    }

    // Convert LaTeX expressions to a markdown compatible format.
    // Replacer functions are used so "$" in the output is taken literally.
    const converted = escaped
      .replace(inlineLatex, (_: string, equation: string) => `$${equation}$`)
      .replace(blockLatex, (_: string, equation: string) => `$$${equation}$$`);

    return restoreCodeBlocks(converted, codeBlocks);
  };
  /**
   * Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
   *
   * Code spans and existing LaTeX expressions are swapped for placeholders, any
   * remaining `$` directly followed by a digit is escaped as currency, the
   * placeholders are restored, and finally `escapeBrackets` and `escapeMhchem`
   * are applied.
   *
   * @param content The input string containing LaTeX expressions.
   * @returns The processed string with replaced delimiters and escaped characters.
   */
  export function preprocessLaTeX(content: string): string {
    let text = content;

    // Step 1: Protect code blocks
    const codeBlocks: string[] = [];
    text = text.replace(
      /(```[\s\S]*?```|`[^`\n]+`)/g,
      (_: string, code: string) => {
        codeBlocks.push(code);
        return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
      }
    );

    // Step 2: Protect existing LaTeX expressions
    const latexExpressions: string[] = [];

    // Protect block math ($$...$$), \[...\], and \(...\).
    text = text.replace(
      /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
      (match: string) => {
        latexExpressions.push(match);
        return `<<LATEX_${latexExpressions.length - 1}>>`;
      }
    );

    // Protect inline math ($...$) only if it does NOT match a currency pattern.
    // We assume a currency pattern is one where the inner content is purely
    // numeric (with optional decimals).
    text = text.replace(/\$([^$]+)\$/g, (match: string, inner: string) => {
      if (/^\s*\d+(?:\.\d+)?\s*$/.test(inner)) {
        // This looks like a currency value (e.g. "$123" or "$12.34"),
        // so don't protect it.
        return match;
      }
      // Otherwise, treat it as a LaTeX expression.
      latexExpressions.push(match);
      return `<<LATEX_${latexExpressions.length - 1}>>`;
    });

    // Step 3: Escape dollar signs that are likely currency indicators.
    // (Inline math is protected, so only unprotected dollars are escaped.)
    text = text.replace(/\$(?=\d)/g, '\\$');

    // Step 4: Restore LaTeX expressions.
    // An inline match from step 2 may have swallowed a placeholder created
    // earlier in that step (e.g. "$5, and $$x$$ then $3"), so restoration has to
    // descend into the restored text. Only placeholders with a strictly smaller
    // index are expanded at each level: that is the only way genuine nesting can
    // occur, and it guarantees termination even if the input itself contains
    // placeholder-looking text. Unknown indices are left as written instead of
    // turning into "undefined".
    const restoreLatex = (source: string, limit: number): string =>
      source.replace(
        /<<LATEX_(\d+)>>/g,
        (placeholder: string, rawIndex: string) => {
          const index = parseInt(rawIndex, 10);
          const expression =
            index < limit ? latexExpressions[index] : undefined;
          return expression === undefined
            ? placeholder
            : restoreLatex(expression, index);
        }
      );
    text = restoreLatex(text, latexExpressions.length);

    // Step 5: Restore code blocks (unknown indices are left untouched)
    text = text.replace(
      /<<CODE_BLOCK_(\d+)>>/g,
      (placeholder: string, rawIndex: string) =>
        codeBlocks[parseInt(rawIndex, 10)] ?? placeholder
    );

    // Step 6: Apply additional escaping functions
    text = escapeBrackets(text);
    text = escapeMhchem(text);
    return text;
  }
  /**
   * Rewrites `\[ ... \]` as `$$ ... $$` and `\( ... \)` as `$ ... $`.
   * Fenced code blocks and inline code spans are matched first and returned
   * unchanged, so delimiters inside code are not rewritten.
   *
   * @param text The markdown text to rewrite.
   * @returns The text with bracket delimiters converted to dollar delimiters.
   */
  export function escapeBrackets(text: string): string {
    // group 1: code (fenced or inline) · group 2: \[ ... \] · group 3: \( ... \)
    const delimiterPattern =
      /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

    const rewrite = (
      whole: string,
      code?: string,
      display?: string,
      inline?: string
    ): string => {
      if (code != null) {
        return code;
      }
      if (display != null) {
        return `$$${display}$$`;
      }
      // `inline` may legitimately be the empty string, hence the null check
      // rather than a truthiness test.
      if (inline != null) {
        return `$${inline}$`;
      }
      return whole;
    };

    return text.replace(delimiterPattern, rewrite);
  }
  /**
   * Doubles the backslash of `\ce{` and `\pu{` when they directly follow a `$`,
   * i.e. `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
   *
   * @param text The text to escape.
   * @returns The text with every such occurrence rewritten.
   */
  export function escapeMhchem(text: string) {
    const withCeEscaped = text.split('$\\ce{').join('$\\\\ce{');
    return withCeEscaped.split('$\\pu{').join('$\\\\pu{');
  }