useChatExtraContext.tsx 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. import { useState } from 'react';
  2. import { MessageExtra } from '../utils/types';
  3. import toast from 'react-hot-toast';
  4. import { useAppContext } from '../utils/app.context';
  5. import * as pdfjs from 'pdfjs-dist';
  6. import pdfjsWorkerSrc from 'pdfjs-dist/build/pdf.worker.min.mjs?url';
  7. import { TextContent, TextItem } from 'pdfjs-dist/types/src/display/api';
  8. pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorkerSrc;
  9. // This file handles uploading extra context items (a.k.a files)
  10. // It allows processing these kinds of files:
  11. // - image files (converted to base64)
  12. // - audio files (converted to base64)
  13. // - text files (including code files)
  14. // - pdf (converted to text)
  /**
   * Shape of the object returned by the `useChatExtraContext` hook.
   */
  export interface ChatExtraContextApi {
    /** Pending extra items; `undefined` if empty, similar to Message['extra']. */
    items?: MessageExtra[];
    /** Appends the given items to the pending list. */
    addItems: (items: MessageExtra[]) => void;
    /** Removes the pending item at position `idx`. */
    removeItem: (idx: number) => void;
    /** Drops every pending item. */
    clearItems: () => void;
    /** Processes user-selected files; used by the "upload" button. */
    onFileAdded: (files: File[]) => void;
  }
  /**
   * React hook that keeps the list of extra context items (attachments) for the
   * message being composed and converts uploaded files into those items.
   *
   * Conversion rules applied by `onFileAdded`, per file:
   * - `image/*`: stored as a base64 data URL (SVG is rasterized to PNG first);
   *   requires the server to report vision support.
   * - `audio/*`: only mpeg / wav, stored as plain base64 (no data-URL prefix).
   * - `application/pdf`: rendered to images when `config.pdfAsImage` is enabled
   *   and vision is supported, otherwise extracted as text.
   * - `video/*`: rejected.
   * - anything else: read as text and accepted unless it looks binary.
   *
   * A validation failure (file too large, unsupported type, missing vision
   * support for an image) shows a toast and aborts the remaining files.
   *
   * @returns the current items plus the functions to modify them.
   */
  export function useChatExtraContext(): ChatExtraContextApi {
    const { serverProps, config } = useAppContext();
    const [items, setItems] = useState<MessageExtra[]>([]);

    const addItems = (newItems: MessageExtra[]) => {
      setItems((prev) => [...prev, ...newItems]);
    };

    const removeItem = (idx: number) => {
      setItems((prev) => prev.filter((_, i) => i !== idx));
    };

    const clearItems = () => {
      setItems([]);
    };

    const isSupportVision = serverProps?.modalities?.vision;

    const onFileAdded = async (files: File[]) => {
      try {
        for (const file of files) {
          const mimeType = file.type;

          // this limit is only to prevent accidental uploads of huge files
          // it can potentially crash the browser because we read the file as base64
          if (file.size > 500 * 1024 * 1024) {
            toast.error('File is too large. Maximum size is 500MB.');
            break;
          }

          if (mimeType.startsWith('image/')) {
            if (!isSupportVision) {
              toast.error('Multimodal is not supported by this server or model.');
              break;
            }

            let base64Url = await getFileAsBase64(file);
            if (mimeType === 'image/svg+xml') {
              // Convert SVG to PNG
              base64Url = await svgBase64UrlToPngDataURL(base64Url);
            }
            addItems([
              {
                type: 'imageFile',
                name: file.name,
                base64Url,
              },
            ]);
          } else if (mimeType.startsWith('video/')) {
            toast.error('Video files are not supported yet.');
            break;
          } else if (mimeType.startsWith('audio/')) {
            if (!/mpeg|wav/.test(mimeType)) {
              toast.error('Only mp3 and wav audio files are supported.');
              break;
            }

            // plain base64, not a data URL
            const base64Data = await getFileAsBase64(file, false);
            addItems([
              {
                type: 'audioFile',
                name: file.name,
                mimeType,
                base64Data,
              },
            ]);
          } else if (mimeType.startsWith('application/pdf')) {
            if (config.pdfAsImage && !isSupportVision) {
              // Only a notice: we deliberately fall through to the text
              // conversion below, which is what the message promises.
              toast(
                'Multimodal is not supported, PDF will be converted to text instead of image.'
              );
            }

            if (config.pdfAsImage && isSupportVision) {
              // Convert PDF to images (one item per page)
              const base64Urls = await convertPDFToImage(file);
              addItems(
                base64Urls.map((base64Url) => ({
                  type: 'imageFile',
                  name: file.name,
                  base64Url,
                }))
              );
            } else {
              // Convert PDF to text
              const content = await convertPDFToText(file);
              addItems([
                {
                  type: 'textFile',
                  name: file.name,
                  content,
                },
              ]);
              if (isSupportVision) {
                toast.success(
                  'PDF file converted to text. You can also convert it to image, see in Settings.'
                );
              }
            }
            // No `break` here: files selected after a PDF must still be processed.
          } else {
            // Because there can be many text file types (like code file), we will not check the mime type
            // and will just check if the file is not binary.
            // The read is awaited so that failures reach the catch block below
            // and items are added in the order the files were given.
            const content = await readFileAsText(file);
            if (!content) {
              // Empty files carry no context; they are skipped silently.
              continue;
            }
            if (!isLikelyNotBinary(content)) {
              toast.error('File is binary. Please upload a text file.');
              continue;
            }
            addItems([
              {
                type: 'textFile',
                name: file.name,
                content,
              },
            ]);
          }
        }
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        const errorMessage = `Error processing file: ${message}`;
        toast.error(errorMessage);
      }
    };

    return {
      items: items.length > 0 ? items : undefined,
      addItems,
      removeItem,
      clearItems,
      onFileAdded,
    };
  }

  /**
   * Reads a file as text through `FileReader.readAsText`, as a promise.
   *
   * @param file the file to read.
   * @returns the decoded text (may be an empty string).
   * @throws rejects when the read fails or yields a non-string result.
   */
  function readFileAsText(file: File): Promise<string> {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => {
        if (typeof reader.result === 'string') {
          resolve(reader.result);
        } else {
          reject(new Error('Failed to read file.'));
        }
      };
      reader.onerror = () => {
        reject(reader.error ?? new Error('Failed to read file.'));
      };
      reader.readAsText(file);
    });
  }
  /**
   * Reads a file and returns its content base64-encoded.
   *
   * @param file the file to read.
   * @param outputUrl when `true` (default) the full data URL produced by
   *   `FileReader.readAsDataURL` is returned; when `false` everything up to and
   *   including the first comma is stripped, leaving only the base64 payload.
   * @returns the data URL or the bare base64 string.
   * @throws rejects when the read fails or produces an empty result, so callers
   *   are never left waiting on a promise that cannot settle.
   */
  async function getFileAsBase64(file: File, outputUrl = true): Promise<string> {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();

      reader.onload = () => {
        const result = reader.result;
        if (typeof result !== 'string' || result.length === 0) {
          reject(new Error('Failed to read file.'));
          return;
        }
        // remove the "data:<mime>;base64," prefix when only the payload is wanted
        resolve(outputUrl ? result : result.substring(result.indexOf(',') + 1));
      };

      // Without this handler a failed read would leave the promise pending forever.
      reader.onerror = () => {
        reject(reader.error ?? new Error('Failed to read file.'));
      };

      reader.readAsDataURL(file);
    });
  }
  /**
   * Reads a file into memory as an `ArrayBuffer`.
   *
   * @param file the file to read.
   * @returns the raw bytes of the file.
   * @throws rejects when the read fails or does not produce a buffer, so callers
   *   are never left waiting on a promise that cannot settle.
   */
  async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();

      reader.onload = () => {
        const result = reader.result;
        if (result && typeof result !== 'string') {
          resolve(result);
        } else {
          reject(new Error('Failed to read file.'));
        }
      };

      // Without this handler a failed read would leave the promise pending forever.
      reader.onerror = () => {
        reject(reader.error ?? new Error('Failed to read file.'));
      };

      reader.readAsArrayBuffer(file);
    });
  }
  /**
   * Extracts the text of a PDF file using pdf.js.
   *
   * Every page is queried for its text content; the `str` of each returned item
   * (empty string for items without one) is joined with newlines, in page order.
   *
   * @param file the PDF file.
   * @returns the extracted text.
   * @throws rejects when the file cannot be read or pdf.js fails to parse it.
   */
  async function convertPDFToText(file: File): Promise<string> {
    const buffer = await getFileAsBuffer(file);
    const pdf = await pdfjs.getDocument(buffer).promise;
    try {
      const pageNumbers = Array.from({ length: pdf.numPages }, (_, i) => i + 1);
      const textContents: TextContent[] = await Promise.all(
        pageNumbers.map(async (pageNumber) => {
          const page = await pdf.getPage(pageNumber);
          return page.getTextContent();
        })
      );
      return textContents
        .flatMap((textContent) =>
          textContent.items.map((item) => (item as TextItem).str ?? '')
        )
        .join('\n');
    } finally {
      // Release the document's resources once we are done with it;
      // otherwise every uploaded PDF stays loaded.
      await pdf.destroy();
    }
  }
  /**
   * Renders every page of a PDF file to an image using pdf.js and a canvas.
   *
   * Pages are rendered one after another at scale 1.5, so a failure stops the
   * work immediately and leaves no render running in the background.
   *
   * @param file the PDF file.
   * @returns list of base64 images (data URLs from `canvas.toDataURL()`), one
   *   per page, in page order.
   * @throws rejects when the file cannot be read, pdf.js fails, or no 2D canvas
   *   context is available.
   */
  async function convertPDFToImage(file: File): Promise<string[]> {
    const buffer = await getFileAsBuffer(file);
    const doc = await pdfjs.getDocument(buffer).promise;
    try {
      const images: string[] = [];
      for (let i = 1; i <= doc.numPages; i++) {
        const page = await doc.getPage(i);
        const viewport = page.getViewport({ scale: 1.5 });

        const canvas = document.createElement('canvas');
        canvas.width = viewport.width;
        canvas.height = viewport.height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
          throw new Error('Failed to get 2D context from canvas');
        }

        await page.render({ canvasContext: ctx, viewport: viewport }).promise;
        images.push(canvas.toDataURL());
      }
      return images;
    } finally {
      // Release the document's resources once we are done with it;
      // otherwise every uploaded PDF stays loaded.
      await doc.destroy();
    }
  }
  // WARN: vibe code below
  /**
   * Heuristic that decides whether a decoded string is likely text, not binary.
   *
   * It is necessary because input files can have various mime types which we
   * don't have time to investigate. For example, a python file can be
   * text/plain, application/x-python, etc.
   *
   * Only the first 10KB (UTF-16 code units) of the string are inspected. A code
   * unit is "suspicious" when it is:
   * - U+FFFD, the replacement character left behind by a failed UTF-8 decode;
   * - U+0000, a null byte;
   * - any other C0 control character below U+0020, except TAB, LF, CR and the
   *   tolerated BEL and BS (which sometimes appear in logs).
   * Everything from U+0020 upwards (including DEL) is accepted as-is.
   *
   * @param str the decoded file content.
   * @returns `true` for an empty string; `false` when the sample has more than
   *   2 null bytes or more than 15% suspicious code units; `true` otherwise.
   */
  function isLikelyNotBinary(str: string): boolean {
    const SAMPLE_LIMIT = 1024 * 10;
    const MAX_SUSPICIOUS_RATIO = 0.15;
    const MAX_NULL_BYTES = 2;
    // TAB, LF, CR, BEL, BS
    const TOLERATED_CONTROLS = new Set<number>([9, 10, 13, 7, 8]);

    // An empty string is trivially text.
    if (!str) {
      return true;
    }

    const sample = str.slice(0, SAMPLE_LIMIT);
    let nulls = 0;
    let suspicious = 0;

    for (let pos = 0; pos < sample.length; pos++) {
      const code = sample.charCodeAt(pos);
      if (code === 0x0000) {
        // Nulls are tracked separately and also count as suspicious.
        nulls += 1;
        suspicious += 1;
      } else if (code === 0xfffd) {
        suspicious += 1;
      } else if (code < 0x20 && !TOLERATED_CONTROLS.has(code)) {
        suspicious += 1;
      }
    }

    // Too many null bytes is a strong binary indicator on its own.
    if (nulls > MAX_NULL_BYTES) {
      return false;
    }

    return suspicious / sample.length <= MAX_SUSPICIOUS_RATIO;
  }
  // WARN: vibe code below
  /**
   * Converts an SVG data URL to a PNG data URL using the browser Canvas API.
   *
   * The SVG is loaded into an `Image`, painted over a white background on a
   * canvas sized to the image's natural dimensions (300 is used for a dimension
   * reported as 0), and exported with `canvas.toDataURL('image/png')`.
   *
   * @param base64UrlSvg the SVG as a data URL, assigned to `img.src`.
   * @returns a promise resolving to the PNG data URL.
   * @throws rejects when the image fails to load, when no 2D context is
   *   available, or when setting up the image throws (a toast is shown too in
   *   that last case).
   */
  function svgBase64UrlToPngDataURL(base64UrlSvg: string): Promise<string> {
    const FALLBACK_SIZE = 300; // for SVGs whose natural width/height is 0
    const fill = 'white'; // background colour of the PNG

    return new Promise<string>((resolve, reject) => {
      // Paints the loaded image onto a fresh canvas and settles the promise.
      const rasterize = (image: HTMLImageElement): void => {
        const surface = document.createElement('canvas');
        const painter = surface.getContext('2d');
        if (!painter) {
          reject(new Error('Failed to get 2D canvas context.'));
          return;
        }

        const width = image.naturalWidth || FALLBACK_SIZE;
        const height = image.naturalHeight || FALLBACK_SIZE;
        surface.width = width;
        surface.height = height;

        painter.fillStyle = fill;
        painter.fillRect(0, 0, surface.width, surface.height);
        painter.drawImage(image, 0, 0, width, height);

        resolve(surface.toDataURL('image/png'));
      };

      try {
        const image = new Image();
        image.onload = () => {
          rasterize(image);
        };
        image.onerror = () => {
          reject(
            new Error('Failed to load SVG image. Ensure the SVG data is valid.')
          );
        };
        // Handlers are attached first; assigning the source starts the load.
        image.src = base64UrlSvg;
      } catch (error) {
        const detail = error instanceof Error ? error.message : String(error);
        const errorMessage = `Error converting SVG to PNG: ${detail}`;
        toast.error(errorMessage);
        reject(new Error(errorMessage));
      }
    });
  }