// chat.ts
import { config } from '$lib/stores/settings.svelte';
import { slotsService } from './slots';
  3. /**
  4. * ChatService - Low-level API communication layer for llama.cpp server interactions
  5. *
  6. * This service handles direct communication with the llama.cpp server's chat completion API.
  7. * It provides the network layer abstraction for AI model interactions while remaining
  8. * stateless and focused purely on API communication.
  9. *
  10. * **Architecture & Relationship with ChatStore:**
  11. * - **ChatService** (this class): Stateless API communication layer
  12. * - Handles HTTP requests/responses with llama.cpp server
  13. * - Manages streaming and non-streaming response parsing
  14. * - Provides request abortion capabilities
  15. * - Converts database messages to API format
  16. * - Handles error translation and context detection
  17. *
  18. * - **ChatStore**: Stateful orchestration and UI state management
  19. * - Uses ChatService for all AI model communication
  20. * - Manages conversation state, message history, and UI reactivity
  21. * - Coordinates with DatabaseStore for persistence
  22. * - Handles complex workflows like branching and regeneration
  23. *
  24. * **Key Responsibilities:**
  25. * - Message format conversion (DatabaseMessage → API format)
  26. * - Streaming response handling with real-time callbacks
  27. * - Reasoning content extraction and processing
  28. * - File attachment processing (images, PDFs, audio, text)
  29. * - Context error detection and reporting
  30. * - Request lifecycle management (abort, cleanup)
  31. */
  32. export class ChatService {
  33. private abortController: AbortController | null = null;
  34. /**
  35. * Sends a chat completion request to the llama.cpp server.
  36. * Supports both streaming and non-streaming responses with comprehensive parameter configuration.
  37. * Automatically converts database messages with attachments to the appropriate API format.
  38. *
  39. * @param messages - Array of chat messages to send to the API (supports both ApiChatMessageData and DatabaseMessage with attachments)
  40. * @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` type for details.
  41. * @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
  42. * @throws {Error} if the request fails or is aborted
  43. */
  44. async sendMessage(
  45. messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
  46. options: SettingsChatServiceOptions = {}
  47. ): Promise<string | void> {
  48. const {
  49. stream,
  50. onChunk,
  51. onComplete,
  52. onError,
  53. // Generation parameters
  54. temperature,
  55. max_tokens,
  56. // Sampling parameters
  57. dynatemp_range,
  58. dynatemp_exponent,
  59. top_k,
  60. top_p,
  61. min_p,
  62. xtc_probability,
  63. xtc_threshold,
  64. typ_p,
  65. // Penalty parameters
  66. repeat_last_n,
  67. repeat_penalty,
  68. presence_penalty,
  69. frequency_penalty,
  70. dry_multiplier,
  71. dry_base,
  72. dry_allowed_length,
  73. dry_penalty_last_n,
  74. // Other parameters
  75. samplers,
  76. custom,
  77. timings_per_token
  78. } = options;
  79. const currentConfig = config();
  80. // Cancel any ongoing request and create a new abort controller
  81. this.abort();
  82. this.abortController = new AbortController();
  83. // Convert database messages with attachments to API format if needed
  84. const normalizedMessages: ApiChatMessageData[] = messages
  85. .map((msg) => {
  86. // Check if this is a DatabaseMessage by checking for DatabaseMessage-specific fields
  87. if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
  88. // This is a DatabaseMessage, convert it
  89. const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
  90. return ChatService.convertMessageToChatServiceData(dbMsg);
  91. } else {
  92. // This is already an ApiChatMessageData object
  93. return msg as ApiChatMessageData;
  94. }
  95. })
  96. .filter((msg) => {
  97. // Filter out empty system messages
  98. if (msg.role === 'system') {
  99. const content = typeof msg.content === 'string' ? msg.content : '';
  100. return content.trim().length > 0;
  101. }
  102. return true;
  103. });
  104. // Build base request body with system message injection
  105. const processedMessages = this.injectSystemMessage(normalizedMessages);
  106. const requestBody: ApiChatCompletionRequest = {
  107. messages: processedMessages.map((msg: ApiChatMessageData) => ({
  108. role: msg.role,
  109. content: msg.content
  110. })),
  111. stream
  112. };
  113. requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';
  114. if (temperature !== undefined) requestBody.temperature = temperature;
  115. if (max_tokens !== undefined) {
  116. // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
  117. requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
  118. }
  119. if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
  120. if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
  121. if (top_k !== undefined) requestBody.top_k = top_k;
  122. if (top_p !== undefined) requestBody.top_p = top_p;
  123. if (min_p !== undefined) requestBody.min_p = min_p;
  124. if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
  125. if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
  126. if (typ_p !== undefined) requestBody.typ_p = typ_p;
  127. if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
  128. if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
  129. if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
  130. if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
  131. if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
  132. if (dry_base !== undefined) requestBody.dry_base = dry_base;
  133. if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
  134. if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
  135. if (samplers !== undefined) {
  136. requestBody.samplers =
  137. typeof samplers === 'string'
  138. ? samplers.split(';').filter((s: string) => s.trim())
  139. : samplers;
  140. }
  141. if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
  142. if (custom) {
  143. try {
  144. const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
  145. Object.assign(requestBody, customParams);
  146. } catch (error) {
  147. console.warn('Failed to parse custom parameters:', error);
  148. }
  149. }
  150. try {
  151. const apiKey = currentConfig.apiKey?.toString().trim();
  152. const response = await fetch(`./v1/chat/completions`, {
  153. method: 'POST',
  154. headers: {
  155. 'Content-Type': 'application/json',
  156. ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
  157. },
  158. body: JSON.stringify(requestBody),
  159. signal: this.abortController.signal
  160. });
  161. if (!response.ok) {
  162. // Use the new parseErrorResponse method to handle structured errors
  163. const error = await this.parseErrorResponse(response);
  164. if (onError) {
  165. onError(error);
  166. }
  167. throw error;
  168. }
  169. if (stream) {
  170. return this.handleStreamResponse(
  171. response,
  172. onChunk,
  173. onComplete,
  174. onError,
  175. options.onReasoningChunk
  176. );
  177. } else {
  178. return this.handleNonStreamResponse(response, onComplete, onError);
  179. }
  180. } catch (error) {
  181. if (error instanceof Error && error.name === 'AbortError') {
  182. console.log('Chat completion request was aborted');
  183. return;
  184. }
  185. let userFriendlyError: Error;
  186. if (error instanceof Error) {
  187. if (error.name === 'TypeError' && error.message.includes('fetch')) {
  188. userFriendlyError = new Error(
  189. 'Unable to connect to server - please check if the server is running'
  190. );
  191. } else if (error.message.includes('ECONNREFUSED')) {
  192. userFriendlyError = new Error('Connection refused - server may be offline');
  193. } else if (error.message.includes('ETIMEDOUT')) {
  194. userFriendlyError = new Error('Request timeout - server may be overloaded');
  195. } else {
  196. userFriendlyError = error;
  197. }
  198. } else {
  199. userFriendlyError = new Error('Unknown error occurred while sending message');
  200. }
  201. console.error('Error in sendMessage:', error);
  202. if (onError) {
  203. onError(userFriendlyError);
  204. }
  205. throw userFriendlyError;
  206. }
  207. }
  208. /**
  209. * Handles streaming response from the chat completion API.
  210. * Processes server-sent events and extracts content chunks from the stream.
  211. *
  212. * @param response - The fetch Response object containing the streaming data
  213. * @param onChunk - Optional callback invoked for each content chunk received
  214. * @param onComplete - Optional callback invoked when the stream is complete with full response
  215. * @param onError - Optional callback invoked if an error occurs during streaming
  216. * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
  217. * @returns {Promise<void>} Promise that resolves when streaming is complete
  218. * @throws {Error} if the stream cannot be read or parsed
  219. */
  220. private async handleStreamResponse(
  221. response: Response,
  222. onChunk?: (chunk: string) => void,
  223. onComplete?: (
  224. response: string,
  225. reasoningContent?: string,
  226. timings?: ChatMessageTimings
  227. ) => void,
  228. onError?: (error: Error) => void,
  229. onReasoningChunk?: (chunk: string) => void
  230. ): Promise<void> {
  231. const reader = response.body?.getReader();
  232. if (!reader) {
  233. throw new Error('No response body');
  234. }
  235. const decoder = new TextDecoder();
  236. let aggregatedContent = '';
  237. let fullReasoningContent = '';
  238. let hasReceivedData = false;
  239. let lastTimings: ChatMessageTimings | undefined;
  240. try {
  241. let chunk = '';
  242. while (true) {
  243. const { done, value } = await reader.read();
  244. if (done) break;
  245. chunk += decoder.decode(value, { stream: true });
  246. const lines = chunk.split('\n');
  247. chunk = lines.pop() || ''; // Save incomplete line for next read
  248. for (const line of lines) {
  249. if (line.startsWith('data: ')) {
  250. const data = line.slice(6);
  251. if (data === '[DONE]') {
  252. if (!hasReceivedData && aggregatedContent.length === 0) {
  253. const contextError = new Error(
  254. 'The request exceeds the available context size. Try increasing the context size or enable context shift.'
  255. );
  256. contextError.name = 'ContextError';
  257. onError?.(contextError);
  258. return;
  259. }
  260. onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
  261. return;
  262. }
  263. try {
  264. const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
  265. const content = parsed.choices[0]?.delta?.content;
  266. const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
  267. const timings = parsed.timings;
  268. const promptProgress = parsed.prompt_progress;
  269. if (timings || promptProgress) {
  270. this.updateProcessingState(timings, promptProgress);
  271. // Store the latest timing data
  272. if (timings) {
  273. lastTimings = timings;
  274. }
  275. }
  276. if (content) {
  277. hasReceivedData = true;
  278. aggregatedContent += content;
  279. onChunk?.(content);
  280. }
  281. if (reasoningContent) {
  282. hasReceivedData = true;
  283. fullReasoningContent += reasoningContent;
  284. onReasoningChunk?.(reasoningContent);
  285. }
  286. } catch (e) {
  287. console.error('Error parsing JSON chunk:', e);
  288. }
  289. }
  290. }
  291. }
  292. if (!hasReceivedData && aggregatedContent.length === 0) {
  293. const contextError = new Error(
  294. 'The request exceeds the available context size. Try increasing the context size or enable context shift.'
  295. );
  296. contextError.name = 'ContextError';
  297. onError?.(contextError);
  298. return;
  299. }
  300. } catch (error) {
  301. const err = error instanceof Error ? error : new Error('Stream error');
  302. onError?.(err);
  303. throw err;
  304. } finally {
  305. reader.releaseLock();
  306. }
  307. }
  308. /**
  309. * Handles non-streaming response from the chat completion API.
  310. * Parses the JSON response and extracts the generated content.
  311. *
  312. * @param response - The fetch Response object containing the JSON data
  313. * @param onComplete - Optional callback invoked when response is successfully parsed
  314. * @param onError - Optional callback invoked if an error occurs during parsing
  315. * @returns {Promise<string>} Promise that resolves to the generated content string
  316. * @throws {Error} if the response cannot be parsed or is malformed
  317. */
  318. private async handleNonStreamResponse(
  319. response: Response,
  320. onComplete?: (
  321. response: string,
  322. reasoningContent?: string,
  323. timings?: ChatMessageTimings
  324. ) => void,
  325. onError?: (error: Error) => void
  326. ): Promise<string> {
  327. try {
  328. const responseText = await response.text();
  329. if (!responseText.trim()) {
  330. const contextError = new Error(
  331. 'The request exceeds the available context size. Try increasing the context size or enable context shift.'
  332. );
  333. contextError.name = 'ContextError';
  334. onError?.(contextError);
  335. throw contextError;
  336. }
  337. const data: ApiChatCompletionResponse = JSON.parse(responseText);
  338. const content = data.choices[0]?.message?.content || '';
  339. const reasoningContent = data.choices[0]?.message?.reasoning_content;
  340. if (reasoningContent) {
  341. console.log('Full reasoning content:', reasoningContent);
  342. }
  343. if (!content.trim()) {
  344. const contextError = new Error(
  345. 'The request exceeds the available context size. Try increasing the context size or enable context shift.'
  346. );
  347. contextError.name = 'ContextError';
  348. onError?.(contextError);
  349. throw contextError;
  350. }
  351. onComplete?.(content, reasoningContent);
  352. return content;
  353. } catch (error) {
  354. if (error instanceof Error && error.name === 'ContextError') {
  355. throw error;
  356. }
  357. const err = error instanceof Error ? error : new Error('Parse error');
  358. onError?.(err);
  359. throw err;
  360. }
  361. }
  362. /**
  363. * Converts a database message with attachments to API chat message format.
  364. * Processes various attachment types (images, text files, PDFs) and formats them
  365. * as content parts suitable for the chat completion API.
  366. *
  367. * @param message - Database message object with optional extra attachments
  368. * @param message.content - The text content of the message
  369. * @param message.role - The role of the message sender (user, assistant, system)
  370. * @param message.extra - Optional array of message attachments (images, files, etc.)
  371. * @returns {ApiChatMessageData} object formatted for the chat completion API
  372. * @static
  373. */
  374. static convertMessageToChatServiceData(
  375. message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
  376. ): ApiChatMessageData {
  377. if (!message.extra || message.extra.length === 0) {
  378. return {
  379. role: message.role as 'user' | 'assistant' | 'system',
  380. content: message.content
  381. };
  382. }
  383. const contentParts: ApiChatMessageContentPart[] = [];
  384. if (message.content) {
  385. contentParts.push({
  386. type: 'text',
  387. text: message.content
  388. });
  389. }
  390. const imageFiles = message.extra.filter(
  391. (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile =>
  392. extra.type === 'imageFile'
  393. );
  394. for (const image of imageFiles) {
  395. contentParts.push({
  396. type: 'image_url',
  397. image_url: { url: image.base64Url }
  398. });
  399. }
  400. const textFiles = message.extra.filter(
  401. (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile =>
  402. extra.type === 'textFile'
  403. );
  404. for (const textFile of textFiles) {
  405. contentParts.push({
  406. type: 'text',
  407. text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
  408. });
  409. }
  410. const audioFiles = message.extra.filter(
  411. (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile =>
  412. extra.type === 'audioFile'
  413. );
  414. for (const audio of audioFiles) {
  415. contentParts.push({
  416. type: 'input_audio',
  417. input_audio: {
  418. data: audio.base64Data,
  419. format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
  420. }
  421. });
  422. }
  423. const pdfFiles = message.extra.filter(
  424. (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
  425. extra.type === 'pdfFile'
  426. );
  427. for (const pdfFile of pdfFiles) {
  428. if (pdfFile.processedAsImages && pdfFile.images) {
  429. for (let i = 0; i < pdfFile.images.length; i++) {
  430. contentParts.push({
  431. type: 'image_url',
  432. image_url: { url: pdfFile.images[i] }
  433. });
  434. }
  435. } else {
  436. contentParts.push({
  437. type: 'text',
  438. text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
  439. });
  440. }
  441. }
  442. return {
  443. role: message.role as 'user' | 'assistant' | 'system',
  444. content: contentParts
  445. };
  446. }
  447. /**
  448. * Get server properties - static method for API compatibility
  449. */
  450. static async getServerProps(): Promise<ApiLlamaCppServerProps> {
  451. try {
  452. const currentConfig = config();
  453. const apiKey = currentConfig.apiKey?.toString().trim();
  454. const response = await fetch(`./props`, {
  455. headers: {
  456. 'Content-Type': 'application/json',
  457. ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
  458. }
  459. });
  460. if (!response.ok) {
  461. throw new Error(`Failed to fetch server props: ${response.status}`);
  462. }
  463. const data = await response.json();
  464. return data;
  465. } catch (error) {
  466. console.error('Error fetching server props:', error);
  467. throw error;
  468. }
  469. }
  470. /**
  471. * Aborts any ongoing chat completion request.
  472. * Cancels the current request and cleans up the abort controller.
  473. *
  474. * @public
  475. */
  476. public abort(): void {
  477. if (this.abortController) {
  478. this.abortController.abort();
  479. this.abortController = null;
  480. }
  481. }
  482. /**
  483. * Injects a system message at the beginning of the conversation if configured in settings.
  484. * Checks for existing system messages to avoid duplication and retrieves the system message
  485. * from the current configuration settings.
  486. *
  487. * @param messages - Array of chat messages to process
  488. * @returns Array of messages with system message injected at the beginning if configured
  489. * @private
  490. */
  491. private injectSystemMessage(messages: ApiChatMessageData[]): ApiChatMessageData[] {
  492. const currentConfig = config();
  493. const systemMessage = currentConfig.systemMessage?.toString().trim();
  494. if (!systemMessage) {
  495. return messages;
  496. }
  497. if (messages.length > 0 && messages[0].role === 'system') {
  498. if (messages[0].content !== systemMessage) {
  499. const updatedMessages = [...messages];
  500. updatedMessages[0] = {
  501. role: 'system',
  502. content: systemMessage
  503. };
  504. return updatedMessages;
  505. }
  506. return messages;
  507. }
  508. const systemMsg: ApiChatMessageData = {
  509. role: 'system',
  510. content: systemMessage
  511. };
  512. return [systemMsg, ...messages];
  513. }
  514. /**
  515. * Parses error response and creates appropriate error with context information
  516. * @param response - HTTP response object
  517. * @returns Promise<Error> - Parsed error with context info if available
  518. */
  519. private async parseErrorResponse(response: Response): Promise<Error> {
  520. try {
  521. const errorText = await response.text();
  522. const errorData: ApiErrorResponse = JSON.parse(errorText);
  523. if (errorData.error?.type === 'exceed_context_size_error') {
  524. const contextError = errorData.error as ApiContextSizeError;
  525. const error = new Error(contextError.message);
  526. error.name = 'ContextError';
  527. // Attach structured context information
  528. (
  529. error as Error & {
  530. contextInfo?: { promptTokens: number; maxContext: number; estimatedTokens: number };
  531. }
  532. ).contextInfo = {
  533. promptTokens: contextError.n_prompt_tokens,
  534. maxContext: contextError.n_ctx,
  535. estimatedTokens: contextError.n_prompt_tokens
  536. };
  537. return error;
  538. }
  539. // Fallback for other error types
  540. const message = errorData.error?.message || 'Unknown server error';
  541. return new Error(message);
  542. } catch {
  543. // If we can't parse the error response, return a generic error
  544. return new Error(`Server error (${response.status}): ${response.statusText}`);
  545. }
  546. }
  547. /**
  548. * Updates the processing state with timing information from the server response
  549. * @param timings - Timing data from the API response
  550. * @param promptProgress - Progress data from the API response
  551. */
  552. private updateProcessingState(
  553. timings?: ChatMessageTimings,
  554. promptProgress?: ChatMessagePromptProgress
  555. ): void {
  556. // Calculate tokens per second from timing data
  557. const tokensPerSecond =
  558. timings?.predicted_ms && timings?.predicted_n
  559. ? (timings.predicted_n / timings.predicted_ms) * 1000
  560. : 0;
  561. // Update slots service with timing data (async but don't wait)
  562. slotsService
  563. .updateFromTimingData({
  564. prompt_n: timings?.prompt_n || 0,
  565. predicted_n: timings?.predicted_n || 0,
  566. predicted_per_second: tokensPerSecond,
  567. cache_n: timings?.cache_n || 0,
  568. prompt_progress: promptProgress
  569. })
  570. .catch((error) => {
  571. console.warn('Failed to update processing state:', error);
  572. });
  573. }
  574. }
  575. export const chatService = new ChatService();