|
@@ -1,6 +1,25 @@
|
|
|
import { config } from '$lib/stores/settings.svelte';
|
|
import { config } from '$lib/stores/settings.svelte';
|
|
|
import { selectedModelName } from '$lib/stores/models.svelte';
|
|
import { selectedModelName } from '$lib/stores/models.svelte';
|
|
|
import { slotsService } from './slots';
|
|
import { slotsService } from './slots';
|
|
|
|
|
+import type {
|
|
|
|
|
+ ApiChatCompletionRequest,
|
|
|
|
|
+ ApiChatCompletionResponse,
|
|
|
|
|
+ ApiChatCompletionStreamChunk,
|
|
|
|
|
+ ApiChatCompletionToolCall,
|
|
|
|
|
+ ApiChatCompletionToolCallDelta,
|
|
|
|
|
+ ApiChatMessageData
|
|
|
|
|
+} from '$lib/types/api';
|
|
|
|
|
+import type {
|
|
|
|
|
+ DatabaseMessage,
|
|
|
|
|
+ DatabaseMessageExtra,
|
|
|
|
|
+ DatabaseMessageExtraAudioFile,
|
|
|
|
|
+ DatabaseMessageExtraImageFile,
|
|
|
|
|
+ DatabaseMessageExtraLegacyContext,
|
|
|
|
|
+ DatabaseMessageExtraPdfFile,
|
|
|
|
|
+ DatabaseMessageExtraTextFile
|
|
|
|
|
+} from '$lib/types/database';
|
|
|
|
|
+import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
|
|
|
|
|
+import type { SettingsChatServiceOptions } from '$lib/types/settings';
|
|
|
/**
|
|
/**
|
|
|
* ChatService - Low-level API communication layer for llama.cpp server interactions
|
|
* ChatService - Low-level API communication layer for llama.cpp server interactions
|
|
|
*
|
|
*
|
|
@@ -53,6 +72,7 @@ export class ChatService {
|
|
|
onComplete,
|
|
onComplete,
|
|
|
onError,
|
|
onError,
|
|
|
onReasoningChunk,
|
|
onReasoningChunk,
|
|
|
|
|
+ onToolCallChunk,
|
|
|
onModel,
|
|
onModel,
|
|
|
onFirstValidChunk,
|
|
onFirstValidChunk,
|
|
|
// Generation parameters
|
|
// Generation parameters
|
|
@@ -201,6 +221,7 @@ export class ChatService {
|
|
|
onComplete,
|
|
onComplete,
|
|
|
onError,
|
|
onError,
|
|
|
onReasoningChunk,
|
|
onReasoningChunk,
|
|
|
|
|
+ onToolCallChunk,
|
|
|
onModel,
|
|
onModel,
|
|
|
onFirstValidChunk,
|
|
onFirstValidChunk,
|
|
|
conversationId,
|
|
conversationId,
|
|
@@ -208,7 +229,13 @@ export class ChatService {
|
|
|
);
|
|
);
|
|
|
return;
|
|
return;
|
|
|
} else {
|
|
} else {
|
|
|
- return this.handleNonStreamResponse(response, onComplete, onError, onModel);
|
|
|
|
|
|
|
+ return this.handleNonStreamResponse(
|
|
|
|
|
+ response,
|
|
|
|
|
+ onComplete,
|
|
|
|
|
+ onError,
|
|
|
|
|
+ onToolCallChunk,
|
|
|
|
|
+ onModel
|
|
|
|
|
+ );
|
|
|
}
|
|
}
|
|
|
} catch (error) {
|
|
} catch (error) {
|
|
|
if (error instanceof Error && error.name === 'AbortError') {
|
|
if (error instanceof Error && error.name === 'AbortError') {
|
|
@@ -264,10 +291,12 @@ export class ChatService {
|
|
|
onComplete?: (
|
|
onComplete?: (
|
|
|
response: string,
|
|
response: string,
|
|
|
reasoningContent?: string,
|
|
reasoningContent?: string,
|
|
|
- timings?: ChatMessageTimings
|
|
|
|
|
|
|
+ timings?: ChatMessageTimings,
|
|
|
|
|
+ toolCalls?: string
|
|
|
) => void,
|
|
) => void,
|
|
|
onError?: (error: Error) => void,
|
|
onError?: (error: Error) => void,
|
|
|
onReasoningChunk?: (chunk: string) => void,
|
|
onReasoningChunk?: (chunk: string) => void,
|
|
|
|
|
+ onToolCallChunk?: (chunk: string) => void,
|
|
|
onModel?: (model: string) => void,
|
|
onModel?: (model: string) => void,
|
|
|
onFirstValidChunk?: () => void,
|
|
onFirstValidChunk?: () => void,
|
|
|
conversationId?: string,
|
|
conversationId?: string,
|
|
@@ -282,11 +311,53 @@ export class ChatService {
|
|
|
const decoder = new TextDecoder();
|
|
const decoder = new TextDecoder();
|
|
|
let aggregatedContent = '';
|
|
let aggregatedContent = '';
|
|
|
let fullReasoningContent = '';
|
|
let fullReasoningContent = '';
|
|
|
|
|
+ let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
|
|
|
let hasReceivedData = false;
|
|
let hasReceivedData = false;
|
|
|
let lastTimings: ChatMessageTimings | undefined;
|
|
let lastTimings: ChatMessageTimings | undefined;
|
|
|
let streamFinished = false;
|
|
let streamFinished = false;
|
|
|
let modelEmitted = false;
|
|
let modelEmitted = false;
|
|
|
let firstValidChunkEmitted = false;
|
|
let firstValidChunkEmitted = false;
|
|
|
|
|
+ let toolCallIndexOffset = 0;
|
|
|
|
|
+ let hasOpenToolCallBatch = false;
|
|
|
|
|
+
|
|
|
|
|
+ const finalizeOpenToolCallBatch = () => {
|
|
|
|
|
+ if (!hasOpenToolCallBatch) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ toolCallIndexOffset = aggregatedToolCalls.length;
|
|
|
|
|
+ hasOpenToolCallBatch = false;
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
|
|
|
|
|
+ if (!toolCalls || toolCalls.length === 0) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ aggregatedToolCalls = this.mergeToolCallDeltas(
|
|
|
|
|
+ aggregatedToolCalls,
|
|
|
|
|
+ toolCalls,
|
|
|
|
|
+ toolCallIndexOffset
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ if (aggregatedToolCalls.length === 0) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ hasOpenToolCallBatch = true;
|
|
|
|
|
+
|
|
|
|
|
+ const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
|
|
|
|
|
+
|
|
|
|
|
+ if (!serializedToolCalls) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ hasReceivedData = true;
|
|
|
|
|
+
|
|
|
|
|
+ if (!abortSignal?.aborted) {
|
|
|
|
|
+ onToolCallChunk?.(serializedToolCalls);
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
|
|
|
try {
|
|
try {
|
|
|
let chunk = '';
|
|
let chunk = '';
|
|
@@ -325,6 +396,7 @@ export class ChatService {
|
|
|
|
|
|
|
|
const content = parsed.choices[0]?.delta?.content;
|
|
const content = parsed.choices[0]?.delta?.content;
|
|
|
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
|
|
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
|
|
|
|
|
+ const toolCalls = parsed.choices[0]?.delta?.tool_calls;
|
|
|
const timings = parsed.timings;
|
|
const timings = parsed.timings;
|
|
|
const promptProgress = parsed.prompt_progress;
|
|
const promptProgress = parsed.prompt_progress;
|
|
|
|
|
|
|
@@ -342,6 +414,7 @@ export class ChatService {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if (content) {
|
|
if (content) {
|
|
|
|
|
+ finalizeOpenToolCallBatch();
|
|
|
hasReceivedData = true;
|
|
hasReceivedData = true;
|
|
|
aggregatedContent += content;
|
|
aggregatedContent += content;
|
|
|
if (!abortSignal?.aborted) {
|
|
if (!abortSignal?.aborted) {
|
|
@@ -350,12 +423,15 @@ export class ChatService {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if (reasoningContent) {
|
|
if (reasoningContent) {
|
|
|
|
|
+ finalizeOpenToolCallBatch();
|
|
|
hasReceivedData = true;
|
|
hasReceivedData = true;
|
|
|
fullReasoningContent += reasoningContent;
|
|
fullReasoningContent += reasoningContent;
|
|
|
if (!abortSignal?.aborted) {
|
|
if (!abortSignal?.aborted) {
|
|
|
onReasoningChunk?.(reasoningContent);
|
|
onReasoningChunk?.(reasoningContent);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ processToolCallDelta(toolCalls);
|
|
|
} catch (e) {
|
|
} catch (e) {
|
|
|
console.error('Error parsing JSON chunk:', e);
|
|
console.error('Error parsing JSON chunk:', e);
|
|
|
}
|
|
}
|
|
@@ -368,12 +444,26 @@ export class ChatService {
|
|
|
if (abortSignal?.aborted) return;
|
|
if (abortSignal?.aborted) return;
|
|
|
|
|
|
|
|
if (streamFinished) {
|
|
if (streamFinished) {
|
|
|
- if (!hasReceivedData && aggregatedContent.length === 0) {
|
|
|
|
|
|
|
+ finalizeOpenToolCallBatch();
|
|
|
|
|
+
|
|
|
|
|
+ if (
|
|
|
|
|
+ !hasReceivedData &&
|
|
|
|
|
+ aggregatedContent.length === 0 &&
|
|
|
|
|
+ aggregatedToolCalls.length === 0
|
|
|
|
|
+ ) {
|
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
|
throw noResponseError;
|
|
throw noResponseError;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
|
|
|
|
|
|
+ const finalToolCalls =
|
|
|
|
|
+ aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
|
|
|
|
|
+
|
|
|
|
|
+ onComplete?.(
|
|
|
|
|
+ aggregatedContent,
|
|
|
|
|
+ fullReasoningContent || undefined,
|
|
|
|
|
+ lastTimings,
|
|
|
|
|
+ finalToolCalls
|
|
|
|
|
+ );
|
|
|
}
|
|
}
|
|
|
} catch (error) {
|
|
} catch (error) {
|
|
|
const err = error instanceof Error ? error : new Error('Stream error');
|
|
const err = error instanceof Error ? error : new Error('Stream error');
|
|
@@ -386,6 +476,54 @@ export class ChatService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ private mergeToolCallDeltas(
|
|
|
|
|
+ existing: ApiChatCompletionToolCall[],
|
|
|
|
|
+ deltas: ApiChatCompletionToolCallDelta[],
|
|
|
|
|
+ indexOffset = 0
|
|
|
|
|
+ ): ApiChatCompletionToolCall[] {
|
|
|
|
|
+ const result = existing.map((call) => ({
|
|
|
|
|
+ ...call,
|
|
|
|
|
+ function: call.function ? { ...call.function } : undefined
|
|
|
|
|
+ }));
|
|
|
|
|
+
|
|
|
|
|
+ for (const delta of deltas) {
|
|
|
|
|
+ const index =
|
|
|
|
|
+ typeof delta.index === 'number' && delta.index >= 0
|
|
|
|
|
+ ? delta.index + indexOffset
|
|
|
|
|
+ : result.length;
|
|
|
|
|
+
|
|
|
|
|
+ while (result.length <= index) {
|
|
|
|
|
+ result.push({ function: undefined });
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const target = result[index]!;
|
|
|
|
|
+
|
|
|
|
|
+ if (delta.id) {
|
|
|
|
|
+ target.id = delta.id;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (delta.type) {
|
|
|
|
|
+ target.type = delta.type;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (delta.function) {
|
|
|
|
|
+ const fn = target.function ? { ...target.function } : {};
|
|
|
|
|
+
|
|
|
|
|
+ if (delta.function.name) {
|
|
|
|
|
+ fn.name = delta.function.name;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (delta.function.arguments) {
|
|
|
|
|
+ fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ target.function = fn;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return result;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
/**
|
|
/**
|
|
|
* Handles non-streaming response from the chat completion API.
|
|
* Handles non-streaming response from the chat completion API.
|
|
|
* Parses the JSON response and extracts the generated content.
|
|
* Parses the JSON response and extracts the generated content.
|
|
@@ -401,9 +539,11 @@ export class ChatService {
|
|
|
onComplete?: (
|
|
onComplete?: (
|
|
|
response: string,
|
|
response: string,
|
|
|
reasoningContent?: string,
|
|
reasoningContent?: string,
|
|
|
- timings?: ChatMessageTimings
|
|
|
|
|
|
|
+ timings?: ChatMessageTimings,
|
|
|
|
|
+ toolCalls?: string
|
|
|
) => void,
|
|
) => void,
|
|
|
onError?: (error: Error) => void,
|
|
onError?: (error: Error) => void,
|
|
|
|
|
+ onToolCallChunk?: (chunk: string) => void,
|
|
|
onModel?: (model: string) => void
|
|
onModel?: (model: string) => void
|
|
|
): Promise<string> {
|
|
): Promise<string> {
|
|
|
try {
|
|
try {
|
|
@@ -423,17 +563,31 @@ export class ChatService {
|
|
|
|
|
|
|
|
const content = data.choices[0]?.message?.content || '';
|
|
const content = data.choices[0]?.message?.content || '';
|
|
|
const reasoningContent = data.choices[0]?.message?.reasoning_content;
|
|
const reasoningContent = data.choices[0]?.message?.reasoning_content;
|
|
|
|
|
+ const toolCalls = data.choices[0]?.message?.tool_calls;
|
|
|
|
|
|
|
|
if (reasoningContent) {
|
|
if (reasoningContent) {
|
|
|
console.log('Full reasoning content:', reasoningContent);
|
|
console.log('Full reasoning content:', reasoningContent);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if (!content.trim()) {
|
|
|
|
|
|
|
+ let serializedToolCalls: string | undefined;
|
|
|
|
|
+
|
|
|
|
|
+ if (toolCalls && toolCalls.length > 0) {
|
|
|
|
|
+ const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
|
|
|
|
|
+
|
|
|
|
|
+ if (mergedToolCalls.length > 0) {
|
|
|
|
|
+ serializedToolCalls = JSON.stringify(mergedToolCalls);
|
|
|
|
|
+ if (serializedToolCalls) {
|
|
|
|
|
+ onToolCallChunk?.(serializedToolCalls);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (!content.trim() && !serializedToolCalls) {
|
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
|
throw noResponseError;
|
|
throw noResponseError;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- onComplete?.(content, reasoningContent);
|
|
|
|
|
|
|
+ onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
|
|
|
|
|
|
|
|
return content;
|
|
return content;
|
|
|
} catch (error) {
|
|
} catch (error) {
|