slots.ts 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. import { config } from '$lib/stores/settings.svelte';
  2. /**
  3. * SlotsService - Real-time processing state monitoring and token rate calculation
  4. *
  5. * This service provides real-time information about generation progress, token rates,
  6. * and context usage based on timing data from ChatService streaming responses.
  7. * It manages streaming session tracking and provides accurate processing state updates.
  8. *
  9. * **Architecture & Relationships:**
  10. * - **SlotsService** (this class): Processing state monitoring
  11. * - Receives timing data from ChatService streaming responses
  12. * - Calculates token generation rates and context usage
  13. * - Manages streaming session lifecycle
  14. * - Provides real-time updates to UI components
  15. *
  16. * - **ChatService**: Provides timing data from `/chat/completions` streaming
  17. * - **UI Components**: Subscribe to processing state for progress indicators
  18. *
  19. * **Key Features:**
  20. * - **Real-time Monitoring**: Live processing state during generation
  21. * - **Token Rate Calculation**: Accurate tokens/second from timing data
  22. * - **Context Tracking**: Current context usage and remaining capacity
  23. * - **Streaming Lifecycle**: Start/stop tracking for streaming sessions
  24. * - **Timing Data Processing**: Converts streaming timing data to structured state
  25. * - **Error Handling**: Graceful handling when timing data is unavailable
  26. *
  27. * **Processing States:**
  28. * - `idle`: No active processing
  29. * - `generating`: Actively generating tokens
  30. *
  31. * **Token Rate Calculation:**
  32. * Uses timing data from `/chat/completions` streaming response for accurate
  33. * real-time token generation rate measurement.
  34. */
  35. export class SlotsService {
  36. private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set();
  37. private isStreamingActive: boolean = false;
  38. private lastKnownState: ApiProcessingState | null = null;
  39. private conversationStates: Map<string, ApiProcessingState | null> = new Map();
  40. private activeConversationId: string | null = null;
  41. /**
  42. * Start streaming session tracking
  43. */
  44. startStreaming(): void {
  45. this.isStreamingActive = true;
  46. }
  47. /**
  48. * Stop streaming session tracking
  49. */
  50. stopStreaming(): void {
  51. this.isStreamingActive = false;
  52. }
  53. /**
  54. * Clear the current processing state
  55. * Used when switching to a conversation without timing data
  56. */
  57. clearState(): void {
  58. this.lastKnownState = null;
  59. for (const callback of this.callbacks) {
  60. try {
  61. callback(null);
  62. } catch (error) {
  63. console.error('Error in clearState callback:', error);
  64. }
  65. }
  66. }
  67. /**
  68. * Check if currently in a streaming session
  69. */
  70. isStreaming(): boolean {
  71. return this.isStreamingActive;
  72. }
  73. /**
  74. * Set the active conversation for statistics display
  75. */
  76. setActiveConversation(conversationId: string | null): void {
  77. this.activeConversationId = conversationId;
  78. this.notifyCallbacks();
  79. }
  80. /**
  81. * Update processing state for a specific conversation
  82. */
  83. updateConversationState(conversationId: string, state: ApiProcessingState | null): void {
  84. this.conversationStates.set(conversationId, state);
  85. if (conversationId === this.activeConversationId) {
  86. this.lastKnownState = state;
  87. this.notifyCallbacks();
  88. }
  89. }
  90. /**
  91. * Get processing state for a specific conversation
  92. */
  93. getConversationState(conversationId: string): ApiProcessingState | null {
  94. return this.conversationStates.get(conversationId) || null;
  95. }
  96. /**
  97. * Clear state for a specific conversation
  98. */
  99. clearConversationState(conversationId: string): void {
  100. this.conversationStates.delete(conversationId);
  101. if (conversationId === this.activeConversationId) {
  102. this.lastKnownState = null;
  103. this.notifyCallbacks();
  104. }
  105. }
  106. /**
  107. * Notify all callbacks with current state
  108. */
  109. private notifyCallbacks(): void {
  110. const currentState = this.activeConversationId
  111. ? this.conversationStates.get(this.activeConversationId) || null
  112. : this.lastKnownState;
  113. for (const callback of this.callbacks) {
  114. try {
  115. callback(currentState);
  116. } catch (error) {
  117. console.error('Error in slots service callback:', error);
  118. }
  119. }
  120. }
  121. /**
  122. * @deprecated Polling is no longer used - timing data comes from ChatService streaming response
  123. * This method logs a warning if called to help identify outdated usage
  124. */
  125. fetchAndNotify(): void {
  126. console.warn(
  127. 'SlotsService.fetchAndNotify() is deprecated - use timing data from ChatService instead'
  128. );
  129. }
  130. subscribe(callback: (state: ApiProcessingState | null) => void): () => void {
  131. this.callbacks.add(callback);
  132. if (this.lastKnownState) {
  133. callback(this.lastKnownState);
  134. }
  135. return () => {
  136. this.callbacks.delete(callback);
  137. };
  138. }
  139. /**
  140. * Updates processing state with timing data from ChatService streaming response
  141. */
  142. async updateFromTimingData(
  143. timingData: {
  144. prompt_n: number;
  145. predicted_n: number;
  146. predicted_per_second: number;
  147. cache_n: number;
  148. prompt_progress?: ChatMessagePromptProgress;
  149. },
  150. conversationId?: string
  151. ): Promise<void> {
  152. const processingState = await this.parseCompletionTimingData(timingData);
  153. if (processingState === null) {
  154. console.warn('Failed to parse timing data - skipping update');
  155. return;
  156. }
  157. if (conversationId) {
  158. this.updateConversationState(conversationId, processingState);
  159. } else {
  160. this.lastKnownState = processingState;
  161. this.notifyCallbacks();
  162. }
  163. }
  164. /**
  165. * Gets context total from last known slots data or fetches from server
  166. */
  167. private async getContextTotal(): Promise<number | null> {
  168. if (this.lastKnownState && this.lastKnownState.contextTotal > 0) {
  169. return this.lastKnownState.contextTotal;
  170. }
  171. try {
  172. const currentConfig = config();
  173. const apiKey = currentConfig.apiKey?.toString().trim();
  174. const response = await fetch(`./slots`, {
  175. headers: {
  176. ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
  177. }
  178. });
  179. if (response.ok) {
  180. const slotsData = await response.json();
  181. if (Array.isArray(slotsData) && slotsData.length > 0) {
  182. const slot = slotsData[0];
  183. if (slot.n_ctx && slot.n_ctx > 0) {
  184. return slot.n_ctx;
  185. }
  186. }
  187. }
  188. } catch (error) {
  189. console.warn('Failed to fetch context total from /slots:', error);
  190. }
  191. return 4096;
  192. }
  193. private async parseCompletionTimingData(
  194. timingData: Record<string, unknown>
  195. ): Promise<ApiProcessingState | null> {
  196. const promptTokens = (timingData.prompt_n as number) || 0;
  197. const predictedTokens = (timingData.predicted_n as number) || 0;
  198. const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
  199. const cacheTokens = (timingData.cache_n as number) || 0;
  200. const promptProgress = timingData.prompt_progress as
  201. | {
  202. total: number;
  203. cache: number;
  204. processed: number;
  205. time_ms: number;
  206. }
  207. | undefined;
  208. const contextTotal = await this.getContextTotal();
  209. if (contextTotal === null) {
  210. console.warn('No context total available - cannot calculate processing state');
  211. return null;
  212. }
  213. const currentConfig = config();
  214. const outputTokensMax = currentConfig.max_tokens || -1;
  215. const contextUsed = promptTokens + cacheTokens + predictedTokens;
  216. const outputTokensUsed = predictedTokens;
  217. const progressPercent = promptProgress
  218. ? Math.round((promptProgress.processed / promptProgress.total) * 100)
  219. : undefined;
  220. return {
  221. status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle',
  222. tokensDecoded: predictedTokens,
  223. tokensRemaining: outputTokensMax - predictedTokens,
  224. contextUsed,
  225. contextTotal,
  226. outputTokensUsed,
  227. outputTokensMax,
  228. hasNextToken: predictedTokens > 0,
  229. tokensPerSecond,
  230. temperature: currentConfig.temperature ?? 0.8,
  231. topP: currentConfig.top_p ?? 0.95,
  232. speculative: false,
  233. progressPercent,
  234. promptTokens,
  235. cacheTokens
  236. };
  237. }
  238. /**
  239. * Get current processing state
  240. * Returns the last known state from timing data, or null if no data available
  241. * If activeConversationId is set, returns state for that conversation
  242. */
  243. async getCurrentState(): Promise<ApiProcessingState | null> {
  244. if (this.activeConversationId) {
  245. const conversationState = this.conversationStates.get(this.activeConversationId);
  246. if (conversationState) {
  247. return conversationState;
  248. }
  249. }
  250. if (this.lastKnownState) {
  251. return this.lastKnownState;
  252. }
  253. try {
  254. const { chatStore } = await import('$lib/stores/chat.svelte');
  255. const messages = chatStore.activeMessages;
  256. for (let i = messages.length - 1; i >= 0; i--) {
  257. const message = messages[i];
  258. if (message.role === 'assistant' && message.timings) {
  259. const restoredState = await this.parseCompletionTimingData({
  260. prompt_n: message.timings.prompt_n || 0,
  261. predicted_n: message.timings.predicted_n || 0,
  262. predicted_per_second:
  263. message.timings.predicted_n && message.timings.predicted_ms
  264. ? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
  265. : 0,
  266. cache_n: message.timings.cache_n || 0
  267. });
  268. if (restoredState) {
  269. this.lastKnownState = restoredState;
  270. return restoredState;
  271. }
  272. }
  273. }
  274. } catch (error) {
  275. console.warn('Failed to restore timing data from messages:', error);
  276. }
  277. return null;
  278. }
  279. }
  280. export const slotsService = new SlotsService();