const paramDefaults = {
  stream: true,
  temperature: 0.2,
};

let generation_settings = null;
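
// Error type thrown by llama() when the server responds with an error.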
export class CompletionError extends Error {
  constructor(message, name, data) {
    super(message);
    this.name = name;
    this.data = data;
  }
};

// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  const status = response.status;
  if (status !== 200) {
    try {
      const body = await response.json();
      if (body && body.error && body.error.message) {
        throw new CompletionError(body.error.message, 'ServerError');
      }
    } catch (err) {
      throw new CompletionError(err.message, 'ServerError');
    }
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Add any leftover data to the current chunk of data
      // ({ stream: true } keeps multi-byte characters that span chunks intact)
      const text = leftover + decoder.decode(result.value, { stream: true });

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      // Split the text into lines
      let lines = text.split('\n');

      // If the text doesn't end with a line break, then the last line is incomplete
      // Store it in leftover to be added to the next chunk of data
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = ""; // Reset leftover if we have a line break at the end
      }
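
      // Each complete line is expected to be an SSE field of the form "name: value", e.g.
      //   data: {"content":"...", "stop":false, ...}
      //   error: {"code":..., "type":"...", "message":"..."}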
      // Parse all sse events and add them to result
      for (const line of lines) {
        const match = /^(\S+):\s(.*)$/.exec(line);
        if (match) {
          result[match[1]] = match[2];

          if (result.data === '[DONE]') {
            cont = false;
            break;
          }

          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // yield
            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }

          if (result.error) {
            try {
              result.error = JSON.parse(result.error);
              if (result.error.message.includes('slot unavailable')) {
                // Throw an error to be caught by upstream callers
                throw new Error('slot unavailable');
              } else {
                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
              }
            } catch (e) {
              if (e.message === 'slot unavailable') {
                throw e; // re-throw so it actually reaches upstream callers
              }
              console.error(`llama.cpp error ${result.error}`);
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }

  return content;
}

// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
      }
      if (chunk.data?.generation_settings) {
        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
      }
      if (chunk.data?.timings) {
        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}

// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//    llamaPromise(prompt).then((content) => {
//      document.write(content)
//    })
//
//    or
//
//    const content = await llamaPromise(prompt)
//    document.write(content)
//
export const llamaPromise = (prompt, params = {}, config = {}) => {
  return new Promise(async (resolve, reject) => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        content += chunk.data.content;
      }
      resolve(content);
    } catch (error) {
      reject(error);
    }
  });
};

/**
 * (deprecated)
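 *
 * Calls llama() with params.prompt and invokes callback(chunk) for each streamed chunk.
 * Prefer using llama() directly in new code.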
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
}

// Get the model info from the server. This is useful for getting the context window and so on.
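//
// Example (assumes the server's /props response exposes the context size as n_ctx):
//
//    const info = await llamaModelInfo()
//    document.write(`context size: ${info.n_ctx}`)
//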
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const api_url = config.api_url?.replace(/\/+$/, '') || "";
    const props = await fetch(`${api_url}/props`).then(r => r.json());
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
}