const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};
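
// These defaults are merged with, and overridden by, the `params` argument
// of each call (see `completionParams` below), e.g.:
//
//   llama("Tell me a joke", { temperature: 0.8 })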
let generation_settings = null;

// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//   import { llama } from '/completion.js'
//
//   const request = llama("Tell me a joke", { n_predict: 800 })
//   for await (const chunk of request) {
//     document.write(chunk.data.content)
//   }
//
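// To stop generation early, pass your own AbortController in `config`;
// it is wired to the underlying fetch, so aborting it cancels the request:
//
//   const controller = new AbortController()
//   const request = llama("Tell me a joke", {}, { controller })
//   // ... later:
//   controller.abort()
//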
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? { 'Authorization': `Bearer ${params.api_key}` } : {})
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Add any leftover data to the current chunk of data.
      // `stream: true` keeps multi-byte UTF-8 characters that are split
      // across chunks from being mangled during decoding.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      // Split the text into lines
      let lines = text.split('\n');

      // If the text doesn't end with a line break, then the last line is incomplete.
      // Store it in leftover to be added to the next chunk of data.
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = ""; // Reset leftover if we have a line break at the end
      }

      // Parse all SSE events and add them to result.
      // The regex must not use the global flag: a global regex keeps its
      // lastIndex between exec() calls, which can silently skip matches
      // on subsequent lines.
      const regex = /^(\S+):\s(.*)$/;
      for (const line of lines) {
        const match = regex.exec(line);
        if (match) {
          result[match[1]] = match[2];

          if (result.data === '[DONE]') {
            cont = false;
            break;
          }

          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // yield
            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }

          if (result.error) {
            // Parse the error payload in its own try/catch so that the
            // "slot unavailable" error thrown below is not swallowed by
            // the JSON.parse error handler.
            let parsedError = null;
            try {
              parsedError = JSON.parse(result.error);
            } catch (e) {
              console.error(`llama.cpp error ${result.error}`);
            }
            if (parsedError) {
              result.error = parsedError;
              if (result.error.message.includes('slot unavailable')) {
                // Throw an error to be caught by upstream callers
                throw new Error('slot unavailable');
              } else {
                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
              }
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  } finally {
    controller.abort();
  }

  return content;
}

// Call llama, return an event target that you can subscribe to
//
// Example:
//
//   import { llamaEventTarget } from '/completion.js'
//
//   const conn = llamaEventTarget(prompt)
//   conn.addEventListener("message", (chunk) => {
//     document.write(chunk.detail.content)
//   })
//
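// The target also emits a "done" event (detail: { content }) when the
// stream finishes, plus "generation_settings" and "timings" events when
// the server includes those fields in a chunk:
//
//   conn.addEventListener("done", (e) => {
//     console.log(e.detail.content)
//   })
//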
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      // Guard every access: non-data SSE events have no `data` field.
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));

        if (chunk.data.generation_settings) {
          eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
        }
        if (chunk.data.timings) {
          eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
        }
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}

// Call llama, return a promise that resolves to the completed text. This
// does not support streaming.
//
// Example:
//
//   llamaPromise(prompt).then((content) => {
//     document.write(content)
//   })
//
// or
//
//   const content = await llamaPromise(prompt)
//   document.write(content)
//
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
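
// Note: llama() throws Error('slot unavailable') when the server has no
// free slot, so callers of llamaPromise may want to catch and retry:
//
//   try {
//     const content = await llamaPromise(prompt)
//   } catch (e) {
//     if (e.message === 'slot unavailable') { /* retry later */ }
//   }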

/**
 * (deprecated) Use llama() directly instead; this only wraps it with a
 * callback interface.
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
}

// Get the model info from the server. This is useful for getting the
// context window and so on. The result is cached in generation_settings
// after the first call (or after the first completed generation).
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const api_url = config.api_url?.replace(/\/+$/, '') || "";
    const props = await fetch(`${api_url}/props`).then(r => r.json());
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
}
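
// Example (a sketch; field names come from the server's
// default_generation_settings and may vary across llama.cpp versions):
//
//   const info = await llamaModelInfo()
//   console.log(`context size: ${info.n_ctx}`)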