completion.js

// Default completion parameters; anything passed in `params` overrides these.
const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};

// Generation settings, as last reported by the server
// (updated by llama() and llamaModelInfo()).
let generation_settings = null;

// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });
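
  // The server answers with a server-sent-events stream: each event is a
  // "field: value" line, normally `data: {...}` with a JSON payload, and
  // events are separated by blank lines.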
  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Add any leftover data to the current chunk of data.
      // `stream: true` keeps multi-byte UTF-8 sequences that are split
      // across reads intact.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      // Split the text into lines
      let lines = text.split('\n');

      // If the text doesn't end with a line break, then the last line is incomplete
      // Store it in leftover to be added to the next chunk of data
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = ""; // Reset leftover if we have a line break at the end
      }

      // Parse all sse events and add them to result. The regex is
      // deliberately non-global: a /g/ regex keeps `lastIndex` between
      // exec() calls and would silently skip alternate lines.
      const regex = /^(\S+):\s(.*)$/;
      for (const line of lines) {
        const match = regex.exec(line);
        if (match) {
          result[match[1]] = match[2];
          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // yield
            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
          if (result.error) {
            result.error = JSON.parse(result.error);
            console.error(`llama.cpp error: ${result.error.content}`);
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }

  return content;
}
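
// Illustrative sketch (not part of the original examples): cancelling an
// in-flight completion by passing your own AbortController through `config`.
// The `#stop` button is a hypothetical page element. Note that llama()
// rethrows the AbortError, so the caller should expect it.
//
//    import { llama } from '/completion.js'
//
//    const controller = new AbortController()
//    document.querySelector('#stop').onclick = () => controller.abort()
//    try {
//      for await (const chunk of llama("Write a story", {}, { controller })) {
//        document.write(chunk.data.content)
//      }
//    } catch (e) {
//      if (e.name !== 'AbortError') throw e // abort is expected here
//    }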

// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      // Keep all chunk.data accesses inside this guard so chunks without a
      // data field cannot cause a TypeError.
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
        if (chunk.data.generation_settings) {
          eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
        }
        if (chunk.data.timings) {
          eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
        }
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}
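
// The target also emits "generation_settings", "timings", and a final "done"
// event whose detail carries the full completion, e.g.:
//
//    conn.addEventListener("done", (e) => {
//      console.log(e.detail.content)
//    })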

// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//    llamaPromise(prompt).then((content) => {
//      document.write(content)
//    })
//
// or
//
//    const content = await llamaPromise(prompt)
//    document.write(content)
//
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  // An async function already returns a promise that rejects on a thrown
  // error, so no explicit Promise executor is needed.
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};

/**
 * (deprecated): prefer the llama() generator above.
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
}

// Get the model info from the server. This is useful for getting the context window and so on.
export const llamaModelInfo = async () => {
  if (!generation_settings) {
    generation_settings = await fetch("/model.json").then(r => r.json());
  }
  return generation_settings;
}
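
// Example (sketch): the exact fields depend on what the server reports;
// `n_ctx` (the context window size) is assumed here.
//
//    const info = await llamaModelInfo()
//    console.log(`context window: ${info.n_ctx}`)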