const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};

let generation_settings = null;

// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // Buffer for partially read lines

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Add any leftover data to the current chunk of data
      const text = leftover + decoder.decode(result.value);

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      // Split the text into lines
      let lines = text.split('\n');

      // If the text doesn't end with a line break, then the last line is incomplete
      // Store it in leftover to be added to the next chunk of data
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = ""; // Reset leftover if we have a line break at the end
      }

      // Parse all SSE events and add them to result
      const regex = /^(\S+):\s(.*)$/gm;
      for (const line of lines) {
        const match = regex.exec(line);
        if (match) {
          result[match[1]] = match[2];
          // since we know this is llama.cpp, let's just decode the json in data
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // yield
            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }

  return content;
}
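
// Illustrative sketch: cancelling an in-flight completion by passing your own
// AbortController via config.controller (the same property read above). The
// 5-second timeout is an arbitrary value chosen for the example.
//
//    const abort = new AbortController()
//    const request = llama("Write a long story", {n_predict: 2048}, {controller: abort})
//
//    setTimeout(() => abort.abort(), 5000) // stop streaming after ~5 seconds
//
//    try {
//      for await (const chunk of request) {
//        document.write(chunk.data.content)
//      }
//    } catch (e) {
//      if (e.name !== 'AbortError') throw e
//    }
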
// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
      }
      if (chunk.data.generation_settings) {
        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
      }
      if (chunk.data.timings) {
        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}
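
// Illustrative sketch: the other events dispatched above can be listened to in
// the same way as "message". The event names and detail shapes come straight
// from the dispatches in llamaEventTarget; what you do with them is up to you.
//
//    const conn = llamaEventTarget("Tell me a joke")
//    conn.addEventListener("generation_settings", (e) => console.log(e.detail))
//    conn.addEventListener("timings", (e) => console.log(e.detail))
//    conn.addEventListener("done", (e) => console.log("full text:", e.detail.content))
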
// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//    llamaPromise(prompt).then((content) => {
//      document.write(content)
//    })
//
//    or
//
//    const content = await llamaPromise(prompt)
//    document.write(content)
//
export const llamaPromise = (prompt, params = {}, config = {}) => {
  return new Promise(async (resolve, reject) => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        content += chunk.data.content;
      }
      resolve(content);
    } catch (error) {
      reject(error);
    }
  });
};

/**
 * (deprecated)
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
}
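
// Illustrative sketch of the deprecated callback style above (prefer llama()).
// The prompt and n_predict values are placeholders for the example.
//
//    const controller = new AbortController()
//    await llamaComplete({ prompt: "Tell me a joke", n_predict: 128 }, controller, (chunk) => {
//      document.write(chunk.data.content)
//    })
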
// Get the model info from the server. This is useful for getting the context window and so on.
export const llamaModelInfo = async () => {
  if (!generation_settings) {
    generation_settings = await fetch("/model.json").then(r => r.json());
  }
  return generation_settings;
}
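
// Illustrative sketch: reading the context window from the returned settings.
// The exact field names depend on what the server puts in /model.json (or in
// generation_settings); n_ctx is an assumption here, not guaranteed by this file.
//
//    const info = await llamaModelInfo()
//    console.log("context size:", info.n_ctx)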