completion.js

// Default request parameters sent to the /completion endpoint; any of these
// can be overridden per call via `params`.
const paramDefaults = {
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: ["</s>"]
};

let generation_settings = null;

// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
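// Cancellation (sketch): pass your own AbortController in `config` and call
// abort() to stop the request early. The abort surfaces as an AbortError
// thrown out of the loop, so catch it if you cancel on purpose; shouldStop()
// below is a hypothetical predicate.
//
//    const controller = new AbortController()
//    try {
//      for await (const chunk of llama("Tell me a joke", {}, { controller })) {
//        if (shouldStop()) controller.abort()
//        document.write(chunk.data.content)
//      }
//    } catch (e) {
//      if (e.name !== 'AbortError') throw e
//    }
//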
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch("/completion", {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream'
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // SSE answers arrive as multiple lines of the form `key: value\n`, with
      // `data` always present as a key. In our case we mainly care about the
      // data: key, whose value we expect to be JSON.
      const text = decoder.decode(result.value);

      // parse all sse events and add them to result
      const regex = /^(\S+):\s(.*)$/gm;
      for (const match of text.matchAll(regex)) {
        result[match[1]] = match[2];
      }

      // since we know this is llama.cpp, let's just decode the json in data
      result.data = JSON.parse(result.data);
      content += result.data.content;

      // yield the parsed event to the caller
      yield result;

      // if we got a stop token from the server, break out of the loop
      if (result.data.stop) {
        if (result.data.generation_settings) {
          generation_settings = result.data.generation_settings;
        }
        break;
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    controller.abort();
  }

  return content;
}

// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
      }
      if (chunk.data.generation_settings) {
        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
      }
      if (chunk.data.timings) {
        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}
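
// Besides "message", llamaEventTarget also dispatches "generation_settings",
// "timings" and "done" events; for example (sketch):
//
//    conn.addEventListener("done", (e) => {
//      console.log("final text:", e.detail.content)
//    })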

// Call llama, return a promise that resolves to the completed text. This does not support streaming.
//
// Example:
//
//    llamaPromise(prompt).then((content) => {
//      document.write(content)
//    })
//
// or
//
//    const content = await llamaPromise(prompt)
//    document.write(content)
//
export const llamaPromise = (prompt, params = {}, config = {}) => {
  return new Promise(async (resolve, reject) => {
    let content = "";
    try {
      for await (const chunk of llama(prompt, params, config)) {
        content += chunk.data.content;
      }
      resolve(content);
    } catch (error) {
      reject(error);
    }
  });
};

/**
 * (deprecated) Prefer the llama() generator above.
 */
export const llamaComplete = async (params, controller, callback) => {
  for await (const chunk of llama(params.prompt, params, { controller })) {
    callback(chunk);
  }
}
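
// Migration sketch: the same callback-style flow written against the llama()
// generator directly (handleChunk is a hypothetical callback):
//
//    const controller = new AbortController()
//    for await (const chunk of llama("Tell me a joke", {}, { controller })) {
//      handleChunk(chunk)
//    }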

// Get the model info from the server. This is useful for getting the context window and so on.
export const llamaModelInfo = async () => {
  if (!generation_settings) {
    generation_settings = await fetch("/model.json").then(r => r.json());
  }
  return generation_settings;
}
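
// Usage sketch (the exact fields depend on the server's /model.json response;
// n_ctx is assumed here for illustration):
//
//    const settings = await llamaModelInfo()
//    console.log("context window:", settings.n_ctx)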