@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
   const decoder = new TextDecoder();

   let content = "";
+  let leftover = ""; // Buffer for partially read lines

   try {
     let cont = true;

@@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
         break;
       }

-      // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
-      // mainly care about the data: key here, which we expect as json
-      const text = decoder.decode(result.value);
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);

-      // parse all sse events and add them to result
-      const regex = /^(\S+):\s(.*)$/gm;
-      for (const match of text.matchAll(regex)) {
-        result[match[1]] = match[2]
-      }
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');

-      // since we know this is llama.cpp, let's just decode the json in data
-      result.data = JSON.parse(result.data);
-      content += result.data.content;
+      // Split the text into lines
+      let lines = text.split('\n');

-      // yield
-      yield result;
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
+      }

-      // if we got a stop token from server, we will break here
-      if (result.data.stop) {
-        if (result.data.generation_settings) {
-          generation_settings = result.data.generation_settings;
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2];
+          // since we know this is llama.cpp, let's just decode the json in data
+          if (result.data) {
+            result.data = JSON.parse(result.data);
+            content += result.data.content;
+
+            // yield
+            yield result;
+
+            // if we got a stop token from server, we will break here
+            if (result.data.stop) {
+              if (result.data.generation_settings) {
+                generation_settings = result.data.generation_settings;
+              }
+              cont = false;
+              break;
+            }
+          }
         }
-        break;
       }
     }
   } catch (e) {
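
For context on the technique: `reader.read()` returns whatever bytes are currently available, so a chunk can end in the middle of an SSE line. The patch keeps the unterminated tail in `leftover` and prepends it to the next chunk before parsing. Below is a minimal sketch of that buffering in isolation; the `makeParser` helper is hypothetical and not part of the patch:

```js
// Returns a stateful parser that reassembles lines split across chunks.
function makeParser() {
  let leftover = "";
  return function parseChunk(chunk) {
    const text = leftover + chunk;
    const lines = text.split('\n');
    // A chunk that doesn't end in '\n' stops mid-line: buffer the tail.
    leftover = text.endsWith('\n') ? "" : lines.pop();
    const events = [];
    for (const line of lines) {
      const match = /^(\S+):\s(.*)$/.exec(line);
      if (match && match[1] === "data") {
        events.push(JSON.parse(match[2]));
      }
    }
    return events;
  };
}

// The chunk boundary falls inside a JSON payload:
const parse = makeParser();
console.log(parse('data: {"content":"Hel'));         // [] -- incomplete line is buffered
console.log(parse('lo"}\n\ndata: {"stop":true}\n')); // both events decoded
```

Feeding the same payload with any other split point decodes the same events; without the `leftover` buffer, the first chunk would have hit a `JSON.parse` error on the truncated `data:` line, which is the failure mode this patch addresses.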