1 an în urmă · 4ffc7a17d4
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1163,13 +1163,30 @@ struct llama_server_context
 
				         task.multitask_id = multitask_id;
			
 
				 
			
 
				         // when a completion task's prompt array is not a singleton, we split it into multiple requests
			
 
				-        if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
			
 
				-        {
			
 
				-            split_multiprompt_task(task_id, task);
			
 
				-        }
			
 
				-
			
 
				         // otherwise, it's a single-prompt task, we actually queue it
			
 
				-        queue_tasks.post(task);
			
 
				+        // if there are numbers in the prompt array, it will be treated as an array of tokens
			
 
				+        if (task.data.count("prompt") != 0 && task.data.at("prompt").size() > 1) {
			
 
				+            bool numbers = false;
			
 
				+            for (const auto& e : task.data.at("prompt")) {
			
 
				+                if (e.is_number()) {
			
 
				+                    numbers = true;
			
 
				+                    break;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            // NOTE: split_multiprompt_task() does not handle a mix of strings and numbers,
			
 
				+            // which completely stalls the server. The root cause of that bug has not been identified yet.
			
 
				+            //
			
 
				+            // if there are numbers, it needs to be treated like a single prompt,
			
 
				+            // queue_tasks handles a mix of strings and numbers just fine.
			
 
				+            if (numbers) {
			
 
				+                queue_tasks.post(task);
			
 
				+            } else {
			
 
				+                split_multiprompt_task(task_id, task);
			
 
				+            }
			
 
				+        } else {
			
 
				+            queue_tasks.post(task);
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				     // for multiple images processing
			
@@ -1251,7 +1268,10 @@ struct llama_server_context
 
				     void split_multiprompt_task(int multitask_id, task_server& multiprompt_task)
			
 
				     {
			
 
				         int prompt_count = multiprompt_task.data.at("prompt").size();
			
 
				-        assert(prompt_count > 1);
			
 
				+        if (prompt_count <= 1) {
			
 
				+            send_error(multiprompt_task, "error while handling multiple prompts");
			
 
				+            return;
			
 
				+        }
			
 
				 
			
 
				         // generate all the ID for subtask
			
 
				         std::vector<int> subtask_ids(prompt_count);