|
@@ -725,12 +725,12 @@ struct server_context {
|
|
|
return nullptr;
|
|
return nullptr;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- server_slot * get_available_slot(const std::string & prompt) {
|
|
|
|
|
|
|
+ server_slot * get_available_slot(const server_task & task) {
|
|
|
server_slot * ret = nullptr;
|
|
server_slot * ret = nullptr;
|
|
|
|
|
|
|
|
// find the slot that has at least n% prompt similarity
|
|
// find the slot that has at least n% prompt similarity
|
|
|
- if (ret == nullptr && slot_prompt_similarity != 0.0f && !prompt.empty()) {
|
|
|
|
|
- int max_lcp_len = 0;
|
|
|
|
|
|
|
+ if (ret == nullptr && slot_prompt_similarity != 0.0f) {
|
|
|
|
|
+ int max_lcs_len = 0;
|
|
|
float similarity = 0;
|
|
float similarity = 0;
|
|
|
|
|
|
|
|
for (server_slot & slot : slots) {
|
|
for (server_slot & slot : slots) {
|
|
@@ -740,25 +740,25 @@ struct server_context {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// skip the slot if it does not contains cached tokens
|
|
// skip the slot if it does not contains cached tokens
|
|
|
- if (slot.prompt_tokens.empty()) {
|
|
|
|
|
|
|
+ if (slot.cache_tokens.empty()) {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // length of the Longest Common Prefix between the current slot's prompt and the input prompt
|
|
|
|
|
- int lcp_len = longest_common_prefix(slot.cache_tokens, slot.prompt_tokens);
|
|
|
|
|
|
|
+ // length of the Longest Common Subsequence between the current slot's prompt and the input prompt
|
|
|
|
|
+ int lcs_len = longest_common_subsequence(slot.cache_tokens, task.prompt_tokens);
|
|
|
|
|
|
|
|
- // fraction of the common substring length compared to the current slot's prompt length
|
|
|
|
|
- similarity = static_cast<float>(lcp_len) / static_cast<int>(slot.prompt_tokens.size());
|
|
|
|
|
|
|
+ // fraction of the common subsequence length compared to the current slot's prompt length
|
|
|
|
|
+ similarity = static_cast<float>(lcs_len) / static_cast<int>(slot.cache_tokens.size());
|
|
|
|
|
|
|
|
// select the current slot if the criteria match
|
|
// select the current slot if the criteria match
|
|
|
- if (lcp_len > max_lcp_len && similarity > slot_prompt_similarity) {
|
|
|
|
|
- max_lcp_len = lcp_len;
|
|
|
|
|
|
|
+ if (lcs_len > max_lcs_len && similarity > slot_prompt_similarity) {
|
|
|
|
|
+ max_lcs_len = lcs_len;
|
|
|
ret = &slot;
|
|
ret = &slot;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if (ret != nullptr) {
|
|
if (ret != nullptr) {
|
|
|
- SLT_DBG(*ret, "selected slot by lcp similarity, max_lcp_len = %d, similarity = %f\n", max_lcp_len, similarity);
|
|
|
|
|
|
|
+ SLT_DBG(*ret, "selected slot by lcs similarity, max_lcs_len = %d, similarity = %f\n", max_lcs_len, similarity);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -1514,18 +1514,7 @@ struct server_context {
|
|
|
{
|
|
{
|
|
|
const int id_slot = json_value(task.data, "id_slot", -1);
|
|
const int id_slot = json_value(task.data, "id_slot", -1);
|
|
|
|
|
|
|
|
- server_slot * slot;
|
|
|
|
|
-
|
|
|
|
|
- if (id_slot != -1) {
|
|
|
|
|
- slot = get_slot_by_id(id_slot);
|
|
|
|
|
- } else {
|
|
|
|
|
- std::string prompt;
|
|
|
|
|
- if (task.data.contains("prompt") && task.data.at("prompt").is_string()) {
|
|
|
|
|
- prompt = json_value(task.data, "prompt", std::string());
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- slot = get_available_slot(prompt);
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ server_slot * slot = id_slot != -1 ? get_slot_by_id(id_slot) : get_available_slot(task);
|
|
|
|
|
|
|
|
if (slot == nullptr) {
|
|
if (slot == nullptr) {
|
|
|
// if no slot is available, we defer this task for processing later
|
|
// if no slot is available, we defer this task for processing later
|