|
@@ -138,6 +138,9 @@ struct slot_params {
|
|
|
std::string oaicompat_cmpl_id;
|
|
std::string oaicompat_cmpl_id;
|
|
|
common_chat_syntax oaicompat_chat_syntax;
|
|
common_chat_syntax oaicompat_chat_syntax;
|
|
|
|
|
|
|
|
|
|
+ // Embeddings
|
|
|
|
|
+ int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
|
|
|
|
|
+
|
|
|
json to_json() const {
|
|
json to_json() const {
|
|
|
std::vector<std::string> samplers;
|
|
std::vector<std::string> samplers;
|
|
|
samplers.reserve(sampling.samplers.size());
|
|
samplers.reserve(sampling.samplers.size());
|
|
@@ -2601,7 +2604,7 @@ struct server_context {
|
|
|
|
|
|
|
|
// normalize only when there is pooling
|
|
// normalize only when there is pooling
|
|
|
if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) {
|
|
if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) {
|
|
|
- common_embd_normalize(embd, embd_res.data(), n_embd, 2);
|
|
|
|
|
|
|
+ common_embd_normalize(embd, embd_res.data(), n_embd, slot.params.embd_normalize);
|
|
|
res->embedding.push_back(embd_res);
|
|
res->embedding.push_back(embd_res);
|
|
|
break;
|
|
break;
|
|
|
} else {
|
|
} else {
|
|
@@ -4614,6 +4617,14 @@ int main(int argc, char ** argv) {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ int embd_normalize = 2; // default to Euclidean/L2 norm
|
|
|
|
|
+ if (body.count("embd_normalize") != 0) {
|
|
|
|
|
+ embd_normalize = body.at("embd_normalize");
|
|
|
|
|
+ if (llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) {
|
|
|
|
|
+ SRV_DBG("embd_normalize is not supported by pooling type %d, ignoring it\n", llama_pooling_type(ctx_server.ctx));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
// create and queue the task
|
|
// create and queue the task
|
|
|
json responses = json::array();
|
|
json responses = json::array();
|
|
|
bool error = false;
|
|
bool error = false;
|
|
@@ -4629,6 +4640,7 @@ int main(int argc, char ** argv) {
|
|
|
|
|
|
|
|
// OAI-compat
|
|
// OAI-compat
|
|
|
task.params.oaicompat = oaicompat;
|
|
task.params.oaicompat = oaicompat;
|
|
|
|
|
+ task.params.embd_normalize = embd_normalize;
|
|
|
|
|
|
|
|
tasks.push_back(std::move(task));
|
|
tasks.push_back(std::move(task));
|
|
|
}
|
|
}
|