|
@@ -1974,19 +1974,33 @@ struct server_context_impl {
|
|
|
|
|
|
|
|
if (!slot.can_split()) {
|
|
if (!slot.can_split()) {
|
|
|
if (slot.task->n_tokens() > n_ubatch) {
|
|
if (slot.task->n_tokens() > n_ubatch) {
|
|
|
- send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
|
|
|
|
|
|
|
+ send_error(slot,
|
|
|
|
|
+ string_format(
|
|
|
|
|
+ "input (%d tokens) is too large to process. increase the physical batch "
|
|
|
|
|
+ "size (current batch size: %d)",
|
|
|
|
|
+ slot.task->n_tokens(), n_ubatch),
|
|
|
|
|
+ ERROR_TYPE_SERVER);
|
|
|
slot.release();
|
|
slot.release();
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if (slot.task->n_tokens() > slot.n_ctx) {
|
|
if (slot.task->n_tokens() > slot.n_ctx) {
|
|
|
- send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
|
|
|
|
|
|
|
+ send_error(
|
|
|
|
|
+ slot,
|
|
|
|
|
+ string_format(
|
|
|
|
|
+ "input (%d tokens) is larger than the max context size (%d tokens). skipping",
|
|
|
|
|
+ slot.task->n_tokens(), slot.n_ctx),
|
|
|
|
|
+ ERROR_TYPE_EXCEED_CONTEXT_SIZE);
|
|
|
slot.release();
|
|
slot.release();
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
} else {
|
|
} else {
|
|
|
if (slot.task->n_tokens() >= slot.n_ctx) {
|
|
if (slot.task->n_tokens() >= slot.n_ctx) {
|
|
|
- send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
|
|
|
|
|
|
|
+ send_error(slot,
|
|
|
|
|
+ string_format("request (%d tokens) exceeds the available context size (%d "
|
|
|
|
|
+ "tokens), try increasing it",
|
|
|
|
|
+ slot.task->n_tokens(), slot.n_ctx),
|
|
|
|
|
+ ERROR_TYPE_EXCEED_CONTEXT_SIZE);
|
|
|
slot.release();
|
|
slot.release();
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|