|
|
@@ -3591,13 +3591,13 @@ struct server_context {
|
|
|
// next, batch any pending prompts without exceeding n_batch
|
|
|
if (params_base.cont_batching || batch.n_tokens == 0) {
|
|
|
for (auto & slot : slots) {
|
|
|
+ if (!slot.is_processing()) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
// check if we can batch this slot with the previous one
|
|
|
- if (slot.is_processing()) {
|
|
|
- if (!slot_batched) {
|
|
|
- slot_batched = &slot;
|
|
|
- } else if (!slot_batched->can_batch_with(slot)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
+ if (slot_batched && !slot_batched->can_batch_with(slot)) {
|
|
|
+ continue;
|
|
|
}
|
|
|
|
|
|
// this slot still has a prompt to be processed
|
|
|
@@ -4028,6 +4028,10 @@ struct server_context {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ if (!slot_batched) {
|
|
|
+ slot_batched = &slot;
|
|
|
+ }
|
|
|
+
|
|
|
if (batch.n_tokens >= n_batch) {
|
|
|
break;
|
|
|
}
|