@@ -261,15 +261,29 @@ bool llama_batch_allocr::init(
const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
- if (p0 >= 0 && p0 >= seq_pos_min(s)) {
- LLAMA_LOG_ERROR(
- "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
- " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
- " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
- " for M-RoPE, it is required that the position satisfies: X < Y\n",
- __func__, s, s, p0, s, seq_pos_min(s));
+ if (batch.token) {
+ if (p0 >= 0 && p0 >= seq_pos_min(s)) {
+ LLAMA_LOG_ERROR(
+ "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
+ " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
+ " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
+ " for M-RoPE, it is required that the position satisfies: X < Y\n",
+ __func__, s, s, p0, s, seq_pos_min(s));
- return false;
+ return false;
+ }
+ } else {
+ // embedding inputs can have overlapping positions
+ if (p0 >= 0 && p0 > seq_pos_min(s)) {
+ LLAMA_LOG_ERROR(
+ "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
+ " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
+ " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
+ " for M-RoPE, it is required that the position satisfies: X <= Y\n",
+ __func__, s, s, p0, s, seq_pos_min(s));
+
+ return false;
+ }
}
}
} else {