6 months ago · 62af464227
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -244,11 +244,13 @@ bool llama_batch_allocr::init(
 
				             continue;
			
 
				         }
			
 
				 
			
 
				-        if (memory) {
			
 
				+        const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
			
 
				+
			
 
				+        if (p0 >= 0) {
			
 
				             bool ok = true;
			
 
				 
			
 
				             if (batch.token) {
			
 
				-                if (seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
			
 
				+                if (seq_pos_min(s) != p0 + 1) {
			
 
				                     ok = false;
			
 
				                 }
			
 
				             } else {
			
@@ -256,7 +258,7 @@ bool llama_batch_allocr::init(
 
				 
			
 
				                 // for embeddings (typically used as vision input), we allow them to have repeating positions
			
 
				                 // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
			
 
				-                if (seq_pos_min(s) != memory->seq_pos_max(s) && seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
			
 
				+                if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
			
 
				                     ok = false;
			
 
				                 }
			
 
				             }
			
@@ -267,7 +269,7 @@ bool llama_batch_allocr::init(
 
				                         " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
			
 
				                         " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
			
 
				                         " it is required that the sequence positions remain consecutive: Y = X + 1\n",
			
 
				-                        __func__, s, s, memory->seq_pos_max(s), s, seq_pos_min(s));
			
 
				+                        __func__, s, s, p0, s, seq_pos_min(s));
			
 
				 
			
 
				                 return false;
			
 
				             }