1 jaar geleden · a61a94e543
--- a/llama.cpp
+++ b/llama.cpp
@@ -2475,7 +2475,6 @@ static bool llama_kv_cache_init(
 
				 static bool llama_kv_cache_find_slot(
			
 
				            struct llama_kv_cache & cache,
			
 
				         const struct llama_batch & batch) {
			
 
				-    const uint32_t n_ctx    = cache.size;
			
 
				     const uint32_t n_tokens = batch.n_tokens;
			
 
				 
			
 
				     if (cache.recurrent) {
			
@@ -2526,16 +2525,16 @@ static bool llama_kv_cache_find_slot(
 
				     }
			
 
				     // otherwise, one cell per token.
			
 
				 
			
 
				-    if (n_tokens > n_ctx) {
			
 
				-        LLAMA_LOG_ERROR("%s: n_tokens=%d > n_ctx=%d\n", __func__, n_tokens, n_ctx);
			
 
				+    if (n_tokens > cache.size) {
			
 
				+        LLAMA_LOG_ERROR("%s: n_tokens=%d > cache.size=%d\n", __func__, n_tokens, cache.size);
			
 
				         return false;
			
 
				     }
			
 
				 
			
 
				     uint32_t n_tested = 0;
			
 
				 
			
 
				     while (true) {
			
 
				-        if (cache.head + n_tokens > n_ctx) {
			
 
				-            n_tested += n_ctx - cache.head;
			
 
				+        if (cache.head + n_tokens > cache.size) {
			
 
				+            n_tested += cache.size - cache.head;
			
 
				             cache.head = 0;
			
 
				             continue;
			
 
				         }
			
@@ -2554,7 +2553,7 @@ static bool llama_kv_cache_find_slot(
 
				             break;
			
 
				         }
			
 
				 
			
 
				-        if (n_tested >= n_ctx) {
			
 
				+        if (n_tested >= cache.size) {
			
 
				             //LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tokens);
			
 
				             return false;
			
 
				         }