7 mesi fa · df0c0c7d02
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2994,9 +2994,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 
				             {
			
 
				                 struct ggml_tensor * a = op->src[0];
			
 
				                 struct ggml_tensor * b = op->src[1];
			
 
				-                // for small weight matrices the active device can end up without any rows, don't use row split in those cases
			
 
				-                // this avoids some edge cases (and the performance would not be good anyways)
			
 
				                 if (a->buffer && ggml_backend_buft_is_cuda_split(a->buffer->buft)) {
			
 
				+                    if (a->ne[2] > 1 || a->ne[3] > 1) {
			
 
				+                        return false;
			
 
				+                    }
			
 
				+                    // for small weight matrices the active device can end up without any rows, don't use row split in those cases
			
 
				+                    // this avoids some edge cases (and the performance would not be good anyways)
			
 
				                     ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) a->buffer->buft->context;
			
 
				                     int64_t row_low;
			
 
				                     int64_t row_high;