пре 1 година · 87fb5b4234
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -15776,7 +15776,7 @@ static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
 
															 #ifdef GGML_SYCL_FORCE_DMMV
														
 
															             const bool use_mul_mat_vec_q = false;
														
 
															 #else
														
 
															-            bool use_mul_mat_vec_q = min_compute_capability >= VER_4VEC && ggml_is_quantized(src0->type) && ggml_nrows(src1) == 1;
														
 
															+            bool use_mul_mat_vec_q = min_compute_capability >= VER_4VEC && ggml_is_quantized(src0->type);
														
 
															             use_mul_mat_vec_q = use_mul_mat_vec_q ||
														
 
															                 (src0->type == GGML_TYPE_IQ2_XXS) || (src0->type == GGML_TYPE_IQ2_XS) || (src0->type == GGML_TYPE_IQ2_S) ||
														
 
															                 (src0->type == GGML_TYPE_IQ3_XXS) || (src0->type == GGML_TYPE_IQ3_S) ||
														
@@ -15787,7 +15787,6 @@ static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
 
															 #endif // GGML_SYCL_FORCE_DMMV
														
 
															             if (use_mul_mat_vec_q) {
														
 
															-                // NOTE: this kernel does not support ggml_nrows(src1) > 1
														
 
															                 // GGML_SYCL_DEBUG("ggml_sycl_mul_mat ggml_sycl_op_mul_mat_vec_q path\n");
														
 
															                 ggml_sycl_op_mul_mat(src0, src1, dst, ggml_sycl_op_mul_mat_vec_q, true);
														
 
															             } else {