|
@@ -13,6 +13,12 @@ __device__ float __forceinline__ t2f32<half>(half val) {
|
|
|
return __half2float(val);
|
|
return __half2float(val);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+// When ncols_template == 0 the bounds for the loops in this function are not known and can't be unrolled.
|
|
|
|
|
+// As we want to keep pragma unroll for all other cases we suppress the clang transformation warning here.
|
|
|
|
|
+#ifdef __clang__
|
|
|
|
|
+#pragma clang diagnostic push
|
|
|
|
|
+#pragma clang diagnostic ignored "-Wpass-failed"
|
|
|
|
|
+#endif
|
|
|
template <bool use_shared, int ncols_template, int block_size_template, typename T>
|
|
template <bool use_shared, int ncols_template, int block_size_template, typename T>
|
|
|
static __global__ void soft_max_f32(
|
|
static __global__ void soft_max_f32(
|
|
|
const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y,
|
|
const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y,
|
|
@@ -118,6 +124,9 @@ static __global__ void soft_max_f32(
|
|
|
dst[col] = vals[col] * inv_sum;
|
|
dst[col] = vals[col] * inv_sum;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+#ifdef __clang__
|
|
|
|
|
+#pragma clang diagnostic pop
|
|
|
|
|
+#endif
|
|
|
|
|
|
|
|
static __global__ void soft_max_back_f32(
|
|
static __global__ void soft_max_back_f32(
|
|
|
const float * grad, const float * dstf, float * dst, const int ncols, const float scale) {
|
|
const float * grad, const float * dstf, float * dst, const int ncols, const float scale) {
|