|
|
@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
|
|
|
}
|
|
|
|
|
|
static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
|
|
|
-#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000
|
|
|
+#if defined(GGML_USE_HIP)
|
|
|
return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
|
|
|
-#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX
|
|
|
+#elif CUDART_VERSION >= CUDART_HMAX
|
|
|
return __hmax2(a, b);
|
|
|
-#elif !defined(GGML_USE_HIP)
|
|
|
+#else
|
|
|
half2 ret;
|
|
|
reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a), __low2float(b)));
|
|
|
reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
|
|
|
return ret;
|
|
|
-#else
|
|
|
- GGML_UNUSED(a);
|
|
|
- GGML_UNUSED(b);
|
|
|
- NO_DEVICE_CODE;
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
template<int width = WARP_SIZE>
|
|
|
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
|
|
-#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
|
|
|
+#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
|
|
|
#pragma unroll
|
|
|
for (int offset = width/2; offset > 0; offset >>= 1) {
|
|
|
x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
|
|
|
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
|
|
|
#else
|
|
|
GGML_UNUSED(x);
|
|
|
NO_DEVICE_CODE;
|
|
|
-#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
|
|
|
+#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
|
|
|
}
|
|
|
|
|
|
#if CUDART_VERSION < CUDART_HMASK
|