5 luni în urmă · 29c8fbe4e0
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -443,7 +443,7 @@ jobs:
 
				 
			
 
				   ubuntu-22-cmake-hip:
			
 
				     runs-on: ubuntu-22.04
			
 
				-    container: rocm/dev-ubuntu-22.04:6.0.2
			
 
				+    container: rocm/dev-ubuntu-22.04:6.1.2
			
 
				 
			
 
				     steps:
			
 
				       - name: Clone
			
@@ -471,16 +471,6 @@ jobs:
 
				             -DGGML_HIP=ON
			
 
				           cmake --build build --config Release -j $(nproc)
			
 
				 
			
 
				-      - name: Build with legacy HIP support
			
 
				-        id: cmake_build_legacy_hip
			
 
				-        run: |
			
 
				-          cmake -B build2 -S . \
			
 
				-            -DCMAKE_C_COMPILER=hipcc \
			
 
				-            -DCMAKE_CXX_COMPILER=hipcc \
			
 
				-            -DGGML_HIP_ROCWMMA_FATTN=ON \
			
 
				-            -DGGML_HIP=ON
			
 
				-          cmake --build build2 --config Release -j $(nproc)
			
 
				-
			
 
				   ubuntu-22-cmake-musa:
			
 
				     runs-on: ubuntu-22.04
			
 
				     container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64
			
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
 
				 }
			
 
				 
			
 
				 static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) {
			
 
				-#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000
			
 
				+#if defined(GGML_USE_HIP)
			
 
				     return half2(__hmax(a.x, b.x), __hmax(a.y, b.y));
			
 
				-#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX
			
 
				+#elif CUDART_VERSION >= CUDART_HMAX
			
 
				     return __hmax2(a, b);
			
 
				-#elif !defined(GGML_USE_HIP)
			
 
				+#else
			
 
				     half2 ret;
			
 
				     reinterpret_cast<half&>(ret.x) = __float2half(fmaxf( __low2float(a),  __low2float(b)));
			
 
				     reinterpret_cast<half&>(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b)));
			
 
				     return ret;
			
 
				-#else
			
 
				-    GGML_UNUSED(a);
			
 
				-    GGML_UNUSED(b);
			
 
				-    NO_DEVICE_CODE;
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				 template<int width = WARP_SIZE>
			
 
				 static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
			
 
				-#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
			
 
				+#if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
			
 
				 #pragma unroll
			
 
				    for (int offset = width/2; offset > 0; offset >>= 1) {
			
 
				        x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width));
			
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
 
				 #else
			
 
				    GGML_UNUSED(x);
			
 
				    NO_DEVICE_CODE;
			
 
				-#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000)
			
 
				+#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
			
 
				 }
			
 
				 
			
 
				 #if CUDART_VERSION < CUDART_HMASK
			
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -180,30 +180,6 @@ static int ggml_cuda_parse_id(char devName[]) {
 
				 #endif // defined(GGML_USE_HIP)
			
 
				 
			
 
				 static ggml_cuda_device_info ggml_cuda_init() {
			
 
				-#if defined(GGML_USE_HIP)
			
 
				-    // Workaround for a rocBLAS bug when using multiple graphics cards:
			
 
				-    // https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1346
			
 
				-    {
			
 
				-        int major_version = 0;
			
 
				-        size_t version_length = 0;
			
 
				-        if (rocblas_get_version_string_size(&version_length) == rocblas_status_success) {
			
 
				-            std::vector<char> version(version_length+1, '\0');
			
 
				-            if (rocblas_get_version_string(version.data(), version.size()) == rocblas_status_success) {
			
 
				-                version.resize(::strlen(version.data()));
			
 
				-                int parsed_value = 0;
			
 
				-                if (std::from_chars(version.data(), version.data() + version.size(), parsed_value).ec == std::errc()) {
			
 
				-                    major_version = parsed_value;
			
 
				-                }
			
 
				-            }
			
 
				-        }
			
 
				-        if (major_version < 4) {
			
 
				-            GGML_LOG_DEBUG(GGML_CUDA_NAME " calling rocblas_initialize as a workaround for a rocBLAS bug\n");
			
 
				-            rocblas_initialize();
			
 
				-            CUDA_CHECK(cudaDeviceSynchronize());
			
 
				-        }
			
 
				-    }
			
 
				-#endif
			
 
				-
			
 
				     ggml_cuda_device_info info = {};
			
 
				 
			
 
				     cudaError_t err = cudaGetDeviceCount(&info.device_count);
			
--- a/ggml/src/ggml-cuda/vendors/hip.h
+++ b/ggml/src/ggml-cuda/vendors/hip.h
@@ -5,8 +5,6 @@
 
				 #include <hipblas/hipblas.h>
			
 
				 #include <hip/hip_fp16.h>
			
 
				 #include <hip/hip_bfloat16.h>
			
 
				-// for rocblas_initialize()
			
 
				-#include "rocblas/rocblas.h"
			
 
				 
			
 
				 #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT
			
 
				 #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT
			
@@ -251,17 +249,3 @@ static __device__ __forceinline__ unsigned int __vcmpne4(unsigned int a, unsigne
 
				     }
			
 
				     return c;
			
 
				 }
			
 
				-
			
 
				-#if HIP_VERSION < 50600000
			
 
				-// __shfl_xor() for half2 was added in ROCm 5.6
			
 
				-static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int width) {
			
 
				-    typedef union half2_b32 {
			
 
				-        half2 val;
			
 
				-        int   b32;
			
 
				-    } half2_b32_t;
			
 
				-    half2_b32_t tmp;
			
 
				-    tmp.val = var;
			
 
				-    tmp.b32 = __shfl_xor(tmp.b32, laneMask, width);
			
 
				-    return tmp.val;
			
 
				-}
			
 
				-#endif // HIP_VERSION < 50600000
			
--- a/ggml/src/ggml-hip/CMakeLists.txt
+++ b/ggml/src/ggml-hip/CMakeLists.txt
@@ -46,8 +46,8 @@ if (GGML_HIP_ROCWMMA_FATTN)
 
				     endif()
			
 
				 endif()
			
 
				 
			
 
				-if (${hip_VERSION} VERSION_LESS 5.5)
			
 
				-    message(FATAL_ERROR "At least ROCM/HIP V5.5 is required")
			
 
				+if (${hip_VERSION} VERSION_LESS 6.1)
			
 
				+    message(FATAL_ERROR "At least ROCM/HIP V6.1 is required")
			
 
				 endif()
			
 
				 
			
 
				 message(STATUS "HIP and hipBLAS found")