Просмотр исходного кода

HIP: force max threads per block to be 1024 (#11621)

Some old or vendor-forked versions of LLVM still use 256. Explicitly set the limit to 1024 to align with upstream LLVM.

Signed-off-by: fxzjshm <fxzjshm@163.com>
fxzjshm 11 месяцев назад
Родитель
Коммит
3ec9fd4b77
1 измененных файлов с 3 добавлено и 0 удалено
  1. 3 0
      ggml/src/ggml-hip/CMakeLists.txt

+ 3 - 0
ggml/src/ggml-hip/CMakeLists.txt

@@ -46,6 +46,9 @@ endif()
 
 
 message(STATUS "HIP and hipBLAS found")
 message(STATUS "HIP and hipBLAS found")
 
 
+# Workaround for old or vendor-forked compilers that still use 256 as the max threads per block
+set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} --gpu-max-threads-per-block=1024")
+
 file(GLOB   GGML_HEADERS_ROCM "../ggml-cuda/*.cuh")
 file(GLOB   GGML_HEADERS_ROCM "../ggml-cuda/*.cuh")
 list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h")
 list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h")