@@ -250,6 +250,15 @@ if (LLAMA_CUBLAS)
 
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
 
     endif()
+
+    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+        if (LLAMA_CUDA_DMMV_F16)
+            set(CMAKE_CUDA_ARCHITECTURES "61") # needed for f16 CUDA intrinsics
+        else()
+            set(CMAKE_CUDA_ARCHITECTURES "52") # lowest CUDA 12 standard
+        endif()
+    endif()
+    message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
 else()
     message(WARNING "cuBLAS not found")
 endif()
@@ -493,22 +502,6 @@ if (BUILD_SHARED_LIBS)
     endif()
 endif()
 
-if (GGML_SOURCES_CUDA)
-    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES "native")
-    set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
-
-    set_property(TARGET ggml_static PROPERTY CUDA_ARCHITECTURES "native")
-    set_property(TARGET ggml_static PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
-
-    if (BUILD_SHARED_LIBS)
-        set_property(TARGET ggml_shared PROPERTY CUDA_ARCHITECTURES "native")
-        set_property(TARGET ggml_shared PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
-    endif()
-
-    set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES "native")
-endif()
-
 #
 # programs, examples and tests