# CMakeLists.txt — CUDA backend build configuration for ggml
  1. cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
  2. find_package(CUDAToolkit)
  3. if (CUDAToolkit_FOUND)
  4. message(STATUS "CUDA Toolkit found")
  5. if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  6. # native == GPUs available at build time
  7. # 50 == Maxwell, lowest CUDA 12 standard
  8. # 60 == P100, FP16 CUDA intrinsics
  9. # 61 == Pascal, __dp4a instruction (per-byte integer dot product)
  10. # 70 == V100, FP16 tensor cores
  11. # 75 == Turing, int8 tensor cores
  12. if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
  13. set(CMAKE_CUDA_ARCHITECTURES "native")
  14. elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
  15. set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75;80")
  16. else()
  17. set(CMAKE_CUDA_ARCHITECTURES "50;61;70;75;80")
  18. endif()
  19. endif()
  20. message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
  21. enable_language(CUDA)
  22. file(GLOB GGML_HEADERS_CUDA "*.cuh")
  23. list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")
  24. file(GLOB GGML_SOURCES_CUDA "*.cu")
  25. file(GLOB SRCS "template-instances/fattn-mma*.cu")
  26. list(APPEND GGML_SOURCES_CUDA ${SRCS})
  27. file(GLOB SRCS "template-instances/mmq*.cu")
  28. list(APPEND GGML_SOURCES_CUDA ${SRCS})
  29. if (GGML_CUDA_FA_ALL_QUANTS)
  30. file(GLOB SRCS "template-instances/fattn-vec*.cu")
  31. list(APPEND GGML_SOURCES_CUDA ${SRCS})
  32. add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
  33. else()
  34. file(GLOB SRCS "template-instances/fattn-vec*q4_0-q4_0.cu")
  35. list(APPEND GGML_SOURCES_CUDA ${SRCS})
  36. file(GLOB SRCS "template-instances/fattn-vec*q8_0-q8_0.cu")
  37. list(APPEND GGML_SOURCES_CUDA ${SRCS})
  38. file(GLOB SRCS "template-instances/fattn-vec*f16-f16.cu")
  39. list(APPEND GGML_SOURCES_CUDA ${SRCS})
  40. endif()
  41. ggml_add_backend_library(ggml-cuda
  42. ${GGML_HEADERS_CUDA}
  43. ${GGML_SOURCES_CUDA}
  44. )
  45. add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
  46. if (GGML_CUDA_GRAPHS)
  47. add_compile_definitions(GGML_CUDA_USE_GRAPHS)
  48. endif()
  49. if (GGML_CUDA_FORCE_MMQ)
  50. add_compile_definitions(GGML_CUDA_FORCE_MMQ)
  51. endif()
  52. if (GGML_CUDA_FORCE_CUBLAS)
  53. add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
  54. endif()
  55. if (GGML_CUDA_NO_VMM)
  56. add_compile_definitions(GGML_CUDA_NO_VMM)
  57. endif()
  58. if (NOT GGML_CUDA_FA)
  59. add_compile_definitions(GGML_CUDA_NO_FA)
  60. endif()
  61. if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
  62. add_compile_definitions(GGML_CUDA_F16)
  63. endif()
  64. if (GGML_CUDA_NO_PEER_COPY)
  65. add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
  66. endif()
  67. if (GGML_STATIC)
  68. if (WIN32)
  69. # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
  70. target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
  71. else ()
  72. target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
  73. endif()
  74. else()
  75. target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
  76. endif()
  77. if (GGML_CUDA_NO_VMM)
  78. # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
  79. else()
  80. target_link_libraries(ggml-cuda PRIVATE CUDA::cuda_driver)
  81. endif()
  82. set(CUDA_CXX_FLAGS "")
  83. set(CUDA_FLAGS -use_fast_math)
  84. if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
  85. # Options are:
  86. # - none (not recommended)
  87. # - speed (nvcc's default)
  88. # - balance
  89. # - size
  90. list(APPEND CUDA_FLAGS -compress-mode=${GGML_CUDA_COMPRESSION_MODE})
  91. endif()
  92. if (GGML_FATAL_WARNINGS)
  93. list(APPEND CUDA_FLAGS -Werror all-warnings)
  94. endif()
  95. if (GGML_ALL_WARNINGS AND NOT MSVC)
  96. set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
  97. if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
  98. list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
  99. endif()
  100. execute_process(
  101. COMMAND ${NVCC_CMD} -Xcompiler --version
  102. OUTPUT_VARIABLE CUDA_CCFULLVER
  103. ERROR_QUIET
  104. )
  105. if (NOT CUDA_CCFULLVER MATCHES clang)
  106. set(CUDA_CCID "GNU")
  107. execute_process(
  108. COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
  109. OUTPUT_VARIABLE CUDA_CCVER
  110. ERROR_QUIET
  111. OUTPUT_STRIP_TRAILING_WHITESPACE
  112. )
  113. else()
  114. if (CUDA_CCFULLVER MATCHES Apple)
  115. set(CUDA_CCID "AppleClang")
  116. else()
  117. set(CUDA_CCID "Clang")
  118. endif()
  119. string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
  120. endif()
  121. message(STATUS "CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")
  122. ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
  123. list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
  124. endif()
  125. if (NOT MSVC)
  126. list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
  127. endif()
  128. list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
  129. if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
  130. list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
  131. endif()
  132. target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
  133. else()
  134. message(FATAL_ERROR "CUDA Toolkit not found")
  135. endif()