1 month ago · b0fb0f0aee
--- a/ggml/src/ggml-cuda/CMakeLists.txt
+++ b/ggml/src/ggml-cuda/CMakeLists.txt
@@ -35,16 +35,33 @@ if (CUDAToolkit_FOUND)
 
				             if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
			
 
				                 list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
			
 
				             endif()
			
 
				-
			
 
				-            if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
			
 
				-                list(APPEND CMAKE_CUDA_ARCHITECTURES 120f-virtual)
			
 
				-            endif()
			
 
				         endif()
			
 
				     endif()
			
 
				     message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
			
 
				 
			
 
				     enable_language(CUDA)
			
 
				 
			
 
				+    # Blackwell (compute capability 12x) handling. FP4 PTX instructions are not available
				+    # when targeting plain 12x; an arch-specific (12xa) or family-specific (12xf) target
				+    # is required.
				+    #   - GGML_NATIVE ON : every 12x / 12x-real / 12x-virtual entry is rewritten to 12xa-real.
				+    #   - GGML_NATIVE OFF: such entries are rejected so the user picks 12xa or 12xf explicitly.
				+    if (GGML_NATIVE)
				+        # CMAKE_CUDA_ARCHITECTURES_NATIVE may be empty (presumably when CMake could not detect
				+        # a GPU or does not support native detection -- TODO confirm). Fall back to the list
				+        # selected above so CMAKE_CUDA_ARCHITECTURES is never overwritten with an empty value.
				+        if (NOT "${CMAKE_CUDA_ARCHITECTURES_NATIVE}" STREQUAL "")
				+            set(ARCH_LIST ${CMAKE_CUDA_ARCHITECTURES_NATIVE})
				+        else()
				+            set(ARCH_LIST ${CMAKE_CUDA_ARCHITECTURES})
				+        endif()
				+
				+        set(PROCESSED_ARCHITECTURES "")
				+        foreach(ARCH ${ARCH_LIST})
				+            # Accept the bare number as well as the -real / -virtual suffixed spellings.
				+            if(ARCH MATCHES "^12[0-9](-real|-virtual)?$")
				+                # Strip any suffix to get the bare compute capability number.
				+                string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH "${ARCH}")
				+                message(STATUS "Replacing ${ARCH} with ${BASE_ARCH}a-real")
				+                list(APPEND PROCESSED_ARCHITECTURES "${BASE_ARCH}a-real")
				+            else()
				+                list(APPEND PROCESSED_ARCHITECTURES ${ARCH})
				+            endif()
				+        endforeach()
				+        set(CMAKE_CUDA_ARCHITECTURES ${PROCESSED_ARCHITECTURES})
				+    else()
				+        foreach(ARCH ${CMAKE_CUDA_ARCHITECTURES})
				+            # Same pattern as the native branch, so 12x-real / 12x-virtual cannot bypass the check.
				+            if(ARCH MATCHES "^12[0-9](-real|-virtual)?$")
				+                # Suggest names based on the bare number so the hint stays valid for suffixed entries.
				+                string(REGEX REPLACE "^(12[0-9]).*$" "\\1" BASE_ARCH "${ARCH}")
				+                message(FATAL_ERROR "Compute capability ${ARCH} used, use ${BASE_ARCH}a or ${BASE_ARCH}f for Blackwell specific optimizations")
				+            endif()
				+        endforeach()
				+    endif()
			
 
				+
			
 
				     file(GLOB   GGML_HEADERS_CUDA "*.cuh")
			
 
				     list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")