|
|
@@ -312,11 +312,11 @@ static bool turing_mma_available(const int cc) {
|
|
|
}
|
|
|
|
|
|
static bool ampere_mma_available(const int cc) {
|
|
|
- return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
|
|
|
+ return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
|
|
|
}
|
|
|
|
|
|
static bool cp_async_available(const int cc) {
|
|
|
- return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
|
|
|
+ return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_AMPERE;
|
|
|
}
|
|
|
|
|
|
static constexpr __device__ int ggml_cuda_get_physical_warp_size() {
|