@@ -227,7 +227,7 @@ typedef float2 dfloat2;
#define FP16_MMA_AVAILABLE
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))

-#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
+#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
#define AMD_MFMA_AVAILABLE
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)

@@ -295,7 +295,11 @@ static bool fp32_mma_hardware_available(const int cc) {

// AMD CDNA3 matrix cores.. Will add support for other CDNA generations later.
static bool amd_mfma_available(const int cc) {
-    return cc >= GGML_CUDA_CC_OFFSET_AMD && GGML_CUDA_CC_IS_CDNA3(cc);
+#if !defined(GGML_HIP_NO_MMQ_MFMA)
+    return GGML_CUDA_CC_IS_CDNA3(cc);
+#else
+    return false;
+#endif //!defined(GGML_HIP_NO_MMQ_MFMA)
}

// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.