@@ -84,12 +84,12 @@
 
 #define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000
 #define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000
-#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD
+#define GGML_CUDA_CC_PH1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // MTT S5000
 
 #define GGML_CUDA_CC_IS_MTHREADS(cc) (cc >= GGML_CUDA_CC_OFFSET_MTHREADS && cc < GGML_CUDA_CC_OFFSET_AMD)
 #define GGML_CUDA_CC_IS_QY1(cc) (cc >= GGML_CUDA_CC_QY1 && cc < GGML_CUDA_CC_QY2)
-#define GGML_CUDA_CC_IS_QY2(cc) (cc >= GGML_CUDA_CC_QY2 && cc < GGML_CUDA_CC_NG)
-#define GGML_CUDA_CC_IS_NG(cc) (cc >= GGML_CUDA_CC_NG)
+#define GGML_CUDA_CC_IS_QY2(cc) (cc >= GGML_CUDA_CC_QY2 && cc < GGML_CUDA_CC_PH1)
+#define GGML_CUDA_CC_IS_PH1(cc) (cc >= GGML_CUDA_CC_PH1)
 
 #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
 # define GGML_CUDA_USE_CUB
@@ -212,9 +212,9 @@ static const char * cu_get_error_str(CUresult err) {
 #define GGML_USE_VMM
 #endif // (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
 
-#if defined(GGML_USE_HIP) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
+#if defined(GGML_USE_HIP) || defined(GGML_USE_MUSA) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
 #define FP16_AVAILABLE
-#endif // defined(GGML_USE_HIP) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
+#endif // defined(GGML_USE_HIP) || defined(GGML_USE_MUSA) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
 
 #if defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610
 #define FAST_FP16_AVAILABLE
@@ -250,12 +250,14 @@ static const char * cu_get_error_str(CUresult err) {
 #endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < 220)
 
 static bool fp16_available(const int cc) {
-    return ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_PASCAL;
+    return ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_PASCAL ||
+           (GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_PH1);
 }
 
 static bool fast_fp16_available(const int cc) {
     return GGML_CUDA_CC_IS_AMD(cc) ||
-        (GGML_CUDA_CC_IS_NVIDIA(cc) && fp16_available(cc) && ggml_cuda_highest_compiled_arch(cc) != 610);
+        (GGML_CUDA_CC_IS_NVIDIA(cc) && fp16_available(cc) && ggml_cuda_highest_compiled_arch(cc) != 610) ||
+        (GGML_CUDA_CC_IS_MTHREADS(cc) && fp16_available(cc));
 }
 
 // To be used for feature selection of external libraries, e.g. cuBLAS.
@@ -272,7 +274,9 @@ static bool fp16_mma_hardware_available(const int cc) {
 }
 
 static bool bf16_mma_hardware_available(const int cc) {
-    return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) || GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3;
+    return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) ||
+           GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3 ||
+           (GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_PH1);
 }
 
 static bool fp32_mma_hardware_available(const int cc) {
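
For reference, a minimal host-side C++ sketch of how the renamed PH1 macros compose with the compute-capability ranges. The two *_OFFSET_* values below are assumed placeholders for illustration only; their real definitions live elsewhere in common.cuh and are not part of this patch.

// Illustrative sketch, not part of the patch: the offset values below are
// assumed placeholders; the QY1/QY2/PH1 macros mirror the first hunk above.
#include <cstdio>

#define GGML_CUDA_CC_OFFSET_MTHREADS 0x0100000 // assumed placeholder
#define GGML_CUDA_CC_OFFSET_AMD      0x1000000 // assumed placeholder

#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000
#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000
#define GGML_CUDA_CC_PH1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // MTT S5000

#define GGML_CUDA_CC_IS_MTHREADS(cc) (cc >= GGML_CUDA_CC_OFFSET_MTHREADS && cc < GGML_CUDA_CC_OFFSET_AMD)
#define GGML_CUDA_CC_IS_QY2(cc)      (cc >= GGML_CUDA_CC_QY2 && cc < GGML_CUDA_CC_PH1)
#define GGML_CUDA_CC_IS_PH1(cc)      (cc >= GGML_CUDA_CC_PH1)

int main() {
    const int s4000 = GGML_CUDA_CC_QY2; // MTT S4000
    const int s5000 = GGML_CUDA_CC_PH1; // MTT S5000

    // IS_QY2 is now bounded above by PH1, so an S5000 matches only IS_PH1
    // and the PH1-gated paths (fp16, bf16 MMA) stay off for QY2 cards.
    std::printf("S4000: IS_QY2=%d IS_PH1=%d\n", (int) GGML_CUDA_CC_IS_QY2(s4000), (int) GGML_CUDA_CC_IS_PH1(s4000));
    std::printf("S5000: IS_QY2=%d IS_PH1=%d\n", (int) GGML_CUDA_CC_IS_QY2(s5000), (int) GGML_CUDA_CC_IS_PH1(s5000));
    return 0;
}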