@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
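
For illustration, here is a minimal standalone sketch of the pattern this patch guards against. It is not part of the patch: the kernel `scan_until_zero` and its template parameter `N` are hypothetical, and only the `#pragma clang diagnostic` / `#pragma unroll` usage mirrors the diff. When clang targets amdgcn, the data-dependent `break` prevents the requested unroll, and clang reports the failed transformation as `-Wpass-failed`; the push/ignored/pop scoping silences that warning only for the affected code.

```cpp
// Hypothetical example; only the pragma pattern matches the patch above.
// On clang/amdgcn, the #pragma unroll request below cannot be honored
// because the break depends on runtime data, which triggers -Wpass-failed.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif // __clang__

template <int N>
__global__ void scan_until_zero(const int * data, int * out) {
    int acc = 0;
#pragma unroll
    for (int i = 0; i < N; ++i) {
        if (data[i] == 0) {
            break; // runtime-dependent break: not resolvable at compile time
        }
        acc += data[i];
    }
    *out = acc;
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__
```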