3 månader sedan · 91a2a56556
--- a/ggml/src/ggml-cuda/fattn-vec.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec.cuh
@@ -535,8 +535,6 @@ void ggml_cuda_flash_attn_ext_vec_case(ggml_backend_cuda_context & ctx, ggml_ten
 
															     float logit_softcap;
														
 
															     memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float));
														
 
															-    const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
														
 
															-
														
 
															     if (Q->ne[1] == 1) {
														
 
															         constexpr int cols_per_block = 1;
														
 
															         if (logit_softcap == 0.0f) {
														
--- a/ggml/src/ggml-cuda/topk-moe.cu
+++ b/ggml/src/ggml-cuda/topk-moe.cu
@@ -13,7 +13,7 @@
 
															     It is intended as fusion of softmax->top-k->get_rows pipeline for MoE models
														
 
															 */
														
 
															-template <size_t n_experts, bool with_norm>
														
 
															+template <int n_experts, bool with_norm>
														
 
															 __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float * logits,
														
 
															                                                                   float *       weights,
														
 
															                                                                   int32_t *     ids,
														
@@ -204,8 +204,6 @@ void ggml_cuda_op_topk_moe(ggml_backend_cuda_context & ctx,
 
															     GGML_ASSERT(ids->nb[1] / ggml_type_size(ids->type) == (size_t) n_experts);
														
 
															-    cudaStream_t stream = ctx.stream();
														
 
															-
														
 
															     const int n_expert_used = weights->ne[1];
														
 
															     if (with_norm) {
														
--- a/ggml/src/ggml-musa/CMakeLists.txt
+++ b/ggml/src/ggml-musa/CMakeLists.txt
@@ -56,7 +56,7 @@ if (MUSAToolkit_FOUND)
 
															     set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)
														
 
															     foreach(SOURCE ${GGML_SOURCES_MUSA})
														
 
															-        set(COMPILE_FLAGS "-fsigned-char -x musa -mtgpu")
														
 
															+        set(COMPILE_FLAGS "-Od3 -fno-strict-aliasing -ffast-math -fsigned-char -x musa -mtgpu -fmusa-flush-denormals-to-zero")
														
 
															         foreach(ARCH ${MUSA_ARCHITECTURES})
														
 
															             set(COMPILE_FLAGS "${COMPILE_FLAGS} --cuda-gpu-arch=mp_${ARCH}")
														
 
															         endforeach()