|
@@ -5312,7 +5312,7 @@ void ggml_mul_mat_set_prec(
|
|
|
as -> [cols, rows, n_expert]
|
|
as -> [cols, rows, n_expert]
|
|
|
ids -> [n_expert_used, n_tokens] (i32)
|
|
ids -> [n_expert_used, n_tokens] (i32)
|
|
|
b -> [cols, n_expert_used, n_tokens]
|
|
b -> [cols, n_expert_used, n_tokens]
|
|
|
- c -> [cols, n_expert_used, n_tokens]
|
|
|
|
|
|
|
+ c -> [rows, n_expert_used, n_tokens]
|
|
|
|
|
|
|
|
in b, n_expert_used can be broadcast to match the n_expert_used of ids
|
|
in b, n_expert_used can be broadcast to match the n_expert_used of ids
|
|
|
|
|
|