|
|
cur = ggml_flash_attn_ext(ctx, q, k, v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
|
|
cur = ggml_flash_attn_ext(ctx, q, k, v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
|
|
|
ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
|
|
ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
|