@@ -1,7 +1,5 @@
 #include "models.h"
 
-
-
 llm_build_ernie4_5::llm_build_ernie4_5(const llama_model & model, const llm_graph_params & params) :
     llm_graph_context(params) {
     const int64_t n_embd_head = hparams.n_embd_head_v;
@@ -19,6 +17,8 @@ llm_build_ernie4_5::llm_build_ernie4_5(const llama_model & model, const llm_grap
 
     auto * inp_attn = build_attn_inp_kv();
 
+    ggml_tensor * inp_out_ids = build_inp_out_ids();
+
     for (int il = 0; il < n_layer; ++il) {
         ggml_tensor * inpSA = inpL;
 
@@ -67,9 +67,8 @@ llm_build_ernie4_5::llm_build_ernie4_5(const llama_model & model, const llm_grap
         }
         if (il == n_layer - 1) {
             // skip computing output for unused tokens
-            ggml_tensor * inp_out_ids = build_inp_out_ids();
-            cur = ggml_get_rows(ctx0, cur, inp_out_ids);
-            inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
+            cur = ggml_get_rows(ctx0, cur, inp_out_ids);
+            inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
         }
         ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
         cb(ffn_inp, "ffn_inp", il);
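
The diff hoists the `build_inp_out_ids()` call out of the per-layer loop: the out-ids input is now constructed once up front and the last-layer branch merely reuses it when filtering rows with `ggml_get_rows`. Below is a minimal sketch of that pattern, assuming stubbed stand-ins for the ggml/llama internals — `Tensor`, `build_out_ids()` and `get_rows()` are hypothetical placeholders, not the real API:

```cpp
#include <cstdio>

struct Tensor { const char * name; };

static Tensor out_ids_storage = { "inp_out_ids" };
static Tensor rows_storage    = { "rows" };

// stand-in for build_inp_out_ids(): creates the input tensor that
// selects which token rows actually need output logits
static Tensor * build_out_ids() { return &out_ids_storage; }

// stand-in for ggml_get_rows(): keeps only the selected rows
static Tensor * get_rows(Tensor * src, Tensor * ids) {
    (void) src; (void) ids;
    return &rows_storage;
}

int main() {
    const int n_layer = 4;

    // built once, before the loop -- mirroring the added hunk above
    Tensor * inp_out_ids = build_out_ids();

    Tensor * cur = &rows_storage;
    for (int il = 0; il < n_layer; ++il) {
        // ... per-layer attention / FFN graph construction ...
        if (il == n_layer - 1) {
            // the last layer filters rows, reusing the tensor built
            // above instead of constructing a fresh one per build
            cur = get_rows(cur, inp_out_ids);
        }
    }

    std::printf("last-layer rows tensor: %s\n", cur->name);
    return 0;
}
```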