|
|
@@ -205,6 +205,9 @@ struct clip_layer {
|
|
|
ggml_tensor * o_w = nullptr;
|
|
|
ggml_tensor * o_b = nullptr;
|
|
|
|
|
|
+ ggml_tensor * k_norm = nullptr;
|
|
|
+ ggml_tensor * q_norm = nullptr;
|
|
|
+
|
|
|
// layernorm 1
|
|
|
ggml_tensor * ln_1_w = nullptr;
|
|
|
ggml_tensor * ln_1_b = nullptr;
|
|
|
@@ -1363,6 +1366,16 @@ private:
|
|
|
Vcur = ggml_add(ctx0, Vcur, layer.v_b);
|
|
|
}
|
|
|
|
|
|
+ if (layer.q_norm) {
|
|
|
+ Qcur = build_norm(Qcur, layer.q_norm, NULL, norm_t, eps, il);
|
|
|
+ cb(Qcur, "Qcur_norm", il);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (layer.k_norm) {
|
|
|
+ Kcur = build_norm(Kcur, layer.k_norm, NULL, norm_t, eps, il);
|
|
|
+ cb(Kcur, "Kcur_norm", il);
|
|
|
+ }
|
|
|
+
|
|
|
Qcur = ggml_reshape_3d(ctx0, Qcur, d_head, n_head, n_pos);
|
|
|
Kcur = ggml_reshape_3d(ctx0, Kcur, d_head, n_head, n_pos);
|
|
|
Vcur = ggml_reshape_3d(ctx0, Vcur, d_head, n_head, n_pos);
|
|
|
@@ -1988,6 +2001,8 @@ struct clip_model_loader {
|
|
|
layer.q_w = get_tensor(string_format(TN_ATTN_Q, "v", il, "weight"));
|
|
|
layer.v_w = get_tensor(string_format(TN_ATTN_V, "v", il, "weight"));
|
|
|
layer.o_w = get_tensor(string_format(TN_ATTN_OUTPUT, "v", il, "weight"));
|
|
|
+ layer.k_norm = get_tensor(string_format(TN_ATTN_K_NORM, "v", il, "weight"), false);
|
|
|
+ layer.q_norm = get_tensor(string_format(TN_ATTN_Q_NORM, "v", il, "weight"), false);
|
|
|
layer.ln_1_w = get_tensor(string_format(TN_LN_1, "v", il, "weight"), false);
|
|
|
layer.ln_2_w = get_tensor(string_format(TN_LN_2, "v", il, "weight"), false);
|
|
|
layer.ls_1_w = get_tensor(string_format(TN_LS_1, "v", il, "weight"), false); // no bias
|