|
|
@@ -1,7 +1,5 @@
|
|
|
#include "models.h"
|
|
|
|
|
|
-
|
|
|
-
|
|
|
llm_build_gemma3n_iswa::llm_build_gemma3n_iswa(const llama_model & model, const llm_graph_params & params) :
|
|
|
llm_graph_context(params),
|
|
|
model(model),
|
|
|
@@ -15,10 +13,9 @@ llm_build_gemma3n_iswa::llm_build_gemma3n_iswa(const llama_model & model, const
|
|
|
inpL = build_inp_embd(model.tok_embd);
|
|
|
|
|
|
// important: do not normalize weights for raw embeddings input (i.e. encoded image embeddings)
|
|
|
- if (ubatch.token) {
|
|
|
- inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd));
|
|
|
- cb(inpL, "inp_scaled", -1);
|
|
|
- }
|
|
|
+ inpL = ggml_scale(ctx0, inpL, ubatch.token ? sqrtf(n_embd) : 1.0f);
|
|
|
+ cb(inpL, "inp_scaled", -1);
|
|
|
+
|
|
|
// inp_pos - contains the positions
|
|
|
ggml_tensor * inp_pos = build_inp_pos();
|
|
|
|
|
|
@@ -248,7 +245,7 @@ ggml_tensor * llm_build_gemma3n_iswa::view_2d_slice(ggml_tensor * x, int idx) {
|
|
|
// equivalent to get_per_layer_inputs() in python code
|
|
|
// output shape: [n_embd_altup, n_layer, n_tokens]
|
|
|
ggml_tensor * llm_build_gemma3n_iswa::get_per_layer_inputs() {
|
|
|
- auto inp = std::make_unique<llm_graph_input_embd>();
|
|
|
+ auto inp = std::make_unique<llm_graph_input_embd>();
|
|
|
ggml_tensor * inp_per_layer;
|
|
|
if (ubatch.token) {
|
|
|
inp->tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ubatch.n_tokens);
|