3 месяцев назад · 397cd9fd67
--- a/src/models/llm_build_arcee.cpp
+++ b/src/models/llm_build_arcee.cpp
@@ -136,4 +136,4 @@ llm_build_arcee::llm_build_arcee(const llama_model & model, const llm_graph_para
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_arcee.h
+++ b/src/models/llm_build_arcee.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_arcee : public llm_graph_context {
														
 
															     llm_build_arcee(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_arctic.cpp
+++ b/src/models/llm_build_arctic.cpp
@@ -139,4 +139,4 @@ llm_build_arctic::llm_build_arctic(const llama_model & model, const llm_graph_pa
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_arctic.h
+++ b/src/models/llm_build_arctic.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_arctic : public llm_graph_context {
														
 
															     llm_build_arctic(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_arwkv7.cpp
+++ b/src/models/llm_build_arwkv7.cpp
@@ -88,4 +88,4 @@ llm_build_arwkv7::llm_build_arwkv7(const llama_model & model, const llm_graph_pa
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_arwkv7.h
+++ b/src/models/llm_build_arwkv7.h
@@ -8,4 +8,4 @@
 
															 struct llm_build_arwkv7 : public llm_build_rwkv7_base {
														
 
															     llm_build_arwkv7(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_baichuan.cpp
+++ b/src/models/llm_build_baichuan.cpp
@@ -128,4 +128,4 @@ llm_build_baichuan::llm_build_baichuan(const llama_model & model, const llm_grap
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_baichuan.h
+++ b/src/models/llm_build_baichuan.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_baichuan : public llm_graph_context {
														
 
															     llm_build_baichuan(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_bailingmoe.cpp
+++ b/src/models/llm_build_bailingmoe.cpp
@@ -145,4 +145,4 @@ llm_build_bailingmoe::llm_build_bailingmoe(const llama_model & model, const llm_
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_bailingmoe.h
+++ b/src/models/llm_build_bailingmoe.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_bailingmoe : public llm_graph_context {
														
 
															     llm_build_bailingmoe(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_bert.cpp
+++ b/src/models/llm_build_bert.cpp
@@ -191,4 +191,4 @@ llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params
 
															     res->t_embd = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_bert.h
+++ b/src/models/llm_build_bert.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_bert : public llm_graph_context {
														
 
															     llm_build_bert(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_bitnet.cpp
+++ b/src/models/llm_build_bitnet.cpp
@@ -161,4 +161,4 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_bitnet.h
+++ b/src/models/llm_build_bitnet.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_bitnet : public llm_graph_context {
														
 
															     llm_build_bitnet(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_bloom.cpp
+++ b/src/models/llm_build_bloom.cpp
@@ -102,4 +102,4 @@ llm_build_bloom::llm_build_bloom(const llama_model & model, const llm_graph_para
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_bloom.h
+++ b/src/models/llm_build_bloom.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_bloom : public llm_graph_context {
														
 
															     llm_build_bloom(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_chameleon.cpp
+++ b/src/models/llm_build_chameleon.cpp
@@ -179,4 +179,4 @@ llm_build_chameleon::llm_build_chameleon(const llama_model & model, const llm_gr
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_chameleon.h
+++ b/src/models/llm_build_chameleon.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_chameleon : public llm_graph_context {
														
 
															     llm_build_chameleon(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_chatglm.cpp
+++ b/src/models/llm_build_chatglm.cpp
@@ -133,4 +133,4 @@ llm_build_chatglm::llm_build_chatglm(const llama_model & model, const llm_graph_
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_chatglm.h
+++ b/src/models/llm_build_chatglm.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_chatglm : public llm_graph_context {
														
 
															     llm_build_chatglm(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_codeshell.cpp
+++ b/src/models/llm_build_codeshell.cpp
@@ -112,4 +112,4 @@ llm_build_codeshell::llm_build_codeshell(const llama_model & model, const llm_gr
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_codeshell.h
+++ b/src/models/llm_build_codeshell.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_codeshell : public llm_graph_context {
														
 
															     llm_build_codeshell(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_cohere2_iswa.cpp
+++ b/src/models/llm_build_cohere2_iswa.cpp
@@ -130,4 +130,4 @@ llm_build_cohere2_iswa::llm_build_cohere2_iswa(const llama_model & model, const
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_cohere2_iswa.h
+++ b/src/models/llm_build_cohere2_iswa.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_cohere2_iswa : public llm_graph_context {
														
 
															     llm_build_cohere2_iswa(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_command_r.h
+++ b/src/models/llm_build_command_r.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_command_r : public llm_graph_context {
														
 
															     llm_build_command_r(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_dbrx.cpp
+++ b/src/models/llm_build_dbrx.cpp
@@ -124,4 +124,4 @@ llm_build_dbrx::llm_build_dbrx(const llama_model & model, const llm_graph_params
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_dbrx.h
+++ b/src/models/llm_build_dbrx.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_dbrx : public llm_graph_context {
														
 
															     llm_build_dbrx(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_deci.h
+++ b/src/models/llm_build_deci.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_deci : public llm_graph_context {
														
 
															     llm_build_deci(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_deepseek.h
+++ b/src/models/llm_build_deepseek.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_deepseek : public llm_graph_context {
														
 
															     llm_build_deepseek(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_deepseek2.h
+++ b/src/models/llm_build_deepseek2.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_deepseek2 : public llm_graph_context {
														
 
															     llm_build_deepseek2(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_dots1.h
+++ b/src/models/llm_build_dots1.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_dots1 : public llm_graph_context {
														
 
															     llm_build_dots1(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_dream.h
+++ b/src/models/llm_build_dream.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_dream : public llm_graph_context {
														
 
															     llm_build_dream(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_ernie4_5.h
+++ b/src/models/llm_build_ernie4_5.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_ernie4_5 : public llm_graph_context {
														
 
															     llm_build_ernie4_5(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_ernie4_5_moe.h
+++ b/src/models/llm_build_ernie4_5_moe.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_ernie4_5_moe : public llm_graph_context {
														
 
															     llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_exaone.h
+++ b/src/models/llm_build_exaone.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_exaone : public llm_graph_context {
														
 
															     llm_build_exaone(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_exaone4.h
+++ b/src/models/llm_build_exaone4.h
@@ -8,4 +8,4 @@
 
															 template <bool iswa>
														
 
															 struct llm_build_exaone4 : public llm_graph_context {
														
 
															     llm_build_exaone4(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_falcon.cpp
+++ b/src/models/llm_build_falcon.cpp
@@ -121,4 +121,4 @@ llm_build_falcon::llm_build_falcon(const llama_model & model, const llm_graph_pa
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_falcon.h
+++ b/src/models/llm_build_falcon.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_falcon : public llm_graph_context {
														
 
															     llm_build_falcon(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_falcon_h1.h
+++ b/src/models/llm_build_falcon_h1.h
@@ -8,4 +8,4 @@
 
															 struct llm_build_falcon_h1 : public llm_graph_context_mamba {
														
 
															     llm_build_falcon_h1(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gemma.cpp
+++ b/src/models/llm_build_gemma.cpp
@@ -117,4 +117,4 @@ llm_build_gemma::llm_build_gemma(const llama_model & model, const llm_graph_para
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_gemma.h
+++ b/src/models/llm_build_gemma.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_gemma : public llm_graph_context {
														
 
															     llm_build_gemma(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gemma2_iswa.cpp
+++ b/src/models/llm_build_gemma2_iswa.cpp
@@ -130,4 +130,4 @@ llm_build_gemma2_iswa::llm_build_gemma2_iswa(const llama_model & model, const ll
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_gemma2_iswa.h
+++ b/src/models/llm_build_gemma2_iswa.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_gemma2_iswa : public llm_graph_context {
														
 
															     llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gemma3_iswa.cpp
+++ b/src/models/llm_build_gemma3_iswa.cpp
@@ -137,4 +137,4 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_gemma3_iswa.h
+++ b/src/models/llm_build_gemma3_iswa.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_gemma3_iswa : public llm_graph_context {
														
 
															     llm_build_gemma3_iswa(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gemma3n_iswa.h
+++ b/src/models/llm_build_gemma3n_iswa.h
@@ -25,4 +25,4 @@ struct llm_build_gemma3n_iswa : public llm_graph_context {
 
															     ggml_tensor * altup_predict(ggml_tensor * cur, int il);
														
 
															     ggml_tensor * laurel(ggml_tensor * cur, int il);
														
 
															     ggml_tensor * altup_correct(ggml_tensor * predictions, ggml_tensor * activated, int il);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gemma_embedding_iswa.h
+++ b/src/models/llm_build_gemma_embedding_iswa.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_gemma_embedding_iswa : public llm_graph_context {
														
 
															     llm_build_gemma_embedding_iswa(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_glm4.h
+++ b/src/models/llm_build_glm4.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_glm4 : public llm_graph_context {
														
 
															     llm_build_glm4(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_glm4_moe.cpp
+++ b/src/models/llm_build_glm4_moe.cpp
@@ -159,4 +159,4 @@ llm_build_glm4_moe::llm_build_glm4_moe(const llama_model & model, const llm_grap
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_glm4_moe.h
+++ b/src/models/llm_build_glm4_moe.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_glm4_moe : public llm_graph_context {
														
 
															     llm_build_glm4_moe(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gpt2.cpp
+++ b/src/models/llm_build_gpt2.cpp
@@ -106,4 +106,4 @@ llm_build_gpt2::llm_build_gpt2(const llama_model & model, const llm_graph_params
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_gpt2.h
+++ b/src/models/llm_build_gpt2.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_gpt2 : public llm_graph_context {
														
 
															     llm_build_gpt2(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_gptneox.cpp
+++ b/src/models/llm_build_gptneox.cpp
@@ -148,4 +148,4 @@ llm_build_gptneox::llm_build_gptneox(const llama_model & model, const llm_graph_
 
															     res->t_logits = cur;
														
 
															     ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_gptneox.h
+++ b/src/models/llm_build_gptneox.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_gptneox : public llm_graph_context {
														
 
															     llm_build_gptneox(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_granite.cpp
+++ b/src/models/llm_build_granite.cpp
@@ -220,4 +220,4 @@ ggml_tensor * llm_build_granite::build_layer_ffn(
 
															     cb(cur, "l_out", il);
														
 
															     return cur;
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_granite.h
+++ b/src/models/llm_build_granite.h
@@ -22,4 +22,4 @@ private:
 
															               ggml_tensor       * inpSA,
														
 
															         const llama_model       & model,
														
 
															         const int                 il);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_granite_hybrid.h
+++ b/src/models/llm_build_granite_hybrid.h
@@ -8,6 +8,7 @@
 
															 struct llm_build_granite_hybrid : public llm_graph_context_mamba {
														
 
															     llm_build_granite_hybrid(const llama_model & model, const llm_graph_params & params);
														
 
															     ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
														
 
															-    ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, 
														
 
															+    ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn,
														
 
															         const llama_model & model,const int64_t n_embd_head, const int il);
														
 
															-};
														
 
															+};
														
 
															+
														
--- a/src/models/llm_build_grok.cpp
+++ b/src/models/llm_build_grok.cpp
@@ -169,4 +169,4 @@ llm_build_grok::llm_build_grok(const llama_model & model, const llm_graph_params
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_grok.h
+++ b/src/models/llm_build_grok.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_grok : public llm_graph_context {
														
 
															     llm_build_grok(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_hunyuan_dense.cpp
+++ b/src/models/llm_build_hunyuan_dense.cpp
@@ -133,4 +133,4 @@ llm_build_hunyuan_dense::llm_build_hunyuan_dense(const llama_model & model, cons
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_hunyuan_dense.h
+++ b/src/models/llm_build_hunyuan_dense.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_hunyuan_dense : public llm_graph_context {
														
 
															     llm_build_hunyuan_dense(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_hunyuan_moe.cpp
+++ b/src/models/llm_build_hunyuan_moe.cpp
@@ -161,4 +161,4 @@ llm_build_hunyuan_moe::llm_build_hunyuan_moe(const llama_model & model, const ll
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_hunyuan_moe.h
+++ b/src/models/llm_build_hunyuan_moe.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_hunyuan_moe : public llm_graph_context {
														
 
															     llm_build_hunyuan_moe(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_internlm2.cpp
+++ b/src/models/llm_build_internlm2.cpp
@@ -128,4 +128,4 @@ llm_build_internlm2::llm_build_internlm2(const llama_model & model, const llm_gr
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_internlm2.h
+++ b/src/models/llm_build_internlm2.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_internlm2 : public llm_graph_context {
														
 
															     llm_build_internlm2(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_jais.cpp
+++ b/src/models/llm_build_jais.cpp
@@ -87,4 +87,4 @@ llm_build_jais::llm_build_jais(const llama_model & model, const llm_graph_params
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_jais.h
+++ b/src/models/llm_build_jais.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_jais : public llm_graph_context {
														
 
															     llm_build_jais(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_jamba.cpp
+++ b/src/models/llm_build_jamba.cpp
@@ -112,4 +112,4 @@ llm_build_jamba::llm_build_jamba(const llama_model & model, const llm_graph_para
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_jamba.h
+++ b/src/models/llm_build_jamba.h
@@ -8,4 +8,4 @@
 
															 struct llm_build_jamba : public llm_graph_context_mamba {
														
 
															     llm_build_jamba(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_lfm2.cpp
+++ b/src/models/llm_build_lfm2.cpp
@@ -158,3 +158,4 @@ ggml_tensor * llm_build_lfm2::build_shortconv_block(ggml_tensor * cur, llm_graph
 
															     return y;
														
 
															 }
														
 
															+
														
--- a/src/models/llm_build_lfm2.h
+++ b/src/models/llm_build_lfm2.h
@@ -7,10 +7,10 @@
 
															 struct llm_build_lfm2 : public llm_graph_context {
														
 
															     const llama_model & model;
														
 
															-    
														
 
															+
														
 
															     llm_build_lfm2(const llama_model & model, const llm_graph_params & params);
														
 
															     ggml_tensor * build_feed_forward(ggml_tensor * cur, int il) const;
														
 
															     ggml_tensor * build_attn_block(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn, int il) const;
														
 
															     ggml_tensor * build_shortconv_block(ggml_tensor * cur, llm_graph_input_rs * inp_recr, int il);
														
 
															-    
														
 
															-};
														
 
															+
														
 
															+};
														
--- a/src/models/llm_build_llada.cpp
+++ b/src/models/llm_build_llada.cpp
@@ -102,4 +102,4 @@ llm_build_llada::llm_build_llada(const llama_model & model, const llm_graph_para
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_llada.h
+++ b/src/models/llm_build_llada.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_llada : public llm_graph_context {
														
 
															     llm_build_llada(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_llada_moe.cpp
+++ b/src/models/llm_build_llada_moe.cpp
@@ -127,4 +127,4 @@ llm_build_llada_moe::llm_build_llada_moe(const llama_model & model, const llm_gr
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_llada_moe.h
+++ b/src/models/llm_build_llada_moe.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_llada_moe : public llm_graph_context {
														
 
															     llm_build_llada_moe(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_llama.cpp
+++ b/src/models/llm_build_llama.cpp
@@ -162,4 +162,4 @@ llm_build_llama::llm_build_llama(const llama_model & model, const llm_graph_para
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-    }
														
 
															+    }
														
--- a/src/models/llm_build_llama.h
+++ b/src/models/llm_build_llama.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_llama : public llm_graph_context {
														
 
															     llm_build_llama(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_llama_iswa.h
+++ b/src/models/llm_build_llama_iswa.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_llama_iswa : public llm_graph_context {
														
 
															     llm_build_llama_iswa(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_mamba.cpp
+++ b/src/models/llm_build_mamba.cpp
@@ -58,3 +58,4 @@ llm_build_mamba::llm_build_mamba(const llama_model & model, const llm_graph_para
 
															     ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															+
														
--- a/src/models/llm_build_mamba.h
+++ b/src/models/llm_build_mamba.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_mamba : public llm_graph_context_mamba {
														
 
															     llm_build_mamba(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_minicpm3.cpp
+++ b/src/models/llm_build_minicpm3.cpp
@@ -205,4 +205,4 @@ llm_build_minicpm3::llm_build_minicpm3(const llama_model & model, const llm_grap
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_minicpm3.h
+++ b/src/models/llm_build_minicpm3.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_minicpm3 : public llm_graph_context {
														
 
															     llm_build_minicpm3(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_mpt.cpp
+++ b/src/models/llm_build_mpt.cpp
@@ -136,4 +136,4 @@ llm_build_mpt::llm_build_mpt(const llama_model & model, const llm_graph_params &
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_mpt.h
+++ b/src/models/llm_build_mpt.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_mpt : public llm_graph_context {
														
 
															     llm_build_mpt(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_nemotron.cpp
+++ b/src/models/llm_build_nemotron.cpp
@@ -129,4 +129,4 @@ llm_build_nemotron::llm_build_nemotron(const llama_model & model, const llm_grap
 
															         res->t_logits = cur;
														
 
															         ggml_build_forward_expand(gf, cur);
														
 
															-}
														
 
															+}
														
--- a/src/models/llm_build_nemotron.h
+++ b/src/models/llm_build_nemotron.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_nemotron : public llm_graph_context {
														
 
															     llm_build_nemotron(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_nemotron_h.h
+++ b/src/models/llm_build_nemotron_h.h
@@ -10,4 +10,4 @@ struct llm_build_nemotron_h : public llm_graph_context_mamba {
 
															     ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, const int il);
														
 
															     ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn,
														
 
															         const llama_model & model, const int64_t n_embd_head, const int il);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_neo_bert.cpp
+++ b/src/models/llm_build_neo_bert.cpp
@@ -109,4 +109,4 @@ llm_build_neo_bert::llm_build_neo_bert(const llama_model & model, const llm_grap
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_neo_bert.h
+++ b/src/models/llm_build_neo_bert.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_neo_bert : public llm_graph_context {
														
 
															     llm_build_neo_bert(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_olmo.cpp
+++ b/src/models/llm_build_olmo.cpp
@@ -129,4 +129,4 @@ llm_build_olmo::llm_build_olmo(const llama_model & model, const llm_graph_params
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_olmo.h
+++ b/src/models/llm_build_olmo.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_olmo : public llm_graph_context {
														
 
															     llm_build_olmo(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_olmo2.h
+++ b/src/models/llm_build_olmo2.h
@@ -8,4 +8,4 @@
 
															 template <bool iswa>
														
 
															 struct llm_build_olmo2 : public llm_graph_context {
														
 
															     llm_build_olmo2(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_olmoe.cpp
+++ b/src/models/llm_build_olmoe.cpp
@@ -129,4 +129,4 @@ llm_build_olmoe::llm_build_olmoe(const llama_model & model, const llm_graph_para
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_olmoe.h
+++ b/src/models/llm_build_olmoe.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_olmoe : public llm_graph_context {
														
 
															     llm_build_olmoe(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_openai_moe_iswa.cpp
+++ b/src/models/llm_build_openai_moe_iswa.cpp
@@ -131,4 +131,4 @@ llm_build_openai_moe_iswa::llm_build_openai_moe_iswa(const llama_model & model,
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_openai_moe_iswa.h
+++ b/src/models/llm_build_openai_moe_iswa.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_openai_moe_iswa : public llm_graph_context {
														
 
															     llm_build_openai_moe_iswa(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_openelm.cpp
+++ b/src/models/llm_build_openelm.cpp
@@ -130,4 +130,4 @@ llm_build_openelm::llm_build_openelm(const llama_model & model, const llm_graph_
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_openelm.h
+++ b/src/models/llm_build_openelm.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_openelm : public llm_graph_context {
														
 
															     llm_build_openelm(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};
														
--- a/src/models/llm_build_orion.cpp
+++ b/src/models/llm_build_orion.cpp
@@ -128,4 +128,4 @@ llm_build_orion::llm_build_orion(const llama_model & model, const llm_graph_para
 
															         ggml_build_forward_expand(gf, cur);
														
 
															 }
														
 
															-;
														
 
															+;
														
--- a/src/models/llm_build_orion.h
+++ b/src/models/llm_build_orion.h
@@ -7,4 +7,4 @@
 
															 struct llm_build_orion : public llm_graph_context {
														
 
															     llm_build_orion(const llama_model & model, const llm_graph_params & params);
														
 
															-};
														
 
															+};