Georgi Gerganov 1 month ago
Parent
Commit
6783b11fb0
3 changed files with 12 additions and 9 deletions
  1. +3 -3 src/llama-arch.cpp
  2. +4 -3 src/llama-model.cpp
  3. +5 -3 src/models/lfm2.cpp

+ 3 - 3
src/llama-arch.cpp

@@ -2237,7 +2237,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SHORTCONV_INPROJ,  "blk.%d.shortconv.in_proj" },
             { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
             { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
-            { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT_NORM,       "token_embd_norm" }, // note: wrong tensor name
             { LLM_TENSOR_OUTPUT,            "output" },
         }
     },
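
The map key changes from LLM_TENSOR_TOKEN_EMBD_NORM to LLM_TENSOR_OUTPUT_NORM while the on-disk string stays "token_embd_norm": existing LFM2 GGUF files store the final output norm under that legacy name, so only the internal classification changes, hence the "wrong tensor name" note. A minimal standalone sketch of the name lookup, with illustrative types rather than the real llm_tensor/LLM_TENSOR_NAMES:

    #include <cstdio>
    #include <map>
    #include <string>

    // illustrative enum/map only; the real code keys per-arch maps by llm_tensor
    enum demo_tensor { DEMO_TOKEN_EMBD, DEMO_OUTPUT_NORM, DEMO_OUTPUT };

    int main() {
        const std::map<demo_tensor, std::string> names = {
            { DEMO_TOKEN_EMBD,  "token_embd"      },
            { DEMO_OUTPUT_NORM, "token_embd_norm" }, // legacy on-disk name, kept for compatibility
            { DEMO_OUTPUT,      "output"          },
        };
        // the loader effectively appends a ".weight" suffix when resolving the tensor:
        printf("%s.weight\n", names.at(DEMO_OUTPUT_NORM).c_str()); // -> token_embd_norm.weight
    }
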
@@ -2259,7 +2259,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SHORTCONV_INPROJ,  "blk.%d.shortconv.in_proj" },
             { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
             { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
-            { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT_NORM,       "token_embd_norm" }, // note: wrong tensor name
             { LLM_TENSOR_FFN_GATE_INP,      "blk.%d.ffn_gate_inp" },
             { LLM_TENSOR_FFN_GATE_EXPS,     "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS,     "blk.%d.ffn_down_exps" },
@@ -2490,8 +2490,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
 static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
     {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
-    {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
     {LLM_TENSOR_TOKEN_TYPES,                {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_INPUT, GGML_OP_MUL}},
     {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
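
LLM_TENSOR_INFOS records, for each tensor kind, which layer group it belongs to and which ggml op consumes it, information that feeds into decisions such as backend buffer placement. A norm weight is applied as an element-wise multiply inside RMS norm, not as an embedding row lookup, so GGML_OP_MUL is the accurate op for LLM_TENSOR_TOKEN_EMBD_NORM. A hedged sketch of how such a weight is consumed, mirroring what build_norm with LLM_NORM_RMS does (exact internals may differ):

    #include "ggml.h"

    // normalize, then scale element-wise by the learned weight:
    // the weight tensor is consumed by GGML_OP_MUL, not GGML_OP_GET_ROWS
    static ggml_tensor * rms_norm_scale(ggml_context * ctx, ggml_tensor * x, ggml_tensor * w, float eps) {
        ggml_tensor * cur = ggml_rms_norm(ctx, x, eps); // GGML_OP_RMS_NORM
        return ggml_mul(ctx, cur, w);                   // GGML_OP_MUL consumes w
    }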

+ 4 - 3
src/llama-model.cpp

@@ -6133,9 +6133,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_LFM2:
             case LLM_ARCH_LFM2MOE:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD,      "weight"), {n_embd, n_vocab}, 0);
-                    tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
-                    output   = create_tensor(tn(LLM_TENSOR_OUTPUT,          "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+
+                    output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
 
                     if (output == NULL) {
                         output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
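
The norm is now loaded into the model's output_norm field instead of tok_norm, matching its actual role as the final pre-head normalization. The output tensor stays optional (TENSOR_NOT_REQUIRED): when the file ships no separate lm_head, the loader reuses the token embedding matrix (TENSOR_DUPLICATED), i.e. tied weights. A standalone numeric sketch of that tying, with hypothetical shapes:

    #include <cstdio>
    #include <vector>

    // with tied weights, logits are dot products of the hidden state
    // against rows of the embedding table itself (no dedicated output matrix)
    std::vector<float> logits(const std::vector<std::vector<float>> & tok_embd,
                              const std::vector<float> & hidden) {
        std::vector<float> out(tok_embd.size(), 0.0f);
        for (size_t v = 0; v < tok_embd.size(); ++v) {
            for (size_t i = 0; i < hidden.size(); ++i) {
                out[v] += tok_embd[v][i] * hidden[i]; // dot(hidden, embedding row v)
            }
        }
        return out;
    }

    int main() {
        std::vector<std::vector<float>> embd = {{1, 0}, {0, 1}, {1, 1}}; // 3 tokens, n_embd = 2
        std::vector<float> h = {0.5f, 2.0f};
        for (float l : logits(embd, h)) printf("%f\n", l);
    }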

+ 5 - 3
src/models/lfm2.cpp

@@ -9,6 +9,8 @@ llm_build_lfm2::llm_build_lfm2(const llama_model & model, const llm_graph_params
     ggml_tensor * cur = build_inp_embd(model.tok_embd);
     cb(cur, "model.embed_tokens", -1);
 
+    ggml_build_forward_expand(gf, cur);
+
     ggml_tensor * inp_pos     = build_inp_pos();
     auto *        inp_hybrid  = build_inp_mem_hybrid();
     ggml_tensor * inp_out_ids = build_inp_out_ids();
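
The new ggml_build_forward_expand(gf, cur) call pins the raw input-embedding node into the graph immediately, presumably so it is always scheduled (and visible to graph callbacks) regardless of how the rest of the graph is wired. A hedged usage sketch of the same mechanism:

    #include "ggml.h"

    // illustrative only: expanding a tensor appends it and its not-yet-visited
    // parents to gf, so both results below are computed even if unused later
    static void build(ggml_context * ctx, ggml_cgraph * gf, ggml_tensor * x) {
        ggml_tensor * doubled = ggml_scale(ctx, x, 2.0f);
        ggml_tensor * squared = ggml_sqr(ctx, x);
        ggml_build_forward_expand(gf, doubled); // kept even if nothing else uses it
        ggml_build_forward_expand(gf, squared);
    }
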
@@ -40,12 +42,12 @@ llm_build_lfm2::llm_build_lfm2(const llama_model & model, const llm_graph_params
         cur = ggml_add(ctx0, cur, ffn_out);
     }
 
-    cur = build_norm(cur, model.tok_norm, NULL, LLM_NORM_RMS, -1);
-    cb(cur, "model.embedding_norm", -1);
+    cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1);
+    cb(cur, "result_norm", -1);
     res->t_embd = cur;
 
     cur = build_lora_mm(model.output, cur);
-    cb(cur, "lm_head", -1);
+    cb(cur, "result_output", -1);
 
     res->t_logits = cur;
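
The callback labels change from the LFM2-specific "model.embedding_norm"/"lm_head" to the "result_norm"/"result_output" names used by the other graph builders, which matters to tooling that locates these tensors by name. A hedged sketch of such a match (the helper is illustrative):

    #include <cstring>
    #include "ggml.h"

    // e.g. inside an eval callback, pick out the final-norm tensor by its canonical name
    static bool is_result_norm(const ggml_tensor * t) {
        return std::strcmp(ggml_get_name(t), "result_norm") == 0;
    }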