1 month ago · 7f2b2f3c77
--- a/docs/development/HOWTO-add-model.md
+++ b/docs/development/HOWTO-add-model.md
@@ -97,7 +97,7 @@ The model params and tensors layout must be defined in `llama.cpp` source files:
 
				 1. Define a new `llm_arch` enum value in `src/llama-arch.h`.
			
 
				 2. In `src/llama-arch.cpp`:
			
 
				     - Add the architecture name to the `LLM_ARCH_NAMES` map.
			
 
				-    - Add the tensor mappings to the `LLM_TENSOR_NAMES` map.
			
 
				+    - Add the list of model tensors to `llm_get_tensor_names` (you may also need to update `LLM_TENSOR_NAMES`).
			
 
				 3. Add any non-standard metadata loading in the `llama_model_loader` constructor in `src/llama-model-loader.cpp`.
			
 
				 4. If the model has a RoPE operation, add a case for the architecture in `llama_model_rope_type` function in `src/llama-model.cpp`.
			
 
				 
			
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -3,6 +3,7 @@
 
				 #include "ggml.h" // ggml_op
			
 
				 
			
 
				 #include <string>
			
 
				+#include <set>
			
 
				 
			
 
				 //
			
 
				 // gguf constants (sync with gguf.py)
			
@@ -316,6 +317,7 @@ enum llm_tensor {
 
				     LLM_TENSOR_DENSE_3_OUT,
			
 
				     LLM_TENSOR_OUTPUT,
			
 
				     LLM_TENSOR_OUTPUT_NORM,
			
 
				+    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
			
 
				     LLM_TENSOR_ROPE_FREQS,
			
 
				     LLM_TENSOR_ROPE_FACTORS_LONG,
			
 
				     LLM_TENSOR_ROPE_FACTORS_SHORT,
			
@@ -526,6 +528,10 @@ struct LLM_TN_IMPL {
 
				     const int bid;
			
 
				     const int xid;
			
 
				 
			
 
				+    const std::set<llm_tensor> model_tensors;
			
 
				+
			
 
				+    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
			
 
				+
			
 
				     std::string str() const;
			
 
				 
			
 
				     operator std::string() const {
			
@@ -547,11 +553,11 @@ struct LLM_TN {
 
				     llm_arch arch;
			
 
				 
			
 
				     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
			
 
				-        return { arch, tensor, suffix, bid, xid };
			
 
				+        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
			
 
				     }
			
 
				 
			
 
				     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
			
 
				-        return { arch, tensor, nullptr, bid, xid };
			
 
				+        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
			
 
				     }
			
 
				 };