
arch: refactor LLM_TENSOR_NAMES (#18051)

* arch: refactor LLM_TENSOR_NAMES

* update docs

* typo

* fix LLM_ARCH_NEMOTRON_H_MOE

* show more meaningful error message on missing tensor

* fixed and tested LLM_ARCH_NEMOTRON_H_MOE
Xuan-Son Nguyen, 1 month ago
commit 7f2b2f3c77
3 changed files with 1886 additions and 2281 deletions
  1. docs/development/HOWTO-add-model.md (+1 −1)
  2. src/llama-arch.cpp (+1877 −2278)
  3. src/llama-arch.h (+8 −2)

+ 1 - 1
docs/development/HOWTO-add-model.md

@@ -97,7 +97,7 @@ The model params and tensors layout must be defined in `llama.cpp` source files:
 1. Define a new `llm_arch` enum value in `src/llama-arch.h`.
 2. In `src/llama-arch.cpp`:
     - Add the architecture name to the `LLM_ARCH_NAMES` map.
-    - Add the tensor mappings to the `LLM_TENSOR_NAMES` map.
+    - Add the list of model tensors to `llm_get_tensor_names` (you may also need to update `LLM_TENSOR_NAMES`).
 3. Add any non-standard metadata loading in the `llama_model_loader` constructor in `src/llama-model-loader.cpp`.
 4. If the model has a RoPE operation, add a case for the architecture in `llama_model_rope_type` function in `src/llama-model.cpp`.
 
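As a hedged illustration of step 2, the per-architecture tensor list added to `llm_get_tensor_names` might look like the sketch below. The function's exact signature is an assumption (only its name appears in the doc change); the return type `std::set<llm_tensor>` is inferred from the `model_tensors` member added to `LLM_TN_IMPL` in `src/llama-arch.h`, and the tensor set shown for `LLM_ARCH_LLAMA` is illustrative, not exhaustive.

    // sketch only: assumed signature; returns the set of tensors an
    // architecture defines, consumed by LLM_TN_IMPL::model_tensors below
    static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
        switch (arch) {
            case LLM_ARCH_LLAMA:
                return {
                    LLM_TENSOR_TOKEN_EMBD,
                    LLM_TENSOR_OUTPUT_NORM,
                    LLM_TENSOR_OUTPUT,
                    LLM_TENSOR_ATTN_NORM,
                    LLM_TENSOR_ATTN_Q,
                    LLM_TENSOR_ATTN_K,
                    LLM_TENSOR_ATTN_V,
                    LLM_TENSOR_ATTN_OUT,
                    LLM_TENSOR_FFN_NORM,
                    LLM_TENSOR_FFN_GATE,
                    LLM_TENSOR_FFN_DOWN,
                    LLM_TENSOR_FFN_UP,
                };
            // ... one case per architecture ...
            default:
                return {};
        }
    }

Keeping per-architecture data as a plain set rather than a full name map would let the format strings live in a single shared `LLM_TENSOR_NAMES` table, which plausibly accounts for the large rewrite of `src/llama-arch.cpp` (+1877/−2278) in this commit.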

The diff for this file is not shown because it is too large
+ 1877 - 2278
src/llama-arch.cpp


+ 8 - 2
src/llama-arch.h

@@ -3,6 +3,7 @@
 #include "ggml.h" // ggml_op
 
 #include <string>
+#include <set>
 
 //
 // gguf constants (sync with gguf.py)
@@ -316,6 +317,7 @@ enum llm_tensor {
     LLM_TENSOR_DENSE_3_OUT,
     LLM_TENSOR_OUTPUT,
     LLM_TENSOR_OUTPUT_NORM,
+    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
     LLM_TENSOR_ROPE_FREQS,
     LLM_TENSOR_ROPE_FACTORS_LONG,
     LLM_TENSOR_ROPE_FACTORS_SHORT,
@@ -526,6 +528,10 @@ struct LLM_TN_IMPL {
     const int bid;
     const int xid;
 
+    const std::set<llm_tensor> model_tensors;
+
+    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
+
     std::string str() const;
 
     operator std::string() const {
@@ -547,11 +553,11 @@ struct LLM_TN {
     llm_arch arch;
 
     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
-        return { arch, tensor, suffix, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
     }
 
     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
-        return { arch, tensor, nullptr, bid, xid };
+        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
     }
 };
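Below is a hedged sketch of how the new out-of-line constructor could be defined in `src/llama-arch.cpp`, consistent with the commit note about a more meaningful error message on a missing tensor. The `arch`, `tensor`, and `suffix` member names are assumed (only `bid` and `xid` appear in the hunk above), and `llm_get_tensor_names` is the assumed helper from the doc change, not a confirmed name in this file.

    // sketch only: populate model_tensors from the per-arch tensor list
    LLM_TN_IMPL::LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid) :
        arch(arch), tensor(tensor), suffix(suffix), bid(bid), xid(xid),
        model_tensors(llm_get_tensor_names(arch)) {}

    // str() can then check membership before formatting, so a tensor the
    // architecture never defines is reported clearly instead of failing
    // later with a generic lookup error, e.g. (illustrative, not the
    // actual code):
    //
    //     if (model_tensors.find(tensor) == model_tensors.end()) {
    //         throw std::runtime_error("architecture does not define this tensor");
    //     }

Note that once a user-declared constructor exists, the old aggregate initialization `{ arch, tensor, suffix, bid, xid }` no longer compiles, which is why both `LLM_TN::operator()` overloads in the hunk above are rewritten to call `LLM_TN_IMPL(...)` explicitly; routing construction through one place also guarantees `model_tensors` is always populated.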
 

Some files were not shown because too many files were changed in this diff