Parcourir la source

train : add general name (#6752)

* llama : make general.name optional

* train: Add 'general.name' to model metadata

Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>

---------

Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Austin il y a 1 an
Parent
commit
8b1b1f4982
1 fichier modifié avec 3 ajouts et 0 suppression
  1. 3 0
      examples/train-text-from-scratch/train-text-from-scratch.cpp

+ 3 - 0
examples/train-text-from-scratch/train-text-from-scratch.cpp

@@ -73,6 +73,7 @@ struct my_llama_model {
 static const char * LLM_KV_TRAINING_TYPE_TRAIN_MODEL     = "train_model";
 static const char * LLM_KV_TRAINING_TYPE                 = "training.type";
 
+static const char * LLM_KV_GENERAL_NAME                = "general.name";
 static const char * LLM_KV_GENERAL_ARCHITECTURE        = "general.architecture";
 static const char * LLM_KV_GENERAL_FILE_TYPE           = "general.file_type";
 
@@ -529,6 +530,7 @@ static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_contex
 
 static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vocab_model, struct my_llama_model * model) {
     const char * arch = "llama";
+
     enum llama_ftype ftype = LLAMA_FTYPE_ALL_F32;
 
     std::vector<char> keybuf;
@@ -540,6 +542,7 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
 
     // set arch
     gguf_set_val_str(fctx, LLM_KV_GENERAL_ARCHITECTURE, arch);
+    gguf_set_val_str(fctx, LLM_KV_GENERAL_NAME, arch);
     gguf_set_val_u32(fctx, LLM_KV_GENERAL_FILE_TYPE, ftype);
 
     // set hparams