
common : add --system-prompt parameter, replace behavior of -p in conversation mode (#12131)

* Add --system-prompt parameter

* use user-defined system prompt

* clarify

Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>

* add warning

* clarify

Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>

---------

Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
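
In conversation mode (-cnv), the system message now comes from the new -sys/--system-prompt flag; -p no longer doubles as the system prompt there and instead triggers a warning. A hypothetical invocation (the llama-cli binary name and model file are illustrative, not taken from this commit) might look like: llama-cli -m model.gguf -cnv -sys "You are a helpful assistant."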
Sigbjørn Skjæret, 10 months ago
Commit 45a8e76745
3 changed files with 14 additions and 4 deletions:
  1. common/arg.cpp (+8, -3)
  2. common/common.h (+1, -0)
  3. examples/main/main.cpp (+5, -1)

common/arg.cpp (+8, -3)

@@ -813,13 +813,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_env("LLAMA_ARG_FLASH_ATTN"));
     add_opt(common_arg(
         {"-p", "--prompt"}, "PROMPT",
-        ex == LLAMA_EXAMPLE_MAIN
-            ? "prompt to start generation with\nif -cnv is set, this will be used as system prompt"
-            : "prompt to start generation with",
+        "prompt to start generation with; for system message, use -sys",
         [](common_params & params, const std::string & value) {
             params.prompt = value;
         }
     ).set_excludes({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"-sys", "--system-prompt"}, "PROMPT",
+        "system prompt to use with model (if applicable, depending on chat template)",
+        [](common_params & params, const std::string & value) {
+            params.system_prompt = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--no-perf"},
         string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),

common/common.h (+1, -0)

@@ -261,6 +261,7 @@ struct common_params {
     std::string hf_repo              = ""; // HF repo                                                       // NOLINT
     std::string hf_file              = ""; // HF file                                                       // NOLINT
     std::string prompt               = "";                                                                  // NOLINT
+    std::string system_prompt        = "";                                                                  // NOLINT
     std::string prompt_file          = ""; // store the external prompt file name                           // NOLINT
     std::string path_prompt_cache    = ""; // path to file for saving/loading prompt eval state             // NOLINT
     std::string input_prefix         = ""; // string to prefix user inputs with                             // NOLINT

examples/main/main.cpp (+5, -1)

@@ -219,6 +219,10 @@ int main(int argc, char ** argv) {
     // print chat template example in conversation mode
     if (params.conversation_mode) {
         if (params.enable_chat_template) {
+            if (!params.prompt.empty()) {
+                LOG_WRN("*** User-specified prompt in conversation mode will be ignored, did you mean to set --system-prompt (-sys) instead?\n");
+            }
+
             LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str());
         } else {
             LOG_INF("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
@@ -276,7 +280,7 @@ int main(int argc, char ** argv) {
     {
         auto prompt = (params.conversation_mode && params.enable_chat_template)
             // format the system prompt in conversation mode (fallback to default if empty)
-            ? chat_add_and_format("system", params.prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.prompt)
+            ? chat_add_and_format("system", params.system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt)
             // otherwise use the prompt as is
             : params.prompt;
         if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
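
To isolate the behavioral change, here is a hedged sketch of the fallback above as a standalone function. The DEFAULT_SYSTEM_MESSAGE value is assumed for illustration; only the empty-check-and-fallback shape comes from the diff. In conversation mode the system message is the user's -sys value when given, otherwise the built-in default, while params.prompt stays reserved for non-conversation generation.

#include <iostream>
#include <string>

// stand-in for the constant used in examples/main/main.cpp (value assumed)
static const std::string DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";

// mirrors the ternary in the diff: fall back to the default system message
// only when the user passed no -sys value
static std::string pick_system_message(const std::string & system_prompt) {
    return system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : system_prompt;
}

int main() {
    std::cout << pick_system_message("") << "\n";                 // -> built-in default
    std::cout << pick_system_message("Reply in French.") << "\n"; // -> user -sys value
}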