@@ -617,6 +617,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
params.numa = true;
} else if (arg == "--verbose-prompt") {
params.verbose_prompt = true;
+ } else if (arg == "--no-display-prompt") {
+ params.display_prompt = false;
} else if (arg == "-r" || arg == "--reverse-prompt") {
if (++i >= argc) {
invalid_param = true;
@@ -936,11 +938,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n");
printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu);
#endif
+ printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false");
+ printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false");
printf(" -gan N, --grp-attn-n N\n");
printf(" group-attention factor (default: %d)\n", params.grp_attn_n);
printf(" -gaw N, --grp-attn-w N\n");
printf(" group-attention width (default: %.1f)\n", (double)params.grp_attn_w);
- printf(" --verbose-prompt print prompt before generation\n");
printf(" -dkvc, --dump-kv-cache\n");
printf(" verbose print of the KV cache\n");
printf(" -nkvo, --no-kv-offload\n");
@@ -1582,6 +1585,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p);
fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false");
+ fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false");
}

//
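
For context, a minimal consumer-side sketch of how a front end such as examples/main might honor the new flag when echoing the prompt. This is not part of the patch above: ctx, embd_inp, and the echo loop are assumptions modeled on the existing example code; only params.display_prompt comes from this change.

    // Sketch only (assumed integration point, not part of this patch):
    // skip echoing the prompt text when --no-display-prompt was given.
    // ctx is the caller's llama_context, embd_inp its tokenized prompt.
    if (params.display_prompt) {
        for (const llama_token id : embd_inp) {
            printf("%s", llama_token_to_piece(ctx, id).c_str());
        }
    }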