
server : add min_p param (#3877)

* Update server.cpp with min_p after it was introduced in https://github.com/ggerganov/llama.cpp/pull/3841

* Use spaces instead of tabs

* Update index.html.hpp after running deps.sh

* Fix test - fix line ending
Mihai 2 years ago
parent
commit
57ad015dc3

+ 2 - 0
examples/server/README.md

@@ -122,6 +122,8 @@ node index.js
 
     `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
 
+    `min_p`: The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
+
     `n_predict`: Set the maximum number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. (default: -1, -1 = infinity).
 
     `n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded.
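For reference, here is a minimal standalone sketch of what the `min_p` option described above does: keep only tokens whose probability is at least `min_p` times the probability of the most likely token. The names `Candidate` and `min_p_filter` are illustrative only, not the llama.cpp API; see ggerganov/llama.cpp#3841 for the actual sampler.

```cpp
// Illustrative sketch of min-p filtering (not the llama.cpp implementation).
#include <algorithm>
#include <cstdio>
#include <vector>

struct Candidate {
    int   id; // token id
    float p;  // normalized probability
};

static void min_p_filter(std::vector<Candidate> & cands, float min_p) {
    if (cands.empty() || min_p <= 0.0f) {
        return; // min_p = 0 disables the filter
    }
    // probability of the most likely token
    float p_max = 0.0f;
    for (const auto & c : cands) {
        p_max = std::max(p_max, c.p);
    }
    const float threshold = min_p * p_max;
    // drop every candidate below the relative threshold
    cands.erase(std::remove_if(cands.begin(), cands.end(),
                    [threshold](const Candidate & c) { return c.p < threshold; }),
                cands.end());
}

int main() {
    std::vector<Candidate> cands = {{0, 0.60f}, {1, 0.25f}, {2, 0.10f}, {3, 0.02f}};
    min_p_filter(cands, 0.05f); // default from the README: 0.05
    for (const auto & c : cands) {
        // token 3 is removed: 0.02 < 0.05 * 0.60
        std::printf("token %d  p=%.2f\n", c.id, c.p);
    }
}
```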

File diff suppressed because it is too large
+ 1097 - 1125
examples/server/index.html.hpp


+ 2 - 0
examples/server/public/index.html

@@ -219,6 +219,7 @@
       repeat_penalty: 1.18, // 1.0 = disabled
       top_k: 40, // <= 0 to use vocab size
       top_p: 0.5, // 1.0 = disabled
+      min_p: 0.05, // 0 = disabled
       tfs_z: 1.0, // 1.0 = disabled
       typical_p: 1.0, // 1.0 = disabled
       presence_penalty: 0.0, // 0.0 = disabled
@@ -744,6 +745,7 @@
             ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
             ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
             ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
+            ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
           </fieldset>
           <details>
             <summary>More options</summary>

+ 2 - 0
examples/server/server.cpp

@@ -679,6 +679,7 @@ struct llama_server_context
         slot->params.n_predict        = json_value(data, "n_predict",         default_params.n_predict);
         slot->sparams.top_k           = json_value(data, "top_k",             default_sparams.top_k);
         slot->sparams.top_p           = json_value(data, "top_p",             default_sparams.top_p);
+        slot->sparams.min_p           = json_value(data, "min_p",             default_sparams.min_p);
         slot->sparams.tfs_z           = json_value(data, "tfs_z",             default_sparams.tfs_z);
         slot->sparams.typical_p       = json_value(data, "typical_p",         default_sparams.typical_p);
         slot->sparams.temp            = json_value(data, "temperature",       default_sparams.temp);
@@ -1113,6 +1114,7 @@ struct llama_server_context
             {"temp",              slot.sparams.temp},
             {"top_k",             slot.sparams.top_k},
             {"top_p",             slot.sparams.top_p},
+            {"min_p",             slot.sparams.min_p},
             {"tfs_z",             slot.sparams.tfs_z},
             {"typical_p",         slot.sparams.typical_p},
             {"repeat_last_n",     slot.sparams.penalty_last_n},

Some files were not shown because too many files changed in this diff