Jelajahi Sumber

server : add min_p param (#3877)

* Update server.cpp with min_p after it was introduced in https://github.com/ggerganov/llama.cpp/pull/3841

* Use spaces instead of tabs

* Update index.html.hpp after running deps.sh

* Fix test - fix line ending
Mihai 2 tahun lalu
induk
melakukan
57ad015dc3

+ 2 - 0
examples/server/README.md

@@ -122,6 +122,8 @@ node index.js
 
     `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
 
+    `min_p`: The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
+
     `n_predict`: Set the maximum number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. (default: -1, -1 = infinity).
 
     `n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded.

File diff ditekan karena terlalu besar
+ 1097 - 1125
examples/server/index.html.hpp


+ 2 - 0
examples/server/public/index.html

@@ -219,6 +219,7 @@
       repeat_penalty: 1.18, // 1.0 = disabled
       top_k: 40, // <= 0 to use vocab size
       top_p: 0.5, // 1.0 = disabled
+      min_p: 0.05, // 0 = disabled
       tfs_z: 1.0, // 1.0 = disabled
       typical_p: 1.0, // 1.0 = disabled
       presence_penalty: 0.0, // 0.0 = disabled
@@ -744,6 +745,7 @@
             ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
             ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
             ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
+            ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
           </fieldset>
           <details>
             <summary>More options</summary>

+ 2 - 0
examples/server/server.cpp

@@ -679,6 +679,7 @@ struct llama_server_context
         slot->params.n_predict        = json_value(data, "n_predict",         default_params.n_predict);
         slot->sparams.top_k           = json_value(data, "top_k",             default_sparams.top_k);
         slot->sparams.top_p           = json_value(data, "top_p",             default_sparams.top_p);
+        slot->sparams.min_p           = json_value(data, "min_p",             default_sparams.min_p);
         slot->sparams.tfs_z           = json_value(data, "tfs_z",             default_sparams.tfs_z);
         slot->sparams.typical_p       = json_value(data, "typical_p",         default_sparams.typical_p);
         slot->sparams.temp            = json_value(data, "temperature",       default_sparams.temp);
@@ -1113,6 +1114,7 @@ struct llama_server_context
             {"temp",              slot.sparams.temp},
             {"top_k",             slot.sparams.top_k},
             {"top_p",             slot.sparams.top_p},
+            {"min_p",             slot.sparams.min_p},
             {"tfs_z",             slot.sparams.tfs_z},
             {"typical_p",         slot.sparams.typical_p},
             {"repeat_last_n",     slot.sparams.penalty_last_n},

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini