
server : add min_p param (#3877)

* Update server.cpp with min_p after it was introduced in https://github.com/ggerganov/llama.cpp/pull/3841

* Use spaces instead of tabs

* Update index.html.hpp after running deps.sh

* Fix test - fix line ending
Mihai 2 years ago
parent
commit
57ad015dc3

+ 2 - 0
examples/server/README.md

@@ -122,6 +122,8 @@ node index.js
 
     `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
 
+    `min_p`: The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
+
     `n_predict`: Set the maximum number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. (default: -1, -1 = infinity).
 
     `n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded.
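For reference, here is a minimal standalone sketch of what the `min_p` option described above does: keep only tokens whose probability is at least `min_p` times the probability of the most likely token. The names `Candidate` and `min_p_filter` are illustrative only, not the llama.cpp API; see ggerganov/llama.cpp#3841 for the actual sampler.

```cpp
// Illustrative sketch of min-p filtering (not the llama.cpp implementation).
#include <algorithm>
#include <cstdio>
#include <vector>

struct Candidate {
    int   id; // token id
    float p;  // normalized probability
};

static void min_p_filter(std::vector<Candidate> & cands, float min_p) {
    if (cands.empty() || min_p <= 0.0f) {
        return; // min_p = 0 disables the filter
    }
    // probability of the most likely token
    float p_max = 0.0f;
    for (const auto & c : cands) {
        p_max = std::max(p_max, c.p);
    }
    const float threshold = min_p * p_max;
    // drop every candidate below the relative threshold
    cands.erase(std::remove_if(cands.begin(), cands.end(),
                    [threshold](const Candidate & c) { return c.p < threshold; }),
                cands.end());
}

int main() {
    std::vector<Candidate> cands = {{0, 0.60f}, {1, 0.25f}, {2, 0.10f}, {3, 0.02f}};
    min_p_filter(cands, 0.05f); // default from the README: 0.05
    for (const auto & c : cands) {
        // token 3 is removed: 0.02 < 0.05 * 0.60
        std::printf("token %d  p=%.2f\n", c.id, c.p);
    }
}
```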

File diff suppressed because it is too large
+ 1097 - 1125
examples/server/index.html.hpp


+ 2 - 0
examples/server/public/index.html

@@ -219,6 +219,7 @@
       repeat_penalty: 1.18, // 1.0 = disabled
       top_k: 40, // <= 0 to use vocab size
       top_p: 0.5, // 1.0 = disabled
+      min_p: 0.05, // 0 = disabled
       tfs_z: 1.0, // 1.0 = disabled
       typical_p: 1.0, // 1.0 = disabled
       presence_penalty: 0.0, // 0.0 = disabled
@@ -744,6 +745,7 @@
             ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
             ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
             ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
+            ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
           </fieldset>
           <details>
             <summary>More options</summary>

+ 2 - 0
examples/server/server.cpp

@@ -679,6 +679,7 @@ struct llama_server_context
         slot->params.n_predict        = json_value(data, "n_predict",         default_params.n_predict);
         slot->sparams.top_k           = json_value(data, "top_k",             default_sparams.top_k);
         slot->sparams.top_p           = json_value(data, "top_p",             default_sparams.top_p);
+        slot->sparams.min_p           = json_value(data, "min_p",             default_sparams.min_p);
         slot->sparams.tfs_z           = json_value(data, "tfs_z",             default_sparams.tfs_z);
         slot->sparams.typical_p       = json_value(data, "typical_p",         default_sparams.typical_p);
         slot->sparams.temp            = json_value(data, "temperature",       default_sparams.temp);
@@ -1113,6 +1114,7 @@ struct llama_server_context
             {"temp",              slot.sparams.temp},
             {"top_k",             slot.sparams.top_k},
             {"top_p",             slot.sparams.top_p},
+            {"min_p",             slot.sparams.min_p},
             {"tfs_z",             slot.sparams.tfs_z},
             {"typical_p",         slot.sparams.typical_p},
             {"repeat_last_n",     slot.sparams.penalty_last_n},

Some files were not shown because too many files changed in this diff