1 год назад · 37246b1031
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2328,12 +2328,12 @@ std::vector<llama_token> llama_tokenize(
 
															     return result;
														
 
															 }
														
 
															-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
														
 
															+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
														
 
															     std::vector<char> result(8, 0);
														
 
															-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
														
 
															+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
														
 
															     if (n_tokens < 0) {
														
 
															         result.resize(-n_tokens);
														
 
															-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
														
 
															+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
														
 
															         GGML_ASSERT(check == -n_tokens);
														
 
															     } else {
														
 
															         result.resize(n_tokens);
														
--- a/common/common.h
+++ b/common/common.h
@@ -237,11 +237,12 @@ std::vector<llama_token> llama_tokenize(
 
															                         bool   add_special,
														
 
															                         bool   parse_special = false);
														
 
															-// tokenizes a token into a piece
														
 
															+// converts a token into its text piece, optionally rendering special/control tokens
														
 
															 // should work similar to Python's `tokenizer.id_to_piece`
														
 
															 std::string llama_token_to_piece(
														
 
															         const struct llama_context * ctx,
														
 
															-                       llama_token   token);
														
 
															+                       llama_token   token,
														
 
															+                       bool          special = true);
														
 
															 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
														
 
															 //       that takes into account the tokenizer type and decides how to handle the leading space
														
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1117,7 +1117,7 @@ struct server_context {
 
															     bool process_token(completion_token_output & result, server_slot & slot) {
														
 
															         // remember which tokens were sampled - used for repetition penalties during sampling
														
 
															-        const std::string token_str = llama_token_to_piece(ctx, result.tok);
														
 
															+        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
														
 
															         slot.sampled = result.tok;
														
 
															         // search stop word and delete it