@@ -1182,7 +1182,7 @@ struct server_context {
 
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, params.special);
         slot.sampled = result.tok;
 
         // search stop word and delete it
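
For context: the third argument of llama.cpp's common `llama_token_to_piece` helper selects whether special/control tokens (BOS, EOS, etc.) are rendered as text, so the hunk above switches the hard-coded `false` for the configurable `params.special` setting. Below is a minimal sketch of the effect, assuming a valid `llama_context * ctx` and a sampled token; `render_special` is a stand-in name for whatever value `params.special` carries in the server.

    #include <string>
    #include "common.h"  // llama.cpp common helpers; declares llama_token_to_piece(ctx, token, special)

    // Hypothetical helper, not part of the patch: converts one sampled token to text.
    static std::string render_token(llama_context * ctx, llama_token tok, bool render_special) {
        // render_special == false: control tokens are skipped and yield an empty piece
        // render_special == true:  control tokens are emitted in their text form, e.g. "</s>"
        return llama_token_to_piece(ctx, tok, render_special);
    }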