
server : add_special option for tokenize endpoint (#7059)

Johan 1 year ago
parent
commit
911b3900dd

+ 1 - 1
examples/server/README.md

@@ -331,7 +331,7 @@ Notice that each `probs` is an array of length `n_probs`.
 
     `content`: Set the text to tokenize.
 
-    Note that a special `BOS` token is never inserted.
+    `add_special`: Boolean indicating if special tokens, i.e. `BOS`, should be inserted. Default: `false`
 
 - **POST** `/detokenize`: Convert tokens to text.
 
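For reference, a minimal client sketch of the new option (assumptions: a llama.cpp server already listening on `http://localhost:8080`, and the `tokens` response field used by the test steps below; the exact token ids are model-dependent):

```python
# Minimal sketch: compare /tokenize output with and without add_special.
# The base URL is an assumption; adjust it for your setup.
import asyncio
import aiohttp

BASE_URL = 'http://localhost:8080'

async def tokenize(content, add_special):
    async with aiohttp.ClientSession() as session:
        async with session.post(f'{BASE_URL}/tokenize',
                                json={'content': content,
                                      'add_special': add_special}) as response:
            assert response.status == 200
            return (await response.json())['tokens']

async def main():
    text = 'What is the capital of France ?'
    plain = await tokenize(text, add_special=False)
    special = await tokenize(text, add_special=True)
    # With add_special=true the model's BOS token should be prepended,
    # so the second list is expected to start with the BOS id.
    print('without BOS:', plain)
    print('with BOS:   ', special)

asyncio.run(main())
```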

+ 2 - 1
examples/server/server.cpp

@@ -3647,7 +3647,8 @@ int main(int argc, char ** argv) {
 
         std::vector<llama_token> tokens;
         if (body.count("content") != 0) {
-            tokens = ctx_server.tokenize(body["content"], false);
+            const bool add_special = json_value(body, "add_special", false);
+            tokens = ctx_server.tokenize(body["content"], add_special);
         }
         const json data = format_tokenizer_response(tokens);
         return res.set_content(data.dump(), "application/json; charset=utf-8");

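Note that `json_value` falls back to its third argument when the key is absent, so clients that never send `add_special` keep the previous behavior (no `BOS` inserted). A rough Python analogue of that lookup, for illustration only:

```python
# Rough analogue of json_value(body, "add_special", false): a missing
# key falls back to the default, which leaves old clients unchanged.
body = {"content": "What is the capital of France ?"}
add_special = body.get("add_special", False)
assert add_special is False
```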
+ 13 - 1
examples/server/tests/features/server.feature

@@ -7,6 +7,7 @@ Feature: llama.cpp server
     And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
     And   a model file test-model.gguf
     And   a model alias tinyllama-2
+    And   BOS token is 1
     And   42 as server seed
       # KV Cache corresponds to the total amount of tokens
       # that can be stored across all independent sequences: #4130
@@ -91,7 +92,18 @@ Feature: llama.cpp server
     """
     What is the capital of France ?
     """
-    Then tokens can be detokenize
+    Then tokens can be detokenized
+    And  tokens do not begin with BOS
+
+  Scenario: Tokenize w/ BOS
+    Given adding special tokens
+    When  tokenizing:
+    """
+    What is the capital of Germany?
+    """
+    Then  tokens begin with BOS
+    Given first token is removed
+    Then  tokens can be detokenized
 
   Scenario: Models available
     Given available models

+ 32 - 4
examples/server/tests/features/steps/steps.py

@@ -376,6 +376,11 @@ def step_seed(context, seed):
         context.seed.append(seed)
 
 
+@step('BOS token is {bos:d}')
+def step_bos_token(context, bos):
+    context.bos = bos
+
+
 @step('a prefix prompt')
 def step_prompt_prefix(context):
     context.prompt_prefix = context_text(context)
@@ -656,21 +661,29 @@ async def all_embeddings_are_generated(context):
         assert_embeddings(context.tasks_result.pop().pop())
 
 
+@step('adding special tokens')
+def step_tokenize_set_add_special(context):
+    context.tokenize_add_special = True
+
+
 @step('tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
     context.tokenized_text = context_text(context)
     async with aiohttp.ClientSession() as session:
+        tokenize_args = {
+            "content": context.tokenized_text,
+        }
+        if getattr(context, 'tokenize_add_special', None) is not None:
+            tokenize_args['add_special'] = context.tokenize_add_special
         async with session.post(f'{context.base_url}/tokenize',
-                                json={
-                                    "content": context.tokenized_text,
-                                }) as response:
+                                json=tokenize_args) as response:
             assert response.status == 200
             tokenize_json = await response.json()
             context.tokens = tokenize_json['tokens']
 
 
-@step('tokens can be detokenize')
+@step('tokens can be detokenized')
 @async_run_until_complete
 async def step_detokenize(context):
     assert len(context.tokens) > 0
@@ -685,6 +698,21 @@ async def step_detokenize(context):
             assert context.tokenized_text == detokenize_json['content'].strip()
 
 
+@step('tokens begin with BOS')
+def step_tokens_begin_with_bos(context):
+    assert context.tokens[0] == context.bos
+
+
+@step('tokens do not begin with BOS')
+def step_tokens_do_not_begin_with_bos(context):
+    assert context.tokens[0] != context.bos
+
+
+@step('first token is removed')
+def step_remove_first_token(context):
+    context.tokens = context.tokens[1:]
+
+
 @step('an OPTIONS request is sent from {origin}')
 @async_run_until_complete
 async def step_options_request(context, origin):
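
Outside of behave, the new "Tokenize w/ BOS" scenario boils down to the round trip below. This is a sketch under the same assumptions as above (base URL `http://localhost:8080`, `BOS` id `1` as declared for the test model), plus one more: that `/detokenize` accepts a JSON body with a `tokens` array and returns `content`, which is how the detokenize step reads its response.

```python
# Sketch of the "Tokenize w/ BOS" scenario as a standalone script.
import asyncio
import aiohttp

BASE_URL = 'http://localhost:8080'  # assumption: local test server
BOS = 1                             # assumption: BOS id of the test model

async def main():
    text = 'What is the capital of Germany?'
    async with aiohttp.ClientSession() as session:
        async with session.post(f'{BASE_URL}/tokenize',
                                json={'content': text,
                                      'add_special': True}) as response:
            assert response.status == 200
            tokens = (await response.json())['tokens']

        # add_special=true should have prepended the BOS token.
        assert tokens[0] == BOS

        # Strip BOS before detokenizing so the round trip matches the input.
        async with session.post(f'{BASE_URL}/detokenize',
                                json={'tokens': tokens[1:]}) as response:
            assert response.status == 200
            content = (await response.json())['content']

    assert text == content.strip()

asyncio.run(main())
```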