|
|
@@ -30,6 +30,7 @@ static void print_usage_information(const char * argv0, FILE * stream) {
|
|
|
fprintf(stream, " --stdin read prompt from standard input.\n");
|
|
|
fprintf(stream, " --no-bos do not ever add a BOS token to the prompt, even if normally the model uses a BOS token.\n");
|
|
|
fprintf(stream, " --log-disable disable logs. Makes stderr quiet when loading the model.\n");
|
|
|
+ fprintf(stream, " --show-count print the total number of tokens.\n");
|
|
|
}
|
|
|
|
|
|
static void llama_log_callback_null(ggml_log_level level, const char * text, void * user_data) {
|
|
|
@@ -195,6 +196,7 @@ int main(int raw_argc, char ** raw_argv) {
|
|
|
bool printing_ids = false;
|
|
|
bool no_bos = false;
|
|
|
bool disable_logging = false;
|
|
|
+ bool show_token_count = false;
|
|
|
const char * model_path = NULL;
|
|
|
const char * prompt_path = NULL;
|
|
|
const char * prompt_arg = NULL;
|
|
|
@@ -249,6 +251,9 @@ int main(int raw_argc, char ** raw_argv) {
|
|
|
else if (arg == "--log-disable") {
|
|
|
disable_logging = true;
|
|
|
}
|
|
|
+ else if (arg == "--show-count") {
|
|
|
+ show_token_count = true;
|
|
|
+ }
|
|
|
else {
|
|
|
fprintf(stderr, "Error: unknown option '%s'\n", argv[iarg].c_str());
|
|
|
return 1;
|
|
|
@@ -384,6 +389,9 @@ int main(int raw_argc, char ** raw_argv) {
|
|
|
printf("]\n");
|
|
|
}
|
|
|
|
|
|
+ if (show_token_count) {
|
|
|
+ printf("Total number of tokens: %zu\n", tokens.size());
|
|
|
+ }
|
|
|
// silence valgrind
|
|
|
llama_free(ctx);
|
|
|
llama_free_model(model);
|