瀏覽代碼

tokenize example: Respect normal add BOS token behavior (#4126)

Allow building with Makefile
Kerfuffle 2 年之前
父節點
當前提交
28a2e6e7d4
共有 2 個文件被更改,包括 5 次插入和 2 次刪除
  1. +4 -1
      Makefile
  2. +1 -1
      examples/tokenize/tokenize.cpp

+ 4 - 1
Makefile

@@ -2,7 +2,7 @@
 BUILD_TARGETS = \
 	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
 	simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search  \
-	speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
+	speculative infill tokenize benchmark-matmult parallel finetune export-lora tests/test-c.o
 
 # Binaries only useful for tests
 TEST_TARGETS = \
@@ -594,6 +594,9 @@ infill: examples/infill/infill.cpp                            ggml.o llama.o $(C
 simple: examples/simple/simple.cpp                            ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
+tokenize: examples/tokenize/tokenize.cpp                      ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 batched: examples/batched/batched.cpp                         ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 

+ 1 - 1
examples/tokenize/tokenize.cpp

@@ -26,7 +26,7 @@ int main(int argc, char ** argv) {
     llama_context_params ctx_params = llama_context_default_params();
     llama_context * ctx = llama_new_context_with_model(model, ctx_params);
 
-    const bool add_bos = true;
+    const bool add_bos = llama_should_add_bos_token(model);
 
     std::vector<llama_token> tokens;