vor 1 Monat · 1be97831e4
--- a/src/unicode.cpp
+++ b/src/unicode.cpp
@@ -499,7 +499,7 @@ static std::vector<size_t> unicode_regex_split_custom_llama3(const std::string &
 
				 
			
 
				 // use std::wregex to split the text
			
 
				 static std::vector<size_t> unicode_regex_split_stl(const std::wstring & wtext, const std::wstring & regex_expr, const std::vector<size_t> & offsets) {
			
 
				-    std::wregex expr(regex_expr);
			
 
				+    std::wregex expr(regex_expr, std::regex_constants::optimize | std::regex_constants::nosubs);
			
 
				     std::vector<size_t> bpe_offsets; // store the offset of each word
			
 
				     bpe_offsets.reserve(offsets.size()); // Reserve memory for the approximate size
			
 
				     size_t start = 0;
			
@@ -529,7 +529,7 @@ static std::vector<size_t> unicode_regex_split_stl(const std::wstring & wtext, c
 
				 
			
 
				 // use std::regex to split the text
			
 
				 static std::vector<size_t> unicode_regex_split_stl(const std::string & text, const std::string & regex_expr, const std::vector<size_t> & offsets) {
			
 
				-    std::regex expr(regex_expr);
			
 
				+    std::regex expr(regex_expr, std::regex_constants::optimize | std::regex_constants::nosubs);
			
 
				     std::vector<size_t> bpe_offsets; // store the offset of each word
			
 
				     bpe_offsets.reserve(offsets.size()); // Reserve memory for the approximate size
			
 
				     size_t start = 0;