il y a 9 mois · c80a7759da
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1807,6 +1807,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                         || t.first == "<end_of_turn>"
			
 
				                         || t.first == "<|endoftext|>"
			
 
				                         || t.first == "<EOT>"
			
 
				+                        || t.first == "_<EOT>"
			
 
				                         || t.first == "<｜end▁of▁sentence｜>" // DeepSeek
			
 
				                    ) {
			
 
				                     special_eot_id = t.second;
			
@@ -1839,6 +1840,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                         || t.first == "<fim-prefix>"
			
 
				                         || t.first == "<｜fim▁begin｜>" // DeepSeek
			
 
				                         || t.first == "<PRE>"
			
 
				+                        || t.first == "▁<PRE>"          // CodeLlama
			
 
				                         ) {
			
 
				                     special_fim_pre_id = t.second;
			
 
				                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
			
@@ -1856,6 +1858,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                         || t.first == "<fim-suffix>"
			
 
				                         || t.first == "<｜fim▁hole｜>" // DeepSeek
			
 
				                         || t.first == "<SUF>"
			
 
				+                        || t.first == "▁<SUF>"         // CodeLlama
			
 
				                         ) {
			
 
				                     special_fim_suf_id = t.second;
			
 
				                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
			
@@ -1873,6 +1876,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                         || t.first == "<fim-middle>"
			
 
				                         || t.first == "<｜fim▁end｜>"  // DeepSeek
			
 
				                         || t.first == "<MID>"
			
 
				+                        || t.first == "▁<MID>"         // CodeLlama
			
 
				                         ) {
			
 
				                     special_fim_mid_id = t.second;
			
 
				                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
			
@@ -1957,6 +1961,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                     || t.first == "<|endoftext|>"
			
 
				                     || t.first == "<|eom_id|>"
			
 
				                     || t.first == "<EOT>"
			
 
				+                    || t.first == "_<EOT>"
			
 
				                ) {
			
 
				                 special_eog_ids.insert(t.second);
			
 
				                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {