6 月之前 · 4bb625b713
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -40,6 +40,7 @@ const char * llm_type_name(llm_type type) {
 
				         case LLM_TYPE_190M:          return "190M";
			
 
				         case LLM_TYPE_220M:          return "220M";
			
 
				         case LLM_TYPE_250M:          return "250M";
			
 
				+        case LLM_TYPE_256M:          return "256M";
			
 
				         case LLM_TYPE_270M:          return "270M";
			
 
				         case LLM_TYPE_335M:          return "335M";
			
 
				         case LLM_TYPE_410M:          return "410M";
			
@@ -581,6 +582,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 
				                         case 22: type = LLM_TYPE_1B; break;
			
 
				                         case 26: type = LLM_TYPE_3B; break;
			
 
				                         case 28: type = LLM_TYPE_3B; break; // Llama 3.2 3B
			
 
				+                        case 30: type = LLM_TYPE_256M; break; // smoldocling 256M
			
 
				                         // granite uses a vocab with len 49152
			
 
				                         case 32: type = n_vocab == 49152 ? LLM_TYPE_3B : (n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break;
			
 
				                         case 36: type = LLM_TYPE_8B; break; // granite
			
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -32,6 +32,7 @@ enum llm_type {
 
				     LLM_TYPE_190M,
			
 
				     LLM_TYPE_220M,
			
 
				     LLM_TYPE_250M,
			
 
				+    LLM_TYPE_256M,
			
 
				     LLM_TYPE_270M,
			
 
				     LLM_TYPE_335M,
			
 
				     LLM_TYPE_410M,
			
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1846,6 +1846,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                         || t.first == "<EOT>"
			
 
				                         || t.first == "_<EOT>"
			
 
				                         || t.first == "<｜end▁of▁sentence｜>" // DeepSeek
			
 
				+                        || t.first == "<end_of_utterance>" // smoldocling
			
 
				                    ) {
			
 
				                     special_eot_id = t.second;
			
 
				                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
			
@@ -2005,6 +2006,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
				                     || t.first == "<EOT>"
			
 
				                     || t.first == "_<EOT>"
			
 
				                     || t.first == "<|end_of_text|>"
			
 
				+                    || t.first == "<end_of_utterance>" // smoldocling
			
 
				                ) {
			
 
				                 special_eog_ids.insert(t.second);
			
 
				                 if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {