|
|
@@ -355,6 +355,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|
|
case LLAMA_VOCAB_PRE_TYPE_STABLELM2:
|
|
|
case LLAMA_VOCAB_PRE_TYPE_QWEN2:
|
|
|
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN:
|
|
|
+ case LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN:
|
|
|
regex_exprs = {
|
|
|
// original regex from tokenizer.json
|
|
|
// "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
|
|
@@ -2015,6 +2016,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
|
tokenizer_pre == "minimax-m2") {
|
|
|
pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
|
|
|
clean_spaces = false;
|
|
|
+ } else if (
|
|
|
+ tokenizer_pre == "solar-open") {
|
|
|
+ pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN;
|
|
|
+ clean_spaces = false;
|
|
|
} else {
|
|
|
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
|
|
}
|
|
|
@@ -2358,6 +2363,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
|
|| t.first == "<|end|>"
|
|
|
|| t.first == "<|return|>" // o200k_harmony
|
|
|
|| t.first == "<|call|>" // o200k_harmony
|
|
|
+ || t.first == "<|flush|>" // solar-open
|
|
|
+ || t.first == "<|calls|>" // solar-open
|
|
|
|| t.first == "<end_of_turn>"
|
|
|
|| t.first == "<|endoftext|>"
|
|
|
|| t.first == "<|eom_id|>"
|
|
|
@@ -2404,13 +2411,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
|
LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
|
|
|
}
|
|
|
|
|
|
- // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
|
|
|
- // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
|
|
|
+ // TODO: workaround for o200k_harmony and solar-open tokenizers: the "<|end|>" token should not be EOG
|
|
|
+ // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens (or "<|calls|>" and "<|flush|>" for solar-open),
|
|
|
// we remove the "<|end|>" token from the EOG list
|
|
|
{
|
|
|
bool has_return = false;
|
|
|
bool has_call = false;
|
|
|
bool has_end = false;
|
|
|
+ bool has_flush = false;
|
|
|
|
|
|
llama_token end_id = LLAMA_TOKEN_NULL;
|
|
|
|
|
|
@@ -2420,18 +2428,20 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|
|
|
|
|
if (id_to_token[tid].text == "<|return|>") {
|
|
|
has_return = true;
|
|
|
- } else if (id_to_token[tid].text == "<|call|>") {
|
|
|
+ } else if (id_to_token[tid].text == "<|call|>" || id_to_token[tid].text == "<|calls|>") {
|
|
|
has_call = true;
|
|
|
+ } else if (id_to_token[tid].text == "<|flush|>") {
|
|
|
+ has_flush = true;
|
|
|
} else if (id_to_token[tid].text == "<|end|>") {
|
|
|
has_end = true;
|
|
|
end_id = tid;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (has_return && has_call && has_end) {
|
|
|
+ if ((has_return && has_call && has_end) || (has_call && has_flush && has_end)) {
|
|
|
special_eog_ids.erase(end_id);
|
|
|
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
|
|
|
- LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
|
|
|
+ LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>', or '<|calls|>' and '<|flush|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
|
|
|
}
|
|
|
}
|
|
|
}
|