Explorar o código

Move the norm shift to conversion, Gemma 2 style

Piotr Wilkin hai 3 meses
pai
achega
c2a82a1773
Modificáronse 2 ficheiros con 6 adicións e 3 borrados
  1. +3 −1
      convert_hf_to_gguf.py
  2. +3 −2
      src/models/llm_build_qwen3next.cpp

+ 3 - 1
convert_hf_to_gguf.py

@@ -3773,7 +3773,9 @@ class Qwen3NextModel(Qwen3MoeModel):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
         elif "conv1d" in name:
             data_torch = data_torch.squeeze()
-
+        elif name.endswith("norm.weight") and not name.endswith("linear_attn.norm.weight"):
+            data_torch = data_torch + 1
+            
         yield from Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)
 
 

+ 3 - 2
src/models/llm_build_qwen3next.cpp

@@ -127,8 +127,9 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr
 }
 
 struct ggml_tensor * llm_build_qwen3next::build_q3n_norm(struct ggml_tensor * input, struct ggml_tensor * weights, int layer) {
-    ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
-    return build_norm(input, input_norm, nullptr, LLM_NORM_RMS, layer);
+    // ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
+    // EDIT: we moved the shifting part to the conversion, so we just call normal build_norm
+    return build_norm(input, weights, nullptr, LLM_NORM_RMS, layer);
 }
 
 struct ggml_tensor * llm_build_qwen3next::build_q3n_gated_norm(struct ggml_tensor * input, struct ggml_tensor * weights, struct ggml_tensor * gate, int layer) {