@@ -127,8 +127,9 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr
 }
 
 struct ggml_tensor * llm_build_qwen3next::build_q3n_norm(struct ggml_tensor * input, struct ggml_tensor * weights, int layer) {
-    ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
-    return build_norm(input, input_norm, nullptr, LLM_NORM_RMS, layer);
+    // ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
+    // EDIT: we moved the shifting part to the conversion, so we just call normal build_norm
+    return build_norm(input, weights, nullptr, LLM_NORM_RMS, layer);
 }
 
 struct ggml_tensor * llm_build_qwen3next::build_q3n_gated_norm(struct ggml_tensor * input, struct ggml_tensor * weights, struct ggml_tensor * gate, int layer) {