Browse Source

argh again

Piotr Wilkin 3 months ago
parent
commit
54bb6f1eb9
2 changed files with 7 additions and 5 deletions
  1. 3 0
      examples/model-conversion/qwen3stories.sh
  2. 4 5
      src/models/llm_build_qwen3next.cpp

+ 3 - 0
examples/model-conversion/qwen3stories.sh

@@ -0,0 +1,3 @@
+export MODEL_PATH=/devel/tools/llama.cpp/reference/theo77186_Qwen3-Next-70M-TinyStories
+export CONVERTED_MODEL=/devel/tools/llama.cpp/reference/theo77186_Qwen3-Next-70M-TinyStories/theo77186_Qwen3-Next-70M-TinyStories.gguf
+make causal-verify-logits

+ 4 - 5
src/models/llm_build_qwen3next.cpp

@@ -279,14 +279,13 @@ struct ggml_tensor * llm_build_qwen3next::delta_net(
     cb(q, "q_postscale", il);
     cb(beta, "beta_sigmoid", il);   
 
-    // Pad first along the token dimension  
-    q = ggml_pad(ctx, q, 0, 0, pad_size, 0); 
-    k = ggml_pad(ctx, k, 0, 0, pad_size, 0);
-    v = ggml_pad(ctx, v, 0, 0, pad_size, 0);
-
     q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));
     k = ggml_cont(ctx, ggml_permute(ctx, k, 0, 2, 1, 3));
     v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3));
+
+    q = ggml_pad(ctx, q, 0, pad_size, 0, 0); 
+    k = ggml_pad(ctx, k, 0, pad_size, 0, 0);
+    v = ggml_pad(ctx, v, 0, pad_size, 0, 0);
     
     beta = ggml_cont(ctx, ggml_permute(ctx, beta, 1, 2, 0, 3));
     cb(beta, "beta_reshape", il);