@@ -317,14 +317,13 @@ ggml_tensor * llm_build_qwen3next::build_qwen3next_linear_attn_layer(llm_graph_i
         num_v_heads / num_k_heads // alpha size
     };
 
-    ggml_tensor * b =
-        ggml_view_4d(ctx0, mixed_ba_reshaped, split_sizes_ba[0], num_k_heads, n_tokens, n_seqs,
-                     split_sizes_ba[0] * sizeof(float), mixed_ba_reshaped->nb[1], mixed_ba_reshaped->nb[2], 0);
+    ggml_tensor * b = ggml_view_4d(ctx0, mixed_ba_reshaped, split_sizes_ba[0], num_k_heads, n_tokens, n_seqs,
+                                   mixed_ba_reshaped->nb[1], mixed_ba_reshaped->nb[2], mixed_ba_reshaped->nb[3], 0);
     cb(b, "b", il);
 
     ggml_tensor * a = ggml_view_4d(ctx0, mixed_ba_reshaped, split_sizes_ba[1], num_k_heads, n_tokens, n_seqs,
-                                   split_sizes_ba[1] * sizeof(float), mixed_ba_reshaped->nb[1],
-                                   mixed_ba_reshaped->nb[2], split_sizes_ba[0] * sizeof(float));
+                                   mixed_ba_reshaped->nb[1], mixed_ba_reshaped->nb[2], mixed_ba_reshaped->nb[3],
+                                   split_sizes_ba[0] * ggml_element_size(mixed_ba_reshaped));
     cb(a, "a", il);
 
     // Reshape b and a to merge head dimensions: [batch, seq_len, num_k_heads, num_v_heads/num_k_heads] -> [batch, seq_len, num_v_heads]
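
Note (not part of the patch): a minimal standalone sketch of the stride rule the corrected lines rely on, assuming mixed_ba_reshaped is a contiguous F32 tensor whose leading dimension holds the b and a splits back to back. A view that narrows only dim 0 of a 4-D tensor must inherit the parent's byte strides nb[1..3] unchanged, and the second slice starts at a byte offset of (elements skipped) * element size. The mixed stand-in tensor, the concrete sizes, and the main harness below are invented for illustration; only the ggml_view_4d / ggml_element_size usage mirrors the hunk above.

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Stand-in for mixed_ba_reshaped: [b_size + a_size, num_k_heads, n_tokens, n_seqs].
    const int64_t b_size = 2, a_size = 2, num_k_heads = 4, n_tokens = 8, n_seqs = 1;
    struct ggml_tensor * mixed = ggml_new_tensor_4d(ctx, GGML_TYPE_F32,
                                                    b_size + a_size, num_k_heads, n_tokens, n_seqs);

    // Both slices keep dims 1..3 of the parent, so they must reuse the parent's byte
    // strides nb[1..3]; a stride derived from the slice width alone (the old
    // split_sizes_ba[0] * sizeof(float)) would make successive head rows land on the
    // wrong bytes of the parent buffer.
    struct ggml_tensor * b = ggml_view_4d(ctx, mixed, b_size, num_k_heads, n_tokens, n_seqs,
                                          mixed->nb[1], mixed->nb[2], mixed->nb[3], 0);
    struct ggml_tensor * a = ggml_view_4d(ctx, mixed, a_size, num_k_heads, n_tokens, n_seqs,
                                          mixed->nb[1], mixed->nb[2], mixed->nb[3],
                                          b_size * ggml_element_size(mixed)); // skip the b block along dim 0

    // The views share the parent's row/slice strides; only ne0 and the offset differ.
    GGML_ASSERT(b->nb[1] == mixed->nb[1] && a->nb[1] == mixed->nb[1]);

    ggml_free(ctx);
    return 0;
}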