4 месяцев назад · 6ab397e12b
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1273,7 +1273,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
 
				     // split the batch into streams if needed
			
 
				     const auto n_stream = k->ne[3];
			
 
				 
			
 
				-    q = ggml_reshape_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream);
			
 
				+    q = ggml_view_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream, q->nb[1], q->nb[2], q->nb[3]/n_stream, 0);
			
 
				 
			
 
				     q = ggml_permute(ctx0, q, 0, 2, 1, 3);
			
 
				     k = ggml_permute(ctx0, k, 0, 2, 1, 3);