
baby-llama : fix build after ggml_rope change (#2016)

Howard Su, 2 years ago
Commit 0be54f75a6
1 changed file with 6 additions and 6 deletions
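
For context: the build break comes from the upstream ggml_rope API gaining one extra trailing int argument (assumed here to be the new n_ctx parameter), so every call site in baby-llama.cpp now passes an additional 0 to keep the previous behaviour. Below is a minimal sketch of the new call shape against the ggml version this commit targets, using made-up sizes and a dummy tensor; it is an illustration, not code from the commit.

// Sketch: ggml_rope call after the signature change; the extra trailing int
// is assumed to be n_ctx, and passing 0 matches what this commit does.
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx0 = ggml_init(params);

    const int n_embd = 64, n_head = 8, N = 4;
    const int n_past = 0, n_rot  = n_embd/n_head;

    // same layout as Qcur/Kcur in the diff: [n_embd/n_head, n_head, N]
    struct ggml_tensor * t = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd/n_head, n_head, N);

    // old call:  ggml_rope(ctx0, t, n_past, n_rot, 0);
    // new call:  one extra trailing int (assumed n_ctx), passed as 0 throughout this commit
    struct ggml_tensor * Qcur = ggml_rope(ctx0, t, n_past, n_rot, 0, 0);
    (void) Qcur;

    ggml_free(ctx0);
    return 0;
}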

+ 6 - 6
examples/baby-llama/baby-llama.cpp

@@ -566,8 +566,8 @@ struct ggml_tensor * forward(
             // wk   shape [n_embd, n_embd, 1, 1]
             // Qcur shape [n_embd/n_head, n_head, N, 1]
             // Kcur shape [n_embd/n_head, n_head, N, 1]
-            struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
+            struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
 
             // store key and value to memory
             {
@@ -823,8 +823,8 @@ struct ggml_tensor * forward_batch(
             // wk   shape [n_embd, n_embd, 1, 1]
             // Qcur shape [n_embd/n_head, n_head, N, n_batch]
             // Kcur shape [n_embd/n_head, n_head, N, n_batch]
-            struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
+            struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
+            struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
             assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
             assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);
 
@@ -1116,7 +1116,7 @@ struct ggml_tensor * forward_lora(
                                                         model->layers[il].wqb,
                                                         cur)),
                                                 n_embd/n_head, n_head, N),
-                                            n_past, n_rot, 0);
+                                            n_past, n_rot, 0, 0);
             struct ggml_tensor * Kcur = ggml_rope(ctx0,
                                             ggml_reshape_3d(ctx0,
                                                 ggml_mul_mat(ctx0,
@@ -1125,7 +1125,7 @@ struct ggml_tensor * forward_lora(
                                                         model->layers[il].wkb,
                                                         cur)),
                                                 n_embd/n_head, n_head, N),
-                                            n_past, n_rot, 0);
+                                            n_past, n_rot, 0, 0);
 
             // store key and value to memory
             {