hace 1 año · dc39012cba
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -7181,12 +7181,12 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w
 
				             } break;
			
 
				         case GGML_OP_ADD:
			
 
				             {
			
 
				-                ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, w->ne[0], 512);
			
 
				+                ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, w->ne[0], w->ne[1], w->ne[2], w->ne[3]);
			
 
				                 op_tensor = ggml_add(ctx, a, w);
			
 
				             } break;
			
 
				         case GGML_OP_MUL:
			
 
				             {
			
 
				-                ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, w->ne[0], 512);
			
 
				+                ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, w->ne[0], w->ne[1], w->ne[2], w->ne[3]);
			
 
				                 op_tensor = ggml_mul(ctx, a, w);
			
 
				             } break;
			
 
				         case GGML_OP_DIV: