2 лет назад · b97ca431db
--- a/ggml.c
+++ b/ggml.c
--- a/ggml.h
+++ b/ggml.h
@@ -303,6 +303,7 @@ extern "C" {
 
				         GGML_OP_STEP,
			
 
				         GGML_OP_RELU,
			
 
				         GGML_OP_GELU,
			
 
				+        GGML_OP_GELU_QUICK,
			
 
				         GGML_OP_SILU,
			
 
				         GGML_OP_SILU_BACK,
			
 
				         GGML_OP_NORM, // normalize
			
@@ -331,12 +332,15 @@ extern "C" {
 
				         GGML_OP_ROPE_BACK,
			
 
				         GGML_OP_ALIBI,
			
 
				         GGML_OP_CLAMP,
			
 
				-        GGML_OP_CONV_1D_1S,
			
 
				-        GGML_OP_CONV_1D_2S,
			
 
				+        GGML_OP_CONV_1D_S1_PH,
			
 
				+        GGML_OP_CONV_1D_S2_PH,
			
 
				+        GGML_OP_CONV_2D_SK_P0,
			
 
				 
			
 
				         GGML_OP_FLASH_ATTN,
			
 
				         GGML_OP_FLASH_FF,
			
 
				         GGML_OP_FLASH_ATTN_BACK,
			
 
				+        GGML_OP_WIN_PART,
			
 
				+        GGML_OP_WIN_UNPART,
			
 
				 
			
 
				         GGML_OP_MAP_UNARY,
			
 
				         GGML_OP_MAP_BINARY,
			
@@ -557,8 +561,8 @@ extern "C" {
 
				     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
			
 
				     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
			
 
				 
			
 
				-    GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
			
 
				-    GGML_API void         ggml_set_name(struct ggml_tensor * tensor, const char * name);
			
 
				+    GGML_API const char *         ggml_get_name(const struct ggml_tensor * tensor);
			
 
				+    GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
			
 
				 
			
 
				     //
			
 
				     // operations on tensors with backpropagation
			
@@ -611,24 +615,47 @@ extern "C" {
 
				             struct ggml_tensor  * a,
			
 
				             struct ggml_tensor  * b);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_sub_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a,
			
 
				+            struct ggml_tensor  * b);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_mul(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a,
			
 
				             struct ggml_tensor  * b);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_mul_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a,
			
 
				+            struct ggml_tensor  * b);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_div(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a,
			
 
				             struct ggml_tensor  * b);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_div_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a,
			
 
				+            struct ggml_tensor  * b);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_sqr(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_sqr_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_sqrt(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_sqrt_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_log(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
@@ -668,31 +695,67 @@ extern "C" {
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_abs_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_sgn(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_sgn_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_neg(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_neg_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_step(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_step_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_relu(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_relu_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     // TODO: double-check this computation is correct
			
 
				     GGML_API struct ggml_tensor * ggml_gelu(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_gelu_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				+    GGML_API struct ggml_tensor * ggml_gelu_quick(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				+    GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_silu(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_silu_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     // a - x
			
 
				     // b - dy
			
 
				     GGML_API struct ggml_tensor * ggml_silu_back(
			
@@ -706,10 +769,18 @@ extern "C" {
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_norm_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     GGML_API struct ggml_tensor * ggml_rms_norm(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a);
			
 
				 
			
 
				+    GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a);
			
 
				+
			
 
				     // a - x
			
 
				     // b - dy
			
 
				     GGML_API struct ggml_tensor * ggml_rms_norm_back(
			
@@ -999,16 +1070,55 @@ extern "C" {
 
				             float                 min,
			
 
				             float                 max);
			
 
				 
			
 
				-    // padding = 1
			
 
				+    // TODO: implement general-purpose convolutions
			
 
				+    // GGML_API struct ggml_tensor * ggml_conv_1d(
			
 
				+    //        struct ggml_context * ctx,
			
 
				+    //        struct ggml_tensor  * a,
			
 
				+    //        struct ggml_tensor  * b,
			
 
				+    //        int                   s0,
			
 
				+    //        int                   p0,
			
 
				+    //        int                   d0);
			
 
				+    //
			
 
				+    // GGML_API struct ggml_tensor * ggml_conv_2d(
			
 
				+    //        struct ggml_context * ctx,
			
 
				+    //        struct ggml_tensor  * a,
			
 
				+    //        struct ggml_tensor  * b,
			
 
				+    //        int                   s0,
			
 
				+    //        int                   s1,
			
 
				+    //        int                   p0,
			
 
				+    //        int                   p1,
			
 
				+    //        int                   d0,
			
 
				+    //        int                   d1);
			
 
				+
			
 
				+    // padding = half
			
 
				     // TODO: we don't support extra parameters for now
			
 
				     //       that's why we are hard-coding the stride, padding, and dilation
			
 
				     //       not great ..
			
 
				-    GGML_API struct ggml_tensor * ggml_conv_1d_1s(
			
 
				+    // example:
			
 
				+    // a:      3   80  768    1
			
 
				+    // b:   3000   80    1    1
			
 
				+    // res: 3000  768    1    1
			
 
				+    // used in whisper
			
 
				+    GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a,
			
 
				             struct ggml_tensor  * b);
			
 
				 
			
 
				-    GGML_API struct ggml_tensor * ggml_conv_1d_2s(
			
 
				+    // used in whisper
			
 
				+    GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a,
			
 
				+            struct ggml_tensor  * b);
			
 
				+
			
 
				+    // kernel size is a->ne[0] x a->ne[1]
			
 
				+    // stride is equal to kernel size
			
 
				+    // padding is zero
			
 
				+    // example:
			
 
				+    // a:     16   16    3  768
			
 
				+    // b:   1024 1024    3    1
			
 
				+    // res:   64   64  768    1
			
 
				+    // used in sam
			
 
				+    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
			
 
				             struct ggml_context * ctx,
			
 
				             struct ggml_tensor  * a,
			
 
				             struct ggml_tensor  * b);
			
@@ -1036,6 +1146,26 @@ extern "C" {
 
				             struct ggml_tensor  * c0,
			
 
				             struct ggml_tensor  * c1);
			
 
				 
			
 
				+    // partition into non-overlapping windows with padding if needed
			
 
				+    // example:
			
 
				+    // a:   768   64   64    1
			
 
				+    // w:    14
			
 
				+    // res: 768   14   14   25
			
 
				+    // used in sam
			
 
				+    GGML_API struct ggml_tensor * ggml_win_part(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a,
			
 
				+            int                   w);
			
 
				+
			
 
				+    // reverse of ggml_win_part
			
 
				+    // used in sam
			
 
				+    GGML_API struct ggml_tensor * ggml_win_unpart(
			
 
				+            struct ggml_context * ctx,
			
 
				+            struct ggml_tensor  * a,
			
 
				+            int                   w0,
			
 
				+            int                   h0,
			
 
				+            int                   w);
			
 
				+
			
 
				     // Mapping operations
			
 
				     typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
			
 
				     typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);