@@ -2691,6 +2691,109 @@ static void ggml_compute_forward_gelu(
     }
 }
 
+// ggml_compute_forward_gelu_erf
+
+static void ggml_compute_forward_gelu_erf_f32(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_vec_gelu_erf_f32(nc,
+                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
+                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            GGML_UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_gelu_erf_f16(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_vec_gelu_erf_f16(nc,
+                (ggml_fp16_t *) ((char *) dst->data  + i1*( dst->nb[1])),
+                (ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            GGML_UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_gelu_erf(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_gelu_erf_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_gelu_erf_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_gelu_quick
 
 static void ggml_compute_forward_gelu_quick_f32(
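
Note on threading: both new kernels use ggml's usual row partitioning. dr = (nr + nth - 1)/nth is a ceiling division, so each of the nth threads takes dr consecutive rows, and MIN(ir0 + dr, nr) clamps the last thread's range; with nr = 10 rows and nth = 4 threads, dr = 3 and the ranges are [0,3), [3,6), [6,9), [9,10). A minimal standalone sketch of the scheme (the demo values are illustrative, not part of the patch):

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void) {
        const int nr  = 10; // total rows (illustrative)
        const int nth = 4;  // worker threads (illustrative)

        // rows per thread: ceiling division so every row is covered
        const int dr = (nr + nth - 1)/nth; // (10 + 3)/4 = 3

        for (int ith = 0; ith < nth; ith++) {
            const int ir0 = dr*ith;            // first row of this thread
            const int ir1 = MIN(ir0 + dr, nr); // one past the last row, clamped
            printf("thread %d: rows [%d, %d)\n", ith, ir0, ir1);
        }
        return 0;
    }
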
@@ -7749,6 +7852,10 @@ void ggml_compute_forward_unary(
             {
                 ggml_compute_forward_gelu(params, dst);
             } break;
+        case GGML_UNARY_OP_GELU_ERF:
+            {
+                ggml_compute_forward_gelu_erf(params, dst);
+            } break;
         case GGML_UNARY_OP_GELU_QUICK:
            {
                 ggml_compute_forward_gelu_quick(params, dst);
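
The ggml_vec_gelu_erf_f32/f16 helpers called by the new kernels are defined in ggml's vector primitives and are not part of these hunks. GELU_ERF is the exact GELU, gelu(x) = 0.5*x*(1 + erf(x/sqrt(2))), in contrast to the tanh-based approximation behind the existing GGML_UNARY_OP_GELU. A hypothetical scalar reference for the f32 case (a sketch of what the helper computes, not ggml's actual implementation, which may be vectorized):

    #include <math.h>

    // Hypothetical scalar reference, not the real ggml_vec_gelu_erf_f32.
    // Exact GELU via the error function: 0.5*x*(1 + erf(x/sqrt(2))).
    static void gelu_erf_f32_ref(const int n, float * y, const float * x) {
        const float inv_sqrt2 = 0.70710678f; // 1/sqrt(2)
        for (int i = 0; i < n; i++) {
            y[i] = 0.5f*x[i]*(1.0f + erff(x[i]*inv_sqrt2));
        }
    }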