@@ -3184,6 +3184,435 @@ void ggml_compute_forward_silu_back(
     }
 }
 
+// ggml_compute_forward_reglu
+
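+// note: ggml_vec_reglu_f32/f16 are assumed to compute, per element of the row,
+//     dst[i] = (x[i] > 0 ? x[i] : 0) * g[i]   // ReLU(x) * gate
+// where x is the activation input and g is the gate (reference semantics only;
+// the actual kernels live in the vec helpers)
+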
+static void ggml_compute_forward_reglu_f32(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    char * src0_d = (char *) src0->data;
+    char * src1_d = (char *) (src1 ? src1->data : src0->data);
+    const size_t src0_o = src0->nb[1];
+    const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0));
+    GGML_ASSERT(ggml_is_contiguous_1(dst));
+
+    if (src1) {
+        GGML_ASSERT(ggml_is_contiguous_1(src1));
+        GGML_ASSERT(src0->type == src1->type);
+    }
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
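+    // with a separate gate tensor (src1) the full row width is used; in the
+    // single-tensor case each src0 row packs the activation and gate halves,
+    // so the output is half as wide as the input row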
+    const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2;
+    const int nr = ggml_nrows(src0);
+
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
+
+    const int32_t swapped = ggml_get_op_params_i32(dst, 1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        float * src0_p = (float *) (src0_d + i1*src0_o);
+        float * src1_p = (float *) (src1_d + i1*src1_o);
+
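+        // single-tensor case: both halves live in the same src0 row, and
+        // `swapped` selects which half is the activation and which is the gate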
+        if (!src1) {
+            src0_p += swapped ? nc : 0;
+            src1_p += swapped ? 0 : nc;
+        }
+
+        ggml_vec_reglu_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p);
+
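+        // debug builds: verify the freshly written row contains no NaN/inf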
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            GGML_UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_reglu_f16(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    char * src0_d = (char *) src0->data;
+    char * src1_d = (char *) (src1 ? src1->data : src0->data);
+    const size_t src0_o = src0->nb[1];
+    const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0));
+    GGML_ASSERT(ggml_is_contiguous_1(dst));
+
+    if (src1) {
+        GGML_ASSERT(ggml_is_contiguous_1(src1));
+        GGML_ASSERT(src0->type == src1->type);
+    }
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2;
+    const int nr = ggml_nrows(src0);
+
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
+
+    const int32_t swapped = ggml_get_op_params_i32(dst, 1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o);
+        ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o);
+
+        if (!src1) {
+            src0_p += swapped ? nc : 0;
+            src1_p += swapped ? 0 : nc;
+        }
+
+        ggml_vec_reglu_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p);
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            GGML_UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_reglu(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_reglu_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_reglu_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
+// ggml_compute_forward_geglu
+
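+// same structure as reglu; ggml_vec_geglu_f32/f16 are assumed to compute
+//     dst[i] = GELU(x[i]) * g[i]   // GELU(x) * gate
+// per element (reference semantics only)
+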
+static void ggml_compute_forward_geglu_f32(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    char * src0_d = (char *) src0->data;
+    char * src1_d = (char *) (src1 ? src1->data : src0->data);
+    const size_t src0_o = src0->nb[1];
+    const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0));
+    GGML_ASSERT(ggml_is_contiguous_1(dst));
+
+    if (src1) {
+        GGML_ASSERT(ggml_is_contiguous_1(src1));
+        GGML_ASSERT(src0->type == src1->type);
+    }
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2;
+    const int nr = ggml_nrows(src0);
+
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
+
+    const int32_t swapped = ggml_get_op_params_i32(dst, 1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        float * src0_p = (float *) (src0_d + i1*src0_o);
+        float * src1_p = (float *) (src1_d + i1*src1_o);
+
+        if (!src1) {
+            src0_p += swapped ? nc : 0;
+            src1_p += swapped ? 0 : nc;
+        }
+
+        ggml_vec_geglu_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p);
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            GGML_UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_geglu_f16(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    char * src0_d = (char *) src0->data;
+    char * src1_d = (char *) (src1 ? src1->data : src0->data);
+    const size_t src0_o = src0->nb[1];
+    const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0));
+    GGML_ASSERT(ggml_is_contiguous_1(dst));
+
+    if (src1) {
+        GGML_ASSERT(ggml_is_contiguous_1(src1));
+        GGML_ASSERT(src0->type == src1->type);
+    }
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2;
+    const int nr = ggml_nrows(src0);
+
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
+
+    const int32_t swapped = ggml_get_op_params_i32(dst, 1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o);
+        ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o);
+
+        if (!src1) {
+            src0_p += swapped ? nc : 0;
+            src1_p += swapped ? 0 : nc;
+        }
+
+        ggml_vec_geglu_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p);
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            GGML_UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_geglu(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_geglu_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_geglu_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
+// ggml_compute_forward_swiglu
+
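+// same structure as reglu; ggml_vec_swiglu_f32/f16 are assumed to compute
+//     dst[i] = SiLU(x[i]) * g[i] = x[i]/(1 + expf(-x[i])) * g[i]   // SiLU(x) * gate
+// per element (reference semantics only)
+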
+static void ggml_compute_forward_swiglu_f32(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    char * src0_d = (char *) src0->data;
+    char * src1_d = (char *) (src1 ? src1->data : src0->data);
+    const size_t src0_o = src0->nb[1];
+    const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0));
+    GGML_ASSERT(ggml_is_contiguous_1(dst));
+
+    if (src1) {
+        GGML_ASSERT(ggml_is_contiguous_1(src1));
+        GGML_ASSERT(src0->type == src1->type);
+    }
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2;
+    const int nr = ggml_nrows(src0);
+
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
+
+    const int32_t swapped = ggml_get_op_params_i32(dst, 1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        float * src0_p = (float *) (src0_d + i1*src0_o);
+        float * src1_p = (float *) (src1_d + i1*src1_o);
+
+        if (!src1) {
+            src0_p += swapped ? nc : 0;
+            src1_p += swapped ? 0 : nc;
+        }
+
+        ggml_vec_swiglu_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p);
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            GGML_UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_swiglu_f16(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    const ggml_tensor * src1 = dst->src[1];
+    char * src0_d = (char *) src0->data;
+    char * src1_d = (char *) (src1 ? src1->data : src0->data);
+    const size_t src0_o = src0->nb[1];
+    const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1];
+
+    GGML_ASSERT(ggml_is_contiguous_1(src0));
+    GGML_ASSERT(ggml_is_contiguous_1(dst));
+
+    if (src1) {
+        GGML_ASSERT(ggml_is_contiguous_1(src1));
+        GGML_ASSERT(src0->type == src1->type);
+    }
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2;
+    const int nr = ggml_nrows(src0);
+
+    GGML_ASSERT(dst->ne[0] == nc);
+    GGML_ASSERT(ggml_nrows(dst) == nr);
+
+    const int32_t swapped = ggml_get_op_params_i32(dst, 1);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o);
+        ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o);
+
+        if (!src1) {
+            src0_p += swapped ? nc : 0;
+            src1_p += swapped ? 0 : nc;
+        }
+
+        ggml_vec_swiglu_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p);
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k];
+            const float v = GGML_FP16_TO_FP32(x);
+            GGML_UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
+static void ggml_compute_forward_swiglu(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_swiglu_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_swiglu_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_norm
 
 static void ggml_compute_forward_norm_f32(
@@ -8052,6 +8481,34 @@ void ggml_compute_forward_unary(
     }
 }
 
+// ggml_compute_forward_glu
+
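+// dispatch on the GLU variant; ggml_get_glu_op presumably reads op_params[0],
+// while op_params[1] holds the `swapped` flag read by the ops above
+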
+void ggml_compute_forward_glu(
+        const ggml_compute_params * params,
+        ggml_tensor * dst) {
+
+    const ggml_glu_op op = ggml_get_glu_op(dst);
+
+    switch (op) {
+        case GGML_GLU_OP_REGLU:
+            {
+                ggml_compute_forward_reglu(params, dst);
+            } break;
+        case GGML_GLU_OP_GEGLU:
+            {
+                ggml_compute_forward_geglu(params, dst);
+            } break;
+        case GGML_GLU_OP_SWIGLU:
+            {
+                ggml_compute_forward_swiglu(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_get_rel_pos
 
 static void ggml_compute_forward_get_rel_pos_f16(