|
@@ -175,6 +175,39 @@ typedef double ggml_float;
|
|
|
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
|
|
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
|
|
|
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
|
|
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
|
|
|
|
|
|
|
|
|
|
+#elif defined(__POWER9_VECTOR__)
|
|
|
|
|
+
|
|
|
|
|
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
|
|
|
|
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
|
|
|
|
+/* The inline asm helpers defined below are about 12% faster than the lookup method, so GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 map straight onto the COMPUTE macros. */
|
|
|
|
|
+#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
|
|
|
|
+#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
|
|
|
|
+
|
|
|
|
|
+static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
|
|
|
|
+ register float f;
|
|
|
|
|
+ register double d;
|
|
|
|
|
+ __asm__(
|
|
|
|
|
+ "mtfprd %0,%2\n"
|
|
|
|
|
+ "xscvhpdp %0,%0\n"
|
|
|
|
|
+ "frsp %1,%0\n" :
|
|
|
|
|
+ /* temp */ "=d"(d),
|
|
|
|
|
+ /* out */ "=f"(f):
|
|
|
|
|
+ /* in */ "r"(h));
|
|
|
|
|
+ return f;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
|
|
|
|
+ register double d;
|
|
|
|
|
+ register ggml_fp16_t r;
|
|
|
|
|
+ __asm__( /* xscvdphp can work on double or single precision */
|
|
|
|
|
+ "xscvdphp %0,%2\n"
|
|
|
|
|
+ "mffprd %1,%0\n" :
|
|
|
|
|
+ /* temp */ "=d"(d),
|
|
|
|
|
+ /* out */ "=r"(r):
|
|
|
|
|
+ /* in */ "f"(f));
|
|
|
|
|
+ return r;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
#else
|
|
#else
|
|
|
|
|
|
|
|
// FP16 <-> FP32
|
|
// FP16 <-> FP32
|
|
@@ -272,6 +305,7 @@ static float table_f32_f16[1 << 16];
|
|
|
|
|
|
|
|
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
|
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
|
|
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
|
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
|
|
|
|
|
+// This is also true for POWER9.
|
|
|
#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
|
|
#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
|
|
|
|
|
|
|
|
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
|
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
|
|
@@ -462,7 +496,7 @@ static void quantize_row_q4_0_reference(const float * restrict x, void * restric
|
|
|
void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
|
|
void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
|
|
|
assert(k % QK == 0);
|
|
assert(k % QK == 0);
|
|
|
|
|
|
|
|
-#if __ARM_NEON || defined(__AVX2__) || defined(__wasm_simd128__)
|
|
|
|
|
|
|
+#if __ARM_NEON || defined(__AVX2__) || defined(__wasm_simd128__) || defined(__POWER9_VECTOR__)
|
|
|
const int nb = k / QK;
|
|
const int nb = k / QK;
|
|
|
const size_t bs = sizeof(float) + QK/2;
|
|
const size_t bs = sizeof(float) + QK/2;
|
|
|
|
|
|
|
@@ -472,7 +506,52 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
|
|
|
uint8_t pp[QK/2];
|
|
uint8_t pp[QK/2];
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
-#if __ARM_NEON
|
|
|
|
|
|
|
+#if defined(__POWER9_VECTOR__)
|
|
|
|
|
+#if QK == 32
|
|
|
|
|
+ const vector float v85 = vec_splats(8.5f);
|
|
|
|
|
+ for (int i = 0; i < nb; i++) {
|
|
|
|
|
+ float amax = 0.0f; // absolute max
|
|
|
|
|
+
|
|
|
|
|
+ vector float srcv [8];
|
|
|
|
|
+ vector float asrcv[8];
|
|
|
|
|
+ vector float amaxv[8];
|
|
|
|
|
+
|
|
|
|
|
+ for (int l = 0; l < 8; l++) srcv[l] = *(vector float *)(x + i*32 + 4*l);
|
|
|
|
|
+ for (int l = 0; l < 8; l++) asrcv[l] = vec_abs(srcv[l]);
|
|
|
|
|
+
|
|
|
|
|
+ for (int l = 0; l < 4; l++) amaxv[2*l] = vec_max(asrcv[2*l], asrcv[2*l+1]);
|
|
|
|
|
+ //for (int l = 0; l < 2; l++) amaxv[4*l] = vec_max(amaxv[4*l], amaxv[4*l+2]);
|
|
|
|
|
+ amaxv[0] = vec_max(amaxv[0], amaxv[2]);
|
|
|
|
|
+ amaxv[4] = vec_max(amaxv[4], amaxv[6]);
|
|
|
|
|
+ //for (int l = 0; l < 1; l++) amaxv[8*l] = vec_max(amaxv[8*l], amaxv[8*l+4]);
|
|
|
|
|
+ amaxv[0] = vec_max(amaxv[0], amaxv[4]);
|
|
|
|
|
+
|
|
|
|
|
+ amax = MAX(
|
|
|
|
|
+ MAX(vec_extract(amaxv[0], 0), vec_extract(amaxv[0], 1)),
|
|
|
|
|
+ MAX(vec_extract(amaxv[0], 2), vec_extract(amaxv[0], 3)));
|
|
|
|
|
+
|
|
|
|
|
+ const float d = amax / ((1 << 3) - 1);
|
|
|
|
|
+ const float id = d ? 1.0/d : 0.0;
|
|
|
|
|
+
|
|
|
|
|
+ *(float *)pd = d;
|
|
|
|
|
+ pd += bs;
|
|
|
|
|
+
|
|
|
|
|
+ const vector float vid = vec_splats(id);
|
|
|
|
|
+ for (int l = 0; l < 8; l++) {
|
|
|
|
|
+ const vector float vf = vec_madd(srcv[l], vid, v85);
|
|
|
|
|
+ const vector signed int vi = vec_signed(vf);
|
|
|
|
|
+
|
|
|
|
|
+ pb[2*l + 0] = vec_extract(vi, 0) | (vec_extract(vi, 1) << 4);
|
|
|
|
|
+ pb[2*l + 1] = vec_extract(vi, 2) | (vec_extract(vi, 3) << 4);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ //memcpy(pb, pp, sizeof(pp));
|
|
|
|
|
+ pb += bs;
|
|
|
|
|
+ }
|
|
|
|
|
+#else
|
|
|
|
|
+#error "not implemented for QK"
|
|
|
|
|
+#endif
|
|
|
|
|
+#elif __ARM_NEON
|
|
|
#if QK == 32
|
|
#if QK == 32
|
|
|
for (int i = 0; i < nb; i++) {
|
|
for (int i = 0; i < nb; i++) {
|
|
|
float amax = 0.0f; // absolute max
|
|
float amax = 0.0f; // absolute max
|