1 yıl önce · 6a0f779484
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -2507,6 +2507,9 @@ extern "C" {
 
				     GGML_API int ggml_cpu_has_cann       (void);
			
 
				     GGML_API int ggml_cpu_has_llamafile  (void);
			
 
				 
			
 
				+    // get the sve vector length in bytes
			
 
				+    GGML_API int ggml_cpu_get_sve_cnt(void);
			
 
				+
			
 
				     //
			
 
				     // Internal types and functions exposed for tests and benchmarks
			
 
				     //
			
--- a/ggml/src/ggml-aarch64.c
+++ b/ggml/src/ggml-aarch64.c
@@ -598,15 +598,6 @@ size_t quantize_q4_0_8x8(const float * restrict src, void * restrict dst, int64_
 
				     return quantize_q4_0_nr_bl(src, dst, nrow, n_per_row, 8, 8);
			
 
				 }
			
 
				 
			
 
				-// Return the number of byte lanes in the SVE vector if SVE is supported; otherwise, returns 0 if SVE is not supported.
			
 
				-static int sve_lane_count(void) {
			
 
				-#if defined(__ARM_FEATURE_SVE)
			
 
				-    return ggml_sve_cnt_b;
			
 
				-#else
			
 
				-    return 0;
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				 void ggml_gemv_q4_0_4x4_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, const void * restrict vy, int nr, int nc) {
			
 
				     const int qk = QK8_0;
			
 
				     const int nb = n / qk;
			
@@ -843,7 +834,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
 
				 
			
 
				 #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
			
 
				 #if defined(__ARM_FEATURE_SVE)
			
 
				-    if (ggml_cpu_has_sve() && sve_lane_count() == QK8_0) {
			
 
				+    if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) {
			
 
				         const void * b_ptr = vx;
			
 
				         const void * a_ptr = vy;
			
 
				         float * res_ptr = s;
			
@@ -2020,7 +2011,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
 
				 
			
 
				 #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
			
 
				 #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
			
 
				-    if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && sve_lane_count() == QK8_0) {
			
 
				+    if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
			
 
				         const void * b_ptr = vx;
			
 
				         const void * a_ptr = vy;
			
 
				         float * res_ptr = s;
			
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -4013,7 +4013,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 
				     svfloat32_t sumv0 = svdup_n_f32(0.0f);
			
 
				     svfloat32_t sumv1 = svdup_n_f32(0.0f);
			
 
				 
			
 
				-    const int vector_length = ggml_sve_cnt_b*8;
			
 
				+    const int vector_length = ggml_cpu_get_sve_cnt()*8;
			
 
				 
			
 
				     // VLA Implementation using switch case
			
 
				     switch (vector_length) {
			
@@ -5597,7 +5597,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 
				     svfloat32_t sumv0 = svdup_n_f32(0.0f);
			
 
				     svfloat32_t sumv1 = svdup_n_f32(0.0f);
			
 
				 
			
 
				-    const int vector_length = ggml_sve_cnt_b*8;
			
 
				+    const int vector_length = ggml_cpu_get_sve_cnt()*8;
			
 
				 
			
 
				     //VLA Implemenation for SVE
			
 
				     switch (vector_length) {
			
--- a/ggml/src/ggml-quants.h
+++ b/ggml/src/ggml-quants.h
@@ -142,10 +142,6 @@ void iq2xs_free_impl(enum ggml_type type);
 
				 void iq3xs_init_impl(int grid_size);
			
 
				 void iq3xs_free_impl(int grid_size);
			
 
				 
			
 
				-#if defined(__ARM_FEATURE_SVE)
			
 
				-extern int ggml_sve_cnt_b;
			
 
				-#endif
			
 
				-
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -39,9 +39,6 @@
 
				 #include <unistd.h>
			
 
				 #endif
			
 
				 
			
 
				-#if defined(__ARM_FEATURE_SVE)
			
 
				-int ggml_sve_cnt_b = 0;
			
 
				-#endif
			
 
				 #if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
			
 
				 #undef GGML_USE_LLAMAFILE
			
 
				 #endif
			
@@ -455,6 +452,15 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
 
				 // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
			
 
				 float ggml_table_f32_f16[1 << 16];
			
 
				 
			
 
				+#if defined(__ARM_ARCH)
			
 
				+struct ggml_arm_arch_features_type {
			
 
				+    int has_neon;
			
 
				+    int has_i8mm;
			
 
				+    int has_sve;
			
 
				+    int sve_cnt;
			
 
				+} ggml_arm_arch_features = {-1, -1, -1, 0};
			
 
				+#endif
			
 
				+
			
 
				 GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
			
 
				     switch (status) {
			
 
				         case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
			
@@ -3673,6 +3679,66 @@ static inline int ggml_up(int n, int m) {
 
				 
			
 
				 ////////////////////////////////////////////////////////////////////////////////
			
 
				 
			
 
				+#if defined(__ARM_ARCH)
			
 
				+
			
 
				+#if defined(__linux__) && defined(__aarch64__)
			
 
				+#include <sys/auxv.h>
			
 
				+#elif defined(__APPLE__)
			
 
				+#include <sys/sysctl.h>
			
 
				+#endif
			
 
				+
			
 
				+static void ggml_init_arm_arch_features(void) {
			
 
				+#if defined(__linux__) && defined(__aarch64__)
			
 
				+    uint32_t hwcap = getauxval(AT_HWCAP);
			
 
				+    uint32_t hwcap2 = getauxval(AT_HWCAP2);
			
 
				+
			
 
				+    ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
			
 
				+    ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
			
 
				+    ggml_arm_arch_features.has_sve  = !!(hwcap & HWCAP_SVE);
			
 
				+
			
 
				+#if defined(__ARM_FEATURE_SVE)
			
 
				+    ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
			
 
				+#endif
			
 
				+#elif defined(__APPLE__)
			
 
				+    int oldp = 0;
			
 
				+    size_t size = sizeof(oldp);
			
 
				+    if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
			
 
				+        oldp = 0;
			
 
				+    }
			
 
				+    ggml_arm_arch_features.has_neon = oldp;
			
 
				+
			
 
				+    if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
			
 
				+        oldp = 0;
			
 
				+    }
			
 
				+    ggml_arm_arch_features.has_i8mm = oldp;
			
 
				+
			
 
				+    ggml_arm_arch_features.has_sve = 0;
			
 
				+    ggml_arm_arch_features.sve_cnt = 0;
			
 
				+#else
			
 
				+// Run-time CPU feature detection not implemented for this platform, fallback to compile time
			
 
				+#if defined(__ARM_NEON)
			
 
				+    ggml_arm_arch_features.has_neon = 1;
			
 
				+#else
			
 
				+    ggml_arm_arch_features.has_neon = 0;
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__ARM_FEATURE_MATMUL_INT8)
			
 
				+    ggml_arm_arch_features.has_i8mm = 1;
			
 
				+#else
			
 
				+    ggml_arm_arch_features.has_i8mm = 0;
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__ARM_FEATURE_SVE)
			
 
				+    ggml_arm_arch_features.has_sve = 1;
			
 
				+    ggml_arm_arch_features.sve_cnt = 16;
			
 
				+#else
			
 
				+    ggml_arm_arch_features.has_sve = 0;
			
 
				+    ggml_arm_arch_features.sve_cnt = 0;
			
 
				+#endif
			
 
				+#endif
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 struct ggml_context * ggml_init(struct ggml_init_params params) {
			
 
				     // make this function thread safe
			
 
				     ggml_critical_section_start();
			
@@ -3723,6 +3789,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
 
				             GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
			
 
				         }
			
 
				 
			
 
				+#if defined(__ARM_ARCH)
			
 
				+        ggml_init_arm_arch_features();
			
 
				+#endif
			
 
				+
			
 
				         is_first_call = false;
			
 
				     }
			
 
				 
			
@@ -3771,12 +3841,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
 
				 
			
 
				     GGML_ASSERT_ALIGNED(ctx->mem_buffer);
			
 
				 
			
 
				-#if defined(__ARM_FEATURE_SVE)
			
 
				-    if (!ggml_sve_cnt_b) {
			
 
				-        ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
			
 
				-    }
			
 
				-#endif
			
 
				-
			
 
				     GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
			
 
				 
			
 
				     ggml_critical_section_end();
			
@@ -23578,16 +23642,16 @@ int ggml_cpu_has_fma(void) {
 
				 }
			
 
				 
			
 
				 int ggml_cpu_has_neon(void) {
			
 
				-#if defined(__ARM_NEON)
			
 
				-    return 1;
			
 
				+#if defined(__ARM_ARCH)
			
 
				+    return ggml_arm_arch_features.has_neon;
			
 
				 #else
			
 
				     return 0;
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				 int ggml_cpu_has_sve(void) {
			
 
				-#if defined(__ARM_FEATURE_SVE)
			
 
				-    return 1;
			
 
				+#if defined(__ARM_ARCH)
			
 
				+    return ggml_arm_arch_features.has_sve;
			
 
				 #else
			
 
				     return 0;
			
 
				 #endif
			
@@ -23734,11 +23798,18 @@ int ggml_cpu_has_vsx(void) {
 
				 }
			
 
				 
			
 
				 int ggml_cpu_has_matmul_int8(void) {
			
 
				-#if defined(__ARM_FEATURE_MATMUL_INT8)
			
 
				-    return 1;
			
 
				+#if defined(__ARM_ARCH)
			
 
				+    return ggml_arm_arch_features.has_i8mm;
			
 
				 #else
			
 
				     return 0;
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				+int ggml_cpu_get_sve_cnt(void) {
			
 
				+#if defined(__ARM_ARCH)
			
 
				+    return ggml_arm_arch_features.sve_cnt;
			
 
				+#else
			
 
				+    return 0;
			
 
				+#endif
			
 
				+}
			
 
				 ////////////////////////////////////////////////////////////////////////////////