|
|
@@ -39,9 +39,6 @@
|
|
|
#include <unistd.h>
|
|
|
#endif
|
|
|
|
|
|
-#if defined(__ARM_FEATURE_SVE)
|
|
|
-int ggml_sve_cnt_b = 0;
|
|
|
-#endif
|
|
|
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
|
|
|
#undef GGML_USE_LLAMAFILE
|
|
|
#endif
|
|
|
@@ -455,6 +452,15 @@ static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
|
|
|
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
|
|
float ggml_table_f32_f16[1 << 16];
|
|
|
|
|
|
+#if defined(__ARM_ARCH)
|
|
|
+// Run-time description of the ARM CPU features, stored in the single global ggml_arm_arch_features.
+// The has_* flags start at -1 and sve_cnt at 0 (see the initializer below) and are
+// filled in later by ggml_init_arm_arch_features().
+struct ggml_arm_arch_features_type {
|
|
|
+ int has_neon; // -1 until probed, afterwards non-zero when NEON (Advanced SIMD) is reported
|
|
|
+ int has_i8mm; // -1 until probed, afterwards non-zero when the int8 matmul extension is reported
|
|
|
+ int has_sve; // -1 until probed, afterwards non-zero when SVE is reported
|
|
|
+ int sve_cnt; // 0 until probed; NOTE(review): presumably the SVE vector length in bytes - confirm
|
|
|
+} ggml_arm_arch_features = {-1, -1, -1, 0};
|
|
|
+#endif
|
|
|
+
|
|
|
GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
|
|
|
switch (status) {
|
|
|
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
|
|
|
@@ -3673,6 +3679,66 @@ static inline int ggml_up(int n, int m) {
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
+#if defined(__ARM_ARCH)
+
+#if defined(__linux__) && defined(__aarch64__)
+#include <sys/auxv.h>
+#elif defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+
+// Fill ggml_arm_arch_features with the NEON / i8mm / SVE capabilities of the CPU we run on.
+//
+//  - Linux/aarch64: reads the hardware-capability words with getauxval()
+//  - Apple:         queries the hw.optional.* sysctl keys, SVE is reported as absent
+//  - other:         no run-time probe, the compile-time feature macros are used instead
+//
+// Every branch assigns all four fields of the struct.
+static void ggml_init_arm_arch_features(void) {
+#if defined(__linux__) && defined(__aarch64__)
+    // getauxval() returns unsigned long - keep the full width so that no capability bit is truncated
+    const unsigned long hwcap  = getauxval(AT_HWCAP);
+    const unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+    ggml_arm_arch_features.has_neon = !!(hwcap  & HWCAP_ASIMD);
+    ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
+    ggml_arm_arch_features.has_sve  = !!(hwcap  & HWCAP_SVE);
+
+    ggml_arm_arch_features.sve_cnt = 0;
+#if defined(__ARM_FEATURE_SVE)
+    // query the vector length only when the kernel reports SVE: prctl() returns -1 on failure
+    // and masking that value would produce a bogus non-zero length
+    if (ggml_arm_arch_features.has_sve) {
+        const int sve_vl = prctl(PR_SVE_GET_VL);
+        if (sve_vl >= 0) {
+            ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & sve_vl;
+        }
+    }
+#endif
+#elif defined(__APPLE__)
+    int    oldp = 0;
+    size_t size = sizeof(oldp);
+    if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) {
+        oldp = 0; // key missing or query failed -> treat the feature as absent
+    }
+    ggml_arm_arch_features.has_neon = oldp;
+
+    // sysctlbyname() writes the returned length back into size, so restore it before reusing the buffer
+    size = sizeof(oldp);
+    if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) {
+        oldp = 0; // key missing or query failed -> treat the feature as absent
+    }
+    ggml_arm_arch_features.has_i8mm = oldp;
+
+    ggml_arm_arch_features.has_sve = 0;
+    ggml_arm_arch_features.sve_cnt = 0;
+#else
+// Run-time CPU feature detection is not implemented for this platform, falling back to the compile-time flags
+#if defined(__ARM_NEON)
+    ggml_arm_arch_features.has_neon = 1;
+#else
+    ggml_arm_arch_features.has_neon = 0;
+#endif
+
+#if defined(__ARM_FEATURE_MATMUL_INT8)
+    ggml_arm_arch_features.has_i8mm = 1;
+#else
+    ggml_arm_arch_features.has_i8mm = 0;
+#endif
+
+#if defined(__ARM_FEATURE_SVE)
+    ggml_arm_arch_features.has_sve = 1;
+    ggml_arm_arch_features.sve_cnt = 16;
+#else
+    ggml_arm_arch_features.has_sve = 0;
+    ggml_arm_arch_features.sve_cnt = 0;
+#endif
+#endif
+}
+#endif // __ARM_ARCH
|
|
|
+
|
|
|
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
|
// make this function thread safe
|
|
|
ggml_critical_section_start();
|
|
|
@@ -3723,6 +3789,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
|
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
|
|
}
|
|
|
|
|
|
+#if defined(__ARM_ARCH)
|
|
|
+ ggml_init_arm_arch_features();
|
|
|
+#endif
|
|
|
+
|
|
|
is_first_call = false;
|
|
|
}
|
|
|
|
|
|
@@ -3771,12 +3841,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
|
|
|
|
GGML_ASSERT_ALIGNED(ctx->mem_buffer);
|
|
|
|
|
|
-#if defined(__ARM_FEATURE_SVE)
|
|
|
- if (!ggml_sve_cnt_b) {
|
|
|
- ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
|
|
|
- }
|
|
|
-#endif
|
|
|
-
|
|
|
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
|
|
|
|
|
ggml_critical_section_end();
|
|
|
@@ -23578,16 +23642,16 @@ int ggml_cpu_has_fma(void) {
|
|
|
}
|
|
|
|
|
|
+// Reports NEON support. On ARM builds this is the run-time flag ggml_arm_arch_features.has_neon,
+// which is still -1 (non-zero!) if ggml_init_arm_arch_features() has not run yet; other builds return 0.
int ggml_cpu_has_neon(void) {
|
|
|
-#if defined(__ARM_NEON)
|
|
|
- return 1;
|
|
|
+#if defined(__ARM_ARCH)
|
|
|
+ return ggml_arm_arch_features.has_neon;
|
|
|
#else
|
|
|
return 0;
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
int ggml_cpu_has_sve(void) {
|
|
|
-#if defined(__ARM_FEATURE_SVE)
|
|
|
- return 1;
|
|
|
+#if defined(__ARM_ARCH)
|
|
|
+ return ggml_arm_arch_features.has_sve;
|
|
|
#else
|
|
|
return 0;
|
|
|
#endif
|
|
|
@@ -23734,11 +23798,18 @@ int ggml_cpu_has_vsx(void) {
|
|
|
}
|
|
|
|
|
|
+// Reports int8 matrix-multiply (i8mm) support. On ARM builds this is the run-time flag
+// ggml_arm_arch_features.has_i8mm, which is still -1 (non-zero!) if ggml_init_arm_arch_features()
+// has not run yet; other builds return 0.
int ggml_cpu_has_matmul_int8(void) {
|
|
|
-#if defined(__ARM_FEATURE_MATMUL_INT8)
|
|
|
- return 1;
|
|
|
+#if defined(__ARM_ARCH)
|
|
|
+ return ggml_arm_arch_features.has_i8mm;
|
|
|
#else
|
|
|
return 0;
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
+// Returns ggml_arm_arch_features.sve_cnt on ARM builds and 0 on every other build.
+// The field is 0 until ggml_init_arm_arch_features() has stored a value.
+// NOTE(review): presumably the SVE vector length in bytes - confirm against the SVE kernels.
+int ggml_cpu_get_sve_cnt(void) {
|
|
|
+#if defined(__ARM_ARCH)
|
|
|
+ return ggml_arm_arch_features.sve_cnt;
|
|
|
+#else
|
|
|
+ return 0;
|
|
|
+#endif
|
|
|
+}
|
|
|
////////////////////////////////////////////////////////////////////////////////
|