1 ヶ月前 · 51604435e8
--- a/ggml/include/ggml-cpu.h
+++ b/ggml/include/ggml-cpu.h
@@ -99,6 +99,7 @@ extern "C" {
 
				     GGML_BACKEND_API int ggml_cpu_has_sme        (void);
			
 
				     // other
			
 
				     GGML_BACKEND_API int ggml_cpu_has_riscv_v    (void);
			
 
				+    GGML_BACKEND_API int ggml_cpu_get_rvv_vlen   (void);  // risc-v vector length in bytes
			
 
				     GGML_BACKEND_API int ggml_cpu_has_vsx        (void);
			
 
				     GGML_BACKEND_API int ggml_cpu_has_vxe        (void);
			
 
				     GGML_BACKEND_API int ggml_cpu_has_wasm_simd  (void);
			
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -81,6 +81,11 @@ struct ggml_arm_arch_features_type {
 
				 } ggml_arm_arch_features = { 0 };
			
 
				 #endif
			
 
				 
			
 
				+#if defined(__riscv)
			
 
				+struct ggml_riscv_arch_features_type {
			
 
				+    int rvv_vlen;
			
 
				+} ggml_riscv_arch_features = { 0 };
			
 
				+#endif
			
 
				 
			
 
				 #if defined(_WIN32)
			
 
				 
			
@@ -703,6 +708,15 @@ static void ggml_init_arm_arch_features(void) {}
 
				 #endif
			
 
				 #endif // __ARM_ARCH
			
 
				 
			
 
				+#if defined(__riscv) && defined(__riscv_v_intrinsic)
			
 
				+#include <riscv_vector.h>
			
 
				+static void ggml_init_riscv_arch_features(void) {
			
 
				+    ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb();
			
 
				+}
			
 
				+#else
			
 
				+static void ggml_init_riscv_arch_features(void) {}
			
 
				+#endif
			
 
				+
			
 
				 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
			
 
				     GGML_ASSERT(!ggml_get_no_alloc(ctx));
			
 
				 
			
@@ -3459,6 +3473,14 @@ int ggml_cpu_has_riscv_v(void) {
 
				 #endif
			
 
				 }
			
 
				 
			
 
				+int ggml_cpu_get_rvv_vlen(void) {
			
 
				+#if defined(__riscv) && defined(__riscv_v_intrinsic)
			
 
				+    return ggml_riscv_arch_features.rvv_vlen;
			
 
				+#else
			
 
				+    return 0;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 int ggml_cpu_has_f16c(void) {
			
 
				 #if defined(__F16C__)
			
 
				     return 1;
			
@@ -3625,6 +3647,10 @@ void ggml_cpu_init(void) {
 
				         ggml_init_arm_arch_features();
			
 
				 #endif
			
 
				 
			
 
				+#if defined(__riscv)
			
 
				+        ggml_init_riscv_arch_features();
			
 
				+#endif
			
 
				+
			
 
				         is_first_call = false;
			
 
				     }
			
 
				 
			
--- a/ggml/src/ggml-cpu/ggml-cpu.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -583,6 +583,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
 
				         if (ggml_cpu_has_riscv_v()) {
			
 
				             features.push_back({ "RISCV_V", "1" });
			
 
				         }
			
 
				+        if (ggml_cpu_get_rvv_vlen() > 0) {
			
 
				+            static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
			
 
				+            features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
			
 
				+        }
			
 
				         if (ggml_cpu_has_vsx()) {
			
 
				             features.push_back({ "VSX", "1" });
			
 
				         }
			
--- a/ggml/src/ggml-cpu/repack.cpp
+++ b/ggml/src/ggml-cpu/repack.cpp
@@ -2169,7 +2169,8 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
 
				     static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 8, 8, GGML_TYPE_Q8_0> iq4_nl_8x8_q8_0;
			
 
				 
			
 
				     if (cur->type == GGML_TYPE_Q4_0) {
			
 
				-        if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
			
 
				+        if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)
			
 
				+            || (ggml_cpu_has_riscv_v() && (ggml_cpu_get_rvv_vlen() >= QK4_0))) {
			
 
				             if (cur->ne[1] % 8 == 0) {
			
 
				                 return &q4_0_8x8_q8_0;
			
 
				             }