|
|
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|
|
}
|
|
|
};
|
|
|
|
|
|
-// instance for Q4
|
|
|
-static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
|
|
-static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
|
|
-static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
|
|
-static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
|
|
-
|
|
|
-// instance for IQ4
|
|
|
-static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
|
|
-
|
|
|
} // namespace ggml::cpu::repack
|
|
|
|
|
|
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
|
|
|
+
|
|
|
+ // instances for Q4
|
|
|
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
|
|
|
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
|
|
|
+ static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
|
|
|
+ static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
|
|
|
+
|
|
|
+ // instance for IQ4
|
|
|
+ static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
|
|
+
|
|
|
if (cur->type == GGML_TYPE_Q4_0) {
|
|
|
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
|
|
|
if (cur->ne[1] % 8 == 0) {
|
|
|
- return &ggml::cpu::repack::q4_0_8x8_q8_0;
|
|
|
+ return &q4_0_8x8_q8_0;
|
|
|
}
|
|
|
}
|
|
|
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
|
|
|
if (cur->ne[1] % 4 == 0) {
|
|
|
- return &ggml::cpu::repack::q4_0_4x8_q8_0;
|
|
|
+ return &q4_0_4x8_q8_0;
|
|
|
}
|
|
|
}
|
|
|
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
|
|
if (cur->ne[1] % 4 == 0) {
|
|
|
- return &ggml::cpu::repack::q4_0_4x4_q8_0;
|
|
|
+ return &q4_0_4x4_q8_0;
|
|
|
}
|
|
|
}
|
|
|
} else if (cur->type == GGML_TYPE_Q4_K) {
|
|
|
if (ggml_cpu_has_avx2()) {
|
|
|
if (cur->ne[1] % 8 == 0) {
|
|
|
- return &ggml::cpu::repack::q4_K_8x8_q8_K;
|
|
|
+ return &q4_K_8x8_q8_K;
|
|
|
}
|
|
|
}
|
|
|
} else if (cur->type == GGML_TYPE_IQ4_NL) {
|
|
|
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
|
|
|
if (cur->ne[1] % 4 == 0) {
|
|
|
- return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
|
|
|
+ return &iq4_nl_4x4_q8_0;
|
|
|
}
|
|
|
}
|
|
|
}
|