Răsfoiți Sursa

ggml : refactor forward_dup for cpu backend (#16062)

* ggml : refactor forward_dup for cpu backend

* clean up a bit

* add quant/dequant perf test
Xuan-Son Nguyen 4 luni în urmă
părinte
commit
0dd58b6877
3 fișiere modificate, cu 76 adăugiri și 848 ștergeri
  1. 14 0
      ggml/src/ggml-cpu/common.h
  2. 57 845
      ggml/src/ggml-cpu/ops.cpp
  3. 5 3
      tests/test-backend-ops.cpp

+ 14 - 0
ggml/src/ggml-cpu/common.h

@@ -28,6 +28,14 @@ static inline float bf16_to_f32(ggml_bf16_t x) {
     return GGML_BF16_TO_FP32(x);
 }
 
+static inline float i32_to_f32(int32_t x) {
+    return x;
+}
+
+static inline int32_t f32_to_i32(float x) {
+    return x;
+}
+
 static inline float f32_to_f32(float x) {
     return x;
 }
@@ -54,6 +62,12 @@ struct type_conversion_table<ggml_bf16_t> {
     static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
 };
 
+template <>
+struct type_conversion_table<int32_t> {
+    static constexpr float (*to_f32)(int32_t) = i32_to_f32;
+    static constexpr int32_t (*from_f32)(float) = f32_to_i32;
+};
+
 static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
     const int64_t ith = params->ith;
     const int64_t nth = params->nth;

Fișier diff suprimat deoarece este prea mare
+ 57 - 845
ggml/src/ggml-cpu/ops.cpp


+ 5 - 3
tests/test-backend-ops.cpp

@@ -6629,9 +6629,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
     test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1,   1, 1, 1}));
     test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
 
-    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
-    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
-    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_F16,  {512, 3072, 1, 1}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_F32,  {8192, 512, 2, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_F32,  {3072, 512, 2, 1}, {0, 2, 1, 3}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_F32,  GGML_TYPE_Q4_0, {8192, 512, 2, 1}));
+    test_cases.emplace_back(new test_cpy(GGML_TYPE_Q4_0, GGML_TYPE_F32,  {8192, 512, 2, 1}));
 
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
     test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {12888, 256, 5, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff