浏览代码

ggml : fix GGML_F32_VEC_FMA argument order in ggml_vec_mad1_f32 (#16307)

* fix GGML_F32_VEC_FMA argument order in ggml_vec_mad1_f32

* add test that fails on simd
Sigbjørn Skjæret 3 月之前
父节点
当前提交
b887d2f341
共有 2 个文件被更改,包括 2 次插入和 1 次删除
  1. 1 1
      ggml/src/ggml-cpu/vec.h
  2. 1 0
      tests/test-backend-ops.cpp

+ 1 - 1
ggml/src/ggml-cpu/vec.h

@@ -610,7 +610,7 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float * x, co
         for (int i = 0; i < np; i += GGML_F32_STEP) {
             for (int j = 0; j < GGML_F32_ARR; j++) {
                 ay[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
-                ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
+                ay[j] = GGML_F32_VEC_FMA(vb, ay[j], vs);
 
                 GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
             }

+ 1 - 0
tests/test-backend-ops.cpp

@@ -6155,6 +6155,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_add1());
     test_cases.emplace_back(new test_scale());
     test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {10, 10, 10, 10}, 2.0f, 1.0f));
+    test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {100, 10, 10, 10}, 2.0f, 1.0f));
     test_cases.emplace_back(new test_softcap(GGML_TYPE_F32, {10, 10, 10, 10}, 50.0f));
     test_cases.emplace_back(new test_silu_back());