|
|
@@ -39,6 +39,7 @@
|
|
|
#include <string_view>
|
|
|
#include <thread>
|
|
|
#include <vector>
|
|
|
+#include <unordered_map>
|
|
|
|
|
|
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
|
|
size_t nels = ggml_nelements(tensor);
|
|
|
@@ -269,6 +270,34 @@ static double nmse(const float * a, const float * b, size_t n) {
|
|
|
return mse_a_b / mse_a_0;
|
|
|
}
|
|
|
|
|
|
// Difference between two integer multisets a and b, each holding n elements.
// The order of the elements is ignored; only how often each value occurs matters.
//
// Result is sum_v |count_a(v) - count_b(v)| / (2*n), i.e. a Jaccard-like distance:
//   0 - no difference (same values with the same multiplicities)
//   1 - no overlap    (no value occurs in both inputs)
//
// a, b : arrays of n values each (may be null only when n == 0)
// n    : number of elements in each array
static double jdst(const int32_t * a, const int32_t * b, size_t n) {
    // Two empty inputs are identical. Without this guard the result would be
    // 0/0 = NaN, and "NaN > threshold" is always false, hiding the problem.
    if (n == 0) {
        return 0.0;
    }

    // Signed occurrence balance per value: +1 for every hit in a, -1 for every hit in b.
    // A value present on one side only ends up with +/- its full count.
    std::unordered_map<int32_t, int64_t> balance;
    balance.reserve(n);

    for (size_t i = 0; i < n; ++i) {
        ++balance[a[i]];
        --balance[b[i]];
    }

    // Total number of unmatched occurrences over both inputs (at most 2*n).
    int64_t unmatched = 0;
    for (const auto & entry : balance) {
        unmatched += std::abs(entry.second);
    }

    return (double) unmatched / (2.0 * (double) n);
}

|
|
|
+
|
|
|
// maximum absolute asymmetry between a and b
|
|
|
// asymmetry: (a - b) / (a + b)
|
|
|
// This is more stable than relative error if one of the values fluctuates towards zero.
|
|
|
@@ -1051,6 +1080,14 @@ struct test_case {
|
|
|
return 1e-4;
|
|
|
}
|
|
|
|
|
|
+ virtual double max_err() {
|
|
|
+ return max_nmse_err();
|
|
|
+ }
|
|
|
+
|
|
|
+ virtual double err(const float * a, const float * b, size_t n) {
|
|
|
+ return nmse(a, b, n);
|
|
|
+ }
|
|
|
+
|
|
|
virtual float grad_eps() {
|
|
|
return 1e-1f;
|
|
|
}
|
|
|
@@ -1257,16 +1294,16 @@ struct test_case {
|
|
|
// compare
|
|
|
struct callback_userdata {
|
|
|
bool ok;
|
|
|
- double max_err;
|
|
|
+ test_case * tc;
|
|
|
ggml_backend_t backend1;
|
|
|
ggml_backend_t backend2;
|
|
|
};
|
|
|
|
|
|
callback_userdata ud {
|
|
|
true,
|
|
|
- max_nmse_err(),
|
|
|
+ this,
|
|
|
backend1,
|
|
|
- backend2
|
|
|
+ backend2,
|
|
|
};
|
|
|
|
|
|
auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
|
|
|
@@ -1314,9 +1351,9 @@ struct test_case {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- double err = nmse(f1.data(), f2.data(), f1.size());
|
|
|
- if (err > ud->max_err) {
|
|
|
- printf("[%s] NMSE = %.9f > %.9f ", ggml_op_desc(t1), err, ud->max_err);
|
|
|
+ double err = ud->tc->err(f1.data(), f2.data(), f1.size());
|
|
|
+ if (err > ud->tc->max_err()) {
|
|
|
+ printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err());
|
|
|
//for (int i = 0; i < (int) f1.size(); i++) {
|
|
|
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
|
|
//}
|
|
|
@@ -4943,7 +4980,71 @@ struct test_argsort : public test_case {
|
|
|
}
|
|
|
};
|
|
|
|
|
|
-struct test_topk_moe: public test_case {
|
|
|
+// GGML_OP_TOP_K
|
|
|
+struct test_top_k : public test_case {
|
|
|
+ const ggml_type type;
|
|
|
+ const std::array<int64_t, 4> ne;
|
|
|
+ const int k;
|
|
|
+
|
|
|
+ std::string vars() override {
|
|
|
+ return VARS_TO_STR3(type, ne, k);
|
|
|
+ }
|
|
|
+
|
|
|
+ test_top_k(ggml_type type = GGML_TYPE_F32,
|
|
|
+ std::array<int64_t, 4> ne = {16, 10, 10, 10},
|
|
|
+ int k = 4)
|
|
|
+ : type(type), ne(ne), k(k) {}
|
|
|
+
|
|
|
+ double max_err() override {
|
|
|
+ return 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ double err(const float * a, const float * b, size_t n) override {
|
|
|
+ std::vector<int32_t> ia(n);
|
|
|
+ std::vector<int32_t> ib(n);
|
|
|
+
|
|
|
+ double diff = 0.0f;
|
|
|
+
|
|
|
+ for (size_t i = 0; i < n; i++) {
|
|
|
+ ia[i] = (int32_t) a[i];
|
|
|
+ ib[i] = (int32_t) b[i];
|
|
|
+
|
|
|
+ // penalize the result if the data is not integer valued
|
|
|
+ diff += std::fabs(a[i] - ia[i]);
|
|
|
+ diff += std::fabs(b[i] - ib[i]);
|
|
|
+ }
|
|
|
+
|
|
|
+ return diff + jdst(ia.data(), ib.data(), n);
|
|
|
+ }
|
|
|
+
|
|
|
+ ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
|
+ ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
|
+ ggml_set_name(a, "a");
|
|
|
+
|
|
|
+ ggml_tensor * out = ggml_top_k(ctx, a, k);
|
|
|
+ ggml_set_name(out, "out");
|
|
|
+
|
|
|
+ return out;
|
|
|
+ }
|
|
|
+
|
|
|
+ void initialize_tensors(ggml_context * ctx) override {
|
|
|
+ std::random_device rd;
|
|
|
+ std::default_random_engine rng(rd());
|
|
|
+ for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
|
+ // initialize with unique values to avoid ties
|
|
|
+ for (int64_t r = 0; r < ggml_nrows(t); r++) {
|
|
|
+ std::vector<float> data(t->ne[0]);
|
|
|
+ for (int i = 0; i < t->ne[0]; i++) {
|
|
|
+ data[i] = i;
|
|
|
+ }
|
|
|
+ std::shuffle(data.begin(), data.end(), rng);
|
|
|
+ ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+};
|
|
|
+
|
|
|
+struct test_topk_moe : public test_case {
|
|
|
const std::array<int64_t, 4> ne;
|
|
|
const int n_expert_used;
|
|
|
const bool with_norm;
|
|
|
@@ -4976,7 +5077,7 @@ struct test_topk_moe: public test_case {
|
|
|
|
|
|
ggml_tensor * logits = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
|
|
|
ggml_tensor * probs = delayed_softmax ? logits : ggml_soft_max(ctx, logits);
|
|
|
- ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_expert_used); // [n_expert_used, n_tokens]
|
|
|
+ ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, probs, n_expert_used); // [n_expert_used, n_tokens]
|
|
|
|
|
|
ggml_tensor * out = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
|
|
|
|
|
|
@@ -7534,6 +7635,23 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|
|
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {2, 8, 8192, 1}, order)); // bailingmoe2 (group selection)
|
|
|
}
|
|
|
|
|
|
+ for (int k : {1, 2, 3, 7, 15}) {
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {16, 10, 10, 10}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {60, 10, 10, 10}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {1023, 2, 1, 3}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {1024, 2, 1, 3}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {1025, 2, 1, 3}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {16384, 1, 1, 1}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2047, 2, 1, 3}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2048, 2, 1, 3}, k));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2049, 2, 1, 3}, k));
|
|
|
+ }
|
|
|
+
|
|
|
+ // exhaustive top_k tests
|
|
|
+ //for (int i = 1; i < 9999; ++i) {
|
|
|
+ // test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {i, 2, 1, 3}, rand() % i + 1));
|
|
|
+ //}
|
|
|
+
|
|
|
for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
|
|
|
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
|
|
|
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
|
|
|
@@ -7914,6 +8032,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|
|
}
|
|
|
|
|
|
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {65000, 16, 1, 1}, 40));
|
|
|
|
|
|
return test_cases;
|
|
|
}
|