|
|
@@ -2347,11 +2347,12 @@ struct test_soft_max : public test_case {
|
|
|
const ggml_type type;
|
|
|
const std::array<int64_t, 4> ne;
|
|
|
const bool mask;
|
|
|
+ const ggml_type m_prec;
|
|
|
const float scale;
|
|
|
const float max_bias;
|
|
|
|
|
|
std::string vars() override {
|
|
|
- return VARS_TO_STR5(type, ne, mask, scale, max_bias);
|
|
|
+ return VARS_TO_STR6(type, ne, mask, m_prec, scale, max_bias);
|
|
|
}
|
|
|
|
|
|
// the 1024 test with bias occasionally fails:
|
|
|
@@ -2363,9 +2364,10 @@ struct test_soft_max : public test_case {
|
|
|
test_soft_max(ggml_type type = GGML_TYPE_F32,
|
|
|
std::array<int64_t, 4> ne = {10, 5, 4, 3},
|
|
|
bool mask = false,
|
|
|
+ ggml_type m_prec = GGML_TYPE_F32,
|
|
|
float scale = 1.0f,
|
|
|
float max_bias = 0.0f)
|
|
|
- : type(type), ne(ne), mask(mask), scale(scale), max_bias(max_bias) {}
|
|
|
+ : type(type), ne(ne), mask(mask), m_prec(m_prec), scale(scale), max_bias(max_bias) {}
|
|
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
|
@@ -2374,7 +2376,7 @@ struct test_soft_max : public test_case {
|
|
|
|
|
|
ggml_tensor * mask = nullptr;
|
|
|
if (this->mask) {
|
|
|
- mask = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne[0], ne[1]);
|
|
|
+ mask = ggml_new_tensor_2d(ctx, m_prec, ne[0], ne[1]);
|
|
|
ggml_set_name(mask, "mask");
|
|
|
}
|
|
|
|
|
|
@@ -4150,17 +4152,28 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|
|
for (float scale : {1.0f, 0.1f}) {
|
|
|
for (int64_t ne0 : {16, 1024}) {
|
|
|
for (int64_t ne1 : {16, 1024}) {
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, scale, max_bias));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, scale, max_bias));
|
|
|
+ if (mask) {
|
|
|
+ for (ggml_type m_prec : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, m_prec, scale, max_bias));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, scale, max_bias));
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ /* The precision of mask here doesn't matter as boolean mask is false */
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias));
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 0.1f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, 0.1f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, 0.1f, 8.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, GGML_TYPE_F32, 0.1f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 8.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 8.0f));
|
|
|
|
|
|
for (float max_bias : {0.0f, 8.0f}) {
|
|
|
for (float scale : {1.0f, 0.1f}) {
|
|
|
@@ -4296,13 +4309,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
|
|
|
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
|
|
|
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, 1.0f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, 1.0f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, 1.0f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, 1.0f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, 1.0f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, 1.0f, 0.0f));
|
|
|
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
|
|
|
|
|
|
test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32, 10, 1, 1}));
|
|
|
test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1}));
|