|
|
@@ -1446,14 +1446,14 @@ struct test_case {
|
|
|
const uint64_t target_flops_cpu = 8ULL * GFLOP;
|
|
|
const uint64_t target_flops_gpu = 100ULL * GFLOP;
|
|
|
uint64_t target_flops = is_cpu ? target_flops_cpu : target_flops_gpu;
|
|
|
- n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
|
|
+ n_runs = (int)std::min<int64_t>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
|
|
} else {
|
|
|
// based on memory size
|
|
|
const size_t GB = 1ULL << 30;
|
|
|
const size_t target_size_cpu = 8 * GB;
|
|
|
const size_t target_size_gpu = 32 * GB;
|
|
|
size_t target_size = is_cpu ? target_size_cpu : target_size_gpu;
|
|
|
- n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
|
|
+ n_runs = (int)std::min<int64_t>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
|
|
}
|
|
|
|
|
|
// duplicate the op
|
|
|
@@ -8043,7 +8043,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|
|
}
|
|
|
|
|
|
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
|
|
- for (auto k : {1, 10, 40}) {
|
|
|
+
|
|
|
+ test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2, 1, 1, 1}, 1));
|
|
|
+ for (auto k : {1, 10, 40, 400}) {
|
|
|
for (auto nrows : {1, 16}) {
|
|
|
for (auto cols : {k, 1000, 65000, 200000}) {
|
|
|
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {cols, nrows, 1, 1}, k));
|