#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"
#include "ggml-opt.h"

#include <cinttypes>
#include <cmath>
#include <cstdio> // for printf
#include <random>
#include <string>
#include <thread>
#include <vector>

static bool almost_equal(const double a, const double b, const double atol) {
    return fabs(a - b) < atol;
}

constexpr int64_t ne_datapoint = 2;
constexpr int64_t ne_label     = 1;
constexpr int64_t ndata        = 6;

struct helper_ctx_data {
    std::vector<ggml_opt_dataset_t>   datasets_supervised;
    std::vector<struct ggml_tensor *> data_batch;
    std::vector<struct ggml_tensor *> labels_batch;

    ggml_opt_dataset_t     dataset_unsupervised;
    struct ggml_context  * ctx_static;
    struct ggml_context  * ctx_compute;
    struct ggml_opt_params opt_params;
    ggml_opt_context_t     opt_ctx;
    struct ggml_tensor   * inputs;
    struct ggml_tensor   * weights;
    struct ggml_tensor   * outputs;
    ggml_backend_buffer_t  buf;
    ggml_opt_result_t      result;
    ggml_opt_result_t      result2;
};

// These default values make it easier to check optimization results vs. expected values.
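// Note: with beta1 == beta2 == eps == 0 the AdamW update should reduce to a plain
// sign-of-gradient step of size alpha (assuming weight decay stays at its default),
// so each optimizer step below moves a weight by exactly -1.0f * sign(grad).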
static ggml_opt_optimizer_params helper_get_test_opt_pars(void * userdata) {
    ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(userdata);
    result.adamw.alpha = 1.0f;
    result.adamw.beta1 = 0.0f;
    result.adamw.beta2 = 0.0f;
    result.adamw.eps   = 0.0f;
    return result;
}

static helper_ctx_data helper_get_ctx_data(
        ggml_backend_sched_t    backend_sched,
        ggml_backend_t          backend,
        const bool              init_opt_ctx       = true,
        const bool              optimizer_defaults = true,
        int64_t                 nbatch_logical     = 1,
        int64_t                 nbatch_physical    = 1,
        enum ggml_opt_loss_type loss_type          = GGML_OPT_LOSS_TYPE_SUM) {
    std::vector<ggml_opt_dataset_t> datasets(ndata);
    for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
            GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);

        float * data   = ggml_get_data_f32(ggml_opt_dataset_data(  dataset));
        float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));

        for (int64_t idata = 0; idata < ndata; ++idata) {
            for (int64_t id = 0; id < ne_datapoint; ++id) {
                data[  idata*ne_datapoint + id] =     16*idata + id;
            }
            for (int64_t il = 0; il < ne_label; ++il) {
                labels[idata*ne_label     + il] = 16*(16*idata + il);
            }
        }

        datasets[ndata_shard-1] = dataset;
    }

    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);

    float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));

    for (int64_t idata = 0; idata < ndata; ++idata) {
        data[idata] = idata;
    }

    struct ggml_context * ctx_static;
    struct ggml_context * ctx_compute;
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ (2*ndata + 2)*ggml_tensor_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_static = ggml_init(params);
    }
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_compute = ggml_init(params);
    }

    std::vector<struct ggml_tensor *> data_batch(ndata);
    std::vector<struct ggml_tensor *> labels_batch(ndata);
    for (int64_t ndata_batch = 1; ndata_batch <= ndata; ++ndata_batch) {
        data_batch[ndata_batch-1]   = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, ndata_batch*ne_datapoint);
        labels_batch[ndata_batch-1] = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, ndata_batch*ne_label);
    }
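
    // The "model" under test is deliberately trivial: outputs = (inputs + weights) * 1.0f,
    // where the multiplication by 1.0f only gives the graph a second node.
    // d(outputs)/d(weights) is therefore 1 for every datapoint.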
    struct ggml_tensor * inputs = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, nbatch_physical);
    ggml_set_name(inputs, "inputs");

    struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(weights, "weights");
    ggml_set_param(weights);

    struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);

    struct ggml_tensor * outputs = ggml_scale(ctx_compute, intermediary, 1.0f);
    ggml_set_name(outputs, "outputs");

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);

    const float w0 = float(ndata)/2;
    ggml_backend_tensor_set(weights, &w0, 0, sizeof(float));

    GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
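    // opt_period is the number of physical batches per logical batch: gradients are
    // accumulated over opt_period evaluations before the optimizer applies a step.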
    const int32_t opt_period = nbatch_logical / nbatch_physical;

    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
    opt_params.ctx_compute = ctx_compute;
    opt_params.inputs      = inputs;
    opt_params.outputs     = outputs;
    opt_params.opt_period  = opt_period;
    if (!optimizer_defaults) {
        opt_params.get_opt_pars = helper_get_test_opt_pars;
    }
    ggml_opt_context_t opt_ctx = init_opt_ctx ? ggml_opt_init(opt_params) : nullptr;

    ggml_opt_result_t result  = ggml_opt_result_init();
    ggml_opt_result_t result2 = ggml_opt_result_init();

    return {datasets, data_batch, labels_batch, dataset_unsupervised, ctx_static, ctx_compute, opt_params, opt_ctx,
        inputs, weights, outputs, buf, result, result2};
}

static void helper_free_ctx_data(struct helper_ctx_data ctx_data) {
    ggml_opt_result_free(ctx_data.result);
    ggml_opt_result_free(ctx_data.result2);
    ggml_opt_free(ctx_data.opt_ctx);
    ggml_backend_buffer_free(ctx_data.buf);
    ggml_free(ctx_data.ctx_static);
    ggml_free(ctx_data.ctx_compute);
    for (ggml_opt_dataset_t dataset : ctx_data.datasets_supervised) {
        ggml_opt_dataset_free(dataset);
    }
    ggml_opt_dataset_free(ctx_data.dataset_unsupervised);
}

static void helper_after_test(
        const char * func, const bool high_level, const std::string options,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    printf(" %s(high_level=%s%s, subtest=%s): ",
        func, high_level ? "yes" : "no", options.c_str(), subtest.c_str());
    if (subtest_ok) {
        printf("\033[1;32mOK\033[0m\n");
        npass++;
    } else {
        printf("\033[1;31mFAIL\033[0m\n");
    }
    ntest++;
}
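
// Checks that batches fetched via ggml_opt_dataset_get_batch contain exactly the
// expected datapoints and labels for every combination of shard size and batch size,
// and, when shuffling, that each datapoint still occurs exactly once.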
static std::pair<int, int> test_dataset(ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool shuffle) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(backend_sched, backend);

    for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
        ggml_opt_dataset_t dataset = cd.datasets_supervised[ndata_shard-1];

        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }

        for (int64_t ndata_batch = 1; ndata_batch <= ndata; ++ndata_batch) {
            if (ndata_batch % ndata_shard != 0) {
                continue;
            }
            bool subtest_ok = true;

            struct ggml_tensor * data_batch   = cd.data_batch[ndata_batch-1];
            struct ggml_tensor * labels_batch = cd.labels_batch[ndata_batch-1];

            std::vector<float> data(ggml_nelements(  data_batch));
            std::vector<float> labels(ggml_nelements(labels_batch));

            std::vector<int64_t> idata_shuffled;
            const int64_t nbatches = ndata / ndata_batch;
            for (int64_t ibatch = 0; ibatch < nbatches; ++ibatch) {
                ggml_opt_dataset_get_batch(dataset, data_batch, labels_batch, ibatch);

                ggml_backend_tensor_get(  data_batch, data.data(),   0, ggml_nbytes(  data_batch));
                ggml_backend_tensor_get(labels_batch, labels.data(), 0, ggml_nbytes(labels_batch));

                for (int64_t idata_batch = 0; idata_batch < ndata_batch; ++idata_batch) {
                    const int64_t idata       = ibatch*ndata_batch + idata_batch;
                    const int64_t idata_found = data[idata_batch*ne_datapoint] / 16;
                    subtest_ok = subtest_ok && (shuffle || idata_found == idata);
                    idata_shuffled.push_back(idata_found);

                    for (int64_t id = 0; id < ne_datapoint; ++id) {
                        if (data[  idata_batch*ne_datapoint + id] != 16*idata_found + id) {
                            subtest_ok = false;
                        }
                    }
                    for (int64_t il = 0; il < ne_label; ++il) {
                        if (labels[idata_batch*ne_label     + il] != 16*(16*idata_found + il)) {
                            subtest_ok = false;
                        }
                    }
                }
            }

            if (!shuffle || ndata % ndata_batch == 0) {
                const int ndata_max = (ndata / ndata_batch) * ndata_batch;
                for (int64_t idata = 0; subtest_ok && idata < ndata_max; ++idata) {
                    int ninstances = 0;
                    for (int64_t id : idata_shuffled) {
                        ninstances += id == idata;
                    }
                    if (ninstances != 1) {
                        subtest_ok = false;
                    }
                }
            }

            printf(" %s(shuffle=%s, ndata_shard=%" PRId64 ", ndata_batch=%" PRId64 "): ",
                __func__, shuffle ? "yes" : "no", ndata_shard, ndata_batch);
            if (subtest_ok) {
                printf("\033[1;32mOK\033[0m\n");
                npass++;
            } else {
                printf("\033[1;31mFAIL\033[0m\n");
            }
            ntest++;
        }
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
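
// With nbatch_logical much larger than the dataset the optimizer never applies a
// step, so the gradient accumulator should simply grow by d(loss)/d(weights) == 1
// with every evaluation; after processing datapoint idata it should hold idata + 1.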
static std::pair<int, int> test_grad(ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false,
        /*nbatch_logical =*/ 999999, /*nbatch_physical =*/ 1);

    std::vector<float> grad_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        grad_history[idata] = NAN;
    }

    for (int idata = 0; idata < ndata; ++idata) {
        const float idataf = idata;
        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
        ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
        ggml_opt_eval(cd.opt_ctx, cd.result);
        ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
    }

    {
        bool subtest_ok = true;
        for (int idata = 0; idata < ndata; ++idata) {
            if (grad_history[idata] != idata + 1) {
                subtest_ok = false;
            }
        }
        printf(" %s(): ", __func__);
        if (subtest_ok) {
            printf("\033[1;32mOK\033[0m\n");
            npass++;
        } else {
            printf("\033[1;31mFAIL\033[0m\n");
        }
        ntest++;
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

static void helper_after_test_forward_backward(
        const char * func, const bool high_level, const bool shuffle,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    std::string options = ", shuffle=";
    options += shuffle ? "yes" : "no";
    helper_after_test(func, high_level, options, subtest, subtest_ok, ntest, npass);
}
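
// With w0 == ndata/2 == 3 the forward-only losses are idata + 3 for idata in [0, 6),
// i.e. {3, 4, 5, 6, 7, 8}: sum 33, sample standard deviation sqrt(3.5). During the
// backward pass each evaluation is followed by a sign step of -1 on the weight, so
// every per-datapoint loss is exactly 3 and the sum is 18 with zero uncertainty
// (unless the data was shuffled).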
static std::pair<int, int> test_forward_backward(
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool high_level, const bool shuffle) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false);
    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);

    std::vector<float> loss_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    {
        int64_t ndata;
        ggml_opt_result_ndata(cd.result, &ndata);
        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        const bool subtest_ok = ndata == 0 && loss == 0.0 && std::isnan(loss_unc) && std::isnan(accuracy) && std::isnan(accuracy_unc);
        helper_after_test_forward_backward(__func__, high_level, shuffle, "results_initial", subtest_ok, ntest, npass);
    }

    if (high_level) {
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;
        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }
        ggml_opt_epoch(cd.opt_ctx, dataset, nullptr, cd.result, 0, nullptr, nullptr);
    } else {
        for (int idata = 0; idata < ndata; ++idata) {
            const float idataf = idata;
            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
            ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
            ggml_opt_eval(cd.opt_ctx, cd.result);
            ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
        }
    }

    {
        float weights;
        ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
        const bool subtest_ok = weights == ndata/2;
        helper_after_test_forward_backward(__func__, high_level, shuffle, "weights_after_forward", subtest_ok, ntest, npass);
    }
    {
        int64_t ndata;
        ggml_opt_result_ndata(cd.result, &ndata);
        bool subtest_ok = ndata == 6;

        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        subtest_ok = subtest_ok && loss == 33.0 && almost_equal(loss_unc, sqrt(3.5), 1e-10);

        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

        helper_after_test_forward_backward(__func__, high_level, shuffle, "results_after_forward", subtest_ok, ntest, npass);
    }
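
    // Run a few throwaway backward steps, then restore the original weights and
    // reset the context; the subsequent pass should behave as if freshly initialized.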
    float w0;
    ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
    for (int i = 0; i < 10; ++i) {
        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
        ggml_opt_eval(cd.opt_ctx, cd.result);
    }
    ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));

    ggml_opt_reset(cd.opt_ctx, /*optimizer =*/ false);
    ggml_opt_result_reset(cd.result);

    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    if (high_level) {
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;
        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }
        ggml_opt_epoch(cd.opt_ctx, dataset, cd.result, nullptr, ndata, nullptr, nullptr);
    } else {
        for (int idata = 0; idata < ndata; ++idata) {
            const float idataf = idata;
            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
            ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
            ggml_opt_eval(cd.opt_ctx, cd.result);
            ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
        }
    }

    {
        float weights;
        ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
        const bool subtest_ok = weights == -ndata/2;
        helper_after_test_forward_backward(__func__, high_level, shuffle, "weights_after_forward_backward", subtest_ok, ntest, npass);
    }
    {
        int64_t ndata;
        ggml_opt_result_ndata(cd.result, &ndata);
        bool subtest_ok = ndata == 6;

        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        subtest_ok = subtest_ok && loss == 18.0 && (shuffle || loss_unc == 0.0);

        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

        helper_after_test_forward_backward(__func__, high_level, shuffle, "result_after_forward_backward", subtest_ok, ntest, npass);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
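
// ggml_opt_epoch and the convenience wrapper ggml_opt_fit should be equivalent:
// trained on the same data they must end up with identical weights.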
static std::pair<int, int> test_epoch_vs_fit(ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    float weights_epoch;
    float weights_fit;

    {
        struct helper_ctx_data cd = helper_get_ctx_data(backend_sched, backend, /*init_opt_ctx =*/ true);
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;

        ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        ggml_opt_epoch(cd.opt_ctx, dataset, cd.result, nullptr, ndata, nullptr, nullptr);

        ggml_backend_tensor_get(cd.weights, &weights_epoch, 0, ggml_nbytes(cd.weights));
        helper_free_ctx_data(cd);
    }
    {
        struct helper_ctx_data cd = helper_get_ctx_data(backend_sched, backend, /*init_opt_ctx =*/ false);
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;

        ggml_opt_fit(backend_sched, cd.ctx_compute, cd.inputs, cd.outputs, dataset,
            GGML_OPT_LOSS_TYPE_SUM, ggml_opt_get_default_optimizer_params, 1, 1, 0.0f, true);

        ggml_backend_tensor_get(cd.weights, &weights_fit, 0, ggml_nbytes(cd.weights));
        helper_free_ctx_data(cd);
    }

    const bool subtest_ok = weights_epoch == weights_fit;

    printf(" %s(): ", __func__);
    if (subtest_ok) {
        printf("\033[1;32mOK\033[0m\n");
        npass++;
    } else {
        printf("\033[1;31mFAIL\033[0m\n");
    }
    ntest++;

    return std::make_pair(npass, ntest);
}

static void helper_after_test_idata_split(
        const char * func, const bool high_level, const int epoch,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    std::string options = ", epoch=";
    options += std::to_string(epoch);
    helper_after_test(func, high_level, options, subtest, subtest_ok, ntest, npass);
}
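
// Splits the dataset at idata_split == 4: the first two thirds are trained on
// (backward pass, one sign step per datapoint), the final third is only evaluated.
// The weight therefore drops by idata_split per epoch, and the two result objects
// must account for exactly the datapoints they saw.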
static std::pair<int, int> test_idata_split(ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool high_level) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false);
    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
    const int idata_split = ndata * 2/3;

    std::vector<float> loss_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    for (int epoch = 1; epoch <= 4; ++epoch) {
        if (high_level) {
            ggml_opt_epoch(cd.opt_ctx, cd.dataset_unsupervised, cd.result, cd.result2, idata_split, nullptr, nullptr);
        } else {
            int idata = 0;
            for (; idata < idata_split; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
                ggml_opt_eval(cd.opt_ctx, cd.result);
                ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
            }
            for (; idata < ndata; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
                ggml_opt_eval(cd.opt_ctx, cd.result2);
                ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
            }
        }

        {
            float weights;
            ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
            const bool subtest_ok = weights == ndata/2 - epoch*idata_split;
            helper_after_test_idata_split(__func__, high_level, epoch, "weights", subtest_ok, ntest, npass);
        }
        {
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result, &ndata_result);
            bool subtest_ok = ndata_result == idata_split;

            double loss;
            double loss_unc;
            ggml_opt_result_loss(cd.result, &loss, &loss_unc);
            subtest_ok = subtest_ok && loss == 28.0 - epoch*16.0 && loss_unc == 0.0;

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_idata_split(__func__, high_level, epoch, "results_backward", subtest_ok, ntest, npass);
        }
        {
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result2, &ndata_result);
            bool subtest_ok = ndata_result == ndata - idata_split;

            double loss;
            double loss_unc;
            ggml_opt_result_loss(cd.result2, &loss, &loss_unc);
            subtest_ok = subtest_ok && loss == 15.0 - epoch*8 && almost_equal(loss_unc, sqrt(0.5), 1e-10);

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result2, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_idata_split(__func__, high_level, epoch, "results_forward", subtest_ok, ntest, npass);
        }

        ggml_opt_result_reset(cd.result);
        ggml_opt_result_reset(cd.result2);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

static void helper_after_test_gradient_accumulation(
        const char * func, const int nbatch_physical, const enum ggml_opt_loss_type loss_type, const int epoch,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    std::string options = ", nbatch_physical=";
    options += std::to_string(nbatch_physical);
    options += ", loss_type=";
    options += loss_type == GGML_OPT_LOSS_TYPE_MEAN ? "mean" : "sum";
    options += ", epoch=";
    options += std::to_string(epoch);
    helper_after_test(func, false, options, subtest, subtest_ok, ntest, npass);
}
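
// With nbatch_logical == 6 the gradients of all datapoints are accumulated into a
// single logical batch, so only one optimizer step happens per epoch. For
// nbatch_physical == 2 the accumulator can only be read back after every second
// datapoint, so the even-index entries of grad_history are filled in by hand
// rather than observed.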
static std::pair<int, int> test_gradient_accumulation(
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const int32_t nbatch_physical, const enum ggml_opt_loss_type loss_type) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(
        backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);

    std::vector<float> grad_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        grad_history[idata] = NAN;
    }

    for (int epoch = 1; epoch <= 4; ++epoch) {
        if (nbatch_physical == 1) {
            for (int idata = 0; idata < ndata; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
                ggml_opt_eval(cd.opt_ctx, cd.result);
                ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
            }
        } else if (nbatch_physical == 2) {
            for (int idata = 0; idata < ndata; idata += 2) {
                const float idataf[2] = {float(idata + 0), float(idata + 1)};
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
                ggml_opt_eval(cd.opt_ctx, cd.result);

                grad_history[idata + 0] = 0.0f;
                ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
            }
        } else {
            GGML_ASSERT(false);
        }

        {
            GGML_ASSERT(ndata == 6);
            constexpr double atol = 1e-6;
            bool subtest_ok = true;
            if (loss_type == GGML_OPT_LOSS_TYPE_SUM) {
                if (nbatch_physical == 1) {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 3.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 5.0, atol);
                } else {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 0.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 0.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 0.0, atol);
                }
                subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
            } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
                if (nbatch_physical == 1) {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 3.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 5.0/ndata, atol);
                } else {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 0.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 0.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 0.0/ndata, atol);
                }
                subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
            } else {
                GGML_ASSERT(false);
            }
            helper_after_test_gradient_accumulation(__func__, nbatch_physical, loss_type, epoch, "grads", subtest_ok, ntest, npass);
        }
        {
            float weights;
            ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
            const bool subtest_ok = weights == (ndata/2) - epoch;
            helper_after_test_gradient_accumulation(__func__, nbatch_physical, loss_type, epoch, "weights", subtest_ok, ntest, npass);
        }
        {
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result, &ndata_result);
            bool subtest_ok = ndata_result == ndata/nbatch_physical;

            double loss;
            ggml_opt_result_loss(cd.result, &loss, /*loss_unc =*/ nullptr);
            if (loss_type == GGML_OPT_LOSS_TYPE_SUM) {
                subtest_ok = subtest_ok && loss == (39.0 - epoch*6.0);
            } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
                subtest_ok = subtest_ok && almost_equal(loss, (39.0 - epoch*6.0) / ndata, 1e-6);
            } else {
                GGML_ASSERT(false);
            }

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_gradient_accumulation(__func__, nbatch_physical, loss_type, epoch, "results", subtest_ok, ntest, npass);
        }

        ggml_opt_result_reset(cd.result);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

static ggml_opt_optimizer_params helper_get_regression_opt_pars(void * userdata) {
    ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(userdata);
    result.adamw.alpha = 0.1f;
    return result;
}
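
// End-to-end sanity check: fit f(x) = a*x + b to noisy synthetic data with AdamW
// (learning rate 0.1) and require the fitted parameters to be within 1e-2 of the
// ground truth.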
static std::pair<int, int> test_regression(ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    // Test for simple regression with f(x) = a*x + b.
    constexpr int64_t ndata_regression = 201;
    constexpr float a_true = 1.2f;
    constexpr float b_true = 3.4f;

    std::mt19937 gen(12345);
    std::normal_distribution<float> nd{0.0f, 0.1f};

    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);

    float * data   = ggml_get_data_f32(ggml_opt_dataset_data(  dataset));
    float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));

    constexpr float x_min = -100.0f;
    constexpr float x_max =  100.0f;

    for (int64_t idata = 0; idata < ndata_regression; ++idata) {
        const float x = x_min + (x_max - x_min) * idata/(ndata_regression-1);
        const float y = a_true*x + b_true + nd(gen);

        data[idata]   = x;
        labels[idata] = y;
    }

    struct ggml_context * ctx_static;
    struct ggml_context * ctx_compute;
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 3*ggml_tensor_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_static = ggml_init(params);
    }
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_compute = ggml_init(params);
    }

    // The first dimension is the dimension of the datapoints, the second dimension is the number of datapoints.
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx_static, GGML_TYPE_F32, 1, ndata_regression);
    ggml_set_name(x, "x");

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(a, "a");
    ggml_set_param(a);

    struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(b, "b");
    ggml_set_param(b);

    struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
    ggml_set_name(f, "f");

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);

    const float a0 = 1.0f;
    const float b0 = 3.0f;
    ggml_backend_tensor_set(a, &a0, 0, sizeof(float));
    ggml_backend_tensor_set(b, &b0, 0, sizeof(float));

    ggml_opt_fit(backend_sched, ctx_compute, x, f, dataset, GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR,
        helper_get_regression_opt_pars, 100, ndata_regression, 0.0f, true);

    {
        float a_fit;
        ggml_backend_tensor_get(a, &a_fit, 0, sizeof(float));
        float b_fit;
        ggml_backend_tensor_get(b, &b_fit, 0, sizeof(float));
        const bool subtest_ok = almost_equal(a_fit, a_true, 1e-2) && almost_equal(b_fit, b_true, 1e-2);
        printf(" %s(subtest=weights): ", __func__);
        if (subtest_ok) {
            printf("\033[1;32mOK\033[0m\n");
            npass++;
        } else {
            printf("\033[1;31mFAIL\033[0m\n");
        }
        ntest++;
    }

    ggml_backend_buffer_free(buf);
    ggml_free(ctx_static);
    ggml_free(ctx_compute); // was leaked: ctx_compute is local to this test and must be freed too
    ggml_opt_dataset_free(dataset);

    return std::make_pair(npass, ntest);
}

static std::pair<int, int> test_backend(ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int npass = 0;
    int ntest = 0;

    for (bool shuffle : {false, true}) {
        std::pair<int, int> partial = test_dataset(backend_sched, backend, shuffle);
        npass += partial.first;
        ntest += partial.second;
    }
    {
        std::pair<int, int> partial = test_grad(backend_sched, backend);
        npass += partial.first;
        ntest += partial.second;
    }
    for (bool high_level : {false, true}) {
        for (bool shuffle : {false, true}) {
            if (!high_level && shuffle) {
                continue;
            }
            std::pair<int, int> partial = test_forward_backward(backend_sched, backend, high_level, shuffle);
            npass += partial.first;
            ntest += partial.second;
        }
    }
    {
        std::pair<int, int> partial = test_epoch_vs_fit(backend_sched, backend);
        npass += partial.first;
        ntest += partial.second;
    }
    for (bool high_level : {false, true}) {
        std::pair<int, int> partial = test_idata_split(backend_sched, backend, high_level);
        npass += partial.first;
        ntest += partial.second;
    }
    for (int32_t nbatch_physical : {2, 1}) {
        for (enum ggml_opt_loss_type loss_type : {GGML_OPT_LOSS_TYPE_SUM, GGML_OPT_LOSS_TYPE_MEAN}) {
            std::pair<int, int> partial = test_gradient_accumulation(backend_sched, backend, nbatch_physical, loss_type);
            npass += partial.first;
            ntest += partial.second;
        }
    }
    {
        std::pair<int, int> partial = test_regression(backend_sched, backend);
        npass += partial.first;
        ntest += partial.second;
    }

    return std::make_pair(npass, ntest);
}
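
// Entry point: initializes one backend per available device, runs the full test
// suite once per device, and reports a per-backend pass/fail summary.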
int main(void) {
    const size_t dev_count = ggml_backend_dev_count();
    printf("Testing %zu devices\n\n", dev_count);
    size_t n_ok = 0;

    std::vector<ggml_backend_dev_t> devs;
    std::vector<ggml_backend_t> backends;

    for (size_t i = 0; i < dev_count; ++i) {
        devs.push_back(ggml_backend_dev_get(i));

        ggml_backend_t backend = ggml_backend_dev_init(devs[i], NULL);
        GGML_ASSERT(backend != NULL);

        if (ggml_backend_is_cpu(backend)) {
            ggml_backend_cpu_set_n_threads(backend, std::thread::hardware_concurrency() / 2);
        }

        backends.push_back(backend);
    }

    for (size_t i = 0; i < dev_count; ++i) {
        // Put the backend to be tested in front so that it's prioritized:
        std::vector<ggml_backend_t> backends_modded = {backends[i]};
        backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());

        ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
            backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);

        printf("Backend %zu/%zu: %s\n", i + 1, dev_count, ggml_backend_dev_name(devs[i]));
        printf(" Device description: %s\n", ggml_backend_dev_description(devs[i]));
        size_t free, total; // NOLINT
        ggml_backend_dev_memory(devs[i], &free, &total);
        printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
        printf("\n");

        std::pair<int, int> result = test_backend(backend_sched, backends[i]);

        printf(" %d/%d tests passed\n", result.first, result.second);
        printf(" Backend %s: ", ggml_backend_name(backends[i]));
        if (result.first == result.second) {
            printf("\033[1;32mOK\033[0m\n");
            n_ok++;
        } else {
            printf("\033[1;31mFAIL\033[0m\n");
        }
        printf("\n");

        ggml_backend_sched_free(backend_sched);
    }

    for (ggml_backend_t backend : backends) {
        ggml_backend_free(backend);
    }

    printf("%zu/%zu backends passed\n", n_ok, dev_count);
    if (n_ok != dev_count) {
        printf("\033[1;31mFAIL\033[0m\n");
        return 1;
    }
    printf("\033[1;32mOK\033[0m\n");
    return 0;
}