test-grad0.cpp

  1. #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
  2. #include "ggml.h"
  3. #include "ggml-cpu.h"
  4. #include <cfloat>
  5. #include <cmath>
  6. #include <cstdint>
  7. #include <cstdio>
  8. #include <cstdlib>
  9. #include <cassert>
  10. #include <initializer_list>
  11. #include <vector>
  12. #if defined(_MSC_VER)
  13. #pragma warning(disable: 4244 4267) // possible loss of data
  14. #endif
  15. #if defined(__GNUC__)
  16. #pragma GCC diagnostic ignored "-Wdouble-promotion"
  17. #endif
  18. #define MAX_NARGS 3
  19. #undef MIN
  20. #undef MAX
  21. #define MIN(a, b) ((a) < (b) ? (a) : (b))
  22. #define MAX(a, b) ((a) > (b) ? (a) : (b))
  23. #define GGML_SILU_FP16
  24. //
  25. // logging
  26. //
  27. #if (GGML_DEBUG >= 1)
  28. #define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
  29. #else
  30. #define GGML_PRINT_DEBUG(...)
  31. #endif
  32. #if (GGML_DEBUG >= 5)
  33. #define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
  34. #else
  35. #define GGML_PRINT_DEBUG_5(...)
  36. #endif
  37. #if (GGML_DEBUG >= 10)
  38. #define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
  39. #else
  40. #define GGML_PRINT_DEBUG_10(...)
  41. #endif
  42. #define GGML_PRINT(...) printf(__VA_ARGS__)
  43. static float frand(void) {
  44. return (float)rand()/(float)RAND_MAX;
  45. }
  46. static int irand(int n) {
  47. if (n == 0) return 0;
  48. return rand()%n;
  49. }
  50. static void get_random_dims(int64_t * dims, int ndims) {
  51. dims[0] = dims[1] = dims[2] = dims[3] = 1;
  52. for (int i = 0; i < ndims; i++) {
  53. dims[i] = 1 + irand(4);
  54. }
  55. }
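// The three helpers below allocate a tensor of the requested type and fill it with uniform
// random values: roughly [fmin, fmax] for the float variants and [imin, imax) for the i32
// variant. They differ only in element type (plus the fp32 -> fp16 conversion for f16).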
  56. static struct ggml_tensor * get_random_tensor_f32(
  57. struct ggml_context * ctx0,
  58. int ndims,
  59. int64_t ne[],
  60. float fmin,
  61. float fmax) {
  62. struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
  63. switch (ndims) {
  64. case 1:
  65. for (int i0 = 0; i0 < ne[0]; i0++) {
  66. ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
  67. }
  68. break;
  69. case 2:
  70. for (int i1 = 0; i1 < ne[1]; i1++) {
  71. for (int i0 = 0; i0 < ne[0]; i0++) {
  72. ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
  73. }
  74. }
  75. break;
  76. case 3:
  77. for (int i2 = 0; i2 < ne[2]; i2++) {
  78. for (int i1 = 0; i1 < ne[1]; i1++) {
  79. for (int i0 = 0; i0 < ne[0]; i0++) {
  80. ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
  81. }
  82. }
  83. }
  84. break;
  85. case 4:
  86. for (int i3 = 0; i3 < ne[3]; i3++) {
  87. for (int i2 = 0; i2 < ne[2]; i2++) {
  88. for (int i1 = 0; i1 < ne[1]; i1++) {
  89. for (int i0 = 0; i0 < ne[0]; i0++) {
  90. ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
  91. }
  92. }
  93. }
  94. }
  95. break;
  96. default:
  97. assert(false);
  98. }
  99. return result;
  100. }
  101. static struct ggml_tensor * get_random_tensor_f16(
  102. struct ggml_context * ctx0,
  103. int ndims,
  104. int64_t ne[],
  105. float fmin,
  106. float fmax) {
  107. struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
  108. switch (ndims) {
  109. case 1:
  110. for (int i0 = 0; i0 < ne[0]; i0++) {
  111. ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
  112. }
  113. break;
  114. case 2:
  115. for (int i1 = 0; i1 < ne[1]; i1++) {
  116. for (int i0 = 0; i0 < ne[0]; i0++) {
  117. ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
  118. }
  119. }
  120. break;
  121. case 3:
  122. for (int i2 = 0; i2 < ne[2]; i2++) {
  123. for (int i1 = 0; i1 < ne[1]; i1++) {
  124. for (int i0 = 0; i0 < ne[0]; i0++) {
  125. ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
  126. }
  127. }
  128. }
  129. break;
  130. case 4:
  131. for (int i3 = 0; i3 < ne[3]; i3++) {
  132. for (int i2 = 0; i2 < ne[2]; i2++) {
  133. for (int i1 = 0; i1 < ne[1]; i1++) {
  134. for (int i0 = 0; i0 < ne[0]; i0++) {
  135. ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
  136. }
  137. }
  138. }
  139. }
  140. break;
  141. default:
  142. assert(false);
  143. }
  144. return result;
  145. }
  146. static struct ggml_tensor * get_random_tensor_i32(
  147. struct ggml_context * ctx0,
  148. int ndims,
  149. int64_t ne[],
  150. int32_t imin,
  151. int32_t imax) {
  152. struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);
  153. switch (ndims) {
  154. case 1:
  155. for (int i0 = 0; i0 < ne[0]; i0++) {
  156. ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
  157. }
  158. break;
  159. case 2:
  160. for (int i1 = 0; i1 < ne[1]; i1++) {
  161. for (int i0 = 0; i0 < ne[0]; i0++) {
  162. ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
  163. }
  164. }
  165. break;
  166. case 3:
  167. for (int i2 = 0; i2 < ne[2]; i2++) {
  168. for (int i1 = 0; i1 < ne[1]; i1++) {
  169. for (int i0 = 0; i0 < ne[0]; i0++) {
  170. ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
  171. }
  172. }
  173. }
  174. break;
  175. case 4:
  176. for (int i3 = 0; i3 < ne[3]; i3++) {
  177. for (int i2 = 0; i2 < ne[2]; i2++) {
  178. for (int i1 = 0; i1 < ne[1]; i1++) {
  179. for (int i0 = 0; i0 < ne[0]; i0++) {
  180. ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
  181. }
  182. }
  183. }
  184. }
  185. break;
  186. default:
  187. assert(false);
  188. }
  189. return result;
  190. }
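// check_gradient compares the analytic gradients from the backward graph against central
// finite differences. For every element x_k of every parameter tensor it evaluates the
// scalar loss f at x_k + eps and x_k - eps and forms
//     g0 ~ (f(x_k + eps) - f(x_k - eps)) / (2 * eps)
// which is then compared to the backward-graph gradient g1 under max_error_abs/max_error_rel.
// The optional expected_vals lists gradient values that are accepted even when the numeric
// estimate is unusable, e.g. around discontinuities.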
  191. static bool check_gradient(
  192. const char * op_name,
  193. struct ggml_context * ctx0,
  194. struct ggml_tensor * x[],
  195. struct ggml_tensor * f,
  196. int ndims,
  197. int nargs,
  198. float eps,
  199. float max_error_abs,
  200. float max_error_rel,
  201. std::vector<double> expected_vals) {
  202. static int n_threads = -1;
  203. if (n_threads < 0) {
  204. n_threads = GGML_DEFAULT_N_THREADS;
  205. const char *env = getenv("GGML_N_THREADS");
  206. if (env) {
  207. n_threads = atoi(env);
  208. }
  209. printf("GGML_N_THREADS = %d\n", n_threads);
  210. }
  211. struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
  212. struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
  213. ggml_build_forward_expand(gf, f);
  214. ggml_graph_cpy(gf, gb);
  215. ggml_build_backward_expand(ctx0, gf, gb, false);
  216. ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
  217. ggml_graph_reset(gb);
  218. if (f->grad) {
  219. ggml_set_f32(f->grad, 1.0f);
  220. }
  221. ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
  222. // ggml_graph_dump_dot(gf, NULL, "test-grad0-forward.dot");
  223. // ggml_graph_dump_dot(gb, gf, "test-grad0-backward.dot");
  224. for (int i = 0; i < nargs; ++i) {
  225. bool all_g0_bad = true;
  226. const int nelements = ggml_nelements(x[i]);
  227. for (int k = 0; k < nelements; ++k) {
  228. // Calculate gradient numerically:
  229. const float x0 = ggml_get_f32_1d(x[i], k);
  230. const float xm = x0 - eps;
  231. const float xp = x0 + eps;
  232. ggml_set_f32_1d(x[i], k, xp);
  233. ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
  234. const double f0 = ggml_get_f32_1d(f, 0);
  235. ggml_set_f32_1d(x[i], k, xm);
  236. ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
  237. const double f1 = ggml_get_f32_1d(f, 0);
  238. const double g0 = (f0 - f1)/(2.0*(double) eps);
  239. // The numerical calculation of the gradient fails around discontinuities (e.g. 0 for ReLU).
  240. // In such cases, provide a vector of expected values and skip the comparison for failed calculations.
  241. if (!expected_vals.empty()) {
  242. bool matches_any = false;
  243. for (const double & ev : expected_vals) {
  244. const double error_abs = std::fabs(g0 - ev);
  245. if (error_abs > max_error_abs) {
  246. continue;
  247. }
  248. const double error_rel = g0 != 0.0 ? fabs(g0 - ev)/fabs(g0) : 0.0;
  249. if (error_rel > max_error_rel) {
  250. continue;
  251. }
  252. matches_any = true;
  253. break;
  254. }
  255. if (!matches_any) {
  256. continue;
  257. }
  258. }
  259. all_g0_bad = false;
  260. ggml_set_f32_1d(x[i], k, x0);
  261. // compute gradient using backward graph
  262. ggml_graph_reset(gb);
  263. if (f->grad) {
  264. ggml_set_f32(f->grad, 1.0f);
  265. }
  266. ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
  267. const double g1 = ggml_get_f32_1d(x[i]->grad, k);
  268. const double error_abs = fabs(g0 - g1);
  269. const double error_rel = g0 != 0.0 ? fabs(g0 - g1)/fabs(g0) : 0.0;
  270. if (error_abs > max_error_abs || error_rel > max_error_rel) {
  271. printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
  272. op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
  273. //assert(false);
  274. return false;
  275. }
  276. }
  277. if (all_g0_bad) {
  278. printf("%s: numerical calculation of the gradient failed for all values\n", op_name);
  279. return false;
  280. }
  281. }
  282. return true;
  283. }
  284. // TODO: clean-up this ..
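// check_mat_mul re-verifies the forward result of ggml_mul_mat(x0, x1) element by element:
// dst[i][j] must equal the dot product of row j of x0 with row i of x1, i.e. ggml_mul_mat
// effectively treats the first operand as transposed (dst->ne[0] == x0->ne[1], dst->ne[1] == x1->ne[1]).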
  285. static bool check_mat_mul(
  286. const struct ggml_tensor * y,
  287. const struct ggml_tensor * x0,
  288. const struct ggml_tensor * x1) {
  289. float * dst = (float *) y->data;
  290. float * src0 = (float *) x0->data;
  291. float * src1 = (float *) x1->data;
  292. const int nc = x0->ne[1];
  293. const int nr = x1->ne[1];
  294. const int nk = x0->ne[0];
  295. GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);
  296. GGML_PRINT_DEBUG("x0:\n");
  297. for (int j = 0; j < x0->ne[1]; ++j) {
  298. for (int i = 0; i < x0->ne[0]; ++i) {
  299. GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
  300. }
  301. GGML_PRINT_DEBUG("\n");
  302. }
  303. GGML_PRINT_DEBUG("\n");
  304. GGML_PRINT_DEBUG("x1:\n");
  305. for (int j = 0; j < x1->ne[1]; ++j) {
  306. for (int i = 0; i < x1->ne[0]; ++i) {
  307. GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
  308. }
  309. GGML_PRINT_DEBUG("\n");
  310. }
  311. GGML_PRINT_DEBUG("\n");
  312. GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
  313. for (int j = 0; j < y->ne[1]; ++j) {
  314. for (int i = 0; i < y->ne[0]; ++i) {
  315. GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
  316. }
  317. GGML_PRINT_DEBUG("\n");
  318. }
  319. for (int i = 0; i < nr; ++i) {
  320. for (int j = 0; j < nc; ++j) {
  321. float sum = 0.0f;
  322. for (int k = 0; k < nk; ++k) {
  323. sum += src0[j*nk + k]*src1[i*nk + k];
  324. }
  325. if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
  326. fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
  327. assert(false);
  328. return false;
  329. }
  330. }
  331. }
  332. return true;
  333. }
  334. #define NUM_PERMUTATIONS (4*3*2*1)
  335. int main(int argc, const char ** argv) {
  336. struct ggml_init_params params = {
  337. /* .mem_size = */ 256*1024*1024,
  338. /* .mem_buffer = */ NULL,
  339. /* .no_alloc = */ false,
  340. };
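// One 256 MiB ggml context is created per test iteration (see the loop below); all tensors
// and both graphs for every op test are allocated from that arena and released together by
// ggml_free() at the end of the iteration.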
  341. int64_t ne[4];
  342. int all_permutations[4 * NUM_PERMUTATIONS];
  343. {
  344. int count = 0;
  345. for (int ax0=0; ax0<4; ++ax0) {
  346. for (int ax1=0; ax1<4; ++ax1) {
  347. if (ax1 == ax0) continue;
  348. for (int ax2=0; ax2<4; ++ax2) {
  349. if (ax2 == ax0) continue;
  350. if (ax2 == ax1) continue;
  351. for (int ax3=0; ax3<4; ++ax3) {
  352. if (ax3 == ax0) continue;
  353. if (ax3 == ax1) continue;
  354. if (ax3 == ax2) continue;
  355. assert(count < NUM_PERMUTATIONS);
  356. all_permutations[count*4+0] = ax0;
  357. all_permutations[count*4+1] = ax1;
  358. all_permutations[count*4+2] = ax2;
  359. all_permutations[count*4+3] = ax3;
  360. ++count;
  361. }
  362. }
  363. }
  364. }
  365. }
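// all_permutations now holds all 4! = 24 orderings of the axes {0,1,2,3}; the permute test
// below picks one of them at random via irand(NUM_PERMUTATIONS).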
  366. unsigned seed_iter = 1;
  367. // original loop: 1000
  368. int niter = 4;
  369. const char *env = getenv("GGML_NLOOP");
  370. if (env != NULL) {
  371. niter = atoi(env);
  372. }
  373. if (argc > 1) {
  374. niter = atoi(argv[1]);
  375. }
  376. for (int iter = 0; iter < niter; ++iter) {
  377. srand(seed_iter);
  378. seed_iter = rand();
  379. unsigned seed = rand();
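// The op tests below re-seed with srand(seed) so each block draws the same random data
// within one iteration, while seed_iter advances the sequence across iterations.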
  380. printf("test-grad0: iter:%d/%d\n", (iter+1), niter);
  381. struct ggml_context * ctx0 = ggml_init(params);
  382. get_random_dims(ne, 4);
  383. struct ggml_tensor * x[MAX_NARGS];
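// Each test block follows the same pattern: build a scalar loss f = ggml_sum(op(...)),
// mark the differentiable inputs with ggml_set_param(), and let check_gradient() compare
// backward-graph gradients against finite differences using per-op eps and error bounds.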
  384. // add f32
  385. {
  386. srand(seed);
  387. const int nargs = 2;
  388. for (int ndims = 1; ndims <= 4; ++ndims) {
  389. for (int i = 0; i < nargs; ++i) {
  390. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  391. ggml_set_param(ctx0, x[i]);
  392. }
  393. struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
  394. check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f, {});
  395. }
  396. }
  397. // add f16
  398. {
  399. srand(seed);
  400. const int nargs = 2;
  401. for (int ndims = 1; ndims <= 4; ++ndims) {
  402. for (int i = 0; i < nargs; ++i) {
  403. x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
  404. ggml_set_param(ctx0, x[i]);
  405. }
  406. struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
  407. check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f, {});
  408. }
  409. }
  410. // sub
  411. {
  412. srand(seed);
  413. const int nargs = 2;
  414. for (int ndims = 1; ndims <= 4; ++ndims) {
  415. for (int i = 0; i < nargs; ++i) {
  416. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  417. ggml_set_param(ctx0, x[i]);
  418. }
  419. struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));
  420. check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  421. }
  422. }
  423. // mul
  424. {
  425. srand(seed);
  426. const int nargs = 2;
  427. for (int ndims = 1; ndims <= 4; ++ndims) {
  428. for (int i = 0; i < nargs; ++i) {
  429. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  430. ggml_set_param(ctx0, x[i]);
  431. }
  432. struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));
  433. check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  434. }
  435. }
  436. // div
  437. {
  438. srand(seed);
  439. const int nargs = 2;
  440. for (int ndims = 1; ndims <= 4; ++ndims) {
  441. for (int i = 0; i < nargs; ++i) {
  442. x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
  443. ggml_set_param(ctx0, x[i]);
  444. }
  445. struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));
  446. check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f, {});
  447. }
  448. }
  449. // sqr
  450. {
  451. srand(seed);
  452. const int nargs = 1;
  453. for (int ndims = 1; ndims <= 2; ++ndims) {
  454. for (int i = 0; i < nargs; ++i) {
  455. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  456. ggml_set_param(ctx0, x[i]);
  457. }
  458. struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));
  459. check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  460. }
  461. }
  462. // sqrt
  463. {
  464. srand(seed);
  465. const int nargs = 1;
  466. for (int ndims = 1; ndims <= 2; ++ndims) {
  467. for (int i = 0; i < nargs; ++i) {
  468. x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
  469. ggml_set_param(ctx0, x[i]);
  470. }
  471. struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));
  472. check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f, {});
  473. }
  474. }
  475. // log
  476. {
  477. srand(seed);
  478. const int nargs = 1;
  479. for (int ndims = 1; ndims <= 2; ++ndims) {
  480. for (int i = 0; i < nargs; ++i) {
  481. x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
  482. ggml_set_param(ctx0, x[i]);
  483. }
  484. struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));
  485. check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f, {});
  486. }
  487. }
  488. // sum
  489. {
  490. srand(seed);
  491. const int nargs = 1;
  492. for (int ndims = 1; ndims <= 2; ++ndims) {
  493. for (int i = 0; i < nargs; ++i) {
  494. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  495. ggml_set_param(ctx0, x[i]);
  496. }
  497. struct ggml_tensor * f = ggml_sum(ctx0, x[0]);
  498. check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  499. }
  500. }
  501. // sum_rows
  502. {
  503. srand(seed);
  504. const int nargs = 1;
  505. for (int ndims = 1; ndims <= 4; ++ndims) {
  506. for (int i = 0; i < nargs; ++i) {
  507. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  508. ggml_set_param(ctx0, x[i]);
  509. }
  510. struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));
  511. check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
  512. }
  513. }
  514. // mean, not yet fully implemented
  515. if(0)
  516. {
  517. srand(seed);
  518. const int nargs = 1;
  519. for (int ndims = 1; ndims <= 4; ++ndims) {
  520. for (int i = 0; i < nargs; ++i) {
  521. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  522. ggml_set_param(ctx0, x[i]);
  523. }
  524. struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
  525. check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  526. }
  527. }
  528. // argmax
  529. if (0)
  530. {
  531. srand(seed);
  532. const int nargs = 1;
  533. for (int ndims = 1; ndims <= 4; ++ndims) {
  534. for (int i = 0; i < nargs; ++i) {
  535. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  536. ggml_set_param(ctx0, x[i]);
  537. }
  538. struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
  539. check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  540. }
  541. }
  542. // repeat
  543. {
  544. srand(seed);
  545. int64_t ne2[4];
  546. get_random_dims(ne2, 4);
  547. ne2[0] = ne[0] * ne2[0];
  548. ne2[1] = ne[1] * ne2[1];
  549. ne2[2] = 1;
  550. ne2[3] = 1;
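// ne2 is an integer multiple of ne in the first two dims, so x[0] can be tiled to x[1]'s
// shape; the loss is the squared error between x[1] and repeat(x[0]).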
  551. const int nargs = 1;
  552. for (int ndims = 1; ndims <= 2; ++ndims) {
  553. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  554. x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  555. ggml_set_param(ctx0, x[0]);
  556. struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
  557. check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
  558. }
  559. }
  560. // repeat back
  561. {
  562. srand(seed);
  563. int64_t ne2[4];
  564. get_random_dims(ne2, 4);
  565. ne2[0] = ne[0] * ne2[0];
  566. ne2[1] = ne[1] * ne2[1];
  567. ne2[2] = 1;
  568. ne2[3] = 1;
  569. const int nargs = 1;
  570. for (int ndims = 1; ndims <= 2; ++ndims) {
  571. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  572. x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  573. ggml_set_param(ctx0, x[0]);
  574. struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
  575. check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
  576. }
  577. }
  578. // abs
  579. {
  580. const int nargs = 1;
  581. for (int ndims = 1; ndims <= 4; ++ndims) {
  582. for (int i = 0; i < nargs; ++i) {
  583. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  584. ggml_set_param(ctx0, x[i]);
  585. }
  586. struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
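// d|x|/dx is -1 or +1; near x = 0 the central difference straddles the kink and becomes
// unreliable, so both values are accepted via expected_vals instead of a tight error bound.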
  587. check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f, {-1.0, 1.0});
  588. }
  589. }
  590. // sgn
  591. {
  592. srand(seed);
  593. const int nargs = 1;
  594. for (int ndims = 1; ndims <= 4; ++ndims) {
  595. for (int i = 0; i < nargs; ++i) {
  596. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  597. ggml_set_param(ctx0, x[i]);
  598. }
  599. struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
  600. check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
  601. }
  602. }
  603. // neg
  604. {
  605. srand(seed);
  606. const int nargs = 1;
  607. for (int ndims = 1; ndims <= 4; ++ndims) {
  608. for (int i = 0; i < nargs; ++i) {
  609. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  610. ggml_set_param(ctx0, x[i]);
  611. }
  612. struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
  613. check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  614. }
  615. }
  616. // step
  617. {
  618. srand(seed);
  619. const int nargs = 1;
  620. for (int ndims = 1; ndims <= 4; ++ndims) {
  621. for (int i = 0; i < nargs; ++i) {
  622. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  623. ggml_set_param(ctx0, x[i]);
  624. }
  625. struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
  626. check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
  627. }
  628. }
  629. // tanh, not yet fully implemented
  630. if(0)
  631. {
  632. srand(seed);
  633. const int nargs = 1;
  634. for (int ndims = 1; ndims <= 4; ++ndims) {
  635. for (int i = 0; i < nargs; ++i) {
  636. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  637. ggml_set_param(ctx0, x[i]);
  638. }
  639. struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
  640. check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  641. }
  642. }
  643. // mul_mat
  644. {
  645. srand(seed);
  646. const int nargs = 2;
  647. for (int ndims = 2; ndims <= 4; ++ndims) {
  648. int max_nrep = (ndims >= 3) ? 2 : 1;
  649. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  650. for (int nrep2 = 1; nrep2 < max_nrep; ++nrep2) {
  651. for (int nrep3 = 1; nrep3 < max_nrep; ++nrep3) {
  652. {
  653. int64_t ne2[4];
  654. get_random_dims(ne2, 4);
  655. ne2[0] = ne[0];
  656. ne2[2] = nrep2 * ne[2];
  657. ne2[3] = nrep3 * ne[3];
  658. x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  659. }
  660. ggml_set_param(ctx0, x[0]);
  661. ggml_set_param(ctx0, x[1]);
  662. struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
  663. struct ggml_tensor * f = ggml_sum(ctx0, m);
  664. GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
  665. check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  666. if (ndims == 2) {
  667. // check_mat_mul does not support ndims > 2
  668. check_mat_mul(m, x[1], x[0]);
  669. }
  670. }
  671. }
  672. }
  673. }
  674. // elu, not yet fully implemented
  675. if(0)
  676. {
  677. srand(seed);
  678. const int nargs = 1;
  679. for (int ndims = 1; ndims <= 4; ++ndims) {
  680. for (int i = 0; i < nargs; ++i) {
  681. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  682. ggml_set_param(ctx0, x[i]);
  683. }
  684. struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
  685. check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  686. }
  687. }
  688. // relu
  689. {
  690. srand(seed);
  691. const int nargs = 1;
  692. for (int ndims = 1; ndims <= 4; ++ndims) {
  693. for (int i = 0; i < nargs; ++i) {
  694. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  695. ggml_set_param(ctx0, x[i]);
  696. }
  697. struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
  698. check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {0.0, 1.0});
  699. }
  700. }
  701. // gelu, not yet fully implemented
  702. if(0)
  703. {
  704. srand(seed);
  705. const int nargs = 1;
  706. for (int ndims = 1; ndims <= 4; ++ndims) {
  707. for (int i = 0; i < nargs; ++i) {
  708. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  709. ggml_set_param(ctx0, x[i]);
  710. }
  711. struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
  712. check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
  713. }
  714. }
  715. // silu
  716. {
  717. srand(seed);
  718. const int nargs = 1;
  719. for (int ndims = 1; ndims <= 2; ++ndims) {
  720. for (int i = 0; i < nargs; ++i) {
  721. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  722. ggml_set_param(ctx0, x[i]);
  723. }
  724. struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));
  725. #ifdef GGML_SILU_FP16
  726. // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
  727. check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY, {});
  728. #else
  729. check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  730. #endif
  731. }
  732. }
  733. // rms_norm
  734. {
  735. srand(seed);
  736. const int nargs = 1;
  737. for (int ndims = 1; ndims <= 2; ++ndims) {
  738. for (int i = 0; i < nargs; ++i) {
  739. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  740. ggml_set_param(ctx0, x[i]);
  741. }
  742. struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
  743. check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY, {});
  744. }
  745. }
  746. // scale
  747. {
  748. srand(seed);
  749. const int nargs = 1;
  750. for (int ndims = 1; ndims <= 2; ++ndims) {
  751. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  752. const float s = -1.0f + 2.0f*frand();
  753. ggml_set_param(ctx0, x[0]);
  754. struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], s));
  755. check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  756. }
  757. }
  758. // cpy f32
  759. {
  760. srand(seed);
  761. const int nargs = 2;
  762. for (int ndims = 1; ndims <= 2; ++ndims) {
  763. for (int i = 0; i < nargs; ++i) {
  764. x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  765. ggml_set_param(ctx0, x[i]);
  766. }
  767. // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
  768. struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
  769. check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  770. }
  771. }
  772. // cpy f16
  773. {
  774. srand(seed);
  775. const int nargs = 2;
  776. for (int ndims = 1; ndims <= 2; ++ndims) {
  777. for (int i = 0; i < nargs; ++i) {
  778. x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
  779. ggml_set_param(ctx0, x[i]);
  780. }
  781. // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
  782. struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
  783. check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
  784. }
  785. }
  786. // reshape (1d->nd)
  787. {
  788. srand(seed);
  789. const int nargs = 1;
  790. for (int ndims = 1; ndims <= 2; ++ndims) {
  791. int64_t ne2[4];
  792. ne2[0] = 1;
  793. ne2[1] = 1;
  794. ne2[2] = 1;
  795. ne2[3] = 1;
  796. for (int i = 0; i < ndims; ++i) {
  797. ne2[0] *= ne[i];
  798. }
  799. x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
  800. x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  801. ggml_set_param(ctx0, x[0]);
  802. struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
  803. check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  804. }
  805. }
  806. // reshape (nd->1d)
  807. {
  808. srand(seed);
  809. const int nargs = 1;
  810. for (int ndims = 1; ndims <= 2; ++ndims) {
  811. int64_t ne2[4];
  812. ne2[0] = 1;
  813. ne2[1] = 1;
  814. ne2[2] = 1;
  815. ne2[3] = 1;
  816. for (int i = 0; i < ndims; ++i) {
  817. ne2[0] *= ne[i];
  818. }
  819. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  820. x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
  821. ggml_set_param(ctx0, x[0]);
  822. struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
  823. check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  824. }
  825. }
  826. // acc 1d
  827. {
  828. srand(seed);
  829. int64_t ne2[4] = { 1, 1, 1, 1 };
  830. const int nargs = 2;
  831. for (int ndims = 1; ndims <= 4; ++ndims) {
  832. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  833. ggml_set_param(ctx0, x[0]);
  834. get_random_dims(ne2, 1);
  835. while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
  836. get_random_dims(ne2, 1);
  837. }
  838. x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
  839. ggml_set_param(ctx0, x[1]);
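// Offsets passed to ggml_acc are in bytes, hence the multiplication by ggml_element_size()
// below (and by the nb[] byte strides in the 2d/3d/4d variants that follow).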
  840. const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
  841. const int offset = irand(max_offset) * ggml_element_size(x[0]);
  842. struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
  843. check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  844. }
  845. }
  846. // acc 2d
  847. {
  848. srand(seed);
  849. int64_t ne2[4] = { 1, 1, 1, 1 };
  850. int64_t max_offsets[4] = { 0, 0, 0, 0 };
  851. int64_t offsets[4] = { 0, 0, 0, 0 };
  852. const int nargs = 2;
  853. for (int ndims = 2; ndims <= 4; ++ndims) {
  854. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  855. ggml_set_param(ctx0, x[0]);
  856. get_random_dims(ne2, 2);
  857. while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
  858. get_random_dims(ne2, 2);
  859. }
  860. x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
  861. ggml_set_param(ctx0, x[1]);
  862. max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
  863. max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
  864. offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
  865. offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
  866. const int offset = offsets[0] + offsets[1];
  867. struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
  868. check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  869. }
  870. }
  871. // acc 3d
  872. {
  873. srand(seed);
  874. int64_t ne2[4] = { 1, 1, 1, 1 };
  875. int64_t max_offsets[4] = { 0, 0, 0, 0 };
  876. int64_t offsets[4] = { 0, 0, 0, 0 };
  877. const int nargs = 2;
  878. for (int ndims = 3; ndims <= 4; ++ndims) {
  879. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  880. ggml_set_param(ctx0, x[0]);
  881. get_random_dims(ne2, 3);
  882. while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
  883. get_random_dims(ne2, 3);
  884. }
  885. x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
  886. ggml_set_param(ctx0, x[1]);
  887. max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
  888. max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
  889. max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
  890. offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
  891. offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
  892. offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
  893. const int offset = offsets[0] + offsets[1] + offsets[2];
  894. struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
  895. check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  896. }
  897. }
  898. // acc 4d
  899. {
  900. srand(seed);
  901. int64_t ne2[4] = { 1, 1, 1, 1 };
  902. int64_t max_offsets[4] = { 0, 0, 0, 0 };
  903. int64_t offsets[4] = { 0, 0, 0, 0 };
  904. const int nargs = 2;
  905. for (int ndims = 4; ndims <= 4; ++ndims) {
  906. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  907. ggml_set_param(ctx0, x[0]);
  908. get_random_dims(ne2, 4);
  909. while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
  910. get_random_dims(ne2, 4);
  911. }
  912. x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
  913. ggml_set_param(ctx0, x[1]);
  914. max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
  915. max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
  916. max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
  917. max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
  918. offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
  919. offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
  920. offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
  921. offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
  922. const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];
  923. struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
  924. check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  925. }
  926. }
  927. // set_1d
  928. {
  929. srand(seed);
  930. int64_t ne2[4];
  931. const int nargs = 2;
  932. for (int ndims = 1; ndims <= 4; ++ndims) {
  933. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  934. ggml_set_param(ctx0, x[0]);
  935. get_random_dims(ne2, 1);
  936. while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
  937. get_random_dims(ne2, 1);
  938. }
  939. x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
  940. ggml_set_param(ctx0, x[1]);
  941. const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
  942. const int offset = irand(max_offset) * ggml_element_size(x[0]);
  943. struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
  944. check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  945. }
  946. }
  947. // set_2d
  948. {
  949. srand(seed);
  950. int64_t ne2[4];
  951. int64_t max_offsets[4] = { 0, 0, 0, 0 };
  952. int64_t offsets[4] = { 0, 0, 0, 0 };
  953. const int nargs = 1;
  954. for (int ndims = 2; ndims <= 4; ++ndims) {
  955. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  956. ggml_set_param(ctx0, x[0]);
  957. get_random_dims(ne2, 2);
  958. while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
  959. get_random_dims(ne2, 2);
  960. }
  961. x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
  962. ggml_set_param(ctx0, x[1]);
  963. max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
  964. max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
  965. offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
  966. offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
  967. const int offset = offsets[0] + offsets[1];
  968. struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
  969. check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  970. }
  971. }
  972. // view_1d
  973. {
  974. srand(seed);
  975. const int nargs = 1;
  976. for (int ndims = 1; ndims <= 4; ++ndims) {
  977. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  978. ggml_set_param(ctx0, x[0]);
  979. const int k0 = irand(ggml_nelements(x[0]));
  980. const int k1 = irand(ggml_nelements(x[0]));
  981. const int i0 = MIN(k0, k1);
  982. const int i1 = MAX(k0, k1);
  983. const int offset = i0 * sizeof(float);
  984. const int nelem = i1 - i0;
  985. struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
  986. check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  987. }
  988. }
  989. // view_2d
  990. {
  991. srand(seed);
  992. int64_t ne2[4];
  993. int64_t nb2[4];
  994. const int nargs = 1;
  995. for (int ndims = 1; ndims <= 4; ++ndims) {
  996. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  997. get_random_dims(ne2, 2);
  998. while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
  999. get_random_dims(ne2, 2);
  1000. }
  1001. const int count = ne2[0]*ne2[1];
  1002. nb2[0] = sizeof(float);
  1003. nb2[1] = nb2[0]*ne2[0];
  1004. ggml_set_param(ctx0, x[0]);
  1005. const int max_offset = ggml_nelements(x[0]) - count;
  1006. const int offset = irand(max_offset+1) * sizeof(float);
  1007. struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
  1008. check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1009. }
  1010. }
  1011. // view_3d
  1012. {
  1013. srand(seed);
  1014. int64_t ne2[4] = {1,1,1,1};
  1015. int64_t nb2[4] = {0,0,0,0};
  1016. const int nargs = 1;
  1017. for (int ndims = 1; ndims <= 4; ++ndims) {
  1018. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  1019. get_random_dims(ne2, 3);
  1020. while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
  1021. get_random_dims(ne2, 3);
  1022. }
  1023. const int count = ne2[0]*ne2[1]*ne2[2];
  1024. nb2[0] = sizeof(float);
  1025. nb2[1] = nb2[0]*ne2[0];
  1026. nb2[2] = nb2[1]*ne2[1];
  1027. ggml_set_param(ctx0, x[0]);
  1028. const int max_offset = ggml_nelements(x[0]) - count;
  1029. const int offset = irand(max_offset+1) * sizeof(float);
  1030. struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
  1031. check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1032. }
  1033. }
  1034. // permute
  1035. {
  1036. srand(seed);
  1037. int64_t ne2[4];
  1038. const int nargs = 1;
  1039. for (int ndims = 1; ndims <= 4; ++ndims)
  1040. {
  1041. // ggml_permute will set axes of dimensions below n_dims to 1.
  1042. // to make ggml_permute work correctly on all axes,
  1043. // the input tensor therefore needs the maximal number of dims (4).
  1044. for (int i=0; i<ndims; ++i) {
  1045. ne2[i] = ne[i];
  1046. }
  1047. for (int i=ndims; i<4; ++i) {
  1048. ne2[i] = 1;
  1049. }
  1050. x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
  1051. ggml_set_param(ctx0, x[0]);
  1052. const int p = irand(NUM_PERMUTATIONS);
  1053. const int ax0 = all_permutations[p*4+0];
  1054. const int ax1 = all_permutations[p*4+1];
  1055. const int ax2 = all_permutations[p*4+2];
  1056. const int ax3 = all_permutations[p*4+3];
  1057. // sum requires contiguous tensor rows
  1058. struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));
  1059. check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1060. }
  1061. }
  1062. // transpose
  1063. {
  1064. srand(seed);
  1065. int64_t ne2[4];
  1066. const int nargs = 1;
  1067. for (int ndims = 1; ndims <= 4; ++ndims)
  1068. {
  1069. // ggml_transpose will set axes of dimensions below n_dims to 1.
  1070. // to make ggml_transpose work correctly on all axes,
  1071. // the input tensor therefore needs the maximal number of dims (4).
  1072. for (int i=0; i<ndims; ++i) {
  1073. ne2[i] = ne[i];
  1074. }
  1075. for (int i=ndims; i<4; ++i) {
  1076. ne2[i] = 1;
  1077. }
  1078. x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
  1079. ggml_set_param(ctx0, x[0]);
  1080. // sum requires contiguous tensor rows
  1081. struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));
  1082. check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1083. }
  1084. }
  1085. // get_rows
  1086. {
  1087. srand(seed);
  1088. int64_t ne2[4] = {ne[0], ne[1], 1, 1};
  1089. int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
  1090. const int nargs = 1;
  1091. const int ndims = 2;
  1092. x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  1093. x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
  1094. ggml_set_param(ctx0, x[0]);
  1095. struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));
  1096. check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1097. }
  1098. // diag_mask_inf
  1099. {
  1100. srand(seed);
  1101. const int nargs = 1;
  1102. const int ndims = 2;
  1103. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  1104. ggml_set_param(ctx0, x[0]);
  1105. int n_past = irand(ne[0]);
  1106. struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));
  1107. check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1108. }
  1109. // diag_mask_zero
  1110. {
  1111. srand(seed);
  1112. const int nargs = 1;
  1113. const int ndims = 2;
  1114. x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
  1115. ggml_set_param(ctx0, x[0]);
  1116. int n_past = irand(ne[0]);
  1117. struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));
  1118. check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1119. }
  1120. // softmax
  1121. {
  1122. srand(seed);
  1123. const int nargs = 1;
  1124. int64_t ne2[4];
  1125. get_random_dims(ne2, 4);
  1126. for (int ndims = 1; ndims <= 3; ++ndims) {
  1127. x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  1128. ggml_set_param(ctx0, x[0]);
  1129. float eps = 1e-6f;
  1130. // don't use only sum as aggregation, because the sum of softmax is always 1 -> finite differences would not work
  1131. // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
  1132. struct ggml_tensor * f = ggml_sum(ctx0,
  1133. ggml_log(ctx0,
  1134. ggml_add1(ctx0,
  1135. ggml_scale(ctx0,
  1136. ggml_soft_max(ctx0, x[0]),
  1137. 1.0f - eps),
  1138. ggml_new_f32(ctx0, eps))));
  1139. check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY, {});
  1140. // NOTE: softmax forward is computed using f16 table lookup instead of using actual expf, but backward assumes actual expf.
  1141. // this may result in gradients that differ from the finite differences.
  1142. // when this test reports errors, first try to replace the table lookup with actual expf and test again to see if just that was the cause.
  1143. // if only the table lookup causes gradients to differ this is acceptable.
  1144. }
  1145. }
  1146. // cross_entropy_loss
  1147. {
  1148. srand(seed);
  1149. const int nargs = 1;
  1150. int64_t ne2[4];
  1151. get_random_dims(ne2, 4);
  1152. for (int ndims = 1; ndims <= 4; ++ndims) {
  1153. x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  1154. x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
  1155. // the second argument to cross_entropy_loss must sum up to 1 for each row
  1156. int nr = ggml_nrows(x[1]);
  1157. int nc = ggml_nelements(x[1]) / nr;
  1158. for (int ir = 0; ir < nr; ++ir) {
  1159. float sum = 0;
  1160. for (int ic = 0; ic < nc; ++ic) {
  1161. sum += ((float *) x[1]->data)[ic + ir*nc];
  1162. }
  1163. for (int ic = 0; ic < nc; ++ic) {
  1164. ((float *) x[1]->data)[ic + ir*nc] /= sum;
  1165. }
  1166. }
  1167. ggml_set_param(ctx0, x[0]);
  1168. struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);
  1169. check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
  1170. }
  1171. }
  1172. // rope f32
  1173. {
  1174. srand(seed);
  1175. const int nargs = 1;
  1176. int64_t ne2[4];
  1177. get_random_dims(ne2, 4);
  1178. ne2[0] += ne2[0] % 2;
  1179. int n_rot = ne2[0];
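// ne2[0] is forced to be even and n_rot spans the whole first dim; presumably because rope
// rotates pairs of values, an odd rotation dimension would not make sense here.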
  1180. for (int ndims = 3; ndims <= 4; ++ndims) {
  1181. for (int mode = 0; mode < 4; ++mode) {
  1182. for (int n_past = 1; n_past < ne2[2]; ++n_past) {
  1183. x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
  1184. struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
  1185. for (int i = 0; i < ne2[2]; ++i) {
  1186. ((int32_t *) p->data)[i] = n_past + i;
  1187. }
  1188. ggml_set_param(ctx0, x[0]);
  1189. const bool skip_past = (mode & 1);
  1190. if (skip_past) {
  1191. // we have no past, so this would have to work on uninitialized memory.
  1192. // we only test the gradients here;
  1193. // skip_past should have no influence on gradient computation.
  1194. // so when other modes work, we assume that this does as well.
  1195. continue;
  1196. }
  1197. struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
  1198. GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
  1199. check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
  1200. }
  1201. }
  1202. }
  1203. }
  1204. // rope f16
  1205. {
  1206. srand(seed);
  1207. const int nargs = 1;
  1208. int64_t ne2[4];
  1209. get_random_dims(ne2, 4);
  1210. ne2[0] += ne2[0] % 2;
  1211. int n_rot = ne2[0];
  1212. for (int ndims = 3; ndims <= 4; ++ndims) {
  1213. for (int mode = 0; mode < 4; ++mode) {
  1214. for (int n_past = 1; n_past < ne2[2]; ++n_past) {
  1215. x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
  1216. struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
  1217. for (int i = 0; i < ne2[2]; ++i) {
  1218. ((int32_t *) p->data)[i] = n_past + i;
  1219. }
  1220. ggml_set_param(ctx0, x[0]);
  1221. const bool skip_past = (mode & 1);
  1222. if (skip_past) {
  1223. // we have no past, so this would have to work on uninitialized memory.
  1224. // we only test the gradients here;
  1225. // skip_past should have no influence on gradient computation.
  1226. // so when other modes work, we assume that this does as well.
  1227. continue;
  1228. }
  1229. struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
  1230. GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
  1231. check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
  1232. }
  1233. }
  1234. }
  1235. }
  1236. // im2col f32
  1237. {
  1238. srand(seed);
  1239. const int nargs = 1;
  1240. const int ndims = 4;
  1241. for (const bool is_2D : {false, true}) {
  1242. int64_t ne0[ndims];
  1243. int64_t ne1[ndims];
  1244. get_random_dims(ne0, ndims);
  1245. get_random_dims(ne1, ndims);
  1246. // Ensure that the output is not zero-sized:
  1247. ne1[0] += 8;
  1248. ne1[1] += 8;
  1249. if (is_2D) {
  1250. ne1[2] = ne0[2];
  1251. } else {
  1252. ne1[1] = ne0[1];
  1253. ne0[3] = 1;
  1254. ne1[3] = 1;
  1255. }
  1256. // The order of arguments is swapped because the first tensor is only used for its shape.
  1257. x[1] = get_random_tensor_f16(ctx0, ndims, ne0, -1.0f, 1.0f);
  1258. x[0] = get_random_tensor_f32(ctx0, ndims, ne1, -1.0f, 1.0f);
  1259. ggml_set_param(ctx0, x[0]);
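// s*, p* and d* below are stride, padding and dilation per spatial dim (going by the usual
// im2col parameter order); the non-2D case only varies the *0 values.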
  1260. const int s0 = 1 + irand(2);
  1261. const int s1 = is_2D ? 1 + irand(2) : 0;
  1262. const int p0 = 0 + irand(2);
  1263. const int p1 = is_2D ? 0 + irand(2) : 0;
  1264. const int d0 = 1 + irand(2);
  1265. const int d1 = is_2D ? 1 + irand(2) : 0;
  1266. struct ggml_tensor * f = ggml_sum(ctx0, ggml_im2col(ctx0, x[1], x[0], s0, s1, p0, p1, d0, d1, is_2D, GGML_TYPE_F32));
  1267. GGML_PRINT_DEBUG("im2col f32: is_2D=%s, s0=%d, s1=%d, p0=%d, p1=%d, d0=%d, d1=%d\n", is_2D ? "yes" : "no", s0, s1, p0, p1, d0, d1);
  1268. check_gradient("im2col f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
  1269. }
  1270. }
  1271. // pool_2d f32
  1272. {
  1273. srand(seed);
  1274. const int nargs = 1;
  1275. const int ndims = 4;
  1276. for (const enum ggml_op_pool op : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) {
  1277. int64_t ne0[ndims];
  1278. get_random_dims(ne0, ndims);
  1279. ne0[0] += 8;
  1280. ne0[1] += 8;
  1281. x[0] = get_random_tensor_f32(ctx0, ndims, ne0, -1.0f, 1.0f);
  1282. ggml_set_param(ctx0, x[0]);
  1283. const int k0 = 2 + irand(2);
  1284. const int k1 = 2 + irand(2);
  1285. const int s0 = 2 + irand(2);
  1286. const int s1 = 2 + irand(2);
  1287. const int p0 = 0 + irand(2);
  1288. const int p1 = 0 + irand(2);
  1289. struct ggml_tensor * f = ggml_sum(ctx0, ggml_pool_2d(ctx0, x[0], op, k0, k1, s0, s1, p0, p1));
  1290. GGML_PRINT_DEBUG("ggml_pool_2d f32: op=%s k0=%d, k1=%d, s0=%d, s1=%d, p0=%d, p1=%d\n",
  1291. op == GGML_OP_POOL_MAX ? "max" : "avg", k0, k1, s0, s1, p0, p1);
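// For max pooling the gradient of an input element is 1 if it is the maximum of its window
// and 0 otherwise; near ties the finite difference is unreliable, so both values are
// accepted via expected_vals.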
  1292. std::vector<double> expected_vals;
  1293. if (op == GGML_OP_POOL_MAX) {
  1294. expected_vals.push_back(0.0);
  1295. expected_vals.push_back(1.0);
  1296. }
  1297. check_gradient("ggml_pool_2d f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, expected_vals);
  1298. }
  1299. }
  1300. // flash_attn f32
  1301. // TODO: adapt to ggml_flash_attn_ext() changes
  1302. //{
  1303. // srand(seed);
  1304. // const int nargs = 3;
  1305. // int64_t ne2[4];
  1306. // get_random_dims(ne2, 4);
  1307. // int64_t D = ne2[0];
  1308. // int64_t N = ne2[1];
  1309. // int64_t M = ne2[2] + N;
  1310. // int64_t B = ne2[3];
  1311. // for (int masked = 0; masked <= 1; ++masked) {
  1312. // for (int ndims = 2; ndims <= 4; ++ndims) {
  1313. // int max_nrep = (ndims >= 3) ? 2 : 1;
  1314. // for (int nrep = 1; nrep < max_nrep; ++nrep) {
  1315. // int64_t neq[4] = { D, N, B*nrep, ne[3] };
  1316. // int64_t nek[4] = { D, M, B, ne[3] };
  1317. // int64_t nev[4] = { M, D, B, ne[3] };
  1318. // if (ndims == 2) {
  1319. // neq[2] = 1; neq[3] = 1;
  1320. // nek[2] = 1; nek[3] = 1;
  1321. // nev[2] = 1; nev[3] = 1;
  1322. // } else if (ndims == 3) {
  1323. // neq[3] = 1;
  1324. // nek[3] = 1;
  1325. // nev[3] = 1;
  1326. // }
  1327. // x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
  1328. // x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
  1329. // x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
  1330. // ggml_set_param(ctx0, x[0]);
  1331. // ggml_set_param(ctx0, x[1]);
  1332. // ggml_set_param(ctx0, x[2]);
  1333. // struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
  1334. // check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY, {});
  1335. // }
  1336. // }
  1337. // }
  1338. //}
  1339. ggml_free(ctx0);
  1340. }
  1341. return 0;
  1342. }