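// test-grad0.c
//
// gradient checks for ggml operators: every test builds a scalar
// f = ggml_sum(op(...)) over random inputs and compares the gradients
// produced by ggml's backward graph against central finite differences.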

#include "ggml.h"

#include <math.h>
#include <stdbool.h> // for the bool type used below
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#define MAX_NARGS 3

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define GGML_SILU_FP16

//
// logging
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)
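
//
// random tensor helpers
//

// uniform random float in [0, 1]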
float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

// uniform random int in [0, n)
int irand(int n) {
    if (n == 0) return 0;
    return rand()%n;
}

// set the first ndims entries of dims to random sizes in [1, 4],
// and the remaining entries to 1
void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;

    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}
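
// allocate an F32 tensor with the given shape and fill it with
// uniform random values in [fmin, fmax]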
struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
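
// same as get_random_tensor, but fills an I32 tensor with
// uniform random integers in [imin, imax)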
struct ggml_tensor * get_random_tensor_int(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        int32_t imin,
        int32_t imax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
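
// read/write a single element by flat index;
// set_element assumes F32 data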
float get_element(const struct ggml_tensor * t, int idx) {
    if (t->type == GGML_TYPE_F32) {
        return ((float *)t->data)[idx];
    } else if (t->type == GGML_TYPE_I32) {
        return ((int32_t *)t->data)[idx];
    } else {
        assert(false);
        return INFINITY;
    }
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}

void print_elements(const char * label, const struct ggml_tensor * t) {
    if (!t) {
        printf("%s: %s = null\n", __func__, label);
        return;
    }
    const int nelements = ggml_nelements(t);
    printf("%s: %s = [", __func__, label);
    for (int k = 0; k < nelements; ++k) {
        if (k > 0) { printf(", "); }
        printf("%.5f", get_element(t, k));
    }
    printf("] shape: [");
    for (int k = 0; k < t->n_dims; ++k) {
        if (k > 0) { printf(", "); }
        printf("%d", (int)t->ne[k]);
    }
    printf("]\n");
}
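
// compare the gradient computed by the backward graph against a numeric
// estimate obtained from central finite differences:
//
//   df/dx[k] ~= (f(x[k] + eps) - f(x[k] - eps)) / (2*eps)
//
// f must reduce to a scalar (the tests always wrap the op in ggml_sum);
// the check fails if either the absolute or the relative error exceeds
// the given bound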
bool check_gradient(
        const char * op_name,
        struct ggml_context * ctx0,
        struct ggml_tensor * x[],
        struct ggml_tensor * f,
        int ndims,
        int nargs,
        float eps,
        float max_error_abs,
        float max_error_rel) {
    struct ggml_cgraph gf = ggml_build_forward (f);
    struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);

    ggml_graph_compute(ctx0, &gf);
    ggml_graph_reset  (&gf);
    ggml_set_f32      (f->grad, 1.0f);
    ggml_graph_compute(ctx0, &gb);

    // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
    // ggml_graph_dump_dot(&gb, &gf,  "test-grad0-backward.dot");

    for (int i = 0; i < nargs; ++i) {
        const int nelements = ggml_nelements(x[i]);
        for (int k = 0; k < nelements; ++k) {
            // compute gradient using finite differences
            const float x0 = get_element(x[i], k);
            const float xm = x0 - eps;
            const float xp = x0 + eps;

            set_element(x[i], k, xp);
            ggml_graph_compute(ctx0, &gf);

            const float f0 = ggml_get_f32_1d(f, 0);

            set_element(x[i], k, xm);
            ggml_graph_compute(ctx0, &gf);

            const float f1 = ggml_get_f32_1d(f, 0);

            const float g0 = (f0 - f1)/(2.0f*eps);

            set_element(x[i], k, x0);

            // compute gradient using backward graph
            ggml_graph_reset  (&gf);
            ggml_set_f32      (f->grad, 1.0f);
            ggml_graph_compute(ctx0, &gb);

            const float g1 = get_element(x[i]->grad, k);

            const float error_abs = fabsf(g0 - g1);
            const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0;

            if (error_abs > max_error_abs || error_rel > max_error_rel) {
                printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
                        op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
                //assert(false);
                return false;
            }
        }
    }

    return true;
}
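
// verify a ggml_mul_mat result against a naive triple loop:
// dst[i*nc + j] must equal the dot product of row j of x0 and row i of x1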
// TODO: clean-up this ..
bool check_mat_mul(
        const struct ggml_tensor * y,
        const struct ggml_tensor * x0,
        const struct ggml_tensor * x1) {
    float * dst  = (float *) y->data;
    float * src0 = (float *) x0->data;
    float * src1 = (float *) x1->data;

    const int nc = x0->ne[1];
    const int nr = x1->ne[1];
    const int nk = x0->ne[0];

    GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);

    GGML_PRINT_DEBUG("x0:\n");
    for (int j = 0; j < x0->ne[1]; ++j) {
        for (int i = 0; i < x0->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("x1:\n");
    for (int j = 0; j < x1->ne[1]; ++j) {
        for (int i = 0; i < x1->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
    for (int j = 0; j < y->ne[1]; ++j) {
        for (int i = 0; i < y->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", dst[j*nc + i]); // row length of y is nc == y->ne[0]
        }
        GGML_PRINT_DEBUG("\n");
    }

    for (int i = 0; i < nr; ++i) {
        for (int j = 0; j < nc; ++j) {
            float sum = 0.0f;
            for (int k = 0; k < nk; ++k) {
                sum += src0[j*nk + k]*src1[i*nk + k];
            }

            if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
                fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
                assert(false);
                return false;
            }
        }
    }

    return true;
}

#define NUM_PERMUTATIONS (4*3*2*1)

int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        .mem_size   = 128*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    int64_t ne[4];

    int all_permutations[4 * NUM_PERMUTATIONS];
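
    // precompute all 4! = 24 permutations of the four axes,
    // used by the permute test below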
    {
        int count = 0;
        for (int ax0 = 0; ax0 < 4; ++ax0) {
            for (int ax1 = 0; ax1 < 4; ++ax1) {
                if (ax1 == ax0) continue;
                for (int ax2 = 0; ax2 < 4; ++ax2) {
                    if (ax2 == ax0) continue;
                    if (ax2 == ax1) continue;
                    for (int ax3 = 0; ax3 < 4; ++ax3) {
                        if (ax3 == ax0) continue;
                        if (ax3 == ax1) continue;
                        if (ax3 == ax2) continue;
                        assert(count < NUM_PERMUTATIONS);
                        all_permutations[count*4 + 0] = ax0;
                        all_permutations[count*4 + 1] = ax1;
                        all_permutations[count*4 + 2] = ax2;
                        all_permutations[count*4 + 3] = ax3;
                        ++count;
                    }
                }
            }
        }
    }
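
    // the iteration count can be overridden with the GGML_NLOOP environment
    // variable or the first command line argument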
    // original loop: 1000
    int niter = 4;
    const char * env = getenv("GGML_NLOOP");
    if (env != NULL) {
        niter = atoi(env);
    }
    if (argc > 1) {
        niter = atoi(argv[1]);
    }

    for (int iter = 0; iter < niter; ++iter) {
        printf("test-grad0: iter:%d/%d\n", iter, niter);
        struct ggml_context * ctx0 = ggml_init(params);

        get_random_dims(ne, 4);

        struct ggml_tensor * x[MAX_NARGS];
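
        // each test below builds a scalar f = ggml_sum(op(x...)) on random
        // inputs and checks its gradients for a range of tensor shapes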

        // add
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));

                check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
            }
        }

        // sub
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));

                check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // mul
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));

                check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // div
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));

                check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
            }
        }

        // sqr
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));

                check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // sqrt
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));

                check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // log
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));

                check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // sum
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, x[0]);

                check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // sum_rows
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));

                check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }

        // repeat
        {
            int64_t ne2[4];
            get_random_dims(ne2, 4);

            ne2[0] = ne[0] * ne2[0];
            ne2[1] = ne[1] * ne2[1];
            ne2[2] = 1;
            ne2[3] = 1;

            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));

                check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }

        // abs (finite differences do not work)
        //{
        //    const int nargs = 1;
        //
        //    for (int ndims = 1; ndims <= 2; ++ndims) {
        //        for (int i = 0; i < nargs; ++i) {
        //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
        //            ggml_set_param(ctx0, x[i]);
        //        }
        //
        //        struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
        //
        //        check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
        //    }
        //}

        // mul_mat
        {
            const int nargs = 2;

            for (int ndims = 2; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                {
                    // both operands must share the inner dimension ne[0]
                    int64_t ne2[4];
                    get_random_dims(ne2, 4);
                    ne2[0] = ne[0];
                    x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                }

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, m);

                GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);

                check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
                check_mat_mul(m, x[1], x[0]);
            }
        }

        // silu
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));

#ifdef GGML_SILU_FP16
                // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5f, INFINITY);
#else
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
#endif
            }
        }

        // rms_norm
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0]));

                check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
            }
        }

        // scale
        {
            const int nargs = 2;

            int64_t ne2[4];
            ne2[0] = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1]));

                check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cpy
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));

                check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (1d->nd)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (nd->1d)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
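
        // the acc tests add x[1] into a sub-region of x[0] at a random
        // element-aligned offset and check the gradients of both operands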

        // acc 1d
        {
            int64_t ne2[4] = { 1, 1, 1, 1 };

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 2d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 3d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 3; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 3);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 3);
                }

                x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                const int offset = offsets[0] + offsets[1] + offsets[2];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 4d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 4; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 4);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 4);
                }

                x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // set_1d
        {
            int64_t ne2[4];

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));

                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // set_2d
        {
            int64_t ne2[4];
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 1;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));

                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_1d
        {
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                const int k0 = irand(ggml_nelements(x[0]));
                const int k1 = irand(ggml_nelements(x[0]));
                const int i0 = MIN(k0, k1);
                const int i1 = MAX(k0, k1);

                const int offset = i0 * sizeof(float);
                const int nelem  = i1 - i0;

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));

                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_2d
        {
            int64_t ne2[4];
            int64_t nb2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 2);
                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 2);
                }
                const int count = ne2[0]*ne2[1];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));

                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_3d
        {
            int64_t ne2[4] = { 1, 1, 1, 1 };
            int64_t nb2[4] = { 0, 0, 0, 0 };

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 3);
                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 3);
                }
                const int count = ne2[0]*ne2[1]*ne2[2];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                nb2[2] = nb2[1]*ne2[1];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));

                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // permute
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                // ggml_permute will set axes of dimensions below n_dims to 1.
                // to make ggml_permute work correctly on all axes,
                // the input tensor needs a maximal n_dims of 4.
                for (int i = 0; i < ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i = ndims; i < 4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                const int p = irand(NUM_PERMUTATIONS);
                const int ax0 = all_permutations[p*4+0];
                const int ax1 = all_permutations[p*4+1];
                const int ax2 = all_permutations[p*4+2];
                const int ax3 = all_permutations[p*4+3];

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));

                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // transpose
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                // ggml_transpose will set axes of dimensions below n_dims to 1.
                // to make ggml_transpose work correctly on all axes,
                // the input tensor needs a maximal n_dims of 4.
                for (int i = 0; i < ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i = ndims; i < 4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));

                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // get_rows
        {
            int64_t ne2[4] = { ne[0], ne[1], 1, 1 };
            int64_t ne3[4] = { 1 + irand(ne[1]), 1, 1, 1 };
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
            x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]);

            ggml_set_param(ctx0, x[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));

            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_inf
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));

            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_zero
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));

            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // softmax
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));

                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cross_entropy_loss
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2,  0.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1]));

                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-1f, 1e-2f, INFINITY);
                // finite differences regularly fails!
            }
        }

        // rope
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2; // n_rot must be even
            int n_rot = ne2[0];

            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);

                        ggml_set_param(ctx0, x[0]);

                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
                            continue;
                        }

                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode));

                        GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
                    }
                }
            }
        }
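
        // shape naming below (inferred from usage): D = head dimension,
        // N = number of queries, M = number of keys/values, B = batch size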

        // flash_attn
        {
            const int nargs = 3;

            int64_t ne2[4];

            get_random_dims(ne2, 4);
            int64_t D = ne2[0];
            int64_t N = ne2[1];
            int64_t M = ne2[2] + N;
            int64_t B = ne2[3];

            for (int masked = 0; masked <= 1; ++masked) {
                for (int ndims = 2; ndims <= 4; ++ndims) {
                    int64_t neq[4] = { D, N, B, ne[3] };
                    int64_t nek[4] = { D, M, B, ne[3] };
                    int64_t nev[4] = { M, D, B, ne[3] };
                    if (ndims == 2) {
                        neq[2] = 1; neq[3] = 1;
                        nek[2] = 1; nek[3] = 1;
                        nev[2] = 1; nev[3] = 1;
                    } else if (ndims == 3) {
                        neq[3] = 1;
                        nek[3] = 1;
                        nev[3] = 1;
                    }
                    x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f);
                    x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f);
                    x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f);
                    ggml_set_param(ctx0, x[0]);
                    ggml_set_param(ctx0, x[1]);
                    ggml_set_param(ctx0, x[2]);

                    struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));

                    check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
                }
            }
        }

        ggml_free(ctx0);
    }

    return 0;
}