// test-grad0.c
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

#define MAX_NARGS 3

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define GGML_SILU_FP16

//
// logging
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)
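
//
// random tensor generation helpers
//
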
float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

int irand(int n) {
    if (n == 0) return 0;
    return rand()%n;
}

void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;

    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}
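
// new F32 tensor with shape ne[0..ndims-1], filled with uniform random values in [fmin, fmax)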
struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
struct ggml_tensor * get_random_tensor_int(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        int32_t imin,
        int32_t imax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
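
// single-element read/write helpers used by the finite difference probe in check_gradient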
float get_element(const struct ggml_tensor * t, int idx) {
    if (t->type == GGML_TYPE_F32) {
        return ((float *)t->data)[idx];
    }

    if (t->type == GGML_TYPE_I32) {
        return ((int32_t *)t->data)[idx];
    }

    assert(false);
    return INFINITY;
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}

void print_elements(const char* label, const struct ggml_tensor * t) {
    if (!t) {
        printf("%s: %s = null\n", __func__, label);
        return;
    }
    const int nelements = ggml_nelements(t);
    printf("%s: %s = [", __func__, label);
    for (int k = 0; k < nelements; ++k) {
        if (k > 0) { printf(", "); }
        printf("%.5f", get_element(t, k));
    }
    printf("] shape: [");
    for (int k = 0; k < t->n_dims; ++k) {
        if (k > 0) { printf(", "); }
        printf("%d", (int)t->ne[k]);
    }
    printf("]\n");
}
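
// check_gradient compares the analytical gradient from ggml's backward graph
// against a numerical estimate. for every element of every parameter tensor
// it perturbs the element by +/- eps and forms the central finite difference
//
//   g0 = (f(x + eps) - f(x - eps)) / (2*eps)
//
// then requires the backward-pass gradient g1 to satisfy both
// |g0 - g1| <= max_error_abs and |g0 - g1|/|g0| <= max_error_rel.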
bool check_gradient(
        const char * op_name,
        struct ggml_context * ctx0,
        struct ggml_tensor * x[],
        struct ggml_tensor * f,
        int ndims,
        int nargs,
        float eps,
        float max_error_abs,
        float max_error_rel) {
    static int n_threads = -1;
    if (n_threads < 0) {
        n_threads = GGML_DEFAULT_N_THREADS;

        const char *env = getenv("GGML_N_THREADS");
        if (env) {
            n_threads = atoi(env);
        }

        printf("GGML_N_THREADS = %d\n", n_threads);
    }

    struct ggml_cgraph gf = ggml_build_forward (f);
    struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);

    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

    ggml_graph_reset  (&gf);
    ggml_set_f32      (f->grad, 1.0f);

    ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

    // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
    // ggml_graph_dump_dot(&gb, &gf,  "test-grad0-backward.dot");

    for (int i = 0; i < nargs; ++i) {
        const int nelements = ggml_nelements(x[i]);
        for (int k = 0; k < nelements; ++k) {
            // compute gradient using finite differences
            const float x0 = get_element(x[i], k);
            const float xm = x0 - eps;
            const float xp = x0 + eps;
            set_element(x[i], k, xp);

            ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

            const float f0 = ggml_get_f32_1d(f, 0);

            set_element(x[i], k, xm);

            ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

            const float f1 = ggml_get_f32_1d(f, 0);
            const float g0 = (f0 - f1)/(2.0f*eps);

            set_element(x[i], k, x0);

            // compute gradient using backward graph
            ggml_graph_reset  (&gf);
            ggml_set_f32      (f->grad, 1.0f);

            ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

            const float g1 = get_element(x[i]->grad, k);

            const float error_abs = fabsf(g0 - g1);
            const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0;

            if (error_abs > max_error_abs || error_rel > max_error_rel) {
                printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
                        op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
                //assert(false);
                return false;
            }
        }
    }

    return true;
}
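
// check_mat_mul verifies the forward result of ggml_mul_mat element by
// element: ggml stores matrices row-major with ne[0] elements per row, and
// the loop below recomputes each output as a dot product of a row of src0
// with a row of src1:
//   dst[i*nc + j] == sum_k src0[j*nk + k] * src1[i*nk + k]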
// TODO: clean-up this ..
bool check_mat_mul(
        const struct ggml_tensor * y,
        const struct ggml_tensor * x0,
        const struct ggml_tensor * x1) {
    float * dst  = (float *) y->data;
    float * src0 = (float *) x0->data;
    float * src1 = (float *) x1->data;

    const int nc = x0->ne[1];
    const int nr = x1->ne[1];
    const int nk = x0->ne[0];

    GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);

    GGML_PRINT_DEBUG("x0:\n");
    for (int j = 0; j < x0->ne[1]; ++j) {
        for (int i = 0; i < x0->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("x1:\n");
    for (int j = 0; j < x1->ne[1]; ++j) {
        for (int i = 0; i < x1->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
    for (int j = 0; j < y->ne[1]; ++j) {
        for (int i = 0; i < y->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }

    for (int i = 0; i < nr; ++i) {
        for (int j = 0; j < nc; ++j) {
            float sum = 0.0f;
            for (int k = 0; k < nk; ++k) {
                sum += src0[j*nk + k]*src1[i*nk + k];
            }
            if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
                fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
                assert(false);
                return false;
            }
        }
    }

    return true;
}
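
// number of ways to order the four tensor axes: 4! = 24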
#define NUM_PERMUTATIONS (4*3*2*1)

int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        .mem_size   = 128*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    int64_t ne[4];

    int all_permutations[4 * NUM_PERMUTATIONS];
    {
        int count = 0;
        for (int ax0=0; ax0<4; ++ax0) {
            for (int ax1=0; ax1<4; ++ax1) {
                if (ax1 == ax0) continue;
                for (int ax2=0; ax2<4; ++ax2) {
                    if (ax2 == ax0) continue;
                    if (ax2 == ax1) continue;
                    for (int ax3=0; ax3<4; ++ax3) {
                        if (ax3 == ax0) continue;
                        if (ax3 == ax1) continue;
                        if (ax3 == ax2) continue;
                        assert(count < NUM_PERMUTATIONS);
                        all_permutations[count*4+0] = ax0;
                        all_permutations[count*4+1] = ax1;
                        all_permutations[count*4+2] = ax2;
                        all_permutations[count*4+3] = ax3;
                        ++count;
                    }
                }
            }
        }
    }

    // original loop: 1000
    int niter = 4;
    const char *env = getenv("GGML_NLOOP");
    if (env != NULL) {
        niter = atoi(env);
    }
    if (argc > 1) {
        niter = atoi(argv[1]);
    }
    for (int iter = 0; iter < niter; ++iter) {
        printf("test-grad0: iter:%d/%d\n", iter, niter);
        struct ggml_context * ctx0 = ggml_init(params);

        get_random_dims(ne, 4);

        struct ggml_tensor * x[MAX_NARGS];
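
        // each test block below reduces the op's output to a scalar with
        // ggml_sum, so that check_gradient has a single objective f whose
        // gradient with respect to every input element can be compared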
        // add
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));

                check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
            }
        }

        // sub
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));

                check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // mul
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));

                check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // div
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));

                check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
            }
        }

        // sqr
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));

                check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // sqrt
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));

                check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // log
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));

                check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // sum
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, x[0]);

                check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // sum_rows
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));

                check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }

        // repeat
        {
            int64_t ne2[4];
            get_random_dims(ne2, 4);

            ne2[0] = ne[0] * ne2[0];
            ne2[1] = ne[1] * ne2[1];
            ne2[2] = 1;
            ne2[3] = 1;

            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));

                check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }
        // abs (finite differences do not work: fabsf is non-differentiable at 0,
        // so the central difference estimate breaks down near the kink)
        //{
        //    const int nargs = 1;
        //    for (int ndims = 1; ndims <= 2; ++ndims) {
        //        for (int i = 0; i < nargs; ++i) {
        //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
        //            ggml_set_param(ctx0, x[i]);
        //        }
        //        struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
        //        check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
        //    }
        //}
        // mul_mat
        {
            const int nargs = 2;

            for (int ndims = 2; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                {
                    int64_t ne2[4];
                    get_random_dims(ne2, 4);
                    ne2[0] = ne[0];
                    x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                }

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, m);

                GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);

                check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
                check_mat_mul(m, x[1], x[0]);
            }
        }

        // silu
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));

#ifdef GGML_SILU_FP16
                // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
#else
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
#endif
            }
        }

        // rms_norm
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0]));

                check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
            }
        }

        // scale
        {
            const int nargs = 2;

            int64_t ne2[4];
            ne2[0] = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1]));

                check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cpy
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));

                check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
        // reshape (1d->nd)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (nd->1d)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 1d
        {
            int64_t ne2[4] = { 1, 1, 1, 1 };

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
        // acc 2d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 3d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 3; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 3);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 3);
                }

                x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                const int offset = offsets[0] + offsets[1] + offsets[2];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 4d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 4; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 4);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 4);
                }

                x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
        // set_1d
        {
            int64_t ne2[4];

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));

                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // set_2d
        {
            int64_t ne2[4];
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 1;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));

                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
        // view_1d
        {
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                const int k0 = irand(ggml_nelements(x[0]));
                const int k1 = irand(ggml_nelements(x[0]));
                const int i0 = MIN(k0, k1);
                const int i1 = MAX(k0, k1);

                const int offset = i0 * sizeof(float);
                const int nelem  = i1 - i0;

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));

                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_2d
        {
            int64_t ne2[4];
            int64_t nb2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 2);
                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 2);
                }
                const int count = ne2[0]*ne2[1];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));

                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_3d
        {
            int64_t ne2[4] = {1,1,1,1};
            int64_t nb2[4] = {0,0,0,0};

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 3);
                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 3);
                }
                const int count = ne2[0]*ne2[1]*ne2[2];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                nb2[2] = nb2[1]*ne2[1];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));

                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
        // permute
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_permute will set axes of dimensions below n_dims to 1.
                // to make ggml_permute work correctly on all axes,
                // the input tensor needs to have the maximal n_dims of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                const int p = irand(NUM_PERMUTATIONS);
                const int ax0 = all_permutations[p*4+0];
                const int ax1 = all_permutations[p*4+1];
                const int ax2 = all_permutations[p*4+2];
                const int ax3 = all_permutations[p*4+3];

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));

                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // transpose
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_transpose will set axes of dimensions below n_dims to 1.
                // to make ggml_transpose work correctly on all axes,
                // the input tensor needs to have the maximal n_dims of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));

                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
        // get_rows
        {
            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
            x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]);

            ggml_set_param(ctx0, x[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));

            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_inf
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));

            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_zero
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));

            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // softmax
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));

                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cross_entropy_loss
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1]));

                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-1f, 1e-2f, INFINITY);
                // finite differences regularly fail!
            }
        }
        // rope
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2;
            int n_rot = ne2[0];

            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);

                        ggml_set_param(ctx0, x[0]);

                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
                            continue;
                        }

                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));

                        GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
                    }
                }
            }
        }
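
        // the dimension names below are inferred from how the shapes are used:
        // D = head dimension, N = query length, M = key/value length (>= N),
        // B = batch size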
        // flash_attn
        {
            const int nargs = 3;

            int64_t ne2[4];

            get_random_dims(ne2, 4);
            int64_t D = ne2[0];
            int64_t N = ne2[1];
            int64_t M = ne2[2] + N;
            int64_t B = ne2[3];

            for (int masked = 0; masked <= 1; ++masked) {
                for (int ndims = 2; ndims <= 4; ++ndims) {
                    int64_t neq[4] = { D, N, B, ne[3] };
                    int64_t nek[4] = { D, M, B, ne[3] };
                    int64_t nev[4] = { M, D, B, ne[3] };
                    if (ndims == 2) {
                        neq[2] = 1; neq[3] = 1;
                        nek[2] = 1; nek[3] = 1;
                        nev[2] = 1; nev[3] = 1;
                    } else if (ndims == 3) {
                        neq[3] = 1;
                        nek[3] = 1;
                        nev[3] = 1;
                    }
                    x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f);
                    x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f);
                    x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f);
                    ggml_set_param(ctx0, x[0]);
                    ggml_set_param(ctx0, x[1]);
                    ggml_set_param(ctx0, x[2]);

                    struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));

                    check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
                }
            }
        }

        ggml_free(ctx0);
    }

    return 0;
}