// test-grad0.c

#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#define MAX_NARGS 3

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define GGML_SILU_FP16

//
// logging
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)

float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

int irand(int n) {
    if (n == 0) return 0;
    else return rand()%n;
}

void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;

    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}

struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    };

    return result;
}

struct ggml_tensor * get_random_tensor_int(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        int32_t imin,
        int32_t imax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    };

    return result;
}

float get_element(const struct ggml_tensor * t, int idx) {
    if (t->type == GGML_TYPE_F32) {
        return ((float *)t->data)[idx];
    } else if (t->type == GGML_TYPE_I32) {
        return ((int32_t *)t->data)[idx];
    } else {
        assert(false);
        return INFINITY;
    }
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}

void print_elements(const char* label, const struct ggml_tensor * t) {
    if (!t) {
        printf("%s: %s = null\n", __func__, label);
        return;
    }
    const int nelements = ggml_nelements(t);
    printf("%s: %s = [", __func__, label);
    for (int k = 0; k < nelements; ++k) {
        if (k > 0) { printf(", "); }
        printf("%.5f", get_element(t, k));
    }
    printf("] shape: [");
    for (int k = 0; k < t->n_dims; ++k) {
        if (k > 0) { printf(", "); }
        printf("%d", (int)t->ne[k]);
    }
    printf("]\n");
}

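// compare the analytic gradients produced by the ggml backward graph against
// central finite differences: g ~= (f(x + eps) - f(x - eps)) / (2*eps).
// returns false if any element exceeds the given absolute/relative error bounds.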
bool check_gradient(
        const char * op_name,
        struct ggml_context * ctx0,
        struct ggml_tensor * x[],
        struct ggml_tensor * f,
        int ndims,
        int nargs,
        float eps,
        float max_error_abs,
        float max_error_rel) {
    static int n_threads = -1;
    if (n_threads < 0) {
        n_threads = GGML_DEFAULT_N_THREADS;

        const char *env = getenv("GGML_N_THREADS");
        if (env) {
            n_threads = atoi(env);
        }

        printf("GGML_N_THREADS = %d\n", n_threads);
    }

    struct ggml_cgraph gf = ggml_build_forward (f);
    gf.n_threads = n_threads;

    struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
    gb.n_threads = n_threads;

    ggml_graph_compute(ctx0, &gf);
    ggml_graph_reset  (&gf);
    ggml_set_f32      (f->grad, 1.0f);
    ggml_graph_compute(ctx0, &gb);

    // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
    // ggml_graph_dump_dot(&gb, &gf,  "test-grad0-backward.dot");

    for (int i = 0; i < nargs; ++i) {
        const int nelements = ggml_nelements(x[i]);
        for (int k = 0; k < nelements; ++k) {
            // compute gradient using finite differences
            const float x0 = get_element(x[i], k);
            const float xm = x0 - eps;
            const float xp = x0 + eps;
            set_element(x[i], k, xp);
            ggml_graph_compute(ctx0, &gf);

            const float f0 = ggml_get_f32_1d(f, 0);

            set_element(x[i], k, xm);
            ggml_graph_compute(ctx0, &gf);

            const float f1 = ggml_get_f32_1d(f, 0);

            const float g0 = (f0 - f1)/(2.0f*eps);

            set_element(x[i], k, x0);

            // compute gradient using backward graph
            ggml_graph_reset  (&gf);
            ggml_set_f32      (f->grad, 1.0f);
            ggml_graph_compute(ctx0, &gb);

            const float g1 = get_element(x[i]->grad, k);

            const float error_abs = fabsf(g0 - g1);
            const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabs(g0) : 0;

            if (error_abs > max_error_abs || error_rel > max_error_rel) {
                printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
                        op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
                //assert(false);
                return false;
            }
        }
    }

    return true;
}

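// reference check for ggml_mul_mat: recompute each output element as the dot
// product of a row of x0 with a row of x1 (both operands share the inner
// dimension ne[0]) and compare it against the forward result in y.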
// TODO: clean-up this ..
bool check_mat_mul(
        const struct ggml_tensor * y,
        const struct ggml_tensor * x0,
        const struct ggml_tensor * x1) {
    float * dst  = (float *) y->data;
    float * src0 = (float *) x0->data;
    float * src1 = (float *) x1->data;

    const int nc = x0->ne[1];
    const int nr = x1->ne[1];
    const int nk = x0->ne[0];

    GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);

    GGML_PRINT_DEBUG("x0:\n");
    for (int j = 0; j < x0->ne[1]; ++j) {
        for (int i = 0; i < x0->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("x1:\n");
    for (int j = 0; j < x1->ne[1]; ++j) {
        for (int i = 0; i < x1->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
    for (int j = 0; j < y->ne[1]; ++j) {
        for (int i = 0; i < y->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }

    for (int i = 0; i < nr; ++i) {
        for (int j = 0; j < nc; ++j) {
            float sum = 0.0f;
            for (int k = 0; k < nk; ++k) {
                sum += src0[j*nk + k]*src1[i*nk + k];
            }
            if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
                fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
                assert(false);
                return false;
            }
        }
    }

    return true;
}

#define NUM_PERMUTATIONS (4*3*2*1)

int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        .mem_size   = 128*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    int64_t ne[4];

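    // enumerate all 4! = 24 permutations of the four tensor axes once,
    // so the permute test below can pick one at random per iteration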
    int all_permutations[4 * NUM_PERMUTATIONS];
    {
        int count = 0;
        for (int ax0=0; ax0<4; ++ax0) {
            for (int ax1=0; ax1<4; ++ax1) {
                if (ax1 == ax0) continue;
                for (int ax2=0; ax2<4; ++ax2) {
                    if (ax2 == ax0) continue;
                    if (ax2 == ax1) continue;
                    for (int ax3=0; ax3<4; ++ax3) {
                        if (ax3 == ax0) continue;
                        if (ax3 == ax1) continue;
                        if (ax3 == ax2) continue;
                        assert(count < NUM_PERMUTATIONS);
                        all_permutations[count*4+0] = ax0;
                        all_permutations[count*4+1] = ax1;
                        all_permutations[count*4+2] = ax2;
                        all_permutations[count*4+3] = ax3;
                        ++count;
                    }
                }
            }
        }
    }

    // original loop: 1000
    int niter = 4;
    const char *env = getenv("GGML_NLOOP");
    if (env != NULL) {
        niter = atoi(env);
    }
    if (argc > 1) {
        niter = atoi(argv[1]);
    }
    for (int iter = 0; iter < niter; ++iter) {
        printf("test-grad0: iter:%d/%d\n", iter, niter);
        struct ggml_context * ctx0 = ggml_init(params);

        get_random_dims(ne, 4);

        struct ggml_tensor * x[MAX_NARGS];

        // add
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));

                check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
            }
        }

        // sub
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));

                check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // mul
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));

                check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // div
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));

                check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
            }
        }

        // sqr
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));

                check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // sqrt
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));

                check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // log
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));

                check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // sum
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, x[0]);

                check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // sum_rows
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));

                check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }

        // repeat
        {
            int64_t ne2[4];
            get_random_dims(ne2, 4);

            ne2[0] = ne[0] * ne2[0];
            ne2[1] = ne[1] * ne2[1];
            ne2[2] = 1;
            ne2[3] = 1;

            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));

                check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }

        // abs (finite differences do not work)
        //{
        //    const int nargs = 1;

        //    for (int ndims = 1; ndims <= 2; ++ndims) {
        //        for (int i = 0; i < nargs; ++i) {
        //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
        //            ggml_set_param(ctx0, x[i]);
        //        }

        //        struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));

        //        check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
        //    }
        //}

        // mul_mat
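        // ggml_mul_mat requires the two operands to share their first dimension
        // (the inner/contraction dimension), hence ne2[0] is forced to ne[0] below;
        // the forward result is also cross-checked element-wise via check_mat_mul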
        {
            const int nargs = 2;

            for (int ndims = 2; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                {
                    int64_t ne2[4];
                    get_random_dims(ne2, 4);
                    ne2[0] = ne[0];
                    x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                }

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, m);

                GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);

                check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
                check_mat_mul(m, x[1], x[0]);
            }
        }

        // silu
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));

#ifdef GGML_SILU_FP16
                // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
#else
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
#endif
            }
        }

        // rms_norm
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0]));

                check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
            }
        }

        // scale
        {
            const int nargs = 2;

            int64_t ne2[4];
            ne2[0] = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1]));

                check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cpy
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));

                check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (1d->nd)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (nd->1d)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 1d
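        // ggml_acc adds x[1] into a view of x[0] described by the byte strides
        // nb1..nb3 and a byte offset, which is why the random element offset is
        // scaled by ggml_element_size() below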
        {
            int64_t ne2[4] = { 1, 1, 1, 1 };

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 2d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 3d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 3; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 3);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 3);
                }

                x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                const int offset = offsets[0] + offsets[1] + offsets[2];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 4d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 4; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 4);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 4);
                }

                x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // set_1d
        {
            int64_t ne2[4];

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));

                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // set_2d
        {
            int64_t ne2[4];
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 1;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));

                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_1d
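        // ggml_view_* also take their offset in bytes, so element indices are
        // multiplied by sizeof(float) below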
        {
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                const int k0 = irand(ggml_nelements(x[0]));
                const int k1 = irand(ggml_nelements(x[0]));
                const int i0 = MIN(k0, k1);
                const int i1 = MAX(k0, k1);

                const int offset = i0 * sizeof(float);
                const int nelem  = i1 - i0;

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));

                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_2d
        {
            int64_t ne2[4];
            int64_t nb2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 2);
                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 2);
                }
                const int count = ne2[0]*ne2[1];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));

                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_3d
        {
            int64_t ne2[4] = {1,1,1,1};
            int64_t nb2[4] = {0,0,0,0};

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 3);
                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 3);
                }
                const int count = ne2[0]*ne2[1]*ne2[2];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                nb2[2] = nb2[1]*ne2[1];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));

                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // permute
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_permute will set axes of dimensions below n_dims to 1.
                // to make ggml_permute work correctly on all axes,
                // the input tensor needs maximal n_dim of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                const int p = irand(NUM_PERMUTATIONS);
                const int ax0 = all_permutations[p*4+0];
                const int ax1 = all_permutations[p*4+1];
                const int ax2 = all_permutations[p*4+2];
                const int ax3 = all_permutations[p*4+3];

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));

                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // transpose
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_transpose will set axes of dimensions below n_dims to 1.
                // to make ggml_transpose work correctly on all axes,
                // the input tensor needs maximal n_dim of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));

                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // get_rows
        {
            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
            x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]);

            ggml_set_param(ctx0, x[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));

            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_inf
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));

            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_zero
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));

            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // softmax
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));

                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cross_entropy_loss
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1]));

                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-1f, 1e-2f, INFINITY);
                // finite differences regularly fails!
            }
        }

        // rope
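        // ne2[0] is rounded up to an even value because rope rotates values in pairs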
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2;
            int n_rot = ne2[0];

            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);

                        ggml_set_param(ctx0, x[0]);

                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
                            continue;
                        }

                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));

                        GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
                    }
                }
            }
        }

        // flash_attn
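        // shapes (by convention in this test): D = head size, N = number of query
        // positions, M = number of key/value positions, B = batch size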
        {
            const int nargs = 3;

            int64_t ne2[4];

            get_random_dims(ne2, 4);
            int64_t D = ne2[0];
            int64_t N = ne2[1];
            int64_t M = ne2[2] + N;
            int64_t B = ne2[3];

            for (int masked = 0; masked <= 1; ++masked) {
                for (int ndims = 2; ndims <= 4; ++ndims) {
                    int64_t neq[4] = { D, N, B, ne[3] };
                    int64_t nek[4] = { D, M, B, ne[3] };
                    int64_t nev[4] = { M, D, B, ne[3] };
                    if (ndims == 2) {
                        neq[2] = 1; neq[3] = 1;
                        nek[2] = 1; nek[3] = 1;
                        nev[2] = 1; nev[3] = 1;
                    } else if (ndims == 3) {
                        neq[3] = 1;
                        nek[3] = 1;
                        nev[3] = 1;
                    }
                    x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f);
                    x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f);
                    x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f);
                    ggml_set_param(ctx0, x[0]);
                    ggml_set_param(ctx0, x[1]);
                    ggml_set_param(ctx0, x[2]);

                    struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));

                    check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
                }
            }
        }

        ggml_free(ctx0);
    }

    return 0;
}