// test-grad0.c

#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#pragma GCC diagnostic ignored "-Wdouble-promotion"

#define MAX_NARGS 3

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
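
// When GGML_SILU_FP16 is defined, ggml evaluates SiLU through an fp16 lookup
// table, so the finite-difference reference used below is only approximate;
// the silu test later in this file widens its error bounds accordingly.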
#define GGML_SILU_FP16

//
// logging
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)
float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

int irand(int n) {
    if (n == 0) return 0;
    return rand()%n;
}

void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;

    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}
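
// The tensors created below are contiguous and row-major in ggml's
// ne[0]-fastest layout, so a 4d element (i0, i1, i2, i3) lives at the flat
// index
//
//     ((i3*ne[2] + i2)*ne[1] + i1)*ne[0] + i0
//
// which is exactly the indexing the per-ndims fill loops spell out.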
struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
struct ggml_tensor * get_random_tensor_int(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        int32_t imin,
        int32_t imax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
float get_element(const struct ggml_tensor * t, int idx) {
    if (t->type == GGML_TYPE_F32) {
        return ((float *)t->data)[idx];
    }

    if (t->type == GGML_TYPE_I32) {
        return ((int32_t *)t->data)[idx];
    }

    assert(false);
    return INFINITY;
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}

void print_elements(const char* label, const struct ggml_tensor * t) {
    if (!t) {
        printf("%s: %s = null\n", __func__, label);
        return;
    }
    const int nelements = ggml_nelements(t);
    printf("%s: %s = [", __func__, label);
    for (int k = 0; k < nelements; ++k) {
        if (k > 0) { printf(", "); }
        printf("%.5f", get_element(t, k));
    }
    printf("] shape: [");
    for (int k = 0; k < t->n_dims; ++k) {
        if (k > 0) { printf(", "); }
        printf("%d", (int)t->ne[k]);
    }
    printf("]\n");
}
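
// check_gradient compares the analytic gradients produced by ggml's backward
// graph against a central finite-difference estimate. For every element of
// every parameter tensor it perturbs the element by +/- eps and uses
//
//     g0 = (f(x0 + eps) - f(x0 - eps)) / (2*eps)
//
// as the reference, then checks the absolute and relative error of the
// backward-graph gradient g1 against the given bounds.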
bool check_gradient(
        const char * op_name,
        struct ggml_context * ctx0,
        struct ggml_tensor * x[],
        struct ggml_tensor * f,
        int ndims,
        int nargs,
        float eps,
        float max_error_abs,
        float max_error_rel) {
    static int n_threads = -1;
    if (n_threads < 0) {
        n_threads = GGML_DEFAULT_N_THREADS;

        const char *env = getenv("GGML_N_THREADS");
        if (env) {
            n_threads = atoi(env);
        }

        printf("GGML_N_THREADS = %d\n", n_threads);
    }

    struct ggml_cgraph gf = ggml_build_forward (f);
    struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);

    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

    ggml_graph_reset  (&gf);
    ggml_set_f32      (f->grad, 1.0f);

    ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

    // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
    // ggml_graph_dump_dot(&gb, &gf,  "test-grad0-backward.dot");

    for (int i = 0; i < nargs; ++i) {
        const int nelements = ggml_nelements(x[i]);
        for (int k = 0; k < nelements; ++k) {
            // compute gradient using finite differences
            const float x0 = get_element(x[i], k);
            const float xm = x0 - eps;
            const float xp = x0 + eps;

            set_element(x[i], k, xp);
            ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

            const float f0 = ggml_get_f32_1d(f, 0);

            set_element(x[i], k, xm);
            ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

            const float f1 = ggml_get_f32_1d(f, 0);
            const float g0 = (f0 - f1)/(2.0f*eps);

            set_element(x[i], k, x0);

            // compute gradient using backward graph
            ggml_graph_reset  (&gf);
            ggml_set_f32      (f->grad, 1.0f);

            ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

            const float g1 = get_element(x[i]->grad, k);

            const float error_abs = fabsf(g0 - g1);
            const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0;

            if (error_abs > max_error_abs || error_rel > max_error_rel) {
                printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
                        op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
                //assert(false);
                return false;
            }
        }
    }

    return true;
}
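
// check_mat_mul re-derives y = ggml_mul_mat(x0, x1) on the CPU as a plain
// triple loop. In ggml's convention both operands share ne[0] (the inner
// dimension nk), rows of x0 are dotted with rows of x1, and the result has
// shape (x0->ne[1], x1->ne[1]) = (nc, nr).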
// TODO: clean-up this ..
bool check_mat_mul(
        const struct ggml_tensor * y,
        const struct ggml_tensor * x0,
        const struct ggml_tensor * x1) {
    float * dst  = (float *) y->data;
    float * src0 = (float *) x0->data;
    float * src1 = (float *) x1->data;

    const int nc = x0->ne[1];
    const int nr = x1->ne[1];
    const int nk = x0->ne[0];

    GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);

    GGML_PRINT_DEBUG("x0:\n");
    for (int j = 0; j < x0->ne[1]; ++j) {
        for (int i = 0; i < x0->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("x1:\n");
    for (int j = 0; j < x1->ne[1]; ++j) {
        for (int i = 0; i < x1->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
    for (int j = 0; j < y->ne[1]; ++j) {
        for (int i = 0; i < y->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", dst[j*nc + i]); // row stride of y is y->ne[0] == nc
        }
        GGML_PRINT_DEBUG("\n");
    }

    for (int i = 0; i < nr; ++i) {
        for (int j = 0; j < nc; ++j) {
            float sum = 0.0f;
            for (int k = 0; k < nk; ++k) {
                sum += src0[j*nk + k]*src1[i*nk + k];
            }

            if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
                fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
                assert(false);
                return false;
            }
        }
    }

    return true;
}
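
// all_permutations (filled at the top of main) enumerates all 4! = 24
// orderings of the four axes; the permute test draws one at random per
// iteration so that gradients through ggml_permute are eventually checked
// for every axis ordering.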
#define NUM_PERMUTATIONS (4*3*2*1)

int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        .mem_size   = 128*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    int64_t ne[4];

    int all_permutations[4 * NUM_PERMUTATIONS];
    {
        int count = 0;
        for (int ax0=0; ax0<4; ++ax0) {
            for (int ax1=0; ax1<4; ++ax1) {
                if (ax1 == ax0) continue;
                for (int ax2=0; ax2<4; ++ax2) {
                    if (ax2 == ax0) continue;
                    if (ax2 == ax1) continue;
                    for (int ax3=0; ax3<4; ++ax3) {
                        if (ax3 == ax0) continue;
                        if (ax3 == ax1) continue;
                        if (ax3 == ax2) continue;
                        assert(count < NUM_PERMUTATIONS);
                        all_permutations[count*4+0] = ax0;
                        all_permutations[count*4+1] = ax1;
                        all_permutations[count*4+2] = ax2;
                        all_permutations[count*4+3] = ax3;
                        ++count;
                    }
                }
            }
        }
    }
    // original loop: 1000
    int niter = 4;
    const char *env = getenv("GGML_NLOOP");
    if (env != NULL) {
        niter = atoi(env);
    }
    if (argc > 1) {
        niter = atoi(argv[1]);
    }

    for (int iter = 0; iter < niter; ++iter) {
        printf("test-grad0: iter:%d/%d\n", iter, niter);
        struct ggml_context * ctx0 = ggml_init(params);

        get_random_dims(ne, 4);

        struct ggml_tensor * x[MAX_NARGS];

        // add
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));

                check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
            }
        }

        // sub
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));

                check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // mul
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));

                check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // div
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));

                check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
            }
        }

        // sqr
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));

                check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // sqrt
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));

                check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // log
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));

                check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
            }
        }

        // sum
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, x[0]);

                check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
            }
        }

        // sum_rows
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));

                check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }
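
        // The repeat test below wraps ggml_repeat in sum(sqr(x1 - repeat(x0, x1)))
        // so that the scalar loss depends on every repeated copy of x0; only
        // x[0] is a parameter, so this exercises the gradient accumulation
        // that repeat's backward pass performs across the copies.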
        // repeat
        {
            int64_t ne2[4];
            get_random_dims(ne2, 4);

            ne2[0] = ne[0] * ne2[0];
            ne2[1] = ne[1] * ne2[1];
            ne2[2] = 1;
            ne2[3] = 1;

            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne,  -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));

                check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
            }
        }

        // abs (finite differences do not work)
        //{
        //    const int nargs = 1;
        //    for (int ndims = 1; ndims <= 2; ++ndims) {
        //        for (int i = 0; i < nargs; ++i) {
        //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
        //            ggml_set_param(ctx0, x[i]);
        //        }
        //        struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
        //        check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
        //    }
        //}

        // mul_mat
        {
            const int nargs = 2;

            for (int ndims = 2; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                {
                    int64_t ne2[4];
                    get_random_dims(ne2, 4);
                    ne2[0] = ne[0];
                    x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                }

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, m);

                GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);

                check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
                check_mat_mul(m, x[1], x[0]);
            }
        }

        // silu
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));

#ifdef GGML_SILU_FP16
                // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
#else
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
#endif
            }
        }

        // rms_norm
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0]));

                check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
            }
        }

        // scale
        {
            const int nargs = 2;

            int64_t ne2[4];
            ne2[0] = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);
                ggml_set_param(ctx0, x[1]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1]));

                check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cpy
        {
            const int nargs = 2;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));

                check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (1d->nd)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // reshape (nd->1d)
        {
            const int nargs = 1;

            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));

                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
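
        // The acc tests add x[1] into a view of x[0]. ggml_acc takes the byte
        // strides nb[1..3] of the destination plus a byte offset, which is why
        // every offset below is scaled by the element size (or by the nb[]
        // strides) of x[0].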
        // acc 1d
        {
            int64_t ne2[4] = { 1, 1, 1, 1 };

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 2d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 3d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 3; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 3);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 3);
                }

                x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                const int offset = offsets[0] + offsets[1] + offsets[2];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // acc 4d
        {
            int64_t ne2[4]         = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 2;
            for (int ndims = 4; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 4);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 4);
                }

                x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));

                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
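
        // Unlike acc, ggml_set_1d/ggml_set_2d overwrite (rather than add to)
        // a view of x[0] with the contents of x[1]; offsets are again in bytes.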
        // set_1d
        {
            int64_t ne2[4];

            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }

                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));

                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // set_2d
        {
            int64_t ne2[4];
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4]     = { 0, 0, 0, 0 };

            const int nargs = 1;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }

                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);

                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));

                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_1d
        {
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                const int k0 = irand(ggml_nelements(x[0]));
                const int k1 = irand(ggml_nelements(x[0]));
                const int i0 = MIN(k0, k1);
                const int i1 = MAX(k0, k1);

                const int offset = i0 * sizeof(float);
                const int nelem  = i1 - i0;

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));

                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_2d
        {
            int64_t ne2[4];
            int64_t nb2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 2);
                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 2);
                }
                const int count = ne2[0]*ne2[1];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));

                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // view_3d
        {
            int64_t ne2[4] = {1,1,1,1};
            int64_t nb2[4] = {0,0,0,0};

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                get_random_dims(ne2, 3);
                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 3);
                }
                const int count = ne2[0]*ne2[1]*ne2[2];

                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                nb2[2] = nb2[1]*ne2[1];

                ggml_set_param(ctx0, x[0]);

                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));

                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // permute
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // axes above n_dims have extent 1, so to exercise permutations
                // of all four axes the input tensor is created with the maximal
                // n_dims of 4, padding the unused dims with size 1.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                const int p = irand(NUM_PERMUTATIONS);
                const int ax0 = all_permutations[p*4+0];
                const int ax1 = all_permutations[p*4+1];
                const int ax2 = all_permutations[p*4+2];
                const int ax3 = all_permutations[p*4+3];

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));

                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // transpose
        {
            int64_t ne2[4];

            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // as with permute: the input tensor is created with the maximal
                // n_dims of 4 so that ggml_transpose works correctly on all axes.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));

                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }
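
        // get_rows gathers the rows of x[0] selected by the integer indices in
        // x[1]; the indices are sampled in [0, ne2[1]) so every lookup is in
        // bounds, and gradients are only checked for x[0] since integer
        // indices carry no gradient.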
        // get_rows
        {
            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
            x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]);

            ggml_set_param(ctx0, x[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));

            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_inf
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));

            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // diag_mask_zero
        {
            const int nargs = 1;
            const int ndims = 2;

            x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);

            int n_past = irand(ne[0]);

            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));

            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
        }

        // softmax
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0]));

                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
            }
        }

        // cross_entropy_loss
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);

            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor(ctx0, ndims, ne2,  0.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1]));

                // finite differences regularly fail for this op, hence the loose bounds
                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-1f, 1e-2f, INFINITY);
            }
        }

        // rope
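        // (ne2[0] is rounded up to an even value below and used as n_rot;
        //  RoPE rotates channels in pairs, so n_rot must be even. This reading
        //  of the `ne2[0] += ne2[0] % 2` adjustment is inferred from usage.)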
        {
            const int nargs = 1;

            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2;
            int n_rot = ne2[0];

            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);

                        ggml_set_param(ctx0, x[0]);

                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation,
                            // so when the other modes work, we assume that this one does as well.
                            continue;
                        }

                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));

                        GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
                    }
                }
            }
        }

        // flash_attn
        {
            const int nargs = 3;

            int64_t ne2[4];

            get_random_dims(ne2, 4);
            int64_t D = ne2[0];
            int64_t N = ne2[1];
            int64_t M = ne2[2] + N;
            int64_t B = ne2[3];
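
            // dimension roles, inferred from how they are used below:
            //   D = head dimension, N = number of queries,
            //   M = number of keys/values (M >= N), B = batch size;
            //   q is (D, N, B, ne[3]), k is (D, M, B, ne[3]), v is (M, D, B, ne[3])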
            for (int masked = 0; masked <= 1; ++masked) {
                for (int ndims = 2; ndims <= 4; ++ndims) {
                    int64_t neq[4] = { D, N, B, ne[3] };
                    int64_t nek[4] = { D, M, B, ne[3] };
                    int64_t nev[4] = { M, D, B, ne[3] };
                    if (ndims == 2) {
                        neq[2] = 1; neq[3] = 1;
                        nek[2] = 1; nek[3] = 1;
                        nev[2] = 1; nev[3] = 1;
                    } else if (ndims == 3) {
                        neq[3] = 1;
                        nek[3] = 1;
                        nev[3] = 1;
                    }
                    x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f);
                    x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f);
                    x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f);
                    ggml_set_param(ctx0, x[0]);
                    ggml_set_param(ctx0, x[1]);
                    ggml_set_param(ctx0, x[2]);

                    struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));

                    check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f);
                }
            }
        }

        ggml_free(ctx0);
    }

    return 0;
}