// test-grad0.cpp

#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml.h"

#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <initializer_list>
#include <vector>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

#define MAX_NARGS 3

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

#define GGML_SILU_FP16

//
// logging
//

#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)

static float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

static int irand(int n) {
    if (n == 0) return 0;
    return rand()%n;
}

static void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;
    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}

static struct ggml_tensor * get_random_tensor_f32(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}

static struct ggml_tensor * get_random_tensor_f16(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}

static struct ggml_tensor * get_random_tensor_i32(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        int32_t imin,
        int32_t imax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
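
// Compares the analytic gradient computed by the backward graph against a central
// finite difference g0 = (f(x + eps) - f(x - eps)) / (2*eps) for every element of
// every parameter tensor. An element passes only if both the absolute and the
// relative error stay within max_error_abs / max_error_rel. If expected_vals is
// non-empty, elements whose numerical gradient matches none of the listed values
// are skipped (the finite difference is considered unreliable there); if every
// element of an argument gets skipped, the check fails.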
static bool check_gradient(
        const char * op_name,
        struct ggml_context * ctx0,
        struct ggml_tensor * x[],
        struct ggml_tensor * f,
        int ndims,
        int nargs,
        float eps,
        float max_error_abs,
        float max_error_rel,
        std::vector<double> expected_vals) {
    static int n_threads = -1;
    if (n_threads < 0) {
        n_threads = GGML_DEFAULT_N_THREADS;

        const char *env = getenv("GGML_N_THREADS");
        if (env) {
            n_threads = atoi(env);
        }

        printf("GGML_N_THREADS = %d\n", n_threads);
    }

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
    ggml_build_forward_expand(gf, f);
    ggml_graph_cpy(gf, gb);
    ggml_build_backward_expand(ctx0, gf, gb, false);

    ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

    ggml_graph_reset (gf);
    ggml_set_f32     (f->grad, 1.0f);

    ggml_graph_compute_with_ctx(ctx0, gb, n_threads);

    // ggml_graph_dump_dot(gf, NULL, "test-grad0-forward.dot");
    // ggml_graph_dump_dot(gb, gf,  "test-grad0-backward.dot");

    for (int i = 0; i < nargs; ++i) {
        bool all_g0_bad = true;

        const int nelements = ggml_nelements(x[i]);
        for (int k = 0; k < nelements; ++k) {
            // Calculate gradient numerically:
            const float x0 = ggml_get_f32_1d(x[i], k);
            const float xm = x0 - eps;
            const float xp = x0 + eps;
            ggml_set_f32_1d(x[i], k, xp);

            ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

            const double f0 = ggml_get_f32_1d(f, 0);

            ggml_set_f32_1d(x[i], k, xm);

            ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

            const double f1 = ggml_get_f32_1d(f, 0);
            const double g0 = (f0 - f1)/(2.0*(double) eps);

            // The numerical calculation of the gradient fails around discontinuities (e.g. 0 for ReLU).
            // In such cases, provide a vector of expected values and skip the comparison for failed calculations.
            if (!expected_vals.empty()) {
                bool matches_any = false;
                for (const double & ev : expected_vals) {
                    const double error_abs = std::fabs(g0 - ev);
                    if (error_abs > max_error_abs) {
                        continue;
                    }
                    const double error_rel = g0 != 0.0 ? fabs(g0 - ev)/fabs(g0) : 0.0;
                    if (error_rel > max_error_rel) {
                        continue;
                    }

                    matches_any = true;
                    break;
                }
                if (!matches_any) {
                    continue;
                }
            }
            all_g0_bad = false;

            ggml_set_f32_1d(x[i], k, x0);

            // compute gradient using backward graph
            ggml_graph_reset (gf);
            ggml_set_f32     (f->grad, 1.0f);

            ggml_graph_compute_with_ctx(ctx0, gb, n_threads);

            const double g1 = ggml_get_f32_1d(x[i]->grad, k);

            const double error_abs = fabs(g0 - g1);
            const double error_rel = g0 != 0.0 ? fabs(g0 - g1)/fabs(g0) : 0.0;

            if (error_abs > max_error_abs || error_rel > max_error_rel) {
                printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
                        op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
                //assert(false);
                return false;
            }
        }

        if (all_g0_bad) {
            printf("%s: numerical calculation of the gradient failed for all values\n", op_name);
            return false;
        }
    }

    return true;
}
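
// Reference check for 2D ggml_mul_mat results: recomputes every output element as
// dst[i*nc + j] = sum_k src0[j*nk + k] * src1[i*nk + k], i.e. both operands are
// walked along their innermost dimension nk, and compares against y->data with an
// absolute tolerance of 1e-5.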
// TODO: clean-up this ..
static bool check_mat_mul(
        const struct ggml_tensor * y,
        const struct ggml_tensor * x0,
        const struct ggml_tensor * x1) {
    float * dst  = (float *) y->data;
    float * src0 = (float *) x0->data;
    float * src1 = (float *) x1->data;

    const int nc = x0->ne[1];
    const int nr = x1->ne[1];
    const int nk = x0->ne[0];

    GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);

    GGML_PRINT_DEBUG("x0:\n");
    for (int j = 0; j < x0->ne[1]; ++j) {
        for (int i = 0; i < x0->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("x1:\n");
    for (int j = 0; j < x1->ne[1]; ++j) {
        for (int i = 0; i < x1->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }
    GGML_PRINT_DEBUG("\n");

    GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
    for (int j = 0; j < y->ne[1]; ++j) {
        for (int i = 0; i < y->ne[0]; ++i) {
            GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
        }
        GGML_PRINT_DEBUG("\n");
    }

    for (int i = 0; i < nr; ++i) {
        for (int j = 0; j < nc; ++j) {
            float sum = 0.0f;
            for (int k = 0; k < nk; ++k) {
                sum += src0[j*nk + k]*src1[i*nk + k];
            }
            if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
                fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
                assert(false);
                return false;
            }
        }
    }

    return true;
}
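
// NUM_PERMUTATIONS = 4! = 24; the all_permutations table built at the start of
// main() enumerates every ordering of the four tensor axes and is used by the
// permute test below.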
#define NUM_PERMUTATIONS (4*3*2*1)

int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 256*1024*1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ false,
    };

    int64_t ne[4];

    int all_permutations[4 * NUM_PERMUTATIONS];
    {
        int count = 0;
        for (int ax0=0; ax0<4; ++ax0) {
            for (int ax1=0; ax1<4; ++ax1) {
                if (ax1 == ax0) continue;
                for (int ax2=0; ax2<4; ++ax2) {
                    if (ax2 == ax0) continue;
                    if (ax2 == ax1) continue;
                    for (int ax3=0; ax3<4; ++ax3) {
                        if (ax3 == ax0) continue;
                        if (ax3 == ax1) continue;
                        if (ax3 == ax2) continue;
                        assert(count < NUM_PERMUTATIONS);
                        all_permutations[count*4+0] = ax0;
                        all_permutations[count*4+1] = ax1;
                        all_permutations[count*4+2] = ax2;
                        all_permutations[count*4+3] = ax3;
                        ++count;
                    }
                }
            }
        }
    }

    unsigned seed_iter = 1;

    // original loop: 1000
    int niter = 4;
    const char *env = getenv("GGML_NLOOP");
    if (env != NULL) {
        niter = atoi(env);
    }
    if (argc > 1) {
        niter = atoi(argv[1]);
    }
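
    // The iteration count defaults to 4 and can be overridden with the GGML_NLOOP
    // environment variable or the first command-line argument; check_gradient()
    // additionally honors GGML_N_THREADS for the number of compute threads.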
    for (int iter = 0; iter < niter; ++iter) {
        srand(seed_iter);
        seed_iter = rand();
        unsigned seed = rand();

        printf("test-grad0: iter:%d/%d\n", (iter+1), niter);
        struct ggml_context * ctx0 = ggml_init(params);

        get_random_dims(ne, 4);

        struct ggml_tensor * x[MAX_NARGS];

        // add f32
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
                check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f, {});
            }
        }

        // add f16
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
                check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f, {});
            }
        }

        // sub
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));
                check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // mul
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));
                check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // div
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));
                check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f, {});
            }
        }

        // sqr
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));
                check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // sqrt
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));
                check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f, {});
            }
        }

        // log
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));
                check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f, {});
            }
        }

        // sum
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, x[0]);
                check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // sum_rows
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));
                check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
            }
        }

        // mean, not yet fully implemented
        if(0)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
                check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // argmax
        if (0)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
                check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // repeat
        {
            srand(seed);
            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] = ne[0] * ne2[0];
            ne2[1] = ne[1] * ne2[1];
            ne2[2] = 1;
            ne2[3] = 1;
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
                check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
            }
        }

        // repeat back
        {
            srand(seed);
            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] = ne[0] * ne2[0];
            ne2[1] = ne[1] * ne2[1];
            ne2[2] = 1;
            ne2[3] = 1;
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
                check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
            }
        }
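
        // abs, sgn, step and relu are only piecewise differentiable, so the finite
        // difference can land on a kink; the expected_vals passed below list the
        // analytic gradient values that check_gradient accepts in that case.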
        // abs
        {
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
                check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f, {-1.0, 1.0});
            }
        }

        // sgn
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
                check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
            }
        }

        // neg
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
                check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // step
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
                check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
            }
        }

        // tanh, not yet fully implemented
        if(0)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
                check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // mul_mat
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                int max_nrep = (ndims >= 3) ? 2 : 1;
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                for (int nrep2 = 1; nrep2 < max_nrep; ++nrep2) {
                    for (int nrep3 = 1; nrep3 < max_nrep; ++nrep3) {
                        {
                            int64_t ne2[4];
                            get_random_dims(ne2, 4);
                            ne2[0] = ne[0];
                            ne2[2] = nrep2 * ne[2];
                            ne2[3] = nrep3 * ne[3];
                            x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                        }
                        ggml_set_param(ctx0, x[0]);
                        ggml_set_param(ctx0, x[1]);
                        struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                        struct ggml_tensor * f = ggml_sum(ctx0, m);
                        GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
                        check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
                        if (ndims == 2) {
                            // check_mat_mul does not support ndims > 2
                            check_mat_mul(m, x[1], x[0]);
                        }
                    }
                }
            }
        }

        // elu, not yet fully implemented
        if(0)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
                check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // relu
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
                check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {0.0, 1.0});
            }
        }

        // gelu, not yet fully implemented
        if(0)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
                check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
            }
        }

        // silu
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));
#ifdef GGML_SILU_FP16
                // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY, {});
#else
                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
#endif
            }
        }

        // rms_norm
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
                check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY, {});
            }
        }

        // scale
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                const float s = -1.0f + 2.0f*frand();
                ggml_set_param(ctx0, x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], s));
                check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // cpy f32
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
                check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // cpy f16
        {
            srand(seed);
            const int nargs = 2;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                for (int i = 0; i < nargs; ++i) {
                    x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
                    ggml_set_param(ctx0, x[i]);
                }
                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
                check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
            }
        }

        // reshape (1d->nd)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // reshape (nd->1d)
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 2; ++ndims) {
                int64_t ne2[4];
                ne2[0] = 1;
                ne2[1] = 1;
                ne2[2] = 1;
                ne2[3] = 1;
                for (int i = 0; i < ndims; ++i) {
                    ne2[0] *= ne[i];
                }
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }
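
        // The acc/set tests splat a smaller random tensor into x[0] at a random
        // offset; offsets are in bytes, hence the multiplication by
        // ggml_element_size() or the corresponding stride nb[].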
        // acc 1d
        {
            srand(seed);
            int64_t ne2[4] = { 1, 1, 1, 1 };
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }
                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);
                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // acc 2d
        {
            srand(seed);
            int64_t ne2[4] = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4] = { 0, 0, 0, 0 };
            const int nargs = 2;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }
                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);
                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // acc 3d
        {
            srand(seed);
            int64_t ne2[4] = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4] = { 0, 0, 0, 0 };
            const int nargs = 2;
            for (int ndims = 3; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                get_random_dims(ne2, 3);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 3);
                }
                x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);
                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                const int offset = offsets[0] + offsets[1] + offsets[2];
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // acc 4d
        {
            srand(seed);
            int64_t ne2[4] = { 1, 1, 1, 1 };
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4] = { 0, 0, 0, 0 };
            const int nargs = 2;
            for (int ndims = 4; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                get_random_dims(ne2, 4);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 4);
                }
                x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);
                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // set_1d
        {
            srand(seed);
            int64_t ne2[4];
            const int nargs = 2;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                get_random_dims(ne2, 1);
                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 1);
                }
                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);
                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
                const int offset = irand(max_offset) * ggml_element_size(x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // set_2d
        {
            srand(seed);
            int64_t ne2[4];
            int64_t max_offsets[4] = { 0, 0, 0, 0 };
            int64_t offsets[4] = { 0, 0, 0, 0 };
            const int nargs = 1;
            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                get_random_dims(ne2, 2);
                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
                    get_random_dims(ne2, 2);
                }
                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[1]);
                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
                const int offset = offsets[0] + offsets[1];
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // view_1d
        {
            srand(seed);
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                const int k0 = irand(ggml_nelements(x[0]));
                const int k1 = irand(ggml_nelements(x[0]));
                const int i0 = MIN(k0, k1);
                const int i1 = MAX(k0, k1);
                const int offset = i0 * sizeof(float);
                const int nelem = i1 - i0;
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // view_2d
        {
            srand(seed);
            int64_t ne2[4];
            int64_t nb2[4];
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                get_random_dims(ne2, 2);
                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 2);
                }
                const int count = ne2[0]*ne2[1];
                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                ggml_set_param(ctx0, x[0]);
                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // view_3d
        {
            srand(seed);
            int64_t ne2[4] = {1,1,1,1};
            int64_t nb2[4] = {0,0,0,0};
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                get_random_dims(ne2, 3);
                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
                    get_random_dims(ne2, 3);
                }
                const int count = ne2[0]*ne2[1]*ne2[2];
                nb2[0] = sizeof(float);
                nb2[1] = nb2[0]*ne2[0];
                nb2[2] = nb2[1]*ne2[1];
                ggml_set_param(ctx0, x[0]);
                const int max_offset = ggml_nelements(x[0]) - count;
                const int offset = irand(max_offset+1) * sizeof(float);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // permute
        {
            srand(seed);
            int64_t ne2[4];
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_permute will set axes of dimensions below n_dims to 1.
                // to make ggml_permute work correctly on all axes,
                // the input tensor needs maximal n_dim of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                const int p = irand(NUM_PERMUTATIONS);
                const int ax0 = all_permutations[p*4+0];
                const int ax1 = all_permutations[p*4+1];
                const int ax2 = all_permutations[p*4+2];
                const int ax3 = all_permutations[p*4+3];
                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));
                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // transpose
        {
            srand(seed);
            int64_t ne2[4];
            const int nargs = 1;
            for (int ndims = 1; ndims <= 4; ++ndims)
            {
                // ggml_transpose will set axes of dimensions below n_dims to 1.
                // to make ggml_transpose work correctly on all axes,
                // the input tensor needs maximal n_dim of 4.
                for (int i=0; i<ndims; ++i) {
                    ne2[i] = ne[i];
                }
                for (int i=ndims; i<4; ++i) {
                    ne2[i] = 1;
                }
                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                // sum requires contiguous tensor rows
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));
                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }

        // get_rows
        {
            srand(seed);
            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
            x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
            ggml_set_param(ctx0, x[0]);
            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));
            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
        }

        // diag_mask_inf
        {
            srand(seed);
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);
            int n_past = irand(ne[0]);
            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));
            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
        }

        // diag_mask_zero
        {
            srand(seed);
            const int nargs = 1;
            const int ndims = 2;
            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
            ggml_set_param(ctx0, x[0]);
            int n_past = irand(ne[0]);
            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));
            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
        }

        // softmax
        {
            srand(seed);
            const int nargs = 1;
            int64_t ne2[4];
            get_random_dims(ne2, 4);
            for (int ndims = 1; ndims <= 3; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                float eps = 1e-6f;
                // don't use only sum as aggregation, because the sum of softmax is always 1 -> finite differences would not work
                // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
                struct ggml_tensor * f = ggml_sum(ctx0,
                                            ggml_log(ctx0,
                                                ggml_add1(ctx0,
                                                    ggml_scale(ctx0,
                                                        ggml_soft_max(ctx0, x[0]),
                                                        1.0f - eps),
                                                    ggml_new_f32(ctx0, eps))));
                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY, {});
                // NOTE: softmax forward is computed using f16 table lookup instead of using actual expf, but backward assumes actual expf.
                // this may result in gradients that differ from finite differences.
                // when this test reports errors, first try to replace the table lookup with actual expf and test again to see if just that was the cause.
                // if only the table lookup causes gradients to differ this is acceptable.
            }
        }

        // cross_entropy_loss
        {
            srand(seed);
            const int nargs = 1;
            int64_t ne2[4];
            get_random_dims(ne2, 4);
            for (int ndims = 1; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
                // the second argument to cross_entropy_loss must sum up to 1 for each row
                int nr = ggml_nrows(x[1]);
                int nc = ggml_nelements(x[1]) / nr;
                for (int ir = 0; ir < nr; ++ir) {
                    float sum = 0;
                    for (int ic = 0; ic < nc; ++ic) {
                        sum += ((float *) x[1]->data)[ic + ir*nc];
                    }
                    for (int ic = 0; ic < nc; ++ic) {
                        ((float *) x[1]->data)[ic + ir*nc] /= sum;
                    }
                }
                ggml_set_param(ctx0, x[0]);
                struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);
                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
            }
        }
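
        // rope is tested for 3 and 4 dimensions and all n_past positions; odd modes
        // are skipped (see the comment inside the loop), and the f16 variant uses
        // looser error bounds than the f32 one.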
        // rope f32
        {
            srand(seed);
            const int nargs = 1;
            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2;
            int n_rot = ne2[0];
            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
                        for (int i = 0; i < ne2[2]; ++i) {
                            ((int32_t *) p->data)[i] = n_past + i;
                        }
                        ggml_set_param(ctx0, x[0]);
                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
                            continue;
                        }
                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
                        GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
                    }
                }
            }
        }

        // rope f16
        {
            srand(seed);
            const int nargs = 1;
            int64_t ne2[4];
            get_random_dims(ne2, 4);
            ne2[0] += ne2[0] % 2;
            int n_rot = ne2[0];
            for (int ndims = 3; ndims <= 4; ++ndims) {
                for (int mode = 0; mode < 4; ++mode) {
                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
                        x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
                        for (int i = 0; i < ne2[2]; ++i) {
                            ((int32_t *) p->data)[i] = n_past + i;
                        }
                        ggml_set_param(ctx0, x[0]);
                        const bool skip_past = (mode & 1);
                        if (skip_past) {
                            // we have no past, so this would have to work on uninitialized memory.
                            // we only test the gradients here;
                            // skip_past should have no influence on gradient computation.
                            // so when other modes work, we assume that this does as well.
                            continue;
                        }
                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
                        GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
                        check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
                    }
                }
            }
        }

        // im2col f32
        {
            srand(seed);
            const int nargs = 1;
            const int ndims = 4;
            for (const bool is_2D : {false, true}) {
                int64_t ne0[ndims];
                int64_t ne1[ndims];
                get_random_dims(ne0, ndims);
                get_random_dims(ne1, ndims);
                // Ensure that the output is not zero-sized:
                ne1[0] += 8;
                ne1[1] += 8;
                if (is_2D) {
                    ne1[2] = ne0[2];
                } else {
                    ne1[1] = ne0[1];
                    ne0[3] = 1;
                    ne1[3] = 1;
                }
                // The order of arguments is swapped because the first tensor is only used for its shape.
                x[1] = get_random_tensor_f16(ctx0, ndims, ne0, -1.0f, 1.0f);
                x[0] = get_random_tensor_f32(ctx0, ndims, ne1, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                const int s0 = 1 + irand(2);
                const int s1 = is_2D ? 1 + irand(2) : 0;
                const int p0 = 0 + irand(2);
                const int p1 = is_2D ? 0 + irand(2) : 0;
                const int d0 = 1 + irand(2);
                const int d1 = is_2D ? 1 + irand(2) : 0;
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_im2col(ctx0, x[1], x[0], s0, s1, p0, p1, d0, d1, is_2D, GGML_TYPE_F32));
                GGML_PRINT_DEBUG("im2col f32: is_2D=%s, s0=%d, s1=%d, p0=%d, p1=%d, d0=%d, d1=%d\n", is_2D ? "yes" : "no", s0, s1, p0, p1, d0, d1);
                check_gradient("im2col f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
            }
        }
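
        // For max pooling each input element has an analytic gradient of either 0 or 1,
        // so those values are passed as expected_vals below, letting check_gradient skip
        // elements where the finite difference is unreliable (e.g. near ties for the maximum).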
        // pool_2d f32
        {
            srand(seed);
            const int nargs = 1;
            const int ndims = 4;
            for (const enum ggml_op_pool op : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) {
                int64_t ne0[ndims];
                get_random_dims(ne0, ndims);
                ne0[0] += 8;
                ne0[1] += 8;
                x[0] = get_random_tensor_f32(ctx0, ndims, ne0, -1.0f, 1.0f);
                ggml_set_param(ctx0, x[0]);
                const int k0 = 2 + irand(2);
                const int k1 = 2 + irand(2);
                const int s0 = 2 + irand(2);
                const int s1 = 2 + irand(2);
                const int p0 = 0 + irand(2);
                const int p1 = 0 + irand(2);
                struct ggml_tensor * f = ggml_sum(ctx0, ggml_pool_2d(ctx0, x[0], op, k0, k1, s0, s1, p0, p1));
                GGML_PRINT_DEBUG("ggml_pool_2d f32: op=%s k0=%d, k1=%d, s0=%d, s1=%d, p0=%d, p1=%d\n",
                                 op == GGML_OP_POOL_MAX ? "max" : "avg", k0, k1, s0, s1, p0, p1);
                std::vector<double> expected_vals;
                if (op == GGML_OP_POOL_MAX) {
                    expected_vals.push_back(0.0);
                    expected_vals.push_back(1.0);
                }
                check_gradient("ggml_pool_2d f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, expected_vals);
            }
        }

        // flash_attn f32
        // TODO: adapt to ggml_flash_attn_ext() changes
        //{
        //    srand(seed);
        //    const int nargs = 3;
        //    int64_t ne2[4];
        //    get_random_dims(ne2, 4);
        //    int64_t D = ne2[0];
        //    int64_t N = ne2[1];
        //    int64_t M = ne2[2] + N;
        //    int64_t B = ne2[3];
        //    for (int masked = 0; masked <= 1; ++masked) {
        //        for (int ndims = 2; ndims <= 4; ++ndims) {
        //            int max_nrep = (ndims >= 3) ? 2 : 1;
        //            for (int nrep = 1; nrep < max_nrep; ++nrep) {
        //                int64_t neq[4] = { D, N, B*nrep, ne[3] };
        //                int64_t nek[4] = { D, M, B, ne[3] };
        //                int64_t nev[4] = { M, D, B, ne[3] };
        //                if (ndims == 2) {
        //                    neq[2] = 1; neq[3] = 1;
        //                    nek[2] = 1; nek[3] = 1;
        //                    nev[2] = 1; nev[3] = 1;
        //                } else if (ndims == 3) {
        //                    neq[3] = 1;
        //                    nek[3] = 1;
        //                    nev[3] = 1;
        //                }
        //                x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
        //                x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
        //                x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
        //                ggml_set_param(ctx0, x[0]);
        //                ggml_set_param(ctx0, x[1]);
        //                ggml_set_param(ctx0, x[2]);
        //                struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
        //                check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY, {});
        //            }
        //        }
        //    }
        //}

        ggml_free(ctx0);
    }

    return 0;
}