ggml.c

#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC

#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-threading.h"
#include "ggml-cpu.h"
#include "ggml.h"

// FIXME: required here for quantization functions
#include "ggml-quants.h"

#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
#include <alloca.h>
#endif

#include <assert.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <signal.h>

#if defined(__gnu_linux__)
#include <syscall.h>
#endif

#if defined(__APPLE__)
#include <unistd.h>
#include <mach/mach.h>
#include <TargetConditionals.h>
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif

#define UNUSED GGML_UNUSED

#if defined(_MSC_VER)
#define m512bh(p) p
#define m512i(p) p
#else
#define m512bh(p) (__m512bh)(p)
#define m512i(p) (__m512i)(p)
#endif

// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
float ggml_table_f32_f16[1 << 16];

#if defined(__linux__) || \
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>

#if defined(__linux__)
#include <sys/prctl.h>
#endif

#if defined(__ANDROID__)
#include <unwind.h>
#include <dlfcn.h>
#include <stdio.h>

struct backtrace_state {
    void ** current;
    void ** end;
};

static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
    struct backtrace_state * state = (struct backtrace_state *)arg;
    uintptr_t pc = _Unwind_GetIP(context);
    if (pc) {
        if (state->current == state->end) {
            return _URC_END_OF_STACK;
        } else {
            *state->current++ = (void*)pc;
        }
    }
    return _URC_NO_REASON;
}

static void ggml_print_backtrace_symbols(void) {
    const int max = 100;
    void* buffer[max];
    struct backtrace_state state = {buffer, buffer + max};
    _Unwind_Backtrace(unwind_callback, &state);

    int count = state.current - buffer;

    for (int idx = 0; idx < count; ++idx) {
        const void * addr = buffer[idx];
        const char * symbol = "";

        Dl_info info;
        if (dladdr(addr, &info) && info.dli_sname) {
            symbol = info.dli_sname;
        }

        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
    }
}
#elif defined(__linux__) && defined(__GLIBC__)
#include <execinfo.h>
static void ggml_print_backtrace_symbols(void) {
    void * trace[100];
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
}
#else
static void ggml_print_backtrace_symbols(void) {
    // platform not supported
}
#endif

void ggml_print_backtrace(void) {
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
    if (GGML_NO_BACKTRACE) {
        return;
    }
#if defined(__linux__)
    FILE * f = fopen("/proc/self/status", "r");
    size_t size = 0;
    char * line = NULL;
    ssize_t length = 0;
    while ((length = getline(&line, &size, f)) > 0) {
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
            // Already being debugged, and the breakpoint is the later abort()
            free(line);
            fclose(f);
            return;
        }
    }
    free(line);
    fclose(f);
    int lock[2] = { -1, -1 };
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
#endif
    const int parent_pid = getpid();
    const int child_pid = fork();
    if (child_pid < 0) { // error
#if defined(__linux__)
        close(lock[1]);
        close(lock[0]);
#endif
        return;
    } else if (child_pid == 0) { // child
        char attach[32];
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
#if defined(__linux__)
        close(lock[1]);
        (void) !read(lock[0], lock, 1);
        close(lock[0]);
#endif
        // try gdb
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",
            "-ex", attach,
            "-ex", "bt -frame-info source-and-location",
            "-ex", "detach",
            "-ex", "quit",
            (char *) NULL);
        // try lldb
        execlp("lldb", "lldb", "--batch",
            "-o", "bt",
            "-o", "quit",
            "-p", &attach[sizeof("attach ") - 1],
            (char *) NULL);
        // gdb failed, fallback to backtrace_symbols
        ggml_print_backtrace_symbols();
        _Exit(0);
    } else { // parent
#if defined(__linux__)
        prctl(PR_SET_PTRACER, child_pid);
        close(lock[1]);
        close(lock[0]);
#endif
        waitpid(child_pid, NULL, 0);
    }
}
#else
void ggml_print_backtrace(void) {
    // platform not supported
}
#endif

void ggml_abort(const char * file, int line, const char * fmt, ...) {
    fflush(stdout);

    fprintf(stderr, "%s:%d: ", file, line);

    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fprintf(stderr, "\n");

    ggml_print_backtrace();
    abort();
}
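// Usage sketch (illustrative, not part of the original file): ggml_abort is normally
// reached through the GGML_ABORT macro from ggml.h, which supplies __FILE__/__LINE__:
//
//     if (tensor == NULL) {
//         GGML_ABORT("unexpected NULL tensor in %s", "my_op"); // prints location, backtrace, then abort()s
//     }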
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp

//
// logging
//

struct ggml_logger_state {
    ggml_log_callback log_callback;
    void * log_callback_user_data;
};

static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};

static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
    if (format == NULL) {
        return;
    }
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, 128, format, args);
    if (len < 128) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        free(buffer2);
    }
    va_end(args_copy);
}

void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    ggml_log_internal_v(level, format, args);
    va_end(args);
}

void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}
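// Usage sketch (illustrative, not part of the original file): a custom callback
// matching the ggml_log_callback signature can redirect these messages; it is
// typically installed with ggml_log_set() (declared in ggml.h).
//
//     static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
//         FILE * f = (FILE *) user_data; // e.g. a log file handle
//         fprintf(f, "[%d] %s", (int) level, text);
//     }
//     // ggml_log_set(my_log, stderr);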
//
// end of logging block
//

#ifdef GGML_USE_ACCELERATE
// uncomment to use vDSP for soft max computation
// note: not sure if it is actually faster
//#define GGML_SOFT_MAX_ACCELERATE
#endif

void * ggml_aligned_malloc(size_t size) {
#if defined(__s390x__)
    const int alignment = 256;
#else
    const int alignment = 64;
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
    return _aligned_malloc(size, alignment);
#else
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
        return NULL;
    }
    void * aligned_memory = NULL;
#ifdef GGML_USE_CPU_HBM
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
#elif TARGET_OS_OSX
    GGML_UNUSED(alignment);
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
    int result = EFAULT;
    switch (alloc_status) {
        case KERN_SUCCESS:
            result = 0;
            break;
        case KERN_INVALID_ADDRESS:
            result = EINVAL;
            break;
        case KERN_NO_SPACE:
            result = ENOMEM;
            break;
        default:
            result = EFAULT;
            break;
    }
#else
    int result = posix_memalign(&aligned_memory, alignment, size);
#endif
    if (result != 0) {
        // Handle allocation failure
        const char *error_desc = "unknown allocation error";
        switch (result) {
            case EINVAL:
                error_desc = "invalid alignment value";
                break;
            case ENOMEM:
                error_desc = "insufficient memory";
                break;
        }
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
        return NULL;
    }
    return aligned_memory;
#endif
}

void ggml_aligned_free(void * ptr, size_t size) {
    GGML_UNUSED(size);
#if defined(_MSC_VER) || defined(__MINGW32__)
    _aligned_free(ptr);
#elif GGML_USE_CPU_HBM
    if (ptr != NULL) {
        hbw_free(ptr);
    }
#elif TARGET_OS_OSX
    if (ptr != NULL) {
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
    }
#else
    free(ptr);
#endif
}
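// Usage sketch (illustrative, not part of the original file): memory from
// ggml_aligned_malloc must be released with ggml_aligned_free, passing the same
// size, because some backends (e.g. the mach vm_deallocate path above) need it.
//
//     const size_t size = 1024*1024;
//     void * buf = ggml_aligned_malloc(size);
//     if (buf != NULL) {
//         // ... use buf (aligned to at least 64 bytes, 256 on s390x) ...
//         ggml_aligned_free(buf, size);
//     }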
inline static void * ggml_malloc(size_t size) {
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
        return NULL;
    }
    void * result = malloc(size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

// calloc
inline static void * ggml_calloc(size_t num, size_t size) {
    if (num == 0 || size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
        return NULL;
    }
    void * result = calloc(num, size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

#define GGML_MALLOC(size)      ggml_malloc(size)
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
#define GGML_FREE(ptr)         free(ptr)

const char * ggml_status_to_string(enum ggml_status status) {
    switch (status) {
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
    }

    return "GGML status: unknown";
}

float ggml_fp16_to_fp32(ggml_fp16_t x) {
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
    return GGML_FP16_TO_FP32(x);
}

ggml_fp16_t ggml_fp32_to_fp16(float x) {
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
    return GGML_FP32_TO_FP16(x);
}

float ggml_bf16_to_fp32(ggml_bf16_t x) {
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
    return GGML_BF16_TO_FP32(x); // it just left shifts
}

ggml_bf16_t ggml_fp32_to_bf16(float x) {
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
    return GGML_FP32_TO_BF16(x);
}

void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = GGML_FP16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_FP32_TO_FP16(x[i]);
    }
}

void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_BF16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
    for (int i = 0; i < n; i++) {
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
    }
}

void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
    int i = 0;
#if defined(__AVX512BF16__)
    // subnormals are flushed to zero on this platform
    for (; i + 32 <= n; i += 32) {
        _mm512_storeu_si512(
            (__m512i *)(y + i),
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
                                      _mm512_loadu_ps(x + i))));
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_FP32_TO_BF16(x[i]);
    }
}
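// Usage sketch (illustrative, not part of the original file): the *_row helpers
// convert whole buffers element by element; the scalar ggml_fp16_to_fp32 /
// ggml_fp32_to_fp16 variants above exist only for API users of ggml.h.
//
//     float       src[8]  = { 0.0f, 0.5f, 1.0f, -2.0f, 3.25f, -4.0f, 65504.0f, 1e-4f };
//     ggml_fp16_t h[8];
//     float       back[8];
//     ggml_fp32_to_fp16_row(src, h, 8);  // f32 -> f16 (lossy)
//     ggml_fp16_to_fp32_row(h, back, 8); // f16 -> f32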
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
}

//
// timing
//

#if defined(_MSC_VER) || defined(__MINGW32__)
static int64_t timer_freq, timer_start;

void ggml_time_init(void) {
    LARGE_INTEGER t;
    QueryPerformanceFrequency(&t);
    timer_freq = t.QuadPart;

    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
    // and the uptime is high enough.
    // We subtract the program start time to reduce the likelihood of that happening.
    QueryPerformanceCounter(&t);
    timer_start = t.QuadPart;
}

int64_t ggml_time_ms(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
}

int64_t ggml_time_us(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
}
#else
void ggml_time_init(void) {}

int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
}

int64_t ggml_time_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
}
#endif

int64_t ggml_cycles(void) {
    return clock();
}

int64_t ggml_cycles_per_ms(void) {
    return CLOCKS_PER_SEC/1000;
}
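// Usage sketch (illustrative, not part of the original file): timing a section of
// work with the wall-clock helpers above. ggml_time_init() is called once at
// startup (ggml_init() normally takes care of this for library users).
//
//     ggml_time_init();
//     const int64_t t0 = ggml_time_us();
//     // ... work to be timed ...
//     const int64_t t1 = ggml_time_us();
//     printf("elapsed: %.3f ms\n", (t1 - t0) / 1000.0);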
//
// cross-platform UTF-8 file paths
//

#ifdef _WIN32
static wchar_t * ggml_mbstowcs(const char * mbs) {
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
    if (!wlen) {
        errno = EINVAL;
        return NULL;
    }

    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
    if (!wlen) {
        GGML_FREE(wbuf);
        errno = EINVAL;
        return NULL;
    }

    return wbuf;
}
#endif

FILE * ggml_fopen(const char * fname, const char * mode) {
#ifdef _WIN32
    FILE * file = NULL;

    // convert fname (UTF-8)
    wchar_t * wfname = ggml_mbstowcs(fname);
    if (wfname) {
        // convert mode (ANSI)
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
        wchar_t * wmode_p = wmode;
        do {
            *wmode_p++ = (wchar_t)*mode;
        } while (*mode++);

        // open file
        file = _wfopen(wfname, wmode);

        GGML_FREE(wfname);
        GGML_FREE(wmode);
    }

    return file;
#else
    return fopen(fname, mode);
#endif
}
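// Usage sketch (illustrative, not part of the original file): ggml_fopen behaves like
// fopen but accepts UTF-8 paths on every platform, including Windows (where it goes
// through MultiByteToWideChar + _wfopen above). The path below is hypothetical.
//
//     FILE * f = ggml_fopen("models/модель.gguf", "rb");
//     if (f != NULL) {
//         // ... read the file ...
//         fclose(f);
//     }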
static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);

static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
    [GGML_TYPE_I8] = {
        .type_name = "i8",
        .blck_size = 1,
        .type_size = sizeof(int8_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I16] = {
        .type_name = "i16",
        .blck_size = 1,
        .type_size = sizeof(int16_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I32] = {
        .type_name = "i32",
        .blck_size = 1,
        .type_size = sizeof(int32_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I64] = {
        .type_name = "i64",
        .blck_size = 1,
        .type_size = sizeof(int64_t),
        .is_quantized = false,
    },
    [GGML_TYPE_F64] = {
        .type_name = "f64",
        .blck_size = 1,
        .type_size = sizeof(double),
        .is_quantized = false,
    },
    [GGML_TYPE_F32] = {
        .type_name = "f32",
        .blck_size = 1,
        .type_size = sizeof(float),
        .is_quantized = false,
    },
    [GGML_TYPE_F16] = {
        .type_name = "f16",
        .blck_size = 1,
        .type_size = sizeof(ggml_fp16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row,
    },
    [GGML_TYPE_Q4_0] = {
        .type_name = "q4_0",
        .blck_size = QK4_0,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
    },
    [GGML_TYPE_Q4_1] = {
        .type_name = "q4_1",
        .blck_size = QK4_1,
        .type_size = sizeof(block_q4_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
    },
    [4] = { // GGML_TYPE_Q4_2
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [5] = { // GGML_TYPE_Q4_3
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [GGML_TYPE_Q5_0] = {
        .type_name = "q5_0",
        .blck_size = QK5_0,
        .type_size = sizeof(block_q5_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref,
    },
    [GGML_TYPE_Q5_1] = {
        .type_name = "q5_1",
        .blck_size = QK5_1,
        .type_size = sizeof(block_q5_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
    },
    [GGML_TYPE_Q8_0] = {
        .type_name = "q8_0",
        .blck_size = QK8_0,
        .type_size = sizeof(block_q8_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q8_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
    },
    [GGML_TYPE_Q8_1] = {
        .type_name = "q8_1",
        .blck_size = QK8_1,
        .type_size = sizeof(block_q8_1),
        .is_quantized = true,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
    },
    [GGML_TYPE_Q2_K] = {
        .type_name = "q2_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q2_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q2_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref,
    },
    [GGML_TYPE_Q3_K] = {
        .type_name = "q3_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q3_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q3_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref,
    },
    [GGML_TYPE_Q4_K] = {
        .type_name = "q4_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q4_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref,
    },
    [GGML_TYPE_Q5_K] = {
        .type_name = "q5_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q5_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref,
    },
    [GGML_TYPE_Q6_K] = {
        .type_name = "q6_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q6_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q6_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref,
    },
    [GGML_TYPE_IQ2_XXS] = {
        .type_name = "iq2_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ2_XS] = {
        .type_name = "iq2_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xs,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ3_XXS] = {
        .type_name = "iq3_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
    },
    [GGML_TYPE_IQ3_S] = {
        .type_name = "iq3_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref,
    },
    [GGML_TYPE_IQ2_S] = {
        .type_name = "iq2_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref,
    },
    [GGML_TYPE_IQ1_S] = {
        .type_name = "iq1_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_s,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ1_M] = {
        .type_name = "iq1_m",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_m),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_m,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ4_NL] = {
        .type_name = "iq4_nl",
        .blck_size = QK4_NL,
        .type_size = sizeof(block_iq4_nl),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
    },
    [GGML_TYPE_IQ4_XS] = {
        .type_name = "iq4_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq4_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref,
    },
    [GGML_TYPE_Q8_K] = {
        .type_name = "q8_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q8_K),
        .is_quantized = true,
    },
    [GGML_TYPE_BF16] = {
        .type_name = "bf16",
        .blck_size = 1,
        .type_size = sizeof(ggml_bf16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
    },
    [31] = { // GGML_TYPE_Q4_0_4_4
        .type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [32] = { // GGML_TYPE_Q4_0_4_8
        .type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [33] = { // GGML_TYPE_Q4_0_8_8
        .type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [GGML_TYPE_TQ1_0] = {
        .type_name = "tq1_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq1_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq1_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq1_0_ref,
    },
    [GGML_TYPE_TQ2_0] = {
        .type_name = "tq2_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq2_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq2_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq2_0_ref,
    },
    [36] = { // GGML_TYPE_IQ4_NL_4_4
        .type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [37] = { // GGML_TYPE_IQ4_NL_4_8
        .type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [38] = { // GGML_TYPE_IQ4_NL_8_8
        .type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
};

const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
    GGML_ASSERT(type < GGML_TYPE_COUNT);
    return &type_traits[type];
}
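// Usage sketch (illustrative, not part of the original file): querying the traits
// table for a quantized type. For q4_0 a block holds QK4_0 (= 32) values packed
// into sizeof(block_q4_0) bytes.
//
//     const struct ggml_type_traits * tr = ggml_get_type_traits(GGML_TYPE_Q4_0);
//     printf("%s: %d values per %zu-byte block (quantized: %s)\n",
//            tr->type_name, (int) tr->blck_size, tr->type_size,
//            tr->is_quantized ? "yes" : "no");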
  783. //
  784. // ggml object
  785. //
  786. struct ggml_object {
  787. size_t offs;
  788. size_t size;
  789. struct ggml_object * next;
  790. enum ggml_object_type type;
  791. char padding[4];
  792. };
  793. static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
  794. //
  795. // ggml context
  796. //
  797. struct ggml_context {
  798. size_t mem_size;
  799. void * mem_buffer;
  800. bool mem_buffer_owned;
  801. bool no_alloc;
  802. int n_objects;
  803. struct ggml_object * objects_begin;
  804. struct ggml_object * objects_end;
  805. };
  806. struct ggml_context_container {
  807. bool used;
  808. struct ggml_context context;
  809. };
  810. //
  811. // data types
  812. //
  813. static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  814. "NONE",
  815. "DUP",
  816. "ADD",
  817. "ADD1",
  818. "ACC",
  819. "SUB",
  820. "MUL",
  821. "DIV",
  822. "SQR",
  823. "SQRT",
  824. "LOG",
  825. "SIN",
  826. "COS",
  827. "SUM",
  828. "SUM_ROWS",
  829. "MEAN",
  830. "ARGMAX",
  831. "COUNT_EQUAL",
  832. "REPEAT",
  833. "REPEAT_BACK",
  834. "CONCAT",
  835. "SILU_BACK",
  836. "NORM",
  837. "RMS_NORM",
  838. "RMS_NORM_BACK",
  839. "GROUP_NORM",
  840. "L2_NORM",
  841. "MUL_MAT",
  842. "MUL_MAT_ID",
  843. "OUT_PROD",
  844. "SCALE",
  845. "SET",
  846. "CPY",
  847. "CONT",
  848. "RESHAPE",
  849. "VIEW",
  850. "PERMUTE",
  851. "TRANSPOSE",
  852. "GET_ROWS",
  853. "GET_ROWS_BACK",
  854. "DIAG",
  855. "DIAG_MASK_INF",
  856. "DIAG_MASK_ZERO",
  857. "SOFT_MAX",
  858. "SOFT_MAX_BACK",
  859. "ROPE",
  860. "ROPE_BACK",
  861. "CLAMP",
  862. "CONV_TRANSPOSE_1D",
  863. "IM2COL",
  864. "IM2COL_BACK",
  865. "CONV_2D_DW",
  866. "CONV_TRANSPOSE_2D",
  867. "POOL_1D",
  868. "POOL_2D",
  869. "POOL_2D_BACK",
  870. "UPSCALE",
  871. "PAD",
  872. "PAD_REFLECT_1D",
  873. "ARANGE",
  874. "TIMESTEP_EMBEDDING",
  875. "ARGSORT",
  876. "LEAKY_RELU",
  877. "FLASH_ATTN_EXT",
  878. "FLASH_ATTN_BACK",
  879. "SSM_CONV",
  880. "SSM_SCAN",
  881. "WIN_PART",
  882. "WIN_UNPART",
  883. "GET_REL_POS",
  884. "ADD_REL_POS",
  885. "RWKV_WKV6",
  886. "GATED_LINEAR_ATTN",
  887. "RWKV_WKV7",
  888. "UNARY",
  889. "MAP_CUSTOM1",
  890. "MAP_CUSTOM2",
  891. "MAP_CUSTOM3",
  892. "CUSTOM",
  893. "CROSS_ENTROPY_LOSS",
  894. "CROSS_ENTROPY_LOSS_BACK",
  895. "OPT_STEP_ADAMW",
  896. };
  897. static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
  898. static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  899. "none",
  900. "x",
  901. "x+y",
  902. "x+y",
  903. "view(x,nb,offset)+=y->x",
  904. "x-y",
  905. "x*y",
  906. "x/y",
  907. "x^2",
  908. "√x",
  909. "log(x)",
  910. "sin(x)",
  911. "cos(x)",
  912. "Σx",
  913. "Σx_k",
  914. "Σx/n",
  915. "argmax(x)",
  916. "count_equal(x)",
  917. "repeat(x)",
  918. "repeat_back(x)",
  919. "concat(x, y)",
  920. "silu_back(x)",
  921. "norm(x)",
  922. "rms_norm(x)",
  923. "rms_norm_back(x)",
  924. "group_norm(x)",
  925. "l2_norm(x)",
  926. "X*Y",
  927. "X[i]*Y",
  928. "X*Y",
  929. "x*v",
  930. "y-\\>view(x)",
  931. "x-\\>y",
  932. "cont(x)",
  933. "reshape(x)",
  934. "view(x)",
  935. "permute(x)",
  936. "transpose(x)",
  937. "get_rows(x)",
  938. "get_rows_back(x)",
  939. "diag(x)",
  940. "diag_mask_inf(x)",
  941. "diag_mask_zero(x)",
  942. "soft_max(x)",
  943. "soft_max_back(x)",
  944. "rope(x)",
  945. "rope_back(x)",
  946. "clamp(x)",
  947. "conv_transpose_1d(x)",
  948. "im2col(x)",
  949. "im2col_back(x)",
  950. "conv_2d_dw(x)",
  951. "conv_transpose_2d(x)",
  952. "pool_1d(x)",
  953. "pool_2d(x)",
  954. "pool_2d_back(x)",
  955. "upscale(x)",
  956. "pad(x)",
  957. "pad_reflect_1d(x)",
  958. "arange(start, stop, step)",
  959. "timestep_embedding(timesteps, dim, max_period)",
  960. "argsort(x)",
  961. "leaky_relu(x)",
  962. "flash_attn_ext(x)",
  963. "flash_attn_back(x)",
  964. "ssm_conv(x)",
  965. "ssm_scan(x)",
  966. "win_part(x)",
  967. "win_unpart(x)",
  968. "get_rel_pos(x)",
  969. "add_rel_pos(x)",
  970. "rwkv_wkv6(k, v, r, tf, td, s)",
  971. "gated_linear_attn(k, v, q, gate, s)",
  972. "rwkv_wkv7(r, w, k, v, a, b, s)",
  973. "unary(x)",
  974. "map_custom(x)",
  975. "map_custom(x,y)",
  976. "map_custom(x,y,z)",
  977. "custom(x)",
  978. "cross_entropy_loss(x,y)",
  979. "cross_entropy_loss_back(x,y)",
  980. "adamw(x)",
  981. };
  982. static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
  983. static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
  984. static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
  985. "ABS",
  986. "SGN",
  987. "NEG",
  988. "STEP",
  989. "TANH",
  990. "ELU",
  991. "RELU",
  992. "SIGMOID",
  993. "GELU",
  994. "GELU_QUICK",
  995. "SILU",
  996. "HARDSWISH",
  997. "HARDSIGMOID",
  998. "EXP",
  999. "GELU_ERF",
  1000. };
  1001. static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
  1002. static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  1003. static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
  1004. ////////////////////////////////////////////////////////////////////////////////
  1005. void ggml_print_object(const struct ggml_object * obj) {
  1006. GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
  1007. obj->type, obj->offs, obj->size, (const void *) obj->next);
  1008. }
  1009. void ggml_print_objects(const struct ggml_context * ctx) {
  1010. struct ggml_object * obj = ctx->objects_begin;
  1011. GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
  1012. while (obj != NULL) {
  1013. ggml_print_object(obj);
  1014. obj = obj->next;
  1015. }
  1016. GGML_LOG_INFO("%s: --- end ---\n", __func__);
  1017. }
  1018. int64_t ggml_nelements(const struct ggml_tensor * tensor) {
  1019. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1020. return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1021. }
  1022. int64_t ggml_nrows(const struct ggml_tensor * tensor) {
  1023. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1024. return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1025. }
  1026. size_t ggml_nbytes(const struct ggml_tensor * tensor) {
  1027. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1028. if (tensor->ne[i] <= 0) {
  1029. return 0;
  1030. }
  1031. }
  1032. size_t nbytes;
  1033. const size_t blck_size = ggml_blck_size(tensor->type);
  1034. if (blck_size == 1) {
  1035. nbytes = ggml_type_size(tensor->type);
  1036. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1037. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1038. }
  1039. }
  1040. else {
  1041. nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
  1042. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1043. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1044. }
  1045. }
  1046. return nbytes;
  1047. }
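/*
  Illustrative sketch (not part of the upstream sources): for a plain contiguous
  F32 tensor the value computed above reduces to element count * sizeof(float);
  the per-dimension stride formula is what also covers non-contiguous views.
  Assumes ctx is a context returned by ggml_init.

    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    // ggml_nbytes(t)     == 4*3*sizeof(float) == 48
    // ggml_nbytes_pad(t) rounds this up to a multiple of GGML_MEM_ALIGN
*/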
  1048. size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
  1049. return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
  1050. }
  1051. int64_t ggml_blck_size(enum ggml_type type) {
  1052. return type_traits[type].blck_size;
  1053. }
  1054. size_t ggml_type_size(enum ggml_type type) {
  1055. return type_traits[type].type_size;
  1056. }
  1057. size_t ggml_row_size(enum ggml_type type, int64_t ne) {
  1058. assert(ne % ggml_blck_size(type) == 0);
  1059. return ggml_type_size(type)*ne/ggml_blck_size(type);
  1060. }
  1061. double ggml_type_sizef(enum ggml_type type) {
  1062. return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
  1063. }
  1064. const char * ggml_type_name(enum ggml_type type) {
  1065. return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
  1066. }
  1067. bool ggml_is_quantized(enum ggml_type type) {
  1068. return type_traits[type].is_quantized;
  1069. }
  1070. const char * ggml_op_name(enum ggml_op op) {
  1071. return GGML_OP_NAME[op];
  1072. }
  1073. const char * ggml_op_symbol(enum ggml_op op) {
  1074. return GGML_OP_SYMBOL[op];
  1075. }
  1076. const char * ggml_unary_op_name(enum ggml_unary_op op) {
  1077. return GGML_UNARY_OP_NAME[op];
  1078. }
  1079. const char * ggml_op_desc(const struct ggml_tensor * t) {
  1080. if (t->op == GGML_OP_UNARY) {
  1081. enum ggml_unary_op uop = ggml_get_unary_op(t);
  1082. return ggml_unary_op_name(uop);
  1083. }
  1084. return ggml_op_name(t->op);
  1085. }
  1086. size_t ggml_element_size(const struct ggml_tensor * tensor) {
  1087. return ggml_type_size(tensor->type);
  1088. }
  1089. bool ggml_is_scalar(const struct ggml_tensor * tensor) {
  1090. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1091. return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1092. }
  1093. bool ggml_is_vector(const struct ggml_tensor * tensor) {
  1094. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1095. return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1096. }
  1097. bool ggml_is_matrix(const struct ggml_tensor * tensor) {
  1098. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1099. return tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1100. }
  1101. bool ggml_is_3d(const struct ggml_tensor * tensor) {
  1102. return tensor->ne[3] == 1;
  1103. }
  1104. int ggml_n_dims(const struct ggml_tensor * tensor) {
  1105. for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
  1106. if (tensor->ne[i] > 1) {
  1107. return i + 1;
  1108. }
  1109. }
  1110. return 1;
  1111. }
  1112. enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
  1113. enum ggml_type wtype = GGML_TYPE_COUNT;
  1114. switch (ftype) {
  1115. case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
  1116. case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
  1117. case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
  1118. case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
  1119. case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
  1120. case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
  1121. case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
  1122. case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
  1123. case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
  1124. case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
  1125. case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
  1126. case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
  1127. case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
  1128. case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
  1129. case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
  1130. case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
  1131. case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
  1132. case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
  1133. case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
  1134. case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
  1135. case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
  1136. case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
  1137. case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
  1138. case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
  1139. }
  1140. GGML_ASSERT(wtype != GGML_TYPE_COUNT);
  1141. return wtype;
  1142. }
  1143. size_t ggml_tensor_overhead(void) {
  1144. return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
  1145. }
  1146. bool ggml_is_transposed(const struct ggml_tensor * tensor) {
  1147. return tensor->nb[0] > tensor->nb[1];
  1148. }
  1149. static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
  1150. size_t next_nb = ggml_type_size(tensor->type);
  1151. if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
  1152. return false;
  1153. }
  1154. next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
  1155. for (int i = 1; i < GGML_MAX_DIMS; i++) {
  1156. if (tensor->ne[i] != 1) {
  1157. if (i > n) {
  1158. if (tensor->nb[i] != next_nb) {
  1159. return false;
  1160. }
  1161. next_nb *= tensor->ne[i];
  1162. } else {
  1163. // this dimension does not need to be contiguous
  1164. next_nb = tensor->ne[i]*tensor->nb[i];
  1165. }
  1166. }
  1167. }
  1168. return true;
  1169. }
  1170. bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
  1171. return ggml_is_contiguous_0(tensor);
  1172. }
  1173. bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
  1174. return ggml_is_contiguous_n(tensor, 0);
  1175. }
  1176. bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
  1177. return ggml_is_contiguous_n(tensor, 1);
  1178. }
  1179. bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
  1180. return ggml_is_contiguous_n(tensor, 2);
  1181. }
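/*
  Sketch of the contiguity checks above (illustrative, assuming a context ctx):
  ggml_is_contiguous_n(t, n) only requires densely packed strides for
  dimensions above n, so a tensor with padded rows can still satisfy
  ggml_is_contiguous_1/2 even though it is not fully contiguous.

    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    // freshly allocated tensors are densely packed:
    //   ggml_is_contiguous(t)   -> true
    //   ggml_is_contiguous_1(t) -> true (the dim-1 stride is not constrained)
*/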
  1182. bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
  1183. return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
  1184. }
  1185. bool ggml_is_permuted(const struct ggml_tensor * tensor) {
  1186. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1187. return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
  1188. }
  1189. bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
  1190. return
  1191. tensor->nb[0] > tensor->nb[2] &&
  1192. tensor->nb[1] > tensor->nb[0] &&
  1193. tensor->nb[2] == ggml_type_size(tensor->type);
  1194. }
  1195. static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
  1196. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1197. return
  1198. tensor->nb[0] == ggml_type_size(tensor->type) &&
  1199. tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
  1200. tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
  1201. }
  1202. bool ggml_is_empty(const struct ggml_tensor * tensor) {
  1203. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1204. if (tensor->ne[i] == 0) {
  1205. // empty if any dimension has no elements
  1206. return true;
  1207. }
  1208. }
  1209. return false;
  1210. }
  1211. bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1212. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1213. return
  1214. (t0->ne[0] == t1->ne[0]) &&
  1215. (t0->ne[1] == t1->ne[1]) &&
  1216. (t0->ne[2] == t1->ne[2]) &&
  1217. (t0->ne[3] == t1->ne[3]);
  1218. }
  1219. bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1220. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1221. return
  1222. (t0->nb[0] == t1->nb[0]) &&
  1223. (t0->nb[1] == t1->nb[1]) &&
  1224. (t0->nb[2] == t1->nb[2]) &&
  1225. (t0->nb[3] == t1->nb[3]);
  1226. }
  1227. // check if t1 can be represented as a repetition of t0
  1228. bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1229. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1230. return ggml_is_empty(t0) ? ggml_is_empty(t1) :
  1231. (t1->ne[0]%t0->ne[0] == 0) &&
  1232. (t1->ne[1]%t0->ne[1] == 0) &&
  1233. (t1->ne[2]%t0->ne[2] == 0) &&
  1234. (t1->ne[3]%t0->ne[3] == 0);
  1235. }
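/*
  Example of the repetition rule (a sketch, not upstream code): t1 must be an
  integer multiple of t0 in every dimension, which is what enables the
  broadcasting used by ggml_add/ggml_mul/ggml_div further below.

    struct ggml_tensor * bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);     // [16]
    struct ggml_tensor * x    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 32); // [16, 32]
    // ggml_can_repeat(bias, x) -> true, so ggml_add(ctx, x, bias) broadcasts
    // the bias across all 32 rows
*/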
  1236. static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1237. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1238. return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
  1239. }
  1240. // assert that pointer is aligned to GGML_MEM_ALIGN
  1241. #define GGML_ASSERT_ALIGNED(ptr) \
  1242. GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
  1243. ////////////////////////////////////////////////////////////////////////////////
  1244. struct ggml_context * ggml_init(struct ggml_init_params params) {
  1245. static bool is_first_call = true;
  1246. ggml_critical_section_start();
  1247. if (is_first_call) {
  1248. // initialize time system (required on Windows)
  1249. ggml_time_init();
  1250. for (int i = 0; i < (1 << 16); ++i) {
  1251. union {
  1252. uint16_t u16;
  1253. ggml_fp16_t fp16;
  1254. } u = {i};
  1255. ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
  1256. }
  1257. is_first_call = false;
  1258. }
  1259. ggml_critical_section_end();
  1260. struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1261. // allow calling ggml_init with a mem_size of 0
  1262. if (params.mem_size == 0) {
  1263. params.mem_size = GGML_MEM_ALIGN;
  1264. }
  1265. const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
  1266. *ctx = (struct ggml_context) {
  1267. /*.mem_size =*/ mem_size,
  1268. /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
  1269. /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
  1270. /*.no_alloc =*/ params.no_alloc,
  1271. /*.n_objects =*/ 0,
  1272. /*.objects_begin =*/ NULL,
  1273. /*.objects_end =*/ NULL,
  1274. };
  1275. GGML_ASSERT(ctx->mem_buffer != NULL);
  1276. GGML_ASSERT_ALIGNED(ctx->mem_buffer);
  1277. GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
  1278. return ctx;
  1279. }
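/*
  Minimal usage sketch for the context API above (identifiers other than the
  ggml_* calls are illustrative):

    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,  // 16 MB arena for objects + tensor data
        .mem_buffer = NULL,          // let ggml allocate (and own) the buffer
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);
    // ... create tensors and build graphs ...
    ggml_free(ctx);                  // releases the buffer only if it is owned
*/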
  1280. void ggml_reset(struct ggml_context * ctx) {
  1281. if (ctx == NULL) {
  1282. return;
  1283. }
  1284. ctx->n_objects = 0;
  1285. ctx->objects_begin = NULL;
  1286. ctx->objects_end = NULL;
  1287. }
  1288. void ggml_free(struct ggml_context * ctx) {
  1289. if (ctx == NULL) {
  1290. return;
  1291. }
  1292. if (ctx->mem_buffer_owned) {
  1293. ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
  1294. }
  1295. GGML_FREE(ctx);
  1296. }
  1297. size_t ggml_used_mem(const struct ggml_context * ctx) {
  1298. return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
  1299. }
  1300. bool ggml_get_no_alloc(struct ggml_context * ctx) {
  1301. return ctx->no_alloc;
  1302. }
  1303. void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
  1304. ctx->no_alloc = no_alloc;
  1305. }
  1306. void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
  1307. return ctx->mem_buffer;
  1308. }
  1309. size_t ggml_get_mem_size(const struct ggml_context * ctx) {
  1310. return ctx->mem_size;
  1311. }
  1312. size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
  1313. size_t max_size = 0;
  1314. for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
  1315. size_t bytes = ggml_nbytes(tensor);
  1316. max_size = MAX(max_size, bytes);
  1317. }
  1318. return max_size;
  1319. }
  1320. ////////////////////////////////////////////////////////////////////////////////
  1321. static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
  1322. // always insert objects at the end of the context's memory pool
  1323. struct ggml_object * obj_cur = ctx->objects_end;
  1324. const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
  1325. const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
  1326. const size_t cur_end = cur_offs + cur_size;
  1327. // align to GGML_MEM_ALIGN
  1328. size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
  1329. char * const mem_buffer = ctx->mem_buffer;
  1330. struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
  1331. if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
  1332. GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
  1333. __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
  1334. #ifndef NDEBUG
  1335. GGML_ABORT("not enough space in the context's memory pool");
  1336. #endif
  1337. return NULL;
  1338. }
  1339. *obj_new = (struct ggml_object) {
  1340. .offs = cur_end + GGML_OBJECT_SIZE,
  1341. .size = size_needed,
  1342. .next = NULL,
  1343. .type = type,
  1344. };
  1345. GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
  1346. if (obj_cur != NULL) {
  1347. obj_cur->next = obj_new;
  1348. } else {
  1349. // this is the first object in this context
  1350. ctx->objects_begin = obj_new;
  1351. }
  1352. ctx->objects_end = obj_new;
  1353. //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
  1354. return obj_new;
  1355. }
  1356. static struct ggml_tensor * ggml_new_tensor_impl(
  1357. struct ggml_context * ctx,
  1358. enum ggml_type type,
  1359. int n_dims,
  1360. const int64_t * ne,
  1361. struct ggml_tensor * view_src,
  1362. size_t view_offs) {
  1363. GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
  1364. GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
  1365. // find the base tensor and absolute offset
  1366. if (view_src != NULL && view_src->view_src != NULL) {
  1367. view_offs += view_src->view_offs;
  1368. view_src = view_src->view_src;
  1369. }
  1370. size_t data_size = ggml_row_size(type, ne[0]);
  1371. for (int i = 1; i < n_dims; i++) {
  1372. data_size *= ne[i];
  1373. }
  1374. GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
  1375. void * data = view_src != NULL ? view_src->data : NULL;
  1376. if (data != NULL) {
  1377. data = (char *) data + view_offs;
  1378. }
  1379. size_t obj_alloc_size = 0;
  1380. if (view_src == NULL && !ctx->no_alloc) {
  1381. // allocate tensor data in the context's memory pool
  1382. obj_alloc_size = data_size;
  1383. }
  1384. struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
  1385. GGML_ASSERT(obj_new);
  1386. struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
  1387. *result = (struct ggml_tensor) {
  1388. /*.type =*/ type,
  1389. /*.buffer =*/ NULL,
  1390. /*.ne =*/ { 1, 1, 1, 1 },
  1391. /*.nb =*/ { 0, 0, 0, 0 },
  1392. /*.op =*/ GGML_OP_NONE,
  1393. /*.op_params =*/ { 0 },
  1394. /*.flags =*/ 0,
  1395. /*.src =*/ { NULL },
  1396. /*.view_src =*/ view_src,
  1397. /*.view_offs =*/ view_offs,
  1398. /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
  1399. /*.name =*/ { 0 },
  1400. /*.extra =*/ NULL,
  1401. /*.padding =*/ { 0 },
  1402. };
  1403. // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
  1404. //GGML_ASSERT_ALIGNED(result->data);
  1405. for (int i = 0; i < n_dims; i++) {
  1406. result->ne[i] = ne[i];
  1407. }
  1408. result->nb[0] = ggml_type_size(type);
  1409. result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
  1410. for (int i = 2; i < GGML_MAX_DIMS; i++) {
  1411. result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
  1412. }
  1413. ctx->n_objects++;
  1414. return result;
  1415. }
  1416. struct ggml_tensor * ggml_new_tensor(
  1417. struct ggml_context * ctx,
  1418. enum ggml_type type,
  1419. int n_dims,
  1420. const int64_t * ne) {
  1421. return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
  1422. }
  1423. struct ggml_tensor * ggml_new_tensor_1d(
  1424. struct ggml_context * ctx,
  1425. enum ggml_type type,
  1426. int64_t ne0) {
  1427. return ggml_new_tensor(ctx, type, 1, &ne0);
  1428. }
  1429. struct ggml_tensor * ggml_new_tensor_2d(
  1430. struct ggml_context * ctx,
  1431. enum ggml_type type,
  1432. int64_t ne0,
  1433. int64_t ne1) {
  1434. const int64_t ne[2] = { ne0, ne1 };
  1435. return ggml_new_tensor(ctx, type, 2, ne);
  1436. }
  1437. struct ggml_tensor * ggml_new_tensor_3d(
  1438. struct ggml_context * ctx,
  1439. enum ggml_type type,
  1440. int64_t ne0,
  1441. int64_t ne1,
  1442. int64_t ne2) {
  1443. const int64_t ne[3] = { ne0, ne1, ne2 };
  1444. return ggml_new_tensor(ctx, type, 3, ne);
  1445. }
  1446. struct ggml_tensor * ggml_new_tensor_4d(
  1447. struct ggml_context * ctx,
  1448. enum ggml_type type,
  1449. int64_t ne0,
  1450. int64_t ne1,
  1451. int64_t ne2,
  1452. int64_t ne3) {
  1453. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  1454. return ggml_new_tensor(ctx, type, 4, ne);
  1455. }
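/*
  Sketch of the constructor helpers (assuming ctx from ggml_init): the
  1d/2d/3d/4d variants only differ in how many ne values they forward to
  ggml_new_tensor.

    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 768, 3072);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3072);
    // with no_alloc == false the data lives inside ctx->mem_buffer, so
    // ggml_tensor_overhead() + ggml_nbytes() per tensor must fit in mem_size
*/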
  1456. void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
  1457. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
  1458. return (uint8_t *)ctx->mem_buffer + obj->offs;
  1459. }
  1460. struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
  1461. return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
  1462. }
  1463. void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
  1464. const int64_t ne2 = tensor->ne[2];
  1465. const int64_t ne1 = tensor->ne[1];
  1466. const int64_t ne0 = tensor->ne[0];
  1467. const int64_t i3_ = (i/(ne2*ne1*ne0));
  1468. const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
  1469. const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
  1470. const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
  1471. if (i0) {
  1472. * i0 = i0_;
  1473. }
  1474. if (i1) {
  1475. * i1 = i1_;
  1476. }
  1477. if (i2) {
  1478. * i2 = i2_;
  1479. }
  1480. if (i3) {
  1481. * i3 = i3_;
  1482. }
  1483. }
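/*
  Example (illustrative only): ggml_unravel_index maps a flat element index
  back to per-dimension coordinates, with ne[0] the fastest-varying dimension;
  any of the output pointers may be NULL.

    // for a tensor t with ne = {4, 3, 1, 1}, flat index 7 unravels to
    // i0 = 3, i1 = 1, i2 = 0, i3 = 0 (7 = 1*4 + 3)
    int64_t i0, i1, i2, i3;
    ggml_unravel_index(t, 7, &i0, &i1, &i2, &i3);
*/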
  1484. void * ggml_get_data(const struct ggml_tensor * tensor) {
  1485. return tensor->data;
  1486. }
  1487. float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
  1488. assert(tensor->type == GGML_TYPE_F32);
  1489. return (float *)(tensor->data);
  1490. }
  1491. enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
  1492. GGML_ASSERT(tensor->op == GGML_OP_UNARY);
  1493. return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
  1494. }
  1495. const char * ggml_get_name(const struct ggml_tensor * tensor) {
  1496. return tensor->name;
  1497. }
  1498. struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
  1499. size_t i;
  1500. for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
  1501. tensor->name[i] = name[i];
  1502. }
  1503. tensor->name[i] = '\0';
  1504. return tensor;
  1505. }
  1506. struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
  1507. va_list args;
  1508. va_start(args, fmt);
  1509. vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
  1510. va_end(args);
  1511. return tensor;
  1512. }
  1513. struct ggml_tensor * ggml_view_tensor(
  1514. struct ggml_context * ctx,
  1515. struct ggml_tensor * src) {
  1516. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
  1517. ggml_format_name(result, "%s (view)", src->name);
  1518. for (int i = 0; i < GGML_MAX_DIMS; i++) {
  1519. result->nb[i] = src->nb[i];
  1520. }
  1521. return result;
  1522. }
  1523. struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
  1524. struct ggml_object * obj = ctx->objects_begin;
  1525. char * const mem_buffer = ctx->mem_buffer;
  1526. while (obj != NULL) {
  1527. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1528. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1529. }
  1530. obj = obj->next;
  1531. }
  1532. return NULL;
  1533. }
  1534. struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
  1535. struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
  1536. obj = obj->next;
  1537. char * const mem_buffer = ctx->mem_buffer;
  1538. while (obj != NULL) {
  1539. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1540. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1541. }
  1542. obj = obj->next;
  1543. }
  1544. return NULL;
  1545. }
  1546. struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
  1547. struct ggml_object * obj = ctx->objects_begin;
  1548. char * const mem_buffer = ctx->mem_buffer;
  1549. while (obj != NULL) {
  1550. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1551. struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
  1552. if (strcmp(cur->name, name) == 0) {
  1553. return cur;
  1554. }
  1555. }
  1556. obj = obj->next;
  1557. }
  1558. return NULL;
  1559. }
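/*
  Sketch: the lookup helpers above walk the context's object list, which makes
  it possible to enumerate every tensor created in it (names here are
  illustrative, not upstream code).

    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL;
         t = ggml_get_next_tensor(ctx, t)) {
        printf("%-32s %8.2f KiB\n", ggml_get_name(t), ggml_nbytes(t)/1024.0);
    }
*/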
  1560. ////////////////////////////////////////////////////////////////////////////////
  1561. // ggml_dup
  1562. static struct ggml_tensor * ggml_dup_impl(
  1563. struct ggml_context * ctx,
  1564. struct ggml_tensor * a,
  1565. bool inplace) {
  1566. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1567. result->op = GGML_OP_DUP;
  1568. result->src[0] = a;
  1569. return result;
  1570. }
  1571. struct ggml_tensor * ggml_dup(
  1572. struct ggml_context * ctx,
  1573. struct ggml_tensor * a) {
  1574. return ggml_dup_impl(ctx, a, false);
  1575. }
  1576. struct ggml_tensor * ggml_dup_inplace(
  1577. struct ggml_context * ctx,
  1578. struct ggml_tensor * a) {
  1579. return ggml_dup_impl(ctx, a, true);
  1580. }
  1581. // ggml_add
  1582. static struct ggml_tensor * ggml_add_impl(
  1583. struct ggml_context * ctx,
  1584. struct ggml_tensor * a,
  1585. struct ggml_tensor * b,
  1586. bool inplace) {
  1587. GGML_ASSERT(ggml_can_repeat(b, a));
  1588. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1589. result->op = GGML_OP_ADD;
  1590. result->src[0] = a;
  1591. result->src[1] = b;
  1592. return result;
  1593. }
  1594. struct ggml_tensor * ggml_add(
  1595. struct ggml_context * ctx,
  1596. struct ggml_tensor * a,
  1597. struct ggml_tensor * b) {
  1598. return ggml_add_impl(ctx, a, b, false);
  1599. }
  1600. struct ggml_tensor * ggml_add_inplace(
  1601. struct ggml_context * ctx,
  1602. struct ggml_tensor * a,
  1603. struct ggml_tensor * b) {
  1604. return ggml_add_impl(ctx, a, b, true);
  1605. }
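/*
  Graph-building sketch (not upstream code): ggml_add and the other op
  constructors in this file do not compute anything; they only record the op
  and its sources, and the work happens later when the graph is evaluated.

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * y = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * z = ggml_add(ctx, x, y);   // z->op == GGML_OP_ADD
    // z->src[0] == x, z->src[1] == y; z->data holds no result until compute
*/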
  1606. // ggml_add_cast
  1607. static struct ggml_tensor * ggml_add_cast_impl(
  1608. struct ggml_context * ctx,
  1609. struct ggml_tensor * a,
  1610. struct ggml_tensor * b,
  1611. enum ggml_type type) {
  1612. // TODO: support less-strict constraint
  1613. // GGML_ASSERT(ggml_can_repeat(b, a));
  1614. GGML_ASSERT(ggml_can_repeat_rows(b, a));
1615. // currently only supported for quantized input, f16 and bf16
  1616. GGML_ASSERT(ggml_is_quantized(a->type) ||
  1617. a->type == GGML_TYPE_F16 ||
  1618. a->type == GGML_TYPE_BF16);
  1619. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  1620. result->op = GGML_OP_ADD;
  1621. result->src[0] = a;
  1622. result->src[1] = b;
  1623. return result;
  1624. }
  1625. struct ggml_tensor * ggml_add_cast(
  1626. struct ggml_context * ctx,
  1627. struct ggml_tensor * a,
  1628. struct ggml_tensor * b,
  1629. enum ggml_type type) {
  1630. return ggml_add_cast_impl(ctx, a, b, type);
  1631. }
  1632. // ggml_add1
  1633. static struct ggml_tensor * ggml_add1_impl(
  1634. struct ggml_context * ctx,
  1635. struct ggml_tensor * a,
  1636. struct ggml_tensor * b,
  1637. bool inplace) {
  1638. GGML_ASSERT(ggml_is_scalar(b));
  1639. GGML_ASSERT(ggml_is_padded_1d(a));
  1640. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1641. result->op = GGML_OP_ADD1;
  1642. result->src[0] = a;
  1643. result->src[1] = b;
  1644. return result;
  1645. }
  1646. struct ggml_tensor * ggml_add1(
  1647. struct ggml_context * ctx,
  1648. struct ggml_tensor * a,
  1649. struct ggml_tensor * b) {
  1650. return ggml_add1_impl(ctx, a, b, false);
  1651. }
  1652. struct ggml_tensor * ggml_add1_inplace(
  1653. struct ggml_context * ctx,
  1654. struct ggml_tensor * a,
  1655. struct ggml_tensor * b) {
  1656. return ggml_add1_impl(ctx, a, b, true);
  1657. }
  1658. // ggml_acc
  1659. static struct ggml_tensor * ggml_acc_impl(
  1660. struct ggml_context * ctx,
  1661. struct ggml_tensor * a,
  1662. struct ggml_tensor * b,
  1663. size_t nb1,
  1664. size_t nb2,
  1665. size_t nb3,
  1666. size_t offset,
  1667. bool inplace) {
  1668. GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
  1669. GGML_ASSERT(ggml_is_contiguous(a));
  1670. GGML_ASSERT(a->type == GGML_TYPE_F32);
  1671. GGML_ASSERT(b->type == GGML_TYPE_F32);
  1672. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1673. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  1674. ggml_set_op_params(result, params, sizeof(params));
  1675. result->op = GGML_OP_ACC;
  1676. result->src[0] = a;
  1677. result->src[1] = b;
  1678. return result;
  1679. }
  1680. struct ggml_tensor * ggml_acc(
  1681. struct ggml_context * ctx,
  1682. struct ggml_tensor * a,
  1683. struct ggml_tensor * b,
  1684. size_t nb1,
  1685. size_t nb2,
  1686. size_t nb3,
  1687. size_t offset) {
  1688. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  1689. }
  1690. struct ggml_tensor * ggml_acc_inplace(
  1691. struct ggml_context * ctx,
  1692. struct ggml_tensor * a,
  1693. struct ggml_tensor * b,
  1694. size_t nb1,
  1695. size_t nb2,
  1696. size_t nb3,
  1697. size_t offset) {
  1698. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  1699. }
  1700. // ggml_sub
  1701. static struct ggml_tensor * ggml_sub_impl(
  1702. struct ggml_context * ctx,
  1703. struct ggml_tensor * a,
  1704. struct ggml_tensor * b,
  1705. bool inplace) {
  1706. GGML_ASSERT(ggml_can_repeat(b, a));
  1707. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1708. result->op = GGML_OP_SUB;
  1709. result->src[0] = a;
  1710. result->src[1] = b;
  1711. return result;
  1712. }
  1713. struct ggml_tensor * ggml_sub(
  1714. struct ggml_context * ctx,
  1715. struct ggml_tensor * a,
  1716. struct ggml_tensor * b) {
  1717. return ggml_sub_impl(ctx, a, b, false);
  1718. }
  1719. struct ggml_tensor * ggml_sub_inplace(
  1720. struct ggml_context * ctx,
  1721. struct ggml_tensor * a,
  1722. struct ggml_tensor * b) {
  1723. return ggml_sub_impl(ctx, a, b, true);
  1724. }
  1725. // ggml_mul
  1726. static struct ggml_tensor * ggml_mul_impl(
  1727. struct ggml_context * ctx,
  1728. struct ggml_tensor * a,
  1729. struct ggml_tensor * b,
  1730. bool inplace) {
  1731. GGML_ASSERT(ggml_can_repeat(b, a));
  1732. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1733. result->op = GGML_OP_MUL;
  1734. result->src[0] = a;
  1735. result->src[1] = b;
  1736. return result;
  1737. }
  1738. struct ggml_tensor * ggml_mul(
  1739. struct ggml_context * ctx,
  1740. struct ggml_tensor * a,
  1741. struct ggml_tensor * b) {
  1742. return ggml_mul_impl(ctx, a, b, false);
  1743. }
  1744. struct ggml_tensor * ggml_mul_inplace(
  1745. struct ggml_context * ctx,
  1746. struct ggml_tensor * a,
  1747. struct ggml_tensor * b) {
  1748. return ggml_mul_impl(ctx, a, b, true);
  1749. }
  1750. // ggml_div
  1751. static struct ggml_tensor * ggml_div_impl(
  1752. struct ggml_context * ctx,
  1753. struct ggml_tensor * a,
  1754. struct ggml_tensor * b,
  1755. bool inplace) {
  1756. GGML_ASSERT(ggml_can_repeat(b, a));
  1757. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1758. result->op = GGML_OP_DIV;
  1759. result->src[0] = a;
  1760. result->src[1] = b;
  1761. return result;
  1762. }
  1763. struct ggml_tensor * ggml_div(
  1764. struct ggml_context * ctx,
  1765. struct ggml_tensor * a,
  1766. struct ggml_tensor * b) {
  1767. return ggml_div_impl(ctx, a, b, false);
  1768. }
  1769. struct ggml_tensor * ggml_div_inplace(
  1770. struct ggml_context * ctx,
  1771. struct ggml_tensor * a,
  1772. struct ggml_tensor * b) {
  1773. return ggml_div_impl(ctx, a, b, true);
  1774. }
  1775. // ggml_sqr
  1776. static struct ggml_tensor * ggml_sqr_impl(
  1777. struct ggml_context * ctx,
  1778. struct ggml_tensor * a,
  1779. bool inplace) {
  1780. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1781. result->op = GGML_OP_SQR;
  1782. result->src[0] = a;
  1783. return result;
  1784. }
  1785. struct ggml_tensor * ggml_sqr(
  1786. struct ggml_context * ctx,
  1787. struct ggml_tensor * a) {
  1788. return ggml_sqr_impl(ctx, a, false);
  1789. }
  1790. struct ggml_tensor * ggml_sqr_inplace(
  1791. struct ggml_context * ctx,
  1792. struct ggml_tensor * a) {
  1793. return ggml_sqr_impl(ctx, a, true);
  1794. }
  1795. // ggml_sqrt
  1796. static struct ggml_tensor * ggml_sqrt_impl(
  1797. struct ggml_context * ctx,
  1798. struct ggml_tensor * a,
  1799. bool inplace) {
  1800. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1801. result->op = GGML_OP_SQRT;
  1802. result->src[0] = a;
  1803. return result;
  1804. }
  1805. struct ggml_tensor * ggml_sqrt(
  1806. struct ggml_context * ctx,
  1807. struct ggml_tensor * a) {
  1808. return ggml_sqrt_impl(ctx, a, false);
  1809. }
  1810. struct ggml_tensor * ggml_sqrt_inplace(
  1811. struct ggml_context * ctx,
  1812. struct ggml_tensor * a) {
  1813. return ggml_sqrt_impl(ctx, a, true);
  1814. }
  1815. // ggml_log
  1816. static struct ggml_tensor * ggml_log_impl(
  1817. struct ggml_context * ctx,
  1818. struct ggml_tensor * a,
  1819. bool inplace) {
  1820. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1821. result->op = GGML_OP_LOG;
  1822. result->src[0] = a;
  1823. return result;
  1824. }
  1825. struct ggml_tensor * ggml_log(
  1826. struct ggml_context * ctx,
  1827. struct ggml_tensor * a) {
  1828. return ggml_log_impl(ctx, a, false);
  1829. }
  1830. struct ggml_tensor * ggml_log_inplace(
  1831. struct ggml_context * ctx,
  1832. struct ggml_tensor * a) {
  1833. return ggml_log_impl(ctx, a, true);
  1834. }
  1835. // ggml_sin
  1836. static struct ggml_tensor * ggml_sin_impl(
  1837. struct ggml_context * ctx,
  1838. struct ggml_tensor * a,
  1839. bool inplace) {
  1840. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1841. result->op = GGML_OP_SIN;
  1842. result->src[0] = a;
  1843. return result;
  1844. }
  1845. struct ggml_tensor * ggml_sin(
  1846. struct ggml_context * ctx,
  1847. struct ggml_tensor * a) {
  1848. return ggml_sin_impl(ctx, a, false);
  1849. }
  1850. struct ggml_tensor * ggml_sin_inplace(
  1851. struct ggml_context * ctx,
  1852. struct ggml_tensor * a) {
  1853. return ggml_sin_impl(ctx, a, true);
  1854. }
  1855. // ggml_cos
  1856. static struct ggml_tensor * ggml_cos_impl(
  1857. struct ggml_context * ctx,
  1858. struct ggml_tensor * a,
  1859. bool inplace) {
  1860. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1861. result->op = GGML_OP_COS;
  1862. result->src[0] = a;
  1863. return result;
  1864. }
  1865. struct ggml_tensor * ggml_cos(
  1866. struct ggml_context * ctx,
  1867. struct ggml_tensor * a) {
  1868. return ggml_cos_impl(ctx, a, false);
  1869. }
  1870. struct ggml_tensor * ggml_cos_inplace(
  1871. struct ggml_context * ctx,
  1872. struct ggml_tensor * a) {
  1873. return ggml_cos_impl(ctx, a, true);
  1874. }
  1875. // ggml_sum
  1876. struct ggml_tensor * ggml_sum(
  1877. struct ggml_context * ctx,
  1878. struct ggml_tensor * a) {
  1879. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  1880. result->op = GGML_OP_SUM;
  1881. result->src[0] = a;
  1882. return result;
  1883. }
  1884. // ggml_sum_rows
  1885. struct ggml_tensor * ggml_sum_rows(
  1886. struct ggml_context * ctx,
  1887. struct ggml_tensor * a) {
  1888. int64_t ne[GGML_MAX_DIMS] = { 1 };
  1889. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1890. ne[i] = a->ne[i];
  1891. }
  1892. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1893. result->op = GGML_OP_SUM_ROWS;
  1894. result->src[0] = a;
  1895. return result;
  1896. }
  1897. // ggml_mean
  1898. struct ggml_tensor * ggml_mean(
  1899. struct ggml_context * ctx,
  1900. struct ggml_tensor * a) {
  1901. int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
  1902. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  1903. result->op = GGML_OP_MEAN;
  1904. result->src[0] = a;
  1905. return result;
  1906. }
  1907. // ggml_argmax
  1908. struct ggml_tensor * ggml_argmax(
  1909. struct ggml_context * ctx,
  1910. struct ggml_tensor * a) {
  1911. GGML_ASSERT(ggml_is_matrix(a));
  1912. GGML_ASSERT(a->ne[0] <= INT32_MAX);
  1913. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
  1914. result->op = GGML_OP_ARGMAX;
  1915. result->src[0] = a;
  1916. return result;
  1917. }
  1918. // ggml_count_equal
  1919. struct ggml_tensor * ggml_count_equal(
  1920. struct ggml_context * ctx,
  1921. struct ggml_tensor * a,
  1922. struct ggml_tensor * b) {
  1923. GGML_ASSERT(ggml_are_same_shape(a, b));
  1924. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
  1925. result->op = GGML_OP_COUNT_EQUAL;
  1926. result->src[0] = a;
  1927. result->src[1] = b;
  1928. return result;
  1929. }
  1930. // ggml_repeat
  1931. struct ggml_tensor * ggml_repeat(
  1932. struct ggml_context * ctx,
  1933. struct ggml_tensor * a,
  1934. struct ggml_tensor * b) {
  1935. GGML_ASSERT(ggml_can_repeat(a, b));
  1936. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1937. result->op = GGML_OP_REPEAT;
  1938. result->src[0] = a;
  1939. return result;
  1940. }
  1941. struct ggml_tensor * ggml_repeat_4d(
  1942. struct ggml_context * ctx,
  1943. struct ggml_tensor * a,
  1944. int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
  1945. const bool can_repeat = ggml_is_empty(a) || (
  1946. (ne0 % a->ne[0] == 0) &&
  1947. (ne1 % a->ne[1] == 0) &&
  1948. (ne2 % a->ne[2] == 0) &&
  1949. (ne3 % a->ne[3] == 0)
  1950. );
  1951. GGML_ASSERT(can_repeat);
  1952. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  1953. result->op = GGML_OP_REPEAT;
  1954. result->src[0] = a;
  1955. return result;
  1956. }
  1957. // ggml_repeat_back
  1958. struct ggml_tensor * ggml_repeat_back(
  1959. struct ggml_context * ctx,
  1960. struct ggml_tensor * a,
  1961. struct ggml_tensor * b) {
  1962. GGML_ASSERT(ggml_can_repeat(b, a));
  1963. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1964. result->op = GGML_OP_REPEAT_BACK;
  1965. result->src[0] = a;
  1966. return result;
  1967. }
  1968. // ggml_concat
  1969. struct ggml_tensor * ggml_concat(
  1970. struct ggml_context * ctx,
  1971. struct ggml_tensor * a,
  1972. struct ggml_tensor * b,
  1973. int dim) {
  1974. GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
  1975. GGML_ASSERT(a->type == b->type);
  1976. int64_t ne[GGML_MAX_DIMS];
  1977. for (int d = 0; d < GGML_MAX_DIMS; ++d) {
  1978. if (d == dim) {
  1979. ne[d] = a->ne[d] + b->ne[d];
  1980. continue;
  1981. }
  1982. GGML_ASSERT(a->ne[d] == b->ne[d]);
  1983. ne[d] = a->ne[d];
  1984. }
  1985. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1986. ggml_set_op_params_i32(result, 0, dim);
  1987. result->op = GGML_OP_CONCAT;
  1988. result->src[0] = a;
  1989. result->src[1] = b;
  1990. return result;
  1991. }
  1992. // ggml_abs
  1993. struct ggml_tensor * ggml_abs(
  1994. struct ggml_context * ctx,
  1995. struct ggml_tensor * a) {
  1996. return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
  1997. }
  1998. struct ggml_tensor * ggml_abs_inplace(
  1999. struct ggml_context * ctx,
  2000. struct ggml_tensor * a) {
  2001. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
  2002. }
  2003. // ggml_sgn
  2004. struct ggml_tensor * ggml_sgn(
  2005. struct ggml_context * ctx,
  2006. struct ggml_tensor * a) {
  2007. return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
  2008. }
  2009. struct ggml_tensor * ggml_sgn_inplace(
  2010. struct ggml_context * ctx,
  2011. struct ggml_tensor * a) {
  2012. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
  2013. }
  2014. // ggml_neg
  2015. struct ggml_tensor * ggml_neg(
  2016. struct ggml_context * ctx,
  2017. struct ggml_tensor * a) {
  2018. return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
  2019. }
  2020. struct ggml_tensor * ggml_neg_inplace(
  2021. struct ggml_context * ctx,
  2022. struct ggml_tensor * a) {
  2023. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
  2024. }
  2025. // ggml_step
  2026. struct ggml_tensor * ggml_step(
  2027. struct ggml_context * ctx,
  2028. struct ggml_tensor * a) {
  2029. return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
  2030. }
  2031. struct ggml_tensor * ggml_step_inplace(
  2032. struct ggml_context * ctx,
  2033. struct ggml_tensor * a) {
  2034. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
  2035. }
  2036. // ggml_tanh
  2037. struct ggml_tensor * ggml_tanh(
  2038. struct ggml_context * ctx,
  2039. struct ggml_tensor * a) {
  2040. return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
  2041. }
  2042. struct ggml_tensor * ggml_tanh_inplace(
  2043. struct ggml_context * ctx,
  2044. struct ggml_tensor * a) {
  2045. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
  2046. }
  2047. // ggml_elu
  2048. struct ggml_tensor * ggml_elu(
  2049. struct ggml_context * ctx,
  2050. struct ggml_tensor * a) {
  2051. return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
  2052. }
  2053. struct ggml_tensor * ggml_elu_inplace(
  2054. struct ggml_context * ctx,
  2055. struct ggml_tensor * a) {
  2056. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
  2057. }
  2058. // ggml_relu
  2059. struct ggml_tensor * ggml_relu(
  2060. struct ggml_context * ctx,
  2061. struct ggml_tensor * a) {
  2062. return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
  2063. }
  2064. struct ggml_tensor * ggml_relu_inplace(
  2065. struct ggml_context * ctx,
  2066. struct ggml_tensor * a) {
  2067. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
  2068. }
  2069. // ggml_leaky_relu
  2070. struct ggml_tensor * ggml_leaky_relu(
  2071. struct ggml_context * ctx,
  2072. struct ggml_tensor * a,
  2073. float negative_slope,
  2074. bool inplace) {
  2075. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2076. ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
  2077. result->op = GGML_OP_LEAKY_RELU;
  2078. result->src[0] = a;
  2079. return result;
  2080. }
  2081. // ggml_sigmoid
  2082. struct ggml_tensor * ggml_sigmoid(
  2083. struct ggml_context * ctx,
  2084. struct ggml_tensor * a) {
  2085. return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
  2086. }
  2087. struct ggml_tensor * ggml_sigmoid_inplace(
  2088. struct ggml_context * ctx,
  2089. struct ggml_tensor * a) {
  2090. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
  2091. }
  2092. // ggml_gelu
  2093. struct ggml_tensor * ggml_gelu(
  2094. struct ggml_context * ctx,
  2095. struct ggml_tensor * a) {
  2096. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
  2097. }
  2098. struct ggml_tensor * ggml_gelu_inplace(
  2099. struct ggml_context * ctx,
  2100. struct ggml_tensor * a) {
  2101. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
  2102. }
  2103. // ggml_gelu_erf
  2104. struct ggml_tensor * ggml_gelu_erf(
  2105. struct ggml_context * ctx,
  2106. struct ggml_tensor * a) {
  2107. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
  2108. }
  2109. struct ggml_tensor * ggml_gelu_erf_inplace(
  2110. struct ggml_context * ctx,
  2111. struct ggml_tensor * a) {
  2112. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
  2113. }
  2114. // ggml_gelu_quick
  2115. struct ggml_tensor * ggml_gelu_quick(
  2116. struct ggml_context * ctx,
  2117. struct ggml_tensor * a) {
  2118. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2119. }
  2120. struct ggml_tensor * ggml_gelu_quick_inplace(
  2121. struct ggml_context * ctx,
  2122. struct ggml_tensor * a) {
  2123. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2124. }
  2125. // ggml_silu
  2126. struct ggml_tensor * ggml_silu(
  2127. struct ggml_context * ctx,
  2128. struct ggml_tensor * a) {
  2129. return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
  2130. }
  2131. struct ggml_tensor * ggml_silu_inplace(
  2132. struct ggml_context * ctx,
  2133. struct ggml_tensor * a) {
  2134. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
  2135. }
  2136. // ggml_silu_back
  2137. struct ggml_tensor * ggml_silu_back(
  2138. struct ggml_context * ctx,
  2139. struct ggml_tensor * a,
  2140. struct ggml_tensor * b) {
  2141. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2142. result->op = GGML_OP_SILU_BACK;
  2143. result->src[0] = a;
  2144. result->src[1] = b;
  2145. return result;
  2146. }
2147. // ggml_hardswish
  2148. struct ggml_tensor * ggml_hardswish(
  2149. struct ggml_context * ctx,
  2150. struct ggml_tensor * a) {
  2151. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
  2152. }
2153. // ggml_hardsigmoid
  2154. struct ggml_tensor * ggml_hardsigmoid(
  2155. struct ggml_context * ctx,
  2156. struct ggml_tensor * a) {
  2157. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
  2158. }
2159. // ggml_exp
  2160. struct ggml_tensor * ggml_exp(
  2161. struct ggml_context * ctx,
  2162. struct ggml_tensor * a) {
  2163. return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
  2164. }
  2165. struct ggml_tensor * ggml_exp_inplace(
  2166. struct ggml_context * ctx,
  2167. struct ggml_tensor * a) {
  2168. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
  2169. }
  2170. // ggml_norm
  2171. static struct ggml_tensor * ggml_norm_impl(
  2172. struct ggml_context * ctx,
  2173. struct ggml_tensor * a,
  2174. float eps,
  2175. bool inplace) {
  2176. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2177. ggml_set_op_params(result, &eps, sizeof(eps));
  2178. result->op = GGML_OP_NORM;
  2179. result->src[0] = a;
  2180. return result;
  2181. }
  2182. struct ggml_tensor * ggml_norm(
  2183. struct ggml_context * ctx,
  2184. struct ggml_tensor * a,
  2185. float eps) {
  2186. return ggml_norm_impl(ctx, a, eps, false);
  2187. }
  2188. struct ggml_tensor * ggml_norm_inplace(
  2189. struct ggml_context * ctx,
  2190. struct ggml_tensor * a,
  2191. float eps) {
  2192. return ggml_norm_impl(ctx, a, eps, true);
  2193. }
  2194. // ggml_rms_norm
  2195. static struct ggml_tensor * ggml_rms_norm_impl(
  2196. struct ggml_context * ctx,
  2197. struct ggml_tensor * a,
  2198. float eps,
  2199. bool inplace) {
  2200. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2201. ggml_set_op_params(result, &eps, sizeof(eps));
  2202. result->op = GGML_OP_RMS_NORM;
  2203. result->src[0] = a;
  2204. return result;
  2205. }
  2206. struct ggml_tensor * ggml_rms_norm(
  2207. struct ggml_context * ctx,
  2208. struct ggml_tensor * a,
  2209. float eps) {
  2210. return ggml_rms_norm_impl(ctx, a, eps, false);
  2211. }
  2212. struct ggml_tensor * ggml_rms_norm_inplace(
  2213. struct ggml_context * ctx,
  2214. struct ggml_tensor * a,
  2215. float eps) {
  2216. return ggml_rms_norm_impl(ctx, a, eps, true);
  2217. }
  2218. // ggml_rms_norm_back
  2219. struct ggml_tensor * ggml_rms_norm_back(
  2220. struct ggml_context * ctx,
  2221. struct ggml_tensor * a,
  2222. struct ggml_tensor * b,
  2223. float eps) {
  2224. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2225. ggml_set_op_params(result, &eps, sizeof(eps));
  2226. result->op = GGML_OP_RMS_NORM_BACK;
  2227. result->src[0] = a;
  2228. result->src[1] = b;
  2229. return result;
  2230. }
  2231. // ggml_group_norm
  2232. static struct ggml_tensor * ggml_group_norm_impl(
  2233. struct ggml_context * ctx,
  2234. struct ggml_tensor * a,
  2235. int n_groups,
  2236. float eps,
  2237. bool inplace) {
  2238. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2239. ggml_set_op_params_i32(result, 0, n_groups);
  2240. ggml_set_op_params_f32(result, 1, eps);
  2241. result->op = GGML_OP_GROUP_NORM;
  2242. result->src[0] = a;
  2243. return result;
  2244. }
  2245. struct ggml_tensor * ggml_group_norm(
  2246. struct ggml_context * ctx,
  2247. struct ggml_tensor * a,
  2248. int n_groups,
  2249. float eps) {
  2250. return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
  2251. }
  2252. struct ggml_tensor * ggml_group_norm_inplace(
  2253. struct ggml_context * ctx,
  2254. struct ggml_tensor * a,
  2255. int n_groups,
  2256. float eps) {
  2257. return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
  2258. }
  2259. // ggml_l2_norm
  2260. static struct ggml_tensor * ggml_l2_norm_impl(
  2261. struct ggml_context * ctx,
  2262. struct ggml_tensor * a,
  2263. float eps,
  2264. bool inplace) {
  2265. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2266. ggml_set_op_params_f32(result, 0, eps);
  2267. result->op = GGML_OP_L2_NORM;
  2268. result->src[0] = a;
  2269. return result;
  2270. }
  2271. struct ggml_tensor * ggml_l2_norm(
  2272. struct ggml_context * ctx,
  2273. struct ggml_tensor * a,
  2274. float eps) {
  2275. return ggml_l2_norm_impl(ctx, a, eps, false);
  2276. }
  2277. struct ggml_tensor * ggml_l2_norm_inplace(
  2278. struct ggml_context * ctx,
  2279. struct ggml_tensor * a,
  2280. float eps) {
  2281. return ggml_l2_norm_impl(ctx, a, eps, true);
  2282. }
  2283. // ggml_mul_mat
  2284. static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2285. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2286. return (t0->ne[0] == t1->ne[0]) &&
  2287. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2288. (t1->ne[3]%t0->ne[3] == 0);
  2289. }
  2290. struct ggml_tensor * ggml_mul_mat(
  2291. struct ggml_context * ctx,
  2292. struct ggml_tensor * a,
  2293. struct ggml_tensor * b) {
  2294. GGML_ASSERT(ggml_can_mul_mat(a, b));
  2295. GGML_ASSERT(!ggml_is_transposed(a));
  2296. const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
  2297. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2298. result->op = GGML_OP_MUL_MAT;
  2299. result->src[0] = a;
  2300. result->src[1] = b;
  2301. return result;
  2302. }
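/*
  Shape convention sketch for ggml_mul_mat (illustrative): ne[0] is the inner
  (row) dimension, so a and b must share ne[0], and the result is F32 with
  ne = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] }.

    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 768, 3072); // [768, 3072]
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 768, 32);   // [768, 32]
    struct ggml_tensor * y = ggml_mul_mat(ctx, w, x);                           // [3072, 32]
*/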
  2303. void ggml_mul_mat_set_prec(
  2304. struct ggml_tensor * a,
  2305. enum ggml_prec prec) {
  2306. GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
  2307. const int32_t prec_i32 = (int32_t) prec;
  2308. ggml_set_op_params_i32(a, 0, prec_i32);
  2309. }
  2310. // ggml_mul_mat_id
  2311. /*
  2312. c = ggml_mul_mat_id(ctx, as, b, ids);
  2313. as -> [cols, rows, n_expert]
  2314. b -> [cols, n_expert_used, n_tokens]
  2315. ids -> [n_expert_used, n_tokens] (i32)
  2316. c -> [rows, n_expert_used, n_tokens]
  2317. in b, n_expert_used can be broadcast to match the n_expert_used of ids
  2318. c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
  2319. */
  2320. struct ggml_tensor * ggml_mul_mat_id(
  2321. struct ggml_context * ctx,
  2322. struct ggml_tensor * as,
  2323. struct ggml_tensor * b,
  2324. struct ggml_tensor * ids) {
  2325. GGML_ASSERT(!ggml_is_transposed(as));
  2326. GGML_ASSERT(ids->type == GGML_TYPE_I32);
  2327. GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
  2328. GGML_ASSERT(b->ne[3] == 1); // b is 3d
  2329. GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
  2330. GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
  2331. GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
  2332. GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
  2333. const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
  2334. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2335. result->op = GGML_OP_MUL_MAT_ID;
  2336. result->src[0] = as;
  2337. result->src[1] = b;
  2338. result->src[2] = ids;
  2339. return result;
  2340. }
  2341. // ggml_out_prod
  2342. static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2343. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2344. return (t0->ne[1] == t1->ne[1]) &&
  2345. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2346. (t1->ne[3]%t0->ne[3] == 0);
  2347. }
  2348. struct ggml_tensor * ggml_out_prod(
  2349. struct ggml_context * ctx,
  2350. struct ggml_tensor * a,
  2351. struct ggml_tensor * b) {
  2352. GGML_ASSERT(ggml_can_out_prod(a, b));
  2353. GGML_ASSERT(!ggml_is_transposed(a));
  2354. // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
  2355. const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
  2356. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2357. result->op = GGML_OP_OUT_PROD;
  2358. result->src[0] = a;
  2359. result->src[1] = b;
  2360. return result;
  2361. }
  2362. // ggml_scale
  2363. static struct ggml_tensor * ggml_scale_impl(
  2364. struct ggml_context * ctx,
  2365. struct ggml_tensor * a,
  2366. float s,
  2367. bool inplace) {
  2368. GGML_ASSERT(ggml_is_padded_1d(a));
  2369. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2370. ggml_set_op_params(result, &s, sizeof(s));
  2371. result->op = GGML_OP_SCALE;
  2372. result->src[0] = a;
  2373. return result;
  2374. }
  2375. struct ggml_tensor * ggml_scale(
  2376. struct ggml_context * ctx,
  2377. struct ggml_tensor * a,
  2378. float s) {
  2379. return ggml_scale_impl(ctx, a, s, false);
  2380. }
  2381. struct ggml_tensor * ggml_scale_inplace(
  2382. struct ggml_context * ctx,
  2383. struct ggml_tensor * a,
  2384. float s) {
  2385. return ggml_scale_impl(ctx, a, s, true);
  2386. }
  2387. // ggml_set
  2388. static struct ggml_tensor * ggml_set_impl(
  2389. struct ggml_context * ctx,
  2390. struct ggml_tensor * a,
  2391. struct ggml_tensor * b,
  2392. size_t nb1,
  2393. size_t nb2,
  2394. size_t nb3,
  2395. size_t offset,
  2396. bool inplace) {
  2397. GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
  2398. // make a view of the destination
  2399. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2400. GGML_ASSERT(offset < (size_t)(1 << 30));
  2401. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  2402. ggml_set_op_params(result, params, sizeof(params));
  2403. result->op = GGML_OP_SET;
  2404. result->src[0] = a;
  2405. result->src[1] = b;
  2406. return result;
  2407. }
  2408. struct ggml_tensor * ggml_set(
  2409. struct ggml_context * ctx,
  2410. struct ggml_tensor * a,
  2411. struct ggml_tensor * b,
  2412. size_t nb1,
  2413. size_t nb2,
  2414. size_t nb3,
  2415. size_t offset) {
  2416. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  2417. }
  2418. struct ggml_tensor * ggml_set_inplace(
  2419. struct ggml_context * ctx,
  2420. struct ggml_tensor * a,
  2421. struct ggml_tensor * b,
  2422. size_t nb1,
  2423. size_t nb2,
  2424. size_t nb3,
  2425. size_t offset) {
  2426. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  2427. }
  2428. struct ggml_tensor * ggml_set_1d(
  2429. struct ggml_context * ctx,
  2430. struct ggml_tensor * a,
  2431. struct ggml_tensor * b,
  2432. size_t offset) {
  2433. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
  2434. }
  2435. struct ggml_tensor * ggml_set_1d_inplace(
  2436. struct ggml_context * ctx,
  2437. struct ggml_tensor * a,
  2438. struct ggml_tensor * b,
  2439. size_t offset) {
  2440. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
  2441. }
  2442. struct ggml_tensor * ggml_set_2d(
  2443. struct ggml_context * ctx,
  2444. struct ggml_tensor * a,
  2445. struct ggml_tensor * b,
  2446. size_t nb1,
  2447. size_t offset) {
  2448. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
  2449. }
  2450. struct ggml_tensor * ggml_set_2d_inplace(
  2451. struct ggml_context * ctx,
  2452. struct ggml_tensor * a,
  2453. struct ggml_tensor * b,
  2454. size_t nb1,
  2455. size_t offset) {
  2456. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
  2457. }
  2458. // ggml_cpy
  2459. static struct ggml_tensor * ggml_cpy_impl(
  2460. struct ggml_context * ctx,
  2461. struct ggml_tensor * a,
  2462. struct ggml_tensor * b) {
  2463. GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
  2464. // make a view of the destination
  2465. struct ggml_tensor * result = ggml_view_tensor(ctx, b);
  2466. if (strlen(b->name) > 0) {
  2467. ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
  2468. } else {
  2469. ggml_format_name(result, "%s (copy)", a->name);
  2470. }
  2471. result->op = GGML_OP_CPY;
  2472. result->src[0] = a;
  2473. result->src[1] = b;
  2474. return result;
  2475. }
  2476. struct ggml_tensor * ggml_cpy(
  2477. struct ggml_context * ctx,
  2478. struct ggml_tensor * a,
  2479. struct ggml_tensor * b) {
  2480. return ggml_cpy_impl(ctx, a, b);
  2481. }
  2482. struct ggml_tensor * ggml_cast(
  2483. struct ggml_context * ctx,
  2484. struct ggml_tensor * a,
  2485. enum ggml_type type) {
  2486. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  2487. ggml_format_name(result, "%s (copy)", a->name);
  2488. result->op = GGML_OP_CPY;
  2489. result->src[0] = a;
  2490. result->src[1] = result;
  2491. return result;
  2492. }
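// Illustrative sketch, not part of the library: GGML_OP_CPY converts between types while
// copying, which is why ggml_cast is expressed through the same op. Converting an F32
// tensor x (placeholder name) to F16 could look like:
//
//   struct ggml_tensor * x16 = ggml_cast(ctx, x, GGML_TYPE_F16);
//   // or, to copy/convert into an existing destination tensor dst_f16:
//   struct ggml_tensor * y = ggml_cpy(ctx, x, dst_f16);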
  2493. // ggml_cont
  2494. static struct ggml_tensor * ggml_cont_impl(
  2495. struct ggml_context * ctx,
  2496. struct ggml_tensor * a) {
  2497. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2498. ggml_format_name(result, "%s (cont)", a->name);
  2499. result->op = GGML_OP_CONT;
  2500. result->src[0] = a;
  2501. return result;
  2502. }
  2503. struct ggml_tensor * ggml_cont(
  2504. struct ggml_context * ctx,
  2505. struct ggml_tensor * a) {
  2506. return ggml_cont_impl(ctx, a);
  2507. }
  2508. // make contiguous, with new shape
  2509. GGML_API struct ggml_tensor * ggml_cont_1d(
  2510. struct ggml_context * ctx,
  2511. struct ggml_tensor * a,
  2512. int64_t ne0) {
  2513. return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
  2514. }
  2515. GGML_API struct ggml_tensor * ggml_cont_2d(
  2516. struct ggml_context * ctx,
  2517. struct ggml_tensor * a,
  2518. int64_t ne0,
  2519. int64_t ne1) {
  2520. return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
  2521. }
  2522. GGML_API struct ggml_tensor * ggml_cont_3d(
  2523. struct ggml_context * ctx,
  2524. struct ggml_tensor * a,
  2525. int64_t ne0,
  2526. int64_t ne1,
  2527. int64_t ne2) {
  2528. return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
  2529. }
  2530. struct ggml_tensor * ggml_cont_4d(
  2531. struct ggml_context * ctx,
  2532. struct ggml_tensor * a,
  2533. int64_t ne0,
  2534. int64_t ne1,
  2535. int64_t ne2,
  2536. int64_t ne3) {
  2537. GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
  2538. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  2539. ggml_format_name(result, "%s (cont)", a->name);
  2540. result->op = GGML_OP_CONT;
  2541. result->src[0] = a;
  2542. return result;
  2543. }
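// Illustrative sketch, not part of the library: ggml_cont materializes a non-contiguous
// tensor (typically the result of a permute or transpose) into a freshly laid-out buffer,
// which ops that assert contiguity require. With a placeholder tensor a:
//
//   struct ggml_tensor * at = ggml_cont(ctx, ggml_transpose(ctx, a)); // contiguous a^T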
  2544. // ggml_reshape
  2545. struct ggml_tensor * ggml_reshape(
  2546. struct ggml_context * ctx,
  2547. struct ggml_tensor * a,
  2548. struct ggml_tensor * b) {
  2549. GGML_ASSERT(ggml_is_contiguous(a));
2550. // only the shape of b is relevant here, not its memory layout, so b is allowed to be non-contiguous.
  2551. GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
  2552. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
  2553. ggml_format_name(result, "%s (reshaped)", a->name);
  2554. result->op = GGML_OP_RESHAPE;
  2555. result->src[0] = a;
  2556. return result;
  2557. }
  2558. struct ggml_tensor * ggml_reshape_1d(
  2559. struct ggml_context * ctx,
  2560. struct ggml_tensor * a,
  2561. int64_t ne0) {
  2562. GGML_ASSERT(ggml_is_contiguous(a));
  2563. GGML_ASSERT(ggml_nelements(a) == ne0);
  2564. const int64_t ne[1] = { ne0 };
  2565. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
  2566. ggml_format_name(result, "%s (reshaped)", a->name);
  2567. result->op = GGML_OP_RESHAPE;
  2568. result->src[0] = a;
  2569. return result;
  2570. }
  2571. struct ggml_tensor * ggml_reshape_2d(
  2572. struct ggml_context * ctx,
  2573. struct ggml_tensor * a,
  2574. int64_t ne0,
  2575. int64_t ne1) {
  2576. GGML_ASSERT(ggml_is_contiguous(a));
  2577. GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
  2578. const int64_t ne[2] = { ne0, ne1 };
  2579. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
  2580. ggml_format_name(result, "%s (reshaped)", a->name);
  2581. result->op = GGML_OP_RESHAPE;
  2582. result->src[0] = a;
  2583. return result;
  2584. }
  2585. struct ggml_tensor * ggml_reshape_3d(
  2586. struct ggml_context * ctx,
  2587. struct ggml_tensor * a,
  2588. int64_t ne0,
  2589. int64_t ne1,
  2590. int64_t ne2) {
  2591. GGML_ASSERT(ggml_is_contiguous(a));
  2592. GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
  2593. const int64_t ne[3] = { ne0, ne1, ne2 };
  2594. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
  2595. ggml_format_name(result, "%s (reshaped)", a->name);
  2596. result->op = GGML_OP_RESHAPE;
  2597. result->src[0] = a;
  2598. return result;
  2599. }
  2600. struct ggml_tensor * ggml_reshape_4d(
  2601. struct ggml_context * ctx,
  2602. struct ggml_tensor * a,
  2603. int64_t ne0,
  2604. int64_t ne1,
  2605. int64_t ne2,
  2606. int64_t ne3) {
  2607. GGML_ASSERT(ggml_is_contiguous(a));
  2608. GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
  2609. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  2610. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
  2611. ggml_format_name(result, "%s (reshaped)", a->name);
  2612. result->op = GGML_OP_RESHAPE;
  2613. result->src[0] = a;
  2614. return result;
  2615. }
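// Illustrative sketch, not part of the library: because a reshape only reinterprets a
// contiguous buffer, it is commonly used to split an embedding into attention heads.
// Assuming cur is [n_embd, n_tokens] with n_embd == head_dim*n_head (placeholder names):
//
//   struct ggml_tensor * q = ggml_reshape_3d(ctx, cur, head_dim, n_head, n_tokens);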
  2616. static struct ggml_tensor * ggml_view_impl(
  2617. struct ggml_context * ctx,
  2618. struct ggml_tensor * a,
  2619. int n_dims,
  2620. const int64_t * ne,
  2621. size_t offset) {
  2622. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
  2623. ggml_format_name(result, "%s (view)", a->name);
  2624. ggml_set_op_params(result, &offset, sizeof(offset));
  2625. result->op = GGML_OP_VIEW;
  2626. result->src[0] = a;
  2627. return result;
  2628. }
  2629. // ggml_view_1d
  2630. struct ggml_tensor * ggml_view_1d(
  2631. struct ggml_context * ctx,
  2632. struct ggml_tensor * a,
  2633. int64_t ne0,
  2634. size_t offset) {
  2635. struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
  2636. return result;
  2637. }
  2638. // ggml_view_2d
  2639. struct ggml_tensor * ggml_view_2d(
  2640. struct ggml_context * ctx,
  2641. struct ggml_tensor * a,
  2642. int64_t ne0,
  2643. int64_t ne1,
  2644. size_t nb1,
  2645. size_t offset) {
  2646. const int64_t ne[2] = { ne0, ne1 };
  2647. struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
  2648. result->nb[1] = nb1;
  2649. result->nb[2] = result->nb[1]*ne1;
  2650. result->nb[3] = result->nb[2];
  2651. return result;
  2652. }
  2653. // ggml_view_3d
  2654. struct ggml_tensor * ggml_view_3d(
  2655. struct ggml_context * ctx,
  2656. struct ggml_tensor * a,
  2657. int64_t ne0,
  2658. int64_t ne1,
  2659. int64_t ne2,
  2660. size_t nb1,
  2661. size_t nb2,
  2662. size_t offset) {
  2663. const int64_t ne[3] = { ne0, ne1, ne2 };
  2664. struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
  2665. result->nb[1] = nb1;
  2666. result->nb[2] = nb2;
  2667. result->nb[3] = result->nb[2]*ne2;
  2668. return result;
  2669. }
  2670. // ggml_view_4d
  2671. struct ggml_tensor * ggml_view_4d(
  2672. struct ggml_context * ctx,
  2673. struct ggml_tensor * a,
  2674. int64_t ne0,
  2675. int64_t ne1,
  2676. int64_t ne2,
  2677. int64_t ne3,
  2678. size_t nb1,
  2679. size_t nb2,
  2680. size_t nb3,
  2681. size_t offset) {
  2682. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  2683. struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
  2684. result->nb[1] = nb1;
  2685. result->nb[2] = nb2;
  2686. result->nb[3] = nb3;
  2687. return result;
  2688. }
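// Illustrative sketch, not part of the library: a view reinterprets a slice of an existing
// buffer without copying; the caller supplies element counts, byte strides and a byte
// offset. Selecting rows [r0, r0 + nr) of a 2-D tensor m (placeholder names):
//
//   struct ggml_tensor * rows = ggml_view_2d(ctx, m, m->ne[0], nr, m->nb[1], r0*m->nb[1]);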
  2689. // ggml_permute
  2690. struct ggml_tensor * ggml_permute(
  2691. struct ggml_context * ctx,
  2692. struct ggml_tensor * a,
  2693. int axis0,
  2694. int axis1,
  2695. int axis2,
  2696. int axis3) {
  2697. GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
  2698. GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
  2699. GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
  2700. GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
  2701. GGML_ASSERT(axis0 != axis1);
  2702. GGML_ASSERT(axis0 != axis2);
  2703. GGML_ASSERT(axis0 != axis3);
  2704. GGML_ASSERT(axis1 != axis2);
  2705. GGML_ASSERT(axis1 != axis3);
  2706. GGML_ASSERT(axis2 != axis3);
  2707. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  2708. ggml_format_name(result, "%s (permuted)", a->name);
  2709. int ne[GGML_MAX_DIMS];
  2710. int nb[GGML_MAX_DIMS];
  2711. ne[axis0] = a->ne[0];
  2712. ne[axis1] = a->ne[1];
  2713. ne[axis2] = a->ne[2];
  2714. ne[axis3] = a->ne[3];
  2715. nb[axis0] = a->nb[0];
  2716. nb[axis1] = a->nb[1];
  2717. nb[axis2] = a->nb[2];
  2718. nb[axis3] = a->nb[3];
  2719. result->ne[0] = ne[0];
  2720. result->ne[1] = ne[1];
  2721. result->ne[2] = ne[2];
  2722. result->ne[3] = ne[3];
  2723. result->nb[0] = nb[0];
  2724. result->nb[1] = nb[1];
  2725. result->nb[2] = nb[2];
  2726. result->nb[3] = nb[3];
  2727. result->op = GGML_OP_PERMUTE;
  2728. result->src[0] = a;
  2729. int32_t params[] = { axis0, axis1, axis2, axis3 };
  2730. ggml_set_op_params(result, params, sizeof(params));
  2731. return result;
  2732. }
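// Illustrative sketch, not part of the library: ggml_permute only rewires ne/nb, so the
// result is a view; axisN is the destination index of source dimension N. Swapping the head
// and token dimensions, as attention graphs commonly do (placeholder names):
//
//   // q: [head_dim, n_head, n_tokens, 1] -> [head_dim, n_tokens, n_head, 1]
//   struct ggml_tensor * qp = ggml_permute(ctx, q, 0, 2, 1, 3);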
  2733. // ggml_transpose
  2734. struct ggml_tensor * ggml_transpose(
  2735. struct ggml_context * ctx,
  2736. struct ggml_tensor * a) {
  2737. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  2738. ggml_format_name(result, "%s (transposed)", a->name);
  2739. result->ne[0] = a->ne[1];
  2740. result->ne[1] = a->ne[0];
  2741. result->nb[0] = a->nb[1];
  2742. result->nb[1] = a->nb[0];
  2743. result->op = GGML_OP_TRANSPOSE;
  2744. result->src[0] = a;
  2745. return result;
  2746. }
  2747. // ggml_get_rows
  2748. struct ggml_tensor * ggml_get_rows(
  2749. struct ggml_context * ctx,
  2750. struct ggml_tensor * a,
  2751. struct ggml_tensor * b) {
  2752. GGML_ASSERT(a->ne[2] == b->ne[1]);
  2753. GGML_ASSERT(b->ne[3] == 1);
  2754. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2755. // TODO: implement non F32 return
  2756. enum ggml_type type = GGML_TYPE_F32;
  2757. if (a->type == GGML_TYPE_I32) {
  2758. type = a->type;
  2759. }
  2760. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
  2761. result->op = GGML_OP_GET_ROWS;
  2762. result->src[0] = a;
  2763. result->src[1] = b;
  2764. return result;
  2765. }
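// Illustrative sketch, not part of the library: the canonical use of ggml_get_rows is the
// token-embedding lookup, with b holding I32 token ids (placeholder names):
//
//   // tok_embd: [n_embd, n_vocab], inp_tokens: I32 [n_tokens] -> result: [n_embd, n_tokens]
//   struct ggml_tensor * inpL = ggml_get_rows(ctx, tok_embd, inp_tokens);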
  2766. // ggml_get_rows_back
  2767. struct ggml_tensor * ggml_get_rows_back(
  2768. struct ggml_context * ctx,
  2769. struct ggml_tensor * a,
  2770. struct ggml_tensor * b,
  2771. struct ggml_tensor * c) {
  2772. GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
  2773. GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
  2774. // TODO: implement non F32 return
  2775. //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
  2776. struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
  2777. result->op = GGML_OP_GET_ROWS_BACK;
  2778. result->src[0] = a;
  2779. result->src[1] = b;
  2780. return result;
  2781. }
  2782. // ggml_diag
  2783. struct ggml_tensor * ggml_diag(
  2784. struct ggml_context * ctx,
  2785. struct ggml_tensor * a) {
  2786. GGML_ASSERT(a->ne[1] == 1);
  2787. const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
  2788. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
  2789. result->op = GGML_OP_DIAG;
  2790. result->src[0] = a;
  2791. return result;
  2792. }
  2793. // ggml_diag_mask_inf
  2794. static struct ggml_tensor * ggml_diag_mask_inf_impl(
  2795. struct ggml_context * ctx,
  2796. struct ggml_tensor * a,
  2797. int n_past,
  2798. bool inplace) {
  2799. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2800. int32_t params[] = { n_past };
  2801. ggml_set_op_params(result, params, sizeof(params));
  2802. result->op = GGML_OP_DIAG_MASK_INF;
  2803. result->src[0] = a;
  2804. return result;
  2805. }
  2806. struct ggml_tensor * ggml_diag_mask_inf(
  2807. struct ggml_context * ctx,
  2808. struct ggml_tensor * a,
  2809. int n_past) {
  2810. return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
  2811. }
  2812. struct ggml_tensor * ggml_diag_mask_inf_inplace(
  2813. struct ggml_context * ctx,
  2814. struct ggml_tensor * a,
  2815. int n_past) {
  2816. return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
  2817. }
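// Illustrative sketch, not part of the library: a causal attention score matrix can be
// built by masking the entries that refer to future positions with -INF before the softmax
// (placeholder names; assumes the float-scale variant of ggml_scale):
//
//   struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);                  // [n_kv, n_q, ...]
//   kq = ggml_scale(ctx, kq, 1.0f/sqrtf((float) head_dim));
//   kq = ggml_diag_mask_inf_inplace(ctx, kq, n_past);
//   kq = ggml_soft_max_inplace(ctx, kq);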
  2818. // ggml_diag_mask_zero
  2819. static struct ggml_tensor * ggml_diag_mask_zero_impl(
  2820. struct ggml_context * ctx,
  2821. struct ggml_tensor * a,
  2822. int n_past,
  2823. bool inplace) {
  2824. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2825. int32_t params[] = { n_past };
  2826. ggml_set_op_params(result, params, sizeof(params));
  2827. result->op = GGML_OP_DIAG_MASK_ZERO;
  2828. result->src[0] = a;
  2829. return result;
  2830. }
  2831. struct ggml_tensor * ggml_diag_mask_zero(
  2832. struct ggml_context * ctx,
  2833. struct ggml_tensor * a,
  2834. int n_past) {
  2835. return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
  2836. }
  2837. struct ggml_tensor * ggml_diag_mask_zero_inplace(
  2838. struct ggml_context * ctx,
  2839. struct ggml_tensor * a,
  2840. int n_past) {
  2841. return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
  2842. }
  2843. // ggml_soft_max
  2844. static struct ggml_tensor * ggml_soft_max_impl(
  2845. struct ggml_context * ctx,
  2846. struct ggml_tensor * a,
  2847. struct ggml_tensor * mask,
  2848. float scale,
  2849. float max_bias,
  2850. bool inplace) {
  2851. GGML_ASSERT(ggml_is_contiguous(a));
  2852. if (mask) {
  2853. GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
  2854. GGML_ASSERT(ggml_is_contiguous(mask));
  2855. GGML_ASSERT(ggml_is_matrix(mask));
  2856. GGML_ASSERT(mask->ne[0] == a->ne[0]);
  2857. GGML_ASSERT(mask->ne[1] >= a->ne[1]);
  2858. }
  2859. if (max_bias > 0.0f) {
  2860. GGML_ASSERT(mask);
  2861. }
  2862. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2863. float params[] = { scale, max_bias };
  2864. ggml_set_op_params(result, params, sizeof(params));
  2865. result->op = GGML_OP_SOFT_MAX;
  2866. result->src[0] = a;
  2867. result->src[1] = mask;
  2868. return result;
  2869. }
  2870. struct ggml_tensor * ggml_soft_max(
  2871. struct ggml_context * ctx,
  2872. struct ggml_tensor * a) {
  2873. return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
  2874. }
  2875. struct ggml_tensor * ggml_soft_max_inplace(
  2876. struct ggml_context * ctx,
  2877. struct ggml_tensor * a) {
  2878. return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
  2879. }
  2880. struct ggml_tensor * ggml_soft_max_ext(
  2881. struct ggml_context * ctx,
  2882. struct ggml_tensor * a,
  2883. struct ggml_tensor * mask,
  2884. float scale,
  2885. float max_bias) {
  2886. return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
  2887. }
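// Illustrative sketch, not part of the library: ggml_soft_max_ext fuses the scaling, the
// additive mask and the softmax of the sketch above into a single op; max_bias > 0.0f
// enables ALiBi-style slopes and then requires a mask (placeholder names):
//
//   kq = ggml_soft_max_ext(ctx, kq, kq_mask, 1.0f/sqrtf((float) head_dim), 0.0f);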
  2888. // ggml_soft_max_ext_back
  2889. static struct ggml_tensor * ggml_soft_max_ext_back_impl(
  2890. struct ggml_context * ctx,
  2891. struct ggml_tensor * a,
  2892. struct ggml_tensor * b,
  2893. float scale,
  2894. float max_bias,
  2895. bool inplace) {
  2896. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2897. result->op = GGML_OP_SOFT_MAX_BACK;
  2898. result->src[0] = a;
  2899. result->src[1] = b;
  2900. memcpy((float *) result->op_params + 0, &scale, sizeof(float));
  2901. memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
  2902. return result;
  2903. }
  2904. struct ggml_tensor * ggml_soft_max_ext_back(
  2905. struct ggml_context * ctx,
  2906. struct ggml_tensor * a,
  2907. struct ggml_tensor * b,
  2908. float scale,
  2909. float max_bias) {
  2910. return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
  2911. }
  2912. struct ggml_tensor * ggml_soft_max_ext_back_inplace(
  2913. struct ggml_context * ctx,
  2914. struct ggml_tensor * a,
  2915. struct ggml_tensor * b,
  2916. float scale,
  2917. float max_bias) {
  2918. return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
  2919. }
  2920. // ggml_rope
  2921. static struct ggml_tensor * ggml_rope_impl(
  2922. struct ggml_context * ctx,
  2923. struct ggml_tensor * a,
  2924. struct ggml_tensor * b,
  2925. struct ggml_tensor * c,
  2926. int n_dims,
  2927. int mode,
  2928. int n_ctx_orig,
  2929. float freq_base,
  2930. float freq_scale,
  2931. float ext_factor,
  2932. float attn_factor,
  2933. float beta_fast,
  2934. float beta_slow,
  2935. bool inplace) {
  2936. GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
  2937. GGML_ASSERT(ggml_is_vector(b));
  2938. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2939. GGML_ASSERT(a->ne[2] == b->ne[0]);
  2940. if (c) {
  2941. GGML_ASSERT(c->type == GGML_TYPE_F32);
  2942. GGML_ASSERT(c->ne[0] >= n_dims / 2);
  2943. }
  2944. int sections[4] = {0, 0, 0, 0};
  2945. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2946. int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
  2947. memcpy(params + 5, &freq_base, sizeof(float));
  2948. memcpy(params + 6, &freq_scale, sizeof(float));
  2949. memcpy(params + 7, &ext_factor, sizeof(float));
  2950. memcpy(params + 8, &attn_factor, sizeof(float));
  2951. memcpy(params + 9, &beta_fast, sizeof(float));
  2952. memcpy(params + 10, &beta_slow, sizeof(float));
  2953. memcpy(params + 11, &sections, sizeof(int)*4);
  2954. ggml_set_op_params(result, params, sizeof(params));
  2955. result->op = GGML_OP_ROPE;
  2956. result->src[0] = a;
  2957. result->src[1] = b;
  2958. result->src[2] = c;
  2959. return result;
  2960. }
  2961. struct ggml_tensor * ggml_rope(
  2962. struct ggml_context * ctx,
  2963. struct ggml_tensor * a,
  2964. struct ggml_tensor * b,
  2965. int n_dims,
  2966. int mode) {
  2967. return ggml_rope_impl(
  2968. ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
  2969. );
  2970. }
  2971. struct ggml_tensor * ggml_rope_multi(
  2972. struct ggml_context * ctx,
  2973. struct ggml_tensor * a,
  2974. struct ggml_tensor * b,
  2975. struct ggml_tensor * c,
  2976. int n_dims,
  2977. int sections[4],
  2978. int mode,
  2979. int n_ctx_orig,
  2980. float freq_base,
  2981. float freq_scale,
  2982. float ext_factor,
  2983. float attn_factor,
  2984. float beta_fast,
  2985. float beta_slow) {
  2986. // Multimodal Rotary Position Embedding
  2987. GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
  2988. GGML_ASSERT(ggml_is_vector(b));
  2989. GGML_ASSERT(b->type == GGML_TYPE_I32);
2990. GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expects 4 position ids per token
  2991. if (c) {
  2992. GGML_ASSERT(c->type == GGML_TYPE_F32);
  2993. GGML_ASSERT(c->ne[0] >= n_dims / 2);
  2994. }
  2995. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2996. int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
  2997. memcpy(params + 5, &freq_base, sizeof(float));
  2998. memcpy(params + 6, &freq_scale, sizeof(float));
  2999. memcpy(params + 7, &ext_factor, sizeof(float));
  3000. memcpy(params + 8, &attn_factor, sizeof(float));
  3001. memcpy(params + 9, &beta_fast, sizeof(float));
  3002. memcpy(params + 10, &beta_slow, sizeof(float));
  3003. memcpy(&params[11], sections, sizeof(int)*4);
  3004. ggml_set_op_params(result, params, sizeof(params));
  3005. result->op = GGML_OP_ROPE;
  3006. result->src[0] = a;
  3007. result->src[1] = b;
  3008. result->src[2] = c;
  3009. return result;
  3010. }
  3011. struct ggml_tensor * ggml_rope_inplace(
  3012. struct ggml_context * ctx,
  3013. struct ggml_tensor * a,
  3014. struct ggml_tensor * b,
  3015. int n_dims,
  3016. int mode) {
  3017. return ggml_rope_impl(
  3018. ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
  3019. );
  3020. }
  3021. struct ggml_tensor * ggml_rope_ext(
  3022. struct ggml_context * ctx,
  3023. struct ggml_tensor * a,
  3024. struct ggml_tensor * b,
  3025. struct ggml_tensor * c,
  3026. int n_dims,
  3027. int mode,
  3028. int n_ctx_orig,
  3029. float freq_base,
  3030. float freq_scale,
  3031. float ext_factor,
  3032. float attn_factor,
  3033. float beta_fast,
  3034. float beta_slow) {
  3035. return ggml_rope_impl(
  3036. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3037. ext_factor, attn_factor, beta_fast, beta_slow, false
  3038. );
  3039. }
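// Illustrative sketch, not part of the library: applying rotary embeddings to a per-head
// query tensor, with b holding one I32 position per token and c an optional frequency
// scaling tensor (NULL here). Names and the trailing YaRN parameters are placeholders:
//
//   // q: [head_dim, n_head, n_tokens], pos: I32 [n_tokens]
//   q = ggml_rope_ext(ctx, q, pos, NULL, n_rot, /*mode =*/ 0, n_ctx_orig,
//                     freq_base, freq_scale, 0.0f, 1.0f, 32.0f, 1.0f);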
  3040. struct ggml_tensor * ggml_rope_ext_inplace(
  3041. struct ggml_context * ctx,
  3042. struct ggml_tensor * a,
  3043. struct ggml_tensor * b,
  3044. struct ggml_tensor * c,
  3045. int n_dims,
  3046. int mode,
  3047. int n_ctx_orig,
  3048. float freq_base,
  3049. float freq_scale,
  3050. float ext_factor,
  3051. float attn_factor,
  3052. float beta_fast,
  3053. float beta_slow) {
  3054. return ggml_rope_impl(
  3055. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3056. ext_factor, attn_factor, beta_fast, beta_slow, true
  3057. );
  3058. }
  3059. struct ggml_tensor * ggml_rope_custom(
  3060. struct ggml_context * ctx,
  3061. struct ggml_tensor * a,
  3062. struct ggml_tensor * b,
  3063. int n_dims,
  3064. int mode,
  3065. int n_ctx_orig,
  3066. float freq_base,
  3067. float freq_scale,
  3068. float ext_factor,
  3069. float attn_factor,
  3070. float beta_fast,
  3071. float beta_slow) {
  3072. return ggml_rope_impl(
  3073. ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3074. ext_factor, attn_factor, beta_fast, beta_slow, false
  3075. );
  3076. }
  3077. struct ggml_tensor * ggml_rope_custom_inplace(
  3078. struct ggml_context * ctx,
  3079. struct ggml_tensor * a,
  3080. struct ggml_tensor * b,
  3081. int n_dims,
  3082. int mode,
  3083. int n_ctx_orig,
  3084. float freq_base,
  3085. float freq_scale,
  3086. float ext_factor,
  3087. float attn_factor,
  3088. float beta_fast,
  3089. float beta_slow) {
  3090. return ggml_rope_impl(
  3091. ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3092. ext_factor, attn_factor, beta_fast, beta_slow, true
  3093. );
  3094. }
3095. // Solving `max_pos_emb = n_rot * 2pi * base^((2 * x) / n_dims)` for x gives the correction dimension:
3096. // `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
  3097. static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
  3098. return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
  3099. }
  3100. void ggml_rope_yarn_corr_dims(
  3101. int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
  3102. ) {
  3103. // start and end correction dims
  3104. float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
  3105. float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
  3106. dims[0] = MAX(0, start);
  3107. dims[1] = MIN(n_dims - 1, end);
  3108. }
  3109. // ggml_rope_back
  3110. struct ggml_tensor * ggml_rope_ext_back(
  3111. struct ggml_context * ctx,
  3112. struct ggml_tensor * a,
  3113. struct ggml_tensor * b,
  3114. struct ggml_tensor * c,
  3115. int n_dims,
  3116. int mode,
  3117. int n_ctx_orig,
  3118. float freq_base,
  3119. float freq_scale,
  3120. float ext_factor,
  3121. float attn_factor,
  3122. float beta_fast,
  3123. float beta_slow) {
  3124. struct ggml_tensor * result = ggml_rope_ext(
  3125. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  3126. result->op = GGML_OP_ROPE_BACK;
  3127. return result;
  3128. }
  3129. struct ggml_tensor * ggml_rope_multi_back(
  3130. struct ggml_context * ctx,
  3131. struct ggml_tensor * a,
  3132. struct ggml_tensor * b,
  3133. struct ggml_tensor * c,
  3134. int n_dims,
  3135. int sections[4],
  3136. int mode,
  3137. int n_ctx_orig,
  3138. float freq_base,
  3139. float freq_scale,
  3140. float ext_factor,
  3141. float attn_factor,
  3142. float beta_fast,
  3143. float beta_slow) {
  3144. struct ggml_tensor * result = ggml_rope_multi(
  3145. ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  3146. result->op = GGML_OP_ROPE_BACK;
  3147. return result;
  3148. }
  3149. // ggml_clamp
  3150. struct ggml_tensor * ggml_clamp(
  3151. struct ggml_context * ctx,
  3152. struct ggml_tensor * a,
  3153. float min,
  3154. float max) {
3155. // TODO: when implementing backward, fix this:
  3156. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  3157. float params[] = { min, max };
  3158. ggml_set_op_params(result, params, sizeof(params));
  3159. result->op = GGML_OP_CLAMP;
  3160. result->src[0] = a;
  3161. return result;
  3162. }
  3163. static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  3164. return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
  3165. }
  3166. // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
  3167. // a: [OC,IC, KH, KW]
  3168. // b: [N, IC, IH, IW]
  3169. // result: [N, OH, OW, IC*KH*KW]
  3170. struct ggml_tensor * ggml_im2col(
  3171. struct ggml_context * ctx,
  3172. struct ggml_tensor * a,
  3173. struct ggml_tensor * b,
  3174. int s0,
  3175. int s1,
  3176. int p0,
  3177. int p1,
  3178. int d0,
  3179. int d1,
  3180. bool is_2D,
  3181. enum ggml_type dst_type) {
  3182. if (is_2D) {
  3183. GGML_ASSERT(a->ne[2] == b->ne[2]);
  3184. } else {
  3185. //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
  3186. GGML_ASSERT(b->ne[1] == a->ne[1]);
  3187. GGML_ASSERT(b->ne[3] == 1);
  3188. }
  3189. const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
  3190. const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
  3191. GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
  3192. GGML_ASSERT((OW > 0) && "b too small compared to a");
  3193. const int64_t ne[4] = {
  3194. is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
  3195. OW,
  3196. is_2D ? OH : b->ne[2],
  3197. is_2D ? b->ne[3] : 1,
  3198. };
  3199. struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
  3200. int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
  3201. ggml_set_op_params(result, params, sizeof(params));
  3202. result->op = GGML_OP_IM2COL;
  3203. result->src[0] = a;
  3204. result->src[1] = b;
  3205. return result;
  3206. }
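// Worked example (illustrative): for the 2-D case with IW = IH = 32, KW = KH = 3, stride 1,
// padding 1 and dilation 1, the output-size formula above gives
//   OW = OH = (32 + 2*1 - 1*(3 - 1) - 1)/1 + 1 = 32,
// so the im2col result is [N, 32, 32, IC*9], ready to be multiplied by the kernel reshaped
// to [OC, IC*9].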
  3207. struct ggml_tensor * ggml_im2col_back(
  3208. struct ggml_context * ctx,
  3209. struct ggml_tensor * a,
  3210. struct ggml_tensor * b,
  3211. int64_t * ne,
  3212. int s0,
  3213. int s1,
  3214. int p0,
  3215. int p1,
  3216. int d0,
  3217. int d1,
  3218. bool is_2D) {
  3219. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3220. int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
  3221. ggml_set_op_params(result, params, sizeof(params));
  3222. result->op = GGML_OP_IM2COL_BACK;
  3223. result->src[0] = a;
  3224. result->src[1] = b;
  3225. return result;
  3226. }
  3227. // ggml_conv_1d
  3228. struct ggml_tensor * ggml_conv_1d(
  3229. struct ggml_context * ctx,
  3230. struct ggml_tensor * a,
  3231. struct ggml_tensor * b,
  3232. int s0,
  3233. int p0,
  3234. int d0) {
  3235. struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
  3236. struct ggml_tensor * result =
  3237. ggml_mul_mat(ctx,
  3238. ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
  3239. ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K]
  3240. result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
  3241. return result;
  3242. }
  3243. // ggml_conv_1d_ph
  3244. struct ggml_tensor* ggml_conv_1d_ph(
  3245. struct ggml_context * ctx,
  3246. struct ggml_tensor * a,
  3247. struct ggml_tensor * b,
  3248. int s,
  3249. int d) {
  3250. return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
  3251. }
  3252. // ggml_conv_1d_dw
  3253. struct ggml_tensor * ggml_conv_1d_dw(
  3254. struct ggml_context * ctx,
  3255. struct ggml_tensor * a,
  3256. struct ggml_tensor * b,
  3257. int s0,
  3258. int p0,
  3259. int d0) {
  3260. struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
  3261. struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
  3262. struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
  3263. struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
  3264. result = ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
  3265. return result;
  3266. }
  3267. // ggml_conv_1d_dw_ph
  3268. struct ggml_tensor * ggml_conv_1d_dw_ph(
  3269. struct ggml_context * ctx,
  3270. struct ggml_tensor * a,
  3271. struct ggml_tensor * b,
  3272. int s0,
  3273. int d0) {
  3274. return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
  3275. }
  3276. // ggml_conv_transpose_1d
  3277. static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  3278. return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
  3279. }
  3280. GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
  3281. struct ggml_context * ctx,
  3282. struct ggml_tensor * a,
  3283. struct ggml_tensor * b,
  3284. int s0,
  3285. int p0,
  3286. int d0) {
  3287. GGML_ASSERT(ggml_is_matrix(b));
  3288. GGML_ASSERT(a->ne[2] == b->ne[1]);
  3289. GGML_ASSERT(a->ne[3] == 1);
  3290. GGML_ASSERT(p0 == 0);
  3291. GGML_ASSERT(d0 == 1);
  3292. const int64_t ne[4] = {
  3293. ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
  3294. a->ne[1], b->ne[2], 1,
  3295. };
  3296. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3297. int32_t params[] = { s0, p0, d0 };
  3298. ggml_set_op_params(result, params, sizeof(params));
  3299. result->op = GGML_OP_CONV_TRANSPOSE_1D;
  3300. result->src[0] = a;
  3301. result->src[1] = b;
  3302. return result;
  3303. }
  3304. // ggml_conv_2d
  3305. // a: [OC,IC, KH, KW]
  3306. // b: [N, IC, IH, IW]
  3307. // result: [N, OC, OH, OW]
  3308. struct ggml_tensor * ggml_conv_2d(
  3309. struct ggml_context * ctx,
  3310. struct ggml_tensor * a,
  3311. struct ggml_tensor * b,
  3312. int s0,
  3313. int s1,
  3314. int p0,
  3315. int p1,
  3316. int d0,
  3317. int d1) {
  3318. struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
  3319. struct ggml_tensor * result =
  3320. ggml_mul_mat(ctx,
  3321. ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
  3322. ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
  3323. result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
  3324. result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
  3325. return result;
  3326. }
  3327. // ggml_conv_2d_sk_p0
  3328. struct ggml_tensor * ggml_conv_2d_sk_p0(
  3329. struct ggml_context * ctx,
  3330. struct ggml_tensor * a,
  3331. struct ggml_tensor * b) {
  3332. return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
  3333. }
  3334. // ggml_conv_2d_s1_ph
  3335. struct ggml_tensor * ggml_conv_2d_s1_ph(
  3336. struct ggml_context * ctx,
  3337. struct ggml_tensor * a,
  3338. struct ggml_tensor * b) {
  3339. return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
  3340. }
  3341. // ggml_conv_2d_dw
  3342. struct ggml_tensor * ggml_conv_2d_dw(
  3343. struct ggml_context * ctx,
  3344. struct ggml_tensor * a,
  3345. struct ggml_tensor * b,
  3346. int s0,
  3347. int s1,
  3348. int p0,
  3349. int p1,
  3350. int d0,
  3351. int d1) {
  3352. struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
  3353. struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
  3354. ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
  3355. s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
  3356. struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
  3357. new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
  3358. struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
  3359. result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
  3360. return result;
  3361. }
  3362. // ggml_conv_2d_dw_direct
  3363. struct ggml_tensor * ggml_conv_2d_dw_direct(
  3364. struct ggml_context * ctx,
  3365. struct ggml_tensor * a,
  3366. struct ggml_tensor * b,
  3367. int stride0,
  3368. int stride1,
  3369. int pad0,
  3370. int pad1,
  3371. int dilation0,
  3372. int dilation1) {
  3373. GGML_ASSERT(a->ne[2] == 1);
  3374. GGML_ASSERT(a->ne[3] == b->ne[2]);
  3375. int64_t ne[4];
  3376. ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
  3377. ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
  3378. ne[2] = b->ne[2];
  3379. ne[3] = b->ne[3];
  3380. struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
  3381. if (ggml_is_contiguous_channels(b)) {
  3382. // Result will be permuted the same way as input (CWHN order)
  3383. const int64_t type_size = ggml_type_size(result->type);
  3384. GGML_ASSERT(ggml_blck_size(result->type) == 1);
  3385. result->nb[0] = result->ne[2] * type_size;
  3386. result->nb[1] = result->ne[0] * result->nb[0];
  3387. result->nb[2] = type_size;
  3388. }
  3389. int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
  3390. ggml_set_op_params(result, params, sizeof(params));
  3391. result->op = GGML_OP_CONV_2D_DW;
  3392. result->src[0] = a;
  3393. result->src[1] = b;
  3394. return result;
  3395. }
  3396. // ggml_conv_transpose_2d_p0
  3397. static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
  3398. return (ins - 1) * s - 2 * p + ks;
  3399. }
  3400. struct ggml_tensor * ggml_conv_transpose_2d_p0(
  3401. struct ggml_context * ctx,
  3402. struct ggml_tensor * a,
  3403. struct ggml_tensor * b,
  3404. int stride) {
  3405. GGML_ASSERT(a->ne[3] == b->ne[2]);
  3406. const int64_t ne[4] = {
  3407. ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
  3408. ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
  3409. a->ne[2], b->ne[3],
  3410. };
  3411. struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3412. ggml_set_op_params_i32(result, 0, stride);
  3413. result->op = GGML_OP_CONV_TRANSPOSE_2D;
  3414. result->src[0] = a;
  3415. result->src[1] = b;
  3416. return result;
  3417. }
  3418. // ggml_pool_*
  3419. static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
  3420. return (ins + 2 * p - ks) / s + 1;
  3421. }
  3422. // ggml_pool_1d
  3423. struct ggml_tensor * ggml_pool_1d(
  3424. struct ggml_context * ctx,
  3425. struct ggml_tensor * a,
  3426. enum ggml_op_pool op,
  3427. int k0,
  3428. int s0,
  3429. int p0) {
  3430. const int64_t ne[4] = {
  3431. ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
  3432. a->ne[1],
  3433. a->ne[2],
  3434. a->ne[3],
  3435. };
  3436. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3437. int32_t params[] = { op, k0, s0, p0 };
  3438. ggml_set_op_params(result, params, sizeof(params));
  3439. result->op = GGML_OP_POOL_1D;
  3440. result->src[0] = a;
  3441. return result;
  3442. }
  3443. // ggml_pool_2d
  3444. struct ggml_tensor * ggml_pool_2d(
  3445. struct ggml_context * ctx,
  3446. struct ggml_tensor * a,
  3447. enum ggml_op_pool op,
  3448. int k0,
  3449. int k1,
  3450. int s0,
  3451. int s1,
  3452. float p0,
  3453. float p1) {
  3454. struct ggml_tensor * result;
  3455. const int64_t ne[4] = {
  3456. ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
  3457. ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
  3458. a->ne[2],
  3459. a->ne[3],
  3460. };
  3461. result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3462. int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
  3463. ggml_set_op_params(result, params, sizeof(params));
  3464. result->op = GGML_OP_POOL_2D;
  3465. result->src[0] = a;
  3466. return result;
  3467. }
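// Illustrative sketch, not part of the library: a 2x2 average pool with stride 2 and no
// padding halves both spatial dimensions, e.g. [W, H, C, N] -> [W/2, H/2, C, N] for even
// W and H (x is a placeholder name):
//
//   struct ggml_tensor * pooled = ggml_pool_2d(ctx, x, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);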
  3468. struct ggml_tensor * ggml_pool_2d_back(
  3469. struct ggml_context * ctx,
  3470. struct ggml_tensor * a,
  3471. struct ggml_tensor * af,
  3472. enum ggml_op_pool op,
  3473. int k0,
  3474. int k1,
  3475. int s0,
  3476. int s1,
  3477. float p0,
  3478. float p1) {
  3479. struct ggml_tensor * result;
  3480. result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
  3481. int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
  3482. ggml_set_op_params(result, params, sizeof(params));
  3483. result->op = GGML_OP_POOL_2D_BACK;
  3484. result->src[0] = a;
  3485. result->src[1] = af;
  3486. return result;
  3487. }
  3488. // ggml_upscale
  3489. static struct ggml_tensor * ggml_upscale_impl(
  3490. struct ggml_context * ctx,
  3491. struct ggml_tensor * a,
  3492. int ne0,
  3493. int ne1,
  3494. int ne2,
  3495. int ne3,
  3496. enum ggml_scale_mode mode) {
  3497. GGML_ASSERT(a->ne[0] <= ne0);
  3498. GGML_ASSERT(a->ne[1] <= ne1);
  3499. GGML_ASSERT(a->ne[2] <= ne2);
  3500. GGML_ASSERT(a->ne[3] <= ne3);
  3501. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  3502. ggml_set_op_params_i32(result, 0, mode);
  3503. result->op = GGML_OP_UPSCALE;
  3504. result->src[0] = a;
  3505. return result;
  3506. }
  3507. struct ggml_tensor * ggml_upscale(
  3508. struct ggml_context * ctx,
  3509. struct ggml_tensor * a,
  3510. int scale_factor,
  3511. enum ggml_scale_mode mode) {
  3512. return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
  3513. }
  3514. struct ggml_tensor * ggml_upscale_ext(
  3515. struct ggml_context * ctx,
  3516. struct ggml_tensor * a,
  3517. int ne0,
  3518. int ne1,
  3519. int ne2,
  3520. int ne3,
  3521. enum ggml_scale_mode mode) {
  3522. return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
  3523. }
  3524. // ggml_pad
  3525. struct ggml_tensor * ggml_pad(
  3526. struct ggml_context * ctx,
  3527. struct ggml_tensor * a,
  3528. int p0,
  3529. int p1,
  3530. int p2,
  3531. int p3) {
  3532. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
  3533. a->ne[0] + p0,
  3534. a->ne[1] + p1,
  3535. a->ne[2] + p2,
  3536. a->ne[3] + p3);
  3537. result->op = GGML_OP_PAD;
  3538. result->src[0] = a;
  3539. return result;
  3540. }
  3541. // ggml_pad_reflect_1d
  3542. struct ggml_tensor * ggml_pad_reflect_1d(
  3543. struct ggml_context * ctx,
  3544. struct ggml_tensor * a,
  3545. int p0,
  3546. int p1) {
  3547. GGML_ASSERT(p0 >= 0);
  3548. GGML_ASSERT(p1 >= 0);
3549. GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
3550. GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
  3551. GGML_ASSERT(ggml_is_contiguous(a));
  3552. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3553. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
  3554. a->ne[0] + p0 + p1,
  3555. a->ne[1],
  3556. a->ne[2],
  3557. a->ne[3]);
  3558. int32_t params[] = { p0, p1 };
  3559. ggml_set_op_params(result, params, sizeof(params));
  3560. result->op = GGML_OP_PAD_REFLECT_1D;
  3561. result->src[0] = a;
  3562. return result;
  3563. }
  3564. // ggml_arange
  3565. struct ggml_tensor * ggml_arange(
  3566. struct ggml_context * ctx,
  3567. float start,
  3568. float stop,
  3569. float step) {
  3570. GGML_ASSERT(stop > start);
  3571. const int64_t steps = (int64_t) ceilf((stop - start) / step);
  3572. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
  3573. ggml_set_op_params_f32(result, 0, start);
  3574. ggml_set_op_params_f32(result, 1, stop);
  3575. ggml_set_op_params_f32(result, 2, step);
  3576. result->op = GGML_OP_ARANGE;
  3577. return result;
  3578. }
  3579. // ggml_timestep_embedding
  3580. struct ggml_tensor * ggml_timestep_embedding(
  3581. struct ggml_context * ctx,
  3582. struct ggml_tensor * timesteps,
  3583. int dim,
  3584. int max_period) {
  3585. int actual_dim = dim;
  3586. if (dim % 2 != 0) {
  3587. actual_dim = dim + 1;
  3588. }
  3589. struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
  3590. ggml_set_op_params_i32(result, 0, dim);
  3591. ggml_set_op_params_i32(result, 1, max_period);
  3592. result->op = GGML_OP_TIMESTEP_EMBEDDING;
  3593. result->src[0] = timesteps;
  3594. return result;
  3595. }
  3596. // ggml_argsort
  3597. struct ggml_tensor * ggml_argsort(
  3598. struct ggml_context * ctx,
  3599. struct ggml_tensor * a,
  3600. enum ggml_sort_order order) {
  3601. GGML_ASSERT(a->ne[0] <= INT32_MAX);
  3602. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
  3603. ggml_set_op_params_i32(result, 0, (int32_t) order);
  3604. result->op = GGML_OP_ARGSORT;
  3605. result->src[0] = a;
  3606. return result;
  3607. }
  3608. // ggml_top_k
  3609. struct ggml_tensor * ggml_top_k(
  3610. struct ggml_context * ctx,
  3611. struct ggml_tensor * a,
  3612. int k) {
  3613. GGML_ASSERT(a->ne[0] >= k);
  3614. struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
  3615. result = ggml_view_4d(ctx, result,
  3616. k, result->ne[1], result->ne[2], result->ne[3],
  3617. result->nb[1], result->nb[2], result->nb[3],
  3618. 0);
  3619. return result;
  3620. }
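// Illustrative sketch, not part of the library: since ggml_top_k is a descending argsort
// followed by a view of the first k entries per row, the result holds I32 indices into
// dimension 0 of a, not the values themselves (logits is a placeholder name):
//
//   struct ggml_tensor * best = ggml_top_k(ctx, logits, 40); // I32 indices, ne[0] == 40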
  3621. // ggml_flash_attn_ext
  3622. struct ggml_tensor * ggml_flash_attn_ext(
  3623. struct ggml_context * ctx,
  3624. struct ggml_tensor * q,
  3625. struct ggml_tensor * k,
  3626. struct ggml_tensor * v,
  3627. struct ggml_tensor * mask,
  3628. float scale,
  3629. float max_bias,
  3630. float logit_softcap) {
  3631. GGML_ASSERT(ggml_can_mul_mat(k, q));
  3632. // TODO: check if vT can be multiplied by (k*qT)
  3633. if (mask) {
  3634. GGML_ASSERT(ggml_is_contiguous(mask));
  3635. GGML_ASSERT(mask->ne[2] == 1);
  3636. GGML_ASSERT(mask->ne[3] == 1);
  3637. GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
  3638. "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
  3639. //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
  3640. }
  3641. if (max_bias > 0.0f) {
  3642. GGML_ASSERT(mask);
  3643. }
  3644. // permute(0, 2, 1, 3)
  3645. int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
  3646. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3647. float params[] = { scale, max_bias, logit_softcap };
  3648. ggml_set_op_params(result, params, sizeof(params));
  3649. result->op = GGML_OP_FLASH_ATTN_EXT;
  3650. result->src[0] = q;
  3651. result->src[1] = k;
  3652. result->src[2] = v;
  3653. result->src[3] = mask;
  3654. return result;
  3655. }
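// Illustrative sketch, not part of the library: q, k and v are laid out per head and the
// mask is expected to be padded to GGML_KQ_MASK_PAD rows. The shapes below follow the
// asserts above but are otherwise assumptions (all names are placeholders):
//
//   // q: [d_head, n_q, n_head, n_batch], k/v: [d_head, n_kv, n_head_kv, n_batch]
//   struct ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
//                                  1.0f/sqrtf((float) d_head), 0.0f, 0.0f);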
  3656. void ggml_flash_attn_ext_set_prec(
  3657. struct ggml_tensor * a,
  3658. enum ggml_prec prec) {
  3659. GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
  3660. const int32_t prec_i32 = (int32_t) prec;
  3661. ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
  3662. }
  3663. enum ggml_prec ggml_flash_attn_ext_get_prec(
  3664. const struct ggml_tensor * a) {
  3665. GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
  3666. const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
  3667. return (enum ggml_prec) prec_i32;
  3668. }
  3669. // ggml_flash_attn_back
  3670. struct ggml_tensor * ggml_flash_attn_back(
  3671. struct ggml_context * ctx,
  3672. struct ggml_tensor * q,
  3673. struct ggml_tensor * k,
  3674. struct ggml_tensor * v,
  3675. struct ggml_tensor * d,
  3676. bool masked) {
  3677. GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
  3678. GGML_ASSERT(ggml_can_mul_mat(k, q));
  3679. // TODO: check if vT can be multiplied by (k*qT)
  3680. // d shape [D,N,ne2,ne3]
  3681. // q shape [D,N,ne2,ne3]
  3682. // k shape [D,M,kvne2,ne3]
  3683. // v shape [M,D,kvne2,ne3]
  3684. const int64_t D = q->ne[0];
  3685. const int64_t N = q->ne[1];
  3686. const int64_t M = k->ne[1];
  3687. const int64_t ne2 = q->ne[2];
  3688. const int64_t ne3 = q->ne[3];
  3689. const int64_t kvne2 = k->ne[2];
  3690. GGML_ASSERT(k->ne[0] == D);
  3691. GGML_ASSERT(v->ne[0] == M);
  3692. GGML_ASSERT(v->ne[1] == D);
  3693. GGML_ASSERT(d->ne[0] == D);
  3694. GGML_ASSERT(d->ne[1] == N);
  3695. GGML_ASSERT(k->ne[2] == kvne2);
  3696. GGML_ASSERT(k->ne[3] == ne3);
  3697. GGML_ASSERT(v->ne[2] == kvne2);
  3698. GGML_ASSERT(v->ne[3] == ne3);
  3699. GGML_ASSERT(d->ne[2] == ne2);
  3700. GGML_ASSERT(d->ne[3] == ne3);
  3701. GGML_ASSERT(ne2 % kvne2 == 0);
3702. // store gradients of q, k and v as contiguous tensors concatenated in result.
  3703. // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
  3704. const int64_t elem_q = ggml_nelements(q);
  3705. const int64_t elem_k = ggml_nelements(k);
  3706. const int64_t elem_v = ggml_nelements(v);
  3707. enum ggml_type result_type = GGML_TYPE_F32;
  3708. GGML_ASSERT(ggml_blck_size(result_type) == 1);
  3709. const size_t tsize = ggml_type_size(result_type);
  3710. const size_t offs_q = 0;
  3711. const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
  3712. const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
  3713. const size_t end = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
  3714. const size_t nelements = (end + tsize - 1)/tsize;
  3715. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
  3716. int32_t masked_i = masked ? 1 : 0;
  3717. ggml_set_op_params(result, &masked_i, sizeof(masked_i));
  3718. result->op = GGML_OP_FLASH_ATTN_BACK;
  3719. result->src[0] = q;
  3720. result->src[1] = k;
  3721. result->src[2] = v;
  3722. result->src[3] = d;
  3723. return result;
  3724. }
  3725. // ggml_ssm_conv
  3726. struct ggml_tensor * ggml_ssm_conv(
  3727. struct ggml_context * ctx,
  3728. struct ggml_tensor * sx,
  3729. struct ggml_tensor * c) {
  3730. GGML_ASSERT(ggml_is_3d(sx));
  3731. GGML_ASSERT(ggml_is_matrix(c));
  3732. const int64_t d_conv = c->ne[0];
  3733. const int64_t d_inner = c->ne[1];
  3734. const int64_t n_t = sx->ne[0] - d_conv + 1; // tokens per sequence
  3735. const int64_t n_s = sx->ne[2];
  3736. // TODO: maybe support other strides than 1?
  3737. // FIXME: this is always true?
  3738. GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
  3739. GGML_ASSERT(sx->ne[1] == d_inner);
  3740. GGML_ASSERT(n_t >= 0);
  3741. struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
  3742. result->op = GGML_OP_SSM_CONV;
  3743. result->src[0] = sx;
  3744. result->src[1] = c;
  3745. return result;
  3746. }
  3747. // ggml_ssm_scan
  3748. struct ggml_tensor * ggml_ssm_scan(
  3749. struct ggml_context * ctx,
  3750. struct ggml_tensor * s,
  3751. struct ggml_tensor * x,
  3752. struct ggml_tensor * dt,
  3753. struct ggml_tensor * A,
  3754. struct ggml_tensor * B,
  3755. struct ggml_tensor * C) {
  3756. GGML_ASSERT(ggml_is_contiguous(s));
  3757. GGML_ASSERT(ggml_is_contiguous(x));
  3758. GGML_ASSERT(ggml_is_contiguous(dt));
  3759. GGML_ASSERT(ggml_is_contiguous(A));
  3760. GGML_ASSERT(ggml_is_matrix(A));
  3761. GGML_ASSERT(ggml_is_3d(B));
  3762. GGML_ASSERT(ggml_is_3d(s));
  3763. GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
  3764. GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
  3765. GGML_ASSERT(ggml_are_same_shape(x, dt));
  3766. GGML_ASSERT(ggml_are_same_shape(B, C));
  3767. {
  3768. const int64_t d_state = s->ne[0];
  3769. const int64_t d_inner = s->ne[1];
  3770. const int64_t n_seq_tokens = x->ne[1];
  3771. const int64_t n_seqs = x->ne[2];
  3772. GGML_ASSERT(s->ne[2] == n_seqs);
  3773. GGML_ASSERT(x->ne[0] == d_inner);
  3774. GGML_ASSERT(A->ne[0] == d_state);
  3775. GGML_ASSERT(A->ne[1] == d_inner);
  3776. GGML_ASSERT(B->ne[0] == d_state);
  3777. GGML_ASSERT(B->ne[1] == n_seq_tokens);
  3778. GGML_ASSERT(B->ne[2] == n_seqs);
  3779. }
  3780. // concatenated y + ssm_states
  3781. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));
  3782. result->op = GGML_OP_SSM_SCAN;
  3783. result->src[0] = s;
  3784. result->src[1] = x;
  3785. result->src[2] = dt;
  3786. result->src[3] = A;
  3787. result->src[4] = B;
  3788. result->src[5] = C;
  3789. return result;
  3790. }
  3791. // ggml_win_part
  3792. struct ggml_tensor * ggml_win_part(
  3793. struct ggml_context * ctx,
  3794. struct ggml_tensor * a,
  3795. int w) {
  3796. GGML_ASSERT(a->ne[3] == 1);
  3797. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3798. // padding
  3799. const int px = (w - a->ne[1]%w)%w;
  3800. const int py = (w - a->ne[2]%w)%w;
  3801. const int npx = (px + a->ne[1])/w;
  3802. const int npy = (py + a->ne[2])/w;
  3803. const int np = npx*npy;
  3804. const int64_t ne[4] = { a->ne[0], w, w, np, };
  3805. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3806. int32_t params[] = { npx, npy, w };
  3807. ggml_set_op_params(result, params, sizeof(params));
  3808. result->op = GGML_OP_WIN_PART;
  3809. result->src[0] = a;
  3810. return result;
  3811. }
  3812. // ggml_win_unpart
  3813. struct ggml_tensor * ggml_win_unpart(
  3814. struct ggml_context * ctx,
  3815. struct ggml_tensor * a,
  3816. int w0,
  3817. int h0,
  3818. int w) {
  3819. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3820. const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
  3821. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
  3822. int32_t params[] = { w };
  3823. ggml_set_op_params(result, params, sizeof(params));
  3824. result->op = GGML_OP_WIN_UNPART;
  3825. result->src[0] = a;
  3826. return result;
  3827. }
  3828. // ggml_get_rel_pos
  3829. struct ggml_tensor * ggml_get_rel_pos(
  3830. struct ggml_context * ctx,
  3831. struct ggml_tensor * a,
  3832. int qh,
  3833. int kh) {
  3834. GGML_ASSERT(qh == kh);
  3835. GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
  3836. const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
  3837. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
  3838. result->op = GGML_OP_GET_REL_POS;
  3839. result->src[0] = a;
  3840. return result;
  3841. }
  3842. // ggml_add_rel_pos
  3843. static struct ggml_tensor * ggml_add_rel_pos_impl(
  3844. struct ggml_context * ctx,
  3845. struct ggml_tensor * a,
  3846. struct ggml_tensor * pw,
  3847. struct ggml_tensor * ph,
  3848. bool inplace) {
  3849. GGML_ASSERT(ggml_are_same_shape(pw, ph));
  3850. GGML_ASSERT(ggml_is_contiguous(a));
  3851. GGML_ASSERT(ggml_is_contiguous(pw));
  3852. GGML_ASSERT(ggml_is_contiguous(ph));
  3853. GGML_ASSERT(ph->type == GGML_TYPE_F32);
  3854. GGML_ASSERT(pw->type == GGML_TYPE_F32);
  3855. GGML_ASSERT(pw->ne[3] == a->ne[2]);
  3856. GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
  3857. GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
  3858. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3859. ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
  3860. result->op = GGML_OP_ADD_REL_POS;
  3861. result->src[0] = a;
  3862. result->src[1] = pw;
  3863. result->src[2] = ph;
  3864. return result;
  3865. }
  3866. struct ggml_tensor * ggml_add_rel_pos(
  3867. struct ggml_context * ctx,
  3868. struct ggml_tensor * a,
  3869. struct ggml_tensor * pw,
  3870. struct ggml_tensor * ph) {
  3871. return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
  3872. }
  3873. struct ggml_tensor * ggml_add_rel_pos_inplace(
  3874. struct ggml_context * ctx,
  3875. struct ggml_tensor * a,
  3876. struct ggml_tensor * pw,
  3877. struct ggml_tensor * ph) {
  3878. return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
  3879. }
  3880. // ggml_rwkv_wkv6
  3881. struct ggml_tensor * ggml_rwkv_wkv6(
  3882. struct ggml_context * ctx,
  3883. struct ggml_tensor * k,
  3884. struct ggml_tensor * v,
  3885. struct ggml_tensor * r,
  3886. struct ggml_tensor * tf,
  3887. struct ggml_tensor * td,
  3888. struct ggml_tensor * state) {
  3889. GGML_ASSERT(ggml_is_contiguous(k));
  3890. GGML_ASSERT(ggml_is_contiguous(v));
  3891. GGML_ASSERT(ggml_is_contiguous(r));
  3892. GGML_ASSERT(ggml_is_contiguous(tf));
  3893. GGML_ASSERT(ggml_is_contiguous(td));
  3894. GGML_ASSERT(ggml_is_contiguous(state));
  3895. const int64_t S = k->ne[0];
  3896. const int64_t H = k->ne[1];
  3897. const int64_t n_tokens = k->ne[2];
  3898. const int64_t n_seqs = state->ne[1];
  3899. {
  3900. GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
  3901. GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
  3902. GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
  3903. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3904. }
  3905. // concat output and new_state
  3906. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3907. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3908. result->op = GGML_OP_RWKV_WKV6;
  3909. result->src[0] = k;
  3910. result->src[1] = v;
  3911. result->src[2] = r;
  3912. result->src[3] = tf;
  3913. result->src[4] = td;
  3914. result->src[5] = state;
  3915. return result;
  3916. }
  3917. // ggml_gated_linear_attn
  3918. struct ggml_tensor * ggml_gated_linear_attn(
  3919. struct ggml_context * ctx,
  3920. struct ggml_tensor * k,
  3921. struct ggml_tensor * v,
  3922. struct ggml_tensor * q,
  3923. struct ggml_tensor * g,
  3924. struct ggml_tensor * state,
  3925. float scale) {
  3926. GGML_ASSERT(ggml_is_contiguous(k));
  3927. GGML_ASSERT(ggml_is_contiguous(v));
  3928. GGML_ASSERT(ggml_is_contiguous(q));
  3929. GGML_ASSERT(ggml_is_contiguous(g));
  3930. GGML_ASSERT(ggml_is_contiguous(state));
  3931. const int64_t S = k->ne[0];
  3932. const int64_t H = k->ne[1];
  3933. const int64_t n_tokens = k->ne[2];
  3934. const int64_t n_seqs = state->ne[1];
  3935. {
  3936. GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
  3937. GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
  3938. GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
  3939. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3940. }
  3941. // concat output and new_state
  3942. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3943. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3944. ggml_set_op_params_f32(result, 0, scale);
  3945. result->op = GGML_OP_GATED_LINEAR_ATTN;
  3946. result->src[0] = k;
  3947. result->src[1] = v;
  3948. result->src[2] = q;
  3949. result->src[3] = g;
  3950. result->src[4] = state;
  3951. return result;
  3952. }
  3953. // ggml_rwkv_wkv7
  3954. struct ggml_tensor * ggml_rwkv_wkv7(
  3955. struct ggml_context * ctx,
  3956. struct ggml_tensor * r,
  3957. struct ggml_tensor * w,
  3958. struct ggml_tensor * k,
  3959. struct ggml_tensor * v,
  3960. struct ggml_tensor * a,
  3961. struct ggml_tensor * b,
  3962. struct ggml_tensor * state) {
  3963. GGML_ASSERT(ggml_is_contiguous(r));
  3964. GGML_ASSERT(ggml_is_contiguous(w));
  3965. GGML_ASSERT(ggml_is_contiguous(k));
  3966. GGML_ASSERT(ggml_is_contiguous(v));
  3967. GGML_ASSERT(ggml_is_contiguous(a));
  3968. GGML_ASSERT(ggml_is_contiguous(b));
  3969. GGML_ASSERT(ggml_is_contiguous(state));
  3970. const int64_t S = k->ne[0];
  3971. const int64_t H = k->ne[1];
  3972. const int64_t n_tokens = k->ne[2];
  3973. const int64_t n_seqs = state->ne[1];
  3974. {
  3975. GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
  3976. GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
  3977. GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
  3978. GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
  3979. GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
  3980. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3981. }
  3982. // concat output and new_state
  3983. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3984. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3985. result->op = GGML_OP_RWKV_WKV7;
  3986. result->src[0] = r;
  3987. result->src[1] = w;
  3988. result->src[2] = k;
  3989. result->src[3] = v;
  3990. result->src[4] = a;
  3991. result->src[5] = b;
  3992. result->src[6] = state;
  3993. return result;
  3994. }
  3995. // ggml_unary
  3996. static struct ggml_tensor * ggml_unary_impl(
  3997. struct ggml_context * ctx,
  3998. struct ggml_tensor * a,
  3999. enum ggml_unary_op op,
  4000. bool inplace) {
  4001. GGML_ASSERT(ggml_is_contiguous_1(a));
  4002. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4003. ggml_set_op_params_i32(result, 0, (int32_t) op);
  4004. result->op = GGML_OP_UNARY;
  4005. result->src[0] = a;
  4006. return result;
  4007. }
  4008. struct ggml_tensor * ggml_unary(
  4009. struct ggml_context * ctx,
  4010. struct ggml_tensor * a,
  4011. enum ggml_unary_op op) {
  4012. return ggml_unary_impl(ctx, a, op, false);
  4013. }
  4014. struct ggml_tensor * ggml_unary_inplace(
  4015. struct ggml_context * ctx,
  4016. struct ggml_tensor * a,
  4017. enum ggml_unary_op op) {
  4018. return ggml_unary_impl(ctx, a, op, true);
  4019. }
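// Illustrative sketch (not part of this file): ggml_unary is the generic entry
// point behind the named unary helpers in ggml.h, so the two forms below are
// assumed to build the same op:
//
//   struct ggml_tensor * y0 = ggml_gelu (ctx, x);                        // named helper
//   struct ggml_tensor * y1 = ggml_unary(ctx, x, GGML_UNARY_OP_GELU);    // generic form
//
// Note the ggml_is_contiguous_1 assertion above: rows of the input must be
// contiguous, so e.g. a transposed view has to go through ggml_cont first.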
  4020. // ggml_map_custom1
  4021. static struct ggml_tensor * ggml_map_custom1_impl(
  4022. struct ggml_context * ctx,
  4023. struct ggml_tensor * a,
  4024. const ggml_custom1_op_t fun,
  4025. int n_tasks,
  4026. void * userdata,
  4027. bool inplace) {
  4028. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4029. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4030. struct ggml_map_custom1_op_params params = {
  4031. /*.fun =*/ fun,
  4032. /*.n_tasks =*/ n_tasks,
  4033. /*.userdata =*/ userdata
  4034. };
  4035. ggml_set_op_params(result, &params, sizeof(params));
  4036. result->op = GGML_OP_MAP_CUSTOM1;
  4037. result->src[0] = a;
  4038. return result;
  4039. }
  4040. struct ggml_tensor * ggml_map_custom1(
  4041. struct ggml_context * ctx,
  4042. struct ggml_tensor * a,
  4043. const ggml_custom1_op_t fun,
  4044. int n_tasks,
  4045. void * userdata) {
  4046. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
  4047. }
  4048. struct ggml_tensor * ggml_map_custom1_inplace(
  4049. struct ggml_context * ctx,
  4050. struct ggml_tensor * a,
  4051. const ggml_custom1_op_t fun,
  4052. int n_tasks,
  4053. void * userdata) {
  4054. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
  4055. }
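// Illustrative sketch (not part of this file) of a ggml_custom1_op_t callback
// used with ggml_map_custom1. The callback is invoked by n_tasks threads
// (ith = thread index, nth = number of threads); here each thread squares a
// disjoint slice of a contiguous F32 tensor. The helper name square_f32 is an
// assumption, not an existing ggml function.
//
//   static void square_f32(struct ggml_tensor * dst, const struct ggml_tensor * a,
//                          int ith, int nth, void * userdata) {
//       GGML_ASSERT(ggml_is_contiguous(a) && a->type == GGML_TYPE_F32);
//       const int64_t n  = ggml_nelements(a);
//       const int64_t i0 = (ith + 0)*n/nth;   // this thread's slice
//       const int64_t i1 = (ith + 1)*n/nth;
//       const float * x = (const float *) a->data;
//       float       * y = (float       *) dst->data;
//       for (int64_t i = i0; i < i1; ++i) {
//           y[i] = x[i]*x[i];
//       }
//       (void) userdata;
//   }
//
//   struct ggml_tensor * y = ggml_map_custom1(ctx, x, square_f32, GGML_N_TASKS_MAX, NULL);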
  4056. // ggml_map_custom2
  4057. static struct ggml_tensor * ggml_map_custom2_impl(
  4058. struct ggml_context * ctx,
  4059. struct ggml_tensor * a,
  4060. struct ggml_tensor * b,
  4061. const ggml_custom2_op_t fun,
  4062. int n_tasks,
  4063. void * userdata,
  4064. bool inplace) {
  4065. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4066. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4067. struct ggml_map_custom2_op_params params = {
  4068. /*.fun =*/ fun,
  4069. /*.n_tasks =*/ n_tasks,
  4070. /*.userdata =*/ userdata
  4071. };
  4072. ggml_set_op_params(result, &params, sizeof(params));
  4073. result->op = GGML_OP_MAP_CUSTOM2;
  4074. result->src[0] = a;
  4075. result->src[1] = b;
  4076. return result;
  4077. }
  4078. struct ggml_tensor * ggml_map_custom2(
  4079. struct ggml_context * ctx,
  4080. struct ggml_tensor * a,
  4081. struct ggml_tensor * b,
  4082. const ggml_custom2_op_t fun,
  4083. int n_tasks,
  4084. void * userdata) {
  4085. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
  4086. }
  4087. struct ggml_tensor * ggml_map_custom2_inplace(
  4088. struct ggml_context * ctx,
  4089. struct ggml_tensor * a,
  4090. struct ggml_tensor * b,
  4091. const ggml_custom2_op_t fun,
  4092. int n_tasks,
  4093. void * userdata) {
  4094. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
  4095. }
  4096. // ggml_map_custom3
  4097. static struct ggml_tensor * ggml_map_custom3_impl(
  4098. struct ggml_context * ctx,
  4099. struct ggml_tensor * a,
  4100. struct ggml_tensor * b,
  4101. struct ggml_tensor * c,
  4102. const ggml_custom3_op_t fun,
  4103. int n_tasks,
  4104. void * userdata,
  4105. bool inplace) {
  4106. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4107. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4108. struct ggml_map_custom3_op_params params = {
  4109. /*.fun =*/ fun,
  4110. /*.n_tasks =*/ n_tasks,
  4111. /*.userdata =*/ userdata
  4112. };
  4113. ggml_set_op_params(result, &params, sizeof(params));
  4114. result->op = GGML_OP_MAP_CUSTOM3;
  4115. result->src[0] = a;
  4116. result->src[1] = b;
  4117. result->src[2] = c;
  4118. return result;
  4119. }
  4120. struct ggml_tensor * ggml_map_custom3(
  4121. struct ggml_context * ctx,
  4122. struct ggml_tensor * a,
  4123. struct ggml_tensor * b,
  4124. struct ggml_tensor * c,
  4125. const ggml_custom3_op_t fun,
  4126. int n_tasks,
  4127. void * userdata) {
  4128. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
  4129. }
  4130. struct ggml_tensor * ggml_map_custom3_inplace(
  4131. struct ggml_context * ctx,
  4132. struct ggml_tensor * a,
  4133. struct ggml_tensor * b,
  4134. struct ggml_tensor * c,
  4135. const ggml_custom3_op_t fun,
  4136. int n_tasks,
  4137. void * userdata) {
  4138. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
  4139. }
  4140. struct ggml_tensor * ggml_custom_4d(
  4141. struct ggml_context * ctx,
  4142. enum ggml_type type,
  4143. int64_t ne0,
  4144. int64_t ne1,
  4145. int64_t ne2,
  4146. int64_t ne3,
  4147. struct ggml_tensor ** args,
  4148. int n_args,
  4149. ggml_custom_op_t fun,
  4150. int n_tasks,
  4151. void * userdata) {
  4152. GGML_ASSERT(n_args < GGML_MAX_SRC);
  4153. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
  4154. struct ggml_custom_op_params params = {
  4155. /*.fun =*/ fun,
  4156. /*.n_tasks =*/ n_tasks,
  4157. /*.userdata =*/ userdata
  4158. };
  4159. ggml_set_op_params(result, &params, sizeof(params));
  4160. result->op = GGML_OP_CUSTOM;
  4161. for (int i = 0; i < n_args; i++) {
  4162. result->src[i] = args[i];
  4163. }
  4164. return result;
  4165. }
  4166. struct ggml_tensor * ggml_custom_inplace(
  4167. struct ggml_context * ctx,
  4168. struct ggml_tensor * a,
  4169. struct ggml_tensor ** args,
  4170. int n_args,
  4171. ggml_custom_op_t fun,
  4172. int n_tasks,
  4173. void * userdata) {
  4174. GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
  4175. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4176. struct ggml_custom_op_params params = {
  4177. /*.fun =*/ fun,
  4178. /*.n_tasks =*/ n_tasks,
  4179. /*.userdata =*/ userdata
  4180. };
  4181. ggml_set_op_params(result, &params, sizeof(params));
  4182. result->op = GGML_OP_CUSTOM;
  4183. result->src[0] = a;
  4184. for (int i = 0; i < n_args; i++) {
  4185. result->src[i + 1] = args[i];
  4186. }
  4187. return result;
  4188. }
  4189. // ggml_cross_entropy_loss
  4190. struct ggml_tensor * ggml_cross_entropy_loss(
  4191. struct ggml_context * ctx,
  4192. struct ggml_tensor * a,
  4193. struct ggml_tensor * b) {
  4194. GGML_ASSERT(ggml_are_same_shape(a, b));
  4195. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  4196. result->op = GGML_OP_CROSS_ENTROPY_LOSS;
  4197. result->src[0] = a;
  4198. result->src[1] = b;
  4199. return result;
  4200. }
  4201. // ggml_cross_entropy_loss_back
  4202. struct ggml_tensor * ggml_cross_entropy_loss_back(
  4203. struct ggml_context * ctx,
  4204. struct ggml_tensor * a,
  4205. struct ggml_tensor * b,
  4206. struct ggml_tensor * c) {
  4207. GGML_ASSERT(ggml_is_scalar(a));
  4208. GGML_ASSERT(ggml_are_same_shape(b, c));
  4209. struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
  4210. result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
  4211. result->src[0] = a;
  4212. result->src[1] = b;
  4213. result->src[2] = c;
  4214. return result;
  4215. }
  4216. // opt_step_adamw
  4217. struct ggml_tensor * ggml_opt_step_adamw(
  4218. struct ggml_context * ctx,
  4219. struct ggml_tensor * a,
  4220. struct ggml_tensor * grad,
  4221. struct ggml_tensor * m,
  4222. struct ggml_tensor * v,
  4223. struct ggml_tensor * adamw_params) {
  4224. GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
  4225. GGML_ASSERT(ggml_are_same_shape(a, grad));
  4226. GGML_ASSERT(ggml_are_same_shape(a, m));
  4227. GGML_ASSERT(ggml_are_same_shape(a, v));
  4228. GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
  4229. GGML_ASSERT(ggml_nelements(adamw_params) == 7);
  4230. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4231. result->op = GGML_OP_OPT_STEP_ADAMW;
  4232. result->src[0] = a;
  4233. result->src[1] = grad;
  4234. result->src[2] = m;
  4235. result->src[3] = v;
  4236. result->src[4] = adamw_params;
  4237. return result;
  4238. }
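// Illustrative sketch (not part of this file): the AdamW step consumes an F32
// tensor with the 7 hyper-parameters checked above (learning rate, beta1, beta2,
// eps, weight decay and two precomputed bias-correction terms; the exact layout
// is defined by the backend kernels and by ggml-opt, not asserted here).
//
//   struct ggml_tensor * m      = ggml_dup_tensor(ctx, w);                    // 1st moment
//   struct ggml_tensor * v      = ggml_dup_tensor(ctx, w);                    // 2nd moment
//   struct ggml_tensor * params = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 7);  // filled in before each step
//   struct ggml_tensor * step   = ggml_opt_step_adamw(ctx, w, grad_w, m, v, params);
//   ggml_build_forward_expand(gb, step);   // appended after the backward graph
//
// w must have been marked with ggml_set_param, otherwise the first assert fires;
// grad_w would typically come from ggml_graph_get_grad.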
  4239. ////////////////////////////////////////////////////////////////////////////////
  4240. struct ggml_hash_set ggml_hash_set_new(size_t size) {
  4241. size = ggml_hash_size(size);
  4242. struct ggml_hash_set result;
  4243. result.size = size;
  4244. result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
  4245. result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
  4246. return result;
  4247. }
  4248. void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
  4249. memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
  4250. }
  4251. void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
  4252. GGML_FREE(hash_set->used);
  4253. GGML_FREE(hash_set->keys);
  4254. }
  4255. size_t ggml_hash_size(size_t min_sz) {
  4256. // next primes after powers of two
  4257. static const size_t primes[] = {
  4258. 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
  4259. 2053, 4099, 8209, 16411, 32771, 65537, 131101,
  4260. 262147, 524309, 1048583, 2097169, 4194319, 8388617,
  4261. 16777259, 33554467, 67108879, 134217757, 268435459,
  4262. 536870923, 1073741827, 2147483659
  4263. };
  4264. static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
4265. // find the smallest prime that is larger than or equal to min_sz
  4266. size_t l = 0;
  4267. size_t r = n_primes;
  4268. while (l < r) {
  4269. size_t m = (l + r)/2;
  4270. if (primes[m] < min_sz) {
  4271. l = m + 1;
  4272. } else {
  4273. r = m;
  4274. }
  4275. }
  4276. size_t sz = l < n_primes ? primes[l] : min_sz | 1;
  4277. return sz;
  4278. }
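// Worked example (illustrative, not part of this file): the binary search above
// returns the first prime in the table that is >= min_sz, e.g.
//   ggml_hash_size(100)  -> 131
//   ggml_hash_size(257)  -> 257
//   ggml_hash_size(4100) -> 8209
// Only past the largest table entry does it fall back to min_sz | 1, i.e.
// min_sz rounded up to an odd number.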
  4279. struct hash_map {
  4280. struct ggml_hash_set set;
  4281. struct ggml_tensor ** vals;
  4282. };
  4283. static struct hash_map * ggml_new_hash_map(size_t size) {
  4284. struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
  4285. result->set = ggml_hash_set_new(size);
  4286. result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
  4287. return result;
  4288. }
  4289. static void ggml_hash_map_free(struct hash_map * map) {
  4290. ggml_hash_set_free(&map->set);
  4291. GGML_FREE(map->vals);
  4292. GGML_FREE(map);
  4293. }
  4294. // utility functions to change gradients
4295. // isrc is the index of tensor in cgraph->visited_hash_set.keys
4296. // the corresponding gradients (or gradient accumulators) are also stored at position isrc
4297. // if tensor has a gradient accumulator, modify that accumulator in-place
4298. // else if there is no gradient for tensor yet, set the corresponding value directly
4299. // else, just add/subtract/etc. the gradients
  4300. static void ggml_add_or_set(
  4301. struct ggml_context * ctx,
  4302. struct ggml_cgraph * cgraph,
  4303. size_t isrc,
  4304. struct ggml_tensor * tensor) {
  4305. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4306. GGML_ASSERT(src);
  4307. if (cgraph->grads[isrc]) {
  4308. cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
  4309. } else {
  4310. cgraph->grads[isrc] = tensor;
  4311. }
  4312. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4313. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4314. }
  4315. static void ggml_acc_or_set(
  4316. struct ggml_context * ctx,
  4317. struct ggml_cgraph * cgraph,
  4318. size_t isrc,
  4319. struct ggml_tensor * tensor,
  4320. const size_t nb1,
  4321. const size_t nb2,
  4322. const size_t nb3,
  4323. const size_t offset) {
  4324. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4325. GGML_ASSERT(src);
  4326. if (cgraph->grads[isrc]) {
  4327. cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
  4328. } else {
4329. struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME: this is going to produce NaN if src contains inf/NaN
  4330. cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
  4331. }
  4332. ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
  4333. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4334. }
  4335. static void ggml_add1_or_set(
  4336. struct ggml_context * ctx,
  4337. struct ggml_cgraph * cgraph,
  4338. size_t isrc,
  4339. struct ggml_tensor * tensor) {
  4340. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4341. GGML_ASSERT(src);
  4342. if (cgraph->grads[isrc]) {
  4343. cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
  4344. } else {
  4345. cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
  4346. }
  4347. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4348. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4349. }
  4350. static void ggml_sub_or_set(
  4351. struct ggml_context * ctx,
  4352. struct ggml_cgraph * cgraph,
  4353. size_t isrc,
  4354. struct ggml_tensor * tensor) {
  4355. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4356. GGML_ASSERT(src);
  4357. if (cgraph->grads[isrc]) {
  4358. cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
  4359. } else {
  4360. cgraph->grads[isrc] = ggml_neg(ctx, tensor);
  4361. }
  4362. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4363. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4364. }
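// Illustrative sketch (not part of this file) of how ggml_compute_backward below
// uses these helpers: for y = ggml_mul(ctx, a, b) the contributions
//   da += dy*b   and   db += a*dy
// are accumulated with ggml_add_or_set, which either sets the gradient tensor
// (first contribution) or chains a ggml_add on top of the existing one:
//
//   ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, b));
//   ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, a, grad));
//
// ggml_sub_or_set, ggml_add1_or_set and ggml_acc_or_set play the same role for
// negated, broadcast-scalar and strided-view contributions respectively.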
  4365. static void ggml_compute_backward(
  4366. struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
  4367. struct ggml_tensor * tensor = cgraph->nodes[i];
  4368. struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, tensor);
  4369. if (!grad) {
  4370. return;
  4371. }
  4372. struct ggml_tensor * src0 = tensor->src[0];
  4373. struct ggml_tensor * src1 = tensor->src[1];
  4374. struct ggml_tensor * src2 = tensor->src[2];
  4375. struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
  4376. const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
  4377. const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
  4378. const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
  4379. const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
  4380. const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
  4381. const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
  4382. switch (tensor->op) {
  4383. case GGML_OP_DUP: {
  4384. if (src0_needs_grads) {
  4385. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4386. }
  4387. } break;
  4388. case GGML_OP_ADD: {
  4389. if (src0_needs_grads) {
  4390. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4391. }
  4392. if (src1_needs_grads) {
  4393. struct ggml_tensor * tmp = grad;
  4394. if (!ggml_are_same_shape(src0, src1)) {
  4395. tmp = ggml_repeat_back(ctx, tmp, src1);
  4396. }
  4397. ggml_add_or_set(ctx, cgraph, isrc1, tmp);
  4398. }
  4399. } break;
  4400. case GGML_OP_ADD1: {
  4401. if (src0_needs_grads) {
  4402. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4403. }
  4404. if (src1_needs_grads) {
  4405. ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
  4406. }
  4407. } break;
  4408. case GGML_OP_ACC: {
  4409. if (src0_needs_grads) {
  4410. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4411. }
  4412. if (src1_needs_grads) {
  4413. const size_t nb1 = ((int32_t *) tensor->op_params)[0];
  4414. const size_t nb2 = ((int32_t *) tensor->op_params)[1];
  4415. const size_t nb3 = ((int32_t *) tensor->op_params)[2];
  4416. const size_t offset = ((int32_t *) tensor->op_params)[3];
  4417. struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
  4418. grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4419. nb1, nb2, nb3, offset);
  4420. ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
  4421. }
  4422. } break;
  4423. case GGML_OP_SUB: {
  4424. if (src0_needs_grads) {
  4425. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4426. }
  4427. if (src1_needs_grads) {
  4428. ggml_sub_or_set(ctx, cgraph, isrc1, grad);
  4429. }
  4430. } break;
  4431. case GGML_OP_MUL: {
  4432. if (src0_needs_grads) {
  4433. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
  4434. }
  4435. if (src1_needs_grads) {
  4436. struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
  4437. if (!ggml_are_same_shape(src0, src1)) {
  4438. tmp = ggml_repeat_back(ctx, tmp, src1);
  4439. }
  4440. ggml_add_or_set(ctx, cgraph, isrc1, tmp);
  4441. }
  4442. } break;
  4443. case GGML_OP_DIV: {
  4444. if (src0_needs_grads) {
  4445. ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
  4446. }
  4447. if (src1_needs_grads) {
  4448. ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
  4449. }
  4450. } break;
  4451. case GGML_OP_SQR: {
  4452. if (src0_needs_grads) {
  4453. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
  4454. }
  4455. } break;
  4456. case GGML_OP_SQRT: {
  4457. if (src0_needs_grads) {
  4458. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
  4459. }
  4460. } break;
  4461. case GGML_OP_LOG: {
  4462. if (src0_needs_grads) {
  4463. ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
  4464. }
  4465. } break;
  4466. case GGML_OP_SIN: {
  4467. if (src0_needs_grads) {
  4468. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
  4469. }
  4470. } break;
  4471. case GGML_OP_COS: {
  4472. if (src0_needs_grads) {
  4473. ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
  4474. }
  4475. } break;
  4476. case GGML_OP_SUM: {
  4477. if (src0_needs_grads) {
  4478. ggml_add1_or_set(ctx, cgraph, isrc0, grad);
  4479. }
  4480. } break;
  4481. case GGML_OP_SUM_ROWS: {
  4482. if (src0_needs_grads) {
  4483. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
  4484. }
  4485. } break;
  4486. case GGML_OP_MEAN: {
  4487. if (src0_needs_grads) {
  4488. ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
  4489. }
  4490. } break;
  4491. case GGML_OP_REPEAT: {
  4492. if (src0_needs_grads) {
  4493. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
  4494. }
  4495. } break;
  4496. case GGML_OP_REPEAT_BACK: {
  4497. if (src0_needs_grads) {
  4498. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
  4499. }
  4500. } break;
  4501. case GGML_OP_RMS_NORM: {
  4502. if (src0_needs_grads) {
  4503. float eps;
  4504. memcpy(&eps, tensor->op_params, sizeof(float));
  4505. ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
  4506. }
  4507. } break;
  4508. case GGML_OP_MUL_MAT: {
  4509. // https://cs231n.github.io/optimization-2/#staged
  4510. // # forward pass
  4511. // s0 = np.random.randn(5, 10)
  4512. // s1 = np.random.randn(10, 3)
  4513. // t = s0.dot(s1)
  4514. // # now suppose we had the gradient on t from above in the circuit
  4515. // dt = np.random.randn(*t.shape) # same shape as t
  4516. // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
  4517. // ds1 = t.T.dot(dt)
  4518. // tensor.shape [m,p,qq,rr]
  4519. // src0.shape [n,m,q1,r1]
  4520. // src1.shape [n,p,qq,rr]
  4521. if (src0_needs_grads) {
  4522. GGML_ASSERT(grad->ne[2] == src1->ne[2]);
  4523. GGML_ASSERT(grad->ne[3] == src1->ne[3]);
  4524. struct ggml_tensor * tmp =
  4525. ggml_out_prod(ctx, // [n,m,qq,rr]
  4526. src1, // [n,p,qq,rr]
  4527. grad); // [m,p,qq,rr]
  4528. if (!ggml_are_same_shape(tmp, src0)) {
  4529. GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
  4530. GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
  4531. GGML_ASSERT(tmp->ne[3] == 1);
  4532. const int64_t nr2 = tmp->ne[2] / src0->ne[2];
  4533. const size_t nb2 = tmp->nb[2] * nr2;
  4534. const size_t nb3 = tmp->nb[2];
  4535. tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
  4536. tmp = ggml_repeat_back(ctx, tmp, src0);
  4537. }
  4538. ggml_add_or_set(ctx, cgraph, isrc0, tmp);
  4539. }
  4540. if (src1_needs_grads) {
  4541. ggml_add_or_set(ctx, cgraph, isrc1,
  4542. // ggml_mul_mat(ctx, // [n,p,qq,rr]
  4543. // ggml_cont(ctx, // [m,n,q1,r1]
  4544. // ggml_transpose(ctx, src0)), // [m,n,q1,r1]
  4545. // grad), // [m,p,qq,rr]
4546. // when src0 is bigger than tensor->grad (this is mostly the case in llama),
4547. // avoid transposing src0; instead transpose the smaller tensor->grad
4548. // and then use ggml_out_prod
  4549. ggml_out_prod(ctx, // [n,p,qq,rr]
  4550. src0, // [n,m,q1,r1]
  4551. ggml_transpose(ctx, // [p,m,qq,rr]
  4552. grad))); // [m,p,qq,rr]
  4553. }
  4554. } break;
  4555. case GGML_OP_SCALE: {
  4556. if (src0_needs_grads) {
  4557. float s;
  4558. memcpy(&s, tensor->op_params, sizeof(float));
  4559. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, false));
  4560. }
  4561. } break;
  4562. case GGML_OP_SET: {
  4563. const size_t nb1 = ((const int32_t *) tensor->op_params)[0];
  4564. const size_t nb2 = ((const int32_t *) tensor->op_params)[1];
  4565. const size_t nb3 = ((const int32_t *) tensor->op_params)[2];
  4566. const size_t offset = ((const int32_t *) tensor->op_params)[3];
  4567. struct ggml_tensor * tensor_grad_view = NULL;
  4568. if (src0_needs_grads || src1_needs_grads) {
  4569. GGML_ASSERT(src0->type == tensor->type);
  4570. GGML_ASSERT(!cgraph->grads[isrc0] || cgraph->grads[isrc0]->type == grad->type);
  4571. GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
  4572. tensor_grad_view = ggml_view_4d(ctx,
  4573. grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4574. nb1, nb2, nb3, offset);
  4575. }
  4576. if (src0_needs_grads) {
  4577. struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
  4578. ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
  4579. }
  4580. if (src1_needs_grads) {
  4581. ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
  4582. }
  4583. } break;
  4584. case GGML_OP_CPY: {
4585. // cpy overwrites the value of src1 with src0 and returns view(src1)
  4586. // the overwriting is mathematically equivalent to:
  4587. // tensor = src0 * 1 + src1 * 0
  4588. if (src0_needs_grads) {
  4589. // dsrc0 = dtensor * 1
  4590. ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
  4591. }
  4592. if (src1_needs_grads) {
  4593. // dsrc1 = dtensor * 0 -> noop
  4594. }
  4595. } break;
  4596. case GGML_OP_CONT: {
  4597. // same as cpy
  4598. if (src0_needs_grads) {
  4599. GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
  4600. GGML_ASSERT(ggml_is_contiguous(grad));
  4601. GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
  4602. ggml_add_or_set(ctx, cgraph, isrc0,
  4603. ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
  4604. }
  4605. } break;
  4606. case GGML_OP_RESHAPE: {
  4607. if (src0_needs_grads) {
  4608. struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
  4609. ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
  4610. }
  4611. } break;
  4612. case GGML_OP_VIEW: {
  4613. if (src0_needs_grads) {
  4614. size_t offset;
  4615. memcpy(&offset, tensor->op_params, sizeof(offset));
  4616. size_t nb1 = tensor->nb[1];
  4617. size_t nb2 = tensor->nb[2];
  4618. size_t nb3 = tensor->nb[3];
  4619. if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
  4620. // gradient is typically F32, but src0 could be other type
  4621. size_t ng = ggml_element_size(cgraph->grads[isrc0]);
  4622. size_t n0 = ggml_element_size(src0);
  4623. GGML_ASSERT(offset % n0 == 0);
  4624. GGML_ASSERT(nb1 % n0 == 0);
  4625. GGML_ASSERT(nb2 % n0 == 0);
  4626. GGML_ASSERT(nb3 % n0 == 0);
  4627. offset = (offset / n0) * ng;
  4628. nb1 = (nb1 / n0) * ng;
  4629. nb2 = (nb2 / n0) * ng;
  4630. nb3 = (nb3 / n0) * ng;
  4631. }
  4632. ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
  4633. }
  4634. } break;
  4635. case GGML_OP_PERMUTE: {
  4636. if (src0_needs_grads) {
  4637. const int32_t * axes = (const int32_t *) tensor->op_params;
  4638. const int axis0 = axes[0] & 0x3;
  4639. const int axis1 = axes[1] & 0x3;
  4640. const int axis2 = axes[2] & 0x3;
  4641. const int axis3 = axes[3] & 0x3;
  4642. int axb[4] = {0,0,0,0}; // axes backward
  4643. axb[axis0] = 0;
  4644. axb[axis1] = 1;
  4645. axb[axis2] = 2;
  4646. axb[axis3] = 3;
  4647. ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
  4648. }
  4649. } break;
  4650. case GGML_OP_TRANSPOSE: {
  4651. if (src0_needs_grads) {
  4652. ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
  4653. }
  4654. } break;
  4655. case GGML_OP_GET_ROWS: {
  4656. if (src0_needs_grads) {
  4657. ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
  4658. }
  4659. if (src1_needs_grads) {
  4660. // noop
  4661. }
  4662. } break;
  4663. case GGML_OP_DIAG_MASK_INF: {
  4664. if (src0_needs_grads) {
  4665. /* ggml_diag_mask_inf_impl() shouldn't be here */
  4666. /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
  4667. const int n_past = ((const int32_t *) tensor->op_params)[0];
  4668. ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
  4669. }
  4670. } break;
  4671. case GGML_OP_DIAG_MASK_ZERO: {
  4672. if (src0_needs_grads) {
  4673. const int n_past = ((const int32_t *) tensor->op_params)[0];
  4674. ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
  4675. }
  4676. } break;
  4677. case GGML_OP_SOFT_MAX: {
  4678. if (src0_needs_grads) {
  4679. float scale = 1.0f;
  4680. float max_bias = 0.0f;
  4681. memcpy(&scale, (const float *) tensor->op_params + 0, sizeof(float));
  4682. memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
  4683. ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
  4684. }
  4685. GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
  4686. } break;
  4687. case GGML_OP_ROPE: {
  4688. if (src0_needs_grads) {
  4689. //const int n_past = ((int32_t *) tensor->op_params)[0];
  4690. const int n_dims = ((const int32_t *) tensor->op_params)[1];
  4691. const int mode = ((const int32_t *) tensor->op_params)[2];
  4692. //const int n_ctx = ((int32_t *) tensor->op_params)[3];
  4693. const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
  4694. float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
  4695. int sections[4] = {0, 0, 0, 0};
  4696. memcpy(&freq_base, (const float *) tensor->op_params + 5, sizeof(float));
  4697. memcpy(&freq_scale, (const float *) tensor->op_params + 6, sizeof(float));
  4698. memcpy(&ext_factor, (const float *) tensor->op_params + 7, sizeof(float));
  4699. memcpy(&attn_factor, (const float *) tensor->op_params + 8, sizeof(float));
  4700. memcpy(&beta_fast, (const float *) tensor->op_params + 9, sizeof(float));
  4701. memcpy(&beta_slow, (const float *) tensor->op_params + 10, sizeof(float));
  4702. memcpy(&sections, tensor->op_params + 11, sizeof(sections));
  4703. struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
  4704. ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
  4705. mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
  4706. ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
  4707. mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  4708. ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
  4709. }
  4710. GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
  4711. } break;
  4712. case GGML_OP_IM2COL: {
  4713. if (src1_needs_grads) {
  4714. const int32_t s0 = ggml_get_op_params_i32(tensor, 0);
  4715. const int32_t s1 = ggml_get_op_params_i32(tensor, 1);
  4716. const int32_t p0 = ggml_get_op_params_i32(tensor, 2);
  4717. const int32_t p1 = ggml_get_op_params_i32(tensor, 3);
  4718. const int32_t d0 = ggml_get_op_params_i32(tensor, 4);
  4719. const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
  4720. const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
  4721. ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
  4722. }
  4723. } break;
  4724. case GGML_OP_POOL_2D: {
  4725. if (src0_needs_grads) {
  4726. const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
  4727. const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
  4728. const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
  4729. const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
  4730. const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
  4731. const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
  4732. const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
  4733. ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
  4734. }
  4735. } break;
  4736. case GGML_OP_WIN_PART:
  4737. case GGML_OP_WIN_UNPART:
  4738. case GGML_OP_UNARY: {
  4739. switch (ggml_get_unary_op(tensor)) {
  4740. case GGML_UNARY_OP_ABS: {
  4741. if (src0_needs_grads) {
  4742. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
  4743. }
  4744. } break;
  4745. case GGML_UNARY_OP_SGN: {
  4746. // noop
  4747. } break;
  4748. case GGML_UNARY_OP_NEG: {
  4749. if (src0_needs_grads) {
  4750. ggml_sub_or_set(ctx, cgraph, isrc0, grad);
  4751. }
  4752. } break;
  4753. case GGML_UNARY_OP_STEP: {
  4754. // noop
  4755. } break;
  4756. case GGML_UNARY_OP_RELU: {
  4757. if (src0_needs_grads) {
  4758. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
  4759. }
  4760. } break;
  4761. case GGML_UNARY_OP_SILU: {
  4762. if (src0_needs_grads) {
  4763. ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
  4764. }
  4765. } break;
  4766. case GGML_UNARY_OP_EXP: {
  4767. if (src0_needs_grads) {
  4768. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
  4769. }
  4770. } break;
  4771. default: {
  4772. fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
  4773. __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
  4774. GGML_ABORT("fatal error");
  4775. } //break;
  4776. }
  4777. } break;
  4778. case GGML_OP_CROSS_ENTROPY_LOSS: {
  4779. if (src0_needs_grads) {
  4780. ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
  4781. }
  4782. GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
  4783. } break;
  4784. case GGML_OP_NONE: {
  4785. // noop
  4786. } break;
  4787. case GGML_OP_COUNT:
  4788. default: {
  4789. fprintf(stderr, "%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
  4790. GGML_ABORT("fatal error");
  4791. } //break;
  4792. }
  4793. GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
  4794. GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
  4795. GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
  4796. }
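// Consistency check (illustrative, not part of this file) for a few of the
// simpler cases above, writing t = f(x), dt = dL/dt and using dL/dx = dt*f'(x):
//   SQR : t = x^2     -> dx += 2*x*dt        (ggml_scale(ggml_mul(x, dt), 2.0f))
//   SQRT: t = sqrt(x) -> dx += dt/(2*t)      (ggml_scale(ggml_div(dt, t), 0.5f))
//   LOG : t = log(x)  -> dx += dt/x          (ggml_div(dt, x))
//   SIN : t = sin(x)  -> dx += dt*cos(x)     (ggml_mul(dt, ggml_cos(x)))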
  4797. static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
  4798. // check if already visited
  4799. if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
  4800. return;
  4801. }
  4802. for (int i = 0; i < GGML_MAX_SRC; ++i) {
  4803. const int k =
  4804. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
  4805. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
4806. /* unknown order, just fall back to using i */ i;
  4807. if (node->src[k]) {
  4808. ggml_visit_parents(cgraph, node->src[k]);
  4809. }
  4810. }
  4811. if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
  4812. // reached a leaf node, not part of the gradient graph (e.g. a constant)
  4813. GGML_ASSERT(cgraph->n_leafs < cgraph->size);
  4814. if (strlen(node->name) == 0) {
  4815. ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
  4816. }
  4817. cgraph->leafs[cgraph->n_leafs] = node;
  4818. cgraph->n_leafs++;
  4819. } else {
  4820. GGML_ASSERT(cgraph->n_nodes < cgraph->size);
  4821. if (strlen(node->name) == 0) {
  4822. ggml_format_name(node, "node_%d", cgraph->n_nodes);
  4823. }
  4824. cgraph->nodes[cgraph->n_nodes] = node;
  4825. cgraph->n_nodes++;
  4826. }
  4827. }
  4828. static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
  4829. if (!expand) {
  4830. // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
  4831. ggml_graph_clear(cgraph);
  4832. }
  4833. const int n0 = cgraph->n_nodes;
  4834. ggml_visit_parents(cgraph, tensor);
  4835. const int n_new = cgraph->n_nodes - n0;
  4836. GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
  4837. if (n_new > 0) {
4838. // the last added node should always be the starting point
  4839. GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
  4840. }
  4841. }
  4842. void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  4843. ggml_build_forward_impl(cgraph, tensor, true);
  4844. }
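// Illustrative usage sketch (not part of this file): building a small forward
// graph. Actually computing it is the job of a backend (e.g. the CPU backend),
// which is outside this file; only graph construction is shown.
//
//   struct ggml_init_params ip = { /*.mem_size =*/ 16*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
//   struct ggml_context * ctx  = ggml_init(ip);
//
//   struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32);
//   struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 16);
//   struct ggml_tensor * c = ggml_mul_mat(ctx, a, b);               // ne = [32, 16]
//
//   struct ggml_cgraph * gf = ggml_new_graph(ctx);
//   ggml_build_forward_expand(gf, c);   // topologically sorts c and its parents into gf
//   ggml_graph_print(gf);
//   ggml_free(ctx);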
  4845. void ggml_build_backward_expand(
  4846. struct ggml_context * ctx,
  4847. struct ggml_cgraph * cgraph,
  4848. struct ggml_tensor ** grad_accs) {
  4849. GGML_ASSERT(cgraph->n_nodes > 0);
  4850. GGML_ASSERT(cgraph->grads);
  4851. GGML_ASSERT(cgraph->grad_accs);
  4852. const int n_nodes_f = cgraph->n_nodes;
  4853. memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
  4854. memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
  4855. bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
  4856. {
  4857. bool any_params = false;
  4858. bool any_loss = false;
  4859. for (int i = 0; i < n_nodes_f; ++i) {
  4860. struct ggml_tensor * node = cgraph->nodes[i];
  4861. any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
  4862. any_loss = any_loss || (node->flags & GGML_TENSOR_FLAG_LOSS);
  4863. }
  4864. GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
  4865. GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
  4866. }
  4867. for (int i = 0; i < n_nodes_f; ++i) {
  4868. struct ggml_tensor * node = cgraph->nodes[i];
  4869. if (node->type == GGML_TYPE_I32) {
  4870. continue;
  4871. }
  4872. bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
  4873. bool ignore_src[GGML_MAX_SRC] = {false};
  4874. switch (node->op) {
  4875. // gradients in node->src[0] for one reason or another have no effect on output gradients
  4876. case GGML_OP_IM2COL: // only used for its shape
  4877. case GGML_OP_IM2COL_BACK: // same as IM2COL
  4878. ignore_src[0] = true;
  4879. break;
  4880. case GGML_OP_UNARY: {
  4881. const enum ggml_unary_op uop = ggml_get_unary_op(node);
  4882. // SGN and STEP unary ops are piecewise constant
  4883. if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
  4884. ignore_src[0] = true;
  4885. }
  4886. } break;
  4887. // gradients in node->src[1] for one reason or another have no effect on output gradients
  4888. case GGML_OP_CPY: // gradients in CPY target are irrelevant
  4889. case GGML_OP_GET_ROWS: // row indices not differentiable
  4890. case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
  4891. case GGML_OP_ROPE: // positions not differentiable
  4892. ignore_src[1] = true;
  4893. break;
  4894. default:
  4895. break;
  4896. }
  4897. for (int j = 0; j < GGML_MAX_SRC; ++j) {
  4898. if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
  4899. continue;
  4900. }
  4901. GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
  4902. node_needs_grad = true;
  4903. break;
  4904. }
  4905. if (!node_needs_grad) {
  4906. continue;
  4907. }
  4908. // inplace operations are currently not supported
  4909. GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
  4910. node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
  4911. const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
  4912. GGML_ASSERT(ihash != GGML_HASHSET_FULL);
  4913. GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
  4914. if (grad_accs && grad_accs[i]) {
  4915. cgraph->grad_accs[ihash] = grad_accs[i];
  4916. cgraph->grads[ihash] = cgraph->grad_accs[ihash];
  4917. } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  4918. // loss tensors always need a gradient accumulator
  4919. cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
  4920. cgraph->grads[ihash] = cgraph->grad_accs[ihash];
  4921. }
  4922. grads_needed[ihash] = true;
  4923. }
  4924. for (int i = n_nodes_f - 1; i >= 0; --i) {
4925. // inplace operations to add gradients are not created by ggml_compute_backward, except for gradient accumulation
4926. // instead, rely on the allocator to automatically make these operations inplace
  4927. ggml_compute_backward(ctx, cgraph, i, grads_needed);
  4928. }
  4929. free(grads_needed);
  4930. }
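// Illustrative usage sketch (not part of this file), following the asserts above:
// parameters are tagged with ggml_set_param and the loss with ggml_set_loss
// before the forward graph is built, and the graph must have been created with
// gradient storage enabled. Tensor shapes are omitted and assumed compatible.
//
//   ggml_set_param(w);                                               // trainable weight
//   struct ggml_tensor * logits = ggml_mul_mat(ctx, w, x);
//   struct ggml_tensor * loss   = ggml_cross_entropy_loss(ctx, logits, labels);
//   ggml_set_loss(loss);
//
//   struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
//   ggml_build_forward_expand(gb, loss);
//   ggml_build_backward_expand(ctx, gb, /*grad_accs =*/ NULL);       // appends d(loss)/d(w) nodes
//
//   struct ggml_tensor * dw = ggml_graph_get_grad(gb, w);            // gradient tensor for w
//   ggml_graph_reset(gb);                                            // loss grad <- 1, everything else <- 0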
  4931. static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
  4932. void * ptr = *p;
  4933. ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
  4934. *p = (void *) ((char *) ptr + size);
  4935. return ptr;
  4936. }
  4937. static size_t ggml_graph_nbytes(size_t size, bool grads) {
  4938. size_t hash_size = ggml_hash_size(size * 2);
  4939. void * p = 0;
  4940. incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
  4941. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
  4942. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
  4943. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
  4944. if (grads) {
  4945. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
  4946. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
  4947. }
  4948. incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  4949. size_t nbytes = (size_t) p;
  4950. return nbytes;
  4951. }
  4952. size_t ggml_graph_overhead_custom(size_t size, bool grads) {
  4953. return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
  4954. }
  4955. size_t ggml_graph_overhead(void) {
  4956. return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
  4957. }
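// Illustrative sketch (not part of this file): these overhead helpers are meant
// for sizing a ggml_context that only holds graph + tensor metadata, with the
// actual tensor data living in a backend buffer:
//
//   const size_t n_tensors = 1024;
//   struct ggml_init_params ip = {
//       /*.mem_size   =*/ n_tensors*ggml_tensor_overhead() + ggml_graph_overhead_custom(2048, /*grads =*/ false),
//       /*.mem_buffer =*/ NULL,
//       /*.no_alloc   =*/ true,   // metadata only, no tensor data in this context
//   };
//   struct ggml_context * ctx_meta = ggml_init(ip);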
  4958. struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
  4959. const size_t obj_size = ggml_graph_nbytes(size, grads);
  4960. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
  4961. struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
  4962. // the size of the hash table is doubled since it needs to hold both nodes and leafs
  4963. size_t hash_size = ggml_hash_size(size * 2);
  4964. void * p = cgraph + 1;
  4965. struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4966. struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4967. struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4968. struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  4969. struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  4970. ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  4971. // check that we allocated the correct amount of memory
  4972. assert(obj_size == (size_t)((char *)p - (char *)cgraph));
  4973. *cgraph = (struct ggml_cgraph) {
  4974. /*.size =*/ size,
  4975. /*.n_nodes =*/ 0,
  4976. /*.n_leafs =*/ 0,
  4977. /*.nodes =*/ nodes_ptr,
  4978. /*.grads =*/ grads_ptr,
  4979. /*.grad_accs =*/ grad_accs_ptr,
  4980. /*.leafs =*/ leafs_ptr,
  4981. /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
  4982. /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
  4983. };
  4984. ggml_hash_set_reset(&cgraph->visited_hash_set);
  4985. if (grads) {
  4986. memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *));
  4987. memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
  4988. }
  4989. return cgraph;
  4990. }
  4991. struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
  4992. return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
  4993. }
  4994. struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
  4995. struct ggml_cgraph cgraph = {
  4996. /*.size =*/ 0,
  4997. /*.n_nodes =*/ i1 - i0,
  4998. /*.n_leafs =*/ 0,
  4999. /*.nodes =*/ cgraph0->nodes + i0,
  5000. /*.grads =*/ NULL, // gradients would need visited_hash_set
  5001. /*.grad_accs =*/ NULL,
  5002. /*.leafs =*/ NULL,
  5003. /*.visited_hash_set =*/ { 0, NULL, NULL },
  5004. /*.order =*/ cgraph0->order,
  5005. };
  5006. return cgraph;
  5007. }
  5008. void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
  5009. GGML_ASSERT(dst->size >= src->n_leafs);
  5010. GGML_ASSERT(dst->size >= src->n_nodes);
  5011. GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
  5012. dst->n_leafs = src->n_leafs;
  5013. dst->n_nodes = src->n_nodes;
  5014. dst->order = src->order;
  5015. for (int i = 0; i < src->n_leafs; ++i) {
  5016. dst->leafs[i] = src->leafs[i];
  5017. }
  5018. for (int i = 0; i < src->n_nodes; ++i) {
  5019. dst->nodes[i] = src->nodes[i];
  5020. }
  5021. for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
  5022. // copy all hashset keys (tensors) that are in use
  5023. if (ggml_bitset_get(src->visited_hash_set.used, i)) {
  5024. ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
  5025. }
  5026. }
  5027. if (dst->grads) {
  5028. memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
  5029. memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
  5030. }
  5031. if (src->grads) {
  5032. GGML_ASSERT(dst->grads != NULL);
  5033. GGML_ASSERT(dst->grad_accs != NULL);
  5034. for (int i = 0; i < src->n_nodes; ++i) {
  5035. const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
  5036. const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
  5037. GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
  5038. GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
  5039. GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
  5040. GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
  5041. dst->grads[igrad_dst] = src->grads[igrad_src];
  5042. dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
  5043. }
  5044. }
  5045. }
  5046. struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
  5047. struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
  5048. ggml_graph_cpy(cgraph, result);
  5049. return result;
  5050. }
  5051. struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
  5052. if (ggml_is_empty(tensor)) {
  5053. return tensor;
  5054. }
  5055. if (tensor->buffer) {
  5056. ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
  5057. } else {
  5058. GGML_ASSERT(tensor->data);
  5059. memset(tensor->data, 0, ggml_nbytes(tensor));
  5060. }
  5061. return tensor;
  5062. }
  5063. void ggml_graph_reset(struct ggml_cgraph * cgraph) {
  5064. if (!cgraph) {
  5065. return;
  5066. }
  5067. GGML_ASSERT(cgraph->grads != NULL);
  5068. for (int i = 0; i < cgraph->n_nodes; i++) {
  5069. struct ggml_tensor * node = cgraph->nodes[i];
  5070. struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
  5071. if (node->op == GGML_OP_OPT_STEP_ADAMW) {
  5072. // clear momenta
  5073. ggml_set_zero(node->src[2]);
  5074. ggml_set_zero(node->src[3]);
  5075. }
5076. // the initial gradient of the loss should be 1, all other gradients 0
  5077. if (grad_acc) {
  5078. if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  5079. GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
  5080. GGML_ASSERT(ggml_is_scalar(grad_acc));
  5081. const float onef = 1.0f;
  5082. if (grad_acc->buffer) {
  5083. ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
  5084. } else {
  5085. GGML_ASSERT(grad_acc->data);
  5086. *((float *) grad_acc->data) = onef;
  5087. }
  5088. } else {
  5089. ggml_set_zero(grad_acc);
  5090. }
  5091. }
  5092. }
  5093. }
  5094. void ggml_graph_clear(struct ggml_cgraph * cgraph) {
  5095. cgraph->n_leafs = 0;
  5096. cgraph->n_nodes = 0;
  5097. ggml_hash_set_reset(&cgraph->visited_hash_set);
  5098. }
  5099. int ggml_graph_size(struct ggml_cgraph * cgraph) {
  5100. return cgraph->size;
  5101. }
  5102. struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
  5103. if (i < 0) {
  5104. GGML_ASSERT(cgraph->n_nodes + i >= 0);
  5105. return cgraph->nodes[cgraph->n_nodes + i];
  5106. }
  5107. GGML_ASSERT(i < cgraph->n_nodes);
  5108. return cgraph->nodes[i];
  5109. }
  5110. struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
  5111. return cgraph->nodes;
  5112. }
  5113. int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
  5114. return cgraph->n_nodes;
  5115. }
  5116. void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  5117. GGML_ASSERT(cgraph->size > cgraph->n_nodes);
  5118. cgraph->nodes[cgraph->n_nodes] = tensor;
  5119. cgraph->n_nodes++;
  5120. }
  5121. struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
  5122. for (int i = 0; i < cgraph->n_leafs; i++) {
  5123. struct ggml_tensor * leaf = cgraph->leafs[i];
  5124. if (strcmp(leaf->name, name) == 0) {
  5125. return leaf;
  5126. }
  5127. }
  5128. for (int i = 0; i < cgraph->n_nodes; i++) {
  5129. struct ggml_tensor * node = cgraph->nodes[i];
  5130. if (strcmp(node->name, name) == 0) {
  5131. return node;
  5132. }
  5133. }
  5134. return NULL;
  5135. }
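// Illustrative sketch (not part of this file): looking up a tensor that was
// named with ggml_set_name during graph construction, and walking the nodes:
//
//   struct ggml_tensor * t = ggml_graph_get_tensor(gf, "logits");
//   for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) {
//       struct ggml_tensor * node = ggml_graph_node(gf, i);
//       printf("%3d: %-16s %s\n", i, ggml_op_name(node->op), ggml_get_name(node));
//   }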
  5136. struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5137. const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
  5138. return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
  5139. }
  5140. struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5141. const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
  5142. return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
  5143. }
  5144. void ggml_graph_print(const struct ggml_cgraph * cgraph) {
  5145. GGML_LOG_INFO("=== GRAPH ===\n");
  5146. GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
  5147. for (int i = 0; i < cgraph->n_nodes; i++) {
  5148. struct ggml_tensor * node = cgraph->nodes[i];
  5149. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
  5150. i,
  5151. node->ne[0], node->ne[1], node->ne[2],
  5152. ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
  5153. ggml_graph_get_grad(cgraph, node) ? "g" : " ");
  5154. }
  5155. GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
  5156. for (int i = 0; i < cgraph->n_leafs; i++) {
  5157. struct ggml_tensor * node = cgraph->leafs[i];
  5158. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
  5159. i,
  5160. node->ne[0], node->ne[1],
  5161. ggml_op_name(node->op),
  5162. ggml_get_name(node));
  5163. }
  5164. GGML_LOG_INFO("========================================\n");
  5165. }
  5166. // check if node is part of the graph
  5167. static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5168. if (cgraph == NULL) {
  5169. return true;
  5170. }
  5171. for (int i = 0; i < cgraph->n_nodes; i++) {
  5172. if (cgraph->nodes[i] == node) {
  5173. return true;
  5174. }
  5175. }
  5176. return false;
  5177. }
  5178. static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5179. for (int i = 0; i < cgraph->n_nodes; i++) {
  5180. struct ggml_tensor * parent = cgraph->nodes[i];
  5181. struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
  5182. if (grad == node) {
  5183. return parent;
  5184. }
  5185. }
  5186. return NULL;
  5187. }
  5188. static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5189. struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
  5190. struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
  5191. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
  5192. gparent0 ? (void *) gparent0 : (void *) parent,
  5193. gparent0 ? "g" : "x",
  5194. gparent ? (void *) gparent : (void *) node,
  5195. gparent ? "g" : "x",
  5196. gparent ? "empty" : "vee",
  5197. gparent ? "dashed" : "solid",
  5198. label);
  5199. }
  5200. static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5201. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n",
  5202. (void *) parent, "x",
  5203. (void *) node, "x",
  5204. label);
  5205. }
  5206. void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
  5207. char color[16];
  5208. FILE * fp = ggml_fopen(filename, "w");
  5209. GGML_ASSERT(fp);
  5210. fprintf(fp, "digraph G {\n");
  5211. fprintf(fp, " newrank = true;\n");
  5212. fprintf(fp, " rankdir = TB;\n");
  5213. for (int i = 0; i < gb->n_nodes; i++) {
  5214. struct ggml_tensor * node = gb->nodes[i];
  5215. struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
  5216. if (ggml_graph_get_parent(gb, node) != NULL) {
  5217. continue;
  5218. }
  5219. if (node->flags & GGML_TENSOR_FLAG_PARAM) {
  5220. snprintf(color, sizeof(color), "yellow");
  5221. } else if (grad) {
  5222. if (ggml_graph_find(gf, node)) {
  5223. snprintf(color, sizeof(color), "green");
  5224. } else {
  5225. snprintf(color, sizeof(color), "lightblue");
  5226. }
  5227. } else {
  5228. snprintf(color, sizeof(color), "white");
  5229. }
  5230. fprintf(fp, " \"%p\" [ "
  5231. "style = filled; fillcolor = %s; shape = record; "
  5232. "label=\"",
  5233. (void *) node, color);
  5234. if (strlen(node->name) > 0) {
  5235. fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
  5236. } else {
  5237. fprintf(fp, "(%s)|", ggml_type_name(node->type));
  5238. }
  5239. if (ggml_is_matrix(node)) {
  5240. fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
  5241. } else {
  5242. fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
  5243. }
  5244. if (grad) {
  5245. fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
  5246. } else {
  5247. fprintf(fp, "\"; ]\n");
  5248. }
  5249. }
  5250. for (int i = 0; i < gb->n_leafs; i++) {
  5251. struct ggml_tensor * node = gb->leafs[i];
  5252. snprintf(color, sizeof(color), "pink");
  5253. fprintf(fp, " \"%p\" [ "
  5254. "style = filled; fillcolor = %s; shape = record; "
  5255. "label=\"<x>",
  5256. (void *) node, color);
  5257. if (strlen(node->name) > 0) {
  5258. fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
  5259. } else {
  5260. fprintf(fp, "(%s)|", ggml_type_name(node->type));
  5261. }
  5262. fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
  5263. if (ggml_nelements(node) < 5 && node->data != NULL) {
  5264. fprintf(fp, " | (");
  5265. for (int j = 0; j < ggml_nelements(node); j++) {
  5266. // FIXME: use ggml-backend to obtain the tensor data
  5267. //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
  5268. // fprintf(fp, "%d", ggml_get_i32_1d(node, j));
  5269. //}
  5270. //else if (node->type == GGML_TYPE_F32 ||
  5271. // node->type == GGML_TYPE_F16 ||
  5272. // node->type == GGML_TYPE_BF16) {
  5273. // fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
  5274. //}
  5275. //else
  5276. {
  5277. fprintf(fp, "#");
  5278. }
  5279. if (j < ggml_nelements(node) - 1) {
  5280. fprintf(fp, ", ");
  5281. }
  5282. }
  5283. fprintf(fp, ")");
  5284. }
  5285. fprintf(fp, "\"; ]\n");
  5286. }
  5287. for (int i = 0; i < gb->n_nodes; i++) {
  5288. struct ggml_tensor * node = gb->nodes[i];
  5289. for (int j = 0; j < GGML_MAX_SRC; j++) {
  5290. if (node->src[j]) {
  5291. char label[16];
  5292. snprintf(label, sizeof(label), "src %d", j);
  5293. ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
  5294. }
  5295. }
  5296. }
  5297. for (int i = 0; i < gb->n_leafs; i++) {
  5298. struct ggml_tensor * node = gb->leafs[i];
  5299. for (int j = 0; j < GGML_MAX_SRC; j++) {
  5300. if (node->src[j]) {
  5301. char label[16];
  5302. snprintf(label, sizeof(label), "src %d", j);
  5303. ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
  5304. }
  5305. }
  5306. }
  5307. fprintf(fp, "}\n");
  5308. fclose(fp);
  5309. GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
  5310. }
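// Illustrative usage sketch (not part of this file), matching the hint logged
// above: dump a graph to graphviz format and render it from a shell. For a
// backward graph, pass the corresponding forward graph as the second argument
// so that shared nodes are colored differently.
//
//   ggml_graph_dump_dot(gf, NULL, "graph.dot");
//   // then:  dot -Tpng graph.dot -o graph.png && open graph.png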
  5311. ////////////////////////////////////////////////////////////////////////////////
  5312. void ggml_set_input(struct ggml_tensor * tensor) {
  5313. tensor->flags |= GGML_TENSOR_FLAG_INPUT;
  5314. }
  5315. void ggml_set_output(struct ggml_tensor * tensor) {
  5316. tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
  5317. }
  5318. void ggml_set_param(struct ggml_tensor * tensor) {
  5319. GGML_ASSERT(tensor->op == GGML_OP_NONE);
  5320. tensor->flags |= GGML_TENSOR_FLAG_PARAM;
  5321. }
  5322. void ggml_set_loss(struct ggml_tensor * tensor) {
  5323. GGML_ASSERT(ggml_is_scalar(tensor));
  5324. GGML_ASSERT(tensor->type == GGML_TYPE_F32);
  5325. tensor->flags |= GGML_TENSOR_FLAG_LOSS;
  5326. }
////////////////////////////////////////////////////////////////////////////////

void ggml_quantize_init(enum ggml_type type) {
    ggml_critical_section_start();

    switch (type) {
        case GGML_TYPE_IQ2_XXS:
        case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ2_S:
        case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
        default: // nothing
            break;
    }

    ggml_critical_section_end();
}

void ggml_quantize_free(void) {
    ggml_critical_section_start();

    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
    iq3xs_free_impl(256);

    ggml_critical_section_end();
}
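
// Usage sketch (illustrative only, not part of ggml): the i-quant types build shared
// lookup tables on first use. ggml_quantize_chunk() calls ggml_quantize_init() itself,
// so explicit initialization is optional; ggml_quantize_free() releases the tables at
// shutdown.
//
//     ggml_quantize_init(GGML_TYPE_IQ2_XS); // no-op if already initialized
//     // ... quantize data ...
//     ggml_quantize_free();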
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
    return
        type == GGML_TYPE_IQ2_XXS ||
        type == GGML_TYPE_IQ2_XS  ||
        type == GGML_TYPE_IQ1_S;//   ||
        //type == GGML_TYPE_IQ1_M;
}

size_t ggml_quantize_chunk(
        enum ggml_type   type,
           const float * src,
                  void * dst,
               int64_t   start,
               int64_t   nrows,
               int64_t   n_per_row,
           const float * imatrix) {
    const int64_t n = (int64_t) nrows * n_per_row;

    if (ggml_quantize_requires_imatrix(type)) {
        GGML_ASSERT(imatrix != NULL);
    }

    GGML_ASSERT(start % type_traits[type].blck_size == 0);
    GGML_ASSERT(start % n_per_row == 0);

    ggml_quantize_init(type); // this is noop if already initialized

    const size_t start_row = start / n_per_row;
    const size_t row_size  = ggml_row_size(type, n_per_row);

    size_t result = 0;

    switch (type) {
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_F16:
            {
                size_t elemsize = sizeof(ggml_fp16_t);
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_BF16:
            {
                size_t elemsize = sizeof(ggml_bf16_t);
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_F32:
            {
                size_t elemsize = sizeof(float);
                result = n * elemsize;
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
            } break;
        default:
            assert(false);
    }

    GGML_ASSERT(result == nrows * row_size);

    return result;
}
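
// Usage sketch (illustrative only, not part of ggml): quantize a contiguous FP32
// buffer of `nrows` rows of `n_per_row` values to Q8_0. The destination size per row
// comes from ggml_row_size(); types for which ggml_quantize_requires_imatrix()
// returns true additionally need an importance matrix. All values below are
// placeholders chosen for the example.
//
//     const int64_t        nrows     = 8;
//     const int64_t        n_per_row = 128;           // must be a multiple of the block size
//     const enum ggml_type qtype     = GGML_TYPE_Q8_0;
//
//     const float * src = ...;                        // nrows * n_per_row floats
//     void        * dst = malloc(nrows * ggml_row_size(qtype, n_per_row));
//
//     const float * imatrix = NULL;                   // not required for Q8_0
//     if (ggml_quantize_requires_imatrix(qtype)) {
//         // supply per-column importance weights here instead
//     }
//
//     size_t written = ggml_quantize_chunk(qtype, src, dst, /*start=*/0, nrows, n_per_row, imatrix);
//     // written == nrows * ggml_row_size(qtype, n_per_row)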
////////////////////////////////////////////////////////////////////////////////

void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
    g_logger_state.log_callback           = log_callback ? log_callback : ggml_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}
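
// Usage sketch (illustrative only, not part of ggml): install a custom log callback
// following the ggml_log_callback signature from ggml.h; passing NULL restores the
// default logger, as implemented above. `my_log` is a hypothetical user function.
//
//     static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
//         (void) user_data;
//         if (level == GGML_LOG_LEVEL_ERROR) {
//             fputs(text, stderr);            // only forward errors, drop the rest
//         }
//     }
//     ...
//     ggml_log_set(my_log, NULL);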
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
    p->n_threads  = n_threads;
    p->prio       = 0;     // default priority (usually means normal or inherited)
    p->poll       = 50;    // hybrid-polling enabled
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
    p->paused     = false; // threads are ready to go
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
}

struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
    struct ggml_threadpool_params p;
    ggml_threadpool_params_init(&p, n_threads);
    return p;
}

bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
    if (p0->n_threads  != p1->n_threads ) return false;
    if (p0->prio       != p1->prio      ) return false;
    if (p0->poll       != p1->poll      ) return false;
    if (p0->strict_cpu != p1->strict_cpu) return false;
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
}
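
// Usage sketch (illustrative only, not part of ggml): obtain default threadpool
// parameters, tweak them, and compare two configurations. Only the helpers defined
// above are used; the thread count is a placeholder.
//
//     struct ggml_threadpool_params a = ggml_threadpool_params_default(8);
//     struct ggml_threadpool_params b = a;
//     b.strict_cpu = true;                               // request strict placement on the cpumask
//     bool same = ggml_threadpool_params_match(&a, &b);  // false: strict_cpu differs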