ggml.c

#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC

#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-cpu-impl.h"
#include "ggml-quants.h"
#include "ggml.h"
#include "ggml-aarch64.h"

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
#include <alloca.h>
#endif

#include <assert.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <signal.h>

#if defined(__gnu_linux__)
#include <syscall.h>
#endif

#if defined(__APPLE__)
#include <unistd.h>
#include <mach/mach.h>
#include <TargetConditionals.h>
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
    #define NOMINMAX
#endif
#include <windows.h>
#endif

#define UNUSED GGML_UNUSED

#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
    (!defined(TARGET_OS_TV) && !defined(TARGET_OS_WATCH))
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>

#if defined(__ANDROID__)
#include <unwind.h>
#include <dlfcn.h>
#include <stdio.h>

struct backtrace_state {
    void ** current;
    void ** end;
};

static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
    struct backtrace_state * state = (struct backtrace_state *)arg;
    uintptr_t pc = _Unwind_GetIP(context);
    if (pc) {
        if (state->current == state->end) {
            return _URC_END_OF_STACK;
        } else {
            *state->current++ = (void*)pc;
        }
    }
    return _URC_NO_REASON;
}

static void ggml_print_backtrace_symbols(void) {
    const int max = 100;
    void* buffer[max];
    struct backtrace_state state = {buffer, buffer + max};
    _Unwind_Backtrace(unwind_callback, &state);

    int count = state.current - buffer;

    for (int idx = 0; idx < count; ++idx) {
        const void * addr = buffer[idx];
        const char * symbol = "";

        Dl_info info;
        if (dladdr(addr, &info) && info.dli_sname) {
            symbol = info.dli_sname;
        }

        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
    }
}
#elif defined(__linux__) && defined(__GLIBC__)
#include <execinfo.h>
static void ggml_print_backtrace_symbols(void) {
    void * trace[100];
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
}
#else
static void ggml_print_backtrace_symbols(void) {
    // platform not supported
}
#endif
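
// On the platforms selected above, try to produce a backtrace by attaching a debugger
// (gdb, then lldb) to the running process; if the debugger exits with failure, fall back
// to the symbol-based printer ggml_print_backtrace_symbols() defined above.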
static void ggml_print_backtrace(void) {
    char attach[32];
    snprintf(attach, sizeof(attach), "attach %d", getpid());
    int pid = fork();
    if (pid == 0) {
        // try gdb
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",
            "-ex", attach,
            "-ex", "bt -frame-info source-and-location",
            "-ex", "detach",
            "-ex", "quit",
            (char *) NULL);
        // try lldb
        execlp("lldb", "lldb", "--batch",
            "-o", "bt",
            "-o", "quit",
            "-p", attach,
            (char *) NULL);
        exit(EXIT_FAILURE);
    } else {
        int wstatus;
        waitpid(pid, &wstatus, 0);
        if (WIFEXITED(wstatus)) {
            if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
                // gdb failed, fallback to backtrace_symbols
                ggml_print_backtrace_symbols();
            }
        }
    }
}
#else
static void ggml_print_backtrace(void) {
    // platform not supported
}
#endif

void ggml_abort(const char * file, int line, const char * fmt, ...) {
    fflush(stdout);

    fprintf(stderr, "%s:%d: ", file, line);

    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fprintf(stderr, "\n");

    ggml_print_backtrace();
    abort();
}
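
// note: ggml_abort() is reached through the GGML_ABORT() (and, on failure, GGML_ASSERT())
// macros: it reports the source location and message, prints a backtrace, then calls abort().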
//
// logging
//

struct ggml_logger_state {
    ggml_log_callback log_callback;
    void * log_callback_user_data;
};

static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};

static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
    if (format == NULL) {
        return;
    }
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, 128, format, args);
    if (len < 128) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        // message did not fit in the stack buffer - format it again into a heap allocation
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        free(buffer2);
    }
    va_end(args_copy);
}

void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    ggml_log_internal_v(level, format, args);
    va_end(args);
}

void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}
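
// Example (sketch, not part of the original file): an application can redirect ggml logging
// by installing its own callback; ggml_log_set() is assumed here to be the public setter
// declared in ggml.h.
//
//     static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
//         fprintf((FILE *) user_data, "[ggml] %s", text);
//     }
//
//     // somewhere during initialization:
//     // ggml_log_set(my_log, stderr);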
//
// end of logging block
//

#ifdef GGML_USE_ACCELERATE
// uncomment to use vDSP for soft max computation
// note: not sure if it is actually faster
//#define GGML_SOFT_MAX_ACCELERATE
#endif

void * ggml_aligned_malloc(size_t size) {
    const int alignment = 64;

#if defined(_MSC_VER) || defined(__MINGW32__)
    return _aligned_malloc(size, alignment);
#else
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
        return NULL;
    }
    void * aligned_memory = NULL;
#ifdef GGML_USE_CPU_HBM
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
#elif TARGET_OS_OSX
    GGML_UNUSED(alignment);
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
    int result = EFAULT;
    switch (alloc_status) {
        case KERN_SUCCESS:
            result = 0;
            break;
        case KERN_INVALID_ADDRESS:
            result = EINVAL;
            break;
        case KERN_NO_SPACE:
            result = ENOMEM;
            break;
        default:
            result = EFAULT;
            break;
    }
#else
    int result = posix_memalign(&aligned_memory, alignment, size);
#endif
    if (result != 0) {
        // Handle allocation failure
        const char *error_desc = "unknown allocation error";
        switch (result) {
            case EINVAL:
                error_desc = "invalid alignment value";
                break;
            case ENOMEM:
                error_desc = "insufficient memory";
                break;
        }
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
        return NULL;
    }
    return aligned_memory;
#endif
}

void ggml_aligned_free(void * ptr, size_t size) {
    GGML_UNUSED(size);
#if defined(_MSC_VER) || defined(__MINGW32__)
    _aligned_free(ptr);
#elif GGML_USE_CPU_HBM
    if (ptr != NULL) {
        hbw_free(ptr);
    }
#elif TARGET_OS_OSX
    if (ptr != NULL) {
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
    }
#else
    free(ptr);
#endif
}
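
// note: ggml_aligned_free() takes the allocation size because vm_deallocate() on macOS
// requires it; on the other code paths the size parameter is unused.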
inline static void * ggml_malloc(size_t size) {
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
        return NULL;
    }
    void * result = malloc(size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

// calloc
inline static void * ggml_calloc(size_t num, size_t size) {
    if (num == 0 || size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
        return NULL;
    }
    void * result = calloc(num, size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

#define GGML_MALLOC(size)      ggml_malloc(size)
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
#define GGML_FREE(ptr)         free(ptr)

const char * ggml_status_to_string(enum ggml_status status) {
    switch (status) {
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
    }

    return "GGML status: unknown";
}

float ggml_fp16_to_fp32(ggml_fp16_t x) {
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
    return GGML_FP16_TO_FP32(x);
}

ggml_fp16_t ggml_fp32_to_fp16(float x) {
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
    return GGML_FP32_TO_FP16(x);
}

float ggml_bf16_to_fp32(ggml_bf16_t x) {
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
    return GGML_BF16_TO_FP32(x);  // it just left shifts
}

ggml_bf16_t ggml_fp32_to_bf16(float x) {
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
    return GGML_FP32_TO_BF16(x);
}
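
// note: the #define placed inside each of the four converters above rewrites the function
// name for the remainder of this translation unit, so that internal ggml code uses the
// GGML_*_TO_* macros instead of calling the exported scalar functions.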
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = GGML_FP16_TO_FP32(x[i]);
    }
}

// FIXME: these functions must detect the instruction set at runtime, since they are part of the core ggml library
//        currently, the ggml_cpu_has_* functions are entirely compile-time
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
    int64_t i = 0;
#if defined(__F16C__)
    if (ggml_cpu_has_f16c()) {
        for (; i + 7 < n; i += 8) {
            __m256 x_vec = _mm256_loadu_ps(x + i);
            __m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
            _mm_storeu_si128((__m128i *)(y + i), y_vec);
        }
        for (; i + 3 < n; i += 4) {
            __m128 x_vec = _mm_loadu_ps(x + i);
            __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
            _mm_storel_epi64((__m128i *)(y + i), y_vec);
        }
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_FP32_TO_FP16(x[i]);
    }
}

void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
    int64_t i = 0;
#if defined(__AVX512F__)
    if (ggml_cpu_has_avx512()) {
        for (; i + 16 <= n; i += 16) {
            _mm512_storeu_ps(y + i,
                             _mm512_castsi512_ps(
                                 _mm512_slli_epi32(
                                     _mm512_cvtepu16_epi32(
                                         _mm256_loadu_si256(
                                             (const __m256i *)(x + i))),
                                     16)));
        }
    }
#endif
#if defined(__AVX2__)
    if (ggml_cpu_has_avx2()) {
        for (; i + 8 <= n; i += 8) {
            _mm256_storeu_ps(y + i,
                             _mm256_castsi256_ps(
                                 _mm256_slli_epi32(
                                     _mm256_cvtepu16_epi32(
                                         _mm_loadu_si128(
                                             (const __m128i *)(x + i))),
                                     16)));
        }
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_BF16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
    for (int i = 0; i < n; i++) {
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
    }
}

void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
    int i = 0;
#if defined(__AVX512BF16__)
    // subnormals are flushed to zero on this platform
    for (; i + 32 <= n; i += 32) {
        _mm512_storeu_si512(
            (__m512i *)(y + i),
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
                                      _mm512_loadu_ps(x + i))));
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_FP32_TO_BF16(x[i]);
    }
}
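
// as noted in the FIXME above, the SIMD paths in these row converters are selected at
// compile time (__F16C__, __AVX512F__, __AVX2__, __AVX512BF16__), not at runtime.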
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
}

//
// timing
//

#if defined(_MSC_VER) || defined(__MINGW32__)
static int64_t timer_freq, timer_start;
void ggml_time_init(void) {
    LARGE_INTEGER t;
    QueryPerformanceFrequency(&t);
    timer_freq = t.QuadPart;

    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
    // and the uptime is high enough.
    // We subtract the program start time to reduce the likelihood of that happening.
    QueryPerformanceCounter(&t);
    timer_start = t.QuadPart;
}

int64_t ggml_time_ms(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
}

int64_t ggml_time_us(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
}
#else
void ggml_time_init(void) {}

int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
}

int64_t ggml_time_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
}
#endif
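
// note: ggml_cycles()/ggml_cycles_per_ms() are based on clock(), i.e. process CPU time in
// CLOCKS_PER_SEC units, rather than a hardware cycle counter.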
int64_t ggml_cycles(void) {
    return clock();
}

int64_t ggml_cycles_per_ms(void) {
    return CLOCKS_PER_SEC/1000;
}

//
// cross-platform UTF-8 file paths
//

#ifdef _WIN32
static wchar_t * ggml_mbstowcs(const char * mbs) {
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
    if (!wlen) {
        errno = EINVAL;
        return NULL;
    }

    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
    if (!wlen) {
        GGML_FREE(wbuf);
        errno = EINVAL;
        return NULL;
    }

    return wbuf;
}
#endif
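
// ggml_fopen(): on Windows, plain fopen() interprets the path in the active ANSI code page,
// so UTF-8 paths are converted to UTF-16 and opened with _wfopen(); elsewhere fopen() is
// used directly.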
FILE * ggml_fopen(const char * fname, const char * mode) {
#ifdef _WIN32
    FILE * file = NULL;

    // convert fname (UTF-8)
    wchar_t * wfname = ggml_mbstowcs(fname);
    if (wfname) {
        // convert mode (ANSI)
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
        wchar_t * wmode_p = wmode;
        do {
            *wmode_p++ = (wchar_t)*mode;
        } while (*mode++);

        // open file
        file = _wfopen(wfname, wmode);

        GGML_FREE(wfname);
        GGML_FREE(wmode);
    }

    return file;
#else
    return fopen(fname, mode);
#endif
}

static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
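
// per-type traits: element/block sizes and (de)quantization callbacks, indexed by enum ggml_type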
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
    [GGML_TYPE_I8] = {
        .type_name = "i8",
        .blck_size = 1,
        .type_size = sizeof(int8_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I16] = {
        .type_name = "i16",
        .blck_size = 1,
        .type_size = sizeof(int16_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I32] = {
        .type_name = "i32",
        .blck_size = 1,
        .type_size = sizeof(int32_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I64] = {
        .type_name = "i64",
        .blck_size = 1,
        .type_size = sizeof(int64_t),
        .is_quantized = false,
    },
    [GGML_TYPE_F64] = {
        .type_name = "f64",
        .blck_size = 1,
        .type_size = sizeof(double),
        .is_quantized = false,
    },
    [GGML_TYPE_F32] = {
        .type_name = "f32",
        .blck_size = 1,
        .type_size = sizeof(float),
        .is_quantized = false,
    },
    [GGML_TYPE_F16] = {
        .type_name = "f16",
        .blck_size = 1,
        .type_size = sizeof(ggml_fp16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
        .from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row,
    },
    [GGML_TYPE_Q4_0] = {
        .type_name = "q4_0",
        .blck_size = QK4_0,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_0,
        .from_float = quantize_row_q4_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
    },
    [GGML_TYPE_Q4_1] = {
        .type_name = "q4_1",
        .blck_size = QK4_1,
        .type_size = sizeof(block_q4_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_1,
        .from_float = quantize_row_q4_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
    },
    [4] = { // GGML_TYPE_Q4_2
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
        .to_float = NULL,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [5] = { // GGML_TYPE_Q4_3
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
        .to_float = NULL,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_Q5_0] = {
        .type_name = "q5_0",
        .blck_size = QK5_0,
        .type_size = sizeof(block_q5_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_0,
        .from_float = quantize_row_q5_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref,
    },
    [GGML_TYPE_Q5_1] = {
        .type_name = "q5_1",
        .blck_size = QK5_1,
        .type_size = sizeof(block_q5_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_1,
        .from_float = quantize_row_q5_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
    },
    [GGML_TYPE_Q8_0] = {
        .type_name = "q8_0",
        .blck_size = QK8_0,
        .type_size = sizeof(block_q8_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q8_0,
        .from_float = quantize_row_q8_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
    },
    [GGML_TYPE_Q8_1] = {
        .type_name = "q8_1",
        .blck_size = QK8_1,
        .type_size = sizeof(block_q8_1),
        .is_quantized = true,
        .from_float = quantize_row_q8_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
    },
    [GGML_TYPE_Q2_K] = {
        .type_name = "q2_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q2_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q2_K,
        .from_float = quantize_row_q2_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref,
    },
    [GGML_TYPE_Q3_K] = {
        .type_name = "q3_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q3_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q3_K,
        .from_float = quantize_row_q3_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref,
    },
    [GGML_TYPE_Q4_K] = {
        .type_name = "q4_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q4_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_K,
        .from_float = quantize_row_q4_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref,
    },
    [GGML_TYPE_Q5_K] = {
        .type_name = "q5_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q5_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_K,
        .from_float = quantize_row_q5_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref,
    },
    [GGML_TYPE_Q6_K] = {
        .type_name = "q6_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q6_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q6_K,
        .from_float = quantize_row_q6_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref,
    },
    [GGML_TYPE_IQ2_XXS] = {
        .type_name = "iq2_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ2_XS] = {
        .type_name = "iq2_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xs,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ3_XXS] = {
        .type_name = "iq3_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
        .from_float = quantize_row_iq3_xxs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
    },
    [GGML_TYPE_IQ3_S] = {
        .type_name = "iq3_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_s,
        .from_float = quantize_row_iq3_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref,
    },
    [GGML_TYPE_IQ2_S] = {
        .type_name = "iq2_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_s,
        .from_float = quantize_row_iq2_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref,
    },
    [GGML_TYPE_IQ1_S] = {
        .type_name = "iq1_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_s,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ1_M] = {
        .type_name = "iq1_m",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_m),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_m,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ4_NL] = {
        .type_name = "iq4_nl",
        .blck_size = QK4_NL,
        .type_size = sizeof(block_iq4_nl),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
        .from_float = quantize_row_iq4_nl,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
    },
    [GGML_TYPE_IQ4_XS] = {
        .type_name = "iq4_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq4_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
        .from_float = quantize_row_iq4_xs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref,
    },
    [GGML_TYPE_Q8_K] = {
        .type_name = "q8_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q8_K),
        .is_quantized = true,
        .from_float = quantize_row_q8_K,
    },
    [GGML_TYPE_BF16] = {
        .type_name = "bf16",
        .blck_size = 1,
        .type_size = sizeof(ggml_bf16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
        .from_float = (ggml_from_float_t) ggml_fp32_to_bf16_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
    },
    [GGML_TYPE_Q4_0_4_4] = {
        .type_name = "q4_0_4x4",
        .blck_size = QK4_0,
        .blck_size_interleave = 4,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = NULL,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_Q4_0_4_8] = {
        .type_name = "q4_0_4x8",
        .blck_size = QK4_0,
        .blck_size_interleave = 8,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = NULL,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_Q4_0_8_8] = {
        .type_name = "q4_0_8x8",
        .blck_size = QK4_0,
        .blck_size_interleave = 8,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = NULL,
        .from_float = NULL,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_TQ1_0] = {
        .type_name = "tq1_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq1_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq1_0,
        .from_float = quantize_row_tq1_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq1_0_ref,
    },
    [GGML_TYPE_TQ2_0] = {
        .type_name = "tq2_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq2_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq2_0,
        .from_float = quantize_row_tq2_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq2_0_ref,
    },
};
  789. const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
  790. GGML_ASSERT(type < GGML_TYPE_COUNT);
  791. return &type_traits[type];
  792. }
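// Example (an illustrative sketch; "n" is an arbitrary element count assumed to be
// a multiple of the block size):
//
//     const struct ggml_type_traits * tt = ggml_get_type_traits(GGML_TYPE_Q4_0);
//     const size_t row_bytes = tt->type_size * (n / tt->blck_size);  // bytes for one row of n weights
//
// For block-quantized types a whole block is the smallest addressable unit, which is
// why sizes are always computed in multiples of blck_size.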
  793. //
  794. // ggml object
  795. //
  796. struct ggml_object {
  797. size_t offs;
  798. size_t size;
  799. struct ggml_object * next;
  800. enum ggml_object_type type;
  801. char padding[4];
  802. };
  803. static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
  804. //
  805. // ggml context
  806. //
  807. struct ggml_context {
  808. size_t mem_size;
  809. void * mem_buffer;
  810. bool mem_buffer_owned;
  811. bool no_alloc;
  812. int n_objects;
  813. struct ggml_object * objects_begin;
  814. struct ggml_object * objects_end;
  815. };
  816. struct ggml_context_container {
  817. bool used;
  818. struct ggml_context context;
  819. };
  820. //
  821. // data types
  822. //
  823. static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  824. "NONE",
  825. "DUP",
  826. "ADD",
  827. "ADD1",
  828. "ACC",
  829. "SUB",
  830. "MUL",
  831. "DIV",
  832. "SQR",
  833. "SQRT",
  834. "LOG",
  835. "SIN",
  836. "COS",
  837. "SUM",
  838. "SUM_ROWS",
  839. "MEAN",
  840. "ARGMAX",
  841. "COUNT_EQUAL",
  842. "REPEAT",
  843. "REPEAT_BACK",
  844. "CONCAT",
  845. "SILU_BACK",
  846. "NORM",
  847. "RMS_NORM",
  848. "RMS_NORM_BACK",
  849. "GROUP_NORM",
  850. "MUL_MAT",
  851. "MUL_MAT_ID",
  852. "OUT_PROD",
  853. "SCALE",
  854. "SET",
  855. "CPY",
  856. "CONT",
  857. "RESHAPE",
  858. "VIEW",
  859. "PERMUTE",
  860. "TRANSPOSE",
  861. "GET_ROWS",
  862. "GET_ROWS_BACK",
  863. "DIAG",
  864. "DIAG_MASK_INF",
  865. "DIAG_MASK_ZERO",
  866. "SOFT_MAX",
  867. "SOFT_MAX_BACK",
  868. "ROPE",
  869. "ROPE_BACK",
  870. "CLAMP",
  871. "CONV_TRANSPOSE_1D",
  872. "IM2COL",
  873. "IM2COL_BACK",
  874. "CONV_TRANSPOSE_2D",
  875. "POOL_1D",
  876. "POOL_2D",
  877. "POOL_2D_BACK",
  878. "UPSCALE",
  879. "PAD",
  880. "ARANGE",
  881. "TIMESTEP_EMBEDDING",
  882. "ARGSORT",
  883. "LEAKY_RELU",
  884. "FLASH_ATTN_EXT",
  885. "FLASH_ATTN_BACK",
  886. "SSM_CONV",
  887. "SSM_SCAN",
  888. "WIN_PART",
  889. "WIN_UNPART",
  890. "GET_REL_POS",
  891. "ADD_REL_POS",
  892. "RWKV_WKV6",
  893. "UNARY",
  894. "MAP_UNARY",
  895. "MAP_BINARY",
  896. "MAP_CUSTOM1_F32",
  897. "MAP_CUSTOM2_F32",
  898. "MAP_CUSTOM3_F32",
  899. "MAP_CUSTOM1",
  900. "MAP_CUSTOM2",
  901. "MAP_CUSTOM3",
  902. "CROSS_ENTROPY_LOSS",
  903. "CROSS_ENTROPY_LOSS_BACK",
  904. "OPT_STEP_ADAMW",
  905. };
  906. static_assert(GGML_OP_COUNT == 81, "GGML_OP_COUNT != 81");
  907. static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  908. "none",
  909. "x",
  910. "x+y",
  911. "x+y",
  912. "view(x,nb,offset)+=y->x",
  913. "x-y",
  914. "x*y",
  915. "x/y",
  916. "x^2",
  917. "√x",
  918. "log(x)",
  919. "sin(x)",
  920. "cos(x)",
  921. "Σx",
  922. "Σx_k",
  923. "Σx/n",
  924. "argmax(x)",
  925. "count_equal(x)",
  926. "repeat(x)",
  927. "repeat_back(x)",
  928. "concat(x, y)",
  929. "silu_back(x)",
  930. "norm(x)",
  931. "rms_norm(x)",
  932. "rms_norm_back(x)",
  933. "group_norm(x)",
  934. "X*Y",
  935. "X[i]*Y",
  936. "X*Y",
  937. "x*v",
  938. "y-\\>view(x)",
  939. "x-\\>y",
  940. "cont(x)",
  941. "reshape(x)",
  942. "view(x)",
  943. "permute(x)",
  944. "transpose(x)",
  945. "get_rows(x)",
  946. "get_rows_back(x)",
  947. "diag(x)",
  948. "diag_mask_inf(x)",
  949. "diag_mask_zero(x)",
  950. "soft_max(x)",
  951. "soft_max_back(x)",
  952. "rope(x)",
  953. "rope_back(x)",
  954. "clamp(x)",
  955. "conv_transpose_1d(x)",
  956. "im2col(x)",
  957. "im2col_back(x)",
  958. "conv_transpose_2d(x)",
  959. "pool_1d(x)",
  960. "pool_2d(x)",
  961. "pool_2d_back(x)",
  962. "upscale(x)",
  963. "pad(x)",
  964. "arange(start, stop, step)",
  965. "timestep_embedding(timesteps, dim, max_period)",
  966. "argsort(x)",
  967. "leaky_relu(x)",
  968. "flash_attn_ext(x)",
  969. "flash_attn_back(x)",
  970. "ssm_conv(x)",
  971. "ssm_scan(x)",
  972. "win_part(x)",
  973. "win_unpart(x)",
  974. "get_rel_pos(x)",
  975. "add_rel_pos(x)",
  976. "rwkv_wkv6(k, v, r, tf, td, s)",
  977. "unary(x)",
  978. "f(x)",
  979. "f(x,y)",
  980. "custom_f32(x)",
  981. "custom_f32(x,y)",
  982. "custom_f32(x,y,z)",
  983. "custom(x)",
  984. "custom(x,y)",
  985. "custom(x,y,z)",
  986. "cross_entropy_loss(x,y)",
  987. "cross_entropy_loss_back(x,y)",
  988. "adamw(x)",
  989. };
  990. static_assert(GGML_OP_COUNT == 81, "GGML_OP_COUNT != 81");
  991. static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
  992. static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
  993. "ABS",
  994. "SGN",
  995. "NEG",
  996. "STEP",
  997. "TANH",
  998. "ELU",
  999. "RELU",
  1000. "SIGMOID",
  1001. "GELU",
  1002. "GELU_QUICK",
  1003. "SILU",
  1004. "HARDSWISH",
  1005. "HARDSIGMOID",
  1006. "EXP",
  1007. };
  1008. static_assert(GGML_UNARY_OP_COUNT == 14, "GGML_UNARY_OP_COUNT != 14");
  1009. static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  1010. static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
  1011. ////////////////////////////////////////////////////////////////////////////////
  1012. void ggml_print_object(const struct ggml_object * obj) {
  1013. GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
  1014. obj->type, obj->offs, obj->size, (const void *) obj->next);
  1015. }
  1016. void ggml_print_objects(const struct ggml_context * ctx) {
  1017. struct ggml_object * obj = ctx->objects_begin;
  1018. GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
  1019. while (obj != NULL) {
  1020. ggml_print_object(obj);
  1021. obj = obj->next;
  1022. }
  1023. GGML_LOG_INFO("%s: --- end ---\n", __func__);
  1024. }
  1025. int64_t ggml_nelements(const struct ggml_tensor * tensor) {
  1026. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1027. return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1028. }
  1029. int64_t ggml_nrows(const struct ggml_tensor * tensor) {
  1030. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1031. return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1032. }
  1033. size_t ggml_nbytes(const struct ggml_tensor * tensor) {
  1034. size_t nbytes;
  1035. const size_t blck_size = ggml_blck_size(tensor->type);
  1036. if (blck_size == 1) {
  1037. nbytes = ggml_type_size(tensor->type);
  1038. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1039. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1040. }
  1041. }
  1042. else {
  1043. nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
  1044. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1045. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1046. }
  1047. }
  1048. return nbytes;
  1049. }
  1050. size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
  1051. return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
  1052. }
  1053. int64_t ggml_blck_size(enum ggml_type type) {
  1054. return type_traits[type].blck_size;
  1055. }
  1056. size_t ggml_type_size(enum ggml_type type) {
  1057. return type_traits[type].type_size;
  1058. }
  1059. size_t ggml_row_size(enum ggml_type type, int64_t ne) {
  1060. assert(ne % ggml_blck_size(type) == 0);
  1061. return ggml_type_size(type)*ne/ggml_blck_size(type);
  1062. }
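// Example: for GGML_TYPE_F32 (blck_size = 1, type_size = 4) a row of 4096 elements
// takes ggml_row_size(GGML_TYPE_F32, 4096) = 16384 bytes; for block-quantized types
// ne must be a multiple of the block size, which the assert above enforces.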
  1063. double ggml_type_sizef(enum ggml_type type) {
  1064. return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
  1065. }
  1066. const char * ggml_type_name(enum ggml_type type) {
  1067. return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
  1068. }
  1069. bool ggml_is_quantized(enum ggml_type type) {
  1070. return type_traits[type].is_quantized;
  1071. }
  1072. const char * ggml_op_name(enum ggml_op op) {
  1073. return GGML_OP_NAME[op];
  1074. }
  1075. const char * ggml_op_symbol(enum ggml_op op) {
  1076. return GGML_OP_SYMBOL[op];
  1077. }
  1078. const char * ggml_unary_op_name(enum ggml_unary_op op) {
  1079. return GGML_UNARY_OP_NAME[op];
  1080. }
  1081. const char * ggml_op_desc(const struct ggml_tensor * t) {
  1082. if (t->op == GGML_OP_UNARY) {
  1083. enum ggml_unary_op uop = ggml_get_unary_op(t);
  1084. return ggml_unary_op_name(uop);
  1085. }
  1086. return ggml_op_name(t->op);
  1087. }
  1088. size_t ggml_element_size(const struct ggml_tensor * tensor) {
  1089. return ggml_type_size(tensor->type);
  1090. }
  1091. bool ggml_is_scalar(const struct ggml_tensor * tensor) {
  1092. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1093. return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1094. }
  1095. bool ggml_is_vector(const struct ggml_tensor * tensor) {
  1096. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1097. return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1098. }
  1099. bool ggml_is_matrix(const struct ggml_tensor * tensor) {
  1100. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1101. return tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1102. }
  1103. bool ggml_is_3d(const struct ggml_tensor * tensor) {
  1104. return tensor->ne[3] == 1;
  1105. }
  1106. int ggml_n_dims(const struct ggml_tensor * tensor) {
  1107. for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
  1108. if (tensor->ne[i] > 1) {
  1109. return i + 1;
  1110. }
  1111. }
  1112. return 1;
  1113. }
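// Example: a tensor with ne = {8, 1, 3, 1} has ggml_n_dims() == 3: trailing
// dimensions of size 1 are ignored, while the size-1 dimension in the middle
// still counts.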
  1114. enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
  1115. enum ggml_type wtype = GGML_TYPE_COUNT;
  1116. switch (ftype) {
  1117. case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
  1118. case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
  1119. case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
  1120. case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
  1121. case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
  1122. case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
  1123. case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
  1124. case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
  1125. case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
  1126. case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
  1127. case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
  1128. case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
  1129. case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
  1130. case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
  1131. case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
  1132. case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
  1133. case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
  1134. case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
  1135. case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
  1136. case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
  1137. case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
  1138. case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
  1139. case GGML_FTYPE_MOSTLY_Q4_0_4_4: wtype = GGML_TYPE_Q4_0_4_4; break;
  1140. case GGML_FTYPE_MOSTLY_Q4_0_4_8: wtype = GGML_TYPE_Q4_0_4_8; break;
  1141. case GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = GGML_TYPE_Q4_0_8_8; break;
  1142. case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
  1143. case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
  1144. }
  1145. GGML_ASSERT(wtype != GGML_TYPE_COUNT);
  1146. return wtype;
  1147. }
  1148. size_t ggml_tensor_overhead(void) {
  1149. return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
  1150. }
  1151. bool ggml_is_transposed(const struct ggml_tensor * tensor) {
  1152. return tensor->nb[0] > tensor->nb[1];
  1153. }
  1154. static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
  1155. size_t next_nb = ggml_type_size(tensor->type);
  1156. if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
  1157. return false;
  1158. }
  1159. next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
  1160. for (int i = 1; i < GGML_MAX_DIMS; i++) {
  1161. if (tensor->ne[i] != 1) {
  1162. if (i > n) {
  1163. if (tensor->nb[i] != next_nb) {
  1164. return false;
  1165. }
  1166. next_nb *= tensor->ne[i];
  1167. } else {
  1168. // this dimension does not need to be contiguous
  1169. next_nb = tensor->ne[i]*tensor->nb[i];
  1170. }
  1171. }
  1172. }
  1173. return true;
  1174. }
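// The wrappers below relax the check progressively (a sketch of the intent):
//   ggml_is_contiguous_0 - fully packed in every dimension (no padding anywhere)
//   ggml_is_contiguous_1 - elements within a row are packed, but the row stride nb[1]
//                          may include padding; higher dimensions are packed on top of it
//   ggml_is_contiguous_2 - rows and 2D planes may both be padded; only the outermost
//                          dimension must be packed on top of the actual nb[2]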
  1175. bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
  1176. return ggml_is_contiguous_0(tensor);
  1177. }
  1178. bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
  1179. return ggml_is_contiguous_n(tensor, 0);
  1180. }
  1181. bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
  1182. return ggml_is_contiguous_n(tensor, 1);
  1183. }
  1184. bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
  1185. return ggml_is_contiguous_n(tensor, 2);
  1186. }
  1187. bool ggml_is_permuted(const struct ggml_tensor * tensor) {
  1188. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1189. return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
  1190. }
  1191. static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
  1192. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1193. return
  1194. tensor->nb[0] == ggml_type_size(tensor->type) &&
  1195. tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
  1196. tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
  1197. }
  1198. bool ggml_is_empty(const struct ggml_tensor * tensor) {
  1199. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1200. if (tensor->ne[i] == 0) {
  1201. // empty if any dimension has no elements
  1202. return true;
  1203. }
  1204. }
  1205. return false;
  1206. }
  1207. bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1208. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1209. return
  1210. (t0->ne[0] == t1->ne[0]) &&
  1211. (t0->ne[1] == t1->ne[1]) &&
  1212. (t0->ne[2] == t1->ne[2]) &&
  1213. (t0->ne[3] == t1->ne[3]);
  1214. }
  1215. bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1216. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1217. return
  1218. (t0->nb[0] == t1->nb[0]) &&
  1219. (t0->nb[1] == t1->nb[1]) &&
  1220. (t0->nb[2] == t1->nb[2]) &&
  1221. (t0->nb[3] == t1->nb[3]);
  1222. }
// check if t1 can be represented as a repetition of t0
  1224. bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1225. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1226. return ggml_is_empty(t0) ? ggml_is_empty(t1) :
  1227. (t1->ne[0]%t0->ne[0] == 0) &&
  1228. (t1->ne[1]%t0->ne[1] == 0) &&
  1229. (t1->ne[2]%t0->ne[2] == 0) &&
  1230. (t1->ne[3]%t0->ne[3] == 0);
  1231. }
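// Example: a bias with ne = {n, 1, 1, 1} can be repeated over activations with
// ne = {n, m, b, 1}, since every dimension of t1 is a whole multiple of the
// corresponding dimension of t0; this is the broadcasting rule used by ggml_add,
// ggml_mul, etc. below.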
  1232. static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1233. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1234. return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
  1235. }
  1236. // assert that pointer is aligned to GGML_MEM_ALIGN
  1237. #define GGML_ASSERT_ALIGNED(ptr) \
  1238. GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
  1239. ////////////////////////////////////////////////////////////////////////////////
  1240. struct ggml_context * ggml_init(struct ggml_init_params params) {
  1241. static bool is_first_call = true;
  1242. ggml_critical_section_start();
  1243. if (is_first_call) {
  1244. // initialize time system (required on Windows)
  1245. ggml_time_init();
  1246. for (int i = 0; i < (1 << 16); ++i) {
  1247. union {
  1248. uint16_t u16;
  1249. ggml_fp16_t fp16;
  1250. } u = {i};
  1251. ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
  1252. }
  1253. is_first_call = false;
  1254. }
  1255. ggml_critical_section_end();
  1256. struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
// allow calling ggml_init with 0 size
  1258. if (params.mem_size == 0) {
  1259. params.mem_size = GGML_MEM_ALIGN;
  1260. }
  1261. const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
  1262. *ctx = (struct ggml_context) {
  1263. /*.mem_size =*/ mem_size,
  1264. /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
  1265. /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
  1266. /*.no_alloc =*/ params.no_alloc,
  1267. /*.n_objects =*/ 0,
  1268. /*.objects_begin =*/ NULL,
  1269. /*.objects_end =*/ NULL,
  1270. };
  1271. GGML_ASSERT(ctx->mem_buffer != NULL);
  1272. GGML_ASSERT_ALIGNED(ctx->mem_buffer);
  1273. GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
  1274. return ctx;
  1275. }
  1276. void ggml_reset(struct ggml_context * ctx) {
  1277. if (ctx == NULL) {
  1278. return;
  1279. }
  1280. ctx->n_objects = 0;
  1281. ctx->objects_begin = NULL;
  1282. ctx->objects_end = NULL;
  1283. }
  1284. void ggml_free(struct ggml_context * ctx) {
  1285. if (ctx == NULL) {
  1286. return;
  1287. }
  1288. if (ctx->mem_buffer_owned) {
  1289. ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
  1290. }
  1291. GGML_FREE(ctx);
  1292. }
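// Typical lifecycle (an illustrative sketch; the 16 MiB pool size and tensor shape
// are arbitrary choices for the example):
//
//     struct ggml_init_params params = {
//         /*.mem_size   =*/ 16*1024*1024,
//         /*.mem_buffer =*/ NULL,    // let ggml allocate (and own) the buffer
//         /*.no_alloc   =*/ false,   // tensor data is carved out of the pool
//     };
//     struct ggml_context * ctx = ggml_init(params);
//     struct ggml_tensor  * a   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32);
//     // ... build tensors/graphs that live in ctx ...
//     ggml_free(ctx);   // frees the pool only if it is owned by the context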
  1293. size_t ggml_used_mem(const struct ggml_context * ctx) {
  1294. return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
  1295. }
  1296. bool ggml_get_no_alloc(struct ggml_context * ctx) {
  1297. return ctx->no_alloc;
  1298. }
  1299. void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
  1300. ctx->no_alloc = no_alloc;
  1301. }
  1302. void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
  1303. return ctx->mem_buffer;
  1304. }
  1305. size_t ggml_get_mem_size(const struct ggml_context * ctx) {
  1306. return ctx->mem_size;
  1307. }
  1308. size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
  1309. size_t max_size = 0;
  1310. for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
  1311. size_t bytes = ggml_nbytes(tensor);
  1312. max_size = MAX(max_size, bytes);
  1313. }
  1314. return max_size;
  1315. }
  1316. ////////////////////////////////////////////////////////////////////////////////
  1317. static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
  1318. // always insert objects at the end of the context's memory pool
  1319. struct ggml_object * obj_cur = ctx->objects_end;
  1320. const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
  1321. const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
  1322. const size_t cur_end = cur_offs + cur_size;
  1323. // align to GGML_MEM_ALIGN
  1324. size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
  1325. char * const mem_buffer = ctx->mem_buffer;
  1326. struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
  1327. if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
  1328. GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
  1329. __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
  1330. #ifndef NDEBUG
  1331. GGML_ABORT("not enough space in the context's memory pool");
  1332. #endif
  1333. return NULL;
  1334. }
  1335. *obj_new = (struct ggml_object) {
  1336. .offs = cur_end + GGML_OBJECT_SIZE,
  1337. .size = size_needed,
  1338. .next = NULL,
  1339. .type = type,
  1340. };
  1341. GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
  1342. if (obj_cur != NULL) {
  1343. obj_cur->next = obj_new;
  1344. } else {
  1345. // this is the first object in this context
  1346. ctx->objects_begin = obj_new;
  1347. }
  1348. ctx->objects_end = obj_new;
  1349. //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
  1350. return obj_new;
  1351. }
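// Pool layout (a sketch): every allocation appends a ggml_object header at the
// current end of the pool, immediately followed by its GGML_MEM_ALIGN-padded
// payload; obj->offs points at the payload, and the headers form a singly linked
// list from objects_begin to objects_end:
//
//     mem_buffer: [hdr][payload......][hdr][payload..][hdr][payload....] -> end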
  1352. static struct ggml_tensor * ggml_new_tensor_impl(
  1353. struct ggml_context * ctx,
  1354. enum ggml_type type,
  1355. int n_dims,
  1356. const int64_t * ne,
  1357. struct ggml_tensor * view_src,
  1358. size_t view_offs) {
  1359. GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
  1360. GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
  1361. // find the base tensor and absolute offset
  1362. if (view_src != NULL && view_src->view_src != NULL) {
  1363. view_offs += view_src->view_offs;
  1364. view_src = view_src->view_src;
  1365. }
  1366. size_t data_size = ggml_row_size(type, ne[0]);
  1367. for (int i = 1; i < n_dims; i++) {
  1368. data_size *= ne[i];
  1369. }
  1370. GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
  1371. void * data = view_src != NULL ? view_src->data : NULL;
  1372. if (data != NULL) {
  1373. data = (char *) data + view_offs;
  1374. }
  1375. size_t obj_alloc_size = 0;
  1376. if (view_src == NULL && !ctx->no_alloc) {
  1377. // allocate tensor data in the context's memory pool
  1378. obj_alloc_size = data_size;
  1379. }
  1380. struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
  1381. GGML_ASSERT(obj_new);
  1382. struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
  1383. #ifdef __clang__
  1384. // temporary until ggml_tensor::backend is removed
  1385. #pragma clang diagnostic push
  1386. #pragma clang diagnostic ignored "-Wdeprecated-declarations"
  1387. #endif
  1388. *result = (struct ggml_tensor) {
  1389. /*.type =*/ type,
  1390. /*.backend =*/ GGML_BACKEND_TYPE_CPU,
  1391. /*.buffer =*/ NULL,
  1392. /*.ne =*/ { 1, 1, 1, 1 },
  1393. /*.nb =*/ { 0, 0, 0, 0 },
  1394. /*.op =*/ GGML_OP_NONE,
  1395. /*.op_params =*/ { 0 },
  1396. /*.flags =*/ 0,
  1397. /*.grad =*/ NULL,
  1398. /*.src =*/ { NULL },
  1399. /*.view_src =*/ view_src,
  1400. /*.view_offs =*/ view_offs,
  1401. /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
  1402. /*.name =*/ { 0 },
  1403. /*.extra =*/ NULL,
  1404. ///*.padding =*/ { 0 },
  1405. };
  1406. #ifdef __clang__
  1407. #pragma clang diagnostic pop
  1408. #endif
  1409. // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
  1410. //GGML_ASSERT_ALIGNED(result->data);
  1411. for (int i = 0; i < n_dims; i++) {
  1412. result->ne[i] = ne[i];
  1413. }
  1414. result->nb[0] = ggml_type_size(type);
  1415. result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
  1416. for (int i = 2; i < GGML_MAX_DIMS; i++) {
  1417. result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
  1418. }
  1419. ctx->n_objects++;
  1420. return result;
  1421. }
  1422. struct ggml_tensor * ggml_new_tensor(
  1423. struct ggml_context * ctx,
  1424. enum ggml_type type,
  1425. int n_dims,
  1426. const int64_t * ne) {
  1427. return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
  1428. }
  1429. struct ggml_tensor * ggml_new_tensor_1d(
  1430. struct ggml_context * ctx,
  1431. enum ggml_type type,
  1432. int64_t ne0) {
  1433. return ggml_new_tensor(ctx, type, 1, &ne0);
  1434. }
  1435. struct ggml_tensor * ggml_new_tensor_2d(
  1436. struct ggml_context * ctx,
  1437. enum ggml_type type,
  1438. int64_t ne0,
  1439. int64_t ne1) {
  1440. const int64_t ne[2] = { ne0, ne1 };
  1441. return ggml_new_tensor(ctx, type, 2, ne);
  1442. }
  1443. struct ggml_tensor * ggml_new_tensor_3d(
  1444. struct ggml_context * ctx,
  1445. enum ggml_type type,
  1446. int64_t ne0,
  1447. int64_t ne1,
  1448. int64_t ne2) {
  1449. const int64_t ne[3] = { ne0, ne1, ne2 };
  1450. return ggml_new_tensor(ctx, type, 3, ne);
  1451. }
  1452. struct ggml_tensor * ggml_new_tensor_4d(
  1453. struct ggml_context * ctx,
  1454. enum ggml_type type,
  1455. int64_t ne0,
  1456. int64_t ne1,
  1457. int64_t ne2,
  1458. int64_t ne3) {
  1459. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  1460. return ggml_new_tensor(ctx, type, 4, ne);
  1461. }
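// Example: ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 3, 2) yields ne = {4, 3, 2, 1}
// and, per the stride initialization in ggml_new_tensor_impl, byte strides
// nb = {4, 16, 48, 96}: nb[0] = sizeof(float), nb[1] = nb[0]*ne[0],
// nb[2] = nb[1]*ne[1], nb[3] = nb[2]*ne[2].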
  1462. void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
  1463. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
  1464. return (uint8_t *)ctx->mem_buffer + obj->offs;
  1465. }
  1466. struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
  1467. return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
  1468. }
  1469. void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
  1470. const int64_t ne2 = tensor->ne[2];
  1471. const int64_t ne1 = tensor->ne[1];
  1472. const int64_t ne0 = tensor->ne[0];
  1473. const int64_t i3_ = (i/(ne2*ne1*ne0));
  1474. const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
  1475. const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
  1476. const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
  1477. if (i0) {
  1478. * i0 = i0_;
  1479. }
  1480. if (i1) {
  1481. * i1 = i1_;
  1482. }
  1483. if (i2) {
  1484. * i2 = i2_;
  1485. }
  1486. if (i3) {
  1487. * i3 = i3_;
  1488. }
  1489. }
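// Example: for ne = {4, 3, 2, 1}, the flat index i = 17 unravels to
// i3 = 0, i2 = 1, i1 = 1, i0 = 1, since 17 = ((0*2 + 1)*3 + 1)*4 + 1.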
  1490. void * ggml_get_data(const struct ggml_tensor * tensor) {
  1491. return tensor->data;
  1492. }
  1493. float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
  1494. assert(tensor->type == GGML_TYPE_F32);
  1495. return (float *)(tensor->data);
  1496. }
  1497. enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
  1498. GGML_ASSERT(tensor->op == GGML_OP_UNARY);
  1499. return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
  1500. }
  1501. const char * ggml_get_name(const struct ggml_tensor * tensor) {
  1502. return tensor->name;
  1503. }
  1504. struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
  1505. size_t i;
  1506. for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
  1507. tensor->name[i] = name[i];
  1508. }
  1509. tensor->name[i] = '\0';
  1510. return tensor;
  1511. }
  1512. struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
  1513. va_list args;
  1514. va_start(args, fmt);
  1515. vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
  1516. va_end(args);
  1517. return tensor;
  1518. }
  1519. struct ggml_tensor * ggml_view_tensor(
  1520. struct ggml_context * ctx,
  1521. struct ggml_tensor * src) {
  1522. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
  1523. ggml_format_name(result, "%s (view)", src->name);
  1524. for (int i = 0; i < GGML_MAX_DIMS; i++) {
  1525. result->nb[i] = src->nb[i];
  1526. }
  1527. return result;
  1528. }
  1529. struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
  1530. struct ggml_object * obj = ctx->objects_begin;
  1531. char * const mem_buffer = ctx->mem_buffer;
  1532. while (obj != NULL) {
  1533. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1534. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1535. }
  1536. obj = obj->next;
  1537. }
  1538. return NULL;
  1539. }
  1540. struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
  1541. struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
  1542. obj = obj->next;
  1543. char * const mem_buffer = ctx->mem_buffer;
  1544. while (obj != NULL) {
  1545. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1546. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1547. }
  1548. obj = obj->next;
  1549. }
  1550. return NULL;
  1551. }
  1552. struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
  1553. struct ggml_object * obj = ctx->objects_begin;
  1554. char * const mem_buffer = ctx->mem_buffer;
  1555. while (obj != NULL) {
  1556. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1557. struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
  1558. if (strcmp(cur->name, name) == 0) {
  1559. return cur;
  1560. }
  1561. }
  1562. obj = obj->next;
  1563. }
  1564. return NULL;
  1565. }
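// Example (a sketch; the tensor name is arbitrary):
//
//     struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
//     ggml_set_name(w, "model.w");
//     // ...
//     struct ggml_tensor * found = ggml_get_tensor(ctx, "model.w");   // == w
//
// The lookup is a linear scan over the objects in the context.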
  1566. ////////////////////////////////////////////////////////////////////////////////
  1567. // ggml_dup
  1568. static struct ggml_tensor * ggml_dup_impl(
  1569. struct ggml_context * ctx,
  1570. struct ggml_tensor * a,
  1571. bool inplace) {
  1572. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1573. result->op = GGML_OP_DUP;
  1574. result->src[0] = a;
  1575. return result;
  1576. }
  1577. struct ggml_tensor * ggml_dup(
  1578. struct ggml_context * ctx,
  1579. struct ggml_tensor * a) {
  1580. return ggml_dup_impl(ctx, a, false);
  1581. }
  1582. struct ggml_tensor * ggml_dup_inplace(
  1583. struct ggml_context * ctx,
  1584. struct ggml_tensor * a) {
  1585. return ggml_dup_impl(ctx, a, true);
  1586. }
  1587. // ggml_add
  1588. static struct ggml_tensor * ggml_add_impl(
  1589. struct ggml_context * ctx,
  1590. struct ggml_tensor * a,
  1591. struct ggml_tensor * b,
  1592. bool inplace) {
  1593. GGML_ASSERT(ggml_can_repeat(b, a));
  1594. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1595. result->op = GGML_OP_ADD;
  1596. result->src[0] = a;
  1597. result->src[1] = b;
  1598. return result;
  1599. }
  1600. struct ggml_tensor * ggml_add(
  1601. struct ggml_context * ctx,
  1602. struct ggml_tensor * a,
  1603. struct ggml_tensor * b) {
  1604. return ggml_add_impl(ctx, a, b, false);
  1605. }
  1606. struct ggml_tensor * ggml_add_inplace(
  1607. struct ggml_context * ctx,
  1608. struct ggml_tensor * a,
  1609. struct ggml_tensor * b) {
  1610. return ggml_add_impl(ctx, a, b, true);
  1611. }
  1612. // ggml_add_cast
  1613. static struct ggml_tensor * ggml_add_cast_impl(
  1614. struct ggml_context * ctx,
  1615. struct ggml_tensor * a,
  1616. struct ggml_tensor * b,
  1617. enum ggml_type type) {
  1618. // TODO: support less-strict constraint
  1619. // GGML_ASSERT(ggml_can_repeat(b, a));
  1620. GGML_ASSERT(ggml_can_repeat_rows(b, a));
// currently only supported for quantized input and f16/bf16
  1622. GGML_ASSERT(ggml_is_quantized(a->type) ||
  1623. a->type == GGML_TYPE_F16 ||
  1624. a->type == GGML_TYPE_BF16);
  1625. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  1626. result->op = GGML_OP_ADD;
  1627. result->src[0] = a;
  1628. result->src[1] = b;
  1629. return result;
  1630. }
  1631. struct ggml_tensor * ggml_add_cast(
  1632. struct ggml_context * ctx,
  1633. struct ggml_tensor * a,
  1634. struct ggml_tensor * b,
  1635. enum ggml_type type) {
  1636. return ggml_add_cast_impl(ctx, a, b, type);
  1637. }
  1638. // ggml_add1
  1639. static struct ggml_tensor * ggml_add1_impl(
  1640. struct ggml_context * ctx,
  1641. struct ggml_tensor * a,
  1642. struct ggml_tensor * b,
  1643. bool inplace) {
  1644. GGML_ASSERT(ggml_is_scalar(b));
  1645. GGML_ASSERT(ggml_is_padded_1d(a));
  1646. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1647. result->op = GGML_OP_ADD1;
  1648. result->src[0] = a;
  1649. result->src[1] = b;
  1650. return result;
  1651. }
  1652. struct ggml_tensor * ggml_add1(
  1653. struct ggml_context * ctx,
  1654. struct ggml_tensor * a,
  1655. struct ggml_tensor * b) {
  1656. return ggml_add1_impl(ctx, a, b, false);
  1657. }
  1658. struct ggml_tensor * ggml_add1_inplace(
  1659. struct ggml_context * ctx,
  1660. struct ggml_tensor * a,
  1661. struct ggml_tensor * b) {
  1662. return ggml_add1_impl(ctx, a, b, true);
  1663. }
  1664. // ggml_acc
  1665. static struct ggml_tensor * ggml_acc_impl(
  1666. struct ggml_context * ctx,
  1667. struct ggml_tensor * a,
  1668. struct ggml_tensor * b,
  1669. size_t nb1,
  1670. size_t nb2,
  1671. size_t nb3,
  1672. size_t offset,
  1673. bool inplace) {
  1674. GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
  1675. GGML_ASSERT(ggml_is_contiguous(a));
  1676. GGML_ASSERT(a->type == GGML_TYPE_F32);
  1677. GGML_ASSERT(b->type == GGML_TYPE_F32);
  1678. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1679. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  1680. ggml_set_op_params(result, params, sizeof(params));
  1681. result->op = GGML_OP_ACC;
  1682. result->src[0] = a;
  1683. result->src[1] = b;
  1684. return result;
  1685. }
  1686. struct ggml_tensor * ggml_acc(
  1687. struct ggml_context * ctx,
  1688. struct ggml_tensor * a,
  1689. struct ggml_tensor * b,
  1690. size_t nb1,
  1691. size_t nb2,
  1692. size_t nb3,
  1693. size_t offset) {
  1694. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  1695. }
  1696. struct ggml_tensor * ggml_acc_inplace(
  1697. struct ggml_context * ctx,
  1698. struct ggml_tensor * a,
  1699. struct ggml_tensor * b,
  1700. size_t nb1,
  1701. size_t nb2,
  1702. size_t nb3,
  1703. size_t offset) {
  1704. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  1705. }
  1706. // ggml_sub
  1707. static struct ggml_tensor * ggml_sub_impl(
  1708. struct ggml_context * ctx,
  1709. struct ggml_tensor * a,
  1710. struct ggml_tensor * b,
  1711. bool inplace) {
  1712. GGML_ASSERT(ggml_can_repeat(b, a));
  1713. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1714. result->op = GGML_OP_SUB;
  1715. result->src[0] = a;
  1716. result->src[1] = b;
  1717. return result;
  1718. }
  1719. struct ggml_tensor * ggml_sub(
  1720. struct ggml_context * ctx,
  1721. struct ggml_tensor * a,
  1722. struct ggml_tensor * b) {
  1723. return ggml_sub_impl(ctx, a, b, false);
  1724. }
  1725. struct ggml_tensor * ggml_sub_inplace(
  1726. struct ggml_context * ctx,
  1727. struct ggml_tensor * a,
  1728. struct ggml_tensor * b) {
  1729. return ggml_sub_impl(ctx, a, b, true);
  1730. }
  1731. // ggml_mul
  1732. static struct ggml_tensor * ggml_mul_impl(
  1733. struct ggml_context * ctx,
  1734. struct ggml_tensor * a,
  1735. struct ggml_tensor * b,
  1736. bool inplace) {
  1737. GGML_ASSERT(ggml_can_repeat(b, a));
  1738. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1739. result->op = GGML_OP_MUL;
  1740. result->src[0] = a;
  1741. result->src[1] = b;
  1742. return result;
  1743. }
  1744. struct ggml_tensor * ggml_mul(
  1745. struct ggml_context * ctx,
  1746. struct ggml_tensor * a,
  1747. struct ggml_tensor * b) {
  1748. return ggml_mul_impl(ctx, a, b, false);
  1749. }
  1750. struct ggml_tensor * ggml_mul_inplace(
  1751. struct ggml_context * ctx,
  1752. struct ggml_tensor * a,
  1753. struct ggml_tensor * b) {
  1754. return ggml_mul_impl(ctx, a, b, true);
  1755. }
  1756. // ggml_div
  1757. static struct ggml_tensor * ggml_div_impl(
  1758. struct ggml_context * ctx,
  1759. struct ggml_tensor * a,
  1760. struct ggml_tensor * b,
  1761. bool inplace) {
  1762. GGML_ASSERT(ggml_can_repeat(b, a));
  1763. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1764. result->op = GGML_OP_DIV;
  1765. result->src[0] = a;
  1766. result->src[1] = b;
  1767. return result;
  1768. }
  1769. struct ggml_tensor * ggml_div(
  1770. struct ggml_context * ctx,
  1771. struct ggml_tensor * a,
  1772. struct ggml_tensor * b) {
  1773. return ggml_div_impl(ctx, a, b, false);
  1774. }
  1775. struct ggml_tensor * ggml_div_inplace(
  1776. struct ggml_context * ctx,
  1777. struct ggml_tensor * a,
  1778. struct ggml_tensor * b) {
  1779. return ggml_div_impl(ctx, a, b, true);
  1780. }
  1781. // ggml_sqr
  1782. static struct ggml_tensor * ggml_sqr_impl(
  1783. struct ggml_context * ctx,
  1784. struct ggml_tensor * a,
  1785. bool inplace) {
  1786. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1787. result->op = GGML_OP_SQR;
  1788. result->src[0] = a;
  1789. return result;
  1790. }
  1791. struct ggml_tensor * ggml_sqr(
  1792. struct ggml_context * ctx,
  1793. struct ggml_tensor * a) {
  1794. return ggml_sqr_impl(ctx, a, false);
  1795. }
  1796. struct ggml_tensor * ggml_sqr_inplace(
  1797. struct ggml_context * ctx,
  1798. struct ggml_tensor * a) {
  1799. return ggml_sqr_impl(ctx, a, true);
  1800. }
  1801. // ggml_sqrt
  1802. static struct ggml_tensor * ggml_sqrt_impl(
  1803. struct ggml_context * ctx,
  1804. struct ggml_tensor * a,
  1805. bool inplace) {
  1806. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1807. result->op = GGML_OP_SQRT;
  1808. result->src[0] = a;
  1809. return result;
  1810. }
  1811. struct ggml_tensor * ggml_sqrt(
  1812. struct ggml_context * ctx,
  1813. struct ggml_tensor * a) {
  1814. return ggml_sqrt_impl(ctx, a, false);
  1815. }
  1816. struct ggml_tensor * ggml_sqrt_inplace(
  1817. struct ggml_context * ctx,
  1818. struct ggml_tensor * a) {
  1819. return ggml_sqrt_impl(ctx, a, true);
  1820. }
  1821. // ggml_log
  1822. static struct ggml_tensor * ggml_log_impl(
  1823. struct ggml_context * ctx,
  1824. struct ggml_tensor * a,
  1825. bool inplace) {
  1826. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1827. result->op = GGML_OP_LOG;
  1828. result->src[0] = a;
  1829. return result;
  1830. }
  1831. struct ggml_tensor * ggml_log(
  1832. struct ggml_context * ctx,
  1833. struct ggml_tensor * a) {
  1834. return ggml_log_impl(ctx, a, false);
  1835. }
  1836. struct ggml_tensor * ggml_log_inplace(
  1837. struct ggml_context * ctx,
  1838. struct ggml_tensor * a) {
  1839. return ggml_log_impl(ctx, a, true);
  1840. }
  1841. // ggml_sin
  1842. static struct ggml_tensor * ggml_sin_impl(
  1843. struct ggml_context * ctx,
  1844. struct ggml_tensor * a,
  1845. bool inplace) {
  1846. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1847. result->op = GGML_OP_SIN;
  1848. result->src[0] = a;
  1849. return result;
  1850. }
  1851. struct ggml_tensor * ggml_sin(
  1852. struct ggml_context * ctx,
  1853. struct ggml_tensor * a) {
  1854. return ggml_sin_impl(ctx, a, false);
  1855. }
  1856. struct ggml_tensor * ggml_sin_inplace(
  1857. struct ggml_context * ctx,
  1858. struct ggml_tensor * a) {
  1859. return ggml_sin_impl(ctx, a, true);
  1860. }
  1861. // ggml_cos
  1862. static struct ggml_tensor * ggml_cos_impl(
  1863. struct ggml_context * ctx,
  1864. struct ggml_tensor * a,
  1865. bool inplace) {
  1866. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1867. result->op = GGML_OP_COS;
  1868. result->src[0] = a;
  1869. return result;
  1870. }
  1871. struct ggml_tensor * ggml_cos(
  1872. struct ggml_context * ctx,
  1873. struct ggml_tensor * a) {
  1874. return ggml_cos_impl(ctx, a, false);
  1875. }
  1876. struct ggml_tensor * ggml_cos_inplace(
  1877. struct ggml_context * ctx,
  1878. struct ggml_tensor * a) {
  1879. return ggml_cos_impl(ctx, a, true);
  1880. }
  1881. // ggml_sum
  1882. struct ggml_tensor * ggml_sum(
  1883. struct ggml_context * ctx,
  1884. struct ggml_tensor * a) {
  1885. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  1886. result->op = GGML_OP_SUM;
  1887. result->src[0] = a;
  1888. return result;
  1889. }
  1890. // ggml_sum_rows
  1891. struct ggml_tensor * ggml_sum_rows(
  1892. struct ggml_context * ctx,
  1893. struct ggml_tensor * a) {
  1894. int64_t ne[GGML_MAX_DIMS] = { 1 };
  1895. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1896. ne[i] = a->ne[i];
  1897. }
  1898. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1899. result->op = GGML_OP_SUM_ROWS;
  1900. result->src[0] = a;
  1901. return result;
  1902. }
  1903. // ggml_mean
  1904. struct ggml_tensor * ggml_mean(
  1905. struct ggml_context * ctx,
  1906. struct ggml_tensor * a) {
  1907. int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
  1908. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  1909. result->op = GGML_OP_MEAN;
  1910. result->src[0] = a;
  1911. return result;
  1912. }
  1913. // ggml_argmax
  1914. struct ggml_tensor * ggml_argmax(
  1915. struct ggml_context * ctx,
  1916. struct ggml_tensor * a) {
  1917. GGML_ASSERT(ggml_is_matrix(a));
  1918. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
  1919. result->op = GGML_OP_ARGMAX;
  1920. result->src[0] = a;
  1921. return result;
  1922. }
  1923. // ggml_count_equal
  1924. struct ggml_tensor * ggml_count_equal(
  1925. struct ggml_context * ctx,
  1926. struct ggml_tensor * a,
  1927. struct ggml_tensor * b) {
  1928. GGML_ASSERT(ggml_are_same_shape(a, b));
  1929. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
  1930. result->op = GGML_OP_COUNT_EQUAL;
  1931. result->src[0] = a;
  1932. result->src[1] = b;
  1933. return result;
  1934. }
  1935. // ggml_repeat
  1936. struct ggml_tensor * ggml_repeat(
  1937. struct ggml_context * ctx,
  1938. struct ggml_tensor * a,
  1939. struct ggml_tensor * b) {
  1940. GGML_ASSERT(ggml_can_repeat(a, b));
  1941. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1942. result->op = GGML_OP_REPEAT;
  1943. result->src[0] = a;
  1944. return result;
  1945. }
  1946. // ggml_repeat_back
  1947. struct ggml_tensor * ggml_repeat_back(
  1948. struct ggml_context * ctx,
  1949. struct ggml_tensor * a,
  1950. struct ggml_tensor * b) {
  1951. GGML_ASSERT(ggml_can_repeat(b, a));
  1952. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1953. result->op = GGML_OP_REPEAT_BACK;
  1954. result->src[0] = a;
  1955. return result;
  1956. }
  1957. // ggml_concat
  1958. struct ggml_tensor * ggml_concat(
  1959. struct ggml_context * ctx,
  1960. struct ggml_tensor * a,
  1961. struct ggml_tensor * b,
  1962. int dim) {
  1963. GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
  1964. int64_t ne[GGML_MAX_DIMS];
  1965. for (int d = 0; d < GGML_MAX_DIMS; ++d) {
  1966. if (d == dim) {
  1967. ne[d] = a->ne[d] + b->ne[d];
  1968. continue;
  1969. }
  1970. GGML_ASSERT(a->ne[d] == b->ne[d]);
  1971. ne[d] = a->ne[d];
  1972. }
  1973. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1974. ggml_set_op_params_i32(result, 0, dim);
  1975. result->op = GGML_OP_CONCAT;
  1976. result->src[0] = a;
  1977. result->src[1] = b;
  1978. return result;
  1979. }
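// Example: concatenating a with ne = {8, 4, 1, 1} and b with ne = {8, 2, 1, 1}
// along dim = 1 yields ne = {8, 6, 1, 1}; all dimensions other than dim must
// match exactly, as asserted above.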
  1980. // ggml_abs
  1981. struct ggml_tensor * ggml_abs(
  1982. struct ggml_context * ctx,
  1983. struct ggml_tensor * a) {
  1984. return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
  1985. }
  1986. struct ggml_tensor * ggml_abs_inplace(
  1987. struct ggml_context * ctx,
  1988. struct ggml_tensor * a) {
  1989. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
  1990. }
  1991. // ggml_sgn
  1992. struct ggml_tensor * ggml_sgn(
  1993. struct ggml_context * ctx,
  1994. struct ggml_tensor * a) {
  1995. return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
  1996. }
  1997. struct ggml_tensor * ggml_sgn_inplace(
  1998. struct ggml_context * ctx,
  1999. struct ggml_tensor * a) {
  2000. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
  2001. }
  2002. // ggml_neg
  2003. struct ggml_tensor * ggml_neg(
  2004. struct ggml_context * ctx,
  2005. struct ggml_tensor * a) {
  2006. return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
  2007. }
  2008. struct ggml_tensor * ggml_neg_inplace(
  2009. struct ggml_context * ctx,
  2010. struct ggml_tensor * a) {
  2011. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
  2012. }
  2013. // ggml_step
  2014. struct ggml_tensor * ggml_step(
  2015. struct ggml_context * ctx,
  2016. struct ggml_tensor * a) {
  2017. return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
  2018. }
  2019. struct ggml_tensor * ggml_step_inplace(
  2020. struct ggml_context * ctx,
  2021. struct ggml_tensor * a) {
  2022. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
  2023. }
  2024. // ggml_tanh
  2025. struct ggml_tensor * ggml_tanh(
  2026. struct ggml_context * ctx,
  2027. struct ggml_tensor * a) {
  2028. return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
  2029. }
  2030. struct ggml_tensor * ggml_tanh_inplace(
  2031. struct ggml_context * ctx,
  2032. struct ggml_tensor * a) {
  2033. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
  2034. }
  2035. // ggml_elu
  2036. struct ggml_tensor * ggml_elu(
  2037. struct ggml_context * ctx,
  2038. struct ggml_tensor * a) {
  2039. return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
  2040. }
  2041. struct ggml_tensor * ggml_elu_inplace(
  2042. struct ggml_context * ctx,
  2043. struct ggml_tensor * a) {
  2044. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
  2045. }
  2046. // ggml_relu
  2047. struct ggml_tensor * ggml_relu(
  2048. struct ggml_context * ctx,
  2049. struct ggml_tensor * a) {
  2050. return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
  2051. }
  2052. struct ggml_tensor * ggml_relu_inplace(
  2053. struct ggml_context * ctx,
  2054. struct ggml_tensor * a) {
  2055. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
  2056. }
  2057. // ggml_leaky_relu
  2058. struct ggml_tensor * ggml_leaky_relu(
  2059. struct ggml_context * ctx,
  2060. struct ggml_tensor * a,
  2061. float negative_slope,
  2062. bool inplace) {
  2063. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2064. ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
  2065. result->op = GGML_OP_LEAKY_RELU;
  2066. result->src[0] = a;
  2067. return result;
  2068. }
  2069. // ggml_sigmoid
  2070. struct ggml_tensor * ggml_sigmoid(
  2071. struct ggml_context * ctx,
  2072. struct ggml_tensor * a) {
  2073. return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
  2074. }
  2075. struct ggml_tensor * ggml_sigmoid_inplace(
  2076. struct ggml_context * ctx,
  2077. struct ggml_tensor * a) {
  2078. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
  2079. }
  2080. // ggml_gelu
  2081. struct ggml_tensor * ggml_gelu(
  2082. struct ggml_context * ctx,
  2083. struct ggml_tensor * a) {
  2084. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
  2085. }
  2086. struct ggml_tensor * ggml_gelu_inplace(
  2087. struct ggml_context * ctx,
  2088. struct ggml_tensor * a) {
  2089. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
  2090. }
  2091. // ggml_gelu_quick
  2092. struct ggml_tensor * ggml_gelu_quick(
  2093. struct ggml_context * ctx,
  2094. struct ggml_tensor * a) {
  2095. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2096. }
  2097. struct ggml_tensor * ggml_gelu_quick_inplace(
  2098. struct ggml_context * ctx,
  2099. struct ggml_tensor * a) {
  2100. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2101. }
  2102. // ggml_silu
  2103. struct ggml_tensor * ggml_silu(
  2104. struct ggml_context * ctx,
  2105. struct ggml_tensor * a) {
  2106. return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
  2107. }
  2108. struct ggml_tensor * ggml_silu_inplace(
  2109. struct ggml_context * ctx,
  2110. struct ggml_tensor * a) {
  2111. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
  2112. }
  2113. // ggml_silu_back
  2114. struct ggml_tensor * ggml_silu_back(
  2115. struct ggml_context * ctx,
  2116. struct ggml_tensor * a,
  2117. struct ggml_tensor * b) {
  2118. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2119. result->op = GGML_OP_SILU_BACK;
  2120. result->src[0] = a;
  2121. result->src[1] = b;
  2122. return result;
  2123. }
// ggml_hardswish
  2125. struct ggml_tensor * ggml_hardswish(
  2126. struct ggml_context * ctx,
  2127. struct ggml_tensor * a) {
  2128. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
  2129. }
// ggml_hardsigmoid
  2131. struct ggml_tensor * ggml_hardsigmoid(
  2132. struct ggml_context * ctx,
  2133. struct ggml_tensor * a) {
  2134. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
  2135. }
// ggml_exp
  2137. struct ggml_tensor * ggml_exp(
  2138. struct ggml_context * ctx,
  2139. struct ggml_tensor * a) {
  2140. return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
  2141. }
  2142. struct ggml_tensor * ggml_exp_inplace(
  2143. struct ggml_context * ctx,
  2144. struct ggml_tensor * a) {
  2145. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
  2146. }
  2147. // ggml_norm
  2148. static struct ggml_tensor * ggml_norm_impl(
  2149. struct ggml_context * ctx,
  2150. struct ggml_tensor * a,
  2151. float eps,
  2152. bool inplace) {
  2153. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2154. ggml_set_op_params(result, &eps, sizeof(eps));
  2155. result->op = GGML_OP_NORM;
  2156. result->src[0] = a;
  2157. return result;
  2158. }
  2159. struct ggml_tensor * ggml_norm(
  2160. struct ggml_context * ctx,
  2161. struct ggml_tensor * a,
  2162. float eps) {
  2163. return ggml_norm_impl(ctx, a, eps, false);
  2164. }
  2165. struct ggml_tensor * ggml_norm_inplace(
  2166. struct ggml_context * ctx,
  2167. struct ggml_tensor * a,
  2168. float eps) {
  2169. return ggml_norm_impl(ctx, a, eps, true);
  2170. }
  2171. // ggml_rms_norm
  2172. static struct ggml_tensor * ggml_rms_norm_impl(
  2173. struct ggml_context * ctx,
  2174. struct ggml_tensor * a,
  2175. float eps,
  2176. bool inplace) {
  2177. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2178. ggml_set_op_params(result, &eps, sizeof(eps));
  2179. result->op = GGML_OP_RMS_NORM;
  2180. result->src[0] = a;
  2181. return result;
  2182. }
  2183. struct ggml_tensor * ggml_rms_norm(
  2184. struct ggml_context * ctx,
  2185. struct ggml_tensor * a,
  2186. float eps) {
  2187. return ggml_rms_norm_impl(ctx, a, eps, false);
  2188. }
  2189. struct ggml_tensor * ggml_rms_norm_inplace(
  2190. struct ggml_context * ctx,
  2191. struct ggml_tensor * a,
  2192. float eps) {
  2193. return ggml_rms_norm_impl(ctx, a, eps, true);
  2194. }
  2195. // ggml_rms_norm_back
  2196. struct ggml_tensor * ggml_rms_norm_back(
  2197. struct ggml_context * ctx,
  2198. struct ggml_tensor * a,
  2199. struct ggml_tensor * b,
  2200. float eps) {
  2201. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2202. ggml_set_op_params(result, &eps, sizeof(eps));
  2203. result->op = GGML_OP_RMS_NORM_BACK;
  2204. result->src[0] = a;
  2205. result->src[1] = b;
  2206. return result;
  2207. }
  2208. // ggml_group_norm
  2209. static struct ggml_tensor * ggml_group_norm_impl(
  2210. struct ggml_context * ctx,
  2211. struct ggml_tensor * a,
  2212. int n_groups,
  2213. float eps,
  2214. bool inplace) {
  2215. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2216. ggml_set_op_params_i32(result, 0, n_groups);
  2217. ggml_set_op_params_f32(result, 1, eps);
  2218. result->op = GGML_OP_GROUP_NORM;
  2219. result->src[0] = a;
  2220. return result;
  2221. }
  2222. struct ggml_tensor * ggml_group_norm(
  2223. struct ggml_context * ctx,
  2224. struct ggml_tensor * a,
  2225. int n_groups,
  2226. float eps) {
  2227. return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
  2228. }
  2229. struct ggml_tensor * ggml_group_norm_inplace(
  2230. struct ggml_context * ctx,
  2231. struct ggml_tensor * a,
  2232. int n_groups,
  2233. float eps) {
  2234. return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
  2235. }
  2236. // ggml_mul_mat
  2237. static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2238. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2239. return (t0->ne[0] == t1->ne[0]) &&
  2240. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2241. (t1->ne[3]%t0->ne[3] == 0);
  2242. }
  2243. struct ggml_tensor * ggml_mul_mat(
  2244. struct ggml_context * ctx,
  2245. struct ggml_tensor * a,
  2246. struct ggml_tensor * b) {
  2247. GGML_ASSERT(ggml_can_mul_mat(a, b));
  2248. GGML_ASSERT(!ggml_is_transposed(a));
  2249. const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
  2250. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2251. result->op = GGML_OP_MUL_MAT;
  2252. result->src[0] = a;
  2253. result->src[1] = b;
  2254. return result;
  2255. }
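// Example: with the ggml convention that ne[0] is the length of a row,
// a with ne = {K, M, 1, 1} and b with ne = {K, N, 1, 1} produce a result with
// ne = {M, N, 1, 1}; the shared dimension ne[0] must match and a's higher
// dimensions must broadcast into b's, as checked by ggml_can_mul_mat.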
  2256. void ggml_mul_mat_set_prec(
  2257. struct ggml_tensor * a,
  2258. enum ggml_prec prec) {
  2259. GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
  2260. const int32_t prec_i32 = (int32_t) prec;
  2261. ggml_set_op_params_i32(a, 0, prec_i32);
  2262. }
  2263. // ggml_mul_mat_id
/*
    c = ggml_mul_mat_id(ctx, as, b, ids);
    as  -> [cols, rows, n_expert]
    ids -> [n_expert_used, n_tokens] (i32)
    b   -> [cols, n_expert_used, n_tokens]
    c   -> [rows, n_expert_used, n_tokens]
    in b, n_expert_used can be broadcast to match the n_expert_used of ids
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
*/
  2273. struct ggml_tensor * ggml_mul_mat_id(
  2274. struct ggml_context * ctx,
  2275. struct ggml_tensor * as,
  2276. struct ggml_tensor * b,
  2277. struct ggml_tensor * ids) {
  2278. GGML_ASSERT(!ggml_is_transposed(as));
  2279. GGML_ASSERT(ids->type == GGML_TYPE_I32);
  2280. GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
  2281. GGML_ASSERT(b->ne[3] == 1); // b is 3d
  2282. GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
  2283. GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
  2284. GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
  2285. GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
  2286. const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
  2287. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2288. result->op = GGML_OP_MUL_MAT_ID;
  2289. result->src[0] = as;
  2290. result->src[1] = b;
  2291. result->src[2] = ids;
  2292. return result;
  2293. }
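// Illustrative sketch of the shapes described above (hypothetical MoE sizes, not part
// of the original file):
//
//   const int n_embd = 4096, n_ff = 14336, n_expert = 8, n_expert_used = 2, n_tokens = 32;
//
//   struct ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, n_ff, n_expert);
//   struct ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, n_expert_used, n_tokens);
//   struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_expert_used, n_tokens);
//
//   // c[:, e, t] ~= as[:, :, ids[e, t]] @ b[:, e, t]  -> shape [n_ff, n_expert_used, n_tokens]
//   struct ggml_tensor * c = ggml_mul_mat_id(ctx, as, b, ids);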
  2294. // ggml_out_prod
  2295. static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2296. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2297. return (t0->ne[1] == t1->ne[1]) &&
  2298. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2299. (t1->ne[3]%t0->ne[3] == 0);
  2300. }
  2301. struct ggml_tensor * ggml_out_prod(
  2302. struct ggml_context * ctx,
  2303. struct ggml_tensor * a,
  2304. struct ggml_tensor * b) {
  2305. GGML_ASSERT(ggml_can_out_prod(a, b));
  2306. GGML_ASSERT(!ggml_is_transposed(a));
  2307. // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
  2308. const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
  2309. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2310. result->op = GGML_OP_OUT_PROD;
  2311. result->src[0] = a;
  2312. result->src[1] = b;
  2313. return result;
  2314. }
  2315. // ggml_scale
  2316. static struct ggml_tensor * ggml_scale_impl(
  2317. struct ggml_context * ctx,
  2318. struct ggml_tensor * a,
  2319. float s,
  2320. bool inplace) {
  2321. GGML_ASSERT(ggml_is_padded_1d(a));
  2322. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2323. ggml_set_op_params(result, &s, sizeof(s));
  2324. result->op = GGML_OP_SCALE;
  2325. result->src[0] = a;
  2326. return result;
  2327. }
  2328. struct ggml_tensor * ggml_scale(
  2329. struct ggml_context * ctx,
  2330. struct ggml_tensor * a,
  2331. float s) {
  2332. return ggml_scale_impl(ctx, a, s, false);
  2333. }
  2334. struct ggml_tensor * ggml_scale_inplace(
  2335. struct ggml_context * ctx,
  2336. struct ggml_tensor * a,
  2337. float s) {
  2338. return ggml_scale_impl(ctx, a, s, true);
  2339. }
  2340. // ggml_set
  2341. static struct ggml_tensor * ggml_set_impl(
  2342. struct ggml_context * ctx,
  2343. struct ggml_tensor * a,
  2344. struct ggml_tensor * b,
  2345. size_t nb1,
  2346. size_t nb2,
  2347. size_t nb3,
  2348. size_t offset,
  2349. bool inplace) {
  2350. GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
  2351. // make a view of the destination
  2352. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2353. GGML_ASSERT(offset < (size_t)(1 << 30));
  2354. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  2355. ggml_set_op_params(result, params, sizeof(params));
  2356. result->op = GGML_OP_SET;
  2357. result->src[0] = a;
  2358. result->src[1] = b;
  2359. return result;
  2360. }
  2361. struct ggml_tensor * ggml_set(
  2362. struct ggml_context * ctx,
  2363. struct ggml_tensor * a,
  2364. struct ggml_tensor * b,
  2365. size_t nb1,
  2366. size_t nb2,
  2367. size_t nb3,
  2368. size_t offset) {
  2369. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  2370. }
  2371. struct ggml_tensor * ggml_set_inplace(
  2372. struct ggml_context * ctx,
  2373. struct ggml_tensor * a,
  2374. struct ggml_tensor * b,
  2375. size_t nb1,
  2376. size_t nb2,
  2377. size_t nb3,
  2378. size_t offset) {
  2379. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  2380. }
  2381. struct ggml_tensor * ggml_set_1d(
  2382. struct ggml_context * ctx,
  2383. struct ggml_tensor * a,
  2384. struct ggml_tensor * b,
  2385. size_t offset) {
  2386. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
  2387. }
  2388. struct ggml_tensor * ggml_set_1d_inplace(
  2389. struct ggml_context * ctx,
  2390. struct ggml_tensor * a,
  2391. struct ggml_tensor * b,
  2392. size_t offset) {
  2393. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
  2394. }
  2395. struct ggml_tensor * ggml_set_2d(
  2396. struct ggml_context * ctx,
  2397. struct ggml_tensor * a,
  2398. struct ggml_tensor * b,
  2399. size_t nb1,
  2400. size_t offset) {
  2401. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
  2402. }
  2403. struct ggml_tensor * ggml_set_2d_inplace(
  2404. struct ggml_context * ctx,
  2405. struct ggml_tensor * a,
  2406. struct ggml_tensor * b,
  2407. size_t nb1,
  2408. size_t offset) {
  2409. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
  2410. }
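// Illustrative sketch (not part of the original file): the offset and the nb* strides
// are in bytes, so writing a vector b of a->ne[0] elements into row `i` of a matrix a
// can be expressed as below (names are hypothetical).
//
//   const int64_t n = 8, m = 4;
//   const int     i = 2;
//
//   struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n, m);
//   struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n);
//
//   // a->nb[1] is the byte stride between consecutive rows of a
//   struct ggml_tensor * a2 = ggml_set_1d(ctx, a, b, i*a->nb[1]);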
  2411. // ggml_cpy
  2412. static struct ggml_tensor * ggml_cpy_impl(
  2413. struct ggml_context * ctx,
  2414. struct ggml_tensor * a,
  2415. struct ggml_tensor * b) {
  2416. GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
  2417. // make a view of the destination
  2418. struct ggml_tensor * result = ggml_view_tensor(ctx, b);
  2419. if (strlen(b->name) > 0) {
  2420. ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
  2421. } else {
  2422. ggml_format_name(result, "%s (copy)", a->name);
  2423. }
  2424. result->op = GGML_OP_CPY;
  2425. result->src[0] = a;
  2426. result->src[1] = b;
  2427. return result;
  2428. }
  2429. struct ggml_tensor * ggml_cpy(
  2430. struct ggml_context * ctx,
  2431. struct ggml_tensor * a,
  2432. struct ggml_tensor * b) {
  2433. return ggml_cpy_impl(ctx, a, b);
  2434. }
  2435. struct ggml_tensor * ggml_cast(
  2436. struct ggml_context * ctx,
  2437. struct ggml_tensor * a,
  2438. enum ggml_type type) {
  2439. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  2440. ggml_format_name(result, "%s (copy)", a->name);
  2441. result->op = GGML_OP_CPY;
  2442. result->src[0] = a;
  2443. result->src[1] = result;
  2444. return result;
  2445. }
  2446. // ggml_cont
  2447. static struct ggml_tensor * ggml_cont_impl(
  2448. struct ggml_context * ctx,
  2449. struct ggml_tensor * a) {
  2450. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2451. ggml_format_name(result, "%s (cont)", a->name);
  2452. result->op = GGML_OP_CONT;
  2453. result->src[0] = a;
  2454. return result;
  2455. }
  2456. struct ggml_tensor * ggml_cont(
  2457. struct ggml_context * ctx,
  2458. struct ggml_tensor * a) {
  2459. return ggml_cont_impl(ctx, a);
  2460. }
  2461. // make contiguous, with new shape
  2462. GGML_API struct ggml_tensor * ggml_cont_1d(
  2463. struct ggml_context * ctx,
  2464. struct ggml_tensor * a,
  2465. int64_t ne0) {
  2466. return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
  2467. }
  2468. GGML_API struct ggml_tensor * ggml_cont_2d(
  2469. struct ggml_context * ctx,
  2470. struct ggml_tensor * a,
  2471. int64_t ne0,
  2472. int64_t ne1) {
  2473. return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
  2474. }
  2475. GGML_API struct ggml_tensor * ggml_cont_3d(
  2476. struct ggml_context * ctx,
  2477. struct ggml_tensor * a,
  2478. int64_t ne0,
  2479. int64_t ne1,
  2480. int64_t ne2) {
  2481. return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
  2482. }
  2483. struct ggml_tensor * ggml_cont_4d(
  2484. struct ggml_context * ctx,
  2485. struct ggml_tensor * a,
  2486. int64_t ne0,
  2487. int64_t ne1,
  2488. int64_t ne2,
  2489. int64_t ne3) {
  2490. GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
  2491. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  2492. ggml_format_name(result, "%s (cont)", a->name);
  2493. result->op = GGML_OP_CONT;
  2494. result->src[0] = a;
  2495. return result;
  2496. }
  2497. // ggml_reshape
  2498. struct ggml_tensor * ggml_reshape(
  2499. struct ggml_context * ctx,
  2500. struct ggml_tensor * a,
  2501. struct ggml_tensor * b) {
  2502. GGML_ASSERT(ggml_is_contiguous(a));
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non-contiguous.
  2504. GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
  2505. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
  2506. ggml_format_name(result, "%s (reshaped)", a->name);
  2507. result->op = GGML_OP_RESHAPE;
  2508. result->src[0] = a;
  2509. return result;
  2510. }
  2511. struct ggml_tensor * ggml_reshape_1d(
  2512. struct ggml_context * ctx,
  2513. struct ggml_tensor * a,
  2514. int64_t ne0) {
  2515. GGML_ASSERT(ggml_is_contiguous(a));
  2516. GGML_ASSERT(ggml_nelements(a) == ne0);
  2517. const int64_t ne[1] = { ne0 };
  2518. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
  2519. ggml_format_name(result, "%s (reshaped)", a->name);
  2520. result->op = GGML_OP_RESHAPE;
  2521. result->src[0] = a;
  2522. return result;
  2523. }
  2524. struct ggml_tensor * ggml_reshape_2d(
  2525. struct ggml_context * ctx,
  2526. struct ggml_tensor * a,
  2527. int64_t ne0,
  2528. int64_t ne1) {
  2529. GGML_ASSERT(ggml_is_contiguous(a));
  2530. GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
  2531. const int64_t ne[2] = { ne0, ne1 };
  2532. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
  2533. ggml_format_name(result, "%s (reshaped)", a->name);
  2534. result->op = GGML_OP_RESHAPE;
  2535. result->src[0] = a;
  2536. return result;
  2537. }
  2538. struct ggml_tensor * ggml_reshape_3d(
  2539. struct ggml_context * ctx,
  2540. struct ggml_tensor * a,
  2541. int64_t ne0,
  2542. int64_t ne1,
  2543. int64_t ne2) {
  2544. GGML_ASSERT(ggml_is_contiguous(a));
  2545. GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
  2546. const int64_t ne[3] = { ne0, ne1, ne2 };
  2547. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
  2548. ggml_format_name(result, "%s (reshaped)", a->name);
  2549. result->op = GGML_OP_RESHAPE;
  2550. result->src[0] = a;
  2551. return result;
  2552. }
  2553. struct ggml_tensor * ggml_reshape_4d(
  2554. struct ggml_context * ctx,
  2555. struct ggml_tensor * a,
  2556. int64_t ne0,
  2557. int64_t ne1,
  2558. int64_t ne2,
  2559. int64_t ne3) {
  2560. GGML_ASSERT(ggml_is_contiguous(a));
  2561. GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
  2562. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  2563. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
  2564. ggml_format_name(result, "%s (reshaped)", a->name);
  2565. result->op = GGML_OP_RESHAPE;
  2566. result->src[0] = a;
  2567. return result;
  2568. }
  2569. static struct ggml_tensor * ggml_view_impl(
  2570. struct ggml_context * ctx,
  2571. struct ggml_tensor * a,
  2572. int n_dims,
  2573. const int64_t * ne,
  2574. size_t offset) {
  2575. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
  2576. ggml_format_name(result, "%s (view)", a->name);
  2577. ggml_set_op_params(result, &offset, sizeof(offset));
  2578. result->op = GGML_OP_VIEW;
  2579. result->src[0] = a;
  2580. return result;
  2581. }
  2582. // ggml_view_1d
  2583. struct ggml_tensor * ggml_view_1d(
  2584. struct ggml_context * ctx,
  2585. struct ggml_tensor * a,
  2586. int64_t ne0,
  2587. size_t offset) {
  2588. struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
  2589. return result;
  2590. }
  2591. // ggml_view_2d
  2592. struct ggml_tensor * ggml_view_2d(
  2593. struct ggml_context * ctx,
  2594. struct ggml_tensor * a,
  2595. int64_t ne0,
  2596. int64_t ne1,
  2597. size_t nb1,
  2598. size_t offset) {
  2599. const int64_t ne[2] = { ne0, ne1 };
  2600. struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
  2601. result->nb[1] = nb1;
  2602. result->nb[2] = result->nb[1]*ne1;
  2603. result->nb[3] = result->nb[2];
  2604. return result;
  2605. }
  2606. // ggml_view_3d
  2607. struct ggml_tensor * ggml_view_3d(
  2608. struct ggml_context * ctx,
  2609. struct ggml_tensor * a,
  2610. int64_t ne0,
  2611. int64_t ne1,
  2612. int64_t ne2,
  2613. size_t nb1,
  2614. size_t nb2,
  2615. size_t offset) {
  2616. const int64_t ne[3] = { ne0, ne1, ne2 };
  2617. struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
  2618. result->nb[1] = nb1;
  2619. result->nb[2] = nb2;
  2620. result->nb[3] = result->nb[2]*ne2;
  2621. return result;
  2622. }
  2623. // ggml_view_4d
  2624. struct ggml_tensor * ggml_view_4d(
  2625. struct ggml_context * ctx,
  2626. struct ggml_tensor * a,
  2627. int64_t ne0,
  2628. int64_t ne1,
  2629. int64_t ne2,
  2630. int64_t ne3,
  2631. size_t nb1,
  2632. size_t nb2,
  2633. size_t nb3,
  2634. size_t offset) {
  2635. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  2636. struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
  2637. result->nb[1] = nb1;
  2638. result->nb[2] = nb2;
  2639. result->nb[3] = nb3;
  2640. return result;
  2641. }
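// Illustrative sketch (not part of the original file): a view never copies data, it
// only re-describes it, so the new extents, the byte strides and the byte offset fully
// determine which elements are visible. Taking a [w, h] window of a contiguous matrix
// `a` starting at element (i0, i1) could look like this (hypothetical names):
//
//   struct ggml_tensor * win = ggml_view_2d(ctx, a,
//       w, h,                          // new extents
//       a->nb[1],                      // keep the row stride of a
//       i1*a->nb[1] + i0*a->nb[0]);    // byte offset of the first visible element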
  2642. // ggml_permute
  2643. struct ggml_tensor * ggml_permute(
  2644. struct ggml_context * ctx,
  2645. struct ggml_tensor * a,
  2646. int axis0,
  2647. int axis1,
  2648. int axis2,
  2649. int axis3) {
  2650. GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
  2651. GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
  2652. GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
  2653. GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
  2654. GGML_ASSERT(axis0 != axis1);
  2655. GGML_ASSERT(axis0 != axis2);
  2656. GGML_ASSERT(axis0 != axis3);
  2657. GGML_ASSERT(axis1 != axis2);
  2658. GGML_ASSERT(axis1 != axis3);
  2659. GGML_ASSERT(axis2 != axis3);
  2660. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  2661. ggml_format_name(result, "%s (permuted)", a->name);
    // use the tensor's own index types: ne holds int64_t extents, nb holds size_t strides
    int64_t ne[GGML_MAX_DIMS];
    size_t  nb[GGML_MAX_DIMS];
  2664. ne[axis0] = a->ne[0];
  2665. ne[axis1] = a->ne[1];
  2666. ne[axis2] = a->ne[2];
  2667. ne[axis3] = a->ne[3];
  2668. nb[axis0] = a->nb[0];
  2669. nb[axis1] = a->nb[1];
  2670. nb[axis2] = a->nb[2];
  2671. nb[axis3] = a->nb[3];
  2672. result->ne[0] = ne[0];
  2673. result->ne[1] = ne[1];
  2674. result->ne[2] = ne[2];
  2675. result->ne[3] = ne[3];
  2676. result->nb[0] = nb[0];
  2677. result->nb[1] = nb[1];
  2678. result->nb[2] = nb[2];
  2679. result->nb[3] = nb[3];
  2680. result->op = GGML_OP_PERMUTE;
  2681. result->src[0] = a;
  2682. int32_t params[] = { axis0, axis1, axis2, axis3 };
  2683. ggml_set_op_params(result, params, sizeof(params));
  2684. return result;
  2685. }
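// Illustrative sketch (not part of the original file): ggml_permute() moves source
// dimension i to position axis_i of the result and only rearranges ne/nb; the data is
// untouched. A common attention pattern (hypothetical names):
//
//   // q: [head_dim, n_head, n_tokens, 1]
//   struct ggml_tensor * q_t = ggml_permute(ctx, q, 0, 2, 1, 3); // [head_dim, n_tokens, n_head, 1]
//
// The permuted tensor is generally non-contiguous; follow it with ggml_cont() when the
// next op requires a contiguous layout.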
  2686. // ggml_transpose
  2687. struct ggml_tensor * ggml_transpose(
  2688. struct ggml_context * ctx,
  2689. struct ggml_tensor * a) {
  2690. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  2691. ggml_format_name(result, "%s (transposed)", a->name);
  2692. result->ne[0] = a->ne[1];
  2693. result->ne[1] = a->ne[0];
  2694. result->nb[0] = a->nb[1];
  2695. result->nb[1] = a->nb[0];
  2696. result->op = GGML_OP_TRANSPOSE;
  2697. result->src[0] = a;
  2698. return result;
  2699. }
  2700. // ggml_get_rows
  2701. struct ggml_tensor * ggml_get_rows(
  2702. struct ggml_context * ctx,
  2703. struct ggml_tensor * a,
  2704. struct ggml_tensor * b) {
  2705. GGML_ASSERT(a->ne[2] == b->ne[1]);
  2706. GGML_ASSERT(b->ne[3] == 1);
  2707. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2708. // TODO: implement non F32 return
  2709. enum ggml_type type = GGML_TYPE_F32;
  2710. if (a->type == GGML_TYPE_I32) {
  2711. type = a->type;
  2712. }
  2713. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
  2714. result->op = GGML_OP_GET_ROWS;
  2715. result->src[0] = a;
  2716. result->src[1] = b;
  2717. return result;
  2718. }
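// Illustrative sketch (not part of the original file): the typical use is a token
// embedding lookup, i.e. gathering rows of a [n_embd, n_vocab] table with an I32 index
// vector (hypothetical names and sizes):
//
//   const int64_t n_embd = 4096, n_vocab = 32000, n_tokens = 8;
//
//   struct ggml_tensor * tok_embd = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
//   struct ggml_tensor * tokens   = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);
//
//   struct ggml_tensor * cur = ggml_get_rows(ctx, tok_embd, tokens); // [n_embd, n_tokens]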
  2719. // ggml_get_rows_back
  2720. struct ggml_tensor * ggml_get_rows_back(
  2721. struct ggml_context * ctx,
  2722. struct ggml_tensor * a,
  2723. struct ggml_tensor * b,
  2724. struct ggml_tensor * c) {
  2725. GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
  2726. GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
  2727. // TODO: implement non F32 return
  2728. //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
  2729. struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
  2730. result->op = GGML_OP_GET_ROWS_BACK;
  2731. result->src[0] = a;
  2732. result->src[1] = b;
  2733. return result;
  2734. }
  2735. // ggml_diag
  2736. struct ggml_tensor * ggml_diag(
  2737. struct ggml_context * ctx,
  2738. struct ggml_tensor * a) {
  2739. GGML_ASSERT(a->ne[1] == 1);
  2740. const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
  2741. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
  2742. result->op = GGML_OP_DIAG;
  2743. result->src[0] = a;
  2744. return result;
  2745. }
  2746. // ggml_diag_mask_inf
  2747. static struct ggml_tensor * ggml_diag_mask_inf_impl(
  2748. struct ggml_context * ctx,
  2749. struct ggml_tensor * a,
  2750. int n_past,
  2751. bool inplace) {
  2752. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2753. int32_t params[] = { n_past };
  2754. ggml_set_op_params(result, params, sizeof(params));
  2755. result->op = GGML_OP_DIAG_MASK_INF;
  2756. result->src[0] = a;
  2757. return result;
  2758. }
  2759. struct ggml_tensor * ggml_diag_mask_inf(
  2760. struct ggml_context * ctx,
  2761. struct ggml_tensor * a,
  2762. int n_past) {
  2763. return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
  2764. }
  2765. struct ggml_tensor * ggml_diag_mask_inf_inplace(
  2766. struct ggml_context * ctx,
  2767. struct ggml_tensor * a,
  2768. int n_past) {
  2769. return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
  2770. }
  2771. // ggml_diag_mask_zero
  2772. static struct ggml_tensor * ggml_diag_mask_zero_impl(
  2773. struct ggml_context * ctx,
  2774. struct ggml_tensor * a,
  2775. int n_past,
  2776. bool inplace) {
  2777. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2778. int32_t params[] = { n_past };
  2779. ggml_set_op_params(result, params, sizeof(params));
  2780. result->op = GGML_OP_DIAG_MASK_ZERO;
  2781. result->src[0] = a;
  2782. return result;
  2783. }
  2784. struct ggml_tensor * ggml_diag_mask_zero(
  2785. struct ggml_context * ctx,
  2786. struct ggml_tensor * a,
  2787. int n_past) {
  2788. return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
  2789. }
  2790. struct ggml_tensor * ggml_diag_mask_zero_inplace(
  2791. struct ggml_context * ctx,
  2792. struct ggml_tensor * a,
  2793. int n_past) {
  2794. return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
  2795. }
  2796. // ggml_soft_max
  2797. static struct ggml_tensor * ggml_soft_max_impl(
  2798. struct ggml_context * ctx,
  2799. struct ggml_tensor * a,
  2800. struct ggml_tensor * mask,
  2801. float scale,
  2802. float max_bias,
  2803. bool inplace) {
  2804. GGML_ASSERT(ggml_is_contiguous(a));
  2805. if (mask) {
  2806. GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
  2807. GGML_ASSERT(ggml_is_contiguous(mask));
  2808. GGML_ASSERT(ggml_is_matrix(mask));
  2809. GGML_ASSERT(mask->ne[0] == a->ne[0]);
  2810. GGML_ASSERT(mask->ne[1] >= a->ne[1]);
  2811. }
  2812. if (max_bias > 0.0f) {
  2813. GGML_ASSERT(mask);
  2814. }
  2815. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2816. float params[] = { scale, max_bias };
  2817. ggml_set_op_params(result, params, sizeof(params));
  2818. result->op = GGML_OP_SOFT_MAX;
  2819. result->src[0] = a;
  2820. result->src[1] = mask;
  2821. return result;
  2822. }
  2823. struct ggml_tensor * ggml_soft_max(
  2824. struct ggml_context * ctx,
  2825. struct ggml_tensor * a) {
  2826. return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
  2827. }
  2828. struct ggml_tensor * ggml_soft_max_inplace(
  2829. struct ggml_context * ctx,
  2830. struct ggml_tensor * a) {
  2831. return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
  2832. }
  2833. struct ggml_tensor * ggml_soft_max_ext(
  2834. struct ggml_context * ctx,
  2835. struct ggml_tensor * a,
  2836. struct ggml_tensor * mask,
  2837. float scale,
  2838. float max_bias) {
  2839. return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
  2840. }
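// Illustrative sketch (not part of the original file): the typical attention use is
// softmax(kq*scale + mask), with scale = 1/sqrt(head_dim) and max_bias > 0.0f only when
// an ALiBi-style position bias is wanted (hypothetical names):
//
//   // kq:      [n_kv, n_tokens, n_head, 1]   attention scores
//   // kq_mask: [n_kv, >= n_tokens]           F16 or F32, broadcast over heads
//   struct ggml_tensor * probs =
//       ggml_soft_max_ext(ctx, kq, kq_mask, 1.0f/sqrtf((float) head_dim), 0.0f);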
  2841. // ggml_soft_max_back
  2842. static struct ggml_tensor * ggml_soft_max_back_impl(
  2843. struct ggml_context * ctx,
  2844. struct ggml_tensor * a,
  2845. struct ggml_tensor * b,
  2846. bool inplace) {
  2847. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2848. result->op = GGML_OP_SOFT_MAX_BACK;
  2849. result->src[0] = a;
  2850. result->src[1] = b;
  2851. return result;
  2852. }
  2853. struct ggml_tensor * ggml_soft_max_back(
  2854. struct ggml_context * ctx,
  2855. struct ggml_tensor * a,
  2856. struct ggml_tensor * b) {
  2857. return ggml_soft_max_back_impl(ctx, a, b, false);
  2858. }
  2859. struct ggml_tensor * ggml_soft_max_back_inplace(
  2860. struct ggml_context * ctx,
  2861. struct ggml_tensor * a,
  2862. struct ggml_tensor * b) {
  2863. return ggml_soft_max_back_impl(ctx, a, b, true);
  2864. }
  2865. // ggml_rope
  2866. static struct ggml_tensor * ggml_rope_impl(
  2867. struct ggml_context * ctx,
  2868. struct ggml_tensor * a,
  2869. struct ggml_tensor * b,
  2870. struct ggml_tensor * c,
  2871. int n_dims,
  2872. int mode,
  2873. int n_ctx_orig,
  2874. float freq_base,
  2875. float freq_scale,
  2876. float ext_factor,
  2877. float attn_factor,
  2878. float beta_fast,
  2879. float beta_slow,
  2880. bool inplace) {
  2881. GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
  2882. GGML_ASSERT(ggml_is_vector(b));
  2883. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2884. GGML_ASSERT(a->ne[2] == b->ne[0]);
  2885. if (c) {
  2886. GGML_ASSERT(c->type == GGML_TYPE_F32);
  2887. GGML_ASSERT(c->ne[0] >= n_dims / 2);
  2888. }
  2889. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2890. int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
  2891. memcpy(params + 5, &freq_base, sizeof(float));
  2892. memcpy(params + 6, &freq_scale, sizeof(float));
  2893. memcpy(params + 7, &ext_factor, sizeof(float));
  2894. memcpy(params + 8, &attn_factor, sizeof(float));
  2895. memcpy(params + 9, &beta_fast, sizeof(float));
  2896. memcpy(params + 10, &beta_slow, sizeof(float));
  2897. ggml_set_op_params(result, params, sizeof(params));
  2898. result->op = GGML_OP_ROPE;
  2899. result->src[0] = a;
  2900. result->src[1] = b;
  2901. result->src[2] = c;
  2902. return result;
  2903. }
  2904. struct ggml_tensor * ggml_rope(
  2905. struct ggml_context * ctx,
  2906. struct ggml_tensor * a,
  2907. struct ggml_tensor * b,
  2908. int n_dims,
  2909. int mode) {
  2910. return ggml_rope_impl(
  2911. ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
  2912. );
  2913. }
  2914. struct ggml_tensor * ggml_rope_inplace(
  2915. struct ggml_context * ctx,
  2916. struct ggml_tensor * a,
  2917. struct ggml_tensor * b,
  2918. int n_dims,
  2919. int mode) {
  2920. return ggml_rope_impl(
  2921. ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
  2922. );
  2923. }
  2924. struct ggml_tensor * ggml_rope_ext(
  2925. struct ggml_context * ctx,
  2926. struct ggml_tensor * a,
  2927. struct ggml_tensor * b,
  2928. struct ggml_tensor * c,
  2929. int n_dims,
  2930. int mode,
  2931. int n_ctx_orig,
  2932. float freq_base,
  2933. float freq_scale,
  2934. float ext_factor,
  2935. float attn_factor,
  2936. float beta_fast,
  2937. float beta_slow) {
  2938. return ggml_rope_impl(
  2939. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  2940. ext_factor, attn_factor, beta_fast, beta_slow, false
  2941. );
  2942. }
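// Illustrative sketch (not part of the original file): a call in the style used by
// decoder-only models, where `pos` holds one I32 position per token (a->ne[2] entries)
// and no frequency-factor tensor is passed. The parameter values are hypothetical
// defaults, not requirements:
//
//   // cur: [head_dim, n_head, n_tokens, 1], pos: [n_tokens] (I32)
//   struct ggml_tensor * cur_rope = ggml_rope_ext(
//       ctx, cur, pos, NULL,
//       head_dim,           // n_dims: how many leading dims of each head are rotated
//       0,                  // mode 0: standard (interleaved) RoPE
//       0,                  // n_ctx_orig (used by extended-context scaling)
//       10000.0f, 1.0f,     // freq_base, freq_scale
//       0.0f, 1.0f,         // ext_factor, attn_factor
//       32.0f, 1.0f);       // beta_fast, beta_slow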
  2943. struct ggml_tensor * ggml_rope_ext_inplace(
  2944. struct ggml_context * ctx,
  2945. struct ggml_tensor * a,
  2946. struct ggml_tensor * b,
  2947. struct ggml_tensor * c,
  2948. int n_dims,
  2949. int mode,
  2950. int n_ctx_orig,
  2951. float freq_base,
  2952. float freq_scale,
  2953. float ext_factor,
  2954. float attn_factor,
  2955. float beta_fast,
  2956. float beta_slow) {
  2957. return ggml_rope_impl(
  2958. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  2959. ext_factor, attn_factor, beta_fast, beta_slow, true
  2960. );
  2961. }
  2962. struct ggml_tensor * ggml_rope_custom(
  2963. struct ggml_context * ctx,
  2964. struct ggml_tensor * a,
  2965. struct ggml_tensor * b,
  2966. int n_dims,
  2967. int mode,
  2968. int n_ctx_orig,
  2969. float freq_base,
  2970. float freq_scale,
  2971. float ext_factor,
  2972. float attn_factor,
  2973. float beta_fast,
  2974. float beta_slow) {
  2975. return ggml_rope_impl(
  2976. ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  2977. ext_factor, attn_factor, beta_fast, beta_slow, false
  2978. );
  2979. }
  2980. struct ggml_tensor * ggml_rope_custom_inplace(
  2981. struct ggml_context * ctx,
  2982. struct ggml_tensor * a,
  2983. struct ggml_tensor * b,
  2984. int n_dims,
  2985. int mode,
  2986. int n_ctx_orig,
  2987. float freq_base,
  2988. float freq_scale,
  2989. float ext_factor,
  2990. float attn_factor,
  2991. float beta_fast,
  2992. float beta_slow) {
  2993. return ggml_rope_impl(
  2994. ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  2995. ext_factor, attn_factor, beta_fast, beta_slow, true
  2996. );
  2997. }
  2998. // ggml_rope_back
  2999. struct ggml_tensor * ggml_rope_back(
  3000. struct ggml_context * ctx,
  3001. struct ggml_tensor * a,
  3002. struct ggml_tensor * b,
  3003. struct ggml_tensor * c,
  3004. int n_dims,
  3005. int mode,
  3006. int n_ctx_orig,
  3007. float freq_base,
  3008. float freq_scale,
  3009. float ext_factor,
  3010. float attn_factor,
  3011. float beta_fast,
  3012. float beta_slow) {
  3013. GGML_ASSERT(ggml_is_vector(b));
  3014. GGML_ASSERT(b->type == GGML_TYPE_I32);
  3015. GGML_ASSERT(a->ne[2] == b->ne[0]);
  3016. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  3017. int32_t params[11] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
  3018. memcpy(params + 5, &freq_base, sizeof(float));
  3019. memcpy(params + 6, &freq_scale, sizeof(float));
  3020. memcpy(params + 7, &ext_factor, sizeof(float));
  3021. memcpy(params + 8, &attn_factor, sizeof(float));
  3022. memcpy(params + 9, &beta_fast, sizeof(float));
  3023. memcpy(params + 10, &beta_slow, sizeof(float));
  3024. ggml_set_op_params(result, params, sizeof(params));
  3025. result->op = GGML_OP_ROPE_BACK;
  3026. result->src[0] = a;
  3027. result->src[1] = b;
  3028. result->src[2] = c;
  3029. return result;
  3030. }
  3031. // ggml_clamp
  3032. struct ggml_tensor * ggml_clamp(
  3033. struct ggml_context * ctx,
  3034. struct ggml_tensor * a,
  3035. float min,
  3036. float max) {
    // TODO: when implementing the backward pass, fix this:
  3038. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  3039. float params[] = { min, max };
  3040. ggml_set_op_params(result, params, sizeof(params));
  3041. result->op = GGML_OP_CLAMP;
  3042. result->src[0] = a;
  3043. return result;
  3044. }
  3045. // ggml_conv_1d
  3046. static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  3047. return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
  3048. }
  3049. GGML_API struct ggml_tensor * ggml_conv_1d(
  3050. struct ggml_context * ctx,
  3051. struct ggml_tensor * a,
  3052. struct ggml_tensor * b,
  3053. int s0,
  3054. int p0,
  3055. int d0) {
  3056. struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
  3057. struct ggml_tensor * result =
  3058. ggml_mul_mat(ctx,
  3059. ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
  3060. ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K]
  3061. result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
  3062. return result;
  3063. }
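// Illustrative sketch (not part of the original file): with a kernel a of shape
// [K, IC, OC] and an input b of shape [IL, IC, N] (ne order, innermost first), the
// output length follows the formula above:
//
//   OL = (IL + 2*p0 - d0*(K - 1) - 1)/s0 + 1
//
// e.g. IL = 100, K = 3, s0 = 1, p0 = 1, d0 = 1 gives OL = (100 + 2 - 2 - 1)/1 + 1 = 100,
// and the result of ggml_conv_1d() then has shape [OL, OC, N]:
//
//   struct ggml_tensor * conv = ggml_conv_1d(ctx, a, b, /*s0=*/1, /*p0=*/1, /*d0=*/1);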
  3064. // ggml_conv_1d_ph
  3065. struct ggml_tensor* ggml_conv_1d_ph(
  3066. struct ggml_context * ctx,
  3067. struct ggml_tensor * a,
  3068. struct ggml_tensor * b,
  3069. int s,
  3070. int d) {
  3071. return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
  3072. }
  3073. // ggml_conv_transpose_1d
  3074. static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  3075. return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
  3076. }
  3077. GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
  3078. struct ggml_context * ctx,
  3079. struct ggml_tensor * a,
  3080. struct ggml_tensor * b,
  3081. int s0,
  3082. int p0,
  3083. int d0) {
  3084. GGML_ASSERT(ggml_is_matrix(b));
  3085. GGML_ASSERT(a->ne[2] == b->ne[1]);
  3086. GGML_ASSERT(a->ne[3] == 1);
  3087. GGML_ASSERT(p0 == 0);
  3088. GGML_ASSERT(d0 == 1);
  3089. const int64_t ne[4] = {
  3090. ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
  3091. a->ne[1], b->ne[2], 1,
  3092. };
  3093. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3094. int32_t params[] = { s0, p0, d0 };
  3095. ggml_set_op_params(result, params, sizeof(params));
  3096. result->op = GGML_OP_CONV_TRANSPOSE_1D;
  3097. result->src[0] = a;
  3098. result->src[1] = b;
  3099. return result;
  3100. }
  3101. // ggml_conv_depthwise
  3102. struct ggml_tensor * ggml_conv_depthwise_2d(
  3103. struct ggml_context * ctx,
  3104. struct ggml_tensor * a,
  3105. struct ggml_tensor * b,
  3106. int s0,
  3107. int s1,
  3108. int p0,
  3109. int p1,
  3110. int d0,
  3111. int d1) {
  3112. struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
  3113. struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
  3114. ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
  3115. s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
  3116. struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
  3117. new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
  3118. struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
  3119. result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
  3120. return result;
  3121. }
// ggml_conv_2d

// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
// a:      [OC, IC, KH, KW]
// b:      [N, IC, IH, IW]
// result: [N, OH, OW, IC*KH*KW]
  3127. struct ggml_tensor * ggml_im2col(
  3128. struct ggml_context * ctx,
  3129. struct ggml_tensor * a,
  3130. struct ggml_tensor * b,
  3131. int s0,
  3132. int s1,
  3133. int p0,
  3134. int p1,
  3135. int d0,
  3136. int d1,
  3137. bool is_2D,
  3138. enum ggml_type dst_type) {
  3139. if(is_2D) {
  3140. GGML_ASSERT(a->ne[2] == b->ne[2]);
  3141. } else {
  3142. GGML_ASSERT(a->ne[1] == b->ne[1]);
  3143. GGML_ASSERT(b->ne[3] == 1);
  3144. }
  3145. const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
  3146. const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
  3147. GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
  3148. GGML_ASSERT((OW > 0) && "b too small compared to a");
  3149. const int64_t ne[4] = {
  3150. is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
  3151. OW,
  3152. is_2D ? OH : b->ne[2],
  3153. is_2D ? b->ne[3] : 1,
  3154. };
  3155. struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
  3156. int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
  3157. ggml_set_op_params(result, params, sizeof(params));
  3158. result->op = GGML_OP_IM2COL;
  3159. result->src[0] = a;
  3160. result->src[1] = b;
  3161. return result;
  3162. }
  3163. struct ggml_tensor * ggml_im2col_back(
  3164. struct ggml_context * ctx,
  3165. struct ggml_tensor * a,
  3166. struct ggml_tensor * b,
  3167. int64_t * ne,
  3168. int s0,
  3169. int s1,
  3170. int p0,
  3171. int p1,
  3172. int d0,
  3173. int d1,
  3174. bool is_2D) {
  3175. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3176. int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
  3177. ggml_set_op_params(result, params, sizeof(params));
  3178. result->op = GGML_OP_IM2COL_BACK;
  3179. result->src[0] = a;
  3180. result->src[1] = b;
  3181. return result;
  3182. }
  3183. // a: [OC,IC, KH, KW]
  3184. // b: [N, IC, IH, IW]
  3185. // result: [N, OC, OH, OW]
  3186. struct ggml_tensor * ggml_conv_2d(
  3187. struct ggml_context * ctx,
  3188. struct ggml_tensor * a,
  3189. struct ggml_tensor * b,
  3190. int s0,
  3191. int s1,
  3192. int p0,
  3193. int p1,
  3194. int d0,
  3195. int d1) {
  3196. struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
  3197. struct ggml_tensor * result =
  3198. ggml_mul_mat(ctx,
  3199. ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
  3200. ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
  3201. result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
  3202. result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
  3203. return result;
  3204. }
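// Illustrative sketch (not part of the original file): in ne order (innermost first)
// the kernel a is [KW, KH, IC, OC] and the input b is [IW, IH, IC, N]; with a 3x3
// kernel, stride 1, padding 1 and dilation 1 the spatial size is preserved
// (hypothetical sizes):
//
//   // a: [3, 3, 64, 128], b: [56, 56, 64, 1]
//   struct ggml_tensor * out = ggml_conv_2d(ctx, a, b, 1, 1, 1, 1, 1, 1);
//   // out: [56, 56, 128, 1]  i.e. (OW, OH, OC, N)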
  3205. // ggml_conv_2d_sk_p0
  3206. struct ggml_tensor * ggml_conv_2d_sk_p0(
  3207. struct ggml_context * ctx,
  3208. struct ggml_tensor * a,
  3209. struct ggml_tensor * b) {
  3210. return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
  3211. }
  3212. // ggml_conv_2d_s1_ph
  3213. struct ggml_tensor * ggml_conv_2d_s1_ph(
  3214. struct ggml_context * ctx,
  3215. struct ggml_tensor * a,
  3216. struct ggml_tensor * b) {
  3217. return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
  3218. }
  3219. // ggml_conv_transpose_2d_p0
  3220. static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
  3221. return (ins - 1) * s - 2 * p + ks;
  3222. }
  3223. struct ggml_tensor * ggml_conv_transpose_2d_p0(
  3224. struct ggml_context * ctx,
  3225. struct ggml_tensor * a,
  3226. struct ggml_tensor * b,
  3227. int stride) {
  3228. GGML_ASSERT(a->ne[3] == b->ne[2]);
  3229. const int64_t ne[4] = {
  3230. ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
  3231. ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
  3232. a->ne[2], b->ne[3],
  3233. };
  3234. struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3235. ggml_set_op_params_i32(result, 0, stride);
  3236. result->op = GGML_OP_CONV_TRANSPOSE_2D;
  3237. result->src[0] = a;
  3238. result->src[1] = b;
  3239. return result;
  3240. }
  3241. // ggml_pool_*
  3242. static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
  3243. return (ins + 2 * p - ks) / s + 1;
  3244. }
  3245. // ggml_pool_1d
  3246. struct ggml_tensor * ggml_pool_1d(
  3247. struct ggml_context * ctx,
  3248. struct ggml_tensor * a,
  3249. enum ggml_op_pool op,
  3250. int k0,
  3251. int s0,
  3252. int p0) {
  3253. const int64_t ne[4] = {
  3254. ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
  3255. a->ne[1],
  3256. a->ne[2],
  3257. a->ne[3],
  3258. };
  3259. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3260. int32_t params[] = { op, k0, s0, p0 };
  3261. ggml_set_op_params(result, params, sizeof(params));
  3262. result->op = GGML_OP_POOL_1D;
  3263. result->src[0] = a;
  3264. return result;
  3265. }
  3266. // ggml_pool_2d
  3267. struct ggml_tensor * ggml_pool_2d(
  3268. struct ggml_context * ctx,
  3269. struct ggml_tensor * a,
  3270. enum ggml_op_pool op,
  3271. int k0,
  3272. int k1,
  3273. int s0,
  3274. int s1,
  3275. float p0,
  3276. float p1) {
  3277. struct ggml_tensor * result;
  3278. const int64_t ne[4] = {
  3279. ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
  3280. ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
  3281. a->ne[2],
  3282. a->ne[3],
  3283. };
  3284. result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3285. int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
  3286. ggml_set_op_params(result, params, sizeof(params));
  3287. result->op = GGML_OP_POOL_2D;
  3288. result->src[0] = a;
  3289. return result;
  3290. }
  3291. struct ggml_tensor * ggml_pool_2d_back(
  3292. struct ggml_context * ctx,
  3293. struct ggml_tensor * a,
  3294. struct ggml_tensor * af,
  3295. enum ggml_op_pool op,
  3296. int k0,
  3297. int k1,
  3298. int s0,
  3299. int s1,
  3300. float p0,
  3301. float p1) {
  3302. struct ggml_tensor * result;
  3303. result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
  3304. int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
  3305. ggml_set_op_params(result, params, sizeof(params));
  3306. result->op = GGML_OP_POOL_2D_BACK;
  3307. result->src[0] = a;
  3308. result->src[1] = af;
  3309. return result;
  3310. }
  3311. // ggml_upscale
  3312. static struct ggml_tensor * ggml_upscale_impl(
  3313. struct ggml_context * ctx,
  3314. struct ggml_tensor * a,
  3315. int ne0,
  3316. int ne1,
  3317. int ne2,
  3318. int ne3) {
  3319. GGML_ASSERT(a->ne[0] <= ne0);
  3320. GGML_ASSERT(a->ne[1] <= ne1);
  3321. GGML_ASSERT(a->ne[2] <= ne2);
  3322. GGML_ASSERT(a->ne[3] <= ne3);
  3323. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  3324. result->op = GGML_OP_UPSCALE;
  3325. result->src[0] = a;
  3326. return result;
  3327. }
  3328. struct ggml_tensor * ggml_upscale(
  3329. struct ggml_context * ctx,
  3330. struct ggml_tensor * a,
  3331. int scale_factor) {
  3332. return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
  3333. }
  3334. struct ggml_tensor * ggml_upscale_ext(
  3335. struct ggml_context * ctx,
  3336. struct ggml_tensor * a,
  3337. int ne0,
  3338. int ne1,
  3339. int ne2,
  3340. int ne3) {
  3341. return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
  3342. }
  3343. // ggml_pad
  3344. struct ggml_tensor * ggml_pad(
  3345. struct ggml_context * ctx,
  3346. struct ggml_tensor * a,
  3347. int p0,
  3348. int p1,
  3349. int p2,
  3350. int p3) {
  3351. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
  3352. a->ne[0] + p0,
  3353. a->ne[1] + p1,
  3354. a->ne[2] + p2,
  3355. a->ne[3] + p3);
  3356. result->op = GGML_OP_PAD;
  3357. result->src[0] = a;
  3358. return result;
  3359. }
  3360. // ggml_arange
  3361. struct ggml_tensor * ggml_arange(
  3362. struct ggml_context * ctx,
  3363. float start,
  3364. float stop,
  3365. float step) {
  3366. GGML_ASSERT(stop > start);
  3367. const int64_t steps = (int64_t) ceilf((stop - start) / step);
  3368. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
  3369. ggml_set_op_params_f32(result, 0, start);
  3370. ggml_set_op_params_f32(result, 1, stop);
  3371. ggml_set_op_params_f32(result, 2, step);
  3372. result->op = GGML_OP_ARANGE;
  3373. return result;
  3374. }
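// Illustrative sketch (not part of the original file): the number of elements is
// ceil((stop - start)/step), e.g. ggml_arange(ctx, 0.0f, 5.0f, 2.0f) produces a
// 3-element F32 tensor holding { 0, 2, 4 }.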
  3375. // ggml_timestep_embedding
  3376. struct ggml_tensor * ggml_timestep_embedding(
  3377. struct ggml_context * ctx,
  3378. struct ggml_tensor * timesteps,
  3379. int dim,
  3380. int max_period) {
  3381. int actual_dim = dim;
  3382. if (dim % 2 != 0) {
  3383. actual_dim = dim + 1;
  3384. }
  3385. struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
  3386. ggml_set_op_params_i32(result, 0, dim);
  3387. ggml_set_op_params_i32(result, 1, max_period);
  3388. result->op = GGML_OP_TIMESTEP_EMBEDDING;
  3389. result->src[0] = timesteps;
  3390. return result;
  3391. }
  3392. // ggml_argsort
  3393. struct ggml_tensor * ggml_argsort(
  3394. struct ggml_context * ctx,
  3395. struct ggml_tensor * a,
  3396. enum ggml_sort_order order) {
  3397. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
  3398. ggml_set_op_params_i32(result, 0, (int32_t) order);
  3399. result->op = GGML_OP_ARGSORT;
  3400. result->src[0] = a;
  3401. return result;
  3402. }
  3403. // ggml_top_k
  3404. struct ggml_tensor * ggml_top_k(
  3405. struct ggml_context * ctx,
  3406. struct ggml_tensor * a,
  3407. int k) {
  3408. GGML_ASSERT(a->ne[0] >= k);
  3409. struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
  3410. result = ggml_view_4d(ctx, result,
  3411. k, result->ne[1], result->ne[2], result->ne[3],
  3412. result->nb[1], result->nb[2], result->nb[3],
  3413. 0);
  3414. return result;
  3415. }
  3416. // ggml_flash_attn_ext
  3417. struct ggml_tensor * ggml_flash_attn_ext(
  3418. struct ggml_context * ctx,
  3419. struct ggml_tensor * q,
  3420. struct ggml_tensor * k,
  3421. struct ggml_tensor * v,
  3422. struct ggml_tensor * mask,
  3423. float scale,
  3424. float max_bias,
  3425. float logit_softcap) {
  3426. GGML_ASSERT(ggml_can_mul_mat(k, q));
  3427. // TODO: check if vT can be multiplied by (k*qT)
  3428. if (mask) {
  3429. GGML_ASSERT(ggml_is_contiguous(mask));
  3430. GGML_ASSERT(mask->ne[2] == 1);
  3431. GGML_ASSERT(mask->ne[3] == 1);
  3432. GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
  3433. "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
  3434. //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
  3435. }
  3436. if (max_bias > 0.0f) {
  3437. GGML_ASSERT(mask);
  3438. }
  3439. bool is_node = false;
  3440. // permute(0, 2, 1, 3)
  3441. int64_t ne[4] = { q->ne[0], q->ne[2], q->ne[1], q->ne[3] };
  3442. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3443. float params[] = { scale, max_bias, logit_softcap };
  3444. ggml_set_op_params(result, params, sizeof(params));
  3445. result->op = GGML_OP_FLASH_ATTN_EXT;
  3446. result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
  3447. result->src[0] = q;
  3448. result->src[1] = k;
  3449. result->src[2] = v;
  3450. result->src[3] = mask;
  3451. return result;
  3452. }
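// Illustrative sketch (not part of the original file): q/k/v are laid out per head and
// the result comes back with the head dimension already permuted next to head_dim, so
// it can be merged back into n_embd with a reshape (hypothetical names and sizes):
//
//   // q:       [head_dim, n_tokens, n_head,    1]
//   // k:       [head_dim, n_kv,     n_head_kv, 1]
//   // v:       [head_dim, n_kv,     n_head_kv, 1]
//   // kq_mask: [n_kv, >= GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)]
//   struct ggml_tensor * cur = ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
//       1.0f/sqrtf((float) head_dim), /*max_bias=*/0.0f, /*logit_softcap=*/0.0f);
//   // cur: [head_dim, n_head, n_tokens, 1]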
  3453. void ggml_flash_attn_ext_set_prec(
  3454. struct ggml_tensor * a,
  3455. enum ggml_prec prec) {
  3456. GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
  3457. const int32_t prec_i32 = (int32_t) prec;
    ggml_set_op_params_i32(a, 3, prec_i32); // scale, max_bias and logit_softcap occupy the first three param slots
  3459. }
  3460. enum ggml_prec ggml_flash_attn_ext_get_prec(
  3461. const struct ggml_tensor * a) {
  3462. GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
  3463. const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
  3464. return (enum ggml_prec) prec_i32;
  3465. }
  3466. // ggml_flash_attn_back
  3467. struct ggml_tensor * ggml_flash_attn_back(
  3468. struct ggml_context * ctx,
  3469. struct ggml_tensor * q,
  3470. struct ggml_tensor * k,
  3471. struct ggml_tensor * v,
  3472. struct ggml_tensor * d,
  3473. bool masked) {
  3474. GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
  3475. GGML_ASSERT(ggml_can_mul_mat(k, q));
  3476. // TODO: check if vT can be multiplied by (k*qT)
  3477. // d shape [D,N,ne2,ne3]
  3478. // q shape [D,N,ne2,ne3]
  3479. // k shape [D,M,kvne2,ne3]
  3480. // v shape [M,D,kvne2,ne3]
  3481. const int64_t D = q->ne[0];
  3482. const int64_t N = q->ne[1];
  3483. const int64_t M = k->ne[1];
  3484. const int64_t ne2 = q->ne[2];
  3485. const int64_t ne3 = q->ne[3];
  3486. const int64_t kvne2 = k->ne[2];
  3487. GGML_ASSERT(k->ne[0] == D);
  3488. GGML_ASSERT(v->ne[0] == M);
  3489. GGML_ASSERT(v->ne[1] == D);
  3490. GGML_ASSERT(d->ne[0] == D);
  3491. GGML_ASSERT(d->ne[1] == N);
  3492. GGML_ASSERT(k->ne[2] == kvne2);
  3493. GGML_ASSERT(k->ne[3] == ne3);
  3494. GGML_ASSERT(v->ne[2] == kvne2);
  3495. GGML_ASSERT(v->ne[3] == ne3);
  3496. GGML_ASSERT(d->ne[2] == ne2);
  3497. GGML_ASSERT(d->ne[3] == ne3);
  3498. GGML_ASSERT(ne2 % kvne2 == 0);
  3499. bool is_node = false;
  3500. if (q->grad || k->grad || v->grad) {
  3501. // when using this operation (in backwards pass) these grads are set.
  3502. // we don't want to create (big) grad of our result, so is_node is false.
  3503. is_node = false;
  3504. }
    // store the gradients of q, k and v as contiguous tensors concatenated in result.
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
  3507. const int64_t elem_q = ggml_nelements(q);
  3508. const int64_t elem_k = ggml_nelements(k);
  3509. const int64_t elem_v = ggml_nelements(v);
  3510. enum ggml_type result_type = GGML_TYPE_F32;
  3511. GGML_ASSERT(ggml_blck_size(result_type) == 1);
  3512. const size_t tsize = ggml_type_size(result_type);
  3513. const size_t offs_q = 0;
  3514. const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
  3515. const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
  3516. const size_t end = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
  3517. const size_t nelements = (end + tsize - 1)/tsize;
  3518. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
  3519. int32_t masked_i = masked ? 1 : 0;
  3520. ggml_set_op_params(result, &masked_i, sizeof(masked_i));
  3521. result->op = GGML_OP_FLASH_ATTN_BACK;
  3522. result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
  3523. result->src[0] = q;
  3524. result->src[1] = k;
  3525. result->src[2] = v;
  3526. result->src[3] = d;
  3527. return result;
  3528. }
  3529. // ggml_ssm_conv
  3530. struct ggml_tensor * ggml_ssm_conv(
  3531. struct ggml_context * ctx,
  3532. struct ggml_tensor * sx,
  3533. struct ggml_tensor * c) {
  3534. GGML_ASSERT(ggml_is_3d(sx));
  3535. GGML_ASSERT(ggml_is_matrix(c));
  3536. const int64_t d_conv = c->ne[0];
  3537. const int64_t d_inner = c->ne[1];
  3538. const int64_t n_t = sx->ne[0] - d_conv + 1; // tokens per sequence
  3539. const int64_t n_s = sx->ne[2];
  3540. // TODO: maybe support other strides than 1?
  3541. // FIXME: this is always true?
  3542. GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
  3543. GGML_ASSERT(sx->ne[1] == d_inner);
  3544. GGML_ASSERT(n_t >= 0);
  3545. struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
  3546. result->op = GGML_OP_SSM_CONV;
  3547. result->src[0] = sx;
  3548. result->src[1] = c;
  3549. return result;
  3550. }
  3551. // ggml_ssm_scan
  3552. struct ggml_tensor * ggml_ssm_scan(
  3553. struct ggml_context * ctx,
  3554. struct ggml_tensor * s,
  3555. struct ggml_tensor * x,
  3556. struct ggml_tensor * dt,
  3557. struct ggml_tensor * A,
  3558. struct ggml_tensor * B,
  3559. struct ggml_tensor * C) {
  3560. GGML_ASSERT(ggml_is_contiguous(s));
  3561. GGML_ASSERT(ggml_is_contiguous(x));
  3562. GGML_ASSERT(ggml_is_contiguous(dt));
  3563. GGML_ASSERT(ggml_is_contiguous(A));
  3564. GGML_ASSERT(ggml_is_matrix(A));
  3565. GGML_ASSERT(ggml_is_3d(B));
  3566. GGML_ASSERT(ggml_is_3d(s));
  3567. GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
  3568. GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
  3569. GGML_ASSERT(ggml_are_same_shape(x, dt));
  3570. GGML_ASSERT(ggml_are_same_shape(B, C));
  3571. {
  3572. const int64_t d_state = s->ne[0];
  3573. const int64_t d_inner = s->ne[1];
  3574. const int64_t n_seq_tokens = x->ne[1];
  3575. const int64_t n_seqs = x->ne[2];
  3576. GGML_ASSERT(s->ne[2] == n_seqs);
  3577. GGML_ASSERT(x->ne[0] == d_inner);
  3578. GGML_ASSERT(A->ne[0] == d_state);
  3579. GGML_ASSERT(A->ne[1] == d_inner);
  3580. GGML_ASSERT(B->ne[0] == d_state);
  3581. GGML_ASSERT(B->ne[1] == n_seq_tokens);
  3582. GGML_ASSERT(B->ne[2] == n_seqs);
  3583. }
  3584. // concatenated y + ssm_states
  3585. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));
  3586. result->op = GGML_OP_SSM_SCAN;
  3587. result->src[0] = s;
  3588. result->src[1] = x;
  3589. result->src[2] = dt;
  3590. result->src[3] = A;
  3591. result->src[4] = B;
  3592. result->src[5] = C;
  3593. return result;
  3594. }
  3595. // ggml_win_part
  3596. struct ggml_tensor * ggml_win_part(
  3597. struct ggml_context * ctx,
  3598. struct ggml_tensor * a,
  3599. int w) {
  3600. GGML_ASSERT(a->ne[3] == 1);
  3601. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3602. // padding
  3603. const int px = (w - a->ne[1]%w)%w;
  3604. const int py = (w - a->ne[2]%w)%w;
  3605. const int npx = (px + a->ne[1])/w;
  3606. const int npy = (py + a->ne[2])/w;
  3607. const int np = npx*npy;
  3608. const int64_t ne[4] = { a->ne[0], w, w, np, };
  3609. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3610. int32_t params[] = { npx, npy, w };
  3611. ggml_set_op_params(result, params, sizeof(params));
  3612. result->op = GGML_OP_WIN_PART;
  3613. result->src[0] = a;
  3614. return result;
  3615. }
  3616. // ggml_win_unpart
  3617. struct ggml_tensor * ggml_win_unpart(
  3618. struct ggml_context * ctx,
  3619. struct ggml_tensor * a,
  3620. int w0,
  3621. int h0,
  3622. int w) {
  3623. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3624. const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
  3625. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
  3626. int32_t params[] = { w };
  3627. ggml_set_op_params(result, params, sizeof(params));
  3628. result->op = GGML_OP_WIN_UNPART;
  3629. result->src[0] = a;
  3630. return result;
  3631. }
  3632. // ggml_get_rel_pos
  3633. struct ggml_tensor * ggml_get_rel_pos(
  3634. struct ggml_context * ctx,
  3635. struct ggml_tensor * a,
  3636. int qh,
  3637. int kh) {
  3638. GGML_ASSERT(qh == kh);
  3639. GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
  3640. const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
  3641. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
  3642. result->op = GGML_OP_GET_REL_POS;
  3643. result->src[0] = a;
  3644. return result;
  3645. }
  3646. // ggml_add_rel_pos
  3647. static struct ggml_tensor * ggml_add_rel_pos_impl(
  3648. struct ggml_context * ctx,
  3649. struct ggml_tensor * a,
  3650. struct ggml_tensor * pw,
  3651. struct ggml_tensor * ph,
  3652. bool inplace) {
  3653. GGML_ASSERT(ggml_are_same_shape(pw, ph));
  3654. GGML_ASSERT(ggml_is_contiguous(a));
  3655. GGML_ASSERT(ggml_is_contiguous(pw));
  3656. GGML_ASSERT(ggml_is_contiguous(ph));
  3657. GGML_ASSERT(ph->type == GGML_TYPE_F32);
  3658. GGML_ASSERT(pw->type == GGML_TYPE_F32);
  3659. GGML_ASSERT(pw->ne[3] == a->ne[2]);
  3660. GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
  3661. GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
  3662. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3663. ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
  3664. result->op = GGML_OP_ADD_REL_POS;
  3665. result->src[0] = a;
  3666. result->src[1] = pw;
  3667. result->src[2] = ph;
  3668. return result;
  3669. }
  3670. struct ggml_tensor * ggml_add_rel_pos(
  3671. struct ggml_context * ctx,
  3672. struct ggml_tensor * a,
  3673. struct ggml_tensor * pw,
  3674. struct ggml_tensor * ph) {
  3675. return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
  3676. }
  3677. struct ggml_tensor * ggml_add_rel_pos_inplace(
  3678. struct ggml_context * ctx,
  3679. struct ggml_tensor * a,
  3680. struct ggml_tensor * pw,
  3681. struct ggml_tensor * ph) {
  3682. return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
  3683. }
  3684. // ggml_rwkv_wkv6
  3685. struct ggml_tensor * ggml_rwkv_wkv6(
  3686. struct ggml_context * ctx,
  3687. struct ggml_tensor * k,
  3688. struct ggml_tensor * v,
  3689. struct ggml_tensor * r,
  3690. struct ggml_tensor * tf,
  3691. struct ggml_tensor * td,
  3692. struct ggml_tensor * state) {
  3693. GGML_ASSERT(ggml_is_contiguous(k));
  3694. GGML_ASSERT(ggml_is_contiguous(v));
  3695. GGML_ASSERT(ggml_is_contiguous(r));
  3696. GGML_ASSERT(ggml_is_contiguous(tf));
  3697. GGML_ASSERT(ggml_is_contiguous(td));
  3698. GGML_ASSERT(ggml_is_contiguous(state));
  3699. const int64_t S = k->ne[0];
  3700. const int64_t H = k->ne[2];
  3701. const int64_t n_tokens = k->ne[3];
  3702. const int64_t n_seqs = state->ne[1];
  3703. {
  3704. GGML_ASSERT(k->ne[1] == 1);
  3705. GGML_ASSERT(v->ne[0] == 1 && v->ne[1] == S && v->ne[2] == H && v->ne[3] == n_tokens);
  3706. GGML_ASSERT(r->ne[0] == 1 && r->ne[1] == S && r->ne[2] == H && r->ne[3] == n_tokens);
  3707. // TODO: RWKV v4 and v5
  3708. GGML_ASSERT(td->ne[0] == 1 && td->ne[1] == S && td->ne[2] == H && td->ne[3] == n_tokens);
  3709. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3710. }
  3711. // concat output and new_state
  3712. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3713. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3714. result->op = GGML_OP_RWKV_WKV6;
  3715. result->src[0] = k;
  3716. result->src[1] = v;
  3717. result->src[2] = r;
  3718. result->src[3] = tf;
  3719. result->src[4] = td;
  3720. result->src[5] = state;
  3721. return result;
  3722. }
  3723. // ggml_unary
  3724. static struct ggml_tensor * ggml_unary_impl(
  3725. struct ggml_context * ctx,
  3726. struct ggml_tensor * a,
  3727. enum ggml_unary_op op,
  3728. bool inplace) {
  3729. GGML_ASSERT(ggml_is_contiguous_1(a));
  3730. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3731. ggml_set_op_params_i32(result, 0, (int32_t) op);
  3732. result->op = GGML_OP_UNARY;
  3733. result->src[0] = a;
  3734. return result;
  3735. }
  3736. struct ggml_tensor * ggml_unary(
  3737. struct ggml_context * ctx,
  3738. struct ggml_tensor * a,
  3739. enum ggml_unary_op op) {
  3740. return ggml_unary_impl(ctx, a, op, false);
  3741. }
  3742. struct ggml_tensor * ggml_unary_inplace(
  3743. struct ggml_context * ctx,
  3744. struct ggml_tensor * a,
  3745. enum ggml_unary_op op) {
  3746. return ggml_unary_impl(ctx, a, op, true);
  3747. }
  3748. // ggml_map_unary
  3749. static struct ggml_tensor * ggml_map_unary_impl_f32(
  3750. struct ggml_context * ctx,
  3751. struct ggml_tensor * a,
  3752. const ggml_unary_op_f32_t fun,
  3753. bool inplace) {
  3754. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3755. ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  3756. result->op = GGML_OP_MAP_UNARY;
  3757. result->src[0] = a;
  3758. return result;
  3759. }
  3760. struct ggml_tensor * ggml_map_unary_f32(
  3761. struct ggml_context * ctx,
  3762. struct ggml_tensor * a,
  3763. const ggml_unary_op_f32_t fun) {
  3764. return ggml_map_unary_impl_f32(ctx, a, fun, false);
  3765. }
  3766. struct ggml_tensor * ggml_map_unary_inplace_f32(
  3767. struct ggml_context * ctx,
  3768. struct ggml_tensor * a,
  3769. const ggml_unary_op_f32_t fun) {
  3770. return ggml_map_unary_impl_f32(ctx, a, fun, true);
  3771. }
  3772. // ggml_map_binary
  3773. static struct ggml_tensor * ggml_map_binary_impl_f32(
  3774. struct ggml_context * ctx,
  3775. struct ggml_tensor * a,
  3776. struct ggml_tensor * b,
  3777. const ggml_binary_op_f32_t fun,
  3778. bool inplace) {
  3779. GGML_ASSERT(ggml_are_same_shape(a, b));
  3780. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3781. ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  3782. result->op = GGML_OP_MAP_BINARY;
  3783. result->src[0] = a;
  3784. result->src[1] = b;
  3785. return result;
  3786. }
  3787. struct ggml_tensor * ggml_map_binary_f32(
  3788. struct ggml_context * ctx,
  3789. struct ggml_tensor * a,
  3790. struct ggml_tensor * b,
  3791. const ggml_binary_op_f32_t fun) {
  3792. return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
  3793. }
  3794. struct ggml_tensor * ggml_map_binary_inplace_f32(
  3795. struct ggml_context * ctx,
  3796. struct ggml_tensor * a,
  3797. struct ggml_tensor * b,
  3798. const ggml_binary_op_f32_t fun) {
  3799. return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
  3800. }
  3801. // ggml_map_custom1_f32
  3802. static struct ggml_tensor * ggml_map_custom1_impl_f32(
  3803. struct ggml_context * ctx,
  3804. struct ggml_tensor * a,
  3805. const ggml_custom1_op_f32_t fun,
  3806. bool inplace) {
  3807. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3808. ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  3809. result->op = GGML_OP_MAP_CUSTOM1_F32;
  3810. result->src[0] = a;
  3811. return result;
  3812. }
  3813. struct ggml_tensor * ggml_map_custom1_f32(
  3814. struct ggml_context * ctx,
  3815. struct ggml_tensor * a,
  3816. const ggml_custom1_op_f32_t fun) {
  3817. return ggml_map_custom1_impl_f32(ctx, a, fun, false);
  3818. }
  3819. struct ggml_tensor * ggml_map_custom1_inplace_f32(
  3820. struct ggml_context * ctx,
  3821. struct ggml_tensor * a,
  3822. const ggml_custom1_op_f32_t fun) {
  3823. return ggml_map_custom1_impl_f32(ctx, a, fun, true);
  3824. }
  3825. // ggml_map_custom2_f32
  3826. static struct ggml_tensor * ggml_map_custom2_impl_f32(
  3827. struct ggml_context * ctx,
  3828. struct ggml_tensor * a,
  3829. struct ggml_tensor * b,
  3830. const ggml_custom2_op_f32_t fun,
  3831. bool inplace) {
  3832. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3833. ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  3834. result->op = GGML_OP_MAP_CUSTOM2_F32;
  3835. result->src[0] = a;
  3836. result->src[1] = b;
  3837. return result;
  3838. }
  3839. struct ggml_tensor * ggml_map_custom2_f32(
  3840. struct ggml_context * ctx,
  3841. struct ggml_tensor * a,
  3842. struct ggml_tensor * b,
  3843. const ggml_custom2_op_f32_t fun) {
  3844. return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
  3845. }
  3846. struct ggml_tensor * ggml_map_custom2_inplace_f32(
  3847. struct ggml_context * ctx,
  3848. struct ggml_tensor * a,
  3849. struct ggml_tensor * b,
  3850. const ggml_custom2_op_f32_t fun) {
  3851. return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
  3852. }
  3853. // ggml_map_custom3_f32
  3854. static struct ggml_tensor * ggml_map_custom3_impl_f32(
  3855. struct ggml_context * ctx,
  3856. struct ggml_tensor * a,
  3857. struct ggml_tensor * b,
  3858. struct ggml_tensor * c,
  3859. const ggml_custom3_op_f32_t fun,
  3860. bool inplace) {
  3861. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3862. ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
  3863. result->op = GGML_OP_MAP_CUSTOM3_F32;
  3864. result->src[0] = a;
  3865. result->src[1] = b;
  3866. result->src[2] = c;
  3867. return result;
  3868. }
  3869. struct ggml_tensor * ggml_map_custom3_f32(
  3870. struct ggml_context * ctx,
  3871. struct ggml_tensor * a,
  3872. struct ggml_tensor * b,
  3873. struct ggml_tensor * c,
  3874. const ggml_custom3_op_f32_t fun) {
  3875. return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
  3876. }
  3877. struct ggml_tensor * ggml_map_custom3_inplace_f32(
  3878. struct ggml_context * ctx,
  3879. struct ggml_tensor * a,
  3880. struct ggml_tensor * b,
  3881. struct ggml_tensor * c,
  3882. const ggml_custom3_op_f32_t fun) {
  3883. return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
  3884. }
  3885. // ggml_map_custom1
  3886. static struct ggml_tensor * ggml_map_custom1_impl(
  3887. struct ggml_context * ctx,
  3888. struct ggml_tensor * a,
  3889. const ggml_custom1_op_t fun,
  3890. int n_tasks,
  3891. void * userdata,
  3892. bool inplace) {
  3893. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  3894. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3895. struct ggml_map_custom1_op_params params = {
  3896. /*.fun =*/ fun,
  3897. /*.n_tasks =*/ n_tasks,
  3898. /*.userdata =*/ userdata
  3899. };
  3900. ggml_set_op_params(result, (const void *) &params, sizeof(params));
  3901. result->op = GGML_OP_MAP_CUSTOM1;
  3902. result->src[0] = a;
  3903. return result;
  3904. }
  3905. struct ggml_tensor * ggml_map_custom1(
  3906. struct ggml_context * ctx,
  3907. struct ggml_tensor * a,
  3908. const ggml_custom1_op_t fun,
  3909. int n_tasks,
  3910. void * userdata) {
  3911. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
  3912. }
  3913. struct ggml_tensor * ggml_map_custom1_inplace(
  3914. struct ggml_context * ctx,
  3915. struct ggml_tensor * a,
  3916. const ggml_custom1_op_t fun,
  3917. int n_tasks,
  3918. void * userdata) {
  3919. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
  3920. }
  3921. // ggml_map_custom2
  3922. static struct ggml_tensor * ggml_map_custom2_impl(
  3923. struct ggml_context * ctx,
  3924. struct ggml_tensor * a,
  3925. struct ggml_tensor * b,
  3926. const ggml_custom2_op_t fun,
  3927. int n_tasks,
  3928. void * userdata,
  3929. bool inplace) {
  3930. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  3931. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3932. struct ggml_map_custom2_op_params params = {
  3933. /*.fun =*/ fun,
  3934. /*.n_tasks =*/ n_tasks,
  3935. /*.userdata =*/ userdata
  3936. };
  3937. ggml_set_op_params(result, (const void *) &params, sizeof(params));
  3938. result->op = GGML_OP_MAP_CUSTOM2;
  3939. result->src[0] = a;
  3940. result->src[1] = b;
  3941. return result;
  3942. }
  3943. struct ggml_tensor * ggml_map_custom2(
  3944. struct ggml_context * ctx,
  3945. struct ggml_tensor * a,
  3946. struct ggml_tensor * b,
  3947. const ggml_custom2_op_t fun,
  3948. int n_tasks,
  3949. void * userdata) {
  3950. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
  3951. }
  3952. struct ggml_tensor * ggml_map_custom2_inplace(
  3953. struct ggml_context * ctx,
  3954. struct ggml_tensor * a,
  3955. struct ggml_tensor * b,
  3956. const ggml_custom2_op_t fun,
  3957. int n_tasks,
  3958. void * userdata) {
  3959. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
  3960. }
  3961. // ggml_map_custom3
  3962. static struct ggml_tensor * ggml_map_custom3_impl(
  3963. struct ggml_context * ctx,
  3964. struct ggml_tensor * a,
  3965. struct ggml_tensor * b,
  3966. struct ggml_tensor * c,
  3967. const ggml_custom3_op_t fun,
  3968. int n_tasks,
  3969. void * userdata,
  3970. bool inplace) {
  3971. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  3972. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3973. struct ggml_map_custom3_op_params params = {
  3974. /*.fun =*/ fun,
  3975. /*.n_tasks =*/ n_tasks,
  3976. /*.userdata =*/ userdata
  3977. };
  3978. ggml_set_op_params(result, (const void *) &params, sizeof(params));
  3979. result->op = GGML_OP_MAP_CUSTOM3;
  3980. result->src[0] = a;
  3981. result->src[1] = b;
  3982. result->src[2] = c;
  3983. return result;
  3984. }
  3985. struct ggml_tensor * ggml_map_custom3(
  3986. struct ggml_context * ctx,
  3987. struct ggml_tensor * a,
  3988. struct ggml_tensor * b,
  3989. struct ggml_tensor * c,
  3990. const ggml_custom3_op_t fun,
  3991. int n_tasks,
  3992. void * userdata) {
  3993. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
  3994. }
  3995. struct ggml_tensor * ggml_map_custom3_inplace(
  3996. struct ggml_context * ctx,
  3997. struct ggml_tensor * a,
  3998. struct ggml_tensor * b,
  3999. struct ggml_tensor * c,
  4000. const ggml_custom3_op_t fun,
  4001. int n_tasks,
  4002. void * userdata) {
  4003. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
  4004. }
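// Usage sketch for the threaded custom ops (hedged; assumes the ggml_custom1_op_t
// signature `void fun(struct ggml_tensor * dst, const struct ggml_tensor * a,
// int ith, int nth, void * userdata)` from ggml.h). Each worker receives its thread
// index `ith` out of `nth` threads and is expected to process a disjoint slice of the
// data, e.g. interleaved rows of a contiguous tensor:
//
//     static void my_scale_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
//                               int ith, int nth, void * userdata) {
//         const float factor = *(const float *) userdata;  // per-call parameter
//         const int64_t nr   = ggml_nrows(a);               // total rows, split across threads
//         for (int64_t r = ith; r < nr; r += nth) {
//             const float * src = (const float *) ((const char *) a->data   + r*a->nb[1]);
//             float       * out = (float       *) ((char       *) dst->data + r*dst->nb[1]);
//             for (int64_t i = 0; i < a->ne[0]; ++i) {
//                 out[i] = factor*src[i];
//             }
//         }
//     }
//     ...
//     static float factor = 2.0f;
//     struct ggml_tensor * y = ggml_map_custom1(ctx0, x, my_scale_rows, GGML_N_TASKS_MAX, &factor);
//
// `userdata` must stay alive until the graph has been computed; GGML_N_TASKS_MAX lets
// the scheduler pick the number of threads.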
  4005. // ggml_cross_entropy_loss
  4006. struct ggml_tensor * ggml_cross_entropy_loss(
  4007. struct ggml_context * ctx,
  4008. struct ggml_tensor * a,
  4009. struct ggml_tensor * b) {
  4010. GGML_ASSERT(ggml_are_same_shape(a, b));
  4011. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  4012. result->op = GGML_OP_CROSS_ENTROPY_LOSS;
  4013. result->src[0] = a;
  4014. result->src[1] = b;
  4015. return result;
  4016. }
  4017. // ggml_cross_entropy_loss_back
  4018. struct ggml_tensor * ggml_cross_entropy_loss_back(
  4019. struct ggml_context * ctx,
  4020. struct ggml_tensor * a,
  4021. struct ggml_tensor * b,
  4022. struct ggml_tensor * c) {
  4023. GGML_ASSERT(ggml_are_same_shape(a, b));
  4024. GGML_ASSERT(ggml_is_scalar(c));
  4025. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  4026. result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
  4027. result->src[0] = a;
  4028. result->src[1] = b;
  4029. result->src[2] = c;
  4030. return result;
  4031. }
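// Usage sketch (hedged): both ops act on equally shaped tensors; the forward op reduces
// to a single scalar, which is what training code treats as the loss. Assuming F32
// tensors `logits` and `labels` of identical shape in context `ctx0`:
//
//     struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx0, logits, labels); // 1-element tensor
//
// ggml_cross_entropy_loss_back() above is what ggml_compute_backward() (further below)
// emits for this loss; user code normally does not call it directly.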
  4032. // opt_step_adamw
  4033. struct ggml_tensor * ggml_opt_step_adamw(
  4034. struct ggml_context * ctx,
  4035. struct ggml_tensor * a,
  4036. struct ggml_tensor * grad,
  4037. float alpha,
  4038. float beta1,
  4039. float beta2,
  4040. float eps,
  4041. float wd) {
  4042. GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
  4043. GGML_ASSERT(ggml_are_same_shape(a, grad));
  4044. GGML_ASSERT(alpha > 0.0f);
  4045. GGML_ASSERT(beta1 >= 0.0f && beta1 <= 1.0f);
  4046. GGML_ASSERT(beta2 >= 0.0f && beta2 <= 1.0f);
  4047. GGML_ASSERT(eps >= 0.0f);
  4048. GGML_ASSERT(wd >= 0.0f && wd <= 1.0f);
  4049. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4050. const int64_t iter = 1;
  4051. memcpy(&result->op_params[0], &iter, sizeof(int64_t));
  4052. ggml_set_op_params_f32(result, 2, alpha);
  4053. ggml_set_op_params_f32(result, 3, beta1);
  4054. ggml_set_op_params_f32(result, 4, beta2);
  4055. ggml_set_op_params_f32(result, 5, eps);
  4056. ggml_set_op_params_f32(result, 6, wd);
  4057. result->op = GGML_OP_OPT_STEP_ADAMW;
  4058. result->src[0] = a;
  4059. result->src[1] = grad;
  4060. result->src[2] = ggml_dup_tensor(ctx, grad);
  4061. result->src[3] = ggml_dup_tensor(ctx, grad);
  4062. return result;
  4063. }
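// Layout note and usage sketch: op_params[0..1] hold the int64 iteration counter
// (reset to 1 by ggml_graph_reset() further below), op_params[2..6] hold
// alpha/beta1/beta2/eps/wd, and src[2]/src[3] are freshly allocated tensors for the
// first and second AdamW moments. A hedged, hypothetical call for a trainable
// parameter `w` with gradient `w->grad`:
//
//     struct ggml_tensor * step = ggml_opt_step_adamw(ctx0, w, w->grad,
//                                                     /*alpha =*/ 1e-3f,
//                                                     /*beta1 =*/ 0.9f,
//                                                     /*beta2 =*/ 0.999f,
//                                                     /*eps   =*/ 1e-8f,
//                                                     /*wd    =*/ 0.0f);
//     ggml_build_forward_expand(gb, step); // this is what ggml_build_opt_adamw() does per parameter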
  4064. ////////////////////////////////////////////////////////////////////////////////
  4065. struct ggml_hash_set ggml_hash_set_new(size_t size) {
  4066. size = ggml_hash_size(size);
  4067. struct ggml_hash_set result;
  4068. result.size = size;
  4069. result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
  4070. result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
  4071. return result;
  4072. }
  4073. void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
  4074. memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
  4075. }
  4076. void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
  4077. GGML_FREE(hash_set->used);
  4078. GGML_FREE(hash_set->keys);
  4079. }
  4080. size_t ggml_hash_size(size_t min_sz) {
  4081. // next primes after powers of two
  4082. static const size_t primes[] = {
  4083. 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
  4084. 2053, 4099, 8209, 16411, 32771, 65537, 131101,
  4085. 262147, 524309, 1048583, 2097169, 4194319, 8388617,
  4086. 16777259, 33554467, 67108879, 134217757, 268435459,
  4087. 536870923, 1073741827, 2147483659
  4088. };
  4089. static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
4090. // find the smallest prime that is larger than or equal to min_sz
  4091. size_t l = 0;
  4092. size_t r = n_primes;
  4093. while (l < r) {
  4094. size_t m = (l + r)/2;
  4095. if (primes[m] < min_sz) {
  4096. l = m + 1;
  4097. } else {
  4098. r = m;
  4099. }
  4100. }
  4101. size_t sz = l < n_primes ? primes[l] : min_sz | 1;
  4102. return sz;
  4103. }
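// Worked example: for min_sz == 1000 the binary search above returns primes[10] == 1031,
// the smallest listed prime that is >= 1000 (521 is too small). If min_sz exceeds the
// largest listed prime, the fallback `min_sz | 1` simply uses the smallest odd value >= min_sz.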
  4104. struct hash_map {
  4105. struct ggml_hash_set set;
  4106. struct ggml_tensor ** vals;
  4107. };
  4108. static struct hash_map * ggml_new_hash_map(size_t size) {
  4109. struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
  4110. result->set = ggml_hash_set_new(size);
  4111. result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
  4112. return result;
  4113. }
  4114. static void ggml_hash_map_free(struct hash_map * map) {
  4115. ggml_hash_set_free(&map->set);
  4116. GGML_FREE(map->vals);
  4117. GGML_FREE(map);
  4118. }
  4119. // gradient checkpointing
  4120. static struct ggml_tensor * ggml_recompute_graph_node(
  4121. struct ggml_context * ctx,
  4122. struct ggml_cgraph * graph,
  4123. struct hash_map * replacements,
  4124. struct ggml_tensor * node) {
  4125. if (node == NULL) {
  4126. return NULL;
  4127. }
  4128. if (node->flags & GGML_TENSOR_FLAG_PARAM) {
  4129. return node;
  4130. }
  4131. if (!ggml_hash_contains(&graph->visited_hash_set, node)) {
  4132. return node;
  4133. }
  4134. int count_children = 0;
  4135. for (int k = 0; k < GGML_MAX_SRC; ++k) {
  4136. if (node->src[k]) {
  4137. ++count_children;
  4138. }
  4139. }
  4140. if (count_children == 0) {
  4141. return node;
  4142. }
  4143. size_t i = ggml_hash_find(&replacements->set, node);
4144. GGML_ASSERT(i != GGML_HASHSET_FULL); // assert that the hash set is not full
  4145. if (replacements->set.keys[i] == node) {
  4146. return replacements->vals[i];
  4147. }
  4148. struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, GGML_MAX_DIMS, node->ne);
  4149. // insert clone into replacements
  4150. GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
  4151. replacements->set.keys[i] = node;
  4152. replacements->vals[i] = clone;
  4153. clone->op = node->op;
  4154. clone->grad = node->grad;
  4155. clone->flags = node->flags;
  4156. clone->extra = node->extra;
  4157. for (int k = 0; k < GGML_MAX_DIMS; ++k) {
  4158. clone->nb[k] = node->nb[k];
  4159. }
  4160. for (int k = 0; k < GGML_MAX_SRC; ++k) {
  4161. clone->src[k] = ggml_recompute_graph_node(ctx, graph, replacements, node->src[k]);
  4162. }
  4163. if (node->view_src != NULL) {
  4164. clone->data = (node->view_src->data == NULL)
  4165. ? NULL // view_src not yet allocated
  4166. : (char *) node->view_src->data // view_src already allocated
  4167. + node->view_offs;
  4168. clone->view_src = node->view_src;
  4169. clone->view_offs = node->view_offs;
  4170. }
  4171. GGML_ASSERT(sizeof(node->op_params) == sizeof(int32_t) * (GGML_MAX_OP_PARAMS / sizeof(int32_t)));
  4172. GGML_ASSERT(sizeof(node->name) == GGML_MAX_NAME);
  4173. memcpy(clone->op_params, node->op_params, sizeof(node->op_params));
  4174. ggml_format_name(clone, "%s (clone)", ggml_get_name(node));
  4175. return clone;
  4176. }
  4177. void ggml_build_backward_gradient_checkpointing(
  4178. struct ggml_context * ctx,
  4179. struct ggml_cgraph * gf,
  4180. struct ggml_cgraph * gb,
  4181. struct ggml_cgraph * gb_tmp,
  4182. struct ggml_tensor * * checkpoints,
  4183. int n_checkpoints) {
  4184. ggml_graph_cpy(gf, gb_tmp);
  4185. ggml_build_backward_expand(ctx, gf, gb_tmp, false);
  4186. if (n_checkpoints <= 0) {
  4187. ggml_graph_cpy(gb_tmp, gb);
  4188. return;
  4189. }
  4190. struct hash_map * replacements = ggml_new_hash_map(gf->n_nodes + gf->n_leafs + n_checkpoints);
  4191. // insert checkpoints in replacements
  4192. for (int i = 0; i < n_checkpoints; ++i) {
  4193. size_t k = ggml_hash_find(&replacements->set, checkpoints[i]);
4194. GGML_ASSERT(k != GGML_HASHSET_FULL); // assert that the hash set is not full
  4195. GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
  4196. replacements->set.keys[k] = checkpoints[i];
  4197. replacements->vals[k] = checkpoints[i];
  4198. }
  4199. ggml_graph_cpy(gf, gb);
  4200. // rewrite gb_tmp->nodes[gf->n_nodes:gb_tmp->n_nodes],
  4201. // replacing references to gb_tmp->nodes[0:gf->n_nodes] ( == gf->nodes[0:gf->n_nodes]),
  4202. // by recomputing them from checkpoints
  4203. for (int i = gf->n_nodes; i<gb_tmp->n_nodes; ++i) {
  4204. struct ggml_tensor * node = gb_tmp->nodes[i];
  4205. for (int k = 0; k < GGML_MAX_SRC; ++k) {
4206. // insert new tensors that recompute src, reusing replacements that were already made;
4207. // remember the new tensors in `replacements`, keyed by the corresponding gf nodes;
4208. // recurse into the input tensors,
4209. // terminating when an input tensor is itself a replacement (such as a checkpoint)
  4210. node->src[k] = ggml_recompute_graph_node(ctx, gf, replacements, node->src[k]);
  4211. }
  4212. // insert rewritten backward node with replacements made into resulting backward graph gb
  4213. ggml_build_forward_expand(gb, node);
  4214. }
  4215. ggml_hash_map_free(replacements);
  4216. }
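// Usage sketch (hedged): a training setup that wants activation checkpointing builds the
// forward graph gf as usual, picks a set of tensors to keep (the checkpoints, e.g. the
// per-layer inputs), and lets this helper rebuild everything else on the fly during the
// backward pass:
//
//     struct ggml_cgraph * gf     = ggml_new_graph_custom(ctx0, graph_size, true);
//     struct ggml_cgraph * gb     = ggml_new_graph_custom(ctx0, graph_size, true);
//     struct ggml_cgraph * gb_tmp = ggml_new_graph_custom(ctx0, graph_size, true);
//     ggml_build_forward_expand(gf, loss);
//     ggml_build_backward_gradient_checkpointing(ctx0, gf, gb, gb_tmp, checkpoints, n_checkpoints);
//
// `checkpoints`, `n_checkpoints` and `graph_size` are placeholders supplied by the caller;
// with n_checkpoints <= 0 the function degenerates to a plain backward graph.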
  4217. // utility functions to change gradients
  4218. // if a is in acc_table, modify gradients in-place and mark result as gradient accumulator
  4219. // else if a is in zero_table, replace a
  4220. // else, just add/subtract/etc. the gradients
  4221. static struct ggml_tensor * ggml_add_or_set(
  4222. struct ggml_context * ctx,
  4223. struct ggml_tensor * a,
  4224. struct ggml_tensor * b,
  4225. struct ggml_hash_set * zero_table,
  4226. struct ggml_hash_set * acc_table) {
  4227. if (ggml_hash_contains(acc_table, a)) {
  4228. struct ggml_tensor * ret = ggml_add_impl(ctx, a, b, true);
  4229. const size_t insert_result = ggml_hash_insert(acc_table, ret);
  4230. GGML_ASSERT(insert_result != GGML_HASHSET_FULL);
  4231. GGML_ASSERT(insert_result != GGML_HASHSET_ALREADY_EXISTS);
  4232. return ret;
  4233. }
  4234. if (ggml_hash_contains(zero_table, a)) {
  4235. return b;
  4236. }
  4237. return ggml_add_impl(ctx, a, b, false);
  4238. }
  4239. static struct ggml_tensor * ggml_acc_or_set(
  4240. struct ggml_context * ctx,
  4241. struct ggml_tensor * a,
  4242. struct ggml_tensor * b,
  4243. const size_t nb1,
  4244. const size_t nb2,
  4245. const size_t nb3,
  4246. const size_t offset,
  4247. struct ggml_hash_set * zero_table,
  4248. struct ggml_hash_set * acc_table) {
  4249. if (ggml_hash_contains(acc_table, a)) {
  4250. struct ggml_tensor * ret = ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  4251. const size_t insert_result = ggml_hash_insert(acc_table, ret);
  4252. GGML_ASSERT(insert_result != GGML_HASHSET_FULL);
  4253. GGML_ASSERT(insert_result != GGML_HASHSET_ALREADY_EXISTS);
  4254. return ret;
  4255. }
  4256. if (ggml_hash_contains(zero_table, a)) {
  4257. struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
  4258. return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
  4259. }
  4260. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  4261. }
  4262. static struct ggml_tensor * ggml_add1_or_set(
  4263. struct ggml_context * ctx,
  4264. struct ggml_tensor * a,
  4265. struct ggml_tensor * b,
  4266. struct ggml_hash_set * zero_table,
  4267. struct ggml_hash_set * acc_table) {
  4268. if (ggml_hash_contains(acc_table, a)) {
  4269. struct ggml_tensor * ret = ggml_add1_impl(ctx, a, b, true);
  4270. const size_t insert_result = ggml_hash_insert(acc_table, ret);
  4271. GGML_ASSERT(insert_result != GGML_HASHSET_FULL);
  4272. GGML_ASSERT(insert_result != GGML_HASHSET_ALREADY_EXISTS);
  4273. return ret;
  4274. }
  4275. if (ggml_hash_contains(zero_table, a)) {
  4276. return ggml_repeat(ctx, b, a);
  4277. }
  4278. return ggml_add1_impl(ctx, a, b, false);
  4279. }
  4280. static struct ggml_tensor * ggml_sub_or_set(
  4281. struct ggml_context * ctx,
  4282. struct ggml_tensor * a,
  4283. struct ggml_tensor * b,
  4284. struct ggml_hash_set * zero_table,
  4285. struct ggml_hash_set * acc_table) {
  4286. if (ggml_hash_contains(acc_table, a)) {
  4287. struct ggml_tensor * ret = ggml_sub_impl(ctx, a, b, true);
  4288. const size_t insert_result = ggml_hash_insert(acc_table, ret);
  4289. GGML_ASSERT(insert_result != GGML_HASHSET_FULL);
  4290. GGML_ASSERT(insert_result != GGML_HASHSET_ALREADY_EXISTS);
  4291. return ret;
  4292. }
  4293. if (ggml_hash_contains(zero_table, a)) {
  4294. return ggml_neg(ctx, b);
  4295. }
  4296. return ggml_sub_impl(ctx, a, b, false);
  4297. }
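// Illustration of the helpers above: for the first gradient contribution to a node whose
// grad tensor is still in zero_table (i.e. known to be all zeros), ggml_add_or_set()
// returns `b` directly instead of building add(0, b); for nodes in acc_table it builds an
// inplace add so repeated backward passes accumulate into the same gradient tensor.
// Conceptually (hypothetical names):
//
//     grad = in_acc_table  ? add_inplace(grad, contrib)
//          : in_zero_table ? contrib
//          :                 add(grad, contrib);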
  4298. static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table, struct ggml_hash_set * acc_table) {
  4299. struct ggml_tensor * src0 = tensor->src[0];
  4300. struct ggml_tensor * src1 = tensor->src[1];
  4301. struct ggml_tensor * src2 = tensor->src[2];
  4302. switch (tensor->op) {
  4303. case GGML_OP_DUP:
  4304. {
  4305. if (src0->grad) {
  4306. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4307. }
  4308. } break;
  4309. case GGML_OP_ADD:
  4310. {
  4311. if (src0->grad) {
  4312. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4313. }
  4314. if (src1->grad) {
  4315. if (ggml_are_same_shape(src0, src1)) {
  4316. src1->grad = ggml_add_or_set(ctx, src1->grad, tensor->grad, zero_table, acc_table);
  4317. } else {
  4318. src1->grad = ggml_add_or_set(ctx, src1->grad, ggml_repeat_back(ctx, tensor->grad, src1), zero_table, acc_table);
  4319. }
  4320. }
  4321. } break;
  4322. case GGML_OP_ADD1:
  4323. {
  4324. if (src0->grad) {
  4325. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4326. }
  4327. if (src1->grad) {
  4328. src1->grad = ggml_add_or_set(ctx,
  4329. src1->grad,
  4330. ggml_mean(ctx, tensor->grad), // TODO: should probably be sum instead of mean
  4331. zero_table, acc_table);
  4332. }
  4333. } break;
  4334. case GGML_OP_ACC:
  4335. {
  4336. if (src0->grad) {
  4337. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4338. }
  4339. if (src1->grad) {
  4340. const size_t nb1 = ((int32_t *) tensor->op_params)[0];
  4341. const size_t nb2 = ((int32_t *) tensor->op_params)[1];
  4342. const size_t nb3 = ((int32_t *) tensor->op_params)[2];
  4343. const size_t offset = ((int32_t *) tensor->op_params)[3];
  4344. struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
  4345. tensor->grad,
  4346. src1->grad->ne[0],
  4347. src1->grad->ne[1],
  4348. src1->grad->ne[2],
  4349. src1->grad->ne[3],
  4350. nb1, nb2, nb3, offset);
  4351. src1->grad =
  4352. ggml_add_or_set(ctx,
  4353. src1->grad,
  4354. ggml_reshape(ctx,
  4355. ggml_cont(ctx, tensor_grad_view),
  4356. src1->grad),
  4357. zero_table, acc_table);
  4358. }
  4359. } break;
  4360. case GGML_OP_SUB:
  4361. {
  4362. if (src0->grad) {
  4363. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4364. }
  4365. if (src1->grad) {
  4366. src1->grad = ggml_sub_or_set(ctx, src1->grad, tensor->grad, zero_table, acc_table);
  4367. }
  4368. } break;
  4369. case GGML_OP_MUL:
  4370. {
  4371. if (src0->grad) {
  4372. src0->grad =
  4373. ggml_add_or_set(ctx,
  4374. src0->grad,
  4375. ggml_mul(ctx, src1, tensor->grad),
  4376. zero_table, acc_table);
  4377. }
  4378. if (src1->grad) {
  4379. src1->grad =
  4380. ggml_add_or_set(ctx,
  4381. src1->grad,
  4382. ggml_mul(ctx, src0, tensor->grad),
  4383. zero_table, acc_table);
  4384. }
  4385. } break;
  4386. case GGML_OP_DIV:
  4387. {
  4388. if (src0->grad) {
  4389. src0->grad =
  4390. ggml_add_or_set(ctx,
  4391. src0->grad,
  4392. ggml_div(ctx, tensor->grad, src1),
  4393. zero_table, acc_table);
  4394. }
  4395. if (src1->grad) {
  4396. src1->grad =
  4397. ggml_sub_or_set(ctx,
  4398. src1->grad,
  4399. ggml_mul(ctx,
  4400. tensor->grad,
  4401. ggml_div(ctx, tensor, src1)),
  4402. zero_table, acc_table);
  4403. }
  4404. } break;
  4405. case GGML_OP_SQR:
  4406. {
  4407. if (src0->grad) {
  4408. src0->grad =
  4409. ggml_add_or_set(ctx,
  4410. src0->grad,
  4411. ggml_scale(ctx,
  4412. ggml_mul(ctx, src0, tensor->grad),
  4413. 2.0f),
  4414. zero_table, acc_table);
  4415. }
  4416. } break;
  4417. case GGML_OP_SQRT:
  4418. {
  4419. if (src0->grad) {
  4420. src0->grad =
  4421. ggml_add_or_set(ctx,
  4422. src0->grad,
  4423. ggml_scale(ctx,
  4424. ggml_div(ctx,
  4425. tensor->grad,
  4426. tensor),
  4427. 0.5f),
  4428. zero_table, acc_table);
  4429. }
  4430. } break;
  4431. case GGML_OP_LOG:
  4432. {
  4433. if (src0->grad) {
  4434. src0->grad =
  4435. ggml_add_or_set(ctx,
  4436. src0->grad,
  4437. ggml_div(ctx,
  4438. tensor->grad,
  4439. src0),
  4440. zero_table, acc_table);
  4441. }
  4442. } break;
  4443. case GGML_OP_SIN:
  4444. {
  4445. if (src0->grad) {
  4446. src0->grad =
  4447. ggml_add_or_set(ctx,
  4448. src0->grad,
  4449. ggml_mul(ctx,
  4450. tensor->grad,
  4451. ggml_cos(ctx, src0)),
  4452. zero_table, acc_table);
  4453. }
  4454. } break;
  4455. case GGML_OP_COS:
  4456. {
  4457. if (src0->grad) {
  4458. src0->grad =
  4459. ggml_sub_or_set(ctx,
  4460. src0->grad,
  4461. ggml_mul(ctx,
  4462. tensor->grad,
  4463. ggml_sin(ctx, src0)),
  4464. zero_table, acc_table);
  4465. }
  4466. } break;
  4467. case GGML_OP_SUM:
  4468. {
  4469. if (src0->grad) {
  4470. src0->grad =
  4471. ggml_add1_or_set(ctx,
  4472. src0->grad,
  4473. tensor->grad,
  4474. zero_table, acc_table);
  4475. }
  4476. } break;
  4477. case GGML_OP_SUM_ROWS:
  4478. {
  4479. if (src0->grad) {
  4480. src0->grad =
  4481. ggml_add_or_set(ctx,
  4482. src0->grad,
  4483. ggml_repeat(ctx,
  4484. tensor->grad,
  4485. src0->grad),
  4486. zero_table, acc_table);
  4487. }
  4488. } break;
  4489. case GGML_OP_MEAN:
  4490. case GGML_OP_ARGMAX:
  4491. case GGML_OP_COUNT_EQUAL:
  4492. {
  4493. GGML_ABORT("fatal error"); // TODO: implement
  4494. }
  4495. case GGML_OP_REPEAT:
  4496. {
  4497. // necessary for llama
  4498. if (src0->grad) {
  4499. src0->grad = ggml_add_or_set(ctx,
  4500. src0->grad,
  4501. ggml_repeat_back(ctx, tensor->grad, src0->grad),
  4502. zero_table, acc_table);
  4503. }
  4504. } break;
  4505. case GGML_OP_REPEAT_BACK:
  4506. {
  4507. if (src0->grad) {
  4508. // TODO: test this
  4509. src0->grad = ggml_add_or_set(ctx,
  4510. src0->grad,
  4511. ggml_repeat(ctx, tensor->grad, src0->grad),
  4512. zero_table, acc_table);
  4513. }
  4514. } break;
  4515. case GGML_OP_CONCAT:
  4516. {
  4517. GGML_ABORT("fatal error"); // TODO: implement
  4518. }
  4519. case GGML_OP_SILU_BACK:
  4520. {
  4521. GGML_ABORT("fatal error"); // TODO: not implemented
  4522. }
  4523. case GGML_OP_NORM:
  4524. {
  4525. GGML_ABORT("fatal error"); // TODO: not implemented
  4526. }
  4527. case GGML_OP_RMS_NORM:
  4528. {
  4529. // necessary for llama
  4530. if (src0->grad) {
  4531. float eps;
  4532. memcpy(&eps, tensor->op_params, sizeof(float));
  4533. src0->grad = ggml_add_or_set(ctx,
  4534. src0->grad,
  4535. ggml_rms_norm_back(ctx, src0, tensor->grad, eps),
  4536. zero_table, acc_table);
  4537. }
  4538. } break;
  4539. case GGML_OP_RMS_NORM_BACK:
  4540. {
  4541. GGML_ABORT("fatal error"); // TODO: not implemented
  4542. }
  4543. case GGML_OP_GROUP_NORM:
  4544. {
  4545. GGML_ABORT("fatal error"); // TODO: not implemented
  4546. }
  4547. case GGML_OP_MUL_MAT:
  4548. {
  4549. // https://cs231n.github.io/optimization-2/#staged
  4550. // # forward pass
  4551. // s0 = np.random.randn(5, 10)
  4552. // s1 = np.random.randn(10, 3)
  4553. // t = s0.dot(s1)
  4554. // # now suppose we had the gradient on t from above in the circuit
  4555. // dt = np.random.randn(*t.shape) # same shape as t
  4556. // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
  4557. // ds1 = t.T.dot(dt)
  4558. // tensor.shape [m,p,qq,rr]
  4559. // src0.shape [n,m,q1,r1]
  4560. // src1.shape [n,p,qq,rr]
  4561. // necessary for llama
  4562. if (src0->grad) {
  4563. struct ggml_tensor * s1_tg =
  4564. ggml_out_prod(ctx, // [n,m,qq,rr]
  4565. src1, // [n,p,qq,rr]
  4566. tensor->grad); // [m,p,qq,rr]
  4567. const int64_t qq = s1_tg->ne[2];
  4568. const int64_t rr = s1_tg->ne[3];
  4569. const int64_t q1 = src0->ne[2];
  4570. const int64_t r1 = src0->ne[3];
  4571. const bool ne2_broadcasted = qq > q1;
  4572. const bool ne3_broadcasted = rr > r1;
  4573. if (ne2_broadcasted || ne3_broadcasted) {
  4574. // sum broadcast repetitions of s1_tg into shape of src0
  4575. s1_tg = ggml_repeat_back(ctx, s1_tg, src0);
  4576. }
  4577. src0->grad =
  4578. ggml_add_or_set(ctx,
  4579. src0->grad, // [n,m,q1,r1]
  4580. s1_tg, // [n,m,q1,r1]
  4581. zero_table, acc_table);
  4582. }
  4583. if (src1->grad) {
  4584. src1->grad =
  4585. ggml_add_or_set(ctx,
  4586. src1->grad, // [n,p,qq,rr]
  4587. // ggml_mul_mat(ctx, // [n,p,qq,rr]
  4588. // ggml_cont(ctx, // [m,n,q1,r1]
  4589. // ggml_transpose(ctx, src0)), // [m,n,q1,r1]
  4590. // tensor->grad), // [m,p,qq,rr]
  4591. // // when src0 is bigger than tensor->grad (this is mostly the case in llama),
  4592. // // avoid transpose of src0, rather transpose smaller tensor->grad
  4593. // // and then use ggml_out_prod
  4594. ggml_out_prod(ctx, // [n,p,qq,rr]
  4595. src0, // [n,m,q1,r1]
  4596. ggml_transpose(ctx, // [p,m,qq,rr]
  4597. tensor->grad)), // [m,p,qq,rr]
  4598. zero_table, acc_table);
  4599. }
  4600. } break;
  4601. case GGML_OP_MUL_MAT_ID:
  4602. {
  4603. GGML_ABORT("fatal error"); // TODO: not implemented
  4604. }
  4605. case GGML_OP_OUT_PROD:
  4606. {
  4607. GGML_ABORT("fatal error"); // TODO: not implemented
  4608. }
  4609. case GGML_OP_SCALE:
  4610. {
  4611. // necessary for llama
  4612. if (src0->grad) {
  4613. float s;
  4614. memcpy(&s, tensor->op_params, sizeof(float));
  4615. src0->grad =
  4616. ggml_add_or_set(ctx,
  4617. src0->grad,
  4618. ggml_scale_impl(ctx, tensor->grad, s, false),
  4619. zero_table, acc_table);
  4620. }
  4621. } break;
  4622. case GGML_OP_SET:
  4623. {
  4624. const size_t nb1 = ((int32_t *) tensor->op_params)[0];
  4625. const size_t nb2 = ((int32_t *) tensor->op_params)[1];
  4626. const size_t nb3 = ((int32_t *) tensor->op_params)[2];
  4627. const size_t offset = ((int32_t *) tensor->op_params)[3];
  4628. struct ggml_tensor * tensor_grad_view = NULL;
  4629. if (src0->grad || src1->grad) {
  4630. GGML_ASSERT(src0->type == tensor->type);
  4631. GGML_ASSERT(tensor->grad->type == tensor->type);
  4632. GGML_ASSERT(!src1->grad || src1->grad->type == tensor->grad->type);
  4633. tensor_grad_view = ggml_view_4d(ctx,
  4634. tensor->grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4635. nb1, nb2, nb3, offset);
  4636. }
  4637. if (src0->grad) {
  4638. src0->grad = ggml_add_or_set(ctx,
  4639. src0->grad,
  4640. ggml_acc_impl(ctx,
  4641. tensor->grad,
  4642. ggml_neg(ctx, tensor_grad_view),
  4643. nb1, nb2, nb3, offset, false),
  4644. zero_table, acc_table);
  4645. }
  4646. if (src1->grad) {
  4647. src1->grad =
  4648. ggml_add_or_set(ctx,
  4649. src1->grad,
  4650. ggml_reshape(ctx,
  4651. ggml_cont(ctx, tensor_grad_view),
  4652. src1->grad),
  4653. zero_table, acc_table);
  4654. }
  4655. } break;
  4656. case GGML_OP_CPY:
  4657. {
  4658. // necessary for llama
  4659. // cpy overwrites value of src1 by src0 and returns view(src1)
  4660. // the overwriting is mathematically equivalent to:
  4661. // tensor = src0 * 1 + src1 * 0
  4662. if (src0->grad) {
  4663. // dsrc0 = dtensor * 1
  4664. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4665. }
  4666. if (src1->grad) {
  4667. // dsrc1 = dtensor * 0 -> noop
  4668. }
  4669. } break;
  4670. case GGML_OP_CONT:
  4671. {
  4672. // same as cpy
  4673. if (src0->grad) {
  4674. GGML_ASSERT(ggml_is_contiguous(src0->grad));
  4675. GGML_ASSERT(ggml_is_contiguous(tensor->grad));
  4676. src0->grad = ggml_add_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  4677. }
  4678. } break;
  4679. case GGML_OP_RESHAPE:
  4680. {
  4681. // necessary for llama
  4682. if (src0->grad) {
  4683. src0->grad =
  4684. ggml_add_or_set(ctx, src0->grad,
  4685. ggml_reshape(ctx,
  4686. ggml_is_contiguous(tensor->grad)
  4687. ? tensor->grad
  4688. : ggml_cont(ctx, tensor->grad),
  4689. src0->grad),
  4690. zero_table, acc_table);
  4691. }
  4692. } break;
  4693. case GGML_OP_VIEW:
  4694. {
  4695. // necessary for llama
  4696. if (src0->grad) {
  4697. size_t offset;
  4698. memcpy(&offset, tensor->op_params, sizeof(offset));
  4699. size_t nb1 = tensor->nb[1];
  4700. size_t nb2 = tensor->nb[2];
  4701. size_t nb3 = tensor->nb[3];
  4702. if (src0->type != src0->grad->type) {
  4703. // gradient is typically F32, but src0 could be other type
  4704. size_t ng = ggml_element_size(src0->grad);
  4705. size_t n0 = ggml_element_size(src0);
  4706. GGML_ASSERT(offset % n0 == 0);
  4707. GGML_ASSERT(nb1 % n0 == 0);
  4708. GGML_ASSERT(nb2 % n0 == 0);
  4709. GGML_ASSERT(nb3 % n0 == 0);
  4710. offset = (offset / n0) * ng;
  4711. nb1 = (nb1 / n0) * ng;
  4712. nb2 = (nb2 / n0) * ng;
  4713. nb3 = (nb3 / n0) * ng;
  4714. }
  4715. src0->grad = ggml_acc_or_set(ctx, src0->grad, tensor->grad, nb1, nb2, nb3, offset, zero_table, acc_table);
  4716. }
  4717. } break;
  4718. case GGML_OP_PERMUTE:
  4719. {
  4720. // necessary for llama
  4721. if (src0->grad) {
  4722. int32_t * axes = (int32_t *) tensor->op_params;
  4723. int axis0 = axes[0] & 0x3;
  4724. int axis1 = axes[1] & 0x3;
  4725. int axis2 = axes[2] & 0x3;
  4726. int axis3 = axes[3] & 0x3;
  4727. int axes_backward[4] = {0,0,0,0};
  4728. axes_backward[axis0] = 0;
  4729. axes_backward[axis1] = 1;
  4730. axes_backward[axis2] = 2;
  4731. axes_backward[axis3] = 3;
  4732. src0->grad =
  4733. ggml_add_or_set(ctx, src0->grad,
  4734. ggml_permute(ctx,
  4735. tensor->grad,
  4736. axes_backward[0],
  4737. axes_backward[1],
  4738. axes_backward[2],
  4739. axes_backward[3]),
  4740. zero_table, acc_table);
  4741. }
  4742. } break;
  4743. case GGML_OP_TRANSPOSE:
  4744. {
  4745. // necessary for llama
  4746. if (src0->grad) {
  4747. src0->grad =
  4748. ggml_add_or_set(ctx, src0->grad,
  4749. ggml_transpose(ctx, tensor->grad),
  4750. zero_table, acc_table);
  4751. }
  4752. } break;
  4753. case GGML_OP_GET_ROWS:
  4754. {
  4755. // necessary for llama (only for tokenizer)
  4756. if (src0->grad) {
  4757. src0->grad =
  4758. ggml_add_or_set(ctx, src0->grad,
4759. // the last ggml_get_rows_back argument (src0->grad) is only
4760. // necessary to set up the correct output shape
  4761. ggml_get_rows_back(ctx, tensor->grad, src1, src0->grad),
  4762. zero_table, acc_table);
  4763. }
  4764. if (src1->grad) {
  4765. // noop
  4766. }
  4767. } break;
  4768. case GGML_OP_GET_ROWS_BACK:
  4769. {
  4770. GGML_ABORT("fatal error"); // TODO: not implemented
  4771. }
  4772. case GGML_OP_DIAG:
  4773. {
  4774. GGML_ABORT("fatal error"); // TODO: not implemented
  4775. }
  4776. case GGML_OP_DIAG_MASK_INF:
  4777. {
  4778. // necessary for llama
  4779. if (src0->grad) {
  4780. const int n_past = ((int32_t *) tensor->op_params)[0];
  4781. src0->grad =
  4782. ggml_add_or_set(ctx, src0->grad,
  4783. /* ggml_diag_mask_inf_impl() shouldn't be here */
  4784. /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
  4785. ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
  4786. zero_table, acc_table);
  4787. }
  4788. } break;
  4789. case GGML_OP_DIAG_MASK_ZERO:
  4790. {
  4791. // necessary for llama
  4792. if (src0->grad) {
  4793. const int n_past = ((int32_t *) tensor->op_params)[0];
  4794. src0->grad =
  4795. ggml_add_or_set(ctx, src0->grad,
  4796. ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
  4797. zero_table, acc_table);
  4798. }
  4799. } break;
  4800. case GGML_OP_SOFT_MAX:
  4801. {
  4802. // necessary for llama
  4803. if (src0->grad) {
  4804. src0->grad =
  4805. ggml_add_or_set(ctx, src0->grad,
  4806. ggml_soft_max_back(ctx, tensor->grad, tensor),
  4807. zero_table, acc_table);
  4808. }
  4809. GGML_ASSERT((!src1 || !src1->grad) && "backward pass for softmax mask not implemented");
  4810. } break;
  4811. case GGML_OP_SOFT_MAX_BACK:
  4812. {
  4813. GGML_ABORT("fatal error"); // TODO: not implemented
  4814. }
  4815. case GGML_OP_ROPE:
  4816. {
  4817. // necessary for llama
  4818. if (src0->grad) {
  4819. //const int n_past = ((int32_t *) tensor->op_params)[0];
  4820. const int n_dims = ((int32_t *) tensor->op_params)[1];
  4821. const int mode = ((int32_t *) tensor->op_params)[2];
  4822. //const int n_ctx = ((int32_t *) tensor->op_params)[3];
  4823. const int n_ctx_orig = ((int32_t *) tensor->op_params)[4];
  4824. float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
  4825. memcpy(&freq_base, (int32_t *) tensor->op_params + 5, sizeof(float));
  4826. memcpy(&freq_scale, (int32_t *) tensor->op_params + 6, sizeof(float));
  4827. memcpy(&ext_factor, (int32_t *) tensor->op_params + 7, sizeof(float));
  4828. memcpy(&attn_factor, (int32_t *) tensor->op_params + 8, sizeof(float));
  4829. memcpy(&beta_fast, (int32_t *) tensor->op_params + 9, sizeof(float));
  4830. memcpy(&beta_slow, (int32_t *) tensor->op_params + 10, sizeof(float));
  4831. src0->grad = ggml_add_or_set(ctx,
  4832. src0->grad,
  4833. ggml_rope_back(ctx,
  4834. tensor->grad,
  4835. src1,
  4836. src2,
  4837. n_dims,
  4838. mode,
  4839. n_ctx_orig,
  4840. freq_base,
  4841. freq_scale,
  4842. ext_factor,
  4843. attn_factor,
  4844. beta_fast,
  4845. beta_slow),
  4846. zero_table, acc_table);
  4847. }
  4848. GGML_ASSERT((!src2 || !src2->grad) && "gradients for freq factors not implemented");
  4849. } break;
  4850. case GGML_OP_ROPE_BACK:
  4851. {
  4852. if (src0->grad) {
  4853. //const int n_past = ((int32_t *) tensor->op_params)[0];
  4854. const int n_dims = ((int32_t *) tensor->op_params)[1];
  4855. const int mode = ((int32_t *) tensor->op_params)[2];
  4856. //const int n_ctx = ((int32_t *) tensor->op_params)[3];
  4857. const int n_ctx_orig = ((int32_t *) tensor->op_params)[4];
  4858. float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
  4859. memcpy(&freq_base, (int32_t *) tensor->op_params + 5, sizeof(float));
  4860. memcpy(&freq_scale, (int32_t *) tensor->op_params + 6, sizeof(float));
  4861. memcpy(&ext_factor, (int32_t *) tensor->op_params + 7, sizeof(float));
  4862. memcpy(&attn_factor, (int32_t *) tensor->op_params + 8, sizeof(float));
  4863. memcpy(&beta_fast, (int32_t *) tensor->op_params + 9, sizeof(float));
  4864. memcpy(&beta_slow, (int32_t *) tensor->op_params + 10, sizeof(float));
  4865. src0->grad = ggml_add_or_set(ctx,
  4866. src0->grad,
  4867. ggml_rope_impl(ctx,
  4868. tensor->grad,
  4869. src1,
  4870. src2,
  4871. n_dims,
  4872. mode,
  4873. n_ctx_orig,
  4874. freq_base,
  4875. freq_scale,
  4876. ext_factor,
  4877. attn_factor,
  4878. beta_fast,
  4879. beta_slow,
  4880. false),
  4881. zero_table, acc_table);
  4882. }
  4883. } break;
  4884. case GGML_OP_CLAMP:
  4885. {
  4886. GGML_ABORT("fatal error"); // TODO: not implemented
  4887. }
  4888. case GGML_OP_CONV_TRANSPOSE_1D:
  4889. {
  4890. GGML_ABORT("fatal error"); // TODO: not implemented
  4891. }
  4892. case GGML_OP_IM2COL:
  4893. {
  4894. if (src1->grad) {
  4895. const int32_t s0 = ggml_get_op_params_i32(tensor, 0);
  4896. const int32_t s1 = ggml_get_op_params_i32(tensor, 1);
  4897. const int32_t p0 = ggml_get_op_params_i32(tensor, 2);
  4898. const int32_t p1 = ggml_get_op_params_i32(tensor, 3);
  4899. const int32_t d0 = ggml_get_op_params_i32(tensor, 4);
  4900. const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
  4901. const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
  4902. src1->grad = ggml_add_or_set(ctx,
  4903. src1->grad,
  4904. ggml_im2col_back(ctx, src0, tensor->grad, src1->ne, s0, s1, p0, p1, d0, d1, is_2D),
  4905. zero_table, acc_table);
  4906. }
  4907. } break;
  4908. case GGML_OP_IM2COL_BACK:
  4909. {
  4910. GGML_ABORT("fatal error"); // TODO: not implemented
  4911. }
  4912. case GGML_OP_CONV_TRANSPOSE_2D:
  4913. {
  4914. GGML_ABORT("fatal error"); // TODO: not implemented
  4915. }
  4916. case GGML_OP_POOL_1D:
  4917. {
  4918. GGML_ABORT("fatal error"); // TODO: not implemented
  4919. }
  4920. case GGML_OP_POOL_2D:
  4921. {
  4922. if (src0->grad) {
  4923. const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
  4924. const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
  4925. const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
  4926. const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
  4927. const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
  4928. const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
  4929. const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
  4930. src0->grad = ggml_add_or_set(ctx,
  4931. src0->grad,
  4932. ggml_pool_2d_back(ctx, tensor->grad, src0, op, k0, k1, s0, s1, p0, p1),
  4933. zero_table, acc_table);
  4934. }
  4935. } break;
  4936. case GGML_OP_POOL_2D_BACK:
  4937. {
  4938. GGML_ABORT("fatal error"); // TODO: not implemented
  4939. }
  4940. case GGML_OP_UPSCALE:
  4941. {
  4942. GGML_ABORT("fatal error"); // TODO: not implemented
  4943. }
  4944. case GGML_OP_PAD:
  4945. {
  4946. GGML_ABORT("fatal error"); // TODO: not implemented
  4947. }
  4948. case GGML_OP_ARANGE:
  4949. {
  4950. GGML_ABORT("fatal error"); // TODO: not implemented
  4951. }
  4952. case GGML_OP_TIMESTEP_EMBEDDING:
  4953. {
  4954. GGML_ABORT("fatal error"); // TODO: not implemented
  4955. }
  4956. case GGML_OP_ARGSORT:
  4957. {
  4958. GGML_ABORT("fatal error"); // TODO: not implemented
  4959. }
  4960. case GGML_OP_LEAKY_RELU:
  4961. {
  4962. GGML_ABORT("fatal error"); // TODO: not implemented
  4963. }
  4964. case GGML_OP_FLASH_ATTN_EXT:
  4965. {
  4966. GGML_ABORT("FA backward pass not adapted after rework");
  4967. struct ggml_tensor * flash_grad = NULL;
  4968. if (src0->grad || src1->grad || tensor->src[2]->grad) {
  4969. int32_t t = ggml_get_op_params_i32(tensor, 0);
  4970. GGML_ASSERT(t == 0 || t == 1);
  4971. bool masked = t != 0;
  4972. flash_grad =
  4973. ggml_flash_attn_back(ctx,
  4974. src0,
  4975. src1,
  4976. tensor->src[2],
  4977. tensor->grad,
  4978. masked);
  4979. }
  4980. const int64_t elem_q = ggml_nelements(src0);
  4981. const int64_t elem_k = ggml_nelements(src1);
  4982. const int64_t elem_v = ggml_nelements(src2);
  4983. enum ggml_type result_type = flash_grad->type;
  4984. GGML_ASSERT(ggml_blck_size(result_type) == 1);
  4985. const size_t tsize = ggml_type_size(result_type);
  4986. const size_t offs_q = 0;
  4987. const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
  4988. const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
  4989. if (src0->grad) {
  4990. struct ggml_tensor * view_q = ggml_view_1d(ctx, flash_grad, elem_q, offs_q);
  4991. struct ggml_tensor * grad_q = ggml_reshape(ctx, view_q, src0);
  4992. src0->grad = ggml_add_or_set(ctx,
  4993. src0->grad,
  4994. grad_q,
  4995. zero_table, acc_table);
  4996. }
  4997. if (src1->grad) {
  4998. struct ggml_tensor * view_k = ggml_view_1d(ctx, flash_grad, elem_k, offs_k);
  4999. struct ggml_tensor * grad_k = ggml_reshape(ctx, view_k, src1);
  5000. src1->grad = ggml_add_or_set(ctx,
  5001. src1->grad,
  5002. grad_k,
  5003. zero_table, acc_table);
  5004. }
  5005. if (src2->grad) {
  5006. struct ggml_tensor * view_v = ggml_view_1d(ctx, flash_grad, elem_v, offs_v);
  5007. struct ggml_tensor * grad_v = ggml_reshape(ctx, view_v, src2);
  5008. src2->grad = ggml_add_or_set(ctx,
  5009. src2->grad,
  5010. grad_v,
  5011. zero_table, acc_table);
  5012. }
  5013. } break;
  5014. case GGML_OP_FLASH_ATTN_BACK:
  5015. {
  5016. GGML_ABORT("fatal error"); // not supported
  5017. }
  5018. case GGML_OP_SSM_CONV:
  5019. case GGML_OP_SSM_SCAN:
  5020. {
  5021. GGML_ABORT("fatal error"); // TODO: not implemented
  5022. }
  5023. case GGML_OP_WIN_PART:
  5024. case GGML_OP_WIN_UNPART:
  5025. case GGML_OP_UNARY:
  5026. {
  5027. switch (ggml_get_unary_op(tensor)) {
  5028. case GGML_UNARY_OP_ABS:
  5029. {
  5030. if (src0->grad) {
  5031. src0->grad =
  5032. ggml_add_or_set(ctx,
  5033. src0->grad,
  5034. ggml_mul(ctx,
  5035. ggml_sgn(ctx, src0),
  5036. tensor->grad),
  5037. zero_table, acc_table);
  5038. }
  5039. } break;
  5040. case GGML_UNARY_OP_SGN:
  5041. {
  5042. if (src0->grad) {
  5043. // noop
  5044. }
  5045. } break;
  5046. case GGML_UNARY_OP_NEG:
  5047. {
  5048. if (src0->grad) {
  5049. src0->grad = ggml_sub_or_set(ctx, src0->grad, tensor->grad, zero_table, acc_table);
  5050. }
  5051. } break;
  5052. case GGML_UNARY_OP_STEP:
  5053. {
  5054. if (src0->grad) {
  5055. // noop
  5056. }
  5057. } break;
  5058. case GGML_UNARY_OP_TANH:
  5059. {
  5060. GGML_ABORT("fatal error"); // TODO: not implemented
  5061. }
  5062. case GGML_UNARY_OP_ELU:
  5063. {
  5064. GGML_ABORT("fatal error"); // TODO: not implemented
  5065. }
  5066. case GGML_UNARY_OP_RELU:
  5067. {
  5068. if (src0->grad) {
  5069. src0->grad = ggml_add_or_set(ctx,
  5070. src0->grad,
  5071. ggml_mul(ctx,
  5072. ggml_step(ctx, src0),
  5073. tensor->grad),
  5074. zero_table, acc_table);
  5075. }
  5076. } break;
  5077. case GGML_UNARY_OP_SIGMOID:
  5078. {
  5079. GGML_ABORT("fatal error"); // TODO: not implemented
  5080. }
  5081. case GGML_UNARY_OP_GELU:
  5082. {
  5083. GGML_ABORT("fatal error"); // TODO: not implemented
  5084. }
  5085. case GGML_UNARY_OP_GELU_QUICK:
  5086. {
  5087. GGML_ABORT("fatal error"); // TODO: not implemented
  5088. }
  5089. case GGML_UNARY_OP_SILU:
  5090. {
  5091. // necessary for llama
  5092. if (src0->grad) {
  5093. src0->grad = ggml_add_or_set(ctx,
  5094. src0->grad,
  5095. ggml_silu_back(ctx, src0, tensor->grad),
  5096. zero_table, acc_table);
  5097. }
  5098. } break;
  5099. case GGML_UNARY_OP_EXP:
  5100. {
  5101. if (src0->grad) {
  5102. src0->grad = ggml_add_or_set(ctx,
  5103. src0->grad,
  5104. ggml_mul(ctx, tensor, tensor->grad),
  5105. zero_table, acc_table);
  5106. }
  5107. } break;
  5108. default:
  5109. GGML_ABORT("fatal error");
  5110. }
  5111. } break;
  5112. case GGML_OP_GET_REL_POS:
  5113. case GGML_OP_ADD_REL_POS:
  5114. case GGML_OP_RWKV_WKV6:
  5115. case GGML_OP_MAP_UNARY:
  5116. case GGML_OP_MAP_BINARY:
  5117. case GGML_OP_MAP_CUSTOM1_F32:
  5118. case GGML_OP_MAP_CUSTOM2_F32:
  5119. case GGML_OP_MAP_CUSTOM3_F32:
  5120. case GGML_OP_MAP_CUSTOM1:
  5121. case GGML_OP_MAP_CUSTOM2:
  5122. case GGML_OP_MAP_CUSTOM3:
  5123. {
  5124. GGML_ABORT("fatal error"); // not supported
  5125. }
  5126. case GGML_OP_CROSS_ENTROPY_LOSS:
  5127. {
  5128. if (src0->grad) {
  5129. src0->grad = ggml_add_or_set(ctx,
  5130. src0->grad,
  5131. ggml_cross_entropy_loss_back(ctx,
  5132. src0,
  5133. src1,
  5134. tensor->grad),
  5135. zero_table, acc_table);
  5136. }
  5137. GGML_ASSERT(!src1->grad && "backward pass for labels not implemented");
  5138. } break;
  5139. case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
  5140. {
  5141. GGML_ABORT("fatal error"); // not supported
  5142. }
  5143. case GGML_OP_OPT_STEP_ADAMW:
  5144. {
  5145. GGML_ABORT("fatal error"); // not supported
  5146. }
  5147. case GGML_OP_NONE:
  5148. {
  5149. // nop
  5150. } break;
  5151. case GGML_OP_COUNT:
  5152. {
  5153. GGML_ABORT("fatal error");
  5154. }
  5155. }
  5156. for (int i = 0; i < GGML_MAX_SRC; ++i) {
  5157. if (tensor->src[i] && tensor->src[i]->grad) {
  5158. GGML_ASSERT(ggml_are_same_shape(tensor->src[i], tensor->src[i]->grad));
  5159. }
  5160. }
  5161. }
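// Worked example of what one case above produces: for c = ggml_mul(ctx, a, b) with
// upstream gradient dc == c->grad, the GGML_OP_MUL branch emits
//
//     a->grad += b * dc;   // ggml_add_or_set(ctx, a->grad, ggml_mul(ctx, b, dc), ...)
//     b->grad += a * dc;   // ggml_add_or_set(ctx, b->grad, ggml_mul(ctx, a, dc), ...)
//
// i.e. the usual product rule, with ggml_add_or_set() collapsing the "+=" into a plain
// assignment when the destination gradient is still known to be zero.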
  5162. static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
  5163. if (node->grad == NULL) {
  5164. // this usually happens when we generate intermediate nodes from constants in the backward pass
  5165. // it can also happen during forward pass, if the user performs computations with constants
  5166. if (node->op != GGML_OP_NONE) {
  5167. //GGML_PRINT_DEBUG("%s: warning: node %p has no grad, but op %d\n", __func__, (void *) node, node->op);
  5168. }
  5169. }
  5170. // check if already visited
  5171. if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
  5172. return;
  5173. }
  5174. for (int i = 0; i < GGML_MAX_SRC; ++i) {
  5175. const int k =
  5176. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
  5177. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
5178. /* unknown order, just fall back to using i */ i;
  5179. if (node->src[k]) {
  5180. ggml_visit_parents(cgraph, node->src[k]);
  5181. }
  5182. }
  5183. if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
  5184. // reached a leaf node, not part of the gradient graph (e.g. a constant)
  5185. GGML_ASSERT(cgraph->n_leafs < cgraph->size);
  5186. if (strlen(node->name) == 0) {
  5187. ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
  5188. }
  5189. cgraph->leafs[cgraph->n_leafs] = node;
  5190. cgraph->n_leafs++;
  5191. } else {
  5192. GGML_ASSERT(cgraph->n_nodes < cgraph->size);
  5193. if (strlen(node->name) == 0) {
  5194. ggml_format_name(node, "node_%d", cgraph->n_nodes);
  5195. }
  5196. cgraph->nodes[cgraph->n_nodes] = node;
  5197. cgraph->n_nodes++;
  5198. }
  5199. }
  5200. static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
  5201. if (!expand) {
  5202. // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
  5203. ggml_graph_clear(cgraph);
  5204. }
  5205. const int n0 = cgraph->n_nodes;
  5206. ggml_visit_parents(cgraph, tensor);
  5207. const int n_new = cgraph->n_nodes - n0;
  5208. GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
  5209. if (n_new > 0) {
5210. // the last added node should always be the starting point
  5211. GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
  5212. }
  5213. }
  5214. void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  5215. ggml_build_forward_impl(cgraph, tensor, true);
  5216. }
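// Usage sketch (hedged): typical inference code builds the graph by expanding from the
// final tensor; ggml_visit_parents() above then performs the topological sort.
//
//     struct ggml_cgraph * gf = ggml_new_graph(ctx0);
//     struct ggml_tensor * y  = ggml_mul_mat(ctx0, w, x);   // ... whatever the model computes
//     ggml_build_forward_expand(gf, y);
//     // gf->nodes[0..n_nodes-1] is now a valid evaluation order ending at y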
  5217. void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool accumulate) {
  5218. GGML_ASSERT(gf->n_nodes > 0);
  5219. GGML_ASSERT(gf->grads);
  5220. for (int i = 0; i < gf->n_nodes; ++i) {
  5221. struct ggml_tensor * node = gf->nodes[i];
  5222. if (node->type == GGML_TYPE_I32) {
  5223. continue;
  5224. }
  5225. bool needs_grad = node->flags & GGML_TENSOR_FLAG_PARAM;
  5226. bool ignore_src[GGML_MAX_SRC] = {false};
  5227. switch (node->op) {
  5228. // gradients in node->src[0] for one reason or another have no effect on output gradients
  5229. case GGML_OP_IM2COL: // only used for its shape
  5230. case GGML_OP_IM2COL_BACK: // same as IM2COL
  5231. ignore_src[0] = true;
  5232. break;
  5233. case GGML_OP_UNARY: {
  5234. const enum ggml_unary_op uop = ggml_get_unary_op(node);
  5235. // SGN and STEP unary ops are piecewise constant
  5236. if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
  5237. ignore_src[0] = true;
  5238. }
  5239. } break;
  5240. // gradients in node->src[1] for one reason or another have no effect on output gradients
  5241. case GGML_OP_CPY: // gradients in CPY target are irrelevant
  5242. case GGML_OP_GET_ROWS: // row indices not differentiable
  5243. case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
  5244. case GGML_OP_ROPE: // positions not differentiable
  5245. ignore_src[1] = true;
  5246. break;
  5247. default:
  5248. break;
  5249. }
  5250. for (int j = 0; j < GGML_MAX_SRC; ++j) {
  5251. if (!node->src[j] || !node->src[j]->grad || ignore_src[j]) {
  5252. continue;
  5253. }
  5254. GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
  5255. needs_grad = true;
  5256. break;
  5257. }
  5258. if (!needs_grad) {
  5259. continue;
  5260. }
  5261. // inplace operations are currently not supported
  5262. GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
  5263. node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
  5264. // create a new tensor with the same type and shape as the node and set it as grad
  5265. node->grad = ggml_dup_tensor(ctx, node);
  5266. }
  5267. // keep tables of original gradients for replacement/accumulation logic
  5268. struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
  5269. struct ggml_hash_set acc_table = ggml_hash_set_new(gf->size);
  5270. for (int i = 0; i < gf->n_nodes; i++) {
  5271. struct ggml_tensor * node = gf->nodes[i];
  5272. if (node->grad) {
  5273. {
  5274. const size_t insert_result = ggml_hash_insert(&zero_table, node->grad);
  5275. GGML_ASSERT(insert_result != GGML_HASHSET_FULL);
  5276. GGML_ASSERT(insert_result != GGML_HASHSET_ALREADY_EXISTS);
  5277. }
  5278. // only gradients of trainable parameters should be accumulated
  5279. if (accumulate && (node->flags & GGML_TENSOR_FLAG_PARAM)) {
  5280. const size_t insert_result = ggml_hash_insert(&acc_table, node->grad);
  5281. GGML_ASSERT(insert_result != GGML_HASHSET_FULL);
  5282. GGML_ASSERT(insert_result != GGML_HASHSET_ALREADY_EXISTS);
  5283. }
  5284. }
  5285. }
  5286. for (int i = gf->n_nodes - 1; i >= 0; i--) {
  5287. struct ggml_tensor * node = gf->nodes[i];
5288. // ggml_compute_backward does not create inplace operations to add gradients, except for gradient accumulation;
5289. // the allocator is relied on to make these operations inplace automatically
  5290. if (node->grad) {
  5291. ggml_compute_backward(ctx, node, &zero_table, &acc_table);
  5292. }
  5293. }
  5294. for (int i = 0; i < gf->n_nodes; i++) {
  5295. struct ggml_tensor * node = gf->nodes[i];
  5296. if (node->flags & GGML_TENSOR_FLAG_PARAM) {
  5297. GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
  5298. ggml_build_forward_expand(gb, node->grad);
  5299. }
  5300. }
  5301. ggml_hash_set_free(&zero_table);
  5302. ggml_hash_set_free(&acc_table);
  5303. }
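// Usage sketch (hedged) of the full forward/backward construction, mirroring how the ggml
// examples drive this function. `x` is marked as a trainable parameter so a grad tensor is
// created for it; `accumulate == false` means gradients are overwritten on each backward
// pass instead of summed. `build_model` is a hypothetical helper for the caller's model:
//
//     ggml_set_param(ctx0, x);                              // sets GGML_TENSOR_FLAG_PARAM
//     struct ggml_tensor * loss = build_model(ctx0, x);
//     struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
//     ggml_build_forward_expand(gf, loss);
//     struct ggml_cgraph * gb = ggml_graph_dup(ctx0, gf);
//     ggml_build_backward_expand(ctx0, gf, gb, /*accumulate =*/ false);
//
// After initializing gradients (ggml_graph_reset() further below sets d(loss) = 1 for
// tensors flagged as loss) and computing gb, x->grad holds d(loss)/dx.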
  5304. void ggml_build_opt_adamw(
  5305. struct ggml_context * ctx,
  5306. struct ggml_cgraph * gf,
  5307. struct ggml_cgraph * gb,
  5308. float alpha,
  5309. float beta1,
  5310. float beta2,
  5311. float eps,
  5312. float wd) {
  5313. for (int i = 0; i < gf->n_nodes; i++) {
  5314. struct ggml_tensor * node = gf->nodes[i];
  5315. if (node->flags & GGML_TENSOR_FLAG_PARAM) {
  5316. GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
  5317. struct ggml_tensor * opt_step = ggml_opt_step_adamw(ctx, node, node->grad, alpha, beta1, beta2, eps, wd);
  5318. ggml_build_forward_expand(gb, opt_step);
  5319. }
  5320. }
  5321. }
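// Usage sketch (hedged): appending the optimizer steps to the backward graph gb yields a
// single graph that, when computed, performs the forward pass, the backward pass and the
// AdamW update in one go:
//
//     ggml_build_opt_adamw(ctx0, gf, gb, /*alpha=*/1e-3f, /*beta1=*/0.9f, /*beta2=*/0.999f,
//                          /*eps=*/1e-8f, /*wd=*/0.0f);
//     // one training step (backend-agnostic outline):
//     //   1. upload the next batch into the input tensors
//     //   2. compute gb with the chosen backend
//     //   3. repeat; ggml_graph_reset() (further below) re-initializes gradients and AdamW state
//
// Hyperparameter values here are illustrative defaults, not recommendations.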
  5322. static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
  5323. void * ptr = *p;
  5324. ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
  5325. *p = (void *) ((char *) ptr + size);
  5326. return ptr;
  5327. }
  5328. static size_t ggml_graph_nbytes(size_t size, bool grads) {
  5329. size_t hash_size = ggml_hash_size(size * 2);
  5330. void * p = 0;
  5331. incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
  5332. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
  5333. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
  5334. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
  5335. if (grads) {
  5336. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
  5337. }
  5338. incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  5339. size_t nbytes = (size_t) p;
  5340. return nbytes;
  5341. }
  5342. size_t ggml_graph_overhead_custom(size_t size, bool grads) {
  5343. return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
  5344. }
  5345. size_t ggml_graph_overhead(void) {
  5346. return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
  5347. }
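// Usage sketch (hedged): these overhead helpers are what callers use to size a context that
// only needs to hold graph metadata (tensor data allocated elsewhere, e.g. by a graph
// allocator). `graph_size` is a caller-chosen upper bound on the number of nodes:
//
//     struct ggml_init_params params = {
//         /*.mem_size   =*/ ggml_tensor_overhead()*graph_size + ggml_graph_overhead_custom(graph_size, false),
//         /*.mem_buffer =*/ NULL,
//         /*.no_alloc   =*/ true,                           // metadata only, no tensor data
//     };
//     struct ggml_context * ctx_graph = ggml_init(params);
//     struct ggml_cgraph  * gf        = ggml_new_graph_custom(ctx_graph, graph_size, false);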
  5348. struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
  5349. const size_t obj_size = ggml_graph_nbytes(size, grads);
  5350. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
  5351. struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
  5352. // the size of the hash table is doubled since it needs to hold both nodes and leafs
  5353. size_t hash_size = ggml_hash_size(size * 2);
  5354. void * p = cgraph + 1;
  5355. struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  5356. struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  5357. struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  5358. struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  5359. ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  5360. // check that we allocated the correct amount of memory
  5361. assert(obj_size == (size_t)((char *)p - (char *)cgraph));
  5362. *cgraph = (struct ggml_cgraph) {
  5363. /*.size =*/ size,
  5364. /*.n_nodes =*/ 0,
  5365. /*.n_leafs =*/ 0,
  5366. /*.nodes =*/ nodes_ptr,
  5367. /*.grads =*/ grads_ptr,
  5368. /*.leafs =*/ leafs_ptr,
  5369. /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
  5370. /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
  5371. };
  5372. ggml_hash_set_reset(&cgraph->visited_hash_set);
  5373. return cgraph;
  5374. }
  5375. struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
  5376. return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
  5377. }
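// [editor's note] Usage sketch, not part of the original source: the overhead helpers above let a
// caller size a ggml_context that holds nothing but graph metadata. example_graph_allocation is a
// hypothetical name.
static void example_graph_allocation(void) {
    struct ggml_init_params params = {
        .mem_size   = ggml_graph_overhead_custom(2048, /*grads=*/false),
        .mem_buffer = NULL,
        .no_alloc   = true, // only graph metadata lives in this context
    };
    struct ggml_context * ctx = ggml_init(params);
    struct ggml_cgraph  * gf  = ggml_new_graph_custom(ctx, 2048, /*grads=*/false);
    GGML_ASSERT(ggml_graph_size(gf) == 2048);
    ggml_free(ctx);
}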
  5378. struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
  5379. struct ggml_cgraph cgraph = {
  5380. /*.size =*/ 0,
  5381. /*.n_nodes =*/ i1 - i0,
  5382. /*.n_leafs =*/ 0,
  5383. /*.nodes =*/ cgraph0->nodes + i0,
  5384. /*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
  5385. /*.leafs =*/ NULL,
  5386. /*.hash_table =*/ { 0, NULL, NULL },
  5387. /*.order =*/ cgraph0->order,
  5388. };
  5389. return cgraph;
  5390. }
  5391. void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
  5392. GGML_ASSERT(dst->size >= src->n_leafs);
  5393. GGML_ASSERT(dst->size >= src->n_nodes);
  5394. GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
  5395. dst->n_leafs = src->n_leafs;
  5396. dst->n_nodes = src->n_nodes;
  5397. dst->order = src->order;
  5398. for (int i = 0; i < src->n_leafs; ++i) {
  5399. dst->leafs[i] = src->leafs[i];
  5400. }
  5401. for (int i = 0; i < src->n_nodes; ++i) {
  5402. dst->nodes[i] = src->nodes[i];
  5403. }
  5404. if (src->grads) {
  5405. GGML_ASSERT(dst->grads != NULL);
  5406. for (int i = 0; i < src->n_nodes; ++i) {
  5407. dst->grads[i] = src->grads[i];
  5408. }
  5409. }
  5410. for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
  5411. // copy all hashset keys (tensors) that are in use
  5412. if (ggml_bitset_get(src->visited_hash_set.used, i)) {
  5413. ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
  5414. }
  5415. }
  5416. }
  5417. struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
  5418. struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
  5419. ggml_graph_cpy(cgraph, result);
  5420. return result;
  5421. }
  5422. struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
  5423. if (ggml_is_empty(tensor)) {
  5424. return tensor;
  5425. }
  5426. if (tensor->buffer) {
  5427. ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
  5428. } else {
  5429. GGML_ASSERT(tensor->data);
  5430. memset(tensor->data, 0, ggml_nbytes(tensor));
  5431. }
  5432. return tensor;
  5433. }
  5434. void ggml_graph_reset(struct ggml_cgraph * cgraph) {
  5435. GGML_ASSERT(cgraph->grads != NULL);
  5436. for (int i = 0; i < cgraph->n_nodes; i++) {
  5437. struct ggml_tensor * node = cgraph->nodes[i];
5438. // the initial gradient of the loss tensor should be 1, all other gradients 0
  5439. if (node->grad) {
  5440. if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  5441. GGML_ASSERT(node->grad->buffer);
  5442. GGML_ASSERT(node->type == GGML_TYPE_F32);
  5443. GGML_ASSERT(ggml_is_scalar(node));
  5444. const float onef = 1.0f;
  5445. ggml_backend_tensor_set(node->grad, &onef, 0, ggml_nbytes(node->grad));
  5446. } else {
  5447. ggml_set_zero(node->grad);
  5448. }
  5449. }
  5450. GGML_ASSERT(node);
  5451. if (node->op == GGML_OP_OPT_STEP_ADAMW) {
  5452. // set iteration to 1 and clear momenta
  5453. ggml_set_op_params_i32(node, 0, 1);
  5454. ggml_set_zero(node->src[2]);
  5455. ggml_set_zero(node->src[3]);
  5456. }
  5457. }
  5458. }
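// [editor's note] Sketch, not part of the original source: ggml_graph_reset is typically called
// before (re)starting an optimization so that the loss gradient is seeded with 1, all other
// gradients are zeroed, and the AdamW iteration counter/momenta are cleared. ggml_backend_t and
// ggml_backend_graph_compute are assumed to come from ggml-backend.h; example_training_step is a
// hypothetical name.
static void example_training_step(ggml_backend_t backend, struct ggml_cgraph * gb) {
    ggml_graph_reset(gb);                    // requires gb to carry gradients
    ggml_backend_graph_compute(backend, gb); // forward + backward + AdamW update in one pass
}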
  5459. void ggml_graph_clear(struct ggml_cgraph * cgraph) {
  5460. cgraph->n_leafs = 0;
  5461. cgraph->n_nodes = 0;
  5462. ggml_hash_set_reset(&cgraph->visited_hash_set);
  5463. }
  5464. int ggml_graph_size(struct ggml_cgraph * cgraph) {
  5465. return cgraph->size;
  5466. }
  5467. struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
  5468. if (i < 0) {
  5469. GGML_ASSERT(cgraph->n_nodes + i >= 0);
  5470. return cgraph->nodes[cgraph->n_nodes + i];
  5471. }
  5472. GGML_ASSERT(i < cgraph->n_nodes);
  5473. return cgraph->nodes[i];
  5474. }
  5475. struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
  5476. return cgraph->nodes;
  5477. }
  5478. int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
  5479. return cgraph->n_nodes;
  5480. }
  5481. void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  5482. GGML_ASSERT(cgraph->size > cgraph->n_nodes);
  5483. cgraph->nodes[cgraph->n_nodes] = tensor;
  5484. cgraph->n_nodes++;
  5485. }
  5486. struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
  5487. for (int i = 0; i < cgraph->n_leafs; i++) {
  5488. struct ggml_tensor * leaf = cgraph->leafs[i];
  5489. if (strcmp(leaf->name, name) == 0) {
  5490. return leaf;
  5491. }
  5492. }
  5493. for (int i = 0; i < cgraph->n_nodes; i++) {
  5494. struct ggml_tensor * node = cgraph->nodes[i];
  5495. if (strcmp(node->name, name) == 0) {
  5496. return node;
  5497. }
  5498. }
  5499. return NULL;
  5500. }
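// [editor's note] Sketch, not part of the original source: tensors are looked up by the name
// assigned with ggml_set_name; leafs are searched before nodes. The "loss" name and
// example_read_named_scalar are hypothetical; ggml_backend_tensor_get comes from ggml-backend.h.
static float example_read_named_scalar(struct ggml_cgraph * gf) {
    struct ggml_tensor * t = ggml_graph_get_tensor(gf, "loss");
    float v = 0.0f;
    if (t != NULL) {
        ggml_backend_tensor_get(t, &v, 0, sizeof(v)); // assumes the tensor lives in a backend buffer
    }
    return v;
}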
  5501. void ggml_graph_print(const struct ggml_cgraph * cgraph) {
  5502. GGML_LOG_INFO("=== GRAPH ===\n");
  5503. GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
  5504. for (int i = 0; i < cgraph->n_nodes; i++) {
  5505. struct ggml_tensor * node = cgraph->nodes[i];
  5506. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
  5507. i,
  5508. node->ne[0], node->ne[1], node->ne[2],
  5509. ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ");
  5510. }
  5511. GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
  5512. for (int i = 0; i < cgraph->n_leafs; i++) {
  5513. struct ggml_tensor * node = cgraph->leafs[i];
  5514. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
  5515. i,
  5516. node->ne[0], node->ne[1],
  5517. ggml_op_name(node->op),
  5518. ggml_get_name(node));
  5519. }
  5520. GGML_LOG_INFO("========================================\n");
  5521. }
5522. // check if node is part of the graph (a NULL graph matches any node)
  5523. static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5524. if (cgraph == NULL) {
  5525. return true;
  5526. }
  5527. for (int i = 0; i < cgraph->n_nodes; i++) {
  5528. if (cgraph->nodes[i] == node) {
  5529. return true;
  5530. }
  5531. }
  5532. return false;
  5533. }
  5534. static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5535. for (int i = 0; i < cgraph->n_nodes; i++) {
  5536. struct ggml_tensor * parent = cgraph->nodes[i];
  5537. if (parent->grad == node) {
  5538. return parent;
  5539. }
  5540. }
  5541. return NULL;
  5542. }
  5543. static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5544. struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
  5545. struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
  5546. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
  5547. gparent0 ? (void *) gparent0 : (void *) parent,
  5548. gparent0 ? "g" : "x",
  5549. gparent ? (void *) gparent : (void *) node,
  5550. gparent ? "g" : "x",
  5551. gparent ? "empty" : "vee",
  5552. gparent ? "dashed" : "solid",
  5553. label);
  5554. }
  5555. static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5556. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n",
  5557. (void *) parent, "x",
  5558. (void *) node, "x",
  5559. label);
  5560. }
  5561. void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
  5562. char color[16];
  5563. FILE * fp = ggml_fopen(filename, "w");
  5564. GGML_ASSERT(fp);
  5565. fprintf(fp, "digraph G {\n");
  5566. fprintf(fp, " newrank = true;\n");
  5567. fprintf(fp, " rankdir = TB;\n");
  5568. for (int i = 0; i < gb->n_nodes; i++) {
  5569. struct ggml_tensor * node = gb->nodes[i];
  5570. if (ggml_graph_get_parent(gb, node) != NULL) {
  5571. continue;
  5572. }
  5573. if (node->flags & GGML_TENSOR_FLAG_PARAM) {
  5574. snprintf(color, sizeof(color), "yellow");
  5575. } else if (node->grad) {
  5576. if (ggml_graph_find(gf, node)) {
  5577. snprintf(color, sizeof(color), "green");
  5578. } else {
  5579. snprintf(color, sizeof(color), "lightblue");
  5580. }
  5581. } else {
  5582. snprintf(color, sizeof(color), "white");
  5583. }
  5584. fprintf(fp, " \"%p\" [ "
  5585. "style = filled; fillcolor = %s; shape = record; "
  5586. "label=\"",
  5587. (void *) node, color);
  5588. if (strlen(node->name) > 0) {
  5589. fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
  5590. } else {
  5591. fprintf(fp, "(%s)|", ggml_type_name(node->type));
  5592. }
  5593. if (ggml_is_matrix(node)) {
  5594. fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
  5595. } else {
  5596. fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
  5597. }
  5598. if (node->grad) {
  5599. fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
  5600. } else {
  5601. fprintf(fp, "\"; ]\n");
  5602. }
  5603. }
  5604. for (int i = 0; i < gb->n_leafs; i++) {
  5605. struct ggml_tensor * node = gb->leafs[i];
  5606. snprintf(color, sizeof(color), "pink");
  5607. fprintf(fp, " \"%p\" [ "
  5608. "style = filled; fillcolor = %s; shape = record; "
  5609. "label=\"<x>",
  5610. (void *) node, color);
  5611. if (strlen(node->name) > 0) {
  5612. fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
  5613. } else {
  5614. fprintf(fp, "(%s)|", ggml_type_name(node->type));
  5615. }
  5616. fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
  5617. if (ggml_nelements(node) < 5 && node->data != NULL) {
  5618. fprintf(fp, " | (");
  5619. for (int j = 0; j < ggml_nelements(node); j++) {
  5620. // FIXME: use ggml-backend to obtain the tensor data
  5621. //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
  5622. // fprintf(fp, "%d", ggml_get_i32_1d(node, j));
  5623. //}
  5624. //else if (node->type == GGML_TYPE_F32 ||
  5625. // node->type == GGML_TYPE_F16 ||
  5626. // node->type == GGML_TYPE_BF16) {
  5627. // fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
  5628. //}
  5629. //else
  5630. {
  5631. fprintf(fp, "#");
  5632. }
  5633. if (j < ggml_nelements(node) - 1) {
  5634. fprintf(fp, ", ");
  5635. }
  5636. }
  5637. fprintf(fp, ")");
  5638. }
  5639. fprintf(fp, "\"; ]\n");
  5640. }
  5641. for (int i = 0; i < gb->n_nodes; i++) {
  5642. struct ggml_tensor * node = gb->nodes[i];
  5643. for (int j = 0; j < GGML_MAX_SRC; j++) {
  5644. if (node->src[j]) {
  5645. char label[16];
  5646. snprintf(label, sizeof(label), "src %d", j);
  5647. ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
  5648. }
  5649. }
  5650. }
  5651. for (int i = 0; i < gb->n_leafs; i++) {
  5652. struct ggml_tensor * node = gb->leafs[i];
  5653. for (int j = 0; j < GGML_MAX_SRC; j++) {
  5654. if (node->src[j]) {
  5655. char label[16];
  5656. snprintf(label, sizeof(label), "src %d", j);
  5657. ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
  5658. }
  5659. }
  5660. }
  5661. fprintf(fp, "}\n");
  5662. fclose(fp);
  5663. GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
  5664. }
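// [editor's note] Sketch, not part of the original source: dumping the backward graph gb while
// passing the forward graph gf so that nodes present in both are colored differently.
// example_dump_graphs is a hypothetical name.
static void example_dump_graphs(struct ggml_cgraph * gf, struct ggml_cgraph * gb) {
    ggml_graph_dump_dot(gb, gf, "ggml-graph.dot");
    // render offline, e.g.: dot -Tpng ggml-graph.dot -o ggml-graph.png
}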
  5665. ////////////////////////////////////////////////////////////////////////////////
  5666. void ggml_set_input(struct ggml_tensor * tensor) {
  5667. tensor->flags |= GGML_TENSOR_FLAG_INPUT;
  5668. }
  5669. void ggml_set_output(struct ggml_tensor * tensor) {
  5670. tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
  5671. }
  5672. void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor) {
  5673. GGML_UNUSED(ctx); // TODO: remove this parameter
  5674. tensor->flags |= GGML_TENSOR_FLAG_PARAM;
  5675. }
  5676. void ggml_set_loss(struct ggml_tensor * tensor) {
  5677. GGML_ASSERT(ggml_is_scalar(tensor));
  5678. GGML_ASSERT(tensor->type == GGML_TYPE_F32);
  5679. tensor->flags |= GGML_TENSOR_FLAG_LOSS;
  5680. }
  5681. ////////////////////////////////////////////////////////////////////////////////
  5682. void ggml_quantize_init(enum ggml_type type) {
  5683. ggml_critical_section_start();
  5684. switch (type) {
  5685. case GGML_TYPE_IQ2_XXS:
  5686. case GGML_TYPE_IQ2_XS:
  5687. case GGML_TYPE_IQ2_S:
  5688. case GGML_TYPE_IQ1_S:
  5689. case GGML_TYPE_IQ1_M: iq2xs_init_impl(type); break;
  5690. case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
  5691. case GGML_TYPE_IQ3_S: iq3xs_init_impl(512); break;
  5692. default: // nothing
  5693. break;
  5694. }
  5695. ggml_critical_section_end();
  5696. }
  5697. void ggml_quantize_free(void) {
  5698. ggml_critical_section_start();
  5699. iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
  5700. iq2xs_free_impl(GGML_TYPE_IQ2_XS);
  5701. iq2xs_free_impl(GGML_TYPE_IQ1_S);
  5702. iq3xs_free_impl(256);
  5703. ggml_critical_section_end();
  5704. }
  5705. bool ggml_quantize_requires_imatrix(enum ggml_type type) {
  5706. return
  5707. type == GGML_TYPE_IQ2_XXS ||
  5708. type == GGML_TYPE_IQ2_XS ||
  5709. type == GGML_TYPE_IQ1_S;// ||
  5710. //type == GGML_TYPE_IQ1_M;
  5711. }
  5712. size_t ggml_quantize_chunk(
  5713. enum ggml_type type,
  5714. const float * src,
  5715. void * dst,
  5716. int64_t start,
  5717. int64_t nrows,
  5718. int64_t n_per_row,
  5719. const float * imatrix) {
  5720. const int64_t n = (int64_t) nrows * n_per_row;
  5721. if (ggml_quantize_requires_imatrix(type)) {
  5722. GGML_ASSERT(imatrix != NULL);
  5723. }
  5724. GGML_ASSERT(start % type_traits[type].blck_size == 0);
  5725. GGML_ASSERT(start % n_per_row == 0);
5726. ggml_quantize_init(type); // this is a no-op if already initialized
  5727. const size_t start_row = start / n_per_row;
  5728. const size_t row_size = ggml_row_size(type, n_per_row);
  5729. size_t result = 0;
  5730. switch (type) {
  5731. case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5732. case GGML_TYPE_Q4_1: result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5733. case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5734. case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5735. case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5736. case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5737. case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5738. case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5739. case GGML_TYPE_Q5_K: result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5740. case GGML_TYPE_Q6_K: result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5741. case GGML_TYPE_TQ1_0: result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5742. case GGML_TYPE_TQ2_0: result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5743. case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5744. case GGML_TYPE_IQ2_XS: result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5745. case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5746. case GGML_TYPE_IQ3_S: result = quantize_iq3_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5747. case GGML_TYPE_IQ2_S: result = quantize_iq2_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5748. case GGML_TYPE_IQ1_S: result = quantize_iq1_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5749. case GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5750. case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5751. case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5752. case GGML_TYPE_Q4_0_4_4: result = quantize_q4_0_4x4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5753. case GGML_TYPE_Q4_0_4_8: result = quantize_q4_0_4x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5754. case GGML_TYPE_Q4_0_8_8: result = quantize_q4_0_8x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
  5755. case GGML_TYPE_F16:
  5756. {
  5757. size_t elemsize = sizeof(ggml_fp16_t);
  5758. ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
  5759. result = n * elemsize;
  5760. } break;
  5761. case GGML_TYPE_BF16:
  5762. {
  5763. size_t elemsize = sizeof(ggml_bf16_t);
  5764. ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
  5765. result = n * elemsize;
  5766. } break;
  5767. case GGML_TYPE_F32:
  5768. {
  5769. size_t elemsize = sizeof(float);
  5770. result = n * elemsize;
  5771. memcpy((uint8_t *)dst + start * elemsize, src + start, result);
  5772. } break;
  5773. default:
  5774. assert(false);
  5775. }
  5776. GGML_ASSERT(result == nrows * row_size);
  5777. return result;
  5778. }
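// [editor's note] Sketch, not part of the original source: quantizing a row-major f32 matrix in a
// single chunk. The destination must hold nrows * ggml_row_size(type, n_per_row) bytes, n_per_row
// must be a multiple of the type's block size (256 for Q4_K), and no importance matrix is required
// for this type. example_quantize_matrix is a hypothetical name.
static size_t example_quantize_matrix(const float * src, void * dst, int64_t nrows, int64_t n_per_row) {
    const enum ggml_type type = GGML_TYPE_Q4_K;
    return ggml_quantize_chunk(type, src, dst, /*start=*/0, nrows, n_per_row, /*imatrix=*/NULL);
}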
  5779. ////////////////////////////////////////////////////////////////////////////////
  5780. struct gguf_str {
  5781. uint64_t n; // GGUFv2
  5782. char * data;
  5783. };
  5784. static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
  5785. [GGUF_TYPE_UINT8] = sizeof(uint8_t),
  5786. [GGUF_TYPE_INT8] = sizeof(int8_t),
  5787. [GGUF_TYPE_UINT16] = sizeof(uint16_t),
  5788. [GGUF_TYPE_INT16] = sizeof(int16_t),
  5789. [GGUF_TYPE_UINT32] = sizeof(uint32_t),
  5790. [GGUF_TYPE_INT32] = sizeof(int32_t),
  5791. [GGUF_TYPE_FLOAT32] = sizeof(float),
  5792. [GGUF_TYPE_BOOL] = sizeof(bool),
  5793. [GGUF_TYPE_STRING] = sizeof(struct gguf_str),
  5794. [GGUF_TYPE_UINT64] = sizeof(uint64_t),
  5795. [GGUF_TYPE_INT64] = sizeof(int64_t),
  5796. [GGUF_TYPE_FLOAT64] = sizeof(double),
  5797. [GGUF_TYPE_ARRAY] = 0, // undefined
  5798. };
  5799. static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
  5800. static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
  5801. [GGUF_TYPE_UINT8] = "u8",
  5802. [GGUF_TYPE_INT8] = "i8",
  5803. [GGUF_TYPE_UINT16] = "u16",
  5804. [GGUF_TYPE_INT16] = "i16",
  5805. [GGUF_TYPE_UINT32] = "u32",
  5806. [GGUF_TYPE_INT32] = "i32",
  5807. [GGUF_TYPE_FLOAT32] = "f32",
  5808. [GGUF_TYPE_BOOL] = "bool",
  5809. [GGUF_TYPE_STRING] = "str",
  5810. [GGUF_TYPE_ARRAY] = "arr",
  5811. [GGUF_TYPE_UINT64] = "u64",
  5812. [GGUF_TYPE_INT64] = "i64",
  5813. [GGUF_TYPE_FLOAT64] = "f64",
  5814. };
  5815. static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
  5816. union gguf_value {
  5817. uint8_t uint8;
  5818. int8_t int8;
  5819. uint16_t uint16;
  5820. int16_t int16;
  5821. uint32_t uint32;
  5822. int32_t int32;
  5823. float float32;
  5824. uint64_t uint64;
  5825. int64_t int64;
  5826. double float64;
  5827. bool bool_;
  5828. struct gguf_str str;
  5829. struct {
  5830. enum gguf_type type;
  5831. uint64_t n; // GGUFv2
  5832. void * data;
  5833. } arr;
  5834. };
  5835. struct gguf_kv {
  5836. struct gguf_str key;
  5837. enum gguf_type type;
  5838. union gguf_value value;
  5839. };
  5840. struct gguf_header {
  5841. char magic[4];
  5842. uint32_t version;
  5843. uint64_t n_tensors; // GGUFv2
  5844. uint64_t n_kv; // GGUFv2
  5845. };
  5846. struct gguf_tensor_info {
  5847. struct gguf_str name;
  5848. uint32_t n_dims;
  5849. uint64_t ne[GGML_MAX_DIMS];
  5850. enum ggml_type type;
  5851. uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
  5852. // for writing API
  5853. const void * data;
  5854. size_t size;
  5855. };
  5856. struct gguf_context {
  5857. struct gguf_header header;
  5858. struct gguf_kv * kv;
  5859. struct gguf_tensor_info * infos;
  5860. size_t alignment;
  5861. size_t offset; // offset of `data` from beginning of file
  5862. size_t size; // size of `data` in bytes
  5863. //uint8_t * padding;
  5864. void * data;
  5865. };
  5866. static size_t gguf_type_size(enum gguf_type type) {
  5867. GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
  5868. return GGUF_TYPE_SIZE[type];
  5869. }
  5870. static bool gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
  5871. if (info->n_dims > GGML_MAX_DIMS) {
  5872. fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims);
  5873. return false;
  5874. }
  5875. if (info->type < 0 || info->type >= GGML_TYPE_COUNT) {
  5876. fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type);
  5877. return false;
  5878. }
  5879. if (strlen(info->name.data) >= GGML_MAX_NAME) {
  5880. fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data);
  5881. return false;
  5882. }
  5883. for (uint32_t i = 0; i < info->n_dims; ++i) {
  5884. if (info->ne[i] <= 0) {
  5885. fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]);
  5886. return false;
  5887. }
  5888. }
  5889. // prevent overflow for total number of elements
  5890. if (INT64_MAX/info->ne[1] <= info->ne[0]) {
  5891. fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]);
  5892. return false;
  5893. }
  5894. if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) {
  5895. fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[2]);
  5896. return false;
  5897. }
  5898. if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) {
  5899. fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]);
  5900. return false;
  5901. }
  5902. return true;
  5903. }
  5904. static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
  5905. const size_t n = fread(dst, 1, size, file);
  5906. *offset += n;
  5907. return n == size;
  5908. }
  5909. static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
  5910. p->n = 0;
  5911. p->data = NULL;
  5912. bool ok = true;
  5913. ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
5914. // early exit if the string length is invalid, prevents integer overflow
  5915. if (p->n == SIZE_MAX) {
  5916. fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
  5917. return false;
  5918. }
  5919. p->data = calloc(p->n + 1, 1);
  5920. if (!p->data) {
  5921. fprintf(stderr, "%s: failed to allocate memory for string of length %" PRIu64 "\n", __func__, p->n);
  5922. return false;
  5923. }
  5924. ok = ok && gguf_fread_el(file, p->data, p->n, offset);
  5925. return ok;
  5926. }
  5927. static void gguf_free_kv(struct gguf_kv * kv) {
  5928. if (kv->key.data) {
  5929. GGML_FREE(kv->key.data);
  5930. }
  5931. if (kv->type == GGUF_TYPE_STRING) {
  5932. if (kv->value.str.data) {
  5933. GGML_FREE(kv->value.str.data);
  5934. }
  5935. }
  5936. if (kv->type == GGUF_TYPE_ARRAY) {
  5937. if (kv->value.arr.data) {
  5938. if (kv->value.arr.type == GGUF_TYPE_STRING) {
  5939. for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
  5940. struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
  5941. if (str->data) {
  5942. GGML_FREE(str->data);
  5943. }
  5944. }
  5945. }
  5946. GGML_FREE(kv->value.arr.data);
  5947. }
  5948. }
  5949. }
  5950. struct gguf_context * gguf_init_empty(void) {
  5951. struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
  5952. if (!ctx) {
  5953. fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
  5954. return NULL;
  5955. }
  5956. memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
  5957. ctx->header.version = GGUF_VERSION;
  5958. ctx->header.n_tensors = 0;
  5959. ctx->header.n_kv = 0;
  5960. ctx->kv = NULL;
  5961. ctx->infos = NULL;
  5962. ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
  5963. ctx->offset = 0;
  5964. ctx->size = 0;
  5965. ctx->data = NULL;
  5966. return ctx;
  5967. }
  5968. struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
  5969. FILE * file = ggml_fopen(fname, "rb");
  5970. if (!file) {
  5971. fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
  5972. return NULL;
  5973. }
  5974. // offset from start of file
  5975. size_t offset = 0;
  5976. char magic[4];
  5977. // check the magic before making allocations
  5978. {
  5979. gguf_fread_el(file, &magic, sizeof(magic), &offset);
  5980. for (uint32_t i = 0; i < sizeof(magic); i++) {
  5981. if (magic[i] != GGUF_MAGIC[i]) {
  5982. fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
  5983. fclose(file);
  5984. return NULL;
  5985. }
  5986. }
  5987. }
  5988. bool ok = true;
  5989. struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
  5990. if (!ctx) {
  5991. fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
  5992. fclose(file);
  5993. return NULL;
  5994. }
  5995. // read the header
  5996. {
  5997. strncpy(ctx->header.magic, magic, 4);
  5998. ctx->kv = NULL;
  5999. ctx->infos = NULL;
  6000. ctx->data = NULL;
  6001. ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
  6002. ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
  6003. ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
  6004. if (ctx->header.version == 1) {
  6005. fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
  6006. fclose(file);
  6007. gguf_free(ctx);
  6008. return NULL;
  6009. }
6010. // sanity checks to prevent integer/buffer overflows
  6011. ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info));
  6012. ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead());
  6013. ok = ok && (ctx->header.n_kv < (SIZE_MAX/2)/sizeof(struct gguf_kv));
  6014. if (!ok) {
  6015. fprintf(stderr, "%s: failed to read header\n", __func__);
  6016. fclose(file);
  6017. gguf_free(ctx);
  6018. return NULL;
  6019. }
  6020. }
  6021. // read the kv pairs
  6022. {
  6023. const uint64_t n_kv = ctx->header.n_kv;
  6024. ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
  6025. if (!ctx->kv) {
  6026. fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
  6027. fclose(file);
  6028. gguf_free(ctx);
  6029. return NULL;
  6030. }
  6031. for (uint64_t i = 0; i < n_kv; ++i) {
  6032. struct gguf_kv * kv = &ctx->kv[i];
  6033. //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
  6034. ok = ok && gguf_fread_str(file, &kv->key, &offset);
  6035. ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
  6036. //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
  6037. switch (kv->type) {
  6038. case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
  6039. case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
  6040. case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
  6041. case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
  6042. case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
  6043. case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
  6044. case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
  6045. case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
  6046. case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
  6047. case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
  6048. case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
  6049. case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
  6050. case GGUF_TYPE_ARRAY:
  6051. {
  6052. ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
  6053. ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
  6054. switch (kv->value.arr.type) {
  6055. case GGUF_TYPE_UINT8:
  6056. case GGUF_TYPE_INT8:
  6057. case GGUF_TYPE_UINT16:
  6058. case GGUF_TYPE_INT16:
  6059. case GGUF_TYPE_UINT32:
  6060. case GGUF_TYPE_INT32:
  6061. case GGUF_TYPE_FLOAT32:
  6062. case GGUF_TYPE_UINT64:
  6063. case GGUF_TYPE_INT64:
  6064. case GGUF_TYPE_FLOAT64:
  6065. case GGUF_TYPE_BOOL:
  6066. {
6067. // prevent integer overflow in the allocation below
  6068. if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
  6069. fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
  6070. fclose(file);
  6071. gguf_free(ctx);
  6072. return NULL;
  6073. }
  6074. kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
  6075. if (!kv->value.arr.data) {
  6076. fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
  6077. fclose(file);
  6078. gguf_free(ctx);
  6079. return NULL;
  6080. }
  6081. ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
  6082. } break;
  6083. case GGUF_TYPE_STRING:
  6084. {
6085. // prevent integer overflow in the allocation below
  6086. if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
  6087. fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
  6088. fclose(file);
  6089. gguf_free(ctx);
  6090. return NULL;
  6091. }
  6092. kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
  6093. if (!kv->value.arr.data) {
  6094. fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
  6095. fclose(file);
  6096. gguf_free(ctx);
  6097. return NULL;
  6098. }
  6099. for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
  6100. ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
  6101. }
  6102. } break;
  6103. case GGUF_TYPE_ARRAY:
  6104. default:
  6105. {
  6106. fprintf(stderr, "%s: invalid array type %d\n", __func__, kv->value.arr.type);
  6107. ok = false;
  6108. } break;
  6109. }
  6110. } break;
  6111. default:
  6112. {
  6113. fprintf(stderr, "%s: invalid type %d\n", __func__, kv->type);
  6114. ok = false;
  6115. } break;
  6116. }
  6117. if (!ok) {
  6118. break;
  6119. }
  6120. }
  6121. if (!ok) {
  6122. fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
  6123. fclose(file);
  6124. gguf_free(ctx);
  6125. return NULL;
  6126. }
  6127. }
  6128. // read the tensor infos
  6129. if (ctx->header.n_tensors > 0) {
  6130. ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
  6131. if (!ctx->infos) {
  6132. fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
  6133. fclose(file);
  6134. gguf_free(ctx);
  6135. return NULL;
  6136. }
  6137. for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
  6138. struct gguf_tensor_info * info = &ctx->infos[i];
  6139. for (int j = 0; j < GGML_MAX_DIMS; ++j) {
  6140. info->ne[j] = 1;
  6141. }
  6142. ok = ok && gguf_fread_str(file, &info->name, &offset);
  6143. ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
  6144. ok = ok && (info->n_dims <= GGML_MAX_DIMS);
  6145. for (uint32_t j = 0; j < info->n_dims; ++j) {
  6146. ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
  6147. }
  6148. ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
  6149. ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
  6150. ok = ok && gguf_tensor_info_sanitize(info);
6151. // make sure there are no duplicate tensor names
  6152. for (uint64_t j = 0; j < i && ok; ++j) {
  6153. if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
  6154. fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
  6155. ok = false;
  6156. }
  6157. }
  6158. if (!ok) {
  6159. fprintf(stderr, "%s: failed to read tensor info\n", __func__);
  6160. fclose(file);
  6161. gguf_free(ctx);
  6162. return NULL;
  6163. }
  6164. }
  6165. }
  6166. ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
  6167. int alignment_idx = gguf_find_key(ctx, "general.alignment");
  6168. if (alignment_idx != -1) {
  6169. ctx->alignment = gguf_get_val_u32(ctx, alignment_idx);
  6170. }
  6171. // we require the data section to be aligned, so take into account any padding
  6172. {
  6173. const size_t offset_pad = offset % ctx->alignment;
  6174. if (offset_pad != 0) {
  6175. offset += ctx->alignment - offset_pad;
  6176. fseek(file, offset, SEEK_SET);
  6177. }
  6178. }
  6179. // store the current file offset - this is where the data section starts
  6180. ctx->offset = offset;
  6181. // compute the total size of the data section, taking into account the alignment
  6182. {
  6183. ctx->size = 0;
  6184. for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
  6185. struct gguf_tensor_info * info = &ctx->infos[i];
  6186. const int64_t ne =
  6187. (int64_t) info->ne[0] *
  6188. (int64_t) info->ne[1] *
  6189. (int64_t) info->ne[2] *
  6190. (int64_t) info->ne[3];
  6191. if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
  6192. fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
  6193. __func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
  6194. fclose(file);
  6195. gguf_free(ctx);
  6196. return NULL;
  6197. }
  6198. const size_t size_cur = ggml_row_size(info->type, ne);
  6199. ctx->size += GGML_PAD(size_cur, ctx->alignment);
  6200. }
  6201. }
  6202. // load the tensor data only if requested
  6203. if (params.ctx != NULL) {
6204. // if params.no_alloc is set, we create "empty" tensors and do not read the binary blob
  6205. // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
  6206. // the ggml_tensor structs to the appropriate locations in the binary blob
  6207. // compute the exact size needed for the new ggml_context
  6208. const size_t mem_size =
  6209. params.no_alloc ?
  6210. (ctx->header.n_tensors )*ggml_tensor_overhead() :
  6211. (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
  6212. struct ggml_init_params pdata = {
  6213. .mem_size = mem_size,
  6214. .mem_buffer = NULL,
  6215. .no_alloc = params.no_alloc,
  6216. };
  6217. *params.ctx = ggml_init(pdata);
  6218. if (*params.ctx == NULL) {
  6219. fprintf(stderr, "%s: failed to initialize context\n", __func__);
  6220. fclose(file);
  6221. gguf_free(ctx);
  6222. return NULL;
  6223. }
  6224. struct ggml_context * ctx_data = *params.ctx;
  6225. struct ggml_tensor * data = NULL;
  6226. if (!params.no_alloc) {
  6227. data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
  6228. ok = ok && data != NULL;
  6229. // read the binary blob with the tensor data
  6230. ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
  6231. if (!ok) {
  6232. fprintf(stderr, "%s: failed to read tensor data\n", __func__);
  6233. fclose(file);
  6234. ggml_free(ctx_data);
  6235. gguf_free(ctx);
  6236. return NULL;
  6237. }
  6238. ctx->data = data->data;
  6239. }
  6240. ggml_set_no_alloc(ctx_data, true);
  6241. // create the tensors
  6242. for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
  6243. const int64_t ne[GGML_MAX_DIMS] = {
  6244. ctx->infos[i].ne[0],
  6245. ctx->infos[i].ne[1],
  6246. ctx->infos[i].ne[2],
  6247. ctx->infos[i].ne[3],
  6248. };
  6249. struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne);
  6250. ok = ok && cur != NULL;
  6251. if (!ok) {
  6252. break;
  6253. }
  6254. ggml_set_name(cur, ctx->infos[i].name.data);
  6255. // point the data member to the appropriate location in the binary blob using the tensor infos
  6256. if (!params.no_alloc) {
  6257. //cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
  6258. cur->data = (char *) data->data + ctx->infos[i].offset; // offset from data
  6259. }
  6260. }
  6261. if (!ok) {
  6262. fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
  6263. fclose(file);
  6264. ggml_free(ctx_data);
  6265. gguf_free(ctx);
  6266. return NULL;
  6267. }
  6268. ggml_set_no_alloc(ctx_data, params.no_alloc);
  6269. }
  6270. fclose(file);
  6271. return ctx;
  6272. }
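// [editor's note] Sketch, not part of the original source: reading only the metadata of a GGUF
// file. With no_alloc = true the tensors created in `meta` have no data and the binary blob is
// never read. example_gguf_inspect is a hypothetical name.
static void example_gguf_inspect(const char * fname) {
    struct ggml_context * meta = NULL;
    struct gguf_init_params params = {
        .no_alloc = true,
        .ctx      = &meta,
    };
    struct gguf_context * gguf = gguf_init_from_file(fname, params);
    if (gguf == NULL) {
        return;
    }
    printf("version: %d, alignment: %zu, data offset: %zu\n",
        gguf_get_version(gguf), gguf_get_alignment(gguf), gguf_get_data_offset(gguf));
    gguf_free(gguf);
    ggml_free(meta);
}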
  6273. void gguf_free(struct gguf_context * ctx) {
  6274. if (ctx == NULL) {
  6275. return;
  6276. }
  6277. if (ctx->kv) {
  6278. // free string memory - not great..
  6279. for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
  6280. gguf_free_kv(&ctx->kv[i]);
  6281. }
  6282. GGML_FREE(ctx->kv);
  6283. }
  6284. if (ctx->infos) {
  6285. for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
  6286. struct gguf_tensor_info * info = &ctx->infos[i];
  6287. if (info->name.data) {
  6288. GGML_FREE(info->name.data);
  6289. }
  6290. }
  6291. GGML_FREE(ctx->infos);
  6292. }
  6293. GGML_FREE(ctx);
  6294. }
  6295. const char * gguf_type_name(enum gguf_type type) {
  6296. return GGUF_TYPE_NAME[type];
  6297. }
  6298. int gguf_get_version(const struct gguf_context * ctx) {
  6299. return ctx->header.version;
  6300. }
  6301. size_t gguf_get_alignment(const struct gguf_context * ctx) {
  6302. return ctx->alignment;
  6303. }
  6304. size_t gguf_get_data_offset(const struct gguf_context * ctx) {
  6305. return ctx->offset;
  6306. }
  6307. void * gguf_get_data(const struct gguf_context * ctx) {
  6308. return ctx->data;
  6309. }
  6310. int gguf_get_n_kv(const struct gguf_context * ctx) {
  6311. return ctx->header.n_kv;
  6312. }
  6313. int gguf_find_key(const struct gguf_context * ctx, const char * key) {
  6314. // return -1 if key not found
  6315. int keyfound = -1;
  6316. const int n_kv = gguf_get_n_kv(ctx);
  6317. for (int i = 0; i < n_kv; ++i) {
  6318. if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
  6319. keyfound = i;
  6320. break;
  6321. }
  6322. }
  6323. return keyfound;
  6324. }
  6325. const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
  6326. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6327. return ctx->kv[key_id].key.data;
  6328. }
  6329. enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
  6330. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6331. return ctx->kv[key_id].type;
  6332. }
  6333. enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
  6334. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6335. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
  6336. return ctx->kv[key_id].value.arr.type;
  6337. }
  6338. const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
  6339. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6340. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
  6341. return ctx->kv[key_id].value.arr.data;
  6342. }
  6343. const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
  6344. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6345. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
  6346. struct gguf_kv * kv = &ctx->kv[key_id];
  6347. struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
  6348. return str->data;
  6349. }
  6350. int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
  6351. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6352. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
  6353. return ctx->kv[key_id].value.arr.n;
  6354. }
  6355. uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
  6356. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6357. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
  6358. return ctx->kv[key_id].value.uint8;
  6359. }
  6360. int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
  6361. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6362. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
  6363. return ctx->kv[key_id].value.int8;
  6364. }
  6365. uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
  6366. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6367. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
  6368. return ctx->kv[key_id].value.uint16;
  6369. }
  6370. int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
  6371. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6372. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
  6373. return ctx->kv[key_id].value.int16;
  6374. }
  6375. uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
  6376. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6377. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
  6378. return ctx->kv[key_id].value.uint32;
  6379. }
  6380. int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
  6381. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6382. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
  6383. return ctx->kv[key_id].value.int32;
  6384. }
  6385. float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
  6386. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6387. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
  6388. return ctx->kv[key_id].value.float32;
  6389. }
  6390. uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
  6391. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6392. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
  6393. return ctx->kv[key_id].value.uint64;
  6394. }
  6395. int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
  6396. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6397. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
  6398. return ctx->kv[key_id].value.int64;
  6399. }
  6400. double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
  6401. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6402. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
  6403. return ctx->kv[key_id].value.float64;
  6404. }
  6405. bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
  6406. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6407. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
  6408. return ctx->kv[key_id].value.bool_;
  6409. }
  6410. const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
  6411. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6412. GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
  6413. return ctx->kv[key_id].value.str.data;
  6414. }
  6415. const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
  6416. GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
  6417. GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
  6418. GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
  6419. return &ctx->kv[key_id].value;
  6420. }
  6421. int gguf_get_n_tensors(const struct gguf_context * ctx) {
  6422. return ctx->header.n_tensors;
  6423. }
  6424. int gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
  6425. // return -1 if tensor not found
  6426. int tensorfound = -1;
  6427. const int n_tensors = gguf_get_n_tensors(ctx);
  6428. for (int i = 0; i < n_tensors; ++i) {
  6429. if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
  6430. tensorfound = i;
  6431. break;
  6432. }
  6433. }
  6434. return tensorfound;
  6435. }
  6436. size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) {
  6437. return ctx->infos[i].offset;
  6438. }
  6439. char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) {
  6440. return ctx->infos[i].name.data;
  6441. }
  6442. enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int i) {
  6443. return ctx->infos[i].type;
  6444. }
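// [editor's note] Sketch, not part of the original source: enumerating tensor infos. Per-tensor
// offsets are relative to the data section, whose absolute file position is gguf_get_data_offset().
// example_gguf_list_tensors is a hypothetical name.
static void example_gguf_list_tensors(const struct gguf_context * gguf) {
    const int n = gguf_get_n_tensors(gguf);
    for (int i = 0; i < n; ++i) {
        printf("%-40s %8s @ %zu\n",
            gguf_get_tensor_name(gguf, i),
            ggml_type_name(gguf_get_tensor_type(gguf, i)),
            gguf_get_data_offset(gguf) + gguf_get_tensor_offset(gguf, i));
    }
}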
  6445. // returns the index
  6446. static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
  6447. const int idx = gguf_find_key(ctx, key);
  6448. if (idx >= 0) {
  6449. return idx;
  6450. }
  6451. const int n_kv = gguf_get_n_kv(ctx);
  6452. ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
  6453. ctx->kv[n_kv].key.n = strlen(key);
  6454. ctx->kv[n_kv].key.data = strdup(key);
  6455. ctx->header.n_kv++;
  6456. return n_kv;
  6457. }
  6458. void gguf_remove_key(struct gguf_context * ctx, const char * key) {
  6459. const int idx = gguf_find_key(ctx, key);
  6460. if (idx >= 0) {
  6461. const int n_kv = gguf_get_n_kv(ctx);
  6462. gguf_free_kv(&ctx->kv[idx]);
  6463. for (int i = idx; i < n_kv-1; ++i) {
  6464. ctx->kv[i] = ctx->kv[i+1];
  6465. }
  6466. ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
  6467. ctx->header.n_kv--;
  6468. }
  6469. }
  6470. void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
  6471. const int idx = gguf_get_or_add_key(ctx, key);
  6472. ctx->kv[idx].type = GGUF_TYPE_UINT8;
  6473. ctx->kv[idx].value.uint8 = val;
  6474. }
  6475. void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
  6476. const int idx = gguf_get_or_add_key(ctx, key);
  6477. ctx->kv[idx].type = GGUF_TYPE_INT8;
  6478. ctx->kv[idx].value.int8 = val;
  6479. }
  6480. void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
  6481. const int idx = gguf_get_or_add_key(ctx, key);
  6482. ctx->kv[idx].type = GGUF_TYPE_UINT16;
  6483. ctx->kv[idx].value.uint16 = val;
  6484. }
  6485. void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
  6486. const int idx = gguf_get_or_add_key(ctx, key);
  6487. ctx->kv[idx].type = GGUF_TYPE_INT16;
  6488. ctx->kv[idx].value.int16 = val;
  6489. }
  6490. void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
  6491. const int idx = gguf_get_or_add_key(ctx, key);
  6492. ctx->kv[idx].type = GGUF_TYPE_UINT32;
  6493. ctx->kv[idx].value.uint32 = val;
  6494. }
  6495. void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
  6496. const int idx = gguf_get_or_add_key(ctx, key);
  6497. ctx->kv[idx].type = GGUF_TYPE_INT32;
  6498. ctx->kv[idx].value.int32 = val;
  6499. }
  6500. void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
  6501. const int idx = gguf_get_or_add_key(ctx, key);
  6502. ctx->kv[idx].type = GGUF_TYPE_FLOAT32;
  6503. ctx->kv[idx].value.float32 = val;
  6504. }
  6505. void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
  6506. const int idx = gguf_get_or_add_key(ctx, key);
  6507. ctx->kv[idx].type = GGUF_TYPE_UINT64;
  6508. ctx->kv[idx].value.uint64 = val;
  6509. }
  6510. void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
  6511. const int idx = gguf_get_or_add_key(ctx, key);
  6512. ctx->kv[idx].type = GGUF_TYPE_INT64;
  6513. ctx->kv[idx].value.int64 = val;
  6514. }
  6515. void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
  6516. const int idx = gguf_get_or_add_key(ctx, key);
  6517. ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
  6518. ctx->kv[idx].value.float64 = val;
  6519. }
  6520. void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
  6521. const int idx = gguf_get_or_add_key(ctx, key);
  6522. ctx->kv[idx].type = GGUF_TYPE_BOOL;
  6523. ctx->kv[idx].value.bool_ = val;
  6524. }
  6525. void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
  6526. const int idx = gguf_get_or_add_key(ctx, key);
  6527. ctx->kv[idx].type = GGUF_TYPE_STRING;
  6528. ctx->kv[idx].value.str.n = strlen(val);
  6529. ctx->kv[idx].value.str.data = strdup(val);
  6530. }
  6531. void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
  6532. const int idx = gguf_get_or_add_key(ctx, key);
  6533. ctx->kv[idx].type = GGUF_TYPE_ARRAY;
  6534. ctx->kv[idx].value.arr.type = type;
  6535. ctx->kv[idx].value.arr.n = n;
  6536. ctx->kv[idx].value.arr.data = GGML_CALLOC(n, gguf_type_size(type));
  6537. memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
  6538. }
  6539. void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
  6540. const int idx = gguf_get_or_add_key(ctx, key);
  6541. ctx->kv[idx].type = GGUF_TYPE_ARRAY;
  6542. ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
  6543. ctx->kv[idx].value.arr.n = n;
  6544. ctx->kv[idx].value.arr.data = GGML_CALLOC(n, sizeof(struct gguf_str));
  6545. for (int i = 0; i < n; i++) {
  6546. struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
  6547. str->n = strlen(data[i]);
  6548. str->data = strdup(data[i]);
  6549. }
  6550. }
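// [editor's note] Sketch, not part of the original source: building metadata from scratch with the
// setters above. The key names and values are purely illustrative; example_gguf_build_meta is a
// hypothetical name.
static struct gguf_context * example_gguf_build_meta(void) {
    struct gguf_context * gguf = gguf_init_empty();
    gguf_set_val_str(gguf, "general.architecture", "llama");
    gguf_set_val_u32(gguf, "general.alignment", 32);
    const float scales[4] = { 1.0f, 0.5f, 0.25f, 0.125f };
    gguf_set_arr_data(gguf, "example.scales", GGUF_TYPE_FLOAT32, scales, 4);
    const char * tags[2] = { "demo", "sketch" };
    gguf_set_arr_str(gguf, "example.tags", tags, 2);
    return gguf;
}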
  6551. // set or add KV pairs from another context
  6552. void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
  6553. for (uint32_t i = 0; i < src->header.n_kv; i++) {
  6554. switch (src->kv[i].type) {
  6555. case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break;
  6556. case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break;
  6557. case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break;
  6558. case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break;
  6559. case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
  6560. case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
  6561. case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
  6562. case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
  6563. case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
  6564. case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
  6565. case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
  6566. case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
  6567. case GGUF_TYPE_ARRAY:
  6568. {
  6569. if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
  6570. const char ** data = GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
  6571. for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
  6572. data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
  6573. }
  6574. gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
  6575. GGML_FREE((void *)data);
  6576. } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
  6577. GGML_ABORT("nested arrays not supported");
  6578. } else {
  6579. gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
  6580. }
  6581. } break;
  6582. default: GGML_ABORT("invalid type");
  6583. }
  6584. }
  6585. }
  6586. void gguf_add_tensor(
  6587. struct gguf_context * ctx,
  6588. const struct ggml_tensor * tensor) {
  6589. GGML_ASSERT(tensor);
  6590. if (gguf_find_tensor(ctx, tensor->name) != -1) {
  6591. GGML_ABORT("duplicated tensor name");
  6592. }
  6593. const int idx = ctx->header.n_tensors;
  6594. ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
  6595. ctx->infos[idx].name.n = strlen(tensor->name);
  6596. ctx->infos[idx].name.data = strdup(tensor->name);
  6597. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  6598. ctx->infos[idx].ne[i] = 1;
  6599. }
  6600. ctx->infos[idx].n_dims = ggml_n_dims(tensor);
  6601. for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
  6602. ctx->infos[idx].ne[i] = tensor->ne[i];
  6603. }
  6604. ctx->infos[idx].type = tensor->type;
  6605. ctx->infos[idx].offset = 0;
  6606. ctx->infos[idx].data = tensor->data;
  6607. ctx->infos[idx].size = ggml_nbytes(tensor);
  6608. if (ctx->header.n_tensors > 0) {
  6609. ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
  6610. }
  6611. ctx->header.n_tensors++;
  6612. }
  6613. void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
  6614. const int idx = gguf_find_tensor(ctx, name);
  6615. if (idx < 0) {
  6616. GGML_ABORT("tensor not found");
  6617. }
  6618. ctx->infos[idx].type = type;
  6619. }
  6620. void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
  6621. const int idx = gguf_find_tensor(ctx, name);
  6622. if (idx < 0) {
  6623. GGML_ABORT("tensor not found");
  6624. }
  6625. ctx->infos[idx].data = data;
  6626. ctx->infos[idx].size = size;
  6627. // update offsets
  6628. for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
  6629. ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
  6630. }
  6631. }
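// [editor's note] Sketch, not part of the original source: registering a tensor and writing the
// file. gguf_add_tensor already records tensor->data and ggml_nbytes(tensor) as the payload, so
// gguf_set_tensor_data is only needed when the payload differs from that default.
// gguf_write_to_file(ctx, fname, only_meta) is assumed to be declared in the public header
// alongside the functions above; example_gguf_write is a hypothetical name.
static void example_gguf_write(struct gguf_context * gguf, struct ggml_tensor * t, const char * fname) {
    gguf_add_tensor(gguf, t); // name, shape, type and offset are derived from the tensor
    gguf_write_to_file(gguf, fname, /*only_meta=*/false);
}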
  6632. //static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
  6633. // fwrite(&val->n, sizeof(val->n), 1, file);
  6634. // fwrite(val->data, sizeof(char), val->n, file);
  6635. //}
  6636. //
  6637. //static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
  6638. // fwrite(val, sizeof(char), size, file);
  6639. //}
  6640. struct gguf_buf {
  6641. void * data;
  6642. size_t size;
  6643. size_t offset;
  6644. };
  6645. static struct gguf_buf gguf_buf_init(size_t size) {
  6646. struct gguf_buf buf = {
  6647. /*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
  6648. /*buf.size =*/ size,
  6649. /*buf.offset =*/ 0,
  6650. };
  6651. return buf;
  6652. }
  6653. static void gguf_buf_free(struct gguf_buf buf) {
  6654. if (buf.data) {
  6655. GGML_FREE(buf.data);
  6656. }
  6657. }
  6658. static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
  6659. if (buf->offset + size > buf->size) {
  6660. buf->size = 1.5*(buf->offset + size);
  6661. if (buf->data) {
  6662. buf->data = realloc(buf->data, buf->size);
  6663. }
  6664. }
  6665. }

static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
    gguf_buf_grow(buf, sizeof(val->n) + val->n);

    if (buf->data) {
        memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
    }
    buf->offset += sizeof(val->n);

    if (buf->data) {
        memcpy((char *) buf->data + buf->offset, val->data, val->n);
    }
    buf->offset += val->n;
}

static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
    gguf_buf_grow(buf, el_size);

    if (buf->data) {
        memcpy((char *) buf->data + buf->offset, val, el_size);
    }
    buf->offset += el_size;
}
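
// serialize the full GGUF layout into buf: header (magic, version, n_tensors, n_kv),
// key-value pairs, tensor infos, zero padding up to ctx->alignment, and - unless
// only_meta is set - the tensor data, with each tensor padded to ctx->alignment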
static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
    // write header
    gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic));
    gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version));
    gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
    gguf_bwrite_el(buf, &ctx->header.n_kv,      sizeof(ctx->header.n_kv));

    // write key-value pairs
    for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
        struct gguf_kv * kv = &ctx->kv[i];

        gguf_bwrite_str(buf, &kv->key);
        gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));

        switch (kv->type) {
            case GGUF_TYPE_UINT8:   gguf_bwrite_el (buf, &kv->value.uint8,   sizeof(kv->value.uint8)  ); break;
            case GGUF_TYPE_INT8:    gguf_bwrite_el (buf, &kv->value.int8,    sizeof(kv->value.int8)   ); break;
            case GGUF_TYPE_UINT16:  gguf_bwrite_el (buf, &kv->value.uint16,  sizeof(kv->value.uint16) ); break;
            case GGUF_TYPE_INT16:   gguf_bwrite_el (buf, &kv->value.int16,   sizeof(kv->value.int16)  ); break;
            case GGUF_TYPE_UINT32:  gguf_bwrite_el (buf, &kv->value.uint32,  sizeof(kv->value.uint32) ); break;
            case GGUF_TYPE_INT32:   gguf_bwrite_el (buf, &kv->value.int32,   sizeof(kv->value.int32)  ); break;
            case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
            case GGUF_TYPE_UINT64:  gguf_bwrite_el (buf, &kv->value.uint64,  sizeof(kv->value.uint64) ); break;
            case GGUF_TYPE_INT64:   gguf_bwrite_el (buf, &kv->value.int64,   sizeof(kv->value.int64)  ); break;
            case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
            case GGUF_TYPE_BOOL:    gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
            case GGUF_TYPE_STRING:  gguf_bwrite_str(buf, &kv->value.str                              ); break;
            case GGUF_TYPE_ARRAY:
                {
                    gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
                    gguf_bwrite_el(buf, &kv->value.arr.n,    sizeof(kv->value.arr.n)   );

                    switch (kv->value.arr.type) {
                        case GGUF_TYPE_UINT8:
                        case GGUF_TYPE_INT8:
                        case GGUF_TYPE_UINT16:
                        case GGUF_TYPE_INT16:
                        case GGUF_TYPE_UINT32:
                        case GGUF_TYPE_INT32:
                        case GGUF_TYPE_FLOAT32:
                        case GGUF_TYPE_UINT64:
                        case GGUF_TYPE_INT64:
                        case GGUF_TYPE_FLOAT64:
                        case GGUF_TYPE_BOOL:
                            {
                                gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type));
                            } break;
                        case GGUF_TYPE_STRING:
                            {
                                for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
                                    gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
                                }
                            } break;
                        case GGUF_TYPE_ARRAY:
                        default: GGML_ABORT("invalid type");
                    }
                } break;
            default: GGML_ABORT("invalid type");
        }
    }

    // write tensor infos
    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
        struct gguf_tensor_info * info = &ctx->infos[i];

        gguf_bwrite_str(buf, &info->name);
        gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
        for (uint32_t j = 0; j < info->n_dims; ++j) {
            gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
        }
        gguf_bwrite_el(buf, &info->type,   sizeof(info->type));
        gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
    }

    // we require the data section to be aligned, so take into account any padding
    {
        const size_t offset     = buf->offset;
        const size_t offset_pad = GGML_PAD(offset, ctx->alignment);

        if (offset_pad != offset) {
            uint8_t pad = 0;
            for (size_t i = 0; i < offset_pad - offset; ++i) {
                gguf_bwrite_el(buf, &pad, sizeof(pad));
            }
        }
    }

    if (only_meta) {
        return;
    }

    size_t offset = 0;

    // write tensor data
    for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
        struct gguf_tensor_info * info = &ctx->infos[i];

        const size_t size     = info->size;
        const size_t size_pad = GGML_PAD(size, ctx->alignment);

        gguf_bwrite_el(buf, info->data, size);

        if (size_pad != size) {
            uint8_t pad = 0;
            for (size_t j = 0; j < size_pad - size; ++j) {
                gguf_bwrite_el(buf, &pad, sizeof(pad));
            }
        }

        GGML_ASSERT(offset == info->offset);

        offset += size_pad;
    }
}
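
// serialize the context into a temporary in-memory buffer and write it to fname in one fwrite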
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
    FILE * file = ggml_fopen(fname, "wb");
    if (!file) {
        GGML_ABORT("failed to open file for writing");
    }

    struct gguf_buf buf = gguf_buf_init(16*1024);

    gguf_write_to_buf(ctx, &buf, only_meta);

    fwrite(buf.data, 1, buf.offset, file);

    gguf_buf_free(buf);

    fclose(file);
}
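
// typical use of the writer API above (a sketch only; assumes gguf_init_empty,
// gguf_set_val_u32 and gguf_free from the public gguf API):
//
//   struct gguf_context * gctx = gguf_init_empty();
//   gguf_set_val_u32(gctx, "general.alignment", 32);
//   gguf_add_tensor(gctx, tensor);                  // tensor: a ggml_tensor with its data set
//   gguf_write_to_file(gctx, "model.gguf", false);  // false: also write the tensor data
//   gguf_free(gctx);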

size_t gguf_get_meta_size(const struct gguf_context * ctx) {
    // no allocs - only compute size
    struct gguf_buf buf = gguf_buf_init(0);

    gguf_write_to_buf(ctx, &buf, true);

    return buf.offset;
}

void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
    struct gguf_buf buf = gguf_buf_init(16*1024);

    gguf_write_to_buf(ctx, &buf, true);

    memcpy(data, buf.data, buf.offset);

    gguf_buf_free(buf);
}

////////////////////////////////////////////////////////////////////////////////
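
// the ggml_cpu_has_* functions below report whether a given instruction set or
// backend was enabled when this binary was built (compile-time preprocessor
// checks, not runtime CPU detection)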
int ggml_cpu_has_avx(void) {
#if defined(__AVX__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_avx_vnni(void) {
#if defined(__AVXVNNI__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_avx2(void) {
#if defined(__AVX2__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_avx512(void) {
#if defined(__AVX512F__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_avx512_vbmi(void) {
#if defined(__AVX512VBMI__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_avx512_vnni(void) {
#if defined(__AVX512VNNI__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_avx512_bf16(void) {
#if defined(__AVX512BF16__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_amx_int8(void) {
#if defined(__AMX_INT8__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_fma(void) {
#if defined(__FMA__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_arm_fma(void) {
#if defined(__ARM_FEATURE_FMA)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_riscv_v(void) {
#if defined(__riscv_v_intrinsic)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_metal(void) {
#if defined(GGML_USE_METAL)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_f16c(void) {
#if defined(__F16C__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_fp16_va(void) {
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_wasm_simd(void) {
#if defined(__wasm_simd128__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_blas(void) {
#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_cuda(void) {
#if defined(GGML_USE_CUDA)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_vulkan(void) {
#if defined(GGML_USE_VULKAN)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_kompute(void) {
#if defined(GGML_USE_KOMPUTE)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_sycl(void) {
#if defined(GGML_USE_SYCL)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_rpc(void) {
#if defined(GGML_USE_RPC)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_cann(void) {
#if defined(GGML_USE_CANN)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_llamafile(void) {
#if defined(GGML_USE_LLAMAFILE)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_gpublas(void) {
    return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl();
}

int ggml_cpu_has_sse3(void) {
#if defined(__SSE3__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_ssse3(void) {
#if defined(__SSSE3__)
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_vsx(void) {
#if defined(__POWER9_VECTOR__)
    return 1;
#else
    return 0;
#endif
}

void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
    g_logger_state.log_callback           = log_callback ? log_callback : ggml_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}

////////////////////////////////////////////////////////////////////////////////