#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC

#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-threading.h"
#include "ggml-cpu.h"
#include "ggml.h"

// FIXME: required here for quantization functions
#include "ggml-quants.h"

#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
#include <alloca.h>
#endif

#include <assert.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <signal.h>

#if defined(__gnu_linux__)
#include <syscall.h>
#endif

#if defined(__APPLE__)
#include <unistd.h>
#include <mach/mach.h>
#include <TargetConditionals.h>
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif

#define UNUSED GGML_UNUSED

#if defined(_MSC_VER)
#define m512bh(p) p
#define m512i(p) p
#else
#define m512bh(p) (__m512bh)(p)
#define m512i(p) (__m512i)(p)
#endif

// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
float ggml_table_f32_f16[1 << 16];

#if defined(__linux__) || \
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#if defined(__linux__)
#include <sys/prctl.h>
#endif

#if defined(__ANDROID__)
#include <unwind.h>
#include <dlfcn.h>
#include <stdio.h>

struct backtrace_state {
    void ** current;
    void ** end;
};

static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
    struct backtrace_state * state = (struct backtrace_state *)arg;
    uintptr_t pc = _Unwind_GetIP(context);
    if (pc) {
        if (state->current == state->end) {
            return _URC_END_OF_STACK;
        } else {
            *state->current++ = (void*)pc;
        }
    }
    return _URC_NO_REASON;
}

static void ggml_print_backtrace_symbols(void) {
    const int max = 100;
    void* buffer[max];
    struct backtrace_state state = {buffer, buffer + max};
    _Unwind_Backtrace(unwind_callback, &state);
    int count = state.current - buffer;
    for (int idx = 0; idx < count; ++idx) {
        const void * addr = buffer[idx];
        const char * symbol = "";
        Dl_info info;
        if (dladdr(addr, &info) && info.dli_sname) {
            symbol = info.dli_sname;
        }
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
    }
}
#elif defined(__linux__) && defined(__GLIBC__)
#include <execinfo.h>
static void ggml_print_backtrace_symbols(void) {
    void * trace[100];
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
}
#else
static void ggml_print_backtrace_symbols(void) {
    // platform not supported
}
#endif

void ggml_print_backtrace(void) {
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
    if (GGML_NO_BACKTRACE) {
        return;
    }
#if defined(__linux__)
    FILE * f = fopen("/proc/self/status", "r");
    size_t size = 0;
    char * line = NULL;
    ssize_t length = 0;
    while ((length = getline(&line, &size, f)) > 0) {
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
            // Already being debugged, and the breakpoint is the later abort()
            free(line);
            fclose(f);
            return;
        }
    }
    free(line);
    fclose(f);
    int lock[2] = { -1, -1 };
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
#endif
    const int parent_pid = getpid();
    const int child_pid = fork();
    if (child_pid < 0) { // error
#if defined(__linux__)
        close(lock[1]);
        close(lock[0]);
#endif
        return;
    } else if (child_pid == 0) { // child
        char attach[32];
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
#if defined(__linux__)
        close(lock[1]);
        (void) !read(lock[0], lock, 1);
        close(lock[0]);
#endif
        // try gdb
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",
            "-ex", attach,
            "-ex", "bt -frame-info source-and-location",
            "-ex", "detach",
            "-ex", "quit",
            (char *) NULL);
        // try lldb
        execlp("lldb", "lldb", "--batch",
            "-o", "bt",
            "-o", "quit",
            "-p", &attach[sizeof("attach ") - 1],
            (char *) NULL);
        // gdb failed, fallback to backtrace_symbols
        ggml_print_backtrace_symbols();
        _Exit(0);
    } else { // parent
#if defined(__linux__)
        prctl(PR_SET_PTRACER, child_pid);
        close(lock[1]);
        close(lock[0]);
#endif
        waitpid(child_pid, NULL, 0);
    }
}
#else
void ggml_print_backtrace(void) {
    // platform not supported
}
#endif

void ggml_abort(const char * file, int line, const char * fmt, ...) {
    fflush(stdout);

    fprintf(stderr, "%s:%d: ", file, line);

    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fprintf(stderr, "\n");

    ggml_print_backtrace();
    abort();
}
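
// NOTE: ggml_abort is normally reached through the GGML_ABORT/GGML_ASSERT macros from
// ggml.h, which supply __FILE__ and __LINE__ automatically. Minimal usage sketch
// (ctx/type are illustrative names, not part of this file):
//
//     GGML_ASSERT(ctx != NULL);                  // on failure: prints file:line, backtrace, abort()
//     GGML_ABORT("unsupported type %d", type);   // printf-style message, then abort()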
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp

//
// logging
//

struct ggml_logger_state {
    ggml_log_callback log_callback;
    void * log_callback_user_data;
};

static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};

static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
    if (format == NULL) {
        return;
    }
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, 128, format, args);
    if (len < 128) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        free(buffer2);
    }
    va_end(args_copy);
}

void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    ggml_log_internal_v(level, format, args);
    va_end(args);
}

void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}

//
// end of logging block
//
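
// Usage sketch (illustrative): applications can replace the default callback above via
// ggml_log_set() from ggml.h; the callback receives the formatted text and the user pointer:
//
//     static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
//         fprintf((FILE *) user_data, "[ggml:%d] %s", (int) level, text);
//     }
//     // ggml_log_set(my_log, stderr);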
#ifdef GGML_USE_ACCELERATE
// uncomment to use vDSP for soft max computation
// note: not sure if it is actually faster
//#define GGML_SOFT_MAX_ACCELERATE
#endif

void * ggml_aligned_malloc(size_t size) {
#if defined(__s390x__)
    const int alignment = 256;
#else
    const int alignment = 64;
#endif
#if defined(_MSC_VER) || defined(__MINGW32__)
    return _aligned_malloc(size, alignment);
#else
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
        return NULL;
    }
    void * aligned_memory = NULL;
#ifdef GGML_USE_CPU_HBM
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
#elif TARGET_OS_OSX
    GGML_UNUSED(alignment);
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
    int result = EFAULT;
    switch (alloc_status) {
        case KERN_SUCCESS:
            result = 0;
            break;
        case KERN_INVALID_ADDRESS:
            result = EINVAL;
            break;
        case KERN_NO_SPACE:
            result = ENOMEM;
            break;
        default:
            result = EFAULT;
            break;
    }
#else
    int result = posix_memalign(&aligned_memory, alignment, size);
#endif
    if (result != 0) {
        // Handle allocation failure
        const char *error_desc = "unknown allocation error";
        switch (result) {
            case EINVAL:
                error_desc = "invalid alignment value";
                break;
            case ENOMEM:
                error_desc = "insufficient memory";
                break;
        }
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
        return NULL;
    }
    return aligned_memory;
#endif
}

void ggml_aligned_free(void * ptr, size_t size) {
    GGML_UNUSED(size);
#if defined(_MSC_VER) || defined(__MINGW32__)
    _aligned_free(ptr);
#elif GGML_USE_CPU_HBM
    if (ptr != NULL) {
        hbw_free(ptr);
    }
#elif TARGET_OS_OSX
    if (ptr != NULL) {
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
    }
#else
    free(ptr);
#endif
}
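
// Usage sketch: buffers from ggml_aligned_malloc must be released with ggml_aligned_free,
// passing the same size (the size is needed by the vm_deallocate path on macOS):
//
//     void * buf = ggml_aligned_malloc(n_bytes);
//     // ... use buf ...
//     ggml_aligned_free(buf, n_bytes);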
inline static void * ggml_malloc(size_t size) {
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
        return NULL;
    }
    void * result = malloc(size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

// calloc
inline static void * ggml_calloc(size_t num, size_t size) {
    if (num == 0 || size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
        return NULL;
    }
    void * result = calloc(num, size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

#define GGML_MALLOC(size)      ggml_malloc(size)
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
#define GGML_FREE(ptr)         free(ptr)

const char * ggml_status_to_string(enum ggml_status status) {
    switch (status) {
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
    }
    return "GGML status: unknown";
}

float ggml_fp16_to_fp32(ggml_fp16_t x) {
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
    return GGML_FP16_TO_FP32(x);
}

ggml_fp16_t ggml_fp32_to_fp16(float x) {
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
    return GGML_FP32_TO_FP16(x);
}

float ggml_bf16_to_fp32(ggml_bf16_t x) {
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
    return GGML_BF16_TO_FP32(x);  // it just left shifts
}

ggml_bf16_t ggml_fp32_to_bf16(float x) {
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
    return GGML_FP32_TO_BF16(x);
}

void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = GGML_FP16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_FP32_TO_FP16(x[i]);
    }
}

void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_BF16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
    for (int i = 0; i < n; i++) {
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
    }
}

void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
    int i = 0;
#if defined(__AVX512BF16__)
    // subnormals are flushed to zero on this platform
    for (; i + 32 <= n; i += 32) {
        _mm512_storeu_si512(
            (__m512i *)(y + i),
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
                                      _mm512_loadu_ps(x + i))));
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_FP32_TO_BF16(x[i]);
    }
}
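
// Example (illustrative): round-tripping a small row through fp16 with the helpers above;
// values come back rounded to the nearest representable half-precision number:
//
//     float       src[4] = { 0.1f, -1.0f, 2.5f, 65504.0f };
//     ggml_fp16_t tmp[4];
//     float       dst[4];
//     ggml_fp32_to_fp16_row(src, tmp, 4);
//     ggml_fp16_to_fp32_row(tmp, dst, 4);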
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
}

//
// timing
//

#if defined(_MSC_VER) || defined(__MINGW32__)
static int64_t timer_freq, timer_start;

void ggml_time_init(void) {
    LARGE_INTEGER t;
    QueryPerformanceFrequency(&t);
    timer_freq = t.QuadPart;

    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
    // and the uptime is high enough.
    // We subtract the program start time to reduce the likelihood of that happening.
    QueryPerformanceCounter(&t);
    timer_start = t.QuadPart;
}

int64_t ggml_time_ms(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
}

int64_t ggml_time_us(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
}
#else
void ggml_time_init(void) {}

int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
}

int64_t ggml_time_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
}
#endif

int64_t ggml_cycles(void) {
    return clock();
}

int64_t ggml_cycles_per_ms(void) {
    return CLOCKS_PER_SEC/1000;
}
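
// Usage sketch: ggml_time_init() must run once before the timers are used on Windows
// (it is a no-op elsewhere); elapsed time is then a simple difference:
//
//     ggml_time_init();
//     const int64_t t0 = ggml_time_us();
//     // ... work ...
//     const int64_t elapsed_us = ggml_time_us() - t0;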
//
// cross-platform UTF-8 file paths
//

#ifdef _WIN32
static wchar_t * ggml_mbstowcs(const char * mbs) {
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
    if (!wlen) {
        errno = EINVAL;
        return NULL;
    }

    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
    if (!wlen) {
        GGML_FREE(wbuf);
        errno = EINVAL;
        return NULL;
    }

    return wbuf;
}
#endif

FILE * ggml_fopen(const char * fname, const char * mode) {
#ifdef _WIN32
    FILE * file = NULL;

    // convert fname (UTF-8)
    wchar_t * wfname = ggml_mbstowcs(fname);
    if (wfname) {
        // convert mode (ANSI)
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
        wchar_t * wmode_p = wmode;
        do {
            *wmode_p++ = (wchar_t)*mode;
        } while (*mode++);

        // open file
        file = _wfopen(wfname, wmode);

        GGML_FREE(wfname);
        GGML_FREE(wmode);
    }

    return file;
#else
    return fopen(fname, mode);
#endif
}
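
// Note: on Windows the UTF-8 path is converted to UTF-16 and opened with _wfopen so that
// non-ASCII file names work; on other platforms ggml_fopen is a plain fopen wrapper.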
static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);

static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
    [GGML_TYPE_I8] = {
        .type_name = "i8",
        .blck_size = 1,
        .type_size = sizeof(int8_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I16] = {
        .type_name = "i16",
        .blck_size = 1,
        .type_size = sizeof(int16_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I32] = {
        .type_name = "i32",
        .blck_size = 1,
        .type_size = sizeof(int32_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I64] = {
        .type_name = "i64",
        .blck_size = 1,
        .type_size = sizeof(int64_t),
        .is_quantized = false,
    },
    [GGML_TYPE_F64] = {
        .type_name = "f64",
        .blck_size = 1,
        .type_size = sizeof(double),
        .is_quantized = false,
    },
    [GGML_TYPE_F32] = {
        .type_name = "f32",
        .blck_size = 1,
        .type_size = sizeof(float),
        .is_quantized = false,
    },
    [GGML_TYPE_F16] = {
        .type_name = "f16",
        .blck_size = 1,
        .type_size = sizeof(ggml_fp16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row,
    },
    [GGML_TYPE_Q4_0] = {
        .type_name = "q4_0",
        .blck_size = QK4_0,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
    },
    [GGML_TYPE_Q4_1] = {
        .type_name = "q4_1",
        .blck_size = QK4_1,
        .type_size = sizeof(block_q4_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
    },
    [4] = { // GGML_TYPE_Q4_2
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [5] = { // GGML_TYPE_Q4_3
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [GGML_TYPE_Q5_0] = {
        .type_name = "q5_0",
        .blck_size = QK5_0,
        .type_size = sizeof(block_q5_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref,
    },
    [GGML_TYPE_Q5_1] = {
        .type_name = "q5_1",
        .blck_size = QK5_1,
        .type_size = sizeof(block_q5_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
    },
    [GGML_TYPE_Q8_0] = {
        .type_name = "q8_0",
        .blck_size = QK8_0,
        .type_size = sizeof(block_q8_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q8_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
    },
    [GGML_TYPE_Q8_1] = {
        .type_name = "q8_1",
        .blck_size = QK8_1,
        .type_size = sizeof(block_q8_1),
        .is_quantized = true,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
    },
    [GGML_TYPE_Q2_K] = {
        .type_name = "q2_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q2_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q2_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref,
    },
    [GGML_TYPE_Q3_K] = {
        .type_name = "q3_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q3_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q3_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref,
    },
    [GGML_TYPE_Q4_K] = {
        .type_name = "q4_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q4_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref,
    },
    [GGML_TYPE_Q5_K] = {
        .type_name = "q5_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q5_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref,
    },
    [GGML_TYPE_Q6_K] = {
        .type_name = "q6_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q6_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q6_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref,
    },
    [GGML_TYPE_IQ2_XXS] = {
        .type_name = "iq2_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ2_XS] = {
        .type_name = "iq2_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xs,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ3_XXS] = {
        .type_name = "iq3_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
    },
    [GGML_TYPE_IQ3_S] = {
        .type_name = "iq3_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref,
    },
    [GGML_TYPE_IQ2_S] = {
        .type_name = "iq2_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref,
    },
    [GGML_TYPE_IQ1_S] = {
        .type_name = "iq1_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_s,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ1_M] = {
        .type_name = "iq1_m",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_m),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_m,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ4_NL] = {
        .type_name = "iq4_nl",
        .blck_size = QK4_NL,
        .type_size = sizeof(block_iq4_nl),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
    },
    [GGML_TYPE_IQ4_XS] = {
        .type_name = "iq4_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq4_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref,
    },
    [GGML_TYPE_Q8_K] = {
        .type_name = "q8_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q8_K),
        .is_quantized = true,
    },
    [GGML_TYPE_BF16] = {
        .type_name = "bf16",
        .blck_size = 1,
        .type_size = sizeof(ggml_bf16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
    },
    [31] = { // GGML_TYPE_Q4_0_4_4
        .type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [32] = { // GGML_TYPE_Q4_0_4_8
        .type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [33] = { // GGML_TYPE_Q4_0_8_8
        .type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [GGML_TYPE_TQ1_0] = {
        .type_name = "tq1_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq1_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq1_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq1_0_ref,
    },
    [GGML_TYPE_TQ2_0] = {
        .type_name = "tq2_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq2_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq2_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq2_0_ref,
    },
    [36] = { // GGML_TYPE_IQ4_NL_4_4
        .type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [37] = { // GGML_TYPE_IQ4_NL_4_8
        .type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [38] = { // GGML_TYPE_IQ4_NL_8_8
        .type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
};

const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
    GGML_ASSERT(type < GGML_TYPE_COUNT);
    return &type_traits[type];
}
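
// Example (illustrative): looking up a type through the public accessor; for Q4_0 the
// block size is QK4_0 (32 elements) and type_size is sizeof(block_q4_0):
//
//     const struct ggml_type_traits * tt = ggml_get_type_traits(GGML_TYPE_Q4_0);
//     // tt->blck_size == 32, tt->is_quantized == true, tt->to_float dequantizes one row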
//
// ggml object
//

struct ggml_object {
    size_t offs;
    size_t size;

    struct ggml_object * next;

    enum ggml_object_type type;

    char padding[4];
};

static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

//
// ggml context
//

struct ggml_context {
    size_t mem_size;
    void * mem_buffer;
    bool mem_buffer_owned;
    bool no_alloc;

    int n_objects;

    struct ggml_object * objects_begin;
    struct ggml_object * objects_end;
};
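
// Note: objects (tensors, graphs, work buffers) are carved out of mem_buffer sequentially;
// each allocation is headed by a ggml_object and linked through ->next, with objects_begin
// and objects_end tracking the first and last entries (see ggml_new_object further below).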
  806. //
  807. // data types
  808. //
  809. static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  810. "NONE",
  811. "DUP",
  812. "ADD",
  813. "ADD1",
  814. "ACC",
  815. "SUB",
  816. "MUL",
  817. "DIV",
  818. "SQR",
  819. "SQRT",
  820. "LOG",
  821. "SIN",
  822. "COS",
  823. "SUM",
  824. "SUM_ROWS",
  825. "MEAN",
  826. "ARGMAX",
  827. "COUNT_EQUAL",
  828. "REPEAT",
  829. "REPEAT_BACK",
  830. "CONCAT",
  831. "SILU_BACK",
  832. "NORM",
  833. "RMS_NORM",
  834. "RMS_NORM_BACK",
  835. "GROUP_NORM",
  836. "L2_NORM",
  837. "MUL_MAT",
  838. "MUL_MAT_ID",
  839. "OUT_PROD",
  840. "SCALE",
  841. "SET",
  842. "CPY",
  843. "CONT",
  844. "RESHAPE",
  845. "VIEW",
  846. "PERMUTE",
  847. "TRANSPOSE",
  848. "GET_ROWS",
  849. "GET_ROWS_BACK",
  850. "DIAG",
  851. "DIAG_MASK_INF",
  852. "DIAG_MASK_ZERO",
  853. "SOFT_MAX",
  854. "SOFT_MAX_BACK",
  855. "ROPE",
  856. "ROPE_BACK",
  857. "CLAMP",
  858. "CONV_TRANSPOSE_1D",
  859. "IM2COL",
  860. "IM2COL_BACK",
  861. "CONV_2D_DW",
  862. "CONV_TRANSPOSE_2D",
  863. "POOL_1D",
  864. "POOL_2D",
  865. "POOL_2D_BACK",
  866. "UPSCALE",
  867. "PAD",
  868. "PAD_REFLECT_1D",
  869. "ARANGE",
  870. "TIMESTEP_EMBEDDING",
  871. "ARGSORT",
  872. "LEAKY_RELU",
  873. "FLASH_ATTN_EXT",
  874. "FLASH_ATTN_BACK",
  875. "SSM_CONV",
  876. "SSM_SCAN",
  877. "WIN_PART",
  878. "WIN_UNPART",
  879. "GET_REL_POS",
  880. "ADD_REL_POS",
  881. "RWKV_WKV6",
  882. "GATED_LINEAR_ATTN",
  883. "RWKV_WKV7",
  884. "UNARY",
  885. "MAP_CUSTOM1",
  886. "MAP_CUSTOM2",
  887. "MAP_CUSTOM3",
  888. "CUSTOM",
  889. "CROSS_ENTROPY_LOSS",
  890. "CROSS_ENTROPY_LOSS_BACK",
  891. "OPT_STEP_ADAMW",
  892. };
  893. static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
  894. static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  895. "none",
  896. "x",
  897. "x+y",
  898. "x+y",
  899. "view(x,nb,offset)+=y->x",
  900. "x-y",
  901. "x*y",
  902. "x/y",
  903. "x^2",
  904. "√x",
  905. "log(x)",
  906. "sin(x)",
  907. "cos(x)",
  908. "Σx",
  909. "Σx_k",
  910. "Σx/n",
  911. "argmax(x)",
  912. "count_equal(x)",
  913. "repeat(x)",
  914. "repeat_back(x)",
  915. "concat(x, y)",
  916. "silu_back(x)",
  917. "norm(x)",
  918. "rms_norm(x)",
  919. "rms_norm_back(x)",
  920. "group_norm(x)",
  921. "l2_norm(x)",
  922. "X*Y",
  923. "X[i]*Y",
  924. "X*Y",
  925. "x*v",
  926. "y-\\>view(x)",
  927. "x-\\>y",
  928. "cont(x)",
  929. "reshape(x)",
  930. "view(x)",
  931. "permute(x)",
  932. "transpose(x)",
  933. "get_rows(x)",
  934. "get_rows_back(x)",
  935. "diag(x)",
  936. "diag_mask_inf(x)",
  937. "diag_mask_zero(x)",
  938. "soft_max(x)",
  939. "soft_max_back(x)",
  940. "rope(x)",
  941. "rope_back(x)",
  942. "clamp(x)",
  943. "conv_transpose_1d(x)",
  944. "im2col(x)",
  945. "im2col_back(x)",
  946. "conv_2d_dw(x)",
  947. "conv_transpose_2d(x)",
  948. "pool_1d(x)",
  949. "pool_2d(x)",
  950. "pool_2d_back(x)",
  951. "upscale(x)",
  952. "pad(x)",
  953. "pad_reflect_1d(x)",
  954. "arange(start, stop, step)",
  955. "timestep_embedding(timesteps, dim, max_period)",
  956. "argsort(x)",
  957. "leaky_relu(x)",
  958. "flash_attn_ext(x)",
  959. "flash_attn_back(x)",
  960. "ssm_conv(x)",
  961. "ssm_scan(x)",
  962. "win_part(x)",
  963. "win_unpart(x)",
  964. "get_rel_pos(x)",
  965. "add_rel_pos(x)",
  966. "rwkv_wkv6(k, v, r, tf, td, s)",
  967. "gated_linear_attn(k, v, q, gate, s)",
  968. "rwkv_wkv7(r, w, k, v, a, b, s)",
  969. "unary(x)",
  970. "map_custom(x)",
  971. "map_custom(x,y)",
  972. "map_custom(x,y,z)",
  973. "custom(x)",
  974. "cross_entropy_loss(x,y)",
  975. "cross_entropy_loss_back(x,y)",
  976. "adamw(x)",
  977. };
  978. static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
  979. static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
  980. static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
  981. "ABS",
  982. "SGN",
  983. "NEG",
  984. "STEP",
  985. "TANH",
  986. "ELU",
  987. "RELU",
  988. "SIGMOID",
  989. "GELU",
  990. "GELU_QUICK",
  991. "SILU",
  992. "HARDSWISH",
  993. "HARDSIGMOID",
  994. "EXP",
  995. "GELU_ERF",
  996. };
  997. static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
  998. static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  999. static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
  1000. ////////////////////////////////////////////////////////////////////////////////
  1001. void ggml_print_object(const struct ggml_object * obj) {
  1002. GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
  1003. obj->type, obj->offs, obj->size, (const void *) obj->next);
  1004. }
  1005. void ggml_print_objects(const struct ggml_context * ctx) {
  1006. struct ggml_object * obj = ctx->objects_begin;
  1007. GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
  1008. while (obj != NULL) {
  1009. ggml_print_object(obj);
  1010. obj = obj->next;
  1011. }
  1012. GGML_LOG_INFO("%s: --- end ---\n", __func__);
  1013. }
  1014. int64_t ggml_nelements(const struct ggml_tensor * tensor) {
  1015. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1016. return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1017. }
  1018. int64_t ggml_nrows(const struct ggml_tensor * tensor) {
  1019. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1020. return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1021. }
  1022. size_t ggml_nbytes(const struct ggml_tensor * tensor) {
  1023. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1024. if (tensor->ne[i] <= 0) {
  1025. return 0;
  1026. }
  1027. }
  1028. size_t nbytes;
  1029. const size_t blck_size = ggml_blck_size(tensor->type);
  1030. if (blck_size == 1) {
  1031. nbytes = ggml_type_size(tensor->type);
  1032. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1033. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1034. }
  1035. }
  1036. else {
  1037. nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
  1038. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1039. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1040. }
  1041. }
  1042. return nbytes;
  1043. }
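// note: for non-quantized types this is the byte offset of the last element plus one
// element, so the result is also correct for views and padded/strided layouts, not just
// for densely packed tensors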
  1044. size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
  1045. return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
  1046. }
  1047. int64_t ggml_blck_size(enum ggml_type type) {
  1048. return type_traits[type].blck_size;
  1049. }
  1050. size_t ggml_type_size(enum ggml_type type) {
  1051. return type_traits[type].type_size;
  1052. }
  1053. size_t ggml_row_size(enum ggml_type type, int64_t ne) {
  1054. assert(ne % ggml_blck_size(type) == 0);
  1055. return ggml_type_size(type)*ne/ggml_blck_size(type);
  1056. }
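// example (illustrative, not part of the library): the row size is the per-block size
// scaled by the number of blocks in the row; assuming the standard layouts
// (F32: 1 element per 4-byte block, Q4_0: 32 elements per 18-byte block):
//
//   ggml_row_size(GGML_TYPE_F32,  128); // 128/1  * 4  = 512 bytes
//   ggml_row_size(GGML_TYPE_Q4_0, 128); // 128/32 * 18 =  72 bytes
//
// ne must be a multiple of the block size, which the assert above enforces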
  1057. double ggml_type_sizef(enum ggml_type type) {
  1058. return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
  1059. }
  1060. const char * ggml_type_name(enum ggml_type type) {
  1061. return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
  1062. }
  1063. bool ggml_is_quantized(enum ggml_type type) {
  1064. return type_traits[type].is_quantized;
  1065. }
  1066. const char * ggml_op_name(enum ggml_op op) {
  1067. return GGML_OP_NAME[op];
  1068. }
  1069. const char * ggml_op_symbol(enum ggml_op op) {
  1070. return GGML_OP_SYMBOL[op];
  1071. }
  1072. const char * ggml_unary_op_name(enum ggml_unary_op op) {
  1073. return GGML_UNARY_OP_NAME[op];
  1074. }
  1075. const char * ggml_op_desc(const struct ggml_tensor * t) {
  1076. if (t->op == GGML_OP_UNARY) {
  1077. enum ggml_unary_op uop = ggml_get_unary_op(t);
  1078. return ggml_unary_op_name(uop);
  1079. }
  1080. return ggml_op_name(t->op);
  1081. }
  1082. size_t ggml_element_size(const struct ggml_tensor * tensor) {
  1083. return ggml_type_size(tensor->type);
  1084. }
  1085. bool ggml_is_scalar(const struct ggml_tensor * tensor) {
  1086. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1087. return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1088. }
  1089. bool ggml_is_vector(const struct ggml_tensor * tensor) {
  1090. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1091. return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1092. }
  1093. bool ggml_is_matrix(const struct ggml_tensor * tensor) {
  1094. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1095. return tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1096. }
  1097. bool ggml_is_3d(const struct ggml_tensor * tensor) {
  1098. return tensor->ne[3] == 1;
  1099. }
  1100. int ggml_n_dims(const struct ggml_tensor * tensor) {
  1101. for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
  1102. if (tensor->ne[i] > 1) {
  1103. return i + 1;
  1104. }
  1105. }
  1106. return 1;
  1107. }
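// note: trailing dimensions of size 1 are not counted, so ne = {4, 3, 1, 1} reports
// 2 dimensions while ne = {1, 1, 1, 1} (a scalar) still reports 1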
  1108. enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
  1109. enum ggml_type wtype = GGML_TYPE_COUNT;
  1110. switch (ftype) {
  1111. case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
  1112. case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
  1113. case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
  1114. case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
  1115. case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
  1116. case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
  1117. case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
  1118. case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
  1119. case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
  1120. case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
  1121. case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
  1122. case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
  1123. case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
  1124. case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
  1125. case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
  1126. case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
  1127. case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
  1128. case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
  1129. case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
  1130. case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
  1131. case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
  1132. case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
  1133. case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
  1134. case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
  1135. }
  1136. GGML_ASSERT(wtype != GGML_TYPE_COUNT);
  1137. return wtype;
  1138. }
  1139. size_t ggml_tensor_overhead(void) {
  1140. return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
  1141. }
  1142. bool ggml_is_transposed(const struct ggml_tensor * tensor) {
  1143. return tensor->nb[0] > tensor->nb[1];
  1144. }
  1145. static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
  1146. size_t next_nb = ggml_type_size(tensor->type);
  1147. if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
  1148. return false;
  1149. }
  1150. next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
  1151. for (int i = 1; i < GGML_MAX_DIMS; i++) {
  1152. if (tensor->ne[i] != 1) {
  1153. if (i > n) {
  1154. if (tensor->nb[i] != next_nb) {
  1155. return false;
  1156. }
  1157. next_nb *= tensor->ne[i];
  1158. } else {
  1159. // this dimension does not need to be contiguous
  1160. next_nb = tensor->ne[i]*tensor->nb[i];
  1161. }
  1162. }
  1163. }
  1164. return true;
  1165. }
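// note: ggml_is_contiguous_n(t, n) allows dimensions 1..n to have arbitrary strides,
// while every dimension above n must follow the previous one without gaps
// (n == 0 is full contiguity, n == 1 additionally tolerates a padded row pitch, etc.)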
  1166. bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
  1167. return ggml_is_contiguous_0(tensor);
  1168. }
  1169. bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
  1170. return ggml_is_contiguous_n(tensor, 0);
  1171. }
  1172. bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
  1173. return ggml_is_contiguous_n(tensor, 1);
  1174. }
  1175. bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
  1176. return ggml_is_contiguous_n(tensor, 2);
  1177. }
  1178. bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
  1179. return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
  1180. }
  1181. bool ggml_is_permuted(const struct ggml_tensor * tensor) {
  1182. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1183. return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
  1184. }
  1185. bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
  1186. return
  1187. tensor->nb[0] > tensor->nb[2] &&
  1188. tensor->nb[1] > tensor->nb[0] &&
  1189. tensor->nb[2] == ggml_type_size(tensor->type);
  1190. }
  1191. static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
  1192. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1193. return
  1194. tensor->nb[0] == ggml_type_size(tensor->type) &&
  1195. tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
  1196. tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
  1197. }
  1198. bool ggml_is_empty(const struct ggml_tensor * tensor) {
  1199. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1200. if (tensor->ne[i] == 0) {
  1201. // empty if any dimension has no elements
  1202. return true;
  1203. }
  1204. }
  1205. return false;
  1206. }
  1207. bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1208. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1209. return
  1210. (t0->ne[0] == t1->ne[0]) &&
  1211. (t0->ne[1] == t1->ne[1]) &&
  1212. (t0->ne[2] == t1->ne[2]) &&
  1213. (t0->ne[3] == t1->ne[3]);
  1214. }
  1215. bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1216. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1217. return
  1218. (t0->nb[0] == t1->nb[0]) &&
  1219. (t0->nb[1] == t1->nb[1]) &&
  1220. (t0->nb[2] == t1->nb[2]) &&
  1221. (t0->nb[3] == t1->nb[3]);
  1222. }
  1223. // check if t1 can be represented as a repetition of t0
  1224. bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1225. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1226. return ggml_is_empty(t0) ? ggml_is_empty(t1) :
  1227. (t1->ne[0]%t0->ne[0] == 0) &&
  1228. (t1->ne[1]%t0->ne[1] == 0) &&
  1229. (t1->ne[2]%t0->ne[2] == 0) &&
  1230. (t1->ne[3]%t0->ne[3] == 0);
  1231. }
  1232. static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1233. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1234. return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
  1235. }
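// note: ggml_can_repeat(t0, t1) is the broadcasting rule used by the binary ops below:
// every dimension of t1 must be a whole multiple of the corresponding dimension of t0,
// e.g. a bias with ne = {n, 1, 1, 1} can be repeated over ne = {n, rows, batch, 1}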
  1236. // assert that pointer is aligned to GGML_MEM_ALIGN
  1237. #define GGML_ASSERT_ALIGNED(ptr) \
  1238. GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
  1239. ////////////////////////////////////////////////////////////////////////////////
  1240. struct ggml_context * ggml_init(struct ggml_init_params params) {
  1241. static bool is_first_call = true;
  1242. ggml_critical_section_start();
  1243. if (is_first_call) {
  1244. // initialize time system (required on Windows)
  1245. ggml_time_init();
  1246. for (int i = 0; i < (1 << 16); ++i) {
  1247. union {
  1248. uint16_t u16;
  1249. ggml_fp16_t fp16;
  1250. } u = {i};
  1251. ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
  1252. }
  1253. is_first_call = false;
  1254. }
  1255. ggml_critical_section_end();
  1256. struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
  1257. // allow calling ggml_init with mem_size == 0
  1258. if (params.mem_size == 0) {
  1259. params.mem_size = GGML_MEM_ALIGN;
  1260. }
  1261. const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
  1262. *ctx = (struct ggml_context) {
  1263. /*.mem_size =*/ mem_size,
  1264. /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
  1265. /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
  1266. /*.no_alloc =*/ params.no_alloc,
  1267. /*.n_objects =*/ 0,
  1268. /*.objects_begin =*/ NULL,
  1269. /*.objects_end =*/ NULL,
  1270. };
  1271. GGML_ASSERT(ctx->mem_buffer != NULL);
  1272. GGML_ASSERT_ALIGNED(ctx->mem_buffer);
  1273. GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
  1274. return ctx;
  1275. }
  1276. void ggml_reset(struct ggml_context * ctx) {
  1277. if (ctx == NULL) {
  1278. return;
  1279. }
  1280. ctx->n_objects = 0;
  1281. ctx->objects_begin = NULL;
  1282. ctx->objects_end = NULL;
  1283. }
  1284. void ggml_free(struct ggml_context * ctx) {
  1285. if (ctx == NULL) {
  1286. return;
  1287. }
  1288. if (ctx->mem_buffer_owned) {
  1289. ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
  1290. }
  1291. GGML_FREE(ctx);
  1292. }
  1293. size_t ggml_used_mem(const struct ggml_context * ctx) {
  1294. return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
  1295. }
  1296. bool ggml_get_no_alloc(struct ggml_context * ctx) {
  1297. return ctx->no_alloc;
  1298. }
  1299. void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
  1300. ctx->no_alloc = no_alloc;
  1301. }
  1302. void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
  1303. return ctx->mem_buffer;
  1304. }
  1305. size_t ggml_get_mem_size(const struct ggml_context * ctx) {
  1306. return ctx->mem_size;
  1307. }
  1308. size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
  1309. size_t max_size = 0;
  1310. for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
  1311. size_t bytes = ggml_nbytes(tensor);
  1312. max_size = MAX(max_size, bytes);
  1313. }
  1314. return max_size;
  1315. }
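// illustrative sketch (not part of ggml): a minimal context lifecycle using only the
// functions defined in this file; the function name, pool size and tensor shape are
// hypothetical values chosen for the example, and error handling is omitted
static void ggml_example_context_usage(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // pool for objects + tensor data
        /*.mem_buffer =*/ NULL,         // NULL -> ggml_init allocates (and ggml_free releases) the pool
        /*.no_alloc   =*/ false,        // false -> tensor data is placed inside the pool
    };

    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3); // ne = {4, 3, 1, 1}
    ggml_set_name(t, "example");

    GGML_LOG_INFO("%s: nbytes = %zu, used mem = %zu\n", __func__, ggml_nbytes(t), ggml_used_mem(ctx));

    ggml_free(ctx); // releases the pool together with every tensor allocated in it
}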
  1316. ////////////////////////////////////////////////////////////////////////////////
  1317. static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
  1318. // always insert objects at the end of the context's memory pool
  1319. struct ggml_object * obj_cur = ctx->objects_end;
  1320. const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
  1321. const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
  1322. const size_t cur_end = cur_offs + cur_size;
  1323. // align to GGML_MEM_ALIGN
  1324. size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
  1325. char * const mem_buffer = ctx->mem_buffer;
  1326. struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
  1327. if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
  1328. GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
  1329. __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
  1330. #ifndef NDEBUG
  1331. GGML_ABORT("not enough space in the context's memory pool");
  1332. #endif
  1333. return NULL;
  1334. }
  1335. *obj_new = (struct ggml_object) {
  1336. .offs = cur_end + GGML_OBJECT_SIZE,
  1337. .size = size_needed,
  1338. .next = NULL,
  1339. .type = type,
  1340. };
  1341. GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
  1342. if (obj_cur != NULL) {
  1343. obj_cur->next = obj_new;
  1344. } else {
  1345. // this is the first object in this context
  1346. ctx->objects_begin = obj_new;
  1347. }
  1348. ctx->objects_end = obj_new;
  1349. //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
  1350. return obj_new;
  1351. }
  1352. static struct ggml_tensor * ggml_new_tensor_impl(
  1353. struct ggml_context * ctx,
  1354. enum ggml_type type,
  1355. int n_dims,
  1356. const int64_t * ne,
  1357. struct ggml_tensor * view_src,
  1358. size_t view_offs) {
  1359. GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
  1360. GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
  1361. // find the base tensor and absolute offset
  1362. if (view_src != NULL && view_src->view_src != NULL) {
  1363. view_offs += view_src->view_offs;
  1364. view_src = view_src->view_src;
  1365. }
  1366. size_t data_size = ggml_row_size(type, ne[0]);
  1367. for (int i = 1; i < n_dims; i++) {
  1368. data_size *= ne[i];
  1369. }
  1370. GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
  1371. void * data = view_src != NULL ? view_src->data : NULL;
  1372. if (data != NULL) {
  1373. data = (char *) data + view_offs;
  1374. }
  1375. size_t obj_alloc_size = 0;
  1376. if (view_src == NULL && !ctx->no_alloc) {
  1377. // allocate tensor data in the context's memory pool
  1378. obj_alloc_size = data_size;
  1379. }
  1380. struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
  1381. GGML_ASSERT(obj_new);
  1382. struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
  1383. *result = (struct ggml_tensor) {
  1384. /*.type =*/ type,
  1385. /*.buffer =*/ NULL,
  1386. /*.ne =*/ { 1, 1, 1, 1 },
  1387. /*.nb =*/ { 0, 0, 0, 0 },
  1388. /*.op =*/ GGML_OP_NONE,
  1389. /*.op_params =*/ { 0 },
  1390. /*.flags =*/ 0,
  1391. /*.src =*/ { NULL },
  1392. /*.view_src =*/ view_src,
  1393. /*.view_offs =*/ view_offs,
  1394. /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
  1395. /*.name =*/ { 0 },
  1396. /*.extra =*/ NULL,
  1397. /*.padding =*/ { 0 },
  1398. };
  1399. // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
  1400. //GGML_ASSERT_ALIGNED(result->data);
  1401. for (int i = 0; i < n_dims; i++) {
  1402. result->ne[i] = ne[i];
  1403. }
  1404. result->nb[0] = ggml_type_size(type);
  1405. result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
  1406. for (int i = 2; i < GGML_MAX_DIMS; i++) {
  1407. result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
  1408. }
  1409. ctx->n_objects++;
  1410. return result;
  1411. }
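// note: strides are fully determined by the type and shape, e.g. an F32 tensor with
// ne = {4, 3, 1, 1} gets nb = {4, 16, 48, 48}: nb[0] is the element (block) size,
// nb[1] the row pitch, and each higher stride multiplies in the previous dimension;
// views (below) copy their strides from the source tensor instead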
  1412. struct ggml_tensor * ggml_new_tensor(
  1413. struct ggml_context * ctx,
  1414. enum ggml_type type,
  1415. int n_dims,
  1416. const int64_t * ne) {
  1417. return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
  1418. }
  1419. struct ggml_tensor * ggml_new_tensor_1d(
  1420. struct ggml_context * ctx,
  1421. enum ggml_type type,
  1422. int64_t ne0) {
  1423. return ggml_new_tensor(ctx, type, 1, &ne0);
  1424. }
  1425. struct ggml_tensor * ggml_new_tensor_2d(
  1426. struct ggml_context * ctx,
  1427. enum ggml_type type,
  1428. int64_t ne0,
  1429. int64_t ne1) {
  1430. const int64_t ne[2] = { ne0, ne1 };
  1431. return ggml_new_tensor(ctx, type, 2, ne);
  1432. }
  1433. struct ggml_tensor * ggml_new_tensor_3d(
  1434. struct ggml_context * ctx,
  1435. enum ggml_type type,
  1436. int64_t ne0,
  1437. int64_t ne1,
  1438. int64_t ne2) {
  1439. const int64_t ne[3] = { ne0, ne1, ne2 };
  1440. return ggml_new_tensor(ctx, type, 3, ne);
  1441. }
  1442. struct ggml_tensor * ggml_new_tensor_4d(
  1443. struct ggml_context * ctx,
  1444. enum ggml_type type,
  1445. int64_t ne0,
  1446. int64_t ne1,
  1447. int64_t ne2,
  1448. int64_t ne3) {
  1449. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  1450. return ggml_new_tensor(ctx, type, 4, ne);
  1451. }
  1452. void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
  1453. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
  1454. return (uint8_t *)ctx->mem_buffer + obj->offs;
  1455. }
  1456. struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
  1457. return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
  1458. }
  1459. void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
  1460. const int64_t ne2 = tensor->ne[2];
  1461. const int64_t ne1 = tensor->ne[1];
  1462. const int64_t ne0 = tensor->ne[0];
  1463. const int64_t i3_ = (i/(ne2*ne1*ne0));
  1464. const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
  1465. const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
  1466. const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
  1467. if (i0) {
  1468. * i0 = i0_;
  1469. }
  1470. if (i1) {
  1471. * i1 = i1_;
  1472. }
  1473. if (i2) {
  1474. * i2 = i2_;
  1475. }
  1476. if (i3) {
  1477. * i3 = i3_;
  1478. }
  1479. }
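// example: with ne = {4, 3, 2, 1} the flat index i = 17 unravels to
// (i0, i1, i2, i3) = (1, 1, 1, 0), since ((0*2 + 1)*3 + 1)*4 + 1 == 17;
// any of the output pointers may be NULL if that coordinate is not needed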
  1480. void * ggml_get_data(const struct ggml_tensor * tensor) {
  1481. return tensor->data;
  1482. }
  1483. float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
  1484. assert(tensor->type == GGML_TYPE_F32);
  1485. return (float *)(tensor->data);
  1486. }
  1487. enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
  1488. GGML_ASSERT(tensor->op == GGML_OP_UNARY);
  1489. return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
  1490. }
  1491. const char * ggml_get_name(const struct ggml_tensor * tensor) {
  1492. return tensor->name;
  1493. }
  1494. struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
  1495. size_t i;
  1496. for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
  1497. tensor->name[i] = name[i];
  1498. }
  1499. tensor->name[i] = '\0';
  1500. return tensor;
  1501. }
  1502. struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
  1503. va_list args;
  1504. va_start(args, fmt);
  1505. vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
  1506. va_end(args);
  1507. return tensor;
  1508. }
  1509. struct ggml_tensor * ggml_view_tensor(
  1510. struct ggml_context * ctx,
  1511. struct ggml_tensor * src) {
  1512. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
  1513. ggml_format_name(result, "%s (view)", src->name);
  1514. for (int i = 0; i < GGML_MAX_DIMS; i++) {
  1515. result->nb[i] = src->nb[i];
  1516. }
  1517. return result;
  1518. }
  1519. struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
  1520. struct ggml_object * obj = ctx->objects_begin;
  1521. char * const mem_buffer = ctx->mem_buffer;
  1522. while (obj != NULL) {
  1523. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1524. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1525. }
  1526. obj = obj->next;
  1527. }
  1528. return NULL;
  1529. }
  1530. struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
  1531. struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
  1532. obj = obj->next;
  1533. char * const mem_buffer = ctx->mem_buffer;
  1534. while (obj != NULL) {
  1535. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1536. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1537. }
  1538. obj = obj->next;
  1539. }
  1540. return NULL;
  1541. }
  1542. struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
  1543. struct ggml_object * obj = ctx->objects_begin;
  1544. char * const mem_buffer = ctx->mem_buffer;
  1545. while (obj != NULL) {
  1546. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1547. struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
  1548. if (strcmp(cur->name, name) == 0) {
  1549. return cur;
  1550. }
  1551. }
  1552. obj = obj->next;
  1553. }
  1554. return NULL;
  1555. }
  1556. ////////////////////////////////////////////////////////////////////////////////
  1557. // ggml_dup
  1558. static struct ggml_tensor * ggml_dup_impl(
  1559. struct ggml_context * ctx,
  1560. struct ggml_tensor * a,
  1561. bool inplace) {
  1562. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1563. result->op = GGML_OP_DUP;
  1564. result->src[0] = a;
  1565. return result;
  1566. }
  1567. struct ggml_tensor * ggml_dup(
  1568. struct ggml_context * ctx,
  1569. struct ggml_tensor * a) {
  1570. return ggml_dup_impl(ctx, a, false);
  1571. }
  1572. struct ggml_tensor * ggml_dup_inplace(
  1573. struct ggml_context * ctx,
  1574. struct ggml_tensor * a) {
  1575. return ggml_dup_impl(ctx, a, true);
  1576. }
  1577. // ggml_add
  1578. static struct ggml_tensor * ggml_add_impl(
  1579. struct ggml_context * ctx,
  1580. struct ggml_tensor * a,
  1581. struct ggml_tensor * b,
  1582. bool inplace) {
  1583. GGML_ASSERT(ggml_can_repeat(b, a));
  1584. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1585. result->op = GGML_OP_ADD;
  1586. result->src[0] = a;
  1587. result->src[1] = b;
  1588. return result;
  1589. }
  1590. struct ggml_tensor * ggml_add(
  1591. struct ggml_context * ctx,
  1592. struct ggml_tensor * a,
  1593. struct ggml_tensor * b) {
  1594. return ggml_add_impl(ctx, a, b, false);
  1595. }
  1596. struct ggml_tensor * ggml_add_inplace(
  1597. struct ggml_context * ctx,
  1598. struct ggml_tensor * a,
  1599. struct ggml_tensor * b) {
  1600. return ggml_add_impl(ctx, a, b, true);
  1601. }
  1602. // ggml_add_cast
  1603. static struct ggml_tensor * ggml_add_cast_impl(
  1604. struct ggml_context * ctx,
  1605. struct ggml_tensor * a,
  1606. struct ggml_tensor * b,
  1607. enum ggml_type type) {
  1608. // TODO: support less-strict constraint
  1609. // GGML_ASSERT(ggml_can_repeat(b, a));
  1610. GGML_ASSERT(ggml_can_repeat_rows(b, a));
  1611. // currently only supported for quantized input, f16, and bf16
  1612. GGML_ASSERT(ggml_is_quantized(a->type) ||
  1613. a->type == GGML_TYPE_F16 ||
  1614. a->type == GGML_TYPE_BF16);
  1615. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  1616. result->op = GGML_OP_ADD;
  1617. result->src[0] = a;
  1618. result->src[1] = b;
  1619. return result;
  1620. }
  1621. struct ggml_tensor * ggml_add_cast(
  1622. struct ggml_context * ctx,
  1623. struct ggml_tensor * a,
  1624. struct ggml_tensor * b,
  1625. enum ggml_type type) {
  1626. return ggml_add_cast_impl(ctx, a, b, type);
  1627. }
  1628. // ggml_add1
  1629. static struct ggml_tensor * ggml_add1_impl(
  1630. struct ggml_context * ctx,
  1631. struct ggml_tensor * a,
  1632. struct ggml_tensor * b,
  1633. bool inplace) {
  1634. GGML_ASSERT(ggml_is_scalar(b));
  1635. GGML_ASSERT(ggml_is_padded_1d(a));
  1636. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1637. result->op = GGML_OP_ADD1;
  1638. result->src[0] = a;
  1639. result->src[1] = b;
  1640. return result;
  1641. }
  1642. struct ggml_tensor * ggml_add1(
  1643. struct ggml_context * ctx,
  1644. struct ggml_tensor * a,
  1645. struct ggml_tensor * b) {
  1646. return ggml_add1_impl(ctx, a, b, false);
  1647. }
  1648. struct ggml_tensor * ggml_add1_inplace(
  1649. struct ggml_context * ctx,
  1650. struct ggml_tensor * a,
  1651. struct ggml_tensor * b) {
  1652. return ggml_add1_impl(ctx, a, b, true);
  1653. }
  1654. // ggml_acc
  1655. static struct ggml_tensor * ggml_acc_impl(
  1656. struct ggml_context * ctx,
  1657. struct ggml_tensor * a,
  1658. struct ggml_tensor * b,
  1659. size_t nb1,
  1660. size_t nb2,
  1661. size_t nb3,
  1662. size_t offset,
  1663. bool inplace) {
  1664. GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
  1665. GGML_ASSERT(ggml_is_contiguous(a));
  1666. GGML_ASSERT(a->type == GGML_TYPE_F32);
  1667. GGML_ASSERT(b->type == GGML_TYPE_F32);
  1668. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1669. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  1670. ggml_set_op_params(result, params, sizeof(params));
  1671. result->op = GGML_OP_ACC;
  1672. result->src[0] = a;
  1673. result->src[1] = b;
  1674. return result;
  1675. }
  1676. struct ggml_tensor * ggml_acc(
  1677. struct ggml_context * ctx,
  1678. struct ggml_tensor * a,
  1679. struct ggml_tensor * b,
  1680. size_t nb1,
  1681. size_t nb2,
  1682. size_t nb3,
  1683. size_t offset) {
  1684. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  1685. }
  1686. struct ggml_tensor * ggml_acc_inplace(
  1687. struct ggml_context * ctx,
  1688. struct ggml_tensor * a,
  1689. struct ggml_tensor * b,
  1690. size_t nb1,
  1691. size_t nb2,
  1692. size_t nb3,
  1693. size_t offset) {
  1694. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  1695. }
  1696. // ggml_sub
  1697. static struct ggml_tensor * ggml_sub_impl(
  1698. struct ggml_context * ctx,
  1699. struct ggml_tensor * a,
  1700. struct ggml_tensor * b,
  1701. bool inplace) {
  1702. GGML_ASSERT(ggml_can_repeat(b, a));
  1703. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1704. result->op = GGML_OP_SUB;
  1705. result->src[0] = a;
  1706. result->src[1] = b;
  1707. return result;
  1708. }
  1709. struct ggml_tensor * ggml_sub(
  1710. struct ggml_context * ctx,
  1711. struct ggml_tensor * a,
  1712. struct ggml_tensor * b) {
  1713. return ggml_sub_impl(ctx, a, b, false);
  1714. }
  1715. struct ggml_tensor * ggml_sub_inplace(
  1716. struct ggml_context * ctx,
  1717. struct ggml_tensor * a,
  1718. struct ggml_tensor * b) {
  1719. return ggml_sub_impl(ctx, a, b, true);
  1720. }
  1721. // ggml_mul
  1722. static struct ggml_tensor * ggml_mul_impl(
  1723. struct ggml_context * ctx,
  1724. struct ggml_tensor * a,
  1725. struct ggml_tensor * b,
  1726. bool inplace) {
  1727. GGML_ASSERT(ggml_can_repeat(b, a));
  1728. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1729. result->op = GGML_OP_MUL;
  1730. result->src[0] = a;
  1731. result->src[1] = b;
  1732. return result;
  1733. }
  1734. struct ggml_tensor * ggml_mul(
  1735. struct ggml_context * ctx,
  1736. struct ggml_tensor * a,
  1737. struct ggml_tensor * b) {
  1738. return ggml_mul_impl(ctx, a, b, false);
  1739. }
  1740. struct ggml_tensor * ggml_mul_inplace(
  1741. struct ggml_context * ctx,
  1742. struct ggml_tensor * a,
  1743. struct ggml_tensor * b) {
  1744. return ggml_mul_impl(ctx, a, b, true);
  1745. }
  1746. // ggml_div
  1747. static struct ggml_tensor * ggml_div_impl(
  1748. struct ggml_context * ctx,
  1749. struct ggml_tensor * a,
  1750. struct ggml_tensor * b,
  1751. bool inplace) {
  1752. GGML_ASSERT(ggml_can_repeat(b, a));
  1753. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1754. result->op = GGML_OP_DIV;
  1755. result->src[0] = a;
  1756. result->src[1] = b;
  1757. return result;
  1758. }
  1759. struct ggml_tensor * ggml_div(
  1760. struct ggml_context * ctx,
  1761. struct ggml_tensor * a,
  1762. struct ggml_tensor * b) {
  1763. return ggml_div_impl(ctx, a, b, false);
  1764. }
  1765. struct ggml_tensor * ggml_div_inplace(
  1766. struct ggml_context * ctx,
  1767. struct ggml_tensor * a,
  1768. struct ggml_tensor * b) {
  1769. return ggml_div_impl(ctx, a, b, true);
  1770. }
  1771. // ggml_sqr
  1772. static struct ggml_tensor * ggml_sqr_impl(
  1773. struct ggml_context * ctx,
  1774. struct ggml_tensor * a,
  1775. bool inplace) {
  1776. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1777. result->op = GGML_OP_SQR;
  1778. result->src[0] = a;
  1779. return result;
  1780. }
  1781. struct ggml_tensor * ggml_sqr(
  1782. struct ggml_context * ctx,
  1783. struct ggml_tensor * a) {
  1784. return ggml_sqr_impl(ctx, a, false);
  1785. }
  1786. struct ggml_tensor * ggml_sqr_inplace(
  1787. struct ggml_context * ctx,
  1788. struct ggml_tensor * a) {
  1789. return ggml_sqr_impl(ctx, a, true);
  1790. }
  1791. // ggml_sqrt
  1792. static struct ggml_tensor * ggml_sqrt_impl(
  1793. struct ggml_context * ctx,
  1794. struct ggml_tensor * a,
  1795. bool inplace) {
  1796. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1797. result->op = GGML_OP_SQRT;
  1798. result->src[0] = a;
  1799. return result;
  1800. }
  1801. struct ggml_tensor * ggml_sqrt(
  1802. struct ggml_context * ctx,
  1803. struct ggml_tensor * a) {
  1804. return ggml_sqrt_impl(ctx, a, false);
  1805. }
  1806. struct ggml_tensor * ggml_sqrt_inplace(
  1807. struct ggml_context * ctx,
  1808. struct ggml_tensor * a) {
  1809. return ggml_sqrt_impl(ctx, a, true);
  1810. }
  1811. // ggml_log
  1812. static struct ggml_tensor * ggml_log_impl(
  1813. struct ggml_context * ctx,
  1814. struct ggml_tensor * a,
  1815. bool inplace) {
  1816. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1817. result->op = GGML_OP_LOG;
  1818. result->src[0] = a;
  1819. return result;
  1820. }
  1821. struct ggml_tensor * ggml_log(
  1822. struct ggml_context * ctx,
  1823. struct ggml_tensor * a) {
  1824. return ggml_log_impl(ctx, a, false);
  1825. }
  1826. struct ggml_tensor * ggml_log_inplace(
  1827. struct ggml_context * ctx,
  1828. struct ggml_tensor * a) {
  1829. return ggml_log_impl(ctx, a, true);
  1830. }
  1831. // ggml_sin
  1832. static struct ggml_tensor * ggml_sin_impl(
  1833. struct ggml_context * ctx,
  1834. struct ggml_tensor * a,
  1835. bool inplace) {
  1836. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1837. result->op = GGML_OP_SIN;
  1838. result->src[0] = a;
  1839. return result;
  1840. }
  1841. struct ggml_tensor * ggml_sin(
  1842. struct ggml_context * ctx,
  1843. struct ggml_tensor * a) {
  1844. return ggml_sin_impl(ctx, a, false);
  1845. }
  1846. struct ggml_tensor * ggml_sin_inplace(
  1847. struct ggml_context * ctx,
  1848. struct ggml_tensor * a) {
  1849. return ggml_sin_impl(ctx, a, true);
  1850. }
  1851. // ggml_cos
  1852. static struct ggml_tensor * ggml_cos_impl(
  1853. struct ggml_context * ctx,
  1854. struct ggml_tensor * a,
  1855. bool inplace) {
  1856. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1857. result->op = GGML_OP_COS;
  1858. result->src[0] = a;
  1859. return result;
  1860. }
  1861. struct ggml_tensor * ggml_cos(
  1862. struct ggml_context * ctx,
  1863. struct ggml_tensor * a) {
  1864. return ggml_cos_impl(ctx, a, false);
  1865. }
  1866. struct ggml_tensor * ggml_cos_inplace(
  1867. struct ggml_context * ctx,
  1868. struct ggml_tensor * a) {
  1869. return ggml_cos_impl(ctx, a, true);
  1870. }
  1871. // ggml_sum
  1872. struct ggml_tensor * ggml_sum(
  1873. struct ggml_context * ctx,
  1874. struct ggml_tensor * a) {
  1875. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  1876. result->op = GGML_OP_SUM;
  1877. result->src[0] = a;
  1878. return result;
  1879. }
  1880. // ggml_sum_rows
  1881. struct ggml_tensor * ggml_sum_rows(
  1882. struct ggml_context * ctx,
  1883. struct ggml_tensor * a) {
  1884. int64_t ne[GGML_MAX_DIMS] = { 1 };
  1885. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1886. ne[i] = a->ne[i];
  1887. }
  1888. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1889. result->op = GGML_OP_SUM_ROWS;
  1890. result->src[0] = a;
  1891. return result;
  1892. }
  1893. // ggml_mean
  1894. struct ggml_tensor * ggml_mean(
  1895. struct ggml_context * ctx,
  1896. struct ggml_tensor * a) {
  1897. int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
  1898. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  1899. result->op = GGML_OP_MEAN;
  1900. result->src[0] = a;
  1901. return result;
  1902. }
  1903. // ggml_argmax
  1904. struct ggml_tensor * ggml_argmax(
  1905. struct ggml_context * ctx,
  1906. struct ggml_tensor * a) {
  1907. GGML_ASSERT(ggml_is_matrix(a));
  1908. GGML_ASSERT(a->ne[0] <= INT32_MAX);
  1909. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
  1910. result->op = GGML_OP_ARGMAX;
  1911. result->src[0] = a;
  1912. return result;
  1913. }
  1914. // ggml_count_equal
  1915. struct ggml_tensor * ggml_count_equal(
  1916. struct ggml_context * ctx,
  1917. struct ggml_tensor * a,
  1918. struct ggml_tensor * b) {
  1919. GGML_ASSERT(ggml_are_same_shape(a, b));
  1920. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
  1921. result->op = GGML_OP_COUNT_EQUAL;
  1922. result->src[0] = a;
  1923. result->src[1] = b;
  1924. return result;
  1925. }
  1926. // ggml_repeat
  1927. struct ggml_tensor * ggml_repeat(
  1928. struct ggml_context * ctx,
  1929. struct ggml_tensor * a,
  1930. struct ggml_tensor * b) {
  1931. GGML_ASSERT(ggml_can_repeat(a, b));
  1932. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1933. result->op = GGML_OP_REPEAT;
  1934. result->src[0] = a;
  1935. return result;
  1936. }
  1937. struct ggml_tensor * ggml_repeat_4d(
  1938. struct ggml_context * ctx,
  1939. struct ggml_tensor * a,
  1940. int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
  1941. const bool can_repeat = ggml_is_empty(a) || (
  1942. (ne0 % a->ne[0] == 0) &&
  1943. (ne1 % a->ne[1] == 0) &&
  1944. (ne2 % a->ne[2] == 0) &&
  1945. (ne3 % a->ne[3] == 0)
  1946. );
  1947. GGML_ASSERT(can_repeat);
  1948. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  1949. result->op = GGML_OP_REPEAT;
  1950. result->src[0] = a;
  1951. return result;
  1952. }
  1953. // ggml_repeat_back
  1954. struct ggml_tensor * ggml_repeat_back(
  1955. struct ggml_context * ctx,
  1956. struct ggml_tensor * a,
  1957. struct ggml_tensor * b) {
  1958. GGML_ASSERT(ggml_can_repeat(b, a));
  1959. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1960. result->op = GGML_OP_REPEAT_BACK;
  1961. result->src[0] = a;
  1962. return result;
  1963. }
  1964. // ggml_concat
  1965. struct ggml_tensor * ggml_concat(
  1966. struct ggml_context * ctx,
  1967. struct ggml_tensor * a,
  1968. struct ggml_tensor * b,
  1969. int dim) {
  1970. GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
  1971. GGML_ASSERT(a->type == b->type);
  1972. int64_t ne[GGML_MAX_DIMS];
  1973. for (int d = 0; d < GGML_MAX_DIMS; ++d) {
  1974. if (d == dim) {
  1975. ne[d] = a->ne[d] + b->ne[d];
  1976. continue;
  1977. }
  1978. GGML_ASSERT(a->ne[d] == b->ne[d]);
  1979. ne[d] = a->ne[d];
  1980. }
  1981. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1982. ggml_set_op_params_i32(result, 0, dim);
  1983. result->op = GGML_OP_CONCAT;
  1984. result->src[0] = a;
  1985. result->src[1] = b;
  1986. return result;
  1987. }
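// example: concatenating ne = {4, 3, 2, 1} with ne = {4, 3, 5, 1} along dim = 2
// yields ne = {4, 3, 7, 1}; all dimensions other than dim must match exactly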
  1988. // ggml_abs
  1989. struct ggml_tensor * ggml_abs(
  1990. struct ggml_context * ctx,
  1991. struct ggml_tensor * a) {
  1992. return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
  1993. }
  1994. struct ggml_tensor * ggml_abs_inplace(
  1995. struct ggml_context * ctx,
  1996. struct ggml_tensor * a) {
  1997. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
  1998. }
  1999. // ggml_sgn
  2000. struct ggml_tensor * ggml_sgn(
  2001. struct ggml_context * ctx,
  2002. struct ggml_tensor * a) {
  2003. return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
  2004. }
  2005. struct ggml_tensor * ggml_sgn_inplace(
  2006. struct ggml_context * ctx,
  2007. struct ggml_tensor * a) {
  2008. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
  2009. }
  2010. // ggml_neg
  2011. struct ggml_tensor * ggml_neg(
  2012. struct ggml_context * ctx,
  2013. struct ggml_tensor * a) {
  2014. return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
  2015. }
  2016. struct ggml_tensor * ggml_neg_inplace(
  2017. struct ggml_context * ctx,
  2018. struct ggml_tensor * a) {
  2019. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
  2020. }
  2021. // ggml_step
  2022. struct ggml_tensor * ggml_step(
  2023. struct ggml_context * ctx,
  2024. struct ggml_tensor * a) {
  2025. return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
  2026. }
  2027. struct ggml_tensor * ggml_step_inplace(
  2028. struct ggml_context * ctx,
  2029. struct ggml_tensor * a) {
  2030. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
  2031. }
  2032. // ggml_tanh
  2033. struct ggml_tensor * ggml_tanh(
  2034. struct ggml_context * ctx,
  2035. struct ggml_tensor * a) {
  2036. return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
  2037. }
  2038. struct ggml_tensor * ggml_tanh_inplace(
  2039. struct ggml_context * ctx,
  2040. struct ggml_tensor * a) {
  2041. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
  2042. }
  2043. // ggml_elu
  2044. struct ggml_tensor * ggml_elu(
  2045. struct ggml_context * ctx,
  2046. struct ggml_tensor * a) {
  2047. return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
  2048. }
  2049. struct ggml_tensor * ggml_elu_inplace(
  2050. struct ggml_context * ctx,
  2051. struct ggml_tensor * a) {
  2052. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
  2053. }
  2054. // ggml_relu
  2055. struct ggml_tensor * ggml_relu(
  2056. struct ggml_context * ctx,
  2057. struct ggml_tensor * a) {
  2058. return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
  2059. }
  2060. struct ggml_tensor * ggml_relu_inplace(
  2061. struct ggml_context * ctx,
  2062. struct ggml_tensor * a) {
  2063. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
  2064. }
  2065. // ggml_leaky_relu
  2066. struct ggml_tensor * ggml_leaky_relu(
  2067. struct ggml_context * ctx,
  2068. struct ggml_tensor * a,
  2069. float negative_slope,
  2070. bool inplace) {
  2071. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2072. ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
  2073. result->op = GGML_OP_LEAKY_RELU;
  2074. result->src[0] = a;
  2075. return result;
  2076. }
  2077. // ggml_sigmoid
  2078. struct ggml_tensor * ggml_sigmoid(
  2079. struct ggml_context * ctx,
  2080. struct ggml_tensor * a) {
  2081. return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
  2082. }
  2083. struct ggml_tensor * ggml_sigmoid_inplace(
  2084. struct ggml_context * ctx,
  2085. struct ggml_tensor * a) {
  2086. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
  2087. }
  2088. // ggml_gelu
  2089. struct ggml_tensor * ggml_gelu(
  2090. struct ggml_context * ctx,
  2091. struct ggml_tensor * a) {
  2092. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
  2093. }
  2094. struct ggml_tensor * ggml_gelu_inplace(
  2095. struct ggml_context * ctx,
  2096. struct ggml_tensor * a) {
  2097. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
  2098. }
  2099. // ggml_gelu_erf
  2100. struct ggml_tensor * ggml_gelu_erf(
  2101. struct ggml_context * ctx,
  2102. struct ggml_tensor * a) {
  2103. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
  2104. }
  2105. struct ggml_tensor * ggml_gelu_erf_inplace(
  2106. struct ggml_context * ctx,
  2107. struct ggml_tensor * a) {
  2108. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
  2109. }
  2110. // ggml_gelu_quick
  2111. struct ggml_tensor * ggml_gelu_quick(
  2112. struct ggml_context * ctx,
  2113. struct ggml_tensor * a) {
  2114. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2115. }
  2116. struct ggml_tensor * ggml_gelu_quick_inplace(
  2117. struct ggml_context * ctx,
  2118. struct ggml_tensor * a) {
  2119. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2120. }
  2121. // ggml_silu
  2122. struct ggml_tensor * ggml_silu(
  2123. struct ggml_context * ctx,
  2124. struct ggml_tensor * a) {
  2125. return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
  2126. }
  2127. struct ggml_tensor * ggml_silu_inplace(
  2128. struct ggml_context * ctx,
  2129. struct ggml_tensor * a) {
  2130. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
  2131. }
  2132. // ggml_silu_back
  2133. struct ggml_tensor * ggml_silu_back(
  2134. struct ggml_context * ctx,
  2135. struct ggml_tensor * a,
  2136. struct ggml_tensor * b) {
  2137. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2138. result->op = GGML_OP_SILU_BACK;
  2139. result->src[0] = a;
  2140. result->src[1] = b;
  2141. return result;
  2142. }
  2143. // ggml_hardswish
  2144. struct ggml_tensor * ggml_hardswish(
  2145. struct ggml_context * ctx,
  2146. struct ggml_tensor * a) {
  2147. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
  2148. }
  2149. // ggml_hardsigmoid
  2150. struct ggml_tensor * ggml_hardsigmoid(
  2151. struct ggml_context * ctx,
  2152. struct ggml_tensor * a) {
  2153. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
  2154. }
  2155. // ggml_exp
  2156. struct ggml_tensor * ggml_exp(
  2157. struct ggml_context * ctx,
  2158. struct ggml_tensor * a) {
  2159. return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
  2160. }
  2161. struct ggml_tensor * ggml_exp_inplace(
  2162. struct ggml_context * ctx,
  2163. struct ggml_tensor * a) {
  2164. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
  2165. }
  2166. // ggml_norm
  2167. static struct ggml_tensor * ggml_norm_impl(
  2168. struct ggml_context * ctx,
  2169. struct ggml_tensor * a,
  2170. float eps,
  2171. bool inplace) {
  2172. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2173. ggml_set_op_params(result, &eps, sizeof(eps));
  2174. result->op = GGML_OP_NORM;
  2175. result->src[0] = a;
  2176. return result;
  2177. }
  2178. struct ggml_tensor * ggml_norm(
  2179. struct ggml_context * ctx,
  2180. struct ggml_tensor * a,
  2181. float eps) {
  2182. return ggml_norm_impl(ctx, a, eps, false);
  2183. }
  2184. struct ggml_tensor * ggml_norm_inplace(
  2185. struct ggml_context * ctx,
  2186. struct ggml_tensor * a,
  2187. float eps) {
  2188. return ggml_norm_impl(ctx, a, eps, true);
  2189. }
  2190. // ggml_rms_norm
  2191. static struct ggml_tensor * ggml_rms_norm_impl(
  2192. struct ggml_context * ctx,
  2193. struct ggml_tensor * a,
  2194. float eps,
  2195. bool inplace) {
  2196. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2197. ggml_set_op_params(result, &eps, sizeof(eps));
  2198. result->op = GGML_OP_RMS_NORM;
  2199. result->src[0] = a;
  2200. return result;
  2201. }
  2202. struct ggml_tensor * ggml_rms_norm(
  2203. struct ggml_context * ctx,
  2204. struct ggml_tensor * a,
  2205. float eps) {
  2206. return ggml_rms_norm_impl(ctx, a, eps, false);
  2207. }
  2208. struct ggml_tensor * ggml_rms_norm_inplace(
  2209. struct ggml_context * ctx,
  2210. struct ggml_tensor * a,
  2211. float eps) {
  2212. return ggml_rms_norm_impl(ctx, a, eps, true);
  2213. }
  2214. // ggml_rms_norm_back
  2215. struct ggml_tensor * ggml_rms_norm_back(
  2216. struct ggml_context * ctx,
  2217. struct ggml_tensor * a,
  2218. struct ggml_tensor * b,
  2219. float eps) {
  2220. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2221. ggml_set_op_params(result, &eps, sizeof(eps));
  2222. result->op = GGML_OP_RMS_NORM_BACK;
  2223. result->src[0] = a;
  2224. result->src[1] = b;
  2225. return result;
  2226. }
  2227. // ggml_group_norm
  2228. static struct ggml_tensor * ggml_group_norm_impl(
  2229. struct ggml_context * ctx,
  2230. struct ggml_tensor * a,
  2231. int n_groups,
  2232. float eps,
  2233. bool inplace) {
  2234. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2235. ggml_set_op_params_i32(result, 0, n_groups);
  2236. ggml_set_op_params_f32(result, 1, eps);
  2237. result->op = GGML_OP_GROUP_NORM;
  2238. result->src[0] = a;
  2239. return result;
  2240. }
  2241. struct ggml_tensor * ggml_group_norm(
  2242. struct ggml_context * ctx,
  2243. struct ggml_tensor * a,
  2244. int n_groups,
  2245. float eps) {
  2246. return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
  2247. }
  2248. struct ggml_tensor * ggml_group_norm_inplace(
  2249. struct ggml_context * ctx,
  2250. struct ggml_tensor * a,
  2251. int n_groups,
  2252. float eps) {
  2253. return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
  2254. }
  2255. // ggml_l2_norm
  2256. static struct ggml_tensor * ggml_l2_norm_impl(
  2257. struct ggml_context * ctx,
  2258. struct ggml_tensor * a,
  2259. float eps,
  2260. bool inplace) {
  2261. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2262. ggml_set_op_params_f32(result, 0, eps);
  2263. result->op = GGML_OP_L2_NORM;
  2264. result->src[0] = a;
  2265. return result;
  2266. }
  2267. struct ggml_tensor * ggml_l2_norm(
  2268. struct ggml_context * ctx,
  2269. struct ggml_tensor * a,
  2270. float eps) {
  2271. return ggml_l2_norm_impl(ctx, a, eps, false);
  2272. }
  2273. struct ggml_tensor * ggml_l2_norm_inplace(
  2274. struct ggml_context * ctx,
  2275. struct ggml_tensor * a,
  2276. float eps) {
  2277. return ggml_l2_norm_impl(ctx, a, eps, true);
  2278. }
  2279. // ggml_mul_mat
  2280. static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2281. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2282. return (t0->ne[0] == t1->ne[0]) &&
  2283. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2284. (t1->ne[3]%t0->ne[3] == 0);
  2285. }
  2286. struct ggml_tensor * ggml_mul_mat(
  2287. struct ggml_context * ctx,
  2288. struct ggml_tensor * a,
  2289. struct ggml_tensor * b) {
  2290. GGML_ASSERT(ggml_can_mul_mat(a, b));
  2291. GGML_ASSERT(!ggml_is_transposed(a));
  2292. const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
  2293. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2294. result->op = GGML_OP_MUL_MAT;
  2295. result->src[0] = a;
  2296. result->src[1] = b;
  2297. return result;
  2298. }
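// illustrative sketch (not part of ggml): ne[0] is the reduced dimension, so a [K, M]
// weight times a [K, N] input yields an [M, N] F32 result, with ne[2]/ne[3] taken from b
// (they must be whole multiples of those of a); the values below are hypothetical:
//
//   struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32); // K = 64, M = 32
//   struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64,  8); // K = 64, N = 8
//   struct ggml_tensor * y = ggml_mul_mat(ctx, w, x);                        // y->ne = {32, 8, 1, 1}
//
// the node only records the operation; a backend computes it when the graph is evaluated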
  2299. void ggml_mul_mat_set_prec(
  2300. struct ggml_tensor * a,
  2301. enum ggml_prec prec) {
  2302. GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
  2303. const int32_t prec_i32 = (int32_t) prec;
  2304. ggml_set_op_params_i32(a, 0, prec_i32);
  2305. }
  2306. // ggml_mul_mat_id
  2307. /*
  2308. c = ggml_mul_mat_id(ctx, as, b, ids);
  2309. as -> [cols, rows, n_expert]
  2310. b -> [cols, n_expert_used, n_tokens]
  2311. ids -> [n_expert_used, n_tokens] (i32)
  2312. c -> [rows, n_expert_used, n_tokens]
  2313. in b, n_expert_used can be broadcasted to match the n_expert_used of ids
  2314. c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
  2315. */
  2316. struct ggml_tensor * ggml_mul_mat_id(
  2317. struct ggml_context * ctx,
  2318. struct ggml_tensor * as,
  2319. struct ggml_tensor * b,
  2320. struct ggml_tensor * ids) {
  2321. GGML_ASSERT(!ggml_is_transposed(as));
  2322. GGML_ASSERT(ids->type == GGML_TYPE_I32);
  2323. GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
  2324. GGML_ASSERT(b->ne[3] == 1); // b is 3d
  2325. GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
  2326. GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
  2327. GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
  2328. GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
  2329. const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
  2330. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2331. result->op = GGML_OP_MUL_MAT_ID;
  2332. result->src[0] = as;
  2333. result->src[1] = b;
  2334. result->src[2] = ids;
  2335. return result;
  2336. }
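// illustrative sketch (not part of ggml): shape-only example of the expert routing
// described in the comment above; the function name and sizes are hypothetical and
// no graph is actually computed here
static void ggml_example_mul_mat_id_shapes(struct ggml_context * ctx) {
    const int64_t cols = 64, rows = 32, n_expert = 8, n_expert_used = 2, n_tokens = 4;

    struct ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, cols, rows, n_expert);
    struct ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, cols, n_expert_used, n_tokens);
    struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_expert_used, n_tokens);

    struct ggml_tensor * c = ggml_mul_mat_id(ctx, as, b, ids);

    // one row of output per (expert used, token) pair
    GGML_ASSERT(c->ne[0] == rows && c->ne[1] == n_expert_used && c->ne[2] == n_tokens);
}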
  2337. // ggml_out_prod
  2338. static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2339. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2340. return (t0->ne[1] == t1->ne[1]) &&
  2341. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2342. (t1->ne[3]%t0->ne[3] == 0);
  2343. }
  2344. struct ggml_tensor * ggml_out_prod(
  2345. struct ggml_context * ctx,
  2346. struct ggml_tensor * a,
  2347. struct ggml_tensor * b) {
  2348. GGML_ASSERT(ggml_can_out_prod(a, b));
  2349. GGML_ASSERT(!ggml_is_transposed(a));
  2350. // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
  2351. const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
  2352. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2353. result->op = GGML_OP_OUT_PROD;
  2354. result->src[0] = a;
  2355. result->src[1] = b;
  2356. return result;
  2357. }
  2358. // ggml_scale
  2359. static struct ggml_tensor * ggml_scale_impl(
  2360. struct ggml_context * ctx,
  2361. struct ggml_tensor * a,
  2362. float s,
  2363. bool inplace) {
  2364. GGML_ASSERT(ggml_is_padded_1d(a));
  2365. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2366. ggml_set_op_params(result, &s, sizeof(s));
  2367. result->op = GGML_OP_SCALE;
  2368. result->src[0] = a;
  2369. return result;
  2370. }
  2371. struct ggml_tensor * ggml_scale(
  2372. struct ggml_context * ctx,
  2373. struct ggml_tensor * a,
  2374. float s) {
  2375. return ggml_scale_impl(ctx, a, s, false);
  2376. }
  2377. struct ggml_tensor * ggml_scale_inplace(
  2378. struct ggml_context * ctx,
  2379. struct ggml_tensor * a,
  2380. float s) {
  2381. return ggml_scale_impl(ctx, a, s, true);
  2382. }
  2383. // ggml_set
  2384. static struct ggml_tensor * ggml_set_impl(
  2385. struct ggml_context * ctx,
  2386. struct ggml_tensor * a,
  2387. struct ggml_tensor * b,
  2388. size_t nb1,
  2389. size_t nb2,
  2390. size_t nb3,
  2391. size_t offset,
  2392. bool inplace) {
  2393. GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
  2394. // make a view of the destination
  2395. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2396. GGML_ASSERT(offset < (size_t)(1 << 30));
  2397. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  2398. ggml_set_op_params(result, params, sizeof(params));
  2399. result->op = GGML_OP_SET;
  2400. result->src[0] = a;
  2401. result->src[1] = b;
  2402. return result;
  2403. }
  2404. struct ggml_tensor * ggml_set(
  2405. struct ggml_context * ctx,
  2406. struct ggml_tensor * a,
  2407. struct ggml_tensor * b,
  2408. size_t nb1,
  2409. size_t nb2,
  2410. size_t nb3,
  2411. size_t offset) {
  2412. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  2413. }
  2414. struct ggml_tensor * ggml_set_inplace(
  2415. struct ggml_context * ctx,
  2416. struct ggml_tensor * a,
  2417. struct ggml_tensor * b,
  2418. size_t nb1,
  2419. size_t nb2,
  2420. size_t nb3,
  2421. size_t offset) {
  2422. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  2423. }
  2424. struct ggml_tensor * ggml_set_1d(
  2425. struct ggml_context * ctx,
  2426. struct ggml_tensor * a,
  2427. struct ggml_tensor * b,
  2428. size_t offset) {
  2429. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
  2430. }
  2431. struct ggml_tensor * ggml_set_1d_inplace(
  2432. struct ggml_context * ctx,
  2433. struct ggml_tensor * a,
  2434. struct ggml_tensor * b,
  2435. size_t offset) {
  2436. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
  2437. }
  2438. struct ggml_tensor * ggml_set_2d(
  2439. struct ggml_context * ctx,
  2440. struct ggml_tensor * a,
  2441. struct ggml_tensor * b,
  2442. size_t nb1,
  2443. size_t offset) {
  2444. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
  2445. }
  2446. struct ggml_tensor * ggml_set_2d_inplace(
  2447. struct ggml_context * ctx,
  2448. struct ggml_tensor * a,
  2449. struct ggml_tensor * b,
  2450. size_t nb1,
  2451. size_t offset) {
  2452. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
  2453. }
  2454. // ggml_cpy
  2455. static struct ggml_tensor * ggml_cpy_impl(
  2456. struct ggml_context * ctx,
  2457. struct ggml_tensor * a,
  2458. struct ggml_tensor * b) {
  2459. GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
  2460. // make a view of the destination
  2461. struct ggml_tensor * result = ggml_view_tensor(ctx, b);
  2462. if (strlen(b->name) > 0) {
  2463. ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
  2464. } else {
  2465. ggml_format_name(result, "%s (copy)", a->name);
  2466. }
  2467. result->op = GGML_OP_CPY;
  2468. result->src[0] = a;
  2469. result->src[1] = b;
  2470. return result;
  2471. }
  2472. struct ggml_tensor * ggml_cpy(
  2473. struct ggml_context * ctx,
  2474. struct ggml_tensor * a,
  2475. struct ggml_tensor * b) {
  2476. return ggml_cpy_impl(ctx, a, b);
  2477. }
  2478. struct ggml_tensor * ggml_cast(
  2479. struct ggml_context * ctx,
  2480. struct ggml_tensor * a,
  2481. enum ggml_type type) {
  2482. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  2483. ggml_format_name(result, "%s (copy)", a->name);
  2484. result->op = GGML_OP_CPY;
  2485. result->src[0] = a;
  2486. result->src[1] = result;
  2487. return result;
  2488. }
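// note: the cast is expressed as a GGML_OP_CPY whose destination is the result tensor
// itself (src[1] == result), so the same copy/convert path used by ggml_cpy performs
// the type conversion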
  2489. // ggml_cont
  2490. static struct ggml_tensor * ggml_cont_impl(
  2491. struct ggml_context * ctx,
  2492. struct ggml_tensor * a) {
  2493. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2494. ggml_format_name(result, "%s (cont)", a->name);
  2495. result->op = GGML_OP_CONT;
  2496. result->src[0] = a;
  2497. return result;
  2498. }
  2499. struct ggml_tensor * ggml_cont(
  2500. struct ggml_context * ctx,
  2501. struct ggml_tensor * a) {
  2502. return ggml_cont_impl(ctx, a);
  2503. }
  2504. // make contiguous, with new shape
  2505. GGML_API struct ggml_tensor * ggml_cont_1d(
  2506. struct ggml_context * ctx,
  2507. struct ggml_tensor * a,
  2508. int64_t ne0) {
  2509. return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
  2510. }
  2511. GGML_API struct ggml_tensor * ggml_cont_2d(
  2512. struct ggml_context * ctx,
  2513. struct ggml_tensor * a,
  2514. int64_t ne0,
  2515. int64_t ne1) {
  2516. return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
  2517. }
  2518. GGML_API struct ggml_tensor * ggml_cont_3d(
  2519. struct ggml_context * ctx,
  2520. struct ggml_tensor * a,
  2521. int64_t ne0,
  2522. int64_t ne1,
  2523. int64_t ne2) {
  2524. return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
  2525. }
  2526. struct ggml_tensor * ggml_cont_4d(
  2527. struct ggml_context * ctx,
  2528. struct ggml_tensor * a,
  2529. int64_t ne0,
  2530. int64_t ne1,
  2531. int64_t ne2,
  2532. int64_t ne3) {
  2533. GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
  2534. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  2535. ggml_format_name(result, "%s (cont)", a->name);
  2536. result->op = GGML_OP_CONT;
  2537. result->src[0] = a;
  2538. return result;
  2539. }
  2540. // ggml_reshape
  2541. struct ggml_tensor * ggml_reshape(
  2542. struct ggml_context * ctx,
  2543. struct ggml_tensor * a,
  2544. struct ggml_tensor * b) {
  2545. GGML_ASSERT(ggml_is_contiguous(a));
2546. // only the shape of b is relevant here, not its memory layout, so b is allowed to be non-contiguous.
  2547. GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
  2548. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
  2549. ggml_format_name(result, "%s (reshaped)", a->name);
  2550. result->op = GGML_OP_RESHAPE;
  2551. result->src[0] = a;
  2552. return result;
  2553. }
  2554. struct ggml_tensor * ggml_reshape_1d(
  2555. struct ggml_context * ctx,
  2556. struct ggml_tensor * a,
  2557. int64_t ne0) {
  2558. GGML_ASSERT(ggml_is_contiguous(a));
  2559. GGML_ASSERT(ggml_nelements(a) == ne0);
  2560. const int64_t ne[1] = { ne0 };
  2561. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
  2562. ggml_format_name(result, "%s (reshaped)", a->name);
  2563. result->op = GGML_OP_RESHAPE;
  2564. result->src[0] = a;
  2565. return result;
  2566. }
  2567. struct ggml_tensor * ggml_reshape_2d(
  2568. struct ggml_context * ctx,
  2569. struct ggml_tensor * a,
  2570. int64_t ne0,
  2571. int64_t ne1) {
  2572. GGML_ASSERT(ggml_is_contiguous(a));
  2573. GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
  2574. const int64_t ne[2] = { ne0, ne1 };
  2575. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
  2576. ggml_format_name(result, "%s (reshaped)", a->name);
  2577. result->op = GGML_OP_RESHAPE;
  2578. result->src[0] = a;
  2579. return result;
  2580. }
  2581. struct ggml_tensor * ggml_reshape_3d(
  2582. struct ggml_context * ctx,
  2583. struct ggml_tensor * a,
  2584. int64_t ne0,
  2585. int64_t ne1,
  2586. int64_t ne2) {
  2587. GGML_ASSERT(ggml_is_contiguous(a));
  2588. GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
  2589. const int64_t ne[3] = { ne0, ne1, ne2 };
  2590. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
  2591. ggml_format_name(result, "%s (reshaped)", a->name);
  2592. result->op = GGML_OP_RESHAPE;
  2593. result->src[0] = a;
  2594. return result;
  2595. }
  2596. struct ggml_tensor * ggml_reshape_4d(
  2597. struct ggml_context * ctx,
  2598. struct ggml_tensor * a,
  2599. int64_t ne0,
  2600. int64_t ne1,
  2601. int64_t ne2,
  2602. int64_t ne3) {
  2603. GGML_ASSERT(ggml_is_contiguous(a));
  2604. GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
  2605. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  2606. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
  2607. ggml_format_name(result, "%s (reshaped)", a->name);
  2608. result->op = GGML_OP_RESHAPE;
  2609. result->src[0] = a;
  2610. return result;
  2611. }
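// Example (sketch): reshape only reinterprets the element count, so the source must be
// contiguous and the product of the new dimensions must equal ggml_nelements(a).
//
//   struct ggml_tensor * flat = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2*3*4);
//   struct ggml_tensor * cube = ggml_reshape_3d(ctx, flat, 2, 3, 4); // 24 elements either way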
  2612. static struct ggml_tensor * ggml_view_impl(
  2613. struct ggml_context * ctx,
  2614. struct ggml_tensor * a,
  2615. int n_dims,
  2616. const int64_t * ne,
  2617. size_t offset) {
  2618. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
  2619. ggml_format_name(result, "%s (view)", a->name);
  2620. ggml_set_op_params(result, &offset, sizeof(offset));
  2621. result->op = GGML_OP_VIEW;
  2622. result->src[0] = a;
  2623. return result;
  2624. }
  2625. // ggml_view_1d
  2626. struct ggml_tensor * ggml_view_1d(
  2627. struct ggml_context * ctx,
  2628. struct ggml_tensor * a,
  2629. int64_t ne0,
  2630. size_t offset) {
  2631. struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
  2632. return result;
  2633. }
  2634. // ggml_view_2d
  2635. struct ggml_tensor * ggml_view_2d(
  2636. struct ggml_context * ctx,
  2637. struct ggml_tensor * a,
  2638. int64_t ne0,
  2639. int64_t ne1,
  2640. size_t nb1,
  2641. size_t offset) {
  2642. const int64_t ne[2] = { ne0, ne1 };
  2643. struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
  2644. result->nb[1] = nb1;
  2645. result->nb[2] = result->nb[1]*ne1;
  2646. result->nb[3] = result->nb[2];
  2647. return result;
  2648. }
  2649. // ggml_view_3d
  2650. struct ggml_tensor * ggml_view_3d(
  2651. struct ggml_context * ctx,
  2652. struct ggml_tensor * a,
  2653. int64_t ne0,
  2654. int64_t ne1,
  2655. int64_t ne2,
  2656. size_t nb1,
  2657. size_t nb2,
  2658. size_t offset) {
  2659. const int64_t ne[3] = { ne0, ne1, ne2 };
  2660. struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
  2661. result->nb[1] = nb1;
  2662. result->nb[2] = nb2;
  2663. result->nb[3] = result->nb[2]*ne2;
  2664. return result;
  2665. }
  2666. // ggml_view_4d
  2667. struct ggml_tensor * ggml_view_4d(
  2668. struct ggml_context * ctx,
  2669. struct ggml_tensor * a,
  2670. int64_t ne0,
  2671. int64_t ne1,
  2672. int64_t ne2,
  2673. int64_t ne3,
  2674. size_t nb1,
  2675. size_t nb2,
  2676. size_t nb3,
  2677. size_t offset) {
  2678. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  2679. struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
  2680. result->nb[1] = nb1;
  2681. result->nb[2] = nb2;
  2682. result->nb[3] = nb3;
  2683. return result;
  2684. }
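// Example (sketch): views share the parent's data; only ne/nb and the byte offset change.
// A 2D view can select a block of rows by reusing the parent's row stride. Sizes below are
// arbitrary, chosen only for the example.
//
//   struct ggml_tensor * m    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 256, 32);
//   // rows 8..23 of m: 256 columns, 16 rows, parent row stride, offset of 8 rows (in bytes)
//   struct ggml_tensor * part = ggml_view_2d(ctx, m, 256, 16, m->nb[1], 8*m->nb[1]);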
  2685. // ggml_permute
  2686. struct ggml_tensor * ggml_permute(
  2687. struct ggml_context * ctx,
  2688. struct ggml_tensor * a,
  2689. int axis0,
  2690. int axis1,
  2691. int axis2,
  2692. int axis3) {
  2693. GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
  2694. GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
  2695. GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
  2696. GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
  2697. GGML_ASSERT(axis0 != axis1);
  2698. GGML_ASSERT(axis0 != axis2);
  2699. GGML_ASSERT(axis0 != axis3);
  2700. GGML_ASSERT(axis1 != axis2);
  2701. GGML_ASSERT(axis1 != axis3);
  2702. GGML_ASSERT(axis2 != axis3);
  2703. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  2704. ggml_format_name(result, "%s (permuted)", a->name);
  2705. int ne[GGML_MAX_DIMS];
  2706. int nb[GGML_MAX_DIMS];
  2707. ne[axis0] = a->ne[0];
  2708. ne[axis1] = a->ne[1];
  2709. ne[axis2] = a->ne[2];
  2710. ne[axis3] = a->ne[3];
  2711. nb[axis0] = a->nb[0];
  2712. nb[axis1] = a->nb[1];
  2713. nb[axis2] = a->nb[2];
  2714. nb[axis3] = a->nb[3];
  2715. result->ne[0] = ne[0];
  2716. result->ne[1] = ne[1];
  2717. result->ne[2] = ne[2];
  2718. result->ne[3] = ne[3];
  2719. result->nb[0] = nb[0];
  2720. result->nb[1] = nb[1];
  2721. result->nb[2] = nb[2];
  2722. result->nb[3] = nb[3];
  2723. result->op = GGML_OP_PERMUTE;
  2724. result->src[0] = a;
  2725. int32_t params[] = { axis0, axis1, axis2, axis3 };
  2726. ggml_set_op_params(result, params, sizeof(params));
  2727. return result;
  2728. }
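// Example (sketch): a common pattern is to permute a {head_dim, n_head, n_tokens, n_batch}
// tensor into {head_dim, n_tokens, n_head, n_batch} for attention and then force a
// contiguous layout before ops that require one. The dimension names are illustrative.
//
//   struct ggml_tensor * q  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 8, 32, 1);
//   struct ggml_tensor * qp = ggml_permute(ctx, q, 0, 2, 1, 3); // swaps dims 1 and 2 (strides only)
//   struct ggml_tensor * qc = ggml_cont(ctx, qp);               // materializes the new layout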
  2729. // ggml_transpose
  2730. struct ggml_tensor * ggml_transpose(
  2731. struct ggml_context * ctx,
  2732. struct ggml_tensor * a) {
  2733. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  2734. ggml_format_name(result, "%s (transposed)", a->name);
  2735. result->ne[0] = a->ne[1];
  2736. result->ne[1] = a->ne[0];
  2737. result->nb[0] = a->nb[1];
  2738. result->nb[1] = a->nb[0];
  2739. result->op = GGML_OP_TRANSPOSE;
  2740. result->src[0] = a;
  2741. return result;
  2742. }
  2743. // ggml_get_rows
  2744. struct ggml_tensor * ggml_get_rows(
  2745. struct ggml_context * ctx,
  2746. struct ggml_tensor * a,
  2747. struct ggml_tensor * b) {
  2748. GGML_ASSERT(a->ne[2] == b->ne[1]);
  2749. GGML_ASSERT(b->ne[3] == 1);
  2750. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2751. // TODO: implement non F32 return
  2752. enum ggml_type type = GGML_TYPE_F32;
  2753. if (a->type == GGML_TYPE_I32) {
  2754. type = a->type;
  2755. }
  2756. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
  2757. result->op = GGML_OP_GET_ROWS;
  2758. result->src[0] = a;
  2759. result->src[1] = b;
  2760. return result;
  2761. }
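// Example (sketch): the classic use is an embedding lookup, where a stores one embedding per
// index along ne[1] and b holds I32 token ids. The vocabulary and embedding sizes below are
// made up for the example.
//
//   struct ggml_tensor * emb = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32000); // {n_embd, n_vocab}
//   struct ggml_tensor * ids = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 8);           // 8 token ids
//   struct ggml_tensor * cur = ggml_get_rows(ctx, emb, ids);                        // {4096, 8}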
  2762. // ggml_get_rows_back
  2763. struct ggml_tensor * ggml_get_rows_back(
  2764. struct ggml_context * ctx,
  2765. struct ggml_tensor * a,
  2766. struct ggml_tensor * b,
  2767. struct ggml_tensor * c) {
  2768. GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
  2769. GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
  2770. // TODO: implement non F32 return
  2771. //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
  2772. struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
  2773. result->op = GGML_OP_GET_ROWS_BACK;
  2774. result->src[0] = a;
  2775. result->src[1] = b;
  2776. return result;
  2777. }
  2778. // ggml_diag
  2779. struct ggml_tensor * ggml_diag(
  2780. struct ggml_context * ctx,
  2781. struct ggml_tensor * a) {
  2782. GGML_ASSERT(a->ne[1] == 1);
  2783. const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
  2784. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
  2785. result->op = GGML_OP_DIAG;
  2786. result->src[0] = a;
  2787. return result;
  2788. }
  2789. // ggml_diag_mask_inf
  2790. static struct ggml_tensor * ggml_diag_mask_inf_impl(
  2791. struct ggml_context * ctx,
  2792. struct ggml_tensor * a,
  2793. int n_past,
  2794. bool inplace) {
  2795. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2796. int32_t params[] = { n_past };
  2797. ggml_set_op_params(result, params, sizeof(params));
  2798. result->op = GGML_OP_DIAG_MASK_INF;
  2799. result->src[0] = a;
  2800. return result;
  2801. }
  2802. struct ggml_tensor * ggml_diag_mask_inf(
  2803. struct ggml_context * ctx,
  2804. struct ggml_tensor * a,
  2805. int n_past) {
  2806. return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
  2807. }
  2808. struct ggml_tensor * ggml_diag_mask_inf_inplace(
  2809. struct ggml_context * ctx,
  2810. struct ggml_tensor * a,
  2811. int n_past) {
  2812. return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
  2813. }
  2814. // ggml_diag_mask_zero
  2815. static struct ggml_tensor * ggml_diag_mask_zero_impl(
  2816. struct ggml_context * ctx,
  2817. struct ggml_tensor * a,
  2818. int n_past,
  2819. bool inplace) {
  2820. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2821. int32_t params[] = { n_past };
  2822. ggml_set_op_params(result, params, sizeof(params));
  2823. result->op = GGML_OP_DIAG_MASK_ZERO;
  2824. result->src[0] = a;
  2825. return result;
  2826. }
  2827. struct ggml_tensor * ggml_diag_mask_zero(
  2828. struct ggml_context * ctx,
  2829. struct ggml_tensor * a,
  2830. int n_past) {
  2831. return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
  2832. }
  2833. struct ggml_tensor * ggml_diag_mask_zero_inplace(
  2834. struct ggml_context * ctx,
  2835. struct ggml_tensor * a,
  2836. int n_past) {
  2837. return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
  2838. }
  2839. // ggml_soft_max
  2840. static struct ggml_tensor * ggml_soft_max_impl(
  2841. struct ggml_context * ctx,
  2842. struct ggml_tensor * a,
  2843. struct ggml_tensor * mask,
  2844. float scale,
  2845. float max_bias,
  2846. bool inplace) {
  2847. GGML_ASSERT(ggml_is_contiguous(a));
  2848. if (mask) {
  2849. GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
  2850. GGML_ASSERT(ggml_is_contiguous(mask));
  2851. GGML_ASSERT(ggml_is_matrix(mask));
  2852. GGML_ASSERT(mask->ne[0] == a->ne[0]);
  2853. GGML_ASSERT(mask->ne[1] >= a->ne[1]);
  2854. }
  2855. if (max_bias > 0.0f) {
  2856. GGML_ASSERT(mask);
  2857. }
  2858. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2859. float params[] = { scale, max_bias };
  2860. ggml_set_op_params(result, params, sizeof(params));
  2861. result->op = GGML_OP_SOFT_MAX;
  2862. result->src[0] = a;
  2863. result->src[1] = mask;
  2864. return result;
  2865. }
  2866. struct ggml_tensor * ggml_soft_max(
  2867. struct ggml_context * ctx,
  2868. struct ggml_tensor * a) {
  2869. return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
  2870. }
  2871. struct ggml_tensor * ggml_soft_max_inplace(
  2872. struct ggml_context * ctx,
  2873. struct ggml_tensor * a) {
  2874. return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
  2875. }
  2876. struct ggml_tensor * ggml_soft_max_ext(
  2877. struct ggml_context * ctx,
  2878. struct ggml_tensor * a,
  2879. struct ggml_tensor * mask,
  2880. float scale,
  2881. float max_bias) {
  2882. return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
  2883. }
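// Example (sketch): in attention the raw KQ scores are typically normalized with a scale of
// 1/sqrt(head_dim) plus an optional additive mask (F16 or F32). The names kq, kq_mask and
// n_embd_head below are assumptions for the example.
//
//   const float kq_scale = 1.0f/sqrtf((float) n_embd_head);
//   struct ggml_tensor * probs = ggml_soft_max_ext(ctx, kq, kq_mask, kq_scale, /*max_bias*/ 0.0f);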
  2884. // ggml_soft_max_ext_back
  2885. static struct ggml_tensor * ggml_soft_max_ext_back_impl(
  2886. struct ggml_context * ctx,
  2887. struct ggml_tensor * a,
  2888. struct ggml_tensor * b,
  2889. float scale,
  2890. float max_bias,
  2891. bool inplace) {
  2892. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2893. result->op = GGML_OP_SOFT_MAX_BACK;
  2894. result->src[0] = a;
  2895. result->src[1] = b;
  2896. memcpy((float *) result->op_params + 0, &scale, sizeof(float));
  2897. memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
  2898. return result;
  2899. }
  2900. struct ggml_tensor * ggml_soft_max_ext_back(
  2901. struct ggml_context * ctx,
  2902. struct ggml_tensor * a,
  2903. struct ggml_tensor * b,
  2904. float scale,
  2905. float max_bias) {
  2906. return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
  2907. }
  2908. struct ggml_tensor * ggml_soft_max_ext_back_inplace(
  2909. struct ggml_context * ctx,
  2910. struct ggml_tensor * a,
  2911. struct ggml_tensor * b,
  2912. float scale,
  2913. float max_bias) {
  2914. return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
  2915. }
  2916. // ggml_rope
  2917. static struct ggml_tensor * ggml_rope_impl(
  2918. struct ggml_context * ctx,
  2919. struct ggml_tensor * a,
  2920. struct ggml_tensor * b,
  2921. struct ggml_tensor * c,
  2922. int n_dims,
  2923. int mode,
  2924. int n_ctx_orig,
  2925. float freq_base,
  2926. float freq_scale,
  2927. float ext_factor,
  2928. float attn_factor,
  2929. float beta_fast,
  2930. float beta_slow,
  2931. bool inplace) {
  2932. GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
  2933. GGML_ASSERT(ggml_is_vector(b));
  2934. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2935. GGML_ASSERT(a->ne[2] == b->ne[0]);
  2936. if (c) {
  2937. GGML_ASSERT(c->type == GGML_TYPE_F32);
  2938. GGML_ASSERT(c->ne[0] >= n_dims / 2);
  2939. }
  2940. int sections[4] = {0, 0, 0, 0};
  2941. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2942. int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
  2943. memcpy(params + 5, &freq_base, sizeof(float));
  2944. memcpy(params + 6, &freq_scale, sizeof(float));
  2945. memcpy(params + 7, &ext_factor, sizeof(float));
  2946. memcpy(params + 8, &attn_factor, sizeof(float));
  2947. memcpy(params + 9, &beta_fast, sizeof(float));
  2948. memcpy(params + 10, &beta_slow, sizeof(float));
  2949. memcpy(params + 11, &sections, sizeof(int)*4);
  2950. ggml_set_op_params(result, params, sizeof(params));
  2951. result->op = GGML_OP_ROPE;
  2952. result->src[0] = a;
  2953. result->src[1] = b;
  2954. result->src[2] = c;
  2955. return result;
  2956. }
  2957. struct ggml_tensor * ggml_rope(
  2958. struct ggml_context * ctx,
  2959. struct ggml_tensor * a,
  2960. struct ggml_tensor * b,
  2961. int n_dims,
  2962. int mode) {
  2963. return ggml_rope_impl(
  2964. ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
  2965. );
  2966. }
  2967. struct ggml_tensor * ggml_rope_multi(
  2968. struct ggml_context * ctx,
  2969. struct ggml_tensor * a,
  2970. struct ggml_tensor * b,
  2971. struct ggml_tensor * c,
  2972. int n_dims,
  2973. int sections[4],
  2974. int mode,
  2975. int n_ctx_orig,
  2976. float freq_base,
  2977. float freq_scale,
  2978. float ext_factor,
  2979. float attn_factor,
  2980. float beta_fast,
  2981. float beta_slow) {
  2982. // Multimodal Rotary Position Embedding
  2983. GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
  2984. GGML_ASSERT(ggml_is_vector(b));
  2985. GGML_ASSERT(b->type == GGML_TYPE_I32);
  2986. GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
  2987. if (c) {
  2988. GGML_ASSERT(c->type == GGML_TYPE_F32);
  2989. GGML_ASSERT(c->ne[0] >= n_dims / 2);
  2990. }
  2991. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2992. int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
  2993. memcpy(params + 5, &freq_base, sizeof(float));
  2994. memcpy(params + 6, &freq_scale, sizeof(float));
  2995. memcpy(params + 7, &ext_factor, sizeof(float));
  2996. memcpy(params + 8, &attn_factor, sizeof(float));
  2997. memcpy(params + 9, &beta_fast, sizeof(float));
  2998. memcpy(params + 10, &beta_slow, sizeof(float));
  2999. memcpy(&params[11], sections, sizeof(int)*4);
  3000. ggml_set_op_params(result, params, sizeof(params));
  3001. result->op = GGML_OP_ROPE;
  3002. result->src[0] = a;
  3003. result->src[1] = b;
  3004. result->src[2] = c;
  3005. return result;
  3006. }
  3007. struct ggml_tensor * ggml_rope_inplace(
  3008. struct ggml_context * ctx,
  3009. struct ggml_tensor * a,
  3010. struct ggml_tensor * b,
  3011. int n_dims,
  3012. int mode) {
  3013. return ggml_rope_impl(
  3014. ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
  3015. );
  3016. }
  3017. struct ggml_tensor * ggml_rope_ext(
  3018. struct ggml_context * ctx,
  3019. struct ggml_tensor * a,
  3020. struct ggml_tensor * b,
  3021. struct ggml_tensor * c,
  3022. int n_dims,
  3023. int mode,
  3024. int n_ctx_orig,
  3025. float freq_base,
  3026. float freq_scale,
  3027. float ext_factor,
  3028. float attn_factor,
  3029. float beta_fast,
  3030. float beta_slow) {
  3031. return ggml_rope_impl(
  3032. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3033. ext_factor, attn_factor, beta_fast, beta_slow, false
  3034. );
  3035. }
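// Example (sketch): applying rotary embeddings to a query tensor. b holds one I32 position per
// token (so b->ne[0] == a->ne[2]) and c is an optional frequency-scaling tensor (NULL here).
// The q/pos tensors and the parameter values are assumptions, mirroring common defaults.
//
//   // q: {head_dim, n_head, n_tokens, 1}, pos: {n_tokens} of GGML_TYPE_I32
//   struct ggml_tensor * q_rope = ggml_rope_ext(ctx, q, pos, NULL,
//           /*n_dims*/ head_dim, /*mode*/ 0, /*n_ctx_orig*/ 0,
//           /*freq_base*/ 10000.0f, /*freq_scale*/ 1.0f, /*ext_factor*/ 0.0f,
//           /*attn_factor*/ 1.0f, /*beta_fast*/ 32.0f, /*beta_slow*/ 1.0f);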
  3036. struct ggml_tensor * ggml_rope_ext_inplace(
  3037. struct ggml_context * ctx,
  3038. struct ggml_tensor * a,
  3039. struct ggml_tensor * b,
  3040. struct ggml_tensor * c,
  3041. int n_dims,
  3042. int mode,
  3043. int n_ctx_orig,
  3044. float freq_base,
  3045. float freq_scale,
  3046. float ext_factor,
  3047. float attn_factor,
  3048. float beta_fast,
  3049. float beta_slow) {
  3050. return ggml_rope_impl(
  3051. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3052. ext_factor, attn_factor, beta_fast, beta_slow, true
  3053. );
  3054. }
  3055. struct ggml_tensor * ggml_rope_custom(
  3056. struct ggml_context * ctx,
  3057. struct ggml_tensor * a,
  3058. struct ggml_tensor * b,
  3059. int n_dims,
  3060. int mode,
  3061. int n_ctx_orig,
  3062. float freq_base,
  3063. float freq_scale,
  3064. float ext_factor,
  3065. float attn_factor,
  3066. float beta_fast,
  3067. float beta_slow) {
  3068. return ggml_rope_impl(
  3069. ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3070. ext_factor, attn_factor, beta_fast, beta_slow, false
  3071. );
  3072. }
  3073. struct ggml_tensor * ggml_rope_custom_inplace(
  3074. struct ggml_context * ctx,
  3075. struct ggml_tensor * a,
  3076. struct ggml_tensor * b,
  3077. int n_dims,
  3078. int mode,
  3079. int n_ctx_orig,
  3080. float freq_base,
  3081. float freq_scale,
  3082. float ext_factor,
  3083. float attn_factor,
  3084. float beta_fast,
  3085. float beta_slow) {
  3086. return ggml_rope_impl(
  3087. ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
  3088. ext_factor, attn_factor, beta_fast, beta_slow, true
  3089. );
  3090. }
3091. // Solving `n_rot = max_pos_emb / (2*pi * base^((2 * x) / n_dims))` for x (the number of full rotations that dimension pair x completes over max_pos_emb positions), we get
  3092. // `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
  3093. static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
  3094. return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
  3095. }
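// Worked inversion (sketch of the comment above): dimension pair x rotates with wavelength
// 2*pi * base^(2*x/n_dims), so over max_pos_emb positions it completes
//   n_rot = max_pos_emb / (2*pi * base^(2*x/n_dims))
// full turns. Solving for x:
//   base^(2*x/n_dims) = max_pos_emb / (2*pi*n_rot)
//   x = n_dims * log(max_pos_emb / (2*pi*n_rot)) / (2 * log(base))
// which is what ggml_rope_yarn_corr_dim() computes, with n_ctx_orig playing the role of max_pos_emb.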
  3096. void ggml_rope_yarn_corr_dims(
  3097. int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
  3098. ) {
  3099. // start and end correction dims
  3100. float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
  3101. float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
  3102. dims[0] = MAX(0, start);
  3103. dims[1] = MIN(n_dims - 1, end);
  3104. }
  3105. // ggml_rope_back
  3106. struct ggml_tensor * ggml_rope_ext_back(
  3107. struct ggml_context * ctx,
  3108. struct ggml_tensor * a,
  3109. struct ggml_tensor * b,
  3110. struct ggml_tensor * c,
  3111. int n_dims,
  3112. int mode,
  3113. int n_ctx_orig,
  3114. float freq_base,
  3115. float freq_scale,
  3116. float ext_factor,
  3117. float attn_factor,
  3118. float beta_fast,
  3119. float beta_slow) {
  3120. struct ggml_tensor * result = ggml_rope_ext(
  3121. ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  3122. result->op = GGML_OP_ROPE_BACK;
  3123. return result;
  3124. }
  3125. struct ggml_tensor * ggml_rope_multi_back(
  3126. struct ggml_context * ctx,
  3127. struct ggml_tensor * a,
  3128. struct ggml_tensor * b,
  3129. struct ggml_tensor * c,
  3130. int n_dims,
  3131. int sections[4],
  3132. int mode,
  3133. int n_ctx_orig,
  3134. float freq_base,
  3135. float freq_scale,
  3136. float ext_factor,
  3137. float attn_factor,
  3138. float beta_fast,
  3139. float beta_slow) {
  3140. struct ggml_tensor * result = ggml_rope_multi(
  3141. ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  3142. result->op = GGML_OP_ROPE_BACK;
  3143. return result;
  3144. }
  3145. // ggml_clamp
  3146. struct ggml_tensor * ggml_clamp(
  3147. struct ggml_context * ctx,
  3148. struct ggml_tensor * a,
  3149. float min,
  3150. float max) {
3151. // TODO: when implementing backward, fix this:
  3152. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  3153. float params[] = { min, max };
  3154. ggml_set_op_params(result, params, sizeof(params));
  3155. result->op = GGML_OP_CLAMP;
  3156. result->src[0] = a;
  3157. return result;
  3158. }
  3159. static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  3160. return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
  3161. }
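// Worked example (sketch): for an input of length 224 with kernel size 3, stride 1, padding 1
// and dilation 1 the formula gives (224 + 2*1 - 1*(3 - 1) - 1)/1 + 1 = 224, i.e. "same"
// padding preserves the length; with stride 2 it gives 223/2 + 1 = 112 (integer division).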
  3162. // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
  3163. // a: [OC,IC, KH, KW]
  3164. // b: [N, IC, IH, IW]
  3165. // result: [N, OH, OW, IC*KH*KW]
  3166. struct ggml_tensor * ggml_im2col(
  3167. struct ggml_context * ctx,
  3168. struct ggml_tensor * a,
  3169. struct ggml_tensor * b,
  3170. int s0,
  3171. int s1,
  3172. int p0,
  3173. int p1,
  3174. int d0,
  3175. int d1,
  3176. bool is_2D,
  3177. enum ggml_type dst_type) {
  3178. if (is_2D) {
  3179. GGML_ASSERT(a->ne[2] == b->ne[2]);
  3180. } else {
  3181. //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
  3182. GGML_ASSERT(b->ne[1] == a->ne[1]);
  3183. GGML_ASSERT(b->ne[3] == 1);
  3184. }
  3185. const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
  3186. const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
  3187. GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
  3188. GGML_ASSERT((OW > 0) && "b too small compared to a");
  3189. const int64_t ne[4] = {
  3190. is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
  3191. OW,
  3192. is_2D ? OH : b->ne[2],
  3193. is_2D ? b->ne[3] : 1,
  3194. };
  3195. struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
  3196. int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
  3197. ggml_set_op_params(result, params, sizeof(params));
  3198. result->op = GGML_OP_IM2COL;
  3199. result->src[0] = a;
  3200. result->src[1] = b;
  3201. return result;
  3202. }
  3203. struct ggml_tensor * ggml_im2col_back(
  3204. struct ggml_context * ctx,
  3205. struct ggml_tensor * a,
  3206. struct ggml_tensor * b,
  3207. int64_t * ne,
  3208. int s0,
  3209. int s1,
  3210. int p0,
  3211. int p1,
  3212. int d0,
  3213. int d1,
  3214. bool is_2D) {
  3215. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3216. int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
  3217. ggml_set_op_params(result, params, sizeof(params));
  3218. result->op = GGML_OP_IM2COL_BACK;
  3219. result->src[0] = a;
  3220. result->src[1] = b;
  3221. return result;
  3222. }
  3223. // ggml_conv_1d
  3224. struct ggml_tensor * ggml_conv_1d(
  3225. struct ggml_context * ctx,
  3226. struct ggml_tensor * a,
  3227. struct ggml_tensor * b,
  3228. int s0,
  3229. int p0,
  3230. int d0) {
  3231. struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
  3232. struct ggml_tensor * result =
  3233. ggml_mul_mat(ctx,
  3234. ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
  3235. ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K]
  3236. result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
  3237. return result;
  3238. }
  3239. // ggml_conv_1d_ph
  3240. struct ggml_tensor* ggml_conv_1d_ph(
  3241. struct ggml_context * ctx,
  3242. struct ggml_tensor * a,
  3243. struct ggml_tensor * b,
  3244. int s,
  3245. int d) {
  3246. return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
  3247. }
  3248. // ggml_conv_1d_dw
  3249. struct ggml_tensor * ggml_conv_1d_dw(
  3250. struct ggml_context * ctx,
  3251. struct ggml_tensor * a,
  3252. struct ggml_tensor * b,
  3253. int s0,
  3254. int p0,
  3255. int d0) {
  3256. struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
  3257. struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
  3258. struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
  3259. struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
  3260. result = ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
  3261. return result;
  3262. }
  3263. // ggml_conv_1d_dw_ph
  3264. struct ggml_tensor * ggml_conv_1d_dw_ph(
  3265. struct ggml_context * ctx,
  3266. struct ggml_tensor * a,
  3267. struct ggml_tensor * b,
  3268. int s0,
  3269. int d0) {
  3270. return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
  3271. }
  3272. // ggml_conv_transpose_1d
  3273. static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
  3274. return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
  3275. }
  3276. GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
  3277. struct ggml_context * ctx,
  3278. struct ggml_tensor * a,
  3279. struct ggml_tensor * b,
  3280. int s0,
  3281. int p0,
  3282. int d0) {
  3283. GGML_ASSERT(ggml_is_matrix(b));
  3284. GGML_ASSERT(a->ne[2] == b->ne[1]);
  3285. GGML_ASSERT(a->ne[3] == 1);
  3286. GGML_ASSERT(p0 == 0);
  3287. GGML_ASSERT(d0 == 1);
  3288. const int64_t ne[4] = {
  3289. ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
  3290. a->ne[1], b->ne[2], 1,
  3291. };
  3292. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3293. int32_t params[] = { s0, p0, d0 };
  3294. ggml_set_op_params(result, params, sizeof(params));
  3295. result->op = GGML_OP_CONV_TRANSPOSE_1D;
  3296. result->src[0] = a;
  3297. result->src[1] = b;
  3298. return result;
  3299. }
  3300. // ggml_conv_2d
  3301. // a: [OC,IC, KH, KW]
  3302. // b: [N, IC, IH, IW]
  3303. // result: [N, OC, OH, OW]
  3304. struct ggml_tensor * ggml_conv_2d(
  3305. struct ggml_context * ctx,
  3306. struct ggml_tensor * a,
  3307. struct ggml_tensor * b,
  3308. int s0,
  3309. int s1,
  3310. int p0,
  3311. int p1,
  3312. int d0,
  3313. int d1) {
  3314. struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
  3315. struct ggml_tensor * result =
  3316. ggml_mul_mat(ctx,
  3317. ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
  3318. ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
  3319. result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
  3320. result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
  3321. return result;
  3322. }
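// Example (sketch): a 3x3 convolution with 16 input and 32 output channels over a single
// 64x64 image, using "same" padding. In ggml's ne order the kernel is {KW, KH, IC, OC} and
// the input is {IW, IH, IC, N}; all sizes are made up for the example.
//
//   struct ggml_tensor * w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, 16, 32);
//   struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 16, 1);
//   struct ggml_tensor * y = ggml_conv_2d(ctx, w, x, 1, 1, 1, 1, 1, 1); // -> {64, 64, 32, 1}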
  3323. // ggml_conv_2d_sk_p0
  3324. struct ggml_tensor * ggml_conv_2d_sk_p0(
  3325. struct ggml_context * ctx,
  3326. struct ggml_tensor * a,
  3327. struct ggml_tensor * b) {
  3328. return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
  3329. }
  3330. // ggml_conv_2d_s1_ph
  3331. struct ggml_tensor * ggml_conv_2d_s1_ph(
  3332. struct ggml_context * ctx,
  3333. struct ggml_tensor * a,
  3334. struct ggml_tensor * b) {
  3335. return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
  3336. }
  3337. // ggml_conv_2d_dw
  3338. struct ggml_tensor * ggml_conv_2d_dw(
  3339. struct ggml_context * ctx,
  3340. struct ggml_tensor * a,
  3341. struct ggml_tensor * b,
  3342. int s0,
  3343. int s1,
  3344. int p0,
  3345. int p1,
  3346. int d0,
  3347. int d1) {
  3348. struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
  3349. struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
  3350. ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
  3351. s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
  3352. struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
  3353. new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
  3354. struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
  3355. result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
  3356. return result;
  3357. }
  3358. // ggml_conv_2d_dw_direct
  3359. struct ggml_tensor * ggml_conv_2d_dw_direct(
  3360. struct ggml_context * ctx,
  3361. struct ggml_tensor * a,
  3362. struct ggml_tensor * b,
  3363. int stride0,
  3364. int stride1,
  3365. int pad0,
  3366. int pad1,
  3367. int dilation0,
  3368. int dilation1) {
  3369. GGML_ASSERT(a->ne[2] == 1);
  3370. GGML_ASSERT(a->ne[3] == b->ne[2]);
  3371. int64_t ne[4];
  3372. ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
  3373. ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
  3374. ne[2] = b->ne[2];
  3375. ne[3] = b->ne[3];
  3376. struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
  3377. if (ggml_is_contiguous_channels(b)) {
  3378. // Result will be permuted the same way as input (CWHN order)
  3379. const int64_t type_size = ggml_type_size(result->type);
  3380. GGML_ASSERT(ggml_blck_size(result->type) == 1);
  3381. result->nb[0] = result->ne[2] * type_size;
  3382. result->nb[1] = result->ne[0] * result->nb[0];
  3383. result->nb[2] = type_size;
  3384. }
  3385. int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
  3386. ggml_set_op_params(result, params, sizeof(params));
  3387. result->op = GGML_OP_CONV_2D_DW;
  3388. result->src[0] = a;
  3389. result->src[1] = b;
  3390. return result;
  3391. }
  3392. // ggml_conv_transpose_2d_p0
  3393. static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
  3394. return (ins - 1) * s - 2 * p + ks;
  3395. }
  3396. struct ggml_tensor * ggml_conv_transpose_2d_p0(
  3397. struct ggml_context * ctx,
  3398. struct ggml_tensor * a,
  3399. struct ggml_tensor * b,
  3400. int stride) {
  3401. GGML_ASSERT(a->ne[3] == b->ne[2]);
  3402. const int64_t ne[4] = {
  3403. ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
  3404. ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
  3405. a->ne[2], b->ne[3],
  3406. };
  3407. struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3408. ggml_set_op_params_i32(result, 0, stride);
  3409. result->op = GGML_OP_CONV_TRANSPOSE_2D;
  3410. result->src[0] = a;
  3411. result->src[1] = b;
  3412. return result;
  3413. }
  3414. // ggml_pool_*
  3415. static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
  3416. return (ins + 2 * p - ks) / s + 1;
  3417. }
  3418. // ggml_pool_1d
  3419. struct ggml_tensor * ggml_pool_1d(
  3420. struct ggml_context * ctx,
  3421. struct ggml_tensor * a,
  3422. enum ggml_op_pool op,
  3423. int k0,
  3424. int s0,
  3425. int p0) {
  3426. const int64_t ne[4] = {
  3427. ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
  3428. a->ne[1],
  3429. a->ne[2],
  3430. a->ne[3],
  3431. };
  3432. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3433. int32_t params[] = { op, k0, s0, p0 };
  3434. ggml_set_op_params(result, params, sizeof(params));
  3435. result->op = GGML_OP_POOL_1D;
  3436. result->src[0] = a;
  3437. return result;
  3438. }
  3439. // ggml_pool_2d
  3440. struct ggml_tensor * ggml_pool_2d(
  3441. struct ggml_context * ctx,
  3442. struct ggml_tensor * a,
  3443. enum ggml_op_pool op,
  3444. int k0,
  3445. int k1,
  3446. int s0,
  3447. int s1,
  3448. float p0,
  3449. float p1) {
  3450. struct ggml_tensor * result;
  3451. const int64_t ne[4] = {
  3452. ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
  3453. ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
  3454. a->ne[2],
  3455. a->ne[3],
  3456. };
  3457. result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3458. int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
  3459. ggml_set_op_params(result, params, sizeof(params));
  3460. result->op = GGML_OP_POOL_2D;
  3461. result->src[0] = a;
  3462. return result;
  3463. }
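// Example (sketch): 2x2 max pooling with stride 2 and no padding halves the spatial
// dimensions while channels and batch pass through unchanged. The input x is hypothetical.
//
//   // x: {64, 64, 32, 1}  ->  {32, 32, 32, 1}
//   struct ggml_tensor * p = ggml_pool_2d(ctx, x, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);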
  3464. struct ggml_tensor * ggml_pool_2d_back(
  3465. struct ggml_context * ctx,
  3466. struct ggml_tensor * a,
  3467. struct ggml_tensor * af,
  3468. enum ggml_op_pool op,
  3469. int k0,
  3470. int k1,
  3471. int s0,
  3472. int s1,
  3473. float p0,
  3474. float p1) {
  3475. struct ggml_tensor * result;
  3476. result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
  3477. int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
  3478. ggml_set_op_params(result, params, sizeof(params));
  3479. result->op = GGML_OP_POOL_2D_BACK;
  3480. result->src[0] = a;
  3481. result->src[1] = af;
  3482. return result;
  3483. }
  3484. // ggml_upscale
  3485. static struct ggml_tensor * ggml_upscale_impl(
  3486. struct ggml_context * ctx,
  3487. struct ggml_tensor * a,
  3488. int ne0,
  3489. int ne1,
  3490. int ne2,
  3491. int ne3,
  3492. enum ggml_scale_mode mode) {
  3493. GGML_ASSERT(a->ne[0] <= ne0);
  3494. GGML_ASSERT(a->ne[1] <= ne1);
  3495. GGML_ASSERT(a->ne[2] <= ne2);
  3496. GGML_ASSERT(a->ne[3] <= ne3);
  3497. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  3498. ggml_set_op_params_i32(result, 0, mode);
  3499. result->op = GGML_OP_UPSCALE;
  3500. result->src[0] = a;
  3501. return result;
  3502. }
  3503. struct ggml_tensor * ggml_upscale(
  3504. struct ggml_context * ctx,
  3505. struct ggml_tensor * a,
  3506. int scale_factor,
  3507. enum ggml_scale_mode mode) {
  3508. return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
  3509. }
  3510. struct ggml_tensor * ggml_upscale_ext(
  3511. struct ggml_context * ctx,
  3512. struct ggml_tensor * a,
  3513. int ne0,
  3514. int ne1,
  3515. int ne2,
  3516. int ne3,
  3517. enum ggml_scale_mode mode) {
  3518. return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
  3519. }
  3520. // ggml_pad
  3521. struct ggml_tensor * ggml_pad(
  3522. struct ggml_context * ctx,
  3523. struct ggml_tensor * a,
  3524. int p0,
  3525. int p1,
  3526. int p2,
  3527. int p3) {
  3528. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
  3529. a->ne[0] + p0,
  3530. a->ne[1] + p1,
  3531. a->ne[2] + p2,
  3532. a->ne[3] + p3);
  3533. result->op = GGML_OP_PAD;
  3534. result->src[0] = a;
  3535. return result;
  3536. }
  3537. // ggml_pad_reflect_1d
  3538. struct ggml_tensor * ggml_pad_reflect_1d(
  3539. struct ggml_context * ctx,
  3540. struct ggml_tensor * a,
  3541. int p0,
  3542. int p1) {
  3543. GGML_ASSERT(p0 >= 0);
  3544. GGML_ASSERT(p1 >= 0);
3545. GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
  3546. GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
  3547. GGML_ASSERT(ggml_is_contiguous(a));
  3548. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3549. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
  3550. a->ne[0] + p0 + p1,
  3551. a->ne[1],
  3552. a->ne[2],
  3553. a->ne[3]);
  3554. int32_t params[] = { p0, p1 };
  3555. ggml_set_op_params(result, params, sizeof(params));
  3556. result->op = GGML_OP_PAD_REFLECT_1D;
  3557. result->src[0] = a;
  3558. return result;
  3559. }
  3560. // ggml_arange
  3561. struct ggml_tensor * ggml_arange(
  3562. struct ggml_context * ctx,
  3563. float start,
  3564. float stop,
  3565. float step) {
  3566. GGML_ASSERT(stop > start);
  3567. const int64_t steps = (int64_t) ceilf((stop - start) / step);
  3568. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
  3569. ggml_set_op_params_f32(result, 0, start);
  3570. ggml_set_op_params_f32(result, 1, stop);
  3571. ggml_set_op_params_f32(result, 2, step);
  3572. result->op = GGML_OP_ARANGE;
  3573. return result;
  3574. }
  3575. // ggml_timestep_embedding
  3576. struct ggml_tensor * ggml_timestep_embedding(
  3577. struct ggml_context * ctx,
  3578. struct ggml_tensor * timesteps,
  3579. int dim,
  3580. int max_period) {
  3581. int actual_dim = dim;
  3582. if (dim % 2 != 0) {
  3583. actual_dim = dim + 1;
  3584. }
  3585. struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
  3586. ggml_set_op_params_i32(result, 0, dim);
  3587. ggml_set_op_params_i32(result, 1, max_period);
  3588. result->op = GGML_OP_TIMESTEP_EMBEDDING;
  3589. result->src[0] = timesteps;
  3590. return result;
  3591. }
  3592. // ggml_argsort
  3593. struct ggml_tensor * ggml_argsort(
  3594. struct ggml_context * ctx,
  3595. struct ggml_tensor * a,
  3596. enum ggml_sort_order order) {
  3597. GGML_ASSERT(a->ne[0] <= INT32_MAX);
  3598. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
  3599. ggml_set_op_params_i32(result, 0, (int32_t) order);
  3600. result->op = GGML_OP_ARGSORT;
  3601. result->src[0] = a;
  3602. return result;
  3603. }
  3604. // ggml_top_k
  3605. struct ggml_tensor * ggml_top_k(
  3606. struct ggml_context * ctx,
  3607. struct ggml_tensor * a,
  3608. int k) {
  3609. GGML_ASSERT(a->ne[0] >= k);
  3610. struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
  3611. result = ggml_view_4d(ctx, result,
  3612. k, result->ne[1], result->ne[2], result->ne[3],
  3613. result->nb[1], result->nb[2], result->nb[3],
  3614. 0);
  3615. return result;
  3616. }
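// Example (sketch): ggml_top_k returns the I32 indices (not the values) of the k largest
// entries along dim 0, as a view of the full descending argsort. The logits tensor is
// hypothetical.
//
//   // logits: {n_vocab, n_tokens} F32  ->  idx: {10, n_tokens} I32
//   struct ggml_tensor * idx = ggml_top_k(ctx, logits, 10);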
  3617. // ggml_flash_attn_ext
  3618. struct ggml_tensor * ggml_flash_attn_ext(
  3619. struct ggml_context * ctx,
  3620. struct ggml_tensor * q,
  3621. struct ggml_tensor * k,
  3622. struct ggml_tensor * v,
  3623. struct ggml_tensor * mask,
  3624. float scale,
  3625. float max_bias,
  3626. float logit_softcap) {
  3627. GGML_ASSERT(ggml_can_mul_mat(k, q));
  3628. // TODO: check if vT can be multiplied by (k*qT)
  3629. if (mask) {
  3630. GGML_ASSERT(ggml_is_contiguous(mask));
  3631. GGML_ASSERT(mask->ne[2] == 1);
  3632. GGML_ASSERT(mask->ne[3] == 1);
  3633. GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
  3634. "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
  3635. //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
  3636. }
  3637. if (max_bias > 0.0f) {
  3638. GGML_ASSERT(mask);
  3639. }
  3640. // permute(0, 2, 1, 3)
  3641. int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
  3642. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3643. float params[] = { scale, max_bias, logit_softcap };
  3644. ggml_set_op_params(result, params, sizeof(params));
  3645. result->op = GGML_OP_FLASH_ATTN_EXT;
  3646. result->src[0] = q;
  3647. result->src[1] = k;
  3648. result->src[2] = v;
  3649. result->src[3] = mask;
  3650. return result;
  3651. }
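// Example (sketch): fused attention over one sequence. Note the mask padding requirement from
// the assert above; the tensors and head layout here are assumptions for the example.
//
//   // q: {head_dim, n_tokens, n_head, 1}, k/v: {head_dim, n_kv, n_head_kv, 1}
//   // kq_mask: {n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)} in F16
//   struct ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
//           1.0f/sqrtf((float) head_dim), /*max_bias*/ 0.0f, /*logit_softcap*/ 0.0f);
//   // out: {head_dim_v, n_head, n_tokens, 1} in F32 (see the permuted ne[] above)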
  3652. void ggml_flash_attn_ext_set_prec(
  3653. struct ggml_tensor * a,
  3654. enum ggml_prec prec) {
  3655. GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
  3656. const int32_t prec_i32 = (int32_t) prec;
3657. ggml_set_op_params_i32(a, 3, prec_i32); // op_params layout: [0] scale, [1] max_bias, [2] logit_softcap, [3] prec
  3658. }
  3659. enum ggml_prec ggml_flash_attn_ext_get_prec(
  3660. const struct ggml_tensor * a) {
  3661. GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
  3662. const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
  3663. return (enum ggml_prec) prec_i32;
  3664. }
  3665. // ggml_flash_attn_back
  3666. struct ggml_tensor * ggml_flash_attn_back(
  3667. struct ggml_context * ctx,
  3668. struct ggml_tensor * q,
  3669. struct ggml_tensor * k,
  3670. struct ggml_tensor * v,
  3671. struct ggml_tensor * d,
  3672. bool masked) {
  3673. GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
  3674. GGML_ASSERT(ggml_can_mul_mat(k, q));
  3675. // TODO: check if vT can be multiplied by (k*qT)
  3676. // d shape [D,N,ne2,ne3]
  3677. // q shape [D,N,ne2,ne3]
  3678. // k shape [D,M,kvne2,ne3]
  3679. // v shape [M,D,kvne2,ne3]
  3680. const int64_t D = q->ne[0];
  3681. const int64_t N = q->ne[1];
  3682. const int64_t M = k->ne[1];
  3683. const int64_t ne2 = q->ne[2];
  3684. const int64_t ne3 = q->ne[3];
  3685. const int64_t kvne2 = k->ne[2];
  3686. GGML_ASSERT(k->ne[0] == D);
  3687. GGML_ASSERT(v->ne[0] == M);
  3688. GGML_ASSERT(v->ne[1] == D);
  3689. GGML_ASSERT(d->ne[0] == D);
  3690. GGML_ASSERT(d->ne[1] == N);
  3691. GGML_ASSERT(k->ne[2] == kvne2);
  3692. GGML_ASSERT(k->ne[3] == ne3);
  3693. GGML_ASSERT(v->ne[2] == kvne2);
  3694. GGML_ASSERT(v->ne[3] == ne3);
  3695. GGML_ASSERT(d->ne[2] == ne2);
  3696. GGML_ASSERT(d->ne[3] == ne3);
  3697. GGML_ASSERT(ne2 % kvne2 == 0);
3698. // store gradients of q, k and v as contiguous tensors concatenated in result.
  3699. // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
  3700. const int64_t elem_q = ggml_nelements(q);
  3701. const int64_t elem_k = ggml_nelements(k);
  3702. const int64_t elem_v = ggml_nelements(v);
  3703. enum ggml_type result_type = GGML_TYPE_F32;
  3704. GGML_ASSERT(ggml_blck_size(result_type) == 1);
  3705. const size_t tsize = ggml_type_size(result_type);
  3706. const size_t offs_q = 0;
  3707. const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
  3708. const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
  3709. const size_t end = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
  3710. const size_t nelements = (end + tsize - 1)/tsize;
  3711. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
  3712. int32_t masked_i = masked ? 1 : 0;
  3713. ggml_set_op_params(result, &masked_i, sizeof(masked_i));
  3714. result->op = GGML_OP_FLASH_ATTN_BACK;
  3715. result->src[0] = q;
  3716. result->src[1] = k;
  3717. result->src[2] = v;
  3718. result->src[3] = d;
  3719. return result;
  3720. }
  3721. // ggml_ssm_conv
  3722. struct ggml_tensor * ggml_ssm_conv(
  3723. struct ggml_context * ctx,
  3724. struct ggml_tensor * sx,
  3725. struct ggml_tensor * c) {
  3726. GGML_ASSERT(ggml_is_3d(sx));
  3727. GGML_ASSERT(ggml_is_matrix(c));
  3728. const int64_t d_conv = c->ne[0];
  3729. const int64_t d_inner = c->ne[1];
  3730. const int64_t n_t = sx->ne[0] - d_conv + 1; // tokens per sequence
  3731. const int64_t n_s = sx->ne[2];
  3732. // TODO: maybe support other strides than 1?
  3733. // FIXME: this is always true?
  3734. GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
  3735. GGML_ASSERT(sx->ne[1] == d_inner);
  3736. GGML_ASSERT(n_t >= 0);
  3737. struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
  3738. result->op = GGML_OP_SSM_CONV;
  3739. result->src[0] = sx;
  3740. result->src[1] = c;
  3741. return result;
  3742. }
  3743. // ggml_ssm_scan
  3744. struct ggml_tensor * ggml_ssm_scan(
  3745. struct ggml_context * ctx,
  3746. struct ggml_tensor * s,
  3747. struct ggml_tensor * x,
  3748. struct ggml_tensor * dt,
  3749. struct ggml_tensor * A,
  3750. struct ggml_tensor * B,
  3751. struct ggml_tensor * C) {
  3752. GGML_ASSERT(ggml_is_contiguous(s));
  3753. GGML_ASSERT(ggml_is_contiguous(x));
  3754. GGML_ASSERT(ggml_is_contiguous(dt));
  3755. GGML_ASSERT(ggml_is_contiguous(A));
  3756. GGML_ASSERT(ggml_is_matrix(A));
  3757. GGML_ASSERT(ggml_is_3d(B));
  3758. GGML_ASSERT(ggml_is_3d(s));
  3759. GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
  3760. GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
  3761. GGML_ASSERT(ggml_are_same_shape(x, dt));
  3762. GGML_ASSERT(ggml_are_same_shape(B, C));
  3763. {
  3764. const int64_t d_state = s->ne[0];
  3765. const int64_t d_inner = s->ne[1];
  3766. const int64_t n_seq_tokens = x->ne[1];
  3767. const int64_t n_seqs = x->ne[2];
  3768. GGML_ASSERT(s->ne[2] == n_seqs);
  3769. GGML_ASSERT(x->ne[0] == d_inner);
  3770. GGML_ASSERT(A->ne[0] == d_state);
  3771. GGML_ASSERT(A->ne[1] == d_inner);
  3772. GGML_ASSERT(B->ne[0] == d_state);
  3773. GGML_ASSERT(B->ne[1] == n_seq_tokens);
  3774. GGML_ASSERT(B->ne[2] == n_seqs);
  3775. }
  3776. // concatenated y + ssm_states
  3777. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));
  3778. result->op = GGML_OP_SSM_SCAN;
  3779. result->src[0] = s;
  3780. result->src[1] = x;
  3781. result->src[2] = dt;
  3782. result->src[3] = A;
  3783. result->src[4] = B;
  3784. result->src[5] = C;
  3785. return result;
  3786. }
  3787. // ggml_win_part
  3788. struct ggml_tensor * ggml_win_part(
  3789. struct ggml_context * ctx,
  3790. struct ggml_tensor * a,
  3791. int w) {
  3792. GGML_ASSERT(a->ne[3] == 1);
  3793. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3794. // padding
  3795. const int px = (w - a->ne[1]%w)%w;
  3796. const int py = (w - a->ne[2]%w)%w;
  3797. const int npx = (px + a->ne[1])/w;
  3798. const int npy = (py + a->ne[2])/w;
  3799. const int np = npx*npy;
  3800. const int64_t ne[4] = { a->ne[0], w, w, np, };
  3801. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3802. int32_t params[] = { npx, npy, w };
  3803. ggml_set_op_params(result, params, sizeof(params));
  3804. result->op = GGML_OP_WIN_PART;
  3805. result->src[0] = a;
  3806. return result;
  3807. }
  3808. // ggml_win_unpart
  3809. struct ggml_tensor * ggml_win_unpart(
  3810. struct ggml_context * ctx,
  3811. struct ggml_tensor * a,
  3812. int w0,
  3813. int h0,
  3814. int w) {
  3815. GGML_ASSERT(a->type == GGML_TYPE_F32);
  3816. const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
  3817. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
  3818. int32_t params[] = { w };
  3819. ggml_set_op_params(result, params, sizeof(params));
  3820. result->op = GGML_OP_WIN_UNPART;
  3821. result->src[0] = a;
  3822. return result;
  3823. }
  3824. // ggml_get_rel_pos
  3825. struct ggml_tensor * ggml_get_rel_pos(
  3826. struct ggml_context * ctx,
  3827. struct ggml_tensor * a,
  3828. int qh,
  3829. int kh) {
  3830. GGML_ASSERT(qh == kh);
  3831. GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
  3832. const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
  3833. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
  3834. result->op = GGML_OP_GET_REL_POS;
  3835. result->src[0] = a;
  3836. return result;
  3837. }
  3838. // ggml_add_rel_pos
  3839. static struct ggml_tensor * ggml_add_rel_pos_impl(
  3840. struct ggml_context * ctx,
  3841. struct ggml_tensor * a,
  3842. struct ggml_tensor * pw,
  3843. struct ggml_tensor * ph,
  3844. bool inplace) {
  3845. GGML_ASSERT(ggml_are_same_shape(pw, ph));
  3846. GGML_ASSERT(ggml_is_contiguous(a));
  3847. GGML_ASSERT(ggml_is_contiguous(pw));
  3848. GGML_ASSERT(ggml_is_contiguous(ph));
  3849. GGML_ASSERT(ph->type == GGML_TYPE_F32);
  3850. GGML_ASSERT(pw->type == GGML_TYPE_F32);
  3851. GGML_ASSERT(pw->ne[3] == a->ne[2]);
  3852. GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
  3853. GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
  3854. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3855. ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
  3856. result->op = GGML_OP_ADD_REL_POS;
  3857. result->src[0] = a;
  3858. result->src[1] = pw;
  3859. result->src[2] = ph;
  3860. return result;
  3861. }
  3862. struct ggml_tensor * ggml_add_rel_pos(
  3863. struct ggml_context * ctx,
  3864. struct ggml_tensor * a,
  3865. struct ggml_tensor * pw,
  3866. struct ggml_tensor * ph) {
  3867. return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
  3868. }
  3869. struct ggml_tensor * ggml_add_rel_pos_inplace(
  3870. struct ggml_context * ctx,
  3871. struct ggml_tensor * a,
  3872. struct ggml_tensor * pw,
  3873. struct ggml_tensor * ph) {
  3874. return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
  3875. }
  3876. // ggml_rwkv_wkv6
  3877. struct ggml_tensor * ggml_rwkv_wkv6(
  3878. struct ggml_context * ctx,
  3879. struct ggml_tensor * k,
  3880. struct ggml_tensor * v,
  3881. struct ggml_tensor * r,
  3882. struct ggml_tensor * tf,
  3883. struct ggml_tensor * td,
  3884. struct ggml_tensor * state) {
  3885. GGML_ASSERT(ggml_is_contiguous(k));
  3886. GGML_ASSERT(ggml_is_contiguous(v));
  3887. GGML_ASSERT(ggml_is_contiguous(r));
  3888. GGML_ASSERT(ggml_is_contiguous(tf));
  3889. GGML_ASSERT(ggml_is_contiguous(td));
  3890. GGML_ASSERT(ggml_is_contiguous(state));
  3891. const int64_t S = k->ne[0];
  3892. const int64_t H = k->ne[1];
  3893. const int64_t n_tokens = k->ne[2];
  3894. const int64_t n_seqs = state->ne[1];
  3895. {
  3896. GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
  3897. GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
  3898. GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
  3899. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3900. }
  3901. // concat output and new_state
  3902. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3903. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3904. result->op = GGML_OP_RWKV_WKV6;
  3905. result->src[0] = k;
  3906. result->src[1] = v;
  3907. result->src[2] = r;
  3908. result->src[3] = tf;
  3909. result->src[4] = td;
  3910. result->src[5] = state;
  3911. return result;
  3912. }
  3913. // ggml_gated_linear_attn
  3914. struct ggml_tensor * ggml_gated_linear_attn(
  3915. struct ggml_context * ctx,
  3916. struct ggml_tensor * k,
  3917. struct ggml_tensor * v,
  3918. struct ggml_tensor * q,
  3919. struct ggml_tensor * g,
  3920. struct ggml_tensor * state,
  3921. float scale) {
  3922. GGML_ASSERT(ggml_is_contiguous(k));
  3923. GGML_ASSERT(ggml_is_contiguous(v));
  3924. GGML_ASSERT(ggml_is_contiguous(q));
  3925. GGML_ASSERT(ggml_is_contiguous(g));
  3926. GGML_ASSERT(ggml_is_contiguous(state));
  3927. const int64_t S = k->ne[0];
  3928. const int64_t H = k->ne[1];
  3929. const int64_t n_tokens = k->ne[2];
  3930. const int64_t n_seqs = state->ne[1];
  3931. {
  3932. GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
  3933. GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
  3934. GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
  3935. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3936. }
  3937. // concat output and new_state
  3938. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3939. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3940. ggml_set_op_params_f32(result, 0, scale);
  3941. result->op = GGML_OP_GATED_LINEAR_ATTN;
  3942. result->src[0] = k;
  3943. result->src[1] = v;
  3944. result->src[2] = q;
  3945. result->src[3] = g;
  3946. result->src[4] = state;
  3947. return result;
  3948. }
  3949. // ggml_rwkv_wkv7
  3950. struct ggml_tensor * ggml_rwkv_wkv7(
  3951. struct ggml_context * ctx,
  3952. struct ggml_tensor * r,
  3953. struct ggml_tensor * w,
  3954. struct ggml_tensor * k,
  3955. struct ggml_tensor * v,
  3956. struct ggml_tensor * a,
  3957. struct ggml_tensor * b,
  3958. struct ggml_tensor * state) {
  3959. GGML_ASSERT(ggml_is_contiguous(r));
  3960. GGML_ASSERT(ggml_is_contiguous(w));
  3961. GGML_ASSERT(ggml_is_contiguous(k));
  3962. GGML_ASSERT(ggml_is_contiguous(v));
  3963. GGML_ASSERT(ggml_is_contiguous(a));
  3964. GGML_ASSERT(ggml_is_contiguous(b));
  3965. GGML_ASSERT(ggml_is_contiguous(state));
  3966. const int64_t S = k->ne[0];
  3967. const int64_t H = k->ne[1];
  3968. const int64_t n_tokens = k->ne[2];
  3969. const int64_t n_seqs = state->ne[1];
  3970. {
  3971. GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
  3972. GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
  3973. GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
  3974. GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
  3975. GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
  3976. GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
  3977. }
  3978. // concat output and new_state
  3979. const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
  3980. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  3981. result->op = GGML_OP_RWKV_WKV7;
  3982. result->src[0] = r;
  3983. result->src[1] = w;
  3984. result->src[2] = k;
  3985. result->src[3] = v;
  3986. result->src[4] = a;
  3987. result->src[5] = b;
  3988. result->src[6] = state;
  3989. return result;
  3990. }
  3991. // ggml_unary
  3992. static struct ggml_tensor * ggml_unary_impl(
  3993. struct ggml_context * ctx,
  3994. struct ggml_tensor * a,
  3995. enum ggml_unary_op op,
  3996. bool inplace) {
  3997. GGML_ASSERT(ggml_is_contiguous_1(a));
  3998. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  3999. ggml_set_op_params_i32(result, 0, (int32_t) op);
  4000. result->op = GGML_OP_UNARY;
  4001. result->src[0] = a;
  4002. return result;
  4003. }
  4004. struct ggml_tensor * ggml_unary(
  4005. struct ggml_context * ctx,
  4006. struct ggml_tensor * a,
  4007. enum ggml_unary_op op) {
  4008. return ggml_unary_impl(ctx, a, op, false);
  4009. }
  4010. struct ggml_tensor * ggml_unary_inplace(
  4011. struct ggml_context * ctx,
  4012. struct ggml_tensor * a,
  4013. enum ggml_unary_op op) {
  4014. return ggml_unary_impl(ctx, a, op, true);
  4015. }
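// illustrative usage sketch: ggml_unary() is the generic entry point behind the per-op helpers;
// assuming a contiguous tensor x, something like
//   struct ggml_tensor * y = ggml_unary(ctx, x, GGML_UNARY_OP_GELU);
// is expected to build the same node as ggml_gelu(ctx, x). Note the ggml_is_contiguous_1()
// requirement asserted above.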
  4016. // ggml_map_custom1
  4017. static struct ggml_tensor * ggml_map_custom1_impl(
  4018. struct ggml_context * ctx,
  4019. struct ggml_tensor * a,
  4020. const ggml_custom1_op_t fun,
  4021. int n_tasks,
  4022. void * userdata,
  4023. bool inplace) {
  4024. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4025. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4026. struct ggml_map_custom1_op_params params = {
  4027. /*.fun =*/ fun,
  4028. /*.n_tasks =*/ n_tasks,
  4029. /*.userdata =*/ userdata
  4030. };
  4031. ggml_set_op_params(result, &params, sizeof(params));
  4032. result->op = GGML_OP_MAP_CUSTOM1;
  4033. result->src[0] = a;
  4034. return result;
  4035. }
  4036. struct ggml_tensor * ggml_map_custom1(
  4037. struct ggml_context * ctx,
  4038. struct ggml_tensor * a,
  4039. const ggml_custom1_op_t fun,
  4040. int n_tasks,
  4041. void * userdata) {
  4042. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
  4043. }
  4044. struct ggml_tensor * ggml_map_custom1_inplace(
  4045. struct ggml_context * ctx,
  4046. struct ggml_tensor * a,
  4047. const ggml_custom1_op_t fun,
  4048. int n_tasks,
  4049. void * userdata) {
  4050. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
  4051. }
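// minimal callback sketch for ggml_map_custom1, assuming the ggml_custom1_op_t signature
// (dst, a, ith, nth, userdata) declared in ggml.h and a 2-D tensor; each of the nth tasks
// handles an interleaved subset of rows:
//   static void scale_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
//                          int ith, int nth, void * userdata) {
//       const float s = *(const float *) userdata;
//       for (int64_t i1 = ith; i1 < a->ne[1]; i1 += nth) {
//           const float * x = (const float *) ((const char *) a->data   + i1*a->nb[1]);
//           float       * y = (float       *) ((      char *) dst->data + i1*dst->nb[1]);
//           for (int64_t i0 = 0; i0 < a->ne[0]; i0++) { y[i0] = s*x[i0]; }
//       }
//   }
//   // struct ggml_tensor * y = ggml_map_custom1(ctx, a, scale_rows, GGML_N_TASKS_MAX, &s);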
  4052. // ggml_map_custom2
  4053. static struct ggml_tensor * ggml_map_custom2_impl(
  4054. struct ggml_context * ctx,
  4055. struct ggml_tensor * a,
  4056. struct ggml_tensor * b,
  4057. const ggml_custom2_op_t fun,
  4058. int n_tasks,
  4059. void * userdata,
  4060. bool inplace) {
  4061. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4062. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4063. struct ggml_map_custom2_op_params params = {
  4064. /*.fun =*/ fun,
  4065. /*.n_tasks =*/ n_tasks,
  4066. /*.userdata =*/ userdata
  4067. };
  4068. ggml_set_op_params(result, &params, sizeof(params));
  4069. result->op = GGML_OP_MAP_CUSTOM2;
  4070. result->src[0] = a;
  4071. result->src[1] = b;
  4072. return result;
  4073. }
  4074. struct ggml_tensor * ggml_map_custom2(
  4075. struct ggml_context * ctx,
  4076. struct ggml_tensor * a,
  4077. struct ggml_tensor * b,
  4078. const ggml_custom2_op_t fun,
  4079. int n_tasks,
  4080. void * userdata) {
  4081. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
  4082. }
  4083. struct ggml_tensor * ggml_map_custom2_inplace(
  4084. struct ggml_context * ctx,
  4085. struct ggml_tensor * a,
  4086. struct ggml_tensor * b,
  4087. const ggml_custom2_op_t fun,
  4088. int n_tasks,
  4089. void * userdata) {
  4090. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
  4091. }
  4092. // ggml_map_custom3
  4093. static struct ggml_tensor * ggml_map_custom3_impl(
  4094. struct ggml_context * ctx,
  4095. struct ggml_tensor * a,
  4096. struct ggml_tensor * b,
  4097. struct ggml_tensor * c,
  4098. const ggml_custom3_op_t fun,
  4099. int n_tasks,
  4100. void * userdata,
  4101. bool inplace) {
  4102. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4103. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4104. struct ggml_map_custom3_op_params params = {
  4105. /*.fun =*/ fun,
  4106. /*.n_tasks =*/ n_tasks,
  4107. /*.userdata =*/ userdata
  4108. };
  4109. ggml_set_op_params(result, &params, sizeof(params));
  4110. result->op = GGML_OP_MAP_CUSTOM3;
  4111. result->src[0] = a;
  4112. result->src[1] = b;
  4113. result->src[2] = c;
  4114. return result;
  4115. }
  4116. struct ggml_tensor * ggml_map_custom3(
  4117. struct ggml_context * ctx,
  4118. struct ggml_tensor * a,
  4119. struct ggml_tensor * b,
  4120. struct ggml_tensor * c,
  4121. const ggml_custom3_op_t fun,
  4122. int n_tasks,
  4123. void * userdata) {
  4124. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
  4125. }
  4126. struct ggml_tensor * ggml_map_custom3_inplace(
  4127. struct ggml_context * ctx,
  4128. struct ggml_tensor * a,
  4129. struct ggml_tensor * b,
  4130. struct ggml_tensor * c,
  4131. const ggml_custom3_op_t fun,
  4132. int n_tasks,
  4133. void * userdata) {
  4134. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
  4135. }
  4136. struct ggml_tensor * ggml_custom_4d(
  4137. struct ggml_context * ctx,
  4138. enum ggml_type type,
  4139. int64_t ne0,
  4140. int64_t ne1,
  4141. int64_t ne2,
  4142. int64_t ne3,
  4143. struct ggml_tensor ** args,
  4144. int n_args,
  4145. ggml_custom_op_t fun,
  4146. int n_tasks,
  4147. void * userdata) {
  4148. GGML_ASSERT(n_args < GGML_MAX_SRC);
  4149. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
  4150. struct ggml_custom_op_params params = {
  4151. /*.fun =*/ fun,
  4152. /*.n_tasks =*/ n_tasks,
  4153. /*.userdata =*/ userdata
  4154. };
  4155. ggml_set_op_params(result, &params, sizeof(params));
  4156. result->op = GGML_OP_CUSTOM;
  4157. for (int i = 0; i < n_args; i++) {
  4158. result->src[i] = args[i];
  4159. }
  4160. return result;
  4161. }
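// illustrative call-site sketch (my_custom_fun is hypothetical): the output shape and type are
// given explicitly and up to GGML_MAX_SRC - 1 inputs are passed through result->src[]:
//   struct ggml_tensor * args[2] = { a, b };
//   struct ggml_tensor * t = ggml_custom_4d(ctx, GGML_TYPE_F32, ne0, ne1, 1, 1,
//                                           args, 2, my_custom_fun, GGML_N_TASKS_MAX, NULL);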
  4162. struct ggml_tensor * ggml_custom_inplace(
  4163. struct ggml_context * ctx,
  4164. struct ggml_tensor * a,
  4165. struct ggml_tensor ** args,
  4166. int n_args,
  4167. ggml_custom_op_t fun,
  4168. int n_tasks,
  4169. void * userdata) {
  4170. GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
  4171. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4172. struct ggml_custom_op_params params = {
  4173. /*.fun =*/ fun,
  4174. /*.n_tasks =*/ n_tasks,
  4175. /*.userdata =*/ userdata
  4176. };
  4177. ggml_set_op_params(result, &params, sizeof(params));
  4178. result->op = GGML_OP_CUSTOM;
  4179. result->src[0] = a;
  4180. for (int i = 0; i < n_args; i++) {
  4181. result->src[i + 1] = args[i];
  4182. }
  4183. return result;
  4184. }
  4185. // ggml_cross_entropy_loss
  4186. struct ggml_tensor * ggml_cross_entropy_loss(
  4187. struct ggml_context * ctx,
  4188. struct ggml_tensor * a,
  4189. struct ggml_tensor * b) {
  4190. GGML_ASSERT(ggml_are_same_shape(a, b));
  4191. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  4192. result->op = GGML_OP_CROSS_ENTROPY_LOSS;
  4193. result->src[0] = a;
  4194. result->src[1] = b;
  4195. return result;
  4196. }
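// typical training usage sketch (logits/labels/gf are placeholders): the scalar loss is marked
// with ggml_set_loss() so that ggml_build_backward_expand()/ggml_graph_reset() below seed its
// gradient with 1:
//   struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);
//   ggml_set_loss(loss);
//   ggml_build_forward_expand(gf, loss);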
  4197. // ggml_cross_entropy_loss_back
  4198. struct ggml_tensor * ggml_cross_entropy_loss_back(
  4199. struct ggml_context * ctx,
  4200. struct ggml_tensor * a,
  4201. struct ggml_tensor * b,
  4202. struct ggml_tensor * c) {
  4203. GGML_ASSERT(ggml_is_scalar(a));
  4204. GGML_ASSERT(ggml_are_same_shape(b, c));
  4205. struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
  4206. result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
  4207. result->src[0] = a;
  4208. result->src[1] = b;
  4209. result->src[2] = c;
  4210. return result;
  4211. }
  4212. // opt_step_adamw
  4213. struct ggml_tensor * ggml_opt_step_adamw(
  4214. struct ggml_context * ctx,
  4215. struct ggml_tensor * a,
  4216. struct ggml_tensor * grad,
  4217. struct ggml_tensor * m,
  4218. struct ggml_tensor * v,
  4219. struct ggml_tensor * adamw_params) {
  4220. GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
  4221. GGML_ASSERT(ggml_are_same_shape(a, grad));
  4222. GGML_ASSERT(ggml_are_same_shape(a, m));
  4223. GGML_ASSERT(ggml_are_same_shape(a, v));
  4224. GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
  4225. GGML_ASSERT(ggml_nelements(adamw_params) == 7);
  4226. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4227. result->op = GGML_OP_OPT_STEP_ADAMW;
  4228. result->src[0] = a;
  4229. result->src[1] = grad;
  4230. result->src[2] = m;
  4231. result->src[3] = v;
  4232. result->src[4] = adamw_params;
  4233. return result;
  4234. }
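// note: as the asserts above require, adamw_params is a small F32 tensor with exactly 7 values
// holding the AdamW hyper-parameters, e.g.
//   struct ggml_tensor * adamw_params = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 7);
// it is normally filled by the optimizer code (ggml-opt) rather than by hand, so the exact
// ordering of the 7 values is left to that code.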
  4235. ////////////////////////////////////////////////////////////////////////////////
  4236. struct ggml_hash_set ggml_hash_set_new(size_t size) {
  4237. size = ggml_hash_size(size);
  4238. struct ggml_hash_set result;
  4239. result.size = size;
  4240. result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
  4241. result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
  4242. return result;
  4243. }
  4244. void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
  4245. memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
  4246. }
  4247. void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
  4248. GGML_FREE(hash_set->used);
  4249. GGML_FREE(hash_set->keys);
  4250. }
  4251. size_t ggml_hash_size(size_t min_sz) {
  4252. // next primes after powers of two
  4253. static const size_t primes[] = {
  4254. 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
  4255. 2053, 4099, 8209, 16411, 32771, 65537, 131101,
  4256. 262147, 524309, 1048583, 2097169, 4194319, 8388617,
  4257. 16777259, 33554467, 67108879, 134217757, 268435459,
  4258. 536870923, 1073741827, 2147483659
  4259. };
  4260. static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
4262. // find the smallest prime that is greater than or equal to min_sz
  4262. size_t l = 0;
  4263. size_t r = n_primes;
  4264. while (l < r) {
  4265. size_t m = (l + r)/2;
  4266. if (primes[m] < min_sz) {
  4267. l = m + 1;
  4268. } else {
  4269. r = m;
  4270. }
  4271. }
  4272. size_t sz = l < n_primes ? primes[l] : min_sz | 1;
  4273. return sz;
  4274. }
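// worked example: ggml_hash_size(1000) binary-searches the table above and returns 1031, the
// smallest listed prime >= 1000; for requests beyond the last listed prime the function falls
// back to min_sz | 1 (min_sz forced to be odd).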
  4275. struct hash_map {
  4276. struct ggml_hash_set set;
  4277. struct ggml_tensor ** vals;
  4278. };
  4279. static struct hash_map * ggml_new_hash_map(size_t size) {
  4280. struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
  4281. result->set = ggml_hash_set_new(size);
  4282. result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
  4283. return result;
  4284. }
  4285. static void ggml_hash_map_free(struct hash_map * map) {
  4286. ggml_hash_set_free(&map->set);
  4287. GGML_FREE(map->vals);
  4288. GGML_FREE(map);
  4289. }
  4290. // utility functions to change gradients
4291. // isrc is the index of tensor in cgraph->visited_hash_set.keys
4292. // the corresponding gradients (accumulators) are also at position isrc
  4293. // if tensor has a gradient accumulator, modify that accumulator in-place
  4294. // else if there is no gradient for tensor, set the corresponding value
  4295. // else, just add/subtract/etc. the gradients
  4296. static void ggml_add_or_set(
  4297. struct ggml_context * ctx,
  4298. struct ggml_cgraph * cgraph,
  4299. size_t isrc,
  4300. struct ggml_tensor * tensor) {
  4301. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4302. GGML_ASSERT(src);
  4303. if (cgraph->grads[isrc]) {
  4304. cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
  4305. } else {
  4306. cgraph->grads[isrc] = tensor;
  4307. }
  4308. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4309. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4310. }
  4311. static void ggml_acc_or_set(
  4312. struct ggml_context * ctx,
  4313. struct ggml_cgraph * cgraph,
  4314. size_t isrc,
  4315. struct ggml_tensor * tensor,
  4316. const size_t nb1,
  4317. const size_t nb2,
  4318. const size_t nb3,
  4319. const size_t offset) {
  4320. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4321. GGML_ASSERT(src);
  4322. if (cgraph->grads[isrc]) {
  4323. cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
  4324. } else {
  4325. struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
  4326. cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
  4327. }
  4328. ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
  4329. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4330. }
  4331. static void ggml_add1_or_set(
  4332. struct ggml_context * ctx,
  4333. struct ggml_cgraph * cgraph,
  4334. size_t isrc,
  4335. struct ggml_tensor * tensor) {
  4336. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4337. GGML_ASSERT(src);
  4338. if (cgraph->grads[isrc]) {
  4339. cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
  4340. } else {
  4341. cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
  4342. }
  4343. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4344. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4345. }
  4346. static void ggml_sub_or_set(
  4347. struct ggml_context * ctx,
  4348. struct ggml_cgraph * cgraph,
  4349. size_t isrc,
  4350. struct ggml_tensor * tensor) {
  4351. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4352. GGML_ASSERT(src);
  4353. if (cgraph->grads[isrc]) {
  4354. cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
  4355. } else {
  4356. cgraph->grads[isrc] = ggml_neg(ctx, tensor);
  4357. }
  4358. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4359. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4360. }
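// note on the helpers above: cgraph->grads[isrc] accumulates d(loss)/d(src). The first
// contribution initializes the slot (set, negate, repeat, or acc into a zeroed copy, depending
// on the helper); every further contribution is folded in via ggml_add_impl/ggml_sub_impl/
// ggml_acc_impl, in-place only when a dedicated gradient accumulator exists. For example, if a
// tensor feeds two consumers, ggml_compute_backward() below calls ggml_add_or_set() twice for
// the same isrc and the second call produces an add node combining both partial gradients.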
  4361. static void ggml_compute_backward(
  4362. struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
  4363. struct ggml_tensor * tensor = cgraph->nodes[i];
  4364. struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, tensor);
  4365. if (!grad) {
  4366. return;
  4367. }
  4368. struct ggml_tensor * src0 = tensor->src[0];
  4369. struct ggml_tensor * src1 = tensor->src[1];
  4370. struct ggml_tensor * src2 = tensor->src[2];
  4371. struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
  4372. const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
  4373. const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
  4374. const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
  4375. const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
  4376. const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
  4377. const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
  4378. switch (tensor->op) {
  4379. case GGML_OP_DUP: {
  4380. if (src0_needs_grads) {
  4381. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4382. }
  4383. } break;
  4384. case GGML_OP_ADD: {
  4385. if (src0_needs_grads) {
  4386. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4387. }
  4388. if (src1_needs_grads) {
  4389. struct ggml_tensor * tmp = grad;
  4390. if (!ggml_are_same_shape(src0, src1)) {
  4391. tmp = ggml_repeat_back(ctx, tmp, src1);
  4392. }
  4393. ggml_add_or_set(ctx, cgraph, isrc1, tmp);
  4394. }
  4395. } break;
  4396. case GGML_OP_ADD1: {
  4397. if (src0_needs_grads) {
  4398. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4399. }
  4400. if (src1_needs_grads) {
  4401. ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
  4402. }
  4403. } break;
  4404. case GGML_OP_ACC: {
  4405. if (src0_needs_grads) {
  4406. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4407. }
  4408. if (src1_needs_grads) {
  4409. const size_t nb1 = ((int32_t *) tensor->op_params)[0];
  4410. const size_t nb2 = ((int32_t *) tensor->op_params)[1];
  4411. const size_t nb3 = ((int32_t *) tensor->op_params)[2];
  4412. const size_t offset = ((int32_t *) tensor->op_params)[3];
  4413. struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
  4414. grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4415. nb1, nb2, nb3, offset);
  4416. ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
  4417. }
  4418. } break;
  4419. case GGML_OP_SUB: {
  4420. if (src0_needs_grads) {
  4421. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4422. }
  4423. if (src1_needs_grads) {
  4424. ggml_sub_or_set(ctx, cgraph, isrc1, grad);
  4425. }
  4426. } break;
  4427. case GGML_OP_MUL: {
  4428. if (src0_needs_grads) {
  4429. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
  4430. }
  4431. if (src1_needs_grads) {
  4432. struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
  4433. if (!ggml_are_same_shape(src0, src1)) {
  4434. tmp = ggml_repeat_back(ctx, tmp, src1);
  4435. }
  4436. ggml_add_or_set(ctx, cgraph, isrc1, tmp);
  4437. }
  4438. } break;
  4439. case GGML_OP_DIV: {
  4440. if (src0_needs_grads) {
  4441. ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
  4442. }
  4443. if (src1_needs_grads) {
  4444. ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
  4445. }
  4446. } break;
  4447. case GGML_OP_SQR: {
  4448. if (src0_needs_grads) {
  4449. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
  4450. }
  4451. } break;
  4452. case GGML_OP_SQRT: {
  4453. if (src0_needs_grads) {
  4454. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
  4455. }
  4456. } break;
  4457. case GGML_OP_LOG: {
  4458. if (src0_needs_grads) {
  4459. ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
  4460. }
  4461. } break;
  4462. case GGML_OP_SIN: {
  4463. if (src0_needs_grads) {
  4464. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
  4465. }
  4466. } break;
  4467. case GGML_OP_COS: {
  4468. if (src0_needs_grads) {
  4469. ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
  4470. }
  4471. } break;
  4472. case GGML_OP_SUM: {
  4473. if (src0_needs_grads) {
  4474. ggml_add1_or_set(ctx, cgraph, isrc0, grad);
  4475. }
  4476. } break;
  4477. case GGML_OP_SUM_ROWS: {
  4478. if (src0_needs_grads) {
  4479. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
  4480. }
  4481. } break;
  4482. case GGML_OP_MEAN: {
  4483. if (src0_needs_grads) {
  4484. ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
  4485. }
  4486. } break;
  4487. case GGML_OP_REPEAT: {
  4488. if (src0_needs_grads) {
  4489. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
  4490. }
  4491. } break;
  4492. case GGML_OP_REPEAT_BACK: {
  4493. if (src0_needs_grads) {
  4494. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
  4495. }
  4496. } break;
  4497. case GGML_OP_RMS_NORM: {
  4498. if (src0_needs_grads) {
  4499. float eps;
  4500. memcpy(&eps, tensor->op_params, sizeof(float));
  4501. ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
  4502. }
  4503. } break;
  4504. case GGML_OP_MUL_MAT: {
  4505. // https://cs231n.github.io/optimization-2/#staged
  4506. // # forward pass
  4507. // s0 = np.random.randn(5, 10)
  4508. // s1 = np.random.randn(10, 3)
  4509. // t = s0.dot(s1)
  4510. // # now suppose we had the gradient on t from above in the circuit
  4511. // dt = np.random.randn(*t.shape) # same shape as t
  4512. // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
  4513. // ds1 = t.T.dot(dt)
  4514. // tensor.shape [m,p,qq,rr]
  4515. // src0.shape [n,m,q1,r1]
  4516. // src1.shape [n,p,qq,rr]
  4517. if (src0_needs_grads) {
  4518. GGML_ASSERT(grad->ne[2] == src1->ne[2]);
  4519. GGML_ASSERT(grad->ne[3] == src1->ne[3]);
  4520. struct ggml_tensor * tmp =
  4521. ggml_out_prod(ctx, // [n,m,qq,rr]
  4522. src1, // [n,p,qq,rr]
  4523. grad); // [m,p,qq,rr]
  4524. if (!ggml_are_same_shape(tmp, src0)) {
  4525. GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
  4526. GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
  4527. GGML_ASSERT(tmp->ne[3] == 1);
  4528. const int64_t nr2 = tmp->ne[2] / src0->ne[2];
  4529. const size_t nb2 = tmp->nb[2] * nr2;
  4530. const size_t nb3 = tmp->nb[2];
  4531. tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
  4532. tmp = ggml_repeat_back(ctx, tmp, src0);
  4533. }
  4534. ggml_add_or_set(ctx, cgraph, isrc0, tmp);
  4535. }
  4536. if (src1_needs_grads) {
  4537. ggml_add_or_set(ctx, cgraph, isrc1,
  4538. // ggml_mul_mat(ctx, // [n,p,qq,rr]
  4539. // ggml_cont(ctx, // [m,n,q1,r1]
  4540. // ggml_transpose(ctx, src0)), // [m,n,q1,r1]
  4541. // grad), // [m,p,qq,rr]
  4542. // when src0 is bigger than tensor->grad (this is mostly the case in llama),
4543. // avoid transposing src0; instead transpose the smaller tensor->grad
4544. // and then use ggml_out_prod
  4545. ggml_out_prod(ctx, // [n,p,qq,rr]
  4546. src0, // [n,m,q1,r1]
  4547. ggml_transpose(ctx, // [p,m,qq,rr]
  4548. grad))); // [m,p,qq,rr]
  4549. }
  4550. } break;
  4551. case GGML_OP_SCALE: {
  4552. if (src0_needs_grads) {
  4553. float s;
  4554. memcpy(&s, tensor->op_params, sizeof(float));
  4555. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, false));
  4556. }
  4557. } break;
  4558. case GGML_OP_SET: {
  4559. const size_t nb1 = ((const int32_t *) tensor->op_params)[0];
  4560. const size_t nb2 = ((const int32_t *) tensor->op_params)[1];
  4561. const size_t nb3 = ((const int32_t *) tensor->op_params)[2];
  4562. const size_t offset = ((const int32_t *) tensor->op_params)[3];
  4563. struct ggml_tensor * tensor_grad_view = NULL;
  4564. if (src0_needs_grads || src1_needs_grads) {
  4565. GGML_ASSERT(src0->type == tensor->type);
  4566. GGML_ASSERT(!cgraph->grads[isrc0] || cgraph->grads[isrc0]->type == grad->type);
  4567. GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
  4568. tensor_grad_view = ggml_view_4d(ctx,
  4569. grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4570. nb1, nb2, nb3, offset);
  4571. }
  4572. if (src0_needs_grads) {
  4573. struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
  4574. ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
  4575. }
  4576. if (src1_needs_grads) {
  4577. ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
  4578. }
  4579. } break;
  4580. case GGML_OP_CPY: {
  4581. // cpy overwrites value of src1 by src0 and returns view(src1)
  4582. // the overwriting is mathematically equivalent to:
  4583. // tensor = src0 * 1 + src1 * 0
  4584. if (src0_needs_grads) {
  4585. // dsrc0 = dtensor * 1
  4586. ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
  4587. }
  4588. if (src1_needs_grads) {
  4589. // dsrc1 = dtensor * 0 -> noop
  4590. }
  4591. } break;
  4592. case GGML_OP_CONT: {
  4593. // same as cpy
  4594. if (src0_needs_grads) {
  4595. GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
  4596. GGML_ASSERT(ggml_is_contiguous(grad));
  4597. GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
  4598. ggml_add_or_set(ctx, cgraph, isrc0,
  4599. ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
  4600. }
  4601. } break;
  4602. case GGML_OP_RESHAPE: {
  4603. if (src0_needs_grads) {
  4604. struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
  4605. ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
  4606. }
  4607. } break;
  4608. case GGML_OP_VIEW: {
  4609. if (src0_needs_grads) {
  4610. size_t offset;
  4611. memcpy(&offset, tensor->op_params, sizeof(offset));
  4612. size_t nb1 = tensor->nb[1];
  4613. size_t nb2 = tensor->nb[2];
  4614. size_t nb3 = tensor->nb[3];
  4615. if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
4616. // the gradient is typically F32, but src0 could be of another type
  4617. size_t ng = ggml_element_size(cgraph->grads[isrc0]);
  4618. size_t n0 = ggml_element_size(src0);
  4619. GGML_ASSERT(offset % n0 == 0);
  4620. GGML_ASSERT(nb1 % n0 == 0);
  4621. GGML_ASSERT(nb2 % n0 == 0);
  4622. GGML_ASSERT(nb3 % n0 == 0);
  4623. offset = (offset / n0) * ng;
  4624. nb1 = (nb1 / n0) * ng;
  4625. nb2 = (nb2 / n0) * ng;
  4626. nb3 = (nb3 / n0) * ng;
  4627. }
  4628. ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
  4629. }
  4630. } break;
  4631. case GGML_OP_PERMUTE: {
  4632. if (src0_needs_grads) {
  4633. const int32_t * axes = (const int32_t *) tensor->op_params;
  4634. const int axis0 = axes[0] & 0x3;
  4635. const int axis1 = axes[1] & 0x3;
  4636. const int axis2 = axes[2] & 0x3;
  4637. const int axis3 = axes[3] & 0x3;
  4638. int axb[4] = {0,0,0,0}; // axes backward
  4639. axb[axis0] = 0;
  4640. axb[axis1] = 1;
  4641. axb[axis2] = 2;
  4642. axb[axis3] = 3;
  4643. ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
  4644. }
  4645. } break;
  4646. case GGML_OP_TRANSPOSE: {
  4647. if (src0_needs_grads) {
  4648. ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
  4649. }
  4650. } break;
  4651. case GGML_OP_GET_ROWS: {
  4652. if (src0_needs_grads) {
  4653. ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
  4654. }
  4655. if (src1_needs_grads) {
  4656. // noop
  4657. }
  4658. } break;
  4659. case GGML_OP_DIAG_MASK_INF: {
  4660. if (src0_needs_grads) {
  4661. /* ggml_diag_mask_inf_impl() shouldn't be here */
  4662. /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
  4663. const int n_past = ((const int32_t *) tensor->op_params)[0];
  4664. ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
  4665. }
  4666. } break;
  4667. case GGML_OP_DIAG_MASK_ZERO: {
  4668. if (src0_needs_grads) {
  4669. const int n_past = ((const int32_t *) tensor->op_params)[0];
  4670. ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
  4671. }
  4672. } break;
  4673. case GGML_OP_SOFT_MAX: {
  4674. if (src0_needs_grads) {
  4675. float scale = 1.0f;
  4676. float max_bias = 0.0f;
  4677. memcpy(&scale, (const float *) tensor->op_params + 0, sizeof(float));
  4678. memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
  4679. ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
  4680. }
  4681. GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
  4682. } break;
  4683. case GGML_OP_ROPE: {
  4684. if (src0_needs_grads) {
  4685. //const int n_past = ((int32_t *) tensor->op_params)[0];
  4686. const int n_dims = ((const int32_t *) tensor->op_params)[1];
  4687. const int mode = ((const int32_t *) tensor->op_params)[2];
  4688. //const int n_ctx = ((int32_t *) tensor->op_params)[3];
  4689. const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
  4690. float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
  4691. int sections[4] = {0, 0, 0, 0};
  4692. memcpy(&freq_base, (const float *) tensor->op_params + 5, sizeof(float));
  4693. memcpy(&freq_scale, (const float *) tensor->op_params + 6, sizeof(float));
  4694. memcpy(&ext_factor, (const float *) tensor->op_params + 7, sizeof(float));
  4695. memcpy(&attn_factor, (const float *) tensor->op_params + 8, sizeof(float));
  4696. memcpy(&beta_fast, (const float *) tensor->op_params + 9, sizeof(float));
  4697. memcpy(&beta_slow, (const float *) tensor->op_params + 10, sizeof(float));
  4698. memcpy(&sections, tensor->op_params + 11, sizeof(sections));
  4699. struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
  4700. ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
  4701. mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
  4702. ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
  4703. mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  4704. ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
  4705. }
  4706. GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
  4707. } break;
  4708. case GGML_OP_IM2COL: {
  4709. if (src1_needs_grads) {
  4710. const int32_t s0 = ggml_get_op_params_i32(tensor, 0);
  4711. const int32_t s1 = ggml_get_op_params_i32(tensor, 1);
  4712. const int32_t p0 = ggml_get_op_params_i32(tensor, 2);
  4713. const int32_t p1 = ggml_get_op_params_i32(tensor, 3);
  4714. const int32_t d0 = ggml_get_op_params_i32(tensor, 4);
  4715. const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
  4716. const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
  4717. ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
  4718. }
  4719. } break;
  4720. case GGML_OP_POOL_2D: {
  4721. if (src0_needs_grads) {
  4722. const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
  4723. const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
  4724. const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
  4725. const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
  4726. const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
  4727. const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
  4728. const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
  4729. ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
  4730. }
  4731. } break;
  4732. case GGML_OP_WIN_PART:
  4733. case GGML_OP_WIN_UNPART:
  4734. case GGML_OP_UNARY: {
  4735. switch (ggml_get_unary_op(tensor)) {
  4736. case GGML_UNARY_OP_ABS: {
  4737. if (src0_needs_grads) {
  4738. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
  4739. }
  4740. } break;
  4741. case GGML_UNARY_OP_SGN: {
  4742. // noop
  4743. } break;
  4744. case GGML_UNARY_OP_NEG: {
  4745. if (src0_needs_grads) {
  4746. ggml_sub_or_set(ctx, cgraph, isrc0, grad);
  4747. }
  4748. } break;
  4749. case GGML_UNARY_OP_STEP: {
  4750. // noop
  4751. } break;
  4752. case GGML_UNARY_OP_RELU: {
  4753. if (src0_needs_grads) {
  4754. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
  4755. }
  4756. } break;
  4757. case GGML_UNARY_OP_SILU: {
  4758. if (src0_needs_grads) {
  4759. ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
  4760. }
  4761. } break;
  4762. case GGML_UNARY_OP_EXP: {
  4763. if (src0_needs_grads) {
  4764. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
  4765. }
  4766. } break;
  4767. default: {
  4768. fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
  4769. __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
  4770. GGML_ABORT("fatal error");
  4771. } //break;
  4772. }
  4773. } break;
  4774. case GGML_OP_CROSS_ENTROPY_LOSS: {
  4775. if (src0_needs_grads) {
  4776. ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
  4777. }
  4778. GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
  4779. } break;
  4780. case GGML_OP_NONE: {
  4781. // noop
  4782. } break;
  4783. case GGML_OP_COUNT:
  4784. default: {
  4785. fprintf(stderr, "%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
  4786. GGML_ABORT("fatal error");
  4787. } //break;
  4788. }
  4789. GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
  4790. GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
  4791. GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
  4792. }
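// worked example of the convention used above: grad == d(loss)/d(tensor), and each case adds
// d(loss)/d(src) = d(loss)/d(tensor) * d(tensor)/d(src) to the source's gradient slot.
// For GGML_OP_SQR, tensor = src0^2, so d(tensor)/d(src0) = 2*src0 and the case accumulates
// ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f), i.e. 2*src0*grad.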
  4793. static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
  4794. // check if already visited
  4795. if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
  4796. return;
  4797. }
  4798. for (int i = 0; i < GGML_MAX_SRC; ++i) {
  4799. const int k =
  4800. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
  4801. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
4802. /* unknown order, just fall back to using i */ i;
  4803. if (node->src[k]) {
  4804. ggml_visit_parents(cgraph, node->src[k]);
  4805. }
  4806. }
  4807. if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
  4808. // reached a leaf node, not part of the gradient graph (e.g. a constant)
  4809. GGML_ASSERT(cgraph->n_leafs < cgraph->size);
  4810. if (strlen(node->name) == 0) {
  4811. ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
  4812. }
  4813. cgraph->leafs[cgraph->n_leafs] = node;
  4814. cgraph->n_leafs++;
  4815. } else {
  4816. GGML_ASSERT(cgraph->n_nodes < cgraph->size);
  4817. if (strlen(node->name) == 0) {
  4818. ggml_format_name(node, "node_%d", cgraph->n_nodes);
  4819. }
  4820. cgraph->nodes[cgraph->n_nodes] = node;
  4821. cgraph->n_nodes++;
  4822. }
  4823. }
  4824. static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
  4825. if (!expand) {
  4826. // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
  4827. ggml_graph_clear(cgraph);
  4828. }
  4829. const int n0 = cgraph->n_nodes;
  4830. ggml_visit_parents(cgraph, tensor);
  4831. const int n_new = cgraph->n_nodes - n0;
  4832. GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
  4833. if (n_new > 0) {
4834. // the last added node should always be the starting point
  4835. GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
  4836. }
  4837. }
  4838. void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  4839. ggml_build_forward_impl(cgraph, tensor, true);
  4840. }
  4841. void ggml_build_backward_expand(
  4842. struct ggml_context * ctx,
  4843. struct ggml_cgraph * cgraph,
  4844. struct ggml_tensor ** grad_accs) {
  4845. GGML_ASSERT(cgraph->n_nodes > 0);
  4846. GGML_ASSERT(cgraph->grads);
  4847. GGML_ASSERT(cgraph->grad_accs);
  4848. const int n_nodes_f = cgraph->n_nodes;
  4849. memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
  4850. memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
  4851. bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
  4852. {
  4853. bool any_params = false;
  4854. bool any_loss = false;
  4855. for (int i = 0; i < n_nodes_f; ++i) {
  4856. struct ggml_tensor * node = cgraph->nodes[i];
  4857. any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
  4858. any_loss = any_loss || (node->flags & GGML_TENSOR_FLAG_LOSS);
  4859. }
  4860. GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
  4861. GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
  4862. }
  4863. for (int i = 0; i < n_nodes_f; ++i) {
  4864. struct ggml_tensor * node = cgraph->nodes[i];
  4865. if (node->type == GGML_TYPE_I32) {
  4866. continue;
  4867. }
  4868. bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
  4869. bool ignore_src[GGML_MAX_SRC] = {false};
  4870. switch (node->op) {
  4871. // gradients in node->src[0] for one reason or another have no effect on output gradients
  4872. case GGML_OP_IM2COL: // only used for its shape
  4873. case GGML_OP_IM2COL_BACK: // same as IM2COL
  4874. ignore_src[0] = true;
  4875. break;
  4876. case GGML_OP_UNARY: {
  4877. const enum ggml_unary_op uop = ggml_get_unary_op(node);
  4878. // SGN and STEP unary ops are piecewise constant
  4879. if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
  4880. ignore_src[0] = true;
  4881. }
  4882. } break;
  4883. // gradients in node->src[1] for one reason or another have no effect on output gradients
  4884. case GGML_OP_CPY: // gradients in CPY target are irrelevant
  4885. case GGML_OP_GET_ROWS: // row indices not differentiable
  4886. case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
  4887. case GGML_OP_ROPE: // positions not differentiable
  4888. ignore_src[1] = true;
  4889. break;
  4890. default:
  4891. break;
  4892. }
  4893. for (int j = 0; j < GGML_MAX_SRC; ++j) {
  4894. if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
  4895. continue;
  4896. }
  4897. GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
  4898. node_needs_grad = true;
  4899. break;
  4900. }
  4901. if (!node_needs_grad) {
  4902. continue;
  4903. }
  4904. // inplace operations are currently not supported
  4905. GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
  4906. node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
  4907. const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
  4908. GGML_ASSERT(ihash != GGML_HASHSET_FULL);
  4909. GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
  4910. if (grad_accs && grad_accs[i]) {
  4911. cgraph->grad_accs[ihash] = grad_accs[i];
  4912. cgraph->grads[ihash] = cgraph->grad_accs[ihash];
  4913. } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  4914. // loss tensors always need a gradient accumulator
  4915. cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
  4916. cgraph->grads[ihash] = cgraph->grad_accs[ihash];
  4917. }
  4918. grads_needed[ihash] = true;
  4919. }
  4920. for (int i = n_nodes_f - 1; i >= 0; --i) {
4921. // inplace operations to add gradients are not created by ggml_compute_backward, except for gradient accumulation
4922. // rely on the allocator to automatically turn these into inplace operations
  4923. ggml_compute_backward(ctx, cgraph, i, grads_needed);
  4924. }
  4925. free(grads_needed);
  4926. }
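// end-to-end sketch for building a training graph (variable names are placeholders):
//   struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
//   ggml_set_param(weights);                       // mark trainable tensors
//   ggml_set_loss(loss);                           // mark the scalar loss
//   ggml_build_forward_expand(gb, loss);
//   ggml_build_backward_expand(ctx, gb, /*grad_accs =*/ NULL);
//   ggml_graph_reset(gb);                          // seed d(loss)/d(loss) = 1, zero the rest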
  4927. static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
  4928. void * ptr = *p;
  4929. ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
  4930. *p = (void *) ((char *) ptr + size);
  4931. return ptr;
  4932. }
  4933. static size_t ggml_graph_nbytes(size_t size, bool grads) {
  4934. size_t hash_size = ggml_hash_size(size * 2);
  4935. void * p = 0;
  4936. incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
  4937. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
  4938. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
  4939. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
  4940. if (grads) {
  4941. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
  4942. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
  4943. }
  4944. incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  4945. size_t nbytes = (size_t) p;
  4946. return nbytes;
  4947. }
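// layout implied by the increments above (and mirrored by ggml_new_graph_custom below):
//   [struct ggml_cgraph][nodes][leafs][hash keys][grads][grad_accs][used bitset]
// with the hash-related arrays sized for ggml_hash_size(2*size) entries; since p starts at 0,
// the final pointer value is the total byte count.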
  4948. size_t ggml_graph_overhead_custom(size_t size, bool grads) {
  4949. return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
  4950. }
  4951. size_t ggml_graph_overhead(void) {
  4952. return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
  4953. }
  4954. struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
  4955. const size_t obj_size = ggml_graph_nbytes(size, grads);
  4956. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
  4957. struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
  4958. // the size of the hash table is doubled since it needs to hold both nodes and leafs
  4959. size_t hash_size = ggml_hash_size(size * 2);
  4960. void * p = cgraph + 1;
  4961. struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4962. struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4963. struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4964. struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  4965. struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  4966. ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  4967. // check that we allocated the correct amount of memory
  4968. assert(obj_size == (size_t)((char *)p - (char *)cgraph));
  4969. *cgraph = (struct ggml_cgraph) {
  4970. /*.size =*/ size,
  4971. /*.n_nodes =*/ 0,
  4972. /*.n_leafs =*/ 0,
  4973. /*.nodes =*/ nodes_ptr,
  4974. /*.grads =*/ grads_ptr,
  4975. /*.grad_accs =*/ grad_accs_ptr,
  4976. /*.leafs =*/ leafs_ptr,
  4977. /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
  4978. /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
  4979. };
  4980. ggml_hash_set_reset(&cgraph->visited_hash_set);
  4981. if (grads) {
  4982. memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *));
  4983. memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
  4984. }
  4985. return cgraph;
  4986. }
  4987. struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
  4988. return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
  4989. }
  4990. struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
  4991. struct ggml_cgraph cgraph = {
  4992. /*.size =*/ 0,
  4993. /*.n_nodes =*/ i1 - i0,
  4994. /*.n_leafs =*/ 0,
  4995. /*.nodes =*/ cgraph0->nodes + i0,
  4996. /*.grads =*/ NULL, // gradients would need visited_hash_set
  4997. /*.grad_accs =*/ NULL,
  4998. /*.leafs =*/ NULL,
  4999. /*.visited_hash_set =*/ { 0, NULL, NULL },
  5000. /*.order =*/ cgraph0->order,
  5001. };
  5002. return cgraph;
  5003. }
  5004. void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
  5005. GGML_ASSERT(dst->size >= src->n_leafs);
  5006. GGML_ASSERT(dst->size >= src->n_nodes);
  5007. GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
  5008. dst->n_leafs = src->n_leafs;
  5009. dst->n_nodes = src->n_nodes;
  5010. dst->order = src->order;
  5011. for (int i = 0; i < src->n_leafs; ++i) {
  5012. dst->leafs[i] = src->leafs[i];
  5013. }
  5014. for (int i = 0; i < src->n_nodes; ++i) {
  5015. dst->nodes[i] = src->nodes[i];
  5016. }
  5017. for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
  5018. // copy all hashset keys (tensors) that are in use
  5019. if (ggml_bitset_get(src->visited_hash_set.used, i)) {
  5020. ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
  5021. }
  5022. }
  5023. if (dst->grads) {
  5024. memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
  5025. memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
  5026. }
  5027. if (src->grads) {
  5028. GGML_ASSERT(dst->grads != NULL);
  5029. GGML_ASSERT(dst->grad_accs != NULL);
  5030. for (int i = 0; i < src->n_nodes; ++i) {
  5031. const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
  5032. const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
  5033. GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
  5034. GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
  5035. GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
  5036. GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
  5037. dst->grads[igrad_dst] = src->grads[igrad_src];
  5038. dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
  5039. }
  5040. }
  5041. }
  5042. struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
  5043. struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
  5044. ggml_graph_cpy(cgraph, result);
  5045. return result;
  5046. }
  5047. struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
  5048. if (ggml_is_empty(tensor)) {
  5049. return tensor;
  5050. }
  5051. if (tensor->buffer) {
  5052. ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
  5053. } else {
  5054. GGML_ASSERT(tensor->data);
  5055. memset(tensor->data, 0, ggml_nbytes(tensor));
  5056. }
  5057. return tensor;
  5058. }
  5059. void ggml_graph_reset(struct ggml_cgraph * cgraph) {
  5060. if (!cgraph) {
  5061. return;
  5062. }
  5063. GGML_ASSERT(cgraph->grads != NULL);
  5064. for (int i = 0; i < cgraph->n_nodes; i++) {
  5065. struct ggml_tensor * node = cgraph->nodes[i];
  5066. struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
  5067. if (node->op == GGML_OP_OPT_STEP_ADAMW) {
  5068. // clear momenta
  5069. ggml_set_zero(node->src[2]);
  5070. ggml_set_zero(node->src[3]);
  5071. }
5072. // the initial gradient of the loss should be 1, all other gradients 0
  5073. if (grad_acc) {
  5074. if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  5075. GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
  5076. GGML_ASSERT(ggml_is_scalar(grad_acc));
  5077. const float onef = 1.0f;
  5078. if (grad_acc->buffer) {
  5079. ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
  5080. } else {
  5081. GGML_ASSERT(grad_acc->data);
  5082. *((float *) grad_acc->data) = onef;
  5083. }
  5084. } else {
  5085. ggml_set_zero(grad_acc);
  5086. }
  5087. }
  5088. }
  5089. }
  5090. void ggml_graph_clear(struct ggml_cgraph * cgraph) {
  5091. cgraph->n_leafs = 0;
  5092. cgraph->n_nodes = 0;
  5093. ggml_hash_set_reset(&cgraph->visited_hash_set);
  5094. }
  5095. int ggml_graph_size(struct ggml_cgraph * cgraph) {
  5096. return cgraph->size;
  5097. }
  5098. struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
  5099. if (i < 0) {
  5100. GGML_ASSERT(cgraph->n_nodes + i >= 0);
  5101. return cgraph->nodes[cgraph->n_nodes + i];
  5102. }
  5103. GGML_ASSERT(i < cgraph->n_nodes);
  5104. return cgraph->nodes[i];
  5105. }
  5106. struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
  5107. return cgraph->nodes;
  5108. }
  5109. int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
  5110. return cgraph->n_nodes;
  5111. }
  5112. void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  5113. GGML_ASSERT(cgraph->size > cgraph->n_nodes);
  5114. cgraph->nodes[cgraph->n_nodes] = tensor;
  5115. cgraph->n_nodes++;
  5116. }
  5117. struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
  5118. for (int i = 0; i < cgraph->n_leafs; i++) {
  5119. struct ggml_tensor * leaf = cgraph->leafs[i];
  5120. if (strcmp(leaf->name, name) == 0) {
  5121. return leaf;
  5122. }
  5123. }
  5124. for (int i = 0; i < cgraph->n_nodes; i++) {
  5125. struct ggml_tensor * node = cgraph->nodes[i];
  5126. if (strcmp(node->name, name) == 0) {
  5127. return node;
  5128. }
  5129. }
  5130. return NULL;
  5131. }
  5132. struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5133. const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
  5134. return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
  5135. }
  5136. struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5137. const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
  5138. return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
  5139. }
  5140. void ggml_graph_print(const struct ggml_cgraph * cgraph) {
  5141. GGML_LOG_INFO("=== GRAPH ===\n");
  5142. GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
  5143. for (int i = 0; i < cgraph->n_nodes; i++) {
  5144. struct ggml_tensor * node = cgraph->nodes[i];
  5145. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
  5146. i,
  5147. node->ne[0], node->ne[1], node->ne[2],
  5148. ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
  5149. ggml_graph_get_grad(cgraph, node) ? "g" : " ");
  5150. }
  5151. GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
  5152. for (int i = 0; i < cgraph->n_leafs; i++) {
  5153. struct ggml_tensor * node = cgraph->leafs[i];
  5154. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
  5155. i,
  5156. node->ne[0], node->ne[1],
  5157. ggml_op_name(node->op),
  5158. ggml_get_name(node));
  5159. }
  5160. GGML_LOG_INFO("========================================\n");
  5161. }
  5162. // check if node is part of the graph
  5163. static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5164. if (cgraph == NULL) {
  5165. return true;
  5166. }
  5167. for (int i = 0; i < cgraph->n_nodes; i++) {
  5168. if (cgraph->nodes[i] == node) {
  5169. return true;
  5170. }
  5171. }
  5172. return false;
  5173. }
  5174. static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5175. for (int i = 0; i < cgraph->n_nodes; i++) {
  5176. struct ggml_tensor * parent = cgraph->nodes[i];
  5177. struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
  5178. if (grad == node) {
  5179. return parent;
  5180. }
  5181. }
  5182. return NULL;
  5183. }
  5184. static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5185. struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
  5186. struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
  5187. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
  5188. gparent0 ? (void *) gparent0 : (void *) parent,
  5189. gparent0 ? "g" : "x",
  5190. gparent ? (void *) gparent : (void *) node,
  5191. gparent ? "g" : "x",
  5192. gparent ? "empty" : "vee",
  5193. gparent ? "dashed" : "solid",
  5194. label);
  5195. }
  5196. static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5197. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n",
  5198. (void *) parent, "x",
  5199. (void *) node, "x",
  5200. label);
  5201. }
  5202. void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
  5203. char color[16];
  5204. FILE * fp = ggml_fopen(filename, "w");
  5205. GGML_ASSERT(fp);
  5206. fprintf(fp, "digraph G {\n");
  5207. fprintf(fp, " newrank = true;\n");
  5208. fprintf(fp, " rankdir = TB;\n");
  5209. for (int i = 0; i < gb->n_nodes; i++) {
  5210. struct ggml_tensor * node = gb->nodes[i];
  5211. struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
  5212. if (ggml_graph_get_parent(gb, node) != NULL) {
  5213. continue;
  5214. }
  5215. if (node->flags & GGML_TENSOR_FLAG_PARAM) {
  5216. snprintf(color, sizeof(color), "yellow");
  5217. } else if (grad) {
  5218. if (ggml_graph_find(gf, node)) {
  5219. snprintf(color, sizeof(color), "green");
  5220. } else {
  5221. snprintf(color, sizeof(color), "lightblue");
  5222. }
  5223. } else {
  5224. snprintf(color, sizeof(color), "white");
  5225. }
  5226. fprintf(fp, " \"%p\" [ "
  5227. "style = filled; fillcolor = %s; shape = record; "
  5228. "label=\"",
  5229. (void *) node, color);
  5230. if (strlen(node->name) > 0) {
  5231. fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
  5232. } else {
  5233. fprintf(fp, "(%s)|", ggml_type_name(node->type));
  5234. }
  5235. if (ggml_is_matrix(node)) {
  5236. fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
  5237. } else {
  5238. fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
  5239. }
  5240. if (grad) {
  5241. fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
  5242. } else {
  5243. fprintf(fp, "\"; ]\n");
  5244. }
  5245. }
  5246. for (int i = 0; i < gb->n_leafs; i++) {
  5247. struct ggml_tensor * node = gb->leafs[i];
  5248. snprintf(color, sizeof(color), "pink");
  5249. fprintf(fp, " \"%p\" [ "
  5250. "style = filled; fillcolor = %s; shape = record; "
  5251. "label=\"<x>",
  5252. (void *) node, color);
  5253. if (strlen(node->name) > 0) {
  5254. fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
  5255. } else {
  5256. fprintf(fp, "(%s)|", ggml_type_name(node->type));
  5257. }
  5258. fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
  5259. if (ggml_nelements(node) < 5 && node->data != NULL) {
  5260. fprintf(fp, " | (");
  5261. for (int j = 0; j < ggml_nelements(node); j++) {
  5262. // FIXME: use ggml-backend to obtain the tensor data
  5263. //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
  5264. // fprintf(fp, "%d", ggml_get_i32_1d(node, j));
  5265. //}
  5266. //else if (node->type == GGML_TYPE_F32 ||
  5267. // node->type == GGML_TYPE_F16 ||
  5268. // node->type == GGML_TYPE_BF16) {
  5269. // fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
  5270. //}
  5271. //else
  5272. {
  5273. fprintf(fp, "#");
  5274. }
  5275. if (j < ggml_nelements(node) - 1) {
  5276. fprintf(fp, ", ");
  5277. }
  5278. }
  5279. fprintf(fp, ")");
  5280. }
  5281. fprintf(fp, "\"; ]\n");
  5282. }
  5283. for (int i = 0; i < gb->n_nodes; i++) {
  5284. struct ggml_tensor * node = gb->nodes[i];
  5285. for (int j = 0; j < GGML_MAX_SRC; j++) {
  5286. if (node->src[j]) {
  5287. char label[16];
  5288. snprintf(label, sizeof(label), "src %d", j);
  5289. ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
  5290. }
  5291. }
  5292. }
  5293. for (int i = 0; i < gb->n_leafs; i++) {
  5294. struct ggml_tensor * node = gb->leafs[i];
  5295. for (int j = 0; j < GGML_MAX_SRC; j++) {
  5296. if (node->src[j]) {
  5297. char label[16];
  5298. snprintf(label, sizeof(label), "src %d", j);
  5299. ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
  5300. }
  5301. }
  5302. }
  5303. fprintf(fp, "}\n");
  5304. fclose(fp);
  5305. GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
  5306. }
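// illustrative usage: dump the backward graph gb (gf is used to color nodes that also appear in
// the forward graph) and render it with graphviz:
//   ggml_graph_dump_dot(gb, gf, "graph.dot");
//   // then: dot -Tpng graph.dot -o graph.dot.png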
  5307. ////////////////////////////////////////////////////////////////////////////////
  5308. void ggml_set_input(struct ggml_tensor * tensor) {
  5309. tensor->flags |= GGML_TENSOR_FLAG_INPUT;
  5310. }
  5311. void ggml_set_output(struct ggml_tensor * tensor) {
  5312. tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
  5313. }
  5314. void ggml_set_param(struct ggml_tensor * tensor) {
  5315. GGML_ASSERT(tensor->op == GGML_OP_NONE);
  5316. tensor->flags |= GGML_TENSOR_FLAG_PARAM;
  5317. }
  5318. void ggml_set_loss(struct ggml_tensor * tensor) {
  5319. GGML_ASSERT(ggml_is_scalar(tensor));
  5320. GGML_ASSERT(tensor->type == GGML_TYPE_F32);
  5321. tensor->flags |= GGML_TENSOR_FLAG_LOSS;
  5322. }
////////////////////////////////////////////////////////////////////////////////

void ggml_quantize_init(enum ggml_type type) {
    ggml_critical_section_start();

    switch (type) {
        case GGML_TYPE_IQ2_XXS:
        case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ2_S:
        case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
        default: // nothing
            break;
    }

    ggml_critical_section_end();
}

void ggml_quantize_free(void) {
    ggml_critical_section_start();

    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
    iq3xs_free_impl(256);

    ggml_critical_section_end();
}

bool ggml_quantize_requires_imatrix(enum ggml_type type) {
    return
        type == GGML_TYPE_IQ2_XXS ||
        type == GGML_TYPE_IQ2_XS  ||
        type == GGML_TYPE_IQ1_S;// ||
        //type == GGML_TYPE_IQ1_M;
}
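
// Usage sketch (illustrative only): callers are expected to check whether the
// target type needs an importance matrix before quantizing. `imatrix_data`
// below is a hypothetical buffer of per-channel importance weights.
//
//   if (ggml_quantize_requires_imatrix(GGML_TYPE_IQ2_XS) && imatrix_data == NULL) {
//       // abort, or fall back to a type that does not require an imatrix
//   }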
size_t ggml_quantize_chunk(
        enum ggml_type type,
        const float  * src,
        void         * dst,
        int64_t        start,
        int64_t        nrows,
        int64_t        n_per_row,
        const float  * imatrix) {
    const int64_t n = (int64_t) nrows * n_per_row;

    if (ggml_quantize_requires_imatrix(type)) {
        GGML_ASSERT(imatrix != NULL);
    }

    GGML_ASSERT(start % type_traits[type].blck_size == 0);
    GGML_ASSERT(start % n_per_row == 0);

    ggml_quantize_init(type); // this is a no-op if already initialized

    const size_t start_row = start / n_per_row;
    const size_t row_size  = ggml_row_size(type, n_per_row);

    size_t result = 0;

    switch (type) {
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_F16:
            {
                size_t elemsize = sizeof(ggml_fp16_t);
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_BF16:
            {
                size_t elemsize = sizeof(ggml_bf16_t);
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_F32:
            {
                size_t elemsize = sizeof(float);
                result = n * elemsize;
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
            } break;
        default:
            assert(false);
    }

    GGML_ASSERT(result == nrows * row_size);

    return result;
}
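
// Usage sketch (illustrative only): quantize a contiguous block of F32 rows into
// Q4_K. The sizes and buffer names are hypothetical; `src` must hold
// nrows * n_per_row floats and `dst` must be at least nrows * row_size bytes.
//
//   const int64_t n_per_row = 4096;   // must be a multiple of the type's block size
//   const int64_t nrows     = 32;
//
//   const size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
//   void * dst = malloc(nrows * row_size);
//
//   // start = 0: begin at the first row; Q4_K does not require an imatrix,
//   // although one can be passed to improve quality
//   size_t written = ggml_quantize_chunk(GGML_TYPE_Q4_K, src, dst, 0, nrows, n_per_row, NULL);
//   assert(written == nrows * row_size);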
////////////////////////////////////////////////////////////////////////////////

void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
    g_logger_state.log_callback           = log_callback ? log_callback : ggml_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}
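
// Usage sketch (illustrative only): install a custom log callback. The callback
// signature follows ggml_log_callback; `my_log` is a hypothetical name.
//
//   static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
//       (void) level;
//       (void) user_data;
//       fputs(text, stderr); // log text typically includes its own newline
//   }
//
//   ggml_log_set(my_log, NULL);  // route ggml logging through my_log
//   ggml_log_set(NULL, NULL);    // restore the default callback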
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
    p->n_threads  = n_threads;
    p->prio       = 0;     // default priority (usually means normal or inherited)
    p->poll       = 50;    // hybrid-polling enabled
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
    p->paused     = false; // threads are ready to go
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
}

struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
    struct ggml_threadpool_params p;
    ggml_threadpool_params_init(&p, n_threads);
    return p;
}

bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
    if (p0->n_threads  != p1->n_threads ) return false;
    if (p0->prio       != p1->prio      ) return false;
    if (p0->poll       != p1->poll      ) return false;
    if (p0->strict_cpu != p1->strict_cpu) return false;
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
}
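
// Usage sketch (illustrative only): build two parameter sets and compare them.
// The thread count and priority values below are hypothetical.
//
//   struct ggml_threadpool_params p0 = ggml_threadpool_params_default(8);
//   struct ggml_threadpool_params p1 = ggml_threadpool_params_default(8);
//   // ggml_threadpool_params_match(&p0, &p1) == true: identical defaults
//
//   p1.prio = 1; // raise priority on one of them
//   // ggml_threadpool_params_match(&p0, &p1) == false: prio differs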