// ggml.c

#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC

#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-threading.h"
#include "ggml-cpu.h"
#include "ggml.h"

// FIXME: required here for quantization functions
#include "ggml-quants.h"

#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#endif

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
#include <alloca.h>
#endif

#include <assert.h>
#include <errno.h>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <signal.h>

#if defined(__gnu_linux__)
#include <syscall.h>
#endif

#if defined(__APPLE__)
#include <unistd.h>
#include <mach/mach.h>
#include <TargetConditionals.h>
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
    #define NOMINMAX
#endif
#include <windows.h>
#endif

#define UNUSED GGML_UNUSED

#if defined(_MSC_VER)
#define m512bh(p) p
#define m512i(p) p
#else
#define m512bh(p) (__m512bh)(p)
#define m512i(p) (__m512i)(p)
#endif

// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
float ggml_table_f32_f16[1 << 16];

#if defined(__linux__) || \
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>

#if defined(__linux__)
#include <sys/prctl.h>
#endif

#if defined(__ANDROID__)
#include <unwind.h>
#include <dlfcn.h>
#include <stdio.h>

struct backtrace_state {
    void ** current;
    void ** end;
};

static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
    struct backtrace_state * state = (struct backtrace_state *)arg;
    uintptr_t pc = _Unwind_GetIP(context);
    if (pc) {
        if (state->current == state->end) {
            return _URC_END_OF_STACK;
        } else {
            *state->current++ = (void*)pc;
        }
    }
    return _URC_NO_REASON;
}

static void ggml_print_backtrace_symbols(void) {
    const int max = 100;
    void* buffer[max];
    struct backtrace_state state = {buffer, buffer + max};
    _Unwind_Backtrace(unwind_callback, &state);
    int count = state.current - buffer;
    for (int idx = 0; idx < count; ++idx) {
        const void * addr = buffer[idx];
        const char * symbol = "";
        Dl_info info;
        if (dladdr(addr, &info) && info.dli_sname) {
            symbol = info.dli_sname;
        }
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
    }
}
#elif defined(__linux__) && defined(__GLIBC__)
#include <execinfo.h>
static void ggml_print_backtrace_symbols(void) {
    void * trace[100];
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
}
#else
static void ggml_print_backtrace_symbols(void) {
    // platform not supported
}
#endif

void ggml_print_backtrace(void) {
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
    if (GGML_NO_BACKTRACE) {
        return;
    }
#if defined(__linux__)
    FILE * f = fopen("/proc/self/status", "r");
    size_t size = 0;
    char * line = NULL;
    ssize_t length = 0;
    while ((length = getline(&line, &size, f)) > 0) {
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
            // Already being debugged, and the breakpoint is the later abort()
            free(line);
            fclose(f);
            return;
        }
    }
    free(line);
    fclose(f);
    int lock[2] = { -1, -1 };
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
#endif
    const int parent_pid = getpid();
    const int child_pid = fork();
    if (child_pid < 0) { // error
#if defined(__linux__)
        close(lock[1]);
        close(lock[0]);
#endif
        return;
    } else if (child_pid == 0) { // child
        char attach[32];
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
#if defined(__linux__)
        close(lock[1]);
        (void) !read(lock[0], lock, 1);
        close(lock[0]);
#endif
        // try gdb
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",
            "-ex", attach,
            "-ex", "bt -frame-info source-and-location",
            "-ex", "detach",
            "-ex", "quit",
            (char *) NULL);
        // try lldb
        execlp("lldb", "lldb", "--batch",
            "-o", "bt",
            "-o", "quit",
            "-p", &attach[sizeof("attach ") - 1],
            (char *) NULL);
        // gdb failed, fallback to backtrace_symbols
        ggml_print_backtrace_symbols();
        _Exit(0);
    } else { // parent
#if defined(__linux__)
        prctl(PR_SET_PTRACER, child_pid);
        close(lock[1]);
        close(lock[0]);
#endif
        waitpid(child_pid, NULL, 0);
    }
}
#else
void ggml_print_backtrace(void) {
    // platform not supported
}
#endif

void ggml_abort(const char * file, int line, const char * fmt, ...) {
    fflush(stdout);
    fprintf(stderr, "%s:%d: ", file, line);
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fprintf(stderr, "\n");
    ggml_print_backtrace();
    abort();
}

// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp

//
// logging
//

struct ggml_logger_state {
    ggml_log_callback log_callback;
    void * log_callback_user_data;
};

static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};

static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
    if (format == NULL) {
        return;
    }
    va_list args_copy;
    va_copy(args_copy, args);
    char buffer[128];
    int len = vsnprintf(buffer, 128, format, args);
    if (len < 128) {
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
    } else {
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
        vsnprintf(buffer2, len + 1, format, args_copy);
        buffer2[len] = 0;
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
        free(buffer2);
    }
    va_end(args_copy);
}

void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
    va_list args;
    va_start(args, format);
    ggml_log_internal_v(level, format, args);
    va_end(args);
}

void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
    fflush(stderr);
}
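
// Illustrative usage sketch (not part of the upstream file): a caller-provided
// callback matching the ggml_log_callback signature above could route log text
// to a caller-owned FILE *; the name my_log_cb is hypothetical.
//
//   static void my_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
//       (void) level;
//       fputs(text, (FILE *) user_data); // user_data: FILE * supplied by the caller
//   }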

//
// end of logging block
//

#ifdef GGML_USE_ACCELERATE
// uncomment to use vDSP for soft max computation
// note: not sure if it is actually faster
//#define GGML_SOFT_MAX_ACCELERATE
#endif

void * ggml_aligned_malloc(size_t size) {
#if defined(__s390x__)
    const int alignment = 256;
#else
    const int alignment = 64;
#endif
#if defined(_MSC_VER) || defined(__MINGW32__)
    return _aligned_malloc(size, alignment);
#else
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
        return NULL;
    }
    void * aligned_memory = NULL;
#ifdef GGML_USE_CPU_HBM
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
#elif TARGET_OS_OSX
    GGML_UNUSED(alignment);
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
    int result = EFAULT;
    switch (alloc_status) {
        case KERN_SUCCESS:
            result = 0;
            break;
        case KERN_INVALID_ADDRESS:
            result = EINVAL;
            break;
        case KERN_NO_SPACE:
            result = ENOMEM;
            break;
        default:
            result = EFAULT;
            break;
    }
#else
    int result = posix_memalign(&aligned_memory, alignment, size);
#endif
    if (result != 0) {
        // Handle allocation failure
        const char *error_desc = "unknown allocation error";
        switch (result) {
            case EINVAL:
                error_desc = "invalid alignment value";
                break;
            case ENOMEM:
                error_desc = "insufficient memory";
                break;
        }
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
        return NULL;
    }
    return aligned_memory;
#endif
}

void ggml_aligned_free(void * ptr, size_t size) {
    GGML_UNUSED(size);
#if defined(_MSC_VER) || defined(__MINGW32__)
    _aligned_free(ptr);
#elif GGML_USE_CPU_HBM
    if (ptr != NULL) {
        hbw_free(ptr);
    }
#elif TARGET_OS_OSX
    if (ptr != NULL) {
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
    }
#else
    free(ptr);
#endif
}
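
// Illustrative usage sketch (not part of the upstream file): ggml_aligned_malloc
// and ggml_aligned_free are used as a pair, and the same size must be passed to
// both so the macOS vm_deallocate path above knows how much to release.
//
//   size_t n   = 1024*1024;
//   void * buf = ggml_aligned_malloc(n);
//   if (buf != NULL) {
//       // ... use buf ...
//       ggml_aligned_free(buf, n);
//   }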

inline static void * ggml_malloc(size_t size) {
    if (size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
        return NULL;
    }
    void * result = malloc(size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

// calloc
inline static void * ggml_calloc(size_t num, size_t size) {
    if (num == 0 || size == 0) {
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
        return NULL;
    }
    void * result = calloc(num, size);
    if (result == NULL) {
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
        GGML_ABORT("fatal error");
    }
    return result;
}

#define GGML_MALLOC(size) ggml_malloc(size)
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
#define GGML_FREE(ptr) free(ptr)

const char * ggml_status_to_string(enum ggml_status status) {
    switch (status) {
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
        case GGML_STATUS_FAILED: return "GGML status: error (operation failed)";
        case GGML_STATUS_SUCCESS: return "GGML status: success";
        case GGML_STATUS_ABORTED: return "GGML status: warning (operation aborted)";
    }
    return "GGML status: unknown";
}

float ggml_fp16_to_fp32(ggml_fp16_t x) {
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
    return GGML_FP16_TO_FP32(x);
}

ggml_fp16_t ggml_fp32_to_fp16(float x) {
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
    return GGML_FP32_TO_FP16(x);
}

float ggml_bf16_to_fp32(ggml_bf16_t x) {
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
    return GGML_BF16_TO_FP32(x); // it just left shifts
}

ggml_bf16_t ggml_fp32_to_bf16(float x) {
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
    return GGML_FP32_TO_BF16(x);
}

void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; i++) {
        y[i] = GGML_FP16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_FP32_TO_FP16(x[i]);
    }
}

void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
    int i = 0;
    for (; i < n; ++i) {
        y[i] = GGML_BF16_TO_FP32(x[i]);
    }
}

void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
    for (int i = 0; i < n; i++) {
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
    }
}

void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
    int i = 0;
#if defined(__AVX512BF16__)
    // subnormals are flushed to zero on this platform
    for (; i + 32 <= n; i += 32) {
        _mm512_storeu_si512(
            (__m512i *)(y + i),
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
                                      _mm512_loadu_ps(x + i))));
    }
#endif
    for (; i < n; i++) {
        y[i] = GGML_FP32_TO_BF16(x[i]);
    }
}

bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
}

//
// timing
//

#if defined(_MSC_VER) || defined(__MINGW32__)
static int64_t timer_freq, timer_start;

void ggml_time_init(void) {
    LARGE_INTEGER t;
    QueryPerformanceFrequency(&t);
    timer_freq = t.QuadPart;
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
    // and the uptime is high enough.
    // We subtract the program start time to reduce the likelihood of that happening.
    QueryPerformanceCounter(&t);
    timer_start = t.QuadPart;
}

int64_t ggml_time_ms(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
}

int64_t ggml_time_us(void) {
    LARGE_INTEGER t;
    QueryPerformanceCounter(&t);
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
}
#else
void ggml_time_init(void) {}

int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
}

int64_t ggml_time_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
}
#endif

int64_t ggml_cycles(void) {
    return clock();
}

int64_t ggml_cycles_per_ms(void) {
    return CLOCKS_PER_SEC/1000;
}
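
// Illustrative usage sketch (not part of the upstream file): timing a region with
// the monotonic microsecond clock; ggml_time_init() is a no-op outside Windows,
// where it captures the start time used by the timers above.
//
//   ggml_time_init();
//   const int64_t t0 = ggml_time_us();
//   // ... work ...
//   const int64_t t_elapsed_us = ggml_time_us() - t0;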

//
// cross-platform UTF-8 file paths
//

#ifdef _WIN32
static wchar_t * ggml_mbstowcs(const char * mbs) {
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
    if (!wlen) {
        errno = EINVAL;
        return NULL;
    }
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
    if (!wlen) {
        GGML_FREE(wbuf);
        errno = EINVAL;
        return NULL;
    }
    return wbuf;
}
#endif

FILE * ggml_fopen(const char * fname, const char * mode) {
#ifdef _WIN32
    FILE * file = NULL;
    // convert fname (UTF-8)
    wchar_t * wfname = ggml_mbstowcs(fname);
    if (wfname) {
        // convert mode (ANSI)
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
        wchar_t * wmode_p = wmode;
        do {
            *wmode_p++ = (wchar_t)*mode;
        } while (*mode++);
        // open file
        file = _wfopen(wfname, wmode);
        GGML_FREE(wfname);
        GGML_FREE(wmode);
    }
    return file;
#else
    return fopen(fname, mode);
#endif
}
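
// Illustrative usage sketch (not part of the upstream file): ggml_fopen accepts a
// UTF-8 path on every platform (converted to UTF-16 for _wfopen on Windows);
// the file name below is hypothetical.
//
//   FILE * fp = ggml_fopen("models/model.gguf", "rb");
//   if (fp != NULL) {
//       // ... read ...
//       fclose(fp);
//   }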

static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);

static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
    [GGML_TYPE_I8] = {
        .type_name = "i8",
        .blck_size = 1,
        .type_size = sizeof(int8_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I16] = {
        .type_name = "i16",
        .blck_size = 1,
        .type_size = sizeof(int16_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I32] = {
        .type_name = "i32",
        .blck_size = 1,
        .type_size = sizeof(int32_t),
        .is_quantized = false,
    },
    [GGML_TYPE_I64] = {
        .type_name = "i64",
        .blck_size = 1,
        .type_size = sizeof(int64_t),
        .is_quantized = false,
    },
    [GGML_TYPE_F64] = {
        .type_name = "f64",
        .blck_size = 1,
        .type_size = sizeof(double),
        .is_quantized = false,
    },
    [GGML_TYPE_F32] = {
        .type_name = "f32",
        .blck_size = 1,
        .type_size = sizeof(float),
        .is_quantized = false,
    },
    [GGML_TYPE_F16] = {
        .type_name = "f16",
        .blck_size = 1,
        .type_size = sizeof(ggml_fp16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row,
    },
    [GGML_TYPE_Q4_0] = {
        .type_name = "q4_0",
        .blck_size = QK4_0,
        .type_size = sizeof(block_q4_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
    },
    [GGML_TYPE_Q4_1] = {
        .type_name = "q4_1",
        .blck_size = QK4_1,
        .type_size = sizeof(block_q4_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
    },
    [4] = { // GGML_TYPE_Q4_2
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [5] = { // GGML_TYPE_Q4_3
        .type_name = "DEPRECATED",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [GGML_TYPE_Q5_0] = {
        .type_name = "q5_0",
        .blck_size = QK5_0,
        .type_size = sizeof(block_q5_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref,
    },
    [GGML_TYPE_Q5_1] = {
        .type_name = "q5_1",
        .blck_size = QK5_1,
        .type_size = sizeof(block_q5_1),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_1,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
    },
    [GGML_TYPE_Q8_0] = {
        .type_name = "q8_0",
        .blck_size = QK8_0,
        .type_size = sizeof(block_q8_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q8_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
    },
    [GGML_TYPE_Q8_1] = {
        .type_name = "q8_1",
        .blck_size = QK8_1,
        .type_size = sizeof(block_q8_1),
        .is_quantized = true,
        .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
    },
    [GGML_TYPE_Q2_K] = {
        .type_name = "q2_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q2_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q2_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref,
    },
    [GGML_TYPE_Q3_K] = {
        .type_name = "q3_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q3_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q3_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref,
    },
    [GGML_TYPE_Q4_K] = {
        .type_name = "q4_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q4_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q4_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref,
    },
    [GGML_TYPE_Q5_K] = {
        .type_name = "q5_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q5_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q5_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref,
    },
    [GGML_TYPE_Q6_K] = {
        .type_name = "q6_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q6_K),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_q6_K,
        .from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref,
    },
    [GGML_TYPE_IQ2_XXS] = {
        .type_name = "iq2_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ2_XS] = {
        .type_name = "iq2_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_xs,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ3_XXS] = {
        .type_name = "iq3_xxs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_xxs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
    },
    [GGML_TYPE_IQ3_S] = {
        .type_name = "iq3_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq3_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq3_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref,
    },
    [GGML_TYPE_IQ2_S] = {
        .type_name = "iq2_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq2_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq2_s,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref,
    },
    [GGML_TYPE_IQ1_S] = {
        .type_name = "iq1_s",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_s),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_s,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ1_M] = {
        .type_name = "iq1_m",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq1_m),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq1_m,
        .from_float_ref = NULL,
    },
    [GGML_TYPE_IQ4_NL] = {
        .type_name = "iq4_nl",
        .blck_size = QK4_NL,
        .type_size = sizeof(block_iq4_nl),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
    },
    [GGML_TYPE_IQ4_XS] = {
        .type_name = "iq4_xs",
        .blck_size = QK_K,
        .type_size = sizeof(block_iq4_xs),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
        .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref,
    },
    [GGML_TYPE_Q8_K] = {
        .type_name = "q8_K",
        .blck_size = QK_K,
        .type_size = sizeof(block_q8_K),
        .is_quantized = true,
    },
    [GGML_TYPE_BF16] = {
        .type_name = "bf16",
        .blck_size = 1,
        .type_size = sizeof(ggml_bf16_t),
        .is_quantized = false,
        .to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
        .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
    },
    [31] = { // GGML_TYPE_Q4_0_4_4
        .type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [32] = { // GGML_TYPE_Q4_0_4_8
        .type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [33] = { // GGML_TYPE_Q4_0_8_8
        .type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [GGML_TYPE_TQ1_0] = {
        .type_name = "tq1_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq1_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq1_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq1_0_ref,
    },
    [GGML_TYPE_TQ2_0] = {
        .type_name = "tq2_0",
        .blck_size = QK_K,
        .type_size = sizeof(block_tq2_0),
        .is_quantized = true,
        .to_float = (ggml_to_float_t) dequantize_row_tq2_0,
        .from_float_ref = (ggml_from_float_t) quantize_row_tq2_0_ref,
    },
    [36] = { // GGML_TYPE_IQ4_NL_4_4
        .type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [37] = { // GGML_TYPE_IQ4_NL_4_8
        .type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
    [38] = { // GGML_TYPE_IQ4_NL_8_8
        .type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
        .blck_size = 0,
        .type_size = 0,
        .is_quantized = false,
    },
};

const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
    GGML_ASSERT(type < GGML_TYPE_COUNT);
    return &type_traits[type];
}
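
// Illustrative usage sketch (not part of the upstream file): querying the traits
// table for a type, e.g. to check whether it is quantized and how many elements
// one block covers.
//
//   const struct ggml_type_traits * tt = ggml_get_type_traits(GGML_TYPE_Q4_0);
//   // tt->is_quantized == true, tt->blck_size == QK4_0, tt->type_size == sizeof(block_q4_0)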
  783. //
  784. // ggml object
  785. //
  786. struct ggml_object {
  787. size_t offs;
  788. size_t size;
  789. struct ggml_object * next;
  790. enum ggml_object_type type;
  791. char padding[4];
  792. };
  793. static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
  794. //
  795. // ggml context
  796. //
  797. struct ggml_context {
  798. size_t mem_size;
  799. void * mem_buffer;
  800. bool mem_buffer_owned;
  801. bool no_alloc;
  802. int n_objects;
  803. struct ggml_object * objects_begin;
  804. struct ggml_object * objects_end;
  805. };
  806. //
  807. // data types
  808. //
  809. static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
  810. "NONE",
  811. "DUP",
  812. "ADD",
  813. "ADD1",
  814. "ACC",
  815. "SUB",
  816. "MUL",
  817. "DIV",
  818. "SQR",
  819. "SQRT",
  820. "LOG",
  821. "SIN",
  822. "COS",
  823. "SUM",
  824. "SUM_ROWS",
  825. "MEAN",
  826. "ARGMAX",
  827. "COUNT_EQUAL",
  828. "REPEAT",
  829. "REPEAT_BACK",
  830. "CONCAT",
  831. "SILU_BACK",
  832. "NORM",
  833. "RMS_NORM",
  834. "RMS_NORM_BACK",
  835. "GROUP_NORM",
  836. "L2_NORM",
  837. "MUL_MAT",
  838. "MUL_MAT_ID",
  839. "OUT_PROD",
  840. "SCALE",
  841. "SET",
  842. "CPY",
  843. "CONT",
  844. "RESHAPE",
  845. "VIEW",
  846. "PERMUTE",
  847. "TRANSPOSE",
  848. "GET_ROWS",
  849. "GET_ROWS_BACK",
  850. "DIAG",
  851. "DIAG_MASK_INF",
  852. "DIAG_MASK_ZERO",
  853. "SOFT_MAX",
  854. "SOFT_MAX_BACK",
  855. "ROPE",
  856. "ROPE_BACK",
  857. "CLAMP",
  858. "CONV_TRANSPOSE_1D",
  859. "IM2COL",
  860. "IM2COL_BACK",
  861. "CONV_2D_DW",
  862. "CONV_TRANSPOSE_2D",
  863. "POOL_1D",
  864. "POOL_2D",
  865. "POOL_2D_BACK",
  866. "UPSCALE",
  867. "PAD",
  868. "PAD_REFLECT_1D",
  869. "ROLL",
  870. "ARANGE",
  871. "TIMESTEP_EMBEDDING",
  872. "ARGSORT",
  873. "LEAKY_RELU",
  874. "FLASH_ATTN_EXT",
  875. "FLASH_ATTN_BACK",
  876. "SSM_CONV",
  877. "SSM_SCAN",
  878. "WIN_PART",
  879. "WIN_UNPART",
  880. "GET_REL_POS",
  881. "ADD_REL_POS",
  882. "RWKV_WKV6",
  883. "GATED_LINEAR_ATTN",
  884. "RWKV_WKV7",
  885. "UNARY",
  886. "MAP_CUSTOM1",
  887. "MAP_CUSTOM2",
  888. "MAP_CUSTOM3",
  889. "CUSTOM",
  890. "CROSS_ENTROPY_LOSS",
  891. "CROSS_ENTROPY_LOSS_BACK",
  892. "OPT_STEP_ADAMW",
  893. };
  894. static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
  895. static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
  896. "none",
  897. "x",
  898. "x+y",
  899. "x+y",
  900. "view(x,nb,offset)+=y->x",
  901. "x-y",
  902. "x*y",
  903. "x/y",
  904. "x^2",
  905. "√x",
  906. "log(x)",
  907. "sin(x)",
  908. "cos(x)",
  909. "Σx",
  910. "Σx_k",
  911. "Σx/n",
  912. "argmax(x)",
  913. "count_equal(x)",
  914. "repeat(x)",
  915. "repeat_back(x)",
  916. "concat(x, y)",
  917. "silu_back(x)",
  918. "norm(x)",
  919. "rms_norm(x)",
  920. "rms_norm_back(x)",
  921. "group_norm(x)",
  922. "l2_norm(x)",
  923. "X*Y",
  924. "X[i]*Y",
  925. "X*Y",
  926. "x*v",
  927. "y-\\>view(x)",
  928. "x-\\>y",
  929. "cont(x)",
  930. "reshape(x)",
  931. "view(x)",
  932. "permute(x)",
  933. "transpose(x)",
  934. "get_rows(x)",
  935. "get_rows_back(x)",
  936. "diag(x)",
  937. "diag_mask_inf(x)",
  938. "diag_mask_zero(x)",
  939. "soft_max(x)",
  940. "soft_max_back(x)",
  941. "rope(x)",
  942. "rope_back(x)",
  943. "clamp(x)",
  944. "conv_transpose_1d(x)",
  945. "im2col(x)",
  946. "im2col_back(x)",
  947. "conv_2d_dw(x)",
  948. "conv_transpose_2d(x)",
  949. "pool_1d(x)",
  950. "pool_2d(x)",
  951. "pool_2d_back(x)",
  952. "upscale(x)",
  953. "pad(x)",
  954. "pad_reflect_1d(x)",
  955. "roll(x)",
  956. "arange(start, stop, step)",
  957. "timestep_embedding(timesteps, dim, max_period)",
  958. "argsort(x)",
  959. "leaky_relu(x)",
  960. "flash_attn_ext(x)",
  961. "flash_attn_back(x)",
  962. "ssm_conv(x)",
  963. "ssm_scan(x)",
  964. "win_part(x)",
  965. "win_unpart(x)",
  966. "get_rel_pos(x)",
  967. "add_rel_pos(x)",
  968. "rwkv_wkv6(k, v, r, tf, td, s)",
  969. "gated_linear_attn(k, v, q, gate, s)",
  970. "rwkv_wkv7(r, w, k, v, a, b, s)",
  971. "unary(x)",
  972. "map_custom(x)",
  973. "map_custom(x,y)",
  974. "map_custom(x,y,z)",
  975. "custom(x)",
  976. "cross_entropy_loss(x,y)",
  977. "cross_entropy_loss_back(x,y)",
  978. "adamw(x)",
  979. };
  980. static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
  981. static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
  982. static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
  983. "ABS",
  984. "SGN",
  985. "NEG",
  986. "STEP",
  987. "TANH",
  988. "ELU",
  989. "RELU",
  990. "SIGMOID",
  991. "GELU",
  992. "GELU_QUICK",
  993. "SILU",
  994. "HARDSWISH",
  995. "HARDSIGMOID",
  996. "EXP",
  997. "GELU_ERF",
  998. };
  999. static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
  1000. static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
  1001. static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
  1002. ////////////////////////////////////////////////////////////////////////////////
  1003. void ggml_print_object(const struct ggml_object * obj) {
  1004. GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
  1005. obj->type, obj->offs, obj->size, (const void *) obj->next);
  1006. }
  1007. void ggml_print_objects(const struct ggml_context * ctx) {
  1008. struct ggml_object * obj = ctx->objects_begin;
  1009. GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
  1010. while (obj != NULL) {
  1011. ggml_print_object(obj);
  1012. obj = obj->next;
  1013. }
  1014. GGML_LOG_INFO("%s: --- end ---\n", __func__);
  1015. }
  1016. int64_t ggml_nelements(const struct ggml_tensor * tensor) {
  1017. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1018. return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1019. }
  1020. int64_t ggml_nrows(const struct ggml_tensor * tensor) {
  1021. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1022. return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
  1023. }
  1024. size_t ggml_nbytes(const struct ggml_tensor * tensor) {
  1025. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1026. if (tensor->ne[i] <= 0) {
  1027. return 0;
  1028. }
  1029. }
  1030. size_t nbytes;
  1031. const size_t blck_size = ggml_blck_size(tensor->type);
  1032. if (blck_size == 1) {
  1033. nbytes = ggml_type_size(tensor->type);
  1034. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1035. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1036. }
  1037. }
  1038. else {
  1039. nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
  1040. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1041. nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
  1042. }
  1043. }
  1044. return nbytes;
  1045. }
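// example (illustrative): for a contiguous F32 tensor with ne = {4, 3, 2, 1} the strides are
// nb = {4, 16, 48, 96}, so the blck_size == 1 branch above computes
//   nbytes = 4 + (4-1)*4 + (3-1)*16 + (2-1)*48 + (1-1)*96 = 96 bytes
// which matches 4*3*2 elements * sizeof(float)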
  1046. size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
  1047. return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
  1048. }
  1049. int64_t ggml_blck_size(enum ggml_type type) {
  1050. return type_traits[type].blck_size;
  1051. }
  1052. size_t ggml_type_size(enum ggml_type type) {
  1053. return type_traits[type].type_size;
  1054. }
  1055. size_t ggml_row_size(enum ggml_type type, int64_t ne) {
  1056. assert(ne % ggml_blck_size(type) == 0);
  1057. return ggml_type_size(type)*ne/ggml_blck_size(type);
  1058. }
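// example (illustrative): for a block-quantized type such as GGML_TYPE_Q4_0
// (block size 32, 18 bytes per block) a row of ne = 4096 elements takes
//   ggml_row_size(GGML_TYPE_Q4_0, 4096) == 18*4096/32 == 2304 bytes
// while the same row in GGML_TYPE_F32 takes 4*4096 == 16384 bytes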
  1059. double ggml_type_sizef(enum ggml_type type) {
  1060. return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
  1061. }
  1062. const char * ggml_type_name(enum ggml_type type) {
  1063. return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
  1064. }
  1065. bool ggml_is_quantized(enum ggml_type type) {
  1066. return type_traits[type].is_quantized;
  1067. }
  1068. const char * ggml_op_name(enum ggml_op op) {
  1069. return GGML_OP_NAME[op];
  1070. }
  1071. const char * ggml_op_symbol(enum ggml_op op) {
  1072. return GGML_OP_SYMBOL[op];
  1073. }
  1074. const char * ggml_unary_op_name(enum ggml_unary_op op) {
  1075. return GGML_UNARY_OP_NAME[op];
  1076. }
  1077. const char * ggml_op_desc(const struct ggml_tensor * t) {
  1078. if (t->op == GGML_OP_UNARY) {
  1079. enum ggml_unary_op uop = ggml_get_unary_op(t);
  1080. return ggml_unary_op_name(uop);
  1081. }
  1082. return ggml_op_name(t->op);
  1083. }
  1084. size_t ggml_element_size(const struct ggml_tensor * tensor) {
  1085. return ggml_type_size(tensor->type);
  1086. }
  1087. bool ggml_is_scalar(const struct ggml_tensor * tensor) {
  1088. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1089. return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1090. }
  1091. bool ggml_is_vector(const struct ggml_tensor * tensor) {
  1092. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1093. return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1094. }
  1095. bool ggml_is_matrix(const struct ggml_tensor * tensor) {
  1096. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1097. return tensor->ne[2] == 1 && tensor->ne[3] == 1;
  1098. }
  1099. bool ggml_is_3d(const struct ggml_tensor * tensor) {
  1100. return tensor->ne[3] == 1;
  1101. }
  1102. int ggml_n_dims(const struct ggml_tensor * tensor) {
  1103. for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
  1104. if (tensor->ne[i] > 1) {
  1105. return i + 1;
  1106. }
  1107. }
  1108. return 1;
  1109. }
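// example (illustrative): trailing dimensions of size 1 do not count, interior ones do:
//   ne = {4096, 32, 1, 1} -> ggml_n_dims() == 2
//   ne = {4096,  1, 8, 1} -> ggml_n_dims() == 3  (highest non-1 dimension has index 2)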
  1110. enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
  1111. enum ggml_type wtype = GGML_TYPE_COUNT;
  1112. switch (ftype) {
  1113. case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break;
  1114. case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break;
  1115. case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break;
  1116. case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break;
  1117. case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break;
  1118. case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break;
  1119. case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break;
  1120. case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break;
  1121. case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break;
  1122. case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break;
  1123. case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
  1124. case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
  1125. case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
  1126. case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
  1127. case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
  1128. case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
  1129. case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
  1130. case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break;
  1131. case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
  1132. case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
  1133. case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
  1134. case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
  1135. case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
  1136. case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
  1137. }
  1138. GGML_ASSERT(wtype != GGML_TYPE_COUNT);
  1139. return wtype;
  1140. }
  1141. size_t ggml_tensor_overhead(void) {
  1142. return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
  1143. }
  1144. bool ggml_is_transposed(const struct ggml_tensor * tensor) {
  1145. return tensor->nb[0] > tensor->nb[1];
  1146. }
  1147. static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
  1148. size_t next_nb = ggml_type_size(tensor->type);
  1149. if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
  1150. return false;
  1151. }
  1152. next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
  1153. for (int i = 1; i < GGML_MAX_DIMS; i++) {
  1154. if (tensor->ne[i] != 1) {
  1155. if (i > n) {
  1156. if (tensor->nb[i] != next_nb) {
  1157. return false;
  1158. }
  1159. next_nb *= tensor->ne[i];
  1160. } else {
  1161. // this dimension does not need to be contiguous
  1162. next_nb = tensor->ne[i]*tensor->nb[i];
  1163. }
  1164. }
  1165. }
  1166. return true;
  1167. }
  1168. bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
  1169. return ggml_is_contiguous_0(tensor);
  1170. }
  1171. bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
  1172. return ggml_is_contiguous_n(tensor, 0);
  1173. }
  1174. bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
  1175. return ggml_is_contiguous_n(tensor, 1);
  1176. }
  1177. bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
  1178. return ggml_is_contiguous_n(tensor, 2);
  1179. }
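// example (illustrative): ggml_is_contiguous_n(t, n) relaxes the stride check for dimensions
// 1..n but still requires the higher dimensions to be packed. For an F32 tensor with
// ne = {4, 3, 1, 1} whose rows are padded to 32 bytes, i.e. nb = {4, 32, 96, 96}:
//   ggml_is_contiguous_0(t) == false  // nb[1] != 4*4
//   ggml_is_contiguous_1(t) == true   // per-row padding is allowed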
  1180. bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
  1181. return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
  1182. }
  1183. bool ggml_is_permuted(const struct ggml_tensor * tensor) {
  1184. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1185. return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
  1186. }
  1187. bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
  1188. return
  1189. tensor->nb[0] > tensor->nb[2] &&
  1190. tensor->nb[1] > tensor->nb[0] &&
  1191. tensor->nb[2] == ggml_type_size(tensor->type);
  1192. }
  1193. static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
  1194. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1195. return
  1196. tensor->nb[0] == ggml_type_size(tensor->type) &&
  1197. tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
  1198. tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
  1199. }
  1200. bool ggml_is_empty(const struct ggml_tensor * tensor) {
  1201. for (int i = 0; i < GGML_MAX_DIMS; ++i) {
  1202. if (tensor->ne[i] == 0) {
  1203. // empty if any dimension has no elements
  1204. return true;
  1205. }
  1206. }
  1207. return false;
  1208. }
  1209. bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1210. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1211. return
  1212. (t0->ne[0] == t1->ne[0]) &&
  1213. (t0->ne[1] == t1->ne[1]) &&
  1214. (t0->ne[2] == t1->ne[2]) &&
  1215. (t0->ne[3] == t1->ne[3]);
  1216. }
  1217. bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1218. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1219. return
  1220. (t0->nb[0] == t1->nb[0]) &&
  1221. (t0->nb[1] == t1->nb[1]) &&
  1222. (t0->nb[2] == t1->nb[2]) &&
  1223. (t0->nb[3] == t1->nb[3]);
  1224. }
  1225. // check if t1 can be represented as a repetition of t0
  1226. bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1227. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1228. return ggml_is_empty(t0) ? ggml_is_empty(t1) :
  1229. (t1->ne[0]%t0->ne[0] == 0) &&
  1230. (t1->ne[1]%t0->ne[1] == 0) &&
  1231. (t1->ne[2]%t0->ne[2] == 0) &&
  1232. (t1->ne[3]%t0->ne[3] == 0);
  1233. }
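// example (illustrative, assuming a valid ctx): this broadcast rule is what lets binary ops
// such as ggml_add() apply a per-row bias:
//   struct ggml_tensor * x    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32); // 32 rows
//   struct ggml_tensor * bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4096);     // 1 row
//   // ggml_can_repeat(bias, x) == true -> ggml_add(ctx, x, bias) broadcasts bias over all rows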
  1234. static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  1235. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  1236. return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
  1237. }
  1238. // assert that pointer is aligned to GGML_MEM_ALIGN
  1239. #define GGML_ASSERT_ALIGNED(ptr) \
  1240. GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
  1241. ////////////////////////////////////////////////////////////////////////////////
  1242. struct ggml_context * ggml_init(struct ggml_init_params params) {
  1243. static bool is_first_call = true;
  1244. ggml_critical_section_start();
  1245. if (is_first_call) {
  1246. // initialize time system (required on Windows)
  1247. ggml_time_init();
  1248. for (int i = 0; i < (1 << 16); ++i) {
  1249. union {
  1250. uint16_t u16;
  1251. ggml_fp16_t fp16;
  1252. } u = {i};
  1253. ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
  1254. }
  1255. is_first_call = false;
  1256. }
  1257. ggml_critical_section_end();
  1258. struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1259. // allow calling ggml_init with 0 size
  1260. if (params.mem_size == 0) {
  1261. params.mem_size = GGML_MEM_ALIGN;
  1262. }
  1263. const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
  1264. *ctx = (struct ggml_context) {
  1265. /*.mem_size =*/ mem_size,
  1266. /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
  1267. /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
  1268. /*.no_alloc =*/ params.no_alloc,
  1269. /*.n_objects =*/ 0,
  1270. /*.objects_begin =*/ NULL,
  1271. /*.objects_end =*/ NULL,
  1272. };
  1273. GGML_ASSERT(ctx->mem_buffer != NULL);
  1274. GGML_ASSERT_ALIGNED(ctx->mem_buffer);
  1275. GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
  1276. return ctx;
  1277. }
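// usage example (illustrative):
//   struct ggml_init_params params = {
//       /*.mem_size   =*/ 16*1024*1024, // 16 MiB pool for object headers + tensor data
//       /*.mem_buffer =*/ NULL,         // let ggml allocate (and own) the buffer
//       /*.no_alloc   =*/ false,        // allocate tensor data inside the pool
//   };
//   struct ggml_context * ctx = ggml_init(params);
//   // ... create tensors and build graphs ...
//   ggml_free(ctx);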
  1278. void ggml_reset(struct ggml_context * ctx) {
  1279. if (ctx == NULL) {
  1280. return;
  1281. }
  1282. ctx->n_objects = 0;
  1283. ctx->objects_begin = NULL;
  1284. ctx->objects_end = NULL;
  1285. }
  1286. void ggml_free(struct ggml_context * ctx) {
  1287. if (ctx == NULL) {
  1288. return;
  1289. }
  1290. if (ctx->mem_buffer_owned) {
  1291. ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
  1292. }
  1293. GGML_FREE(ctx);
  1294. }
  1295. size_t ggml_used_mem(const struct ggml_context * ctx) {
  1296. return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
  1297. }
  1298. bool ggml_get_no_alloc(struct ggml_context * ctx) {
  1299. return ctx->no_alloc;
  1300. }
  1301. void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
  1302. ctx->no_alloc = no_alloc;
  1303. }
  1304. void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
  1305. return ctx->mem_buffer;
  1306. }
  1307. size_t ggml_get_mem_size(const struct ggml_context * ctx) {
  1308. return ctx->mem_size;
  1309. }
  1310. size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
  1311. size_t max_size = 0;
  1312. for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
  1313. size_t bytes = ggml_nbytes(tensor);
  1314. max_size = MAX(max_size, bytes);
  1315. }
  1316. return max_size;
  1317. }
  1318. ////////////////////////////////////////////////////////////////////////////////
  1319. static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
  1320. // always insert objects at the end of the context's memory pool
  1321. struct ggml_object * obj_cur = ctx->objects_end;
  1322. const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
  1323. const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
  1324. const size_t cur_end = cur_offs + cur_size;
  1325. // align to GGML_MEM_ALIGN
  1326. size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
  1327. char * const mem_buffer = ctx->mem_buffer;
  1328. struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
  1329. if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
  1330. GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
  1331. __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
  1332. #ifndef NDEBUG
  1333. GGML_ABORT("not enough space in the context's memory pool");
  1334. #endif
  1335. return NULL;
  1336. }
  1337. *obj_new = (struct ggml_object) {
  1338. .offs = cur_end + GGML_OBJECT_SIZE,
  1339. .size = size_needed,
  1340. .next = NULL,
  1341. .type = type,
  1342. };
  1343. GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
  1344. if (obj_cur != NULL) {
  1345. obj_cur->next = obj_new;
  1346. } else {
  1347. // this is the first object in this context
  1348. ctx->objects_begin = obj_new;
  1349. }
  1350. ctx->objects_end = obj_new;
  1351. //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
  1352. return obj_new;
  1353. }
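// memory pool layout (illustrative sketch): objects are appended back to back, each header
// immediately followed by its payload, with offsets padded to GGML_MEM_ALIGN:
//
//   mem_buffer: [ggml_object | payload][ggml_object | payload]...
//                              ^ offs points just past the object's own header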
  1354. static struct ggml_tensor * ggml_new_tensor_impl(
  1355. struct ggml_context * ctx,
  1356. enum ggml_type type,
  1357. int n_dims,
  1358. const int64_t * ne,
  1359. struct ggml_tensor * view_src,
  1360. size_t view_offs) {
  1361. GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
  1362. GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
  1363. // find the base tensor and absolute offset
  1364. if (view_src != NULL && view_src->view_src != NULL) {
  1365. view_offs += view_src->view_offs;
  1366. view_src = view_src->view_src;
  1367. }
  1368. size_t data_size = ggml_row_size(type, ne[0]);
  1369. for (int i = 1; i < n_dims; i++) {
  1370. data_size *= ne[i];
  1371. }
  1372. GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
  1373. void * data = view_src != NULL ? view_src->data : NULL;
  1374. if (data != NULL) {
  1375. data = (char *) data + view_offs;
  1376. }
  1377. size_t obj_alloc_size = 0;
  1378. if (view_src == NULL && !ctx->no_alloc) {
  1379. // allocate tensor data in the context's memory pool
  1380. obj_alloc_size = data_size;
  1381. }
  1382. struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
  1383. GGML_ASSERT(obj_new);
  1384. struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
  1385. *result = (struct ggml_tensor) {
  1386. /*.type =*/ type,
  1387. /*.buffer =*/ NULL,
  1388. /*.ne =*/ { 1, 1, 1, 1 },
  1389. /*.nb =*/ { 0, 0, 0, 0 },
  1390. /*.op =*/ GGML_OP_NONE,
  1391. /*.op_params =*/ { 0 },
  1392. /*.flags =*/ 0,
  1393. /*.src =*/ { NULL },
  1394. /*.view_src =*/ view_src,
  1395. /*.view_offs =*/ view_offs,
  1396. /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
  1397. /*.name =*/ { 0 },
  1398. /*.extra =*/ NULL,
  1399. /*.padding =*/ { 0 },
  1400. };
  1401. // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
  1402. //GGML_ASSERT_ALIGNED(result->data);
  1403. for (int i = 0; i < n_dims; i++) {
  1404. result->ne[i] = ne[i];
  1405. }
  1406. result->nb[0] = ggml_type_size(type);
  1407. result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
  1408. for (int i = 2; i < GGML_MAX_DIMS; i++) {
  1409. result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
  1410. }
  1411. ctx->n_objects++;
  1412. return result;
  1413. }
  1414. struct ggml_tensor * ggml_new_tensor(
  1415. struct ggml_context * ctx,
  1416. enum ggml_type type,
  1417. int n_dims,
  1418. const int64_t * ne) {
  1419. return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
  1420. }
  1421. struct ggml_tensor * ggml_new_tensor_1d(
  1422. struct ggml_context * ctx,
  1423. enum ggml_type type,
  1424. int64_t ne0) {
  1425. return ggml_new_tensor(ctx, type, 1, &ne0);
  1426. }
  1427. struct ggml_tensor * ggml_new_tensor_2d(
  1428. struct ggml_context * ctx,
  1429. enum ggml_type type,
  1430. int64_t ne0,
  1431. int64_t ne1) {
  1432. const int64_t ne[2] = { ne0, ne1 };
  1433. return ggml_new_tensor(ctx, type, 2, ne);
  1434. }
  1435. struct ggml_tensor * ggml_new_tensor_3d(
  1436. struct ggml_context * ctx,
  1437. enum ggml_type type,
  1438. int64_t ne0,
  1439. int64_t ne1,
  1440. int64_t ne2) {
  1441. const int64_t ne[3] = { ne0, ne1, ne2 };
  1442. return ggml_new_tensor(ctx, type, 3, ne);
  1443. }
  1444. struct ggml_tensor * ggml_new_tensor_4d(
  1445. struct ggml_context * ctx,
  1446. enum ggml_type type,
  1447. int64_t ne0,
  1448. int64_t ne1,
  1449. int64_t ne2,
  1450. int64_t ne3) {
  1451. const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
  1452. return ggml_new_tensor(ctx, type, 4, ne);
  1453. }
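// example (illustrative, assuming a valid ctx): the strides derived by ggml_new_tensor_impl()
// for a 2-D F32 tensor:
//   struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);
//   // t->ne = { 4096, 32, 1, 1 }
//   // t->nb = { 4, 16384, 524288, 524288 }  // bytes per element, row, matrix, batch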
  1454. void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
  1455. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
  1456. return (uint8_t *)ctx->mem_buffer + obj->offs;
  1457. }
  1458. struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
  1459. return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
  1460. }
  1461. void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
  1462. const int64_t ne2 = tensor->ne[2];
  1463. const int64_t ne1 = tensor->ne[1];
  1464. const int64_t ne0 = tensor->ne[0];
  1465. const int64_t i3_ = (i/(ne2*ne1*ne0));
  1466. const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
  1467. const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
  1468. const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
  1469. if (i0) {
  1470. * i0 = i0_;
  1471. }
  1472. if (i1) {
  1473. * i1 = i1_;
  1474. }
  1475. if (i2) {
  1476. * i2 = i2_;
  1477. }
  1478. if (i3) {
  1479. * i3 = i3_;
  1480. }
  1481. }
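// example (illustrative): ggml_unravel_index() inverts the row-major flattening
// i = i0 + i1*ne0 + i2*ne0*ne1 + i3*ne0*ne1*ne2, e.g. for ne = {4, 3, 2, 1} and i = 17
// it yields i0 = 1, i1 = 1, i2 = 1, i3 = 0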
  1482. void * ggml_get_data(const struct ggml_tensor * tensor) {
  1483. return tensor->data;
  1484. }
  1485. float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
  1486. assert(tensor->type == GGML_TYPE_F32);
  1487. return (float *)(tensor->data);
  1488. }
  1489. enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
  1490. GGML_ASSERT(tensor->op == GGML_OP_UNARY);
  1491. return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
  1492. }
  1493. const char * ggml_get_name(const struct ggml_tensor * tensor) {
  1494. return tensor->name;
  1495. }
  1496. struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
  1497. size_t i;
  1498. for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
  1499. tensor->name[i] = name[i];
  1500. }
  1501. tensor->name[i] = '\0';
  1502. return tensor;
  1503. }
  1504. struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
  1505. va_list args;
  1506. va_start(args, fmt);
  1507. vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
  1508. va_end(args);
  1509. return tensor;
  1510. }
  1511. struct ggml_tensor * ggml_view_tensor(
  1512. struct ggml_context * ctx,
  1513. struct ggml_tensor * src) {
  1514. struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
  1515. ggml_format_name(result, "%s (view)", src->name);
  1516. for (int i = 0; i < GGML_MAX_DIMS; i++) {
  1517. result->nb[i] = src->nb[i];
  1518. }
  1519. return result;
  1520. }
  1521. struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
  1522. struct ggml_object * obj = ctx->objects_begin;
  1523. char * const mem_buffer = ctx->mem_buffer;
  1524. while (obj != NULL) {
  1525. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1526. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1527. }
  1528. obj = obj->next;
  1529. }
  1530. return NULL;
  1531. }
  1532. struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
  1533. struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
  1534. obj = obj->next;
  1535. char * const mem_buffer = ctx->mem_buffer;
  1536. while (obj != NULL) {
  1537. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1538. return (struct ggml_tensor *)(mem_buffer + obj->offs);
  1539. }
  1540. obj = obj->next;
  1541. }
  1542. return NULL;
  1543. }
  1544. struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
  1545. struct ggml_object * obj = ctx->objects_begin;
  1546. char * const mem_buffer = ctx->mem_buffer;
  1547. while (obj != NULL) {
  1548. if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
  1549. struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
  1550. if (strcmp(cur->name, name) == 0) {
  1551. return cur;
  1552. }
  1553. }
  1554. obj = obj->next;
  1555. }
  1556. return NULL;
  1557. }
  1558. ////////////////////////////////////////////////////////////////////////////////
  1559. // ggml_dup
  1560. static struct ggml_tensor * ggml_dup_impl(
  1561. struct ggml_context * ctx,
  1562. struct ggml_tensor * a,
  1563. bool inplace) {
  1564. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1565. result->op = GGML_OP_DUP;
  1566. result->src[0] = a;
  1567. return result;
  1568. }
  1569. struct ggml_tensor * ggml_dup(
  1570. struct ggml_context * ctx,
  1571. struct ggml_tensor * a) {
  1572. return ggml_dup_impl(ctx, a, false);
  1573. }
  1574. struct ggml_tensor * ggml_dup_inplace(
  1575. struct ggml_context * ctx,
  1576. struct ggml_tensor * a) {
  1577. return ggml_dup_impl(ctx, a, true);
  1578. }
  1579. // ggml_add
  1580. static struct ggml_tensor * ggml_add_impl(
  1581. struct ggml_context * ctx,
  1582. struct ggml_tensor * a,
  1583. struct ggml_tensor * b,
  1584. bool inplace) {
  1585. GGML_ASSERT(ggml_can_repeat(b, a));
  1586. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1587. result->op = GGML_OP_ADD;
  1588. result->src[0] = a;
  1589. result->src[1] = b;
  1590. return result;
  1591. }
  1592. struct ggml_tensor * ggml_add(
  1593. struct ggml_context * ctx,
  1594. struct ggml_tensor * a,
  1595. struct ggml_tensor * b) {
  1596. return ggml_add_impl(ctx, a, b, false);
  1597. }
  1598. struct ggml_tensor * ggml_add_inplace(
  1599. struct ggml_context * ctx,
  1600. struct ggml_tensor * a,
  1601. struct ggml_tensor * b) {
  1602. return ggml_add_impl(ctx, a, b, true);
  1603. }
  1604. // ggml_add_cast
  1605. static struct ggml_tensor * ggml_add_cast_impl(
  1606. struct ggml_context * ctx,
  1607. struct ggml_tensor * a,
  1608. struct ggml_tensor * b,
  1609. enum ggml_type type) {
  1610. // TODO: support less-strict constraint
  1611. // GGML_ASSERT(ggml_can_repeat(b, a));
  1612. GGML_ASSERT(ggml_can_repeat_rows(b, a));
1613. // currently only supported for quantized input and f16/bf16
  1614. GGML_ASSERT(ggml_is_quantized(a->type) ||
  1615. a->type == GGML_TYPE_F16 ||
  1616. a->type == GGML_TYPE_BF16);
  1617. struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
  1618. result->op = GGML_OP_ADD;
  1619. result->src[0] = a;
  1620. result->src[1] = b;
  1621. return result;
  1622. }
  1623. struct ggml_tensor * ggml_add_cast(
  1624. struct ggml_context * ctx,
  1625. struct ggml_tensor * a,
  1626. struct ggml_tensor * b,
  1627. enum ggml_type type) {
  1628. return ggml_add_cast_impl(ctx, a, b, type);
  1629. }
  1630. // ggml_add1
  1631. static struct ggml_tensor * ggml_add1_impl(
  1632. struct ggml_context * ctx,
  1633. struct ggml_tensor * a,
  1634. struct ggml_tensor * b,
  1635. bool inplace) {
  1636. GGML_ASSERT(ggml_is_scalar(b));
  1637. GGML_ASSERT(ggml_is_padded_1d(a));
  1638. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1639. result->op = GGML_OP_ADD1;
  1640. result->src[0] = a;
  1641. result->src[1] = b;
  1642. return result;
  1643. }
  1644. struct ggml_tensor * ggml_add1(
  1645. struct ggml_context * ctx,
  1646. struct ggml_tensor * a,
  1647. struct ggml_tensor * b) {
  1648. return ggml_add1_impl(ctx, a, b, false);
  1649. }
  1650. struct ggml_tensor * ggml_add1_inplace(
  1651. struct ggml_context * ctx,
  1652. struct ggml_tensor * a,
  1653. struct ggml_tensor * b) {
  1654. return ggml_add1_impl(ctx, a, b, true);
  1655. }
  1656. // ggml_acc
  1657. static struct ggml_tensor * ggml_acc_impl(
  1658. struct ggml_context * ctx,
  1659. struct ggml_tensor * a,
  1660. struct ggml_tensor * b,
  1661. size_t nb1,
  1662. size_t nb2,
  1663. size_t nb3,
  1664. size_t offset,
  1665. bool inplace) {
  1666. GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
  1667. GGML_ASSERT(ggml_is_contiguous(a));
  1668. GGML_ASSERT(a->type == GGML_TYPE_F32);
  1669. GGML_ASSERT(b->type == GGML_TYPE_F32);
  1670. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1671. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  1672. ggml_set_op_params(result, params, sizeof(params));
  1673. result->op = GGML_OP_ACC;
  1674. result->src[0] = a;
  1675. result->src[1] = b;
  1676. return result;
  1677. }
  1678. struct ggml_tensor * ggml_acc(
  1679. struct ggml_context * ctx,
  1680. struct ggml_tensor * a,
  1681. struct ggml_tensor * b,
  1682. size_t nb1,
  1683. size_t nb2,
  1684. size_t nb3,
  1685. size_t offset) {
  1686. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  1687. }
  1688. struct ggml_tensor * ggml_acc_inplace(
  1689. struct ggml_context * ctx,
  1690. struct ggml_tensor * a,
  1691. struct ggml_tensor * b,
  1692. size_t nb1,
  1693. size_t nb2,
  1694. size_t nb3,
  1695. size_t offset) {
  1696. return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  1697. }
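// example (illustrative, assuming a valid ctx): ggml_acc() adds b into a strided view of a
// ("view(x,nb,offset)+=y->x" in the symbol table above), e.g. accumulating an [8 x 4] block
// into the last 4 rows of an [8 x 8] F32 tensor:
//   struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 8);
//   struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
//   struct ggml_tensor * c = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], 4*a->nb[1]);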
  1698. // ggml_sub
  1699. static struct ggml_tensor * ggml_sub_impl(
  1700. struct ggml_context * ctx,
  1701. struct ggml_tensor * a,
  1702. struct ggml_tensor * b,
  1703. bool inplace) {
  1704. GGML_ASSERT(ggml_can_repeat(b, a));
  1705. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1706. result->op = GGML_OP_SUB;
  1707. result->src[0] = a;
  1708. result->src[1] = b;
  1709. return result;
  1710. }
  1711. struct ggml_tensor * ggml_sub(
  1712. struct ggml_context * ctx,
  1713. struct ggml_tensor * a,
  1714. struct ggml_tensor * b) {
  1715. return ggml_sub_impl(ctx, a, b, false);
  1716. }
  1717. struct ggml_tensor * ggml_sub_inplace(
  1718. struct ggml_context * ctx,
  1719. struct ggml_tensor * a,
  1720. struct ggml_tensor * b) {
  1721. return ggml_sub_impl(ctx, a, b, true);
  1722. }
  1723. // ggml_mul
  1724. static struct ggml_tensor * ggml_mul_impl(
  1725. struct ggml_context * ctx,
  1726. struct ggml_tensor * a,
  1727. struct ggml_tensor * b,
  1728. bool inplace) {
  1729. GGML_ASSERT(ggml_can_repeat(b, a));
  1730. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1731. result->op = GGML_OP_MUL;
  1732. result->src[0] = a;
  1733. result->src[1] = b;
  1734. return result;
  1735. }
  1736. struct ggml_tensor * ggml_mul(
  1737. struct ggml_context * ctx,
  1738. struct ggml_tensor * a,
  1739. struct ggml_tensor * b) {
  1740. return ggml_mul_impl(ctx, a, b, false);
  1741. }
  1742. struct ggml_tensor * ggml_mul_inplace(
  1743. struct ggml_context * ctx,
  1744. struct ggml_tensor * a,
  1745. struct ggml_tensor * b) {
  1746. return ggml_mul_impl(ctx, a, b, true);
  1747. }
  1748. // ggml_div
  1749. static struct ggml_tensor * ggml_div_impl(
  1750. struct ggml_context * ctx,
  1751. struct ggml_tensor * a,
  1752. struct ggml_tensor * b,
  1753. bool inplace) {
  1754. GGML_ASSERT(ggml_can_repeat(b, a));
  1755. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1756. result->op = GGML_OP_DIV;
  1757. result->src[0] = a;
  1758. result->src[1] = b;
  1759. return result;
  1760. }
  1761. struct ggml_tensor * ggml_div(
  1762. struct ggml_context * ctx,
  1763. struct ggml_tensor * a,
  1764. struct ggml_tensor * b) {
  1765. return ggml_div_impl(ctx, a, b, false);
  1766. }
  1767. struct ggml_tensor * ggml_div_inplace(
  1768. struct ggml_context * ctx,
  1769. struct ggml_tensor * a,
  1770. struct ggml_tensor * b) {
  1771. return ggml_div_impl(ctx, a, b, true);
  1772. }
  1773. // ggml_sqr
  1774. static struct ggml_tensor * ggml_sqr_impl(
  1775. struct ggml_context * ctx,
  1776. struct ggml_tensor * a,
  1777. bool inplace) {
  1778. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1779. result->op = GGML_OP_SQR;
  1780. result->src[0] = a;
  1781. return result;
  1782. }
  1783. struct ggml_tensor * ggml_sqr(
  1784. struct ggml_context * ctx,
  1785. struct ggml_tensor * a) {
  1786. return ggml_sqr_impl(ctx, a, false);
  1787. }
  1788. struct ggml_tensor * ggml_sqr_inplace(
  1789. struct ggml_context * ctx,
  1790. struct ggml_tensor * a) {
  1791. return ggml_sqr_impl(ctx, a, true);
  1792. }
  1793. // ggml_sqrt
  1794. static struct ggml_tensor * ggml_sqrt_impl(
  1795. struct ggml_context * ctx,
  1796. struct ggml_tensor * a,
  1797. bool inplace) {
  1798. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1799. result->op = GGML_OP_SQRT;
  1800. result->src[0] = a;
  1801. return result;
  1802. }
  1803. struct ggml_tensor * ggml_sqrt(
  1804. struct ggml_context * ctx,
  1805. struct ggml_tensor * a) {
  1806. return ggml_sqrt_impl(ctx, a, false);
  1807. }
  1808. struct ggml_tensor * ggml_sqrt_inplace(
  1809. struct ggml_context * ctx,
  1810. struct ggml_tensor * a) {
  1811. return ggml_sqrt_impl(ctx, a, true);
  1812. }
  1813. // ggml_log
  1814. static struct ggml_tensor * ggml_log_impl(
  1815. struct ggml_context * ctx,
  1816. struct ggml_tensor * a,
  1817. bool inplace) {
  1818. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1819. result->op = GGML_OP_LOG;
  1820. result->src[0] = a;
  1821. return result;
  1822. }
  1823. struct ggml_tensor * ggml_log(
  1824. struct ggml_context * ctx,
  1825. struct ggml_tensor * a) {
  1826. return ggml_log_impl(ctx, a, false);
  1827. }
  1828. struct ggml_tensor * ggml_log_inplace(
  1829. struct ggml_context * ctx,
  1830. struct ggml_tensor * a) {
  1831. return ggml_log_impl(ctx, a, true);
  1832. }
  1833. // ggml_sin
  1834. static struct ggml_tensor * ggml_sin_impl(
  1835. struct ggml_context * ctx,
  1836. struct ggml_tensor * a,
  1837. bool inplace) {
  1838. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1839. result->op = GGML_OP_SIN;
  1840. result->src[0] = a;
  1841. return result;
  1842. }
  1843. struct ggml_tensor * ggml_sin(
  1844. struct ggml_context * ctx,
  1845. struct ggml_tensor * a) {
  1846. return ggml_sin_impl(ctx, a, false);
  1847. }
  1848. struct ggml_tensor * ggml_sin_inplace(
  1849. struct ggml_context * ctx,
  1850. struct ggml_tensor * a) {
  1851. return ggml_sin_impl(ctx, a, true);
  1852. }
  1853. // ggml_cos
  1854. static struct ggml_tensor * ggml_cos_impl(
  1855. struct ggml_context * ctx,
  1856. struct ggml_tensor * a,
  1857. bool inplace) {
  1858. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  1859. result->op = GGML_OP_COS;
  1860. result->src[0] = a;
  1861. return result;
  1862. }
  1863. struct ggml_tensor * ggml_cos(
  1864. struct ggml_context * ctx,
  1865. struct ggml_tensor * a) {
  1866. return ggml_cos_impl(ctx, a, false);
  1867. }
  1868. struct ggml_tensor * ggml_cos_inplace(
  1869. struct ggml_context * ctx,
  1870. struct ggml_tensor * a) {
  1871. return ggml_cos_impl(ctx, a, true);
  1872. }
  1873. // ggml_sum
  1874. struct ggml_tensor * ggml_sum(
  1875. struct ggml_context * ctx,
  1876. struct ggml_tensor * a) {
  1877. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  1878. result->op = GGML_OP_SUM;
  1879. result->src[0] = a;
  1880. return result;
  1881. }
  1882. // ggml_sum_rows
  1883. struct ggml_tensor * ggml_sum_rows(
  1884. struct ggml_context * ctx,
  1885. struct ggml_tensor * a) {
  1886. int64_t ne[GGML_MAX_DIMS] = { 1 };
  1887. for (int i = 1; i < GGML_MAX_DIMS; ++i) {
  1888. ne[i] = a->ne[i];
  1889. }
  1890. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1891. result->op = GGML_OP_SUM_ROWS;
  1892. result->src[0] = a;
  1893. return result;
  1894. }
  1895. // ggml_mean
  1896. struct ggml_tensor * ggml_mean(
  1897. struct ggml_context * ctx,
  1898. struct ggml_tensor * a) {
  1899. int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
  1900. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  1901. result->op = GGML_OP_MEAN;
  1902. result->src[0] = a;
  1903. return result;
  1904. }
  1905. // ggml_argmax
  1906. struct ggml_tensor * ggml_argmax(
  1907. struct ggml_context * ctx,
  1908. struct ggml_tensor * a) {
  1909. GGML_ASSERT(ggml_is_matrix(a));
  1910. GGML_ASSERT(a->ne[0] <= INT32_MAX);
  1911. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
  1912. result->op = GGML_OP_ARGMAX;
  1913. result->src[0] = a;
  1914. return result;
  1915. }
  1916. // ggml_count_equal
  1917. struct ggml_tensor * ggml_count_equal(
  1918. struct ggml_context * ctx,
  1919. struct ggml_tensor * a,
  1920. struct ggml_tensor * b) {
  1921. GGML_ASSERT(ggml_are_same_shape(a, b));
  1922. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
  1923. result->op = GGML_OP_COUNT_EQUAL;
  1924. result->src[0] = a;
  1925. result->src[1] = b;
  1926. return result;
  1927. }
  1928. // ggml_repeat
  1929. struct ggml_tensor * ggml_repeat(
  1930. struct ggml_context * ctx,
  1931. struct ggml_tensor * a,
  1932. struct ggml_tensor * b) {
  1933. GGML_ASSERT(ggml_can_repeat(a, b));
  1934. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1935. result->op = GGML_OP_REPEAT;
  1936. result->src[0] = a;
  1937. return result;
  1938. }
  1939. struct ggml_tensor * ggml_repeat_4d(
  1940. struct ggml_context * ctx,
  1941. struct ggml_tensor * a,
  1942. int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
  1943. const bool can_repeat = ggml_is_empty(a) || (
  1944. (ne0 % a->ne[0] == 0) &&
  1945. (ne1 % a->ne[1] == 0) &&
  1946. (ne2 % a->ne[2] == 0) &&
  1947. (ne3 % a->ne[3] == 0)
  1948. );
  1949. GGML_ASSERT(can_repeat);
  1950. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
  1951. result->op = GGML_OP_REPEAT;
  1952. result->src[0] = a;
  1953. return result;
  1954. }
  1955. // ggml_repeat_back
  1956. struct ggml_tensor * ggml_repeat_back(
  1957. struct ggml_context * ctx,
  1958. struct ggml_tensor * a,
  1959. struct ggml_tensor * b) {
  1960. GGML_ASSERT(ggml_can_repeat(b, a));
  1961. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
  1962. result->op = GGML_OP_REPEAT_BACK;
  1963. result->src[0] = a;
  1964. return result;
  1965. }
  1966. // ggml_concat
  1967. struct ggml_tensor * ggml_concat(
  1968. struct ggml_context * ctx,
  1969. struct ggml_tensor * a,
  1970. struct ggml_tensor * b,
  1971. int dim) {
  1972. GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
  1973. GGML_ASSERT(a->type == b->type);
  1974. int64_t ne[GGML_MAX_DIMS];
  1975. for (int d = 0; d < GGML_MAX_DIMS; ++d) {
  1976. if (d == dim) {
  1977. ne[d] = a->ne[d] + b->ne[d];
  1978. continue;
  1979. }
  1980. GGML_ASSERT(a->ne[d] == b->ne[d]);
  1981. ne[d] = a->ne[d];
  1982. }
  1983. struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
  1984. ggml_set_op_params_i32(result, 0, dim);
  1985. result->op = GGML_OP_CONCAT;
  1986. result->src[0] = a;
  1987. result->src[1] = b;
  1988. return result;
  1989. }
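// example (illustrative): all dimensions except `dim` must match and the result grows along
// `dim`, e.g. concatenating along dimension 2:
//   a: ne = {64, 32, 4, 1},  b: ne = {64, 32, 6, 1}
//   ggml_concat(ctx, a, b, 2) -> ne = {64, 32, 10, 1}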
  1990. // ggml_abs
  1991. struct ggml_tensor * ggml_abs(
  1992. struct ggml_context * ctx,
  1993. struct ggml_tensor * a) {
  1994. return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
  1995. }
  1996. struct ggml_tensor * ggml_abs_inplace(
  1997. struct ggml_context * ctx,
  1998. struct ggml_tensor * a) {
  1999. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
  2000. }
  2001. // ggml_sgn
  2002. struct ggml_tensor * ggml_sgn(
  2003. struct ggml_context * ctx,
  2004. struct ggml_tensor * a) {
  2005. return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
  2006. }
  2007. struct ggml_tensor * ggml_sgn_inplace(
  2008. struct ggml_context * ctx,
  2009. struct ggml_tensor * a) {
  2010. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
  2011. }
  2012. // ggml_neg
  2013. struct ggml_tensor * ggml_neg(
  2014. struct ggml_context * ctx,
  2015. struct ggml_tensor * a) {
  2016. return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
  2017. }
  2018. struct ggml_tensor * ggml_neg_inplace(
  2019. struct ggml_context * ctx,
  2020. struct ggml_tensor * a) {
  2021. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
  2022. }
  2023. // ggml_step
  2024. struct ggml_tensor * ggml_step(
  2025. struct ggml_context * ctx,
  2026. struct ggml_tensor * a) {
  2027. return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
  2028. }
  2029. struct ggml_tensor * ggml_step_inplace(
  2030. struct ggml_context * ctx,
  2031. struct ggml_tensor * a) {
  2032. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
  2033. }
  2034. // ggml_tanh
  2035. struct ggml_tensor * ggml_tanh(
  2036. struct ggml_context * ctx,
  2037. struct ggml_tensor * a) {
  2038. return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
  2039. }
  2040. struct ggml_tensor * ggml_tanh_inplace(
  2041. struct ggml_context * ctx,
  2042. struct ggml_tensor * a) {
  2043. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
  2044. }
  2045. // ggml_elu
  2046. struct ggml_tensor * ggml_elu(
  2047. struct ggml_context * ctx,
  2048. struct ggml_tensor * a) {
  2049. return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
  2050. }
  2051. struct ggml_tensor * ggml_elu_inplace(
  2052. struct ggml_context * ctx,
  2053. struct ggml_tensor * a) {
  2054. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
  2055. }
  2056. // ggml_relu
  2057. struct ggml_tensor * ggml_relu(
  2058. struct ggml_context * ctx,
  2059. struct ggml_tensor * a) {
  2060. return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
  2061. }
  2062. struct ggml_tensor * ggml_relu_inplace(
  2063. struct ggml_context * ctx,
  2064. struct ggml_tensor * a) {
  2065. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
  2066. }
  2067. // ggml_leaky_relu
  2068. struct ggml_tensor * ggml_leaky_relu(
  2069. struct ggml_context * ctx,
  2070. struct ggml_tensor * a,
  2071. float negative_slope,
  2072. bool inplace) {
  2073. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2074. ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
  2075. result->op = GGML_OP_LEAKY_RELU;
  2076. result->src[0] = a;
  2077. return result;
  2078. }
  2079. // ggml_sigmoid
  2080. struct ggml_tensor * ggml_sigmoid(
  2081. struct ggml_context * ctx,
  2082. struct ggml_tensor * a) {
  2083. return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
  2084. }
  2085. struct ggml_tensor * ggml_sigmoid_inplace(
  2086. struct ggml_context * ctx,
  2087. struct ggml_tensor * a) {
  2088. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
  2089. }
  2090. // ggml_gelu
  2091. struct ggml_tensor * ggml_gelu(
  2092. struct ggml_context * ctx,
  2093. struct ggml_tensor * a) {
  2094. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
  2095. }
  2096. struct ggml_tensor * ggml_gelu_inplace(
  2097. struct ggml_context * ctx,
  2098. struct ggml_tensor * a) {
  2099. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
  2100. }
  2101. // ggml_gelu_erf
  2102. struct ggml_tensor * ggml_gelu_erf(
  2103. struct ggml_context * ctx,
  2104. struct ggml_tensor * a) {
  2105. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
  2106. }
  2107. struct ggml_tensor * ggml_gelu_erf_inplace(
  2108. struct ggml_context * ctx,
  2109. struct ggml_tensor * a) {
  2110. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
  2111. }
  2112. // ggml_gelu_quick
  2113. struct ggml_tensor * ggml_gelu_quick(
  2114. struct ggml_context * ctx,
  2115. struct ggml_tensor * a) {
  2116. return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2117. }
  2118. struct ggml_tensor * ggml_gelu_quick_inplace(
  2119. struct ggml_context * ctx,
  2120. struct ggml_tensor * a) {
  2121. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
  2122. }
  2123. // ggml_silu
  2124. struct ggml_tensor * ggml_silu(
  2125. struct ggml_context * ctx,
  2126. struct ggml_tensor * a) {
  2127. return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
  2128. }
  2129. struct ggml_tensor * ggml_silu_inplace(
  2130. struct ggml_context * ctx,
  2131. struct ggml_tensor * a) {
  2132. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
  2133. }
  2134. // ggml_silu_back
  2135. struct ggml_tensor * ggml_silu_back(
  2136. struct ggml_context * ctx,
  2137. struct ggml_tensor * a,
  2138. struct ggml_tensor * b) {
  2139. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2140. result->op = GGML_OP_SILU_BACK;
  2141. result->src[0] = a;
  2142. result->src[1] = b;
  2143. return result;
  2144. }
2145. // ggml_hardswish
  2146. struct ggml_tensor * ggml_hardswish(
  2147. struct ggml_context * ctx,
  2148. struct ggml_tensor * a) {
  2149. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
  2150. }
2151. // ggml_hardsigmoid
  2152. struct ggml_tensor * ggml_hardsigmoid(
  2153. struct ggml_context * ctx,
  2154. struct ggml_tensor * a) {
  2155. return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
  2156. }
2157. // ggml_exp
  2158. struct ggml_tensor * ggml_exp(
  2159. struct ggml_context * ctx,
  2160. struct ggml_tensor * a) {
  2161. return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
  2162. }
  2163. struct ggml_tensor * ggml_exp_inplace(
  2164. struct ggml_context * ctx,
  2165. struct ggml_tensor * a) {
  2166. return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
  2167. }
  2168. // ggml_norm
  2169. static struct ggml_tensor * ggml_norm_impl(
  2170. struct ggml_context * ctx,
  2171. struct ggml_tensor * a,
  2172. float eps,
  2173. bool inplace) {
  2174. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2175. ggml_set_op_params(result, &eps, sizeof(eps));
  2176. result->op = GGML_OP_NORM;
  2177. result->src[0] = a;
  2178. return result;
  2179. }
  2180. struct ggml_tensor * ggml_norm(
  2181. struct ggml_context * ctx,
  2182. struct ggml_tensor * a,
  2183. float eps) {
  2184. return ggml_norm_impl(ctx, a, eps, false);
  2185. }
  2186. struct ggml_tensor * ggml_norm_inplace(
  2187. struct ggml_context * ctx,
  2188. struct ggml_tensor * a,
  2189. float eps) {
  2190. return ggml_norm_impl(ctx, a, eps, true);
  2191. }
  2192. // ggml_rms_norm
  2193. static struct ggml_tensor * ggml_rms_norm_impl(
  2194. struct ggml_context * ctx,
  2195. struct ggml_tensor * a,
  2196. float eps,
  2197. bool inplace) {
  2198. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2199. ggml_set_op_params(result, &eps, sizeof(eps));
  2200. result->op = GGML_OP_RMS_NORM;
  2201. result->src[0] = a;
  2202. return result;
  2203. }
  2204. struct ggml_tensor * ggml_rms_norm(
  2205. struct ggml_context * ctx,
  2206. struct ggml_tensor * a,
  2207. float eps) {
  2208. return ggml_rms_norm_impl(ctx, a, eps, false);
  2209. }
  2210. struct ggml_tensor * ggml_rms_norm_inplace(
  2211. struct ggml_context * ctx,
  2212. struct ggml_tensor * a,
  2213. float eps) {
  2214. return ggml_rms_norm_impl(ctx, a, eps, true);
  2215. }
  2216. // ggml_rms_norm_back
  2217. struct ggml_tensor * ggml_rms_norm_back(
  2218. struct ggml_context * ctx,
  2219. struct ggml_tensor * a,
  2220. struct ggml_tensor * b,
  2221. float eps) {
  2222. struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
  2223. ggml_set_op_params(result, &eps, sizeof(eps));
  2224. result->op = GGML_OP_RMS_NORM_BACK;
  2225. result->src[0] = a;
  2226. result->src[1] = b;
  2227. return result;
  2228. }
  2229. // ggml_group_norm
  2230. static struct ggml_tensor * ggml_group_norm_impl(
  2231. struct ggml_context * ctx,
  2232. struct ggml_tensor * a,
  2233. int n_groups,
  2234. float eps,
  2235. bool inplace) {
  2236. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2237. ggml_set_op_params_i32(result, 0, n_groups);
  2238. ggml_set_op_params_f32(result, 1, eps);
  2239. result->op = GGML_OP_GROUP_NORM;
  2240. result->src[0] = a;
  2241. return result;
  2242. }
  2243. struct ggml_tensor * ggml_group_norm(
  2244. struct ggml_context * ctx,
  2245. struct ggml_tensor * a,
  2246. int n_groups,
  2247. float eps) {
  2248. return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
  2249. }
  2250. struct ggml_tensor * ggml_group_norm_inplace(
  2251. struct ggml_context * ctx,
  2252. struct ggml_tensor * a,
  2253. int n_groups,
  2254. float eps) {
  2255. return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
  2256. }
  2257. // ggml_l2_norm
  2258. static struct ggml_tensor * ggml_l2_norm_impl(
  2259. struct ggml_context * ctx,
  2260. struct ggml_tensor * a,
  2261. float eps,
  2262. bool inplace) {
  2263. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2264. ggml_set_op_params_f32(result, 0, eps);
  2265. result->op = GGML_OP_L2_NORM;
  2266. result->src[0] = a;
  2267. return result;
  2268. }
  2269. struct ggml_tensor * ggml_l2_norm(
  2270. struct ggml_context * ctx,
  2271. struct ggml_tensor * a,
  2272. float eps) {
  2273. return ggml_l2_norm_impl(ctx, a, eps, false);
  2274. }
  2275. struct ggml_tensor * ggml_l2_norm_inplace(
  2276. struct ggml_context * ctx,
  2277. struct ggml_tensor * a,
  2278. float eps) {
  2279. return ggml_l2_norm_impl(ctx, a, eps, true);
  2280. }
  2281. // ggml_mul_mat
  2282. static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2283. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2284. return (t0->ne[0] == t1->ne[0]) &&
  2285. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2286. (t1->ne[3]%t0->ne[3] == 0);
  2287. }
  2288. struct ggml_tensor * ggml_mul_mat(
  2289. struct ggml_context * ctx,
  2290. struct ggml_tensor * a,
  2291. struct ggml_tensor * b) {
  2292. GGML_ASSERT(ggml_can_mul_mat(a, b));
  2293. GGML_ASSERT(!ggml_is_transposed(a));
  2294. const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
  2295. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2296. result->op = GGML_OP_MUL_MAT;
  2297. result->src[0] = a;
  2298. result->src[1] = b;
  2299. return result;
  2300. }
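// example (illustrative, assuming a valid ctx): both operands store the shared dimension in
// ne[0] and the result is always F32:
//   struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32);  // K = 64, M = 32
//   struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 128); // K = 64, N = 128
//   struct ggml_tensor * c = ggml_mul_mat(ctx, a, b);                         // c->ne = { 32, 128, 1, 1 }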
  2301. void ggml_mul_mat_set_prec(
  2302. struct ggml_tensor * a,
  2303. enum ggml_prec prec) {
  2304. GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
  2305. const int32_t prec_i32 = (int32_t) prec;
  2306. ggml_set_op_params_i32(a, 0, prec_i32);
  2307. }
  2308. // ggml_mul_mat_id
  2309. /*
  2310. c = ggml_mul_mat_id(ctx, as, b, ids);
  2311. as -> [cols, rows, n_expert]
  2312. b -> [cols, n_expert_used, n_tokens]
  2313. ids -> [n_expert_used, n_tokens] (i32)
  2314. c -> [rows, n_expert_used, n_tokens]
2315. in b, n_expert_used can be broadcast to match the n_expert_used of ids
  2316. c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
  2317. */
  2318. struct ggml_tensor * ggml_mul_mat_id(
  2319. struct ggml_context * ctx,
  2320. struct ggml_tensor * as,
  2321. struct ggml_tensor * b,
  2322. struct ggml_tensor * ids) {
  2323. GGML_ASSERT(!ggml_is_transposed(as));
  2324. GGML_ASSERT(ids->type == GGML_TYPE_I32);
  2325. GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
  2326. GGML_ASSERT(b->ne[3] == 1); // b is 3d
  2327. GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
  2328. GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
  2329. GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
  2330. GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
  2331. const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
  2332. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2333. result->op = GGML_OP_MUL_MAT_ID;
  2334. result->src[0] = as;
  2335. result->src[1] = b;
  2336. result->src[2] = ids;
  2337. return result;
  2338. }
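// example (illustrative, assuming a valid ctx) matching the shape comment above:
// 8 experts of [64 x 128], 16 tokens, 2 experts used per token:
//   struct ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 128, 8);
//   struct ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 2, 16);
//   struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, 2, 16);
//   struct ggml_tensor * c   = ggml_mul_mat_id(ctx, as, b, ids); // c->ne = { 128, 2, 16, 1 }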
  2339. // ggml_out_prod
  2340. static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  2341. static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
  2342. return (t0->ne[1] == t1->ne[1]) &&
  2343. (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
  2344. (t1->ne[3]%t0->ne[3] == 0);
  2345. }
  2346. struct ggml_tensor * ggml_out_prod(
  2347. struct ggml_context * ctx,
  2348. struct ggml_tensor * a,
  2349. struct ggml_tensor * b) {
  2350. GGML_ASSERT(ggml_can_out_prod(a, b));
  2351. GGML_ASSERT(!ggml_is_transposed(a));
  2352. // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
  2353. const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
  2354. struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
  2355. result->op = GGML_OP_OUT_PROD;
  2356. result->src[0] = a;
  2357. result->src[1] = b;
  2358. return result;
  2359. }
  2360. // ggml_scale
  2361. static struct ggml_tensor * ggml_scale_impl(
  2362. struct ggml_context * ctx,
  2363. struct ggml_tensor * a,
  2364. float s,
  2365. bool inplace) {
  2366. GGML_ASSERT(ggml_is_padded_1d(a));
  2367. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2368. ggml_set_op_params(result, &s, sizeof(s));
  2369. result->op = GGML_OP_SCALE;
  2370. result->src[0] = a;
  2371. return result;
  2372. }
  2373. struct ggml_tensor * ggml_scale(
  2374. struct ggml_context * ctx,
  2375. struct ggml_tensor * a,
  2376. float s) {
  2377. return ggml_scale_impl(ctx, a, s, false);
  2378. }
  2379. struct ggml_tensor * ggml_scale_inplace(
  2380. struct ggml_context * ctx,
  2381. struct ggml_tensor * a,
  2382. float s) {
  2383. return ggml_scale_impl(ctx, a, s, true);
  2384. }
  2385. // ggml_set
  2386. static struct ggml_tensor * ggml_set_impl(
  2387. struct ggml_context * ctx,
  2388. struct ggml_tensor * a,
  2389. struct ggml_tensor * b,
  2390. size_t nb1,
  2391. size_t nb2,
  2392. size_t nb3,
  2393. size_t offset,
  2394. bool inplace) {
  2395. GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
  2396. // make a view of the destination
  2397. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  2398. GGML_ASSERT(offset < (size_t)(1 << 30));
  2399. int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
  2400. ggml_set_op_params(result, params, sizeof(params));
  2401. result->op = GGML_OP_SET;
  2402. result->src[0] = a;
  2403. result->src[1] = b;
  2404. return result;
  2405. }
  2406. struct ggml_tensor * ggml_set(
  2407. struct ggml_context * ctx,
  2408. struct ggml_tensor * a,
  2409. struct ggml_tensor * b,
  2410. size_t nb1,
  2411. size_t nb2,
  2412. size_t nb3,
  2413. size_t offset) {
  2414. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
  2415. }
  2416. struct ggml_tensor * ggml_set_inplace(
  2417. struct ggml_context * ctx,
  2418. struct ggml_tensor * a,
  2419. struct ggml_tensor * b,
  2420. size_t nb1,
  2421. size_t nb2,
  2422. size_t nb3,
  2423. size_t offset) {
  2424. return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
  2425. }
  2426. struct ggml_tensor * ggml_set_1d(
  2427. struct ggml_context * ctx,
  2428. struct ggml_tensor * a,
  2429. struct ggml_tensor * b,
  2430. size_t offset) {
  2431. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
  2432. }
  2433. struct ggml_tensor * ggml_set_1d_inplace(
  2434. struct ggml_context * ctx,
  2435. struct ggml_tensor * a,
  2436. struct ggml_tensor * b,
  2437. size_t offset) {
  2438. return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
  2439. }
  2440. struct ggml_tensor * ggml_set_2d(
  2441. struct ggml_context * ctx,
  2442. struct ggml_tensor * a,
  2443. struct ggml_tensor * b,
  2444. size_t nb1,
  2445. size_t offset) {
  2446. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
  2447. }
  2448. struct ggml_tensor * ggml_set_2d_inplace(
  2449. struct ggml_context * ctx,
  2450. struct ggml_tensor * a,
  2451. struct ggml_tensor * b,
  2452. size_t nb1,
  2453. size_t offset) {
  2454. return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
  2455. }
// ggml_cpy

static struct ggml_tensor * ggml_cpy_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));

    // make a view of the destination
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
    if (strlen(b->name) > 0) {
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
    } else {
        ggml_format_name(result, "%s (copy)", a->name);
    }

    result->op = GGML_OP_CPY;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_cpy(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_cpy_impl(ctx, a, b);
}

struct ggml_tensor * ggml_cast(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_type type) {
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
    ggml_format_name(result, "%s (copy)", a->name);

    result->op = GGML_OP_CPY;
    result->src[0] = a;
    result->src[1] = result;

    return result;
}
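
// Illustrative sketch (editor's addition): casting an F32 tensor to F16 via the
// GGML_OP_CPY node that ggml_cast builds. `ctx` and `cur` are assumed caller-provided.
//
//     struct ggml_tensor * cur_f16 = ggml_cast(ctx, cur, GGML_TYPE_F16);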
// ggml_cont

static struct ggml_tensor * ggml_cont_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
    ggml_format_name(result, "%s (cont)", a->name);

    result->op = GGML_OP_CONT;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_cont(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    return ggml_cont_impl(ctx, a);
}

// make contiguous, with new shape
GGML_API struct ggml_tensor * ggml_cont_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0) {
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
}

GGML_API struct ggml_tensor * ggml_cont_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1) {
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
}

GGML_API struct ggml_tensor * ggml_cont_3d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2) {
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
}

struct ggml_tensor * ggml_cont_4d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2,
        int64_t ne3) {
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
    ggml_format_name(result, "%s (cont)", a->name);

    result->op = GGML_OP_CONT;
    result->src[0] = a;

    return result;
}
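
// Illustrative sketch (editor's addition): making a permuted view contiguous before
// an op that requires contiguous memory. `ctx` and `kq` are assumed caller-provided.
//
//     struct ggml_tensor * kq_cont = ggml_cont(ctx, ggml_permute(ctx, kq, 0, 2, 1, 3));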
// ggml_reshape

struct ggml_tensor * ggml_reshape(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    GGML_ASSERT(ggml_is_contiguous(a));
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));

    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0);

    const int64_t ne[1] = { ne0 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);

    const int64_t ne[2] = { ne0, ne1 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_3d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);

    const int64_t ne[3] = { ne0, ne1, ne2 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_4d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2,
        int64_t ne3) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);

    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}
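
// Illustrative sketch (editor's addition): splitting a packed, contiguous activation
// of shape [n_embd, n_tokens] into per-head form [n_embd_head, n_head, n_tokens],
// assuming n_embd == n_embd_head*n_head and that all identifiers are caller-provided.
//
//     struct ggml_tensor * q3d = ggml_reshape_3d(ctx, q, n_embd_head, n_head, n_tokens);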
static struct ggml_tensor * ggml_view_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_dims,
        const int64_t * ne,
        size_t offset) {
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
    ggml_format_name(result, "%s (view)", a->name);

    ggml_set_op_params(result, &offset, sizeof(offset));

    result->op = GGML_OP_VIEW;
    result->src[0] = a;

    return result;
}

// ggml_view_1d

struct ggml_tensor * ggml_view_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        size_t offset) {
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);

    return result;
}

// ggml_view_2d

struct ggml_tensor * ggml_view_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        size_t nb1,
        size_t offset) {
    const int64_t ne[2] = { ne0, ne1 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = result->nb[1]*ne1;
    result->nb[3] = result->nb[2];

    return result;
}

// ggml_view_3d

struct ggml_tensor * ggml_view_3d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2,
        size_t nb1,
        size_t nb2,
        size_t offset) {
    const int64_t ne[3] = { ne0, ne1, ne2 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = nb2;
    result->nb[3] = result->nb[2]*ne2;

    return result;
}

// ggml_view_4d

struct ggml_tensor * ggml_view_4d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int64_t ne0,
        int64_t ne1,
        int64_t ne2,
        int64_t ne3,
        size_t nb1,
        size_t nb2,
        size_t nb3,
        size_t offset) {
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = nb2;
    result->nb[3] = nb3;

    return result;
}
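
// Illustrative sketch (editor's addition): viewing the first n_rows rows of an F32
// matrix `w` without copying. The row stride in bytes (nb1) is taken from the parent
// and the byte offset is 0; `ctx`, `w` and `n_rows` are assumed caller-provided.
//
//     struct ggml_tensor * head = ggml_view_2d(ctx, w, w->ne[0], n_rows, w->nb[1], 0);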
// ggml_permute

struct ggml_tensor * ggml_permute(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int axis0,
        int axis1,
        int axis2,
        int axis3) {
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);

    GGML_ASSERT(axis0 != axis1);
    GGML_ASSERT(axis0 != axis2);
    GGML_ASSERT(axis0 != axis3);
    GGML_ASSERT(axis1 != axis2);
    GGML_ASSERT(axis1 != axis3);
    GGML_ASSERT(axis2 != axis3);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
    ggml_format_name(result, "%s (permuted)", a->name);

    int ne[GGML_MAX_DIMS];
    int nb[GGML_MAX_DIMS];

    ne[axis0] = a->ne[0];
    ne[axis1] = a->ne[1];
    ne[axis2] = a->ne[2];
    ne[axis3] = a->ne[3];

    nb[axis0] = a->nb[0];
    nb[axis1] = a->nb[1];
    nb[axis2] = a->nb[2];
    nb[axis3] = a->nb[3];

    result->ne[0] = ne[0];
    result->ne[1] = ne[1];
    result->ne[2] = ne[2];
    result->ne[3] = ne[3];

    result->nb[0] = nb[0];
    result->nb[1] = nb[1];
    result->nb[2] = nb[2];
    result->nb[3] = nb[3];

    result->op = GGML_OP_PERMUTE;
    result->src[0] = a;

    int32_t params[] = { axis0, axis1, axis2, axis3 };
    ggml_set_op_params(result, params, sizeof(params));

    return result;
}
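
// Illustrative sketch (editor's addition): the common attention layout change from
// [n_embd_head, n_head, n_tokens, n_batch] to [n_embd_head, n_tokens, n_head, n_batch].
// axisN gives the destination position of source dimension N, so axes 1 and 2 swap;
// all identifiers are assumed caller-provided.
//
//     struct ggml_tensor * qp = ggml_permute(ctx, q, 0, 2, 1, 3);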
// ggml_transpose

struct ggml_tensor * ggml_transpose(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
    ggml_format_name(result, "%s (transposed)", a->name);

    result->ne[0] = a->ne[1];
    result->ne[1] = a->ne[0];

    result->nb[0] = a->nb[1];
    result->nb[1] = a->nb[0];

    result->op = GGML_OP_TRANSPOSE;
    result->src[0] = a;

    return result;
}

// ggml_get_rows

struct ggml_tensor * ggml_get_rows(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    GGML_ASSERT(a->ne[2] == b->ne[1]);
    GGML_ASSERT(b->ne[3] == 1);
    GGML_ASSERT(b->type == GGML_TYPE_I32);

    // TODO: implement non F32 return
    enum ggml_type type = GGML_TYPE_F32;
    if (a->type == GGML_TYPE_I32) {
        type = a->type;
    }

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);

    result->op = GGML_OP_GET_ROWS;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
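
// Illustrative sketch (editor's addition): embedding lookup, selecting rows of a
// token-embedding matrix by an I32 index vector. `tok_embd` [n_embd, n_vocab] and
// `inp_tokens` [n_tokens] are assumed caller-provided; the result is [n_embd, n_tokens].
//
//     struct ggml_tensor * inp = ggml_get_rows(ctx, tok_embd, inp_tokens);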
// ggml_get_rows_back

struct ggml_tensor * ggml_get_rows_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c) {
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));

    // TODO: implement non F32 return
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);

    result->op = GGML_OP_GET_ROWS_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_diag

struct ggml_tensor * ggml_diag(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    GGML_ASSERT(a->ne[1] == 1);

    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);

    result->op = GGML_OP_DIAG;
    result->src[0] = a;

    return result;
}

// ggml_diag_mask_inf

static struct ggml_tensor * ggml_diag_mask_inf_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past,
        bool inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_DIAG_MASK_INF;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_diag_mask_inf(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past) {
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
}

struct ggml_tensor * ggml_diag_mask_inf_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past) {
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
}

// ggml_diag_mask_zero

static struct ggml_tensor * ggml_diag_mask_zero_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past,
        bool inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_DIAG_MASK_ZERO;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_diag_mask_zero(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past) {
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
}

struct ggml_tensor * ggml_diag_mask_zero_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past) {
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
}
// ggml_soft_max

static struct ggml_tensor * ggml_soft_max_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * mask,
        float scale,
        float max_bias,
        bool inplace) {
    GGML_ASSERT(ggml_is_contiguous(a));

    if (mask) {
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(ggml_is_matrix(mask));
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
    }

    if (max_bias > 0.0f) {
        GGML_ASSERT(mask);
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    float params[] = { scale, max_bias };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_SOFT_MAX;
    result->src[0] = a;
    result->src[1] = mask;

    return result;
}

struct ggml_tensor * ggml_soft_max(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
}

struct ggml_tensor * ggml_soft_max_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
}

struct ggml_tensor * ggml_soft_max_ext(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * mask,
        float scale,
        float max_bias) {
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
}

// ggml_soft_max_ext_back

static struct ggml_tensor * ggml_soft_max_ext_back_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        float scale,
        float max_bias,
        bool inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op = GGML_OP_SOFT_MAX_BACK;
    result->src[0] = a;
    result->src[1] = b;

    memcpy((float *) result->op_params + 0, &scale, sizeof(float));
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));

    return result;
}

struct ggml_tensor * ggml_soft_max_ext_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        float scale,
        float max_bias) {
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
}

struct ggml_tensor * ggml_soft_max_ext_back_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        float scale,
        float max_bias) {
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
}
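
// Illustrative sketch (editor's addition): scaled, masked softmax over attention
// scores `kq`, with an F32/F16 mask padded as required by the asserts above.
// `ctx`, `kq`, `kq_mask` and `n_embd_head` are assumed caller-provided.
//
//     kq = ggml_soft_max_ext(ctx, kq, kq_mask, 1.0f/sqrtf((float) n_embd_head), 0.0f);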
// ggml_rope

static struct ggml_tensor * ggml_rope_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        int n_dims,
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow,
        bool inplace) {
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");

    GGML_ASSERT(ggml_is_vector(b));
    GGML_ASSERT(b->type == GGML_TYPE_I32);
    GGML_ASSERT(a->ne[2] == b->ne[0]);

    if (c) {
        GGML_ASSERT(c->type == GGML_TYPE_F32);
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
    }

    int sections[4] = {0, 0, 0, 0};

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
    memcpy(params +  5, &freq_base,    sizeof(float));
    memcpy(params +  6, &freq_scale,   sizeof(float));
    memcpy(params +  7, &ext_factor,   sizeof(float));
    memcpy(params +  8, &attn_factor,  sizeof(float));
    memcpy(params +  9, &beta_fast,    sizeof(float));
    memcpy(params + 10, &beta_slow,    sizeof(float));
    memcpy(params + 11, &sections,     sizeof(int)*4);
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_ROPE;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

struct ggml_tensor * ggml_rope(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int n_dims,
        int mode) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
    );
}

struct ggml_tensor * ggml_rope_multi(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        int n_dims,
        int sections[4],
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    // Multimodal Rotary Position Embedding
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");

    GGML_ASSERT(ggml_is_vector(b));
    GGML_ASSERT(b->type == GGML_TYPE_I32);
    GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token

    if (c) {
        GGML_ASSERT(c->type == GGML_TYPE_F32);
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
    }

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
    memcpy(params +  5, &freq_base,    sizeof(float));
    memcpy(params +  6, &freq_scale,   sizeof(float));
    memcpy(params +  7, &ext_factor,   sizeof(float));
    memcpy(params +  8, &attn_factor,  sizeof(float));
    memcpy(params +  9, &beta_fast,    sizeof(float));
    memcpy(params + 10, &beta_slow,    sizeof(float));
    memcpy(&params[11], sections, sizeof(int)*4);
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_ROPE;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

struct ggml_tensor * ggml_rope_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int n_dims,
        int mode) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
    );
}

struct ggml_tensor * ggml_rope_ext(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        int n_dims,
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}

struct ggml_tensor * ggml_rope_ext_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        int n_dims,
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, true
    );
}

struct ggml_tensor * ggml_rope_custom(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int n_dims,
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}

struct ggml_tensor * ggml_rope_custom_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int n_dims,
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, true
    );
}
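
// Illustrative sketch (editor's addition): standard RoPE over a per-head query tensor
// `q` of shape [n_embd_head, n_head, n_tokens] with an I32 position vector `pos` of
// length n_tokens (matching the a->ne[2] == b->ne[0] assert); mode 0 is the default
// rotation. All identifiers are assumed caller-provided.
//
//     q = ggml_rope(ctx, q, pos, n_rot, 0);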
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
}

void ggml_rope_yarn_corr_dims(
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
) {
    // start and end correction dims
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
    dims[0] = MAX(0, start);
    dims[1] = MIN(n_dims - 1, end);
}
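
// Editor's note (approximate worked example, values assumed): with n_dims = 128,
// n_ctx_orig = 4096, freq_base = 10000, beta_fast = 32 and beta_slow = 1, the
// correction range after the floor/ceil and clamping above comes out to roughly
// dims = { 20, 46 }.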
// ggml_rope_back

struct ggml_tensor * ggml_rope_ext_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        int n_dims,
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    struct ggml_tensor * result = ggml_rope_ext(
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    result->op = GGML_OP_ROPE_BACK;
    return result;
}

struct ggml_tensor * ggml_rope_multi_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        int n_dims,
        int sections[4],
        int mode,
        int n_ctx_orig,
        float freq_base,
        float freq_scale,
        float ext_factor,
        float attn_factor,
        float beta_fast,
        float beta_slow) {
    struct ggml_tensor * result = ggml_rope_multi(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    result->op = GGML_OP_ROPE_BACK;
    return result;
}
// ggml_clamp

struct ggml_tensor * ggml_clamp(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        float min,
        float max) {
    // TODO: when implementing the backward pass, fix this:
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    float params[] = { min, max };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_CLAMP;
    result->src[0] = a;

    return result;
}
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
}

// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OH, OW, IC*KH*KW]
struct ggml_tensor * ggml_im2col(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1,
        bool is_2D,
        enum ggml_type dst_type) {
    if (is_2D) {
        GGML_ASSERT(a->ne[2] == b->ne[2]);
    } else {
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
        GGML_ASSERT(b->ne[1] == a->ne[1]);
        GGML_ASSERT(b->ne[3] == 1);
    }

    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);

    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
    GGML_ASSERT((OW > 0)           && "b too small compared to a");

    const int64_t ne[4] = {
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
        OW,
        is_2D ? OH : b->ne[2],
        is_2D ? b->ne[3] : 1,
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_IM2COL;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_im2col_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int64_t * ne,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1,
        bool is_2D) {
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_IM2COL_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
// ggml_conv_1d

struct ggml_tensor * ggml_conv_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int p0,
        int d0) {
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]

    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]

    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]

    return result;
}
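
// Illustrative shape walk-through (editor's addition, values assumed): a kernel `a`
// of [K=3, IC=64, OC=128] applied to an input `b` of [IL=100, IC=64, N=1] with
// s0=1, p0=1, d0=1 gives OL = (100 + 2*1 - 1*(3-1) - 1)/1 + 1 = 100, so
// ggml_conv_1d(ctx, a, b, 1, 1, 1) returns a tensor of shape [100, 128, 1].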
// ggml_conv_1d_ph

struct ggml_tensor * ggml_conv_1d_ph(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s,
        int d) {
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
}

// ggml_conv_1d_dw

struct ggml_tensor * ggml_conv_1d_dw(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int p0,
        int d0) {
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);

    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);

    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);

    result = ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);

    return result;
}

// ggml_conv_1d_dw_ph

struct ggml_tensor * ggml_conv_1d_dw_ph(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int d0) {
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
}

// ggml_conv_transpose_1d

static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
}

GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int p0,
        int d0) {
    GGML_ASSERT(ggml_is_matrix(b));
    GGML_ASSERT(a->ne[2] == b->ne[1]);
    GGML_ASSERT(a->ne[3] == 1);

    GGML_ASSERT(p0 == 0);
    GGML_ASSERT(d0 == 1);

    const int64_t ne[4] = {
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
        a->ne[1], b->ne[2], 1,
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { s0, p0, d0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_CONV_TRANSPOSE_1D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
// ggml_conv_2d

// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OC, OH, OW]
struct ggml_tensor * ggml_conv_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1) {
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]

    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]

    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]

    return result;
}
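
// Illustrative shape walk-through (editor's addition, values assumed): a kernel `a`
// of [KW=3, KH=3, IC=64, OC=128] over an input `b` of [IW=224, IH=224, IC=64, N=1]
// with stride 1, padding 1 and dilation 1 keeps the spatial size, so
// ggml_conv_2d(ctx, a, b, 1, 1, 1, 1, 1, 1) returns a tensor of shape [224, 224, 128, 1].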
// ggml_conv_2d_sk_p0

struct ggml_tensor * ggml_conv_2d_sk_p0(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
}

// ggml_conv_2d_s1_ph

struct ggml_tensor * ggml_conv_2d_s1_ph(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
}

// ggml_conv_2d_dw

struct ggml_tensor * ggml_conv_2d_dw(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1) {
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]

    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]

    return result;
}

// ggml_conv_2d_dw_direct

struct ggml_tensor * ggml_conv_2d_dw_direct(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int stride0,
        int stride1,
        int pad0,
        int pad1,
        int dilation0,
        int dilation1) {
    GGML_ASSERT(a->ne[2] == 1);
    GGML_ASSERT(a->ne[3] == b->ne[2]);
    int64_t ne[4];
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
    ne[2] = b->ne[2];
    ne[3] = b->ne[3];

    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);

    if (ggml_is_contiguous_channels(b)) {
        // Result will be permuted the same way as input (CWHN order)
        const int64_t type_size = ggml_type_size(result->type);
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
        result->nb[0] = result->ne[2] * type_size;
        result->nb[1] = result->ne[0] * result->nb[0];
        result->nb[2] = type_size;
    }

    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_CONV_2D_DW;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_transpose_2d_p0

static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
    return (ins - 1) * s - 2 * p + ks;
}

struct ggml_tensor * ggml_conv_transpose_2d_p0(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int stride) {
    GGML_ASSERT(a->ne[3] == b->ne[2]);

    const int64_t ne[4] = {
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
        a->ne[2], b->ne[3],
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_i32(result, 0, stride);

    result->op = GGML_OP_CONV_TRANSPOSE_2D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
// ggml_pool_*

static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
    return (ins + 2 * p - ks) / s + 1;
}

// ggml_pool_1d

struct ggml_tensor * ggml_pool_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_op_pool op,
        int k0,
        int s0,
        int p0) {
    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        a->ne[1],
        a->ne[2],
        a->ne[3],
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { op, k0, s0, p0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_POOL_1D;
    result->src[0] = a;

    return result;
}

// ggml_pool_2d

struct ggml_tensor * ggml_pool_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_op_pool op,
        int k0,
        int k1,
        int s0,
        int s1,
        float p0,
        float p1) {
    struct ggml_tensor * result;
    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
        a->ne[2],
        a->ne[3],
    };
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_POOL_2D;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_pool_2d_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * af,
        enum ggml_op_pool op,
        int k0,
        int k1,
        int s0,
        int s1,
        float p0,
        float p1) {
    struct ggml_tensor * result;
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_POOL_2D_BACK;
    result->src[0] = a;
    result->src[1] = af;

    return result;
}
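
// Illustrative sketch (editor's addition): 2x2 max pooling with stride 2 and no
// padding, which halves both spatial dimensions of an F32 tensor `x`
// (identifiers assumed caller-provided).
//
//     struct ggml_tensor * xp = ggml_pool_2d(ctx, x, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0, 0);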
// ggml_upscale

static struct ggml_tensor * ggml_upscale_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int ne0,
        int ne1,
        int ne2,
        int ne3,
        enum ggml_scale_mode mode) {
    GGML_ASSERT(a->ne[0] <= ne0);
    GGML_ASSERT(a->ne[1] <= ne1);
    GGML_ASSERT(a->ne[2] <= ne2);
    GGML_ASSERT(a->ne[3] <= ne3);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

    ggml_set_op_params_i32(result, 0, mode);

    result->op = GGML_OP_UPSCALE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_upscale(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int scale_factor,
        enum ggml_scale_mode mode) {
    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
}

struct ggml_tensor * ggml_upscale_ext(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int ne0,
        int ne1,
        int ne2,
        int ne3,
        enum ggml_scale_mode mode) {
    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}

// ggml_pad

struct ggml_tensor * ggml_pad(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int p0,
        int p1,
        int p2,
        int p3) {
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] + p0,
            a->ne[1] + p1,
            a->ne[2] + p2,
            a->ne[3] + p3);

    result->op = GGML_OP_PAD;
    result->src[0] = a;

    return result;
}
// ggml_pad_reflect_1d

struct ggml_tensor * ggml_pad_reflect_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int p0,
        int p1) {
    GGML_ASSERT(p0 >= 0);
    GGML_ASSERT(p1 >= 0);

    GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded

    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] + p0 + p1,
            a->ne[1],
            a->ne[2],
            a->ne[3]);

    int32_t params[] = { p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_PAD_REFLECT_1D;
    result->src[0] = a;

    return result;
}
// ggml_roll

struct ggml_tensor * ggml_roll(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int shift0,
        int shift1,
        int shift2,
        int shift3) {
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
    GGML_ASSERT(abs(shift0) < a->ne[0]);
    GGML_ASSERT(abs(shift1) < a->ne[1]);
    GGML_ASSERT(abs(shift2) < a->ne[2]);
    GGML_ASSERT(abs(shift3) < a->ne[3]);

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, shift0);
    ggml_set_op_params_i32(result, 1, shift1);
    ggml_set_op_params_i32(result, 2, shift2);
    ggml_set_op_params_i32(result, 3, shift3);

    result->op = GGML_OP_ROLL;
    result->src[0] = a;

    return result;
}

// ggml_arange

struct ggml_tensor * ggml_arange(
        struct ggml_context * ctx,
        float start,
        float stop,
        float step) {
    GGML_ASSERT(stop > start);

    const int64_t steps = (int64_t) ceilf((stop - start) / step);

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);

    ggml_set_op_params_f32(result, 0, start);
    ggml_set_op_params_f32(result, 1, stop);
    ggml_set_op_params_f32(result, 2, step);

    result->op = GGML_OP_ARANGE;

    return result;
}

// ggml_timestep_embedding

struct ggml_tensor * ggml_timestep_embedding(
        struct ggml_context * ctx,
        struct ggml_tensor * timesteps,
        int dim,
        int max_period) {
    int actual_dim = dim;
    if (dim % 2 != 0) {
        actual_dim = dim + 1;
    }

    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);

    ggml_set_op_params_i32(result, 0, dim);
    ggml_set_op_params_i32(result, 1, max_period);

    result->op = GGML_OP_TIMESTEP_EMBEDDING;
    result->src[0] = timesteps;

    return result;
}
// ggml_argsort

struct ggml_tensor * ggml_argsort(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_sort_order order) {
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);

    ggml_set_op_params_i32(result, 0, (int32_t) order);

    result->op = GGML_OP_ARGSORT;
    result->src[0] = a;

    return result;
}

// ggml_top_k

struct ggml_tensor * ggml_top_k(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int k) {
    GGML_ASSERT(a->ne[0] >= k);

    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);

    result = ggml_view_4d(ctx, result,
                k, result->ne[1], result->ne[2], result->ne[3],
                   result->nb[1], result->nb[2], result->nb[3],
                0);

    return result;
}
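
// Illustrative sketch (editor's addition): the result of ggml_top_k is an I32 tensor
// holding the indices of the k largest values along dim 0 (a strided view of the
// descending argsort). For example, with logits of shape [n_vocab, n_tokens]:
//
//     struct ggml_tensor * best = ggml_top_k(ctx, logits, 40); // [40, n_tokens] indices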
// ggml_flash_attn_ext

struct ggml_tensor * ggml_flash_attn_ext(
        struct ggml_context * ctx,
        struct ggml_tensor * q,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        struct ggml_tensor * mask,
        float scale,
        float max_bias,
        float logit_softcap) {
    GGML_ASSERT(ggml_can_mul_mat(k, q));
    // TODO: check if vT can be multiplied by (k*qT)

    if (mask) {
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(mask->ne[2] == 1);
        GGML_ASSERT(mask->ne[3] == 1);
        GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
                "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
    }

    if (max_bias > 0.0f) {
        GGML_ASSERT(mask);
    }

    // permute(0, 2, 1, 3)
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    float params[] = { scale, max_bias, logit_softcap };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_FLASH_ATTN_EXT;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = mask;

    return result;
}
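
// Illustrative sketch (editor's addition): fused attention with q of shape
// [n_embd_head, n_tokens, n_head, 1] and k/v of shape [n_embd_head, n_kv, n_head_kv, 1];
// per the ne[] computation above, the F32 result is [n_embd_head_v, n_head, n_tokens, 1].
// Identifiers and the padded kq_mask are assumed caller-provided.
//
//     cur = ggml_flash_attn_ext(ctx, q, k, v, kq_mask, 1.0f/sqrtf((float) n_embd_head), 0.0f, 0.0f);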
void ggml_flash_attn_ext_set_prec(
        struct ggml_tensor * a,
        enum ggml_prec prec) {
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);

    const int32_t prec_i32 = (int32_t) prec;

    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
}

enum ggml_prec ggml_flash_attn_ext_get_prec(
        const struct ggml_tensor * a) {
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);

    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);

    return (enum ggml_prec) prec_i32;
}
// ggml_flash_attn_back

struct ggml_tensor * ggml_flash_attn_back(
        struct ggml_context * ctx,
        struct ggml_tensor * q,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        struct ggml_tensor * d,
        bool masked) {
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");

    GGML_ASSERT(ggml_can_mul_mat(k, q));
    // TODO: check if vT can be multiplied by (k*qT)

    // d shape [D,N,ne2,ne3]
    // q shape [D,N,ne2,ne3]
    // k shape [D,M,kvne2,ne3]
    // v shape [M,D,kvne2,ne3]

    const int64_t D     = q->ne[0];
    const int64_t N     = q->ne[1];
    const int64_t M     = k->ne[1];
    const int64_t ne2   = q->ne[2];
    const int64_t ne3   = q->ne[3];
    const int64_t kvne2 = k->ne[2];

    GGML_ASSERT(k->ne[0] == D);
    GGML_ASSERT(v->ne[0] == M);
    GGML_ASSERT(v->ne[1] == D);
    GGML_ASSERT(d->ne[0] == D);
    GGML_ASSERT(d->ne[1] == N);
    GGML_ASSERT(k->ne[2] == kvne2);
    GGML_ASSERT(k->ne[3] == ne3);
    GGML_ASSERT(v->ne[2] == kvne2);
    GGML_ASSERT(v->ne[3] == ne3);
    GGML_ASSERT(d->ne[2] == ne2);
    GGML_ASSERT(d->ne[3] == ne3);

    GGML_ASSERT(ne2 % kvne2 == 0);

    // store gradients of q, k and v as contiguous tensors concatenated in result.
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
    const int64_t elem_q = ggml_nelements(q);
    const int64_t elem_k = ggml_nelements(k);
    const int64_t elem_v = ggml_nelements(v);

    enum ggml_type result_type = GGML_TYPE_F32;
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
    const size_t tsize = ggml_type_size(result_type);

    const size_t offs_q = 0;
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);

    const size_t nelements = (end + tsize - 1)/tsize;

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);

    int32_t masked_i = masked ? 1 : 0;
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));

    result->op = GGML_OP_FLASH_ATTN_BACK;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = d;

    return result;
}
// ggml_ssm_conv

struct ggml_tensor * ggml_ssm_conv(
        struct ggml_context * ctx,
        struct ggml_tensor * sx,
        struct ggml_tensor * c) {
    GGML_ASSERT(ggml_is_3d(sx));
    GGML_ASSERT(ggml_is_matrix(c));

    const int64_t d_conv  = c->ne[0];
    const int64_t d_inner = c->ne[1];
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
    const int64_t n_s     = sx->ne[2];

    // TODO: maybe support other strides than 1?
    // FIXME: this is always true?
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
    GGML_ASSERT(sx->ne[1] == d_inner);
    GGML_ASSERT(n_t >= 0);

    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);

    result->op = GGML_OP_SSM_CONV;
    result->src[0] = sx;
    result->src[1] = c;

    return result;
}

// ggml_ssm_scan

struct ggml_tensor * ggml_ssm_scan(
        struct ggml_context * ctx,
        struct ggml_tensor * s,
        struct ggml_tensor * x,
        struct ggml_tensor * dt,
        struct ggml_tensor * A,
        struct ggml_tensor * B,
        struct ggml_tensor * C) {
    GGML_ASSERT(ggml_is_contiguous(s));
    GGML_ASSERT(ggml_is_contiguous(x));
    GGML_ASSERT(ggml_is_contiguous(dt));
    GGML_ASSERT(ggml_is_contiguous(A));
    GGML_ASSERT(ggml_is_matrix(A));
    GGML_ASSERT(ggml_is_3d(B));
    GGML_ASSERT(ggml_is_3d(s));
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
    GGML_ASSERT(ggml_are_same_shape(x, dt));
    GGML_ASSERT(ggml_are_same_shape(B, C));

    {
        const int64_t d_state      = s->ne[0];
        const int64_t d_inner      = s->ne[1];
        const int64_t n_seq_tokens = x->ne[1];
        const int64_t n_seqs       = x->ne[2];

        GGML_ASSERT(s->ne[2] == n_seqs);
        GGML_ASSERT(x->ne[0] == d_inner);
        GGML_ASSERT(A->ne[0] == d_state);
        GGML_ASSERT(A->ne[1] == d_inner);
        GGML_ASSERT(B->ne[0] == d_state);
        GGML_ASSERT(B->ne[1] == n_seq_tokens);
        GGML_ASSERT(B->ne[2] == n_seqs);
    }

    // concatenated y + ssm_states
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));

    result->op = GGML_OP_SSM_SCAN;
    result->src[0] = s;
    result->src[1] = x;
    result->src[2] = dt;
    result->src[3] = A;
    result->src[4] = B;
    result->src[5] = C;

    return result;
}
// ggml_win_part

struct ggml_tensor * ggml_win_part(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int w) {
    GGML_ASSERT(a->ne[3] == 1);
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    // padding
    const int px = (w - a->ne[1]%w)%w;
    const int py = (w - a->ne[2]%w)%w;

    const int npx = (px + a->ne[1])/w;
    const int npy = (py + a->ne[2])/w;
    const int np  = npx*npy;

    const int64_t ne[4] = { a->ne[0], w, w, np, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { npx, npy, w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_WIN_PART;
    result->src[0] = a;

    return result;
}

// ggml_win_unpart

struct ggml_tensor * ggml_win_unpart(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int w0,
        int h0,
        int w) {
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

    int32_t params[] = { w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_WIN_UNPART;
    result->src[0] = a;

    return result;
}

// ggml_get_rel_pos

struct ggml_tensor * ggml_get_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int qh,
        int kh) {
    GGML_ASSERT(qh == kh);
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);

    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);

    result->op = GGML_OP_GET_REL_POS;
    result->src[0] = a;

    return result;
}

// ggml_add_rel_pos

static struct ggml_tensor * ggml_add_rel_pos_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * pw,
        struct ggml_tensor * ph,
        bool inplace) {
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(pw));
    GGML_ASSERT(ggml_is_contiguous(ph));
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);

    result->op = GGML_OP_ADD_REL_POS;
    result->src[0] = a;
    result->src[1] = pw;
    result->src[2] = ph;

    return result;
}

struct ggml_tensor * ggml_add_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * pw,
        struct ggml_tensor * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
}

struct ggml_tensor * ggml_add_rel_pos_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * pw,
        struct ggml_tensor * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
}
// ggml_rwkv_wkv6

struct ggml_tensor * ggml_rwkv_wkv6(
        struct ggml_context * ctx,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        struct ggml_tensor * r,
        struct ggml_tensor * tf,
        struct ggml_tensor * td,
        struct ggml_tensor * state) {
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(tf));
    GGML_ASSERT(ggml_is_contiguous(td));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op = GGML_OP_RWKV_WKV6;
    result->src[0] = k;
    result->src[1] = v;
    result->src[2] = r;
    result->src[3] = tf;
    result->src[4] = td;
    result->src[5] = state;

    return result;
}

// ggml_gated_linear_attn

struct ggml_tensor * ggml_gated_linear_attn(
        struct ggml_context * ctx,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        struct ggml_tensor * q,
        struct ggml_tensor * g,
        struct ggml_tensor * state,
        float scale) {
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(q));
    GGML_ASSERT(ggml_is_contiguous(g));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_f32(result, 0, scale);

    result->op = GGML_OP_GATED_LINEAR_ATTN;
    result->src[0] = k;
    result->src[1] = v;
    result->src[2] = q;
    result->src[3] = g;
    result->src[4] = state;

    return result;
}

// ggml_rwkv_wkv7

struct ggml_tensor * ggml_rwkv_wkv7(
        struct ggml_context * ctx,
        struct ggml_tensor * r,
        struct ggml_tensor * w,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * state) {
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(w));
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(b));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op = GGML_OP_RWKV_WKV7;
    result->src[0] = r;
    result->src[1] = w;
    result->src[2] = k;
    result->src[3] = v;
    result->src[4] = a;
    result->src[5] = b;
    result->src[6] = state;

    return result;
}
  4015. // ggml_unary
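// ggml_unary_impl folds all element-wise unary ops into a single GGML_OP_UNARY node: the specific
// ggml_unary_op is stored in op_params[0] and recovered later with ggml_get_unary_op (see the
// backward pass below); the source must satisfy ggml_is_contiguous_1, i.e. its rows must be
// contiguous. usage is e.g. ggml_unary(ctx, a, GGML_UNARY_OP_GELU)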
  4016. static struct ggml_tensor * ggml_unary_impl(
  4017. struct ggml_context * ctx,
  4018. struct ggml_tensor * a,
  4019. enum ggml_unary_op op,
  4020. bool inplace) {
  4021. GGML_ASSERT(ggml_is_contiguous_1(a));
  4022. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4023. ggml_set_op_params_i32(result, 0, (int32_t) op);
  4024. result->op = GGML_OP_UNARY;
  4025. result->src[0] = a;
  4026. return result;
  4027. }
  4028. struct ggml_tensor * ggml_unary(
  4029. struct ggml_context * ctx,
  4030. struct ggml_tensor * a,
  4031. enum ggml_unary_op op) {
  4032. return ggml_unary_impl(ctx, a, op, false);
  4033. }
  4034. struct ggml_tensor * ggml_unary_inplace(
  4035. struct ggml_context * ctx,
  4036. struct ggml_tensor * a,
  4037. enum ggml_unary_op op) {
  4038. return ggml_unary_impl(ctx, a, op, true);
  4039. }
  4040. // ggml_map_custom1
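// the ggml_map_custom{1,2,3} builders wrap a user-supplied callback into a graph node: the callback
// pointer, the requested task count (GGML_N_TASKS_MAX asks for the maximum available) and an opaque
// userdata pointer are packed into op_params, and the inputs are recorded in src[]. a minimal usage
// sketch, with my_op as a hypothetical callback (not part of ggml):
//
//   static void my_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
//                     int ith, int nth, void * userdata) {
//       // fill dst from a, splitting the rows between the nth threads using ith
//   }
//   // ...
//   struct ggml_tensor * t = ggml_map_custom1(ctx, a, my_op, GGML_N_TASKS_MAX, NULL);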
  4041. static struct ggml_tensor * ggml_map_custom1_impl(
  4042. struct ggml_context * ctx,
  4043. struct ggml_tensor * a,
  4044. const ggml_custom1_op_t fun,
  4045. int n_tasks,
  4046. void * userdata,
  4047. bool inplace) {
  4048. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4049. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4050. struct ggml_map_custom1_op_params params = {
  4051. /*.fun =*/ fun,
  4052. /*.n_tasks =*/ n_tasks,
  4053. /*.userdata =*/ userdata
  4054. };
  4055. ggml_set_op_params(result, &params, sizeof(params));
  4056. result->op = GGML_OP_MAP_CUSTOM1;
  4057. result->src[0] = a;
  4058. return result;
  4059. }
  4060. struct ggml_tensor * ggml_map_custom1(
  4061. struct ggml_context * ctx,
  4062. struct ggml_tensor * a,
  4063. const ggml_custom1_op_t fun,
  4064. int n_tasks,
  4065. void * userdata) {
  4066. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
  4067. }
  4068. struct ggml_tensor * ggml_map_custom1_inplace(
  4069. struct ggml_context * ctx,
  4070. struct ggml_tensor * a,
  4071. const ggml_custom1_op_t fun,
  4072. int n_tasks,
  4073. void * userdata) {
  4074. return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
  4075. }
  4076. // ggml_map_custom2
  4077. static struct ggml_tensor * ggml_map_custom2_impl(
  4078. struct ggml_context * ctx,
  4079. struct ggml_tensor * a,
  4080. struct ggml_tensor * b,
  4081. const ggml_custom2_op_t fun,
  4082. int n_tasks,
  4083. void * userdata,
  4084. bool inplace) {
  4085. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4086. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4087. struct ggml_map_custom2_op_params params = {
  4088. /*.fun =*/ fun,
  4089. /*.n_tasks =*/ n_tasks,
  4090. /*.userdata =*/ userdata
  4091. };
  4092. ggml_set_op_params(result, &params, sizeof(params));
  4093. result->op = GGML_OP_MAP_CUSTOM2;
  4094. result->src[0] = a;
  4095. result->src[1] = b;
  4096. return result;
  4097. }
  4098. struct ggml_tensor * ggml_map_custom2(
  4099. struct ggml_context * ctx,
  4100. struct ggml_tensor * a,
  4101. struct ggml_tensor * b,
  4102. const ggml_custom2_op_t fun,
  4103. int n_tasks,
  4104. void * userdata) {
  4105. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
  4106. }
  4107. struct ggml_tensor * ggml_map_custom2_inplace(
  4108. struct ggml_context * ctx,
  4109. struct ggml_tensor * a,
  4110. struct ggml_tensor * b,
  4111. const ggml_custom2_op_t fun,
  4112. int n_tasks,
  4113. void * userdata) {
  4114. return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
  4115. }
  4116. // ggml_map_custom3
  4117. static struct ggml_tensor * ggml_map_custom3_impl(
  4118. struct ggml_context * ctx,
  4119. struct ggml_tensor * a,
  4120. struct ggml_tensor * b,
  4121. struct ggml_tensor * c,
  4122. const ggml_custom3_op_t fun,
  4123. int n_tasks,
  4124. void * userdata,
  4125. bool inplace) {
  4126. GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
  4127. struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
  4128. struct ggml_map_custom3_op_params params = {
  4129. /*.fun =*/ fun,
  4130. /*.n_tasks =*/ n_tasks,
  4131. /*.userdata =*/ userdata
  4132. };
  4133. ggml_set_op_params(result, &params, sizeof(params));
  4134. result->op = GGML_OP_MAP_CUSTOM3;
  4135. result->src[0] = a;
  4136. result->src[1] = b;
  4137. result->src[2] = c;
  4138. return result;
  4139. }
  4140. struct ggml_tensor * ggml_map_custom3(
  4141. struct ggml_context * ctx,
  4142. struct ggml_tensor * a,
  4143. struct ggml_tensor * b,
  4144. struct ggml_tensor * c,
  4145. const ggml_custom3_op_t fun,
  4146. int n_tasks,
  4147. void * userdata) {
  4148. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
  4149. }
  4150. struct ggml_tensor * ggml_map_custom3_inplace(
  4151. struct ggml_context * ctx,
  4152. struct ggml_tensor * a,
  4153. struct ggml_tensor * b,
  4154. struct ggml_tensor * c,
  4155. const ggml_custom3_op_t fun,
  4156. int n_tasks,
  4157. void * userdata) {
  4158. return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
  4159. }
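// ggml_custom_4d and ggml_custom_inplace are the generic variants: the output type and shape are
// given explicitly (or taken from the viewed tensor a for the inplace form), and up to GGML_MAX_SRC
// (respectively GGML_MAX_SRC - 1, since src[0] is a) argument tensors are attached as sources; the
// callback, task count and userdata are stored in op_params just like the map_custom ops above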
  4160. struct ggml_tensor * ggml_custom_4d(
  4161. struct ggml_context * ctx,
  4162. enum ggml_type type,
  4163. int64_t ne0,
  4164. int64_t ne1,
  4165. int64_t ne2,
  4166. int64_t ne3,
  4167. struct ggml_tensor ** args,
  4168. int n_args,
  4169. ggml_custom_op_t fun,
  4170. int n_tasks,
  4171. void * userdata) {
  4172. GGML_ASSERT(n_args < GGML_MAX_SRC);
  4173. struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
  4174. struct ggml_custom_op_params params = {
  4175. /*.fun =*/ fun,
  4176. /*.n_tasks =*/ n_tasks,
  4177. /*.userdata =*/ userdata
  4178. };
  4179. ggml_set_op_params(result, &params, sizeof(params));
  4180. result->op = GGML_OP_CUSTOM;
  4181. for (int i = 0; i < n_args; i++) {
  4182. result->src[i] = args[i];
  4183. }
  4184. return result;
  4185. }
  4186. struct ggml_tensor * ggml_custom_inplace(
  4187. struct ggml_context * ctx,
  4188. struct ggml_tensor * a,
  4189. struct ggml_tensor ** args,
  4190. int n_args,
  4191. ggml_custom_op_t fun,
  4192. int n_tasks,
  4193. void * userdata) {
  4194. GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
  4195. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4196. struct ggml_custom_op_params params = {
  4197. /*.fun =*/ fun,
  4198. /*.n_tasks =*/ n_tasks,
  4199. /*.userdata =*/ userdata
  4200. };
  4201. ggml_set_op_params(result, &params, sizeof(params));
  4202. result->op = GGML_OP_CUSTOM;
  4203. result->src[0] = a;
  4204. for (int i = 0; i < n_args; i++) {
  4205. result->src[i + 1] = args[i];
  4206. }
  4207. return result;
  4208. }
  4209. // ggml_cross_entropy_loss
  4210. struct ggml_tensor * ggml_cross_entropy_loss(
  4211. struct ggml_context * ctx,
  4212. struct ggml_tensor * a,
  4213. struct ggml_tensor * b) {
  4214. GGML_ASSERT(ggml_are_same_shape(a, b));
  4215. struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
  4216. result->op = GGML_OP_CROSS_ENTROPY_LOSS;
  4217. result->src[0] = a;
  4218. result->src[1] = b;
  4219. return result;
  4220. }
  4221. // ggml_cross_entropy_loss_back
  4222. struct ggml_tensor * ggml_cross_entropy_loss_back(
  4223. struct ggml_context * ctx,
  4224. struct ggml_tensor * a,
  4225. struct ggml_tensor * b,
  4226. struct ggml_tensor * c) {
  4227. GGML_ASSERT(ggml_is_scalar(a));
  4228. GGML_ASSERT(ggml_are_same_shape(b, c));
  4229. struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
  4230. result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
  4231. result->src[0] = a;
  4232. result->src[1] = b;
  4233. result->src[2] = c;
  4234. return result;
  4235. }
  4236. // opt_step_adamw
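// builds an in-place AdamW update node: a must be flagged as a parameter, and grad, m, v must match
// its shape (m and v are the first and second moment accumulators); adamw_params is expected to hold
// exactly 7 F32 values with the step hyperparameters (their exact layout is defined by the AdamW
// compute kernel). the op returns a view of a, so the update happens in place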
  4237. struct ggml_tensor * ggml_opt_step_adamw(
  4238. struct ggml_context * ctx,
  4239. struct ggml_tensor * a,
  4240. struct ggml_tensor * grad,
  4241. struct ggml_tensor * m,
  4242. struct ggml_tensor * v,
  4243. struct ggml_tensor * adamw_params) {
  4244. GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
  4245. GGML_ASSERT(ggml_are_same_shape(a, grad));
  4246. GGML_ASSERT(ggml_are_same_shape(a, m));
  4247. GGML_ASSERT(ggml_are_same_shape(a, v));
  4248. GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
  4249. GGML_ASSERT(ggml_nelements(adamw_params) == 7);
  4250. struct ggml_tensor * result = ggml_view_tensor(ctx, a);
  4251. result->op = GGML_OP_OPT_STEP_ADAMW;
  4252. result->src[0] = a;
  4253. result->src[1] = grad;
  4254. result->src[2] = m;
  4255. result->src[3] = v;
  4256. result->src[4] = adamw_params;
  4257. return result;
  4258. }
  4259. ////////////////////////////////////////////////////////////////////////////////
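// the hash set below is an open-addressing table over tensor pointers: `keys` holds the pointers and
// `used` is a bitset marking occupied slots; the probing helpers ggml_hash_find / ggml_hash_insert
// used throughout this file are defined with the other hash/bitset utilities elsewhere in the codebase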
  4260. struct ggml_hash_set ggml_hash_set_new(size_t size) {
  4261. size = ggml_hash_size(size);
  4262. struct ggml_hash_set result;
  4263. result.size = size;
  4264. result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
  4265. result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
  4266. return result;
  4267. }
  4268. void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
  4269. memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
  4270. }
  4271. void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
  4272. GGML_FREE(hash_set->used);
  4273. GGML_FREE(hash_set->keys);
  4274. }
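// ggml_hash_size rounds a requested capacity up to the next prime from the table below via binary
// search, e.g. ggml_hash_size(100) returns 131; requests beyond the largest table entry fall back to
// min_sz | 1 (forced odd) instead of a prime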
  4275. size_t ggml_hash_size(size_t min_sz) {
  4276. // next primes after powers of two
  4277. static const size_t primes[] = {
  4278. 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
  4279. 2053, 4099, 8209, 16411, 32771, 65537, 131101,
  4280. 262147, 524309, 1048583, 2097169, 4194319, 8388617,
  4281. 16777259, 33554467, 67108879, 134217757, 268435459,
  4282. 536870923, 1073741827, 2147483659
  4283. };
  4284. static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
4285. // find the smallest prime that is larger than or equal to min_sz
  4286. size_t l = 0;
  4287. size_t r = n_primes;
  4288. while (l < r) {
  4289. size_t m = (l + r)/2;
  4290. if (primes[m] < min_sz) {
  4291. l = m + 1;
  4292. } else {
  4293. r = m;
  4294. }
  4295. }
  4296. size_t sz = l < n_primes ? primes[l] : min_sz | 1;
  4297. return sz;
  4298. }
  4299. struct hash_map {
  4300. struct ggml_hash_set set;
  4301. struct ggml_tensor ** vals;
  4302. };
  4303. static struct hash_map * ggml_new_hash_map(size_t size) {
  4304. struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
  4305. result->set = ggml_hash_set_new(size);
  4306. result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
  4307. return result;
  4308. }
  4309. static void ggml_hash_map_free(struct hash_map * map) {
  4310. ggml_hash_set_free(&map->set);
  4311. GGML_FREE(map->vals);
  4312. GGML_FREE(map);
  4313. }
  4314. // utility functions to change gradients
4315. // isrc is the index of tensor in cgraph->visited_hash_set.keys
4316. // the corresponding gradient (accumulator) is also at position isrc
  4317. // if tensor has a gradient accumulator, modify that accumulator in-place
  4318. // else if there is no gradient for tensor, set the corresponding value
  4319. // else, just add/subtract/etc. the gradients
  4320. static void ggml_add_or_set(
  4321. struct ggml_context * ctx,
  4322. struct ggml_cgraph * cgraph,
  4323. size_t isrc,
  4324. struct ggml_tensor * tensor) {
  4325. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4326. GGML_ASSERT(src);
  4327. if (cgraph->grads[isrc]) {
  4328. cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
  4329. } else {
  4330. cgraph->grads[isrc] = tensor;
  4331. }
  4332. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4333. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4334. }
  4335. static void ggml_acc_or_set(
  4336. struct ggml_context * ctx,
  4337. struct ggml_cgraph * cgraph,
  4338. size_t isrc,
  4339. struct ggml_tensor * tensor,
  4340. const size_t nb1,
  4341. const size_t nb2,
  4342. const size_t nb3,
  4343. const size_t offset) {
  4344. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4345. GGML_ASSERT(src);
  4346. if (cgraph->grads[isrc]) {
  4347. cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
  4348. } else {
  4349. struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
  4350. cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
  4351. }
  4352. ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
  4353. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4354. }
  4355. static void ggml_add1_or_set(
  4356. struct ggml_context * ctx,
  4357. struct ggml_cgraph * cgraph,
  4358. size_t isrc,
  4359. struct ggml_tensor * tensor) {
  4360. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4361. GGML_ASSERT(src);
  4362. if (cgraph->grads[isrc]) {
  4363. cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
  4364. } else {
  4365. cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
  4366. }
  4367. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4368. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4369. }
  4370. static void ggml_sub_or_set(
  4371. struct ggml_context * ctx,
  4372. struct ggml_cgraph * cgraph,
  4373. size_t isrc,
  4374. struct ggml_tensor * tensor) {
  4375. struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
  4376. GGML_ASSERT(src);
  4377. if (cgraph->grads[isrc]) {
  4378. cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
  4379. } else {
  4380. cgraph->grads[isrc] = ggml_neg(ctx, tensor);
  4381. }
  4382. ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
  4383. ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
  4384. }
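// ggml_compute_backward emits the backward graph for a single forward node: it looks up the node's
// gradient and the hash-set slots of its sources, and for every source marked in grads_needed it
// accumulates the local gradient of the op into cgraph->grads[] using the ggml_*_or_set helpers
// above; ops without a differentiable input (or with an unimplemented backward path) either do
// nothing or abort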
  4385. static void ggml_compute_backward(
  4386. struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
  4387. struct ggml_tensor * tensor = cgraph->nodes[i];
  4388. struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, tensor);
  4389. if (!grad) {
  4390. return;
  4391. }
  4392. struct ggml_tensor * src0 = tensor->src[0];
  4393. struct ggml_tensor * src1 = tensor->src[1];
  4394. struct ggml_tensor * src2 = tensor->src[2];
  4395. struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
  4396. const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
  4397. const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
  4398. const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
  4399. const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
  4400. const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
  4401. const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
  4402. switch (tensor->op) {
  4403. case GGML_OP_DUP: {
  4404. if (src0_needs_grads) {
  4405. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4406. }
  4407. } break;
  4408. case GGML_OP_ADD: {
  4409. if (src0_needs_grads) {
  4410. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4411. }
  4412. if (src1_needs_grads) {
  4413. struct ggml_tensor * tmp = grad;
  4414. if (!ggml_are_same_shape(src0, src1)) {
  4415. tmp = ggml_repeat_back(ctx, tmp, src1);
  4416. }
  4417. ggml_add_or_set(ctx, cgraph, isrc1, tmp);
  4418. }
  4419. } break;
  4420. case GGML_OP_ADD1: {
  4421. if (src0_needs_grads) {
  4422. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4423. }
  4424. if (src1_needs_grads) {
  4425. ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
  4426. }
  4427. } break;
  4428. case GGML_OP_ACC: {
  4429. if (src0_needs_grads) {
  4430. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4431. }
  4432. if (src1_needs_grads) {
  4433. const size_t nb1 = ((int32_t *) tensor->op_params)[0];
  4434. const size_t nb2 = ((int32_t *) tensor->op_params)[1];
  4435. const size_t nb3 = ((int32_t *) tensor->op_params)[2];
  4436. const size_t offset = ((int32_t *) tensor->op_params)[3];
  4437. struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
  4438. grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4439. nb1, nb2, nb3, offset);
  4440. ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
  4441. }
  4442. } break;
  4443. case GGML_OP_SUB: {
  4444. if (src0_needs_grads) {
  4445. ggml_add_or_set(ctx, cgraph, isrc0, grad);
  4446. }
  4447. if (src1_needs_grads) {
  4448. ggml_sub_or_set(ctx, cgraph, isrc1, grad);
  4449. }
  4450. } break;
  4451. case GGML_OP_MUL: {
  4452. if (src0_needs_grads) {
  4453. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
  4454. }
  4455. if (src1_needs_grads) {
  4456. struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
  4457. if (!ggml_are_same_shape(src0, src1)) {
  4458. tmp = ggml_repeat_back(ctx, tmp, src1);
  4459. }
  4460. ggml_add_or_set(ctx, cgraph, isrc1, tmp);
  4461. }
  4462. } break;
  4463. case GGML_OP_DIV: {
  4464. if (src0_needs_grads) {
  4465. ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
  4466. }
  4467. if (src1_needs_grads) {
  4468. ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
  4469. }
  4470. } break;
  4471. case GGML_OP_SQR: {
  4472. if (src0_needs_grads) {
  4473. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
  4474. }
  4475. } break;
  4476. case GGML_OP_SQRT: {
  4477. if (src0_needs_grads) {
  4478. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
  4479. }
  4480. } break;
  4481. case GGML_OP_LOG: {
  4482. if (src0_needs_grads) {
  4483. ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
  4484. }
  4485. } break;
  4486. case GGML_OP_SIN: {
  4487. if (src0_needs_grads) {
  4488. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
  4489. }
  4490. } break;
  4491. case GGML_OP_COS: {
  4492. if (src0_needs_grads) {
  4493. ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
  4494. }
  4495. } break;
  4496. case GGML_OP_SUM: {
  4497. if (src0_needs_grads) {
  4498. ggml_add1_or_set(ctx, cgraph, isrc0, grad);
  4499. }
  4500. } break;
  4501. case GGML_OP_SUM_ROWS: {
  4502. if (src0_needs_grads) {
  4503. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
  4504. }
  4505. } break;
  4506. case GGML_OP_MEAN: {
  4507. if (src0_needs_grads) {
  4508. ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
  4509. }
  4510. } break;
  4511. case GGML_OP_REPEAT: {
  4512. if (src0_needs_grads) {
  4513. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
  4514. }
  4515. } break;
  4516. case GGML_OP_REPEAT_BACK: {
  4517. if (src0_needs_grads) {
  4518. ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
  4519. }
  4520. } break;
  4521. case GGML_OP_RMS_NORM: {
  4522. if (src0_needs_grads) {
  4523. float eps;
  4524. memcpy(&eps, tensor->op_params, sizeof(float));
  4525. ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
  4526. }
  4527. } break;
  4528. case GGML_OP_MUL_MAT: {
  4529. // https://cs231n.github.io/optimization-2/#staged
  4530. // # forward pass
  4531. // s0 = np.random.randn(5, 10)
  4532. // s1 = np.random.randn(10, 3)
  4533. // t = s0.dot(s1)
  4534. // # now suppose we had the gradient on t from above in the circuit
  4535. // dt = np.random.randn(*t.shape) # same shape as t
  4536. // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
4537. // ds1 = s0.T.dot(dt)
  4538. // tensor.shape [m,p,qq,rr]
  4539. // src0.shape [n,m,q1,r1]
  4540. // src1.shape [n,p,qq,rr]
  4541. if (src0_needs_grads) {
  4542. GGML_ASSERT(grad->ne[2] == src1->ne[2]);
  4543. GGML_ASSERT(grad->ne[3] == src1->ne[3]);
  4544. struct ggml_tensor * tmp =
  4545. ggml_out_prod(ctx, // [n,m,qq,rr]
  4546. src1, // [n,p,qq,rr]
  4547. grad); // [m,p,qq,rr]
  4548. if (!ggml_are_same_shape(tmp, src0)) {
  4549. GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
  4550. GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
  4551. GGML_ASSERT(tmp->ne[3] == 1);
  4552. const int64_t nr2 = tmp->ne[2] / src0->ne[2];
  4553. const size_t nb2 = tmp->nb[2] * nr2;
  4554. const size_t nb3 = tmp->nb[2];
  4555. tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
  4556. tmp = ggml_repeat_back(ctx, tmp, src0);
  4557. }
  4558. ggml_add_or_set(ctx, cgraph, isrc0, tmp);
  4559. }
  4560. if (src1_needs_grads) {
  4561. ggml_add_or_set(ctx, cgraph, isrc1,
  4562. // ggml_mul_mat(ctx, // [n,p,qq,rr]
  4563. // ggml_cont(ctx, // [m,n,q1,r1]
  4564. // ggml_transpose(ctx, src0)), // [m,n,q1,r1]
  4565. // grad), // [m,p,qq,rr]
  4566. // when src0 is bigger than tensor->grad (this is mostly the case in llama),
4567. // avoid transposing src0; instead transpose the smaller tensor->grad
  4568. // and then use ggml_out_prod
  4569. ggml_out_prod(ctx, // [n,p,qq,rr]
  4570. src0, // [n,m,q1,r1]
  4571. ggml_transpose(ctx, // [p,m,qq,rr]
  4572. grad))); // [m,p,qq,rr]
  4573. }
  4574. } break;
  4575. case GGML_OP_SCALE: {
  4576. if (src0_needs_grads) {
  4577. float s;
  4578. memcpy(&s, tensor->op_params, sizeof(float));
  4579. ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, false));
  4580. }
  4581. } break;
  4582. case GGML_OP_SET: {
  4583. const size_t nb1 = ((const int32_t *) tensor->op_params)[0];
  4584. const size_t nb2 = ((const int32_t *) tensor->op_params)[1];
  4585. const size_t nb3 = ((const int32_t *) tensor->op_params)[2];
  4586. const size_t offset = ((const int32_t *) tensor->op_params)[3];
  4587. struct ggml_tensor * tensor_grad_view = NULL;
  4588. if (src0_needs_grads || src1_needs_grads) {
  4589. GGML_ASSERT(src0->type == tensor->type);
  4590. GGML_ASSERT(!cgraph->grads[isrc0] || cgraph->grads[isrc0]->type == grad->type);
  4591. GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
  4592. tensor_grad_view = ggml_view_4d(ctx,
  4593. grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
  4594. nb1, nb2, nb3, offset);
  4595. }
  4596. if (src0_needs_grads) {
  4597. struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
  4598. ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
  4599. }
  4600. if (src1_needs_grads) {
  4601. ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
  4602. }
  4603. } break;
  4604. case GGML_OP_CPY: {
4605. // cpy overwrites the value of src1 with src0 and returns view(src1)
  4606. // the overwriting is mathematically equivalent to:
  4607. // tensor = src0 * 1 + src1 * 0
  4608. if (src0_needs_grads) {
  4609. // dsrc0 = dtensor * 1
  4610. ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
  4611. }
  4612. if (src1_needs_grads) {
  4613. // dsrc1 = dtensor * 0 -> noop
  4614. }
  4615. } break;
  4616. case GGML_OP_CONT: {
  4617. // same as cpy
  4618. if (src0_needs_grads) {
  4619. GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
  4620. GGML_ASSERT(ggml_is_contiguous(grad));
  4621. GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
  4622. ggml_add_or_set(ctx, cgraph, isrc0,
  4623. ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
  4624. }
  4625. } break;
  4626. case GGML_OP_RESHAPE: {
  4627. if (src0_needs_grads) {
  4628. struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
  4629. ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
  4630. }
  4631. } break;
  4632. case GGML_OP_VIEW: {
  4633. if (src0_needs_grads) {
  4634. size_t offset;
  4635. memcpy(&offset, tensor->op_params, sizeof(offset));
  4636. size_t nb1 = tensor->nb[1];
  4637. size_t nb2 = tensor->nb[2];
  4638. size_t nb3 = tensor->nb[3];
  4639. if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
4640. // the gradient is typically F32, but src0 could be of a different type
  4641. size_t ng = ggml_element_size(cgraph->grads[isrc0]);
  4642. size_t n0 = ggml_element_size(src0);
  4643. GGML_ASSERT(offset % n0 == 0);
  4644. GGML_ASSERT(nb1 % n0 == 0);
  4645. GGML_ASSERT(nb2 % n0 == 0);
  4646. GGML_ASSERT(nb3 % n0 == 0);
  4647. offset = (offset / n0) * ng;
  4648. nb1 = (nb1 / n0) * ng;
  4649. nb2 = (nb2 / n0) * ng;
  4650. nb3 = (nb3 / n0) * ng;
  4651. }
  4652. ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
  4653. }
  4654. } break;
  4655. case GGML_OP_PERMUTE: {
  4656. if (src0_needs_grads) {
  4657. const int32_t * axes = (const int32_t *) tensor->op_params;
  4658. const int axis0 = axes[0] & 0x3;
  4659. const int axis1 = axes[1] & 0x3;
  4660. const int axis2 = axes[2] & 0x3;
  4661. const int axis3 = axes[3] & 0x3;
  4662. int axb[4] = {0,0,0,0}; // axes backward
  4663. axb[axis0] = 0;
  4664. axb[axis1] = 1;
  4665. axb[axis2] = 2;
  4666. axb[axis3] = 3;
  4667. ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
  4668. }
  4669. } break;
  4670. case GGML_OP_TRANSPOSE: {
  4671. if (src0_needs_grads) {
  4672. ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
  4673. }
  4674. } break;
  4675. case GGML_OP_GET_ROWS: {
  4676. if (src0_needs_grads) {
  4677. ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
  4678. }
  4679. if (src1_needs_grads) {
  4680. // noop
  4681. }
  4682. } break;
  4683. case GGML_OP_DIAG_MASK_INF: {
  4684. if (src0_needs_grads) {
  4685. /* ggml_diag_mask_inf_impl() shouldn't be here */
  4686. /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
  4687. const int n_past = ((const int32_t *) tensor->op_params)[0];
  4688. ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
  4689. }
  4690. } break;
  4691. case GGML_OP_DIAG_MASK_ZERO: {
  4692. if (src0_needs_grads) {
  4693. const int n_past = ((const int32_t *) tensor->op_params)[0];
  4694. ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
  4695. }
  4696. } break;
  4697. case GGML_OP_SOFT_MAX: {
  4698. if (src0_needs_grads) {
  4699. float scale = 1.0f;
  4700. float max_bias = 0.0f;
  4701. memcpy(&scale, (const float *) tensor->op_params + 0, sizeof(float));
  4702. memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
  4703. ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
  4704. }
  4705. GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
  4706. } break;
  4707. case GGML_OP_ROPE: {
  4708. if (src0_needs_grads) {
  4709. //const int n_past = ((int32_t *) tensor->op_params)[0];
  4710. const int n_dims = ((const int32_t *) tensor->op_params)[1];
  4711. const int mode = ((const int32_t *) tensor->op_params)[2];
  4712. //const int n_ctx = ((int32_t *) tensor->op_params)[3];
  4713. const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
  4714. float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
  4715. int sections[4] = {0, 0, 0, 0};
  4716. memcpy(&freq_base, (const float *) tensor->op_params + 5, sizeof(float));
  4717. memcpy(&freq_scale, (const float *) tensor->op_params + 6, sizeof(float));
  4718. memcpy(&ext_factor, (const float *) tensor->op_params + 7, sizeof(float));
  4719. memcpy(&attn_factor, (const float *) tensor->op_params + 8, sizeof(float));
  4720. memcpy(&beta_fast, (const float *) tensor->op_params + 9, sizeof(float));
  4721. memcpy(&beta_slow, (const float *) tensor->op_params + 10, sizeof(float));
  4722. memcpy(&sections, tensor->op_params + 11, sizeof(sections));
  4723. struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
  4724. ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
  4725. mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
  4726. ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
  4727. mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
  4728. ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
  4729. }
  4730. GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
  4731. } break;
  4732. case GGML_OP_IM2COL: {
  4733. if (src1_needs_grads) {
  4734. const int32_t s0 = ggml_get_op_params_i32(tensor, 0);
  4735. const int32_t s1 = ggml_get_op_params_i32(tensor, 1);
  4736. const int32_t p0 = ggml_get_op_params_i32(tensor, 2);
  4737. const int32_t p1 = ggml_get_op_params_i32(tensor, 3);
  4738. const int32_t d0 = ggml_get_op_params_i32(tensor, 4);
  4739. const int32_t d1 = ggml_get_op_params_i32(tensor, 5);
  4740. const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
  4741. ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
  4742. }
  4743. } break;
  4744. case GGML_OP_POOL_2D: {
  4745. if (src0_needs_grads) {
  4746. const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
  4747. const int32_t k0 = ggml_get_op_params_i32(tensor, 1);
  4748. const int32_t k1 = ggml_get_op_params_i32(tensor, 2);
  4749. const int32_t s0 = ggml_get_op_params_i32(tensor, 3);
  4750. const int32_t s1 = ggml_get_op_params_i32(tensor, 4);
  4751. const int32_t p0 = ggml_get_op_params_i32(tensor, 5);
  4752. const int32_t p1 = ggml_get_op_params_i32(tensor, 6);
  4753. ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
  4754. }
  4755. } break;
  4756. case GGML_OP_WIN_PART:
  4757. case GGML_OP_WIN_UNPART:
  4758. case GGML_OP_UNARY: {
  4759. switch (ggml_get_unary_op(tensor)) {
  4760. case GGML_UNARY_OP_ABS: {
  4761. if (src0_needs_grads) {
  4762. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
  4763. }
  4764. } break;
  4765. case GGML_UNARY_OP_SGN: {
  4766. // noop
  4767. } break;
  4768. case GGML_UNARY_OP_NEG: {
  4769. if (src0_needs_grads) {
  4770. ggml_sub_or_set(ctx, cgraph, isrc0, grad);
  4771. }
  4772. } break;
  4773. case GGML_UNARY_OP_STEP: {
  4774. // noop
  4775. } break;
  4776. case GGML_UNARY_OP_RELU: {
  4777. if (src0_needs_grads) {
  4778. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
  4779. }
  4780. } break;
  4781. case GGML_UNARY_OP_SILU: {
  4782. if (src0_needs_grads) {
  4783. ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
  4784. }
  4785. } break;
  4786. case GGML_UNARY_OP_EXP: {
  4787. if (src0_needs_grads) {
  4788. ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
  4789. }
  4790. } break;
  4791. default: {
  4792. fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
  4793. __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
  4794. GGML_ABORT("fatal error");
  4795. } //break;
  4796. }
  4797. } break;
  4798. case GGML_OP_CROSS_ENTROPY_LOSS: {
  4799. if (src0_needs_grads) {
  4800. ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
  4801. }
  4802. GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
  4803. } break;
  4804. case GGML_OP_NONE: {
  4805. // noop
  4806. } break;
  4807. case GGML_OP_COUNT:
  4808. default: {
  4809. fprintf(stderr, "%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
  4810. GGML_ABORT("fatal error");
  4811. } //break;
  4812. }
  4813. GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
  4814. GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
  4815. GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
  4816. }
  4817. static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
  4818. // check if already visited
  4819. if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
  4820. return;
  4821. }
  4822. for (int i = 0; i < GGML_MAX_SRC; ++i) {
  4823. const int k =
  4824. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
  4825. (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
4826. /* unknown order, just fall back to using i */ i;
  4827. if (node->src[k]) {
  4828. ggml_visit_parents(cgraph, node->src[k]);
  4829. }
  4830. }
  4831. if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
  4832. // reached a leaf node, not part of the gradient graph (e.g. a constant)
  4833. GGML_ASSERT(cgraph->n_leafs < cgraph->size);
  4834. if (strlen(node->name) == 0) {
  4835. ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
  4836. }
  4837. cgraph->leafs[cgraph->n_leafs] = node;
  4838. cgraph->n_leafs++;
  4839. } else {
  4840. GGML_ASSERT(cgraph->n_nodes < cgraph->size);
  4841. if (strlen(node->name) == 0) {
  4842. ggml_format_name(node, "node_%d", cgraph->n_nodes);
  4843. }
  4844. cgraph->nodes[cgraph->n_nodes] = node;
  4845. cgraph->n_nodes++;
  4846. }
  4847. }
  4848. static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
  4849. if (!expand) {
  4850. // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
  4851. ggml_graph_clear(cgraph);
  4852. }
  4853. const int n0 = cgraph->n_nodes;
  4854. ggml_visit_parents(cgraph, tensor);
  4855. const int n_new = cgraph->n_nodes - n0;
  4856. GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
  4857. if (n_new > 0) {
4858. // the last added node should always be the starting point
  4859. GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
  4860. }
  4861. }
  4862. void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  4863. ggml_build_forward_impl(cgraph, tensor, true);
  4864. }
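// ggml_build_backward_expand works in two passes over the already-built forward graph: first it
// decides which nodes need gradients (parameters, losses and anything fed by them, skipping I32
// nodes), installing gradient accumulators where requested or required, then it walks the forward
// nodes in reverse order calling ggml_compute_backward, so each node's gradient is fully accumulated
// before it is consumed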
  4865. void ggml_build_backward_expand(
  4866. struct ggml_context * ctx,
  4867. struct ggml_cgraph * cgraph,
  4868. struct ggml_tensor ** grad_accs) {
  4869. GGML_ASSERT(cgraph->n_nodes > 0);
  4870. GGML_ASSERT(cgraph->grads);
  4871. GGML_ASSERT(cgraph->grad_accs);
  4872. const int n_nodes_f = cgraph->n_nodes;
  4873. memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
  4874. memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
  4875. bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
  4876. {
  4877. bool any_params = false;
  4878. bool any_loss = false;
  4879. for (int i = 0; i < n_nodes_f; ++i) {
  4880. struct ggml_tensor * node = cgraph->nodes[i];
  4881. any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
  4882. any_loss = any_loss || (node->flags & GGML_TENSOR_FLAG_LOSS);
  4883. }
  4884. GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
  4885. GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
  4886. }
  4887. for (int i = 0; i < n_nodes_f; ++i) {
  4888. struct ggml_tensor * node = cgraph->nodes[i];
  4889. if (node->type == GGML_TYPE_I32) {
  4890. continue;
  4891. }
  4892. bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
  4893. bool ignore_src[GGML_MAX_SRC] = {false};
  4894. switch (node->op) {
  4895. // gradients in node->src[0] for one reason or another have no effect on output gradients
  4896. case GGML_OP_IM2COL: // only used for its shape
  4897. case GGML_OP_IM2COL_BACK: // same as IM2COL
  4898. ignore_src[0] = true;
  4899. break;
  4900. case GGML_OP_UNARY: {
  4901. const enum ggml_unary_op uop = ggml_get_unary_op(node);
  4902. // SGN and STEP unary ops are piecewise constant
  4903. if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
  4904. ignore_src[0] = true;
  4905. }
  4906. } break;
  4907. // gradients in node->src[1] for one reason or another have no effect on output gradients
  4908. case GGML_OP_CPY: // gradients in CPY target are irrelevant
  4909. case GGML_OP_GET_ROWS: // row indices not differentiable
  4910. case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
  4911. case GGML_OP_ROPE: // positions not differentiable
  4912. ignore_src[1] = true;
  4913. break;
  4914. default:
  4915. break;
  4916. }
  4917. for (int j = 0; j < GGML_MAX_SRC; ++j) {
  4918. if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
  4919. continue;
  4920. }
  4921. GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
  4922. node_needs_grad = true;
  4923. break;
  4924. }
  4925. if (!node_needs_grad) {
  4926. continue;
  4927. }
  4928. // inplace operations are currently not supported
  4929. GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
  4930. node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
  4931. const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
  4932. GGML_ASSERT(ihash != GGML_HASHSET_FULL);
  4933. GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
  4934. if (grad_accs && grad_accs[i]) {
  4935. cgraph->grad_accs[ihash] = grad_accs[i];
  4936. cgraph->grads[ihash] = cgraph->grad_accs[ihash];
  4937. } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  4938. // loss tensors always need a gradient accumulator
  4939. cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
  4940. cgraph->grads[ihash] = cgraph->grad_accs[ihash];
  4941. }
  4942. grads_needed[ihash] = true;
  4943. }
  4944. for (int i = n_nodes_f - 1; i >= 0; --i) {
4945. // inplace operations to add gradients are not created by ggml_compute_backward, except for gradient accumulation
4946. // rely on the allocator to automatically turn them into inplace operations
  4947. ggml_compute_backward(ctx, cgraph, i, grads_needed);
  4948. }
  4949. free(grads_needed);
  4950. }
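// a ggml_cgraph is allocated as a single block: the struct itself, the nodes and leafs arrays, the
// hash-set keys, optionally the grads/grad_accs arrays, and the `used` bitset. incr_ptr_aligned
// bumps a cursor through that block with the required alignment, and ggml_graph_nbytes computes the
// total size by running the same sequence of bumps starting from a NULL pointer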
  4951. static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
  4952. void * ptr = *p;
  4953. ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
  4954. *p = (void *) ((char *) ptr + size);
  4955. return ptr;
  4956. }
  4957. static size_t ggml_graph_nbytes(size_t size, bool grads) {
  4958. size_t hash_size = ggml_hash_size(size * 2);
  4959. void * p = 0;
  4960. incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
  4961. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
  4962. incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
  4963. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
  4964. if (grads) {
  4965. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
  4966. incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
  4967. }
  4968. incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  4969. size_t nbytes = (size_t) p;
  4970. return nbytes;
  4971. }
  4972. size_t ggml_graph_overhead_custom(size_t size, bool grads) {
  4973. return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
  4974. }
  4975. size_t ggml_graph_overhead(void) {
  4976. return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
  4977. }
  4978. struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
  4979. const size_t obj_size = ggml_graph_nbytes(size, grads);
  4980. struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
  4981. struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
  4982. // the size of the hash table is doubled since it needs to hold both nodes and leafs
  4983. size_t hash_size = ggml_hash_size(size * 2);
  4984. void * p = cgraph + 1;
  4985. struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4986. struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4987. struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
  4988. struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  4989. struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
  4990. ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
  4991. // check that we allocated the correct amount of memory
  4992. assert(obj_size == (size_t)((char *)p - (char *)cgraph));
  4993. *cgraph = (struct ggml_cgraph) {
  4994. /*.size =*/ size,
  4995. /*.n_nodes =*/ 0,
  4996. /*.n_leafs =*/ 0,
  4997. /*.nodes =*/ nodes_ptr,
  4998. /*.grads =*/ grads_ptr,
  4999. /*.grad_accs =*/ grad_accs_ptr,
  5000. /*.leafs =*/ leafs_ptr,
  5001. /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
  5002. /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
  5003. };
  5004. ggml_hash_set_reset(&cgraph->visited_hash_set);
  5005. if (grads) {
  5006. memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *));
  5007. memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
  5008. }
  5009. return cgraph;
  5010. }
  5011. struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
  5012. return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
  5013. }
  5014. struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
  5015. struct ggml_cgraph cgraph = {
  5016. /*.size =*/ 0,
  5017. /*.n_nodes =*/ i1 - i0,
  5018. /*.n_leafs =*/ 0,
  5019. /*.nodes =*/ cgraph0->nodes + i0,
  5020. /*.grads =*/ NULL, // gradients would need visited_hash_set
  5021. /*.grad_accs =*/ NULL,
  5022. /*.leafs =*/ NULL,
  5023. /*.visited_hash_set =*/ { 0, NULL, NULL },
  5024. /*.order =*/ cgraph0->order,
  5025. };
  5026. return cgraph;
  5027. }
  5028. void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
  5029. GGML_ASSERT(dst->size >= src->n_leafs);
  5030. GGML_ASSERT(dst->size >= src->n_nodes);
  5031. GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
  5032. dst->n_leafs = src->n_leafs;
  5033. dst->n_nodes = src->n_nodes;
  5034. dst->order = src->order;
  5035. for (int i = 0; i < src->n_leafs; ++i) {
  5036. dst->leafs[i] = src->leafs[i];
  5037. }
  5038. for (int i = 0; i < src->n_nodes; ++i) {
  5039. dst->nodes[i] = src->nodes[i];
  5040. }
  5041. for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
  5042. // copy all hashset keys (tensors) that are in use
  5043. if (ggml_bitset_get(src->visited_hash_set.used, i)) {
  5044. ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
  5045. }
  5046. }
  5047. if (dst->grads) {
  5048. memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
  5049. memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
  5050. }
  5051. if (src->grads) {
  5052. GGML_ASSERT(dst->grads != NULL);
  5053. GGML_ASSERT(dst->grad_accs != NULL);
  5054. for (int i = 0; i < src->n_nodes; ++i) {
  5055. const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
  5056. const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
  5057. GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
  5058. GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
  5059. GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
  5060. GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
  5061. dst->grads[igrad_dst] = src->grads[igrad_src];
  5062. dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
  5063. }
  5064. }
  5065. }
  5066. struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
  5067. struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
  5068. ggml_graph_cpy(cgraph, result);
  5069. return result;
  5070. }
  5071. struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
  5072. if (ggml_is_empty(tensor)) {
  5073. return tensor;
  5074. }
  5075. if (tensor->buffer) {
  5076. ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
  5077. } else {
  5078. GGML_ASSERT(tensor->data);
  5079. memset(tensor->data, 0, ggml_nbytes(tensor));
  5080. }
  5081. return tensor;
  5082. }
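// ggml_graph_reset prepares a graph with gradients for the next accumulation step: for
// GGML_OP_OPT_STEP_ADAMW nodes the momenta in src[2]/src[3] are zeroed, loss gradient accumulators
// are set to 1.0f and every other gradient accumulator is set to 0, using the backend tensor API
// when the tensor lives in a backend buffer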
  5083. void ggml_graph_reset(struct ggml_cgraph * cgraph) {
  5084. if (!cgraph) {
  5085. return;
  5086. }
  5087. GGML_ASSERT(cgraph->grads != NULL);
  5088. for (int i = 0; i < cgraph->n_nodes; i++) {
  5089. struct ggml_tensor * node = cgraph->nodes[i];
  5090. struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
  5091. if (node->op == GGML_OP_OPT_STEP_ADAMW) {
  5092. // clear momenta
  5093. ggml_set_zero(node->src[2]);
  5094. ggml_set_zero(node->src[3]);
  5095. }
5096. // the initial gradient of the loss should be 1, all other gradients 0
  5097. if (grad_acc) {
  5098. if (node->flags & GGML_TENSOR_FLAG_LOSS) {
  5099. GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
  5100. GGML_ASSERT(ggml_is_scalar(grad_acc));
  5101. const float onef = 1.0f;
  5102. if (grad_acc->buffer) {
  5103. ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
  5104. } else {
  5105. GGML_ASSERT(grad_acc->data);
  5106. *((float *) grad_acc->data) = onef;
  5107. }
  5108. } else {
  5109. ggml_set_zero(grad_acc);
  5110. }
  5111. }
  5112. }
  5113. }
  5114. void ggml_graph_clear(struct ggml_cgraph * cgraph) {
  5115. cgraph->n_leafs = 0;
  5116. cgraph->n_nodes = 0;
  5117. ggml_hash_set_reset(&cgraph->visited_hash_set);
  5118. }
  5119. int ggml_graph_size(struct ggml_cgraph * cgraph) {
  5120. return cgraph->size;
  5121. }
  5122. struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
  5123. if (i < 0) {
  5124. GGML_ASSERT(cgraph->n_nodes + i >= 0);
  5125. return cgraph->nodes[cgraph->n_nodes + i];
  5126. }
  5127. GGML_ASSERT(i < cgraph->n_nodes);
  5128. return cgraph->nodes[i];
  5129. }
  5130. struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
  5131. return cgraph->nodes;
  5132. }
  5133. int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
  5134. return cgraph->n_nodes;
  5135. }
  5136. void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
  5137. GGML_ASSERT(cgraph->size > cgraph->n_nodes);
  5138. cgraph->nodes[cgraph->n_nodes] = tensor;
  5139. cgraph->n_nodes++;
  5140. }
  5141. struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
  5142. for (int i = 0; i < cgraph->n_leafs; i++) {
  5143. struct ggml_tensor * leaf = cgraph->leafs[i];
  5144. if (strcmp(leaf->name, name) == 0) {
  5145. return leaf;
  5146. }
  5147. }
  5148. for (int i = 0; i < cgraph->n_nodes; i++) {
  5149. struct ggml_tensor * node = cgraph->nodes[i];
  5150. if (strcmp(node->name, name) == 0) {
  5151. return node;
  5152. }
  5153. }
  5154. return NULL;
  5155. }
  5156. struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5157. const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
  5158. return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
  5159. }
  5160. struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5161. const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
  5162. return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
  5163. }
  5164. void ggml_graph_print(const struct ggml_cgraph * cgraph) {
  5165. GGML_LOG_INFO("=== GRAPH ===\n");
  5166. GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
  5167. for (int i = 0; i < cgraph->n_nodes; i++) {
  5168. struct ggml_tensor * node = cgraph->nodes[i];
  5169. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
  5170. i,
  5171. node->ne[0], node->ne[1], node->ne[2],
  5172. ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
  5173. ggml_graph_get_grad(cgraph, node) ? "g" : " ");
  5174. }
  5175. GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
  5176. for (int i = 0; i < cgraph->n_leafs; i++) {
  5177. struct ggml_tensor * node = cgraph->leafs[i];
  5178. GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
  5179. i,
  5180. node->ne[0], node->ne[1],
  5181. ggml_op_name(node->op),
  5182. ggml_get_name(node));
  5183. }
  5184. GGML_LOG_INFO("========================================\n");
  5185. }
  5186. // check if node is part of the graph
  5187. static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5188. if (cgraph == NULL) {
  5189. return true;
  5190. }
  5191. for (int i = 0; i < cgraph->n_nodes; i++) {
  5192. if (cgraph->nodes[i] == node) {
  5193. return true;
  5194. }
  5195. }
  5196. return false;
  5197. }
  5198. static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
  5199. for (int i = 0; i < cgraph->n_nodes; i++) {
  5200. struct ggml_tensor * parent = cgraph->nodes[i];
  5201. struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
  5202. if (grad == node) {
  5203. return parent;
  5204. }
  5205. }
  5206. return NULL;
  5207. }
  5208. static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5209. struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
  5210. struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
  5211. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
  5212. gparent0 ? (void *) gparent0 : (void *) parent,
  5213. gparent0 ? "g" : "x",
  5214. gparent ? (void *) gparent : (void *) node,
  5215. gparent ? "g" : "x",
  5216. gparent ? "empty" : "vee",
  5217. gparent ? "dashed" : "solid",
  5218. label);
  5219. }
  5220. static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) {
  5221. fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n",
  5222. (void *) parent, "x",
  5223. (void *) node, "x",
  5224. label);
  5225. }
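// ggml_graph_dump_dot writes the graph gb in Graphviz DOT format: parameter nodes are drawn yellow,
// nodes with a gradient green (if they also appear in gf) or lightblue, plain nodes white and leafs
// pink; edges are labelled with the source index, and a ready-to-run `dot -Tpng ...` command is
// printed when the file has been written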
  5226. void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
  5227. char color[16];
  5228. FILE * fp = ggml_fopen(filename, "w");
  5229. GGML_ASSERT(fp);
  5230. fprintf(fp, "digraph G {\n");
  5231. fprintf(fp, " newrank = true;\n");
  5232. fprintf(fp, " rankdir = TB;\n");
  5233. for (int i = 0; i < gb->n_nodes; i++) {
  5234. struct ggml_tensor * node = gb->nodes[i];
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);

        // nodes that have a parent in the graph are not drawn separately
        if (ggml_graph_get_parent(gb, node) != NULL) {
            continue;
        }

        // node color: parameters yellow, nodes with a gradient green when they also
        // appear in the forward graph gf (lightblue otherwise), everything else white
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
            snprintf(color, sizeof(color), "yellow");
        } else if (grad) {
            if (ggml_graph_find(gf, node)) {
                snprintf(color, sizeof(color), "green");
            } else {
                snprintf(color, sizeof(color), "lightblue");
            }
        } else {
            snprintf(color, sizeof(color), "white");
        }

        fprintf(fp, " \"%p\" [ "
                    "style = filled; fillcolor = %s; shape = record; "
                    "label=\"",
                (void *) node, color);

        if (strlen(node->name) > 0) {
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
        } else {
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
        }

        if (ggml_is_matrix(node)) {
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
        } else {
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
        }

        if (grad) {
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
        } else {
            fprintf(fp, "\"; ]\n");
        }
    }

    // leaf tensors are drawn as pink records labeled CONST
    for (int i = 0; i < gb->n_leafs; i++) {
        struct ggml_tensor * node = gb->leafs[i];

        snprintf(color, sizeof(color), "pink");

        fprintf(fp, " \"%p\" [ "
                    "style = filled; fillcolor = %s; shape = record; "
                    "label=\"<x>",
                (void *) node, color);

        if (strlen(node->name) > 0) {
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
        } else {
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
        }

        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
        if (ggml_nelements(node) < 5 && node->data != NULL) {
            fprintf(fp, " | (");
            for (int j = 0; j < ggml_nelements(node); j++) {
                // FIXME: use ggml-backend to obtain the tensor data
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
                //}
                //else if (node->type == GGML_TYPE_F32 ||
                //         node->type == GGML_TYPE_F16 ||
                //         node->type == GGML_TYPE_BF16) {
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
                //}
                //else
                {
                    fprintf(fp, "#");
                }
                if (j < ggml_nelements(node) - 1) {
                    fprintf(fp, ", ");
                }
            }
            fprintf(fp, ")");
        }
        fprintf(fp, "\"; ]\n");
    }

    // emit edges for each node's sources
    for (int i = 0; i < gb->n_nodes; i++) {
        struct ggml_tensor * node = gb->nodes[i];

        for (int j = 0; j < GGML_MAX_SRC; j++) {
            if (node->src[j]) {
                char label[16];
                snprintf(label, sizeof(label), "src %d", j);
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
            }
        }
    }

    // emit edges for each leaf's sources
    for (int i = 0; i < gb->n_leafs; i++) {
        struct ggml_tensor * node = gb->leafs[i];

        for (int j = 0; j < GGML_MAX_SRC; j++) {
            if (node->src[j]) {
                char label[16];
                snprintf(label, sizeof(label), "src %d", j);
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
            }
        }
    }

    fprintf(fp, "}\n");

    fclose(fp);

    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
}
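
// Illustrative usage sketch (not part of ggml, hence guarded with #if 0): builds a
// tiny forward graph and dumps it to DOT with ggml_graph_dump_dot(); the command
// printed by GGML_LOG_INFO above can then render it with graphviz. The buffer size,
// tensor shapes, and output filename are arbitrary example values; the second
// argument (forward graph) may be NULL when dumping a single graph.
#if 0
static void example_dump_graph_dot(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // 16 MB for tensor metadata and data
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2);
    struct ggml_tensor * c = ggml_mul_mat(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // no separate forward graph here, so pass NULL for the gf argument
    ggml_graph_dump_dot(gf, NULL, "example.dot");

    ggml_free(ctx);
}
#endif
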
////////////////////////////////////////////////////////////////////////////////

void ggml_set_input(struct ggml_tensor * tensor) {
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
}

void ggml_set_output(struct ggml_tensor * tensor) {
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
}

void ggml_set_param(struct ggml_tensor * tensor) {
    // only leaf tensors (op == GGML_OP_NONE) can be marked as trainable parameters
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
}

void ggml_set_loss(struct ggml_tensor * tensor) {
    // the loss must be a scalar F32 tensor
    GGML_ASSERT(ggml_is_scalar(tensor));
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
}
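
// Illustrative sketch (not part of ggml, hence guarded with #if 0): how the flag
// setters above are typically combined when preparing a graph for training. The
// tensor shapes and the use of ggml_sum() as a stand-in loss are arbitrary choices
// made for illustration only.
#if 0
static void example_mark_tensors(struct ggml_context * ctx) {
    struct ggml_tensor * w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    ggml_set_param(w); // trainable weight (must still be a leaf: op == GGML_OP_NONE)
    ggml_set_input(x); // data fed in before each graph evaluation

    struct ggml_tensor * y    = ggml_mul(ctx, w, x);
    struct ggml_tensor * loss = ggml_sum(ctx, y); // scalar F32, as ggml_set_loss() requires

    ggml_set_output(y);
    ggml_set_loss(loss);
}
#endif
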
////////////////////////////////////////////////////////////////////////////////

void ggml_quantize_init(enum ggml_type type) {
    ggml_critical_section_start();

    switch (type) {
        case GGML_TYPE_IQ2_XXS:
        case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ2_S:
        case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
        default: // nothing
            break;
    }

    ggml_critical_section_end();
}

void ggml_quantize_free(void) {
    ggml_critical_section_start();

    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
    iq3xs_free_impl(256);

    ggml_critical_section_end();
}

bool ggml_quantize_requires_imatrix(enum ggml_type type) {
    return
        type == GGML_TYPE_IQ2_XXS ||
        type == GGML_TYPE_IQ2_XS  ||
        type == GGML_TYPE_IQ1_S;//   ||
        //type == GGML_TYPE_IQ1_M;
}

size_t ggml_quantize_chunk(
        enum ggml_type   type,
           const float * src,
                  void * dst,
                 int64_t start,
                 int64_t nrows,
                 int64_t n_per_row,
           const float * imatrix) {
    const int64_t n = (int64_t) nrows * n_per_row;

    if (ggml_quantize_requires_imatrix(type)) {
        GGML_ASSERT(imatrix != NULL);
    }

    // start must be aligned to both the block size of the target type and a row boundary
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
    GGML_ASSERT(start % n_per_row == 0);

    ggml_quantize_init(type); // this is a no-op if already initialized

    const size_t start_row = start / n_per_row;
    const size_t row_size  = ggml_row_size(type, n_per_row);

    size_t result = 0;

    switch (type) {
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
        case GGML_TYPE_F16:
            {
                size_t elemsize = sizeof(ggml_fp16_t);
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_BF16:
            {
                size_t elemsize = sizeof(ggml_bf16_t);
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
                result = n * elemsize;
            } break;
        case GGML_TYPE_F32:
            {
                size_t elemsize = sizeof(float);
                result = n * elemsize;
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
            } break;
        default:
            assert(false);
    }

    GGML_ASSERT(result == nrows * row_size);

    return result;
}
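
// Illustrative sketch (not part of ggml, hence guarded with #if 0): quantizing a
// small row-major F32 matrix to Q8_0 with ggml_quantize_chunk(). Q8_0 needs no
// importance matrix, so imatrix may be NULL; n_per_row must be a multiple of the
// type's block size (32 for Q8_0). Sizes and the helper name are arbitrary.
#if 0
static size_t example_quantize_q8_0(const float * src, int64_t nrows, int64_t n_per_row) {
    const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);

    void * dst = malloc(nrows * row_size);
    if (dst == NULL) {
        return 0;
    }

    // quantize all rows in one chunk: start = 0
    const size_t written = ggml_quantize_chunk(GGML_TYPE_Q8_0, src, dst, 0, nrows, n_per_row, NULL);

    // on success, written == nrows * row_size (enforced by the assert above)
    free(dst);
    return written;
}
#endif
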
////////////////////////////////////////////////////////////////////////////////

void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
    g_logger_state.log_callback           = log_callback ? log_callback : ggml_log_callback_default;
    g_logger_state.log_callback_user_data = user_data;
}
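
// Illustrative sketch (not part of ggml, hence guarded with #if 0): a custom log
// callback that prefixes the numeric log level and writes to a FILE * passed via
// user_data. It assumes the ggml_log_callback signature
// void (*)(enum ggml_log_level level, const char * text, void * user_data).
#if 0
static void example_log_to_stream(enum ggml_log_level level, const char * text, void * user_data) {
    FILE * stream = (FILE *) user_data;
    fprintf(stream, "[%d] %s", (int) level, text); // messages typically carry their own newline
    fflush(stream);
}

// registration: ggml_log_set(example_log_to_stream, stderr);
#endif
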
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
    p->n_threads  = n_threads;
    p->prio       = 0;     // default priority (usually means normal or inherited)
    p->poll       = 50;    // hybrid-polling enabled
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
    p->paused     = false; // threads are ready to go
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
}

struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
    struct ggml_threadpool_params p;
    ggml_threadpool_params_init(&p, n_threads);
    return p;
}

bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
    if (p0->n_threads  != p1->n_threads )  return false;
    if (p0->prio       != p1->prio      )  return false;
    if (p0->poll       != p1->poll      )  return false;
    if (p0->strict_cpu != p1->strict_cpu)  return false;
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
}
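
// Illustrative sketch (not part of ggml, hence guarded with #if 0): building
// threadpool parameters with the helpers above and comparing two configurations.
// Creating an actual threadpool from these params is backend-specific (e.g. the
// CPU backend) and is not shown here; the field tweaks are arbitrary examples.
#if 0
static void example_threadpool_params(void) {
    struct ggml_threadpool_params pa = ggml_threadpool_params_default(8);

    struct ggml_threadpool_params pb;
    ggml_threadpool_params_init(&pb, 8);
    pb.prio = 1; // request a higher scheduling priority (meaning is OS-dependent)
    pb.poll = 0; // disable hybrid polling: threads sleep while waiting for work

    // differs in prio/poll, so the two configurations do not match
    bool same = ggml_threadpool_params_match(&pa, &pb);
    (void) same;
}
#endif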