#define GGML_COMMON_IMPL_C
#include "ggml-common.h"

#include "ggml-quants.h"
#include "ggml-impl.h"
#include "ggml-cpu/ggml-cpu-impl.h"
#include "ggml-cpu.h"

#include <math.h>
#include <string.h>
#include <assert.h>
#include <float.h>
#include <stdlib.h> // for qsort
#include <stdio.h>  // for GGML_ASSERT

#define GROUP_MAX_EPS 1e-15f
#define GROUP_MAX_EPS_IQ3_XXS 1e-8f
#define GROUP_MAX_EPS_IQ2_S 1e-8f
#define GROUP_MAX_EPS_IQ1_M 1e-7f
#define GROUP_MAX_EPS_IQ1_S 1e-12f

#if defined(_MSC_VER)
// disable "possible loss of data" to avoid warnings for hundreds of casts
// we should just be careful :)
#pragma warning(disable: 4244 4267)
#endif

#define UNUSED GGML_UNUSED
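
// Q4_0 stores each block of QK4_0 = 32 values as one fp16 scale d plus 32 unsigned 4-bit
// quants packed two per byte (low nibble = first half of the block, high nibble = second half).
// As dequantize_row_q4_0 below shows, values reconstruct as d * (q - 8), so quantization maps
// the block roughly onto [-8, 7] around its absolute maximum.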
// reference implementation for deterministic creation of model files
void quantize_row_q4_0_ref(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK4_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        float amax = 0.0f; // absolute max
        float max  = 0.0f;

        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];
            if (amax < fabsf(v)) {
                amax = fabsf(v);
                max  = v;
            }
        }

        const float d  = max / -8;
        const float id = d ? 1.0f/d : 0.0f;

        y[i].d = GGML_FP32_TO_FP16(d);

        for (int j = 0; j < qk/2; ++j) {
            const float x0 = x[i*qk + 0    + j]*id;
            const float x1 = x[i*qk + qk/2 + j]*id;

            const uint8_t xi0 = MIN(15, (int8_t)(x0 + 8.5f));
            const uint8_t xi1 = MIN(15, (int8_t)(x1 + 8.5f));

            y[i].qs[j]  = xi0;
            y[i].qs[j] |= xi1 << 4;
        }
    }
}
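
// Q4_1 is the affine variant: each block keeps an fp16 scale d and an fp16 minimum m, and
// values are reconstructed as d * q + m (see dequantize_row_q4_1), so the 4-bit codes cover
// [min, max] of the block instead of a symmetric range around zero.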
void quantize_row_q4_1_ref(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k) {
    const int qk = QK4_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        float min = FLT_MAX;
        float max = -FLT_MAX;

        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];

            if (v < min) min = v;
            if (v > max) max = v;
        }

        const float d  = (max - min) / ((1 << 4) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[i].d = GGML_FP32_TO_FP16(d);
        y[i].m = GGML_FP32_TO_FP16(min);

        for (int j = 0; j < qk/2; ++j) {
            const float x0 = (x[i*qk + 0    + j] - min)*id;
            const float x1 = (x[i*qk + qk/2 + j] - min)*id;

            const uint8_t xi0 = MIN(15, (int8_t)(x0 + 0.5f));
            const uint8_t xi1 = MIN(15, (int8_t)(x1 + 0.5f));

            y[i].qs[j]  = xi0;
            y[i].qs[j] |= xi1 << 4;
        }
    }
}
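
// Q5_0/Q5_1 extend the 4-bit formats to 5 bits: the low 4 bits of each quant are packed two
// per byte as in Q4, while the 5th (high) bit of all 32 values is collected into the 32-bit
// qh field, one bit per value (first half of the block in bits 0..15, second half in 16..31).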
void quantize_row_q5_0_ref(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK5_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        float amax = 0.0f; // absolute max
        float max  = 0.0f;

        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];
            if (amax < fabsf(v)) {
                amax = fabsf(v);
                max  = v;
            }
        }

        const float d  = max / -16;
        const float id = d ? 1.0f/d : 0.0f;

        y[i].d = GGML_FP32_TO_FP16(d);

        uint32_t qh = 0;

        for (int j = 0; j < qk/2; ++j) {
            const float x0 = x[i*qk + 0    + j]*id;
            const float x1 = x[i*qk + qk/2 + j]*id;

            const uint8_t xi0 = MIN(31, (int8_t)(x0 + 16.5f));
            const uint8_t xi1 = MIN(31, (int8_t)(x1 + 16.5f));

            y[i].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);

            // get the 5-th bit and store it in qh at the right position
            qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
            qh |= ((xi1 & 0x10u) >> 4) << (j + qk/2);
        }

        memcpy(&y[i].qh, &qh, sizeof(qh));
    }
}

void quantize_row_q5_1_ref(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k) {
    const int qk = QK5_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        float min = FLT_MAX;
        float max = -FLT_MAX;

        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];

            if (v < min) min = v;
            if (v > max) max = v;
        }

        const float d  = (max - min) / ((1 << 5) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[i].d = GGML_FP32_TO_FP16(d);
        y[i].m = GGML_FP32_TO_FP16(min);

        uint32_t qh = 0;

        for (int j = 0; j < qk/2; ++j) {
            const float x0 = (x[i*qk + 0    + j] - min)*id;
            const float x1 = (x[i*qk + qk/2 + j] - min)*id;

            const uint8_t xi0 = (uint8_t)(x0 + 0.5f);
            const uint8_t xi1 = (uint8_t)(x1 + 0.5f);

            y[i].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);

            // get the 5-th bit and store it in qh at the right position
            qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
            qh |= ((xi1 & 0x10u) >> 4) << (j + qk/2);
        }

        memcpy(&y[i].qh, &qh, sizeof(y[i].qh));
    }
}
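
// Q8_0 keeps full 8-bit signed quants with one fp16 scale per block of 32. Q8_1 additionally
// stores s = d * sum(q); keeping the block sum precomputed lets dot products against the
// offset (*_1) formats fold in the per-block minimum term without re-summing the quants.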
// reference implementation for deterministic creation of model files
void quantize_row_q8_0_ref(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k) {
    assert(k % QK8_0 == 0);
    const int nb = k / QK8_0;

    for (int i = 0; i < nb; i++) {
        float amax = 0.0f; // absolute max

        for (int j = 0; j < QK8_0; j++) {
            const float v = x[i*QK8_0 + j];
            amax = MAX(amax, fabsf(v));
        }

        const float d  = amax / ((1 << 7) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[i].d = GGML_FP32_TO_FP16(d);

        for (int j = 0; j < QK8_0; ++j) {
            const float x0 = x[i*QK8_0 + j]*id;

            y[i].qs[j] = roundf(x0);
        }
    }
}

// reference implementation for deterministic creation of model files
void quantize_row_q8_1_ref(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k) {
    assert(QK8_1 == 32);
    assert(k % QK8_1 == 0);
    const int nb = k / QK8_1;

    for (int i = 0; i < nb; i++) {
        float amax = 0.0f; // absolute max

        for (int j = 0; j < QK8_1; j++) {
            const float v = x[i*QK8_1 + j];
            amax = MAX(amax, fabsf(v));
        }

        const float d  = amax / ((1 << 7) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[i].d = GGML_FP32_TO_FP16(d);

        int sum = 0;

        for (int j = 0; j < QK8_1/2; ++j) {
            const float v0 = x[i*QK8_1           + j]*id;
            const float v1 = x[i*QK8_1 + QK8_1/2 + j]*id;

            y[i].qs[          j] = roundf(v0);
            y[i].qs[QK8_1/2 + j] = roundf(v1);

            sum += y[i].qs[          j];
            sum += y[i].qs[QK8_1/2 + j];
        }

        y[i].s = GGML_FP32_TO_FP16(sum*d);
    }
}
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK4_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);

        for (int j = 0; j < qk/2; ++j) {
            const int x0 = (x[i].qs[j] & 0x0F) - 8;
            const int x1 = (x[i].qs[j] >>   4) - 8;

            y[i*qk + j + 0   ] = x0*d;
            y[i*qk + j + qk/2] = x1*d;
        }
    }
}

void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK4_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);
        const float m = GGML_FP16_TO_FP32(x[i].m);

        for (int j = 0; j < qk/2; ++j) {
            const int x0 = (x[i].qs[j] & 0x0F);
            const int x1 = (x[i].qs[j] >>   4);

            y[i*qk + j + 0   ] = x0*d + m;
            y[i*qk + j + qk/2] = x1*d + m;
        }
    }
}

void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK5_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);

        uint32_t qh;
        memcpy(&qh, x[i].qh, sizeof(qh));

        for (int j = 0; j < qk/2; ++j) {
            const uint8_t xh_0 = ((qh >> (j +  0)) << 4) & 0x10;
            const uint8_t xh_1 = ((qh >> (j + 12))     ) & 0x10;

            const int32_t x0 = ((x[i].qs[j] & 0x0F) | xh_0) - 16;
            const int32_t x1 = ((x[i].qs[j] >>   4) | xh_1) - 16;

            y[i*qk + j + 0   ] = x0*d;
            y[i*qk + j + qk/2] = x1*d;
        }
    }
}

void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK5_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);
        const float m = GGML_FP16_TO_FP32(x[i].m);

        uint32_t qh;
        memcpy(&qh, x[i].qh, sizeof(qh));

        for (int j = 0; j < qk/2; ++j) {
            const uint8_t xh_0 = ((qh >> (j +  0)) << 4) & 0x10;
            const uint8_t xh_1 = ((qh >> (j + 12))     ) & 0x10;

            const int x0 = (x[i].qs[j] & 0x0F) | xh_0;
            const int x1 = (x[i].qs[j] >>   4) | xh_1;

            y[i*qk + j + 0   ] = x0*d + m;
            y[i*qk + j + qk/2] = x1*d + m;
        }
    }
}

void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK8_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);

        for (int j = 0; j < qk; ++j) {
            y[i*qk + j] = x[i].qs[j]*d;
        }
    }
}
//
// 2-6 bit quantization in super-blocks
//

//
// ===================== Helper functions
//
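
// nearest_int() rounds by adding 1.5 * 2^23 (12582912.0f) so that, assuming IEEE-754 binary32
// arithmetic with round-to-nearest, the rounded integer lands in the low mantissa bits of the
// float; masking and re-biasing then recovers it without a float-to-int conversion. The assert
// documents the valid input range (|fval| must stay below 2^22).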
static inline int nearest_int(float fval) {
    assert(fabsf(fval) <= 4194303.f);
    float val = fval + 12582912.f;
    int i; memcpy(&i, &val, sizeof(int));
    return (i & 0x007fffff) - 0x00400000;
}
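
// make_qx_quants fits a single scale for a symmetric quantizer with levels in [-nmax, nmax-1]:
// it starts from iscale = -nmax / max, then (for rmse_type != 0) computes the weighted
// least-squares scale sum(w*x*l) / sum(w*l*l) and probes a small grid of nearby iscale values,
// keeping the candidate that maximizes sumlx^2 / suml2. The optional qw array supplies
// importance weights; otherwise the weight is derived from x according to rmse_type.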
static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
        const float * GGML_RESTRICT qw) {
    float max = 0;
    float amax = 0;
    for (int i = 0; i < n; ++i) {
        float ax = fabsf(x[i]);
        if (ax > amax) { amax = ax; max = x[i]; }
    }
    if (amax < GROUP_MAX_EPS) { // all zero
        for (int i = 0; i < n; ++i) {
            L[i] = 0;
        }
        return 0.f;
    }
    float iscale = -nmax / max;
    if (rmse_type == 0) {
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale * x[i]);
            L[i] = nmax + MAX(-nmax, MIN(nmax-1, l));
        }
        return 1/iscale;
    }
    bool return_early = false;
    if (rmse_type < 0) {
        rmse_type = -rmse_type;
        return_early = true;
    }
    float sumlx = 0;
    float suml2 = 0;
#ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 0; i < n; ++i) {
#else
    for (int i = 0; i < n; ++i) {
#endif
        int l = nearest_int(iscale * x[i]);
        l = MAX(-nmax, MIN(nmax-1, l));
        L[i] = l + nmax;
        float w = qw ? qw[i] : rmse_type == 1 ? x[i] * x[i] : rmse_type == 2 ? 1 : rmse_type == 3 ? fabsf(x[i]) : sqrtf(fabsf(x[i]));
        sumlx += w*x[i]*l;
        suml2 += w*l*l;
    }
    float scale = suml2 ? sumlx/suml2 : 0.0f;
    if (return_early) return suml2 > 0 ? 0.5f*(scale + 1/iscale) : 1/iscale;
    float best = scale * sumlx;
    for (int is = -9; is <= 9; ++is) {
        if (is == 0) {
            continue;
        }
        iscale = -(nmax + 0.1f*is) / max;
        sumlx = suml2 = 0;
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale * x[i]);
            l = MAX(-nmax, MIN(nmax-1, l));
            float w = qw ? qw[i] : rmse_type == 1 ? x[i] * x[i] : rmse_type == 2 ? 1 : rmse_type == 3 ? fabsf(x[i]) : sqrtf(fabsf(x[i]));
            sumlx += w*x[i]*l;
            suml2 += w*l*l;
        }
        if (suml2 > 0 && sumlx*sumlx > best*suml2) {
            for (int i = 0; i < n; ++i) {
                int l = nearest_int(iscale * x[i]);
                L[i] = nmax + MAX(-nmax, MIN(nmax-1, l));
            }
            scale = sumlx/suml2; best = scale*sumlx;
        }
    }
    return scale;
}
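
// make_q3_quants is the symmetric quantizer used for 3-bit sub-blocks. With do_rmse it first
// quantizes with iscale = -nmax / max and then runs a few rounds of coordinate descent,
// re-deriving each L[i] from the weighted sums with that element removed and accepting the
// change only if it improves sumlx^2 / suml2; without do_rmse it is a plain rounding pass.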
static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
    float max = 0;
    float amax = 0;
    for (int i = 0; i < n; ++i) {
        float ax = fabsf(x[i]);
        if (ax > amax) { amax = ax; max = x[i]; }
    }
    if (amax < GROUP_MAX_EPS) { // all zero
        for (int i = 0; i < n; ++i) { L[i] = 0; }
        return 0.f;
    }
    float iscale = -nmax / max;
    if (do_rmse) {
        float sumlx = 0;
        float suml2 = 0;
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale * x[i]);
            l = MAX(-nmax, MIN(nmax-1, l));
            L[i] = l;
            float w = x[i]*x[i];
            sumlx += w*x[i]*l;
            suml2 += w*l*l;
        }
        for (int itry = 0; itry < 5; ++itry) {
            int n_changed = 0;
            for (int i = 0; i < n; ++i) {
                float w = x[i]*x[i];
                float slx = sumlx - w*x[i]*L[i];
                if (slx > 0) {
                    float sl2 = suml2 - w*L[i]*L[i];
                    int new_l = nearest_int(x[i] * sl2 / slx);
                    new_l = MAX(-nmax, MIN(nmax-1, new_l));
                    if (new_l != L[i]) {
                        slx += w*x[i]*new_l;
                        sl2 += w*new_l*new_l;
                        if (sl2 > 0 && slx*slx*suml2 > sumlx*sumlx*sl2) {
                            L[i] = new_l; sumlx = slx; suml2 = sl2;
                            ++n_changed;
                        }
                    }
                }
            }
            if (!n_changed) {
                break;
            }
        }
        for (int i = 0; i < n; ++i) {
            L[i] += nmax;
        }
        return sumlx / suml2;
    }
    for (int i = 0; i < n; ++i) {
        int l = nearest_int(iscale * x[i]);
        l = MAX(-nmax, MIN(nmax-1, l));
        L[i] = l + nmax;
    }
    return 1/iscale;
}
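
// make_qkx1_quants and make_qkx2_quants fit the affine quantizers (value ~ scale * L + min,
// with L in [0, nmax]) used by the K-quant sub-blocks. make_qkx2_quants additionally takes
// per-element weights and searches nstep candidate scales, solving the 2x2 weighted
// least-squares system for (scale, min) at each step and keeping the pair with the lowest
// weighted error (absolute or squared, depending on use_mad). The minimum is clamped to be
// non-positive and is returned negated through the_min.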
static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
        int ntry, float alpha) {
    float min = x[0];
    float max = x[0];
    for (int i = 1; i < n; ++i) {
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
    }
    if (max == min) {
        for (int i = 0; i < n; ++i) L[i] = 0;
        *the_min = 0;
        return 0.f;
    }
    if (min > 0) min = 0;
    float iscale = nmax/(max - min);
    float scale = 1/iscale;
    for (int itry = 0; itry < ntry; ++itry) {
        float sumlx = 0; int suml2 = 0;
        bool did_change = false;
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale*(x[i] - min));
            l = MAX(0, MIN(nmax, l));
            if (l != L[i]) {
                L[i] = l;
                did_change = true;
            }
            sumlx += (x[i] - min)*l;
            suml2 += l*l;
        }
        scale = sumlx/suml2;
        float sum = 0;
        for (int i = 0; i < n; ++i) {
            sum += x[i] - scale*L[i];
        }
        min = alpha*min + (1 - alpha)*sum/n;
        if (min > 0) min = 0;
        iscale = 1/scale;
        if (!did_change) break;
    }
    *the_min = -min;
    return scale;
}
static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
        uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
        float rmin, float rdelta, int nstep, bool use_mad) {
    float min = x[0];
    float max = x[0];
    float sum_w = weights[0];
    float sum_x = sum_w * x[0];
#ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 1; i < n; ++i) {
#else
    for (int i = 1; i < n; ++i) {
#endif
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
        float w = weights[i];
        sum_w += w;
        sum_x += w * x[i];
    }
    if (min > 0) min = 0;
    if (max == min) {
        for (int i = 0; i < n; ++i) L[i] = 0;
        *the_min = -min;
        return 0.f;
    }
    float iscale = nmax/(max - min);
    float scale = 1/iscale;
    float best_mad = 0;
    for (int i = 0; i < n; ++i) {
        int l = nearest_int(iscale*(x[i] - min));
        L[i] = MAX(0, MIN(nmax, l));
        float diff = scale * L[i] + min - x[i];
        diff = use_mad ? fabsf(diff) : diff * diff;
        float w = weights[i];
        best_mad += w * diff;
    }
    if (nstep < 1) {
        *the_min = -min;
        return scale;
    }
    for (int is = 0; is <= nstep; ++is) {
        iscale = (rmin + rdelta*is + nmax)/(max - min);
        float sum_l = 0, sum_l2 = 0, sum_xl = 0;
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale*(x[i] - min));
            l = MAX(0, MIN(nmax, l));
            Laux[i] = l;
            float w = weights[i];
            sum_l += w*l;
            sum_l2 += w*l*l;
            sum_xl += w*l*x[i];
        }
        float D = sum_w * sum_l2 - sum_l * sum_l;
        if (D > 0) {
            float this_scale = (sum_w * sum_xl - sum_x * sum_l)/D;
            float this_min   = (sum_l2 * sum_x - sum_l * sum_xl)/D;
            if (this_min > 0) {
                this_min = 0;
                this_scale = sum_xl / sum_l2;
            }
            float mad = 0;
            for (int i = 0; i < n; ++i) {
                float diff = this_scale * Laux[i] + this_min - x[i];
                diff = use_mad ? fabsf(diff) : diff * diff;
                float w = weights[i];
                mad += w * diff;
            }
            if (mad < best_mad) {
                for (int i = 0; i < n; ++i) {
                    L[i] = Laux[i];
                }
                best_mad = mad;
                scale = this_scale;
                min = this_min;
            }
        }
    }
    *the_min = -min;
    return scale;
}
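
// get_scale_min_k4 unpacks the 6-bit sub-block scales and mins of the K-quant formats that
// store 8 (scale, min) pairs in 12 bytes: the first 4 pairs use the low 6 bits of bytes 0-3
// (scales) and 4-7 (mins); the last 4 pairs take their low 4 bits from the two nibbles of
// bytes 8-11 and their top 2 bits from the high bits of bytes 0-3 (scales) and 4-7 (mins).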
static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
    if (j < 4) {
        *d = q[j] & 63; *m = q[j + 4] & 63;
    } else {
        *d = (q[j+4] & 0xF) | ((q[j-4] >> 6) << 4);
        *m = (q[j+4] >>  4) | ((q[j-0] >> 6) << 4);
    }
}

//========================- 2-bit (de)-quantization
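
// Q2_K packs a super-block of QK_K values as 16 sub-blocks of 16: each sub-block gets a 4-bit
// scale and a 4-bit min stored together in scales[], the super-block carries fp16 d and dmin
// multipliers for those 4-bit codes, and the 2-bit quants are packed four per byte. Values
// reconstruct as d*scale*q - dmin*min, matching dequantize_row_q2_K below.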
void quantize_row_q2_K_ref(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    uint8_t L[QK_K];
    uint8_t Laux[16];
    float   weights[16];
    float mins[QK_K/16];
    float scales[QK_K/16];

    const float q4scale = 15.f;

    for (int i = 0; i < nb; i++) {
        float max_scale = 0; // as we are deducting the min, scales are always positive
        float max_min = 0;
        for (int j = 0; j < QK_K/16; ++j) {
            for (int l = 0; l < 16; ++l) weights[l] = fabsf(x[16*j + l]);
            scales[j] = make_qkx2_quants(16, 3, x + 16*j, weights, L + 16*j, &mins[j], Laux, -0.5f, 0.1f, 15, true);
            float scale = scales[j];
            if (scale > max_scale) {
                max_scale = scale;
            }
            float min = mins[j];
            if (min > max_min) {
                max_min = min;
            }
        }

        if (max_scale > 0) {
            float iscale = q4scale/max_scale;
            for (int j = 0; j < QK_K/16; ++j) {
                int l = nearest_int(iscale*scales[j]);
                y[i].scales[j] = l;
            }
            y[i].d = GGML_FP32_TO_FP16(max_scale/q4scale);
        } else {
            for (int j = 0; j < QK_K/16; ++j) y[i].scales[j] = 0;
            y[i].d = GGML_FP32_TO_FP16(0.f);
        }
        if (max_min > 0) {
            float iscale = q4scale/max_min;
            for (int j = 0; j < QK_K/16; ++j) {
                int l = nearest_int(iscale*mins[j]);
                y[i].scales[j] |= (l << 4);
            }
            y[i].dmin = GGML_FP32_TO_FP16(max_min/q4scale);
        } else {
            y[i].dmin = GGML_FP32_TO_FP16(0.f);
        }
        for (int j = 0; j < QK_K/16; ++j) {
            const float d = GGML_FP16_TO_FP32(y[i].d) * (y[i].scales[j] & 0xF);
            if (!d) continue;
            const float dm = GGML_FP16_TO_FP32(y[i].dmin) * (y[i].scales[j] >> 4);
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int((x[16*j + ii] + dm)/d);
                l = MAX(0, MIN(3, l));
                L[16*j + ii] = l;
            }
        }

        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
            }
        }

        x += QK_K;
    }
}
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    for (int i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);
        const float min = GGML_FP16_TO_FP32(x[i].dmin);

        const uint8_t * q = x[i].qs;

        int is = 0;
        float dl, ml;
        for (int n = 0; n < QK_K; n += 128) {
            int shift = 0;
            for (int j = 0; j < 4; ++j) {

                uint8_t sc = x[i].scales[is++];
                dl = d * (sc & 0xF); ml = min * (sc >> 4);
                for (int l = 0; l < 16; ++l) *y++ = dl * ((int8_t)((q[l] >> shift) & 3)) - ml;

                sc = x[i].scales[is++];
                dl = d * (sc & 0xF); ml = min * (sc >> 4);
                for (int l = 0; l < 16; ++l) *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3)) - ml;

                shift += 2;
            }
            q += 32;
        }
    }
}
static float make_qkx3_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
        uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
        float rmin, float rdelta, int nstep, bool use_mad) {
    float min = x[0];
    float max = x[0];
    float sum_w = weights ? weights[0] : x[0]*x[0];
    float sum_x = sum_w * x[0];
#ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 1; i < n; ++i) {
#else
    for (int i = 1; i < n; ++i) {
#endif
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
        float w = weights ? weights[i] : x[i]*x[i];
        sum_w += w;
        sum_x += w * x[i];
    }
    if (min > 0) {
        min = 0;
    }
    if (max <= min) {
        memset(L, 0, n);
        *the_min = -min;
        return 0.f;
    }
    float iscale = nmax/(max - min);
    float scale = 1/iscale;
    float best_mad = 0;
    for (int i = 0; i < n; ++i) {
        int l = nearest_int(iscale*(x[i] - min));
        L[i] = MAX(0, MIN(nmax, l));
        float diff = scale * L[i] + min - x[i];
        diff = use_mad ? fabsf(diff) : diff*diff;
        float w = weights ? weights[i] : x[i]*x[i];
        best_mad += w * diff;
    }
    if (nstep < 1) {
        *the_min = -min;
        return scale;
    }
    for (int is = 0; is <= nstep; ++is) {
        iscale = (rmin + rdelta*is + nmax)/(max - min);
        float sum_l = 0, sum_l2 = 0, sum_xl = 0;
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale*(x[i] - min));
            l = MAX(0, MIN(nmax, l));
            Laux[i] = l;
            float w = weights ? weights[i] : x[i]*x[i];
            sum_l  += w*l;
            sum_l2 += w*l*l;
            sum_xl += w*l*x[i];
        }
        float D = sum_w * sum_l2 - sum_l * sum_l;
        if (D > 0) {
            float this_scale = (sum_w * sum_xl - sum_x * sum_l)/D;
            float this_min   = (sum_l2 * sum_x - sum_l * sum_xl)/D;
            if (this_min > 0) {
                this_min = 0;
                this_scale = sum_xl / sum_l2;
            }
            float mad = 0;
            for (int i = 0; i < n; ++i) {
                float diff = this_scale * Laux[i] + this_min - x[i];
                diff = use_mad ? fabsf(diff) : diff*diff;
                float w = weights ? weights[i] : x[i]*x[i];
                mad += w * diff;
            }
            if (mad < best_mad) {
                for (int i = 0; i < n; ++i) {
                    L[i] = Laux[i];
                }
                best_mad = mad;
                scale = this_scale;
                min = this_min;
            }
        }
    }
    *the_min = -min;
    return scale;
}
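
// make_qp_quants quantizes a set of non-negative values (here the per-sub-block scales and
// mins of the K-quants) to levels in [0, nmax]: it picks an initial iscale from the maximum,
// probes a few nearby candidates by weighted MSE, then refines the chosen levels with the same
// coordinate-descent update used in make_q3_quants, returning the least-squares scale
// sumlx/suml2.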
static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, const float * quant_weights) {
    float max = 0;
    for (int i = 0; i < n; ++i) {
        max = MAX(max, x[i]);
    }
    if (!max) { // all zero
        for (int i = 0; i < n; ++i) { L[i] = 0; }
        return 0.f;
    }
    float iscale = nmax / max;
    for (int i = 0; i < n; ++i) {
        L[i] = nearest_int(iscale * x[i]);
    }
    float scale = 1/iscale;
    float best_mse = 0;
    for (int i = 0; i < n; ++i) {
        float diff = x[i] - scale*L[i];
        float w = quant_weights[i];
        best_mse += w*diff*diff;
    }
    for (int is = -4; is <= 4; ++is) {
        if (is == 0) continue;
        float iscale_is = (0.1f*is + nmax)/max;
        float scale_is = 1/iscale_is;
        float mse = 0;
        for (int i = 0; i < n; ++i) {
            int l = nearest_int(iscale_is*x[i]);
            l = MIN(nmax, l);
            float diff = x[i] - scale_is*l;
            float w = quant_weights[i];
            mse += w*diff*diff;
        }
        if (mse < best_mse) {
            best_mse = mse;
            iscale = iscale_is;
        }
    }
    float sumlx = 0;
    float suml2 = 0;
    for (int i = 0; i < n; ++i) {
        int l = nearest_int(iscale * x[i]);
        l = MIN(nmax, l);
        L[i] = l;
        float w = quant_weights[i];
        sumlx += w*x[i]*l;
        suml2 += w*l*l;
    }
    for (int itry = 0; itry < 5; ++itry) {
        int n_changed = 0;
        for (int i = 0; i < n; ++i) {
            float w = quant_weights[i];
            float slx = sumlx - w*x[i]*L[i];
            float sl2 = suml2 - w*L[i]*L[i];
            if (slx > 0 && sl2 > 0) {
                int new_l = nearest_int(x[i] * sl2 / slx);
                new_l = MIN(nmax, new_l);
                if (new_l != L[i]) {
                    slx += w*x[i]*new_l;
                    sl2 += w*new_l*new_l;
                    if (slx*slx*suml2 > sumlx*sumlx*sl2) {
                        L[i] = new_l; sumlx = slx; suml2 = sl2;
                        ++n_changed;
                    }
                }
            }
        }
        if (!n_changed) {
            break;
        }
    }
    return sumlx/suml2;
}
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
    GGML_ASSERT(quant_weights);
    assert(k % QK_K == 0);
    const int nb = k / QK_K;
    const bool requantize = true;

    uint8_t L[QK_K];
    uint8_t Laux[16];
    float mins[QK_K/16];
    float scales[QK_K/16];
    float sw[QK_K/16];
    float weight[16];
    uint8_t Ls[QK_K/16], Lm[QK_K/16];

    for (int i = 0; i < nb; i++) {
        memset(sw, 0, QK_K/16*sizeof(float));
        float sumx2 = 0;
        for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
        float sigma2 = sumx2/QK_K;
        for (int j = 0; j < QK_K/16; ++j) {
            const float * GGML_RESTRICT qw = quant_weights + QK_K * i + 16*j;
            for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
            for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
            scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
        }

        float dm, mm;
        dm = make_qp_quants(QK_K/16, 15, scales, Ls, sw);
        mm = make_qp_quants(QK_K/16, 15, mins,   Lm, sw);

        y[i].d    = GGML_FP32_TO_FP16(dm);
        y[i].dmin = GGML_FP32_TO_FP16(mm);
        dm        = GGML_FP16_TO_FP32(y[i].d);
        mm        = GGML_FP16_TO_FP32(y[i].dmin);

        for (int j = 0; j < QK_K/16; ++j) {
            y[i].scales[j] = Ls[j] | (Lm[j] << 4);
        }

        if (requantize) {
            for (int j = 0; j < QK_K/16; ++j) {
                const float d = dm * (y[i].scales[j] & 0xF);
                if (!d) continue;
                const float m = mm * (y[i].scales[j] >> 4);
                for (int ii = 0; ii < 16; ++ii) {
                    int l = nearest_int((x[16*j + ii] + m)/d);
                    l = MAX(0, MIN(3, l));
                    L[16*j + ii] = l;
                }
            }
        }

        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
            }
        }

        x += QK_K;
    }
}
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
    if (!quant_weights) {
        quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);
    }
    else {
        char * qrow = (char *)dst;
        for (int64_t row = 0; row < nrow; ++row) {
            quantize_row_q2_K_impl(src, (block_q2_K*)qrow, n_per_row, quant_weights);
            src += n_per_row;
            qrow += row_size;
        }
    }
    return nrow * row_size;
}

//========================= 3-bit (de)-quantization
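
// Q3_K uses 16 sub-blocks of 16 with signed 6-bit sub-block scales packed into the 12 bytes of
// scales[] (low 4 bits in the first 8 bytes, remaining 2 bits in the last 4), a single fp16
// super-block scale d, the low 2 bits of each quant packed four per byte in qs[], and the
// third bit stored as a bitmask in hmask[]. As dequantize_row_q3_K below shows, values are
// d * (scale - 32) * (q - 4) when the high bit is clear, and d * (scale - 32) * q when it is set.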
void quantize_row_q3_K_ref(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    int8_t L[QK_K];
    float scales[QK_K / 16];

    for (int i = 0; i < nb; i++) {

        float max_scale = 0;
        float amax = 0;
        for (int j = 0; j < QK_K/16; ++j) {
            scales[j] = make_q3_quants(16, 4, x + 16*j, L + 16*j, true);
            float scale = fabsf(scales[j]);
            if (scale > amax) {
                amax = scale; max_scale = scales[j];
            }
        }

        memset(y[i].scales, 0, 12);
        if (max_scale) {
            float iscale = -32.f/max_scale;
            for (int j = 0; j < QK_K/16; ++j) {
                int8_t l = nearest_int(iscale*scales[j]);
                l = MAX(-32, MIN(31, l)) + 32;
                if (j < 8) {
                    y[i].scales[j] = l & 0xF;
                } else {
                    y[i].scales[j-8] |= ((l & 0xF) << 4);
                }
                l >>= 4;
                y[i].scales[j%4 + 8] |= (l << (2*(j/4)));
            }
            y[i].d = GGML_FP32_TO_FP16(1/iscale);
        } else {
            y[i].d = GGML_FP32_TO_FP16(0.f);
        }

        int8_t sc;
        for (int j = 0; j < QK_K/16; ++j) {
            sc = j < 8 ? y[i].scales[j] & 0xF : y[i].scales[j-8] >> 4;
            sc = (sc | (((y[i].scales[8 + j%4] >> (2*(j/4))) & 3) << 4)) - 32;
            float d = GGML_FP16_TO_FP32(y[i].d) * sc;
            if (!d) {
                continue;
            }
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int(x[16*j + ii]/d);
                l = MAX(-4, MIN(3, l));
                L[16*j + ii] = l + 4;
            }
        }

        memset(y[i].hmask, 0, QK_K/8);
        // We put the high-bit for the 1st 8 quants into bit 0, the next 8 into bit 1, etc.
        int m = 0;
        uint8_t hm = 1;
        for (int j = 0; j < QK_K; ++j) {
            if (L[j] > 3) {
                y[i].hmask[m] |= hm;
                L[j] -= 4;
            }
            if (++m == QK_K/8) {
                m = 0; hm <<= 1;
            }
        }
        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
            }
        }

        x += QK_K;
    }
}
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    const uint32_t kmask1 = 0x03030303;
    const uint32_t kmask2 = 0x0f0f0f0f;

    uint32_t aux[4];
    const int8_t * scales = (const int8_t*)aux;

    for (int i = 0; i < nb; i++) {
        const float d_all = GGML_FP16_TO_FP32(x[i].d);

        const uint8_t * GGML_RESTRICT q = x[i].qs;
        const uint8_t * GGML_RESTRICT hm = x[i].hmask;
        uint8_t m = 1;

        memcpy(aux, x[i].scales, 12);
        uint32_t tmp = aux[2];
        aux[2] = ((aux[0] >> 4) & kmask2) | (((tmp >> 4) & kmask1) << 4);
        aux[3] = ((aux[1] >> 4) & kmask2) | (((tmp >> 6) & kmask1) << 4);
        aux[0] = (aux[0] & kmask2) | (((tmp >> 0) & kmask1) << 4);
        aux[1] = (aux[1] & kmask2) | (((tmp >> 2) & kmask1) << 4);

        int is = 0;
        float dl;
        for (int n = 0; n < QK_K; n += 128) {
            int shift = 0;
            for (int j = 0; j < 4; ++j) {

                dl = d_all * (scales[is++] - 32);
                for (int l = 0; l < 16; ++l) {
                    *y++ = dl * ((int8_t)((q[l+ 0] >> shift) & 3) - ((hm[l+ 0] & m) ? 0 : 4));
                }

                dl = d_all * (scales[is++] - 32);
                for (int l = 0; l < 16; ++l) {
                    *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3) - ((hm[l+16] & m) ? 0 : 4));
                }

                shift += 2;
                m <<= 1;
            }
            q += 32;
        }
    }
}
static void quantize_row_q3_K_impl(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t n_per_row, const float * GGML_RESTRICT quant_weights) {
    assert(n_per_row % QK_K == 0);
    const int nb = n_per_row / QK_K;

    int8_t L[QK_K];
    float scales[QK_K / 16];
    float weight[16];
    float sw[QK_K / 16];
    int8_t Ls[QK_K / 16];

    for (int i = 0; i < nb; i++) {

        float sumx2 = 0;
        for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
        float sigma2 = 2*sumx2/QK_K;

        for (int j = 0; j < QK_K/16; ++j) {
            if (quant_weights) {
                const float * qw = quant_weights + QK_K * i + 16*j;
                for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j+l]*x[16*j+l]);
            } else {
                for (int l = 0; l < 16; ++l) weight[l] = x[16*j+l]*x[16*j+l];
            }
            float sumw = 0;
            for (int l = 0; l < 16; ++l) sumw += weight[l];
            sw[j] = sumw;

            scales[j] = make_qx_quants(16, 4, x + 16*j, L + 16*j, 1, weight);
        }

        memset(y[i].scales, 0, 12);

        float d_block = make_qx_quants(QK_K/16, 32, scales, Ls, 1, sw);
        for (int j = 0; j < QK_K/16; ++j) {
            int l = Ls[j];
            if (j < 8) {
                y[i].scales[j] = l & 0xF;
            } else {
                y[i].scales[j-8] |= ((l & 0xF) << 4);
            }
            l >>= 4;
            y[i].scales[j%4 + 8] |= (l << (2*(j/4)));
        }
        y[i].d = GGML_FP32_TO_FP16(d_block);

        int8_t sc;
        for (int j = 0; j < QK_K/16; ++j) {
            sc = j < 8 ? y[i].scales[j] & 0xF : y[i].scales[j-8] >> 4;
            sc = (sc | (((y[i].scales[8 + j%4] >> (2*(j/4))) & 3) << 4)) - 32;
            float d = GGML_FP16_TO_FP32(y[i].d) * sc;
            if (!d) {
                continue;
            }
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int(x[16*j + ii]/d);
                l = MAX(-4, MIN(3, l));
                L[16*j + ii] = l + 4;
            }
        }

        memset(y[i].hmask, 0, QK_K/8);
        // We put the high-bit for the 1st 8 quants into bit 0, the next 8 into bit 1, etc.
        int m = 0;
        uint8_t hm = 1;
        for (int j = 0; j < QK_K; ++j) {
            if (L[j] > 3) {
                y[i].hmask[m] |= hm;
                L[j] -= 4;
            }
            if (++m == QK_K/8) {
                m = 0; hm <<= 1;
            }
        }
        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
            }
        }

        x += QK_K;
    }
}
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
    if (!quant_weights) {
        quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);
    }
    else {
        char * qrow = (char *)dst;
        for (int64_t row = 0; row < nrow; ++row) {
            quantize_row_q3_K_impl(src, (block_q3_K*)qrow, n_per_row, quant_weights);
            src += n_per_row;
            qrow += row_size;
        }
    }
    return nrow * row_size;
}

// ====================== 4-bit (de)-quantization
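
// Q4_K splits the super-block into 8 sub-blocks of 32. Each sub-block has a 6-bit scale and a
// 6-bit min packed in the layout that get_scale_min_k4 decodes, with fp16 super-block
// multipliers d and dmin; the 4-bit quants are packed two per byte. Values reconstruct as
// d*scale*q - dmin*min, as in dequantize_row_q4_K below.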
  1034. void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k) {
  1035. assert(k % QK_K == 0);
  1036. const int nb = k / QK_K;
  1037. uint8_t L[QK_K];
  1038. uint8_t Laux[32];
  1039. float weights[32];
  1040. float mins[QK_K/32];
  1041. float scales[QK_K/32];
  1042. for (int i = 0; i < nb; i++) {
  1043. float max_scale = 0; // as we are deducting the min, scales are always positive
  1044. float max_min = 0;
  1045. for (int j = 0; j < QK_K/32; ++j) {
  1046. //scales[j] = make_qkx1_quants(32, 15, x + 32*j, L + 32*j, &mins[j], 9, 0.5f);
  1047. float sum_x2 = 0;
  1048. for (int l = 0; l < 32; ++l) sum_x2 += x[32*j + l] * x[32*j + l];
  1049. float av_x = sqrtf(sum_x2/32);
  1050. for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
  1051. scales[j] = make_qkx2_quants(32, 15, x + 32*j, weights, L + 32*j, &mins[j], Laux, -1.f, 0.1f, 20, false);
  1052. float scale = scales[j];
  1053. if (scale > max_scale) {
  1054. max_scale = scale;
  1055. }
  1056. float min = mins[j];
  1057. if (min > max_min) {
  1058. max_min = min;
  1059. }
  1060. }
  1061. float inv_scale = max_scale > 0 ? 63.f/max_scale : 0.f;
  1062. float inv_min = max_min > 0 ? 63.f/max_min : 0.f;
  1063. for (int j = 0; j < QK_K/32; ++j) {
  1064. uint8_t ls = nearest_int(inv_scale*scales[j]);
  1065. uint8_t lm = nearest_int(inv_min*mins[j]);
  1066. ls = MIN(63, ls);
  1067. lm = MIN(63, lm);
  1068. if (j < 4) {
  1069. y[i].scales[j] = ls;
  1070. y[i].scales[j+4] = lm;
  1071. } else {
  1072. y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
  1073. y[i].scales[j-4] |= ((ls >> 4) << 6);
  1074. y[i].scales[j-0] |= ((lm >> 4) << 6);
  1075. }
  1076. }
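// Layout note with an illustrative decoder (a sketch consistent with the
// packing above, not necessarily the exact body of get_scale_min_k4): the
// eight 6-bit scales and eight 6-bit mins occupy the 12 scale bytes as
// bytes 0-3 = scales 0-3, bytes 4-7 = mins 0-3 (low 6 bits each, top 2 bits
// reused), and bytes 8-11 = low 4 bits of scale/min 4-7, with their top 2
// bits stored in bits 6-7 of bytes 0-3 and 4-7 respectively.
//
//   if (j < 4) {
//       sc = scales[j]     & 63;
//       mn = scales[j + 4] & 63;
//   } else {
//       sc = (scales[j + 4] & 0xF) | ((scales[j - 4] >> 6) << 4);
//       mn = (scales[j + 4] >>  4) | ((scales[j    ] >> 6) << 4);
//   }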
  1077. y[i].d = GGML_FP32_TO_FP16(max_scale/63.f);
  1078. y[i].dmin = GGML_FP32_TO_FP16(max_min/63.f);
  1079. uint8_t sc, m;
  1080. for (int j = 0; j < QK_K/32; ++j) {
  1081. get_scale_min_k4(j, y[i].scales, &sc, &m);
  1082. const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
  1083. if (!d) continue;
  1084. const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
  1085. for (int ii = 0; ii < 32; ++ii) {
  1086. int l = nearest_int((x[32*j + ii] + dm)/d);
  1087. l = MAX(0, MIN(15, l));
  1088. L[32*j + ii] = l;
  1089. }
  1090. }
  1091. uint8_t * q = y[i].qs;
  1092. for (int j = 0; j < QK_K; j += 64) {
  1093. for (int l = 0; l < 32; ++l) q[l] = L[j + l] | (L[j + l + 32] << 4);
  1094. q += 32;
  1095. }
  1096. x += QK_K;
  1097. }
  1098. }
  1099. void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1100. assert(k % QK_K == 0);
  1101. const int nb = k / QK_K;
  1102. for (int i = 0; i < nb; i++) {
  1103. const uint8_t * q = x[i].qs;
  1104. const float d = GGML_FP16_TO_FP32(x[i].d);
  1105. const float min = GGML_FP16_TO_FP32(x[i].dmin);
  1106. int is = 0;
  1107. uint8_t sc, m;
  1108. for (int j = 0; j < QK_K; j += 64) {
  1109. get_scale_min_k4(is + 0, x[i].scales, &sc, &m);
  1110. const float d1 = d * sc; const float m1 = min * m;
  1111. get_scale_min_k4(is + 1, x[i].scales, &sc, &m);
  1112. const float d2 = d * sc; const float m2 = min * m;
  1113. for (int l = 0; l < 32; ++l) *y++ = d1 * (q[l] & 0xF) - m1;
  1114. for (int l = 0; l < 32; ++l) *y++ = d2 * (q[l] >> 4) - m2;
  1115. q += 32; is += 2;
  1116. }
  1117. }
  1118. }
  1119. static void quantize_row_q4_K_impl(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1120. assert(n_per_row % QK_K == 0);
  1121. const int64_t nb = n_per_row / QK_K;
  1122. uint8_t L[QK_K];
  1123. uint8_t Laux[32];
  1124. uint8_t Ls[QK_K/32];
  1125. uint8_t Lm[QK_K/32];
  1126. float weights[32];
  1127. float sw[QK_K/32];
  1128. float mins[QK_K/32];
  1129. float scales[QK_K/32];
  1130. for (int i = 0; i < nb; i++) {
  1131. float sum_x2 = 0;
  1132. for (int l = 0; l < QK_K; ++l) sum_x2 += x[l] * x[l];
  1133. float sigma2 = 2*sum_x2/QK_K;
  1134. float av_x = sqrtf(sigma2);
  1135. for (int j = 0; j < QK_K/32; ++j) {
  1136. if (quant_weights) {
  1137. const float * qw = quant_weights + QK_K*i + 32*j;
  1138. for (int l = 0; l < 32; ++l) weights[l] = qw[l] * sqrtf(sigma2 + x[32*j + l]*x[32*j + l]);
  1139. } else {
  1140. for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
  1141. }
  1142. float sumw = 0;
  1143. for (int l = 0; l < 32; ++l) sumw += weights[l];
  1144. sw[j] = sumw;
  1145. scales[j] = make_qkx3_quants(32, 15, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
  1146. }
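// Note (an assumption about the caller, not stated in this file): quant_weights,
// when provided, is per-element importance data (e.g. derived from an importance
// matrix), and each weight combines it with a magnitude term,
//
//     weights[l] = qw[l] * sqrt(sigma2 + x[l]^2),
//
// so elements that are both important and large dominate the weighted fits done
// by make_qkx3_quants above and make_qp_quants below.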
  1147. float d_block = make_qp_quants(QK_K/32, 63, scales, Ls, sw);
  1148. float m_block = make_qp_quants(QK_K/32, 63, mins, Lm, sw);
  1149. for (int j = 0; j < QK_K/32; ++j) {
  1150. uint8_t ls = Ls[j];
  1151. uint8_t lm = Lm[j];
  1152. if (j < 4) {
  1153. y[i].scales[j] = ls;
  1154. y[i].scales[j+4] = lm;
  1155. } else {
  1156. y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
  1157. y[i].scales[j-4] |= ((ls >> 4) << 6);
  1158. y[i].scales[j-0] |= ((lm >> 4) << 6);
  1159. }
  1160. }
  1161. y[i].d = GGML_FP32_TO_FP16(d_block);
  1162. y[i].dmin = GGML_FP32_TO_FP16(m_block);
  1163. uint8_t sc, m;
  1164. for (int j = 0; j < QK_K/32; ++j) {
  1165. get_scale_min_k4(j, y[i].scales, &sc, &m);
  1166. const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
  1167. if (!d) continue;
  1168. const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
  1169. for (int ii = 0; ii < 32; ++ii) {
  1170. int l = nearest_int((x[32*j + ii] + dm)/d);
  1171. l = MAX(0, MIN(15, l));
  1172. L[32*j + ii] = l;
  1173. }
  1174. }
  1175. uint8_t * q = y[i].qs;
  1176. for (int j = 0; j < QK_K; j += 64) {
  1177. for (int l = 0; l < 32; ++l) q[l] = L[j + l] | (L[j + l + 32] << 4);
  1178. q += 32;
  1179. }
  1180. x += QK_K;
  1181. }
  1182. }
  1183. size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1184. size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
  1185. if (!quant_weights) {
  1186. quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);
  1187. }
  1188. else {
  1189. char * qrow = (char *)dst;
  1190. for (int64_t row = 0; row < nrow; ++row) {
  1191. quantize_row_q4_K_impl(src, (block_q4_K*)qrow, n_per_row, quant_weights);
  1192. src += n_per_row;
  1193. qrow += row_size;
  1194. }
  1195. }
  1196. return nrow * row_size;
  1197. }
  1198. // ====================== 5-bit (de)-quantization
  1199. void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k) {
  1200. assert(k % QK_K == 0);
  1201. const int64_t nb = k / QK_K;
  1202. uint8_t L[QK_K];
  1203. float mins[QK_K/32];
  1204. float scales[QK_K/32];
  1205. float weights[32];
  1206. uint8_t Laux[32];
  1207. for (int i = 0; i < nb; i++) {
1208. float max_scale = 0; // since we subtract the min, scales are always positive
  1209. float max_min = 0;
  1210. for (int j = 0; j < QK_K/32; ++j) {
  1211. //scales[j] = make_qkx1_quants(32, 31, x + 32*j, L + 32*j, &mins[j], 9, 0.5f);
  1212. float sum_x2 = 0;
  1213. for (int l = 0; l < 32; ++l) sum_x2 += x[32*j + l] * x[32*j + l];
  1214. float av_x = sqrtf(sum_x2/32);
  1215. for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
  1216. scales[j] = make_qkx2_quants(32, 31, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.5f, 0.1f, 15, false);
  1217. float scale = scales[j];
  1218. if (scale > max_scale) {
  1219. max_scale = scale;
  1220. }
  1221. float min = mins[j];
  1222. if (min > max_min) {
  1223. max_min = min;
  1224. }
  1225. }
  1226. float inv_scale = max_scale > 0 ? 63.f/max_scale : 0.f;
  1227. float inv_min = max_min > 0 ? 63.f/max_min : 0.f;
  1228. for (int j = 0; j < QK_K/32; ++j) {
  1229. uint8_t ls = nearest_int(inv_scale*scales[j]);
  1230. uint8_t lm = nearest_int(inv_min*mins[j]);
  1231. ls = MIN(63, ls);
  1232. lm = MIN(63, lm);
  1233. if (j < 4) {
  1234. y[i].scales[j] = ls;
  1235. y[i].scales[j+4] = lm;
  1236. } else {
  1237. y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
  1238. y[i].scales[j-4] |= ((ls >> 4) << 6);
  1239. y[i].scales[j-0] |= ((lm >> 4) << 6);
  1240. }
  1241. }
  1242. y[i].d = GGML_FP32_TO_FP16(max_scale/63.f);
  1243. y[i].dmin = GGML_FP32_TO_FP16(max_min/63.f);
  1244. uint8_t sc, m;
  1245. for (int j = 0; j < QK_K/32; ++j) {
  1246. get_scale_min_k4(j, y[i].scales, &sc, &m);
  1247. const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
  1248. if (!d) continue;
  1249. const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
  1250. for (int ii = 0; ii < 32; ++ii) {
  1251. int l = nearest_int((x[32*j + ii] + dm)/d);
  1252. l = MAX(0, MIN(31, l));
  1253. L[32*j + ii] = l;
  1254. }
  1255. }
  1256. uint8_t * GGML_RESTRICT qh = y[i].qh;
  1257. uint8_t * GGML_RESTRICT ql = y[i].qs;
  1258. memset(qh, 0, QK_K/8);
  1259. uint8_t m1 = 1, m2 = 2;
  1260. for (int n = 0; n < QK_K; n += 64) {
  1261. for (int j = 0; j < 32; ++j) {
  1262. int l1 = L[n + j];
  1263. if (l1 > 15) {
  1264. l1 -= 16; qh[j] |= m1;
  1265. }
  1266. int l2 = L[n + j + 32];
  1267. if (l2 > 15) {
  1268. l2 -= 16; qh[j] |= m2;
  1269. }
  1270. ql[j] = l1 | (l2 << 4);
  1271. }
  1272. m1 <<= 2; m2 <<= 2;
  1273. ql += 32;
  1274. }
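// Layout note (summarizing the loop above): for chunk c = n/64 (c = 0..3),
// bit 2*c of qh[j] holds the 5th bit of quant n + j and bit 2*c + 1 holds the
// 5th bit of quant n + j + 32; e.g. the high bit of quant 162 (n = 128, j = 2,
// second half) lands in bit 5 of qh[2].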
  1275. x += QK_K;
  1276. }
  1277. }
  1278. void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1279. assert(k % QK_K == 0);
  1280. const int64_t nb = k / QK_K;
  1281. for (int i = 0; i < nb; i++) {
  1282. const uint8_t * ql = x[i].qs;
  1283. const uint8_t * qh = x[i].qh;
  1284. const float d = GGML_FP16_TO_FP32(x[i].d);
  1285. const float min = GGML_FP16_TO_FP32(x[i].dmin);
  1286. int is = 0;
  1287. uint8_t sc, m;
  1288. uint8_t u1 = 1, u2 = 2;
  1289. for (int j = 0; j < QK_K; j += 64) {
  1290. get_scale_min_k4(is + 0, x[i].scales, &sc, &m);
  1291. const float d1 = d * sc; const float m1 = min * m;
  1292. get_scale_min_k4(is + 1, x[i].scales, &sc, &m);
  1293. const float d2 = d * sc; const float m2 = min * m;
  1294. for (int l = 0; l < 32; ++l) *y++ = d1 * ((ql[l] & 0xF) + (qh[l] & u1 ? 16 : 0)) - m1;
  1295. for (int l = 0; l < 32; ++l) *y++ = d2 * ((ql[l] >> 4) + (qh[l] & u2 ? 16 : 0)) - m2;
  1296. ql += 32; is += 2;
  1297. u1 <<= 2; u2 <<= 2;
  1298. }
  1299. }
  1300. }
  1301. static void quantize_row_q5_K_impl(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1302. assert(n_per_row % QK_K == 0);
  1303. const int64_t nb = n_per_row / QK_K;
  1304. uint8_t L[QK_K];
  1305. uint8_t Laux[32];
  1306. uint8_t Ls[QK_K/32];
  1307. uint8_t Lm[QK_K/32];
  1308. float mins[QK_K/32];
  1309. float scales[QK_K/32];
  1310. float sw[QK_K/32];
  1311. float weights[32];
  1312. for (int i = 0; i < nb; i++) {
  1313. float sum_x2 = 0;
  1314. for (int l = 0; l < QK_K; ++l) sum_x2 += x[l] * x[l];
  1315. float sigma2 = 2*sum_x2/QK_K;
  1316. float av_x = sqrtf(sigma2);
  1317. for (int j = 0; j < QK_K/32; ++j) {
  1318. if (quant_weights) {
  1319. const float * qw = quant_weights + QK_K*i + 32*j;
  1320. for (int l = 0; l < 32; ++l) weights[l] = qw[l] * sqrtf(sigma2 + x[32*j + l]*x[32*j + l]);
  1321. } else {
  1322. for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
  1323. }
  1324. float sumw = 0;
  1325. for (int l = 0; l < 32; ++l) sumw += weights[l];
  1326. sw[j] = sumw;
  1327. scales[j] = make_qkx3_quants(32, 31, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
  1328. }
  1329. float d_block = make_qp_quants(QK_K/32, 63, scales, Ls, sw);
  1330. float m_block = make_qp_quants(QK_K/32, 63, mins, Lm, sw);
  1331. for (int j = 0; j < QK_K/32; ++j) {
  1332. uint8_t ls = Ls[j];
  1333. uint8_t lm = Lm[j];
  1334. ls = MIN(63, ls);
  1335. lm = MIN(63, lm);
  1336. if (j < 4) {
  1337. y[i].scales[j] = ls;
  1338. y[i].scales[j+4] = lm;
  1339. } else {
  1340. y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
  1341. y[i].scales[j-4] |= ((ls >> 4) << 6);
  1342. y[i].scales[j-0] |= ((lm >> 4) << 6);
  1343. }
  1344. }
  1345. y[i].d = GGML_FP32_TO_FP16(d_block);
  1346. y[i].dmin = GGML_FP32_TO_FP16(m_block);
  1347. uint8_t sc, m;
  1348. for (int j = 0; j < QK_K/32; ++j) {
  1349. get_scale_min_k4(j, y[i].scales, &sc, &m);
  1350. const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
  1351. if (!d) continue;
  1352. const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
  1353. for (int ii = 0; ii < 32; ++ii) {
  1354. int l = nearest_int((x[32*j + ii] + dm)/d);
  1355. l = MAX(0, MIN(31, l));
  1356. L[32*j + ii] = l;
  1357. }
  1358. }
  1359. uint8_t * GGML_RESTRICT qh = y[i].qh;
  1360. uint8_t * GGML_RESTRICT ql = y[i].qs;
  1361. memset(qh, 0, QK_K/8);
  1362. uint8_t m1 = 1, m2 = 2;
  1363. for (int n = 0; n < QK_K; n += 64) {
  1364. for (int j = 0; j < 32; ++j) {
  1365. int l1 = L[n + j];
  1366. if (l1 > 15) {
  1367. l1 -= 16; qh[j] |= m1;
  1368. }
  1369. int l2 = L[n + j + 32];
  1370. if (l2 > 15) {
  1371. l2 -= 16; qh[j] |= m2;
  1372. }
  1373. ql[j] = l1 | (l2 << 4);
  1374. }
  1375. m1 <<= 2; m2 <<= 2;
  1376. ql += 32;
  1377. }
  1378. x += QK_K;
  1379. }
  1380. }
  1381. size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1382. size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
  1383. if (!quant_weights) {
  1384. quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);
  1385. }
  1386. else {
  1387. char * qrow = (char *)dst;
  1388. for (int64_t row = 0; row < nrow; ++row) {
  1389. quantize_row_q5_K_impl(src, (block_q5_K*)qrow, n_per_row, quant_weights);
  1390. src += n_per_row;
  1391. qrow += row_size;
  1392. }
  1393. }
  1394. return nrow * row_size;
  1395. }
  1396. // ====================== 6-bit (de)-quantization
  1397. void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k) {
  1398. assert(k % QK_K == 0);
  1399. const int64_t nb = k / QK_K;
  1400. int8_t L[QK_K];
  1401. float scales[QK_K/16];
  1402. for (int i = 0; i < nb; i++) {
  1403. float max_scale = 0;
  1404. float max_abs_scale = 0;
  1405. for (int ib = 0; ib < QK_K/16; ++ib) {
  1406. const float scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1, NULL);
  1407. scales[ib] = scale;
  1408. const float abs_scale = fabsf(scale);
  1409. if (abs_scale > max_abs_scale) {
  1410. max_abs_scale = abs_scale;
  1411. max_scale = scale;
  1412. }
  1413. }
  1414. if (max_abs_scale < GROUP_MAX_EPS) {
  1415. memset(&y[i], 0, sizeof(block_q6_K));
  1416. y[i].d = GGML_FP32_TO_FP16(0.f);
  1417. x += QK_K;
  1418. continue;
  1419. }
  1420. float iscale = -128.f/max_scale;
  1421. y[i].d = GGML_FP32_TO_FP16(1/iscale);
  1422. for (int ib = 0; ib < QK_K/16; ++ib) {
  1423. y[i].scales[ib] = MIN(127, nearest_int(iscale*scales[ib]));
  1424. }
  1425. for (int j = 0; j < QK_K/16; ++j) {
  1426. float d = GGML_FP16_TO_FP32(y[i].d) * y[i].scales[j];
  1427. if (!d) {
  1428. continue;
  1429. }
  1430. for (int ii = 0; ii < 16; ++ii) {
  1431. int l = nearest_int(x[16*j + ii]/d);
  1432. l = MAX(-32, MIN(31, l));
  1433. L[16*j + ii] = l + 32;
  1434. }
  1435. }
  1436. uint8_t * GGML_RESTRICT ql = y[i].ql;
  1437. uint8_t * GGML_RESTRICT qh = y[i].qh;
  1438. for (int j = 0; j < QK_K; j += 128) {
  1439. for (int l = 0; l < 32; ++l) {
  1440. const uint8_t q1 = L[j + l + 0] & 0xF;
  1441. const uint8_t q2 = L[j + l + 32] & 0xF;
  1442. const uint8_t q3 = L[j + l + 64] & 0xF;
  1443. const uint8_t q4 = L[j + l + 96] & 0xF;
  1444. ql[l+ 0] = q1 | (q3 << 4);
  1445. ql[l+32] = q2 | (q4 << 4);
  1446. qh[l] = (L[j + l] >> 4) | ((L[j + l + 32] >> 4) << 2) | ((L[j + l + 64] >> 4) << 4) | ((L[j + l + 96] >> 4) << 6);
  1447. }
  1448. ql += 64;
  1449. qh += 32;
  1450. }
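// Layout note (illustrative shorthand, low4/hi2 are not real helpers): each
// 6-bit quant is split into a low nibble in ql and a 2-bit high part in qh.
// Within one 128-value chunk, for l in 0..31:
//
//   ql[l]      = low4(L[l])      | (low4(L[l + 64]) << 4)
//   ql[l + 32] = low4(L[l + 32]) | (low4(L[l + 96]) << 4)
//   qh[l]      = hi2(L[l]) | (hi2(L[l + 32]) << 2) | (hi2(L[l + 64]) << 4) | (hi2(L[l + 96]) << 6)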
  1451. x += QK_K;
  1452. }
  1453. }
  1454. void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1455. assert(k % QK_K == 0);
  1456. const int64_t nb = k / QK_K;
  1457. for (int i = 0; i < nb; i++) {
  1458. const float d = GGML_FP16_TO_FP32(x[i].d);
  1459. const uint8_t * GGML_RESTRICT ql = x[i].ql;
  1460. const uint8_t * GGML_RESTRICT qh = x[i].qh;
  1461. const int8_t * GGML_RESTRICT sc = x[i].scales;
  1462. for (int n = 0; n < QK_K; n += 128) {
  1463. for (int l = 0; l < 32; ++l) {
  1464. int is = l/16;
  1465. const int8_t q1 = (int8_t)((ql[l + 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32;
  1466. const int8_t q2 = (int8_t)((ql[l + 32] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32;
  1467. const int8_t q3 = (int8_t)((ql[l + 0] >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32;
  1468. const int8_t q4 = (int8_t)((ql[l + 32] >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32;
  1469. y[l + 0] = d * sc[is + 0] * q1;
  1470. y[l + 32] = d * sc[is + 2] * q2;
  1471. y[l + 64] = d * sc[is + 4] * q3;
  1472. y[l + 96] = d * sc[is + 6] * q4;
  1473. }
  1474. y += 128;
  1475. ql += 64;
  1476. qh += 32;
  1477. sc += 8;
  1478. }
  1479. }
  1480. }
  1481. static void quantize_row_q6_K_impl(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1482. assert(n_per_row % QK_K == 0);
  1483. const int64_t nb = n_per_row / QK_K;
  1484. int8_t L[QK_K];
  1485. float scales[QK_K/16];
  1486. //float weights[16];
  1487. for (int i = 0; i < nb; i++) {
  1488. //float sum_x2 = 0;
  1489. //for (int j = 0; j < QK_K; ++j) sum_x2 += x[j]*x[j];
  1490. //float sigma2 = sum_x2/QK_K;
  1491. float max_scale = 0;
  1492. float max_abs_scale = 0;
  1493. for (int ib = 0; ib < QK_K/16; ++ib) {
  1494. float scale;
  1495. if (quant_weights) {
  1496. const float * qw = quant_weights + QK_K*i + 16*ib;
  1497. //for (int j = 0; j < 16; ++j) weights[j] = qw[j] * sqrtf(sigma2 + x[16*ib + j]*x[16*ib + j]);
  1498. //scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1, weights);
  1499. scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1, qw);
  1500. } else {
  1501. scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1, NULL);
  1502. }
  1503. scales[ib] = scale;
  1504. const float abs_scale = fabsf(scale);
  1505. if (abs_scale > max_abs_scale) {
  1506. max_abs_scale = abs_scale;
  1507. max_scale = scale;
  1508. }
  1509. }
  1510. if (max_abs_scale < GROUP_MAX_EPS) {
  1511. memset(&y[i], 0, sizeof(block_q6_K));
  1512. y[i].d = GGML_FP32_TO_FP16(0.f);
  1513. x += QK_K;
  1514. continue;
  1515. }
  1516. float iscale = -128.f/max_scale;
  1517. y[i].d = GGML_FP32_TO_FP16(1/iscale);
  1518. for (int ib = 0; ib < QK_K/16; ++ib) {
  1519. y[i].scales[ib] = MIN(127, nearest_int(iscale*scales[ib]));
  1520. }
  1521. for (int j = 0; j < QK_K/16; ++j) {
  1522. float d = GGML_FP16_TO_FP32(y[i].d) * y[i].scales[j];
  1523. if (!d) {
  1524. continue;
  1525. }
  1526. for (int ii = 0; ii < 16; ++ii) {
  1527. int l = nearest_int(x[16*j + ii]/d);
  1528. l = MAX(-32, MIN(31, l));
  1529. L[16*j + ii] = l + 32;
  1530. }
  1531. }
  1532. uint8_t * GGML_RESTRICT ql = y[i].ql;
  1533. uint8_t * GGML_RESTRICT qh = y[i].qh;
  1534. for (int j = 0; j < QK_K; j += 128) {
  1535. for (int l = 0; l < 32; ++l) {
  1536. const uint8_t q1 = L[j + l + 0] & 0xF;
  1537. const uint8_t q2 = L[j + l + 32] & 0xF;
  1538. const uint8_t q3 = L[j + l + 64] & 0xF;
  1539. const uint8_t q4 = L[j + l + 96] & 0xF;
  1540. ql[l+ 0] = q1 | (q3 << 4);
  1541. ql[l+32] = q2 | (q4 << 4);
  1542. qh[l] = (L[j + l] >> 4) | ((L[j + l + 32] >> 4) << 2) | ((L[j + l + 64] >> 4) << 4) | ((L[j + l + 96] >> 4) << 6);
  1543. }
  1544. ql += 64;
  1545. qh += 32;
  1546. }
  1547. x += QK_K;
  1548. }
  1549. }
  1550. size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1551. size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
  1552. if (!quant_weights) {
  1553. quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);
  1554. }
  1555. else {
  1556. char * qrow = (char *)dst;
  1557. for (int64_t row = 0; row < nrow; ++row) {
  1558. quantize_row_q6_K_impl(src, (block_q6_K*)qrow, n_per_row, quant_weights);
  1559. src += n_per_row;
  1560. qrow += row_size;
  1561. }
  1562. }
  1563. return nrow * row_size;
  1564. }
  1565. static void quantize_row_q4_0_impl(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1566. static_assert(QK4_0 == 32, "QK4_0 must be 32");
  1567. if (!quant_weights) {
  1568. quantize_row_q4_0_ref(x, y, n_per_row);
  1569. return;
  1570. }
  1571. float weight[QK4_0];
  1572. int8_t L[QK4_0];
  1573. float sum_x2 = 0;
  1574. for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
  1575. float sigma2 = sum_x2/n_per_row;
  1576. const int64_t nb = n_per_row/QK4_0;
  1577. for (int ib = 0; ib < nb; ++ib) {
  1578. const float * xb = x + QK4_0 * ib;
  1579. const float * qw = quant_weights + QK4_0 * ib;
  1580. for (int j = 0; j < QK4_0; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
  1581. float d = make_qx_quants(QK4_0, 8, xb, L, 1, weight);
  1582. y[ib].d = GGML_FP32_TO_FP16(d);
  1583. for (int j = 0; j < 16; ++j) {
  1584. y[ib].qs[j] = L[j] | (L[j+16] << 4);
  1585. }
  1586. }
  1587. }
  1588. size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1589. if (!quant_weights) {
  1590. quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
  1591. return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
  1592. }
  1593. size_t row_size = ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
  1594. char * qrow = (char *)dst;
  1595. for (int64_t row = 0; row < nrow; ++row) {
  1596. quantize_row_q4_0_impl(src, (block_q4_0*)qrow, n_per_row, quant_weights);
  1597. src += n_per_row;
  1598. qrow += row_size;
  1599. }
  1600. return nrow * row_size;
  1601. }
  1602. static void quantize_row_q4_1_impl(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1603. static_assert(QK4_1 == 32, "QK4_1 must be 32");
  1604. if (!quant_weights) {
  1605. quantize_row_q4_1_ref(x, y, n_per_row);
  1606. return;
  1607. }
  1608. float weight[QK4_1];
  1609. uint8_t L[QK4_1], Laux[QK4_1];
  1610. float sum_x2 = 0;
  1611. for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
  1612. float sigma2 = sum_x2/n_per_row;
  1613. const int64_t nb = n_per_row/QK4_1;
  1614. for (int ib = 0; ib < nb; ++ib) {
  1615. const float * xb = x + QK4_1 * ib;
  1616. const float * qw = quant_weights + QK4_1 * ib;
  1617. for (int j = 0; j < QK4_1; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
  1618. float min;
  1619. float d = make_qkx3_quants(QK4_1, 15, xb, weight, L, &min, Laux, -0.9f, 0.05f, 36, false);
  1620. y[ib].d = GGML_FP32_TO_FP16(d);
  1621. y[ib].m = GGML_FP32_TO_FP16(-min);
  1622. for (int j = 0; j < 16; ++j) {
  1623. y[ib].qs[j] = L[j] | (L[j+16] << 4);
  1624. }
  1625. }
  1626. }
  1627. size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1628. if (!quant_weights) {
  1629. quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
  1630. return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
  1631. }
  1632. size_t row_size = ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
  1633. char * qrow = (char *)dst;
  1634. for (int64_t row = 0; row < nrow; ++row) {
  1635. quantize_row_q4_1_impl(src, (block_q4_1*)qrow, n_per_row, quant_weights);
  1636. src += n_per_row;
  1637. qrow += row_size;
  1638. }
  1639. return nrow * row_size;
  1640. }
  1641. static void quantize_row_q5_0_impl(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1642. static_assert(QK5_0 == 32, "QK5_0 must be 32");
  1643. if (!quant_weights) {
  1644. quantize_row_q5_0_ref(x, y, n_per_row);
  1645. return;
  1646. }
  1647. float weight[QK5_0];
  1648. int8_t L[QK5_0];
  1649. float sum_x2 = 0;
  1650. for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
  1651. float sigma2 = sum_x2/n_per_row;
  1652. const int64_t nb = n_per_row/QK5_0;
  1653. for (int ib = 0; ib < nb; ++ib) {
  1654. const float * xb = x + QK5_0 * ib;
  1655. const float * qw = quant_weights + QK5_0 * ib;
  1656. for (int j = 0; j < QK5_0; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
  1657. float d = make_qx_quants(QK5_0, 16, xb, L, 1, weight);
  1658. y[ib].d = GGML_FP32_TO_FP16(d);
  1659. uint32_t qh = 0;
  1660. for (int j = 0; j < 16; ++j) {
  1661. const uint8_t xi0 = L[j];
  1662. const uint8_t xi1 = L[j+16];
  1663. y[ib].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);
1664. // get the 5th bit and store it in qh at the right position
  1665. qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
  1666. qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_0/2);
  1667. }
  1668. memcpy(&y[ib].qh, &qh, sizeof(qh));
  1669. }
  1670. }
  1671. size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1672. if (!quant_weights) {
  1673. quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
  1674. return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
  1675. }
  1676. size_t row_size = ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
  1677. char * qrow = (char *)dst;
  1678. for (int64_t row = 0; row < nrow; ++row) {
  1679. quantize_row_q5_0_impl(src, (block_q5_0*)qrow, n_per_row, quant_weights);
  1680. src += n_per_row;
  1681. qrow += row_size;
  1682. }
  1683. return nrow * row_size;
  1684. }
  1685. static void quantize_row_q5_1_impl(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
  1686. static_assert(QK5_1 == 32, "QK5_1 must be 32");
  1687. if (!quant_weights) {
  1688. quantize_row_q5_1_ref(x, y, n_per_row);
  1689. return;
  1690. }
  1691. float weight[QK5_1];
  1692. uint8_t L[QK5_1], Laux[QK5_1];
  1693. float sum_x2 = 0;
  1694. for (int j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
  1695. float sigma2 = sum_x2/n_per_row;
  1696. const int64_t nb = n_per_row/QK5_1;
  1697. for (int ib = 0; ib < nb; ++ib) {
  1698. const float * xb = x + QK5_1 * ib;
  1699. const float * qw = quant_weights + QK5_1 * ib;
  1700. for (int j = 0; j < QK5_1; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
  1701. float min;
  1702. float d = make_qkx3_quants(QK5_1, 31, xb, weight, L, &min, Laux, -0.9f, 0.05f, 36, false);
  1703. y[ib].d = GGML_FP32_TO_FP16(d);
  1704. y[ib].m = GGML_FP32_TO_FP16(-min);
  1705. uint32_t qh = 0;
  1706. for (int j = 0; j < 16; ++j) {
  1707. const uint8_t xi0 = L[j];
  1708. const uint8_t xi1 = L[j+16];
  1709. y[ib].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);
1710. // get the 5th bit and store it in qh at the right position
  1711. qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
1712. qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_1/2);
  1713. }
  1714. memcpy(&y[ib].qh, &qh, sizeof(qh));
  1715. }
  1716. }
  1717. size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1718. if (!quant_weights) {
  1719. quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
  1720. return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
  1721. }
  1722. size_t row_size = ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
  1723. char * qrow = (char *)dst;
  1724. for (int64_t row = 0; row < nrow; ++row) {
  1725. quantize_row_q5_1_impl(src, (block_q5_1*)qrow, n_per_row, quant_weights);
  1726. src += n_per_row;
  1727. qrow += row_size;
  1728. }
  1729. return nrow * row_size;
  1730. }
  1731. size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1732. (void)quant_weights; // not used
  1733. const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
  1734. quantize_row_q8_0_ref(src, dst, (int64_t)nrow*n_per_row);
  1735. return nrow * row_size;
  1736. }
  1737. // ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
  1738. void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k) {
  1739. assert(k % QK_K == 0);
  1740. const int64_t nb = k / QK_K;
  1741. for (int64_t i = 0; i < nb; i++) {
  1742. float amax = 0.0f; // absolute max
  1743. for (int j = 0; j < QK_K; j++) {
  1744. const float v = x[j];
  1745. amax = MAX(amax, fabsf(v));
  1746. }
  1747. const float d = amax;
  1748. const float id = d ? 1.0f/d : 0.0f;
  1749. y[i].d = GGML_FP32_TO_FP16(d);
1750. // pack 5 ternary elements per byte, processing 32 bytes at a time
  1751. for (size_t j = 0; j < sizeof(y->qs) - sizeof(y->qs) % 32; j += 32) {
  1752. for (size_t m = 0; m < 32; ++m) {
  1753. uint8_t q = 0;
  1754. for (size_t n = 0; n < 5; ++n) {
  1755. int xi = lroundf(x[m + n*32] * id) + 1; // -1, 0, 1 -> 0, 1, 2
  1756. q *= 3;
  1757. q += xi;
  1758. }
  1759. // ceiling division (243 == pow(3, 5))
  1760. q = ((uint16_t)q * 256 + (243 - 1)) / 243;
  1761. y[i].qs[j + m] = q;
  1762. }
  1763. x += 5*32;
  1764. }
1765. // remaining elements, processed 16 bytes at a time
  1766. for (size_t j = sizeof(y->qs) - sizeof(y->qs) % 32; j < sizeof(y->qs); j += 16) {
  1767. for (size_t m = 0; m < 16; ++m) {
  1768. uint8_t q = 0;
  1769. for (size_t n = 0; n < 5; ++n) {
  1770. int xi = lroundf(x[m + n*16] * id) + 1; // -1, 0, 1 -> 0, 1, 2
  1771. q *= 3;
  1772. q += xi;
  1773. }
  1774. // ceiling division (243 == pow(3, 5))
  1775. q = ((uint16_t)q * 256 + (243 - 1)) / 243;
  1776. y[i].qs[j + m] = q;
  1777. }
  1778. x += 5*16;
  1779. }
  1780. // 4 elements per byte
  1781. for (size_t j = 0; j < sizeof(y->qh); ++j) {
  1782. uint8_t q = 0;
  1783. for (size_t m = 0; m < 4; ++m) {
  1784. // -1, 0, 1 -> 0, 1, 2
  1785. int xi = lroundf(x[j + m*sizeof(y->qh)] * id) + 1;
  1786. q *= 3;
  1787. q += xi;
  1788. }
1789. // shift the four packed trits up by one position (as if a fifth trit of 0 followed), so the same decode applies
  1790. q *= 3;
  1791. // ceiling division (243 == pow(3, 5))
  1792. q = ((uint16_t)q * 256 + (243 - 1)) / 243;
  1793. y[i].qh[j] = q;
  1794. }
  1795. x += 4*sizeof(y->qh);
  1796. }
  1797. }
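// Worked example (illustrative, not from the upstream sources): the five trits
// t0..t4 (each 0, 1 or 2 after the +1 shift) are accumulated as the base-3
// value v = (((t0*3 + t1)*3 + t2)*3 + t3)*3 + t4, with v in [0, 242]. The byte
// stored is q = ceil(v * 256 / 243), which rescales [0, 243) onto [0, 256) so a
// trit can be recovered without a division: (q * 3) >> 8 equals t0, and
// repeating the multiply-by-3 on the wrapped byte (which is what the pow3[]
// table in dequantize_row_tq1_0 below effectively does) yields t1..t4. For
// instance v = 242 (all trits 2) gives q = ceil(242*256/243) = 255, and
// (255 * 3) >> 8 = 2 as expected.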
  1798. void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k) {
  1799. assert(k % QK_K == 0);
  1800. const int64_t nb = k / QK_K;
  1801. for (int64_t i = 0; i < nb; i++) {
  1802. float amax = 0.0f; // absolute max
  1803. for (int j = 0; j < QK_K; j++) {
  1804. const float v = x[j];
  1805. amax = MAX(amax, fabsf(v));
  1806. }
  1807. const float d = amax;
  1808. const float id = d ? 1.0f/d : 0.0f;
  1809. y[i].d = GGML_FP32_TO_FP16(d);
  1810. for (size_t j = 0; j < sizeof(y->qs); j += 32) {
  1811. for (size_t m = 0; m < 32; ++m) {
  1812. uint8_t q = 0;
  1813. for (size_t n = 0; n < 4; ++n) {
  1814. // -1, 0, 1 -> 0, 1, 2
  1815. int xi = lroundf(x[m + n*32] * id) + 1;
  1816. q += (xi & 3) << (2*n);
  1817. }
  1818. y[i].qs[j + m] = q;
  1819. }
  1820. x += 4*32;
  1821. }
  1822. }
  1823. }
  1824. size_t quantize_tq1_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1825. (void)quant_weights; // not used
  1826. const size_t row_size = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
  1827. quantize_row_tq1_0_ref(src, dst, (int64_t)nrow*n_per_row);
  1828. return nrow * row_size;
  1829. }
  1830. size_t quantize_tq2_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  1831. (void)quant_weights; // not used
  1832. const size_t row_size = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
  1833. quantize_row_tq2_0_ref(src, dst, (int64_t)nrow*n_per_row);
  1834. return nrow * row_size;
  1835. }
  1836. void dequantize_row_tq1_0(const block_tq1_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1837. assert(k % QK_K == 0);
  1838. const int64_t nb = k / QK_K;
  1839. const uint8_t pow3[6] = {1, 3, 9, 27, 81, 243};
  1840. for (int64_t i = 0; i < nb; ++i) {
  1841. const float d = GGML_FP16_TO_FP32(x[i].d);
  1842. for (size_t j = 0; j < sizeof(x->qs) - sizeof(x->qs) % 32; j += 32) {
  1843. for (size_t n = 0; n < 5; ++n) {
  1844. for (size_t m = 0; m < 32; ++m) {
  1845. uint8_t q = x[i].qs[j + m] * pow3[n];
  1846. int16_t xi = ((uint16_t) q * 3) >> 8;
  1847. *y++ = (float) (xi - 1) * d;
  1848. }
  1849. }
  1850. }
  1851. for (size_t j = sizeof(x->qs) - sizeof(x->qs) % 32; j < sizeof(x->qs); j += 16) {
  1852. for (size_t n = 0; n < 5; ++n) {
  1853. for (size_t m = 0; m < 16; ++m) {
  1854. uint8_t q = x[i].qs[j + m] * pow3[n];
  1855. int16_t xi = ((uint16_t) q * 3) >> 8;
  1856. *y++ = (float) (xi - 1) * d;
  1857. }
  1858. }
  1859. }
  1860. for (size_t n = 0; n < 4; ++n) {
  1861. for (size_t j = 0; j < sizeof(x->qh); ++j) {
  1862. uint8_t q = x[i].qh[j] * pow3[n];
  1863. int16_t xi = ((uint16_t) q * 3) >> 8;
  1864. *y++ = (float) (xi - 1) * d;
  1865. }
  1866. }
  1867. }
  1868. }
  1869. void dequantize_row_tq2_0(const block_tq2_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1870. assert(k % QK_K == 0);
  1871. const int64_t nb = k / QK_K;
  1872. for (int64_t i = 0; i < nb; ++i) {
  1873. const float d = GGML_FP16_TO_FP32(x[i].d);
  1874. for (size_t j = 0; j < sizeof(x->qs); j += 32) {
  1875. for (size_t l = 0; l < 4; ++l) {
  1876. for (size_t m = 0; m < 32; ++m) {
  1877. int8_t q = (x[i].qs[j + m] >> (l*2)) & 3;
  1878. *y++ = (float) (q - 1) * d;
  1879. }
  1880. }
  1881. }
  1882. }
  1883. }
  1884. // ====================== "True" 2-bit (de)-quantization
  1885. void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1886. assert(k % QK_K == 0);
  1887. const int64_t nb = k / QK_K;
  1888. uint32_t aux32[2];
  1889. const uint8_t * aux8 = (const uint8_t *)aux32;
  1890. for (int i = 0; i < nb; i++) {
  1891. const float d = GGML_FP16_TO_FP32(x[i].d);
  1892. for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
  1893. memcpy(aux32, x[i].qs + 4*ib32, 2*sizeof(uint32_t));
  1894. const float db = d * (0.5f + (aux32[1] >> 28)) * 0.25f;
  1895. for (int l = 0; l < 4; ++l) {
  1896. const uint8_t * grid = (const uint8_t *)(iq2xxs_grid + aux8[l]);
  1897. const uint8_t signs = ksigns_iq2xs[(aux32[1] >> 7*l) & 127];
  1898. for (int j = 0; j < 8; ++j) {
  1899. y[j] = db * grid[j] * (signs & kmask_iq2xs[j] ? -1.f : 1.f);
  1900. }
  1901. y += 8;
  1902. }
  1903. }
  1904. }
  1905. }
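// Block layout note (summarizing the loop above): each 32-value group reads
// eight bytes of qs as two 32-bit words. The four bytes of the first word
// index iq2xxs_grid, selecting four 8-value patterns; the second word packs
// four 7-bit sign-pattern indices into ksigns_iq2xs in bits 0..27 and a 4-bit
// scale in bits 28..31, mapped to d * (0.5 + s) * 0.25.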
  1906. // ====================== 2.3125 bpw (de)-quantization
  1907. void dequantize_row_iq2_xs(const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1908. assert(k % QK_K == 0);
  1909. const int64_t nb = k / QK_K;
  1910. float db[2];
  1911. for (int i = 0; i < nb; i++) {
  1912. const float d = GGML_FP16_TO_FP32(x[i].d);
  1913. for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
  1914. db[0] = d * (0.5f + (x[i].scales[ib32] & 0xf)) * 0.25f;
  1915. db[1] = d * (0.5f + (x[i].scales[ib32] >> 4)) * 0.25f;
  1916. for (int l = 0; l < 4; ++l) {
  1917. const uint8_t * grid = (const uint8_t *)(iq2xs_grid + (x[i].qs[4*ib32 + l] & 511));
  1918. const uint8_t signs = ksigns_iq2xs[x[i].qs[4*ib32 + l] >> 9];
  1919. for (int j = 0; j < 8; ++j) {
  1920. y[j] = db[l/2] * grid[j] * (signs & kmask_iq2xs[j] ? -1.f : 1.f);
  1921. }
  1922. y += 8;
  1923. }
  1924. }
  1925. }
  1926. }
  1927. // ====================== 2.5625 bpw (de)-quantization
  1928. void dequantize_row_iq2_s(const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1929. assert(k % QK_K == 0);
  1930. const int64_t nb = k / QK_K;
  1931. float db[2];
  1932. for (int i = 0; i < nb; i++) {
  1933. const float d = GGML_FP16_TO_FP32(x[i].d);
  1934. const uint8_t * qs = x[i].qs;
  1935. const uint8_t * qh = x[i].qh;
  1936. const uint8_t * signs = qs + QK_K/8;
  1937. for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
  1938. db[0] = d * (0.5f + (x[i].scales[ib32] & 0xf)) * 0.25f;
  1939. db[1] = d * (0.5f + (x[i].scales[ib32] >> 4)) * 0.25f;
  1940. for (int l = 0; l < 4; ++l) {
  1941. const float dl = db[l/2];
  1942. const uint8_t * grid = (const uint8_t *)(iq2s_grid + (qs[l] | (qh[ib32] << (8-2*l) & 0x300)));
  1943. for (int j = 0; j < 8; ++j) {
  1944. y[j] = dl * grid[j] * (signs[l] & kmask_iq2xs[j] ? -1.f : 1.f);
  1945. }
  1946. y += 8;
  1947. }
  1948. qs += 4;
  1949. signs += 4;
  1950. }
  1951. }
  1952. }
  1953. // ====================== 3.0625 bpw (de)-quantization
  1954. void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1955. assert(k % QK_K == 0);
  1956. const int64_t nb = k / QK_K;
  1957. uint32_t aux32;
  1958. for (int i = 0; i < nb; i++) {
  1959. const float d = GGML_FP16_TO_FP32(x[i].d);
  1960. const uint8_t * qs = x[i].qs;
  1961. const uint8_t * scales_and_signs = qs + QK_K/4;
  1962. for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
  1963. memcpy(&aux32, scales_and_signs + 4*ib32, sizeof(uint32_t));
  1964. const float db = d * (0.5f + (aux32 >> 28)) * 0.5f;
  1965. for (int l = 0; l < 4; ++l) {
  1966. const uint8_t signs = ksigns_iq2xs[(aux32 >> 7*l) & 127];
  1967. const uint8_t * grid1 = (const uint8_t *)(iq3xxs_grid + qs[2*l+0]);
  1968. const uint8_t * grid2 = (const uint8_t *)(iq3xxs_grid + qs[2*l+1]);
  1969. for (int j = 0; j < 4; ++j) {
  1970. y[j+0] = db * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
  1971. y[j+4] = db * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
  1972. }
  1973. y += 8;
  1974. }
  1975. qs += 8;
  1976. }
  1977. }
  1978. }
  1979. // ====================== 3.3125 bpw (de)-quantization
  1980. void dequantize_row_iq3_s(const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  1981. assert(k % QK_K == 0);
  1982. const int64_t nb = k / QK_K;
  1983. for (int i = 0; i < nb; i++) {
  1984. const float d = GGML_FP16_TO_FP32(x[i].d);
  1985. const uint8_t * qs = x[i].qs;
  1986. const uint8_t * qh = x[i].qh;
  1987. const uint8_t * signs = x[i].signs;
  1988. for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
  1989. const float db1 = d * (1 + 2*(x[i].scales[ib32/2] & 0xf));
  1990. const float db2 = d * (1 + 2*(x[i].scales[ib32/2] >> 4));
  1991. for (int l = 0; l < 4; ++l) {
  1992. const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2*l+0] | ((qh[0] << (8-2*l)) & 256)));
  1993. const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + (qs[2*l+1] | ((qh[0] << (7-2*l)) & 256)));
  1994. for (int j = 0; j < 4; ++j) {
  1995. y[j+0] = db1 * grid1[j] * (signs[l] & kmask_iq2xs[j+0] ? -1.f : 1.f);
  1996. y[j+4] = db1 * grid2[j] * (signs[l] & kmask_iq2xs[j+4] ? -1.f : 1.f);
  1997. }
  1998. y += 8;
  1999. }
  2000. qs += 8;
  2001. signs += 4;
  2002. for (int l = 0; l < 4; ++l) {
  2003. const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2*l+0] | ((qh[1] << (8-2*l)) & 256)));
  2004. const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + (qs[2*l+1] | ((qh[1] << (7-2*l)) & 256)));
  2005. for (int j = 0; j < 4; ++j) {
  2006. y[j+0] = db2 * grid1[j] * (signs[l] & kmask_iq2xs[j+0] ? -1.f : 1.f);
  2007. y[j+4] = db2 * grid2[j] * (signs[l] & kmask_iq2xs[j+4] ? -1.f : 1.f);
  2008. }
  2009. y += 8;
  2010. }
  2011. qh += 2;
  2012. qs += 8;
  2013. signs += 4;
  2014. }
  2015. }
  2016. }
  2017. // ====================== 1.5625 bpw (de)-quantization
  2018. void dequantize_row_iq1_s(const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  2019. assert(k % QK_K == 0);
  2020. const int64_t nb = k / QK_K;
  2021. for (int i = 0; i < nb; i++) {
  2022. const float d = GGML_FP16_TO_FP32(x[i].d);
  2023. const uint8_t * qs = x[i].qs;
  2024. const uint16_t * qh = x[i].qh;
  2025. for (int ib = 0; ib < QK_K/32; ++ib) {
  2026. const float dl = d * (2*((qh[ib] >> 12) & 7) + 1);
  2027. const float delta = qh[ib] & 0x8000 ? -IQ1S_DELTA : IQ1S_DELTA;
  2028. for (int l = 0; l < 4; ++l) {
  2029. const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8)));
  2030. for (int j = 0; j < 8; ++j) {
  2031. y[j] = dl * (grid[j] + delta);
  2032. }
  2033. y += 8;
  2034. }
  2035. qs += 4;
  2036. }
  2037. }
  2038. }
  2039. void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  2040. assert(k % QK_K == 0);
  2041. const int64_t nb = k / QK_K;
  2042. float delta[4];
  2043. uint16_t idx[4];
  2044. iq1m_scale_t scale;
  2045. for (int i = 0; i < nb; i++) {
  2046. const uint16_t * sc = (const uint16_t *)x[i].scales;
  2047. scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
  2048. const float d = GGML_FP16_TO_FP32(scale.f16);
  2049. const uint8_t * qs = x[i].qs;
  2050. const uint8_t * qh = x[i].qh;
  2051. for (int ib = 0; ib < QK_K/32; ++ib) {
  2052. const float dl1 = d * (2*((sc[ib/2] >> (6*(ib%2)+0)) & 0x7) + 1);
  2053. const float dl2 = d * (2*((sc[ib/2] >> (6*(ib%2)+3)) & 0x7) + 1);
  2054. idx[0] = qs[0] | ((qh[0] << 8) & 0x700);
  2055. idx[1] = qs[1] | ((qh[0] << 4) & 0x700);
  2056. idx[2] = qs[2] | ((qh[1] << 8) & 0x700);
  2057. idx[3] = qs[3] | ((qh[1] << 4) & 0x700);
  2058. delta[0] = qh[0] & 0x08 ? -IQ1S_DELTA : IQ1S_DELTA;
  2059. delta[1] = qh[0] & 0x80 ? -IQ1S_DELTA : IQ1S_DELTA;
  2060. delta[2] = qh[1] & 0x08 ? -IQ1S_DELTA : IQ1S_DELTA;
  2061. delta[3] = qh[1] & 0x80 ? -IQ1S_DELTA : IQ1S_DELTA;
  2062. for (int l = 0; l < 2; ++l) {
  2063. const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]);
  2064. for (int j = 0; j < 8; ++j) {
  2065. y[j] = dl1 * (grid[j] + delta[l]);
  2066. }
  2067. y += 8;
  2068. }
  2069. for (int l = 2; l < 4; ++l) {
  2070. const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]);
  2071. for (int j = 0; j < 8; ++j) {
  2072. y[j] = dl2 * (grid[j] + delta[l]);
  2073. }
  2074. y += 8;
  2075. }
  2076. qs += 4;
  2077. qh += 2;
  2078. }
  2079. }
  2080. }
  2081. static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
  2082. void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  2083. assert(k % QK4_NL == 0);
  2084. const int64_t nb = k / QK4_NL;
  2085. for (int i = 0; i < nb; i++) {
  2086. const uint8_t * qs = x[i].qs;
  2087. const float d = GGML_FP16_TO_FP32(x[i].d);
  2088. for (int j = 0; j < QK4_NL/2; ++j) {
  2089. y[j+ 0] = d * kvalues_iq4nl[qs[j] & 0xf];
  2090. y[j+QK4_NL/2] = d * kvalues_iq4nl[qs[j] >> 4];
  2091. }
  2092. y += QK4_NL;
  2093. qs += QK4_NL/2;
  2094. }
  2095. }
  2096. void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  2097. assert(k % QK_K == 0);
  2098. const int64_t nb = k / QK_K;
  2099. for (int i = 0; i < nb; i++) {
  2100. const uint8_t * qs = x[i].qs;
  2101. const float d = GGML_FP16_TO_FP32(x[i].d);
  2102. for (int ib = 0; ib < QK_K/32; ++ib) {
  2103. const int ls = ((x[i].scales_l[ib/2] >> 4*(ib%2)) & 0xf) | (((x[i].scales_h >> 2*ib) & 3) << 4);
  2104. const float dl = d * (ls - 32);
  2105. for (int j = 0; j < 16; ++j) {
  2106. y[j+ 0] = dl * kvalues_iq4nl[qs[j] & 0xf];
  2107. y[j+16] = dl * kvalues_iq4nl[qs[j] >> 4];
  2108. }
  2109. y += 32;
  2110. qs += 16;
  2111. }
  2112. }
  2113. }
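// Scale layout note (summarizing the decode above): each of the eight 32-value
// sub-blocks has a 6-bit scale split into a low nibble in scales_l (two per
// byte) and 2 high bits in scales_h, with 32 subtracted to recenter it; e.g.
// for ib = 5 the low nibble is scales_l[2] >> 4 and the high bits are
// (scales_h >> 10) & 3.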
  2114. //===================================== Q8_K ==============================================
  2115. void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
  2116. assert(k % QK_K == 0);
  2117. const int64_t nb = k / QK_K;
  2118. for (int i = 0; i < nb; i++) {
  2119. float max = 0;
  2120. float amax = 0;
  2121. for (int j = 0; j < QK_K; ++j) {
  2122. float ax = fabsf(x[j]);
  2123. if (ax > amax) {
  2124. amax = ax; max = x[j];
  2125. }
  2126. }
  2127. if (!amax) {
  2128. y[i].d = 0;
  2129. memset(y[i].qs, 0, QK_K);
  2130. x += QK_K;
  2131. continue;
  2132. }
  2133. //const float iscale = -128.f/max;
2134. // We use -127 instead of -128 because IQ2_XXS needs it; otherwise the AVX implementation becomes very awkward
  2135. const float iscale = -127.f/max;
  2136. for (int j = 0; j < QK_K; ++j) {
  2137. int v = nearest_int(iscale*x[j]);
  2138. y[i].qs[j] = MIN(127, v);
  2139. }
  2140. for (int j = 0; j < QK_K/16; ++j) {
  2141. int sum = 0;
  2142. for (int ii = 0; ii < 16; ++ii) {
  2143. sum += y[i].qs[j*16 + ii];
  2144. }
  2145. y[i].bsums[j] = sum;
  2146. }
  2147. y[i].d = 1/iscale;
  2148. x += QK_K;
  2149. }
  2150. }
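// Note (an assumption about downstream use, not stated here): bsums caches the
// per-16 sums of the quantized values so that dot products against k-quant
// blocks can fold in the constant (min) part of a sub-block from these sums
// instead of touching individual quants; for a sub-block dequantized as
// d*sc*q - dmin*m, its contribution against q8 values reduces to
// d*sc*sum(q*q8) - dmin*m*y.d*(sum of the bsums covering that sub-block).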
  2151. void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
  2152. assert(k % QK_K == 0);
  2153. const int64_t nb = k / QK_K;
  2154. for (int i = 0; i < nb; i++) {
  2155. for (int j = 0; j < QK_K; ++j) {
  2156. *y++ = x[i].d * x[i].qs[j];
  2157. }
  2158. }
  2159. }
  2160. // ================================ IQ2 quantization =============================================
  2161. typedef struct {
  2162. uint64_t * grid;
  2163. int * map;
  2164. uint16_t * neighbours;
  2165. } iq2_entry_t;
  2166. static iq2_entry_t iq2_data[4] = {
  2167. {NULL, NULL, NULL},
  2168. {NULL, NULL, NULL},
  2169. {NULL, NULL, NULL},
  2170. {NULL, NULL, NULL},
  2171. };
  2172. static inline int iq2_data_index(enum ggml_type type) {
  2173. GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S);
  2174. return type == GGML_TYPE_IQ2_XXS ? 0 :
  2175. type == GGML_TYPE_IQ2_XS ? 1 :
  2176. type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? 2 : 3;
  2177. }
  2178. static inline int iq2_grid_size(enum ggml_type type) {
  2179. GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S);
  2180. return type == GGML_TYPE_IQ2_XXS ? 256 :
  2181. type == GGML_TYPE_IQ2_XS ? 512 :
  2182. type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? NGRID_IQ1S : 1024;
  2183. }
  2184. static int iq2_compare_func(const void * left, const void * right) {
  2185. const int * l = (const int *)left;
  2186. const int * r = (const int *)right;
  2187. return l[0] < r[0] ? -1 : l[0] > r[0] ? 1 : l[1] < r[1] ? -1 : l[1] > r[1] ? 1 : 0;
  2188. }
  2189. void iq2xs_init_impl(enum ggml_type type) {
  2190. const int gindex = iq2_data_index(type);
  2191. const int grid_size = iq2_grid_size(type);
  2192. if (iq2_data[gindex].grid) {
  2193. return;
  2194. }
  2195. static const uint16_t kgrid_2bit_256[256] = {
  2196. 0, 2, 5, 8, 10, 17, 20, 32, 34, 40, 42, 65, 68, 80, 88, 97,
  2197. 100, 128, 130, 138, 162, 257, 260, 272, 277, 320, 388, 408, 512, 514, 546, 642,
  2198. 1025, 1028, 1040, 1057, 1060, 1088, 1090, 1096, 1120, 1153, 1156, 1168, 1188, 1280, 1282, 1288,
  2199. 1312, 1350, 1385, 1408, 1425, 1545, 1552, 1600, 1668, 1700, 2048, 2053, 2056, 2068, 2088, 2113,
  2200. 2116, 2128, 2130, 2184, 2308, 2368, 2562, 2580, 4097, 4100, 4112, 4129, 4160, 4192, 4228, 4240,
  2201. 4245, 4352, 4360, 4384, 4432, 4442, 4480, 4644, 4677, 5120, 5128, 5152, 5157, 5193, 5248, 5400,
  2202. 5474, 5632, 5654, 6145, 6148, 6160, 6208, 6273, 6400, 6405, 6560, 6737, 8192, 8194, 8202, 8260,
  2203. 8289, 8320, 8322, 8489, 8520, 8704, 8706, 9217, 9220, 9232, 9280, 9302, 9472, 9537, 9572, 9872,
  2204. 10248, 10272, 10388, 10820, 16385, 16388, 16400, 16408, 16417, 16420, 16448, 16456, 16470, 16480, 16513, 16516,
  2205. 16528, 16640, 16672, 16737, 16768, 16773, 16897, 16912, 16968, 16982, 17000, 17408, 17416, 17440, 17536, 17561,
  2206. 17682, 17700, 17920, 18433, 18436, 18448, 18496, 18501, 18688, 18776, 18785, 18818, 19013, 19088, 20480, 20488,
  2207. 20497, 20505, 20512, 20608, 20616, 20740, 20802, 20900, 21137, 21648, 21650, 21770, 22017, 22100, 22528, 22545,
  2208. 22553, 22628, 22848, 23048, 24580, 24592, 24640, 24680, 24832, 24917, 25112, 25184, 25600, 25605, 25872, 25874,
  2209. 25988, 26690, 32768, 32770, 32778, 32833, 32898, 33028, 33048, 33088, 33297, 33793, 33796, 33808, 33813, 33856,
  2210. 33888, 34048, 34118, 34196, 34313, 34368, 34400, 34818, 35076, 35345, 36868, 36880, 36900, 36928, 37025, 37142,
  2211. 37248, 37445, 37888, 37922, 37956, 38225, 39041, 39200, 40962, 41040, 41093, 41225, 41472, 42008, 43088, 43268,
  2212. };
  2213. static const uint16_t kgrid_2bit_512[512] = {
  2214. 0, 2, 5, 8, 10, 17, 20, 22, 25, 32, 34, 37, 40, 65, 68, 70,
  2215. 73, 80, 82, 85, 88, 97, 100, 128, 130, 133, 136, 145, 148, 153, 160, 257,
  2216. 260, 262, 265, 272, 274, 277, 280, 282, 289, 292, 320, 322, 325, 328, 337, 340,
  2217. 352, 360, 385, 388, 400, 512, 514, 517, 520, 529, 532, 544, 577, 580, 592, 597,
  2218. 640, 650, 1025, 1028, 1030, 1033, 1040, 1042, 1045, 1048, 1057, 1060, 1088, 1090, 1093, 1096,
  2219. 1105, 1108, 1110, 1120, 1153, 1156, 1168, 1280, 1282, 1285, 1288, 1297, 1300, 1312, 1345, 1348,
  2220. 1360, 1377, 1408, 1537, 1540, 1552, 1574, 1600, 1602, 1668, 2048, 2050, 2053, 2056, 2058, 2065,
  2221. 2068, 2080, 2085, 2113, 2116, 2128, 2136, 2176, 2208, 2218, 2305, 2308, 2320, 2368, 2433, 2441,
  2222. 2560, 2592, 2600, 2710, 2720, 4097, 4100, 4102, 4105, 4112, 4114, 4117, 4120, 4129, 4132, 4160,
  2223. 4162, 4165, 4168, 4177, 4180, 4192, 4202, 4225, 4228, 4240, 4352, 4354, 4357, 4360, 4369, 4372,
  2224. 4384, 4417, 4420, 4432, 4480, 4500, 4502, 4609, 4612, 4614, 4624, 4672, 4704, 5120, 5122, 5125,
  2225. 5128, 5137, 5140, 5152, 5185, 5188, 5193, 5200, 5220, 5248, 5377, 5380, 5392, 5440, 5632, 5652,
  2226. 5705, 6145, 6148, 6160, 6162, 6208, 6228, 6278, 6400, 6405, 6502, 6737, 6825, 8192, 8194, 8197,
  2227. 8200, 8202, 8209, 8212, 8224, 8257, 8260, 8272, 8320, 8352, 8449, 8452, 8464, 8512, 8520, 8549,
  2228. 8704, 8738, 8832, 8872, 9217, 9220, 9232, 9257, 9280, 9472, 9537, 9554, 9625, 9729, 9754, 9894,
  2229. 10240, 10248, 10250, 10272, 10325, 10376, 10402, 10600, 10640, 10760, 10784, 10882, 10888, 10890, 16385, 16388,
  2230. 16390, 16393, 16400, 16402, 16405, 16408, 16417, 16420, 16448, 16450, 16453, 16456, 16458, 16465, 16468, 16480,
  2231. 16485, 16513, 16516, 16528, 16640, 16642, 16645, 16648, 16657, 16660, 16672, 16705, 16708, 16720, 16768, 16773,
  2232. 16802, 16897, 16900, 16912, 16914, 16937, 16960, 17408, 17410, 17413, 17416, 17425, 17428, 17433, 17440, 17473,
  2233. 17476, 17488, 17536, 17556, 17665, 17668, 17680, 17700, 17728, 17818, 17920, 17930, 17988, 18000, 18433, 18436,
  2234. 18448, 18496, 18501, 18516, 18530, 18688, 18705, 18756, 18768, 18793, 18948, 20480, 20482, 20485, 20488, 20497,
  2235. 20500, 20512, 20520, 20545, 20548, 20560, 20608, 20737, 20740, 20752, 20757, 20800, 20802, 20992, 21060, 21162,
  2236. 21505, 21508, 21520, 21537, 21568, 21600, 21633, 21665, 21760, 21768, 21888, 21896, 22049, 22120, 22177, 22528,
  2237. 22548, 22593, 22608, 22681, 22810, 22848, 22850, 23173, 24577, 24580, 24592, 24640, 24660, 24674, 24710, 24745,
  2238. 24832, 25124, 25162, 25234, 25600, 25622, 25872, 25920, 25925, 26020, 26625, 26730, 26917, 27142, 27220, 27234,
  2239. 32768, 32770, 32773, 32776, 32785, 32788, 32800, 32810, 32833, 32836, 32848, 32896, 32898, 32936, 32938, 33025,
  2240. 33028, 33030, 33040, 33088, 33105, 33113, 33280, 33312, 33408, 33410, 33440, 33448, 33793, 33796, 33808, 33810,
  2241. 33813, 33856, 33888, 33929, 34048, 34116, 34213, 34328, 34410, 34816, 34824, 34853, 34906, 34944, 34946, 34984,
  2242. 35078, 35362, 35456, 35464, 35478, 35496, 36865, 36868, 36880, 36928, 36950, 36996, 37120, 37154, 37220, 37462,
  2243. 37513, 37888, 37893, 37956, 37968, 37976, 38185, 38288, 38290, 38465, 38993, 39078, 39241, 39445, 39520, 40960,
  2244. 40962, 40968, 40970, 40992, 41002, 41120, 41297, 41305, 41382, 41472, 41474, 41480, 41514, 41600, 41632, 42048,
  2245. 42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690,
  2246. };
  2247. static const uint16_t kgrid_1bit_2048[NGRID_IQ1S] = {
  2248. 0, 2, 5, 8, 10, 17, 21, 32, 34, 40, 42, 69, 81, 84, 86, 101,
  2249. 128, 130, 136, 138, 149, 160, 162, 168, 170, 260, 261, 273, 276, 278, 281, 282,
  2250. 293, 321, 326, 329, 338, 341, 346, 353, 356, 358, 360, 389, 401, 404, 406, 421,
  2251. 512, 514, 520, 522, 533, 544, 546, 552, 554, 581, 593, 601, 612, 617, 640, 642,
  2252. 648, 650, 657, 661, 665, 672, 674, 680, 682, 1041, 1044, 1046, 1061, 1089, 1097, 1109,
  2253. 1114, 1124, 1125, 1169, 1177, 1189, 1281, 1284, 1285, 1286, 1301, 1304, 1306, 1321, 1344, 1349,
  2254. 1354, 1360, 1361, 1364, 1365, 1366, 1369, 1376, 1378, 1381, 1384, 1386, 1409, 1425, 1429, 1432,
  2255. 1434, 1441, 1444, 1445, 1446, 1449, 1556, 1561, 1601, 1604, 1616, 1618, 1621, 1624, 1632, 1633,
  2256. 1638, 1641, 1669, 1681, 1684, 1689, 2048, 2050, 2056, 2058, 2069, 2080, 2082, 2088, 2090, 2117,
  2257. 2129, 2134, 2149, 2176, 2178, 2184, 2186, 2197, 2208, 2210, 2216, 2218, 2309, 2321, 2324, 2329,
  2258. 2340, 2341, 2369, 2384, 2385, 2389, 2401, 2404, 2409, 2449, 2452, 2454, 2457, 2469, 2560, 2562,
  2259. 2568, 2570, 2581, 2592, 2594, 2600, 2602, 2629, 2641, 2649, 2657, 2661, 2688, 2690, 2693, 2696,
  2260. 2698, 2709, 2720, 2722, 2728, 2730, 4112, 4113, 4116, 4121, 4132, 4133, 4161, 4164, 4176, 4181,
  2261. 4184, 4193, 4196, 4197, 4201, 4241, 4244, 4246, 4257, 4261, 4353, 4356, 4358, 4361, 4368, 4370,
  2262. 4373, 4376, 4385, 4388, 4393, 4421, 4426, 4432, 4433, 4434, 4436, 4437, 4438, 4441, 4448, 4453,
  2263. 4484, 4498, 4501, 4513, 4516, 4625, 4628, 4630, 4645, 4672, 4678, 4681, 4690, 4693, 4696, 4698,
  2264. 4708, 4710, 4741, 4753, 4756, 4758, 4773, 5121, 5126, 5129, 5140, 5141, 5144, 5145, 5153, 5158,
  2265. 5185, 5189, 5190, 5192, 5194, 5201, 5204, 5205, 5206, 5209, 5218, 5221, 5224, 5252, 5257, 5264,
  2266. 5268, 5269, 5272, 5273, 5274, 5281, 5284, 5285, 5289, 5378, 5381, 5386, 5393, 5396, 5397, 5398,
  2267. 5401, 5408, 5410, 5413, 5416, 5418, 5441, 5444, 5445, 5446, 5457, 5458, 5460, 5461, 5462, 5465,
  2268. 5466, 5473, 5476, 5477, 5478, 5481, 5504, 5506, 5508, 5509, 5512, 5514, 5520, 5521, 5524, 5525,
  2269. 5526, 5529, 5530, 5536, 5538, 5541, 5633, 5636, 5637, 5638, 5653, 5654, 5656, 5658, 5665, 5670,
  2270. 5696, 5698, 5700, 5701, 5704, 5706, 5713, 5717, 5718, 5720, 5721, 5729, 5732, 5733, 5736, 5737,
  2271. 5738, 5766, 5770, 5778, 5781, 5796, 5801, 6161, 6166, 6181, 6209, 6212, 6214, 6217, 6224, 6229,
  2272. 6232, 6234, 6240, 6241, 6244, 6246, 6249, 6277, 6289, 6292, 6309, 6416, 6418, 6421, 6426, 6433,
  2273. 6437, 6466, 6468, 6469, 6472, 6481, 6484, 6485, 6486, 6489, 6490, 6496, 6501, 6506, 6537, 6545,
  2274. 6546, 6549, 6552, 6561, 6566, 6569, 6665, 6678, 6692, 6694, 6724, 6726, 6729, 6736, 6738, 6741,
  2275. 6744, 6753, 6758, 6761, 6789, 6801, 6806, 6810, 8192, 8194, 8200, 8202, 8213, 8224, 8226, 8229,
  2276. 8232, 8234, 8261, 8273, 8281, 8289, 8293, 8320, 8322, 8328, 8330, 8341, 8352, 8354, 8357, 8360,
  2277. 8362, 8453, 8465, 8468, 8473, 8485, 8514, 8516, 8521, 8533, 8536, 8538, 8545, 8548, 8549, 8550,
  2278. 8581, 8592, 8598, 8601, 8613, 8705, 8712, 8714, 8721, 8725, 8736, 8738, 8744, 8746, 8773, 8785,
  2279. 8790, 8793, 8805, 8833, 8840, 8842, 8849, 8853, 8864, 8866, 8872, 8874, 9221, 9236, 9238, 9241,
  2280. 9253, 9284, 9285, 9286, 9289, 9298, 9301, 9304, 9306, 9318, 9349, 9361, 9364, 9369, 9377, 9381,
  2281. 9481, 9493, 9505, 9513, 9536, 9541, 9544, 9553, 9556, 9557, 9561, 9570, 9573, 9576, 9609, 9616,
  2282. 9620, 9621, 9624, 9626, 9633, 9636, 9638, 9641, 9733, 9744, 9746, 9753, 9765, 9793, 9801, 9813,
  2283. 9824, 9825, 9833, 9860, 9862, 9872, 9882, 10240, 10242, 10248, 10250, 10261, 10272, 10274, 10280, 10282,
  2284. 10309, 10321, 10324, 10341, 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, 10505, 10513, 10516, 10521,
  2285. 10533, 10566, 10569, 10578, 10581, 10593, 10596, 10598, 10601, 10629, 10640, 10646, 10649, 10660, 10661, 10752,
  2286. 10754, 10760, 10762, 10784, 10786, 10792, 10794, 10821, 10833, 10838, 10841, 10853, 10880, 10882, 10888, 10890,
  2287. 10901, 10912, 10914, 10920, 10922, 16389, 16401, 16406, 16421, 16457, 16466, 16469, 16472, 16474, 16481, 16484,
  2288. 16486, 16532, 16537, 16545, 16550, 16640, 16641, 16644, 16646, 16649, 16658, 16661, 16662, 16664, 16666, 16673,
  2289. 16678, 16681, 16709, 16712, 16714, 16721, 16724, 16725, 16726, 16729, 16730, 16741, 16744, 16746, 16769, 16772,
  2290. 16774, 16784, 16786, 16789, 16800, 16801, 16802, 16901, 16913, 16916, 16918, 16933, 16961, 16978, 16981, 16986,
  2291. 16996, 17001, 17033, 17044, 17061, 17409, 17429, 17433, 17449, 17477, 17480, 17482, 17489, 17492, 17493, 17494,
  2292. 17505, 17506, 17509, 17512, 17514, 17537, 17542, 17545, 17552, 17554, 17557, 17568, 17569, 17577, 17665, 17666,
  2293. 17669, 17674, 17681, 17684, 17685, 17686, 17689, 17696, 17701, 17706, 17729, 17732, 17733, 17734, 17737, 17744,
  2294. 17745, 17748, 17749, 17750, 17752, 17753, 17761, 17764, 17765, 17766, 17769, 17794, 17796, 17797, 17800, 17809,
  2295. 17812, 17813, 17814, 17817, 17818, 17829, 17832, 17834, 17921, 17925, 17929, 17940, 17941, 17944, 17946, 17953,
  2296. 17956, 17961, 17984, 17986, 17989, 17992, 18000, 18001, 18002, 18005, 18006, 18009, 18018, 18021, 18024, 18049,
  2297. 18053, 18058, 18068, 18069, 18081, 18084, 18086, 18437, 18449, 18453, 18458, 18469, 18498, 18505, 18512, 18517,
  2298. 18520, 18529, 18532, 18534, 18537, 18565, 18577, 18580, 18582, 18585, 18597, 18689, 18693, 18694, 18698, 18704,
  2299. 18708, 18709, 18712, 18721, 18724, 18726, 18752, 18757, 18762, 18769, 18770, 18772, 18773, 18774, 18777, 18784,
  2300. 18786, 18789, 18790, 18794, 18822, 18825, 18834, 18837, 18838, 18840, 18849, 18852, 18854, 18857, 18966, 19012,
  2301. 19014, 19017, 19029, 19032, 19034, 19044, 19049, 19092, 19109, 20481, 20484, 20485, 20486, 20489, 20498, 20501,
  2302. 20506, 20513, 20516, 20521, 20544, 20549, 20552, 20561, 20564, 20565, 20566, 20569, 20581, 20584, 20614, 20617,
  2303. 20629, 20632, 20640, 20641, 20646, 20649, 20741, 20744, 20745, 20746, 20753, 20756, 20757, 20758, 20760, 20761,
  2304. 20768, 20773, 20774, 20776, 20778, 20801, 20804, 20805, 20806, 20809, 20816, 20817, 20818, 20820, 20821, 20822,
  2305. 20824, 20825, 20826, 20833, 20836, 20837, 20838, 20841, 20866, 20869, 20881, 20884, 20885, 20886, 20889, 20896,
  2306. 20901, 20906, 20993, 20998, 21010, 21013, 21018, 21025, 21028, 21058, 21061, 21066, 21073, 21076, 21077, 21078,
  2307. 21081, 21090, 21093, 21125, 21136, 21138, 21141, 21145, 21146, 21156, 21508, 21509, 21521, 21524, 21525, 21526,
  2308. 21528, 21529, 21537, 21541, 21544, 21546, 21569, 21572, 21573, 21574, 21577, 21578, 21584, 21585, 21588, 21589,
  2309. 21590, 21592, 21593, 21594, 21601, 21602, 21604, 21605, 21606, 21609, 21632, 21640, 21642, 21649, 21652, 21653,
  2310. 21654, 21657, 21665, 21668, 21669, 21674, 21761, 21762, 21764, 21765, 21766, 21769, 21776, 21777, 21778, 21780,
  2311. 21781, 21782, 21785, 21786, 21793, 21796, 21797, 21798, 21801, 21824, 21825, 21826, 21828, 21829, 21830, 21832,
  2312. 21833, 21840, 21841, 21842, 21844, 21845, 21846, 21848, 21849, 21850, 21856, 21857, 21860, 21861, 21862, 21864,
  2313. 21865, 21866, 21889, 21892, 21893, 21897, 21898, 21904, 21905, 21908, 21909, 21910, 21912, 21913, 21921, 21924,
  2314. 21925, 21926, 21929, 22016, 22017, 22018, 22020, 22022, 22024, 22025, 22033, 22036, 22037, 22040, 22041, 22048,
  2315. 22049, 22050, 22052, 22053, 22054, 22056, 22057, 22081, 22085, 22086, 22088, 22089, 22090, 22096, 22097, 22098,
  2316. 22100, 22101, 22102, 22104, 22105, 22106, 22113, 22116, 22117, 22121, 22146, 22149, 22150, 22152, 22153, 22154,
  2317. 22161, 22165, 22170, 22178, 22181, 22182, 22184, 22185, 22532, 22533, 22534, 22537, 22544, 22549, 22552, 22561,
  2318. 22570, 22597, 22600, 22602, 22609, 22612, 22613, 22614, 22616, 22617, 22624, 22626, 22628, 22629, 22658, 22665,
  2319. 22672, 22674, 22677, 22680, 22689, 22697, 22785, 22786, 22789, 22794, 22801, 22804, 22805, 22806, 22809, 22821,
  2320. 22849, 22852, 22853, 22854, 22857, 22864, 22865, 22866, 22868, 22869, 22870, 22872, 22873, 22874, 22881, 22884,
  2321. 22885, 22886, 22889, 22913, 22917, 22921, 22929, 22932, 22933, 22934, 22936, 22937, 22949, 23044, 23048, 23061,
  2322. 23066, 23072, 23077, 23078, 23081, 23109, 23112, 23113, 23121, 23125, 23126, 23128, 23129, 23138, 23141, 23144,
  2323. 23146, 23169, 23178, 23186, 23189, 23190, 23192, 23194, 23201, 24581, 24596, 24598, 24601, 24613, 24644, 24656,
  2324. 24661, 24662, 24664, 24666, 24673, 24676, 24678, 24681, 24705, 24726, 24741, 24833, 24836, 24838, 24841, 24850,
  2325. 24853, 24865, 24866, 24870, 24873, 24901, 24905, 24913, 24917, 24918, 24921, 24933, 24934, 24938, 24964, 24970,
  2326. 24978, 24981, 24993, 24998, 25001, 25105, 25110, 25113, 25152, 25153, 25158, 25173, 25174, 25176, 25184, 25221,
  2327. 25233, 25238, 25253, 25617, 25618, 25621, 25622, 25626, 25633, 25638, 25641, 25664, 25666, 25669, 25672, 25674,
  2328. 25681, 25684, 25685, 25686, 25689, 25690, 25696, 25698, 25701, 25732, 25733, 25737, 25744, 25746, 25748, 25749,
  2329. 25750, 25752, 25754, 25761, 25764, 25769, 25861, 25864, 25866, 25873, 25877, 25878, 25881, 25924, 25925, 25926,
  2330. 25929, 25936, 25937, 25940, 25941, 25942, 25945, 25953, 25956, 25957, 25958, 25961, 25990, 25993, 25994, 26001,
  2331. 26005, 26006, 26009, 26010, 26018, 26021, 26022, 26024, 26114, 26121, 26133, 26144, 26150, 26152, 26153, 26176,
  2332. 26181, 26184, 26186, 26193, 26196, 26197, 26198, 26200, 26202, 26208, 26213, 26216, 26240, 26242, 26245, 26250,
  2333. 26260, 26262, 26264, 26265, 26272, 26276, 26278, 26282, 26646, 26649, 26661, 26689, 26706, 26709, 26714, 26721,
  2334. 26729, 26757, 26769, 26776, 26790, 26881, 26884, 26896, 26901, 26913, 26916, 26918, 26921, 26944, 26945, 26949,
  2335. 26950, 26952, 26961, 26964, 26965, 26966, 26969, 26976, 26981, 26986, 27010, 27012, 27018, 27029, 27041, 27044,
  2336. 27045, 27049, 27153, 27158, 27160, 27201, 27204, 27209, 27216, 27221, 27224, 27226, 27236, 27237, 27241, 27270,
  2337. 27284, 27288, 27290, 27302, 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, 32837, 32848, 32849, 32852,
  2338. 32854, 32857, 32869, 32896, 32898, 32904, 32906, 32917, 32928, 32930, 32936, 32938, 33029, 33041, 33044, 33046,
  2339. 33049, 33061, 33089, 33092, 33097, 33104, 33106, 33109, 33110, 33112, 33113, 33124, 33126, 33129, 33157, 33161,
  2340. 33172, 33174, 33177, 33189, 33280, 33282, 33288, 33290, 33301, 33312, 33314, 33320, 33322, 33361, 33364, 33369,
  2341. 33381, 33408, 33410, 33416, 33418, 33429, 33440, 33442, 33448, 33450, 33812, 33817, 33857, 33860, 33873, 33877,
  2342. 33882, 33889, 33892, 33897, 33940, 33945, 34049, 34057, 34066, 34069, 34074, 34086, 34089, 34112, 34113, 34117,
  2343. 34120, 34129, 34132, 34133, 34134, 34137, 34138, 34149, 34150, 34152, 34154, 34177, 34180, 34182, 34185, 34192,
  2344. 34194, 34197, 34200, 34214, 34321, 34326, 34329, 34341, 34369, 34372, 34377, 34378, 34384, 34389, 34393, 34394,
  2345. 34401, 34406, 34410, 34437, 34449, 34458, 34468, 34816, 34818, 34824, 34826, 34837, 34848, 34850, 34856, 34858,
  2346. 34881, 34885, 34897, 34900, 34905, 34917, 34921, 34944, 34946, 34952, 34954, 34965, 34976, 34978, 34984, 34986,
  2347. 35077, 35078, 35089, 35092, 35094, 35109, 35137, 35140, 35142, 35145, 35152, 35154, 35157, 35162, 35169, 35172,
  2348. 35205, 35222, 35225, 35237, 35328, 35330, 35336, 35338, 35349, 35360, 35362, 35368, 35370, 35397, 35409, 35412,
  2349. 35414, 35456, 35458, 35464, 35466, 35477, 35488, 35490, 35496, 35498, 36869, 36881, 36886, 36888, 36889, 36901,
  2350. 36929, 36934, 36937, 36949, 36952, 36954, 36969, 36970, 36997, 37009, 37012, 37014, 37017, 37029, 37121, 37124,
  2351. 37126, 37129, 37136, 37141, 37144, 37146, 37153, 37156, 37158, 37161, 37184, 37189, 37200, 37201, 37204, 37205,
  2352. 37206, 37209, 37218, 37221, 37252, 37254, 37266, 37269, 37272, 37281, 37284, 37286, 37289, 37381, 37393, 37396,
  2353. 37401, 37413, 37444, 37446, 37449, 37456, 37458, 37461, 37464, 37478, 37481, 37509, 37524, 37526, 37545, 37889,
  2354. 37892, 37894, 37904, 37909, 37912, 37926, 37952, 37962, 37969, 37972, 37973, 37974, 37976, 37977, 37984, 37985,
  2355. 37986, 37989, 38020, 38022, 38034, 38036, 38037, 38040, 38049, 38057, 38144, 38149, 38152, 38154, 38160, 38161,
  2356. 38164, 38165, 38166, 38169, 38177, 38181, 38185, 38186, 38209, 38212, 38213, 38214, 38217, 38224, 38225, 38226,
  2357. 38228, 38229, 38230, 38232, 38233, 38234, 38241, 38244, 38245, 38246, 38249, 38273, 38277, 38280, 38289, 38290,
  2358. 38292, 38293, 38294, 38297, 38298, 38304, 38306, 38309, 38312, 38314, 38401, 38404, 38416, 38421, 38425, 38432,
  2359. 38438, 38441, 38469, 38472, 38473, 38481, 38482, 38485, 38486, 38489, 38501, 38504, 38530, 38532, 38537, 38538,
  2360. 38546, 38548, 38549, 38564, 38566, 38569, 38917, 38934, 38937, 38949, 38977, 38982, 38992, 38994, 38997, 38998,
  2361. 39002, 39012, 39013, 39045, 39057, 39062, 39065, 39077, 39172, 39174, 39177, 39184, 39186, 39189, 39192, 39194,
  2362. 39200, 39201, 39204, 39206, 39232, 39234, 39237, 39240, 39242, 39249, 39252, 39253, 39254, 39257, 39266, 39269,
  2363. 39270, 39274, 39297, 39300, 39312, 39314, 39317, 39322, 39329, 39334, 39429, 39445, 39461, 39492, 39494, 39497,
  2364. 39504, 39509, 39512, 39521, 39557, 39569, 39572, 39573, 39574, 40960, 40962, 40968, 40970, 40981, 40992, 40994,
  2365. 41000, 41002, 41029, 41041, 41044, 41046, 41049, 41088, 41090, 41096, 41098, 41109, 41120, 41122, 41128, 41130,
  2366. 41221, 41225, 41233, 41236, 41238, 41241, 41242, 41286, 41289, 41297, 41301, 41304, 41306, 41313, 41316, 41349,
  2367. 41360, 41362, 41366, 41369, 41474, 41480, 41482, 41488, 41497, 41506, 41512, 41514, 41541, 41553, 41558, 41561,
  2368. 41573, 41600, 41602, 41608, 41610, 41621, 41632, 41634, 41640, 41642, 42009, 42021, 42049, 42052, 42064, 42068,
  2369. 42069, 42072, 42074, 42081, 42085, 42086, 42088, 42089, 42117, 42246, 42249, 42256, 42258, 42261, 42264, 42278,
  2370. 42281, 42306, 42309, 42321, 42324, 42325, 42326, 42329, 42341, 42346, 42369, 42372, 42373, 42374, 42377, 42386,
  2371. 42389, 42392, 42501, 42513, 42518, 42522, 42529, 42533, 42564, 42566, 42570, 42578, 42581, 42582, 42584, 42592,
  2372. 42594, 42630, 42640, 42645, 42646, 42649, 42657, 42660, 42662, 43008, 43010, 43016, 43018, 43040, 43042, 43048,
  2373. 43050, 43089, 43092, 43094, 43097, 43136, 43138, 43144, 43146, 43157, 43168, 43170, 43176, 43178, 43269, 43284,
  2374. 43289, 43297, 43301, 43329, 43344, 43349, 43354, 43361, 43366, 43369, 43408, 43414, 43520, 43522, 43528, 43530,
  2375. 43552, 43554, 43560, 43562, 43601, 43604, 43606, 43648, 43650, 43656, 43658, 43669, 43680, 43682, 43688, 43690,
  2376. };
  2377. static const uint16_t kgrid_2bit_1024[1024] = {
  2378. 0, 2, 5, 8, 10, 17, 20, 22, 25, 32, 34, 37, 40, 65, 68, 70,
  2379. 73, 80, 82, 85, 88, 97, 100, 102, 105, 128, 130, 133, 136, 145, 148, 160,
  2380. 165, 170, 257, 260, 262, 265, 272, 274, 277, 280, 289, 292, 320, 322, 325, 328,
  2381. 337, 340, 342, 345, 352, 357, 360, 385, 388, 400, 402, 405, 417, 420, 512, 514,
  2382. 517, 520, 529, 532, 544, 554, 577, 580, 582, 585, 592, 597, 640, 645, 650, 660,
  2383. 674, 1025, 1028, 1030, 1033, 1040, 1042, 1045, 1048, 1057, 1060, 1062, 1065, 1088, 1090, 1093,
  2384. 1096, 1098, 1105, 1108, 1110, 1113, 1120, 1122, 1125, 1153, 1156, 1158, 1161, 1168, 1173, 1176,
  2385. 1185, 1188, 1280, 1282, 1285, 1288, 1290, 1297, 1300, 1302, 1305, 1312, 1317, 1320, 1345, 1348,
  2386. 1350, 1353, 1360, 1362, 1365, 1368, 1377, 1380, 1408, 1410, 1413, 1416, 1425, 1428, 1440, 1537,
  2387. 1540, 1542, 1545, 1552, 1557, 1600, 1605, 1608, 1617, 1620, 1632, 1665, 1668, 1680, 2048, 2050,
  2388. 2053, 2056, 2065, 2068, 2070, 2073, 2080, 2085, 2090, 2113, 2116, 2118, 2121, 2128, 2130, 2133,
  2389. 2136, 2145, 2148, 2176, 2181, 2196, 2218, 2305, 2308, 2320, 2322, 2325, 2328, 2337, 2368, 2373,
  2390. 2376, 2385, 2388, 2400, 2433, 2448, 2560, 2577, 2580, 2594, 2600, 2602, 2640, 2713, 4097, 4100,
  2391. 4102, 4105, 4112, 4114, 4117, 4120, 4129, 4132, 4134, 4160, 4162, 4165, 4168, 4177, 4180, 4182,
  2392. 4185, 4192, 4194, 4197, 4200, 4225, 4228, 4230, 4240, 4245, 4248, 4257, 4260, 4352, 4354, 4357,
  2393. 4360, 4362, 4369, 4372, 4374, 4377, 4384, 4386, 4389, 4392, 4417, 4420, 4422, 4425, 4432, 4434,
  2394. 4437, 4440, 4449, 4452, 4480, 4482, 4485, 4488, 4497, 4500, 4609, 4612, 4617, 4624, 4629, 4641,
  2395. 4644, 4672, 4677, 4689, 4692, 4737, 4740, 4752, 5120, 5122, 5125, 5128, 5137, 5140, 5142, 5145,
  2396. 5152, 5157, 5160, 5185, 5188, 5190, 5193, 5200, 5202, 5205, 5208, 5217, 5220, 5248, 5250, 5253,
  2397. 5256, 5265, 5268, 5280, 5377, 5380, 5382, 5385, 5392, 5394, 5397, 5400, 5409, 5412, 5440, 5442,
  2398. 5445, 5448, 5457, 5460, 5472, 5505, 5508, 5520, 5632, 5637, 5640, 5649, 5652, 5664, 5697, 5700,
  2399. 5712, 5760, 5802, 6145, 6148, 6150, 6153, 6160, 6165, 6168, 6177, 6208, 6210, 6213, 6216, 6225,
  2400. 6228, 6240, 6273, 6276, 6400, 6402, 6405, 6408, 6417, 6420, 6432, 6465, 6468, 6480, 6505, 6562,
  2401. 6660, 6672, 6720, 6742, 8192, 8194, 8197, 8200, 8209, 8212, 8214, 8217, 8224, 8229, 8234, 8257,
  2402. 8260, 8272, 8274, 8277, 8292, 8320, 8330, 8340, 8362, 8449, 8452, 8464, 8466, 8469, 8481, 8512,
  2403. 8514, 8517, 8529, 8532, 8544, 8577, 8580, 8592, 8704, 8714, 8738, 8744, 8746, 8772, 8784, 8840,
  2404. 8842, 8872, 9217, 9220, 9222, 9225, 9232, 9237, 9240, 9249, 9252, 9280, 9282, 9285, 9288, 9297,
  2405. 9300, 9312, 9345, 9348, 9360, 9472, 9477, 9480, 9489, 9492, 9504, 9537, 9540, 9552, 9574, 9600,
  2406. 9729, 9732, 9744, 9792, 9817, 10240, 10245, 10257, 10260, 10305, 10308, 10320, 10378, 10410, 10497, 10500,
  2407. 10512, 10645, 10762, 10786, 10852, 10888, 10890, 16385, 16388, 16390, 16393, 16400, 16402, 16405, 16408, 16410,
  2408. 16417, 16420, 16422, 16448, 16450, 16453, 16456, 16458, 16465, 16468, 16470, 16473, 16480, 16482, 16485, 16513,
  2409. 16516, 16528, 16533, 16536, 16545, 16548, 16640, 16642, 16645, 16648, 16657, 16660, 16662, 16665, 16672, 16674,
  2410. 16677, 16705, 16708, 16710, 16713, 16720, 16722, 16725, 16728, 16737, 16740, 16768, 16770, 16773, 16776, 16785,
  2411. 16788, 16800, 16897, 16900, 16912, 16914, 16917, 16920, 16932, 16960, 16965, 16968, 16977, 16980, 16992, 17025,
  2412. 17028, 17408, 17410, 17413, 17416, 17418, 17425, 17428, 17430, 17433, 17440, 17442, 17445, 17448, 17473, 17476,
  2413. 17478, 17481, 17488, 17490, 17493, 17496, 17505, 17508, 17536, 17538, 17541, 17544, 17553, 17556, 17568, 17665,
  2414. 17668, 17670, 17673, 17680, 17682, 17685, 17688, 17697, 17700, 17728, 17730, 17733, 17736, 17745, 17748, 17760,
  2415. 17770, 17793, 17796, 17808, 17920, 17922, 17925, 17928, 17937, 17940, 17952, 17985, 17988, 18000, 18048, 18085,
  2416. 18433, 18436, 18441, 18448, 18450, 18453, 18456, 18465, 18468, 18496, 18498, 18501, 18504, 18513, 18516, 18528,
  2417. 18564, 18576, 18688, 18690, 18693, 18696, 18705, 18708, 18720, 18753, 18756, 18768, 18816, 18838, 18945, 18948,
  2418. 18960, 19008, 20480, 20482, 20485, 20488, 20497, 20500, 20502, 20505, 20512, 20514, 20517, 20520, 20545, 20548,
  2419. 20550, 20553, 20560, 20562, 20565, 20568, 20577, 20580, 20608, 20610, 20613, 20616, 20625, 20628, 20737, 20740,
  2420. 20742, 20745, 20752, 20754, 20757, 20760, 20769, 20772, 20800, 20802, 20805, 20808, 20817, 20820, 20832, 20865,
  2421. 20868, 20880, 20992, 20997, 21000, 21009, 21012, 21024, 21057, 21060, 21072, 21097, 21120, 21505, 21508, 21510,
  2422. 21513, 21520, 21522, 21525, 21528, 21537, 21540, 21568, 21570, 21573, 21576, 21585, 21588, 21600, 21633, 21636,
  2423. 21648, 21760, 21762, 21765, 21768, 21777, 21780, 21792, 21825, 21828, 21840, 21888, 22017, 22020, 22032, 22054,
  2424. 22080, 22528, 22530, 22533, 22536, 22545, 22548, 22560, 22593, 22596, 22608, 22618, 22656, 22785, 22788, 22800,
  2425. 22848, 23040, 23065, 23173, 23208, 24577, 24580, 24582, 24592, 24594, 24597, 24600, 24609, 24612, 24640, 24645,
  2426. 24648, 24657, 24660, 24672, 24708, 24720, 24832, 24834, 24837, 24840, 24849, 24852, 24864, 24897, 24900, 24912,
  2427. 24960, 24985, 25092, 25104, 25152, 25174, 25249, 25600, 25605, 25608, 25617, 25620, 25632, 25665, 25668, 25680,
  2428. 25728, 25857, 25860, 25872, 25920, 25930, 25960, 26002, 26112, 26260, 26625, 26628, 26640, 26725, 26776, 26880,
  2429. 26922, 27202, 27297, 32768, 32770, 32773, 32776, 32785, 32788, 32793, 32800, 32805, 32833, 32836, 32848, 32850,
  2430. 32853, 32856, 32865, 32896, 32901, 32913, 32916, 33025, 33028, 33033, 33040, 33042, 33045, 33048, 33057, 33060,
  2431. 33088, 33090, 33093, 33096, 33105, 33108, 33153, 33156, 33168, 33193, 33280, 33285, 33290, 33297, 33300, 33345,
  2432. 33348, 33360, 33793, 33796, 33798, 33801, 33808, 33810, 33813, 33816, 33825, 33856, 33858, 33861, 33864, 33873,
  2433. 33876, 33888, 33921, 33924, 33936, 34048, 34050, 34053, 34056, 34065, 34068, 34080, 34113, 34116, 34128, 34176,
  2434. 34186, 34305, 34308, 34320, 34345, 34368, 34816, 34821, 34833, 34836, 34881, 34884, 34896, 34978, 35073, 35076,
  2435. 35136, 35173, 35362, 35416, 35418, 35458, 35490, 36865, 36868, 36873, 36880, 36882, 36885, 36888, 36900, 36928,
  2436. 36930, 36933, 36936, 36945, 36948, 36960, 36993, 36996, 37008, 37120, 37125, 37137, 37140, 37185, 37188, 37200,
  2437. 37210, 37377, 37380, 37392, 37440, 37542, 37888, 37890, 37893, 37896, 37905, 37908, 37920, 37953, 37956, 37968,
  2438. 38016, 38038, 38145, 38148, 38160, 38208, 38296, 38305, 38400, 38470, 38500, 38913, 38916, 38928, 38950, 38976,
  2439. 39081, 39168, 39241, 39250, 39568, 40960, 40965, 40970, 40980, 40994, 41002, 41025, 41028, 41040, 41122, 41130,
  2440. 41280, 41317, 41474, 41482, 41506, 41512, 41514, 41602, 41608, 41610, 41640, 41985, 41988, 42000, 42048, 42121,
  2441. 42148, 42240, 42265, 42577, 43018, 43048, 43170, 43348, 43398, 43528, 43530, 43552, 43554, 43560, 43656, 43690,
  2442. };
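// Build the lookup tables for the selected 2-bit/1-bit type. Each 16-bit key packs
// eight 2-bit digits; since the quantizers below clamp quants to 0..2, the largest
// key that can ever be looked up is 0xAAAA = 43690, which is presumably why the map
// only needs 43692 entries rather than 2^16. Keys not on the grid receive a negative
// map entry that indexes into the neighbour list constructed further down.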
    const int kmap_size = 43692;
    //const int nwant = type == GGML_TYPE_IQ1_S ? 3 : 2;
    const int nwant = type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? 3 : type == GGML_TYPE_IQ2_S ? 1 : 2;
    const uint16_t * kgrid = type == GGML_TYPE_IQ2_XXS ? kgrid_2bit_256 :
                             type == GGML_TYPE_IQ2_XS  ? kgrid_2bit_512 :
                             type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? kgrid_1bit_2048 : kgrid_2bit_1024;
    uint64_t * kgrid_q2xs;
    int      * kmap_q2xs;
    uint16_t * kneighbors_q2xs;

    //printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
    uint64_t * the_grid = (uint64_t *)malloc(grid_size*sizeof(uint64_t));
    for (int k = 0; k < grid_size; ++k) {
        int8_t * pos = (int8_t *)(the_grid + k);
        for (int i = 0; i < 8; ++i) {
            int l = (kgrid[k] >> 2*i) & 0x3;
            pos[i] = 2*l + 1;
        }
    }
    kgrid_q2xs = the_grid;
    iq2_data[gindex].grid = the_grid;
    kmap_q2xs = (int *)malloc(kmap_size*sizeof(int));
    iq2_data[gindex].map = kmap_q2xs;
    for (int i = 0; i < kmap_size; ++i) kmap_q2xs[i] = -1;
    uint64_t aux64;
    uint8_t * aux8 = (uint8_t *)&aux64;
    for (int i = 0; i < grid_size; ++i) {
        aux64 = kgrid_q2xs[i];
        uint16_t index = 0;
        for (int k=0; k<8; ++k) {
            uint16_t q = (aux8[k] - 1)/2;
            index |= (q << 2*k);
        }
        kmap_q2xs[index] = i;
    }
    int8_t pos[8];
    int * dist2 = (int *)malloc(2*grid_size*sizeof(int));
    int num_neighbors = 0, num_not_in_map = 0;
    for (int i = 0; i < kmap_size; ++i) {
        if (kmap_q2xs[i] >= 0) continue;
        ++num_not_in_map;
        for (int k = 0; k < 8; ++k) {
            int l = (i >> 2*k) & 0x3;
            pos[k] = 2*l + 1;
        }
        for (int j = 0; j < grid_size; ++j) {
            const int8_t * pg = (const int8_t *)(kgrid_q2xs + j);
            int d2 = 0;
            for (int k = 0; k < 8; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
            dist2[2*j+0] = d2;
            dist2[2*j+1] = j;
        }
        qsort(dist2, grid_size, 2*sizeof(int), iq2_compare_func);
        int n = 0; int d2 = dist2[0];
        int nhave = 1;
        for (int j = 0; j < grid_size; ++j) {
            if (dist2[2*j] > d2) {
                if (nhave == nwant) break;
                d2 = dist2[2*j];
                ++nhave;
            }
            ++n;
        }
        num_neighbors += n;
    }
    //printf("%s: %d neighbours in total\n", __func__, num_neighbors);
    kneighbors_q2xs = (uint16_t *)malloc((num_neighbors + num_not_in_map)*sizeof(uint16_t));
    iq2_data[gindex].neighbours = kneighbors_q2xs;
    int counter = 0;
    for (int i = 0; i < kmap_size; ++i) {
        if (kmap_q2xs[i] >= 0) continue;
        for (int k = 0; k < 8; ++k) {
            int l = (i >> 2*k) & 0x3;
            pos[k] = 2*l + 1;
        }
        for (int j = 0; j < grid_size; ++j) {
            const int8_t * pg = (const int8_t *)(kgrid_q2xs + j);
            int d2 = 0;
            for (int k = 0; k < 8; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
            dist2[2*j+0] = d2;
            dist2[2*j+1] = j;
        }
        qsort(dist2, grid_size, 2*sizeof(int), iq2_compare_func);
        kmap_q2xs[i] = -(counter + 1);
        int d2 = dist2[0];
        uint16_t * start = &kneighbors_q2xs[counter++];
        int n = 0, nhave = 1;
        for (int j = 0; j < grid_size; ++j) {
            if (dist2[2*j] > d2) {
                if (nhave == nwant) break;
                d2 = dist2[2*j];
                ++nhave;
            }
            kneighbors_q2xs[counter++] = dist2[2*j+1];
            ++n;
        }
        *start = n;
    }
    free(dist2);
}
void iq2xs_free_impl(enum ggml_type type) {
    GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S);
    const int gindex = iq2_data_index(type);
    if (iq2_data[gindex].grid) {
        free(iq2_data[gindex].grid);       iq2_data[gindex].grid = NULL;
        free(iq2_data[gindex].map);        iq2_data[gindex].map  = NULL;
        free(iq2_data[gindex].neighbours); iq2_data[gindex].neighbours = NULL;
    }
}
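// Given a point that is not on the grid, pick the best replacement from its
// precomputed neighbour list: neighbours[0] holds the number of candidates and the
// following entries are grid indices. The winner minimizes the weighted squared
// error against xval at the current scale, and its quant values ((coordinate-1)/2)
// are written back into L.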
static int iq2_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
        const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
    int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);
    float best_d2 = FLT_MAX;
    int grid_index = -1;
    for (int j = 1; j <= num_neighbors; ++j) {
        const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
        float d2 = 0;
        for (int i = 0; i < 8; ++i) {
            float q = pg[i];
            float diff = scale*q - xval[i];
            d2 += weight[i]*diff*diff;
        }
        if (d2 < best_d2) {
            best_d2 = d2; grid_index = neighbours[j];
        }
    }
    GGML_ASSERT(grid_index >= 0);
    const int8_t * pg = (const int8_t *)(grid + grid_index);
    for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2;
    return grid_index;
}
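// IQ2_XXS quantization of one row. For every 32-value sub-block the signs are folded
// into xval (with the parity fix-up below, only 7 sign bits per group of 8 values need
// to be stored), a small range of candidate scales is searched, each group of 8 is
// snapped to the nearest entry of the 256-point grid (falling back to the neighbour
// list when the rounded point is off-grid), and the result is packed as one 32-bit
// word of grid indices plus one word holding the signs and a 4-bit block scale.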
static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
    const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);

    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
    const int      * kmap_q2xs       = iq2_data[gindex].map;
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;

    GGML_ASSERT(quant_weights   && "missing quantization weights");
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 3;

    const int64_t nbl = n/QK_K;

    block_iq2_xxs * y = vy;

    float scales[QK_K/32];
    float weight[32];
    float xval[32];
    int8_t L[32];
    int8_t Laux[32];
    float  waux[32];
    uint8_t block_signs[4];
    uint32_t q2[2*(QK_K/32)];

    for (int ibl = 0; ibl < nbl; ++ibl) {

        y[ibl].d = GGML_FP32_TO_FP16(0.f);
        memset(q2, 0, QK_K/4);

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = sumx2/QK_K;

        for (int ib = 0; ib < QK_K/32; ++ib) {
            const float * xb = xbl + 32*ib;
            const float * qw = quant_weights + QK_K*ibl + 32*ib;
            for (int i = 0; i < 32; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            for (int i = 0; i < 32; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < 4; ++k) {
                int nflip = 0;
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; ++nflip; s |= (1 << i);
                    }
                }
                if (nflip%2) {
                    int imin = 0; float min = weight[8*k+imin]*xb[8*k+imin]*xb[8*k+imin];
                    for (int i = 1; i < 8; ++i) {
                        float ax = weight[8*k+i]*xb[8*k+i]*xb[8*k+i];
                        if (ax < min) {
                            min = ax; imin = i;
                        }
                    }
                    xval[8*k+imin] = -xval[8*k+imin];
                    s ^= (1 << imin);
                }
                block_signs[k] = s & 127;
            }
            float max = xval[0];
            for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS) {
                scales[ib] = 0;
                memset(L, 0, 32);
                continue;
            }
            float scale = make_qp_quants(32, kMaxQ+1, xval, (uint8_t*)L, weight);
            float eff_max = scale*kMaxQ;
            float best = 0;
            for (int is = -6; is <= 6; ++is) {
                float id = (2*kMaxQ-1+is*0.1f)/eff_max;
                float this_scale = 1/id;
                for (int k = 0; k < 4; ++k) {
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        Laux[8*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i);
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    memcpy(L, Laux, 32);
                }
            }
            if (scale > 0) {
                float id = 1/scale;
                for (int k = 0; k < 4; ++k) {
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 2*i);
                    }
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
                    }
                    const int8_t * pg = (const int8_t *)(kgrid_q2xs + grid_index);
                    for (int i = 0; i < 8; ++i) L[8*k+i] = (pg[i] - 1)/2;
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // This should never happen, but just in case, flip scale so that it is positive (we use uint's to encode the scale)
                // and correspondingly flip quant signs.
                scale = -scale;
                for (int k = 0; k < 4; ++k) block_signs[k] = (~block_signs[k]) & 127;
            }
            for (int k = 0; k < 4; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 8; ++i) u |= (L[8*k+i] << 2*i);
                int grid_index = kmap_q2xs[u];
                if (grid_index < 0) {
                    printf("Oops: found point %u not on grid:", u);
                    for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
                q2[2*ib+1] |= (block_signs[k] << 7*k);
            }
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            memset(y[ibl].qs, 0, QK_K/4);
            continue;
        }

        float d = max_scale/31;
        y[ibl].d = GGML_FP32_TO_FP16(d);
        float id = 1/d;
        for (int ib = 0; ib < QK_K/32; ++ib) {
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            q2[2*ib+1] |= ((uint32_t)l << 28);
        }
        memcpy(y[ibl].qs, q2, QK_K/4);
    }
}
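// IQ2_XS follows the same scheme with 16-value sub-blocks and the 512-point grid:
// each group of 8 values is stored as a 16-bit word holding a 9-bit grid index plus
// 7 sign bits, and the per-sub-block scales are packed as 4-bit pairs into y[ibl].scales.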
static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
    const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);

    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
    const int      * kmap_q2xs       = iq2_data[gindex].map;
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;

    GGML_ASSERT(quant_weights   && "missing quantization weights");
    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 3;

    const int64_t nbl = n/QK_K;

    block_iq2_xs * y = vy;

    float scales[QK_K/16];
    float weight[16];
    float xval[16];
    int8_t L[16];
    int8_t Laux[16];
    float  waux[16];
    bool   is_on_grid[2];
    bool   is_on_grid_aux[2];
    uint8_t block_signs[2];
    uint16_t q2[2*(QK_K/16)];

    for (int ibl = 0; ibl < nbl; ++ibl) {

        y[ibl].d = GGML_FP32_TO_FP16(0.f);
        memset(q2, 0, QK_K/4);
        memset(y[ibl].scales, 0, QK_K/32);

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = sumx2/QK_K;

        for (int ib = 0; ib < QK_K/16; ++ib) {
            const float * xb = xbl + 16*ib;
            const float * qw = quant_weights + QK_K*ibl + 16*ib;
            for (int i = 0; i < 16; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            for (int i = 0; i < 16; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < 2; ++k) {
                int nflip = 0;
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; ++nflip; s |= (1 << i);
                    }
                }
                if (nflip%2) {
                    int imin = 0; float min = weight[8*k+imin]*xb[8*k+imin]*xb[8*k+imin];
                    for (int i = 1; i < 8; ++i) {
                        float ax = weight[8*k+i]*xb[8*k+i]*xb[8*k+i];
                        if (ax < min) {
                            min = ax; imin = i;
                        }
                    }
                    xval[8*k+imin] = -xval[8*k+imin];
                    s ^= (1 << imin);
                }
                block_signs[k] = s & 127;
            }
            float max = xval[0];
            for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS) {
                scales[ib] = 0;
                memset(L, 0, 16);
                continue;
            }
            float best = 0;
            float scale = max/(2*kMaxQ-1);
            is_on_grid[0] = is_on_grid[1] = true;
            for (int is = -9; is <= 9; ++is) {
                float id = (2*kMaxQ-1+is*0.1f)/max;
                float this_scale = 1/id;
                for (int k = 0; k < 2; ++k) {
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        Laux[8*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i);
                    int grid_index = kmap_q2xs[u];
                    is_on_grid_aux[k] = true;
                    if (grid_index < 0) {
                        is_on_grid_aux[k] = false;
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 16; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    for (int i = 0; i < 16; ++i) L[i] = Laux[i];
                    for (int k = 0; k < 2; ++k) is_on_grid[k] = is_on_grid_aux[k];
                }
            }
            int n_not_ongrid = 0;
            for (int k = 0; k < 2; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
            if (n_not_ongrid > 0 && scale > 0) {
                float id = 1/scale;
                for (int k = 0; k < 2; ++k) {
                    if (is_on_grid[k]) continue;
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 2*i);
                        L[8*k + i] = l;
                    }
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 16; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                scale = -scale;
                for (int k = 0; k < 2; ++k) block_signs[k] = (~block_signs[k]) & 127;
            }
            for (int k = 0; k < 2; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 8; ++i) u |= (L[8*k+i] << 2*i);
                int grid_index = kmap_q2xs[u];
                if (grid_index < 0) {
                    printf("Oops: found point %u not on grid:", u);
                    for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                q2[2*ib+k] = grid_index | (block_signs[k] << 9);
            }
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            memset(y[ibl].qs, 0, QK_K/4);
            continue;
        }

        float d = max_scale/31;
        y[ibl].d = GGML_FP32_TO_FP16(d);
        float id = 1/d;
        for (int ib = 0; ib < QK_K/16; ++ib) {
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            if (ib%2 == 0) y[ibl].scales[ib/2] = l;
            else y[ibl].scales[ib/2] |= (l << 4);
        }
        memcpy(y[ibl].qs, q2, QK_K/4);
    }
}
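// Row-wise entry points. The lookup tables must have been created first (normally via
// ggml_quantize_init(), which the asserts above refer to) and an importance matrix is
// required. A minimal call sequence, assuming `src` holds nrow*n_per_row floats and
// `imatrix` holds n_per_row importance weights, would look like:
//
//     ggml_quantize_init(GGML_TYPE_IQ2_XXS);
//     size_t written = quantize_iq2_xxs(src, dst, nrow, n_per_row, imatrix);
//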
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);
    int64_t nblock = n_per_row/QK_K;
    char * qrow = (char *)dst;
    for (int64_t row = 0; row < nrow; ++row) {
        quantize_row_iq2_xxs_impl(src, qrow, n_per_row, quant_weights);
        src += n_per_row;
        qrow += nblock*sizeof(block_iq2_xxs);
    }
    return nrow * nblock * sizeof(block_iq2_xxs);
}

size_t quantize_iq2_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);
    int64_t nblock = n_per_row/QK_K;
    char * qrow = (char *)dst;
    for (int64_t row = 0; row < nrow; ++row) {
        quantize_row_iq2_xs_impl(src, qrow, n_per_row, quant_weights);
        src += n_per_row;
        qrow += nblock*sizeof(block_iq2_xs);
    }
    return nrow * nblock * sizeof(block_iq2_xs);
}
//
// ============================================= 3-bit using D4 lattice
//

typedef struct {
    uint32_t * grid;
    int      * map;
    uint16_t * neighbours;
} iq3_entry_t;

static iq3_entry_t iq3_data[2] = {
    {NULL, NULL, NULL},
    {NULL, NULL, NULL},
};

static inline int iq3_data_index(int grid_size) {
    (void)grid_size;
    GGML_ASSERT(grid_size == 256 || grid_size == 512);
    return grid_size == 256 ? 0 : 1;
}

static int iq3_compare_func(const void * left, const void * right) {
    const int * l = (const int *)left;
    const int * r = (const int *)right;
    return l[0] < r[0] ? -1 : l[0] > r[0] ? 1 : l[1] < r[1] ? -1 : l[1] > r[1] ? 1 : 0;
}

void iq3xs_init_impl(int grid_size) {
    const int gindex = iq3_data_index(grid_size);
    if (iq3_data[gindex].grid) {
        return;
    }
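    // Subsets of the D4 lattice used for the 3-bit types: each 16-bit entry packs four
    // 3-bit digits l, which the construction below maps to the odd coordinate 2*l + 1.
    // kgrid_256 backs IQ3_XXS and kgrid_512 backs IQ3_S.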
  2945. static const uint16_t kgrid_256[256] = {
  2946. 0, 2, 4, 9, 11, 15, 16, 18, 25, 34, 59, 61, 65, 67, 72, 74,
  2947. 81, 85, 88, 90, 97, 108, 120, 128, 130, 132, 137, 144, 146, 153, 155, 159,
  2948. 169, 175, 189, 193, 199, 200, 202, 213, 248, 267, 287, 292, 303, 315, 317, 321,
  2949. 327, 346, 362, 413, 436, 456, 460, 462, 483, 497, 513, 515, 520, 522, 529, 531,
  2950. 536, 538, 540, 551, 552, 576, 578, 585, 592, 594, 641, 643, 648, 650, 657, 664,
  2951. 698, 704, 706, 720, 729, 742, 758, 769, 773, 808, 848, 852, 870, 889, 901, 978,
  2952. 992, 1024, 1026, 1033, 1035, 1040, 1042, 1046, 1049, 1058, 1089, 1091, 1093, 1096, 1098, 1105,
  2953. 1112, 1139, 1143, 1144, 1152, 1154, 1161, 1167, 1168, 1170, 1183, 1184, 1197, 1217, 1224, 1228,
  2954. 1272, 1276, 1309, 1323, 1347, 1367, 1377, 1404, 1473, 1475, 1486, 1509, 1537, 1544, 1546, 1553,
  2955. 1555, 1576, 1589, 1594, 1600, 1602, 1616, 1625, 1636, 1638, 1665, 1667, 1672, 1685, 1706, 1722,
  2956. 1737, 1755, 1816, 1831, 1850, 1856, 1862, 1874, 1901, 1932, 1950, 1971, 2011, 2032, 2052, 2063,
  2957. 2077, 2079, 2091, 2095, 2172, 2192, 2207, 2208, 2224, 2230, 2247, 2277, 2308, 2345, 2356, 2389,
  2958. 2403, 2424, 2501, 2504, 2506, 2520, 2570, 2593, 2616, 2624, 2630, 2646, 2669, 2700, 2714, 2746,
  2959. 2754, 2795, 2824, 2835, 2839, 2874, 2882, 2905, 2984, 3028, 3042, 3092, 3108, 3110, 3124, 3153,
  2960. 3185, 3215, 3252, 3288, 3294, 3364, 3397, 3434, 3483, 3523, 3537, 3587, 3589, 3591, 3592, 3610,
  2961. 3626, 3670, 3680, 3722, 3749, 3754, 3776, 3789, 3803, 3824, 3857, 3873, 3904, 3906, 3924, 3992,
  2962. };
  2963. static const uint16_t kgrid_512[512] = {
  2964. 0, 1, 2, 5, 7, 8, 9, 10, 12, 14, 16, 17, 21, 27, 32, 34,
  2965. 37, 39, 41, 43, 48, 50, 57, 60, 63, 64, 65, 66, 68, 72, 73, 77,
  2966. 80, 83, 87, 89, 93, 100, 113, 117, 122, 128, 129, 133, 135, 136, 139, 142,
  2967. 145, 149, 152, 156, 162, 165, 167, 169, 171, 184, 187, 195, 201, 205, 208, 210,
  2968. 217, 219, 222, 228, 232, 234, 247, 249, 253, 256, 267, 271, 273, 276, 282, 288,
  2969. 291, 297, 312, 322, 324, 336, 338, 342, 347, 353, 357, 359, 374, 379, 390, 393,
  2970. 395, 409, 426, 441, 448, 450, 452, 464, 466, 470, 475, 488, 492, 512, 513, 514,
  2971. 516, 520, 521, 523, 525, 527, 528, 530, 537, 540, 542, 556, 558, 561, 570, 576,
  2972. 577, 579, 582, 584, 588, 593, 600, 603, 609, 616, 618, 632, 638, 640, 650, 653,
  2973. 655, 656, 660, 666, 672, 675, 685, 688, 698, 705, 708, 711, 712, 715, 721, 727,
  2974. 728, 732, 737, 754, 760, 771, 773, 778, 780, 793, 795, 802, 806, 808, 812, 833,
  2975. 840, 843, 849, 856, 858, 873, 912, 916, 919, 932, 934, 961, 963, 968, 970, 977,
  2976. 989, 993, 1010, 1016, 1024, 1025, 1027, 1029, 1031, 1032, 1034, 1036, 1038, 1041, 1043, 1047,
  2977. 1048, 1050, 1057, 1059, 1061, 1064, 1066, 1079, 1080, 1083, 1085, 1088, 1090, 1096, 1099, 1103,
  2978. 1106, 1109, 1113, 1116, 1122, 1129, 1153, 1156, 1159, 1169, 1171, 1176, 1183, 1185, 1195, 1199,
  2979. 1209, 1212, 1216, 1218, 1221, 1225, 1234, 1236, 1241, 1243, 1250, 1256, 1270, 1281, 1287, 1296,
  2980. 1299, 1306, 1309, 1313, 1338, 1341, 1348, 1353, 1362, 1375, 1376, 1387, 1400, 1408, 1410, 1415,
  2981. 1425, 1453, 1457, 1477, 1481, 1494, 1496, 1507, 1512, 1538, 1545, 1547, 1549, 1551, 1554, 1561,
  2982. 1563, 1565, 1570, 1572, 1575, 1577, 1587, 1593, 1601, 1603, 1605, 1612, 1617, 1619, 1632, 1648,
  2983. 1658, 1662, 1664, 1674, 1680, 1690, 1692, 1704, 1729, 1736, 1740, 1745, 1747, 1751, 1752, 1761,
  2984. 1763, 1767, 1773, 1787, 1795, 1801, 1806, 1810, 1817, 1834, 1840, 1844, 1857, 1864, 1866, 1877,
  2985. 1882, 1892, 1902, 1915, 1934, 1953, 1985, 1987, 2000, 2002, 2013, 2048, 2052, 2058, 2064, 2068,
  2986. 2071, 2074, 2081, 2088, 2104, 2114, 2119, 2121, 2123, 2130, 2136, 2141, 2147, 2153, 2157, 2177,
  2987. 2179, 2184, 2189, 2193, 2203, 2208, 2223, 2226, 2232, 2244, 2249, 2251, 2256, 2258, 2265, 2269,
  2988. 2304, 2306, 2324, 2335, 2336, 2361, 2373, 2375, 2385, 2418, 2443, 2460, 2480, 2504, 2509, 2520,
  2989. 2531, 2537, 2562, 2568, 2572, 2578, 2592, 2596, 2599, 2602, 2614, 2620, 2625, 2627, 2629, 2634,
  2990. 2641, 2650, 2682, 2688, 2697, 2707, 2712, 2718, 2731, 2754, 2759, 2760, 2775, 2788, 2793, 2805,
  2991. 2811, 2817, 2820, 2832, 2842, 2854, 2890, 2902, 2921, 2923, 2978, 3010, 3012, 3026, 3081, 3083,
  2992. 3085, 3097, 3099, 3120, 3136, 3152, 3159, 3188, 3210, 3228, 3234, 3245, 3250, 3256, 3264, 3276,
  2993. 3281, 3296, 3349, 3363, 3378, 3392, 3395, 3420, 3440, 3461, 3488, 3529, 3531, 3584, 3588, 3591,
  2994. 3600, 3602, 3614, 3616, 3628, 3634, 3650, 3657, 3668, 3683, 3685, 3713, 3716, 3720, 3726, 3729,
  2995. 3736, 3753, 3778, 3802, 3805, 3819, 3841, 3845, 3851, 3856, 3880, 3922, 3938, 3970, 3993, 4032,
  2996. };
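    // Here every 12-bit key (four 3-bit digits) is representable, so the map covers all
    // 4096 combinations. Off-grid keys again get a negative entry indexing into the
    // neighbour list, which keeps every grid point in the nearest `nwant` distance
    // shells: 2 shells for the 256-point grid, 3 for the 512-point one.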
    const int kmap_size = 4096;
    const int nwant = grid_size == 256 ? 2 : 3;
    const uint16_t * kgrid = grid_size == 256 ? kgrid_256 : kgrid_512;
    uint32_t * kgrid_q3xs;
    int      * kmap_q3xs;
    uint16_t * kneighbors_q3xs;

    //printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
    uint32_t * the_grid = (uint32_t *)malloc(grid_size*sizeof(uint32_t));
    for (int k = 0; k < grid_size; ++k) {
        int8_t * pos = (int8_t *)(the_grid + k);
        for (int i = 0; i < 4; ++i) {
            int l = (kgrid[k] >> 3*i) & 0x7;
            pos[i] = 2*l + 1;
        }
    }
    kgrid_q3xs = the_grid;
    iq3_data[gindex].grid = the_grid;
    kmap_q3xs = (int *)malloc(kmap_size*sizeof(int));
    iq3_data[gindex].map = kmap_q3xs;
    for (int i = 0; i < kmap_size; ++i) kmap_q3xs[i] = -1;
    uint32_t aux32;
    uint8_t * aux8 = (uint8_t *)&aux32;
    for (int i = 0; i < grid_size; ++i) {
        aux32 = kgrid_q3xs[i];
        uint16_t index = 0;
        for (int k=0; k<4; ++k) {
            uint16_t q = (aux8[k] - 1)/2;
            index |= (q << 3*k);
        }
        kmap_q3xs[index] = i;
    }
    int8_t pos[4];
    int * dist2 = (int *)malloc(2*grid_size*sizeof(int));
    int num_neighbors = 0, num_not_in_map = 0;
    for (int i = 0; i < kmap_size; ++i) {
        if (kmap_q3xs[i] >= 0) continue;
        ++num_not_in_map;
        for (int k = 0; k < 4; ++k) {
            int l = (i >> 3*k) & 0x7;
            pos[k] = 2*l + 1;
        }
        for (int j = 0; j < grid_size; ++j) {
            const int8_t * pg = (const int8_t *)(kgrid_q3xs + j);
            int d2 = 0;
            for (int k = 0; k < 4; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
            dist2[2*j+0] = d2;
            dist2[2*j+1] = j;
        }
        qsort(dist2, grid_size, 2*sizeof(int), iq3_compare_func);
        int n = 0; int d2 = dist2[0];
        int nhave = 1;
        for (int j = 0; j < grid_size; ++j) {
            if (dist2[2*j] > d2) {
                if (nhave == nwant) break;
                d2 = dist2[2*j];
                ++nhave;
            }
            ++n;
        }
        num_neighbors += n;
    }
    //printf("%s: %d neighbours in total\n", __func__, num_neighbors);
    kneighbors_q3xs = (uint16_t *)malloc((num_neighbors + num_not_in_map)*sizeof(uint16_t));
    iq3_data[gindex].neighbours = kneighbors_q3xs;
    int counter = 0;
    for (int i = 0; i < kmap_size; ++i) {
        if (kmap_q3xs[i] >= 0) continue;
        for (int k = 0; k < 4; ++k) {
            int l = (i >> 3*k) & 0x7;
            pos[k] = 2*l + 1;
        }
        for (int j = 0; j < grid_size; ++j) {
            const int8_t * pg = (const int8_t *)(kgrid_q3xs + j);
            int d2 = 0;
            for (int k = 0; k < 4; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
            dist2[2*j+0] = d2;
            dist2[2*j+1] = j;
        }
        qsort(dist2, grid_size, 2*sizeof(int), iq3_compare_func);
        kmap_q3xs[i] = -(counter + 1);
        int d2 = dist2[0];
        uint16_t * start = &kneighbors_q3xs[counter++];
        int n = 0, nhave = 1;
        for (int j = 0; j < grid_size; ++j) {
            if (dist2[2*j] > d2) {
                if (nhave == nwant) break;
                d2 = dist2[2*j];
                ++nhave;
            }
            kneighbors_q3xs[counter++] = dist2[2*j+1];
            ++n;
        }
        *start = n;
    }
    free(dist2);
}
void iq3xs_free_impl(int grid_size) {
    GGML_ASSERT(grid_size == 256 || grid_size == 512);
    const int gindex = iq3_data_index(grid_size);
    if (iq3_data[gindex].grid) {
        free(iq3_data[gindex].grid);       iq3_data[gindex].grid = NULL;
        free(iq3_data[gindex].map);        iq3_data[gindex].map  = NULL;
        free(iq3_data[gindex].neighbours); iq3_data[gindex].neighbours = NULL;
    }
}
static int iq3_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint32_t * GGML_RESTRICT grid,
        const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
    int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);
    float best_d2 = FLT_MAX;
    int grid_index = -1;
    for (int j = 1; j <= num_neighbors; ++j) {
        const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
        float d2 = 0;
        for (int i = 0; i < 4; ++i) {
            float q = pg[i];
            float diff = scale*q - xval[i];
            d2 += weight[i]*diff*diff;
        }
        if (d2 < best_d2) {
            best_d2 = d2; grid_index = neighbours[j];
        }
    }
    GGML_ASSERT(grid_index >= 0);
    const int8_t * pg = (const int8_t *)(grid + grid_index);
    for (int i = 0; i < 4; ++i) L[i] = (pg[i] - 1)/2;
    return grid_index;
}
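// Shared 3-bit quantization kernel. grid_size selects the target layout: 256 writes
// block_iq3_xxs (8-bit grid indices with signs and 4-bit scales packed into 32-bit
// words), 512 writes the block_iq3_s layout (9-bit indices with the high bits
// collected in qh), although the IQ3_S entry point below uses the dedicated
// quantize_row_iq3_s_impl instead. The per-sub-block search mirrors the 2-bit case:
// fold signs, scan candidate scales, snap each group of 4 values to the grid, then
// store a global fp16 scale (with the small fudge factor noted in the code).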
static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
        const float * GGML_RESTRICT quant_weights) {
    const int gindex = iq3_data_index(grid_size);

    const uint32_t * kgrid_q3xs      = iq3_data[gindex].grid;
    const int      * kmap_q3xs       = iq3_data[gindex].map;
    const uint16_t * kneighbors_q3xs = iq3_data[gindex].neighbours;

    //GGML_ASSERT(quant_weights   && "missing quantization weights");
    GGML_ASSERT(kgrid_q3xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kmap_q3xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q3xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 8;

    const int64_t nbl = n/QK_K;

    ggml_fp16_t * dh;
    uint8_t * qs;
    int block_size;
    if (grid_size == 256) {
        block_iq3_xxs * y = vy;
        dh = &y->d;
        qs = y->qs;
        block_size = sizeof(block_iq3_xxs);
    } else {
        block_iq3_s * y = vy;
        dh = &y->d;
        qs = y->qs;
        block_size = sizeof(block_iq3_s);
    }
    int quant_size = block_size - sizeof(ggml_fp16_t);

    float scales[QK_K/32];
    float weight[32];
    float xval[32];
    int8_t L[32];
    int8_t Laux[32];
    float  waux[32];
    bool   is_on_grid[8];
    bool   is_on_grid_aux[8];
    uint8_t block_signs[8];
    uint8_t q3[3*(QK_K/8)+QK_K/32];
    uint32_t * scales_and_signs = (uint32_t *)(q3 + QK_K/4);
    uint8_t  * qh = q3 + 3*(QK_K/8);

    for (int ibl = 0; ibl < nbl; ++ibl) {

        dh[0] = GGML_FP32_TO_FP16(0.f);
        memset(q3, 0, 3*QK_K/8+QK_K/32);

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = 2*sumx2/QK_K;

        for (int ib = 0; ib < QK_K/32; ++ib) {
            const float * xb = xbl + 32*ib;
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*ibl + 32*ib;
                for (int i = 0; i < 32; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            } else {
                for (int i = 0; i < 32; ++i) weight[i] = xb[i]*xb[i];
            }
            for (int i = 0; i < 32; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < 4; ++k) {
                int nflip = 0;
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; ++nflip; s |= (1 << i);
                    }
                }
                if (nflip%2) {
                    int imin = 0; float min = weight[8*k+imin]*xb[8*k+imin]*xb[8*k+imin];
                    for (int i = 1; i < 8; ++i) {
                        float ax = weight[8*k+i]*xb[8*k+i]*xb[8*k+i];
                        if (ax < min) {
                            min = ax; imin = i;
                        }
                    }
                    xval[8*k+imin] = -xval[8*k+imin];
                    s ^= (1 << imin);
                }
                block_signs[k] = s & 127;
            }
            float max = xval[0];
            for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS_IQ3_XXS) {
                scales[ib] = 0;
                memset(L, 0, 32);
                continue;
            }
            float best = 0;
            float scale = max/(2*kMaxQ-1);
            for (int is = -15; is <= 15; ++is) {
                float id = (2*kMaxQ-1+is*0.2f)/max;
                float this_scale = 1/id;
                for (int k = 0; k < 8; ++k) {
                    for (int i = 0; i < 4; ++i) {
                        int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
                        Laux[4*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    uint16_t u = 0;
                    for (int i = 0; i < 4; ++i) u |= (Laux[4*k+i] << 3*i);
                    int grid_index = kmap_q3xs[u];
                    is_on_grid_aux[k] = true;
                    if (grid_index < 0) {
                        is_on_grid_aux[k] = false;
                        const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
                        grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, this_scale, Laux + 4*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    for (int i = 0; i < 32; ++i) L[i] = Laux[i];
                    for (int k = 0; k < 8; ++k) is_on_grid[k] = is_on_grid_aux[k];
                }
            }
            int n_not_ongrid = 0;
            for (int k = 0; k < 8; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
            if (n_not_ongrid > 0 && scale > 0) {
                float id = 1/scale;
                for (int k = 0; k < 8; ++k) {
                    if (is_on_grid[k]) continue;
                    uint16_t u = 0;
                    for (int i = 0; i < 4; ++i) {
                        int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 3*i);
                    }
                    int grid_index = kmap_q3xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
                        grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, scale, L + 4*k);
                    }
                    const int8_t * pg = (const int8_t *)(kgrid_q3xs + grid_index);
                    for (int i = 0; i < 4; ++i) L[4*k+i] = (pg[i] - 1)/2;
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // This should never happen, but just in case, flip scale so that it is positive (we use uint's to encode the scale)
                // and correspondingly flip quant signs.
                scale = -scale;
                for (int k = 0; k < 4; ++k) block_signs[k] = (~block_signs[k]) & 127;
            }
            for (int k = 0; k < 8; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 4; ++i) u |= (L[4*k+i] << 3*i);
                int grid_index = kmap_q3xs[u];
                if (grid_index < 0) {
                    printf("Oops: found point %u not on grid:", u);
                    for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                if (grid_size == 256) {
                    q3[8*ib+k] = grid_index;
                } else {
                    q3[8*ib+k] = grid_index & 255;
                    qh[ib] |= ((grid_index >> 8) << k);
                }
            }
            scales_and_signs[ib] = block_signs[0] | (block_signs[1] << 7) | (block_signs[2] << 14) | (block_signs[3] << 21);
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            memset(qs, 0, quant_size);
            dh += block_size/sizeof(ggml_fp16_t);
            qs += block_size;
            continue;
        }

        float d = max_scale/31;
        dh[0] = GGML_FP32_TO_FP16(d * 1.0125f);  // small improvement via this fudge factor
        float id = 1/d;
        for (int ib = 0; ib < QK_K/32; ++ib) {
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            scales_and_signs[ib] |= ((uint32_t)l << 28);
        }
        memcpy(qs, q3, quant_size);

        dh += block_size/sizeof(ggml_fp16_t);
        qs += block_size;
    }
}
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);
    int64_t nblock = n_per_row/QK_K;
    char * qrow = (char *)dst;
    for (int64_t row = 0; row < nrow; ++row) {
        quantize_row_iq3_xxs_impl(256, src, qrow, n_per_row, quant_weights);
        src += n_per_row;
        qrow += nblock*sizeof(block_iq3_xxs);
    }
    return nrow * nblock * sizeof(block_iq3_xxs);
}

void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
}
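// IQ3_S quantization of n values. Unlike the functions above, all scratch buffers
// (scales, weights, folded values, candidate quants, on-grid flags, sign bytes) are
// supplied by the caller so that quantize_iq3_s() below can allocate them once and
// reuse them for every row. IQ3_S stores a full 8 sign bits per group of 8 values,
// so no parity trick is needed here.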
  3335. static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int n,
  3336. const float * GGML_RESTRICT quant_weights,
  3337. float * scales,
  3338. float * weight,
  3339. float * xval,
  3340. int8_t * L,
  3341. int8_t * Laux,
  3342. float * waux,
  3343. bool * is_on_grid,
  3344. bool * is_on_grid_aux,
  3345. uint8_t * block_signs) {
  3346. const int gindex = iq3_data_index(512);
  3347. const uint32_t * kgrid_q3xs = iq3_data[gindex].grid;
  3348. const int * kmap_q3xs = iq3_data[gindex].map;
  3349. const uint16_t * kneighbors_q3xs = iq3_data[gindex].neighbours;
  3350. //GGML_ASSERT(quant_weights && "missing quantization weights");
  3351. GGML_ASSERT(kgrid_q3xs && "forgot to call ggml_quantize_init()?");
  3352. GGML_ASSERT(kmap_q3xs && "forgot to call ggml_quantize_init()?");
  3353. GGML_ASSERT(kneighbors_q3xs && "forgot to call ggml_quantize_init()?");
  3354. GGML_ASSERT(n%QK_K == 0);
  3355. const int kMaxQ = 8;
  3356. const int64_t nbl = n/QK_K;
  3357. block_iq3_s * y = vy;
  3358. const int bs4 = block_size/4;
  3359. const int bs8 = block_size/8;
  3360. for (int ibl = 0; ibl < nbl; ++ibl) {
  3361. memset(&y[ibl], 0, sizeof(block_iq3_s));
  3362. y[ibl].d = GGML_FP32_TO_FP16(0.f);
  3363. uint8_t * qs = y[ibl].qs;
  3364. uint8_t * qh = y[ibl].qh;
  3365. uint8_t * signs = y[ibl].signs;
  3366. float max_scale = 0;
  3367. const float * xbl = x + QK_K*ibl;
  3368. float sumx2 = 0;
  3369. for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
  3370. float sigma2 = 2*sumx2/QK_K;
  3371. for (int ib = 0; ib < QK_K/block_size; ++ib) {
  3372. const float * xb = xbl + block_size*ib;
  3373. if (quant_weights) {
  3374. const float * qw = quant_weights + QK_K*ibl + block_size*ib;
  3375. for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
  3376. } else {
  3377. for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
  3378. }
  3379. for (int i = 0; i < block_size; ++i) waux[i] = sqrtf(weight[i]);
  3380. for (int k = 0; k < bs8; ++k) {
  3381. uint8_t s = 0;
  3382. for (int i = 0; i < 8; ++i) {
  3383. if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
  3384. else {
  3385. xval[8*k + i] = -xb[8*k + i]; s |= (1 << i);
  3386. }
  3387. }
  3388. block_signs[k] = s;
  3389. }
  3390. float max = xval[0];
  3391. for (int i = 1; i < block_size; ++i) max = MAX(max, xval[i]);
  3392. if (!max) {
  3393. scales[ib] = 0;
  3394. continue;
  3395. }
  3396. float best = 0;
  3397. float scale = max/(2*kMaxQ-1);
  3398. for (int k = 0; k < bs4; ++k) is_on_grid[k] = false;
  3399. for (int is = -9; is <= 9; ++is) {
  3400. float id = (2*kMaxQ-1+is*0.2f)/max;
  3401. float this_scale = 1/id;
  3402. for (int k = 0; k < bs4; ++k) {
  3403. for (int i = 0; i < 4; ++i) {
  3404. int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
  3405. Laux[4*k+i] = MAX(0, MIN(kMaxQ-1, l));
  3406. }
  3407. uint16_t u = 0;
  3408. for (int i = 0; i < 4; ++i) u |= (Laux[4*k+i] << 3*i);
  3409. int grid_index = kmap_q3xs[u];
  3410. is_on_grid_aux[k] = true;
  3411. if (grid_index < 0) {
  3412. is_on_grid_aux[k] = false;
  3413. const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
  3414. grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, this_scale, Laux + 4*k);
  3415. }
  3416. }
  3417. float sumqx = 0, sumq2 = 0;
  3418. for (int i = 0; i < block_size; ++i) {
  3419. float w = weight[i];
  3420. float q = 2*Laux[i] + 1;
  3421. sumqx += w*xval[i]*q;
  3422. sumq2 += w*q*q;
  3423. }
  3424. if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
  3425. scale = sumqx/sumq2; best = scale*sumqx;
  3426. for (int i = 0; i < block_size; ++i) L[i] = Laux[i];
  3427. for (int k = 0; k < bs4; ++k) is_on_grid[k] = is_on_grid_aux[k];
  3428. }
  3429. }
  3430. int n_not_ongrid = 0;
  3431. for (int k = 0; k < bs4; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
  3432. if (n_not_ongrid > 0 && scale > 0) {
  3433. float id = 1/scale;
  3434. for (int k = 0; k < bs4; ++k) {
  3435. //if (is_on_grid[k]) continue;
  3436. uint16_t u = 0;
  3437. for (int i = 0; i < 4; ++i) {
  3438. int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
  3439. l = MAX(0, MIN(kMaxQ-1, l));
  3440. u |= (l << 3*i);
  3441. }
  3442. int grid_index = kmap_q3xs[u];
  3443. if (grid_index < 0) {
  3444. const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
  3445. grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, scale, L + 4*k);
  3446. }
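// The grid bytes store the odd quant values 2*L + 1 (1, 3, ..., 2*kMaxQ - 1),
// so (pg[i] - 1)/2 recovers the level index L in [0, kMaxQ - 1].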
  3447. const int8_t * pg = (const int8_t *)(kgrid_q3xs + grid_index);
  3448. for (int i = 0; i < 4; ++i) L[4*k+i] = (pg[i] - 1)/2;
  3449. }
  3450. float sumqx = 0, sumq2 = 0;
  3451. for (int i = 0; i < block_size; ++i) {
  3452. float w = weight[i];
  3453. float q = 2*L[i] + 1;
  3454. sumqx += w*xval[i]*q;
  3455. sumq2 += w*q*q;
  3456. }
  3457. if (sumq2 > 0) scale = sumqx/sumq2;
  3458. }
  3459. if (scale < 0) {
// This should never happen, but just in case, flip the scale so that it is positive (we use uints to encode the scale)
// and correspondingly flip the quant signs.
  3462. scale = -scale;
  3463. for (int k = 0; k < bs8; ++k) block_signs[k] = ~block_signs[k];
  3464. }
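// Pack the block: each group of 4 quants maps to one grid point whose index needs 9 bits.
// The low 8 bits go into qs[], and the 9-th bits are collected into qh[], one bit per group of 4
// and eight groups per qh byte; the sign bytes (one per group of 8) go into signs[].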
  3465. for (int k = 0; k < bs4; ++k) {
  3466. uint16_t u = 0;
  3467. for (int i = 0; i < 4; ++i) u |= (L[4*k+i] << 3*i);
  3468. int grid_index = kmap_q3xs[u];
  3469. if (grid_index < 0) {
  3470. printf("Oops: found point %u not on grid:", u);
  3471. for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
  3472. printf("\n");
  3473. GGML_ABORT("fatal error");
  3474. }
  3475. qs[k] = grid_index & 255;
  3476. qh[(ib*bs4+k)/8] |= ((grid_index >> 8) << ((ib*bs4+k)%8));
  3477. }
  3478. qs += bs4;
  3479. for (int k = 0; k < bs8; ++k) signs[k] = block_signs[k];
  3480. signs += bs8;
  3481. GGML_ASSERT(scale >= 0);
  3482. scales[ib] = scale;
  3483. max_scale = MAX(max_scale, scale);
  3484. }
  3485. if (!max_scale) {
  3486. continue;
  3487. }
  3488. float d = max_scale/31;
  3489. y[ibl].d = GGML_FP32_TO_FP16(d * 1.033f);
  3490. float id = 1/d;
  3491. for (int ib = 0; ib < QK_K/block_size; ib += 2) {
  3492. int l1 = nearest_int(0.5f*(id*scales[ib+0]-1));
  3493. l1 = MAX(0, MIN(15, l1));
  3494. int l2 = nearest_int(0.5f*(id*scales[ib+1]-1));
  3495. l2 = MAX(0, MIN(15, l2));
  3496. y[ibl].scales[ib/2] = l1 | (l2 << 4);
  3497. }
  3498. }
  3499. }
  3500. #define IQ3S_BLOCK_SIZE 32
  3501. size_t quantize_iq3_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  3502. GGML_ASSERT(n_per_row%QK_K == 0);
  3503. int64_t nblock = n_per_row/QK_K;
  3504. float scales[QK_K/IQ3S_BLOCK_SIZE];
  3505. float weight[IQ3S_BLOCK_SIZE];
  3506. float xval[IQ3S_BLOCK_SIZE];
  3507. int8_t L[IQ3S_BLOCK_SIZE];
  3508. int8_t Laux[IQ3S_BLOCK_SIZE];
  3509. float waux[IQ3S_BLOCK_SIZE];
  3510. bool is_on_grid[IQ3S_BLOCK_SIZE/4];
  3511. bool is_on_grid_aux[IQ3S_BLOCK_SIZE/4];
  3512. uint8_t block_signs[IQ3S_BLOCK_SIZE/8];
  3513. char * qrow = (char *)dst;
  3514. for (int64_t row = 0; row < nrow; ++row) {
  3515. quantize_row_iq3_s_impl(IQ3S_BLOCK_SIZE, src, qrow, n_per_row, quant_weights,
  3516. scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs);
  3517. src += n_per_row;
  3518. qrow += nblock*sizeof(block_iq3_s);
  3519. }
  3520. return nrow * nblock * sizeof(block_iq3_s);
  3521. }
  3522. void quantize_row_iq3_s_ref(const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k) {
  3523. assert(k % QK_K == 0);
  3524. quantize_iq3_s(x, y, 1, k, NULL);
  3525. }
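// Usage sketch (hypothetical call site): the IQ3_S grid/map/neighbour tables referenced by the
// asserts above must be prepared once with ggml_quantize_init(GGML_TYPE_IQ3_S) before quantizing, e.g.:
//     ggml_quantize_init(GGML_TYPE_IQ3_S);
//     size_t bytes = quantize_iq3_s(src, dst, nrow, n_per_row, /*quant_weights=*/NULL);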
  3526. // =================================== 1.5 bpw ===================================================
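// A note on the scoring used throughout these searches: for fixed quant values q_i and weights w_i,
// the weighted error sum w_i*(x_i - s*q_i)^2 is minimized at s = sumqx/sumq2, where
// sumqx = sum w_i*x_i*q_i and sumq2 = sum w_i*q_i*q_i, and the minimum equals
// sum w_i*x_i^2 - sumqx^2/sumq2. Hence candidates are compared by sumqx^2/sumq2 (kept as
// best_score = s*sumqx), and "sumqx*sumqx > best_score*sumq2" selects the smaller error.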
  3527. static int iq1_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
  3528. const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float * scale, int8_t * GGML_RESTRICT L, int ngrid) {
  3529. int num_neighbors = neighbours[0];
  3530. GGML_ASSERT(num_neighbors > 0);
  3531. float best_score = -FLT_MAX;
  3532. int grid_index = -1;
  3533. for (int j = 1; j <= num_neighbors; ++j) {
  3534. const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
  3535. float sumqx = 0, sumq2 = 0;
  3536. for (int i = 0; i < 8; ++i) {
  3537. float q = (pg[i] - 3)/2;
  3538. float w = weight[i];
  3539. sumqx += w*q*xval[i];
  3540. sumq2 += w*q*q;
  3541. }
  3542. if (sumqx > 0 && sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
  3543. *scale = sumqx/sumq2; best_score = *scale * sumqx;
  3544. grid_index = neighbours[j];
  3545. }
  3546. }
  3547. if (grid_index < 0) {
  3548. for (int i = 0; i < ngrid; ++i) {
  3549. const int8_t * grid_i = (const int8_t *)(grid + i);
  3550. float sumqx = 0, sumq2 = 0;
  3551. for (int j = 0; j < 8; ++j) {
  3552. float w = weight[j];
  3553. float q = (grid_i[j] - 3)/2;
  3554. sumqx += w*q*xval[j];
  3555. sumq2 += w*q*q;
  3556. }
  3557. if (sumqx > 0 && sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
  3558. *scale = sumqx/sumq2; best_score = *scale*sumqx;
  3559. grid_index = i;
  3560. }
  3561. }
  3562. }
  3563. if (grid_index < 0) {
  3564. printf("Oops, did not find grid point\n");
  3565. printf("Have %d neighbours\n", num_neighbors);
  3566. for (int j = 1; j <= num_neighbors; ++j) {
  3567. const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
  3568. float sumqx = 0, sumq2 = 0;
  3569. for (int i = 0; i < 8; ++i) {
  3570. float q = (pg[i] - 3)/2;
  3571. float w = weight[i];
  3572. sumqx += w*q*xval[i];
  3573. sumq2 += w*q*q;
  3574. }
  3575. printf(" neighbour %d: sumqx = %g sumq2 = %g\n", j, (double)sumqx, (double)sumq2);
  3576. }
  3577. }
  3578. GGML_ASSERT(grid_index >= 0);
  3579. //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  3580. *scale *= 1.05f; // This is a fudge factor. Don't ask me why it improves the result.
  3581. //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  3582. const int8_t * pg = (const int8_t *)(grid + grid_index);
  3583. for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2;
  3584. return grid_index;
  3585. }
  3586. static int iq1_find_best_neighbour2(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
  3587. const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, const float * GGML_RESTRICT xg, int8_t * GGML_RESTRICT L, int ngrid) {
  3588. int num_neighbors = neighbours[0];
  3589. GGML_ASSERT(num_neighbors > 0);
  3590. float best_score = FLT_MAX;
  3591. int grid_index = -1;
  3592. for (int j = 1; j <= num_neighbors; ++j) {
  3593. const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
  3594. float d2 = 0;
  3595. for (int i = 0; i < 8; ++i) {
  3596. float q = xg[(pg[i] - 1)/2];
  3597. float w = weight[i];
  3598. float diff = scale*q - xval[i];
  3599. d2 += w*diff*diff;
  3600. }
  3601. if (d2 < best_score) {
  3602. best_score = d2;
  3603. grid_index = neighbours[j];
  3604. }
  3605. }
  3606. if (grid_index < 0) {
  3607. for (int i = 0; i < ngrid; ++i) {
  3608. const int8_t * grid_i = (const int8_t *)(grid + i);
  3609. float d2 = 0;
  3610. for (int j = 0; j < 8; ++j) {
  3611. float w = weight[j];
  3612. float q = xg[(grid_i[j] - 1)/2];
float diff = scale*q - xval[j];
  3614. d2 += w*diff*diff;
  3615. }
  3616. if (d2 < best_score) {
  3617. best_score = d2;
  3618. grid_index = i;
  3619. }
  3620. }
  3621. }
  3622. if (grid_index < 0) {
  3623. printf("Oops, did not find grid point\n");
  3624. printf("Have %d neighbours\n", num_neighbors);
  3625. for (int j = 1; j <= num_neighbors; ++j) {
  3626. const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
  3627. float sumqx = 0, sumq2 = 0;
  3628. for (int i = 0; i < 8; ++i) {
  3629. float q = xg[(pg[i] - 1)/2];
  3630. float w = weight[i];
  3631. sumqx += w*q*xval[i];
  3632. sumq2 += w*q*q;
  3633. }
  3634. printf(" neighbour %d: sumqx = %g sumq2 = %g\n", j, (double)sumqx, (double)sumq2);
  3635. }
  3636. }
  3637. GGML_ASSERT(grid_index >= 0);
  3638. const int8_t * pg = (const int8_t *)(grid + grid_index);
  3639. for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2;
  3640. return grid_index;
  3641. }
  3642. static int iq1_sort_helper(const void * left, const void * right) {
  3643. const float * l = left;
  3644. const float * r = right;
  3645. return *l < *r ? -1 : *l > *r ? 1 : 0;
  3646. }
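// pairs[] interleaves the values with their original positions: pairs[2*j] holds xb[j] and the
// odd float slot is reused as integer storage via idx = (int *)(pairs + 1), so idx[2*j] aliases
// pairs[2*j + 1]. Sorting with an element size of 2*sizeof(float) and the comparator above then
// orders the (value, index) records by value while carrying the index along; this relies on
// sizeof(int) == sizeof(float).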
  3647. #define IQ1S_BLOCK_SIZE 32
  3648. #define IQ1M_BLOCK_SIZE 16
  3649. static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
  3650. float * scales,
  3651. float * weight,
  3652. float * sumx,
  3653. float * sumw,
  3654. float * pairs,
  3655. int8_t * L,
  3656. uint16_t * index,
  3657. int8_t * shifts) {
  3658. const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
  3659. const uint64_t * kgrid_q2xs = iq2_data[gindex].grid;
  3660. const int * kmap_q2xs = iq2_data[gindex].map;
  3661. const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;
  3662. GGML_ASSERT(quant_weights && "missing quantization weights");
  3663. GGML_ASSERT(kgrid_q2xs && "forgot to call ggml_quantize_init()?");
  3664. GGML_ASSERT(kmap_q2xs && "forgot to call ggml_quantize_init()?");
  3665. GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
  3666. GGML_ASSERT(n%QK_K == 0);
  3667. block_iq1_s * y = vy;
  3668. const int64_t nbl = n/QK_K;
  3669. const int block_size = IQ1S_BLOCK_SIZE;
  3670. const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA};
  3671. const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
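// The two candidate grids are the ternary levels {-1, 0, 1} shifted by +IQ1S_DELTA (x_p) or
// -IQ1S_DELTA (x_m); best_shift picks one of them per block and is later stored as bit 3 of the
// 4-bit block scale packed into the upper bits of qh[ib].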
  3672. int * idx = (int *)(pairs + 1);
  3673. for (int ibl = 0; ibl < nbl; ++ibl) {
  3674. y[ibl].d = GGML_FP32_TO_FP16(0.f);
  3675. memset(y[ibl].qs, 0, QK_K/8);
  3676. memset(y[ibl].qh, 0, QK_K/16);
  3677. float max_scale = 0;
  3678. const float * xbl = x + QK_K*ibl;
  3679. float sumx2 = 0;
  3680. for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
  3681. float sigma2 = 2*sumx2/QK_K;
  3682. for (int ib = 0; ib < QK_K/block_size; ++ib) {
  3683. const float * xb = xbl + block_size*ib;
  3684. const float * qw = quant_weights + QK_K*ibl + block_size*ib;
  3685. for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
  3686. float max = fabsf(xb[0]);
  3687. for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
  3688. if (max < GROUP_MAX_EPS_IQ1_S) {
  3689. scales[ib] = 0;
  3690. memset(L, 1, block_size);
  3691. continue;
  3692. }
// Here we solve exactly the weighted sum-of-squared-differences (SSD) minimization problem.
// With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
// boundaries that split the values xb[i] into 3 groups. To do so, we sort the values
// in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
// Wi = sum[weight[j], j = 0...i], and use these to quickly get the optimum scale
// and the corresponding score for each possible split.
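// Concretely, for a split (i1, i2) of the sorted order into groups [0,i1), [i1,i2), [i2,n), the sums
// sumqx and sumq2 follow from differences of the prefix sums sumx[]/sumw[] computed below, so each of
// the O(block_size^2) candidate splits is scored in O(1).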
  3699. for (int j = 0; j < block_size; ++j) {
  3700. pairs[2*j] = xb[j];
  3701. idx[2*j] = j;
  3702. }
  3703. qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
  3704. {
  3705. sumx[0] = sumw[0] = 0;
  3706. for (int j = 0; j < block_size; ++j) {
  3707. int i = idx[2*j];
  3708. sumx[j+1] = sumx[j] + weight[i]*xb[i];
  3709. sumw[j+1] = sumw[j] + weight[i];
  3710. }
  3711. }
  3712. float best_score = -FLT_MIN, scale = max;
  3713. int besti1 = -1, besti2 = -1, best_shift = 0;
  3714. for (int i1 = 0; i1 <= block_size; ++i1) {
  3715. for (int i2 = i1; i2 <= block_size; ++i2) {
  3716. float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
  3717. float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
  3718. if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
  3719. scale = sumqx/sumq2; best_score = scale*sumqx;
  3720. besti1 = i1; besti2 = i2; best_shift = 1;
  3721. }
  3722. sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
  3723. sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
  3724. if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
  3725. scale = sumqx/sumq2; best_score = scale*sumqx;
  3726. besti1 = i1; besti2 = i2; best_shift = -1;
  3727. }
  3728. }
  3729. }
  3730. GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
  3731. for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
  3732. for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
  3733. for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
  3734. if (scale < 0) {
  3735. for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
  3736. scale = -scale; best_shift = -best_shift;
  3737. }
  3738. bool all_on_grid = true;
  3739. const float * xx = best_shift == 1 ? x_p : x_m;
  3740. for (int k = 0; k < block_size/8; ++k) {
  3741. uint16_t u = 0;
  3742. for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
  3743. int grid_index = kmap_q2xs[u];
  3744. if (grid_index < 0) {
  3745. all_on_grid = false;
  3746. const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
  3747. grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
  3748. GGML_ASSERT(grid_index >= 0);
  3749. }
  3750. index[k] = grid_index;
  3751. }
  3752. if (!all_on_grid) {
  3753. float sumqx = 0, sumq2 = 0;
  3754. for (int k = 0; k < block_size/8; ++k) {
  3755. const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
  3756. for (int j = 0; j < 8; ++j) {
  3757. float w = weight[8*k + j];
  3758. float q = xx[(pg[j] - 1)/2];
  3759. sumqx += w*q*xb[8*k+j];
  3760. sumq2 += w*q*q;
  3761. }
  3762. }
  3763. if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
  3764. }
  3765. uint16_t h = 0;
  3766. for (int k = 0; k < block_size/8; ++k) {
  3767. y[ibl].qs[(block_size/8)*ib + k] = index[k] & 255;
  3768. h |= (index[k] >> 8) << 3*k;
  3769. }
  3770. y[ibl].qh[ib] = h;
  3771. GGML_ASSERT(scale >= 0);
  3772. scales[ib] = scale;
  3773. shifts[ib] = best_shift;
  3774. max_scale = MAX(max_scale, scale);
  3775. }
  3776. if (!max_scale) {
  3777. continue;
  3778. }
  3779. float d = max_scale/15;
  3780. y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed.
  3781. float id = 1/d;
  3782. for (int ib = 0; ib < QK_K/block_size; ++ib) {
  3783. int l = nearest_int(0.5f*(id*scales[ib]-1));
  3784. l = MAX(0, MIN(7, l));
  3785. if (shifts[ib] == -1) l |= 8;
  3786. y[ibl].qh[ib] |= (l << 12);
  3787. }
  3788. }
  3789. }
  3790. size_t quantize_iq1_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  3791. GGML_ASSERT(n_per_row%QK_K == 0);
  3792. float scales[QK_K/IQ1S_BLOCK_SIZE];
  3793. float weight[IQ1S_BLOCK_SIZE];
  3794. int8_t L[IQ1S_BLOCK_SIZE];
  3795. float sumx[IQ1S_BLOCK_SIZE+1];
  3796. float sumw[IQ1S_BLOCK_SIZE+1];
  3797. float pairs[2*IQ1S_BLOCK_SIZE];
  3798. uint16_t index[IQ1S_BLOCK_SIZE/8];
  3799. int8_t shifts[QK_K/IQ1S_BLOCK_SIZE];
  3800. int64_t nblock = n_per_row/QK_K;
  3801. char * qrow = (char *)dst;
  3802. for (int64_t row = 0; row < nrow; ++row) {
  3803. quantize_row_iq1_s_impl(src, qrow, n_per_row, quant_weights, scales, weight, sumx, sumw, pairs, L, index, shifts);
  3804. src += n_per_row;
  3805. qrow += nblock*sizeof(block_iq1_s);
  3806. }
  3807. return nrow * nblock * sizeof(block_iq1_s);
  3808. }
  3809. static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
  3810. float * scales,
  3811. float * weight,
  3812. float * pairs,
  3813. int8_t * L,
  3814. uint16_t * index,
  3815. int8_t * shifts) {
  3816. const int gindex = iq2_data_index(GGML_TYPE_IQ1_M);
  3817. const uint64_t * kgrid_q2xs = iq2_data[gindex].grid;
  3818. const int * kmap_q2xs = iq2_data[gindex].map;
  3819. const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;
  3820. //GGML_ASSERT(quant_weights && "missing quantization weights");
  3821. GGML_ASSERT(kgrid_q2xs && "forgot to call ggml_quantize_init()?");
  3822. GGML_ASSERT(kmap_q2xs && "forgot to call ggml_quantize_init()?");
  3823. GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
  3824. GGML_ASSERT(n%QK_K == 0);
  3825. block_iq1_m * y = vy;
  3826. const int64_t nbl = n/QK_K;
  3827. const int block_size = IQ1M_BLOCK_SIZE;
  3828. const float x_p[3] = {-1 + IQ1M_DELTA, IQ1M_DELTA, 1 + IQ1M_DELTA};
  3829. const float x_m[3] = {-1 - IQ1M_DELTA, -IQ1M_DELTA, 1 - IQ1M_DELTA};
  3830. const uint8_t masks[4] = {0x00, 0x80, 0x08, 0x88};
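// qh[ib] keeps the two 3-bit high parts of the grid indices in bits 0-2 and 4-6 (see the packing
// below), which leaves bits 3 and 7 free; masks[best_k] sets bit 3 when the first group of 8 uses
// the x_m grid and bit 7 when the second group does, so the dequantizer can recover the per-half
// delta choice.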
  3831. int * idx = (int *)(pairs + 1);
  3832. float sumqx[4], sumq2[4];
  3833. iq1m_scale_t s;
  3834. const float * xx;
  3835. for (int ibl = 0; ibl < nbl; ++ibl) {
  3836. memset(y[ibl].qs, 0, QK_K/8);
  3837. memset(y[ibl].qh, 0, QK_K/16);
  3838. memset(y[ibl].scales, 0, QK_K/32);
  3839. float max_scale = 0;
  3840. const float * xbl = x + QK_K*ibl;
  3841. float sumx2 = 0;
  3842. for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
  3843. float sigma2 = 2*sumx2/QK_K;
  3844. for (int ib = 0; ib < QK_K/block_size; ++ib) {
  3845. const float * xb = xbl + block_size*ib;
  3846. if (quant_weights) {
  3847. const float * qw = quant_weights + QK_K*ibl + block_size*ib;
  3848. for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
  3849. } else {
  3850. for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
  3851. }
  3852. float max = fabsf(xb[0]);
  3853. for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
  3854. if (max < GROUP_MAX_EPS_IQ1_M) {
  3855. scales[ib] = 0;
  3856. memset(L, 1, block_size);
  3857. continue;
  3858. }
// Here we solve exactly the weighted sum-of-squared-differences (SSD) minimization problem.
// With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
// boundaries that split the values xb[i] into 3 groups. To do so, we sort the values
// in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
// Wi = sum[weight[j], j = 0...i], and use these to quickly get the optimum scale
// and the corresponding score for each possible split.
  3865. for (int j = 0; j < block_size; ++j) {
  3866. pairs[2*j] = xb[j];
  3867. idx[2*j] = j;
  3868. }
  3869. qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
  3870. float best_score = -FLT_MIN, scale = max;
  3871. int besti1 = -1, besti2 = -1, best_k = -1;
  3872. // 0: +, +
  3873. // 1: +, -
  3874. // 2: -, +
  3875. // 3: -, -
  3876. for (int i1 = 0; i1 <= block_size; ++i1) {
  3877. for (int i2 = i1; i2 <= block_size; ++i2) {
  3878. memset(sumqx, 0, 4*sizeof(float));
  3879. memset(sumq2, 0, 4*sizeof(float));
  3880. for (int j = 0; j < i1; ++j) {
  3881. int i = idx[2*j];
  3882. if (i < block_size/2) {
  3883. sumqx[0] += weight[i]*x_p[0]*xb[i];
  3884. sumqx[1] += weight[i]*x_p[0]*xb[i];
  3885. sumqx[2] += weight[i]*x_m[0]*xb[i];
  3886. sumqx[3] += weight[i]*x_m[0]*xb[i];
  3887. sumq2[0] += weight[i]*x_p[0]*x_p[0];
  3888. sumq2[1] += weight[i]*x_p[0]*x_p[0];
  3889. sumq2[2] += weight[i]*x_m[0]*x_m[0];
  3890. sumq2[3] += weight[i]*x_m[0]*x_m[0];
  3891. } else {
  3892. sumqx[0] += weight[i]*x_p[0]*xb[i];
  3893. sumqx[2] += weight[i]*x_p[0]*xb[i];
  3894. sumqx[1] += weight[i]*x_m[0]*xb[i];
  3895. sumqx[3] += weight[i]*x_m[0]*xb[i];
  3896. sumq2[0] += weight[i]*x_p[0]*x_p[0];
  3897. sumq2[2] += weight[i]*x_p[0]*x_p[0];
  3898. sumq2[1] += weight[i]*x_m[0]*x_m[0];
  3899. sumq2[3] += weight[i]*x_m[0]*x_m[0];
  3900. }
  3901. }
  3902. for (int j = i1; j < i2; ++j) {
  3903. int i = idx[2*j];
  3904. if (i < block_size/2) {
  3905. sumqx[0] += weight[i]*x_p[1]*xb[i];
  3906. sumqx[1] += weight[i]*x_p[1]*xb[i];
  3907. sumqx[2] += weight[i]*x_m[1]*xb[i];
  3908. sumqx[3] += weight[i]*x_m[1]*xb[i];
  3909. sumq2[0] += weight[i]*x_p[1]*x_p[1];
  3910. sumq2[1] += weight[i]*x_p[1]*x_p[1];
  3911. sumq2[2] += weight[i]*x_m[1]*x_m[1];
  3912. sumq2[3] += weight[i]*x_m[1]*x_m[1];
  3913. } else {
  3914. sumqx[0] += weight[i]*x_p[1]*xb[i];
  3915. sumqx[2] += weight[i]*x_p[1]*xb[i];
  3916. sumqx[1] += weight[i]*x_m[1]*xb[i];
  3917. sumqx[3] += weight[i]*x_m[1]*xb[i];
  3918. sumq2[0] += weight[i]*x_p[1]*x_p[1];
  3919. sumq2[2] += weight[i]*x_p[1]*x_p[1];
  3920. sumq2[1] += weight[i]*x_m[1]*x_m[1];
  3921. sumq2[3] += weight[i]*x_m[1]*x_m[1];
  3922. }
  3923. }
  3924. for (int j = i2; j < block_size; ++j) {
  3925. int i = idx[2*j];
  3926. if (i < block_size/2) {
  3927. sumqx[0] += weight[i]*x_p[2]*xb[i];
  3928. sumqx[1] += weight[i]*x_p[2]*xb[i];
  3929. sumqx[2] += weight[i]*x_m[2]*xb[i];
  3930. sumqx[3] += weight[i]*x_m[2]*xb[i];
  3931. sumq2[0] += weight[i]*x_p[2]*x_p[2];
  3932. sumq2[1] += weight[i]*x_p[2]*x_p[2];
  3933. sumq2[2] += weight[i]*x_m[2]*x_m[2];
  3934. sumq2[3] += weight[i]*x_m[2]*x_m[2];
  3935. } else {
  3936. sumqx[0] += weight[i]*x_p[2]*xb[i];
  3937. sumqx[2] += weight[i]*x_p[2]*xb[i];
  3938. sumqx[1] += weight[i]*x_m[2]*xb[i];
  3939. sumqx[3] += weight[i]*x_m[2]*xb[i];
  3940. sumq2[0] += weight[i]*x_p[2]*x_p[2];
  3941. sumq2[2] += weight[i]*x_p[2]*x_p[2];
  3942. sumq2[1] += weight[i]*x_m[2]*x_m[2];
  3943. sumq2[3] += weight[i]*x_m[2]*x_m[2];
  3944. }
  3945. }
  3946. for (int k = 0; k < 4; ++k) {
  3947. if (sumq2[k] > 0 && sumqx[k]*sumqx[k] > best_score*sumq2[k]) {
  3948. scale = sumqx[k]/sumq2[k]; best_score = scale*sumqx[k];
  3949. besti1 = i1; besti2 = i2; best_k = k;
  3950. }
  3951. }
  3952. }
  3953. }
  3954. GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_k >= 0);
  3955. for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
  3956. for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
  3957. for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
  3958. if (scale < 0) {
  3959. for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
  3960. scale = -scale;
  3961. best_k = best_k == 0 ? 3 : best_k == 1 ? 2 : best_k == 2 ? 1 : 0;
  3962. }
  3963. bool all_on_grid = true;
  3964. for (int k = 0; k < block_size/8; ++k) {
  3965. if (k == 0) xx = best_k < 2 ? x_p : x_m;
  3966. else xx = best_k%2 == 0 ? x_p : x_m;
  3967. uint16_t u = 0;
  3968. for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
  3969. int grid_index = kmap_q2xs[u];
  3970. if (grid_index < 0) {
  3971. all_on_grid = false;
  3972. const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
  3973. grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
  3974. GGML_ASSERT(grid_index >= 0);
  3975. }
  3976. index[k] = grid_index;
  3977. }
  3978. if (!all_on_grid) {
  3979. float sumqx_f = 0, sumq2_f = 0;
  3980. for (int k = 0; k < block_size/8; ++k) {
  3981. if (k == 0) xx = best_k < 2 ? x_p : x_m;
  3982. else xx = best_k%2 == 0 ? x_p : x_m;
  3983. const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
  3984. for (int j = 0; j < 8; ++j) {
  3985. float w = weight[8*k + j];
  3986. float q = xx[(pg[j] - 1)/2];
  3987. sumqx_f += w*q*xb[8*k+j];
  3988. sumq2_f += w*q*q;
  3989. }
  3990. }
  3991. if (sumqx_f > 0 && sumq2_f > 0) scale = sumqx_f/sumq2_f;
  3992. }
  3993. y[ibl].qs[2*ib + 0] = index[0] & 255;
  3994. y[ibl].qs[2*ib + 1] = index[1] & 255;
  3995. y[ibl].qh[ib] = (index[0] >> 8) | ((index[1] >> 8) << 4);
  3996. GGML_ASSERT(scale >= 0);
  3997. scales[ib] = scale;
  3998. shifts[ib] = best_k;
  3999. max_scale = MAX(max_scale, scale);
  4000. }
  4001. if (!max_scale) {
  4002. continue;
  4003. }
  4004. uint16_t * sc = (uint16_t *)y[ibl].scales;
  4005. float d = max_scale/15;
  4006. float id = 1/d;
  4007. float sumqx_f = 0, sumq2_f = 0;
  4008. for (int ib = 0; ib < QK_K/block_size; ++ib) {
  4009. int l = nearest_int(0.5f*(id*scales[ib+0]-1));
  4010. l = MAX(0, MIN(7, l));
  4011. sc[ib/4] |= (l << 3*(ib%4));
  4012. y[ibl].qh[ib] |= masks[shifts[ib]];
  4013. const float * xb = xbl + block_size*ib;
  4014. if (quant_weights) {
  4015. const float * qw = quant_weights + QK_K*ibl + block_size*ib;
  4016. for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
  4017. } else {
  4018. for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
  4019. }
  4020. for (int k = 0; k < block_size/8; ++k) {
  4021. if (k == 0) xx = shifts[ib] < 2 ? x_p : x_m;
  4022. else xx = shifts[ib]%2 == 0 ? x_p : x_m;
  4023. const int8_t * pg = (const int8_t *)(kgrid_q2xs + y[ibl].qs[2*ib+k] + ((y[ibl].qh[ib] << (8 - 4*k)) & 0x700));
  4024. for (int j = 0; j < 8; ++j) {
  4025. float w = weight[8*k + j];
  4026. float q = xx[(pg[j] - 1)/2]*(2*l+1);
  4027. sumqx_f += w*q*xb[8*k+j];
  4028. sumq2_f += w*q*q;
  4029. }
  4030. }
  4031. }
  4032. if (sumq2_f > 0) d = sumqx_f/sumq2_f;
  4033. s.f16 = GGML_FP32_TO_FP16(d*1.1125f); // 1.1125f is another fudge factor. Don't ask me why it is needed.
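// The low 12 bits of each sc[] word already hold four 3-bit block scales (packed above); the four
// nibbles of the fp16 super-block scale are scattered into the top 4 bits of sc[0..3]. The decoder
// reassembles it as (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000),
// as done in ggml_validate_row_data() for GGML_TYPE_IQ1_M.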
  4034. sc[0] |= ((s.u16 & 0x000f) << 12);
  4035. sc[1] |= ((s.u16 & 0x00f0) << 8);
  4036. sc[2] |= ((s.u16 & 0x0f00) << 4);
  4037. sc[3] |= ((s.u16 & 0xf000) << 0);
  4038. }
  4039. }
  4040. size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  4041. GGML_ASSERT(n_per_row%QK_K == 0);
  4042. float scales[QK_K/IQ1M_BLOCK_SIZE];
  4043. float weight[IQ1M_BLOCK_SIZE];
  4044. int8_t L[IQ1M_BLOCK_SIZE];
  4045. float pairs[2*IQ1M_BLOCK_SIZE];
  4046. uint16_t index[IQ1M_BLOCK_SIZE/8];
  4047. int8_t shifts[QK_K/IQ1M_BLOCK_SIZE];
  4048. int64_t nblock = n_per_row/QK_K;
  4049. char * qrow = (char *)dst;
  4050. for (int64_t row = 0; row < nrow; ++row) {
  4051. quantize_row_iq1_m_impl(src, qrow, n_per_row, quant_weights, scales, weight, pairs, L, index, shifts);
  4052. src += n_per_row;
  4053. qrow += nblock*sizeof(block_iq1_m);
  4054. }
  4055. return nrow * nblock * sizeof(block_iq1_m);
  4056. }
  4057. // ============================ 4-bit non-linear quants
  4058. static inline int best_index_int8(int n, const int8_t * val, float x) {
  4059. if (x <= val[0]) return 0;
  4060. if (x >= val[n-1]) return n-1;
  4061. int ml = 0, mu = n-1;
  4062. while (mu-ml > 1) {
  4063. int mav = (ml+mu)/2;
  4064. if (x < val[mav]) mu = mav; else ml = mav;
  4065. }
  4066. return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
  4067. }
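// best_index_int8() assumes val[] is sorted in increasing order: it binary-searches for the bracketing
// pair and returns the index of the entry closest to x (ties go to the upper index). Here it is used
// with the 16-entry non-linear table kvalues_iq4nl.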
  4068. static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * GGML_RESTRICT x,
  4069. ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
  4070. float * scales, float * weight, uint8_t * L,
  4071. const int8_t * values,
  4072. const float * quant_weights,
  4073. const int ntry) {
  4074. float sigma2 = 0;
  4075. for (int j = 0; j < super_block_size; ++j) sigma2 += x[j]*x[j];
  4076. sigma2 *= 2.f/super_block_size;
  4077. memset(q4, 0, super_block_size/2);
  4078. dh[0] = GGML_FP32_TO_FP16(0.f);
  4079. float max_scale = 0, amax_scale = 0;
  4080. for (int ib = 0; ib < super_block_size/block_size; ++ib) {
  4081. const float * xb = x + ib*block_size;
  4082. uint8_t * Lb = L + ib*block_size;
  4083. if (quant_weights) {
  4084. const float * qw = quant_weights + ib*block_size;
  4085. for (int j = 0; j < block_size; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
  4086. } else {
  4087. for (int j = 0; j < block_size; ++j) weight[j] = xb[j]*xb[j];
  4088. }
  4089. float amax = 0, max = 0;
  4090. for (int j = 0; j < block_size; ++j) {
  4091. float ax = fabsf(xb[j]);
  4092. if (ax > amax) {
  4093. amax = ax; max = xb[j];
  4094. }
  4095. }
  4096. if (amax < GROUP_MAX_EPS) {
  4097. scales[ib] = 0;
  4098. continue;
  4099. }
  4100. float d = ntry > 0 ? -max/values[0] : max/values[0];
  4101. float id = 1/d;
  4102. float sumqx = 0, sumq2 = 0;
  4103. for (int j = 0; j < block_size; ++j) {
  4104. float al = id*xb[j];
  4105. int l = best_index_int8(16, values, al);
  4106. Lb[j] = l;
  4107. float q = values[l];
  4108. float w = weight[j];
  4109. sumqx += w*q*xb[j];
  4110. sumq2 += w*q*q;
  4111. }
  4112. d = sumqx/sumq2;
  4113. float best = d*sumqx;
  4114. for (int itry = -ntry; itry <= ntry; ++itry) {
  4115. id = (itry + values[0])/max;
  4116. sumqx = sumq2 = 0;
  4117. for (int j = 0; j < block_size; ++j) {
  4118. float al = id*xb[j];
  4119. int l = best_index_int8(16, values, al);
  4120. float q = values[l];
  4121. float w = weight[j];
  4122. sumqx += w*q*xb[j];
  4123. sumq2 += w*q*q;
  4124. }
  4125. if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
  4126. d = sumqx/sumq2; best = d * sumqx;
  4127. }
  4128. }
  4129. scales[ib] = d;
  4130. float abs_d = fabsf(d);
  4131. if (abs_d > amax_scale) {
  4132. amax_scale = abs_d; max_scale = d;
  4133. }
  4134. }
  4135. if (super_block_size/block_size > 1) {
  4136. int nb = super_block_size/block_size;
  4137. memset(scales_h, 0, ((nb+7)/8)*sizeof(uint16_t));
  4138. float d = -max_scale/32;
  4139. dh[0] = GGML_FP32_TO_FP16(d);
  4140. float id = d ? 1/d : 0.f;
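// The loop below rounds each block scale to a 6-bit integer l in [-32, 31] relative to d, re-quantizes
// the block's values with the rounded scale d*l, and stores l + 32 split into a low nibble (scales_l,
// two blocks per byte) and a high 2-bit part (scales_h, eight blocks per uint16_t).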
  4141. for (int ib = 0; ib < super_block_size/block_size; ++ib) {
  4142. int l = nearest_int(id*scales[ib]);
  4143. l = MAX(-32, MIN(31, l));
  4144. float dl = d * l;
  4145. float idl = dl ? 1/dl : 0.f;
  4146. uint8_t * Lb = L + ib*block_size;
  4147. const float * xb = x + ib*block_size;
  4148. for (int j = 0; j < block_size; ++j) {
  4149. Lb[j] = best_index_int8(16, values, idl*xb[j]);
  4150. }
  4151. l += 32;
  4152. uint8_t l_l = l & 0xf;
  4153. uint8_t l_h = l >> 4;
  4154. if (ib%2 == 0) scales_l[ib/2] = l_l;
  4155. else scales_l[ib/2] |= (l_l << 4);
  4156. scales_h[ib/8] |= (l_h << 2*(ib%8));
  4157. }
  4158. } else {
  4159. dh[0] = GGML_FP32_TO_FP16(scales[0]);
  4160. if (ntry > 0) {
  4161. float id = scales[0] ? 1/scales[0] : 0;
  4162. for (int j = 0; j < super_block_size; ++j) {
  4163. L[j] = best_index_int8(16, values, id*x[j]);
  4164. }
  4165. }
  4166. }
  4167. for (int i = 0; i < super_block_size/32; ++i) {
  4168. for (int j = 0; j < 16; ++j) {
  4169. q4[16*i + j] = L[32*i + j] | (L[32*i + 16 + j] << 4);
  4170. }
  4171. }
  4172. }
  4173. size_t quantize_iq4_nl(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  4174. GGML_ASSERT(n_per_row%QK4_NL == 0);
  4175. int64_t nblock = n_per_row/QK4_NL;
  4176. char * qrow = (char *)dst;
  4177. uint8_t L[QK4_NL];
  4178. float weight[QK4_NL];
  4179. uint16_t unused_h;
  4180. uint8_t * unused_l = NULL;
  4181. float scale;
  4182. for (int64_t row = 0; row < nrow; ++row) {
  4183. block_iq4_nl * iq4 = (block_iq4_nl *)qrow;
  4184. for (int ibl = 0; ibl < nblock; ++ibl) {
  4185. const float * qw = quant_weights ? quant_weights + QK4_NL*ibl : NULL;
  4186. quantize_row_iq4_nl_impl(QK4_NL, 32, src + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
  4187. &scale, weight, L, kvalues_iq4nl, qw, 7);
  4188. }
  4189. src += n_per_row;
  4190. qrow += nblock*sizeof(block_iq4_nl);
  4191. }
  4192. return nrow * nblock * sizeof(block_iq4_nl);
  4193. }
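// Usage sketch (hypothetical call site): quantize one row of n floats, n a multiple of QK4_NL,
// without an importance matrix:
//     size_t bytes = quantize_iq4_nl(src, dst, /*nrow=*/1, /*n_per_row=*/n, /*quant_weights=*/NULL);
// dst must provide nrow * (n_per_row/QK4_NL) * sizeof(block_iq4_nl) bytes, which is also the return value.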
  4194. //void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
  4195. void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k) {
  4196. GGML_ASSERT(k%QK4_NL == 0);
  4197. int64_t nblock = k/QK4_NL;
  4198. uint8_t L[QK4_NL];
  4199. float weight[QK4_NL];
  4200. uint16_t unused_h;
  4201. uint8_t * unused_l = NULL;
  4202. float scale;
  4203. block_iq4_nl * iq4 = y;
  4204. for (int ibl = 0; ibl < nblock; ++ibl) {
  4205. quantize_row_iq4_nl_impl(QK4_NL, 32, x + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
  4206. &scale, weight, L, kvalues_iq4nl, NULL, -1);
  4207. }
  4208. }
  4209. size_t quantize_iq4_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  4210. GGML_ASSERT(n_per_row%QK_K == 0);
  4211. int64_t nblock = n_per_row/QK_K;
  4212. char * qrow = (char *)dst;
  4213. uint8_t L[QK_K];
  4214. float weight[32];
  4215. float scales[QK_K/32];
  4216. for (int64_t row = 0; row < nrow; ++row) {
  4217. block_iq4_xs * iq4 = (block_iq4_xs *)qrow;
  4218. for (int ibl = 0; ibl < nblock; ++ibl) {
  4219. const float * qw = quant_weights ? quant_weights + QK_K*ibl : NULL;
  4220. quantize_row_iq4_nl_impl(QK_K, 32, src + QK_K*ibl, &iq4[ibl].d, iq4[ibl].qs, &iq4[ibl].scales_h, iq4[ibl].scales_l,
  4221. scales, weight, L, kvalues_iq4nl, qw, 7);
  4222. }
  4223. src += n_per_row;
  4224. qrow += nblock*sizeof(block_iq4_xs);
  4225. }
  4226. return nrow * nblock * sizeof(block_iq4_xs);
  4227. }
  4228. void quantize_row_iq4_xs_ref(const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k) {
  4229. assert(k % QK_K == 0);
  4230. quantize_iq4_xs(x, y, 1, k, NULL);
  4231. }
  4232. // =============================== 2.5625 bpw
  4233. static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
  4234. const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);
  4235. const uint64_t * kgrid_q2xs = iq2_data[gindex].grid;
  4236. const int * kmap_q2xs = iq2_data[gindex].map;
  4237. const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;
  4238. GGML_ASSERT(kmap_q2xs && "forgot to call ggml_quantize_init()?");
  4239. GGML_ASSERT(kgrid_q2xs && "forgot to call ggml_quantize_init()?");
  4240. GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
  4241. GGML_ASSERT(n%QK_K == 0);
  4242. const int kMaxQ = 3;
  4243. const int64_t nbl = n/QK_K;
  4244. block_iq2_s * y = vy;
  4245. float scales[QK_K/16];
  4246. float weight[16];
  4247. float xval[16];
  4248. int8_t L[16];
  4249. int8_t Laux[16];
  4250. float waux[16];
  4251. bool is_on_grid[2];
  4252. bool is_on_grid_aux[2];
  4253. uint8_t block_signs[2];
  4254. for (int ibl = 0; ibl < nbl; ++ibl) {
  4255. memset(&y[ibl], 0, sizeof(block_iq2_s));
  4256. y[ibl].d = GGML_FP32_TO_FP16(0.f);
  4257. float max_scale = 0;
  4258. const float * xbl = x + QK_K*ibl;
  4259. float sumx2 = 0;
  4260. for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
  4261. float sigma2 = 2*sumx2/QK_K;
  4262. for (int ib = 0; ib < QK_K/16; ++ib) {
  4263. const float * xb = xbl + 16*ib;
  4264. if (quant_weights) {
  4265. const float * qw = quant_weights + QK_K*ibl + 16*ib;
  4266. for (int i = 0; i < 16; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
  4267. } else {
  4268. for (int i = 0; i < 16; ++i) weight[i] = 0.25f*sigma2 + xb[i]*xb[i];
  4269. }
  4270. for (int i = 0; i < 16; ++i) waux[i] = sqrtf(weight[i]);
  4271. for (int k = 0; k < 2; ++k) {
  4272. uint8_t s = 0;
  4273. for (int i = 0; i < 8; ++i) {
  4274. if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
  4275. else {
  4276. xval[8*k + i] = -xb[8*k + i]; s |= (1 << i);
  4277. }
  4278. }
  4279. block_signs[k] = s;
  4280. }
  4281. float max = xval[0];
  4282. for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
  4283. if (max < GROUP_MAX_EPS_IQ2_S) {
  4284. scales[ib] = 0;
  4285. continue;
  4286. }
  4287. float best = 0;
  4288. float scale = max/(2*kMaxQ-1);
  4289. is_on_grid[0] = is_on_grid[1] = true;
  4290. for (int is = -9; is <= 9; ++is) {
  4291. float id = (2*kMaxQ-1+is*0.1f)/max;
  4292. float this_scale = 1/id;
  4293. for (int k = 0; k < 2; ++k) {
  4294. for (int i = 0; i < 8; ++i) {
  4295. int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
  4296. Laux[8*k+i] = MAX(0, MIN(kMaxQ-1, l));
  4297. }
  4298. uint16_t u = 0;
  4299. for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i);
  4300. int grid_index = kmap_q2xs[u];
  4301. is_on_grid_aux[k] = true;
  4302. if (grid_index < 0) {
  4303. is_on_grid_aux[k] = false;
  4304. const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
  4305. grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
  4306. }
  4307. }
  4308. float sumqx = 0, sumq2 = 0;
  4309. for (int i = 0; i < 16; ++i) {
  4310. float w = weight[i];
  4311. float q = 2*Laux[i] + 1;
  4312. sumqx += w*xval[i]*q;
  4313. sumq2 += w*q*q;
  4314. }
  4315. if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
  4316. scale = sumqx/sumq2; best = scale*sumqx;
  4317. for (int i = 0; i < 16; ++i) L[i] = Laux[i];
  4318. for (int k = 0; k < 2; ++k) is_on_grid[k] = is_on_grid_aux[k];
  4319. }
  4320. }
  4321. int n_not_ongrid = 0;
  4322. for (int k = 0; k < 2; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
  4323. if (n_not_ongrid > 0 && scale > 0) {
  4324. float id = 1/scale;
  4325. for (int k = 0; k < 2; ++k) {
  4326. if (is_on_grid[k]) continue;
  4327. uint16_t u = 0;
  4328. for (int i = 0; i < 8; ++i) {
  4329. int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
  4330. l = MAX(0, MIN(kMaxQ-1, l));
  4331. u |= (l << 2*i);
  4332. L[8*k + i] = l;
  4333. }
  4334. int grid_index = kmap_q2xs[u];
  4335. if (grid_index < 0) {
  4336. const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
  4337. grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
  4338. }
  4339. }
  4340. float sumqx = 0, sumq2 = 0;
  4341. for (int i = 0; i < 16; ++i) {
  4342. float w = weight[i];
  4343. float q = 2*L[i] + 1;
  4344. sumqx += w*xval[i]*q;
  4345. sumq2 += w*q*q;
  4346. }
  4347. if (sumq2 > 0) scale = sumqx/sumq2;
  4348. }
  4349. if (scale < 0) {
  4350. scale = -scale;
  4351. for (int k = 0; k < 2; ++k) block_signs[k] = ~block_signs[k];
  4352. }
  4353. for (int k = 0; k < 2; ++k) {
  4354. uint16_t u = 0;
  4355. for (int i = 0; i < 8; ++i) u |= (L[8*k+i] << 2*i);
  4356. int grid_index = kmap_q2xs[u];
  4357. if (grid_index < 0) {
  4358. printf("Oops: found point %u not on grid:", u);
  4359. for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
  4360. printf("\n");
  4361. GGML_ABORT("fatal error");
  4362. }
  4363. const int i8 = 2*ib + k;
  4364. y[ibl].qs[i8] = grid_index & 255;
  4365. y[ibl].qh[i8/4] |= ((grid_index >> 8) << 2*(i8%4));
  4366. y[ibl].qs[QK_K/8 + i8] = block_signs[k];
  4367. }
  4368. GGML_ASSERT(scale >= 0);
  4369. scales[ib] = scale;
  4370. max_scale = MAX(max_scale, scale);
  4371. }
  4372. if (!max_scale) {
  4373. continue;
  4374. }
  4375. float d = max_scale/31;
  4376. y[ibl].d = GGML_FP32_TO_FP16(d * 0.9875f);
  4377. float id = 1/d;
  4378. for (int ib = 0; ib < QK_K/16; ++ib) {
  4379. int l = nearest_int(0.5f*(id*scales[ib]-1));
  4380. l = MAX(0, MIN(15, l));
  4381. if (ib%2 == 0) y[ibl].scales[ib/2] = l;
  4382. else y[ibl].scales[ib/2] |= (l << 4);
  4383. }
  4384. }
  4385. }
  4386. size_t quantize_iq2_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
  4387. GGML_ASSERT(n_per_row%QK_K == 0);
  4388. int64_t nblock = n_per_row/QK_K;
  4389. char * qrow = (char *)dst;
  4390. for (int64_t row = 0; row < nrow; ++row) {
  4391. quantize_row_iq2_s_impl(src, qrow, n_per_row, quant_weights);
  4392. src += n_per_row;
  4393. qrow += nblock*sizeof(block_iq2_s);
  4394. }
  4395. return nrow * nblock * sizeof(block_iq2_s);
  4396. }
  4397. void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k) {
  4398. assert(k % QK_K == 0);
  4399. quantize_iq2_s(x, y, 1, k, NULL);
  4400. }
  4401. // =============================== data validation
  4402. static bool validate_float(float f, size_t i) {
  4403. if (isinf(f)) {
  4404. fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
  4405. return false;
  4406. }
  4407. if (isnan(f)) {
  4408. fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
  4409. return false;
  4410. }
  4411. return true;
  4412. }
  4413. static bool isinf_fp16(ggml_fp16_t f) {
  4414. return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) == 0;
  4415. }
  4416. static bool isnan_fp16(ggml_fp16_t f) {
  4417. return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) != 0;
  4418. }
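// In IEEE-754 binary16 the exponent field is bits 10-14 (mask 0x7c00); an all-ones exponent means
// +/-inf when the 10-bit mantissa (mask 0x03ff) is zero and NaN otherwise. The BF16 case below does
// the analogous check with 0x7f80.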
  4419. static bool validate_fp16(ggml_fp16_t f, size_t i) {
  4420. if (isinf_fp16(f)) {
  4421. fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
  4422. return false;
  4423. }
  4424. if (isnan_fp16(f)) {
  4425. fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
  4426. return false;
  4427. }
  4428. return true;
  4429. }
  4430. #define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
  4431. const type * q = (const type *) (data); \
  4432. for (size_t i = 0; i < (nb); ++i) { \
  4433. if (!validate_fp16(q[i].d, i)) { \
  4434. return false; \
  4435. } \
  4436. }
  4437. #define VALIDATE_ROW_DATA_DM_F16_IMPL(type, data, nb, d, m) \
  4438. const type * q = (const type *) (data); \
  4439. for (size_t i = 0; i < (nb); ++i) { \
  4440. if (!validate_fp16(q[i].d, i) || !validate_fp16(q[i].m, i)) { \
  4441. return false; \
  4442. } \
  4443. }
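// Note that d and m in the macro body are macro parameters, so each call site substitutes the actual
// member names, e.g. VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin) checks q[i].d and
// q[i].dmin.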
  4444. #define VALIDATE_ROW_DATA_DVEC_F16_IMPL(type, data, nb, nr) \
  4445. const type * q = (const type *) (data); \
  4446. for (size_t i = 0; i < (nb); ++i) { \
  4447. for (size_t j = 0; j < (nr); ++j) { \
  4448. if (!validate_fp16(q[i].d[j], i)) { \
  4449. return false; \
  4450. } \
  4451. } \
  4452. }
  4453. bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
  4454. if (type < 0 || type >= GGML_TYPE_COUNT) {
  4455. fprintf(stderr, "%s: invalid type %d\n", __func__, type);
  4456. return false;
  4457. }
  4458. if (nbytes % ggml_type_size(type) != 0) {
  4459. fprintf(stderr, "%s: invalid size %zu for type %s (type size = %zu)\n", __func__, nbytes, ggml_type_name(type), ggml_type_size(type));
  4460. return false;
  4461. }
  4462. const size_t nb = nbytes/ggml_type_size(type);
  4463. switch (type) {
  4464. case GGML_TYPE_BF16:
  4465. {
  4466. int nans = 0;
  4467. int infs = 0;
  4468. const unsigned short * f = (const unsigned short *) data;
  4469. for (size_t i = 0; i < nb; ++i) {
  4470. nans += (f[i] & 0x7fff) > 0x7f80;
  4471. infs += (f[i] & 0x7fff) == 0x7f80;
  4472. }
  4473. if (nans) {
  4474. fprintf(stderr, "%s: found %d NaNs in row of %zu BF16 values\n", __func__, nans, nb);
  4475. return false;
  4476. }
  4477. if (infs) {
  4478. fprintf(stderr, "%s: found %d infinities in row of %zu BF16 values\n", __func__, infs, nb);
  4479. return false;
  4480. }
  4481. } break;
  4482. case GGML_TYPE_F16:
  4483. {
  4484. const ggml_fp16_t * f = (const ggml_fp16_t *) data;
  4485. size_t i = 0;
  4486. #if defined(__AVX2__)
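// Vectorized screening: AND 16 half-precision values with the exponent mask 0x7c00 and compare for
// equality; a non-zero movemask means at least one value has an all-ones exponent (inf or NaN), so the
// scalar re-check below is guaranteed to report it and return false, which is why the loop ends in
// GGML_UNREACHABLE().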
  4487. for (; i + 15 < nb; i += 16) {
  4488. __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
  4489. __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi16(0x7c00));
  4490. __m256i cmp = _mm256_cmpeq_epi16(vexp, _mm256_set1_epi16(0x7c00));
  4491. int mask = _mm256_movemask_epi8(cmp);
  4492. if (mask) {
  4493. for (size_t j = 0; j < 16; ++j) {
  4494. if (!validate_fp16(f[i + j], i + j)) {
  4495. return false;
  4496. }
  4497. }
  4498. GGML_UNREACHABLE();
  4499. }
  4500. }
  4501. #elif defined(__ARM_NEON)
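// Same screening with NEON: vshrn_n_u16(cmp, 4) narrows each 0x0000/0xffff compare lane to a byte
// (0x00 or 0xff), and the reinterpreted 64-bit lane acts as a movemask substitute: it is non-zero
// iff any of the 8 values had an all-ones exponent.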
  4502. for (; i + 7 < nb; i += 8) {
  4503. uint16x8_t v = vld1q_u16(f + i);
  4504. uint16x8_t vexp = vandq_u16(v, vdupq_n_u16(0x7c00));
  4505. uint16x8_t cmp = vceqq_u16(vexp, vdupq_n_u16(0x7c00));
  4506. uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0);
  4507. if (mask) {
  4508. for (size_t j = 0; j < 8; ++j) {
  4509. if (!validate_fp16(f[i + j], i + j)) {
  4510. return false;
  4511. }
  4512. }
  4513. GGML_UNREACHABLE();
  4514. }
  4515. }
  4516. #endif
  4517. for (; i < nb; ++i) {
  4518. if (!validate_fp16(f[i], i)) {
  4519. return false;
  4520. }
  4521. }
  4522. } break;
  4523. case GGML_TYPE_F32:
  4524. {
  4525. const float * f = (const float *) data;
  4526. size_t i = 0;
  4527. #if defined(__AVX2__)
  4528. for (; i + 7 < nb; i += 8) {
  4529. __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
  4530. __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi32(0x7f800000));
  4531. __m256i cmp = _mm256_cmpeq_epi32(vexp, _mm256_set1_epi32(0x7f800000));
  4532. int mask = _mm256_movemask_epi8(cmp);
  4533. if (mask) {
  4534. for (size_t j = 0; j < 8; ++j) {
  4535. if (!validate_float(f[i + j], i + j)) {
  4536. return false;
  4537. }
  4538. }
  4539. GGML_UNREACHABLE();
  4540. }
  4541. }
  4542. #elif defined(__ARM_NEON)
  4543. for (; i + 3 < nb; i += 4) {
  4544. uint32x4_t v = vld1q_u32((const uint32_t *)f + i);
  4545. uint32x4_t vexp = vandq_u32(v, vdupq_n_u32(0x7f800000));
  4546. uint32x4_t cmp = vceqq_u32(vexp, vdupq_n_u32(0x7f800000));
  4547. uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(vshrn_n_u32(cmp, 8)), 0);
  4548. if (mask) {
  4549. for (size_t j = 0; j < 4; ++j) {
  4550. if (!validate_float(f[i + j], i + j)) {
  4551. return false;
  4552. }
  4553. }
  4554. GGML_UNREACHABLE();
  4555. }
  4556. }
  4557. #endif
  4558. for (; i < nb; ++i) {
  4559. if (!validate_float(f[i], i)) {
  4560. return false;
  4561. }
  4562. }
  4563. } break;
  4564. case GGML_TYPE_F64:
  4565. {
  4566. const double * f = (const double *) data;
  4567. for (size_t i = 0; i < nb; ++i) {
  4568. if (!validate_float(f[i], i)) {
  4569. return false;
  4570. }
  4571. }
  4572. } break;
  4573. case GGML_TYPE_Q4_0:
  4574. {
  4575. VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
  4576. } break;
  4577. case GGML_TYPE_Q4_1:
  4578. {
  4579. VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_1, data, nb, d, m);
  4580. } break;
  4581. case GGML_TYPE_Q5_0:
  4582. {
  4583. VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_0, data, nb);
  4584. } break;
  4585. case GGML_TYPE_Q5_1:
  4586. {
  4587. VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_1, data, nb, d, m);
  4588. } break;
  4589. case GGML_TYPE_Q8_0:
  4590. {
  4591. VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
  4592. } break;
  4593. case GGML_TYPE_Q2_K:
  4594. {
  4595. VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
  4596. } break;
  4597. case GGML_TYPE_Q3_K:
  4598. {
  4599. VALIDATE_ROW_DATA_D_F16_IMPL(block_q3_K, data, nb);
  4600. } break;
  4601. case GGML_TYPE_Q4_K:
  4602. {
  4603. VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d, dmin);
  4604. } break;
  4605. case GGML_TYPE_Q5_K:
  4606. {
  4607. VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_K, data, nb, d, dmin);
  4608. } break;
  4609. case GGML_TYPE_Q6_K:
  4610. {
  4611. VALIDATE_ROW_DATA_D_F16_IMPL(block_q6_K, data, nb);
  4612. } break;
  4613. case GGML_TYPE_Q8_K:
  4614. {
  4615. const block_q8_K * q = (const block_q8_K *) data;
  4616. for (size_t i = 0; i < nb; ++i) {
  4617. if (!validate_float(q[i].d, i)) {
  4618. return false;
  4619. }
  4620. }
  4621. } break;
  4622. case GGML_TYPE_TQ1_0:
  4623. {
  4624. VALIDATE_ROW_DATA_D_F16_IMPL(block_tq1_0, data, nb);
  4625. } break;
  4626. case GGML_TYPE_TQ2_0:
  4627. {
  4628. VALIDATE_ROW_DATA_D_F16_IMPL(block_tq2_0, data, nb);
  4629. } break;
  4630. case GGML_TYPE_IQ1_S:
  4631. {
  4632. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq1_s, data, nb);
  4633. } break;
  4634. case GGML_TYPE_IQ1_M:
  4635. {
  4636. const block_iq1_m * q = (const block_iq1_m *) data;
  4637. for (size_t i = 0; i < nb; ++i) {
  4638. iq1m_scale_t scale;
  4639. const uint16_t * sc = (const uint16_t *)q[i].scales;
  4640. scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
  4641. if (!validate_fp16(scale.f16, i)) {
  4642. return false;
  4643. }
  4644. }
  4645. } break;
  4646. case GGML_TYPE_IQ2_XXS:
  4647. {
  4648. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xxs, data, nb);
  4649. } break;
  4650. case GGML_TYPE_IQ2_XS:
  4651. {
  4652. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xs, data, nb);
  4653. } break;
  4654. case GGML_TYPE_IQ2_S:
  4655. {
  4656. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_s, data, nb);
  4657. } break;
  4658. case GGML_TYPE_IQ3_XXS:
  4659. {
  4660. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_xxs, data, nb);
  4661. } break;
  4662. case GGML_TYPE_IQ3_S:
  4663. {
  4664. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_s, data, nb);
  4665. } break;
  4666. case GGML_TYPE_IQ4_XS:
  4667. {
  4668. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_xs, data, nb);
  4669. } break;
  4670. case GGML_TYPE_IQ4_NL:
  4671. {
  4672. VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
  4673. } break;
  4674. case GGML_TYPE_I8:
  4675. case GGML_TYPE_I16:
  4676. case GGML_TYPE_I32:
  4677. case GGML_TYPE_I64:
  4678. // nothing to validate
  4679. break;
  4680. default:
  4681. {
  4682. fprintf(stderr, "%s: invalid type %d\n", __func__, type);
  4683. return false;
  4684. }
  4685. }
  4686. return true;
  4687. }