| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775477647774778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765777577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822582358245825582658275828582958305831583258335834583558365837583858395840584158425843584458455846584758485849585058515852585358545855585658575858585958605861586258635864586558665867586858695870587158725873587458755876587758785879588058815882588358845885588658875888588958905891589258935894589558965897589858995900590159025903590459055906590759085909591059115912591359145915591659175918591959205921592259235924592559265927592859295930593159325933593459355936593759385939594059415942594359445945594659475948594959505951595259535954595559565957595859595960596159625963596459655966596759685969597059715972597359745975597659775978597959805981598259835984598559865987598859895990599159925993599459955996599759985999600060016002600360046005600660076008600960106011601260136014601560166017601860196020602160226023602460256026602760286029603060316032603360346035603660376038603960406041604260436044604560466047604860496050605160526053605460556056605760586059606060616062606360646065606660676068606960706071607260736074607560766077607860796080608160826083608460856086608760886089609060916092609360946095609660976098609961006101610261036104610561066107610861096110611161126113611461156116611761186119612061216122612361246125612661276128612961306131613261336134613561366137613861396140614161426143614461456146614761486149615061516152615361546155615661576158615961606161616261636164616561666167616861696170617161726173617461756176617761786179618061816182618361846185618661876188618961906191619261936194619561966197619861996200620162026203620462056206620762086209621062116212621362146215621662176218621962206221622262236224622562266227622862296230623162326233623462356236623762386239624062416242624362446245624662476248624962506251625262536254625562566257625862596260626162626263626462656266626762686269627062716272627362746275627662776278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767277727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487748874897490749174927493749474957496749774987499750075017502750375047505750675077508750975107511751275137514751575167517751875197520752175227523752475257526752775287529753075317532753375347535753675377538753975407541754275437544754575467547754875497550755175527553755475557556755775587559756075617562756375647565756675677568756975707571757275737574757575767577757875797580758175827583758475857586758775887589759075917592759375947595759675977598759976007601760276037604760576067607760876097610761176127613761476157616761776187619762076217622762376247625762676277628762976307631763276337634763576367637763876397640764176427643764476457646764776487649765076517652765376547655765676577658765976607661766276637664766576667667766876697670767176727673767476757676767776787679768076817682768376847685768676877688768976907691769276937694769576967697769876997700770177027703770477057706770777087709771077117712771377147715771677177718771977207721772277237724772577267727772877297730773177327733773477357736773777387739774077417742774377447745774677477748774977507751775277537754775577567757775877597760776177627763776477657766776777687769777077717772777377747775777677777778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768777877887798780878187828783878487858786878787888789879087918792879387948795879687978798879988008801880288038804880588068807880888098810881188128813881488158816881788188819882088218822882388248825882688278828882988308831883288338834883588368837883888398840884188428843884488458846884788488849885088518852885388548855885688578858885988608861886288638864886588668867886888698870887188728873887488758876887788788879888088818882888388848885888688878888888988908891889288938894889588968897889888998900890189028903890489058906890789088909891089118912891389148915891689178918891989208921892289238924892589268927892889298930893189328933893489358936893789388939894089418942894389448945894689478948894989508951895289538954895589568957895889598960896189628963896489658966896789688969897089718972897389748975897689778978897989808981898289838984898589868987898889898990899189928993899489958996899789988999900090019002900390049005900690079008900990109011901290139014901590169017901890199020902190229023902490259026902790289029903090319032903390349035903690379038903990409041904290439044904590469047904890499050905190529053905490559056905790589059906090619062906390649065906690679068906990709071907290739074907590769077907890799080908190829083908490859086908790889089909090919092909390949095909690979098909991009101910291039104910591069107910891099110911191129113911491159116911791189119912091219122912391249125912691279128912991309131913291339134913591369137913891399140914191429143914491459146914791489149915091519152915391549155915691579158915991609161916291639164916591669167916891699170917191729173917491759176917791789179918091819182918391849185918691879188918991909191919291939194919591969197919891999200920192029203920492059206920792089209921092119212921392149215921692179218921992209221922292239224922592269227922892299230923192329233923492359236923792389239924092419242924392449245924692479248924992509251925292539254925592569257925892599260926192629263926492659266926792689269927092719272927392749275927692779278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022110222102231022410225102261022710228102291023010231102321023310234102351023610237102381023910240102411024210243102441024510246102471024810249102501025110252102531025410255102561025710258102591026010261102621026310264102651026610267102681026910270102711027210273102741027510276102771027810279102801028110282102831028410285102861028710288102891029010291102921029310294102951029610297102981029910300103011030210303103041030510306103071030810309103101031110312103131031410315103161031710318103191032010321103221032310324103251032610327103281032910330103311033210333103341033510336103371033810339103401034110342103431034410345103461034710348103491035010351103521035310354103551035610357103581035910360103611036210363103641036510366103671036810369103701037110372103731037410375103761037710378103791038010381103821038310384103851038610387103881038910390103911039210393 |
- Comparing tensors between original and converted GGML models...
- Tokens: 30, Layers: 16
- ================================================================================
- ================================================================================
- Comparing model.layers.out tensors...
- ================================================================================
- Layer 0, Token 1 (model.layers.out comparison):
- Original tensor sum: -109.202682
- Converted tensor sum: -109.202667
- Original tensor mean: -3.412584
- Converted tensor mean: -3.412583
- Mean difference: 0.00000112
- Maximum pointwise difference: 0.00000358
- Max difference location: (0, 3, 2)
- Values at max diff - Original: -3.23131371, Converted: -3.23131013
- Biggest difference in row (0, 3), sum -70.622650 vs -70.622643
- Original tensor:
- [[[ 0.53282046 0.45114386 2.2156353 -0.5117184 -1.6482054
- 4.6376505 -2.9421384 -3.4354253 ]
- [ -8.487997 -5.323722 -4.790135 -8.482631 4.4259453
- -0.7649012 -5.2080426 -3.5365663 ]
- [ -2.8659308 -0.7302124 3.4494972 -0.7121358 -4.4744496
- 1.4391303 -1.05655 -0.76109344]
- [-10.8983 -11.325392 -3.2313137 -11.594204 -13.007862
- -6.099822 -13.027901 -1.4378595 ]]]
- Converted tensor:
- [[[ 0.53281975 0.45114377 2.215636 -0.5117179 -1.6482062
- 4.6376514 -2.942138 -3.4354265 ]
- [ -8.487997 -5.323724 -4.7901373 -8.48263 4.425948
- -0.7649009 -5.208041 -3.5365672 ]
- [ -2.8659306 -0.7302135 3.4494982 -0.7121362 -4.474449
- 1.4391313 -1.0565499 -0.7610918 ]
- [-10.898299 -11.325391 -3.2313101 -11.594204 -13.00786
- -6.099819 -13.027899 -1.437861 ]]]
- Layer 1, Token 1 (model.layers.out comparison):
- Original tensor sum: -132.672058
- Converted tensor sum: -132.672043
- Original tensor mean: -4.146002
- Converted tensor mean: -4.146001
- Mean difference: 0.00000322
- Maximum pointwise difference: 0.00000763
- Max difference location: (0, 1, 0)
- Values at max diff - Original: -8.45331192, Converted: -8.45330429
- Biggest difference in row (0, 2), sum 8.045303 vs 8.045274
- Layer 2, Token 1 (model.layers.out comparison):
- Original tensor sum: -123.594589
- Converted tensor sum: -123.594765
- Original tensor mean: -3.862331
- Converted tensor mean: -3.862336
- Mean difference: 0.00001101
- Maximum pointwise difference: 0.00005722
- Max difference location: (0, 3, 0)
- Values at max diff - Original: -14.73531914, Converted: -14.73537636
- Biggest difference in row (0, 3), sum -100.578644 vs -100.578781
- Layer 3, Token 1 (model.layers.out comparison):
- Original tensor sum: -1014.197754
- Converted tensor sum: -1014.208618
- Original tensor mean: -31.693680
- Converted tensor mean: -31.694019
- Mean difference: 0.00261304
- Maximum pointwise difference: 0.00854874
- Max difference location: (0, 3, 4)
- Values at max diff - Original: -47.59802246, Converted: -47.60657120
- Biggest difference in row (0, 3), sum -413.478455 vs -413.514832
- Layer 4, Token 1 (model.layers.out comparison):
- Original tensor sum: -974.648987
- Converted tensor sum: -974.659424
- Original tensor mean: -30.457781
- Converted tensor mean: -30.458107
- Mean difference: 0.00296569
- Maximum pointwise difference: 0.00885773
- Max difference location: (0, 3, 4)
- Values at max diff - Original: -45.65669632, Converted: -45.66555405
- Biggest difference in row (0, 3), sum -380.904694 vs -380.942291
- Layer 5, Token 1 (model.layers.out comparison):
- Original tensor sum: -842.923950
- Converted tensor sum: -842.923950
- Original tensor mean: -26.341373
- Converted tensor mean: -26.341373
- Mean difference: 0.00327585
- Maximum pointwise difference: 0.00857162
- Max difference location: (0, 3, 4)
- Values at max diff - Original: -47.09656525, Converted: -47.10513687
- Biggest difference in row (0, 3), sum -366.704346 vs -366.739746
- Layer 6, Token 1 (model.layers.out comparison):
- Original tensor sum: -940.556580
- Converted tensor sum: -940.507812
- Original tensor mean: -29.392393
- Converted tensor mean: -29.390869
- Mean difference: 0.00368834
- Maximum pointwise difference: 0.00840378
- Max difference location: (0, 3, 4)
- Values at max diff - Original: -49.34116364, Converted: -49.34956741
- Biggest difference in row (0, 2), sum -130.006729 vs -129.970612
- Layer 7, Token 1 (model.layers.out comparison):
- Original tensor sum: -1838.171143
- Converted tensor sum: -1838.228271
- Original tensor mean: -57.442848
- Converted tensor mean: -57.444633
- Mean difference: 0.00574541
- Maximum pointwise difference: 0.01725769
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -91.24589539, Converted: -91.26315308
- Biggest difference in row (0, 0), sum -622.551270 vs -622.626587
- Layer 8, Token 1 (model.layers.out comparison):
- Original tensor sum: -1890.751709
- Converted tensor sum: -1890.670898
- Original tensor mean: -59.085991
- Converted tensor mean: -59.083466
- Mean difference: 0.01148558
- Maximum pointwise difference: 0.05082703
- Max difference location: (0, 2, 6)
- Values at max diff - Original: -49.12084961, Converted: -49.07002258
- Biggest difference in row (0, 2), sum -356.818451 vs -356.663208
- Layer 9, Token 1 (model.layers.out comparison):
- Original tensor sum: -1949.811523
- Converted tensor sum: -1949.711426
- Original tensor mean: -60.931610
- Converted tensor mean: -60.928482
- Mean difference: 0.01115143
- Maximum pointwise difference: 0.04758072
- Max difference location: (0, 2, 6)
- Values at max diff - Original: -49.22105789, Converted: -49.17347717
- Biggest difference in row (0, 2), sum -367.878845 vs -367.720154
- Layer 10, Token 1 (model.layers.out comparison):
- Original tensor sum: -1955.402832
- Converted tensor sum: -1955.281250
- Original tensor mean: -61.106339
- Converted tensor mean: -61.102539
- Mean difference: 0.01230341
- Maximum pointwise difference: 0.04833603
- Max difference location: (0, 2, 6)
- Values at max diff - Original: -43.91606140, Converted: -43.86772537
- Biggest difference in row (0, 2), sum -370.409668 vs -370.259583
- Layer 11, Token 1 (model.layers.out comparison):
- Original tensor sum: -3642.472900
- Converted tensor sum: -3642.428711
- Original tensor mean: -113.827278
- Converted tensor mean: -113.825897
- Mean difference: 0.01628518
- Maximum pointwise difference: 0.05126190
- Max difference location: (0, 2, 6)
- Values at max diff - Original: -94.39852142, Converted: -94.34725952
- Biggest difference in row (0, 2), sum -786.509460 vs -786.331726
- Layer 12, Token 1 (model.layers.out comparison):
- Original tensor sum: -3739.976807
- Converted tensor sum: -3739.936035
- Original tensor mean: -116.874275
- Converted tensor mean: -116.873001
- Mean difference: 0.01711488
- Maximum pointwise difference: 0.05059052
- Max difference location: (0, 2, 6)
- Values at max diff - Original: -95.09668732, Converted: -95.04609680
- Biggest difference in row (0, 2), sum -816.550781 vs -816.352295
- Layer 13, Token 1 (model.layers.out comparison):
- Original tensor sum: -3821.749268
- Converted tensor sum: -3821.721680
- Original tensor mean: -119.429665
- Converted tensor mean: -119.428802
- Mean difference: 0.01747012
- Maximum pointwise difference: 0.05052948
- Max difference location: (0, 2, 7)
- Values at max diff - Original: -79.35634613, Converted: -79.30581665
- Biggest difference in row (0, 2), sum -840.805908 vs -840.616699
- Layer 14, Token 1 (model.layers.out comparison):
- Original tensor sum: -4057.451904
- Converted tensor sum: -4057.284668
- Original tensor mean: -126.795372
- Converted tensor mean: -126.790146
- Mean difference: 0.01935625
- Maximum pointwise difference: 0.07952881
- Max difference location: (0, 2, 6)
- Values at max diff - Original: -97.11465454, Converted: -97.03512573
- Biggest difference in row (0, 2), sum -917.124573 vs -916.826172
- Error processing model.layers.out layer 15, token 1: cannot reshape array of size 8 into shape (1,4,8)
- Layer 0, Token 2 (model.layers.out comparison):
- Original tensor sum: -7.280505
- Converted tensor sum: -7.280507
- Original tensor mean: -0.910063
- Converted tensor mean: -0.910063
- Mean difference: 0.00000097
- Maximum pointwise difference: 0.00000179
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -1.49786282, Converted: -1.49786103
- Biggest difference in row (0, 0), sum -7.280505 vs -7.280507
- Layer 1, Token 2 (model.layers.out comparison):
- Original tensor sum: -7.318125
- Converted tensor sum: -7.318151
- Original tensor mean: -0.914766
- Converted tensor mean: -0.914769
- Mean difference: 0.00000331
- Maximum pointwise difference: 0.00000930
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -3.41128922, Converted: -3.41129851
- Biggest difference in row (0, 0), sum -7.318125 vs -7.318151
- Layer 2, Token 2 (model.layers.out comparison):
- Original tensor sum: 14.344932
- Converted tensor sum: 14.344961
- Original tensor mean: 1.793116
- Converted tensor mean: 1.793120
- Mean difference: 0.00000746
- Maximum pointwise difference: 0.00003266
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 3.23243976, Converted: 3.23247242
- Biggest difference in row (0, 0), sum 14.344932 vs 14.344961
- Layer 3, Token 2 (model.layers.out comparison):
- Original tensor sum: 46.801067
- Converted tensor sum: 46.811996
- Original tensor mean: 5.850133
- Converted tensor mean: 5.851500
- Mean difference: 0.00141515
- Maximum pointwise difference: 0.00275421
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 6.65637064, Converted: 6.65912485
- Biggest difference in row (0, 0), sum 46.801067 vs 46.811996
- Layer 4, Token 2 (model.layers.out comparison):
- Original tensor sum: 47.891678
- Converted tensor sum: 47.901840
- Original tensor mean: 5.986460
- Converted tensor mean: 5.987730
- Mean difference: 0.00131346
- Maximum pointwise difference: 0.00296640
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 7.13961887, Converted: 7.14258528
- Biggest difference in row (0, 0), sum 47.891678 vs 47.901840
- Layer 5, Token 2 (model.layers.out comparison):
- Original tensor sum: 45.815926
- Converted tensor sum: 45.826260
- Original tensor mean: 5.726991
- Converted tensor mean: 5.728282
- Mean difference: 0.00137006
- Maximum pointwise difference: 0.00332642
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 7.94661283, Converted: 7.94993925
- Biggest difference in row (0, 0), sum 45.815926 vs 45.826260
- Layer 6, Token 2 (model.layers.out comparison):
- Original tensor sum: 40.223167
- Converted tensor sum: 40.231720
- Original tensor mean: 5.027896
- Converted tensor mean: 5.028965
- Mean difference: 0.00155937
- Maximum pointwise difference: 0.00270462
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 7.07846451, Converted: 7.08116913
- Biggest difference in row (0, 0), sum 40.223167 vs 40.231720
- Layer 7, Token 2 (model.layers.out comparison):
- Original tensor sum: 84.588196
- Converted tensor sum: 84.602402
- Original tensor mean: 10.573524
- Converted tensor mean: 10.575300
- Mean difference: 0.00185513
- Maximum pointwise difference: 0.00356102
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 13.31151009, Converted: 13.31507111
- Biggest difference in row (0, 0), sum 84.588196 vs 84.602402
- Layer 8, Token 2 (model.layers.out comparison):
- Original tensor sum: 85.737823
- Converted tensor sum: 85.749390
- Original tensor mean: 10.717228
- Converted tensor mean: 10.718674
- Mean difference: 0.00189817
- Maximum pointwise difference: 0.00350094
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 13.90340519, Converted: 13.90690613
- Biggest difference in row (0, 0), sum 85.737823 vs 85.749390
- Layer 9, Token 2 (model.layers.out comparison):
- Original tensor sum: 83.069107
- Converted tensor sum: 83.078979
- Original tensor mean: 10.383638
- Converted tensor mean: 10.384872
- Mean difference: 0.00177890
- Maximum pointwise difference: 0.00335407
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 13.79222488, Converted: 13.79557896
- Biggest difference in row (0, 0), sum 83.069107 vs 83.078979
- Layer 10, Token 2 (model.layers.out comparison):
- Original tensor sum: 80.782455
- Converted tensor sum: 80.791588
- Original tensor mean: 10.097807
- Converted tensor mean: 10.098948
- Mean difference: 0.00190949
- Maximum pointwise difference: 0.00329256
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 6.64920282, Converted: 6.65249538
- Biggest difference in row (0, 0), sum 80.782455 vs 80.791588
- Layer 11, Token 2 (model.layers.out comparison):
- Original tensor sum: 124.938332
- Converted tensor sum: 124.953712
- Original tensor mean: 15.617291
- Converted tensor mean: 15.619214
- Mean difference: 0.00253391
- Maximum pointwise difference: 0.00420666
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 12.86635590, Converted: 12.87056255
- Biggest difference in row (0, 0), sum 124.938332 vs 124.953712
- Layer 12, Token 2 (model.layers.out comparison):
- Original tensor sum: 124.466995
- Converted tensor sum: 124.483871
- Original tensor mean: 15.558374
- Converted tensor mean: 15.560484
- Mean difference: 0.00271881
- Maximum pointwise difference: 0.00506878
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 12.41438103, Converted: 12.41944981
- Biggest difference in row (0, 0), sum 124.466995 vs 124.483871
- Layer 13, Token 2 (model.layers.out comparison):
- Original tensor sum: 121.646957
- Converted tensor sum: 121.660385
- Original tensor mean: 15.205870
- Converted tensor mean: 15.207548
- Mean difference: 0.00218880
- Maximum pointwise difference: 0.00470448
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 12.02227020, Converted: 12.02697468
- Biggest difference in row (0, 0), sum 121.646957 vs 121.660385
- Layer 14, Token 2 (model.layers.out comparison):
- Original tensor sum: 116.636169
- Converted tensor sum: 116.658142
- Original tensor mean: 14.579521
- Converted tensor mean: 14.582268
- Mean difference: 0.00299489
- Maximum pointwise difference: 0.00521469
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 18.26870537, Converted: 18.27392006
- Biggest difference in row (0, 0), sum 116.636169 vs 116.658142
- Layer 15, Token 2 (model.layers.out comparison):
- Original tensor sum: 201.843384
- Converted tensor sum: 201.865143
- Original tensor mean: 25.230423
- Converted tensor mean: 25.233143
- Mean difference: 0.00346577
- Maximum pointwise difference: 0.00746727
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 30.94509888, Converted: 30.95256615
- Biggest difference in row (0, 0), sum 201.843384 vs 201.865143
- Layer 0, Token 3 (model.layers.out comparison):
- Original tensor sum: 18.698099
- Converted tensor sum: 18.475292
- Original tensor mean: 2.337262
- Converted tensor mean: 2.309412
- Mean difference: 2.67848086
- Maximum pointwise difference: 4.89963531
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 2.51813841, Converted: 7.41777372
- Biggest difference in row (0, 0), sum 18.698099 vs 18.475292
- Layer 1, Token 3 (model.layers.out comparison):
- Original tensor sum: 13.937105
- Converted tensor sum: 11.538675
- Original tensor mean: 1.742138
- Converted tensor mean: 1.442334
- Mean difference: 2.56903791
- Maximum pointwise difference: 5.56039190
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 5.86116695, Converted: 0.30077514
- Biggest difference in row (0, 0), sum 13.937105 vs 11.538675
- Layer 2, Token 3 (model.layers.out comparison):
- Original tensor sum: 17.835873
- Converted tensor sum: 9.065081
- Original tensor mean: 2.229484
- Converted tensor mean: 1.133135
- Mean difference: 2.48439741
- Maximum pointwise difference: 7.80053854
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 7.08156919, Converted: -0.71896935
- Biggest difference in row (0, 0), sum 17.835873 vs 9.065081
- Layer 3, Token 3 (model.layers.out comparison):
- Original tensor sum: 19.733971
- Converted tensor sum: 0.388454
- Original tensor mean: 2.466746
- Converted tensor mean: 0.048557
- Mean difference: 2.74538827
- Maximum pointwise difference: 8.14173889
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 7.32600927, Converted: -0.81572962
- Biggest difference in row (0, 0), sum 19.733971 vs 0.388454
- Layer 4, Token 3 (model.layers.out comparison):
- Original tensor sum: 17.522738
- Converted tensor sum: 7.885162
- Original tensor mean: 2.190342
- Converted tensor mean: 0.985645
- Mean difference: 4.25575876
- Maximum pointwise difference: 7.97597837
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 3.98348713, Converted: -3.99249125
- Biggest difference in row (0, 0), sum 17.522738 vs 7.885162
- Layer 5, Token 3 (model.layers.out comparison):
- Original tensor sum: 21.460897
- Converted tensor sum: 15.969997
- Original tensor mean: 2.682612
- Converted tensor mean: 1.996250
- Mean difference: 4.34595299
- Maximum pointwise difference: 8.46822739
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 6.38704681, Converted: -2.08118057
- Biggest difference in row (0, 0), sum 21.460897 vs 15.969997
- Layer 6, Token 3 (model.layers.out comparison):
- Original tensor sum: 18.336536
- Converted tensor sum: 9.128950
- Original tensor mean: 2.292067
- Converted tensor mean: 1.141119
- Mean difference: 3.42625880
- Maximum pointwise difference: 9.18005276
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 6.15963268, Converted: -3.02042007
- Biggest difference in row (0, 0), sum 18.336536 vs 9.128950
- Layer 7, Token 3 (model.layers.out comparison):
- Original tensor sum: 27.127436
- Converted tensor sum: -91.853516
- Original tensor mean: 3.390929
- Converted tensor mean: -11.481689
- Mean difference: 14.87261772
- Maximum pointwise difference: 25.04354668
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 6.62252426, Converted: -18.42102242
- Biggest difference in row (0, 0), sum 27.127436 vs -91.853516
- Layer 8, Token 3 (model.layers.out comparison):
- Original tensor sum: 22.795490
- Converted tensor sum: -94.016220
- Original tensor mean: 2.849436
- Converted tensor mean: -11.752028
- Mean difference: 14.60146332
- Maximum pointwise difference: 26.14372826
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 0.59730154, Converted: -25.54642677
- Biggest difference in row (0, 0), sum 22.795490 vs -94.016220
- Layer 9, Token 3 (model.layers.out comparison):
- Original tensor sum: 16.367466
- Converted tensor sum: -159.747223
- Original tensor mean: 2.045933
- Converted tensor mean: -19.968403
- Mean difference: 22.01433563
- Maximum pointwise difference: 34.04418182
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -0.55563742, Converted: -34.59981918
- Biggest difference in row (0, 0), sum 16.367466 vs -159.747223
- Layer 10, Token 3 (model.layers.out comparison):
- Original tensor sum: 22.587862
- Converted tensor sum: -171.457092
- Original tensor mean: 2.823483
- Converted tensor mean: -21.432137
- Mean difference: 24.25561905
- Maximum pointwise difference: 40.39982224
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -0.52963394, Converted: -40.92945480
- Biggest difference in row (0, 0), sum 22.587862 vs -171.457092
- Layer 11, Token 3 (model.layers.out comparison):
- Original tensor sum: 98.501198
- Converted tensor sum: -580.205811
- Original tensor mean: 12.312650
- Converted tensor mean: -72.525726
- Mean difference: 84.83837128
- Maximum pointwise difference: 107.93860626
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.91925716, Converted: -98.01934814
- Biggest difference in row (0, 0), sum 98.501198 vs -580.205811
- Layer 12, Token 3 (model.layers.out comparison):
- Original tensor sum: 96.017456
- Converted tensor sum: -599.130005
- Original tensor mean: 12.002182
- Converted tensor mean: -74.891251
- Mean difference: 86.89343262
- Maximum pointwise difference: 107.37790680
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 10.14877987, Converted: -97.22912598
- Biggest difference in row (0, 0), sum 96.017456 vs -599.130005
- Layer 13, Token 3 (model.layers.out comparison):
- Original tensor sum: 93.969711
- Converted tensor sum: -604.221680
- Original tensor mean: 11.746214
- Converted tensor mean: -75.527710
- Mean difference: 87.27392578
- Maximum pointwise difference: 107.42771149
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.00540733, Converted: -98.42230225
- Biggest difference in row (0, 0), sum 93.969711 vs -604.221680
- Layer 14, Token 3 (model.layers.out comparison):
- Original tensor sum: 96.193565
- Converted tensor sum: -675.267456
- Original tensor mean: 12.024196
- Converted tensor mean: -84.408432
- Mean difference: 96.43263245
- Maximum pointwise difference: 115.43507385
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.17813015, Converted: -106.25694275
- Biggest difference in row (0, 0), sum 96.193565 vs -675.267456
- Layer 15, Token 3 (model.layers.out comparison):
- Original tensor sum: 203.967834
- Converted tensor sum: -1113.465820
- Original tensor mean: 25.495979
- Converted tensor mean: -139.183228
- Mean difference: 164.67919922
- Maximum pointwise difference: 181.33709717
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 23.16110420, Converted: -158.17599487
- Biggest difference in row (0, 0), sum 203.967834 vs -1113.465820
- Layer 0, Token 4 (model.layers.out comparison):
- Original tensor sum: 1.060196
- Converted tensor sum: -6.035928
- Original tensor mean: 0.132525
- Converted tensor mean: -0.754491
- Mean difference: 1.11038423
- Maximum pointwise difference: 2.90589857
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 1.75988472, Converted: -1.14601374
- Biggest difference in row (0, 0), sum 1.060196 vs -6.035928
- Layer 1, Token 4 (model.layers.out comparison):
- Original tensor sum: -45.922947
- Converted tensor sum: -53.028908
- Original tensor mean: -5.740368
- Converted tensor mean: -6.628613
- Mean difference: 1.58238363
- Maximum pointwise difference: 3.98315811
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -7.45665455, Converted: -11.43981266
- Biggest difference in row (0, 0), sum -45.922947 vs -53.028908
- Layer 2, Token 4 (model.layers.out comparison):
- Original tensor sum: -47.965603
- Converted tensor sum: -68.008888
- Original tensor mean: -5.995700
- Converted tensor mean: -8.501111
- Mean difference: 4.45314884
- Maximum pointwise difference: 12.72673607
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 10.93319416, Converted: -1.79354143
- Biggest difference in row (0, 0), sum -47.965603 vs -68.008888
- Layer 3, Token 4 (model.layers.out comparison):
- Original tensor sum: -224.689087
- Converted tensor sum: -313.872162
- Original tensor mean: -28.086136
- Converted tensor mean: -39.234020
- Mean difference: 11.14788437
- Maximum pointwise difference: 20.76882172
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -9.44140339, Converted: -30.21022415
- Biggest difference in row (0, 0), sum -224.689087 vs -313.872162
- Layer 4, Token 4 (model.layers.out comparison):
- Original tensor sum: -207.206879
- Converted tensor sum: -293.960205
- Original tensor mean: -25.900860
- Converted tensor mean: -36.745026
- Mean difference: 10.84416676
- Maximum pointwise difference: 23.99023056
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -13.00386524, Converted: -36.99409485
- Biggest difference in row (0, 0), sum -207.206879 vs -293.960205
- Layer 5, Token 4 (model.layers.out comparison):
- Original tensor sum: -185.594986
- Converted tensor sum: -298.454895
- Original tensor mean: -23.199373
- Converted tensor mean: -37.306862
- Mean difference: 14.10748863
- Maximum pointwise difference: 27.16260529
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -13.13538361, Converted: -40.29798889
- Biggest difference in row (0, 0), sum -185.594986 vs -298.454895
- Layer 6, Token 4 (model.layers.out comparison):
- Original tensor sum: -226.913589
- Converted tensor sum: -341.315369
- Original tensor mean: -28.364199
- Converted tensor mean: -42.664421
- Mean difference: 14.30021858
- Maximum pointwise difference: 27.83255386
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -20.03028870, Converted: -47.86284256
- Biggest difference in row (0, 0), sum -226.913589 vs -341.315369
- Layer 7, Token 4 (model.layers.out comparison):
- Original tensor sum: -431.530212
- Converted tensor sum: -553.909912
- Original tensor mean: -53.941277
- Converted tensor mean: -69.238739
- Mean difference: 15.29746723
- Maximum pointwise difference: 28.98126602
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -44.07294083, Converted: -73.05420685
- Biggest difference in row (0, 0), sum -431.530212 vs -553.909912
- Layer 8, Token 4 (model.layers.out comparison):
- Original tensor sum: -442.906403
- Converted tensor sum: -577.351807
- Original tensor mean: -55.363300
- Converted tensor mean: -72.168976
- Mean difference: 16.80567932
- Maximum pointwise difference: 24.00010681
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -44.65782166, Converted: -68.65792847
- Biggest difference in row (0, 0), sum -442.906403 vs -577.351807
- Layer 9, Token 4 (model.layers.out comparison):
- Original tensor sum: -457.224976
- Converted tensor sum: -606.660400
- Original tensor mean: -57.153122
- Converted tensor mean: -75.832550
- Mean difference: 18.67943192
- Maximum pointwise difference: 31.74385834
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -37.91560364, Converted: -69.65946198
- Biggest difference in row (0, 0), sum -457.224976 vs -606.660400
- Layer 10, Token 4 (model.layers.out comparison):
- Original tensor sum: -464.368622
- Converted tensor sum: -617.020081
- Original tensor mean: -58.046078
- Converted tensor mean: -77.127510
- Mean difference: 19.08143044
- Maximum pointwise difference: 31.15077591
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -40.76456833, Converted: -71.91534424
- Biggest difference in row (0, 0), sum -464.368622 vs -617.020081
- Layer 11, Token 4 (model.layers.out comparison):
- Original tensor sum: -848.365112
- Converted tensor sum: -1029.810791
- Original tensor mean: -106.045639
- Converted tensor mean: -128.726349
- Mean difference: 22.68070793
- Maximum pointwise difference: 33.58893585
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -79.47626495, Converted: -113.06520081
- Biggest difference in row (0, 0), sum -848.365112 vs -1029.810791
- Layer 12, Token 4 (model.layers.out comparison):
- Original tensor sum: -856.364807
- Converted tensor sum: -1034.875244
- Original tensor mean: -107.045601
- Converted tensor mean: -129.359406
- Mean difference: 22.31380081
- Maximum pointwise difference: 34.47047424
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -94.66131592, Converted: -129.13179016
- Biggest difference in row (0, 0), sum -856.364807 vs -1034.875244
- Layer 13, Token 4 (model.layers.out comparison):
- Original tensor sum: -876.941895
- Converted tensor sum: -1070.547119
- Original tensor mean: -109.617737
- Converted tensor mean: -133.818390
- Mean difference: 24.20065689
- Maximum pointwise difference: 35.39904022
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -93.12728119, Converted: -128.52632141
- Biggest difference in row (0, 0), sum -876.941895 vs -1070.547119
- Layer 14, Token 4 (model.layers.out comparison):
- Original tensor sum: -914.061707
- Converted tensor sum: -1087.587036
- Original tensor mean: -114.257713
- Converted tensor mean: -135.948380
- Mean difference: 21.69067001
- Maximum pointwise difference: 38.16375732
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -116.85905457, Converted: -155.02281189
- Biggest difference in row (0, 0), sum -914.061707 vs -1087.587036
- Layer 15, Token 4 (model.layers.out comparison):
- Original tensor sum: -1341.588623
- Converted tensor sum: -1530.308838
- Original tensor mean: -167.698578
- Converted tensor mean: -191.288605
- Mean difference: 23.59002495
- Maximum pointwise difference: 40.53677368
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -178.01094055, Converted: -218.54771423
- Biggest difference in row (0, 0), sum -1341.588623 vs -1530.308838
- Layer 0, Token 5 (model.layers.out comparison):
- Original tensor sum: 12.113814
- Converted tensor sum: 1.907211
- Original tensor mean: 1.514227
- Converted tensor mean: 0.238401
- Mean difference: 1.41127276
- Maximum pointwise difference: 3.03878593
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 1.65080941, Converted: -1.38797641
- Biggest difference in row (0, 0), sum 12.113814 vs 1.907211
- Layer 1, Token 5 (model.layers.out comparison):
- Original tensor sum: 3.328269
- Converted tensor sum: 7.141708
- Original tensor mean: 0.416034
- Converted tensor mean: 0.892714
- Mean difference: 1.73651075
- Maximum pointwise difference: 4.59446096
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -0.09795946, Converted: 4.49650145
- Biggest difference in row (0, 0), sum 3.328269 vs 7.141708
- Layer 2, Token 5 (model.layers.out comparison):
- Original tensor sum: -7.901872
- Converted tensor sum: 12.052417
- Original tensor mean: -0.987734
- Converted tensor mean: 1.506552
- Mean difference: 2.91872406
- Maximum pointwise difference: 6.22109556
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -1.61789608, Converted: 4.60319948
- Biggest difference in row (0, 0), sum -7.901872 vs 12.052417
- Layer 3, Token 5 (model.layers.out comparison):
- Original tensor sum: -206.706451
- Converted tensor sum: 38.517872
- Original tensor mean: -25.838306
- Converted tensor mean: 4.814734
- Mean difference: 30.65304184
- Maximum pointwise difference: 36.99858475
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -30.05084610, Converted: 6.94773912
- Biggest difference in row (0, 0), sum -206.706451 vs 38.517872
- Layer 4, Token 5 (model.layers.out comparison):
- Original tensor sum: -190.520950
- Converted tensor sum: 37.683086
- Original tensor mean: -23.815119
- Converted tensor mean: 4.710386
- Mean difference: 28.52550507
- Maximum pointwise difference: 36.21773911
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -29.42410278, Converted: 6.79363585
- Biggest difference in row (0, 0), sum -190.520950 vs 37.683086
- Layer 5, Token 5 (model.layers.out comparison):
- Original tensor sum: -129.615097
- Converted tensor sum: 37.492149
- Original tensor mean: -16.201887
- Converted tensor mean: 4.686519
- Mean difference: 20.88840675
- Maximum pointwise difference: 30.11524200
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -22.47561646, Converted: 7.63962507
- Biggest difference in row (0, 0), sum -129.615097 vs 37.492149
- Layer 6, Token 5 (model.layers.out comparison):
- Original tensor sum: -168.733810
- Converted tensor sum: 40.467735
- Original tensor mean: -21.091726
- Converted tensor mean: 5.058467
- Mean difference: 26.15019226
- Maximum pointwise difference: 35.40680313
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -27.34041214, Converted: 8.06639194
- Biggest difference in row (0, 0), sum -168.733810 vs 40.467735
- Layer 7, Token 5 (model.layers.out comparison):
- Original tensor sum: -375.952911
- Converted tensor sum: 84.494781
- Original tensor mean: -46.994114
- Converted tensor mean: 10.561848
- Mean difference: 57.55596161
- Maximum pointwise difference: 65.51675415
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -54.48764038, Converted: 11.02911663
- Biggest difference in row (0, 0), sum -375.952911 vs 84.494781
- Layer 8, Token 5 (model.layers.out comparison):
- Original tensor sum: -386.335632
- Converted tensor sum: 90.464653
- Original tensor mean: -48.291954
- Converted tensor mean: 11.308082
- Mean difference: 59.60003662
- Maximum pointwise difference: 70.12364197
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -54.78602219, Converted: 15.33761883
- Biggest difference in row (0, 0), sum -386.335632 vs 90.464653
- Layer 9, Token 5 (model.layers.out comparison):
- Original tensor sum: -407.643036
- Converted tensor sum: 83.872604
- Original tensor mean: -50.955379
- Converted tensor mean: 10.484076
- Mean difference: 61.43945694
- Maximum pointwise difference: 73.87419128
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -62.38755035, Converted: 11.48663712
- Biggest difference in row (0, 0), sum -407.643036 vs 83.872604
- Layer 10, Token 5 (model.layers.out comparison):
- Original tensor sum: -398.133545
- Converted tensor sum: 83.310257
- Original tensor mean: -49.766693
- Converted tensor mean: 10.413782
- Mean difference: 60.18047714
- Maximum pointwise difference: 71.93079376
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -61.05200958, Converted: 10.87878418
- Biggest difference in row (0, 0), sum -398.133545 vs 83.310257
- Layer 11, Token 5 (model.layers.out comparison):
- Original tensor sum: -795.896240
- Converted tensor sum: 161.559113
- Original tensor mean: -99.487030
- Converted tensor mean: 20.194889
- Mean difference: 119.68191528
- Maximum pointwise difference: 136.52630615
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -112.33381653, Converted: 24.19249153
- Biggest difference in row (0, 0), sum -795.896240 vs 161.559113
- Layer 12, Token 5 (model.layers.out comparison):
- Original tensor sum: -795.492065
- Converted tensor sum: 157.049652
- Original tensor mean: -99.436508
- Converted tensor mean: 19.631207
- Mean difference: 119.06771088
- Maximum pointwise difference: 138.69142151
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -115.85614014, Converted: 22.83527946
- Biggest difference in row (0, 0), sum -795.492065 vs 157.049652
- Layer 13, Token 5 (model.layers.out comparison):
- Original tensor sum: -816.679565
- Converted tensor sum: 152.172302
- Original tensor mean: -102.084946
- Converted tensor mean: 19.021538
- Mean difference: 121.10647583
- Maximum pointwise difference: 142.45770264
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -120.28170013, Converted: 22.17600250
- Biggest difference in row (0, 0), sum -816.679565 vs 152.172302
- Layer 14, Token 5 (model.layers.out comparison):
- Original tensor sum: -858.712524
- Converted tensor sum: 152.386047
- Original tensor mean: -107.339066
- Converted tensor mean: 19.048256
- Mean difference: 126.38732147
- Maximum pointwise difference: 150.80645752
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -129.48748779, Converted: 21.31897736
- Biggest difference in row (0, 0), sum -858.712524 vs 152.386047
- Layer 15, Token 5 (model.layers.out comparison):
- Original tensor sum: -1291.953247
- Converted tensor sum: 244.354996
- Original tensor mean: -161.494156
- Converted tensor mean: 30.544374
- Mean difference: 192.03852844
- Maximum pointwise difference: 220.75814819
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -189.25143433, Converted: 31.50671959
- Biggest difference in row (0, 0), sum -1291.953247 vs 244.354996
- Layer 0, Token 6 (model.layers.out comparison):
- Original tensor sum: 4.713745
- Converted tensor sum: 11.404326
- Original tensor mean: 0.589218
- Converted tensor mean: 1.425541
- Mean difference: 1.39658785
- Maximum pointwise difference: 3.99744058
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -2.16165113, Converted: 1.83578944
- Biggest difference in row (0, 0), sum 4.713745 vs 11.404326
- Layer 1, Token 6 (model.layers.out comparison):
- Original tensor sum: 2.484277
- Converted tensor sum: 9.422175
- Original tensor mean: 0.310535
- Converted tensor mean: 1.177772
- Mean difference: 1.56714785
- Maximum pointwise difference: 3.13825679
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -2.85257578, Converted: 0.28568110
- Biggest difference in row (0, 0), sum 2.484277 vs 9.422175
- Layer 2, Token 6 (model.layers.out comparison):
- Original tensor sum: -4.950438
- Converted tensor sum: -1.357174
- Original tensor mean: -0.618805
- Converted tensor mean: -0.169647
- Mean difference: 1.71385837
- Maximum pointwise difference: 3.88516402
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 3.02349472, Converted: -0.86166936
- Biggest difference in row (0, 0), sum -4.950438 vs -1.357174
- Layer 3, Token 6 (model.layers.out comparison):
- Original tensor sum: -125.927612
- Converted tensor sum: -106.782318
- Original tensor mean: -15.740952
- Converted tensor mean: -13.347790
- Mean difference: 3.11209679
- Maximum pointwise difference: 4.75263119
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -17.46803665, Converted: -12.71540546
- Biggest difference in row (0, 0), sum -125.927612 vs -106.782318
- Layer 4, Token 6 (model.layers.out comparison):
- Original tensor sum: -139.830460
- Converted tensor sum: -126.311844
- Original tensor mean: -17.478807
- Converted tensor mean: -15.788980
- Mean difference: 3.15184307
- Maximum pointwise difference: 5.99608994
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -25.84107971, Converted: -19.84498978
- Biggest difference in row (0, 0), sum -139.830460 vs -126.311844
- Layer 5, Token 6 (model.layers.out comparison):
- Original tensor sum: -142.974274
- Converted tensor sum: -73.637054
- Original tensor mean: -17.871784
- Converted tensor mean: -9.204632
- Mean difference: 10.37221718
- Maximum pointwise difference: 16.99522591
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -22.96857643, Converted: -5.97335052
- Biggest difference in row (0, 0), sum -142.974274 vs -73.637054
- Layer 6, Token 6 (model.layers.out comparison):
- Original tensor sum: -180.967728
- Converted tensor sum: -69.754128
- Original tensor mean: -22.620966
- Converted tensor mean: -8.719266
- Mean difference: 14.33841133
- Maximum pointwise difference: 25.72810745
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -36.46190262, Converted: -10.73379517
- Biggest difference in row (0, 0), sum -180.967728 vs -69.754128
- Layer 7, Token 6 (model.layers.out comparison):
- Original tensor sum: -390.468323
- Converted tensor sum: -284.137634
- Original tensor mean: -48.808540
- Converted tensor mean: -35.517204
- Mean difference: 14.31795502
- Maximum pointwise difference: 25.91625977
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -61.98001099, Converted: -36.06375122
- Biggest difference in row (0, 0), sum -390.468323 vs -284.137634
- Layer 8, Token 6 (model.layers.out comparison):
- Original tensor sum: -325.042450
- Converted tensor sum: -284.328186
- Original tensor mean: -40.630306
- Converted tensor mean: -35.541023
- Mean difference: 6.66226053
- Maximum pointwise difference: 16.25393486
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -47.66500854, Converted: -31.41107368
- Biggest difference in row (0, 0), sum -325.042450 vs -284.328186
- Layer 9, Token 6 (model.layers.out comparison):
- Original tensor sum: -350.015503
- Converted tensor sum: -313.897308
- Original tensor mean: -43.751938
- Converted tensor mean: -39.237164
- Mean difference: 9.32056522
- Maximum pointwise difference: 23.60877037
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -54.44406891, Converted: -30.83529854
- Biggest difference in row (0, 0), sum -350.015503 vs -313.897308
- Layer 10, Token 6 (model.layers.out comparison):
- Original tensor sum: -375.606720
- Converted tensor sum: -330.646790
- Original tensor mean: -46.950840
- Converted tensor mean: -41.330849
- Mean difference: 8.38710022
- Maximum pointwise difference: 27.84555435
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -60.66308594, Converted: -32.81753159
- Biggest difference in row (0, 0), sum -375.606720 vs -330.646790
- Layer 11, Token 6 (model.layers.out comparison):
- Original tensor sum: -764.285278
- Converted tensor sum: -730.992798
- Original tensor mean: -95.535660
- Converted tensor mean: -91.374100
- Mean difference: 7.89588118
- Maximum pointwise difference: 26.59626007
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -118.78226471, Converted: -92.18600464
- Biggest difference in row (0, 0), sum -764.285278 vs -730.992798
- Layer 12, Token 6 (model.layers.out comparison):
- Original tensor sum: -777.147827
- Converted tensor sum: -765.448669
- Original tensor mean: -97.143478
- Converted tensor mean: -95.681084
- Mean difference: 6.33593750
- Maximum pointwise difference: 19.02982330
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -119.55146790, Converted: -100.52164459
- Biggest difference in row (0, 0), sum -777.147827 vs -765.448669
- Layer 13, Token 6 (model.layers.out comparison):
- Original tensor sum: -787.772400
- Converted tensor sum: -777.362915
- Original tensor mean: -98.471550
- Converted tensor mean: -97.170364
- Mean difference: 7.69482183
- Maximum pointwise difference: 19.15751648
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -120.39152527, Converted: -101.23400879
- Biggest difference in row (0, 0), sum -787.772400 vs -777.362915
- Layer 14, Token 6 (model.layers.out comparison):
- Original tensor sum: -883.013428
- Converted tensor sum: -881.301514
- Original tensor mean: -110.376678
- Converted tensor mean: -110.162689
- Mean difference: 12.85068035
- Maximum pointwise difference: 28.13771820
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -129.54022217, Converted: -101.40250397
- Biggest difference in row (0, 0), sum -883.013428 vs -881.301514
- Layer 15, Token 6 (model.layers.out comparison):
- Original tensor sum: -1324.892822
- Converted tensor sum: -1316.172363
- Original tensor mean: -165.611603
- Converted tensor mean: -164.521545
- Mean difference: 12.77940941
- Maximum pointwise difference: 29.43301392
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -192.78923035, Converted: -163.35621643
- Biggest difference in row (0, 0), sum -1324.892822 vs -1316.172363
- Layer 0, Token 7 (model.layers.out comparison):
- Original tensor sum: 16.302702
- Converted tensor sum: 6.534010
- Original tensor mean: 2.037838
- Converted tensor mean: 0.816751
- Mean difference: 1.39780235
- Maximum pointwise difference: 4.86297131
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 4.45225191, Converted: -0.41071916
- Biggest difference in row (0, 0), sum 16.302702 vs 6.534010
- Layer 1, Token 7 (model.layers.out comparison):
- Original tensor sum: 7.949856
- Converted tensor sum: 10.515163
- Original tensor mean: 0.993732
- Converted tensor mean: 1.314395
- Mean difference: 1.91308641
- Maximum pointwise difference: 3.92083621
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 1.42750001, Converted: 5.34833622
- Biggest difference in row (0, 0), sum 7.949856 vs 10.515163
- Layer 2, Token 7 (model.layers.out comparison):
- Original tensor sum: 5.224671
- Converted tensor sum: 8.502550
- Original tensor mean: 0.653084
- Converted tensor mean: 1.062819
- Mean difference: 2.38619947
- Maximum pointwise difference: 6.21067238
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 4.76728964, Converted: -1.44338274
- Biggest difference in row (0, 0), sum 5.224671 vs 8.502550
- Layer 3, Token 7 (model.layers.out comparison):
- Original tensor sum: 13.283526
- Converted tensor sum: 35.439297
- Original tensor mean: 1.660441
- Converted tensor mean: 4.429912
- Mean difference: 3.47373605
- Maximum pointwise difference: 5.22519779
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 1.58731771, Converted: 6.81251574
- Biggest difference in row (0, 0), sum 13.283526 vs 35.439297
- Layer 4, Token 7 (model.layers.out comparison):
- Original tensor sum: 17.744591
- Converted tensor sum: 31.593395
- Original tensor mean: 2.218074
- Converted tensor mean: 3.949174
- Mean difference: 2.68589926
- Maximum pointwise difference: 4.57245827
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -2.52367592, Converted: 2.04878211
- Biggest difference in row (0, 0), sum 17.744591 vs 31.593395
- Layer 5, Token 7 (model.layers.out comparison):
- Original tensor sum: 23.343349
- Converted tensor sum: 33.269924
- Original tensor mean: 2.917919
- Converted tensor mean: 4.158741
- Mean difference: 2.63248682
- Maximum pointwise difference: 5.37845278
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 5.39788294, Converted: 0.01943016
- Biggest difference in row (0, 0), sum 23.343349 vs 33.269924
- Layer 6, Token 7 (model.layers.out comparison):
- Original tensor sum: 23.346264
- Converted tensor sum: 35.443920
- Original tensor mean: 2.918283
- Converted tensor mean: 4.430490
- Mean difference: 2.67119837
- Maximum pointwise difference: 4.63596630
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 6.03884697, Converted: 1.40288091
- Biggest difference in row (0, 0), sum 23.346264 vs 35.443920
- Layer 7, Token 7 (model.layers.out comparison):
- Original tensor sum: 64.039200
- Converted tensor sum: 91.760284
- Original tensor mean: 8.004900
- Converted tensor mean: 11.470036
- Mean difference: 4.01984978
- Maximum pointwise difference: 7.18059826
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 4.17877483, Converted: 11.35937309
- Biggest difference in row (0, 0), sum 64.039200 vs 91.760284
- Layer 8, Token 7 (model.layers.out comparison):
- Original tensor sum: 72.276039
- Converted tensor sum: 93.156998
- Original tensor mean: 9.034505
- Converted tensor mean: 11.644625
- Mean difference: 3.85819149
- Maximum pointwise difference: 7.09706306
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 6.97290230, Converted: 14.06996536
- Biggest difference in row (0, 0), sum 72.276039 vs 93.156998
- Layer 9, Token 7 (model.layers.out comparison):
- Original tensor sum: 77.303429
- Converted tensor sum: 87.750015
- Original tensor mean: 9.662929
- Converted tensor mean: 10.968752
- Mean difference: 3.21908855
- Maximum pointwise difference: 7.22212887
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 7.19689465, Converted: 14.41902351
- Biggest difference in row (0, 0), sum 77.303429 vs 87.750015
- Layer 10, Token 7 (model.layers.out comparison):
- Original tensor sum: 75.555130
- Converted tensor sum: 87.081650
- Original tensor mean: 9.444391
- Converted tensor mean: 10.885206
- Mean difference: 3.37582994
- Maximum pointwise difference: 7.74006128
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 13.60124302, Converted: 5.86118174
- Biggest difference in row (0, 0), sum 75.555130 vs 87.081650
- Layer 11, Token 7 (model.layers.out comparison):
- Original tensor sum: 156.940781
- Converted tensor sum: 159.013306
- Original tensor mean: 19.617598
- Converted tensor mean: 19.876663
- Mean difference: 3.38565111
- Maximum pointwise difference: 8.84408474
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 24.20116806, Converted: 15.35708332
- Biggest difference in row (0, 0), sum 156.940781 vs 159.013306
- Layer 12, Token 7 (model.layers.out comparison):
- Original tensor sum: 154.763428
- Converted tensor sum: 153.900482
- Original tensor mean: 19.345428
- Converted tensor mean: 19.237560
- Mean difference: 3.46122217
- Maximum pointwise difference: 9.50335789
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 24.17844582, Converted: 14.67508793
- Biggest difference in row (0, 0), sum 154.763428 vs 153.900482
- Layer 13, Token 7 (model.layers.out comparison):
- Original tensor sum: 153.990646
- Converted tensor sum: 150.608353
- Original tensor mean: 19.248831
- Converted tensor mean: 18.826044
- Mean difference: 3.53592730
- Maximum pointwise difference: 9.36601925
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 23.90514946, Converted: 14.53913021
- Biggest difference in row (0, 0), sum 153.990646 vs 150.608353
- Layer 14, Token 7 (model.layers.out comparison):
- Original tensor sum: 153.169525
- Converted tensor sum: 133.618896
- Original tensor mean: 19.146191
- Converted tensor mean: 16.702362
- Mean difference: 4.84187126
- Maximum pointwise difference: 11.02708149
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 24.07042313, Converted: 13.04334164
- Biggest difference in row (0, 0), sum 153.169525 vs 133.618896
- Layer 15, Token 7 (model.layers.out comparison):
- Original tensor sum: 256.612762
- Converted tensor sum: 236.694611
- Original tensor mean: 32.076595
- Converted tensor mean: 29.586826
- Mean difference: 4.89619875
- Maximum pointwise difference: 11.06676292
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 34.29892731, Converted: 23.23216438
- Biggest difference in row (0, 0), sum 256.612762 vs 236.694611
- Layer 0, Token 8 (model.layers.out comparison):
- Original tensor sum: 4.551975
- Converted tensor sum: 1.348729
- Original tensor mean: 0.568997
- Converted tensor mean: 0.168591
- Mean difference: 2.05911183
- Maximum pointwise difference: 5.11385345
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 1.91795087, Converted: -3.19590235
- Biggest difference in row (0, 0), sum 4.551975 vs 1.348729
- Layer 1, Token 8 (model.layers.out comparison):
- Original tensor sum: -10.499850
- Converted tensor sum: -11.510830
- Original tensor mean: -1.312481
- Converted tensor mean: -1.438854
- Mean difference: 3.72058988
- Maximum pointwise difference: 7.12741280
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -4.90886450, Converted: 2.21854830
- Biggest difference in row (0, 0), sum -10.499850 vs -11.510830
- Layer 2, Token 8 (model.layers.out comparison):
- Original tensor sum: 21.469618
- Converted tensor sum: 13.045154
- Original tensor mean: 2.683702
- Converted tensor mean: 1.630644
- Mean difference: 4.73055506
- Maximum pointwise difference: 11.87027359
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 6.33750200, Converted: -5.53277111
- Biggest difference in row (0, 0), sum 21.469618 vs 13.045154
- Layer 3, Token 8 (model.layers.out comparison):
- Original tensor sum: 56.933716
- Converted tensor sum: 65.067757
- Original tensor mean: 7.116714
- Converted tensor mean: 8.133470
- Mean difference: 5.21158791
- Maximum pointwise difference: 10.06817722
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 10.73284817, Converted: 0.66467106
- Biggest difference in row (0, 0), sum 56.933716 vs 65.067757
- Layer 4, Token 8 (model.layers.out comparison):
- Original tensor sum: 54.841175
- Converted tensor sum: 58.977600
- Original tensor mean: 6.855147
- Converted tensor mean: 7.372200
- Mean difference: 5.39579868
- Maximum pointwise difference: 10.23285866
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 10.38635254, Converted: 0.15349340
- Biggest difference in row (0, 0), sum 54.841175 vs 58.977600
- Layer 5, Token 8 (model.layers.out comparison):
- Original tensor sum: 59.439285
- Converted tensor sum: 59.979446
- Original tensor mean: 7.429911
- Converted tensor mean: 7.497431
- Mean difference: 5.44655371
- Maximum pointwise difference: 11.05043030
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.77372646, Converted: -1.27670395
- Biggest difference in row (0, 0), sum 59.439285 vs 59.979446
- Layer 6, Token 8 (model.layers.out comparison):
- Original tensor sum: 57.398651
- Converted tensor sum: 56.296188
- Original tensor mean: 7.174831
- Converted tensor mean: 7.037024
- Mean difference: 5.29393005
- Maximum pointwise difference: 9.82726002
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.26543045, Converted: -0.56182986
- Biggest difference in row (0, 0), sum 57.398651 vs 56.296188
- Layer 7, Token 8 (model.layers.out comparison):
- Original tensor sum: 108.492706
- Converted tensor sum: 119.552338
- Original tensor mean: 13.561588
- Converted tensor mean: 14.944042
- Mean difference: 5.49957895
- Maximum pointwise difference: 11.73512173
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 10.45698833, Converted: 22.19211006
- Biggest difference in row (0, 0), sum 108.492706 vs 119.552338
- Layer 8, Token 8 (model.layers.out comparison):
- Original tensor sum: 106.563354
- Converted tensor sum: 119.608925
- Original tensor mean: 13.320419
- Converted tensor mean: 14.951116
- Mean difference: 4.46781254
- Maximum pointwise difference: 10.82487202
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 9.85585308, Converted: 20.68072510
- Biggest difference in row (0, 0), sum 106.563354 vs 119.608925
- Layer 9, Token 8 (model.layers.out comparison):
- Original tensor sum: 111.512817
- Converted tensor sum: 109.928528
- Original tensor mean: 13.939102
- Converted tensor mean: 13.741066
- Mean difference: 4.52381039
- Maximum pointwise difference: 8.89503384
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 12.07329845, Converted: 20.96833229
- Biggest difference in row (0, 0), sum 111.512817 vs 109.928528
- Layer 10, Token 8 (model.layers.out comparison):
- Original tensor sum: 111.241730
- Converted tensor sum: 103.886688
- Original tensor mean: 13.905216
- Converted tensor mean: 12.985836
- Mean difference: 4.59785748
- Maximum pointwise difference: 8.55565834
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 11.01864815, Converted: 19.57430649
- Biggest difference in row (0, 0), sum 111.241730 vs 103.886688
- Layer 11, Token 8 (model.layers.out comparison):
- Original tensor sum: 194.094177
- Converted tensor sum: 193.564484
- Original tensor mean: 24.261772
- Converted tensor mean: 24.195560
- Mean difference: 4.49120235
- Maximum pointwise difference: 9.88864136
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 22.25957108, Converted: 32.14821243
- Biggest difference in row (0, 0), sum 194.094177 vs 193.564484
- Layer 12, Token 8 (model.layers.out comparison):
- Original tensor sum: 196.658234
- Converted tensor sum: 189.827057
- Original tensor mean: 24.582279
- Converted tensor mean: 23.728382
- Mean difference: 5.10350180
- Maximum pointwise difference: 9.80338287
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 30.23954582, Converted: 20.43616295
- Biggest difference in row (0, 0), sum 196.658234 vs 189.827057
- Layer 13, Token 8 (model.layers.out comparison):
- Original tensor sum: 193.237976
- Converted tensor sum: 184.223190
- Original tensor mean: 24.154747
- Converted tensor mean: 23.027899
- Mean difference: 5.11390686
- Maximum pointwise difference: 10.04300690
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 30.05261230, Converted: 20.00960541
- Biggest difference in row (0, 0), sum 193.237976 vs 184.223190
- Layer 14, Token 8 (model.layers.out comparison):
- Original tensor sum: 183.582977
- Converted tensor sum: 183.402130
- Original tensor mean: 22.947872
- Converted tensor mean: 22.925266
- Mean difference: 5.41123581
- Maximum pointwise difference: 10.28223228
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 19.26763725, Converted: 29.54986954
- Biggest difference in row (0, 0), sum 183.582977 vs 183.402130
- Layer 15, Token 8 (model.layers.out comparison):
- Original tensor sum: 297.650543
- Converted tensor sum: 301.053558
- Original tensor mean: 37.206318
- Converted tensor mean: 37.631695
- Mean difference: 5.31624222
- Maximum pointwise difference: 10.28567123
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 36.54620743, Converted: 46.83187866
- Biggest difference in row (0, 0), sum 297.650543 vs 301.053558
- Layer 0, Token 9 (model.layers.out comparison):
- Original tensor sum: 27.724323
- Converted tensor sum: 7.010333
- Original tensor mean: 3.465540
- Converted tensor mean: 0.876292
- Mean difference: 3.55158758
- Maximum pointwise difference: 7.14975357
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 4.03241444, Converted: -3.11733937
- Biggest difference in row (0, 0), sum 27.724323 vs 7.010333
- Layer 1, Token 9 (model.layers.out comparison):
- Original tensor sum: 17.384836
- Converted tensor sum: 7.348456
- Original tensor mean: 2.173105
- Converted tensor mean: 0.918557
- Mean difference: 3.79201698
- Maximum pointwise difference: 8.55698013
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -0.55471849, Converted: 8.00226116
- Biggest difference in row (0, 0), sum 17.384836 vs 7.348456
- Layer 2, Token 9 (model.layers.out comparison):
- Original tensor sum: 20.318661
- Converted tensor sum: 28.392349
- Original tensor mean: 2.539833
- Converted tensor mean: 3.549044
- Mean difference: 2.94842267
- Maximum pointwise difference: 9.89197159
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -1.05586541, Converted: 8.83610630
- Biggest difference in row (0, 0), sum 20.318661 vs 28.392349
- Layer 3, Token 9 (model.layers.out comparison):
- Original tensor sum: 65.513725
- Converted tensor sum: 84.414536
- Original tensor mean: 8.189216
- Converted tensor mean: 10.551817
- Mean difference: 4.41447163
- Maximum pointwise difference: 10.74111176
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 6.86948347, Converted: 17.61059570
- Biggest difference in row (0, 0), sum 65.513725 vs 84.414536
- Layer 4, Token 9 (model.layers.out comparison):
- Original tensor sum: 61.603691
- Converted tensor sum: 72.172562
- Original tensor mean: 7.700461
- Converted tensor mean: 9.021570
- Mean difference: 4.32150173
- Maximum pointwise difference: 10.51774502
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 4.71584320, Converted: 15.23358822
- Biggest difference in row (0, 0), sum 61.603691 vs 72.172562
- Layer 5, Token 9 (model.layers.out comparison):
- Original tensor sum: 61.554985
- Converted tensor sum: 60.684212
- Original tensor mean: 7.694373
- Converted tensor mean: 7.585526
- Mean difference: 4.84910297
- Maximum pointwise difference: 9.77899742
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 5.03849173, Converted: 14.81748962
- Biggest difference in row (0, 0), sum 61.554985 vs 60.684212
- Layer 6, Token 9 (model.layers.out comparison):
- Original tensor sum: 60.121288
- Converted tensor sum: 61.323517
- Original tensor mean: 7.515161
- Converted tensor mean: 7.665440
- Mean difference: 4.61501122
- Maximum pointwise difference: 10.19813538
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 4.46036100, Converted: 14.65849590
- Biggest difference in row (0, 0), sum 60.121288 vs 61.323517
- Layer 7, Token 9 (model.layers.out comparison):
- Original tensor sum: 120.854408
- Converted tensor sum: 122.564323
- Original tensor mean: 15.106801
- Converted tensor mean: 15.320540
- Mean difference: 4.58281326
- Maximum pointwise difference: 10.81363106
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 11.19677925, Converted: 22.01041031
- Biggest difference in row (0, 0), sum 120.854408 vs 122.564323
- Layer 8, Token 9 (model.layers.out comparison):
- Original tensor sum: 111.411377
- Converted tensor sum: 113.878586
- Original tensor mean: 13.926422
- Converted tensor mean: 14.234823
- Mean difference: 4.80341482
- Maximum pointwise difference: 8.54869747
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 10.95728207, Converted: 19.50597954
- Biggest difference in row (0, 0), sum 111.411377 vs 113.878586
- Layer 9, Token 9 (model.layers.out comparison):
- Original tensor sum: 104.621353
- Converted tensor sum: 99.551331
- Original tensor mean: 13.077669
- Converted tensor mean: 12.443916
- Mean difference: 4.94641495
- Maximum pointwise difference: 7.18619919
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 10.17811966, Converted: 17.36431885
- Biggest difference in row (0, 0), sum 104.621353 vs 99.551331
- Layer 10, Token 9 (model.layers.out comparison):
- Original tensor sum: 105.495895
- Converted tensor sum: 90.669807
- Original tensor mean: 13.186987
- Converted tensor mean: 11.333726
- Mean difference: 4.88313580
- Maximum pointwise difference: 8.44397736
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 11.21555328, Converted: 2.77157593
- Biggest difference in row (0, 0), sum 105.495895 vs 90.669807
- Layer 11, Token 9 (model.layers.out comparison):
- Original tensor sum: 198.914932
- Converted tensor sum: 187.657013
- Original tensor mean: 24.864367
- Converted tensor mean: 23.457127
- Mean difference: 4.87979174
- Maximum pointwise difference: 8.17332649
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 22.94329643, Converted: 14.76996994
- Biggest difference in row (0, 0), sum 198.914932 vs 187.657013
- Layer 12, Token 9 (model.layers.out comparison):
- Original tensor sum: 197.781982
- Converted tensor sum: 182.248840
- Original tensor mean: 24.722748
- Converted tensor mean: 22.781105
- Mean difference: 5.16355371
- Maximum pointwise difference: 9.60578632
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 23.29119301, Converted: 13.68540668
- Biggest difference in row (0, 0), sum 197.781982 vs 182.248840
- Layer 13, Token 9 (model.layers.out comparison):
- Original tensor sum: 191.909027
- Converted tensor sum: 177.667252
- Original tensor mean: 23.988628
- Converted tensor mean: 22.208406
- Mean difference: 5.14386559
- Maximum pointwise difference: 9.20664406
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 22.88940430, Converted: 13.68276024
- Biggest difference in row (0, 0), sum 191.909027 vs 177.667252
- Layer 14, Token 9 (model.layers.out comparison):
- Original tensor sum: 193.112854
- Converted tensor sum: 170.826324
- Original tensor mean: 24.139107
- Converted tensor mean: 21.353291
- Mean difference: 5.67996836
- Maximum pointwise difference: 10.54143143
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 29.37781715, Converted: 18.83638573
- Biggest difference in row (0, 0), sum 193.112854 vs 170.826324
- Layer 15, Token 9 (model.layers.out comparison):
- Original tensor sum: 310.393738
- Converted tensor sum: 295.392517
- Original tensor mean: 38.799217
- Converted tensor mean: 36.924065
- Mean difference: 5.11053467
- Maximum pointwise difference: 9.09804153
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 43.17533493, Converted: 34.07729340
- Biggest difference in row (0, 0), sum 310.393738 vs 295.392517
- Layer 0, Token 10 (model.layers.out comparison):
- Original tensor sum: 11.304202
- Converted tensor sum: 14.919886
- Original tensor mean: 1.413025
- Converted tensor mean: 1.864986
- Mean difference: 1.20558476
- Maximum pointwise difference: 2.02042794
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -0.23466866, Converted: 1.78575933
- Biggest difference in row (0, 0), sum 11.304202 vs 14.919886
- Layer 1, Token 10 (model.layers.out comparison):
- Original tensor sum: 4.380467
- Converted tensor sum: 1.448399
- Original tensor mean: 0.547558
- Converted tensor mean: 0.181050
- Mean difference: 1.55803418
- Maximum pointwise difference: 3.08950615
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 3.13031340, Converted: 0.04080731
- Biggest difference in row (0, 0), sum 4.380467 vs 1.448399
- Layer 2, Token 10 (model.layers.out comparison):
- Original tensor sum: 21.641123
- Converted tensor sum: 18.135971
- Original tensor mean: 2.705140
- Converted tensor mean: 2.266996
- Mean difference: 2.29236317
- Maximum pointwise difference: 5.34974813
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 4.73606253, Converted: -0.61368543
- Biggest difference in row (0, 0), sum 21.641123 vs 18.135971
- Layer 3, Token 10 (model.layers.out comparison):
- Original tensor sum: 84.183029
- Converted tensor sum: 75.554764
- Original tensor mean: 10.522879
- Converted tensor mean: 9.444345
- Mean difference: 2.50477004
- Maximum pointwise difference: 7.33609867
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 11.12465191, Converted: 3.78855324
- Biggest difference in row (0, 0), sum 84.183029 vs 75.554764
- Layer 4, Token 10 (model.layers.out comparison):
- Original tensor sum: 75.952011
- Converted tensor sum: 63.684746
- Original tensor mean: 9.494001
- Converted tensor mean: 7.960593
- Mean difference: 2.89978528
- Maximum pointwise difference: 6.58637476
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 10.07624340, Converted: 3.48986864
- Biggest difference in row (0, 0), sum 75.952011 vs 63.684746
- Layer 5, Token 10 (model.layers.out comparison):
- Original tensor sum: 67.380692
- Converted tensor sum: 51.477894
- Original tensor mean: 8.422586
- Converted tensor mean: 6.434737
- Mean difference: 2.92978549
- Maximum pointwise difference: 6.54403639
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 10.47875404, Converted: 3.93471766
- Biggest difference in row (0, 0), sum 67.380692 vs 51.477894
- Layer 6, Token 10 (model.layers.out comparison):
- Original tensor sum: 64.356155
- Converted tensor sum: 44.292259
- Original tensor mean: 8.044519
- Converted tensor mean: 5.536532
- Mean difference: 3.18394947
- Maximum pointwise difference: 7.18761826
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 9.33854580, Converted: 2.15092754
- Biggest difference in row (0, 0), sum 64.356155 vs 44.292259
- Layer 7, Token 10 (model.layers.out comparison):
- Original tensor sum: 124.955261
- Converted tensor sum: 105.713638
- Original tensor mean: 15.619408
- Converted tensor mean: 13.214205
- Mean difference: 3.17375469
- Maximum pointwise difference: 7.15706635
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 16.18268585, Converted: 9.02561951
- Biggest difference in row (0, 0), sum 124.955261 vs 105.713638
- Layer 8, Token 10 (model.layers.out comparison):
- Original tensor sum: 105.275124
- Converted tensor sum: 92.354050
- Original tensor mean: 13.159390
- Converted tensor mean: 11.544256
- Mean difference: 2.89860010
- Maximum pointwise difference: 6.96542978
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 12.28927994, Converted: 5.32385015
- Biggest difference in row (0, 0), sum 105.275124 vs 92.354050
- Layer 9, Token 10 (model.layers.out comparison):
- Original tensor sum: 89.282066
- Converted tensor sum: 75.157639
- Original tensor mean: 11.160258
- Converted tensor mean: 9.394705
- Mean difference: 2.89608860
- Maximum pointwise difference: 7.40043926
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 9.69557953, Converted: 2.29514027
- Biggest difference in row (0, 0), sum 89.282066 vs 75.157639
- Layer 10, Token 10 (model.layers.out comparison):
- Original tensor sum: 87.814186
- Converted tensor sum: 68.457840
- Original tensor mean: 10.976773
- Converted tensor mean: 8.557230
- Mean difference: 3.06474447
- Maximum pointwise difference: 8.03616142
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 10.20811844, Converted: 2.17195749
- Biggest difference in row (0, 0), sum 87.814186 vs 68.457840
- Layer 11, Token 10 (model.layers.out comparison):
- Original tensor sum: 184.781067
- Converted tensor sum: 170.778610
- Original tensor mean: 23.097633
- Converted tensor mean: 21.347326
- Mean difference: 2.85195446
- Maximum pointwise difference: 6.81012630
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 22.35528374, Converted: 15.54515743
- Biggest difference in row (0, 0), sum 184.781067 vs 170.778610
- Layer 12, Token 10 (model.layers.out comparison):
- Original tensor sum: 187.157104
- Converted tensor sum: 166.325562
- Original tensor mean: 23.394638
- Converted tensor mean: 20.790695
- Mean difference: 3.00816154
- Maximum pointwise difference: 8.29628849
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 23.84814453, Converted: 15.55185604
- Biggest difference in row (0, 0), sum 187.157104 vs 166.325562
- Layer 13, Token 10 (model.layers.out comparison):
- Original tensor sum: 180.577179
- Converted tensor sum: 161.409668
- Original tensor mean: 22.572147
- Converted tensor mean: 20.176208
- Mean difference: 3.22855854
- Maximum pointwise difference: 8.27111149
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 22.88647652, Converted: 14.61536503
- Biggest difference in row (0, 0), sum 180.577179 vs 161.409668
- Layer 14, Token 10 (model.layers.out comparison):
- Original tensor sum: 176.409912
- Converted tensor sum: 155.317413
- Original tensor mean: 22.051239
- Converted tensor mean: 19.414677
- Mean difference: 3.30306578
- Maximum pointwise difference: 8.51622581
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 21.49407005, Converted: 12.97784424
- Biggest difference in row (0, 0), sum 176.409912 vs 155.317413
- Layer 15, Token 10 (model.layers.out comparison):
- Original tensor sum: 303.652618
- Converted tensor sum: 289.143890
- Original tensor mean: 37.956577
- Converted tensor mean: 36.142986
- Mean difference: 3.20148277
- Maximum pointwise difference: 7.65085030
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 38.91091537, Converted: 31.26006508
- Biggest difference in row (0, 0), sum 303.652618 vs 289.143890
- Layer 0, Token 11 (model.layers.out comparison):
- Original tensor sum: 3.868190
- Converted tensor sum: -4.365316
- Original tensor mean: 0.483524
- Converted tensor mean: -0.545665
- Mean difference: 1.47696412
- Maximum pointwise difference: 3.49379730
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 1.60926533, Converted: -1.88453186
- Biggest difference in row (0, 0), sum 3.868190 vs -4.365316
- Layer 1, Token 11 (model.layers.out comparison):
- Original tensor sum: -4.763882
- Converted tensor sum: -8.100720
- Original tensor mean: -0.595485
- Converted tensor mean: -1.012590
- Mean difference: 2.60996270
- Maximum pointwise difference: 4.04230022
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -3.43199134, Converted: 0.61030883
- Biggest difference in row (0, 0), sum -4.763882 vs -8.100720
- Layer 2, Token 11 (model.layers.out comparison):
- Original tensor sum: -8.837991
- Converted tensor sum: -17.355688
- Original tensor mean: -1.104749
- Converted tensor mean: -2.169461
- Mean difference: 3.57004023
- Maximum pointwise difference: 7.78442717
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -2.88003159, Converted: 4.90439558
- Biggest difference in row (0, 0), sum -8.837991 vs -17.355688
- Layer 3, Token 11 (model.layers.out comparison):
- Original tensor sum: -151.825806
- Converted tensor sum: -119.589157
- Original tensor mean: -18.978226
- Converted tensor mean: -14.948645
- Mean difference: 4.57043171
- Maximum pointwise difference: 10.22036552
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -23.12115479, Converted: -12.90078926
- Biggest difference in row (0, 0), sum -151.825806 vs -119.589157
- Layer 4, Token 11 (model.layers.out comparison):
- Original tensor sum: -87.672623
- Converted tensor sum: -49.333626
- Original tensor mean: -10.959078
- Converted tensor mean: -6.166703
- Mean difference: 5.28691673
- Maximum pointwise difference: 12.21502209
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -14.83695984, Converted: -2.62193775
- Biggest difference in row (0, 0), sum -87.672623 vs -49.333626
- Layer 5, Token 11 (model.layers.out comparison):
- Original tensor sum: -19.529230
- Converted tensor sum: 51.921982
- Original tensor mean: -2.441154
- Converted tensor mean: 6.490248
- Mean difference: 8.93140125
- Maximum pointwise difference: 17.95970917
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 2.32367539, Converted: 20.28338432
- Biggest difference in row (0, 0), sum -19.529230 vs 51.921982
- Layer 6, Token 11 (model.layers.out comparison):
- Original tensor sum: -34.699642
- Converted tensor sum: 49.364166
- Original tensor mean: -4.337455
- Converted tensor mean: 6.170521
- Mean difference: 10.50797558
- Maximum pointwise difference: 19.14058685
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -4.33303738, Converted: 14.80754948
- Biggest difference in row (0, 0), sum -34.699642 vs 49.364166
- Layer 7, Token 11 (model.layers.out comparison):
- Original tensor sum: -174.093460
- Converted tensor sum: 116.110802
- Original tensor mean: -21.761683
- Converted tensor mean: 14.513850
- Mean difference: 36.27553177
- Maximum pointwise difference: 45.40389252
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -18.08833313, Converted: 27.31555748
- Biggest difference in row (0, 0), sum -174.093460 vs 116.110802
- Layer 8, Token 11 (model.layers.out comparison):
- Original tensor sum: -95.914619
- Converted tensor sum: 104.116745
- Original tensor mean: -11.989327
- Converted tensor mean: 13.014593
- Mean difference: 25.00392151
- Maximum pointwise difference: 39.39223480
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -25.33579826, Converted: 14.05643463
- Biggest difference in row (0, 0), sum -95.914619 vs 104.116745
- Layer 9, Token 11 (model.layers.out comparison):
- Original tensor sum: -76.038055
- Converted tensor sum: 86.082336
- Original tensor mean: -9.504757
- Converted tensor mean: 10.760292
- Mean difference: 20.92745209
- Maximum pointwise difference: 40.40296555
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -26.76908302, Converted: 13.63388157
- Biggest difference in row (0, 0), sum -76.038055 vs 86.082336
- Layer 10, Token 11 (model.layers.out comparison):
- Original tensor sum: -62.967239
- Converted tensor sum: 79.332596
- Original tensor mean: -7.870905
- Converted tensor mean: 9.916574
- Mean difference: 18.64283180
- Maximum pointwise difference: 40.29864883
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -27.55656052, Converted: 12.74208832
- Biggest difference in row (0, 0), sum -62.967239 vs 79.332596
- Layer 11, Token 11 (model.layers.out comparison):
- Original tensor sum: -348.172638
- Converted tensor sum: 185.268341
- Original tensor mean: -43.521580
- Converted tensor mean: 23.158543
- Mean difference: 66.68012238
- Maximum pointwise difference: 90.25902557
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -54.79597092, Converted: 35.46305466
- Biggest difference in row (0, 0), sum -348.172638 vs 185.268341
- Layer 12, Token 11 (model.layers.out comparison):
- Original tensor sum: -380.460999
- Converted tensor sum: 184.850082
- Original tensor mean: -47.557625
- Converted tensor mean: 23.106260
- Mean difference: 70.66388702
- Maximum pointwise difference: 91.58323669
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -56.39131927, Converted: 35.19191360
- Biggest difference in row (0, 0), sum -380.460999 vs 184.850082
- Layer 13, Token 11 (model.layers.out comparison):
- Original tensor sum: -387.549927
- Converted tensor sum: 178.291550
- Original tensor mean: -48.443741
- Converted tensor mean: 22.286444
- Mean difference: 70.73018646
- Maximum pointwise difference: 92.60649109
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -57.23683167, Converted: 35.36965561
- Biggest difference in row (0, 0), sum -387.549927 vs 178.291550
- Layer 14, Token 11 (model.layers.out comparison):
- Original tensor sum: -381.615417
- Converted tensor sum: 175.841187
- Original tensor mean: -47.701927
- Converted tensor mean: 21.980148
- Mean difference: 69.68207550
- Maximum pointwise difference: 95.39483643
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -61.00698853, Converted: 34.38784409
- Biggest difference in row (0, 0), sum -381.615417 vs 175.841187
- Layer 15, Token 11 (model.layers.out comparison):
- Original tensor sum: -791.898560
- Converted tensor sum: 313.297852
- Original tensor mean: -98.987320
- Converted tensor mean: 39.162231
- Mean difference: 138.14956665
- Maximum pointwise difference: 174.31031799
- Max difference location: (0, 0, 0)
- Values at max diff - Original: -120.39865875, Converted: 53.91165924
- Biggest difference in row (0, 0), sum -791.898560 vs 313.297852
- Layer 0, Token 12 (model.layers.out comparison):
- Original tensor sum: 17.494652
- Converted tensor sum: -39.301899
- Original tensor mean: 2.186831
- Converted tensor mean: -4.912737
- Mean difference: 8.11834240
- Maximum pointwise difference: 15.19715595
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 2.70196438, Converted: -12.49519157
- Biggest difference in row (0, 0), sum 17.494652 vs -39.301899
- Layer 1, Token 12 (model.layers.out comparison):
- Original tensor sum: 11.314701
- Converted tensor sum: -35.014473
- Original tensor mean: 1.414338
- Converted tensor mean: -4.376809
- Mean difference: 7.67025709
- Maximum pointwise difference: 15.05980301
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 2.28716040, Converted: -12.77264309
- Biggest difference in row (0, 0), sum 11.314701 vs -35.014473
- Layer 2, Token 12 (model.layers.out comparison):
- Original tensor sum: 3.520873
- Converted tensor sum: -23.351210
- Original tensor mean: 0.440109
- Converted tensor mean: -2.918901
- Mean difference: 7.09708309
- Maximum pointwise difference: 10.56869507
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 0.29396084, Converted: -10.27473450
- Biggest difference in row (0, 0), sum 3.520873 vs -23.351210
- Layer 3, Token 12 (model.layers.out comparison):
- Original tensor sum: -38.507721
- Converted tensor sum: -65.860725
- Original tensor mean: -4.813465
- Converted tensor mean: -8.232591
- Mean difference: 6.29614639
- Maximum pointwise difference: 10.23156357
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -4.52744627, Converted: -14.75901031
- Biggest difference in row (0, 0), sum -38.507721 vs -65.860725
- Layer 4, Token 12 (model.layers.out comparison):
- Original tensor sum: -25.538549
- Converted tensor sum: -16.346577
- Original tensor mean: -3.192319
- Converted tensor mean: -2.043322
- Mean difference: 5.56114197
- Maximum pointwise difference: 11.51591301
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -9.10746288, Converted: 2.40844989
- Biggest difference in row (0, 0), sum -25.538549 vs -16.346577
- Layer 5, Token 12 (model.layers.out comparison):
- Original tensor sum: 5.103131
- Converted tensor sum: -11.820143
- Original tensor mean: 0.637891
- Converted tensor mean: -1.477518
- Mean difference: 6.80205250
- Maximum pointwise difference: 11.26421928
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 5.06476593, Converted: -6.19945335
- Biggest difference in row (0, 0), sum 5.103131 vs -11.820143
- Layer 6, Token 12 (model.layers.out comparison):
- Original tensor sum: 1.231229
- Converted tensor sum: -13.329983
- Original tensor mean: 0.153904
- Converted tensor mean: -1.666248
- Mean difference: 7.36224794
- Maximum pointwise difference: 11.85875893
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 5.86865807, Converted: -5.99010086
- Biggest difference in row (0, 0), sum 1.231229 vs -13.329983
- Layer 7, Token 12 (model.layers.out comparison):
- Original tensor sum: 49.883171
- Converted tensor sum: -138.587738
- Original tensor mean: 6.235396
- Converted tensor mean: -17.323467
- Mean difference: 23.55886269
- Maximum pointwise difference: 38.93606567
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 13.31498432, Converted: -25.62108231
- Biggest difference in row (0, 0), sum 49.883171 vs -138.587738
- Layer 8, Token 12 (model.layers.out comparison):
- Original tensor sum: 32.997459
- Converted tensor sum: -79.532417
- Original tensor mean: 4.124682
- Converted tensor mean: -9.941552
- Mean difference: 15.04267120
- Maximum pointwise difference: 28.15183258
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 10.99009418, Converted: -17.16173744
- Biggest difference in row (0, 0), sum 32.997459 vs -79.532417
- Layer 9, Token 12 (model.layers.out comparison):
- Original tensor sum: 30.462442
- Converted tensor sum: -58.022911
- Original tensor mean: 3.807805
- Converted tensor mean: -7.252864
- Mean difference: 13.06616974
- Maximum pointwise difference: 26.93473625
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 10.51771736, Converted: -16.41701889
- Biggest difference in row (0, 0), sum 30.462442 vs -58.022911
- Layer 10, Token 12 (model.layers.out comparison):
- Original tensor sum: 31.758196
- Converted tensor sum: -31.289818
- Original tensor mean: 3.969774
- Converted tensor mean: -3.911227
- Mean difference: 11.64717674
- Maximum pointwise difference: 25.19077682
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 10.60759830, Converted: -14.58317757
- Biggest difference in row (0, 0), sum 31.758196 vs -31.289818
- Layer 11, Token 12 (model.layers.out comparison):
- Original tensor sum: 64.195580
- Converted tensor sum: -290.078918
- Original tensor mean: 8.024447
- Converted tensor mean: -36.259865
- Mean difference: 44.28431320
- Maximum pointwise difference: 58.32298279
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 14.01799965, Converted: -44.30498123
- Biggest difference in row (0, 0), sum 64.195580 vs -290.078918
- Layer 12, Token 12 (model.layers.out comparison):
- Original tensor sum: 65.652679
- Converted tensor sum: -300.691650
- Original tensor mean: 8.206585
- Converted tensor mean: -37.586456
- Mean difference: 45.79303741
- Maximum pointwise difference: 64.50979614
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 13.89292812, Converted: -50.61687088
- Biggest difference in row (0, 0), sum 65.652679 vs -300.691650
- Layer 13, Token 12 (model.layers.out comparison):
- Original tensor sum: 64.880409
- Converted tensor sum: -292.294403
- Original tensor mean: 8.110051
- Converted tensor mean: -36.536800
- Mean difference: 44.64685059
- Maximum pointwise difference: 61.03430176
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 8.34301949, Converted: -52.69128418
- Biggest difference in row (0, 0), sum 64.880409 vs -292.294403
- Layer 14, Token 12 (model.layers.out comparison):
- Original tensor sum: 55.352615
- Converted tensor sum: -232.615005
- Original tensor mean: 6.919077
- Converted tensor mean: -29.076876
- Mean difference: 35.99595261
- Maximum pointwise difference: 69.32642365
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 7.07370424, Converted: -62.25271606
- Biggest difference in row (0, 0), sum 55.352615 vs -232.615005
- Layer 15, Token 12 (model.layers.out comparison):
- Original tensor sum: 191.366241
- Converted tensor sum: -607.544556
- Original tensor mean: 23.920780
- Converted tensor mean: -75.943069
- Mean difference: 99.86384583
- Maximum pointwise difference: 121.99198914
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 33.01739502, Converted: -88.97459412
- Biggest difference in row (0, 0), sum 191.366241 vs -607.544556
- Layer 0, Token 13 (model.layers.out comparison):
- Original tensor sum: 28.716766
- Converted tensor sum: 24.262428
- Original tensor mean: 3.589596
- Converted tensor mean: 3.032804
- Mean difference: 2.20962214
- Maximum pointwise difference: 5.77315617
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 4.62014198, Converted: -1.15301442
- Biggest difference in row (0, 0), sum 28.716766 vs 24.262428
- Layer 1, Token 13 (model.layers.out comparison):
- Original tensor sum: 18.283722
- Converted tensor sum: 16.804958
- Original tensor mean: 2.285465
- Converted tensor mean: 2.100620
- Mean difference: 2.44061017
- Maximum pointwise difference: 5.48099232
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -7.47550392, Converted: -1.99451160
- Biggest difference in row (0, 0), sum 18.283722 vs 16.804958
- Layer 2, Token 13 (model.layers.out comparison):
- Original tensor sum: 14.973861
- Converted tensor sum: 10.670280
- Original tensor mean: 1.871733
- Converted tensor mean: 1.333785
- Mean difference: 2.94856715
- Maximum pointwise difference: 6.09164524
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 2.11467242, Converted: -3.97697282
- Biggest difference in row (0, 0), sum 14.973861 vs 10.670280
- Layer 3, Token 13 (model.layers.out comparison):
- Original tensor sum: 62.116623
- Converted tensor sum: 46.581398
- Original tensor mean: 7.764578
- Converted tensor mean: 5.822675
- Mean difference: 3.59710693
- Maximum pointwise difference: 6.89595842
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 11.14201260, Converted: 4.24605417
- Biggest difference in row (0, 0), sum 62.116623 vs 46.581398
- Layer 4, Token 13 (model.layers.out comparison):
- Original tensor sum: 65.792244
- Converted tensor sum: 43.042854
- Original tensor mean: 8.224030
- Converted tensor mean: 5.380357
- Mean difference: 3.63414001
- Maximum pointwise difference: 8.06606770
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 9.46925735, Converted: 1.40318930
- Biggest difference in row (0, 0), sum 65.792244 vs 43.042854
- Layer 5, Token 13 (model.layers.out comparison):
- Original tensor sum: 60.294563
- Converted tensor sum: 38.709320
- Original tensor mean: 7.536820
- Converted tensor mean: 4.838665
- Mean difference: 4.29471397
- Maximum pointwise difference: 9.28423500
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 9.56281090, Converted: 0.27857587
- Biggest difference in row (0, 0), sum 60.294563 vs 38.709320
- Layer 6, Token 13 (model.layers.out comparison):
- Original tensor sum: 60.864697
- Converted tensor sum: 41.897995
- Original tensor mean: 7.608087
- Converted tensor mean: 5.237249
- Mean difference: 4.15325356
- Maximum pointwise difference: 7.30325747
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 11.01063633, Converted: 3.70737886
- Biggest difference in row (0, 0), sum 60.864697 vs 41.897995
- Layer 7, Token 13 (model.layers.out comparison):
- Original tensor sum: 124.166924
- Converted tensor sum: 107.577675
- Original tensor mean: 15.520865
- Converted tensor mean: 13.447209
- Mean difference: 4.08049011
- Maximum pointwise difference: 7.30880928
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 17.63167572, Converted: 10.32286644
- Biggest difference in row (0, 0), sum 124.166924 vs 107.577675
- Layer 8, Token 13 (model.layers.out comparison):
- Original tensor sum: 114.534744
- Converted tensor sum: 106.782104
- Original tensor mean: 14.316843
- Converted tensor mean: 13.347763
- Mean difference: 3.79455638
- Maximum pointwise difference: 8.56559753
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 18.64526367, Converted: 10.07966614
- Biggest difference in row (0, 0), sum 114.534744 vs 106.782104
- Layer 9, Token 13 (model.layers.out comparison):
- Original tensor sum: 111.904816
- Converted tensor sum: 90.398567
- Original tensor mean: 13.988102
- Converted tensor mean: 11.299821
- Mean difference: 4.39770985
- Maximum pointwise difference: 12.01837921
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 18.37693977, Converted: 6.35856009
- Biggest difference in row (0, 0), sum 111.904816 vs 90.398567
- Layer 10, Token 13 (model.layers.out comparison):
- Original tensor sum: 106.496719
- Converted tensor sum: 84.186646
- Original tensor mean: 13.312090
- Converted tensor mean: 10.523331
- Mean difference: 4.35723734
- Maximum pointwise difference: 11.76342964
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 17.81115723, Converted: 6.04772711
- Biggest difference in row (0, 0), sum 106.496719 vs 84.186646
- Layer 11, Token 13 (model.layers.out comparison):
- Original tensor sum: 197.848022
- Converted tensor sum: 191.943436
- Original tensor mean: 24.731003
- Converted tensor mean: 23.992929
- Mean difference: 3.31890941
- Maximum pointwise difference: 10.13029099
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 27.00849915, Converted: 16.87820816
- Biggest difference in row (0, 0), sum 197.848022 vs 191.943436
- Layer 12, Token 13 (model.layers.out comparison):
- Original tensor sum: 197.513275
- Converted tensor sum: 189.807312
- Original tensor mean: 24.689159
- Converted tensor mean: 23.725914
- Mean difference: 3.50938702
- Maximum pointwise difference: 10.66487598
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 26.39979744, Converted: 15.73492146
- Biggest difference in row (0, 0), sum 197.513275 vs 189.807312
- Layer 13, Token 13 (model.layers.out comparison):
- Original tensor sum: 193.055618
- Converted tensor sum: 185.801392
- Original tensor mean: 24.131952
- Converted tensor mean: 23.225174
- Mean difference: 3.32275867
- Maximum pointwise difference: 10.17280674
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 25.57653046, Converted: 15.40372372
- Biggest difference in row (0, 0), sum 193.055618 vs 185.801392
- Layer 14, Token 13 (model.layers.out comparison):
- Original tensor sum: 190.084717
- Converted tensor sum: 186.092697
- Original tensor mean: 23.760590
- Converted tensor mean: 23.261587
- Mean difference: 3.19069362
- Maximum pointwise difference: 9.42493057
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 24.81001282, Converted: 15.38508224
- Biggest difference in row (0, 0), sum 190.084717 vs 186.092697
- Layer 15, Token 13 (model.layers.out comparison):
- Original tensor sum: 319.170319
- Converted tensor sum: 323.837036
- Original tensor mean: 39.896290
- Converted tensor mean: 40.479630
- Mean difference: 3.55193925
- Maximum pointwise difference: 8.15688324
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 46.74212265, Converted: 38.58523941
- Biggest difference in row (0, 0), sum 319.170319 vs 323.837036
- Layer 0, Token 14 (model.layers.out comparison):
- Original tensor sum: 60.062901
- Converted tensor sum: 42.401054
- Original tensor mean: 7.507863
- Converted tensor mean: 5.300132
- Mean difference: 2.97920632
- Maximum pointwise difference: 7.75320148
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 13.24933434, Converted: 5.49613285
- Biggest difference in row (0, 0), sum 60.062901 vs 42.401054
- Layer 1, Token 14 (model.layers.out comparison):
- Original tensor sum: 48.843086
- Converted tensor sum: 34.002205
- Original tensor mean: 6.105386
- Converted tensor mean: 4.250276
- Mean difference: 2.82561874
- Maximum pointwise difference: 7.41196299
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 12.44728756, Converted: 5.03532457
- Biggest difference in row (0, 0), sum 48.843086 vs 34.002205
- Layer 2, Token 14 (model.layers.out comparison):
- Original tensor sum: 49.100876
- Converted tensor sum: 29.831078
- Original tensor mean: 6.137609
- Converted tensor mean: 3.728885
- Mean difference: 3.44625640
- Maximum pointwise difference: 8.00705624
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 12.05760670, Converted: 4.05055046
- Biggest difference in row (0, 0), sum 49.100876 vs 29.831078
- Layer 3, Token 14 (model.layers.out comparison):
- Original tensor sum: 94.051392
- Converted tensor sum: 85.936119
- Original tensor mean: 11.756424
- Converted tensor mean: 10.742015
- Mean difference: 3.43988085
- Maximum pointwise difference: 6.90394783
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 18.44681168, Converted: 11.54286385
- Biggest difference in row (0, 0), sum 94.051392 vs 85.936119
- Layer 4, Token 14 (model.layers.out comparison):
- Original tensor sum: 90.357742
- Converted tensor sum: 82.357994
- Original tensor mean: 11.294718
- Converted tensor mean: 10.294749
- Mean difference: 3.55732656
- Maximum pointwise difference: 7.83766174
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 19.03264809, Converted: 11.19498634
- Biggest difference in row (0, 0), sum 90.357742 vs 82.357994
- Layer 5, Token 14 (model.layers.out comparison):
- Original tensor sum: 84.158882
- Converted tensor sum: 72.302864
- Original tensor mean: 10.519860
- Converted tensor mean: 9.037858
- Mean difference: 3.79493260
- Maximum pointwise difference: 9.27737904
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 18.81698799, Converted: 9.53960896
- Biggest difference in row (0, 0), sum 84.158882 vs 72.302864
- Layer 6, Token 14 (model.layers.out comparison):
- Original tensor sum: 82.342606
- Converted tensor sum: 74.838448
- Original tensor mean: 10.292826
- Converted tensor mean: 9.354806
- Mean difference: 3.72385550
- Maximum pointwise difference: 8.27861023
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 18.35614967, Converted: 10.07753944
- Biggest difference in row (0, 0), sum 82.342606 vs 74.838448
- Layer 7, Token 14 (model.layers.out comparison):
- Original tensor sum: 152.811584
- Converted tensor sum: 143.282593
- Original tensor mean: 19.101448
- Converted tensor mean: 17.910324
- Mean difference: 3.79641771
- Maximum pointwise difference: 8.94160843
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 28.97978973, Converted: 20.03818130
- Biggest difference in row (0, 0), sum 152.811584 vs 143.282593
- Layer 8, Token 14 (model.layers.out comparison):
- Original tensor sum: 134.962891
- Converted tensor sum: 135.762573
- Original tensor mean: 16.870361
- Converted tensor mean: 16.970322
- Mean difference: 3.42910838
- Maximum pointwise difference: 6.22266769
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 27.13297844, Converted: 20.91031075
- Biggest difference in row (0, 0), sum 134.962891 vs 135.762573
- Layer 9, Token 14 (model.layers.out comparison):
- Original tensor sum: 131.262939
- Converted tensor sum: 130.663895
- Original tensor mean: 16.407867
- Converted tensor mean: 16.332987
- Mean difference: 3.14643574
- Maximum pointwise difference: 6.41224289
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 25.90853310, Converted: 19.49629021
- Biggest difference in row (0, 0), sum 131.262939 vs 130.663895
- Layer 10, Token 14 (model.layers.out comparison):
- Original tensor sum: 130.994781
- Converted tensor sum: 121.948547
- Original tensor mean: 16.374348
- Converted tensor mean: 15.243568
- Mean difference: 3.14505911
- Maximum pointwise difference: 6.92271805
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 25.71545982, Converted: 18.79274178
- Biggest difference in row (0, 0), sum 130.994781 vs 121.948547
- Layer 11, Token 14 (model.layers.out comparison):
- Original tensor sum: 227.322296
- Converted tensor sum: 221.945038
- Original tensor mean: 28.415287
- Converted tensor mean: 27.743130
- Mean difference: 2.92038918
- Maximum pointwise difference: 6.72454262
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 35.09742355, Converted: 28.37288094
- Biggest difference in row (0, 0), sum 227.322296 vs 221.945038
- Layer 12, Token 14 (model.layers.out comparison):
- Original tensor sum: 226.411957
- Converted tensor sum: 219.124207
- Original tensor mean: 28.301495
- Converted tensor mean: 27.390526
- Mean difference: 3.00309324
- Maximum pointwise difference: 5.31435776
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 32.55270767, Converted: 27.23834991
- Biggest difference in row (0, 0), sum 226.411957 vs 219.124207
- Layer 13, Token 14 (model.layers.out comparison):
- Original tensor sum: 222.480804
- Converted tensor sum: 215.029236
- Original tensor mean: 27.810101
- Converted tensor mean: 26.878654
- Mean difference: 3.01644969
- Maximum pointwise difference: 5.75550079
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 32.34063721, Converted: 26.58513641
- Biggest difference in row (0, 0), sum 222.480804 vs 215.029236
- Layer 14, Token 14 (model.layers.out comparison):
- Original tensor sum: 217.584625
- Converted tensor sum: 210.219940
- Original tensor mean: 27.198078
- Converted tensor mean: 26.277493
- Mean difference: 3.42921877
- Maximum pointwise difference: 5.59035873
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 31.23370743, Converted: 25.64334869
- Biggest difference in row (0, 0), sum 217.584625 vs 210.219940
- Layer 15, Token 14 (model.layers.out comparison):
- Original tensor sum: 347.902100
- Converted tensor sum: 344.275635
- Original tensor mean: 43.487762
- Converted tensor mean: 43.034454
- Mean difference: 3.27294016
- Maximum pointwise difference: 5.50515747
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 41.73074341, Converted: 47.23590088
- Biggest difference in row (0, 0), sum 347.902100 vs 344.275635
- Layer 0, Token 15 (model.layers.out comparison):
- Original tensor sum: 2.268566
- Converted tensor sum: -1.956201
- Original tensor mean: 0.283571
- Converted tensor mean: -0.244525
- Mean difference: 1.30659735
- Maximum pointwise difference: 3.65664506
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 3.25675011, Converted: -0.39989486
- Biggest difference in row (0, 0), sum 2.268566 vs -1.956201
- Layer 1, Token 15 (model.layers.out comparison):
- Original tensor sum: -3.244995
- Converted tensor sum: -0.596967
- Original tensor mean: -0.405624
- Converted tensor mean: -0.074621
- Mean difference: 1.73462176
- Maximum pointwise difference: 3.99903250
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 2.07227492, Converted: -1.92675745
- Biggest difference in row (0, 0), sum -3.244995 vs -0.596967
- Layer 2, Token 15 (model.layers.out comparison):
- Original tensor sum: 18.643393
- Converted tensor sum: -7.624215
- Original tensor mean: 2.330424
- Converted tensor mean: -0.953027
- Mean difference: 3.99837518
- Maximum pointwise difference: 9.85657215
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 9.41628456, Converted: -0.44028741
- Biggest difference in row (0, 0), sum 18.643393 vs -7.624215
- Layer 3, Token 15 (model.layers.out comparison):
- Original tensor sum: 77.711205
- Converted tensor sum: -115.602707
- Original tensor mean: 9.713901
- Converted tensor mean: -14.450338
- Mean difference: 24.16423798
- Maximum pointwise difference: 33.14313507
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 17.84219551, Converted: -15.30093956
- Biggest difference in row (0, 0), sum 77.711205 vs -115.602707
- Layer 4, Token 15 (model.layers.out comparison):
- Original tensor sum: 71.264816
- Converted tensor sum: -87.184593
- Original tensor mean: 8.908102
- Converted tensor mean: -10.898074
- Mean difference: 19.80617714
- Maximum pointwise difference: 27.60903931
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 16.59056091, Converted: -11.01847839
- Biggest difference in row (0, 0), sum 71.264816 vs -87.184593
- Layer 5, Token 15 (model.layers.out comparison):
- Original tensor sum: 65.154488
- Converted tensor sum: -20.586208
- Original tensor mean: 8.144311
- Converted tensor mean: -2.573276
- Mean difference: 11.36003971
- Maximum pointwise difference: 17.89420700
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 3.50937057, Converted: -14.38483620
- Biggest difference in row (0, 0), sum 65.154488 vs -20.586208
- Layer 6, Token 15 (model.layers.out comparison):
- Original tensor sum: 62.447323
- Converted tensor sum: -39.734089
- Original tensor mean: 7.805915
- Converted tensor mean: -4.966761
- Mean difference: 12.77267647
- Maximum pointwise difference: 22.75133705
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 14.95188141, Converted: -7.79945612
- Biggest difference in row (0, 0), sum 62.447323 vs -39.734089
- Layer 7, Token 15 (model.layers.out comparison):
- Original tensor sum: 127.895920
- Converted tensor sum: -184.804230
- Original tensor mean: 15.986990
- Converted tensor mean: -23.100529
- Mean difference: 39.08751678
- Maximum pointwise difference: 51.54846191
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 16.32706261, Converted: -35.22139740
- Biggest difference in row (0, 0), sum 127.895920 vs -184.804230
- Layer 8, Token 15 (model.layers.out comparison):
- Original tensor sum: 109.946281
- Converted tensor sum: -183.545380
- Original tensor mean: 13.743285
- Converted tensor mean: -22.943172
- Mean difference: 36.68645859
- Maximum pointwise difference: 44.14192963
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 21.54407120, Converted: -22.59785843
- Biggest difference in row (0, 0), sum 109.946281 vs -183.545380
- Layer 9, Token 15 (model.layers.out comparison):
- Original tensor sum: 100.719040
- Converted tensor sum: -189.035889
- Original tensor mean: 12.589880
- Converted tensor mean: -23.629486
- Mean difference: 36.21936798
- Maximum pointwise difference: 49.71876526
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 20.62917519, Converted: -29.08958817
- Biggest difference in row (0, 0), sum 100.719040 vs -189.035889
- Layer 10, Token 15 (model.layers.out comparison):
- Original tensor sum: 94.437965
- Converted tensor sum: -184.073608
- Original tensor mean: 11.804746
- Converted tensor mean: -23.009201
- Mean difference: 34.81394577
- Maximum pointwise difference: 49.50559998
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 19.98403168, Converted: -29.52156830
- Biggest difference in row (0, 0), sum 94.437965 vs -184.073608
- Layer 11, Token 15 (model.layers.out comparison):
- Original tensor sum: 187.329086
- Converted tensor sum: -525.129150
- Original tensor mean: 23.416136
- Converted tensor mean: -65.641144
- Mean difference: 89.05728149
- Maximum pointwise difference: 114.85643005
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 32.40055466, Converted: -82.45587158
- Biggest difference in row (0, 0), sum 187.329086 vs -525.129150
- Layer 12, Token 15 (model.layers.out comparison):
- Original tensor sum: 189.391296
- Converted tensor sum: -524.645203
- Original tensor mean: 23.673912
- Converted tensor mean: -65.580650
- Mean difference: 89.25456238
- Maximum pointwise difference: 119.02915955
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 33.67853165, Converted: -85.35062408
- Biggest difference in row (0, 0), sum 189.391296 vs -524.645203
- Layer 13, Token 15 (model.layers.out comparison):
- Original tensor sum: 183.008652
- Converted tensor sum: -545.134033
- Original tensor mean: 22.876081
- Converted tensor mean: -68.141754
- Mean difference: 91.01783752
- Maximum pointwise difference: 119.28398132
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 33.81208420, Converted: -85.47189331
- Biggest difference in row (0, 0), sum 183.008652 vs -545.134033
- Layer 14, Token 15 (model.layers.out comparison):
- Original tensor sum: 179.184265
- Converted tensor sum: -590.197998
- Original tensor mean: 22.398033
- Converted tensor mean: -73.774750
- Mean difference: 96.17278290
- Maximum pointwise difference: 126.14685059
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 33.16656876, Converted: -92.98027802
- Biggest difference in row (0, 0), sum 179.184265 vs -590.197998
- Layer 15, Token 15 (model.layers.out comparison):
- Original tensor sum: 315.300140
- Converted tensor sum: -976.074097
- Original tensor mean: 39.412518
- Converted tensor mean: -122.009262
- Mean difference: 161.42178345
- Maximum pointwise difference: 201.52458191
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 52.86392212, Converted: -148.66065979
- Biggest difference in row (0, 0), sum 315.300140 vs -976.074097
- Layer 0, Token 16 (model.layers.out comparison):
- Original tensor sum: 12.044241
- Converted tensor sum: 14.548074
- Original tensor mean: 1.505530
- Converted tensor mean: 1.818509
- Mean difference: 3.51175261
- Maximum pointwise difference: 7.44231224
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -4.31869221, Converted: 3.12362027
- Biggest difference in row (0, 0), sum 12.044241 vs 14.548074
- Layer 1, Token 16 (model.layers.out comparison):
- Original tensor sum: 7.660315
- Converted tensor sum: 1.425261
- Original tensor mean: 0.957539
- Converted tensor mean: 0.178158
- Mean difference: 4.00331783
- Maximum pointwise difference: 8.79326248
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 3.55122566, Converted: -5.24203634
- Biggest difference in row (0, 0), sum 7.660315 vs 1.425261
- Layer 2, Token 16 (model.layers.out comparison):
- Original tensor sum: 5.985608
- Converted tensor sum: -2.881522
- Original tensor mean: 0.748201
- Converted tensor mean: -0.360190
- Mean difference: 6.00233269
- Maximum pointwise difference: 9.75814056
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 3.30634618, Converted: -6.45179462
- Biggest difference in row (0, 0), sum 5.985608 vs -2.881522
- Layer 3, Token 16 (model.layers.out comparison):
- Original tensor sum: 66.644623
- Converted tensor sum: 38.471397
- Original tensor mean: 8.330578
- Converted tensor mean: 4.808925
- Mean difference: 5.99987411
- Maximum pointwise difference: 11.70975304
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 11.37678432, Converted: -0.33296829
- Biggest difference in row (0, 0), sum 66.644623 vs 38.471397
- Layer 4, Token 16 (model.layers.out comparison):
- Original tensor sum: 55.084259
- Converted tensor sum: 39.585022
- Original tensor mean: 6.885532
- Converted tensor mean: 4.948128
- Mean difference: 5.54818344
- Maximum pointwise difference: 10.42512989
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 8.96806908, Converted: -1.45706093
- Biggest difference in row (0, 0), sum 55.084259 vs 39.585022
- Layer 5, Token 16 (model.layers.out comparison):
- Original tensor sum: 47.768257
- Converted tensor sum: 29.551674
- Original tensor mean: 5.971032
- Converted tensor mean: 3.693959
- Mean difference: 5.40017319
- Maximum pointwise difference: 11.83149147
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 9.62209320, Converted: -2.20939875
- Biggest difference in row (0, 0), sum 47.768257 vs 29.551674
- Layer 6, Token 16 (model.layers.out comparison):
- Original tensor sum: 47.378487
- Converted tensor sum: 33.471664
- Original tensor mean: 5.922311
- Converted tensor mean: 4.183958
- Mean difference: 5.35756683
- Maximum pointwise difference: 11.70071220
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 10.01993370, Converted: -1.68077850
- Biggest difference in row (0, 0), sum 47.378487 vs 33.471664
- Layer 7, Token 16 (model.layers.out comparison):
- Original tensor sum: 121.329849
- Converted tensor sum: 101.072693
- Original tensor mean: 15.166231
- Converted tensor mean: 12.634087
- Mean difference: 4.85845757
- Maximum pointwise difference: 11.92098331
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 18.39835739, Converted: 6.47737408
- Biggest difference in row (0, 0), sum 121.329849 vs 101.072693
- Layer 8, Token 16 (model.layers.out comparison):
- Original tensor sum: 105.626358
- Converted tensor sum: 92.869370
- Original tensor mean: 13.203295
- Converted tensor mean: 11.608671
- Mean difference: 5.01301622
- Maximum pointwise difference: 11.09072685
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 15.02331066, Converted: 3.93258405
- Biggest difference in row (0, 0), sum 105.626358 vs 92.869370
- Layer 9, Token 16 (model.layers.out comparison):
- Original tensor sum: 94.886589
- Converted tensor sum: 86.461792
- Original tensor mean: 11.860824
- Converted tensor mean: 10.807724
- Mean difference: 5.16425228
- Maximum pointwise difference: 10.79585648
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 2.03169847, Converted: 12.82755470
- Biggest difference in row (0, 0), sum 94.886589 vs 86.461792
- Layer 10, Token 16 (model.layers.out comparison):
- Original tensor sum: 93.657555
- Converted tensor sum: 77.932861
- Original tensor mean: 11.707194
- Converted tensor mean: 9.741608
- Mean difference: 5.07010078
- Maximum pointwise difference: 11.53797054
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 13.33782196, Converted: 1.79985178
- Biggest difference in row (0, 0), sum 93.657555 vs 77.932861
- Layer 11, Token 16 (model.layers.out comparison):
- Original tensor sum: 186.086578
- Converted tensor sum: 176.759811
- Original tensor mean: 23.260822
- Converted tensor mean: 22.094976
- Mean difference: 4.87584686
- Maximum pointwise difference: 10.12077332
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 14.05643463, Converted: 24.17720795
- Biggest difference in row (0, 0), sum 186.086578 vs 176.759811
- Layer 12, Token 16 (model.layers.out comparison):
- Original tensor sum: 188.253220
- Converted tensor sum: 173.150467
- Original tensor mean: 23.531652
- Converted tensor mean: 21.643808
- Mean difference: 5.08278847
- Maximum pointwise difference: 9.91738033
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 14.99966526, Converted: 24.91704559
- Biggest difference in row (0, 0), sum 188.253220 vs 173.150467
- Layer 13, Token 16 (model.layers.out comparison):
- Original tensor sum: 181.761749
- Converted tensor sum: 171.658249
- Original tensor mean: 22.720219
- Converted tensor mean: 21.457281
- Mean difference: 4.79229736
- Maximum pointwise difference: 9.82627106
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 14.16268539, Converted: 23.98895645
- Biggest difference in row (0, 0), sum 181.761749 vs 171.658249
- Layer 14, Token 16 (model.layers.out comparison):
- Original tensor sum: 176.198990
- Converted tensor sum: 170.420898
- Original tensor mean: 22.024874
- Converted tensor mean: 21.302612
- Mean difference: 4.28427029
- Maximum pointwise difference: 9.05801964
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 13.50310326, Converted: 22.56112289
- Biggest difference in row (0, 0), sum 176.198990 vs 170.420898
- Layer 15, Token 16 (model.layers.out comparison):
- Original tensor sum: 314.888916
- Converted tensor sum: 308.839905
- Original tensor mean: 39.361115
- Converted tensor mean: 38.604988
- Mean difference: 4.36002254
- Maximum pointwise difference: 9.44413185
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 25.14219856, Converted: 34.58633041
- Biggest difference in row (0, 0), sum 314.888916 vs 308.839905
- Layer 0, Token 17 (model.layers.out comparison):
- Original tensor sum: 6.615214
- Converted tensor sum: -14.476066
- Original tensor mean: 0.826902
- Converted tensor mean: -1.809508
- Mean difference: 4.01758480
- Maximum pointwise difference: 12.95696259
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 8.16467762, Converted: -4.79228544
- Biggest difference in row (0, 0), sum 6.615214 vs -14.476066
- Layer 1, Token 17 (model.layers.out comparison):
- Original tensor sum: 4.332821
- Converted tensor sum: -48.476418
- Original tensor mean: 0.541603
- Converted tensor mean: -6.059552
- Mean difference: 8.00736046
- Maximum pointwise difference: 13.83443928
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 7.88728952, Converted: -5.94714975
- Biggest difference in row (0, 0), sum 4.332821 vs -48.476418
- Layer 2, Token 17 (model.layers.out comparison):
- Original tensor sum: 13.631664
- Converted tensor sum: -24.375608
- Original tensor mean: 1.703958
- Converted tensor mean: -3.046951
- Mean difference: 9.48411465
- Maximum pointwise difference: 15.28743267
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 2.43811703, Converted: -12.84931564
- Biggest difference in row (0, 0), sum 13.631664 vs -24.375608
- Layer 3, Token 17 (model.layers.out comparison):
- Original tensor sum: 59.143936
- Converted tensor sum: -80.541725
- Original tensor mean: 7.392992
- Converted tensor mean: -10.067716
- Mean difference: 17.46070862
- Maximum pointwise difference: 28.83273697
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 9.60771275, Converted: -19.22502327
- Biggest difference in row (0, 0), sum 59.143936 vs -80.541725
- Layer 4, Token 17 (model.layers.out comparison):
- Original tensor sum: 51.750626
- Converted tensor sum: -81.567123
- Original tensor mean: 6.468828
- Converted tensor mean: -10.195890
- Mean difference: 17.13005066
- Maximum pointwise difference: 30.73341751
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 9.45896626, Converted: -21.27445221
- Biggest difference in row (0, 0), sum 51.750626 vs -81.567123
- Layer 5, Token 17 (model.layers.out comparison):
- Original tensor sum: 33.377792
- Converted tensor sum: -8.966677
- Original tensor mean: 4.172224
- Converted tensor mean: -1.120835
- Mean difference: 11.87618256
- Maximum pointwise difference: 19.17303848
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 0.59302533, Converted: 19.76606369
- Biggest difference in row (0, 0), sum 33.377792 vs -8.966677
- Layer 6, Token 17 (model.layers.out comparison):
- Original tensor sum: 34.373646
- Converted tensor sum: -17.893101
- Original tensor mean: 4.296706
- Converted tensor mean: -2.236638
- Mean difference: 12.44108009
- Maximum pointwise difference: 21.66391373
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 7.57746935, Converted: -14.08644485
- Biggest difference in row (0, 0), sum 34.373646 vs -17.893101
- Layer 7, Token 17 (model.layers.out comparison):
- Original tensor sum: 117.899002
- Converted tensor sum: -60.493092
- Original tensor mean: 14.737375
- Converted tensor mean: -7.561636
- Mean difference: 22.75322723
- Maximum pointwise difference: 41.73314667
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 20.46781158, Converted: -21.26533699
- Biggest difference in row (0, 0), sum 117.899002 vs -60.493092
- Layer 8, Token 17 (model.layers.out comparison):
- Original tensor sum: 102.151550
- Converted tensor sum: -53.178627
- Original tensor mean: 12.768944
- Converted tensor mean: -6.647328
- Mean difference: 21.35518456
- Maximum pointwise difference: 40.89769745
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 18.52126884, Converted: -22.37642860
- Biggest difference in row (0, 0), sum 102.151550 vs -53.178627
- Layer 9, Token 17 (model.layers.out comparison):
- Original tensor sum: 90.451920
- Converted tensor sum: -34.497658
- Original tensor mean: 11.306490
- Converted tensor mean: -4.312207
- Mean difference: 18.82321548
- Maximum pointwise difference: 37.83747864
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 18.08675385, Converted: -19.75072479
- Biggest difference in row (0, 0), sum 90.451920 vs -34.497658
- Layer 10, Token 17 (model.layers.out comparison):
- Original tensor sum: 87.881783
- Converted tensor sum: -25.459152
- Original tensor mean: 10.985223
- Converted tensor mean: -3.182394
- Mean difference: 17.43336678
- Maximum pointwise difference: 35.29803467
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 17.46567726, Converted: -17.83235931
- Biggest difference in row (0, 0), sum 87.881783 vs -25.459152
- Layer 11, Token 17 (model.layers.out comparison):
- Original tensor sum: 185.306732
- Converted tensor sum: -264.026886
- Original tensor mean: 23.163342
- Converted tensor mean: -33.003361
- Mean difference: 56.16670227
- Maximum pointwise difference: 73.40274048
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 27.15820312, Converted: -46.24454117
- Biggest difference in row (0, 0), sum 185.306732 vs -264.026886
- Layer 12, Token 17 (model.layers.out comparison):
- Original tensor sum: 186.018799
- Converted tensor sum: -238.738007
- Original tensor mean: 23.252350
- Converted tensor mean: -29.842251
- Mean difference: 53.09460068
- Maximum pointwise difference: 71.14258575
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 27.12987900, Converted: -44.01270676
- Biggest difference in row (0, 0), sum 186.018799 vs -238.738007
- Layer 13, Token 17 (model.layers.out comparison):
- Original tensor sum: 178.633179
- Converted tensor sum: -250.662323
- Original tensor mean: 22.329147
- Converted tensor mean: -31.332790
- Mean difference: 53.66194153
- Maximum pointwise difference: 72.33184814
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 22.06610680, Converted: -50.26573944
- Biggest difference in row (0, 0), sum 178.633179 vs -250.662323
- Layer 14, Token 17 (model.layers.out comparison):
- Original tensor sum: 171.761902
- Converted tensor sum: -301.707916
- Original tensor mean: 21.470238
- Converted tensor mean: -37.713490
- Mean difference: 59.18372726
- Maximum pointwise difference: 84.33922577
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 21.43586349, Converted: -62.90336227
- Biggest difference in row (0, 0), sum 171.761902 vs -301.707916
- Layer 15, Token 17 (model.layers.out comparison):
- Original tensor sum: 313.503632
- Converted tensor sum: -672.745667
- Original tensor mean: 39.187954
- Converted tensor mean: -84.093208
- Mean difference: 123.28115845
- Maximum pointwise difference: 153.27690125
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 38.26152039, Converted: -115.01538086
- Biggest difference in row (0, 0), sum 313.503632 vs -672.745667
- Layer 0, Token 18 (model.layers.out comparison):
- Original tensor sum: 37.370514
- Converted tensor sum: 2.800200
- Original tensor mean: 4.671314
- Converted tensor mean: 0.350025
- Mean difference: 5.54810905
- Maximum pointwise difference: 9.22967339
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 9.28797436, Converted: 0.05830121
- Biggest difference in row (0, 0), sum 37.370514 vs 2.800200
- Layer 1, Token 18 (model.layers.out comparison):
- Original tensor sum: 27.386568
- Converted tensor sum: -8.815313
- Original tensor mean: 3.423321
- Converted tensor mean: -1.101914
- Mean difference: 5.46173763
- Maximum pointwise difference: 11.23313618
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 7.72619963, Converted: -3.50693655
- Biggest difference in row (0, 0), sum 27.386568 vs -8.815313
- Layer 2, Token 18 (model.layers.out comparison):
- Original tensor sum: 22.950966
- Converted tensor sum: -26.951405
- Original tensor mean: 2.868871
- Converted tensor mean: -3.368926
- Mean difference: 7.41814232
- Maximum pointwise difference: 14.15112782
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 7.93941879, Converted: -6.21170902
- Biggest difference in row (0, 0), sum 22.950966 vs -26.951405
- Layer 3, Token 18 (model.layers.out comparison):
- Original tensor sum: 75.358887
- Converted tensor sum: -194.584152
- Original tensor mean: 9.419861
- Converted tensor mean: -24.323019
- Mean difference: 33.74287796
- Maximum pointwise difference: 39.03241730
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 16.72500801, Converted: -22.30740929
- Biggest difference in row (0, 0), sum 75.358887 vs -194.584152
- Layer 4, Token 18 (model.layers.out comparison):
- Original tensor sum: 63.885963
- Converted tensor sum: -193.801666
- Original tensor mean: 7.985745
- Converted tensor mean: -24.225208
- Mean difference: 32.21095276
- Maximum pointwise difference: 39.04253769
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 15.83776665, Converted: -23.20477104
- Biggest difference in row (0, 0), sum 63.885963 vs -193.801666
- Layer 5, Token 18 (model.layers.out comparison):
- Original tensor sum: 51.427219
- Converted tensor sum: -189.920349
- Original tensor mean: 6.428402
- Converted tensor mean: -23.740044
- Mean difference: 30.16844559
- Maximum pointwise difference: 35.64602280
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 8.10052967, Converted: -27.54549408
- Biggest difference in row (0, 0), sum 51.427219 vs -189.920349
- Layer 6, Token 18 (model.layers.out comparison):
- Original tensor sum: 52.837097
- Converted tensor sum: -237.793671
- Original tensor mean: 6.604637
- Converted tensor mean: -29.724209
- Mean difference: 36.32884598
- Maximum pointwise difference: 41.40105438
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 7.84163952, Converted: -33.55941391
- Biggest difference in row (0, 0), sum 52.837097 vs -237.793671
- Layer 7, Token 18 (model.layers.out comparison):
- Original tensor sum: 129.848618
- Converted tensor sum: -405.475128
- Original tensor mean: 16.231077
- Converted tensor mean: -50.684391
- Mean difference: 66.91546631
- Maximum pointwise difference: 75.46723938
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 22.33297348, Converted: -53.13426590
- Biggest difference in row (0, 0), sum 129.848618 vs -405.475128
- Layer 8, Token 18 (model.layers.out comparison):
- Original tensor sum: 112.813950
- Converted tensor sum: -388.213379
- Original tensor mean: 14.101744
- Converted tensor mean: -48.526672
- Mean difference: 62.62841415
- Maximum pointwise difference: 74.58121490
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 20.05025291, Converted: -54.53096390
- Biggest difference in row (0, 0), sum 112.813950 vs -388.213379
- Layer 9, Token 18 (model.layers.out comparison):
- Original tensor sum: 98.625351
- Converted tensor sum: -428.683411
- Original tensor mean: 12.328169
- Converted tensor mean: -53.585426
- Mean difference: 65.91359711
- Maximum pointwise difference: 78.76679230
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 5.12599134, Converted: -73.64080048
- Biggest difference in row (0, 0), sum 98.625351 vs -428.683411
- Layer 10, Token 18 (model.layers.out comparison):
- Original tensor sum: 93.009445
- Converted tensor sum: -432.554626
- Original tensor mean: 11.626181
- Converted tensor mean: -54.069328
- Mean difference: 65.69551086
- Maximum pointwise difference: 76.13760376
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 4.86473036, Converted: -71.27287292
- Biggest difference in row (0, 0), sum 93.009445 vs -432.554626
- Layer 11, Token 18 (model.layers.out comparison):
- Original tensor sum: 188.645950
- Converted tensor sum: -772.146790
- Original tensor mean: 23.580744
- Converted tensor mean: -96.518349
- Mean difference: 120.09909058
- Maximum pointwise difference: 140.57998657
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 17.72886276, Converted: -122.85112000
- Biggest difference in row (0, 0), sum 188.645950 vs -772.146790
- Layer 12, Token 18 (model.layers.out comparison):
- Original tensor sum: 191.028870
- Converted tensor sum: -781.472900
- Original tensor mean: 23.878609
- Converted tensor mean: -97.684113
- Mean difference: 121.56272888
- Maximum pointwise difference: 143.88111877
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 18.69833946, Converted: -125.18278503
- Biggest difference in row (0, 0), sum 191.028870 vs -781.472900
- Layer 13, Token 18 (model.layers.out comparison):
- Original tensor sum: 183.829086
- Converted tensor sum: -808.856689
- Original tensor mean: 22.978636
- Converted tensor mean: -101.107086
- Mean difference: 124.08572388
- Maximum pointwise difference: 147.60656738
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 18.44003105, Converted: -129.16653442
- Biggest difference in row (0, 0), sum 183.829086 vs -808.856689
- Layer 14, Token 18 (model.layers.out comparison):
- Original tensor sum: 177.643005
- Converted tensor sum: -844.687622
- Original tensor mean: 22.205376
- Converted tensor mean: -105.585953
- Mean difference: 127.79132843
- Maximum pointwise difference: 148.00994873
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 17.69933319, Converted: -130.31060791
- Biggest difference in row (0, 0), sum 177.643005 vs -844.687622
- Layer 15, Token 18 (model.layers.out comparison):
- Original tensor sum: 320.725769
- Converted tensor sum: -1234.242676
- Original tensor mean: 40.090721
- Converted tensor mean: -154.280334
- Mean difference: 194.37104797
- Maximum pointwise difference: 225.51652527
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 39.54684830, Converted: -185.96968079
- Biggest difference in row (0, 0), sum 320.725769 vs -1234.242676
- Layer 0, Token 19 (model.layers.out comparison):
- Original tensor sum: -9.932329
- Converted tensor sum: -1.418950
- Original tensor mean: -1.241541
- Converted tensor mean: -0.177369
- Mean difference: 1.91613591
- Maximum pointwise difference: 5.37744808
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -4.92564631, Converted: 0.45180166
- Biggest difference in row (0, 0), sum -9.932329 vs -1.418950
- Layer 1, Token 19 (model.layers.out comparison):
- Original tensor sum: -28.079020
- Converted tensor sum: 4.360578
- Original tensor mean: -3.509877
- Converted tensor mean: 0.545072
- Mean difference: 4.81566954
- Maximum pointwise difference: 12.93084526
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -11.63085365, Converted: 1.29999185
- Biggest difference in row (0, 0), sum -28.079020 vs 4.360578
- Layer 2, Token 19 (model.layers.out comparison):
- Original tensor sum: -9.719646
- Converted tensor sum: 14.192688
- Original tensor mean: -1.214956
- Converted tensor mean: 1.774086
- Mean difference: 5.83081627
- Maximum pointwise difference: 15.01737213
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -10.13109303, Converted: 4.88627911
- Biggest difference in row (0, 0), sum -9.719646 vs 14.192688
- Layer 3, Token 19 (model.layers.out comparison):
- Original tensor sum: -78.071198
- Converted tensor sum: 44.287003
- Original tensor mean: -9.758900
- Converted tensor mean: 5.535875
- Mean difference: 15.29477501
- Maximum pointwise difference: 25.90341759
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -17.29398918, Converted: 8.60942841
- Biggest difference in row (0, 0), sum -78.071198 vs 44.287003
- Layer 4, Token 19 (model.layers.out comparison):
- Original tensor sum: -17.936802
- Converted tensor sum: 43.255585
- Original tensor mean: -2.242100
- Converted tensor mean: 5.406948
- Mean difference: 9.52408981
- Maximum pointwise difference: 16.11044312
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -8.10731792, Converted: 8.00312424
- Biggest difference in row (0, 0), sum -17.936802 vs 43.255585
- Layer 5, Token 19 (model.layers.out comparison):
- Original tensor sum: 14.270342
- Converted tensor sum: 40.868690
- Original tensor mean: 1.783793
- Converted tensor mean: 5.108586
- Mean difference: 6.39925480
- Maximum pointwise difference: 13.00582123
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -10.49264050, Converted: 2.51318097
- Biggest difference in row (0, 0), sum 14.270342 vs 40.868690
- Layer 6, Token 19 (model.layers.out comparison):
- Original tensor sum: 8.770991
- Converted tensor sum: 44.250122
- Original tensor mean: 1.096374
- Converted tensor mean: 5.531265
- Mean difference: 7.05475235
- Maximum pointwise difference: 14.57606697
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -11.80261707, Converted: 2.77344990
- Biggest difference in row (0, 0), sum 8.770991 vs 44.250122
- Layer 7, Token 19 (model.layers.out comparison):
- Original tensor sum: 27.567080
- Converted tensor sum: 110.976578
- Original tensor mean: 3.445885
- Converted tensor mean: 13.872072
- Mean difference: 11.90625381
- Maximum pointwise difference: 20.18301392
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -9.75880718, Converted: 10.42420769
- Biggest difference in row (0, 0), sum 27.567080 vs 110.976578
- Layer 8, Token 19 (model.layers.out comparison):
- Original tensor sum: 12.723747
- Converted tensor sum: 112.570312
- Original tensor mean: 1.590468
- Converted tensor mean: 14.071289
- Mean difference: 12.89592552
- Maximum pointwise difference: 20.84409904
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -12.16371441, Converted: 8.68038464
- Biggest difference in row (0, 0), sum 12.723747 vs 112.570312
- Layer 9, Token 19 (model.layers.out comparison):
- Original tensor sum: 10.056442
- Converted tensor sum: 106.334442
- Original tensor mean: 1.257055
- Converted tensor mean: 13.291805
- Mean difference: 12.47594643
- Maximum pointwise difference: 22.08431053
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -14.35114861, Converted: 7.73316193
- Biggest difference in row (0, 0), sum 10.056442 vs 106.334442
- Layer 10, Token 19 (model.layers.out comparison):
- Original tensor sum: -1.989794
- Converted tensor sum: 99.182007
- Original tensor mean: -0.248724
- Converted tensor mean: 12.397751
- Mean difference: 13.17310143
- Maximum pointwise difference: 24.05181694
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -17.15268326, Converted: 6.89913368
- Biggest difference in row (0, 0), sum -1.989794 vs 99.182007
- Layer 11, Token 19 (model.layers.out comparison):
- Original tensor sum: 67.349617
- Converted tensor sum: 188.920929
- Original tensor mean: 8.418702
- Converted tensor mean: 23.615116
- Mean difference: 15.19641399
- Maximum pointwise difference: 29.17947769
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -10.99394608, Converted: 18.18553162
- Biggest difference in row (0, 0), sum 67.349617 vs 188.920929
- Layer 12, Token 19 (model.layers.out comparison):
- Original tensor sum: 65.645859
- Converted tensor sum: 187.996002
- Original tensor mean: 8.205732
- Converted tensor mean: 23.499500
- Mean difference: 15.29376984
- Maximum pointwise difference: 29.97419739
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -11.02998257, Converted: 18.94421577
- Biggest difference in row (0, 0), sum 65.645859 vs 187.996002
- Layer 13, Token 19 (model.layers.out comparison):
- Original tensor sum: 62.775318
- Converted tensor sum: 186.939407
- Original tensor mean: 7.846915
- Converted tensor mean: 23.367426
- Mean difference: 15.52051163
- Maximum pointwise difference: 30.78374863
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -11.59408474, Converted: 19.18966293
- Biggest difference in row (0, 0), sum 62.775318 vs 186.939407
- Layer 14, Token 19 (model.layers.out comparison):
- Original tensor sum: 66.572449
- Converted tensor sum: 192.538483
- Original tensor mean: 8.321556
- Converted tensor mean: 24.067310
- Mean difference: 15.74575615
- Maximum pointwise difference: 32.26174927
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -11.68817997, Converted: 20.57357025
- Biggest difference in row (0, 0), sum 66.572449 vs 192.538483
- Layer 15, Token 19 (model.layers.out comparison):
- Original tensor sum: 224.145126
- Converted tensor sum: 325.050964
- Original tensor mean: 28.018141
- Converted tensor mean: 40.631371
- Mean difference: 13.45689964
- Maximum pointwise difference: 28.69198799
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 9.77963829, Converted: 38.47162628
- Biggest difference in row (0, 0), sum 224.145126 vs 325.050964
- Layer 0, Token 20 (model.layers.out comparison):
- Original tensor sum: -29.569780
- Converted tensor sum: 10.794893
- Original tensor mean: -3.696223
- Converted tensor mean: 1.349362
- Mean difference: 6.44896221
- Maximum pointwise difference: 13.91718292
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -9.61637592, Converted: 4.30080748
- Biggest difference in row (0, 0), sum -29.569780 vs 10.794893
- Layer 1, Token 20 (model.layers.out comparison):
- Original tensor sum: 1.025735
- Converted tensor sum: 6.199029
- Original tensor mean: 0.128217
- Converted tensor mean: 0.774879
- Mean difference: 7.59240437
- Maximum pointwise difference: 14.00857544
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -9.50434303, Converted: 4.50423241
- Biggest difference in row (0, 0), sum 1.025735 vs 6.199029
- Layer 2, Token 20 (model.layers.out comparison):
- Original tensor sum: 17.293440
- Converted tensor sum: 7.479863
- Original tensor mean: 2.161680
- Converted tensor mean: 0.934983
- Mean difference: 7.88275719
- Maximum pointwise difference: 14.18584061
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -8.63929176, Converted: 5.54654837
- Biggest difference in row (0, 0), sum 17.293440 vs 7.479863
- Layer 3, Token 20 (model.layers.out comparison):
- Original tensor sum: 36.610168
- Converted tensor sum: 49.467545
- Original tensor mean: 4.576271
- Converted tensor mean: 6.183443
- Mean difference: 6.93841553
- Maximum pointwise difference: 18.18937302
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -5.24265194, Converted: 12.94672108
- Biggest difference in row (0, 0), sum 36.610168 vs 49.467545
- Layer 4, Token 20 (model.layers.out comparison):
- Original tensor sum: 29.254171
- Converted tensor sum: 47.750710
- Original tensor mean: 3.656771
- Converted tensor mean: 5.968839
- Mean difference: 7.21544361
- Maximum pointwise difference: 18.54884338
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -5.63391066, Converted: 12.91493225
- Biggest difference in row (0, 0), sum 29.254171 vs 47.750710
- Layer 5, Token 20 (model.layers.out comparison):
- Original tensor sum: 35.151703
- Converted tensor sum: 48.878067
- Original tensor mean: 4.393963
- Converted tensor mean: 6.109758
- Mean difference: 6.99968100
- Maximum pointwise difference: 14.96766090
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -3.65913010, Converted: 11.30853081
- Biggest difference in row (0, 0), sum 35.151703 vs 48.878067
- Layer 6, Token 20 (model.layers.out comparison):
- Original tensor sum: 30.034544
- Converted tensor sum: 47.318748
- Original tensor mean: 3.754318
- Converted tensor mean: 5.914844
- Mean difference: 7.24886227
- Maximum pointwise difference: 14.76261425
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -3.74199128, Converted: 11.02062321
- Biggest difference in row (0, 0), sum 30.034544 vs 47.318748
- Layer 7, Token 20 (model.layers.out comparison):
- Original tensor sum: 93.501678
- Converted tensor sum: 109.843590
- Original tensor mean: 11.687710
- Converted tensor mean: 13.730449
- Mean difference: 6.95008612
- Maximum pointwise difference: 15.00504684
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 6.03040743, Converted: 21.03545380
- Biggest difference in row (0, 0), sum 93.501678 vs 109.843590
- Layer 8, Token 20 (model.layers.out comparison):
- Original tensor sum: 79.472687
- Converted tensor sum: 102.823357
- Original tensor mean: 9.934086
- Converted tensor mean: 12.852920
- Mean difference: 7.54766369
- Maximum pointwise difference: 16.02755737
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 5.07132435, Converted: 21.09888077
- Biggest difference in row (0, 0), sum 79.472687 vs 102.823357
- Layer 9, Token 20 (model.layers.out comparison):
- Original tensor sum: 67.706139
- Converted tensor sum: 99.777931
- Original tensor mean: 8.463267
- Converted tensor mean: 12.472241
- Mean difference: 8.86232471
- Maximum pointwise difference: 16.78725052
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 4.42850208, Converted: 21.21575165
- Biggest difference in row (0, 0), sum 67.706139 vs 99.777931
- Layer 10, Token 20 (model.layers.out comparison):
- Original tensor sum: 63.760403
- Converted tensor sum: 96.691109
- Original tensor mean: 7.970050
- Converted tensor mean: 12.086389
- Mean difference: 9.02034378
- Maximum pointwise difference: 16.24016762
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 4.14136124, Converted: 20.38152885
- Biggest difference in row (0, 0), sum 63.760403 vs 96.691109
- Layer 11, Token 20 (model.layers.out comparison):
- Original tensor sum: 158.635681
- Converted tensor sum: 194.330322
- Original tensor mean: 19.829460
- Converted tensor mean: 24.291290
- Mean difference: 8.75148964
- Maximum pointwise difference: 16.25316620
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 15.08195591, Converted: 31.33512306
- Biggest difference in row (0, 0), sum 158.635681 vs 194.330322
- Layer 12, Token 20 (model.layers.out comparison):
- Original tensor sum: 159.106079
- Converted tensor sum: 194.084503
- Original tensor mean: 19.888260
- Converted tensor mean: 24.260563
- Mean difference: 8.75931835
- Maximum pointwise difference: 16.29665756
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 14.28990650, Converted: 30.58656502
- Biggest difference in row (0, 0), sum 159.106079 vs 194.084503
- Layer 13, Token 20 (model.layers.out comparison):
- Original tensor sum: 153.442200
- Converted tensor sum: 186.870270
- Original tensor mean: 19.180275
- Converted tensor mean: 23.358784
- Mean difference: 8.66864204
- Maximum pointwise difference: 15.29904747
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 14.04843903, Converted: 29.34748650
- Biggest difference in row (0, 0), sum 153.442200 vs 186.870270
- Layer 14, Token 20 (model.layers.out comparison):
- Original tensor sum: 147.691605
- Converted tensor sum: 175.338470
- Original tensor mean: 18.461451
- Converted tensor mean: 21.917309
- Mean difference: 8.84063625
- Maximum pointwise difference: 15.63497734
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 13.52752876, Converted: 29.16250610
- Biggest difference in row (0, 0), sum 147.691605 vs 175.338470
- Layer 15, Token 20 (model.layers.out comparison):
- Original tensor sum: 294.136749
- Converted tensor sum: 310.250946
- Original tensor mean: 36.767094
- Converted tensor mean: 38.781368
- Mean difference: 9.18845367
- Maximum pointwise difference: 14.23109627
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 30.77650642, Converted: 45.00760269
- Biggest difference in row (0, 0), sum 294.136749 vs 310.250946
- Layer 0, Token 21 (model.layers.out comparison):
- Original tensor sum: -18.838482
- Converted tensor sum: -1.325968
- Original tensor mean: -2.354810
- Converted tensor mean: -0.165746
- Mean difference: 2.79272628
- Maximum pointwise difference: 6.95248222
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -6.02015686, Converted: 0.93232512
- Biggest difference in row (0, 0), sum -18.838482 vs -1.325968
- Layer 1, Token 21 (model.layers.out comparison):
- Original tensor sum: -6.250936
- Converted tensor sum: -2.277201
- Original tensor mean: -0.781367
- Converted tensor mean: -0.284650
- Mean difference: 5.05594349
- Maximum pointwise difference: 9.99544907
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 6.23908186, Converted: -3.75636768
- Biggest difference in row (0, 0), sum -6.250936 vs -2.277201
- Layer 2, Token 21 (model.layers.out comparison):
- Original tensor sum: -2.587172
- Converted tensor sum: 0.977817
- Original tensor mean: -0.323396
- Converted tensor mean: 0.122227
- Mean difference: 3.66970563
- Maximum pointwise difference: 9.02869225
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -7.89160728, Converted: 1.13708520
- Biggest difference in row (0, 0), sum -2.587172 vs 0.977817
- Layer 3, Token 21 (model.layers.out comparison):
- Original tensor sum: -37.525734
- Converted tensor sum: 5.221979
- Original tensor mean: -4.690717
- Converted tensor mean: 0.652747
- Mean difference: 6.04651690
- Maximum pointwise difference: 12.38726807
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -10.43804359, Converted: 1.94922423
- Biggest difference in row (0, 0), sum -37.525734 vs 5.221979
- Layer 4, Token 21 (model.layers.out comparison):
- Original tensor sum: 4.066291
- Converted tensor sum: 13.447447
- Original tensor mean: 0.508286
- Converted tensor mean: 1.680931
- Mean difference: 5.62788296
- Maximum pointwise difference: 14.04961491
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -10.56051826, Converted: 3.48909688
- Biggest difference in row (0, 0), sum 4.066291 vs 13.447447
- Layer 5, Token 21 (model.layers.out comparison):
- Original tensor sum: 22.123846
- Converted tensor sum: 14.835675
- Original tensor mean: 2.765481
- Converted tensor mean: 1.854459
- Mean difference: 5.25254917
- Maximum pointwise difference: 11.90699482
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 7.93798828, Converted: -3.96900630
- Biggest difference in row (0, 0), sum 22.123846 vs 14.835675
- Layer 6, Token 21 (model.layers.out comparison):
- Original tensor sum: 22.319403
- Converted tensor sum: 11.047790
- Original tensor mean: 2.789925
- Converted tensor mean: 1.380974
- Mean difference: 5.50898457
- Maximum pointwise difference: 13.00136471
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 7.73285818, Converted: -5.26850653
- Biggest difference in row (0, 0), sum 22.319403 vs 11.047790
- Layer 7, Token 21 (model.layers.out comparison):
- Original tensor sum: 55.420013
- Converted tensor sum: 74.081238
- Original tensor mean: 6.927502
- Converted tensor mean: 9.260155
- Mean difference: 5.90270138
- Maximum pointwise difference: 12.46957588
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -1.65777194, Converted: 10.81180382
- Biggest difference in row (0, 0), sum 55.420013 vs 74.081238
- Layer 8, Token 21 (model.layers.out comparison):
- Original tensor sum: 39.922848
- Converted tensor sum: 72.282196
- Original tensor mean: 4.990356
- Converted tensor mean: 9.035275
- Mean difference: 6.12995577
- Maximum pointwise difference: 13.29505730
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -3.36732197, Converted: 9.92773533
- Biggest difference in row (0, 0), sum 39.922848 vs 72.282196
- Layer 9, Token 21 (model.layers.out comparison):
- Original tensor sum: 29.193859
- Converted tensor sum: 64.425896
- Original tensor mean: 3.649232
- Converted tensor mean: 8.053237
- Mean difference: 6.22422409
- Maximum pointwise difference: 14.81417084
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -3.98170996, Converted: 10.83246040
- Biggest difference in row (0, 0), sum 29.193859 vs 64.425896
- Layer 10, Token 21 (model.layers.out comparison):
- Original tensor sum: 23.706369
- Converted tensor sum: 55.726307
- Original tensor mean: 2.963296
- Converted tensor mean: 6.965788
- Mean difference: 6.04786444
- Maximum pointwise difference: 14.39242363
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -4.86538124, Converted: 9.52704239
- Biggest difference in row (0, 0), sum 23.706369 vs 55.726307
- Layer 11, Token 21 (model.layers.out comparison):
- Original tensor sum: 123.990646
- Converted tensor sum: 150.405350
- Original tensor mean: 15.498831
- Converted tensor mean: 18.800669
- Mean difference: 5.61389732
- Maximum pointwise difference: 14.70817947
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 6.49463272, Converted: 21.20281219
- Biggest difference in row (0, 0), sum 123.990646 vs 150.405350
- Layer 12, Token 21 (model.layers.out comparison):
- Original tensor sum: 120.701889
- Converted tensor sum: 144.158798
- Original tensor mean: 15.087736
- Converted tensor mean: 18.019850
- Mean difference: 5.24121237
- Maximum pointwise difference: 14.31963730
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 5.24581337, Converted: 19.56545067
- Biggest difference in row (0, 0), sum 120.701889 vs 144.158798
- Layer 13, Token 21 (model.layers.out comparison):
- Original tensor sum: 114.196152
- Converted tensor sum: 142.528229
- Original tensor mean: 14.274519
- Converted tensor mean: 17.816029
- Mean difference: 5.27994871
- Maximum pointwise difference: 14.28137684
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 4.50468159, Converted: 18.78605843
- Biggest difference in row (0, 0), sum 114.196152 vs 142.528229
- Layer 14, Token 21 (model.layers.out comparison):
- Original tensor sum: 109.654587
- Converted tensor sum: 141.504807
- Original tensor mean: 13.706823
- Converted tensor mean: 17.688101
- Mean difference: 5.26909733
- Maximum pointwise difference: 15.04267311
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 3.59476113, Converted: 18.63743401
- Biggest difference in row (0, 0), sum 109.654587 vs 141.504807
- Layer 15, Token 21 (model.layers.out comparison):
- Original tensor sum: 258.799988
- Converted tensor sum: 280.546570
- Original tensor mean: 32.349998
- Converted tensor mean: 35.068321
- Mean difference: 5.38046169
- Maximum pointwise difference: 13.79010963
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 21.50290108, Converted: 35.29301071
- Biggest difference in row (0, 0), sum 258.799988 vs 280.546570
- Layer 0, Token 22 (model.layers.out comparison):
- Original tensor sum: 22.958118
- Converted tensor sum: -3.202849
- Original tensor mean: 2.869765
- Converted tensor mean: -0.400356
- Mean difference: 4.91125917
- Maximum pointwise difference: 8.36230850
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.21100616, Converted: 0.84869760
- Biggest difference in row (0, 0), sum 22.958118 vs -3.202849
- Layer 1, Token 22 (model.layers.out comparison):
- Original tensor sum: 25.125549
- Converted tensor sum: -10.143456
- Original tensor mean: 3.140694
- Converted tensor mean: -1.267932
- Mean difference: 5.89313412
- Maximum pointwise difference: 9.59223843
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.62790585, Converted: 0.03566782
- Biggest difference in row (0, 0), sum 25.125549 vs -10.143456
- Layer 2, Token 22 (model.layers.out comparison):
- Original tensor sum: 27.315422
- Converted tensor sum: -15.748328
- Original tensor mean: 3.414428
- Converted tensor mean: -1.968541
- Mean difference: 8.97875500
- Maximum pointwise difference: 14.55634785
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 2.85774899, Converted: -11.69859886
- Biggest difference in row (0, 0), sum 27.315422 vs -15.748328
- Layer 3, Token 22 (model.layers.out comparison):
- Original tensor sum: 65.650429
- Converted tensor sum: -88.889626
- Original tensor mean: 8.206304
- Converted tensor mean: -11.111203
- Mean difference: 19.31750679
- Maximum pointwise difference: 27.03379250
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 6.11478758, Converted: -20.91900444
- Biggest difference in row (0, 0), sum 65.650429 vs -88.889626
- Layer 4, Token 22 (model.layers.out comparison):
- Original tensor sum: 61.788639
- Converted tensor sum: -42.131989
- Original tensor mean: 7.723580
- Converted tensor mean: -5.266499
- Mean difference: 12.99007797
- Maximum pointwise difference: 18.81860924
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 14.83176613, Converted: -3.98684263
- Biggest difference in row (0, 0), sum 61.788639 vs -42.131989
- Layer 5, Token 22 (model.layers.out comparison):
- Original tensor sum: 57.004955
- Converted tensor sum: 4.555844
- Original tensor mean: 7.125619
- Converted tensor mean: 0.569481
- Mean difference: 8.63973427
- Maximum pointwise difference: 19.13692093
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 2.54869914, Converted: -16.58822250
- Biggest difference in row (0, 0), sum 57.004955 vs 4.555844
- Layer 6, Token 22 (model.layers.out comparison):
- Original tensor sum: 54.908669
- Converted tensor sum: -0.669161
- Original tensor mean: 6.863584
- Converted tensor mean: -0.083645
- Mean difference: 8.70907402
- Maximum pointwise difference: 18.54141235
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 2.42641473, Converted: -16.11499786
- Biggest difference in row (0, 0), sum 54.908669 vs -0.669161
- Layer 7, Token 22 (model.layers.out comparison):
- Original tensor sum: 125.605499
- Converted tensor sum: -1.624224
- Original tensor mean: 15.700687
- Converted tensor mean: -0.203028
- Mean difference: 15.90371513
- Maximum pointwise difference: 27.27110672
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 10.48501492, Converted: -16.78609276
- Biggest difference in row (0, 0), sum 125.605499 vs -1.624224
- Layer 8, Token 22 (model.layers.out comparison):
- Original tensor sum: 109.340508
- Converted tensor sum: -1.809371
- Original tensor mean: 13.667563
- Converted tensor mean: -0.226171
- Mean difference: 14.19305420
- Maximum pointwise difference: 24.19651794
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 6.68867207, Converted: -17.50784492
- Biggest difference in row (0, 0), sum 109.340508 vs -1.809371
- Layer 9, Token 22 (model.layers.out comparison):
- Original tensor sum: 93.036400
- Converted tensor sum: -10.185041
- Original tensor mean: 11.629550
- Converted tensor mean: -1.273130
- Mean difference: 13.22967815
- Maximum pointwise difference: 22.35823822
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 2.86865139, Converted: -19.48958588
- Biggest difference in row (0, 0), sum 93.036400 vs -10.185041
- Layer 10, Token 22 (model.layers.out comparison):
- Original tensor sum: 85.756668
- Converted tensor sum: -2.302891
- Original tensor mean: 10.719584
- Converted tensor mean: -0.287861
- Mean difference: 11.49190331
- Maximum pointwise difference: 20.63401985
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 19.00829315, Converted: -1.62572634
- Biggest difference in row (0, 0), sum 85.756668 vs -2.302891
- Layer 11, Token 22 (model.layers.out comparison):
- Original tensor sum: 182.162292
- Converted tensor sum: -8.586711
- Original tensor mean: 22.770287
- Converted tensor mean: -1.073339
- Mean difference: 23.84362602
- Maximum pointwise difference: 34.19173050
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 13.77398682, Converted: -20.41774368
- Biggest difference in row (0, 0), sum 182.162292 vs -8.586711
- Layer 12, Token 22 (model.layers.out comparison):
- Original tensor sum: 182.096252
- Converted tensor sum: -6.677206
- Original tensor mean: 22.762032
- Converted tensor mean: -0.834651
- Mean difference: 23.59668159
- Maximum pointwise difference: 35.20670319
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 14.20073891, Converted: -21.00596619
- Biggest difference in row (0, 0), sum 182.096252 vs -6.677206
- Layer 13, Token 22 (model.layers.out comparison):
- Original tensor sum: 176.400360
- Converted tensor sum: -0.142300
- Original tensor mean: 22.050045
- Converted tensor mean: -0.017787
- Mean difference: 22.06783295
- Maximum pointwise difference: 34.37791824
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 13.55050278, Converted: -20.82741547
- Biggest difference in row (0, 0), sum 176.400360 vs -0.142300
- Layer 14, Token 22 (model.layers.out comparison):
- Original tensor sum: 169.308212
- Converted tensor sum: 22.573196
- Original tensor mean: 21.163527
- Converted tensor mean: 2.821650
- Mean difference: 18.34187508
- Maximum pointwise difference: 31.75983810
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 11.94197941, Converted: -19.81785965
- Biggest difference in row (0, 0), sum 169.308212 vs 22.573196
- Layer 15, Token 22 (model.layers.out comparison):
- Original tensor sum: 321.080658
- Converted tensor sum: 136.787018
- Original tensor mean: 40.135082
- Converted tensor mean: 17.098377
- Mean difference: 23.03670502
- Maximum pointwise difference: 37.83760452
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 31.89689064, Converted: -5.94071388
- Biggest difference in row (0, 0), sum 321.080658 vs 136.787018
- Layer 0, Token 23 (model.layers.out comparison):
- Original tensor sum: 3.588341
- Converted tensor sum: 9.359616
- Original tensor mean: 0.448543
- Converted tensor mean: 1.169952
- Mean difference: 3.70246077
- Maximum pointwise difference: 5.65140629
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -1.32952428, Converted: 4.32188225
- Biggest difference in row (0, 0), sum 3.588341 vs 9.359616
- Layer 1, Token 23 (model.layers.out comparison):
- Original tensor sum: -13.513486
- Converted tensor sum: 3.000220
- Original tensor mean: -1.689186
- Converted tensor mean: 0.375028
- Mean difference: 3.49640799
- Maximum pointwise difference: 8.52665997
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -4.16102409, Converted: 4.36563587
- Biggest difference in row (0, 0), sum -13.513486 vs 3.000220
- Layer 2, Token 23 (model.layers.out comparison):
- Original tensor sum: -19.782562
- Converted tensor sum: 4.253428
- Original tensor mean: -2.472820
- Converted tensor mean: 0.531678
- Mean difference: 5.22110939
- Maximum pointwise difference: 11.62318039
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -9.56802559, Converted: 2.05515456
- Biggest difference in row (0, 0), sum -19.782562 vs 4.253428
- Layer 3, Token 23 (model.layers.out comparison):
- Original tensor sum: -117.794266
- Converted tensor sum: 14.072861
- Original tensor mean: -14.724283
- Converted tensor mean: 1.759108
- Mean difference: 16.48339081
- Maximum pointwise difference: 22.75023079
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -16.32844543, Converted: 6.42178583
- Biggest difference in row (0, 0), sum -117.794266 vs 14.072861
- Layer 4, Token 23 (model.layers.out comparison):
- Original tensor sum: -73.092270
- Converted tensor sum: 6.691208
- Original tensor mean: -9.136534
- Converted tensor mean: 0.836401
- Mean difference: 10.72612858
- Maximum pointwise difference: 19.55576324
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -16.38935280, Converted: 3.16641092
- Biggest difference in row (0, 0), sum -73.092270 vs 6.691208
- Layer 5, Token 23 (model.layers.out comparison):
- Original tensor sum: -37.015450
- Converted tensor sum: 8.681388
- Original tensor mean: -4.626931
- Converted tensor mean: 1.085173
- Mean difference: 7.61082363
- Maximum pointwise difference: 20.55440712
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -13.93057537, Converted: 6.62383223
- Biggest difference in row (0, 0), sum -37.015450 vs 8.681388
- Layer 6, Token 23 (model.layers.out comparison):
- Original tensor sum: -90.333237
- Converted tensor sum: 7.396842
- Original tensor mean: -11.291655
- Converted tensor mean: 0.924605
- Mean difference: 12.82605839
- Maximum pointwise difference: 34.69086456
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -27.39507294, Converted: 7.29579258
- Biggest difference in row (0, 0), sum -90.333237 vs 7.396842
- Layer 7, Token 23 (model.layers.out comparison):
- Original tensor sum: -214.526337
- Converted tensor sum: 60.269241
- Original tensor mean: -26.815792
- Converted tensor mean: 7.533655
- Mean difference: 34.34944916
- Maximum pointwise difference: 55.60475159
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -42.02355576, Converted: 13.58119678
- Biggest difference in row (0, 0), sum -214.526337 vs 60.269241
- Layer 8, Token 23 (model.layers.out comparison):
- Original tensor sum: -138.238464
- Converted tensor sum: 48.862061
- Original tensor mean: -17.279808
- Converted tensor mean: 6.107758
- Mean difference: 23.38756561
- Maximum pointwise difference: 37.09150314
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -25.42422676, Converted: 11.66727638
- Biggest difference in row (0, 0), sum -138.238464 vs 48.862061
- Layer 9, Token 23 (model.layers.out comparison):
- Original tensor sum: -129.366013
- Converted tensor sum: 32.791050
- Original tensor mean: -16.170752
- Converted tensor mean: 4.098881
- Mean difference: 20.26963234
- Maximum pointwise difference: 31.74017334
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -22.80648041, Converted: 8.93369293
- Biggest difference in row (0, 0), sum -129.366013 vs 32.791050
- Layer 10, Token 23 (model.layers.out comparison):
- Original tensor sum: -112.076103
- Converted tensor sum: 33.542336
- Original tensor mean: -14.009513
- Converted tensor mean: 4.192792
- Mean difference: 18.20230484
- Maximum pointwise difference: 30.71049118
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -22.22323608, Converted: 8.48725605
- Biggest difference in row (0, 0), sum -112.076103 vs 33.542336
- Layer 11, Token 23 (model.layers.out comparison):
- Original tensor sum: -392.294312
- Converted tensor sum: 130.177963
- Original tensor mean: -49.036789
- Converted tensor mean: 16.272245
- Mean difference: 65.30903625
- Maximum pointwise difference: 80.34357452
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -59.32800293, Converted: 21.01557350
- Biggest difference in row (0, 0), sum -392.294312 vs 130.177963
- Layer 12, Token 23 (model.layers.out comparison):
- Original tensor sum: -416.741821
- Converted tensor sum: 126.312363
- Original tensor mean: -52.092728
- Converted tensor mean: 15.789045
- Mean difference: 67.88177490
- Maximum pointwise difference: 87.29133606
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -65.95119476, Converted: 21.34013939
- Biggest difference in row (0, 0), sum -416.741821 vs 126.312363
- Layer 13, Token 23 (model.layers.out comparison):
- Original tensor sum: -420.622223
- Converted tensor sum: 122.472458
- Original tensor mean: -52.577778
- Converted tensor mean: 15.309057
- Mean difference: 67.88684082
- Maximum pointwise difference: 89.02587891
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -68.22624969, Converted: 20.79962921
- Biggest difference in row (0, 0), sum -420.622223 vs 122.472458
- Layer 14, Token 23 (model.layers.out comparison):
- Original tensor sum: -398.408966
- Converted tensor sum: 120.881279
- Original tensor mean: -49.801121
- Converted tensor mean: 15.110160
- Mean difference: 64.91127777
- Maximum pointwise difference: 91.32544708
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -69.91543579, Converted: 21.41001320
- Biggest difference in row (0, 0), sum -398.408966 vs 120.881279
- Layer 15, Token 23 (model.layers.out comparison):
- Original tensor sum: -754.637085
- Converted tensor sum: 262.993530
- Original tensor mean: -94.329636
- Converted tensor mean: 32.874191
- Mean difference: 127.20383453
- Maximum pointwise difference: 157.75305176
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -119.83902740, Converted: 37.91403198
- Biggest difference in row (0, 0), sum -754.637085 vs 262.993530
- Layer 0, Token 24 (model.layers.out comparison):
- Original tensor sum: 14.859251
- Converted tensor sum: 2.731961
- Original tensor mean: 1.857406
- Converted tensor mean: 0.341495
- Mean difference: 4.21605587
- Maximum pointwise difference: 9.80887794
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 3.86449504, Converted: -5.94438314
- Biggest difference in row (0, 0), sum 14.859251 vs 2.731961
- Layer 1, Token 24 (model.layers.out comparison):
- Original tensor sum: 13.986740
- Converted tensor sum: -2.697716
- Original tensor mean: 1.748343
- Converted tensor mean: -0.337215
- Mean difference: 4.70033360
- Maximum pointwise difference: 10.86390495
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 3.54197407, Converted: -7.32193136
- Biggest difference in row (0, 0), sum 13.986740 vs -2.697716
- Layer 2, Token 24 (model.layers.out comparison):
- Original tensor sum: 13.856454
- Converted tensor sum: -1.915652
- Original tensor mean: 1.732057
- Converted tensor mean: -0.239456
- Mean difference: 5.10369968
- Maximum pointwise difference: 13.13724899
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 3.00663447, Converted: -10.13061428
- Biggest difference in row (0, 0), sum 13.856454 vs -1.915652
- Layer 3, Token 24 (model.layers.out comparison):
- Original tensor sum: 63.979485
- Converted tensor sum: -50.051231
- Original tensor mean: 7.997436
- Converted tensor mean: -6.256404
- Mean difference: 14.25383949
- Maximum pointwise difference: 25.59371948
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.45009327, Converted: -16.14362717
- Biggest difference in row (0, 0), sum 63.979485 vs -50.051231
- Layer 4, Token 24 (model.layers.out comparison):
- Original tensor sum: 60.174347
- Converted tensor sum: -64.423790
- Original tensor mean: 7.521793
- Converted tensor mean: -8.052974
- Mean difference: 15.57476616
- Maximum pointwise difference: 27.42375755
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 8.99039078, Converted: -18.43336678
- Biggest difference in row (0, 0), sum 60.174347 vs -64.423790
- Layer 5, Token 24 (model.layers.out comparison):
- Original tensor sum: 53.195156
- Converted tensor sum: -88.183350
- Original tensor mean: 6.649395
- Converted tensor mean: -11.022919
- Mean difference: 17.67231369
- Maximum pointwise difference: 35.18456650
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 7.48332596, Converted: -27.70124054
- Biggest difference in row (0, 0), sum 53.195156 vs -88.183350
- Layer 6, Token 24 (model.layers.out comparison):
- Original tensor sum: 55.262775
- Converted tensor sum: -106.113434
- Original tensor mean: 6.907847
- Converted tensor mean: -13.264179
- Mean difference: 20.17202759
- Maximum pointwise difference: 40.46305084
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 8.41111183, Converted: -32.05193710
- Biggest difference in row (0, 0), sum 55.262775 vs -106.113434
- Layer 7, Token 24 (model.layers.out comparison):
- Original tensor sum: 120.454941
- Converted tensor sum: -239.645325
- Original tensor mean: 15.056868
- Converted tensor mean: -29.955666
- Mean difference: 45.01253128
- Maximum pointwise difference: 65.79338074
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 14.88038158, Converted: -50.91299820
- Biggest difference in row (0, 0), sum 120.454941 vs -239.645325
- Layer 8, Token 24 (model.layers.out comparison):
- Original tensor sum: 103.648430
- Converted tensor sum: -223.958084
- Original tensor mean: 12.956054
- Converted tensor mean: -27.994761
- Mean difference: 40.95081329
- Maximum pointwise difference: 71.07021332
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 13.01342583, Converted: -58.05678558
- Biggest difference in row (0, 0), sum 103.648430 vs -223.958084
- Layer 9, Token 24 (model.layers.out comparison):
- Original tensor sum: 90.361565
- Converted tensor sum: -216.935654
- Original tensor mean: 11.295196
- Converted tensor mean: -27.116957
- Mean difference: 38.41215515
- Maximum pointwise difference: 69.46690369
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 9.60147953, Converted: -59.86542511
- Biggest difference in row (0, 0), sum 90.361565 vs -216.935654
- Layer 10, Token 24 (model.layers.out comparison):
- Original tensor sum: 83.880753
- Converted tensor sum: -215.275970
- Original tensor mean: 10.485094
- Converted tensor mean: -26.909496
- Mean difference: 37.39459229
- Maximum pointwise difference: 70.35929108
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 8.32141781, Converted: -62.03787613
- Biggest difference in row (0, 0), sum 83.880753 vs -215.275970
- Layer 11, Token 24 (model.layers.out comparison):
- Original tensor sum: 169.893204
- Converted tensor sum: -521.842712
- Original tensor mean: 21.236650
- Converted tensor mean: -65.230339
- Mean difference: 86.46699524
- Maximum pointwise difference: 124.57461548
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 19.84806633, Converted: -104.72654724
- Biggest difference in row (0, 0), sum 169.893204 vs -521.842712
- Layer 12, Token 24 (model.layers.out comparison):
- Original tensor sum: 170.650391
- Converted tensor sum: -527.495605
- Original tensor mean: 21.331299
- Converted tensor mean: -65.936951
- Mean difference: 87.26824951
- Maximum pointwise difference: 124.01423645
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 20.41718483, Converted: -103.59705353
- Biggest difference in row (0, 0), sum 170.650391 vs -527.495605
- Layer 13, Token 24 (model.layers.out comparison):
- Original tensor sum: 167.707260
- Converted tensor sum: -525.824341
- Original tensor mean: 20.963408
- Converted tensor mean: -65.728043
- Mean difference: 86.69145203
- Maximum pointwise difference: 120.31568909
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 18.97763062, Converted: -101.33805847
- Biggest difference in row (0, 0), sum 167.707260 vs -525.824341
- Layer 14, Token 24 (model.layers.out comparison):
- Original tensor sum: 160.910034
- Converted tensor sum: -562.698975
- Original tensor mean: 20.113754
- Converted tensor mean: -70.337372
- Mean difference: 90.45112610
- Maximum pointwise difference: 127.80590057
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 17.37784767, Converted: -110.42805481
- Biggest difference in row (0, 0), sum 160.910034 vs -562.698975
- Layer 15, Token 24 (model.layers.out comparison):
- Original tensor sum: 306.123810
- Converted tensor sum: -931.621094
- Original tensor mean: 38.265476
- Converted tensor mean: -116.452637
- Mean difference: 154.71810913
- Maximum pointwise difference: 176.81520081
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 29.99453545, Converted: -146.82066345
- Biggest difference in row (0, 0), sum 306.123810 vs -931.621094
- Layer 0, Token 25 (model.layers.out comparison):
- Original tensor sum: -6.641135
- Converted tensor sum: -3.933383
- Original tensor mean: -0.830142
- Converted tensor mean: -0.491673
- Mean difference: 3.03462601
- Maximum pointwise difference: 5.75030708
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -6.01051331, Converted: -0.26020634
- Biggest difference in row (0, 0), sum -6.641135 vs -3.933383
- Layer 1, Token 25 (model.layers.out comparison):
- Original tensor sum: -1.642994
- Converted tensor sum: -11.347046
- Original tensor mean: -0.205374
- Converted tensor mean: -1.418381
- Mean difference: 2.82665110
- Maximum pointwise difference: 5.44076443
- Max difference location: (0, 0, 3)
- Values at max diff - Original: -3.67477202, Converted: -9.11553669
- Biggest difference in row (0, 0), sum -1.642994 vs -11.347046
- Layer 2, Token 25 (model.layers.out comparison):
- Original tensor sum: 6.404377
- Converted tensor sum: -14.681939
- Original tensor mean: 0.800547
- Converted tensor mean: -1.835242
- Mean difference: 3.35868859
- Maximum pointwise difference: 7.97232580
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 5.46229649, Converted: -2.51002932
- Biggest difference in row (0, 0), sum 6.404377 vs -14.681939
- Layer 3, Token 25 (model.layers.out comparison):
- Original tensor sum: 73.178505
- Converted tensor sum: -57.235046
- Original tensor mean: 9.147313
- Converted tensor mean: -7.154381
- Mean difference: 16.30169487
- Maximum pointwise difference: 20.31940651
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 6.96401119, Converted: -13.35539532
- Biggest difference in row (0, 0), sum 73.178505 vs -57.235046
- Layer 4, Token 25 (model.layers.out comparison):
- Original tensor sum: 65.662933
- Converted tensor sum: -75.145912
- Original tensor mean: 8.207867
- Converted tensor mean: -9.393239
- Mean difference: 17.60110474
- Maximum pointwise difference: 25.96934509
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 5.62515926, Converted: -20.34418488
- Biggest difference in row (0, 0), sum 65.662933 vs -75.145912
- Layer 5, Token 25 (model.layers.out comparison):
- Original tensor sum: 54.107101
- Converted tensor sum: -105.733917
- Original tensor mean: 6.763388
- Converted tensor mean: -13.216740
- Mean difference: 19.98012924
- Maximum pointwise difference: 28.99731064
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 10.49883652, Converted: -18.49847412
- Biggest difference in row (0, 0), sum 54.107101 vs -105.733917
- Layer 6, Token 25 (model.layers.out comparison):
- Original tensor sum: 48.177361
- Converted tensor sum: -134.772308
- Original tensor mean: 6.022170
- Converted tensor mean: -16.846539
- Mean difference: 22.86870766
- Maximum pointwise difference: 36.34035110
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 2.22752476, Converted: -34.11282730
- Biggest difference in row (0, 0), sum 48.177361 vs -134.772308
- Layer 7, Token 25 (model.layers.out comparison):
- Original tensor sum: 111.839172
- Converted tensor sum: -277.301056
- Original tensor mean: 13.979897
- Converted tensor mean: -34.662632
- Mean difference: 48.64252853
- Maximum pointwise difference: 62.89208221
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 9.78997040, Converted: -53.10211182
- Biggest difference in row (0, 0), sum 111.839172 vs -277.301056
- Layer 8, Token 25 (model.layers.out comparison):
- Original tensor sum: 104.861267
- Converted tensor sum: -286.217560
- Original tensor mean: 13.107658
- Converted tensor mean: -35.777195
- Mean difference: 48.88484955
- Maximum pointwise difference: 65.30915833
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 20.24993896, Converted: -45.05921936
- Biggest difference in row (0, 0), sum 104.861267 vs -286.217560
- Layer 9, Token 25 (model.layers.out comparison):
- Original tensor sum: 96.630295
- Converted tensor sum: -313.393005
- Original tensor mean: 12.078787
- Converted tensor mean: -39.174126
- Mean difference: 51.25291061
- Maximum pointwise difference: 67.83577728
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 19.77431297, Converted: -48.06146622
- Biggest difference in row (0, 0), sum 96.630295 vs -313.393005
- Layer 10, Token 25 (model.layers.out comparison):
- Original tensor sum: 89.098160
- Converted tensor sum: -316.188721
- Original tensor mean: 11.137270
- Converted tensor mean: -39.523590
- Mean difference: 50.66085815
- Maximum pointwise difference: 63.01490784
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 18.63522339, Converted: -44.37968445
- Biggest difference in row (0, 0), sum 89.098160 vs -316.188721
- Layer 11, Token 25 (model.layers.out comparison):
- Original tensor sum: 183.329193
- Converted tensor sum: -640.859741
- Original tensor mean: 22.916149
- Converted tensor mean: -80.107468
- Mean difference: 103.02362061
- Maximum pointwise difference: 123.61917114
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 28.08130074, Converted: -95.53787231
- Biggest difference in row (0, 0), sum 183.329193 vs -640.859741
- Layer 12, Token 25 (model.layers.out comparison):
- Original tensor sum: 183.012512
- Converted tensor sum: -647.243774
- Original tensor mean: 22.876564
- Converted tensor mean: -80.905472
- Mean difference: 103.78203583
- Maximum pointwise difference: 121.95301819
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 28.78862381, Converted: -93.16439056
- Biggest difference in row (0, 0), sum 183.012512 vs -647.243774
- Layer 13, Token 25 (model.layers.out comparison):
- Original tensor sum: 179.038055
- Converted tensor sum: -675.284363
- Original tensor mean: 22.379757
- Converted tensor mean: -84.410545
- Mean difference: 106.79029846
- Maximum pointwise difference: 124.18766785
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 29.24967384, Converted: -94.93799591
- Biggest difference in row (0, 0), sum 179.038055 vs -675.284363
- Layer 14, Token 25 (model.layers.out comparison):
- Original tensor sum: 177.600830
- Converted tensor sum: -653.687622
- Original tensor mean: 22.200104
- Converted tensor mean: -81.710953
- Mean difference: 103.91105652
- Maximum pointwise difference: 120.82553864
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 21.10656929, Converted: -99.71897125
- Biggest difference in row (0, 0), sum 177.600830 vs -653.687622
- Layer 15, Token 25 (model.layers.out comparison):
- Original tensor sum: 323.013031
- Converted tensor sum: -1030.671143
- Original tensor mean: 40.376629
- Converted tensor mean: -128.833893
- Mean difference: 169.21054077
- Maximum pointwise difference: 193.25675964
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 50.33515549, Converted: -142.92160034
- Biggest difference in row (0, 0), sum 323.013031 vs -1030.671143
- Layer 0, Token 26 (model.layers.out comparison):
- Original tensor sum: 65.941025
- Converted tensor sum: -21.309677
- Original tensor mean: 8.242628
- Converted tensor mean: -2.663710
- Mean difference: 10.92460823
- Maximum pointwise difference: 22.60500336
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 19.03843307, Converted: -3.56657028
- Biggest difference in row (0, 0), sum 65.941025 vs -21.309677
- Layer 1, Token 26 (model.layers.out comparison):
- Original tensor sum: 52.076649
- Converted tensor sum: -57.925156
- Original tensor mean: 6.509581
- Converted tensor mean: -7.240644
- Mean difference: 14.23825073
- Maximum pointwise difference: 19.17949104
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 5.37531137, Converted: -13.80417919
- Biggest difference in row (0, 0), sum 52.076649 vs -57.925156
- Layer 2, Token 26 (model.layers.out comparison):
- Original tensor sum: 51.231728
- Converted tensor sum: -47.847797
- Original tensor mean: 6.403966
- Converted tensor mean: -5.980975
- Mean difference: 12.38494110
- Maximum pointwise difference: 23.60085297
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 9.02445030, Converted: -14.57640362
- Biggest difference in row (0, 0), sum 51.231728 vs -47.847797
- Layer 3, Token 26 (model.layers.out comparison):
- Original tensor sum: 107.302612
- Converted tensor sum: -173.292923
- Original tensor mean: 13.412827
- Converted tensor mean: -21.661615
- Mean difference: 35.07444000
- Maximum pointwise difference: 43.60850143
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 14.85190392, Converted: -28.75659752
- Biggest difference in row (0, 0), sum 107.302612 vs -173.292923
- Layer 4, Token 26 (model.layers.out comparison):
- Original tensor sum: 97.273697
- Converted tensor sum: -182.550171
- Original tensor mean: 12.159212
- Converted tensor mean: -22.818771
- Mean difference: 34.97798157
- Maximum pointwise difference: 46.59681320
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 14.26772594, Converted: -32.32908630
- Biggest difference in row (0, 0), sum 97.273697 vs -182.550171
- Layer 5, Token 26 (model.layers.out comparison):
- Original tensor sum: 85.259064
- Converted tensor sum: -172.859528
- Original tensor mean: 10.657383
- Converted tensor mean: -21.607441
- Mean difference: 32.26482391
- Maximum pointwise difference: 44.72983170
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 13.95336819, Converted: -30.77646255
- Biggest difference in row (0, 0), sum 85.259064 vs -172.859528
- Layer 6, Token 26 (model.layers.out comparison):
- Original tensor sum: 87.096161
- Converted tensor sum: -208.315033
- Original tensor mean: 10.887020
- Converted tensor mean: -26.039379
- Mean difference: 36.92639923
- Maximum pointwise difference: 45.54611206
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 14.15797043, Converted: -31.38814354
- Biggest difference in row (0, 0), sum 87.096161 vs -208.315033
- Layer 7, Token 26 (model.layers.out comparison):
- Original tensor sum: 160.905060
- Converted tensor sum: -356.607910
- Original tensor mean: 20.113132
- Converted tensor mean: -44.575989
- Mean difference: 64.68911743
- Maximum pointwise difference: 73.27433014
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 19.29874229, Converted: -53.97558594
- Biggest difference in row (0, 0), sum 160.905060 vs -356.607910
- Layer 8, Token 26 (model.layers.out comparison):
- Original tensor sum: 147.546188
- Converted tensor sum: -372.627655
- Original tensor mean: 18.443274
- Converted tensor mean: -46.578457
- Mean difference: 65.02172852
- Maximum pointwise difference: 75.06597900
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 17.32047462, Converted: -57.74550629
- Biggest difference in row (0, 0), sum 147.546188 vs -372.627655
- Layer 9, Token 26 (model.layers.out comparison):
- Original tensor sum: 142.108231
- Converted tensor sum: -384.533997
- Original tensor mean: 17.763529
- Converted tensor mean: -48.066750
- Mean difference: 65.83027649
- Maximum pointwise difference: 80.39822388
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 9.57865334, Converted: -70.81957245
- Biggest difference in row (0, 0), sum 142.108231 vs -384.533997
- Layer 10, Token 26 (model.layers.out comparison):
- Original tensor sum: 136.597595
- Converted tensor sum: -406.001617
- Original tensor mean: 17.074699
- Converted tensor mean: -50.750202
- Mean difference: 67.82489777
- Maximum pointwise difference: 83.06503296
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 16.25280952, Converted: -66.81222534
- Biggest difference in row (0, 0), sum 136.597595 vs -406.001617
- Layer 11, Token 26 (model.layers.out comparison):
- Original tensor sum: 234.238876
- Converted tensor sum: -719.742371
- Original tensor mean: 29.279860
- Converted tensor mean: -89.967796
- Mean difference: 119.24765778
- Maximum pointwise difference: 144.35720825
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 22.54579163, Converted: -121.81141663
- Biggest difference in row (0, 0), sum 234.238876 vs -719.742371
- Layer 12, Token 26 (model.layers.out comparison):
- Original tensor sum: 230.967987
- Converted tensor sum: -737.411499
- Original tensor mean: 28.870998
- Converted tensor mean: -92.176437
- Mean difference: 121.04743958
- Maximum pointwise difference: 145.76480103
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 23.33647728, Converted: -122.42832184
- Biggest difference in row (0, 0), sum 230.967987 vs -737.411499
- Layer 13, Token 26 (model.layers.out comparison):
- Original tensor sum: 225.836136
- Converted tensor sum: -743.471008
- Original tensor mean: 28.229517
- Converted tensor mean: -92.933876
- Mean difference: 121.16339111
- Maximum pointwise difference: 141.17944336
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 23.16177559, Converted: -118.01766205
- Biggest difference in row (0, 0), sum 225.836136 vs -743.471008
- Layer 14, Token 26 (model.layers.out comparison):
- Original tensor sum: 222.057236
- Converted tensor sum: -845.007874
- Original tensor mean: 27.757154
- Converted tensor mean: -105.625984
- Mean difference: 133.38313293
- Maximum pointwise difference: 164.57283020
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 29.71310997, Converted: -134.85972595
- Biggest difference in row (0, 0), sum 222.057236 vs -845.007874
- Layer 15, Token 26 (model.layers.out comparison):
- Original tensor sum: 366.139526
- Converted tensor sum: -1227.681152
- Original tensor mean: 45.767441
- Converted tensor mean: -153.460144
- Mean difference: 199.22756958
- Maximum pointwise difference: 235.55526733
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 46.65935516, Converted: -188.89590454
- Biggest difference in row (0, 0), sum 366.139526 vs -1227.681152
- Layer 0, Token 27 (model.layers.out comparison):
- Original tensor sum: 0.538792
- Converted tensor sum: -2.767126
- Original tensor mean: 0.067349
- Converted tensor mean: -0.345891
- Mean difference: 1.04583490
- Maximum pointwise difference: 4.03163290
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 4.54428434, Converted: 0.51265144
- Biggest difference in row (0, 0), sum 0.538792 vs -2.767126
- Layer 1, Token 27 (model.layers.out comparison):
- Original tensor sum: -13.666726
- Converted tensor sum: 4.859785
- Original tensor mean: -1.708341
- Converted tensor mean: 0.607473
- Mean difference: 3.73808312
- Maximum pointwise difference: 11.04657841
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -6.84830761, Converted: 4.19827080
- Biggest difference in row (0, 0), sum -13.666726 vs 4.859785
- Layer 2, Token 27 (model.layers.out comparison):
- Original tensor sum: 19.892342
- Converted tensor sum: 18.553621
- Original tensor mean: 2.486543
- Converted tensor mean: 2.319203
- Mean difference: 3.86019540
- Maximum pointwise difference: 12.85380554
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -5.59446335, Converted: 7.25934219
- Biggest difference in row (0, 0), sum 19.892342 vs 18.553621
- Layer 3, Token 27 (model.layers.out comparison):
- Original tensor sum: 84.246483
- Converted tensor sum: 49.827652
- Original tensor mean: 10.530810
- Converted tensor mean: 6.228456
- Mean difference: 6.56024361
- Maximum pointwise difference: 11.30776882
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 18.61387444, Converted: 7.30610561
- Biggest difference in row (0, 0), sum 84.246483 vs 49.827652
- Layer 4, Token 27 (model.layers.out comparison):
- Original tensor sum: 72.374397
- Converted tensor sum: 50.589382
- Original tensor mean: 9.046800
- Converted tensor mean: 6.323673
- Mean difference: 5.51325321
- Maximum pointwise difference: 11.16050529
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -2.93389368, Converted: 8.22661209
- Biggest difference in row (0, 0), sum 72.374397 vs 50.589382
- Layer 5, Token 27 (model.layers.out comparison):
- Original tensor sum: 68.200790
- Converted tensor sum: 51.359711
- Original tensor mean: 8.525099
- Converted tensor mean: 6.419964
- Mean difference: 4.32947350
- Maximum pointwise difference: 8.89735222
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -3.28547406, Converted: 5.61187792
- Biggest difference in row (0, 0), sum 68.200790 vs 51.359711
- Layer 6, Token 27 (model.layers.out comparison):
- Original tensor sum: 70.421684
- Converted tensor sum: 41.851700
- Original tensor mean: 8.802711
- Converted tensor mean: 5.231462
- Mean difference: 5.60544014
- Maximum pointwise difference: 9.42855549
- Max difference location: (0, 0, 2)
- Values at max diff - Original: 15.64872551, Converted: 6.22017002
- Biggest difference in row (0, 0), sum 70.421684 vs 41.851700
- Layer 7, Token 27 (model.layers.out comparison):
- Original tensor sum: 138.012558
- Converted tensor sum: 106.052734
- Original tensor mean: 17.251570
- Converted tensor mean: 13.256592
- Mean difference: 5.83357430
- Maximum pointwise difference: 9.46822166
- Max difference location: (0, 0, 4)
- Values at max diff - Original: 20.60037422, Converted: 11.13215256
- Biggest difference in row (0, 0), sum 138.012558 vs 106.052734
- Layer 8, Token 27 (model.layers.out comparison):
- Original tensor sum: 124.592545
- Converted tensor sum: 109.657555
- Original tensor mean: 15.574068
- Converted tensor mean: 13.707194
- Mean difference: 4.43112850
- Maximum pointwise difference: 10.25702190
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 3.63314724, Converted: 13.89016914
- Biggest difference in row (0, 0), sum 124.592545 vs 109.657555
- Layer 9, Token 27 (model.layers.out comparison):
- Original tensor sum: 110.794357
- Converted tensor sum: 109.277565
- Original tensor mean: 13.849295
- Converted tensor mean: 13.659696
- Mean difference: 4.23832560
- Maximum pointwise difference: 11.81062031
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 1.84443951, Converted: 13.65505981
- Biggest difference in row (0, 0), sum 110.794357 vs 109.277565
- Layer 10, Token 27 (model.layers.out comparison):
- Original tensor sum: 104.034340
- Converted tensor sum: 104.158554
- Original tensor mean: 13.004292
- Converted tensor mean: 13.019819
- Mean difference: 4.04881191
- Maximum pointwise difference: 12.64350224
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 0.44486341, Converted: 13.08836555
- Biggest difference in row (0, 0), sum 104.034340 vs 104.158554
- Layer 11, Token 27 (model.layers.out comparison):
- Original tensor sum: 194.747101
- Converted tensor sum: 186.990341
- Original tensor mean: 24.343388
- Converted tensor mean: 23.373793
- Mean difference: 4.42853832
- Maximum pointwise difference: 11.92787266
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 11.99839497, Converted: 23.92626762
- Biggest difference in row (0, 0), sum 194.747101 vs 186.990341
- Layer 12, Token 27 (model.layers.out comparison):
- Original tensor sum: 195.014465
- Converted tensor sum: 185.515793
- Original tensor mean: 24.376808
- Converted tensor mean: 23.189474
- Mean difference: 4.49333429
- Maximum pointwise difference: 11.10862160
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 11.92906380, Converted: 23.03768539
- Biggest difference in row (0, 0), sum 195.014465 vs 185.515793
- Layer 13, Token 27 (model.layers.out comparison):
- Original tensor sum: 187.897064
- Converted tensor sum: 182.353088
- Original tensor mean: 23.487133
- Converted tensor mean: 22.794136
- Mean difference: 4.64961338
- Maximum pointwise difference: 12.63825989
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 10.51706123, Converted: 23.15532112
- Biggest difference in row (0, 0), sum 187.897064 vs 182.353088
- Layer 14, Token 27 (model.layers.out comparison):
- Original tensor sum: 182.226410
- Converted tensor sum: 180.585373
- Original tensor mean: 22.778301
- Converted tensor mean: 22.573172
- Mean difference: 4.70111561
- Maximum pointwise difference: 12.44419956
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 11.31790829, Converted: 23.76210785
- Biggest difference in row (0, 0), sum 182.226410 vs 180.585373
- Layer 15, Token 27 (model.layers.out comparison):
- Original tensor sum: 333.560730
- Converted tensor sum: 318.274811
- Original tensor mean: 41.695091
- Converted tensor mean: 39.784351
- Mean difference: 4.67095470
- Maximum pointwise difference: 11.04085732
- Max difference location: (0, 0, 5)
- Values at max diff - Original: 28.01206779, Converted: 39.05292511
- Biggest difference in row (0, 0), sum 333.560730 vs 318.274811
- Layer 0, Token 28 (model.layers.out comparison):
- Original tensor sum: -40.607262
- Converted tensor sum: 42.743095
- Original tensor mean: -5.075908
- Converted tensor mean: 5.342887
- Mean difference: 11.17178345
- Maximum pointwise difference: 22.58385468
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -14.17651558, Converted: 8.40733814
- Biggest difference in row (0, 0), sum -40.607262 vs 42.743095
- Layer 1, Token 28 (model.layers.out comparison):
- Original tensor sum: -43.333393
- Converted tensor sum: 31.481144
- Original tensor mean: -5.416674
- Converted tensor mean: 3.935143
- Mean difference: 11.11242485
- Maximum pointwise difference: 18.85606575
- Max difference location: (0, 0, 5)
- Values at max diff - Original: -10.93557739, Converted: 7.92048883
- Biggest difference in row (0, 0), sum -43.333393 vs 31.481144
- Layer 2, Token 28 (model.layers.out comparison):
- Original tensor sum: -67.416214
- Converted tensor sum: 33.172539
- Original tensor mean: -8.427027
- Converted tensor mean: 4.146567
- Mean difference: 14.60656548
- Maximum pointwise difference: 20.67273331
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -13.93512535, Converted: 6.73760748
- Biggest difference in row (0, 0), sum -67.416214 vs 33.172539
- Layer 3, Token 28 (model.layers.out comparison):
- Original tensor sum: -199.361206
- Converted tensor sum: 72.683899
- Original tensor mean: -24.920151
- Converted tensor mean: 9.085487
- Mean difference: 34.00563812
- Maximum pointwise difference: 41.37638092
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -29.60864067, Converted: 11.76773930
- Biggest difference in row (0, 0), sum -199.361206 vs 72.683899
- Layer 4, Token 28 (model.layers.out comparison):
- Original tensor sum: -137.055893
- Converted tensor sum: 63.596687
- Original tensor mean: -17.131987
- Converted tensor mean: 7.949586
- Mean difference: 25.75262260
- Maximum pointwise difference: 40.96822739
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -29.79143906, Converted: 11.17678833
- Biggest difference in row (0, 0), sum -137.055893 vs 63.596687
- Layer 5, Token 28 (model.layers.out comparison):
- Original tensor sum: -73.715279
- Converted tensor sum: 62.123581
- Original tensor mean: -9.214410
- Converted tensor mean: 7.765448
- Mean difference: 17.84333420
- Maximum pointwise difference: 31.90785027
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -24.06629181, Converted: 7.84155846
- Biggest difference in row (0, 0), sum -73.715279 vs 62.123581
- Layer 6, Token 28 (model.layers.out comparison):
- Original tensor sum: -126.770874
- Converted tensor sum: 61.464096
- Original tensor mean: -15.846359
- Converted tensor mean: 7.683012
- Mean difference: 23.61796379
- Maximum pointwise difference: 36.02120209
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -27.60279655, Converted: 8.41840744
- Biggest difference in row (0, 0), sum -126.770874 vs 61.464096
- Layer 7, Token 28 (model.layers.out comparison):
- Original tensor sum: -254.607422
- Converted tensor sum: 126.028885
- Original tensor mean: -31.825928
- Converted tensor mean: 15.753611
- Mean difference: 47.57954025
- Maximum pointwise difference: 61.42348480
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -40.33562851, Converted: 21.08785439
- Biggest difference in row (0, 0), sum -254.607422 vs 126.028885
- Layer 8, Token 28 (model.layers.out comparison):
- Original tensor sum: -198.536194
- Converted tensor sum: 120.381157
- Original tensor mean: -24.817024
- Converted tensor mean: 15.047645
- Mean difference: 39.86466980
- Maximum pointwise difference: 52.24274063
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -30.55666733, Converted: 21.68607330
- Biggest difference in row (0, 0), sum -198.536194 vs 120.381157
- Layer 9, Token 28 (model.layers.out comparison):
- Original tensor sum: -203.318542
- Converted tensor sum: 118.674896
- Original tensor mean: -25.414818
- Converted tensor mean: 14.834362
- Mean difference: 40.24917984
- Maximum pointwise difference: 51.74636078
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -36.37534714, Converted: 15.37101555
- Biggest difference in row (0, 0), sum -203.318542 vs 118.674896
- Layer 10, Token 28 (model.layers.out comparison):
- Original tensor sum: -173.929123
- Converted tensor sum: 115.971573
- Original tensor mean: -21.741140
- Converted tensor mean: 14.496447
- Mean difference: 36.23758698
- Maximum pointwise difference: 47.99763489
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -32.96516418, Converted: 15.03247166
- Biggest difference in row (0, 0), sum -173.929123 vs 115.971573
- Layer 11, Token 28 (model.layers.out comparison):
- Original tensor sum: -450.842834
- Converted tensor sum: 202.799988
- Original tensor mean: -56.355354
- Converted tensor mean: 25.349998
- Mean difference: 81.70535278
- Maximum pointwise difference: 92.55924988
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -66.56226349, Converted: 25.99698830
- Biggest difference in row (0, 0), sum -450.842834 vs 202.799988
- Layer 12, Token 28 (model.layers.out comparison):
- Original tensor sum: -483.456177
- Converted tensor sum: 204.607147
- Original tensor mean: -60.432022
- Converted tensor mean: 25.575893
- Mean difference: 86.00791931
- Maximum pointwise difference: 97.30514526
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -70.58811951, Converted: 26.71702957
- Biggest difference in row (0, 0), sum -483.456177 vs 204.607147
- Layer 13, Token 28 (model.layers.out comparison):
- Original tensor sum: -487.978210
- Converted tensor sum: 194.803741
- Original tensor mean: -60.997276
- Converted tensor mean: 24.350468
- Mean difference: 85.34774780
- Maximum pointwise difference: 97.00595093
- Max difference location: (0, 0, 6)
- Values at max diff - Original: -71.41757965, Converted: 25.58836937
- Biggest difference in row (0, 0), sum -487.978210 vs 194.803741
- Layer 14, Token 28 (model.layers.out comparison):
- Original tensor sum: -487.676697
- Converted tensor sum: 192.080292
- Original tensor mean: -60.959587
- Converted tensor mean: 24.010036
- Mean difference: 84.96962738
- Maximum pointwise difference: 101.62533569
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -75.57343292, Converted: 26.05190277
- Biggest difference in row (0, 0), sum -487.676697 vs 192.080292
- Layer 15, Token 28 (model.layers.out comparison):
- Original tensor sum: -826.685791
- Converted tensor sum: 324.333130
- Original tensor mean: -103.335724
- Converted tensor mean: 40.541641
- Mean difference: 143.87736511
- Maximum pointwise difference: 160.84576416
- Max difference location: (0, 0, 4)
- Values at max diff - Original: -111.58706665, Converted: 49.25869751
- Biggest difference in row (0, 0), sum -826.685791 vs 324.333130
- Layer 0, Token 29 (model.layers.out comparison):
- Original tensor sum: -7.335809
- Converted tensor sum: 5.924038
- Original tensor mean: -0.916976
- Converted tensor mean: 0.740505
- Mean difference: 2.81220579
- Maximum pointwise difference: 5.74731255
- Max difference location: (0, 0, 2)
- Values at max diff - Original: -3.16068745, Converted: 2.58662534
- Biggest difference in row (0, 0), sum -7.335809 vs 5.924038
- Layer 1, Token 29 (model.layers.out comparison):
- Original tensor sum: -4.554134
- Converted tensor sum: 7.198357
- Original tensor mean: -0.569267
- Converted tensor mean: 0.899795
- Mean difference: 4.59539890
- Maximum pointwise difference: 12.22019768
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -9.41592121, Converted: 2.80427670
- Biggest difference in row (0, 0), sum -4.554134 vs 7.198357
- Layer 2, Token 29 (model.layers.out comparison):
- Original tensor sum: 18.821238
- Converted tensor sum: -2.444355
- Original tensor mean: 2.352655
- Converted tensor mean: -0.305544
- Mean difference: 5.75418472
- Maximum pointwise difference: 9.27616215
- Max difference location: (0, 0, 1)
- Values at max diff - Original: -6.88754845, Converted: 2.38861346
- Biggest difference in row (0, 0), sum 18.821238 vs -2.444355
- Layer 3, Token 29 (model.layers.out comparison):
- Original tensor sum: 70.965004
- Converted tensor sum: -68.014175
- Original tensor mean: 8.870625
- Converted tensor mean: -8.501772
- Mean difference: 17.37239647
- Maximum pointwise difference: 24.10712433
- Max difference location: (0, 0, 3)
- Values at max diff - Original: 16.14313126, Converted: -7.96399307
- Biggest difference in row (0, 0), sum 70.965004 vs -68.014175
- Layer 4, Token 29 (model.layers.out comparison):
- Original tensor sum: 62.607174
- Converted tensor sum: -17.623362
- Original tensor mean: 7.825897
- Converted tensor mean: -2.202920
- Mean difference: 10.34164429
- Maximum pointwise difference: 18.22177315
- Max difference location: (0, 0, 0)
- Values at max diff - Original: 8.43466568, Converted: -9.78710747
- Biggest difference in row (0, 0), sum 62.607174 vs -17.623362
- Layer 5, Token 29 (model.layers.out comparison):
- Original tensor sum: 52.727810
- Converted tensor sum: 1.219590
- Original tensor mean: 6.590976
- Converted tensor mean: 0.152449
- Mean difference: 7.65116024
- Maximum pointwise difference: 18.62134933
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 9.17143154, Converted: -9.44991875
- Biggest difference in row (0, 0), sum 52.727810 vs 1.219590
- Layer 6, Token 29 (model.layers.out comparison):
- Original tensor sum: 56.382370
- Converted tensor sum: 4.153158
- Original tensor mean: 7.047796
- Converted tensor mean: 0.519145
- Mean difference: 7.41536808
- Maximum pointwise difference: 17.59628677
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 10.29856682, Converted: -7.29772091
- Biggest difference in row (0, 0), sum 56.382370 vs 4.153158
- Layer 7, Token 29 (model.layers.out comparison):
- Original tensor sum: 136.310486
- Converted tensor sum: 3.958838
- Original tensor mean: 17.038811
- Converted tensor mean: 0.494855
- Mean difference: 16.54395676
- Maximum pointwise difference: 26.66838837
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 18.93185425, Converted: -7.73653507
- Biggest difference in row (0, 0), sum 136.310486 vs 3.958838
- Layer 8, Token 29 (model.layers.out comparison):
- Original tensor sum: 119.467941
- Converted tensor sum: 9.372761
- Original tensor mean: 14.933493
- Converted tensor mean: 1.171595
- Mean difference: 13.76189804
- Maximum pointwise difference: 22.09118652
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 20.88569450, Converted: -1.20549154
- Biggest difference in row (0, 0), sum 119.467941 vs 9.372761
- Layer 9, Token 29 (model.layers.out comparison):
- Original tensor sum: 111.468323
- Converted tensor sum: 12.752249
- Original tensor mean: 13.933540
- Converted tensor mean: 1.594031
- Mean difference: 12.36562347
- Maximum pointwise difference: 19.99691391
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 20.75084877, Converted: 0.75393468
- Biggest difference in row (0, 0), sum 111.468323 vs 12.752249
- Layer 10, Token 29 (model.layers.out comparison):
- Original tensor sum: 103.290207
- Converted tensor sum: 4.031506
- Original tensor mean: 12.911276
- Converted tensor mean: 0.503938
- Mean difference: 12.90593433
- Maximum pointwise difference: 20.97147560
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 20.37113762, Converted: -0.60033715
- Biggest difference in row (0, 0), sum 103.290207 vs 4.031506
- Layer 11, Token 29 (model.layers.out comparison):
- Original tensor sum: 195.291718
- Converted tensor sum: 60.566498
- Original tensor mean: 24.411465
- Converted tensor mean: 7.570812
- Mean difference: 16.84065247
- Maximum pointwise difference: 26.14917755
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 32.22053146, Converted: 6.07135296
- Biggest difference in row (0, 0), sum 195.291718 vs 60.566498
- Layer 12, Token 29 (model.layers.out comparison):
- Original tensor sum: 193.868057
- Converted tensor sum: 56.865105
- Original tensor mean: 24.233507
- Converted tensor mean: 7.108138
- Mean difference: 17.12537003
- Maximum pointwise difference: 27.68391991
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 25.67190361, Converted: -2.01201606
- Biggest difference in row (0, 0), sum 193.868057 vs 56.865105
- Layer 13, Token 29 (model.layers.out comparison):
- Original tensor sum: 191.697586
- Converted tensor sum: 55.096077
- Original tensor mean: 23.962198
- Converted tensor mean: 6.887010
- Mean difference: 17.07518768
- Maximum pointwise difference: 27.05913162
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 24.89521027, Converted: -2.16392159
- Biggest difference in row (0, 0), sum 191.697586 vs 55.096077
- Layer 14, Token 29 (model.layers.out comparison):
- Original tensor sum: 188.843628
- Converted tensor sum: 53.397236
- Original tensor mean: 23.605453
- Converted tensor mean: 6.674654
- Mean difference: 16.93079758
- Maximum pointwise difference: 25.94162941
- Max difference location: (0, 0, 6)
- Values at max diff - Original: 23.64732933, Converted: -2.29430056
- Biggest difference in row (0, 0), sum 188.843628 vs 53.397236
- Layer 15, Token 29 (model.layers.out comparison):
- Original tensor sum: 336.074646
- Converted tensor sum: 200.162903
- Original tensor mean: 42.009331
- Converted tensor mean: 25.020363
- Mean difference: 16.98896790
- Maximum pointwise difference: 25.90124702
- Max difference location: (0, 0, 7)
- Values at max diff - Original: 47.47709274, Converted: 21.57584572
- Biggest difference in row (0, 0), sum 336.074646 vs 200.162903
- Layer 0, Token 30 (model.layers.out comparison):
- Original tensor sum: 17.017063
- Converted tensor sum: 23.545963
- Original tensor mean: 2.127133
- Converted tensor mean: 2.943245
- Mean difference: 2.51119232
- Maximum pointwise difference: 4.74783516
- Max difference location: (0, 0, 7)
- Values at max diff - Original: -3.57869840, Converted: 1.16913700
- Biggest difference in row (0, 0), sum 17.017063 vs 23.545963
- Layer 1, Token 30 (model.layers.out comparison):
- Original tensor sum: 20.432869
- Converted tensor sum: 19.928423
- Original tensor mean: 2.554109
- Converted tensor mean: 2.491053
- Mean difference: 3.21921587
- Maximum pointwise difference: 5.61581087
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 4.57620192, Converted: -1.03960896
- Biggest difference in row (0, 0), sum 20.432869 vs 19.928423
- Layer 2, Token 30 (model.layers.out comparison):
- Original tensor sum: 28.017879
- Converted tensor sum: 17.077301
- Original tensor mean: 3.502235
- Converted tensor mean: 2.134663
- Mean difference: 3.63509035
- Maximum pointwise difference: 8.41316605
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 6.66613007, Converted: -1.74703574
- Biggest difference in row (0, 0), sum 28.017879 vs 17.077301
- Layer 3, Token 30 (model.layers.out comparison):
- Original tensor sum: 85.620071
- Converted tensor sum: 45.387245
- Original tensor mean: 10.702509
- Converted tensor mean: 5.673406
- Mean difference: 5.25029612
- Maximum pointwise difference: 14.27389336
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 17.08827591, Converted: 2.81438255
- Biggest difference in row (0, 0), sum 85.620071 vs 45.387245
- Layer 4, Token 30 (model.layers.out comparison):
- Original tensor sum: 76.943909
- Converted tensor sum: 38.849068
- Original tensor mean: 9.617989
- Converted tensor mean: 4.856133
- Mean difference: 5.60086536
- Maximum pointwise difference: 14.69901657
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 17.17034531, Converted: 2.47132850
- Biggest difference in row (0, 0), sum 76.943909 vs 38.849068
- Layer 5, Token 30 (model.layers.out comparison):
- Original tensor sum: 59.381409
- Converted tensor sum: 29.835991
- Original tensor mean: 7.422676
- Converted tensor mean: 3.729499
- Mean difference: 4.96050739
- Maximum pointwise difference: 12.96257687
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 14.77257729, Converted: 1.81000042
- Biggest difference in row (0, 0), sum 59.381409 vs 29.835991
- Layer 6, Token 30 (model.layers.out comparison):
- Original tensor sum: 59.339882
- Converted tensor sum: 27.141592
- Original tensor mean: 7.417485
- Converted tensor mean: 3.392699
- Mean difference: 4.87107563
- Maximum pointwise difference: 14.00060558
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 15.80483246, Converted: 1.80422711
- Biggest difference in row (0, 0), sum 59.339882 vs 27.141592
- Layer 7, Token 30 (model.layers.out comparison):
- Original tensor sum: 131.503036
- Converted tensor sum: 91.997757
- Original tensor mean: 16.437880
- Converted tensor mean: 11.499720
- Mean difference: 5.33721828
- Maximum pointwise difference: 14.37581253
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 27.27588463, Converted: 12.90007210
- Biggest difference in row (0, 0), sum 131.503036 vs 91.997757
- Layer 8, Token 30 (model.layers.out comparison):
- Original tensor sum: 123.886139
- Converted tensor sum: 79.985909
- Original tensor mean: 15.485767
- Converted tensor mean: 9.998239
- Mean difference: 6.03210974
- Maximum pointwise difference: 16.31963348
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 26.14530563, Converted: 9.82567215
- Biggest difference in row (0, 0), sum 123.886139 vs 79.985909
- Layer 9, Token 30 (model.layers.out comparison):
- Original tensor sum: 118.487213
- Converted tensor sum: 61.110474
- Original tensor mean: 14.810902
- Converted tensor mean: 7.638809
- Mean difference: 7.17209244
- Maximum pointwise difference: 17.08554077
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 26.00649452, Converted: 8.92095280
- Biggest difference in row (0, 0), sum 118.487213 vs 61.110474
- Layer 10, Token 30 (model.layers.out comparison):
- Original tensor sum: 110.301559
- Converted tensor sum: 57.444092
- Original tensor mean: 13.787695
- Converted tensor mean: 7.180511
- Mean difference: 6.69173956
- Maximum pointwise difference: 18.18347359
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 26.85584831, Converted: 8.67237473
- Biggest difference in row (0, 0), sum 110.301559 vs 57.444092
- Layer 11, Token 30 (model.layers.out comparison):
- Original tensor sum: 209.603394
- Converted tensor sum: 163.279968
- Original tensor mean: 26.200424
- Converted tensor mean: 20.409996
- Mean difference: 6.23670197
- Maximum pointwise difference: 18.06859207
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 37.88209915, Converted: 19.81350708
- Biggest difference in row (0, 0), sum 209.603394 vs 163.279968
- Layer 12, Token 30 (model.layers.out comparison):
- Original tensor sum: 210.341476
- Converted tensor sum: 159.541199
- Original tensor mean: 26.292685
- Converted tensor mean: 19.942650
- Mean difference: 6.62348843
- Maximum pointwise difference: 17.79612160
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 37.21872330, Converted: 19.42260170
- Biggest difference in row (0, 0), sum 210.341476 vs 159.541199
- Layer 13, Token 30 (model.layers.out comparison):
- Original tensor sum: 206.045227
- Converted tensor sum: 156.530212
- Original tensor mean: 25.755653
- Converted tensor mean: 19.566277
- Mean difference: 6.46108055
- Maximum pointwise difference: 17.11543655
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 36.85726547, Converted: 19.74182892
- Biggest difference in row (0, 0), sum 206.045227 vs 156.530212
- Layer 14, Token 30 (model.layers.out comparison):
- Original tensor sum: 204.884491
- Converted tensor sum: 151.571396
- Original tensor mean: 25.610561
- Converted tensor mean: 18.946424
- Mean difference: 6.66413498
- Maximum pointwise difference: 18.41207695
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 37.42459488, Converted: 19.01251793
- Biggest difference in row (0, 0), sum 204.884491 vs 151.571396
- Layer 15, Token 30 (model.layers.out comparison):
- Original tensor sum: 358.352844
- Converted tensor sum: 289.552582
- Original tensor mean: 44.794106
- Converted tensor mean: 36.194073
- Mean difference: 8.60003757
- Maximum pointwise difference: 19.94306946
- Max difference location: (0, 0, 1)
- Values at max diff - Original: 55.22903824, Converted: 35.28596878
- Biggest difference in row (0, 0), sum 358.352844 vs 289.552582
- ================================================================================
- Comparing recurrent cache tensors...
- ================================================================================
- Layer 0, Token 1 (recurrent cache comparison):
- Original tensor sum: -3.317356
- Converted tensor sum: -3.317369
- Original tensor mean: -0.001037
- Converted tensor mean: -0.001037
- Mean difference: 0.00000005
- Maximum pointwise difference: 0.00000250
- Max difference location: (0, 4, 8, 1)
- Values at max diff - Original: -1.34675360, Converted: -1.34675610
- Biggest difference in row (0, 4, 3), sum -1.531199 vs -1.531201
- Original tensor:
- [[[[-0.01188182 0.00870434 -0.00525597 ... 0.01664828 0.0042294
- 0.01396134]
- [-0.00601455 0.00372374 0.00119549 ... -0.00689575 0.00234476
- -0.00023902]
- [ 0.12993637 -0.07801484 0.03047845 ... -0.05703255 -0.06261977
- -0.10933896]
- ...
- [-0.04649648 0.02312872 -0.00121024 ... -0.02114891 0.02579406
- 0.02258455]
- [-0.04175662 0.02266306 -0.0035618 ... -0.0084533 0.02211451
- 0.02416236]
- [ 0.02032246 -0.01281894 0.00930294 ... -0.02656155 -0.00984932
- -0.02582185]]
- [[ 0.01767723 0.01862493 0.00546727 ... 0.00556207 0.00562948
- 0.02792829]
- [ 0.00329595 0.00522457 0.00275346 ... 0.00801896 0.0103077
- -0.00079376]
- [-0.15666749 -0.19953263 -0.06468897 ... -0.12443222 -0.10325672
- -0.20960501]
- ...
- [ 0.04138051 0.06359718 0.02354327 ... 0.06241166 0.05219408
- 0.03928925]
- [ 0.04164674 0.06036352 0.02137833 ... 0.05146553 0.04422566
- 0.0441802 ]
- [-0.03129916 -0.03683262 -0.01027868 ... -0.01391416 -0.00729654
- -0.0505065 ]]
- [[-0.12362282 0.10214025 -0.01907291 ... -0.06202121 -0.10286148
- -0.04492377]
- [-0.0150543 0.08293391 -0.00673187 ... -0.00035791 -0.01116562
- -0.00036771]
- [-0.02004597 0.00927652 -0.00294111 ... -0.01171783 -0.01758975
- -0.00819483]
- ...
- [ 0.00270219 -0.04824698 0.00360209 ... -0.00234267 0.00216798
- -0.00194733]
- [ 0.01524375 -0.03120736 0.00455077 ... 0.00138342 0.01178958
- 0.00394295]
- [ 0.02191158 -0.03620601 0.00567079 ... 0.00745023 0.01862757
- 0.00703449]]
- ...
- [[ 0.00741537 -0.04865595 -0.00886576 ... -0.02448454 0.01194548
- -0.00861733]
- [-0.00134769 0.01334649 0.01967893 ... 0.02112496 -0.01624596
- 0.00516407]
- [-0.0050677 0.02272661 0.01807955 ... 0.02094838 -0.01449073
- 0.00967227]
- ...
- [-0.02633221 0.05768563 0.01628287 ... 0.0149423 -0.00576269
- 0.04385136]
- [-0.03326959 0.185886 -0.02219751 ... 0.04430137 -0.00146678
- 0.02707055]
- [-0.00715611 -0.00657876 -0.10976178 ... -0.09874185 0.08591411
- -0.00940268]]
- [[-0.03609058 -0.07579004 0.01501239 ... -0.00192132 -0.01605882
- 0.00820769]
- [-0.00521284 -0.03044076 0.01835437 ... -0.00124992 -0.01034386
- 0.00627647]
- [ 0.02380822 0.16997556 -0.04292414 ... 0.01702266 0.04020631
- -0.03895959]
- ...
- [-0.00115029 -0.0217499 0.00398471 ... -0.00293407 -0.00470166
- 0.00579625]
- [-0.00415053 -0.03030142 0.02518196 ... -0.00043284 -0.01240897
- 0.00634339]
- [ 0.00861687 -0.01112233 -0.03039085 ... -0.00862329 0.00705495
- 0.00750164]]
- [[ 0.00614664 -0.01302179 -0.0609244 ... -0.05605923 -0.06379453
- 0.01912303]
- [ 0.01061937 -0.00787821 -0.02997783 ... -0.03494435 -0.04587581
- 0.01142649]
- [-0.04273459 0.08807568 0.18954179 ... 0.19141153 0.05976401
- -0.01481191]
- ...
- [ 0.0059959 -0.01474381 -0.02677062 ... -0.02669823 0.00146604
- -0.00064257]
- [ 0.01313105 -0.0043188 -0.02868656 ... -0.03682106 -0.06574353
- 0.01620813]
- [-0.00286384 -0.03923091 -0.03224784 ... -0.01919729 0.12107897
- -0.03120236]]]]
- Converted tensor:
- [[[[-0.01188182 0.00870434 -0.00525597 ... 0.0166483 0.0042294
- 0.01396135]
- [-0.00601455 0.00372375 0.00119549 ... -0.00689576 0.00234477
- -0.00023903]
- [ 0.12993638 -0.07801486 0.03047847 ... -0.05703259 -0.06261978
- -0.10933899]
- ...
- [-0.04649651 0.02312873 -0.00121024 ... -0.02114895 0.02579408
- 0.02258454]
- [-0.04175663 0.02266307 -0.0035618 ... -0.00845332 0.02211452
- 0.02416236]
- [ 0.02032245 -0.01281894 0.00930295 ... -0.02656158 -0.00984932
- -0.02582186]]
- [[ 0.01767723 0.01862492 0.00546727 ... 0.00556206 0.00562947
- 0.02792831]
- [ 0.00329595 0.00522458 0.00275346 ... 0.00801897 0.01030772
- -0.00079377]
- [-0.15666753 -0.19953264 -0.06468898 ... -0.12443225 -0.10325674
- -0.20960508]
- ...
- [ 0.04138052 0.06359721 0.02354329 ... 0.06241173 0.05219414
- 0.03928925]
- [ 0.04164676 0.06036354 0.02137835 ... 0.05146557 0.04422571
- 0.0441802 ]
- [-0.03129917 -0.03683261 -0.01027868 ... -0.01391415 -0.00729652
- -0.05050653]]
- [[-0.12362286 0.10214026 -0.01907291 ... -0.06202124 -0.10286151
- -0.04492378]
- [-0.01505431 0.08293395 -0.00673187 ... -0.00035791 -0.01116562
- -0.00036771]
- [-0.02004598 0.00927651 -0.00294111 ... -0.01171784 -0.01758976
- -0.00819483]
- ...
- [ 0.00270219 -0.04824701 0.00360209 ... -0.00234266 0.00216798
- -0.00194733]
- [ 0.01524375 -0.03120738 0.00455077 ... 0.00138341 0.01178958
- 0.00394295]
- [ 0.02191159 -0.03620601 0.00567079 ... 0.00745023 0.01862758
- 0.00703449]]
- ...
- [[ 0.00741537 -0.04865595 -0.00886576 ... -0.02448454 0.01194548
- -0.00861733]
- [-0.00134769 0.01334648 0.01967893 ... 0.02112496 -0.01624596
- 0.00516407]
- [-0.0050677 0.02272661 0.01807955 ... 0.02094838 -0.01449073
- 0.00967227]
- ...
- [-0.02633222 0.05768563 0.01628287 ... 0.01494229 -0.00576268
- 0.04385137]
- [-0.03326959 0.18588606 -0.02219752 ... 0.04430138 -0.00146678
- 0.02707056]
- [-0.00715612 -0.00657868 -0.1097618 ... -0.09874186 0.08591412
- -0.00940266]]
- [[-0.03609059 -0.07579008 0.01501241 ... -0.00192132 -0.01605884
- 0.00820769]
- [-0.00521284 -0.03044078 0.01835438 ... -0.00124992 -0.01034387
- 0.00627648]
- [ 0.02380823 0.16997567 -0.04292417 ... 0.01702267 0.04020633
- -0.03895961]
- ...
- [-0.00115029 -0.02174992 0.00398472 ... -0.00293407 -0.00470167
- 0.00579625]
- [-0.00415053 -0.03030144 0.02518198 ... -0.00043284 -0.01240898
- 0.00634339]
- [ 0.00861687 -0.01112236 -0.03039089 ... -0.0086233 0.00705496
- 0.00750165]]
- [[ 0.00614664 -0.0130218 -0.06092443 ... -0.05605926 -0.06379459
- 0.01912304]
- [ 0.01061938 -0.00787821 -0.02997785 ... -0.03494437 -0.04587585
- 0.0114265 ]
- [-0.04273462 0.08807574 0.18954192 ... 0.19141163 0.05976404
- -0.01481192]
- ...
- [ 0.0059959 -0.01474382 -0.02677064 ... -0.02669825 0.00146605
- -0.00064257]
- [ 0.01313106 -0.00431879 -0.02868656 ... -0.03682107 -0.06574361
- 0.01620815]
- [-0.00286384 -0.03923097 -0.0322479 ... -0.01919733 0.12107915
- -0.03120241]]]]
- Layer 1, Token 1 (recurrent cache comparison):
- Original tensor sum: 5.922648
- Converted tensor sum: 5.922640
- Original tensor mean: 0.001851
- Converted tensor mean: 0.001851
- Mean difference: 0.00000005
- Maximum pointwise difference: 0.00000155
- Max difference location: (0, 24, 4, 5)
- Values at max diff - Original: -0.26876855, Converted: -0.26877010
- Biggest difference in row (0, 14, 3), sum -0.918731 vs -0.918733
- Layer 2, Token 1 (recurrent cache comparison):
- Original tensor sum: 12.229185
- Converted tensor sum: 12.229182
- Original tensor mean: 0.003822
- Converted tensor mean: 0.003822
- Mean difference: 0.00000009
- Maximum pointwise difference: 0.00000620
- Max difference location: (0, 3, 6, 0)
- Values at max diff - Original: 2.35518169, Converted: 2.35517550
- Biggest difference in row (0, 3, 6), sum 3.961787 vs 3.961781
- Layer 4, Token 1 (recurrent cache comparison):
- Original tensor sum: 4.260600
- Converted tensor sum: 4.260149
- Original tensor mean: 0.001331
- Converted tensor mean: 0.001331
- Mean difference: 0.00000526
- Maximum pointwise difference: 0.00011003
- Max difference location: (0, 25, 2, 4)
- Values at max diff - Original: 0.21691340, Converted: 0.21702343
- Biggest difference in row (0, 3, 1), sum -0.358275 vs -0.358136
- Layer 5, Token 1 (recurrent cache comparison):
- Original tensor sum: 12.744413
- Converted tensor sum: 12.744514
- Original tensor mean: 0.003983
- Converted tensor mean: 0.003983
- Mean difference: 0.00000413
- Maximum pointwise difference: 0.00011247
- Max difference location: (0, 5, 2, 8)
- Values at max diff - Original: 0.86490124, Converted: 0.86478877
- Biggest difference in row (0, 5, 2), sum -0.456235 vs -0.456385
- Layer 6, Token 1 (recurrent cache comparison):
- Original tensor sum: -14.490761
- Converted tensor sum: -14.493523
- Original tensor mean: -0.004528
- Converted tensor mean: -0.004529
- Mean difference: 0.00002331
- Maximum pointwise difference: 0.00149512
- Max difference location: (0, 28, 9, 8)
- Values at max diff - Original: 2.97030377, Converted: 2.96880865
- Biggest difference in row (0, 8, 5), sum 5.080033 vs 5.077976
- Layer 8, Token 1 (recurrent cache comparison):
- Original tensor sum: -18.806082
- Converted tensor sum: -18.808296
- Original tensor mean: -0.005877
- Converted tensor mean: -0.005878
- Mean difference: 0.00002112
- Maximum pointwise difference: 0.00074953
- Max difference location: (0, 20, 1, 8)
- Values at max diff - Original: 0.62514198, Converted: 0.62439245
- Biggest difference in row (0, 25, 6), sum 1.048032 vs 1.047222
- Layer 9, Token 1 (recurrent cache comparison):
- Original tensor sum: 16.764290
- Converted tensor sum: 16.760258
- Original tensor mean: 0.005239
- Converted tensor mean: 0.005238
- Mean difference: 0.00002129
- Maximum pointwise difference: 0.00044209
- Max difference location: (0, 21, 5, 8)
- Values at max diff - Original: 0.85285813, Converted: 0.85241604
- Biggest difference in row (0, 0, 1), sum -0.046629 vs -0.046069
- Layer 10, Token 1 (recurrent cache comparison):
- Original tensor sum: 13.242327
- Converted tensor sum: 13.242817
- Original tensor mean: 0.004138
- Converted tensor mean: 0.004138
- Mean difference: 0.00002325
- Maximum pointwise difference: 0.00070238
- Max difference location: (0, 18, 5, 1)
- Values at max diff - Original: 0.48423475, Converted: 0.48353237
- Biggest difference in row (0, 10, 0), sum -0.502937 vs -0.502024
- Layer 12, Token 1 (recurrent cache comparison):
- Original tensor sum: 14.374599
- Converted tensor sum: 14.372844
- Original tensor mean: 0.004492
- Converted tensor mean: 0.004492
- Mean difference: 0.00002070
- Maximum pointwise difference: 0.00084567
- Max difference location: (0, 0, 3, 1)
- Values at max diff - Original: 1.31967652, Converted: 1.31883085
- Biggest difference in row (0, 0, 5), sum -0.676982 vs -0.676066
- Layer 13, Token 1 (recurrent cache comparison):
- Original tensor sum: 28.120127
- Converted tensor sum: 28.128502
- Original tensor mean: 0.008788
- Converted tensor mean: 0.008790
- Mean difference: 0.00001703
- Maximum pointwise difference: 0.00037390
- Max difference location: (0, 4, 2, 1)
- Values at max diff - Original: -0.33164161, Converted: -0.33126771
- Biggest difference in row (0, 24, 1), sum -0.030439 vs -0.029779
- Layer 14, Token 1 (recurrent cache comparison):
- Original tensor sum: 27.012432
- Converted tensor sum: 27.011541
- Original tensor mean: 0.008441
- Converted tensor mean: 0.008441
- Mean difference: 0.00002248
- Maximum pointwise difference: 0.00121775
- Max difference location: (0, 18, 0, 1)
- Values at max diff - Original: 0.37722895, Converted: 0.37844670
- Biggest difference in row (0, 28, 1), sum -0.493242 vs -0.492468
- Layer 0, Token 2 (recurrent cache comparison):
- Original tensor sum: 4.531467
- Converted tensor sum: 4.531466
- Original tensor mean: 0.001416
- Converted tensor mean: 0.001416
- Mean difference: 0.08359446
- Maximum pointwise difference: 1.77142978
- Max difference location: (0, 1, 3, 5)
- Values at max diff - Original: -0.02699410, Converted: 1.74443567
- Biggest difference in row (0, 25, 2), sum -0.057628 vs -2.844908
- Layer 1, Token 2 (recurrent cache comparison):
- Original tensor sum: 11.008316
- Converted tensor sum: 11.008326
- Original tensor mean: 0.003440
- Converted tensor mean: 0.003440
- Mean difference: 0.06277661
- Maximum pointwise difference: 0.71243107
- Max difference location: (0, 10, 0, 2)
- Values at max diff - Original: 0.01163737, Converted: 0.72406846
- Biggest difference in row (0, 12, 3), sum 0.228652 vs -1.768667
- Layer 2, Token 2 (recurrent cache comparison):
- Original tensor sum: 17.248280
- Converted tensor sum: 17.248241
- Original tensor mean: 0.005390
- Converted tensor mean: 0.005390
- Mean difference: 0.08558470
- Maximum pointwise difference: 1.97508693
- Max difference location: (0, 10, 7, 3)
- Values at max diff - Original: 1.98190892, Converted: 0.00682194
- Biggest difference in row (0, 27, 7), sum -0.594255 vs 3.191915
- Layer 4, Token 2 (recurrent cache comparison):
- Original tensor sum: 7.984356
- Converted tensor sum: 7.983810
- Original tensor mean: 0.002495
- Converted tensor mean: 0.002495
- Mean difference: 0.07671142
- Maximum pointwise difference: 1.85330796
- Max difference location: (0, 20, 4, 6)
- Values at max diff - Original: 0.01886898, Converted: 1.87217689
- Biggest difference in row (0, 20, 6), sum 2.845701 vs -0.305152
- Layer 5, Token 2 (recurrent cache comparison):
- Original tensor sum: 9.205366
- Converted tensor sum: 9.205467
- Original tensor mean: 0.002877
- Converted tensor mean: 0.002877
- Mean difference: 0.06804129
- Maximum pointwise difference: 1.41803539
- Max difference location: (0, 31, 6, 3)
- Values at max diff - Original: 1.40662789, Converted: -0.01140754
- Biggest difference in row (0, 24, 8), sum -0.372748 vs -2.656956
- Layer 6, Token 2 (recurrent cache comparison):
- Original tensor sum: -7.876884
- Converted tensor sum: -7.873561
- Original tensor mean: -0.002462
- Converted tensor mean: -0.002460
- Mean difference: 0.10029175
- Maximum pointwise difference: 2.66715860
- Max difference location: (0, 28, 9, 8)
- Values at max diff - Original: 2.59401202, Converted: -0.07314663
- Biggest difference in row (0, 19, 4), sum 0.710449 vs -5.203167
- Layer 8, Token 2 (recurrent cache comparison):
- Original tensor sum: -13.154655
- Converted tensor sum: -13.156775
- Original tensor mean: -0.004111
- Converted tensor mean: -0.004111
- Mean difference: 0.08601540
- Maximum pointwise difference: 2.83156943
- Max difference location: (0, 12, 4, 7)
- Values at max diff - Original: 2.83582592, Converted: 0.00425647
- Biggest difference in row (0, 30, 3), sum -0.848019 vs -4.754877
- Layer 9, Token 2 (recurrent cache comparison):
- Original tensor sum: 13.187357
- Converted tensor sum: 13.181618
- Original tensor mean: 0.004121
- Converted tensor mean: 0.004119
- Mean difference: 0.05544823
- Maximum pointwise difference: 0.65544760
- Max difference location: (0, 21, 5, 8)
- Values at max diff - Original: 0.71338689, Converted: 0.05793926
- Biggest difference in row (0, 19, 9), sum -0.054951 vs -1.900613
- Layer 10, Token 2 (recurrent cache comparison):
- Original tensor sum: 10.860550
- Converted tensor sum: 10.860478
- Original tensor mean: 0.003394
- Converted tensor mean: 0.003394
- Mean difference: 0.05739149
- Maximum pointwise difference: 1.22302496
- Max difference location: (0, 30, 4, 5)
- Values at max diff - Original: -0.01519475, Converted: 1.20783019
- Biggest difference in row (0, 23, 3), sum -0.221712 vs -3.841269
- Layer 12, Token 2 (recurrent cache comparison):
- Original tensor sum: 3.134315
- Converted tensor sum: 3.132089
- Original tensor mean: 0.000979
- Converted tensor mean: 0.000979
- Mean difference: 0.07305207
- Maximum pointwise difference: 2.30649829
- Max difference location: (0, 5, 4, 5)
- Values at max diff - Original: 2.33141446, Converted: 0.02491626
- Biggest difference in row (0, 0, 1), sum -1.541565 vs -6.179572
- Layer 13, Token 2 (recurrent cache comparison):
- Original tensor sum: 18.773312
- Converted tensor sum: 18.779602
- Original tensor mean: 0.005867
- Converted tensor mean: 0.005869
- Mean difference: 0.04688552
- Maximum pointwise difference: 0.60163057
- Max difference location: (0, 6, 1, 7)
- Values at max diff - Original: 0.04654653, Converted: 0.64817709
- Biggest difference in row (0, 4, 1), sum -0.566141 vs -2.623919
- Layer 14, Token 2 (recurrent cache comparison):
- Original tensor sum: 13.960938
- Converted tensor sum: 13.964265
- Original tensor mean: 0.004363
- Converted tensor mean: 0.004364
- Mean difference: 0.06759205
- Maximum pointwise difference: 1.25844812
- Max difference location: (0, 15, 8, 4)
- Values at max diff - Original: 1.26228178, Converted: 0.00383368
- Biggest difference in row (0, 31, 3), sum -0.096068 vs -5.326997
- Layer 0, Token 3 (recurrent cache comparison):
- Original tensor sum: 0.684784
- Converted tensor sum: 0.422194
- Original tensor mean: 0.000214
- Converted tensor mean: 0.000132
- Mean difference: 0.06314481
- Maximum pointwise difference: 1.39332521
- Max difference location: (0, 28, 5, 9)
- Values at max diff - Original: -0.03651731, Converted: 1.35680795
- Biggest difference in row (0, 4, 9), sum 2.498745 vs 0.335116
- Layer 1, Token 3 (recurrent cache comparison):
- Original tensor sum: 3.526195
- Converted tensor sum: 7.632782
- Original tensor mean: 0.001102
- Converted tensor mean: 0.002385
- Mean difference: 0.04427468
- Maximum pointwise difference: 0.98676205
- Max difference location: (0, 12, 3, 7)
- Values at max diff - Original: 0.92044085, Converted: -0.06632122
- Biggest difference in row (0, 24, 2), sum 0.609889 vs -0.814516
- Layer 2, Token 3 (recurrent cache comparison):
- Original tensor sum: 15.850447
- Converted tensor sum: 14.785593
- Original tensor mean: 0.004953
- Converted tensor mean: 0.004620
- Mean difference: 0.06092339
- Maximum pointwise difference: 2.43390632
- Max difference location: (0, 1, 0, 4)
- Values at max diff - Original: 2.80371213, Converted: 0.36980587
- Biggest difference in row (0, 1, 0), sum 4.370481 vs 0.423526
- Layer 4, Token 3 (recurrent cache comparison):
- Original tensor sum: 19.856752
- Converted tensor sum: 11.778177
- Original tensor mean: 0.006205
- Converted tensor mean: 0.003681
- Mean difference: 0.07194611
- Maximum pointwise difference: 2.48742008
- Max difference location: (0, 14, 3, 9)
- Values at max diff - Original: 2.47506452, Converted: -0.01235563
- Biggest difference in row (0, 19, 2), sum 0.372920 vs -2.973422
- Layer 5, Token 3 (recurrent cache comparison):
- Original tensor sum: 9.792118
- Converted tensor sum: 9.138845
- Original tensor mean: 0.003060
- Converted tensor mean: 0.002856
- Mean difference: 0.05089124
- Maximum pointwise difference: 1.42593253
- Max difference location: (0, 29, 0, 8)
- Values at max diff - Original: 1.42089915, Converted: -0.00503340
- Biggest difference in row (0, 29, 0), sum 2.198264 vs 0.182178
- Layer 6, Token 3 (recurrent cache comparison):
- Original tensor sum: 39.415325
- Converted tensor sum: 64.451355
- Original tensor mean: 0.012317
- Converted tensor mean: 0.020141
- Mean difference: 0.08079723
- Maximum pointwise difference: 4.89647627
- Max difference location: (0, 15, 3, 6)
- Values at max diff - Original: -0.18732879, Converted: 4.70914745
- Biggest difference in row (0, 6, 0), sum 0.836966 vs 8.447909
- Layer 8, Token 3 (recurrent cache comparison):
- Original tensor sum: 23.689789
- Converted tensor sum: 12.321936
- Original tensor mean: 0.007403
- Converted tensor mean: 0.003851
- Mean difference: 0.08749782
- Maximum pointwise difference: 3.85876298
- Max difference location: (0, 6, 4, 8)
- Values at max diff - Original: 0.01438628, Converted: 3.87314916
- Biggest difference in row (0, 6, 4), sum 0.119168 vs 5.376393
- Layer 9, Token 3 (recurrent cache comparison):
- Original tensor sum: 8.901470
- Converted tensor sum: 4.339914
- Original tensor mean: 0.002782
- Converted tensor mean: 0.001356
- Mean difference: 0.06287189
- Maximum pointwise difference: 1.40262556
- Max difference location: (0, 4, 0, 5)
- Values at max diff - Original: -0.00241524, Converted: 1.40021038
- Biggest difference in row (0, 18, 1), sum 1.268483 vs -0.696936
- Layer 10, Token 3 (recurrent cache comparison):
- Original tensor sum: 18.375820
- Converted tensor sum: 3.348410
- Original tensor mean: 0.005742
- Converted tensor mean: 0.001046
- Mean difference: 0.06042652
- Maximum pointwise difference: 2.94567752
- Max difference location: (0, 3, 8, 7)
- Values at max diff - Original: -0.26693973, Converted: 2.67873788
- Biggest difference in row (0, 3, 8), sum 0.004062 vs 2.562259
- Layer 12, Token 3 (recurrent cache comparison):
- Original tensor sum: 15.322770
- Converted tensor sum: 2.674777
- Original tensor mean: 0.004788
- Converted tensor mean: 0.000836
- Mean difference: 0.07379209
- Maximum pointwise difference: 2.73401403
- Max difference location: (0, 30, 4, 0)
- Values at max diff - Original: -0.02140745, Converted: 2.71260667
- Biggest difference in row (0, 7, 6), sum -0.113145 vs 2.612027
- Layer 13, Token 3 (recurrent cache comparison):
- Original tensor sum: 14.910538
- Converted tensor sum: 8.724025
- Original tensor mean: 0.004660
- Converted tensor mean: 0.002726
- Mean difference: 0.05616682
- Maximum pointwise difference: 1.43021226
- Max difference location: (0, 26, 5, 0)
- Values at max diff - Original: -1.41802061, Converted: 0.01219167
- Biggest difference in row (0, 3, 9), sum 0.002563 vs -3.030253
- Layer 14, Token 3 (recurrent cache comparison):
- Original tensor sum: 59.583878
- Converted tensor sum: -2.192444
- Original tensor mean: 0.018620
- Converted tensor mean: -0.000685
- Mean difference: 0.10199536
- Maximum pointwise difference: 2.77383018
- Max difference location: (0, 2, 0, 2)
- Values at max diff - Original: 2.72142744, Converted: -0.05240267
- Biggest difference in row (0, 16, 6), sum 0.145663 vs -8.295967
- Layer 0, Token 4 (recurrent cache comparison):
- Original tensor sum: 7.899137
- Converted tensor sum: 4.783788
- Original tensor mean: 0.002468
- Converted tensor mean: 0.001495
- Mean difference: 0.06620996
- Maximum pointwise difference: 1.03156960
- Max difference location: (0, 1, 3, 7)
- Values at max diff - Original: -0.00703356, Converted: -1.03860319
- Biggest difference in row (0, 21, 4), sum 0.038056 vs -1.875101
- Layer 1, Token 4 (recurrent cache comparison):
- Original tensor sum: 11.224692
- Converted tensor sum: 15.232712
- Original tensor mean: 0.003508
- Converted tensor mean: 0.004760
- Mean difference: 0.06535107
- Maximum pointwise difference: 1.53891993
- Max difference location: (0, 28, 3, 7)
- Values at max diff - Original: 0.04949531, Converted: 1.58841527
- Biggest difference in row (0, 28, 3), sum 0.880954 vs 3.297761
- Layer 2, Token 4 (recurrent cache comparison):
- Original tensor sum: 15.875578
- Converted tensor sum: 5.908407
- Original tensor mean: 0.004961
- Converted tensor mean: 0.001846
- Mean difference: 0.09298474
- Maximum pointwise difference: 2.68871808
- Max difference location: (0, 14, 3, 7)
- Values at max diff - Original: 2.67706752, Converted: -0.01165051
- Biggest difference in row (0, 27, 2), sum 3.910276 vs 0.204061
- Layer 4, Token 4 (recurrent cache comparison):
- Original tensor sum: 34.602001
- Converted tensor sum: 14.365917
- Original tensor mean: 0.010813
- Converted tensor mean: 0.004489
- Mean difference: 0.10193390
- Maximum pointwise difference: 3.22817802
- Max difference location: (0, 26, 6, 5)
- Values at max diff - Original: -0.04091755, Converted: 3.18726039
- Biggest difference in row (0, 26, 6), sum 0.735282 vs 4.516615
- Layer 5, Token 4 (recurrent cache comparison):
- Original tensor sum: 24.322514
- Converted tensor sum: 18.418108
- Original tensor mean: 0.007601
- Converted tensor mean: 0.005756
- Mean difference: 0.08364967
- Maximum pointwise difference: 2.23648024
- Max difference location: (0, 22, 6, 1)
- Values at max diff - Original: 2.19508362, Converted: -0.04139667
- Biggest difference in row (0, 3, 0), sum 3.424673 vs -0.163761
- Layer 6, Token 4 (recurrent cache comparison):
- Original tensor sum: 34.762104
- Converted tensor sum: 77.105461
- Original tensor mean: 0.010863
- Converted tensor mean: 0.024095
- Mean difference: 0.12376648
- Maximum pointwise difference: 3.91498804
- Max difference location: (0, 12, 5, 4)
- Values at max diff - Original: -0.17797241, Converted: 3.73701572
- Biggest difference in row (0, 10, 4), sum -0.207436 vs 6.811815
- Layer 8, Token 4 (recurrent cache comparison):
- Original tensor sum: 52.858780
- Converted tensor sum: 5.570855
- Original tensor mean: 0.016518
- Converted tensor mean: 0.001741
- Mean difference: 0.12005786
- Maximum pointwise difference: 5.32569838
- Max difference location: (0, 12, 3, 5)
- Values at max diff - Original: 5.34859705, Converted: 0.02289869
- Biggest difference in row (0, 20, 0), sum 8.008233 vs -0.003253
- Layer 9, Token 4 (recurrent cache comparison):
- Original tensor sum: 20.435345
- Converted tensor sum: -2.045311
- Original tensor mean: 0.006386
- Converted tensor mean: -0.000639
- Mean difference: 0.08372314
- Maximum pointwise difference: 2.78602862
- Max difference location: (0, 28, 2, 0)
- Values at max diff - Original: 2.71785426, Converted: -0.06817436
- Biggest difference in row (0, 28, 2), sum 4.726543 vs 1.302800
- Layer 10, Token 4 (recurrent cache comparison):
- Original tensor sum: 28.353613
- Converted tensor sum: 12.385429
- Original tensor mean: 0.008861
- Converted tensor mean: 0.003870
- Mean difference: 0.09276734
- Maximum pointwise difference: 2.28980851
- Max difference location: (0, 2, 9, 5)
- Values at max diff - Original: -0.00412231, Converted: 2.28568625
- Biggest difference in row (0, 13, 8), sum 3.624647 vs 0.020094
- Layer 12, Token 4 (recurrent cache comparison):
- Original tensor sum: 70.502647
- Converted tensor sum: -11.005323
- Original tensor mean: 0.022032
- Converted tensor mean: -0.003439
- Mean difference: 0.13381547
- Maximum pointwise difference: 3.57928109
- Max difference location: (0, 30, 0, 4)
- Values at max diff - Original: 3.95710707, Converted: 0.37782601
- Biggest difference in row (0, 21, 9), sum -1.532540 vs -12.302475
- Layer 13, Token 4 (recurrent cache comparison):
- Original tensor sum: 38.753532
- Converted tensor sum: 5.437235
- Original tensor mean: 0.012110
- Converted tensor mean: 0.001699
- Mean difference: 0.08178755
- Maximum pointwise difference: 2.55715966
- Max difference location: (0, 3, 4, 9)
- Values at max diff - Original: 2.84962225, Converted: 0.29246253
- Biggest difference in row (0, 3, 4), sum 4.266754 vs 0.847269
- Layer 14, Token 4 (recurrent cache comparison):
- Original tensor sum: 141.714035
- Converted tensor sum: 3.640444
- Original tensor mean: 0.044286
- Converted tensor mean: 0.001138
- Mean difference: 0.14463389
- Maximum pointwise difference: 5.68939066
- Max difference location: (0, 16, 7, 6)
- Values at max diff - Original: 5.55827475, Converted: -0.13111581
- Biggest difference in row (0, 28, 1), sum 11.271111 vs 0.609705
- Layer 0, Token 5 (recurrent cache comparison):
- Original tensor sum: 9.131315
- Converted tensor sum: 12.396471
- Original tensor mean: 0.002854
- Converted tensor mean: 0.003874
- Mean difference: 0.05539500
- Maximum pointwise difference: 1.09641600
- Max difference location: (0, 28, 9, 5)
- Values at max diff - Original: 1.12920201, Converted: 0.03278603
- Biggest difference in row (0, 4, 9), sum 1.258661 vs 0.175881
- Layer 1, Token 5 (recurrent cache comparison):
- Original tensor sum: 24.366199
- Converted tensor sum: 10.052802
- Original tensor mean: 0.007614
- Converted tensor mean: 0.003142
- Mean difference: 0.05824861
- Maximum pointwise difference: 2.14620328
- Max difference location: (0, 14, 2, 5)
- Values at max diff - Original: 0.00643282, Converted: 2.15263605
- Biggest difference in row (0, 6, 4), sum 1.356658 vs -0.156208
- Layer 2, Token 5 (recurrent cache comparison):
- Original tensor sum: 50.376324
- Converted tensor sum: 20.166676
- Original tensor mean: 0.015743
- Converted tensor mean: 0.006302
- Mean difference: 0.07966200
- Maximum pointwise difference: 2.04463291
- Max difference location: (0, 27, 4, 2)
- Values at max diff - Original: 2.00972342, Converted: -0.03490951
- Biggest difference in row (0, 27, 2), sum 5.745794 vs 1.959190
- Layer 4, Token 5 (recurrent cache comparison):
- Original tensor sum: 44.478531
- Converted tensor sum: 48.696777
- Original tensor mean: 0.013900
- Converted tensor mean: 0.015218
- Mean difference: 0.09315307
- Maximum pointwise difference: 2.43060613
- Max difference location: (0, 26, 5, 6)
- Values at max diff - Original: 0.46136302, Converted: 2.89196920
- Biggest difference in row (0, 8, 6), sum 0.054414 vs 4.076869
- Layer 5, Token 5 (recurrent cache comparison):
- Original tensor sum: 57.863758
- Converted tensor sum: 66.390915
- Original tensor mean: 0.018082
- Converted tensor mean: 0.020747
- Mean difference: 0.10497291
- Maximum pointwise difference: 2.49356651
- Max difference location: (0, 17, 3, 6)
- Values at max diff - Original: 2.50974846, Converted: 0.01618202
- Biggest difference in row (0, 28, 9), sum 3.771637 vs 0.053981
- Layer 6, Token 5 (recurrent cache comparison):
- Original tensor sum: 39.502037
- Converted tensor sum: 161.817169
- Original tensor mean: 0.012344
- Converted tensor mean: 0.050568
- Mean difference: 0.14194940
- Maximum pointwise difference: 3.58584666
- Max difference location: (0, 26, 3, 9)
- Values at max diff - Original: 3.40417242, Converted: -0.18167432
- Biggest difference in row (0, 12, 4), sum 1.168972 vs 7.813907
- Layer 8, Token 5 (recurrent cache comparison):
- Original tensor sum: 44.896149
- Converted tensor sum: 38.246201
- Original tensor mean: 0.014030
- Converted tensor mean: 0.011952
- Mean difference: 0.10806250
- Maximum pointwise difference: 2.33007479
- Max difference location: (0, 1, 6, 0)
- Values at max diff - Original: 2.35027504, Converted: 0.02020025
- Biggest difference in row (0, 1, 6), sum 5.246045 vs 0.247956
- Layer 9, Token 5 (recurrent cache comparison):
- Original tensor sum: 20.569098
- Converted tensor sum: 11.688971
- Original tensor mean: 0.006428
- Converted tensor mean: 0.003653
- Mean difference: 0.08318320
- Maximum pointwise difference: 1.79917610
- Max difference location: (0, 28, 0, 3)
- Values at max diff - Original: 1.69918346, Converted: -0.09999267
- Biggest difference in row (0, 3, 4), sum 3.283048 vs 0.225886
- Layer 10, Token 5 (recurrent cache comparison):
- Original tensor sum: 42.493145
- Converted tensor sum: 26.750286
- Original tensor mean: 0.013279
- Converted tensor mean: 0.008359
- Mean difference: 0.09709122
- Maximum pointwise difference: 2.97919798
- Max difference location: (0, 10, 0, 3)
- Values at max diff - Original: 3.34914303, Converted: 0.36994517
- Biggest difference in row (0, 10, 0), sum 5.613201 vs -0.079588
- Layer 12, Token 5 (recurrent cache comparison):
- Original tensor sum: 91.460236
- Converted tensor sum: 14.637827
- Original tensor mean: 0.028581
- Converted tensor mean: 0.004574
- Mean difference: 0.12184902
- Maximum pointwise difference: 4.17300320
- Max difference location: (0, 23, 2, 9)
- Values at max diff - Original: 3.98550677, Converted: -0.18749636
- Biggest difference in row (0, 28, 5), sum 5.243108 vs -0.797499
- Layer 13, Token 5 (recurrent cache comparison):
- Original tensor sum: 50.306297
- Converted tensor sum: 16.367235
- Original tensor mean: 0.015721
- Converted tensor mean: 0.005115
- Mean difference: 0.08688851
- Maximum pointwise difference: 2.08200264
- Max difference location: (0, 19, 9, 3)
- Values at max diff - Original: -1.59057343, Converted: 0.49142930
- Biggest difference in row (0, 19, 5), sum 3.595970 vs 0.049368
- Layer 14, Token 5 (recurrent cache comparison):
- Original tensor sum: 120.273888
- Converted tensor sum: 44.449192
- Original tensor mean: 0.037586
- Converted tensor mean: 0.013890
- Mean difference: 0.13929905
- Maximum pointwise difference: 4.73129654
- Max difference location: (0, 18, 5, 9)
- Values at max diff - Original: 4.35292673, Converted: -0.37836996
- Biggest difference in row (0, 18, 5), sum 8.950241 vs -0.746074
- Layer 0, Token 6 (recurrent cache comparison):
- Original tensor sum: 11.608546
- Converted tensor sum: 10.627696
- Original tensor mean: 0.003628
- Converted tensor mean: 0.003321
- Mean difference: 0.05484011
- Maximum pointwise difference: 1.12371099
- Max difference location: (0, 1, 2, 3)
- Values at max diff - Original: 1.11502755, Converted: -0.00868344
- Biggest difference in row (0, 28, 5), sum 0.118289 vs 2.332705
- Layer 1, Token 6 (recurrent cache comparison):
- Original tensor sum: 92.219727
- Converted tensor sum: 28.768579
- Original tensor mean: 0.028819
- Converted tensor mean: 0.008990
- Mean difference: 0.08724788
- Maximum pointwise difference: 1.51144505
- Max difference location: (0, 23, 0, 4)
- Values at max diff - Original: 1.55765891, Converted: 0.04621384
- Biggest difference in row (0, 14, 0), sum 2.954077 vs -0.012181
- Layer 2, Token 6 (recurrent cache comparison):
- Original tensor sum: 101.609215
- Converted tensor sum: 93.242142
- Original tensor mean: 0.031753
- Converted tensor mean: 0.029138
- Mean difference: 0.12457406
- Maximum pointwise difference: 2.07845497
- Max difference location: (0, 13, 1, 9)
- Values at max diff - Original: 2.17026591, Converted: 0.09181103
- Biggest difference in row (0, 5, 5), sum 4.805948 vs -0.569050
- Layer 4, Token 6 (recurrent cache comparison):
- Original tensor sum: 13.856092
- Converted tensor sum: 22.610188
- Original tensor mean: 0.004330
- Converted tensor mean: 0.007066
- Mean difference: 0.09440003
- Maximum pointwise difference: 2.37087321
- Max difference location: (0, 19, 2, 6)
- Values at max diff - Original: -0.02839734, Converted: 2.34247589
- Biggest difference in row (0, 28, 1), sum -0.280756 vs 2.458031
- Layer 5, Token 6 (recurrent cache comparison):
- Original tensor sum: 39.960052
- Converted tensor sum: 41.437057
- Original tensor mean: 0.012488
- Converted tensor mean: 0.012949
- Mean difference: 0.11209048
- Maximum pointwise difference: 2.79378676
- Max difference location: (0, 19, 8, 4)
- Values at max diff - Original: 0.01245314, Converted: 2.80623984
- Biggest difference in row (0, 13, 1), sum 6.005285 vs -0.085273
- Layer 6, Token 6 (recurrent cache comparison):
- Original tensor sum: -2.419616
- Converted tensor sum: 156.977676
- Original tensor mean: -0.000756
- Converted tensor mean: 0.049056
- Mean difference: 0.13894926
- Maximum pointwise difference: 6.69993019
- Max difference location: (0, 10, 3, 1)
- Values at max diff - Original: -1.12109971, Converted: 5.57883024
- Biggest difference in row (0, 12, 1), sum -0.201558 vs 10.382487
- Layer 8, Token 6 (recurrent cache comparison):
- Original tensor sum: 8.213539
- Converted tensor sum: 18.368313
- Original tensor mean: 0.002567
- Converted tensor mean: 0.005740
- Mean difference: 0.10382870
- Maximum pointwise difference: 3.36055303
- Max difference location: (0, 6, 4, 8)
- Values at max diff - Original: 0.10355368, Converted: 3.46410680
- Biggest difference in row (0, 6, 4), sum -0.613209 vs 4.409491
- Layer 9, Token 6 (recurrent cache comparison):
- Original tensor sum: 12.889297
- Converted tensor sum: -0.411069
- Original tensor mean: 0.004028
- Converted tensor mean: -0.000128
- Mean difference: 0.08612256
- Maximum pointwise difference: 1.89322448
- Max difference location: (0, 6, 4, 1)
- Values at max diff - Original: -0.52327746, Converted: 1.36994708
- Biggest difference in row (0, 21, 7), sum 0.245074 vs -2.764518
- Layer 10, Token 6 (recurrent cache comparison):
- Original tensor sum: 3.506564
- Converted tensor sum: 11.408216
- Original tensor mean: 0.001096
- Converted tensor mean: 0.003565
- Mean difference: 0.08594991
- Maximum pointwise difference: 3.30037594
- Max difference location: (0, 3, 8, 7)
- Values at max diff - Original: -0.08371022, Converted: 3.21666574
- Biggest difference in row (0, 0, 7), sum -0.426351 vs 3.218251
- Layer 12, Token 6 (recurrent cache comparison):
- Original tensor sum: 30.742065
- Converted tensor sum: 1.932971
- Original tensor mean: 0.009607
- Converted tensor mean: 0.000604
- Mean difference: 0.10983281
- Maximum pointwise difference: 3.31334734
- Max difference location: (0, 29, 5, 6)
- Values at max diff - Original: 3.34788132, Converted: 0.03453401
- Biggest difference in row (0, 29, 5), sum 6.176572 vs 0.072738
- Layer 13, Token 6 (recurrent cache comparison):
- Original tensor sum: 14.579787
- Converted tensor sum: 9.630959
- Original tensor mean: 0.004556
- Converted tensor mean: 0.003010
- Mean difference: 0.08181592
- Maximum pointwise difference: 2.27647829
- Max difference location: (0, 19, 1, 3)
- Values at max diff - Original: 2.46903062, Converted: 0.19255245
- Biggest difference in row (0, 19, 5), sum 2.241402 vs -0.017656
- Layer 14, Token 6 (recurrent cache comparison):
- Original tensor sum: 42.673443
- Converted tensor sum: 13.958614
- Original tensor mean: 0.013335
- Converted tensor mean: 0.004362
- Mean difference: 0.12478559
- Maximum pointwise difference: 3.53676820
- Max difference location: (0, 15, 8, 4)
- Values at max diff - Original: 3.58003521, Converted: 0.04326708
- Biggest difference in row (0, 16, 6), sum 0.056718 vs -5.064022
- Layer 0, Token 7 (recurrent cache comparison):
- Original tensor sum: 13.531075
- Converted tensor sum: 7.895350
- Original tensor mean: 0.004228
- Converted tensor mean: 0.002467
- Mean difference: 0.05525878
- Maximum pointwise difference: 0.84158301
- Max difference location: (0, 4, 1, 9)
- Values at max diff - Original: -0.04387791, Converted: 0.79770511
- Biggest difference in row (0, 11, 9), sum -0.221553 vs -1.606123
- Layer 1, Token 7 (recurrent cache comparison):
- Original tensor sum: 106.468651
- Converted tensor sum: 29.931305
- Original tensor mean: 0.033271
- Converted tensor mean: 0.009354
- Mean difference: 0.07464606
- Maximum pointwise difference: 1.52088320
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 1.28372872, Converted: -0.23715444
- Biggest difference in row (0, 31, 9), sum 2.350637 vs -0.270012
- Layer 2, Token 7 (recurrent cache comparison):
- Original tensor sum: 129.077255
- Converted tensor sum: 124.290329
- Original tensor mean: 0.040337
- Converted tensor mean: 0.038841
- Mean difference: 0.12615709
- Maximum pointwise difference: 3.32020164
- Max difference location: (0, 23, 3, 9)
- Values at max diff - Original: 0.05276818, Converted: 3.37296987
- Biggest difference in row (0, 5, 6), sum -1.585131 vs 2.877644
- Layer 4, Token 7 (recurrent cache comparison):
- Original tensor sum: 12.337616
- Converted tensor sum: 29.998875
- Original tensor mean: 0.003856
- Converted tensor mean: 0.009375
- Mean difference: 0.08588156
- Maximum pointwise difference: 1.48782670
- Max difference location: (0, 19, 6, 2)
- Values at max diff - Original: -0.00142645, Converted: 1.48640025
- Biggest difference in row (0, 8, 3), sum -0.318221 vs 2.809558
- Layer 5, Token 7 (recurrent cache comparison):
- Original tensor sum: 28.667000
- Converted tensor sum: 37.180931
- Original tensor mean: 0.008958
- Converted tensor mean: 0.011619
- Mean difference: 0.09552816
- Maximum pointwise difference: 2.18750906
- Max difference location: (0, 19, 4, 8)
- Values at max diff - Original: 0.10599449, Converted: 2.29350352
- Biggest difference in row (0, 28, 9), sum 2.464837 vs 0.175544
- Layer 6, Token 7 (recurrent cache comparison):
- Original tensor sum: -5.179218
- Converted tensor sum: 165.798248
- Original tensor mean: -0.001619
- Converted tensor mean: 0.051812
- Mean difference: 0.12655024
- Maximum pointwise difference: 4.26992130
- Max difference location: (0, 10, 1, 3)
- Values at max diff - Original: -0.81827015, Converted: 3.45165110
- Biggest difference in row (0, 12, 6), sum 2.458921 vs 9.472747
- Layer 8, Token 7 (recurrent cache comparison):
- Original tensor sum: 8.037577
- Converted tensor sum: 36.050400
- Original tensor mean: 0.002512
- Converted tensor mean: 0.011266
- Mean difference: 0.10181364
- Maximum pointwise difference: 3.21224403
- Max difference location: (0, 6, 8, 4)
- Values at max diff - Original: 0.04581403, Converted: 3.25805807
- Biggest difference in row (0, 6, 8), sum -0.710102 vs 2.858772
- Layer 9, Token 7 (recurrent cache comparison):
- Original tensor sum: 10.771255
- Converted tensor sum: 9.047117
- Original tensor mean: 0.003366
- Converted tensor mean: 0.002827
- Mean difference: 0.07432807
- Maximum pointwise difference: 1.92723787
- Max difference location: (0, 18, 5, 2)
- Values at max diff - Original: 0.10259621, Converted: 2.02983403
- Biggest difference in row (0, 14, 2), sum 0.009283 vs 3.088803
- Layer 10, Token 7 (recurrent cache comparison):
- Original tensor sum: 2.196672
- Converted tensor sum: 31.273930
- Original tensor mean: 0.000686
- Converted tensor mean: 0.009773
- Mean difference: 0.07749946
- Maximum pointwise difference: 2.52166486
- Max difference location: (0, 3, 7, 8)
- Values at max diff - Original: 0.31132898, Converted: 2.83299375
- Biggest difference in row (0, 20, 9), sum -0.957283 vs 1.748438
- Layer 12, Token 7 (recurrent cache comparison):
- Original tensor sum: 18.589321
- Converted tensor sum: 5.047585
- Original tensor mean: 0.005809
- Converted tensor mean: 0.001577
- Mean difference: 0.10475901
- Maximum pointwise difference: 2.85224462
- Max difference location: (0, 29, 5, 6)
- Values at max diff - Original: 2.91423106, Converted: 0.06198643
- Biggest difference in row (0, 29, 5), sum 5.378224 vs 0.028987
- Layer 13, Token 7 (recurrent cache comparison):
- Original tensor sum: 10.072084
- Converted tensor sum: 22.447376
- Original tensor mean: 0.003148
- Converted tensor mean: 0.007015
- Mean difference: 0.06809221
- Maximum pointwise difference: 1.16759956
- Max difference location: (0, 27, 3, 5)
- Values at max diff - Original: -0.07106454, Converted: 1.09653497
- Biggest difference in row (0, 27, 3), sum -0.724999 vs 1.414439
- Layer 14, Token 7 (recurrent cache comparison):
- Original tensor sum: 24.727911
- Converted tensor sum: 26.743217
- Original tensor mean: 0.007727
- Converted tensor mean: 0.008357
- Mean difference: 0.11743267
- Maximum pointwise difference: 2.98747468
- Max difference location: (0, 18, 5, 1)
- Values at max diff - Original: 2.95096135, Converted: -0.03651327
- Biggest difference in row (0, 28, 1), sum -0.138044 vs 7.456189
- Layer 0, Token 8 (recurrent cache comparison):
- Original tensor sum: 15.709320
- Converted tensor sum: 12.209140
- Original tensor mean: 0.004909
- Converted tensor mean: 0.003815
- Mean difference: 0.05364013
- Maximum pointwise difference: 1.00742257
- Max difference location: (0, 1, 3, 2)
- Values at max diff - Original: 0.00399712, Converted: 1.01141965
- Biggest difference in row (0, 28, 5), sum 0.102939 vs 1.531078
- Layer 1, Token 8 (recurrent cache comparison):
- Original tensor sum: 188.393356
- Converted tensor sum: 69.447678
- Original tensor mean: 0.058873
- Converted tensor mean: 0.021702
- Mean difference: 0.10494157
- Maximum pointwise difference: 2.06318974
- Max difference location: (0, 24, 6, 8)
- Values at max diff - Original: 2.06102371, Converted: -0.00216593
- Biggest difference in row (0, 14, 0), sum 8.656445 vs 0.053197
- Layer 2, Token 8 (recurrent cache comparison):
- Original tensor sum: 204.433716
- Converted tensor sum: 228.728714
- Original tensor mean: 0.063886
- Converted tensor mean: 0.071478
- Mean difference: 0.17672807
- Maximum pointwise difference: 4.02747822
- Max difference location: (0, 14, 7, 4)
- Values at max diff - Original: -0.50838530, Converted: 3.51909280
- Biggest difference in row (0, 14, 7), sum -0.459507 vs 8.282653
- Layer 4, Token 8 (recurrent cache comparison):
- Original tensor sum: 27.791477
- Converted tensor sum: 81.184990
- Original tensor mean: 0.008685
- Converted tensor mean: 0.025370
- Mean difference: 0.10353857
- Maximum pointwise difference: 2.46198463
- Max difference location: (0, 20, 0, 0)
- Values at max diff - Original: -0.22187454, Converted: 2.24011016
- Biggest difference in row (0, 20, 0), sum 0.256525 vs 5.813072
- Layer 5, Token 8 (recurrent cache comparison):
- Original tensor sum: 29.250452
- Converted tensor sum: 93.253128
- Original tensor mean: 0.009141
- Converted tensor mean: 0.029142
- Mean difference: 0.10660823
- Maximum pointwise difference: 2.56040263
- Max difference location: (0, 5, 9, 6)
- Values at max diff - Original: 2.57331157, Converted: 0.01290902
- Biggest difference in row (0, 6, 9), sum 0.078166 vs 4.415024
- Layer 6, Token 8 (recurrent cache comparison):
- Original tensor sum: 27.846973
- Converted tensor sum: 254.006149
- Original tensor mean: 0.008702
- Converted tensor mean: 0.079377
- Mean difference: 0.15745334
- Maximum pointwise difference: 4.78712130
- Max difference location: (0, 6, 0, 1)
- Values at max diff - Original: -0.02898185, Converted: 4.75813961
- Biggest difference in row (0, 6, 0), sum 0.390611 vs 12.429944
- Layer 8, Token 8 (recurrent cache comparison):
- Original tensor sum: 30.536982
- Converted tensor sum: 101.827225
- Original tensor mean: 0.009543
- Converted tensor mean: 0.031821
- Mean difference: 0.12039161
- Maximum pointwise difference: 3.22662950
- Max difference location: (0, 6, 4, 8)
- Values at max diff - Original: 0.09277204, Converted: 3.31940150
- Biggest difference in row (0, 6, 4), sum -0.525502 vs 4.532234
- Layer 9, Token 8 (recurrent cache comparison):
- Original tensor sum: 16.682407
- Converted tensor sum: 55.948948
- Original tensor mean: 0.005213
- Converted tensor mean: 0.017484
- Mean difference: 0.08395444
- Maximum pointwise difference: 2.21269536
- Max difference location: (0, 2, 6, 8)
- Values at max diff - Original: -0.01177103, Converted: 2.20092440
- Biggest difference in row (0, 2, 6), sum 0.250594 vs 2.860795
- Layer 10, Token 8 (recurrent cache comparison):
- Original tensor sum: 12.510189
- Converted tensor sum: 82.301987
- Original tensor mean: 0.003909
- Converted tensor mean: 0.025719
- Mean difference: 0.08603403
- Maximum pointwise difference: 2.56086898
- Max difference location: (0, 3, 8, 7)
- Values at max diff - Original: -0.06791666, Converted: 2.49295235
- Biggest difference in row (0, 27, 2), sum -0.661969 vs 2.579364
- Layer 12, Token 8 (recurrent cache comparison):
- Original tensor sum: 32.357769
- Converted tensor sum: 70.608459
- Original tensor mean: 0.010112
- Converted tensor mean: 0.022065
- Mean difference: 0.11435273
- Maximum pointwise difference: 2.54995298
- Max difference location: (0, 29, 5, 6)
- Values at max diff - Original: 2.57914209, Converted: 0.02918900
- Biggest difference in row (0, 24, 2), sum -0.360438 vs 5.434034
- Layer 13, Token 8 (recurrent cache comparison):
- Original tensor sum: 15.804648
- Converted tensor sum: 72.853622
- Original tensor mean: 0.004939
- Converted tensor mean: 0.022767
- Mean difference: 0.07997719
- Maximum pointwise difference: 2.65385294
- Max difference location: (0, 26, 0, 4)
- Values at max diff - Original: -0.03116010, Converted: 2.62269282
- Biggest difference in row (0, 26, 0), sum -1.206431 vs 2.459876
- Layer 14, Token 8 (recurrent cache comparison):
- Original tensor sum: 69.455246
- Converted tensor sum: 167.620041
- Original tensor mean: 0.021705
- Converted tensor mean: 0.052381
- Mean difference: 0.15660757
- Maximum pointwise difference: 2.87237978
- Max difference location: (0, 29, 9, 1)
- Values at max diff - Original: -0.04621891, Converted: 2.82616091
- Biggest difference in row (0, 20, 4), sum -0.064347 vs 6.085094
- Layer 0, Token 9 (recurrent cache comparison):
- Original tensor sum: 13.786104
- Converted tensor sum: 5.261156
- Original tensor mean: 0.004308
- Converted tensor mean: 0.001644
- Mean difference: 0.06277616
- Maximum pointwise difference: 1.31032252
- Max difference location: (0, 4, 1, 9)
- Values at max diff - Original: -0.02821357, Converted: 1.28210890
- Biggest difference in row (0, 11, 3), sum 0.289278 vs -0.836586
- Layer 1, Token 9 (recurrent cache comparison):
- Original tensor sum: 203.497635
- Converted tensor sum: 111.110443
- Original tensor mean: 0.063593
- Converted tensor mean: 0.034722
- Mean difference: 0.10077493
- Maximum pointwise difference: 1.97459030
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 1.88861251, Converted: -0.08597784
- Biggest difference in row (0, 14, 0), sum 9.054160 vs 0.974541
- Layer 2, Token 9 (recurrent cache comparison):
- Original tensor sum: 210.326843
- Converted tensor sum: 237.847137
- Original tensor mean: 0.065727
- Converted tensor mean: 0.074327
- Mean difference: 0.16504267
- Maximum pointwise difference: 2.71314573
- Max difference location: (0, 4, 8, 1)
- Values at max diff - Original: -0.00067222, Converted: 2.71247363
- Biggest difference in row (0, 1, 4), sum 2.414350 vs 7.828261
- Layer 4, Token 9 (recurrent cache comparison):
- Original tensor sum: 76.020309
- Converted tensor sum: 125.208931
- Original tensor mean: 0.023756
- Converted tensor mean: 0.039128
- Mean difference: 0.11094213
- Maximum pointwise difference: 3.67572975
- Max difference location: (0, 27, 7, 5)
- Values at max diff - Original: 3.66171432, Converted: -0.01401533
- Biggest difference in row (0, 3, 0), sum 4.612147 vs 0.005273
- Layer 5, Token 9 (recurrent cache comparison):
- Original tensor sum: 70.017532
- Converted tensor sum: 128.789795
- Original tensor mean: 0.021880
- Converted tensor mean: 0.040247
- Mean difference: 0.11726990
- Maximum pointwise difference: 2.56784987
- Max difference location: (0, 6, 7, 6)
- Values at max diff - Original: 2.56954336, Converted: 0.00169344
- Biggest difference in row (0, 6, 7), sum 5.224357 vs 0.050091
- Layer 6, Token 9 (recurrent cache comparison):
- Original tensor sum: 97.678406
- Converted tensor sum: 298.968506
- Original tensor mean: 0.030525
- Converted tensor mean: 0.093428
- Mean difference: 0.16553456
- Maximum pointwise difference: 4.22000217
- Max difference location: (0, 14, 1, 7)
- Values at max diff - Original: -0.10210184, Converted: 4.11790037
- Biggest difference in row (0, 14, 1), sum -0.198166 vs 10.807201
- Layer 8, Token 9 (recurrent cache comparison):
- Original tensor sum: 106.931870
- Converted tensor sum: 173.151855
- Original tensor mean: 0.033416
- Converted tensor mean: 0.054110
- Mean difference: 0.14065868
- Maximum pointwise difference: 3.01797652
- Max difference location: (0, 14, 9, 5)
- Values at max diff - Original: -0.05490554, Converted: 2.96307087
- Biggest difference in row (0, 20, 7), sum 0.154971 vs 7.357482
- Layer 9, Token 9 (recurrent cache comparison):
- Original tensor sum: 64.670883
- Converted tensor sum: 92.657562
- Original tensor mean: 0.020210
- Converted tensor mean: 0.028955
- Mean difference: 0.09020478
- Maximum pointwise difference: 3.22673941
- Max difference location: (0, 18, 5, 2)
- Values at max diff - Original: 0.18116489, Converted: 3.40790439
- Biggest difference in row (0, 18, 2), sum 6.946761 vs 1.273814
- Layer 10, Token 9 (recurrent cache comparison):
- Original tensor sum: 52.923912
- Converted tensor sum: 104.621475
- Original tensor mean: 0.016539
- Converted tensor mean: 0.032694
- Mean difference: 0.08354937
- Maximum pointwise difference: 1.84956801
- Max difference location: (0, 3, 7, 8)
- Values at max diff - Original: 0.37758890, Converted: 2.22715688
- Biggest difference in row (0, 20, 9), sum -1.298731 vs 2.479056
- Layer 12, Token 9 (recurrent cache comparison):
- Original tensor sum: 87.343620
- Converted tensor sum: 117.516281
- Original tensor mean: 0.027295
- Converted tensor mean: 0.036724
- Mean difference: 0.12288742
- Maximum pointwise difference: 3.19170189
- Max difference location: (0, 13, 2, 4)
- Values at max diff - Original: -0.11148589, Converted: 3.08021593
- Biggest difference in row (0, 13, 2), sum 0.993775 vs 6.040417
- Layer 13, Token 9 (recurrent cache comparison):
- Original tensor sum: 77.928635
- Converted tensor sum: 116.695862
- Original tensor mean: 0.024353
- Converted tensor mean: 0.036467
- Mean difference: 0.09447044
- Maximum pointwise difference: 1.43028283
- Max difference location: (0, 26, 0, 4)
- Values at max diff - Original: -0.00879327, Converted: 1.42148960
- Biggest difference in row (0, 25, 3), sum -0.128404 vs 3.423045
- Layer 14, Token 9 (recurrent cache comparison):
- Original tensor sum: 162.069077
- Converted tensor sum: 247.590637
- Original tensor mean: 0.050647
- Converted tensor mean: 0.077372
- Mean difference: 0.17534283
- Maximum pointwise difference: 3.21209598
- Max difference location: (0, 28, 1, 9)
- Values at max diff - Original: -0.25805441, Converted: 2.95404148
- Biggest difference in row (0, 28, 1), sum 1.364790 vs 9.833094
- Layer 0, Token 10 (recurrent cache comparison):
- Original tensor sum: 7.816267
- Converted tensor sum: 1.466951
- Original tensor mean: 0.002443
- Converted tensor mean: 0.000458
- Mean difference: 0.05842621
- Maximum pointwise difference: 1.09208894
- Max difference location: (0, 21, 4, 1)
- Values at max diff - Original: 0.04324723, Converted: 1.13533616
- Biggest difference in row (0, 28, 5), sum 0.301255 vs 2.364079
- Layer 1, Token 10 (recurrent cache comparison):
- Original tensor sum: 223.526520
- Converted tensor sum: 135.921234
- Original tensor mean: 0.069852
- Converted tensor mean: 0.042475
- Mean difference: 0.10827781
- Maximum pointwise difference: 1.68770814
- Max difference location: (0, 16, 6, 1)
- Values at max diff - Original: 2.02958679, Converted: 0.34187865
- Biggest difference in row (0, 14, 0), sum 5.745544 vs -0.048143
- Layer 2, Token 10 (recurrent cache comparison):
- Original tensor sum: 215.104584
- Converted tensor sum: 227.212708
- Original tensor mean: 0.067220
- Converted tensor mean: 0.071004
- Mean difference: 0.17289215
- Maximum pointwise difference: 3.18850541
- Max difference location: (0, 26, 3, 8)
- Values at max diff - Original: 0.01985940, Converted: 3.20836473
- Biggest difference in row (0, 12, 7), sum 8.279942 vs -0.264312
- Layer 4, Token 10 (recurrent cache comparison):
- Original tensor sum: 185.702744
- Converted tensor sum: 211.499130
- Original tensor mean: 0.058032
- Converted tensor mean: 0.066093
- Mean difference: 0.12541530
- Maximum pointwise difference: 2.52001357
- Max difference location: (0, 27, 5, 8)
- Values at max diff - Original: 0.05403204, Converted: 2.57404566
- Biggest difference in row (0, 27, 5), sum 0.682007 vs 7.443546
- Layer 5, Token 10 (recurrent cache comparison):
- Original tensor sum: 169.265594
- Converted tensor sum: 227.449417
- Original tensor mean: 0.052895
- Converted tensor mean: 0.071078
- Mean difference: 0.13289575
- Maximum pointwise difference: 3.03736281
- Max difference location: (0, 6, 2, 6)
- Values at max diff - Original: 3.01727891, Converted: -0.02008397
- Biggest difference in row (0, 6, 2), sum 9.659736 vs 0.153498
- Layer 6, Token 10 (recurrent cache comparison):
- Original tensor sum: 230.247437
- Converted tensor sum: 418.704895
- Original tensor mean: 0.071952
- Converted tensor mean: 0.130845
- Mean difference: 0.17921637
- Maximum pointwise difference: 4.08086109
- Max difference location: (0, 6, 0, 1)
- Values at max diff - Original: 0.00348123, Converted: 4.08434248
- Biggest difference in row (0, 6, 0), sum 0.879897 vs 15.160538
- Layer 8, Token 10 (recurrent cache comparison):
- Original tensor sum: 206.699799
- Converted tensor sum: 283.296692
- Original tensor mean: 0.064594
- Converted tensor mean: 0.088530
- Mean difference: 0.15303743
- Maximum pointwise difference: 3.20992827
- Max difference location: (0, 14, 4, 5)
- Values at max diff - Original: 0.00341668, Converted: 3.21334505
- Biggest difference in row (0, 2, 4), sum -0.470056 vs 8.175467
- Layer 9, Token 10 (recurrent cache comparison):
- Original tensor sum: 155.765579
- Converted tensor sum: 185.697693
- Original tensor mean: 0.048677
- Converted tensor mean: 0.058031
- Mean difference: 0.09974226
- Maximum pointwise difference: 2.01155925
- Max difference location: (0, 14, 1, 8)
- Values at max diff - Original: -0.00813468, Converted: 2.00342464
- Biggest difference in row (0, 18, 3), sum -0.273577 vs 5.096995
- Layer 10, Token 10 (recurrent cache comparison):
- Original tensor sum: 147.632782
- Converted tensor sum: 177.473785
- Original tensor mean: 0.046135
- Converted tensor mean: 0.055461
- Mean difference: 0.10073428
- Maximum pointwise difference: 2.04938221
- Max difference location: (0, 3, 8, 7)
- Values at max diff - Original: -0.06264466, Converted: 1.98673749
- Biggest difference in row (0, 24, 0), sum 0.061289 vs 4.106022
- Layer 12, Token 10 (recurrent cache comparison):
- Original tensor sum: 189.647308
- Converted tensor sum: 212.602402
- Original tensor mean: 0.059265
- Converted tensor mean: 0.066438
- Mean difference: 0.12409261
- Maximum pointwise difference: 3.06548572
- Max difference location: (0, 14, 1, 8)
- Values at max diff - Original: -0.05504636, Converted: 3.01043940
- Biggest difference in row (0, 14, 1), sum -1.444618 vs 6.230721
- Layer 13, Token 10 (recurrent cache comparison):
- Original tensor sum: 176.983215
- Converted tensor sum: 204.426437
- Original tensor mean: 0.055307
- Converted tensor mean: 0.063883
- Mean difference: 0.10065258
- Maximum pointwise difference: 1.83688605
- Max difference location: (0, 26, 0, 4)
- Values at max diff - Original: -0.00286533, Converted: 1.83402073
- Biggest difference in row (0, 17, 8), sum 4.395949 vs 0.724224
- Layer 14, Token 10 (recurrent cache comparison):
- Original tensor sum: 362.967407
- Converted tensor sum: 429.969727
- Original tensor mean: 0.113427
- Converted tensor mean: 0.134366
- Mean difference: 0.20180641
- Maximum pointwise difference: 3.78999281
- Max difference location: (0, 8, 9, 2)
- Values at max diff - Original: -0.03249586, Converted: 3.75749683
- Biggest difference in row (0, 8, 9), sum 0.437254 vs 14.025442
- Layer 0, Token 11 (recurrent cache comparison):
- Original tensor sum: 1.054740
- Converted tensor sum: -4.912385
- Original tensor mean: 0.000330
- Converted tensor mean: -0.001535
- Mean difference: 0.06330946
- Maximum pointwise difference: 0.92195946
- Max difference location: (0, 4, 9, 1)
- Values at max diff - Original: 0.89514881, Converted: -0.02681063
- Biggest difference in row (0, 4, 9), sum 1.999353 vs 0.163843
- Layer 1, Token 11 (recurrent cache comparison):
- Original tensor sum: 229.025497
- Converted tensor sum: 120.378685
- Original tensor mean: 0.071570
- Converted tensor mean: 0.037618
- Mean difference: 0.11386316
- Maximum pointwise difference: 2.45059752
- Max difference location: (0, 14, 7, 2)
- Values at max diff - Original: 2.53569841, Converted: 0.08510098
- Biggest difference in row (0, 16, 6), sum 5.812350 vs -0.022719
- Layer 2, Token 11 (recurrent cache comparison):
- Original tensor sum: 158.621384
- Converted tensor sum: 133.457428
- Original tensor mean: 0.049569
- Converted tensor mean: 0.041705
- Mean difference: 0.14393179
- Maximum pointwise difference: 2.77776694
- Max difference location: (0, 12, 7, 9)
- Values at max diff - Original: 2.95237303, Converted: 0.17460610
- Biggest difference in row (0, 12, 7), sum 8.065367 vs 1.687768
- Layer 4, Token 11 (recurrent cache comparison):
- Original tensor sum: 216.897552
- Converted tensor sum: 241.688950
- Original tensor mean: 0.067780
- Converted tensor mean: 0.075528
- Mean difference: 0.14223064
- Maximum pointwise difference: 3.88969064
- Max difference location: (0, 19, 2, 0)
- Values at max diff - Original: 0.01694401, Converted: 3.90663457
- Biggest difference in row (0, 19, 2), sum 0.437507 vs 8.962053
- Layer 5, Token 11 (recurrent cache comparison):
- Original tensor sum: 252.265610
- Converted tensor sum: 322.771881
- Original tensor mean: 0.078833
- Converted tensor mean: 0.100866
- Mean difference: 0.17598768
- Maximum pointwise difference: 7.97533512
- Max difference location: (0, 28, 6, 9)
- Values at max diff - Original: 0.35858834, Converted: 8.33392334
- Biggest difference in row (0, 28, 6), sum 5.014431 vs 26.334686
- Layer 6, Token 11 (recurrent cache comparison):
- Original tensor sum: 291.508423
- Converted tensor sum: 433.311768
- Original tensor mean: 0.091096
- Converted tensor mean: 0.135410
- Mean difference: 0.17094433
- Maximum pointwise difference: 3.41666508
- Max difference location: (0, 6, 4, 5)
- Values at max diff - Original: 0.27297387, Converted: 3.68963885
- Biggest difference in row (0, 14, 1), sum -0.165701 vs 10.544808
- Layer 8, Token 11 (recurrent cache comparison):
- Original tensor sum: 215.415359
- Converted tensor sum: 351.092529
- Original tensor mean: 0.067317
- Converted tensor mean: 0.109716
- Mean difference: 0.18807893
- Maximum pointwise difference: 3.95769572
- Max difference location: (0, 23, 4, 7)
- Values at max diff - Original: 3.95293593, Converted: -0.00475990
- Biggest difference in row (0, 2, 4), sum 0.017769 vs 8.146402
- Layer 9, Token 11 (recurrent cache comparison):
- Original tensor sum: 230.947296
- Converted tensor sum: 244.599213
- Original tensor mean: 0.072171
- Converted tensor mean: 0.076437
- Mean difference: 0.13342199
- Maximum pointwise difference: 2.90320230
- Max difference location: (0, 18, 3, 2)
- Values at max diff - Original: -0.01862744, Converted: 2.88457489
- Biggest difference in row (0, 28, 7), sum 8.403417 vs 1.460527
- Layer 10, Token 11 (recurrent cache comparison):
- Original tensor sum: 271.779785
- Converted tensor sum: 241.771790
- Original tensor mean: 0.084931
- Converted tensor mean: 0.075554
- Mean difference: 0.15158509
- Maximum pointwise difference: 3.77889895
- Max difference location: (0, 0, 3, 7)
- Values at max diff - Original: 4.08713722, Converted: 0.30823818
- Biggest difference in row (0, 10, 4), sum 7.732811 vs 0.603564
- Layer 12, Token 11 (recurrent cache comparison):
- Original tensor sum: 274.425629
- Converted tensor sum: 286.277039
- Original tensor mean: 0.085758
- Converted tensor mean: 0.089462
- Mean difference: 0.16393411
- Maximum pointwise difference: 3.90725374
- Max difference location: (0, 14, 1, 8)
- Values at max diff - Original: 0.01574333, Converted: 3.92299700
- Biggest difference in row (0, 23, 2), sum 10.560888 vs 1.081235
- Layer 13, Token 11 (recurrent cache comparison):
- Original tensor sum: 212.238953
- Converted tensor sum: 260.726898
- Original tensor mean: 0.066325
- Converted tensor mean: 0.081477
- Mean difference: 0.12856843
- Maximum pointwise difference: 3.76317525
- Max difference location: (0, 17, 8, 2)
- Values at max diff - Original: 4.56109810, Converted: 0.79792279
- Biggest difference in row (0, 19, 1), sum 10.229995 vs 2.908604
- Layer 14, Token 11 (recurrent cache comparison):
- Original tensor sum: 502.973511
- Converted tensor sum: 568.935181
- Original tensor mean: 0.157179
- Converted tensor mean: 0.177792
- Mean difference: 0.27989930
- Maximum pointwise difference: 4.54578638
- Max difference location: (0, 16, 7, 6)
- Values at max diff - Original: 4.27132416, Converted: -0.27446240
- Biggest difference in row (0, 21, 5), sum -0.168386 vs 13.477350
- Layer 0, Token 12 (recurrent cache comparison):
- Original tensor sum: 4.252830
- Converted tensor sum: -0.731128
- Original tensor mean: 0.001329
- Converted tensor mean: -0.000228
- Mean difference: 0.06294378
- Maximum pointwise difference: 1.78251398
- Max difference location: (0, 1, 3, 2)
- Values at max diff - Original: -0.00792313, Converted: 1.77459085
- Biggest difference in row (0, 28, 5), sum 0.238817 vs 2.175461
- Layer 1, Token 12 (recurrent cache comparison):
- Original tensor sum: 242.003052
- Converted tensor sum: 66.457909
- Original tensor mean: 0.075626
- Converted tensor mean: 0.020768
- Mean difference: 0.11966369
- Maximum pointwise difference: 2.80864978
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 2.71780372, Converted: -0.09084603
- Biggest difference in row (0, 14, 0), sum 5.513966 vs -0.057299
- Layer 2, Token 12 (recurrent cache comparison):
- Original tensor sum: 212.836731
- Converted tensor sum: 76.092499
- Original tensor mean: 0.066511
- Converted tensor mean: 0.023779
- Mean difference: 0.14941603
- Maximum pointwise difference: 2.88118339
- Max difference location: (0, 12, 7, 0)
- Values at max diff - Original: 2.70842910, Converted: -0.17275429
- Biggest difference in row (0, 12, 7), sum 7.969865 vs 0.167881
- Layer 4, Token 12 (recurrent cache comparison):
- Original tensor sum: 128.756699
- Converted tensor sum: 154.911957
- Original tensor mean: 0.040236
- Converted tensor mean: 0.048410
- Mean difference: 0.10618121
- Maximum pointwise difference: 2.31433964
- Max difference location: (0, 8, 1, 6)
- Values at max diff - Original: 2.26328707, Converted: -0.05105254
- Biggest difference in row (0, 25, 7), sum 3.269817 vs -0.397900
- Layer 5, Token 12 (recurrent cache comparison):
- Original tensor sum: 176.745117
- Converted tensor sum: 232.734680
- Original tensor mean: 0.055233
- Converted tensor mean: 0.072730
- Mean difference: 0.13117053
- Maximum pointwise difference: 4.35398436
- Max difference location: (0, 28, 6, 9)
- Values at max diff - Original: 0.18738972, Converted: 4.54137421
- Biggest difference in row (0, 28, 6), sum 3.095334 vs 9.516649
- Layer 6, Token 12 (recurrent cache comparison):
- Original tensor sum: 259.031647
- Converted tensor sum: 428.069794
- Original tensor mean: 0.080947
- Converted tensor mean: 0.133772
- Mean difference: 0.16942802
- Maximum pointwise difference: 5.44846153
- Max difference location: (0, 26, 9, 3)
- Values at max diff - Original: -0.01164311, Converted: 5.43681860
- Biggest difference in row (0, 6, 0), sum 0.994667 vs 12.910238
- Layer 8, Token 12 (recurrent cache comparison):
- Original tensor sum: 221.930222
- Converted tensor sum: 262.522369
- Original tensor mean: 0.069353
- Converted tensor mean: 0.082038
- Mean difference: 0.17785330
- Maximum pointwise difference: 4.14597464
- Max difference location: (0, 21, 9, 9)
- Values at max diff - Original: -0.07410901, Converted: 4.07186556
- Biggest difference in row (0, 21, 9), sum -0.204344 vs 10.075971
- Layer 9, Token 12 (recurrent cache comparison):
- Original tensor sum: 189.028931
- Converted tensor sum: 238.029388
- Original tensor mean: 0.059072
- Converted tensor mean: 0.074384
- Mean difference: 0.14264640
- Maximum pointwise difference: 2.92814064
- Max difference location: (0, 14, 1, 2)
- Values at max diff - Original: -0.88447762, Converted: 2.04366302
- Biggest difference in row (0, 28, 0), sum 1.806244 vs 7.562672
- Layer 10, Token 12 (recurrent cache comparison):
- Original tensor sum: 236.811234
- Converted tensor sum: 260.771973
- Original tensor mean: 0.074004
- Converted tensor mean: 0.081491
- Mean difference: 0.15943669
- Maximum pointwise difference: 5.29651165
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 0.03258384, Converted: 5.32909536
- Biggest difference in row (0, 24, 0), sum 0.082025 vs 10.949675
- Layer 12, Token 12 (recurrent cache comparison):
- Original tensor sum: 244.807922
- Converted tensor sum: 314.705444
- Original tensor mean: 0.076502
- Converted tensor mean: 0.098345
- Mean difference: 0.16864727
- Maximum pointwise difference: 4.38556862
- Max difference location: (0, 20, 3, 2)
- Values at max diff - Original: -0.00896719, Converted: 4.37660122
- Biggest difference in row (0, 28, 3), sum 10.509099 vs 0.169576
- Layer 13, Token 12 (recurrent cache comparison):
- Original tensor sum: 195.554291
- Converted tensor sum: 222.348053
- Original tensor mean: 0.061111
- Converted tensor mean: 0.069484
- Mean difference: 0.13128105
- Maximum pointwise difference: 3.68478298
- Max difference location: (0, 17, 2, 8)
- Values at max diff - Original: 0.00859472, Converted: 3.69337773
- Biggest difference in row (0, 17, 2), sum 0.146146 vs 8.692631
- Layer 14, Token 12 (recurrent cache comparison):
- Original tensor sum: 483.896393
- Converted tensor sum: 527.955566
- Original tensor mean: 0.151218
- Converted tensor mean: 0.164986
- Mean difference: 0.27409020
- Maximum pointwise difference: 4.70396519
- Max difference location: (0, 25, 4, 1)
- Values at max diff - Original: -0.42079771, Converted: 4.28316736
- Biggest difference in row (0, 16, 6), sum -0.041328 vs 13.549324
- Layer 0, Token 13 (recurrent cache comparison):
- Original tensor sum: 1.659033
- Converted tensor sum: -7.970642
- Original tensor mean: 0.000518
- Converted tensor mean: -0.002491
- Mean difference: 0.07536316
- Maximum pointwise difference: 1.29645300
- Max difference location: (0, 4, 9, 1)
- Values at max diff - Original: 1.30392849, Converted: 0.00747545
- Biggest difference in row (0, 26, 3), sum -0.329301 vs -3.374216
- Layer 1, Token 13 (recurrent cache comparison):
- Original tensor sum: 239.724915
- Converted tensor sum: 79.675636
- Original tensor mean: 0.074914
- Converted tensor mean: 0.024899
- Mean difference: 0.12407961
- Maximum pointwise difference: 2.50358605
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 2.48077655, Converted: -0.02280946
- Biggest difference in row (0, 14, 0), sum 6.016558 vs 0.013054
- Layer 2, Token 13 (recurrent cache comparison):
- Original tensor sum: 247.626099
- Converted tensor sum: 106.589592
- Original tensor mean: 0.077383
- Converted tensor mean: 0.033309
- Mean difference: 0.15574569
- Maximum pointwise difference: 3.29841137
- Max difference location: (0, 4, 2, 8)
- Values at max diff - Original: 3.44825506, Converted: 0.14984375
- Biggest difference in row (0, 12, 7), sum 7.714676 vs 0.758271
- Layer 4, Token 13 (recurrent cache comparison):
- Original tensor sum: 123.371284
- Converted tensor sum: 126.859177
- Original tensor mean: 0.038554
- Converted tensor mean: 0.039643
- Mean difference: 0.08389783
- Maximum pointwise difference: 2.97862935
- Max difference location: (0, 28, 2, 3)
- Values at max diff - Original: 3.17326093, Converted: 0.19463167
- Biggest difference in row (0, 28, 2), sum 4.464350 vs 0.493919
- Layer 5, Token 13 (recurrent cache comparison):
- Original tensor sum: 147.258102
- Converted tensor sum: 184.070984
- Original tensor mean: 0.046018
- Converted tensor mean: 0.057522
- Mean difference: 0.10195178
- Maximum pointwise difference: 2.96551919
- Max difference location: (0, 28, 6, 9)
- Values at max diff - Original: 0.10930623, Converted: 3.07482553
- Biggest difference in row (0, 28, 6), sum 1.825548 vs 9.674469
- Layer 6, Token 13 (recurrent cache comparison):
- Original tensor sum: 283.270142
- Converted tensor sum: 448.314880
- Original tensor mean: 0.088522
- Converted tensor mean: 0.140098
- Mean difference: 0.15905625
- Maximum pointwise difference: 3.46541429
- Max difference location: (0, 24, 8, 2)
- Values at max diff - Original: -0.00599505, Converted: 3.45941925
- Biggest difference in row (0, 6, 4), sum 3.774855 vs 11.804656
- Layer 8, Token 13 (recurrent cache comparison):
- Original tensor sum: 241.112183
- Converted tensor sum: 274.130127
- Original tensor mean: 0.075348
- Converted tensor mean: 0.085666
- Mean difference: 0.10883617
- Maximum pointwise difference: 4.01715469
- Max difference location: (0, 21, 9, 9)
- Values at max diff - Original: -0.09944591, Converted: 3.91770887
- Biggest difference in row (0, 20, 9), sum -0.234172 vs 5.020240
- Layer 9, Token 13 (recurrent cache comparison):
- Original tensor sum: 173.039688
- Converted tensor sum: 214.021088
- Original tensor mean: 0.054075
- Converted tensor mean: 0.066882
- Mean difference: 0.09634628
- Maximum pointwise difference: 1.72028887
- Max difference location: (0, 18, 6, 2)
- Values at max diff - Original: 0.10816531, Converted: 1.82845414
- Biggest difference in row (0, 18, 6), sum 0.705206 vs 4.642780
- Layer 10, Token 13 (recurrent cache comparison):
- Original tensor sum: 213.873550
- Converted tensor sum: 242.753281
- Original tensor mean: 0.066835
- Converted tensor mean: 0.075860
- Mean difference: 0.09029815
- Maximum pointwise difference: 1.41950274
- Max difference location: (0, 11, 2, 6)
- Values at max diff - Original: 0.20659086, Converted: 1.62609363
- Biggest difference in row (0, 23, 5), sum 0.448223 vs 3.806486
- Layer 12, Token 13 (recurrent cache comparison):
- Original tensor sum: 233.552292
- Converted tensor sum: 296.583405
- Original tensor mean: 0.072985
- Converted tensor mean: 0.092682
- Mean difference: 0.08977944
- Maximum pointwise difference: 1.59837830
- Max difference location: (0, 19, 7, 7)
- Values at max diff - Original: 0.86349380, Converted: 2.46187210
- Biggest difference in row (0, 4, 3), sum 5.997213 vs 0.558758
- Layer 13, Token 13 (recurrent cache comparison):
- Original tensor sum: 172.634430
- Converted tensor sum: 190.998459
- Original tensor mean: 0.053948
- Converted tensor mean: 0.059687
- Mean difference: 0.07964972
- Maximum pointwise difference: 2.45006180
- Max difference location: (0, 26, 4, 0)
- Values at max diff - Original: 2.51385903, Converted: 0.06379732
- Biggest difference in row (0, 26, 4), sum 5.078406 vs 0.298857
- Layer 14, Token 13 (recurrent cache comparison):
- Original tensor sum: 516.640808
- Converted tensor sum: 514.890991
- Original tensor mean: 0.161450
- Converted tensor mean: 0.160903
- Mean difference: 0.14294353
- Maximum pointwise difference: 2.38266706
- Max difference location: (0, 8, 9, 3)
- Values at max diff - Original: 0.05516699, Converted: 2.43783402
- Biggest difference in row (0, 8, 9), sum -0.157885 vs 10.688316
- Layer 0, Token 14 (recurrent cache comparison):
- Original tensor sum: 5.983342
- Converted tensor sum: -8.715725
- Original tensor mean: 0.001870
- Converted tensor mean: -0.002724
- Mean difference: 0.07516728
- Maximum pointwise difference: 1.55751526
- Max difference location: (0, 25, 8, 2)
- Values at max diff - Original: 1.57396424, Converted: 0.01644893
- Biggest difference in row (0, 7, 3), sum 0.124509 vs -1.539357
- Layer 1, Token 14 (recurrent cache comparison):
- Original tensor sum: 229.783936
- Converted tensor sum: 87.531807
- Original tensor mean: 0.071807
- Converted tensor mean: 0.027354
- Mean difference: 0.11559690
- Maximum pointwise difference: 2.09234738
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 2.22317505, Converted: 0.13082767
- Biggest difference in row (0, 14, 0), sum 5.592927 vs -0.441425
- Layer 2, Token 14 (recurrent cache comparison):
- Original tensor sum: 268.945923
- Converted tensor sum: 133.786499
- Original tensor mean: 0.084046
- Converted tensor mean: 0.041808
- Mean difference: 0.16352382
- Maximum pointwise difference: 2.87041712
- Max difference location: (0, 11, 9, 6)
- Values at max diff - Original: 2.94916487, Converted: 0.07874785
- Biggest difference in row (0, 23, 9), sum 8.145676 vs 0.297307
- Layer 4, Token 14 (recurrent cache comparison):
- Original tensor sum: 117.762733
- Converted tensor sum: 118.469772
- Original tensor mean: 0.036801
- Converted tensor mean: 0.037022
- Mean difference: 0.09650213
- Maximum pointwise difference: 1.50842690
- Max difference location: (0, 20, 6, 4)
- Values at max diff - Original: 2.04159784, Converted: 0.53317100
- Biggest difference in row (0, 20, 6), sum 4.022114 vs 0.957074
- Layer 5, Token 14 (recurrent cache comparison):
- Original tensor sum: 128.612335
- Converted tensor sum: 157.030731
- Original tensor mean: 0.040191
- Converted tensor mean: 0.049072
- Mean difference: 0.10371025
- Maximum pointwise difference: 2.24814534
- Max difference location: (0, 8, 5, 9)
- Values at max diff - Original: -0.03905072, Converted: 2.20909452
- Biggest difference in row (0, 2, 5), sum 3.689715 vs 0.515908
- Layer 6, Token 14 (recurrent cache comparison):
- Original tensor sum: 284.314667
- Converted tensor sum: 446.866150
- Original tensor mean: 0.088848
- Converted tensor mean: 0.139646
- Mean difference: 0.16138166
- Maximum pointwise difference: 3.92217135
- Max difference location: (0, 26, 9, 3)
- Values at max diff - Original: -0.00824802, Converted: 3.91392326
- Biggest difference in row (0, 6, 0), sum 1.467430 vs 14.063056
- Layer 8, Token 14 (recurrent cache comparison):
- Original tensor sum: 268.420227
- Converted tensor sum: 298.094666
- Original tensor mean: 0.083881
- Converted tensor mean: 0.093155
- Mean difference: 0.17274044
- Maximum pointwise difference: 3.57632637
- Max difference location: (0, 21, 9, 9)
- Values at max diff - Original: -0.07158025, Converted: 3.50474620
- Biggest difference in row (0, 21, 9), sum -0.176140 vs 9.883745
- Layer 9, Token 14 (recurrent cache comparison):
- Original tensor sum: 153.733398
- Converted tensor sum: 197.629532
- Original tensor mean: 0.048042
- Converted tensor mean: 0.061759
- Mean difference: 0.11230749
- Maximum pointwise difference: 2.07441854
- Max difference location: (0, 2, 6, 8)
- Values at max diff - Original: -0.01318651, Converted: 2.06123209
- Biggest difference in row (0, 28, 0), sum 1.515908 vs 6.081204
- Layer 10, Token 14 (recurrent cache comparison):
- Original tensor sum: 196.466980
- Converted tensor sum: 228.325546
- Original tensor mean: 0.061396
- Converted tensor mean: 0.071352
- Mean difference: 0.11859564
- Maximum pointwise difference: 4.73182058
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 0.02647224, Converted: 4.75829268
- Biggest difference in row (0, 24, 0), sum 0.182875 vs 9.743350
- Layer 12, Token 14 (recurrent cache comparison):
- Original tensor sum: 235.148682
- Converted tensor sum: 279.831421
- Original tensor mean: 0.073484
- Converted tensor mean: 0.087447
- Mean difference: 0.14844361
- Maximum pointwise difference: 3.64688230
- Max difference location: (0, 28, 4, 2)
- Values at max diff - Original: 0.01143306, Converted: 3.65831542
- Biggest difference in row (0, 28, 2), sum 9.410328 vs 0.407452
- Layer 13, Token 14 (recurrent cache comparison):
- Original tensor sum: 165.774078
- Converted tensor sum: 179.691483
- Original tensor mean: 0.051804
- Converted tensor mean: 0.056154
- Mean difference: 0.09881324
- Maximum pointwise difference: 3.03563190
- Max difference location: (0, 11, 0, 4)
- Values at max diff - Original: -0.10383722, Converted: 2.93179464
- Biggest difference in row (0, 11, 0), sum 0.092786 vs 5.614193
- Layer 14, Token 14 (recurrent cache comparison):
- Original tensor sum: 519.787109
- Converted tensor sum: 539.567444
- Original tensor mean: 0.162433
- Converted tensor mean: 0.168615
- Mean difference: 0.25360039
- Maximum pointwise difference: 4.24835634
- Max difference location: (0, 15, 8, 2)
- Values at max diff - Original: -0.01945496, Converted: 4.22890139
- Biggest difference in row (0, 16, 6), sum 0.069426 vs 10.617959
- Layer 0, Token 15 (recurrent cache comparison):
- Original tensor sum: 1.143128
- Converted tensor sum: 1.955431
- Original tensor mean: 0.000357
- Converted tensor mean: 0.000611
- Mean difference: 0.06554744
- Maximum pointwise difference: 1.62353444
- Max difference location: (0, 1, 3, 2)
- Values at max diff - Original: -0.04374466, Converted: 1.57978976
- Biggest difference in row (0, 28, 5), sum 0.256206 vs 2.700654
- Layer 1, Token 15 (recurrent cache comparison):
- Original tensor sum: 237.576813
- Converted tensor sum: 84.227829
- Original tensor mean: 0.074243
- Converted tensor mean: 0.026321
- Mean difference: 0.12017149
- Maximum pointwise difference: 2.73136139
- Max difference location: (0, 17, 6, 0)
- Values at max diff - Original: 2.89759755, Converted: 0.16623622
- Biggest difference in row (0, 16, 6), sum 6.557743 vs 0.035282
- Layer 2, Token 15 (recurrent cache comparison):
- Original tensor sum: 311.619568
- Converted tensor sum: 155.972748
- Original tensor mean: 0.097381
- Converted tensor mean: 0.048741
- Mean difference: 0.17912415
- Maximum pointwise difference: 3.15524197
- Max difference location: (0, 12, 0, 0)
- Values at max diff - Original: 3.30613947, Converted: 0.15089758
- Biggest difference in row (0, 12, 0), sum 9.937962 vs 2.002455
- Layer 4, Token 15 (recurrent cache comparison):
- Original tensor sum: 167.504608
- Converted tensor sum: 97.213791
- Original tensor mean: 0.052345
- Converted tensor mean: 0.030379
- Mean difference: 0.11675335
- Maximum pointwise difference: 2.34569263
- Max difference location: (0, 28, 2, 3)
- Values at max diff - Original: 2.36823630, Converted: 0.02254373
- Biggest difference in row (0, 24, 1), sum 4.970531 vs 0.552202
- Layer 5, Token 15 (recurrent cache comparison):
- Original tensor sum: 165.098206
- Converted tensor sum: 106.835938
- Original tensor mean: 0.051593
- Converted tensor mean: 0.033386
- Mean difference: 0.11981978
- Maximum pointwise difference: 3.00254560
- Max difference location: (0, 19, 0, 4)
- Values at max diff - Original: -0.04975805, Converted: 2.95278764
- Biggest difference in row (0, 6, 7), sum 6.529483 vs 0.743666
- Layer 6, Token 15 (recurrent cache comparison):
- Original tensor sum: 328.690277
- Converted tensor sum: 322.121643
- Original tensor mean: 0.102716
- Converted tensor mean: 0.100663
- Mean difference: 0.17106648
- Maximum pointwise difference: 3.49930573
- Max difference location: (0, 10, 4, 0)
- Values at max diff - Original: -0.02206346, Converted: 3.47724223
- Biggest difference in row (0, 10, 4), sum 0.599032 vs 10.579692
- Layer 8, Token 15 (recurrent cache comparison):
- Original tensor sum: 317.911224
- Converted tensor sum: 119.034622
- Original tensor mean: 0.099347
- Converted tensor mean: 0.037198
- Mean difference: 0.17545381
- Maximum pointwise difference: 5.58166885
- Max difference location: (0, 12, 5, 9)
- Values at max diff - Original: -0.00163084, Converted: 5.58003807
- Biggest difference in row (0, 12, 5), sum -0.115539 vs 9.864284
- Layer 9, Token 15 (recurrent cache comparison):
- Original tensor sum: 190.170853
- Converted tensor sum: 99.272003
- Original tensor mean: 0.059428
- Converted tensor mean: 0.031023
- Mean difference: 0.10875368
- Maximum pointwise difference: 2.41038036
- Max difference location: (0, 18, 2, 3)
- Values at max diff - Original: 2.75146770, Converted: 0.34108725
- Biggest difference in row (0, 18, 2), sum 8.039729 vs 1.441757
- Layer 10, Token 15 (recurrent cache comparison):
- Original tensor sum: 224.608826
- Converted tensor sum: 140.291000
- Original tensor mean: 0.070190
- Converted tensor mean: 0.043841
- Mean difference: 0.13173704
- Maximum pointwise difference: 3.69921541
- Max difference location: (0, 0, 7, 3)
- Values at max diff - Original: -0.00459916, Converted: 3.69461632
- Biggest difference in row (0, 18, 3), sum 0.045224 vs 5.212623
- Layer 12, Token 15 (recurrent cache comparison):
- Original tensor sum: 284.485657
- Converted tensor sum: 158.051971
- Original tensor mean: 0.088902
- Converted tensor mean: 0.049391
- Mean difference: 0.16240636
- Maximum pointwise difference: 3.41311693
- Max difference location: (0, 30, 4, 0)
- Values at max diff - Original: -0.00378206, Converted: 3.40933490
- Biggest difference in row (0, 28, 3), sum 10.288229 vs 0.646799
- Layer 13, Token 15 (recurrent cache comparison):
- Original tensor sum: 217.891571
- Converted tensor sum: 114.440430
- Original tensor mean: 0.068091
- Converted tensor mean: 0.035763
- Mean difference: 0.11250080
- Maximum pointwise difference: 2.57714581
- Max difference location: (0, 3, 9, 4)
- Values at max diff - Original: -0.00322327, Converted: 2.57392263
- Biggest difference in row (0, 8, 1), sum 5.657505 vs 0.025426
- Layer 14, Token 15 (recurrent cache comparison):
- Original tensor sum: 613.393188
- Converted tensor sum: 259.209320
- Original tensor mean: 0.191685
- Converted tensor mean: 0.081003
- Mean difference: 0.25669345
- Maximum pointwise difference: 4.67302513
- Max difference location: (0, 16, 6, 7)
- Values at max diff - Original: 0.00394140, Converted: 4.67696667
- Biggest difference in row (0, 16, 6), sum 0.113854 vs 11.389561
- Layer 0, Token 16 (recurrent cache comparison):
- Original tensor sum: -7.241831
- Converted tensor sum: 6.292229
- Original tensor mean: -0.002263
- Converted tensor mean: 0.001966
- Mean difference: 0.07260455
- Maximum pointwise difference: 1.56294525
- Max difference location: (0, 4, 9, 1)
- Values at max diff - Original: 1.55768764, Converted: -0.00525762
- Biggest difference in row (0, 4, 9), sum 3.422554 vs -0.082252
- Layer 1, Token 16 (recurrent cache comparison):
- Original tensor sum: 208.371277
- Converted tensor sum: 121.900169
- Original tensor mean: 0.065116
- Converted tensor mean: 0.038094
- Mean difference: 0.10988435
- Maximum pointwise difference: 2.54077005
- Max difference location: (0, 16, 6, 1)
- Values at max diff - Original: 2.44506192, Converted: -0.09570823
- Biggest difference in row (0, 16, 6), sum 5.495286 vs 0.369152
- Layer 2, Token 16 (recurrent cache comparison):
- Original tensor sum: 271.274109
- Converted tensor sum: 250.062592
- Original tensor mean: 0.084773
- Converted tensor mean: 0.078145
- Mean difference: 0.18668148
- Maximum pointwise difference: 3.97749329
- Max difference location: (0, 4, 8, 2)
- Values at max diff - Original: 0.00367373, Converted: 3.98116708
- Biggest difference in row (0, 4, 8), sum 0.084576 vs 8.366636
- Layer 4, Token 16 (recurrent cache comparison):
- Original tensor sum: 245.506393
- Converted tensor sum: 128.042282
- Original tensor mean: 0.076721
- Converted tensor mean: 0.040013
- Mean difference: 0.13813969
- Maximum pointwise difference: 2.50754499
- Max difference location: (0, 27, 2, 5)
- Values at max diff - Original: 2.48510361, Converted: -0.02244142
- Biggest difference in row (0, 30, 3), sum 6.143555 vs -0.003137
- Layer 5, Token 16 (recurrent cache comparison):
- Original tensor sum: 252.541031
- Converted tensor sum: 153.491074
- Original tensor mean: 0.078919
- Converted tensor mean: 0.047966
- Mean difference: 0.13783714
- Maximum pointwise difference: 4.67899084
- Max difference location: (0, 6, 2, 9)
- Values at max diff - Original: 4.74959278, Converted: 0.07060210
- Biggest difference in row (0, 6, 2), sum 15.435174 vs 0.669571
- Layer 6, Token 16 (recurrent cache comparison):
- Original tensor sum: 417.031616
- Converted tensor sum: 302.490662
- Original tensor mean: 0.130322
- Converted tensor mean: 0.094528
- Mean difference: 0.18095936
- Maximum pointwise difference: 3.41091108
- Max difference location: (0, 1, 9, 8)
- Values at max diff - Original: 3.94837856, Converted: 0.53746736
- Biggest difference in row (0, 17, 7), sum 10.598001 vs 1.553886
- Layer 8, Token 16 (recurrent cache comparison):
- Original tensor sum: 360.497803
- Converted tensor sum: 167.798264
- Original tensor mean: 0.112656
- Converted tensor mean: 0.052437
- Mean difference: 0.18179806
- Maximum pointwise difference: 4.85258770
- Max difference location: (0, 20, 6, 7)
- Values at max diff - Original: 3.78496194, Converted: -1.06762552
- Biggest difference in row (0, 20, 6), sum 7.293591 vs -2.448533
- Layer 9, Token 16 (recurrent cache comparison):
- Original tensor sum: 231.574097
- Converted tensor sum: 117.788071
- Original tensor mean: 0.072367
- Converted tensor mean: 0.036809
- Mean difference: 0.12296900
- Maximum pointwise difference: 1.94617844
- Max difference location: (0, 18, 2, 3)
- Values at max diff - Original: 2.51620770, Converted: 0.57002932
- Biggest difference in row (0, 18, 2), sum 7.408888 vs 2.509162
- Layer 10, Token 16 (recurrent cache comparison):
- Original tensor sum: 251.412247
- Converted tensor sum: 167.548752
- Original tensor mean: 0.078566
- Converted tensor mean: 0.052359
- Mean difference: 0.13002089
- Maximum pointwise difference: 2.56599689
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 4.14129448, Converted: 1.57529759
- Biggest difference in row (0, 14, 2), sum 5.702995 vs 0.022515
- Layer 12, Token 16 (recurrent cache comparison):
- Original tensor sum: 309.263367
- Converted tensor sum: 172.743027
- Original tensor mean: 0.096645
- Converted tensor mean: 0.053982
- Mean difference: 0.16015999
- Maximum pointwise difference: 4.55992699
- Max difference location: (0, 28, 3, 4)
- Values at max diff - Original: 3.40088701, Converted: -1.15903974
- Biggest difference in row (0, 28, 3), sum 10.782799 vs -1.738761
- Layer 13, Token 16 (recurrent cache comparison):
- Original tensor sum: 245.305267
- Converted tensor sum: 135.343552
- Original tensor mean: 0.076658
- Converted tensor mean: 0.042295
- Mean difference: 0.11650297
- Maximum pointwise difference: 2.94789600
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 3.34942126, Converted: 0.40152529
- Biggest difference in row (0, 27, 4), sum 6.619488 vs 0.377767
- Layer 14, Token 16 (recurrent cache comparison):
- Original tensor sum: 677.616821
- Converted tensor sum: 309.657593
- Original tensor mean: 0.211755
- Converted tensor mean: 0.096768
- Mean difference: 0.25261062
- Maximum pointwise difference: 4.12457132
- Max difference location: (0, 21, 3, 5)
- Values at max diff - Original: 4.07018948, Converted: -0.05438172
- Biggest difference in row (0, 21, 3), sum 12.550769 vs -0.320660
- Layer 0, Token 17 (recurrent cache comparison):
- Original tensor sum: 8.814422
- Converted tensor sum: 2.569008
- Original tensor mean: 0.002755
- Converted tensor mean: 0.000803
- Mean difference: 0.07054429
- Maximum pointwise difference: 2.09221244
- Max difference location: (0, 1, 2, 3)
- Values at max diff - Original: 2.03968024, Converted: -0.05253213
- Biggest difference in row (0, 17, 2), sum 2.854507 vs 0.425217
- Layer 1, Token 17 (recurrent cache comparison):
- Original tensor sum: 202.785217
- Converted tensor sum: 127.821655
- Original tensor mean: 0.063370
- Converted tensor mean: 0.039944
- Mean difference: 0.11817915
- Maximum pointwise difference: 2.18196273
- Max difference location: (0, 23, 4, 0)
- Values at max diff - Original: 0.00466894, Converted: 2.18663168
- Biggest difference in row (0, 23, 4), sum 1.189118 vs 6.664180
- Layer 2, Token 17 (recurrent cache comparison):
- Original tensor sum: 269.547241
- Converted tensor sum: 202.949875
- Original tensor mean: 0.084234
- Converted tensor mean: 0.063422
- Mean difference: 0.17686243
- Maximum pointwise difference: 3.38580871
- Max difference location: (0, 30, 3, 9)
- Values at max diff - Original: -0.03989490, Converted: 3.34591389
- Biggest difference in row (0, 23, 4), sum 0.959554 vs 6.602069
- Layer 4, Token 17 (recurrent cache comparison):
- Original tensor sum: 285.057709
- Converted tensor sum: 90.890617
- Original tensor mean: 0.089081
- Converted tensor mean: 0.028403
- Mean difference: 0.14633463
- Maximum pointwise difference: 3.59569287
- Max difference location: (0, 19, 2, 9)
- Values at max diff - Original: 0.11129396, Converted: 3.70698690
- Biggest difference in row (0, 24, 1), sum 6.665072 vs 0.069785
- Layer 5, Token 17 (recurrent cache comparison):
- Original tensor sum: 305.935303
- Converted tensor sum: 101.421249
- Original tensor mean: 0.095605
- Converted tensor mean: 0.031694
- Mean difference: 0.15904053
- Maximum pointwise difference: 2.52599096
- Max difference location: (0, 6, 2, 9)
- Values at max diff - Original: 2.51262259, Converted: -0.01336834
- Biggest difference in row (0, 6, 2), sum 10.206850 vs 1.778209
- Layer 6, Token 17 (recurrent cache comparison):
- Original tensor sum: 411.833740
- Converted tensor sum: 250.492935
- Original tensor mean: 0.128698
- Converted tensor mean: 0.078279
- Mean difference: 0.18581259
- Maximum pointwise difference: 4.02491474
- Max difference location: (0, 17, 7, 5)
- Values at max diff - Original: 4.38884020, Converted: 0.36392546
- Biggest difference in row (0, 17, 7), sum 11.349621 vs 0.846145
- Layer 8, Token 17 (recurrent cache comparison):
- Original tensor sum: 373.165680
- Converted tensor sum: 136.027786
- Original tensor mean: 0.116614
- Converted tensor mean: 0.042509
- Mean difference: 0.18740444
- Maximum pointwise difference: 4.54259586
- Max difference location: (0, 20, 0, 7)
- Values at max diff - Original: 4.54873943, Converted: 0.00614343
- Biggest difference in row (0, 7, 2), sum 8.000880 vs 0.043800
- Layer 9, Token 17 (recurrent cache comparison):
- Original tensor sum: 239.737335
- Converted tensor sum: 91.044197
- Original tensor mean: 0.074918
- Converted tensor mean: 0.028451
- Mean difference: 0.11736859
- Maximum pointwise difference: 1.98427892
- Max difference location: (0, 14, 2, 1)
- Values at max diff - Original: 1.90727878, Converted: -0.07700008
- Biggest difference in row (0, 28, 7), sum 5.596577 vs -0.058259
- Layer 10, Token 17 (recurrent cache comparison):
- Original tensor sum: 260.470673
- Converted tensor sum: 162.895706
- Original tensor mean: 0.081397
- Converted tensor mean: 0.050905
- Mean difference: 0.14167482
- Maximum pointwise difference: 3.23060656
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 4.23022413, Converted: 0.99961770
- Biggest difference in row (0, 24, 0), sum 0.125982 vs 9.195232
- Layer 12, Token 17 (recurrent cache comparison):
- Original tensor sum: 321.268158
- Converted tensor sum: 134.452438
- Original tensor mean: 0.100396
- Converted tensor mean: 0.042016
- Mean difference: 0.17344666
- Maximum pointwise difference: 4.15682602
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 4.38615370, Converted: 0.22932746
- Biggest difference in row (0, 28, 3), sum 11.304427 vs 0.427086
- Layer 13, Token 17 (recurrent cache comparison):
- Original tensor sum: 255.942596
- Converted tensor sum: 107.501419
- Original tensor mean: 0.079982
- Converted tensor mean: 0.033594
- Mean difference: 0.11964211
- Maximum pointwise difference: 2.72310257
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 3.71963763, Converted: 0.99653512
- Biggest difference in row (0, 27, 4), sum 6.949797 vs 0.279431
- Layer 14, Token 17 (recurrent cache comparison):
- Original tensor sum: 718.971008
- Converted tensor sum: 252.775909
- Original tensor mean: 0.224678
- Converted tensor mean: 0.078992
- Mean difference: 0.28457019
- Maximum pointwise difference: 4.54859781
- Max difference location: (0, 5, 8, 9)
- Values at max diff - Original: -0.00566958, Converted: 4.54292822
- Biggest difference in row (0, 6, 1), sum 11.820190 vs 0.083275
- Layer 0, Token 18 (recurrent cache comparison):
- Original tensor sum: -2.745796
- Converted tensor sum: 11.596529
- Original tensor mean: -0.000858
- Converted tensor mean: 0.003624
- Mean difference: 0.06698289
- Maximum pointwise difference: 1.30398095
- Max difference location: (0, 1, 2, 3)
- Values at max diff - Original: 1.23424304, Converted: -0.06973789
- Biggest difference in row (0, 25, 7), sum -1.491066 vs 0.107394
- Layer 1, Token 18 (recurrent cache comparison):
- Original tensor sum: 196.680084
- Converted tensor sum: 112.820984
- Original tensor mean: 0.061463
- Converted tensor mean: 0.035257
- Mean difference: 0.11424790
- Maximum pointwise difference: 1.90677047
- Max difference location: (0, 14, 2, 3)
- Values at max diff - Original: 0.10773923, Converted: 2.01450968
- Biggest difference in row (0, 24, 3), sum 5.388914 vs 0.084538
- Layer 2, Token 18 (recurrent cache comparison):
- Original tensor sum: 269.808228
- Converted tensor sum: 106.268402
- Original tensor mean: 0.084315
- Converted tensor mean: 0.033209
- Mean difference: 0.16576965
- Maximum pointwise difference: 2.41004586
- Max difference location: (0, 12, 0, 0)
- Values at max diff - Original: 2.62151933, Converted: 0.21147355
- Biggest difference in row (0, 12, 0), sum 7.396654 vs 0.148190
- Layer 4, Token 18 (recurrent cache comparison):
- Original tensor sum: 299.541138
- Converted tensor sum: 34.684372
- Original tensor mean: 0.093607
- Converted tensor mean: 0.010839
- Mean difference: 0.15344296
- Maximum pointwise difference: 4.97097397
- Max difference location: (0, 27, 8, 5)
- Values at max diff - Original: 4.93650246, Converted: -0.03447145
- Biggest difference in row (0, 27, 8), sum 10.168988 vs 0.095367
- Layer 5, Token 18 (recurrent cache comparison):
- Original tensor sum: 322.520721
- Converted tensor sum: 32.353989
- Original tensor mean: 0.100788
- Converted tensor mean: 0.010111
- Mean difference: 0.15857503
- Maximum pointwise difference: 3.27807403
- Max difference location: (0, 28, 9, 6)
- Values at max diff - Original: 3.39260817, Converted: 0.11453414
- Biggest difference in row (0, 31, 7), sum 9.317598 vs 0.060667
- Layer 6, Token 18 (recurrent cache comparison):
- Original tensor sum: 404.272705
- Converted tensor sum: 105.430817
- Original tensor mean: 0.126335
- Converted tensor mean: 0.032947
- Mean difference: 0.18362552
- Maximum pointwise difference: 4.36808205
- Max difference location: (0, 6, 5, 4)
- Values at max diff - Original: 4.63004971, Converted: 0.26196742
- Biggest difference in row (0, 30, 4), sum 12.429064 vs 1.549177
- Layer 8, Token 18 (recurrent cache comparison):
- Original tensor sum: 379.120117
- Converted tensor sum: 49.316475
- Original tensor mean: 0.118475
- Converted tensor mean: 0.015411
- Mean difference: 0.18690227
- Maximum pointwise difference: 4.34863997
- Max difference location: (0, 20, 0, 7)
- Values at max diff - Original: 4.50196075, Converted: 0.15332088
- Biggest difference in row (0, 7, 2), sum 8.701149 vs -1.880803
- Layer 9, Token 18 (recurrent cache comparison):
- Original tensor sum: 247.687454
- Converted tensor sum: 31.604210
- Original tensor mean: 0.077402
- Converted tensor mean: 0.009876
- Mean difference: 0.12334745
- Maximum pointwise difference: 2.89748645
- Max difference location: (0, 14, 2, 1)
- Values at max diff - Original: 2.54342103, Converted: -0.35406536
- Biggest difference in row (0, 9, 8), sum 5.984664 vs -0.341670
- Layer 10, Token 18 (recurrent cache comparison):
- Original tensor sum: 262.752014
- Converted tensor sum: 52.628201
- Original tensor mean: 0.082110
- Converted tensor mean: 0.016446
- Mean difference: 0.13161205
- Maximum pointwise difference: 2.92723370
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 4.35996389, Converted: 1.43273032
- Biggest difference in row (0, 11, 6), sum 6.418620 vs 0.589213
- Layer 12, Token 18 (recurrent cache comparison):
- Original tensor sum: 326.667419
- Converted tensor sum: 31.792521
- Original tensor mean: 0.102084
- Converted tensor mean: 0.009935
- Mean difference: 0.17550385
- Maximum pointwise difference: 4.50774860
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 4.50715399, Converted: -0.00059444
- Biggest difference in row (0, 21, 9), sum 0.167931 vs -15.009873
- Layer 13, Token 18 (recurrent cache comparison):
- Original tensor sum: 261.870972
- Converted tensor sum: 53.651596
- Original tensor mean: 0.081835
- Converted tensor mean: 0.016766
- Mean difference: 0.12234001
- Maximum pointwise difference: 4.01087809
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 4.08570528, Converted: 0.07482710
- Biggest difference in row (0, 20, 0), sum 2.293484 vs -4.637159
- Layer 14, Token 18 (recurrent cache comparison):
- Original tensor sum: 740.518921
- Converted tensor sum: 84.074921
- Original tensor mean: 0.231412
- Converted tensor mean: 0.026273
- Mean difference: 0.28319737
- Maximum pointwise difference: 4.63366222
- Max difference location: (0, 1, 4, 6)
- Values at max diff - Original: -0.00748948, Converted: 4.62617254
- Biggest difference in row (0, 6, 1), sum 12.889781 vs -0.226667
- Layer 0, Token 19 (recurrent cache comparison):
- Original tensor sum: 0.866719
- Converted tensor sum: 13.915674
- Original tensor mean: 0.000271
- Converted tensor mean: 0.004349
- Mean difference: 0.05563419
- Maximum pointwise difference: 1.58602941
- Max difference location: (0, 1, 5, 3)
- Values at max diff - Original: 1.50699055, Converted: -0.07903884
- Biggest difference in row (0, 28, 5), sum 0.233465 vs 1.374955
- Layer 1, Token 19 (recurrent cache comparison):
- Original tensor sum: 143.055450
- Converted tensor sum: 84.285873
- Original tensor mean: 0.044705
- Converted tensor mean: 0.026339
- Mean difference: 0.11436888
- Maximum pointwise difference: 2.11188436
- Max difference location: (0, 15, 8, 5)
- Values at max diff - Original: -0.06675819, Converted: 2.04512620
- Biggest difference in row (0, 23, 4), sum 0.445206 vs 4.313503
- Layer 2, Token 19 (recurrent cache comparison):
- Original tensor sum: 206.674835
- Converted tensor sum: 69.739983
- Original tensor mean: 0.064586
- Converted tensor mean: 0.021794
- Mean difference: 0.14624587
- Maximum pointwise difference: 2.46052098
- Max difference location: (0, 5, 4, 5)
- Values at max diff - Original: 2.46177387, Converted: 0.00125289
- Biggest difference in row (0, 23, 9), sum 5.872013 vs -0.147400
- Layer 4, Token 19 (recurrent cache comparison):
- Original tensor sum: 223.180557
- Converted tensor sum: 57.034431
- Original tensor mean: 0.069744
- Converted tensor mean: 0.017823
- Mean difference: 0.13700224
- Maximum pointwise difference: 4.09037542
- Max difference location: (0, 25, 1, 9)
- Values at max diff - Original: 3.97389102, Converted: -0.11648450
- Biggest difference in row (0, 24, 1), sum 6.574383 vs 0.271665
- Layer 5, Token 19 (recurrent cache comparison):
- Original tensor sum: 315.655853
- Converted tensor sum: 48.647461
- Original tensor mean: 0.098642
- Converted tensor mean: 0.015202
- Mean difference: 0.15315701
- Maximum pointwise difference: 6.10414743
- Max difference location: (0, 28, 9, 6)
- Values at max diff - Original: 6.29615974, Converted: 0.19201221
- Biggest difference in row (0, 28, 9), sum 11.702868 vs 0.168917
- Layer 6, Token 19 (recurrent cache comparison):
- Original tensor sum: 358.473572
- Converted tensor sum: 101.158226
- Original tensor mean: 0.112023
- Converted tensor mean: 0.031612
- Mean difference: 0.16535039
- Maximum pointwise difference: 3.82374835
- Max difference location: (0, 18, 1, 7)
- Values at max diff - Original: 3.88149524, Converted: 0.05774695
- Biggest difference in row (0, 20, 9), sum 9.851446 vs -0.224849
- Layer 8, Token 19 (recurrent cache comparison):
- Original tensor sum: 346.821899
- Converted tensor sum: 80.751968
- Original tensor mean: 0.108382
- Converted tensor mean: 0.025235
- Mean difference: 0.17743167
- Maximum pointwise difference: 3.78403044
- Max difference location: (0, 20, 0, 7)
- Values at max diff - Original: 3.89911222, Converted: 0.11508182
- Biggest difference in row (0, 7, 2), sum 9.025558 vs -0.167117
- Layer 9, Token 19 (recurrent cache comparison):
- Original tensor sum: 249.268311
- Converted tensor sum: 43.202286
- Original tensor mean: 0.077896
- Converted tensor mean: 0.013501
- Mean difference: 0.12318792
- Maximum pointwise difference: 2.83834696
- Max difference location: (0, 9, 8, 6)
- Values at max diff - Original: 2.71989083, Converted: -0.11845621
- Biggest difference in row (0, 9, 8), sum 8.573050 vs -0.169431
- Layer 10, Token 19 (recurrent cache comparison):
- Original tensor sum: 291.462646
- Converted tensor sum: 66.798782
- Original tensor mean: 0.091082
- Converted tensor mean: 0.020875
- Mean difference: 0.14087133
- Maximum pointwise difference: 3.38042760
- Max difference location: (0, 25, 1, 9)
- Values at max diff - Original: 3.51948309, Converted: 0.13905543
- Biggest difference in row (0, 25, 1), sum 8.942734 vs 0.065733
- Layer 12, Token 19 (recurrent cache comparison):
- Original tensor sum: 342.570038
- Converted tensor sum: 48.484200
- Original tensor mean: 0.107053
- Converted tensor mean: 0.015151
- Mean difference: 0.17410682
- Maximum pointwise difference: 4.36208725
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 4.34435558, Converted: -0.01773176
- Biggest difference in row (0, 28, 3), sum 11.628893 vs -0.798577
- Layer 13, Token 19 (recurrent cache comparison):
- Original tensor sum: 270.129211
- Converted tensor sum: 52.121815
- Original tensor mean: 0.084415
- Converted tensor mean: 0.016288
- Mean difference: 0.12223634
- Maximum pointwise difference: 3.81266069
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 4.31173086, Converted: 0.49907014
- Biggest difference in row (0, 27, 4), sum 5.975472 vs -0.026263
- Layer 14, Token 19 (recurrent cache comparison):
- Original tensor sum: 772.850342
- Converted tensor sum: 107.083702
- Original tensor mean: 0.241516
- Converted tensor mean: 0.033464
- Mean difference: 0.28851181
- Maximum pointwise difference: 4.31482410
- Max difference location: (0, 28, 4, 1)
- Values at max diff - Original: 4.32322884, Converted: 0.00840468
- Biggest difference in row (0, 14, 2), sum 14.072536 vs 0.377507
- Layer 0, Token 20 (recurrent cache comparison):
- Original tensor sum: 2.008890
- Converted tensor sum: 12.614055
- Original tensor mean: 0.000628
- Converted tensor mean: 0.003942
- Mean difference: 0.05576663
- Maximum pointwise difference: 1.29991353
- Max difference location: (0, 1, 5, 3)
- Values at max diff - Original: 1.36800277, Converted: 0.06808926
- Biggest difference in row (0, 23, 6), sum 0.260241 vs -1.370477
- Layer 1, Token 20 (recurrent cache comparison):
- Original tensor sum: 58.587276
- Converted tensor sum: 76.507767
- Original tensor mean: 0.018309
- Converted tensor mean: 0.023909
- Mean difference: 0.10026859
- Maximum pointwise difference: 2.19443369
- Max difference location: (0, 14, 2, 2)
- Values at max diff - Original: -0.11835258, Converted: 2.07608104
- Biggest difference in row (0, 14, 2), sum -0.449485 vs 3.433519
- Layer 2, Token 20 (recurrent cache comparison):
- Original tensor sum: 165.744568
- Converted tensor sum: 64.695602
- Original tensor mean: 0.051795
- Converted tensor mean: 0.020217
- Mean difference: 0.14529096
- Maximum pointwise difference: 2.04155922
- Max difference location: (0, 5, 4, 5)
- Values at max diff - Original: 2.00637627, Converted: -0.03518293
- Biggest difference in row (0, 5, 4), sum 4.793974 vs -0.065828
- Layer 4, Token 20 (recurrent cache comparison):
- Original tensor sum: 212.915298
- Converted tensor sum: 76.568939
- Original tensor mean: 0.066536
- Converted tensor mean: 0.023928
- Mean difference: 0.12807344
- Maximum pointwise difference: 3.65112019
- Max difference location: (0, 25, 1, 9)
- Values at max diff - Original: 3.82295465, Converted: 0.17183457
- Biggest difference in row (0, 20, 4), sum 0.743454 vs 6.637871
- Layer 5, Token 20 (recurrent cache comparison):
- Original tensor sum: 258.077209
- Converted tensor sum: 49.652397
- Original tensor mean: 0.080649
- Converted tensor mean: 0.015516
- Mean difference: 0.13090378
- Maximum pointwise difference: 2.72355151
- Max difference location: (0, 28, 9, 6)
- Values at max diff - Original: 2.71506453, Converted: -0.00848696
- Biggest difference in row (0, 28, 9), sum 6.250334 vs 0.160866
- Layer 6, Token 20 (recurrent cache comparison):
- Original tensor sum: 336.431519
- Converted tensor sum: 141.819733
- Original tensor mean: 0.105135
- Converted tensor mean: 0.044319
- Mean difference: 0.16430938
- Maximum pointwise difference: 3.65949225
- Max difference location: (0, 6, 5, 4)
- Values at max diff - Original: 3.87317371, Converted: 0.21368141
- Biggest difference in row (0, 12, 1), sum 12.053196 vs 2.254734
- Layer 8, Token 20 (recurrent cache comparison):
- Original tensor sum: 345.424561
- Converted tensor sum: 112.814018
- Original tensor mean: 0.107945
- Converted tensor mean: 0.035254
- Mean difference: 0.17631440
- Maximum pointwise difference: 3.36074710
- Max difference location: (0, 20, 0, 7)
- Values at max diff - Original: 3.50376892, Converted: 0.14302187
- Biggest difference in row (0, 21, 0), sum 8.499396 vs 0.480686
- Layer 9, Token 20 (recurrent cache comparison):
- Original tensor sum: 261.041870
- Converted tensor sum: 41.182373
- Original tensor mean: 0.081576
- Converted tensor mean: 0.012869
- Mean difference: 0.12376894
- Maximum pointwise difference: 2.65249300
- Max difference location: (0, 18, 2, 3)
- Values at max diff - Original: 2.77233696, Converted: 0.11984408
- Biggest difference in row (0, 9, 8), sum 7.305106 vs -0.383589
- Layer 10, Token 20 (recurrent cache comparison):
- Original tensor sum: 276.296692
- Converted tensor sum: 75.410934
- Original tensor mean: 0.086343
- Converted tensor mean: 0.023566
- Mean difference: 0.12437831
- Maximum pointwise difference: 2.84117389
- Max difference location: (0, 25, 1, 9)
- Values at max diff - Original: 3.23575449, Converted: 0.39458057
- Biggest difference in row (0, 25, 1), sum 8.240932 vs 1.044036
- Layer 12, Token 20 (recurrent cache comparison):
- Original tensor sum: 345.097260
- Converted tensor sum: 53.731094
- Original tensor mean: 0.107843
- Converted tensor mean: 0.016791
- Mean difference: 0.17168441
- Maximum pointwise difference: 4.60863352
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 4.63144016, Converted: 0.02280665
- Biggest difference in row (0, 28, 3), sum 11.591027 vs 0.333645
- Layer 13, Token 20 (recurrent cache comparison):
- Original tensor sum: 253.047394
- Converted tensor sum: 40.628811
- Original tensor mean: 0.079077
- Converted tensor mean: 0.012697
- Mean difference: 0.11498150
- Maximum pointwise difference: 4.22373772
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 4.22381639, Converted: 0.00007845
- Biggest difference in row (0, 27, 4), sum 6.304989 vs 0.143700
- Layer 14, Token 20 (recurrent cache comparison):
- Original tensor sum: 769.098083
- Converted tensor sum: 130.283981
- Original tensor mean: 0.240343
- Converted tensor mean: 0.040714
- Mean difference: 0.28381503
- Maximum pointwise difference: 4.93393469
- Max difference location: (0, 28, 4, 1)
- Values at max diff - Original: 4.91371727, Converted: -0.02021729
- Biggest difference in row (0, 6, 1), sum 14.151162 vs 0.315893
- Layer 0, Token 21 (recurrent cache comparison):
- Original tensor sum: 1.077594
- Converted tensor sum: 15.438447
- Original tensor mean: 0.000337
- Converted tensor mean: 0.004825
- Mean difference: 0.05193665
- Maximum pointwise difference: 0.74260694
- Max difference location: (0, 28, 8, 5)
- Values at max diff - Original: 0.72446448, Converted: -0.01814246
- Biggest difference in row (0, 23, 8), sum -0.100890 vs -1.090759
- Layer 1, Token 21 (recurrent cache comparison):
- Original tensor sum: 28.594997
- Converted tensor sum: 89.290833
- Original tensor mean: 0.008936
- Converted tensor mean: 0.027903
- Mean difference: 0.10794319
- Maximum pointwise difference: 1.59959590
- Max difference location: (0, 20, 2, 0)
- Values at max diff - Original: 0.00296844, Converted: 1.60256433
- Biggest difference in row (0, 20, 2), sum 0.038832 vs 3.326198
- Layer 2, Token 21 (recurrent cache comparison):
- Original tensor sum: 146.744446
- Converted tensor sum: 85.128494
- Original tensor mean: 0.045858
- Converted tensor mean: 0.026603
- Mean difference: 0.15625563
- Maximum pointwise difference: 3.40082598
- Max difference location: (0, 4, 2, 4)
- Values at max diff - Original: 3.41796732, Converted: 0.01714140
- Biggest difference in row (0, 4, 2), sum 7.012363 vs 0.098989
- Layer 4, Token 21 (recurrent cache comparison):
- Original tensor sum: 120.798615
- Converted tensor sum: 143.282379
- Original tensor mean: 0.037750
- Converted tensor mean: 0.044776
- Mean difference: 0.13425863
- Maximum pointwise difference: 2.73616052
- Max difference location: (0, 24, 9, 1)
- Values at max diff - Original: 0.23530871, Converted: 2.97146916
- Biggest difference in row (0, 30, 3), sum 1.329738 vs 6.360154
- Layer 5, Token 21 (recurrent cache comparison):
- Original tensor sum: 222.583710
- Converted tensor sum: 86.326241
- Original tensor mean: 0.069557
- Converted tensor mean: 0.026977
- Mean difference: 0.13234577
- Maximum pointwise difference: 2.64859867
- Max difference location: (0, 28, 7, 6)
- Values at max diff - Original: 2.67573905, Converted: 0.02714031
- Biggest difference in row (0, 26, 8), sum 3.963463 vs -0.648591
- Layer 6, Token 21 (recurrent cache comparison):
- Original tensor sum: 317.078064
- Converted tensor sum: 162.595886
- Original tensor mean: 0.099087
- Converted tensor mean: 0.050811
- Mean difference: 0.15550284
- Maximum pointwise difference: 3.79531074
- Max difference location: (0, 17, 7, 5)
- Values at max diff - Original: 3.63465667, Converted: -0.16065404
- Biggest difference in row (0, 20, 9), sum 8.606161 vs 0.369012
- Layer 8, Token 21 (recurrent cache comparison):
- Original tensor sum: 345.257385
- Converted tensor sum: 184.546997
- Original tensor mean: 0.107893
- Converted tensor mean: 0.057671
- Mean difference: 0.18574484
- Maximum pointwise difference: 3.21210074
- Max difference location: (0, 7, 2, 9)
- Values at max diff - Original: 3.22117043, Converted: 0.00906963
- Biggest difference in row (0, 21, 0), sum 7.649475 vs -0.057539
- Layer 9, Token 21 (recurrent cache comparison):
- Original tensor sum: 268.515228
- Converted tensor sum: 95.449539
- Original tensor mean: 0.083911
- Converted tensor mean: 0.029828
- Mean difference: 0.13116649
- Maximum pointwise difference: 3.19655538
- Max difference location: (0, 18, 2, 3)
- Values at max diff - Original: 3.71445417, Converted: 0.51789874
- Biggest difference in row (0, 9, 5), sum 9.129113 vs 0.079633
- Layer 10, Token 21 (recurrent cache comparison):
- Original tensor sum: 259.887024
- Converted tensor sum: 104.823151
- Original tensor mean: 0.081215
- Converted tensor mean: 0.032757
- Mean difference: 0.12396878
- Maximum pointwise difference: 3.03640962
- Max difference location: (0, 25, 1, 9)
- Values at max diff - Original: 3.24910450, Converted: 0.21269491
- Biggest difference in row (0, 25, 1), sum 8.045052 vs 0.162466
- Layer 12, Token 21 (recurrent cache comparison):
- Original tensor sum: 340.602814
- Converted tensor sum: 113.082108
- Original tensor mean: 0.106438
- Converted tensor mean: 0.035338
- Mean difference: 0.17276871
- Maximum pointwise difference: 4.99602270
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 4.72621298, Converted: -0.26980966
- Biggest difference in row (0, 28, 3), sum 11.259501 vs -0.695297
- Layer 13, Token 21 (recurrent cache comparison):
- Original tensor sum: 236.875137
- Converted tensor sum: 95.429146
- Original tensor mean: 0.074023
- Converted tensor mean: 0.029822
- Mean difference: 0.11990514
- Maximum pointwise difference: 3.69410872
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 4.10646772, Converted: 0.41235897
- Biggest difference in row (0, 23, 6), sum 5.276991 vs -0.187177
- Layer 14, Token 21 (recurrent cache comparison):
- Original tensor sum: 782.813049
- Converted tensor sum: 216.654602
- Original tensor mean: 0.244629
- Converted tensor mean: 0.067705
- Mean difference: 0.29570371
- Maximum pointwise difference: 5.94400930
- Max difference location: (0, 28, 4, 1)
- Values at max diff - Original: 5.97852135, Converted: 0.03451204
- Biggest difference in row (0, 6, 1), sum 14.360078 vs 0.533817
- Layer 0, Token 22 (recurrent cache comparison):
- Original tensor sum: 2.700914
- Converted tensor sum: 8.066211
- Original tensor mean: 0.000844
- Converted tensor mean: 0.002521
- Mean difference: 0.06021541
- Maximum pointwise difference: 1.02617574
- Max difference location: (0, 28, 9, 5)
- Values at max diff - Original: 1.17021942, Converted: 0.14404365
- Biggest difference in row (0, 4, 9), sum 1.758845 vs -0.049155
- Layer 1, Token 22 (recurrent cache comparison):
- Original tensor sum: 9.402251
- Converted tensor sum: 79.292084
- Original tensor mean: 0.002938
- Converted tensor mean: 0.024779
- Mean difference: 0.09312414
- Maximum pointwise difference: 1.86848283
- Max difference location: (0, 14, 2, 2)
- Values at max diff - Original: -0.01261259, Converted: 1.85587025
- Biggest difference in row (0, 1, 8), sum -0.144765 vs 2.729439
- Layer 2, Token 22 (recurrent cache comparison):
- Original tensor sum: 150.273865
- Converted tensor sum: 102.280075
- Original tensor mean: 0.046961
- Converted tensor mean: 0.031963
- Mean difference: 0.15655471
- Maximum pointwise difference: 2.95679903
- Max difference location: (0, 4, 8, 6)
- Values at max diff - Original: -0.15384272, Converted: 2.80295634
- Biggest difference in row (0, 10, 6), sum -0.449118 vs 3.435276
- Layer 4, Token 22 (recurrent cache comparison):
- Original tensor sum: 109.346573
- Converted tensor sum: 167.629913
- Original tensor mean: 0.034171
- Converted tensor mean: 0.052384
- Mean difference: 0.12662907
- Maximum pointwise difference: 2.69411635
- Max difference location: (0, 19, 2, 0)
- Values at max diff - Original: 0.00617844, Converted: 2.70029473
- Biggest difference in row (0, 19, 2), sum -0.222631 vs 5.908413
- Layer 5, Token 22 (recurrent cache comparison):
- Original tensor sum: 191.832321
- Converted tensor sum: 202.874756
- Original tensor mean: 0.059948
- Converted tensor mean: 0.063398
- Mean difference: 0.15467224
- Maximum pointwise difference: 6.38972092
- Max difference location: (0, 28, 6, 9)
- Values at max diff - Original: 0.04361831, Converted: 6.43333912
- Biggest difference in row (0, 28, 6), sum 0.738313 vs 17.286346
- Layer 6, Token 22 (recurrent cache comparison):
- Original tensor sum: 304.042816
- Converted tensor sum: 238.043579
- Original tensor mean: 0.095013
- Converted tensor mean: 0.074389
- Mean difference: 0.15846148
- Maximum pointwise difference: 3.40163994
- Max difference location: (0, 12, 2, 1)
- Values at max diff - Original: 1.29805720, Converted: 4.69969702
- Biggest difference in row (0, 17, 7), sum 7.862279 vs 0.254134
- Layer 8, Token 22 (recurrent cache comparison):
- Original tensor sum: 352.235718
- Converted tensor sum: 277.930298
- Original tensor mean: 0.110074
- Converted tensor mean: 0.086853
- Mean difference: 0.19249398
- Maximum pointwise difference: 3.61912727
- Max difference location: (0, 7, 2, 9)
- Values at max diff - Original: 3.80060625, Converted: 0.18147889
- Biggest difference in row (0, 21, 0), sum 9.710941 vs 0.797433
- Layer 9, Token 22 (recurrent cache comparison):
- Original tensor sum: 273.245667
- Converted tensor sum: 226.375031
- Original tensor mean: 0.085389
- Converted tensor mean: 0.070742
- Mean difference: 0.14207596
- Maximum pointwise difference: 2.82711124
- Max difference location: (0, 14, 1, 2)
- Values at max diff - Original: 0.05765805, Converted: 2.88476920
- Biggest difference in row (0, 9, 5), sum 9.348074 vs 1.880102
- Layer 10, Token 22 (recurrent cache comparison):
- Original tensor sum: 239.880463
- Converted tensor sum: 275.399414
- Original tensor mean: 0.074963
- Converted tensor mean: 0.086062
- Mean difference: 0.15027112
- Maximum pointwise difference: 3.59689593
- Max difference location: (0, 0, 7, 8)
- Values at max diff - Original: -0.00771881, Converted: 3.58917713
- Biggest difference in row (0, 24, 0), sum 0.303092 vs 7.643524
- Layer 12, Token 22 (recurrent cache comparison):
- Original tensor sum: 327.704742
- Converted tensor sum: 271.485931
- Original tensor mean: 0.102408
- Converted tensor mean: 0.084839
- Mean difference: 0.17104822
- Maximum pointwise difference: 4.17193794
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 4.78667879, Converted: 0.61474097
- Biggest difference in row (0, 28, 3), sum 10.929213 vs 0.205626
- Layer 13, Token 22 (recurrent cache comparison):
- Original tensor sum: 231.619003
- Converted tensor sum: 232.506165
- Original tensor mean: 0.072381
- Converted tensor mean: 0.072658
- Mean difference: 0.13752523
- Maximum pointwise difference: 4.03583384
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 3.99545026, Converted: -0.04038341
- Biggest difference in row (0, 11, 0), sum -0.083207 vs 6.010875
- Layer 14, Token 22 (recurrent cache comparison):
- Original tensor sum: 772.479431
- Converted tensor sum: 607.419800
- Original tensor mean: 0.241400
- Converted tensor mean: 0.189819
- Mean difference: 0.31881297
- Maximum pointwise difference: 5.76619625
- Max difference location: (0, 28, 4, 1)
- Values at max diff - Original: 6.25043201, Converted: 0.48423576
- Biggest difference in row (0, 6, 1), sum 14.585131 vs 0.928486
- Layer 0, Token 23 (recurrent cache comparison):
- Original tensor sum: 4.463778
- Converted tensor sum: 4.492921
- Original tensor mean: 0.001395
- Converted tensor mean: 0.001404
- Mean difference: 0.06506675
- Maximum pointwise difference: 1.83452773
- Max difference location: (0, 1, 3, 5)
- Values at max diff - Original: -0.04470510, Converted: 1.78982258
- Biggest difference in row (0, 8, 9), sum 0.088014 vs -1.806111
- Layer 1, Token 23 (recurrent cache comparison):
- Original tensor sum: 16.812580
- Converted tensor sum: 109.310081
- Original tensor mean: 0.005254
- Converted tensor mean: 0.034159
- Mean difference: 0.09598633
- Maximum pointwise difference: 1.58349574
- Max difference location: (0, 14, 2, 2)
- Values at max diff - Original: -0.00151580, Converted: 1.58197999
- Biggest difference in row (0, 31, 9), sum 0.029068 vs 3.659988
- Layer 2, Token 23 (recurrent cache comparison):
- Original tensor sum: 75.151047
- Converted tensor sum: 119.211670
- Original tensor mean: 0.023485
- Converted tensor mean: 0.037254
- Mean difference: 0.13861641
- Maximum pointwise difference: 2.43731642
- Max difference location: (0, 1, 3, 2)
- Values at max diff - Original: 0.08128840, Converted: 2.51860476
- Biggest difference in row (0, 1, 3), sum 0.598150 vs 6.365501
- Layer 4, Token 23 (recurrent cache comparison):
- Original tensor sum: 76.628754
- Converted tensor sum: 155.459259
- Original tensor mean: 0.023946
- Converted tensor mean: 0.048581
- Mean difference: 0.11704257
- Maximum pointwise difference: 2.73834753
- Max difference location: (0, 19, 9, 2)
- Values at max diff - Original: 4.03167677, Converted: 1.29332936
- Biggest difference in row (0, 8, 6), sum 0.016739 vs 3.865431
- Layer 5, Token 23 (recurrent cache comparison):
- Original tensor sum: 150.354111
- Converted tensor sum: 169.511383
- Original tensor mean: 0.046986
- Converted tensor mean: 0.052972
- Mean difference: 0.12414169
- Maximum pointwise difference: 4.08761406
- Max difference location: (0, 28, 8, 6)
- Values at max diff - Original: 3.60962462, Converted: -0.47798958
- Biggest difference in row (0, 28, 6), sum 0.154782 vs 5.205485
- Layer 6, Token 23 (recurrent cache comparison):
- Original tensor sum: 225.564255
- Converted tensor sum: 262.560272
- Original tensor mean: 0.070489
- Converted tensor mean: 0.082050
- Mean difference: 0.14410818
- Maximum pointwise difference: 5.85085487
- Max difference location: (0, 12, 6, 1)
- Values at max diff - Original: 6.56323051, Converted: 0.71237558
- Biggest difference in row (0, 12, 6), sum 15.410420 vs 3.070242
- Layer 8, Token 23 (recurrent cache comparison):
- Original tensor sum: 216.401703
- Converted tensor sum: 306.942932
- Original tensor mean: 0.067626
- Converted tensor mean: 0.095920
- Mean difference: 0.14416558
- Maximum pointwise difference: 3.46720839
- Max difference location: (0, 22, 4, 7)
- Values at max diff - Original: 3.77501345, Converted: 0.30780506
- Biggest difference in row (0, 22, 4), sum 7.765969 vs 0.982070
- Layer 9, Token 23 (recurrent cache comparison):
- Original tensor sum: 247.185196
- Converted tensor sum: 250.177109
- Original tensor mean: 0.077245
- Converted tensor mean: 0.078180
- Mean difference: 0.11487159
- Maximum pointwise difference: 2.80121279
- Max difference location: (0, 14, 2, 1)
- Values at max diff - Original: 3.97450233, Converted: 1.17328954
- Biggest difference in row (0, 28, 2), sum 8.960711 vs 2.242082
- Layer 10, Token 23 (recurrent cache comparison):
- Original tensor sum: 193.715546
- Converted tensor sum: 271.413574
- Original tensor mean: 0.060536
- Converted tensor mean: 0.084817
- Mean difference: 0.13006650
- Maximum pointwise difference: 3.03568482
- Max difference location: (0, 0, 3, 7)
- Values at max diff - Original: 3.30636239, Converted: 0.27067760
- Biggest difference in row (0, 23, 3), sum 6.103652 vs 1.165035
- Layer 12, Token 23 (recurrent cache comparison):
- Original tensor sum: 277.550171
- Converted tensor sum: 296.251099
- Original tensor mean: 0.086734
- Converted tensor mean: 0.092578
- Mean difference: 0.12569407
- Maximum pointwise difference: 2.70571613
- Max difference location: (0, 20, 2, 3)
- Values at max diff - Original: 3.96422935, Converted: 1.25851309
- Biggest difference in row (0, 30, 9), sum 8.374757 vs 2.683706
- Layer 13, Token 23 (recurrent cache comparison):
- Original tensor sum: 189.736130
- Converted tensor sum: 235.426422
- Original tensor mean: 0.059293
- Converted tensor mean: 0.073571
- Mean difference: 0.09623930
- Maximum pointwise difference: 3.40506268
- Max difference location: (0, 17, 8, 2)
- Values at max diff - Original: 4.38167763, Converted: 0.97661489
- Biggest difference in row (0, 17, 8), sum 7.328513 vs 2.010616
- Layer 14, Token 23 (recurrent cache comparison):
- Original tensor sum: 508.593140
- Converted tensor sum: 650.881714
- Original tensor mean: 0.158935
- Converted tensor mean: 0.203401
- Mean difference: 0.21357311
- Maximum pointwise difference: 4.58951044
- Max difference location: (0, 28, 4, 1)
- Values at max diff - Original: 0.01861674, Converted: 4.60812712
- Biggest difference in row (0, 27, 6), sum -2.170214 vs 13.869398
- Layer 0, Token 24 (recurrent cache comparison):
- Original tensor sum: 0.801011
- Converted tensor sum: -1.634871
- Original tensor mean: 0.000250
- Converted tensor mean: -0.000511
- Mean difference: 0.07332502
- Maximum pointwise difference: 1.81247604
- Max difference location: (0, 1, 3, 5)
- Values at max diff - Original: -0.04395379, Converted: 1.76852226
- Biggest difference in row (0, 25, 2), sum 0.205085 vs -3.009443
- Layer 1, Token 24 (recurrent cache comparison):
- Original tensor sum: 23.350971
- Converted tensor sum: 88.090744
- Original tensor mean: 0.007297
- Converted tensor mean: 0.027528
- Mean difference: 0.08751559
- Maximum pointwise difference: 1.07916749
- Max difference location: (0, 20, 7, 8)
- Values at max diff - Original: 1.04420257, Converted: -0.03496487
- Biggest difference in row (0, 31, 5), sum -0.311075 vs 1.779173
- Layer 2, Token 24 (recurrent cache comparison):
- Original tensor sum: 108.804047
- Converted tensor sum: 87.620453
- Original tensor mean: 0.034001
- Converted tensor mean: 0.027381
- Mean difference: 0.12934437
- Maximum pointwise difference: 2.40617442
- Max difference location: (0, 1, 2, 3)
- Values at max diff - Original: 0.02315997, Converted: 2.42933440
- Biggest difference in row (0, 27, 2), sum 3.832137 vs 0.454090
- Layer 4, Token 24 (recurrent cache comparison):
- Original tensor sum: 89.705452
- Converted tensor sum: 61.452301
- Original tensor mean: 0.028033
- Converted tensor mean: 0.019204
- Mean difference: 0.11625614
- Maximum pointwise difference: 3.20758009
- Max difference location: (0, 19, 2, 9)
- Values at max diff - Original: -0.01131610, Converted: 3.19626403
- Biggest difference in row (0, 19, 9), sum 3.560462 vs 0.155535
- Layer 5, Token 24 (recurrent cache comparison):
- Original tensor sum: 153.870117
- Converted tensor sum: 79.160019
- Original tensor mean: 0.048084
- Converted tensor mean: 0.024738
- Mean difference: 0.12364670
- Maximum pointwise difference: 2.68913746
- Max difference location: (0, 28, 3, 6)
- Values at max diff - Original: 2.79144502, Converted: 0.10230768
- Biggest difference in row (0, 28, 6), sum 0.099721 vs 5.709799
- Layer 6, Token 24 (recurrent cache comparison):
- Original tensor sum: 230.254852
- Converted tensor sum: 174.787750
- Original tensor mean: 0.071955
- Converted tensor mean: 0.054621
- Mean difference: 0.15056056
- Maximum pointwise difference: 6.34924650
- Max difference location: (0, 12, 6, 1)
- Values at max diff - Original: 6.46217585, Converted: 0.11292921
- Biggest difference in row (0, 12, 6), sum 15.171618 vs 0.722292
- Layer 8, Token 24 (recurrent cache comparison):
- Original tensor sum: 235.891174
- Converted tensor sum: 145.097076
- Original tensor mean: 0.073716
- Converted tensor mean: 0.045343
- Mean difference: 0.16653843
- Maximum pointwise difference: 3.68727565
- Max difference location: (0, 21, 9, 7)
- Values at max diff - Original: -0.03629338, Converted: 3.65098238
- Biggest difference in row (0, 22, 4), sum 6.845831 vs -0.173057
- Layer 9, Token 24 (recurrent cache comparison):
- Original tensor sum: 230.641953
- Converted tensor sum: 158.276245
- Original tensor mean: 0.072076
- Converted tensor mean: 0.049461
- Mean difference: 0.13344021
- Maximum pointwise difference: 2.99997020
- Max difference location: (0, 28, 7, 0)
- Values at max diff - Original: 3.18566871, Converted: 0.18569851
- Biggest difference in row (0, 28, 7), sum 10.468034 vs 1.164585
- Layer 10, Token 24 (recurrent cache comparison):
- Original tensor sum: 196.116974
- Converted tensor sum: 120.883209
- Original tensor mean: 0.061287
- Converted tensor mean: 0.037776
- Mean difference: 0.14279810
- Maximum pointwise difference: 3.15166354
- Max difference location: (0, 24, 0, 1)
- Values at max diff - Original: 0.01040818, Converted: 3.16207170
- Biggest difference in row (0, 24, 0), sum 0.920592 vs 9.820712
- Layer 12, Token 24 (recurrent cache comparison):
- Original tensor sum: 263.522400
- Converted tensor sum: 204.364563
- Original tensor mean: 0.082351
- Converted tensor mean: 0.063864
- Mean difference: 0.15161198
- Maximum pointwise difference: 3.57106376
- Max difference location: (0, 30, 4, 9)
- Values at max diff - Original: 0.28180352, Converted: 3.85286736
- Biggest difference in row (0, 28, 3), sum 8.790596 vs 0.101635
- Layer 13, Token 24 (recurrent cache comparison):
- Original tensor sum: 174.369919
- Converted tensor sum: 135.311646
- Original tensor mean: 0.054491
- Converted tensor mean: 0.042285
- Mean difference: 0.11190581
- Maximum pointwise difference: 3.24499154
- Max difference location: (0, 9, 2, 1)
- Values at max diff - Original: -0.00551485, Converted: 3.23947668
- Biggest difference in row (0, 9, 2), sum -0.041496 vs 5.267887
- Layer 14, Token 24 (recurrent cache comparison):
- Original tensor sum: 507.494324
- Converted tensor sum: 360.428650
- Original tensor mean: 0.158592
- Converted tensor mean: 0.112634
- Mean difference: 0.25558040
- Maximum pointwise difference: 5.38855457
- Max difference location: (0, 28, 6, 1)
- Values at max diff - Original: 5.42326450, Converted: 0.03470971
- Biggest difference in row (0, 28, 6), sum 14.975449 vs 0.420049
- Layer 0, Token 25 (recurrent cache comparison):
- Original tensor sum: 3.754472
- Converted tensor sum: -0.036336
- Original tensor mean: 0.001173
- Converted tensor mean: -0.000011
- Mean difference: 0.07934358
- Maximum pointwise difference: 1.86529565
- Max difference location: (0, 1, 2, 3)
- Values at max diff - Original: 1.82291889, Converted: -0.04237675
- Biggest difference in row (0, 26, 9), sum -0.049344 vs -1.991895
- Layer 1, Token 25 (recurrent cache comparison):
- Original tensor sum: 69.339890
- Converted tensor sum: 74.604774
- Original tensor mean: 0.021669
- Converted tensor mean: 0.023314
- Mean difference: 0.08100989
- Maximum pointwise difference: 1.22147357
- Max difference location: (0, 23, 0, 4)
- Values at max diff - Original: 1.23978972, Converted: 0.01831620
- Biggest difference in row (0, 20, 8), sum 2.595490 vs 0.385527
- Layer 2, Token 25 (recurrent cache comparison):
- Original tensor sum: 122.554489
- Converted tensor sum: 59.594086
- Original tensor mean: 0.038298
- Converted tensor mean: 0.018623
- Mean difference: 0.14621988
- Maximum pointwise difference: 3.03828931
- Max difference location: (0, 8, 9, 3)
- Values at max diff - Original: 3.01308012, Converted: -0.02520920
- Biggest difference in row (0, 6, 7), sum 4.544618 vs 0.330778
- Layer 4, Token 25 (recurrent cache comparison):
- Original tensor sum: 135.021027
- Converted tensor sum: 31.374174
- Original tensor mean: 0.042194
- Converted tensor mean: 0.009804
- Mean difference: 0.11780138
- Maximum pointwise difference: 2.41319752
- Max difference location: (0, 26, 6, 5)
- Values at max diff - Original: -0.06945831, Converted: 2.34373927
- Biggest difference in row (0, 20, 0), sum 5.919655 vs -0.125531
- Layer 5, Token 25 (recurrent cache comparison):
- Original tensor sum: 151.868256
- Converted tensor sum: 37.756584
- Original tensor mean: 0.047459
- Converted tensor mean: 0.011799
- Mean difference: 0.11239365
- Maximum pointwise difference: 2.04250264
- Max difference location: (0, 28, 8, 6)
- Values at max diff - Original: 1.64249492, Converted: -0.40000769
- Biggest difference in row (0, 23, 0), sum 3.497306 vs 0.162423
- Layer 6, Token 25 (recurrent cache comparison):
- Original tensor sum: 251.935211
- Converted tensor sum: 40.890175
- Original tensor mean: 0.078730
- Converted tensor mean: 0.012778
- Mean difference: 0.15190262
- Maximum pointwise difference: 5.74138451
- Max difference location: (0, 12, 6, 1)
- Values at max diff - Original: 5.98834372, Converted: 0.24695921
- Biggest difference in row (0, 12, 6), sum 13.863525 vs 0.418773
- Layer 8, Token 25 (recurrent cache comparison):
- Original tensor sum: 253.027832
- Converted tensor sum: 38.795532
- Original tensor mean: 0.079071
- Converted tensor mean: 0.012124
- Mean difference: 0.15110740
- Maximum pointwise difference: 2.77147269
- Max difference location: (0, 21, 8, 9)
- Values at max diff - Original: 2.86136007, Converted: 0.08988741
- Biggest difference in row (0, 6, 2), sum 5.609079 vs -2.170572
- Layer 9, Token 25 (recurrent cache comparison):
- Original tensor sum: 207.731750
- Converted tensor sum: 52.985756
- Original tensor mean: 0.064916
- Converted tensor mean: 0.016558
- Mean difference: 0.11516394
- Maximum pointwise difference: 2.72221398
- Max difference location: (0, 28, 7, 0)
- Values at max diff - Original: 2.76798820, Converted: 0.04577418
- Biggest difference in row (0, 28, 7), sum 9.256445 vs 0.484987
- Layer 10, Token 25 (recurrent cache comparison):
- Original tensor sum: 196.952515
- Converted tensor sum: 54.152390
- Original tensor mean: 0.061548
- Converted tensor mean: 0.016923
- Mean difference: 0.12454510
- Maximum pointwise difference: 2.34993958
- Max difference location: (0, 10, 3, 5)
- Values at max diff - Original: -0.00316075, Converted: 2.34677887
- Biggest difference in row (0, 11, 6), sum 5.878725 vs 0.250239
- Layer 12, Token 25 (recurrent cache comparison):
- Original tensor sum: 255.808289
- Converted tensor sum: 65.224335
- Original tensor mean: 0.079940
- Converted tensor mean: 0.020383
- Mean difference: 0.14238897
- Maximum pointwise difference: 2.58750200
- Max difference location: (0, 30, 8, 9)
- Values at max diff - Original: -0.02865839, Converted: 2.55884361
- Biggest difference in row (0, 28, 3), sum 8.769258 vs 0.354862
- Layer 13, Token 25 (recurrent cache comparison):
- Original tensor sum: 166.242828
- Converted tensor sum: 63.081795
- Original tensor mean: 0.051951
- Converted tensor mean: 0.019713
- Mean difference: 0.10068022
- Maximum pointwise difference: 2.70444345
- Max difference location: (0, 26, 4, 0)
- Values at max diff - Original: 2.70685434, Converted: 0.00241077
- Biggest difference in row (0, 26, 4), sum 5.351704 vs -0.105821
- Layer 14, Token 25 (recurrent cache comparison):
- Original tensor sum: 542.257324
- Converted tensor sum: 126.161835
- Original tensor mean: 0.169455
- Converted tensor mean: 0.039426
- Mean difference: 0.22693451
- Maximum pointwise difference: 4.91657877
- Max difference location: (0, 28, 6, 1)
- Values at max diff - Original: 5.17964792, Converted: 0.26306900
- Biggest difference in row (0, 28, 6), sum 14.244452 vs 1.160758
- Layer 0, Token 26 (recurrent cache comparison):
- Original tensor sum: 2.494154
- Converted tensor sum: -0.022610
- Original tensor mean: 0.000779
- Converted tensor mean: -0.000007
- Mean difference: 0.07249723
- Maximum pointwise difference: 1.12537110
- Max difference location: (0, 23, 8, 6)
- Values at max diff - Original: -0.77736998, Converted: 0.34800115
- Biggest difference in row (0, 25, 2), sum 0.139047 vs -2.260486
- Layer 1, Token 26 (recurrent cache comparison):
- Original tensor sum: 89.948196
- Converted tensor sum: 28.472143
- Original tensor mean: 0.028109
- Converted tensor mean: 0.008898
- Mean difference: 0.08773426
- Maximum pointwise difference: 1.21594334
- Max difference location: (0, 31, 9, 5)
- Values at max diff - Original: 1.12476408, Converted: -0.09117921
- Biggest difference in row (0, 3, 0), sum 2.359989 vs -0.070505
- Layer 2, Token 26 (recurrent cache comparison):
- Original tensor sum: 129.416809
- Converted tensor sum: 41.503624
- Original tensor mean: 0.040443
- Converted tensor mean: 0.012970
- Mean difference: 0.15461735
- Maximum pointwise difference: 2.68493867
- Max difference location: (0, 8, 8, 3)
- Values at max diff - Original: 2.36720443, Converted: -0.31773427
- Biggest difference in row (0, 27, 9), sum 4.510338 vs -0.361951
- Layer 4, Token 26 (recurrent cache comparison):
- Original tensor sum: 167.357330
- Converted tensor sum: 22.416847
- Original tensor mean: 0.052299
- Converted tensor mean: 0.007005
- Mean difference: 0.12134697
- Maximum pointwise difference: 2.10167456
- Max difference location: (0, 27, 2, 5)
- Values at max diff - Original: 2.16418123, Converted: 0.06250665
- Biggest difference in row (0, 20, 0), sum 5.742605 vs 0.074519
- Layer 5, Token 26 (recurrent cache comparison):
- Original tensor sum: 163.754578
- Converted tensor sum: 25.965012
- Original tensor mean: 0.051173
- Converted tensor mean: 0.008114
- Mean difference: 0.12282242
- Maximum pointwise difference: 1.71204209
- Max difference location: (0, 6, 7, 6)
- Values at max diff - Original: 1.87962317, Converted: 0.16758111
- Biggest difference in row (0, 6, 7), sum 6.898893 vs 0.817218
- Layer 6, Token 26 (recurrent cache comparison):
- Original tensor sum: 280.407990
- Converted tensor sum: 7.497489
- Original tensor mean: 0.087628
- Converted tensor mean: 0.002343
- Mean difference: 0.16469882
- Maximum pointwise difference: 5.09109163
- Max difference location: (0, 12, 6, 1)
- Values at max diff - Original: 5.84504795, Converted: 0.75395638
- Biggest difference in row (0, 12, 6), sum 13.522006 vs 2.690509
- Layer 8, Token 26 (recurrent cache comparison):
- Original tensor sum: 290.931335
- Converted tensor sum: 24.817287
- Original tensor mean: 0.090916
- Converted tensor mean: 0.007755
- Mean difference: 0.16735801
- Maximum pointwise difference: 2.96624160
- Max difference location: (0, 12, 7, 4)
- Values at max diff - Original: 0.00615764, Converted: 2.97239923
- Biggest difference in row (0, 6, 2), sum 5.174712 vs -3.075627
- Layer 9, Token 26 (recurrent cache comparison):
- Original tensor sum: 196.708160
- Converted tensor sum: 30.441196
- Original tensor mean: 0.061471
- Converted tensor mean: 0.009513
- Mean difference: 0.11019707
- Maximum pointwise difference: 2.66847897
- Max difference location: (0, 28, 7, 0)
- Values at max diff - Original: 2.53971243, Converted: -0.12876646
- Biggest difference in row (0, 28, 7), sum 8.254028 vs 0.381486
- Layer 10, Token 26 (recurrent cache comparison):
- Original tensor sum: 199.032516
- Converted tensor sum: 15.679170
- Original tensor mean: 0.062198
- Converted tensor mean: 0.004900
- Mean difference: 0.11978843
- Maximum pointwise difference: 2.87448788
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 3.14507675, Converted: 0.27058893
- Biggest difference in row (0, 25, 1), sum 5.510708 vs 0.187406
- Layer 12, Token 26 (recurrent cache comparison):
- Original tensor sum: 260.372742
- Converted tensor sum: 27.850517
- Original tensor mean: 0.081366
- Converted tensor mean: 0.008703
- Mean difference: 0.15131992
- Maximum pointwise difference: 3.11937833
- Max difference location: (0, 29, 6, 5)
- Values at max diff - Original: -0.00478183, Converted: 3.11459661
- Biggest difference in row (0, 28, 3), sum 8.629121 vs -0.241569
- Layer 13, Token 26 (recurrent cache comparison):
- Original tensor sum: 175.842209
- Converted tensor sum: 31.150665
- Original tensor mean: 0.054951
- Converted tensor mean: 0.009735
- Mean difference: 0.10132494
- Maximum pointwise difference: 2.68282986
- Max difference location: (0, 26, 4, 0)
- Values at max diff - Original: 2.69746804, Converted: 0.01463811
- Biggest difference in row (0, 26, 4), sum 5.839348 vs 0.118608
- Layer 14, Token 26 (recurrent cache comparison):
- Original tensor sum: 549.098877
- Converted tensor sum: 57.239769
- Original tensor mean: 0.171593
- Converted tensor mean: 0.017887
- Mean difference: 0.23359555
- Maximum pointwise difference: 4.78898478
- Max difference location: (0, 28, 6, 1)
- Values at max diff - Original: 4.82380438, Converted: 0.03481963
- Biggest difference in row (0, 28, 6), sum 13.322067 vs -0.096704
- Layer 0, Token 27 (recurrent cache comparison):
- Original tensor sum: 1.918821
- Converted tensor sum: 4.296852
- Original tensor mean: 0.000600
- Converted tensor mean: 0.001343
- Mean difference: 0.06445935
- Maximum pointwise difference: 1.46873963
- Max difference location: (0, 1, 3, 2)
- Values at max diff - Original: -0.01301772, Converted: 1.45572186
- Biggest difference in row (0, 28, 5), sum 0.223120 vs 1.905128
- Layer 1, Token 27 (recurrent cache comparison):
- Original tensor sum: 160.952576
- Converted tensor sum: 15.469984
- Original tensor mean: 0.050298
- Converted tensor mean: 0.004834
- Mean difference: 0.10194612
- Maximum pointwise difference: 1.58813882
- Max difference location: (0, 10, 6, 8)
- Values at max diff - Original: 1.63966167, Converted: 0.05152279
- Biggest difference in row (0, 16, 1), sum 4.988435 vs 0.628698
- Layer 2, Token 27 (recurrent cache comparison):
- Original tensor sum: 195.883148
- Converted tensor sum: 23.802681
- Original tensor mean: 0.061213
- Converted tensor mean: 0.007438
- Mean difference: 0.16412406
- Maximum pointwise difference: 3.51121449
- Max difference location: (0, 18, 2, 1)
- Values at max diff - Original: 0.00709479, Converted: 3.51830935
- Biggest difference in row (0, 0, 2), sum 7.858056 vs -0.148840
- Layer 4, Token 27 (recurrent cache comparison):
- Original tensor sum: 233.660095
- Converted tensor sum: 13.142452
- Original tensor mean: 0.073019
- Converted tensor mean: 0.004107
- Mean difference: 0.12733760
- Maximum pointwise difference: 2.84240961
- Max difference location: (0, 27, 8, 5)
- Values at max diff - Original: 2.76694965, Converted: -0.07546007
- Biggest difference in row (0, 24, 1), sum 6.535775 vs 0.658166
- Layer 5, Token 27 (recurrent cache comparison):
- Original tensor sum: 251.330231
- Converted tensor sum: 21.526363
- Original tensor mean: 0.078541
- Converted tensor mean: 0.006727
- Mean difference: 0.13129665
- Maximum pointwise difference: 2.36431837
- Max difference location: (0, 6, 2, 8)
- Values at max diff - Original: 2.37356281, Converted: 0.00924453
- Biggest difference in row (0, 6, 2), sum 8.637090 vs 0.102351
- Layer 6, Token 27 (recurrent cache comparison):
- Original tensor sum: 362.387848
- Converted tensor sum: -2.171665
- Original tensor mean: 0.113246
- Converted tensor mean: -0.000679
- Mean difference: 0.18160143
- Maximum pointwise difference: 5.93641853
- Max difference location: (0, 12, 6, 1)
- Values at max diff - Original: 5.75199318, Converted: -0.18442529
- Biggest difference in row (0, 12, 6), sum 13.466440 vs 2.236503
- Layer 8, Token 27 (recurrent cache comparison):
- Original tensor sum: 350.323914
- Converted tensor sum: 19.725079
- Original tensor mean: 0.109476
- Converted tensor mean: 0.006164
- Mean difference: 0.17721944
- Maximum pointwise difference: 3.75930500
- Max difference location: (0, 20, 0, 7)
- Values at max diff - Original: 3.75676632, Converted: -0.00253879
- Biggest difference in row (0, 13, 8), sum 9.584435 vs 0.181711
- Layer 9, Token 27 (recurrent cache comparison):
- Original tensor sum: 240.779663
- Converted tensor sum: 24.165503
- Original tensor mean: 0.075244
- Converted tensor mean: 0.007552
- Mean difference: 0.11309086
- Maximum pointwise difference: 2.43383050
- Max difference location: (0, 28, 7, 0)
- Values at max diff - Original: 2.44759488, Converted: 0.01376434
- Biggest difference in row (0, 28, 7), sum 8.022928 vs 0.225877
- Layer 10, Token 27 (recurrent cache comparison):
- Original tensor sum: 244.469070
- Converted tensor sum: 12.286395
- Original tensor mean: 0.076397
- Converted tensor mean: 0.003839
- Mean difference: 0.11746948
- Maximum pointwise difference: 2.32974362
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 3.20926118, Converted: 0.87951756
- Biggest difference in row (0, 11, 6), sum 6.942329 vs -0.007718
- Layer 12, Token 27 (recurrent cache comparison):
- Original tensor sum: 306.749817
- Converted tensor sum: 12.790400
- Original tensor mean: 0.095859
- Converted tensor mean: 0.003997
- Mean difference: 0.15706061
- Maximum pointwise difference: 3.82620597
- Max difference location: (0, 14, 8, 1)
- Values at max diff - Original: 3.90818167, Converted: 0.08197562
- Biggest difference in row (0, 28, 3), sum 9.139596 vs 0.006271
- Layer 13, Token 27 (recurrent cache comparison):
- Original tensor sum: 231.223206
- Converted tensor sum: 21.992476
- Original tensor mean: 0.072257
- Converted tensor mean: 0.006873
- Mean difference: 0.10150776
- Maximum pointwise difference: 2.88272619
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 2.93226290, Converted: 0.04953665
- Biggest difference in row (0, 26, 4), sum 5.809074 vs 0.116277
- Layer 14, Token 27 (recurrent cache comparison):
- Original tensor sum: 648.596985
- Converted tensor sum: 37.038162
- Original tensor mean: 0.202687
- Converted tensor mean: 0.011574
- Mean difference: 0.25362208
- Maximum pointwise difference: 4.70936871
- Max difference location: (0, 28, 6, 1)
- Values at max diff - Original: 4.71021414, Converted: 0.00084528
- Biggest difference in row (0, 27, 4), sum 11.931866 vs 0.099372
- Layer 0, Token 28 (recurrent cache comparison):
- Original tensor sum: 3.825253
- Converted tensor sum: 10.656538
- Original tensor mean: 0.001195
- Converted tensor mean: 0.003330
- Mean difference: 0.06744900
- Maximum pointwise difference: 1.23786223
- Max difference location: (0, 1, 5, 3)
- Values at max diff - Original: 1.16935611, Converted: -0.06850608
- Biggest difference in row (0, 23, 7), sum -1.495719 vs 0.880324
- Layer 1, Token 28 (recurrent cache comparison):
- Original tensor sum: 64.976830
- Converted tensor sum: 30.582441
- Original tensor mean: 0.020305
- Converted tensor mean: 0.009557
- Mean difference: 0.08607832
- Maximum pointwise difference: 1.46864974
- Max difference location: (0, 16, 2, 9)
- Values at max diff - Original: 1.56847525, Converted: 0.09982550
- Biggest difference in row (0, 1, 3), sum 2.236484 vs -0.154611
- Layer 2, Token 28 (recurrent cache comparison):
- Original tensor sum: 104.630646
- Converted tensor sum: 53.524834
- Original tensor mean: 0.032697
- Converted tensor mean: 0.016727
- Mean difference: 0.14054969
- Maximum pointwise difference: 2.87744927
- Max difference location: (0, 13, 1, 7)
- Values at max diff - Original: 0.03716344, Converted: 2.91461277
- Biggest difference in row (0, 23, 4), sum 0.081307 vs 4.303990
- Layer 4, Token 28 (recurrent cache comparison):
- Original tensor sum: 192.219788
- Converted tensor sum: 29.228979
- Original tensor mean: 0.060069
- Converted tensor mean: 0.009134
- Mean difference: 0.12325959
- Maximum pointwise difference: 4.08833027
- Max difference location: (0, 19, 0, 2)
- Values at max diff - Original: 4.01820278, Converted: -0.07012761
- Biggest difference in row (0, 19, 0), sum 6.219261 vs -0.327518
- Layer 5, Token 28 (recurrent cache comparison):
- Original tensor sum: 243.385864
- Converted tensor sum: 23.409119
- Original tensor mean: 0.076058
- Converted tensor mean: 0.007315
- Mean difference: 0.14056823
- Maximum pointwise difference: 5.76254559
- Max difference location: (0, 28, 9, 6)
- Values at max diff - Original: 6.02726078, Converted: 0.26471528
- Biggest difference in row (0, 28, 9), sum 9.890844 vs 0.406699
- Layer 6, Token 28 (recurrent cache comparison):
- Original tensor sum: 283.616272
- Converted tensor sum: 40.143700
- Original tensor mean: 0.088630
- Converted tensor mean: 0.012545
- Mean difference: 0.16413040
- Maximum pointwise difference: 4.72735071
- Max difference location: (0, 12, 1, 2)
- Values at max diff - Original: 4.75247860, Converted: 0.02512792
- Biggest difference in row (0, 12, 1), sum 13.120539 vs 0.133712
- Layer 8, Token 28 (recurrent cache comparison):
- Original tensor sum: 228.649261
- Converted tensor sum: 44.837063
- Original tensor mean: 0.071453
- Converted tensor mean: 0.014012
- Mean difference: 0.15679255
- Maximum pointwise difference: 3.82907844
- Max difference location: (0, 23, 4, 7)
- Values at max diff - Original: 3.84108162, Converted: 0.01200324
- Biggest difference in row (0, 1, 4), sum 8.901268 vs 0.416754
- Layer 9, Token 28 (recurrent cache comparison):
- Original tensor sum: 212.272324
- Converted tensor sum: 21.536671
- Original tensor mean: 0.066335
- Converted tensor mean: 0.006730
- Mean difference: 0.11465029
- Maximum pointwise difference: 2.85586047
- Max difference location: (0, 15, 2, 3)
- Values at max diff - Original: 2.84589958, Converted: -0.00996090
- Biggest difference in row (0, 15, 2), sum 8.293229 vs 0.275981
- Layer 10, Token 28 (recurrent cache comparison):
- Original tensor sum: 212.098206
- Converted tensor sum: 19.835695
- Original tensor mean: 0.066281
- Converted tensor mean: 0.006199
- Mean difference: 0.14262109
- Maximum pointwise difference: 4.31178093
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 4.53196430, Converted: 0.22018313
- Biggest difference in row (0, 10, 4), sum 9.766387 vs -0.072625
- Layer 12, Token 28 (recurrent cache comparison):
- Original tensor sum: 280.744019
- Converted tensor sum: 26.187149
- Original tensor mean: 0.087733
- Converted tensor mean: 0.008183
- Mean difference: 0.15264840
- Maximum pointwise difference: 4.41812420
- Max difference location: (0, 21, 2, 4)
- Values at max diff - Original: 4.41481018, Converted: -0.00331383
- Biggest difference in row (0, 23, 2), sum 10.581321 vs 0.608111
- Layer 13, Token 28 (recurrent cache comparison):
- Original tensor sum: 220.357834
- Converted tensor sum: 20.228846
- Original tensor mean: 0.068862
- Converted tensor mean: 0.006322
- Mean difference: 0.11583474
- Maximum pointwise difference: 4.72553635
- Max difference location: (0, 17, 8, 2)
- Values at max diff - Original: 4.72810841, Converted: 0.00257226
- Biggest difference in row (0, 19, 1), sum 9.879478 vs 0.388081
- Layer 14, Token 28 (recurrent cache comparison):
- Original tensor sum: 515.894897
- Converted tensor sum: 74.440948
- Original tensor mean: 0.161217
- Converted tensor mean: 0.023263
- Mean difference: 0.23548929
- Maximum pointwise difference: 4.93366051
- Max difference location: (0, 16, 7, 6)
- Values at max diff - Original: 4.92017603, Converted: -0.01348470
- Biggest difference in row (0, 28, 6), sum 14.032580 vs -0.061767
- Layer 0, Token 29 (recurrent cache comparison):
- Original tensor sum: 7.490709
- Converted tensor sum: 13.732031
- Original tensor mean: 0.002341
- Converted tensor mean: 0.004291
- Mean difference: 0.06082471
- Maximum pointwise difference: 1.43740010
- Max difference location: (0, 1, 3, 3)
- Values at max diff - Original: 1.39118814, Converted: -0.04621201
- Biggest difference in row (0, 23, 1), sum -0.646684 vs 1.039518
- Layer 1, Token 29 (recurrent cache comparison):
- Original tensor sum: 34.789967
- Converted tensor sum: 32.546562
- Original tensor mean: 0.010872
- Converted tensor mean: 0.010171
- Mean difference: 0.08757141
- Maximum pointwise difference: 1.04371011
- Max difference location: (0, 6, 1, 2)
- Values at max diff - Original: 0.99249512, Converted: -0.05121503
- Biggest difference in row (0, 3, 8), sum -0.976319 vs 2.533029
- Layer 2, Token 29 (recurrent cache comparison):
- Original tensor sum: 81.188293
- Converted tensor sum: 110.873352
- Original tensor mean: 0.025371
- Converted tensor mean: 0.034648
- Mean difference: 0.13966069
- Maximum pointwise difference: 2.45380425
- Max difference location: (0, 13, 7, 1)
- Values at max diff - Original: 0.05114410, Converted: 2.50494838
- Biggest difference in row (0, 12, 1), sum 5.281791 vs 0.538119
- Layer 4, Token 29 (recurrent cache comparison):
- Original tensor sum: 188.945206
- Converted tensor sum: 82.802734
- Original tensor mean: 0.059045
- Converted tensor mean: 0.025876
- Mean difference: 0.13653603
- Maximum pointwise difference: 2.89840102
- Max difference location: (0, 19, 0, 2)
- Values at max diff - Original: 2.89151430, Converted: -0.00688672
- Biggest difference in row (0, 19, 0), sum 4.444302 vs -0.202434
- Layer 5, Token 29 (recurrent cache comparison):
- Original tensor sum: 234.074219
- Converted tensor sum: 65.914871
- Original tensor mean: 0.073148
- Converted tensor mean: 0.020598
- Mean difference: 0.14784601
- Maximum pointwise difference: 3.25614643
- Max difference location: (0, 28, 9, 6)
- Values at max diff - Original: 3.74669981, Converted: 0.49055350
- Biggest difference in row (0, 28, 9), sum 6.683680 vs 1.130066
- Layer 6, Token 29 (recurrent cache comparison):
- Original tensor sum: 312.478729
- Converted tensor sum: 136.998260
- Original tensor mean: 0.097650
- Converted tensor mean: 0.042812
- Mean difference: 0.19563875
- Maximum pointwise difference: 4.93519068
- Max difference location: (0, 12, 6, 2)
- Values at max diff - Original: 4.85506201, Converted: -0.08012870
- Biggest difference in row (0, 12, 6), sum 14.484787 vs 2.203152
- Layer 8, Token 29 (recurrent cache comparison):
- Original tensor sum: 249.388092
- Converted tensor sum: 124.562820
- Original tensor mean: 0.077934
- Converted tensor mean: 0.038926
- Mean difference: 0.18382950
- Maximum pointwise difference: 3.92004848
- Max difference location: (0, 20, 7, 0)
- Values at max diff - Original: 0.21650003, Converted: 4.13654852
- Biggest difference in row (0, 23, 4), sum 6.951686 vs -0.318011
- Layer 9, Token 29 (recurrent cache comparison):
- Original tensor sum: 200.171021
- Converted tensor sum: 82.927864
- Original tensor mean: 0.062553
- Converted tensor mean: 0.025915
- Mean difference: 0.12187681
- Maximum pointwise difference: 2.69074798
- Max difference location: (0, 15, 2, 3)
- Values at max diff - Original: 2.69794440, Converted: 0.00719635
- Biggest difference in row (0, 15, 2), sum 7.941767 vs 0.050363
- Layer 10, Token 29 (recurrent cache comparison):
- Original tensor sum: 213.368591
- Converted tensor sum: 77.427185
- Original tensor mean: 0.066678
- Converted tensor mean: 0.024196
- Mean difference: 0.13651104
- Maximum pointwise difference: 3.13308334
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 3.58547378, Converted: 0.45239034
- Biggest difference in row (0, 10, 4), sum 6.818930 vs -0.155169
- Layer 12, Token 29 (recurrent cache comparison):
- Original tensor sum: 263.786377
- Converted tensor sum: 92.682205
- Original tensor mean: 0.082433
- Converted tensor mean: 0.028963
- Mean difference: 0.15690672
- Maximum pointwise difference: 3.50486374
- Max difference location: (0, 23, 2, 9)
- Values at max diff - Original: 3.44645429, Converted: -0.05840937
- Biggest difference in row (0, 23, 2), sum 9.830493 vs -0.231371
- Layer 13, Token 29 (recurrent cache comparison):
- Original tensor sum: 193.539474
- Converted tensor sum: 79.679726
- Original tensor mean: 0.060481
- Converted tensor mean: 0.024900
- Mean difference: 0.11795644
- Maximum pointwise difference: 3.62266445
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 3.49508691, Converted: -0.12757748
- Biggest difference in row (0, 18, 1), sum 5.632851 vs -0.122056
- Layer 14, Token 29 (recurrent cache comparison):
- Original tensor sum: 525.021179
- Converted tensor sum: 197.845932
- Original tensor mean: 0.164069
- Converted tensor mean: 0.061827
- Mean difference: 0.25022614
- Maximum pointwise difference: 4.42602730
- Max difference location: (0, 15, 2, 8)
- Values at max diff - Original: 4.48904753, Converted: 0.06302036
- Biggest difference in row (0, 28, 6), sum 13.769245 vs 2.199155
- Layer 0, Token 30 (recurrent cache comparison):
- Original tensor sum: 4.659326
- Converted tensor sum: 10.953376
- Original tensor mean: 0.001456
- Converted tensor mean: 0.003423
- Mean difference: 0.06142937
- Maximum pointwise difference: 1.06926394
- Max difference location: (0, 28, 5, 9)
- Values at max diff - Original: -0.05087389, Converted: 1.01839006
- Biggest difference in row (0, 4, 9), sum 2.534327 vs -0.105926
- Layer 1, Token 30 (recurrent cache comparison):
- Original tensor sum: 24.136578
- Converted tensor sum: 96.968475
- Original tensor mean: 0.007543
- Converted tensor mean: 0.030303
- Mean difference: 0.08820312
- Maximum pointwise difference: 1.49761820
- Max difference location: (0, 6, 4, 4)
- Values at max diff - Original: 0.06953955, Converted: 1.56715775
- Biggest difference in row (0, 14, 2), sum 0.115400 vs 3.481205
- Layer 2, Token 30 (recurrent cache comparison):
- Original tensor sum: 64.494400
- Converted tensor sum: 246.552582
- Original tensor mean: 0.020155
- Converted tensor mean: 0.077048
- Mean difference: 0.16151237
- Maximum pointwise difference: 3.98919630
- Max difference location: (0, 4, 8, 4)
- Values at max diff - Original: -0.10013573, Converted: 3.88906050
- Biggest difference in row (0, 23, 4), sum -0.108707 vs 7.892229
- Layer 4, Token 30 (recurrent cache comparison):
- Original tensor sum: 190.921097
- Converted tensor sum: 126.537048
- Original tensor mean: 0.059663
- Converted tensor mean: 0.039543
- Mean difference: 0.13220279
- Maximum pointwise difference: 2.87259126
- Max difference location: (0, 8, 6, 5)
- Values at max diff - Original: 0.00449362, Converted: 2.87708497
- Biggest difference in row (0, 17, 9), sum 0.710816 vs 6.274773
- Layer 5, Token 30 (recurrent cache comparison):
- Original tensor sum: 222.353195
- Converted tensor sum: 164.720016
- Original tensor mean: 0.069485
- Converted tensor mean: 0.051475
- Mean difference: 0.15598193
- Maximum pointwise difference: 2.88562417
- Max difference location: (0, 28, 9, 6)
- Values at max diff - Original: 3.18444014, Converted: 0.29881600
- Biggest difference in row (0, 30, 2), sum 0.004416 vs 6.153850
- Layer 6, Token 30 (recurrent cache comparison):
- Original tensor sum: 339.244141
- Converted tensor sum: 317.588440
- Original tensor mean: 0.106014
- Converted tensor mean: 0.099246
- Mean difference: 0.21152201
- Maximum pointwise difference: 4.30255318
- Max difference location: (0, 6, 4, 8)
- Values at max diff - Original: -0.19493943, Converted: 4.10761356
- Biggest difference in row (0, 12, 6), sum 13.503227 vs 2.285058
- Layer 8, Token 30 (recurrent cache comparison):
- Original tensor sum: 261.308044
- Converted tensor sum: 204.488892
- Original tensor mean: 0.081659
- Converted tensor mean: 0.063903
- Mean difference: 0.18225618
- Maximum pointwise difference: 3.88148618
- Max difference location: (0, 21, 7, 9)
- Values at max diff - Original: 3.48627377, Converted: -0.39521238
- Biggest difference in row (0, 2, 4), sum -0.009086 vs 6.555274
- Layer 9, Token 30 (recurrent cache comparison):
- Original tensor sum: 187.010895
- Converted tensor sum: 173.659409
- Original tensor mean: 0.058441
- Converted tensor mean: 0.054269
- Mean difference: 0.12517925
- Maximum pointwise difference: 2.68900180
- Max difference location: (0, 15, 2, 3)
- Values at max diff - Original: 2.59999108, Converted: -0.08901066
- Biggest difference in row (0, 15, 2), sum 7.543541 vs 0.209705
- Layer 10, Token 30 (recurrent cache comparison):
- Original tensor sum: 206.371735
- Converted tensor sum: 145.950043
- Original tensor mean: 0.064491
- Converted tensor mean: 0.045609
- Mean difference: 0.12893555
- Maximum pointwise difference: 2.97875929
- Max difference location: (0, 24, 1, 0)
- Values at max diff - Original: 3.54119730, Converted: 0.56243801
- Biggest difference in row (0, 11, 6), sum 5.982455 vs 0.632388
- Layer 12, Token 30 (recurrent cache comparison):
- Original tensor sum: 251.250732
- Converted tensor sum: 193.503662
- Original tensor mean: 0.078516
- Converted tensor mean: 0.060470
- Mean difference: 0.14629500
- Maximum pointwise difference: 3.24942660
- Max difference location: (0, 28, 2, 4)
- Values at max diff - Original: 3.09908056, Converted: -0.15034601
- Biggest difference in row (0, 28, 3), sum 9.363594 vs -0.017764
- Layer 13, Token 30 (recurrent cache comparison):
- Original tensor sum: 176.694855
- Converted tensor sum: 165.849930
- Original tensor mean: 0.055217
- Converted tensor mean: 0.051828
- Mean difference: 0.11395165
- Maximum pointwise difference: 3.52955794
- Max difference location: (0, 11, 4, 0)
- Values at max diff - Original: 3.33610535, Converted: -0.19345257
- Biggest difference in row (0, 8, 7), sum -0.009830 vs 4.540796
- Layer 14, Token 30 (recurrent cache comparison):
- Original tensor sum: 562.166748
- Converted tensor sum: 408.797607
- Original tensor mean: 0.175677
- Converted tensor mean: 0.127749
- Mean difference: 0.25758758
- Maximum pointwise difference: 4.45499659
- Max difference location: (0, 15, 2, 8)
- Values at max diff - Original: 4.37386942, Converted: -0.08112720
- Biggest difference in row (0, 28, 6), sum 13.013643 vs -0.161676
- ================================================================================
- Comparing q padded tensors...
- ================================================================================
- Layer 0, Token 1 (q padded comparison):
- Original tensor sum: 7.958682
- Converted tensor sum: 7.958661
- Original tensor mean: 0.000389
- Converted tensor mean: 0.000389
- Mean difference: 0.00000000
- Maximum pointwise difference: 0.00000076
- Max difference location: (0, 0, 0, 6)
- Values at max diff - Original: -0.22316068, Converted: -0.22316144
- Biggest difference in row (0, 0, 0), sum -0.570113 vs -0.570115
- Original tensor:
- [[[[ 1.97370015e-02 -7.89398551e-02 2.40650475e-02 ... -3.46655026e-02
- -1.84459373e-01 1.35031175e-02]
- [-3.90069596e-02 -6.45441562e-02 -9.85123310e-03 ... -7.10528418e-02
- 2.86484748e-01 -4.78143468e-02]
- [-3.32845971e-02 8.48600932e-04 -1.83281749e-02 ... -3.60261202e-02
- 1.16759300e-01 -3.79200131e-02]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[ 1.97370015e-02 -7.89398551e-02 2.40650475e-02 ... -3.46655026e-02
- -1.84459373e-01 1.35031175e-02]
- [-3.90069596e-02 -6.45441562e-02 -9.85123310e-03 ... -7.10528418e-02
- 2.86484748e-01 -4.78143468e-02]
- [-3.32845971e-02 8.48600932e-04 -1.83281749e-02 ... -3.60261202e-02
- 1.16759300e-01 -3.79200131e-02]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[-6.58135489e-02 2.45508045e-01 -1.13810226e-02 ... 2.54544546e-03
- 2.51089204e-02 2.86987983e-04]
- [-1.25565156e-01 -7.94792548e-02 -9.97955501e-02 ... 7.12259486e-02
- 9.36590508e-02 -1.65728614e-01]
- [-1.35633466e-03 -9.60636213e-02 -8.94494876e-02 ... 1.94221988e-01
- -4.70091067e-02 -9.31773186e-02]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- ...
- [[-1.80936769e-01 2.09823474e-02 -1.53481111e-01 ... -6.53458312e-02
- 9.94268879e-02 8.78875237e-03]
- [-1.07081555e-01 1.26294538e-01 -9.78934765e-02 ... -5.38439713e-02
- -5.59990015e-03 1.52285740e-01]
- [ 2.60844707e-01 8.11591521e-02 1.12913184e-01 ... -1.86833683e-02
- -1.93844642e-02 -7.96004198e-03]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[ 5.65589257e-02 -7.43661374e-02 -2.00723484e-01 ... 1.52545767e-02
- 1.50462063e-02 2.35310309e-02]
- [ 7.43804872e-02 -1.34884328e-01 2.01406017e-01 ... -9.13856328e-02
- -5.48248030e-02 8.11865740e-03]
- [ 1.52915101e-02 -1.20854350e-02 2.73873240e-01 ... -3.24299149e-02
- -6.92289770e-02 -1.53110905e-05]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[ 5.65589257e-02 -7.43661374e-02 -2.00723484e-01 ... 1.52545767e-02
- 1.50462063e-02 2.35310309e-02]
- [ 7.43804872e-02 -1.34884328e-01 2.01406017e-01 ... -9.13856328e-02
- -5.48248030e-02 8.11865740e-03]
- [ 1.52915101e-02 -1.20854350e-02 2.73873240e-01 ... -3.24299149e-02
- -6.92289770e-02 -1.53110905e-05]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]]]
- Converted tensor:
- [[[[ 1.97370723e-02 -7.89401382e-02 2.40651332e-02 ... -3.46656255e-02
- -1.84460029e-01 1.35031650e-02]
- [-3.90069783e-02 -6.45441785e-02 -9.85123683e-03 ... -7.10528716e-02
- 2.86484867e-01 -4.78143729e-02]
- [-3.32845971e-02 8.48600990e-04 -1.83281731e-02 ... -3.60261202e-02
- 1.16759300e-01 -3.79200131e-02]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[ 1.97370723e-02 -7.89401382e-02 2.40651332e-02 ... -3.46656255e-02
- -1.84460029e-01 1.35031650e-02]
- [-3.90069783e-02 -6.45441785e-02 -9.85123683e-03 ... -7.10528716e-02
- 2.86484867e-01 -4.78143729e-02]
- [-3.32845971e-02 8.48600990e-04 -1.83281731e-02 ... -3.60261202e-02
- 1.16759300e-01 -3.79200131e-02]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[-6.58135936e-02 2.45508194e-01 -1.13810301e-02 ... 2.54544709e-03
- 2.51089353e-02 2.86988186e-04]
- [-1.25565395e-01 -7.94794038e-02 -9.97957364e-02 ... 7.12260827e-02
- 9.36592296e-02 -1.65728927e-01]
- [-1.35633559e-03 -9.60636735e-02 -8.94495398e-02 ... 1.94222078e-01
- -4.70091291e-02 -9.31773633e-02]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- ...
- [[-1.80937156e-01 2.09823940e-02 -1.53481439e-01 ... -6.53459728e-02
- 9.94271040e-02 8.78877100e-03]
- [-1.07081644e-01 1.26294628e-01 -9.78935510e-02 ... -5.38440198e-02
- -5.59990434e-03 1.52285874e-01]
- [ 2.60844767e-01 8.11591670e-02 1.12913206e-01 ... -1.86833721e-02
- -1.93844680e-02 -7.96004292e-03]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[ 5.65591007e-02 -7.43663609e-02 -2.00724110e-01 ... 1.52546223e-02
- 1.50462529e-02 2.35311035e-02]
- [ 7.43805990e-02 -1.34884506e-01 2.01406300e-01 ... -9.13857669e-02
- -5.48248850e-02 8.11866950e-03]
- [ 1.52915157e-02 -1.20854378e-02 2.73873329e-01 ... -3.24299261e-02
- -6.92289993e-02 -1.53110959e-05]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]
- [[ 5.65591007e-02 -7.43663609e-02 -2.00724110e-01 ... 1.52546223e-02
- 1.50462529e-02 2.35311035e-02]
- [ 7.43805990e-02 -1.34884506e-01 2.01406300e-01 ... -9.13857669e-02
- -5.48248850e-02 8.11866950e-03]
- [ 1.52915157e-02 -1.20854378e-02 2.73873329e-01 ... -3.24299261e-02
- -6.92289993e-02 -1.53110959e-05]
- ...
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]
- [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
- 0.00000000e+00 0.00000000e+00]]]]
- Layer 1, Token 1 (q padded comparison):
- Original tensor sum: 8.938188
- Converted tensor sum: 8.938201
- Original tensor mean: 0.000436
- Converted tensor mean: 0.000436
- Mean difference: 0.00000001
- Maximum pointwise difference: 0.00000305
- Max difference location: (0, 16, 0, 8)
- Values at max diff - Original: 0.24272950, Converted: 0.24273255
- Biggest difference in row (0, 16, 0), sum 0.509919 vs 0.509925
- Layer 2, Token 1 (q padded comparison):
- Original tensor sum: -2.985352
- Converted tensor sum: -2.985393
- Original tensor mean: -0.000146
- Converted tensor mean: -0.000146
- Mean difference: 0.00000001
- Maximum pointwise difference: 0.00000104
- Max difference location: (0, 12, 3, 1)
- Values at max diff - Original: -0.02719286, Converted: -0.02719390
- Biggest difference in row (0, 12, 3), sum -0.530951 vs -0.530954
- Layer 4, Token 1 (q padded comparison):
- Original tensor sum: -31.644516
- Converted tensor sum: -31.643524
- Original tensor mean: -0.001545
- Converted tensor mean: -0.001545
- Mean difference: 0.00000028
- Maximum pointwise difference: 0.00006898
- Max difference location: (0, 6, 3, 7)
- Values at max diff - Original: 0.07510993, Converted: 0.07517891
- Biggest difference in row (0, 6, 3), sum -0.645874 vs -0.645761
- Layer 5, Token 1 (q padded comparison):
- Original tensor sum: -30.684572
- Converted tensor sum: -30.685047
- Original tensor mean: -0.001498
- Converted tensor mean: -0.001498
- Mean difference: 0.00000021
- Maximum pointwise difference: 0.00003881
- Max difference location: (0, 30, 3, 0)
- Values at max diff - Original: 0.03456598, Converted: 0.03452717
- Biggest difference in row (0, 30, 3), sum -0.428461 vs -0.428590
- Layer 6, Token 1 (q padded comparison):
- Original tensor sum: -10.008605
- Converted tensor sum: -10.014137
- Original tensor mean: -0.000489
- Converted tensor mean: -0.000489
- Mean difference: 0.00000105
- Maximum pointwise difference: 0.00017181
- Max difference location: (0, 6, 2, 7)
- Values at max diff - Original: 0.01523990, Converted: 0.01506809
- Biggest difference in row (0, 2, 1), sum -0.388271 vs -0.388545
- Layer 8, Token 1 (q padded comparison):
- Original tensor sum: -36.801449
- Converted tensor sum: -36.801811
- Original tensor mean: -0.001797
- Converted tensor mean: -0.001797
- Mean difference: 0.00000098
- Maximum pointwise difference: 0.00025206
- Max difference location: (0, 20, 3, 1)
- Values at max diff - Original: 0.04204723, Converted: 0.04179518
- Biggest difference in row (0, 2, 0), sum -0.275884 vs -0.275609
- Layer 9, Token 1 (q padded comparison):
- Original tensor sum: -37.401527
- Converted tensor sum: -37.397404
- Original tensor mean: -0.001826
- Converted tensor mean: -0.001826
- Mean difference: 0.00000135
- Maximum pointwise difference: 0.00026937
- Max difference location: (0, 20, 2, 2)
- Values at max diff - Original: 0.14496517, Converted: 0.14469580
- Biggest difference in row (0, 20, 3), sum -0.264264 vs -0.264851
- Layer 10, Token 1 (q padded comparison):
- Original tensor sum: -43.546944
- Converted tensor sum: -43.543182
- Original tensor mean: -0.002126
- Converted tensor mean: -0.002126
- Mean difference: 0.00000175
- Maximum pointwise difference: 0.00031144
- Max difference location: (0, 0, 2, 5)
- Values at max diff - Original: -0.03211254, Converted: -0.03180110
- Biggest difference in row (0, 24, 3), sum -0.476393 vs -0.475955
- Layer 12, Token 1 (q padded comparison):
- Original tensor sum: -19.226507
- Converted tensor sum: -19.226831
- Original tensor mean: -0.000939
- Converted tensor mean: -0.000939
- Mean difference: 0.00000116
- Maximum pointwise difference: 0.00020705
- Max difference location: (0, 28, 2, 7)
- Values at max diff - Original: 0.06080329, Converted: 0.06101035
- Biggest difference in row (0, 14, 3), sum -0.455543 vs -0.455054
- Layer 13, Token 1 (q padded comparison):
- Original tensor sum: -36.510368
- Converted tensor sum: -36.510063
- Original tensor mean: -0.001783
- Converted tensor mean: -0.001783
- Mean difference: 0.00000135
- Maximum pointwise difference: 0.00022900
- Max difference location: (0, 16, 2, 1)
- Values at max diff - Original: -0.03357363, Converted: -0.03334463
- Biggest difference in row (0, 18, 2), sum -0.183418 vs -0.183802
- Layer 14, Token 1 (q padded comparison):
- Original tensor sum: -15.543186
- Converted tensor sum: -15.543753
- Original tensor mean: -0.000759
- Converted tensor mean: -0.000759
- Mean difference: 0.00000116
- Maximum pointwise difference: 0.00036725
- Max difference location: (0, 4, 2, 2)
- Values at max diff - Original: 0.05589651, Converted: 0.05552926
- Biggest difference in row (0, 18, 1), sum -0.470654 vs -0.470283
- ================================================================================
- Comparing k padded tensors...
- ================================================================================
- Layer 0, Token 1 (k padded comparison):
- Original tensor sum: -12.851240
- Converted tensor sum: -12.851334
- Original tensor mean: -0.000628
- Converted tensor mean: -0.000628
- Mean difference: 0.00000002
- Maximum pointwise difference: 0.00000304
- Max difference location: (0, 24, 0, 7)
- Values at max diff - Original: -0.57623452, Converted: -0.57623756
- Biggest difference in row (0, 24, 0), sum -1.467058 vs -1.467066
- Original tensor:
- [[[[-0.0023386 0.00352692 -0.13370702 ... -0.18872206 0.09370422
- -0.04139194]
- [ 0.09375711 0.09519143 0.04368615 ... -0.17057192 -0.09237721
- 0.09026651]
- [ 0.19408916 -0.1052211 -0.5198605 ... -0.35431755 -0.18219906
- -0.31666332]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.0023386 0.00352692 -0.13370702 ... -0.18872206 0.09370422
- -0.04139194]
- [ 0.09375711 0.09519143 0.04368615 ... -0.17057192 -0.09237721
- 0.09026651]
- [ 0.19408916 -0.1052211 -0.5198605 ... -0.35431755 -0.18219906
- -0.31666332]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.52186674 -0.28046784 -0.03100401 ... 0.12330638 -0.17640771
- -0.10358577]
- [-0.4391339 -0.25189647 0.12411524 ... -0.04670377 0.4796994
- 0.13396528]
- [ 0.80941254 0.33414015 0.10742755 ... -0.17197518 -0.16508798
- -0.20685418]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- ...
- [[-0.02867949 0.05648347 0.01508509 ... 0.7403576 -0.30081272
- 0.31962797]
- [ 0.07382206 -0.05249733 0.05087741 ... 0.8205082 -0.03774351
- 0.4122186 ]
- [-0.10616651 -0.07183579 -0.02862857 ... 0.13253474 0.73543155
- 0.63596827]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.949689 -0.00939775 -0.0047697 ... -0.04689857 -0.0884609
- -0.20121996]
- [ 0.9700847 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
- -0.14288443]
- [-0.20942387 -0.21343033 -0.00624497 ... 0.05516734 -0.33565474
- 0.75833493]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.949689 -0.00939775 -0.0047697 ... -0.04689857 -0.0884609
- -0.20121996]
- [ 0.9700847 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
- -0.14288443]
- [-0.20942387 -0.21343033 -0.00624497 ... 0.05516734 -0.33565474
- 0.75833493]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]]]
- Converted tensor:
- [[[[-0.0023386 0.00352692 -0.13370706 ... -0.18872213 0.09370426
- -0.04139195]
- [ 0.09375713 0.09519145 0.04368616 ... -0.17057195 -0.09237722
- 0.09026653]
- [ 0.19408953 -0.10522129 -0.5198614 ... -0.3543182 -0.18219939
- -0.31666392]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.0023386 0.00352692 -0.13370706 ... -0.18872213 0.09370426
- -0.04139195]
- [ 0.09375713 0.09519145 0.04368616 ... -0.17057195 -0.09237722
- 0.09026653]
- [ 0.19408953 -0.10522129 -0.5198614 ... -0.3543182 -0.18219939
- -0.31666392]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.5218679 -0.28046846 -0.03100408 ... 0.12330665 -0.1764081
- -0.10358601]
- [-0.43913472 -0.25189692 0.12411546 ... -0.04670386 0.47970027
- 0.1339655 ]
- [ 0.80941284 0.33414027 0.10742759 ... -0.17197524 -0.16508804
- -0.20685425]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- ...
- [[-0.02867951 0.0564835 0.0150851 ... 0.74035805 -0.30081287
- 0.31962818]
- [ 0.07382207 -0.05249734 0.05087743 ... 0.82050836 -0.03774352
- 0.41221875]
- [-0.10616651 -0.07183579 -0.02862857 ... 0.13253474 0.73543155
- 0.6359683 ]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.94968927 -0.00939775 -0.0047697 ... -0.04689858 -0.08846093
- -0.20122004]
- [ 0.97008485 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
- -0.14288445]
- [-0.20942406 -0.21343052 -0.00624497 ... 0.05516739 -0.33565506
- 0.7583357 ]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.94968927 -0.00939775 -0.0047697 ... -0.04689858 -0.08846093
- -0.20122004]
- [ 0.97008485 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
- -0.14288445]
- [-0.20942406 -0.21343052 -0.00624497 ... 0.05516739 -0.33565506
- 0.7583357 ]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]]]
- Layer 1, Token 1 (k padded comparison):
- Original tensor sum: 46.146324
- Converted tensor sum: 46.146336
- Original tensor mean: 0.002253
- Converted tensor mean: 0.002253
- Mean difference: 0.00000002
- Maximum pointwise difference: 0.00001496
- Max difference location: (0, 24, 0, 4)
- Values at max diff - Original: -0.75322348, Converted: -0.75323844
- Biggest difference in row (0, 4, 0), sum -1.893247 vs -1.893263
- Layer 2, Token 1 (k padded comparison):
- Original tensor sum: 38.402348
- Converted tensor sum: 38.402321
- Original tensor mean: 0.001875
- Converted tensor mean: 0.001875
- Mean difference: 0.00000002
- Maximum pointwise difference: 0.00000370
- Max difference location: (0, 4, 0, 1)
- Values at max diff - Original: 0.75365573, Converted: 0.75365943
- Biggest difference in row (0, 8, 0), sum -1.569355 vs -1.569358
- Layer 4, Token 1 (k padded comparison):
- Original tensor sum: -80.321693
- Converted tensor sum: -80.319084
- Original tensor mean: -0.003922
- Converted tensor mean: -0.003922
- Mean difference: 0.00000094
- Maximum pointwise difference: 0.00016582
- Max difference location: (0, 12, 1, 2)
- Values at max diff - Original: 0.42303348, Converted: 0.42286766
- Biggest difference in row (0, 4, 3), sum -0.373179 vs -0.372919
- Layer 5, Token 1 (k padded comparison):
- Original tensor sum: -101.494308
- Converted tensor sum: -101.496490
- Original tensor mean: -0.004956
- Converted tensor mean: -0.004956
- Mean difference: 0.00000073
- Maximum pointwise difference: 0.00011382
- Max difference location: (0, 8, 3, 4)
- Values at max diff - Original: -0.06280152, Converted: -0.06291535
- Biggest difference in row (0, 24, 2), sum -1.003613 vs -1.003973
- Layer 6, Token 1 (k padded comparison):
- Original tensor sum: -60.378914
- Converted tensor sum: -60.399891
- Original tensor mean: -0.002948
- Converted tensor mean: -0.002949
- Mean difference: 0.00000342
- Maximum pointwise difference: 0.00096719
- Max difference location: (0, 8, 1, 5)
- Values at max diff - Original: 0.19049226, Converted: 0.19145945
- Biggest difference in row (0, 20, 0), sum -1.118855 vs -1.120621
- Layer 8, Token 1 (k padded comparison):
- Original tensor sum: -61.474350
- Converted tensor sum: -61.483994
- Original tensor mean: -0.003002
- Converted tensor mean: -0.003002
- Mean difference: 0.00000346
- Maximum pointwise difference: 0.00061786
- Max difference location: (0, 8, 2, 7)
- Values at max diff - Original: 0.35214049, Converted: 0.35275835
- Biggest difference in row (0, 20, 3), sum -0.407597 vs -0.408426
- Layer 9, Token 1 (k padded comparison):
- Original tensor sum: -110.836624
- Converted tensor sum: -110.841522
- Original tensor mean: -0.005412
- Converted tensor mean: -0.005412
- Mean difference: 0.00000378
- Maximum pointwise difference: 0.00051466
- Max difference location: (0, 18, 1, 8)
- Values at max diff - Original: 0.40876523, Converted: 0.40927988
- Biggest difference in row (0, 28, 3), sum -0.911474 vs -0.910520
- Layer 10, Token 1 (k padded comparison):
- Original tensor sum: -90.985107
- Converted tensor sum: -90.978966
- Original tensor mean: -0.004443
- Converted tensor mean: -0.004442
- Mean difference: 0.00000465
- Maximum pointwise difference: 0.00078443
- Max difference location: (0, 18, 3, 6)
- Values at max diff - Original: 0.38864151, Converted: 0.38785708
- Biggest difference in row (0, 18, 3), sum -0.245571 vs -0.247415
- Layer 12, Token 1 (k padded comparison):
- Original tensor sum: -80.152397
- Converted tensor sum: -80.143387
- Original tensor mean: -0.003914
- Converted tensor mean: -0.003913
- Mean difference: 0.00000377
- Maximum pointwise difference: 0.00053528
- Max difference location: (0, 4, 2, 6)
- Values at max diff - Original: 0.33732986, Converted: 0.33786514
- Biggest difference in row (0, 26, 2), sum -2.083733 vs -2.084640
- Layer 13, Token 1 (k padded comparison):
- Original tensor sum: -149.692871
- Converted tensor sum: -149.699692
- Original tensor mean: -0.007309
- Converted tensor mean: -0.007310
- Mean difference: 0.00000382
- Maximum pointwise difference: 0.00069700
- Max difference location: (0, 24, 2, 1)
- Values at max diff - Original: 0.03209215, Converted: 0.03139514
- Biggest difference in row (0, 18, 3), sum -1.337807 vs -1.338803
- Layer 14, Token 1 (k padded comparison):
- Original tensor sum: -158.503815
- Converted tensor sum: -158.505280
- Original tensor mean: -0.007739
- Converted tensor mean: -0.007740
- Mean difference: 0.00000406
- Maximum pointwise difference: 0.00088650
- Max difference location: (0, 18, 3, 0)
- Values at max diff - Original: 0.31103787, Converted: 0.31192437
- Biggest difference in row (0, 24, 2), sum -2.245067 vs -2.246189
- ================================================================================
- Comparing v padded tensors...
- ================================================================================
- Layer 0, Token 1 (v padded comparison):
- Original tensor sum: 43.396095
- Converted tensor sum: 43.396103
- Original tensor mean: 0.002119
- Converted tensor mean: 0.002119
- Mean difference: 0.00000000
- Maximum pointwise difference: 0.00000024
- Max difference location: (0, 4, 3, 1)
- Values at max diff - Original: 3.02466559, Converted: 3.02466583
- Biggest difference in row (0, 4, 3), sum 4.080367 vs 4.080368
- Original tensor:
- [[[[ 0.29945952 0.07364164 0.00633647 ... -0.03352018 -0.13518293
- -0.24422395]
- [-0.06384649 0.34527305 0.05128174 ... 0.10202903 -0.27791512
- -0.26350227]
- [ 0.32036152 -0.10731668 -0.13258429 ... 0.7373227 -0.21349299
- 0.09487297]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.09463742 0.3331761 0.04175158 ... -0.16139531 0.14495076
- -0.23538315]
- [ 0.0059099 -0.22937416 -0.01920018 ... -0.2725759 0.3779854
- -0.25018957]
- [-0.02874102 -0.1163442 -0.06129871 ... -0.24273473 -0.2218994
- 0.09502672]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.01040334 -0.16231607 -0.19213551 ... 0.26839197 -0.14292948
- -0.0833158 ]
- [-0.22485131 -0.26889268 -0.03555897 ... -0.26755306 -0.27845183
- -0.15565467]
- [-0.27764964 2.820727 -0.24290419 ... 0.12924032 -0.22718066
- 0.06345078]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- ...
- [[ 0.05224958 -0.27178496 0.02280007 ... -0.17813048 -0.00848302
- 0.3436797 ]
- [-0.23870829 0.0102903 0.09486482 ... -0.17058551 0.10059616
- 0.45001176]
- [-0.24846101 1.1912329 -0.26268318 ... 0.148858 0.10272522
- 0.21719539]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.20595089 -0.02217443 0.01070492 ... 0.00675152 0.02506094
- -0.0267982 ]
- [-0.21499586 -0.25627282 -0.07001566 ... 0.00795406 -0.02202371
- -0.01158573]
- [ 0.04917984 -0.27141818 -0.26334 ... -0.09943416 0.03347556
- 0.10718762]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.1791143 -0.0034847 0.9858279 ... 0.19559488 -0.0804936
- -0.01883564]
- [-0.17319466 0.07188834 -0.26032022 ... -0.04845351 -0.24498041
- 0.12539098]
- [ 0.00640415 -0.22212675 -0.22916575 ... -0.170733 0.5452839
- -0.14139794]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]]]
- Converted tensor:
- [[[[ 0.29945952 0.07364164 0.00633647 ... -0.03352018 -0.13518293
- -0.24422395]
- [-0.06384649 0.34527302 0.05128174 ... 0.10202905 -0.27791512
- -0.26350227]
- [ 0.3203615 -0.10731667 -0.13258429 ... 0.7373226 -0.213493
- 0.09487297]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.09463742 0.33317608 0.04175158 ... -0.16139533 0.14495076
- -0.23538315]
- [ 0.0059099 -0.22937416 -0.01920018 ... -0.27257589 0.3779854
- -0.25018957]
- [-0.02874102 -0.11634421 -0.06129871 ... -0.24273473 -0.22189939
- 0.09502671]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.01040334 -0.16231604 -0.19213554 ... 0.268392 -0.14292948
- -0.0833158 ]
- [-0.22485131 -0.26889268 -0.03555898 ... -0.26755306 -0.27845183
- -0.15565467]
- [-0.27764964 2.820727 -0.24290417 ... 0.12924033 -0.22718067
- 0.06345078]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- ...
- [[ 0.05224958 -0.27178493 0.02280007 ... -0.17813048 -0.00848302
- 0.34367973]
- [-0.23870829 0.0102903 0.09486482 ... -0.17058551 0.10059617
- 0.45001176]
- [-0.248461 1.1912329 -0.26268318 ... 0.148858 0.10272522
- 0.21719539]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[-0.20595089 -0.02217443 0.01070492 ... 0.00675152 0.02506094
- -0.0267982 ]
- [-0.21499586 -0.2562728 -0.07001566 ... 0.00795406 -0.02202371
- -0.01158573]
- [ 0.04917984 -0.27141815 -0.26334 ... -0.09943416 0.03347556
- 0.10718761]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]
- [[ 0.1791143 -0.0034847 0.985828 ... 0.19559486 -0.08049361
- -0.01883564]
- [-0.17319466 0.07188834 -0.2603202 ... -0.04845351 -0.24498038
- 0.12539098]
- [ 0.00640414 -0.22212675 -0.22916573 ... -0.17073299 0.5452839
- -0.14139794]
- ...
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]
- [ 0. 0. 0. ... 0. 0.
- 0. ]]]]
- Layer 1, Token 1 (v padded comparison):
- Original tensor sum: 64.583611
- Converted tensor sum: 64.583618
- Original tensor mean: 0.003153
- Converted tensor mean: 0.003153
- Mean difference: 0.00000000
- Maximum pointwise difference: 0.00000083
- Max difference location: (0, 31, 2, 0)
- Values at max diff - Original: 1.71371531, Converted: 1.71371615
- Biggest difference in row (0, 14, 2), sum 3.047640 vs 3.047641
- Layer 2, Token 1 (v padded comparison):
- Original tensor sum: 79.718636
- Converted tensor sum: 79.718628
- Original tensor mean: 0.003893
- Converted tensor mean: 0.003893
- Mean difference: 0.00000002
- Maximum pointwise difference: 0.00000691
- Max difference location: (0, 3, 3, 0)
- Values at max diff - Original: 3.08589840, Converted: 3.08589149
- Biggest difference in row (0, 3, 3), sum 5.127280 vs 5.127275
- Layer 4, Token 1 (v padded comparison):
- Original tensor sum: -6.421658
- Converted tensor sum: -6.417439
- Original tensor mean: -0.000314
- Converted tensor mean: -0.000313
- Mean difference: 0.00000083
- Maximum pointwise difference: 0.00020146
- Max difference location: (0, 3, 3, 9)
- Values at max diff - Original: 0.71459866, Converted: 0.71439719
- Biggest difference in row (0, 2, 2), sum 1.330729 vs 1.330986
- Layer 5, Token 1 (v padded comparison):
- Original tensor sum: -22.732481
- Converted tensor sum: -22.732681
- Original tensor mean: -0.001110
- Converted tensor mean: -0.001110
- Mean difference: 0.00000057
- Maximum pointwise difference: 0.00014561
- Max difference location: (0, 5, 2, 8)
- Values at max diff - Original: 0.86213899, Converted: 0.86199337
- Biggest difference in row (0, 5, 2), sum 0.321165 vs 0.320951
- Layer 6, Token 1 (v padded comparison):
- Original tensor sum: 79.420486
- Converted tensor sum: 79.392494
- Original tensor mean: 0.003878
- Converted tensor mean: 0.003877
- Mean difference: 0.00000437
- Maximum pointwise difference: 0.00160646
- Max difference location: (0, 28, 3, 8)
- Values at max diff - Original: 3.32436800, Converted: 3.32276154
- Biggest difference in row (0, 8, 2), sum 5.307434 vs 5.305095
- Layer 8, Token 1 (v padded comparison):
- Original tensor sum: 56.337997
- Converted tensor sum: 56.328655
- Original tensor mean: 0.002751
- Converted tensor mean: 0.002750
- Mean difference: 0.00000345
- Maximum pointwise difference: 0.00109446
- Max difference location: (0, 27, 3, 8)
- Values at max diff - Original: 1.29648387, Converted: 1.29538941
- Biggest difference in row (0, 0, 2), sum 3.391128 vs 3.390095
- Layer 9, Token 1 (v padded comparison):
- Original tensor sum: -60.833374
- Converted tensor sum: -60.822338
- Original tensor mean: -0.002970
- Converted tensor mean: -0.002970
- Mean difference: 0.00000277
- Maximum pointwise difference: 0.00082873
- Max difference location: (0, 4, 2, 0)
- Values at max diff - Original: 0.17745507, Converted: 0.17828380
- Biggest difference in row (0, 29, 3), sum -0.619908 vs -0.618863
- Layer 10, Token 1 (v padded comparison):
- Original tensor sum: -61.881168
- Converted tensor sum: -61.881893
- Original tensor mean: -0.003022
- Converted tensor mean: -0.003022
- Mean difference: 0.00000326
- Maximum pointwise difference: 0.00088513
- Max difference location: (0, 18, 3, 1)
- Values at max diff - Original: 0.75186056, Converted: 0.75097543
- Biggest difference in row (0, 1, 2), sum -0.687588 vs -0.688463
- Layer 12, Token 1 (v padded comparison):
- Original tensor sum: -25.326912
- Converted tensor sum: -25.328352
- Original tensor mean: -0.001237
- Converted tensor mean: -0.001237
- Mean difference: 0.00000326
- Maximum pointwise difference: 0.00108600
- Max difference location: (0, 26, 1, 1)
- Values at max diff - Original: 2.54334521, Converted: 2.54225922
- Biggest difference in row (0, 16, 2), sum 1.421780 vs 1.420637
- Layer 13, Token 1 (v padded comparison):
- Original tensor sum: -76.935516
- Converted tensor sum: -76.941040
- Original tensor mean: -0.003757
- Converted tensor mean: -0.003757
- Mean difference: 0.00000263
- Maximum pointwise difference: 0.00127554
- Max difference location: (0, 19, 1, 3)
- Values at max diff - Original: 2.36973763, Converted: 2.36846209
- Biggest difference in row (0, 19, 1), sum 1.449438 vs 1.448400
- Layer 14, Token 1 (v padded comparison):
- Original tensor sum: -45.008949
- Converted tensor sum: -45.003647
- Original tensor mean: -0.002198
- Converted tensor mean: -0.002197
- Mean difference: 0.00000327
- Maximum pointwise difference: 0.00136590
- Max difference location: (0, 28, 3, 5)
- Values at max diff - Original: 2.56902742, Converted: 2.56766152
- Biggest difference in row (0, 28, 3), sum 1.363533 vs 1.361795
- ================================================================================
- SUMMARY:
- Total comparisons attempted: 876
- Successful comparisons: 875
- Failed comparisons: 1
- Maximum difference statistics:
- Min max difference: 0.00000024
- Max max difference: 235.55526733
- Mean of max differences: 18.71273422
- Median of max differences: 5.37744808
- Comparisons with diff > 1e-5: 804/875
|