tinylong-30-compare.txt 393 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478447944804481448244834484448544864487448844894490449144924493449444954496449744984499450045014502450345044505450645074508450945104511451245134514451545164517451845194520452145224523452445254526452745284529453045314532453345344535453645374538453945404541454245434544454545464547454845494550455145524553455445554556455745584559456045614562456345644565456645674568456945704571457245734574457545764577457845794580458145824583458445854586458745884589459045914592459345944595459645974598459946004601460246034604460546064607460846094610461146124613461446154616461746184619462046214622462346244625462646274628462946304631463246334634463546364637463846394640464146424643464446454646464746484649465046514652465346544655465646574658465946604661466246634664466546664667466846694670467146724673467446754676467746784679468046814682468346844685468646874688468946904691469246934694469546964697469846994700470147024703470447054706470747084709471047114712471347144715471647174718471947204721472247234724472547264727472847294730473147324733473447354736473747384739474047414742474347444745474647474748474947504751475247534754475547564757475847594760476147624763476447654766476747684769477047714772477347744775477647774778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765777577857795780578157825783578457855786578757885789579057915792579357945795579657975798579958005801580258035804580558065807580858095810581158125813581458155816581758185819582058215822582358245825582658275828582958305831583258335834583558365837583858395840584158425843584458455846584758485849585058515852585358545855585658575858585958605861586258635864586558665867586858695870587158725873587458755876587758785879588058815882588358845885588658875888588958905891589258935894589558965897589858995900590159025903590459055906590759085909591059115912591359145915591659175918591959205921592259235924592559265927592859295930593159325933593459355936593759385939594059415942594359445945594659475948594959505951595259535954595559565957595859595960596159625963596459655966596759685969597059715972597359745975597659775978597959805981598259835984598559865987598859895990599159925993599459955996599759985999600060016002600360046005600660076008600960106011601260136014601560166017601860196020602160226023602460256026602760286029603060316032603360346035603660376038603960406041604260436044604560466047604860496050605160526053605460556056605760586059606060616062606360646065606660676068606960706071607260736074607560766077607860796080608160826083608460856086608760886089609060916092609360946095609660976098609961006101610261036104610561066107610861096110611161126113611461156116611761186119612061216122612361246125612661276128612961306131613261336134613561366137613861396140614161426143614461456146614761486149615061516152615361546155615661576158615961606161616261636164616561666167616861696170617161726173617461756176617761786179618061816182618361846185618661876188618961906191619261936194619561966197619861996200620162026203620462056206620762086209621062116212621362146215621662176218621962206221622262236224622562266227622862296230623162326233623462356236623762386239624062416242624362446245624662476248624962506251625262536254625562566257625862596260626162626263626462656266626762686269627062716272627362746275627662776278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767277727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487748874897490749174927493749474957496749774987499750075017502750375047505750675077508750975107511751275137514751575167517751875197520752175227523752475257526752775287529753075317532753375347535753675377538753975407541754275437544754575467547754875497550755175527553755475557556755775587559756075617562756375647565756675677568756975707571757275737574757575767577757875797580758175827583758475857586758775887589759075917592759375947595759675977598759976007601760276037604760576067607760876097610761176127613761476157616761776187619762076217622762376247625762676277628762976307631763276337634763576367637763876397640764176427643764476457646764776487649765076517652765376547655765676577658765976607661766276637664766576667667766876697670767176727673767476757676767776787679768076817682768376847685768676877688768976907691769276937694769576967697769876997700770177027703770477057706770777087709771077117712771377147715771677177718771977207721772277237724772577267727772877297730773177327733773477357736773777387739774077417742774377447745774677477748774977507751775277537754775577567757775877597760776177627763776477657766776777687769777077717772777377747775777677777778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768777877887798780878187828783878487858786878787888789879087918792879387948795879687978798879988008801880288038804880588068807880888098810881188128813881488158816881788188819882088218822882388248825882688278828882988308831883288338834883588368837883888398840884188428843884488458846884788488849885088518852885388548855885688578858885988608861886288638864886588668867886888698870887188728873887488758876887788788879888088818882888388848885888688878888888988908891889288938894889588968897889888998900890189028903890489058906890789088909891089118912891389148915891689178918891989208921892289238924892589268927892889298930893189328933893489358936893789388939894089418942894389448945894689478948894989508951895289538954895589568957895889598960896189628963896489658966896789688969897089718972897389748975897689778978897989808981898289838984898589868987898889898990899189928993899489958996899789988999900090019002900390049005900690079008900990109011901290139014901590169017901890199020902190229023902490259026902790289029903090319032903390349035903690379038903990409041904290439044904590469047904890499050905190529053905490559056905790589059906090619062906390649065906690679068906990709071907290739074907590769077907890799080908190829083908490859086908790889089909090919092909390949095909690979098909991009101910291039104910591069107910891099110911191129113911491159116911791189119912091219122912391249125912691279128912991309131913291339134913591369137913891399140914191429143914491459146914791489149915091519152915391549155915691579158915991609161916291639164916591669167916891699170917191729173917491759176917791789179918091819182918391849185918691879188918991909191919291939194919591969197919891999200920192029203920492059206920792089209921092119212921392149215921692179218921992209221922292239224922592269227922892299230923192329233923492359236923792389239924092419242924392449245924692479248924992509251925292539254925592569257925892599260926192629263926492659266926792689269927092719272927392749275927692779278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022110222102231022410225102261022710228102291023010231102321023310234102351023610237102381023910240102411024210243102441024510246102471024810249102501025110252102531025410255102561025710258102591026010261102621026310264102651026610267102681026910270102711027210273102741027510276102771027810279102801028110282102831028410285102861028710288102891029010291102921029310294102951029610297102981029910300103011030210303103041030510306103071030810309103101031110312103131031410315103161031710318103191032010321103221032310324103251032610327103281032910330103311033210333103341033510336103371033810339103401034110342103431034410345103461034710348103491035010351103521035310354103551035610357103581035910360103611036210363103641036510366103671036810369103701037110372103731037410375103761037710378103791038010381103821038310384103851038610387103881038910390103911039210393
  1. Comparing tensors between original and converted GGML models...
  2. Tokens: 30, Layers: 16
  3. ================================================================================
  4. ================================================================================
  5. Comparing model.layers.out tensors...
  6. ================================================================================
  7. Layer 0, Token 1 (model.layers.out comparison):
  8. Original tensor sum: -109.202682
  9. Converted tensor sum: -109.202667
  10. Original tensor mean: -3.412584
  11. Converted tensor mean: -3.412583
  12. Mean difference: 0.00000112
  13. Maximum pointwise difference: 0.00000358
  14. Max difference location: (0, 3, 2)
  15. Values at max diff - Original: -3.23131371, Converted: -3.23131013
  16. Biggest difference in row (0, 3), sum -70.622650 vs -70.622643
  17. Original tensor:
  18. [[[ 0.53282046 0.45114386 2.2156353 -0.5117184 -1.6482054
  19. 4.6376505 -2.9421384 -3.4354253 ]
  20. [ -8.487997 -5.323722 -4.790135 -8.482631 4.4259453
  21. -0.7649012 -5.2080426 -3.5365663 ]
  22. [ -2.8659308 -0.7302124 3.4494972 -0.7121358 -4.4744496
  23. 1.4391303 -1.05655 -0.76109344]
  24. [-10.8983 -11.325392 -3.2313137 -11.594204 -13.007862
  25. -6.099822 -13.027901 -1.4378595 ]]]
  26. Converted tensor:
  27. [[[ 0.53281975 0.45114377 2.215636 -0.5117179 -1.6482062
  28. 4.6376514 -2.942138 -3.4354265 ]
  29. [ -8.487997 -5.323724 -4.7901373 -8.48263 4.425948
  30. -0.7649009 -5.208041 -3.5365672 ]
  31. [ -2.8659306 -0.7302135 3.4494982 -0.7121362 -4.474449
  32. 1.4391313 -1.0565499 -0.7610918 ]
  33. [-10.898299 -11.325391 -3.2313101 -11.594204 -13.00786
  34. -6.099819 -13.027899 -1.437861 ]]]
  35. Layer 1, Token 1 (model.layers.out comparison):
  36. Original tensor sum: -132.672058
  37. Converted tensor sum: -132.672043
  38. Original tensor mean: -4.146002
  39. Converted tensor mean: -4.146001
  40. Mean difference: 0.00000322
  41. Maximum pointwise difference: 0.00000763
  42. Max difference location: (0, 1, 0)
  43. Values at max diff - Original: -8.45331192, Converted: -8.45330429
  44. Biggest difference in row (0, 2), sum 8.045303 vs 8.045274
  45. Layer 2, Token 1 (model.layers.out comparison):
  46. Original tensor sum: -123.594589
  47. Converted tensor sum: -123.594765
  48. Original tensor mean: -3.862331
  49. Converted tensor mean: -3.862336
  50. Mean difference: 0.00001101
  51. Maximum pointwise difference: 0.00005722
  52. Max difference location: (0, 3, 0)
  53. Values at max diff - Original: -14.73531914, Converted: -14.73537636
  54. Biggest difference in row (0, 3), sum -100.578644 vs -100.578781
  55. Layer 3, Token 1 (model.layers.out comparison):
  56. Original tensor sum: -1014.197754
  57. Converted tensor sum: -1014.208618
  58. Original tensor mean: -31.693680
  59. Converted tensor mean: -31.694019
  60. Mean difference: 0.00261304
  61. Maximum pointwise difference: 0.00854874
  62. Max difference location: (0, 3, 4)
  63. Values at max diff - Original: -47.59802246, Converted: -47.60657120
  64. Biggest difference in row (0, 3), sum -413.478455 vs -413.514832
  65. Layer 4, Token 1 (model.layers.out comparison):
  66. Original tensor sum: -974.648987
  67. Converted tensor sum: -974.659424
  68. Original tensor mean: -30.457781
  69. Converted tensor mean: -30.458107
  70. Mean difference: 0.00296569
  71. Maximum pointwise difference: 0.00885773
  72. Max difference location: (0, 3, 4)
  73. Values at max diff - Original: -45.65669632, Converted: -45.66555405
  74. Biggest difference in row (0, 3), sum -380.904694 vs -380.942291
  75. Layer 5, Token 1 (model.layers.out comparison):
  76. Original tensor sum: -842.923950
  77. Converted tensor sum: -842.923950
  78. Original tensor mean: -26.341373
  79. Converted tensor mean: -26.341373
  80. Mean difference: 0.00327585
  81. Maximum pointwise difference: 0.00857162
  82. Max difference location: (0, 3, 4)
  83. Values at max diff - Original: -47.09656525, Converted: -47.10513687
  84. Biggest difference in row (0, 3), sum -366.704346 vs -366.739746
  85. Layer 6, Token 1 (model.layers.out comparison):
  86. Original tensor sum: -940.556580
  87. Converted tensor sum: -940.507812
  88. Original tensor mean: -29.392393
  89. Converted tensor mean: -29.390869
  90. Mean difference: 0.00368834
  91. Maximum pointwise difference: 0.00840378
  92. Max difference location: (0, 3, 4)
  93. Values at max diff - Original: -49.34116364, Converted: -49.34956741
  94. Biggest difference in row (0, 2), sum -130.006729 vs -129.970612
  95. Layer 7, Token 1 (model.layers.out comparison):
  96. Original tensor sum: -1838.171143
  97. Converted tensor sum: -1838.228271
  98. Original tensor mean: -57.442848
  99. Converted tensor mean: -57.444633
  100. Mean difference: 0.00574541
  101. Maximum pointwise difference: 0.01725769
  102. Max difference location: (0, 0, 4)
  103. Values at max diff - Original: -91.24589539, Converted: -91.26315308
  104. Biggest difference in row (0, 0), sum -622.551270 vs -622.626587
  105. Layer 8, Token 1 (model.layers.out comparison):
  106. Original tensor sum: -1890.751709
  107. Converted tensor sum: -1890.670898
  108. Original tensor mean: -59.085991
  109. Converted tensor mean: -59.083466
  110. Mean difference: 0.01148558
  111. Maximum pointwise difference: 0.05082703
  112. Max difference location: (0, 2, 6)
  113. Values at max diff - Original: -49.12084961, Converted: -49.07002258
  114. Biggest difference in row (0, 2), sum -356.818451 vs -356.663208
  115. Layer 9, Token 1 (model.layers.out comparison):
  116. Original tensor sum: -1949.811523
  117. Converted tensor sum: -1949.711426
  118. Original tensor mean: -60.931610
  119. Converted tensor mean: -60.928482
  120. Mean difference: 0.01115143
  121. Maximum pointwise difference: 0.04758072
  122. Max difference location: (0, 2, 6)
  123. Values at max diff - Original: -49.22105789, Converted: -49.17347717
  124. Biggest difference in row (0, 2), sum -367.878845 vs -367.720154
  125. Layer 10, Token 1 (model.layers.out comparison):
  126. Original tensor sum: -1955.402832
  127. Converted tensor sum: -1955.281250
  128. Original tensor mean: -61.106339
  129. Converted tensor mean: -61.102539
  130. Mean difference: 0.01230341
  131. Maximum pointwise difference: 0.04833603
  132. Max difference location: (0, 2, 6)
  133. Values at max diff - Original: -43.91606140, Converted: -43.86772537
  134. Biggest difference in row (0, 2), sum -370.409668 vs -370.259583
  135. Layer 11, Token 1 (model.layers.out comparison):
  136. Original tensor sum: -3642.472900
  137. Converted tensor sum: -3642.428711
  138. Original tensor mean: -113.827278
  139. Converted tensor mean: -113.825897
  140. Mean difference: 0.01628518
  141. Maximum pointwise difference: 0.05126190
  142. Max difference location: (0, 2, 6)
  143. Values at max diff - Original: -94.39852142, Converted: -94.34725952
  144. Biggest difference in row (0, 2), sum -786.509460 vs -786.331726
  145. Layer 12, Token 1 (model.layers.out comparison):
  146. Original tensor sum: -3739.976807
  147. Converted tensor sum: -3739.936035
  148. Original tensor mean: -116.874275
  149. Converted tensor mean: -116.873001
  150. Mean difference: 0.01711488
  151. Maximum pointwise difference: 0.05059052
  152. Max difference location: (0, 2, 6)
  153. Values at max diff - Original: -95.09668732, Converted: -95.04609680
  154. Biggest difference in row (0, 2), sum -816.550781 vs -816.352295
  155. Layer 13, Token 1 (model.layers.out comparison):
  156. Original tensor sum: -3821.749268
  157. Converted tensor sum: -3821.721680
  158. Original tensor mean: -119.429665
  159. Converted tensor mean: -119.428802
  160. Mean difference: 0.01747012
  161. Maximum pointwise difference: 0.05052948
  162. Max difference location: (0, 2, 7)
  163. Values at max diff - Original: -79.35634613, Converted: -79.30581665
  164. Biggest difference in row (0, 2), sum -840.805908 vs -840.616699
  165. Layer 14, Token 1 (model.layers.out comparison):
  166. Original tensor sum: -4057.451904
  167. Converted tensor sum: -4057.284668
  168. Original tensor mean: -126.795372
  169. Converted tensor mean: -126.790146
  170. Mean difference: 0.01935625
  171. Maximum pointwise difference: 0.07952881
  172. Max difference location: (0, 2, 6)
  173. Values at max diff - Original: -97.11465454, Converted: -97.03512573
  174. Biggest difference in row (0, 2), sum -917.124573 vs -916.826172
  175. Error processing model.layers.out layer 15, token 1: cannot reshape array of size 8 into shape (1,4,8)
  176. Layer 0, Token 2 (model.layers.out comparison):
  177. Original tensor sum: -7.280505
  178. Converted tensor sum: -7.280507
  179. Original tensor mean: -0.910063
  180. Converted tensor mean: -0.910063
  181. Mean difference: 0.00000097
  182. Maximum pointwise difference: 0.00000179
  183. Max difference location: (0, 0, 4)
  184. Values at max diff - Original: -1.49786282, Converted: -1.49786103
  185. Biggest difference in row (0, 0), sum -7.280505 vs -7.280507
  186. Layer 1, Token 2 (model.layers.out comparison):
  187. Original tensor sum: -7.318125
  188. Converted tensor sum: -7.318151
  189. Original tensor mean: -0.914766
  190. Converted tensor mean: -0.914769
  191. Mean difference: 0.00000331
  192. Maximum pointwise difference: 0.00000930
  193. Max difference location: (0, 0, 3)
  194. Values at max diff - Original: -3.41128922, Converted: -3.41129851
  195. Biggest difference in row (0, 0), sum -7.318125 vs -7.318151
  196. Layer 2, Token 2 (model.layers.out comparison):
  197. Original tensor sum: 14.344932
  198. Converted tensor sum: 14.344961
  199. Original tensor mean: 1.793116
  200. Converted tensor mean: 1.793120
  201. Mean difference: 0.00000746
  202. Maximum pointwise difference: 0.00003266
  203. Max difference location: (0, 0, 0)
  204. Values at max diff - Original: 3.23243976, Converted: 3.23247242
  205. Biggest difference in row (0, 0), sum 14.344932 vs 14.344961
  206. Layer 3, Token 2 (model.layers.out comparison):
  207. Original tensor sum: 46.801067
  208. Converted tensor sum: 46.811996
  209. Original tensor mean: 5.850133
  210. Converted tensor mean: 5.851500
  211. Mean difference: 0.00141515
  212. Maximum pointwise difference: 0.00275421
  213. Max difference location: (0, 0, 3)
  214. Values at max diff - Original: 6.65637064, Converted: 6.65912485
  215. Biggest difference in row (0, 0), sum 46.801067 vs 46.811996
  216. Layer 4, Token 2 (model.layers.out comparison):
  217. Original tensor sum: 47.891678
  218. Converted tensor sum: 47.901840
  219. Original tensor mean: 5.986460
  220. Converted tensor mean: 5.987730
  221. Mean difference: 0.00131346
  222. Maximum pointwise difference: 0.00296640
  223. Max difference location: (0, 0, 3)
  224. Values at max diff - Original: 7.13961887, Converted: 7.14258528
  225. Biggest difference in row (0, 0), sum 47.891678 vs 47.901840
  226. Layer 5, Token 2 (model.layers.out comparison):
  227. Original tensor sum: 45.815926
  228. Converted tensor sum: 45.826260
  229. Original tensor mean: 5.726991
  230. Converted tensor mean: 5.728282
  231. Mean difference: 0.00137006
  232. Maximum pointwise difference: 0.00332642
  233. Max difference location: (0, 0, 3)
  234. Values at max diff - Original: 7.94661283, Converted: 7.94993925
  235. Biggest difference in row (0, 0), sum 45.815926 vs 45.826260
  236. Layer 6, Token 2 (model.layers.out comparison):
  237. Original tensor sum: 40.223167
  238. Converted tensor sum: 40.231720
  239. Original tensor mean: 5.027896
  240. Converted tensor mean: 5.028965
  241. Mean difference: 0.00155937
  242. Maximum pointwise difference: 0.00270462
  243. Max difference location: (0, 0, 0)
  244. Values at max diff - Original: 7.07846451, Converted: 7.08116913
  245. Biggest difference in row (0, 0), sum 40.223167 vs 40.231720
  246. Layer 7, Token 2 (model.layers.out comparison):
  247. Original tensor sum: 84.588196
  248. Converted tensor sum: 84.602402
  249. Original tensor mean: 10.573524
  250. Converted tensor mean: 10.575300
  251. Mean difference: 0.00185513
  252. Maximum pointwise difference: 0.00356102
  253. Max difference location: (0, 0, 0)
  254. Values at max diff - Original: 13.31151009, Converted: 13.31507111
  255. Biggest difference in row (0, 0), sum 84.588196 vs 84.602402
  256. Layer 8, Token 2 (model.layers.out comparison):
  257. Original tensor sum: 85.737823
  258. Converted tensor sum: 85.749390
  259. Original tensor mean: 10.717228
  260. Converted tensor mean: 10.718674
  261. Mean difference: 0.00189817
  262. Maximum pointwise difference: 0.00350094
  263. Max difference location: (0, 0, 0)
  264. Values at max diff - Original: 13.90340519, Converted: 13.90690613
  265. Biggest difference in row (0, 0), sum 85.737823 vs 85.749390
  266. Layer 9, Token 2 (model.layers.out comparison):
  267. Original tensor sum: 83.069107
  268. Converted tensor sum: 83.078979
  269. Original tensor mean: 10.383638
  270. Converted tensor mean: 10.384872
  271. Mean difference: 0.00177890
  272. Maximum pointwise difference: 0.00335407
  273. Max difference location: (0, 0, 0)
  274. Values at max diff - Original: 13.79222488, Converted: 13.79557896
  275. Biggest difference in row (0, 0), sum 83.069107 vs 83.078979
  276. Layer 10, Token 2 (model.layers.out comparison):
  277. Original tensor sum: 80.782455
  278. Converted tensor sum: 80.791588
  279. Original tensor mean: 10.097807
  280. Converted tensor mean: 10.098948
  281. Mean difference: 0.00190949
  282. Maximum pointwise difference: 0.00329256
  283. Max difference location: (0, 0, 2)
  284. Values at max diff - Original: 6.64920282, Converted: 6.65249538
  285. Biggest difference in row (0, 0), sum 80.782455 vs 80.791588
  286. Layer 11, Token 2 (model.layers.out comparison):
  287. Original tensor sum: 124.938332
  288. Converted tensor sum: 124.953712
  289. Original tensor mean: 15.617291
  290. Converted tensor mean: 15.619214
  291. Mean difference: 0.00253391
  292. Maximum pointwise difference: 0.00420666
  293. Max difference location: (0, 0, 2)
  294. Values at max diff - Original: 12.86635590, Converted: 12.87056255
  295. Biggest difference in row (0, 0), sum 124.938332 vs 124.953712
  296. Layer 12, Token 2 (model.layers.out comparison):
  297. Original tensor sum: 124.466995
  298. Converted tensor sum: 124.483871
  299. Original tensor mean: 15.558374
  300. Converted tensor mean: 15.560484
  301. Mean difference: 0.00271881
  302. Maximum pointwise difference: 0.00506878
  303. Max difference location: (0, 0, 2)
  304. Values at max diff - Original: 12.41438103, Converted: 12.41944981
  305. Biggest difference in row (0, 0), sum 124.466995 vs 124.483871
  306. Layer 13, Token 2 (model.layers.out comparison):
  307. Original tensor sum: 121.646957
  308. Converted tensor sum: 121.660385
  309. Original tensor mean: 15.205870
  310. Converted tensor mean: 15.207548
  311. Mean difference: 0.00218880
  312. Maximum pointwise difference: 0.00470448
  313. Max difference location: (0, 0, 2)
  314. Values at max diff - Original: 12.02227020, Converted: 12.02697468
  315. Biggest difference in row (0, 0), sum 121.646957 vs 121.660385
  316. Layer 14, Token 2 (model.layers.out comparison):
  317. Original tensor sum: 116.636169
  318. Converted tensor sum: 116.658142
  319. Original tensor mean: 14.579521
  320. Converted tensor mean: 14.582268
  321. Mean difference: 0.00299489
  322. Maximum pointwise difference: 0.00521469
  323. Max difference location: (0, 0, 0)
  324. Values at max diff - Original: 18.26870537, Converted: 18.27392006
  325. Biggest difference in row (0, 0), sum 116.636169 vs 116.658142
  326. Layer 15, Token 2 (model.layers.out comparison):
  327. Original tensor sum: 201.843384
  328. Converted tensor sum: 201.865143
  329. Original tensor mean: 25.230423
  330. Converted tensor mean: 25.233143
  331. Mean difference: 0.00346577
  332. Maximum pointwise difference: 0.00746727
  333. Max difference location: (0, 0, 0)
  334. Values at max diff - Original: 30.94509888, Converted: 30.95256615
  335. Biggest difference in row (0, 0), sum 201.843384 vs 201.865143
  336. Layer 0, Token 3 (model.layers.out comparison):
  337. Original tensor sum: 18.698099
  338. Converted tensor sum: 18.475292
  339. Original tensor mean: 2.337262
  340. Converted tensor mean: 2.309412
  341. Mean difference: 2.67848086
  342. Maximum pointwise difference: 4.89963531
  343. Max difference location: (0, 0, 6)
  344. Values at max diff - Original: 2.51813841, Converted: 7.41777372
  345. Biggest difference in row (0, 0), sum 18.698099 vs 18.475292
  346. Layer 1, Token 3 (model.layers.out comparison):
  347. Original tensor sum: 13.937105
  348. Converted tensor sum: 11.538675
  349. Original tensor mean: 1.742138
  350. Converted tensor mean: 1.442334
  351. Mean difference: 2.56903791
  352. Maximum pointwise difference: 5.56039190
  353. Max difference location: (0, 0, 7)
  354. Values at max diff - Original: 5.86116695, Converted: 0.30077514
  355. Biggest difference in row (0, 0), sum 13.937105 vs 11.538675
  356. Layer 2, Token 3 (model.layers.out comparison):
  357. Original tensor sum: 17.835873
  358. Converted tensor sum: 9.065081
  359. Original tensor mean: 2.229484
  360. Converted tensor mean: 1.133135
  361. Mean difference: 2.48439741
  362. Maximum pointwise difference: 7.80053854
  363. Max difference location: (0, 0, 7)
  364. Values at max diff - Original: 7.08156919, Converted: -0.71896935
  365. Biggest difference in row (0, 0), sum 17.835873 vs 9.065081
  366. Layer 3, Token 3 (model.layers.out comparison):
  367. Original tensor sum: 19.733971
  368. Converted tensor sum: 0.388454
  369. Original tensor mean: 2.466746
  370. Converted tensor mean: 0.048557
  371. Mean difference: 2.74538827
  372. Maximum pointwise difference: 8.14173889
  373. Max difference location: (0, 0, 7)
  374. Values at max diff - Original: 7.32600927, Converted: -0.81572962
  375. Biggest difference in row (0, 0), sum 19.733971 vs 0.388454
  376. Layer 4, Token 3 (model.layers.out comparison):
  377. Original tensor sum: 17.522738
  378. Converted tensor sum: 7.885162
  379. Original tensor mean: 2.190342
  380. Converted tensor mean: 0.985645
  381. Mean difference: 4.25575876
  382. Maximum pointwise difference: 7.97597837
  383. Max difference location: (0, 0, 1)
  384. Values at max diff - Original: 3.98348713, Converted: -3.99249125
  385. Biggest difference in row (0, 0), sum 17.522738 vs 7.885162
  386. Layer 5, Token 3 (model.layers.out comparison):
  387. Original tensor sum: 21.460897
  388. Converted tensor sum: 15.969997
  389. Original tensor mean: 2.682612
  390. Converted tensor mean: 1.996250
  391. Mean difference: 4.34595299
  392. Maximum pointwise difference: 8.46822739
  393. Max difference location: (0, 0, 1)
  394. Values at max diff - Original: 6.38704681, Converted: -2.08118057
  395. Biggest difference in row (0, 0), sum 21.460897 vs 15.969997
  396. Layer 6, Token 3 (model.layers.out comparison):
  397. Original tensor sum: 18.336536
  398. Converted tensor sum: 9.128950
  399. Original tensor mean: 2.292067
  400. Converted tensor mean: 1.141119
  401. Mean difference: 3.42625880
  402. Maximum pointwise difference: 9.18005276
  403. Max difference location: (0, 0, 1)
  404. Values at max diff - Original: 6.15963268, Converted: -3.02042007
  405. Biggest difference in row (0, 0), sum 18.336536 vs 9.128950
  406. Layer 7, Token 3 (model.layers.out comparison):
  407. Original tensor sum: 27.127436
  408. Converted tensor sum: -91.853516
  409. Original tensor mean: 3.390929
  410. Converted tensor mean: -11.481689
  411. Mean difference: 14.87261772
  412. Maximum pointwise difference: 25.04354668
  413. Max difference location: (0, 0, 1)
  414. Values at max diff - Original: 6.62252426, Converted: -18.42102242
  415. Biggest difference in row (0, 0), sum 27.127436 vs -91.853516
  416. Layer 8, Token 3 (model.layers.out comparison):
  417. Original tensor sum: 22.795490
  418. Converted tensor sum: -94.016220
  419. Original tensor mean: 2.849436
  420. Converted tensor mean: -11.752028
  421. Mean difference: 14.60146332
  422. Maximum pointwise difference: 26.14372826
  423. Max difference location: (0, 0, 2)
  424. Values at max diff - Original: 0.59730154, Converted: -25.54642677
  425. Biggest difference in row (0, 0), sum 22.795490 vs -94.016220
  426. Layer 9, Token 3 (model.layers.out comparison):
  427. Original tensor sum: 16.367466
  428. Converted tensor sum: -159.747223
  429. Original tensor mean: 2.045933
  430. Converted tensor mean: -19.968403
  431. Mean difference: 22.01433563
  432. Maximum pointwise difference: 34.04418182
  433. Max difference location: (0, 0, 2)
  434. Values at max diff - Original: -0.55563742, Converted: -34.59981918
  435. Biggest difference in row (0, 0), sum 16.367466 vs -159.747223
  436. Layer 10, Token 3 (model.layers.out comparison):
  437. Original tensor sum: 22.587862
  438. Converted tensor sum: -171.457092
  439. Original tensor mean: 2.823483
  440. Converted tensor mean: -21.432137
  441. Mean difference: 24.25561905
  442. Maximum pointwise difference: 40.39982224
  443. Max difference location: (0, 0, 2)
  444. Values at max diff - Original: -0.52963394, Converted: -40.92945480
  445. Biggest difference in row (0, 0), sum 22.587862 vs -171.457092
  446. Layer 11, Token 3 (model.layers.out comparison):
  447. Original tensor sum: 98.501198
  448. Converted tensor sum: -580.205811
  449. Original tensor mean: 12.312650
  450. Converted tensor mean: -72.525726
  451. Mean difference: 84.83837128
  452. Maximum pointwise difference: 107.93860626
  453. Max difference location: (0, 0, 2)
  454. Values at max diff - Original: 9.91925716, Converted: -98.01934814
  455. Biggest difference in row (0, 0), sum 98.501198 vs -580.205811
  456. Layer 12, Token 3 (model.layers.out comparison):
  457. Original tensor sum: 96.017456
  458. Converted tensor sum: -599.130005
  459. Original tensor mean: 12.002182
  460. Converted tensor mean: -74.891251
  461. Mean difference: 86.89343262
  462. Maximum pointwise difference: 107.37790680
  463. Max difference location: (0, 0, 2)
  464. Values at max diff - Original: 10.14877987, Converted: -97.22912598
  465. Biggest difference in row (0, 0), sum 96.017456 vs -599.130005
  466. Layer 13, Token 3 (model.layers.out comparison):
  467. Original tensor sum: 93.969711
  468. Converted tensor sum: -604.221680
  469. Original tensor mean: 11.746214
  470. Converted tensor mean: -75.527710
  471. Mean difference: 87.27392578
  472. Maximum pointwise difference: 107.42771149
  473. Max difference location: (0, 0, 2)
  474. Values at max diff - Original: 9.00540733, Converted: -98.42230225
  475. Biggest difference in row (0, 0), sum 93.969711 vs -604.221680
  476. Layer 14, Token 3 (model.layers.out comparison):
  477. Original tensor sum: 96.193565
  478. Converted tensor sum: -675.267456
  479. Original tensor mean: 12.024196
  480. Converted tensor mean: -84.408432
  481. Mean difference: 96.43263245
  482. Maximum pointwise difference: 115.43507385
  483. Max difference location: (0, 0, 2)
  484. Values at max diff - Original: 9.17813015, Converted: -106.25694275
  485. Biggest difference in row (0, 0), sum 96.193565 vs -675.267456
  486. Layer 15, Token 3 (model.layers.out comparison):
  487. Original tensor sum: 203.967834
  488. Converted tensor sum: -1113.465820
  489. Original tensor mean: 25.495979
  490. Converted tensor mean: -139.183228
  491. Mean difference: 164.67919922
  492. Maximum pointwise difference: 181.33709717
  493. Max difference location: (0, 0, 3)
  494. Values at max diff - Original: 23.16110420, Converted: -158.17599487
  495. Biggest difference in row (0, 0), sum 203.967834 vs -1113.465820
  496. Layer 0, Token 4 (model.layers.out comparison):
  497. Original tensor sum: 1.060196
  498. Converted tensor sum: -6.035928
  499. Original tensor mean: 0.132525
  500. Converted tensor mean: -0.754491
  501. Mean difference: 1.11038423
  502. Maximum pointwise difference: 2.90589857
  503. Max difference location: (0, 0, 2)
  504. Values at max diff - Original: 1.75988472, Converted: -1.14601374
  505. Biggest difference in row (0, 0), sum 1.060196 vs -6.035928
  506. Layer 1, Token 4 (model.layers.out comparison):
  507. Original tensor sum: -45.922947
  508. Converted tensor sum: -53.028908
  509. Original tensor mean: -5.740368
  510. Converted tensor mean: -6.628613
  511. Mean difference: 1.58238363
  512. Maximum pointwise difference: 3.98315811
  513. Max difference location: (0, 0, 3)
  514. Values at max diff - Original: -7.45665455, Converted: -11.43981266
  515. Biggest difference in row (0, 0), sum -45.922947 vs -53.028908
  516. Layer 2, Token 4 (model.layers.out comparison):
  517. Original tensor sum: -47.965603
  518. Converted tensor sum: -68.008888
  519. Original tensor mean: -5.995700
  520. Converted tensor mean: -8.501111
  521. Mean difference: 4.45314884
  522. Maximum pointwise difference: 12.72673607
  523. Max difference location: (0, 0, 7)
  524. Values at max diff - Original: 10.93319416, Converted: -1.79354143
  525. Biggest difference in row (0, 0), sum -47.965603 vs -68.008888
  526. Layer 3, Token 4 (model.layers.out comparison):
  527. Original tensor sum: -224.689087
  528. Converted tensor sum: -313.872162
  529. Original tensor mean: -28.086136
  530. Converted tensor mean: -39.234020
  531. Mean difference: 11.14788437
  532. Maximum pointwise difference: 20.76882172
  533. Max difference location: (0, 0, 7)
  534. Values at max diff - Original: -9.44140339, Converted: -30.21022415
  535. Biggest difference in row (0, 0), sum -224.689087 vs -313.872162
  536. Layer 4, Token 4 (model.layers.out comparison):
  537. Original tensor sum: -207.206879
  538. Converted tensor sum: -293.960205
  539. Original tensor mean: -25.900860
  540. Converted tensor mean: -36.745026
  541. Mean difference: 10.84416676
  542. Maximum pointwise difference: 23.99023056
  543. Max difference location: (0, 0, 7)
  544. Values at max diff - Original: -13.00386524, Converted: -36.99409485
  545. Biggest difference in row (0, 0), sum -207.206879 vs -293.960205
  546. Layer 5, Token 4 (model.layers.out comparison):
  547. Original tensor sum: -185.594986
  548. Converted tensor sum: -298.454895
  549. Original tensor mean: -23.199373
  550. Converted tensor mean: -37.306862
  551. Mean difference: 14.10748863
  552. Maximum pointwise difference: 27.16260529
  553. Max difference location: (0, 0, 7)
  554. Values at max diff - Original: -13.13538361, Converted: -40.29798889
  555. Biggest difference in row (0, 0), sum -185.594986 vs -298.454895
  556. Layer 6, Token 4 (model.layers.out comparison):
  557. Original tensor sum: -226.913589
  558. Converted tensor sum: -341.315369
  559. Original tensor mean: -28.364199
  560. Converted tensor mean: -42.664421
  561. Mean difference: 14.30021858
  562. Maximum pointwise difference: 27.83255386
  563. Max difference location: (0, 0, 7)
  564. Values at max diff - Original: -20.03028870, Converted: -47.86284256
  565. Biggest difference in row (0, 0), sum -226.913589 vs -341.315369
  566. Layer 7, Token 4 (model.layers.out comparison):
  567. Original tensor sum: -431.530212
  568. Converted tensor sum: -553.909912
  569. Original tensor mean: -53.941277
  570. Converted tensor mean: -69.238739
  571. Mean difference: 15.29746723
  572. Maximum pointwise difference: 28.98126602
  573. Max difference location: (0, 0, 7)
  574. Values at max diff - Original: -44.07294083, Converted: -73.05420685
  575. Biggest difference in row (0, 0), sum -431.530212 vs -553.909912
  576. Layer 8, Token 4 (model.layers.out comparison):
  577. Original tensor sum: -442.906403
  578. Converted tensor sum: -577.351807
  579. Original tensor mean: -55.363300
  580. Converted tensor mean: -72.168976
  581. Mean difference: 16.80567932
  582. Maximum pointwise difference: 24.00010681
  583. Max difference location: (0, 0, 6)
  584. Values at max diff - Original: -44.65782166, Converted: -68.65792847
  585. Biggest difference in row (0, 0), sum -442.906403 vs -577.351807
  586. Layer 9, Token 4 (model.layers.out comparison):
  587. Original tensor sum: -457.224976
  588. Converted tensor sum: -606.660400
  589. Original tensor mean: -57.153122
  590. Converted tensor mean: -75.832550
  591. Mean difference: 18.67943192
  592. Maximum pointwise difference: 31.74385834
  593. Max difference location: (0, 0, 7)
  594. Values at max diff - Original: -37.91560364, Converted: -69.65946198
  595. Biggest difference in row (0, 0), sum -457.224976 vs -606.660400
  596. Layer 10, Token 4 (model.layers.out comparison):
  597. Original tensor sum: -464.368622
  598. Converted tensor sum: -617.020081
  599. Original tensor mean: -58.046078
  600. Converted tensor mean: -77.127510
  601. Mean difference: 19.08143044
  602. Maximum pointwise difference: 31.15077591
  603. Max difference location: (0, 0, 7)
  604. Values at max diff - Original: -40.76456833, Converted: -71.91534424
  605. Biggest difference in row (0, 0), sum -464.368622 vs -617.020081
  606. Layer 11, Token 4 (model.layers.out comparison):
  607. Original tensor sum: -848.365112
  608. Converted tensor sum: -1029.810791
  609. Original tensor mean: -106.045639
  610. Converted tensor mean: -128.726349
  611. Mean difference: 22.68070793
  612. Maximum pointwise difference: 33.58893585
  613. Max difference location: (0, 0, 7)
  614. Values at max diff - Original: -79.47626495, Converted: -113.06520081
  615. Biggest difference in row (0, 0), sum -848.365112 vs -1029.810791
  616. Layer 12, Token 4 (model.layers.out comparison):
  617. Original tensor sum: -856.364807
  618. Converted tensor sum: -1034.875244
  619. Original tensor mean: -107.045601
  620. Converted tensor mean: -129.359406
  621. Mean difference: 22.31380081
  622. Maximum pointwise difference: 34.47047424
  623. Max difference location: (0, 0, 6)
  624. Values at max diff - Original: -94.66131592, Converted: -129.13179016
  625. Biggest difference in row (0, 0), sum -856.364807 vs -1034.875244
  626. Layer 13, Token 4 (model.layers.out comparison):
  627. Original tensor sum: -876.941895
  628. Converted tensor sum: -1070.547119
  629. Original tensor mean: -109.617737
  630. Converted tensor mean: -133.818390
  631. Mean difference: 24.20065689
  632. Maximum pointwise difference: 35.39904022
  633. Max difference location: (0, 0, 6)
  634. Values at max diff - Original: -93.12728119, Converted: -128.52632141
  635. Biggest difference in row (0, 0), sum -876.941895 vs -1070.547119
  636. Layer 14, Token 4 (model.layers.out comparison):
  637. Original tensor sum: -914.061707
  638. Converted tensor sum: -1087.587036
  639. Original tensor mean: -114.257713
  640. Converted tensor mean: -135.948380
  641. Mean difference: 21.69067001
  642. Maximum pointwise difference: 38.16375732
  643. Max difference location: (0, 0, 3)
  644. Values at max diff - Original: -116.85905457, Converted: -155.02281189
  645. Biggest difference in row (0, 0), sum -914.061707 vs -1087.587036
  646. Layer 15, Token 4 (model.layers.out comparison):
  647. Original tensor sum: -1341.588623
  648. Converted tensor sum: -1530.308838
  649. Original tensor mean: -167.698578
  650. Converted tensor mean: -191.288605
  651. Mean difference: 23.59002495
  652. Maximum pointwise difference: 40.53677368
  653. Max difference location: (0, 0, 3)
  654. Values at max diff - Original: -178.01094055, Converted: -218.54771423
  655. Biggest difference in row (0, 0), sum -1341.588623 vs -1530.308838
  656. Layer 0, Token 5 (model.layers.out comparison):
  657. Original tensor sum: 12.113814
  658. Converted tensor sum: 1.907211
  659. Original tensor mean: 1.514227
  660. Converted tensor mean: 0.238401
  661. Mean difference: 1.41127276
  662. Maximum pointwise difference: 3.03878593
  663. Max difference location: (0, 0, 4)
  664. Values at max diff - Original: 1.65080941, Converted: -1.38797641
  665. Biggest difference in row (0, 0), sum 12.113814 vs 1.907211
  666. Layer 1, Token 5 (model.layers.out comparison):
  667. Original tensor sum: 3.328269
  668. Converted tensor sum: 7.141708
  669. Original tensor mean: 0.416034
  670. Converted tensor mean: 0.892714
  671. Mean difference: 1.73651075
  672. Maximum pointwise difference: 4.59446096
  673. Max difference location: (0, 0, 6)
  674. Values at max diff - Original: -0.09795946, Converted: 4.49650145
  675. Biggest difference in row (0, 0), sum 3.328269 vs 7.141708
  676. Layer 2, Token 5 (model.layers.out comparison):
  677. Original tensor sum: -7.901872
  678. Converted tensor sum: 12.052417
  679. Original tensor mean: -0.987734
  680. Converted tensor mean: 1.506552
  681. Mean difference: 2.91872406
  682. Maximum pointwise difference: 6.22109556
  683. Max difference location: (0, 0, 6)
  684. Values at max diff - Original: -1.61789608, Converted: 4.60319948
  685. Biggest difference in row (0, 0), sum -7.901872 vs 12.052417
  686. Layer 3, Token 5 (model.layers.out comparison):
  687. Original tensor sum: -206.706451
  688. Converted tensor sum: 38.517872
  689. Original tensor mean: -25.838306
  690. Converted tensor mean: 4.814734
  691. Mean difference: 30.65304184
  692. Maximum pointwise difference: 36.99858475
  693. Max difference location: (0, 0, 3)
  694. Values at max diff - Original: -30.05084610, Converted: 6.94773912
  695. Biggest difference in row (0, 0), sum -206.706451 vs 38.517872
  696. Layer 4, Token 5 (model.layers.out comparison):
  697. Original tensor sum: -190.520950
  698. Converted tensor sum: 37.683086
  699. Original tensor mean: -23.815119
  700. Converted tensor mean: 4.710386
  701. Mean difference: 28.52550507
  702. Maximum pointwise difference: 36.21773911
  703. Max difference location: (0, 0, 6)
  704. Values at max diff - Original: -29.42410278, Converted: 6.79363585
  705. Biggest difference in row (0, 0), sum -190.520950 vs 37.683086
  706. Layer 5, Token 5 (model.layers.out comparison):
  707. Original tensor sum: -129.615097
  708. Converted tensor sum: 37.492149
  709. Original tensor mean: -16.201887
  710. Converted tensor mean: 4.686519
  711. Mean difference: 20.88840675
  712. Maximum pointwise difference: 30.11524200
  713. Max difference location: (0, 0, 6)
  714. Values at max diff - Original: -22.47561646, Converted: 7.63962507
  715. Biggest difference in row (0, 0), sum -129.615097 vs 37.492149
  716. Layer 6, Token 5 (model.layers.out comparison):
  717. Original tensor sum: -168.733810
  718. Converted tensor sum: 40.467735
  719. Original tensor mean: -21.091726
  720. Converted tensor mean: 5.058467
  721. Mean difference: 26.15019226
  722. Maximum pointwise difference: 35.40680313
  723. Max difference location: (0, 0, 6)
  724. Values at max diff - Original: -27.34041214, Converted: 8.06639194
  725. Biggest difference in row (0, 0), sum -168.733810 vs 40.467735
  726. Layer 7, Token 5 (model.layers.out comparison):
  727. Original tensor sum: -375.952911
  728. Converted tensor sum: 84.494781
  729. Original tensor mean: -46.994114
  730. Converted tensor mean: 10.561848
  731. Mean difference: 57.55596161
  732. Maximum pointwise difference: 65.51675415
  733. Max difference location: (0, 0, 4)
  734. Values at max diff - Original: -54.48764038, Converted: 11.02911663
  735. Biggest difference in row (0, 0), sum -375.952911 vs 84.494781
  736. Layer 8, Token 5 (model.layers.out comparison):
  737. Original tensor sum: -386.335632
  738. Converted tensor sum: 90.464653
  739. Original tensor mean: -48.291954
  740. Converted tensor mean: 11.308082
  741. Mean difference: 59.60003662
  742. Maximum pointwise difference: 70.12364197
  743. Max difference location: (0, 0, 2)
  744. Values at max diff - Original: -54.78602219, Converted: 15.33761883
  745. Biggest difference in row (0, 0), sum -386.335632 vs 90.464653
  746. Layer 9, Token 5 (model.layers.out comparison):
  747. Original tensor sum: -407.643036
  748. Converted tensor sum: 83.872604
  749. Original tensor mean: -50.955379
  750. Converted tensor mean: 10.484076
  751. Mean difference: 61.43945694
  752. Maximum pointwise difference: 73.87419128
  753. Max difference location: (0, 0, 4)
  754. Values at max diff - Original: -62.38755035, Converted: 11.48663712
  755. Biggest difference in row (0, 0), sum -407.643036 vs 83.872604
  756. Layer 10, Token 5 (model.layers.out comparison):
  757. Original tensor sum: -398.133545
  758. Converted tensor sum: 83.310257
  759. Original tensor mean: -49.766693
  760. Converted tensor mean: 10.413782
  761. Mean difference: 60.18047714
  762. Maximum pointwise difference: 71.93079376
  763. Max difference location: (0, 0, 4)
  764. Values at max diff - Original: -61.05200958, Converted: 10.87878418
  765. Biggest difference in row (0, 0), sum -398.133545 vs 83.310257
  766. Layer 11, Token 5 (model.layers.out comparison):
  767. Original tensor sum: -795.896240
  768. Converted tensor sum: 161.559113
  769. Original tensor mean: -99.487030
  770. Converted tensor mean: 20.194889
  771. Mean difference: 119.68191528
  772. Maximum pointwise difference: 136.52630615
  773. Max difference location: (0, 0, 2)
  774. Values at max diff - Original: -112.33381653, Converted: 24.19249153
  775. Biggest difference in row (0, 0), sum -795.896240 vs 161.559113
  776. Layer 12, Token 5 (model.layers.out comparison):
  777. Original tensor sum: -795.492065
  778. Converted tensor sum: 157.049652
  779. Original tensor mean: -99.436508
  780. Converted tensor mean: 19.631207
  781. Mean difference: 119.06771088
  782. Maximum pointwise difference: 138.69142151
  783. Max difference location: (0, 0, 2)
  784. Values at max diff - Original: -115.85614014, Converted: 22.83527946
  785. Biggest difference in row (0, 0), sum -795.492065 vs 157.049652
  786. Layer 13, Token 5 (model.layers.out comparison):
  787. Original tensor sum: -816.679565
  788. Converted tensor sum: 152.172302
  789. Original tensor mean: -102.084946
  790. Converted tensor mean: 19.021538
  791. Mean difference: 121.10647583
  792. Maximum pointwise difference: 142.45770264
  793. Max difference location: (0, 0, 2)
  794. Values at max diff - Original: -120.28170013, Converted: 22.17600250
  795. Biggest difference in row (0, 0), sum -816.679565 vs 152.172302
  796. Layer 14, Token 5 (model.layers.out comparison):
  797. Original tensor sum: -858.712524
  798. Converted tensor sum: 152.386047
  799. Original tensor mean: -107.339066
  800. Converted tensor mean: 19.048256
  801. Mean difference: 126.38732147
  802. Maximum pointwise difference: 150.80645752
  803. Max difference location: (0, 0, 2)
  804. Values at max diff - Original: -129.48748779, Converted: 21.31897736
  805. Biggest difference in row (0, 0), sum -858.712524 vs 152.386047
  806. Layer 15, Token 5 (model.layers.out comparison):
  807. Original tensor sum: -1291.953247
  808. Converted tensor sum: 244.354996
  809. Original tensor mean: -161.494156
  810. Converted tensor mean: 30.544374
  811. Mean difference: 192.03852844
  812. Maximum pointwise difference: 220.75814819
  813. Max difference location: (0, 0, 3)
  814. Values at max diff - Original: -189.25143433, Converted: 31.50671959
  815. Biggest difference in row (0, 0), sum -1291.953247 vs 244.354996
  816. Layer 0, Token 6 (model.layers.out comparison):
  817. Original tensor sum: 4.713745
  818. Converted tensor sum: 11.404326
  819. Original tensor mean: 0.589218
  820. Converted tensor mean: 1.425541
  821. Mean difference: 1.39658785
  822. Maximum pointwise difference: 3.99744058
  823. Max difference location: (0, 0, 3)
  824. Values at max diff - Original: -2.16165113, Converted: 1.83578944
  825. Biggest difference in row (0, 0), sum 4.713745 vs 11.404326
  826. Layer 1, Token 6 (model.layers.out comparison):
  827. Original tensor sum: 2.484277
  828. Converted tensor sum: 9.422175
  829. Original tensor mean: 0.310535
  830. Converted tensor mean: 1.177772
  831. Mean difference: 1.56714785
  832. Maximum pointwise difference: 3.13825679
  833. Max difference location: (0, 0, 4)
  834. Values at max diff - Original: -2.85257578, Converted: 0.28568110
  835. Biggest difference in row (0, 0), sum 2.484277 vs 9.422175
  836. Layer 2, Token 6 (model.layers.out comparison):
  837. Original tensor sum: -4.950438
  838. Converted tensor sum: -1.357174
  839. Original tensor mean: -0.618805
  840. Converted tensor mean: -0.169647
  841. Mean difference: 1.71385837
  842. Maximum pointwise difference: 3.88516402
  843. Max difference location: (0, 0, 0)
  844. Values at max diff - Original: 3.02349472, Converted: -0.86166936
  845. Biggest difference in row (0, 0), sum -4.950438 vs -1.357174
  846. Layer 3, Token 6 (model.layers.out comparison):
  847. Original tensor sum: -125.927612
  848. Converted tensor sum: -106.782318
  849. Original tensor mean: -15.740952
  850. Converted tensor mean: -13.347790
  851. Mean difference: 3.11209679
  852. Maximum pointwise difference: 4.75263119
  853. Max difference location: (0, 0, 2)
  854. Values at max diff - Original: -17.46803665, Converted: -12.71540546
  855. Biggest difference in row (0, 0), sum -125.927612 vs -106.782318
  856. Layer 4, Token 6 (model.layers.out comparison):
  857. Original tensor sum: -139.830460
  858. Converted tensor sum: -126.311844
  859. Original tensor mean: -17.478807
  860. Converted tensor mean: -15.788980
  861. Mean difference: 3.15184307
  862. Maximum pointwise difference: 5.99608994
  863. Max difference location: (0, 0, 3)
  864. Values at max diff - Original: -25.84107971, Converted: -19.84498978
  865. Biggest difference in row (0, 0), sum -139.830460 vs -126.311844
  866. Layer 5, Token 6 (model.layers.out comparison):
  867. Original tensor sum: -142.974274
  868. Converted tensor sum: -73.637054
  869. Original tensor mean: -17.871784
  870. Converted tensor mean: -9.204632
  871. Mean difference: 10.37221718
  872. Maximum pointwise difference: 16.99522591
  873. Max difference location: (0, 0, 5)
  874. Values at max diff - Original: -22.96857643, Converted: -5.97335052
  875. Biggest difference in row (0, 0), sum -142.974274 vs -73.637054
  876. Layer 6, Token 6 (model.layers.out comparison):
  877. Original tensor sum: -180.967728
  878. Converted tensor sum: -69.754128
  879. Original tensor mean: -22.620966
  880. Converted tensor mean: -8.719266
  881. Mean difference: 14.33841133
  882. Maximum pointwise difference: 25.72810745
  883. Max difference location: (0, 0, 3)
  884. Values at max diff - Original: -36.46190262, Converted: -10.73379517
  885. Biggest difference in row (0, 0), sum -180.967728 vs -69.754128
  886. Layer 7, Token 6 (model.layers.out comparison):
  887. Original tensor sum: -390.468323
  888. Converted tensor sum: -284.137634
  889. Original tensor mean: -48.808540
  890. Converted tensor mean: -35.517204
  891. Mean difference: 14.31795502
  892. Maximum pointwise difference: 25.91625977
  893. Max difference location: (0, 0, 3)
  894. Values at max diff - Original: -61.98001099, Converted: -36.06375122
  895. Biggest difference in row (0, 0), sum -390.468323 vs -284.137634
  896. Layer 8, Token 6 (model.layers.out comparison):
  897. Original tensor sum: -325.042450
  898. Converted tensor sum: -284.328186
  899. Original tensor mean: -40.630306
  900. Converted tensor mean: -35.541023
  901. Mean difference: 6.66226053
  902. Maximum pointwise difference: 16.25393486
  903. Max difference location: (0, 0, 0)
  904. Values at max diff - Original: -47.66500854, Converted: -31.41107368
  905. Biggest difference in row (0, 0), sum -325.042450 vs -284.328186
  906. Layer 9, Token 6 (model.layers.out comparison):
  907. Original tensor sum: -350.015503
  908. Converted tensor sum: -313.897308
  909. Original tensor mean: -43.751938
  910. Converted tensor mean: -39.237164
  911. Mean difference: 9.32056522
  912. Maximum pointwise difference: 23.60877037
  913. Max difference location: (0, 0, 0)
  914. Values at max diff - Original: -54.44406891, Converted: -30.83529854
  915. Biggest difference in row (0, 0), sum -350.015503 vs -313.897308
  916. Layer 10, Token 6 (model.layers.out comparison):
  917. Original tensor sum: -375.606720
  918. Converted tensor sum: -330.646790
  919. Original tensor mean: -46.950840
  920. Converted tensor mean: -41.330849
  921. Mean difference: 8.38710022
  922. Maximum pointwise difference: 27.84555435
  923. Max difference location: (0, 0, 0)
  924. Values at max diff - Original: -60.66308594, Converted: -32.81753159
  925. Biggest difference in row (0, 0), sum -375.606720 vs -330.646790
  926. Layer 11, Token 6 (model.layers.out comparison):
  927. Original tensor sum: -764.285278
  928. Converted tensor sum: -730.992798
  929. Original tensor mean: -95.535660
  930. Converted tensor mean: -91.374100
  931. Mean difference: 7.89588118
  932. Maximum pointwise difference: 26.59626007
  933. Max difference location: (0, 0, 0)
  934. Values at max diff - Original: -118.78226471, Converted: -92.18600464
  935. Biggest difference in row (0, 0), sum -764.285278 vs -730.992798
  936. Layer 12, Token 6 (model.layers.out comparison):
  937. Original tensor sum: -777.147827
  938. Converted tensor sum: -765.448669
  939. Original tensor mean: -97.143478
  940. Converted tensor mean: -95.681084
  941. Mean difference: 6.33593750
  942. Maximum pointwise difference: 19.02982330
  943. Max difference location: (0, 0, 0)
  944. Values at max diff - Original: -119.55146790, Converted: -100.52164459
  945. Biggest difference in row (0, 0), sum -777.147827 vs -765.448669
  946. Layer 13, Token 6 (model.layers.out comparison):
  947. Original tensor sum: -787.772400
  948. Converted tensor sum: -777.362915
  949. Original tensor mean: -98.471550
  950. Converted tensor mean: -97.170364
  951. Mean difference: 7.69482183
  952. Maximum pointwise difference: 19.15751648
  953. Max difference location: (0, 0, 0)
  954. Values at max diff - Original: -120.39152527, Converted: -101.23400879
  955. Biggest difference in row (0, 0), sum -787.772400 vs -777.362915
  956. Layer 14, Token 6 (model.layers.out comparison):
  957. Original tensor sum: -883.013428
  958. Converted tensor sum: -881.301514
  959. Original tensor mean: -110.376678
  960. Converted tensor mean: -110.162689
  961. Mean difference: 12.85068035
  962. Maximum pointwise difference: 28.13771820
  963. Max difference location: (0, 0, 0)
  964. Values at max diff - Original: -129.54022217, Converted: -101.40250397
  965. Biggest difference in row (0, 0), sum -883.013428 vs -881.301514
  966. Layer 15, Token 6 (model.layers.out comparison):
  967. Original tensor sum: -1324.892822
  968. Converted tensor sum: -1316.172363
  969. Original tensor mean: -165.611603
  970. Converted tensor mean: -164.521545
  971. Mean difference: 12.77940941
  972. Maximum pointwise difference: 29.43301392
  973. Max difference location: (0, 0, 0)
  974. Values at max diff - Original: -192.78923035, Converted: -163.35621643
  975. Biggest difference in row (0, 0), sum -1324.892822 vs -1316.172363
  976. Layer 0, Token 7 (model.layers.out comparison):
  977. Original tensor sum: 16.302702
  978. Converted tensor sum: 6.534010
  979. Original tensor mean: 2.037838
  980. Converted tensor mean: 0.816751
  981. Mean difference: 1.39780235
  982. Maximum pointwise difference: 4.86297131
  983. Max difference location: (0, 0, 5)
  984. Values at max diff - Original: 4.45225191, Converted: -0.41071916
  985. Biggest difference in row (0, 0), sum 16.302702 vs 6.534010
  986. Layer 1, Token 7 (model.layers.out comparison):
  987. Original tensor sum: 7.949856
  988. Converted tensor sum: 10.515163
  989. Original tensor mean: 0.993732
  990. Converted tensor mean: 1.314395
  991. Mean difference: 1.91308641
  992. Maximum pointwise difference: 3.92083621
  993. Max difference location: (0, 0, 6)
  994. Values at max diff - Original: 1.42750001, Converted: 5.34833622
  995. Biggest difference in row (0, 0), sum 7.949856 vs 10.515163
  996. Layer 2, Token 7 (model.layers.out comparison):
  997. Original tensor sum: 5.224671
  998. Converted tensor sum: 8.502550
  999. Original tensor mean: 0.653084
  1000. Converted tensor mean: 1.062819
  1001. Mean difference: 2.38619947
  1002. Maximum pointwise difference: 6.21067238
  1003. Max difference location: (0, 0, 5)
  1004. Values at max diff - Original: 4.76728964, Converted: -1.44338274
  1005. Biggest difference in row (0, 0), sum 5.224671 vs 8.502550
  1006. Layer 3, Token 7 (model.layers.out comparison):
  1007. Original tensor sum: 13.283526
  1008. Converted tensor sum: 35.439297
  1009. Original tensor mean: 1.660441
  1010. Converted tensor mean: 4.429912
  1011. Mean difference: 3.47373605
  1012. Maximum pointwise difference: 5.22519779
  1013. Max difference location: (0, 0, 1)
  1014. Values at max diff - Original: 1.58731771, Converted: 6.81251574
  1015. Biggest difference in row (0, 0), sum 13.283526 vs 35.439297
  1016. Layer 4, Token 7 (model.layers.out comparison):
  1017. Original tensor sum: 17.744591
  1018. Converted tensor sum: 31.593395
  1019. Original tensor mean: 2.218074
  1020. Converted tensor mean: 3.949174
  1021. Mean difference: 2.68589926
  1022. Maximum pointwise difference: 4.57245827
  1023. Max difference location: (0, 0, 7)
  1024. Values at max diff - Original: -2.52367592, Converted: 2.04878211
  1025. Biggest difference in row (0, 0), sum 17.744591 vs 31.593395
  1026. Layer 5, Token 7 (model.layers.out comparison):
  1027. Original tensor sum: 23.343349
  1028. Converted tensor sum: 33.269924
  1029. Original tensor mean: 2.917919
  1030. Converted tensor mean: 4.158741
  1031. Mean difference: 2.63248682
  1032. Maximum pointwise difference: 5.37845278
  1033. Max difference location: (0, 0, 5)
  1034. Values at max diff - Original: 5.39788294, Converted: 0.01943016
  1035. Biggest difference in row (0, 0), sum 23.343349 vs 33.269924
  1036. Layer 6, Token 7 (model.layers.out comparison):
  1037. Original tensor sum: 23.346264
  1038. Converted tensor sum: 35.443920
  1039. Original tensor mean: 2.918283
  1040. Converted tensor mean: 4.430490
  1041. Mean difference: 2.67119837
  1042. Maximum pointwise difference: 4.63596630
  1043. Max difference location: (0, 0, 5)
  1044. Values at max diff - Original: 6.03884697, Converted: 1.40288091
  1045. Biggest difference in row (0, 0), sum 23.346264 vs 35.443920
  1046. Layer 7, Token 7 (model.layers.out comparison):
  1047. Original tensor sum: 64.039200
  1048. Converted tensor sum: 91.760284
  1049. Original tensor mean: 8.004900
  1050. Converted tensor mean: 11.470036
  1051. Mean difference: 4.01984978
  1052. Maximum pointwise difference: 7.18059826
  1053. Max difference location: (0, 0, 3)
  1054. Values at max diff - Original: 4.17877483, Converted: 11.35937309
  1055. Biggest difference in row (0, 0), sum 64.039200 vs 91.760284
  1056. Layer 8, Token 7 (model.layers.out comparison):
  1057. Original tensor sum: 72.276039
  1058. Converted tensor sum: 93.156998
  1059. Original tensor mean: 9.034505
  1060. Converted tensor mean: 11.644625
  1061. Mean difference: 3.85819149
  1062. Maximum pointwise difference: 7.09706306
  1063. Max difference location: (0, 0, 2)
  1064. Values at max diff - Original: 6.97290230, Converted: 14.06996536
  1065. Biggest difference in row (0, 0), sum 72.276039 vs 93.156998
  1066. Layer 9, Token 7 (model.layers.out comparison):
  1067. Original tensor sum: 77.303429
  1068. Converted tensor sum: 87.750015
  1069. Original tensor mean: 9.662929
  1070. Converted tensor mean: 10.968752
  1071. Mean difference: 3.21908855
  1072. Maximum pointwise difference: 7.22212887
  1073. Max difference location: (0, 0, 2)
  1074. Values at max diff - Original: 7.19689465, Converted: 14.41902351
  1075. Biggest difference in row (0, 0), sum 77.303429 vs 87.750015
  1076. Layer 10, Token 7 (model.layers.out comparison):
  1077. Original tensor sum: 75.555130
  1078. Converted tensor sum: 87.081650
  1079. Original tensor mean: 9.444391
  1080. Converted tensor mean: 10.885206
  1081. Mean difference: 3.37582994
  1082. Maximum pointwise difference: 7.74006128
  1083. Max difference location: (0, 0, 5)
  1084. Values at max diff - Original: 13.60124302, Converted: 5.86118174
  1085. Biggest difference in row (0, 0), sum 75.555130 vs 87.081650
  1086. Layer 11, Token 7 (model.layers.out comparison):
  1087. Original tensor sum: 156.940781
  1088. Converted tensor sum: 159.013306
  1089. Original tensor mean: 19.617598
  1090. Converted tensor mean: 19.876663
  1091. Mean difference: 3.38565111
  1092. Maximum pointwise difference: 8.84408474
  1093. Max difference location: (0, 0, 5)
  1094. Values at max diff - Original: 24.20116806, Converted: 15.35708332
  1095. Biggest difference in row (0, 0), sum 156.940781 vs 159.013306
  1096. Layer 12, Token 7 (model.layers.out comparison):
  1097. Original tensor sum: 154.763428
  1098. Converted tensor sum: 153.900482
  1099. Original tensor mean: 19.345428
  1100. Converted tensor mean: 19.237560
  1101. Mean difference: 3.46122217
  1102. Maximum pointwise difference: 9.50335789
  1103. Max difference location: (0, 0, 5)
  1104. Values at max diff - Original: 24.17844582, Converted: 14.67508793
  1105. Biggest difference in row (0, 0), sum 154.763428 vs 153.900482
  1106. Layer 13, Token 7 (model.layers.out comparison):
  1107. Original tensor sum: 153.990646
  1108. Converted tensor sum: 150.608353
  1109. Original tensor mean: 19.248831
  1110. Converted tensor mean: 18.826044
  1111. Mean difference: 3.53592730
  1112. Maximum pointwise difference: 9.36601925
  1113. Max difference location: (0, 0, 5)
  1114. Values at max diff - Original: 23.90514946, Converted: 14.53913021
  1115. Biggest difference in row (0, 0), sum 153.990646 vs 150.608353
  1116. Layer 14, Token 7 (model.layers.out comparison):
  1117. Original tensor sum: 153.169525
  1118. Converted tensor sum: 133.618896
  1119. Original tensor mean: 19.146191
  1120. Converted tensor mean: 16.702362
  1121. Mean difference: 4.84187126
  1122. Maximum pointwise difference: 11.02708149
  1123. Max difference location: (0, 0, 5)
  1124. Values at max diff - Original: 24.07042313, Converted: 13.04334164
  1125. Biggest difference in row (0, 0), sum 153.169525 vs 133.618896
  1126. Layer 15, Token 7 (model.layers.out comparison):
  1127. Original tensor sum: 256.612762
  1128. Converted tensor sum: 236.694611
  1129. Original tensor mean: 32.076595
  1130. Converted tensor mean: 29.586826
  1131. Mean difference: 4.89619875
  1132. Maximum pointwise difference: 11.06676292
  1133. Max difference location: (0, 0, 5)
  1134. Values at max diff - Original: 34.29892731, Converted: 23.23216438
  1135. Biggest difference in row (0, 0), sum 256.612762 vs 236.694611
  1136. Layer 0, Token 8 (model.layers.out comparison):
  1137. Original tensor sum: 4.551975
  1138. Converted tensor sum: 1.348729
  1139. Original tensor mean: 0.568997
  1140. Converted tensor mean: 0.168591
  1141. Mean difference: 2.05911183
  1142. Maximum pointwise difference: 5.11385345
  1143. Max difference location: (0, 0, 2)
  1144. Values at max diff - Original: 1.91795087, Converted: -3.19590235
  1145. Biggest difference in row (0, 0), sum 4.551975 vs 1.348729
  1146. Layer 1, Token 8 (model.layers.out comparison):
  1147. Original tensor sum: -10.499850
  1148. Converted tensor sum: -11.510830
  1149. Original tensor mean: -1.312481
  1150. Converted tensor mean: -1.438854
  1151. Mean difference: 3.72058988
  1152. Maximum pointwise difference: 7.12741280
  1153. Max difference location: (0, 0, 5)
  1154. Values at max diff - Original: -4.90886450, Converted: 2.21854830
  1155. Biggest difference in row (0, 0), sum -10.499850 vs -11.510830
  1156. Layer 2, Token 8 (model.layers.out comparison):
  1157. Original tensor sum: 21.469618
  1158. Converted tensor sum: 13.045154
  1159. Original tensor mean: 2.683702
  1160. Converted tensor mean: 1.630644
  1161. Mean difference: 4.73055506
  1162. Maximum pointwise difference: 11.87027359
  1163. Max difference location: (0, 0, 2)
  1164. Values at max diff - Original: 6.33750200, Converted: -5.53277111
  1165. Biggest difference in row (0, 0), sum 21.469618 vs 13.045154
  1166. Layer 3, Token 8 (model.layers.out comparison):
  1167. Original tensor sum: 56.933716
  1168. Converted tensor sum: 65.067757
  1169. Original tensor mean: 7.116714
  1170. Converted tensor mean: 8.133470
  1171. Mean difference: 5.21158791
  1172. Maximum pointwise difference: 10.06817722
  1173. Max difference location: (0, 0, 2)
  1174. Values at max diff - Original: 10.73284817, Converted: 0.66467106
  1175. Biggest difference in row (0, 0), sum 56.933716 vs 65.067757
  1176. Layer 4, Token 8 (model.layers.out comparison):
  1177. Original tensor sum: 54.841175
  1178. Converted tensor sum: 58.977600
  1179. Original tensor mean: 6.855147
  1180. Converted tensor mean: 7.372200
  1181. Mean difference: 5.39579868
  1182. Maximum pointwise difference: 10.23285866
  1183. Max difference location: (0, 0, 2)
  1184. Values at max diff - Original: 10.38635254, Converted: 0.15349340
  1185. Biggest difference in row (0, 0), sum 54.841175 vs 58.977600
  1186. Layer 5, Token 8 (model.layers.out comparison):
  1187. Original tensor sum: 59.439285
  1188. Converted tensor sum: 59.979446
  1189. Original tensor mean: 7.429911
  1190. Converted tensor mean: 7.497431
  1191. Mean difference: 5.44655371
  1192. Maximum pointwise difference: 11.05043030
  1193. Max difference location: (0, 0, 2)
  1194. Values at max diff - Original: 9.77372646, Converted: -1.27670395
  1195. Biggest difference in row (0, 0), sum 59.439285 vs 59.979446
  1196. Layer 6, Token 8 (model.layers.out comparison):
  1197. Original tensor sum: 57.398651
  1198. Converted tensor sum: 56.296188
  1199. Original tensor mean: 7.174831
  1200. Converted tensor mean: 7.037024
  1201. Mean difference: 5.29393005
  1202. Maximum pointwise difference: 9.82726002
  1203. Max difference location: (0, 0, 2)
  1204. Values at max diff - Original: 9.26543045, Converted: -0.56182986
  1205. Biggest difference in row (0, 0), sum 57.398651 vs 56.296188
  1206. Layer 7, Token 8 (model.layers.out comparison):
  1207. Original tensor sum: 108.492706
  1208. Converted tensor sum: 119.552338
  1209. Original tensor mean: 13.561588
  1210. Converted tensor mean: 14.944042
  1211. Mean difference: 5.49957895
  1212. Maximum pointwise difference: 11.73512173
  1213. Max difference location: (0, 0, 0)
  1214. Values at max diff - Original: 10.45698833, Converted: 22.19211006
  1215. Biggest difference in row (0, 0), sum 108.492706 vs 119.552338
  1216. Layer 8, Token 8 (model.layers.out comparison):
  1217. Original tensor sum: 106.563354
  1218. Converted tensor sum: 119.608925
  1219. Original tensor mean: 13.320419
  1220. Converted tensor mean: 14.951116
  1221. Mean difference: 4.46781254
  1222. Maximum pointwise difference: 10.82487202
  1223. Max difference location: (0, 0, 0)
  1224. Values at max diff - Original: 9.85585308, Converted: 20.68072510
  1225. Biggest difference in row (0, 0), sum 106.563354 vs 119.608925
  1226. Layer 9, Token 8 (model.layers.out comparison):
  1227. Original tensor sum: 111.512817
  1228. Converted tensor sum: 109.928528
  1229. Original tensor mean: 13.939102
  1230. Converted tensor mean: 13.741066
  1231. Mean difference: 4.52381039
  1232. Maximum pointwise difference: 8.89503384
  1233. Max difference location: (0, 0, 5)
  1234. Values at max diff - Original: 12.07329845, Converted: 20.96833229
  1235. Biggest difference in row (0, 0), sum 111.512817 vs 109.928528
  1236. Layer 10, Token 8 (model.layers.out comparison):
  1237. Original tensor sum: 111.241730
  1238. Converted tensor sum: 103.886688
  1239. Original tensor mean: 13.905216
  1240. Converted tensor mean: 12.985836
  1241. Mean difference: 4.59785748
  1242. Maximum pointwise difference: 8.55565834
  1243. Max difference location: (0, 0, 0)
  1244. Values at max diff - Original: 11.01864815, Converted: 19.57430649
  1245. Biggest difference in row (0, 0), sum 111.241730 vs 103.886688
  1246. Layer 11, Token 8 (model.layers.out comparison):
  1247. Original tensor sum: 194.094177
  1248. Converted tensor sum: 193.564484
  1249. Original tensor mean: 24.261772
  1250. Converted tensor mean: 24.195560
  1251. Mean difference: 4.49120235
  1252. Maximum pointwise difference: 9.88864136
  1253. Max difference location: (0, 0, 0)
  1254. Values at max diff - Original: 22.25957108, Converted: 32.14821243
  1255. Biggest difference in row (0, 0), sum 194.094177 vs 193.564484
  1256. Layer 12, Token 8 (model.layers.out comparison):
  1257. Original tensor sum: 196.658234
  1258. Converted tensor sum: 189.827057
  1259. Original tensor mean: 24.582279
  1260. Converted tensor mean: 23.728382
  1261. Mean difference: 5.10350180
  1262. Maximum pointwise difference: 9.80338287
  1263. Max difference location: (0, 0, 1)
  1264. Values at max diff - Original: 30.23954582, Converted: 20.43616295
  1265. Biggest difference in row (0, 0), sum 196.658234 vs 189.827057
  1266. Layer 13, Token 8 (model.layers.out comparison):
  1267. Original tensor sum: 193.237976
  1268. Converted tensor sum: 184.223190
  1269. Original tensor mean: 24.154747
  1270. Converted tensor mean: 23.027899
  1271. Mean difference: 5.11390686
  1272. Maximum pointwise difference: 10.04300690
  1273. Max difference location: (0, 0, 1)
  1274. Values at max diff - Original: 30.05261230, Converted: 20.00960541
  1275. Biggest difference in row (0, 0), sum 193.237976 vs 184.223190
  1276. Layer 14, Token 8 (model.layers.out comparison):
  1277. Original tensor sum: 183.582977
  1278. Converted tensor sum: 183.402130
  1279. Original tensor mean: 22.947872
  1280. Converted tensor mean: 22.925266
  1281. Mean difference: 5.41123581
  1282. Maximum pointwise difference: 10.28223228
  1283. Max difference location: (0, 0, 0)
  1284. Values at max diff - Original: 19.26763725, Converted: 29.54986954
  1285. Biggest difference in row (0, 0), sum 183.582977 vs 183.402130
  1286. Layer 15, Token 8 (model.layers.out comparison):
  1287. Original tensor sum: 297.650543
  1288. Converted tensor sum: 301.053558
  1289. Original tensor mean: 37.206318
  1290. Converted tensor mean: 37.631695
  1291. Mean difference: 5.31624222
  1292. Maximum pointwise difference: 10.28567123
  1293. Max difference location: (0, 0, 0)
  1294. Values at max diff - Original: 36.54620743, Converted: 46.83187866
  1295. Biggest difference in row (0, 0), sum 297.650543 vs 301.053558
  1296. Layer 0, Token 9 (model.layers.out comparison):
  1297. Original tensor sum: 27.724323
  1298. Converted tensor sum: 7.010333
  1299. Original tensor mean: 3.465540
  1300. Converted tensor mean: 0.876292
  1301. Mean difference: 3.55158758
  1302. Maximum pointwise difference: 7.14975357
  1303. Max difference location: (0, 0, 1)
  1304. Values at max diff - Original: 4.03241444, Converted: -3.11733937
  1305. Biggest difference in row (0, 0), sum 27.724323 vs 7.010333
  1306. Layer 1, Token 9 (model.layers.out comparison):
  1307. Original tensor sum: 17.384836
  1308. Converted tensor sum: 7.348456
  1309. Original tensor mean: 2.173105
  1310. Converted tensor mean: 0.918557
  1311. Mean difference: 3.79201698
  1312. Maximum pointwise difference: 8.55698013
  1313. Max difference location: (0, 0, 3)
  1314. Values at max diff - Original: -0.55471849, Converted: 8.00226116
  1315. Biggest difference in row (0, 0), sum 17.384836 vs 7.348456
  1316. Layer 2, Token 9 (model.layers.out comparison):
  1317. Original tensor sum: 20.318661
  1318. Converted tensor sum: 28.392349
  1319. Original tensor mean: 2.539833
  1320. Converted tensor mean: 3.549044
  1321. Mean difference: 2.94842267
  1322. Maximum pointwise difference: 9.89197159
  1323. Max difference location: (0, 0, 3)
  1324. Values at max diff - Original: -1.05586541, Converted: 8.83610630
  1325. Biggest difference in row (0, 0), sum 20.318661 vs 28.392349
  1326. Layer 3, Token 9 (model.layers.out comparison):
  1327. Original tensor sum: 65.513725
  1328. Converted tensor sum: 84.414536
  1329. Original tensor mean: 8.189216
  1330. Converted tensor mean: 10.551817
  1331. Mean difference: 4.41447163
  1332. Maximum pointwise difference: 10.74111176
  1333. Max difference location: (0, 0, 3)
  1334. Values at max diff - Original: 6.86948347, Converted: 17.61059570
  1335. Biggest difference in row (0, 0), sum 65.513725 vs 84.414536
  1336. Layer 4, Token 9 (model.layers.out comparison):
  1337. Original tensor sum: 61.603691
  1338. Converted tensor sum: 72.172562
  1339. Original tensor mean: 7.700461
  1340. Converted tensor mean: 9.021570
  1341. Mean difference: 4.32150173
  1342. Maximum pointwise difference: 10.51774502
  1343. Max difference location: (0, 0, 3)
  1344. Values at max diff - Original: 4.71584320, Converted: 15.23358822
  1345. Biggest difference in row (0, 0), sum 61.603691 vs 72.172562
  1346. Layer 5, Token 9 (model.layers.out comparison):
  1347. Original tensor sum: 61.554985
  1348. Converted tensor sum: 60.684212
  1349. Original tensor mean: 7.694373
  1350. Converted tensor mean: 7.585526
  1351. Mean difference: 4.84910297
  1352. Maximum pointwise difference: 9.77899742
  1353. Max difference location: (0, 0, 3)
  1354. Values at max diff - Original: 5.03849173, Converted: 14.81748962
  1355. Biggest difference in row (0, 0), sum 61.554985 vs 60.684212
  1356. Layer 6, Token 9 (model.layers.out comparison):
  1357. Original tensor sum: 60.121288
  1358. Converted tensor sum: 61.323517
  1359. Original tensor mean: 7.515161
  1360. Converted tensor mean: 7.665440
  1361. Mean difference: 4.61501122
  1362. Maximum pointwise difference: 10.19813538
  1363. Max difference location: (0, 0, 3)
  1364. Values at max diff - Original: 4.46036100, Converted: 14.65849590
  1365. Biggest difference in row (0, 0), sum 60.121288 vs 61.323517
  1366. Layer 7, Token 9 (model.layers.out comparison):
  1367. Original tensor sum: 120.854408
  1368. Converted tensor sum: 122.564323
  1369. Original tensor mean: 15.106801
  1370. Converted tensor mean: 15.320540
  1371. Mean difference: 4.58281326
  1372. Maximum pointwise difference: 10.81363106
  1373. Max difference location: (0, 0, 3)
  1374. Values at max diff - Original: 11.19677925, Converted: 22.01041031
  1375. Biggest difference in row (0, 0), sum 120.854408 vs 122.564323
  1376. Layer 8, Token 9 (model.layers.out comparison):
  1377. Original tensor sum: 111.411377
  1378. Converted tensor sum: 113.878586
  1379. Original tensor mean: 13.926422
  1380. Converted tensor mean: 14.234823
  1381. Mean difference: 4.80341482
  1382. Maximum pointwise difference: 8.54869747
  1383. Max difference location: (0, 0, 3)
  1384. Values at max diff - Original: 10.95728207, Converted: 19.50597954
  1385. Biggest difference in row (0, 0), sum 111.411377 vs 113.878586
  1386. Layer 9, Token 9 (model.layers.out comparison):
  1387. Original tensor sum: 104.621353
  1388. Converted tensor sum: 99.551331
  1389. Original tensor mean: 13.077669
  1390. Converted tensor mean: 12.443916
  1391. Mean difference: 4.94641495
  1392. Maximum pointwise difference: 7.18619919
  1393. Max difference location: (0, 0, 3)
  1394. Values at max diff - Original: 10.17811966, Converted: 17.36431885
  1395. Biggest difference in row (0, 0), sum 104.621353 vs 99.551331
  1396. Layer 10, Token 9 (model.layers.out comparison):
  1397. Original tensor sum: 105.495895
  1398. Converted tensor sum: 90.669807
  1399. Original tensor mean: 13.186987
  1400. Converted tensor mean: 11.333726
  1401. Mean difference: 4.88313580
  1402. Maximum pointwise difference: 8.44397736
  1403. Max difference location: (0, 0, 6)
  1404. Values at max diff - Original: 11.21555328, Converted: 2.77157593
  1405. Biggest difference in row (0, 0), sum 105.495895 vs 90.669807
  1406. Layer 11, Token 9 (model.layers.out comparison):
  1407. Original tensor sum: 198.914932
  1408. Converted tensor sum: 187.657013
  1409. Original tensor mean: 24.864367
  1410. Converted tensor mean: 23.457127
  1411. Mean difference: 4.87979174
  1412. Maximum pointwise difference: 8.17332649
  1413. Max difference location: (0, 0, 6)
  1414. Values at max diff - Original: 22.94329643, Converted: 14.76996994
  1415. Biggest difference in row (0, 0), sum 198.914932 vs 187.657013
  1416. Layer 12, Token 9 (model.layers.out comparison):
  1417. Original tensor sum: 197.781982
  1418. Converted tensor sum: 182.248840
  1419. Original tensor mean: 24.722748
  1420. Converted tensor mean: 22.781105
  1421. Mean difference: 5.16355371
  1422. Maximum pointwise difference: 9.60578632
  1423. Max difference location: (0, 0, 6)
  1424. Values at max diff - Original: 23.29119301, Converted: 13.68540668
  1425. Biggest difference in row (0, 0), sum 197.781982 vs 182.248840
  1426. Layer 13, Token 9 (model.layers.out comparison):
  1427. Original tensor sum: 191.909027
  1428. Converted tensor sum: 177.667252
  1429. Original tensor mean: 23.988628
  1430. Converted tensor mean: 22.208406
  1431. Mean difference: 5.14386559
  1432. Maximum pointwise difference: 9.20664406
  1433. Max difference location: (0, 0, 6)
  1434. Values at max diff - Original: 22.88940430, Converted: 13.68276024
  1435. Biggest difference in row (0, 0), sum 191.909027 vs 177.667252
  1436. Layer 14, Token 9 (model.layers.out comparison):
  1437. Original tensor sum: 193.112854
  1438. Converted tensor sum: 170.826324
  1439. Original tensor mean: 24.139107
  1440. Converted tensor mean: 21.353291
  1441. Mean difference: 5.67996836
  1442. Maximum pointwise difference: 10.54143143
  1443. Max difference location: (0, 0, 1)
  1444. Values at max diff - Original: 29.37781715, Converted: 18.83638573
  1445. Biggest difference in row (0, 0), sum 193.112854 vs 170.826324
  1446. Layer 15, Token 9 (model.layers.out comparison):
  1447. Original tensor sum: 310.393738
  1448. Converted tensor sum: 295.392517
  1449. Original tensor mean: 38.799217
  1450. Converted tensor mean: 36.924065
  1451. Mean difference: 5.11053467
  1452. Maximum pointwise difference: 9.09804153
  1453. Max difference location: (0, 0, 1)
  1454. Values at max diff - Original: 43.17533493, Converted: 34.07729340
  1455. Biggest difference in row (0, 0), sum 310.393738 vs 295.392517
  1456. Layer 0, Token 10 (model.layers.out comparison):
  1457. Original tensor sum: 11.304202
  1458. Converted tensor sum: 14.919886
  1459. Original tensor mean: 1.413025
  1460. Converted tensor mean: 1.864986
  1461. Mean difference: 1.20558476
  1462. Maximum pointwise difference: 2.02042794
  1463. Max difference location: (0, 0, 1)
  1464. Values at max diff - Original: -0.23466866, Converted: 1.78575933
  1465. Biggest difference in row (0, 0), sum 11.304202 vs 14.919886
  1466. Layer 1, Token 10 (model.layers.out comparison):
  1467. Original tensor sum: 4.380467
  1468. Converted tensor sum: 1.448399
  1469. Original tensor mean: 0.547558
  1470. Converted tensor mean: 0.181050
  1471. Mean difference: 1.55803418
  1472. Maximum pointwise difference: 3.08950615
  1473. Max difference location: (0, 0, 0)
  1474. Values at max diff - Original: 3.13031340, Converted: 0.04080731
  1475. Biggest difference in row (0, 0), sum 4.380467 vs 1.448399
  1476. Layer 2, Token 10 (model.layers.out comparison):
  1477. Original tensor sum: 21.641123
  1478. Converted tensor sum: 18.135971
  1479. Original tensor mean: 2.705140
  1480. Converted tensor mean: 2.266996
  1481. Mean difference: 2.29236317
  1482. Maximum pointwise difference: 5.34974813
  1483. Max difference location: (0, 0, 6)
  1484. Values at max diff - Original: 4.73606253, Converted: -0.61368543
  1485. Biggest difference in row (0, 0), sum 21.641123 vs 18.135971
  1486. Layer 3, Token 10 (model.layers.out comparison):
  1487. Original tensor sum: 84.183029
  1488. Converted tensor sum: 75.554764
  1489. Original tensor mean: 10.522879
  1490. Converted tensor mean: 9.444345
  1491. Mean difference: 2.50477004
  1492. Maximum pointwise difference: 7.33609867
  1493. Max difference location: (0, 0, 6)
  1494. Values at max diff - Original: 11.12465191, Converted: 3.78855324
  1495. Biggest difference in row (0, 0), sum 84.183029 vs 75.554764
  1496. Layer 4, Token 10 (model.layers.out comparison):
  1497. Original tensor sum: 75.952011
  1498. Converted tensor sum: 63.684746
  1499. Original tensor mean: 9.494001
  1500. Converted tensor mean: 7.960593
  1501. Mean difference: 2.89978528
  1502. Maximum pointwise difference: 6.58637476
  1503. Max difference location: (0, 0, 6)
  1504. Values at max diff - Original: 10.07624340, Converted: 3.48986864
  1505. Biggest difference in row (0, 0), sum 75.952011 vs 63.684746
  1506. Layer 5, Token 10 (model.layers.out comparison):
  1507. Original tensor sum: 67.380692
  1508. Converted tensor sum: 51.477894
  1509. Original tensor mean: 8.422586
  1510. Converted tensor mean: 6.434737
  1511. Mean difference: 2.92978549
  1512. Maximum pointwise difference: 6.54403639
  1513. Max difference location: (0, 0, 2)
  1514. Values at max diff - Original: 10.47875404, Converted: 3.93471766
  1515. Biggest difference in row (0, 0), sum 67.380692 vs 51.477894
  1516. Layer 6, Token 10 (model.layers.out comparison):
  1517. Original tensor sum: 64.356155
  1518. Converted tensor sum: 44.292259
  1519. Original tensor mean: 8.044519
  1520. Converted tensor mean: 5.536532
  1521. Mean difference: 3.18394947
  1522. Maximum pointwise difference: 7.18761826
  1523. Max difference location: (0, 0, 6)
  1524. Values at max diff - Original: 9.33854580, Converted: 2.15092754
  1525. Biggest difference in row (0, 0), sum 64.356155 vs 44.292259
  1526. Layer 7, Token 10 (model.layers.out comparison):
  1527. Original tensor sum: 124.955261
  1528. Converted tensor sum: 105.713638
  1529. Original tensor mean: 15.619408
  1530. Converted tensor mean: 13.214205
  1531. Mean difference: 3.17375469
  1532. Maximum pointwise difference: 7.15706635
  1533. Max difference location: (0, 0, 6)
  1534. Values at max diff - Original: 16.18268585, Converted: 9.02561951
  1535. Biggest difference in row (0, 0), sum 124.955261 vs 105.713638
  1536. Layer 8, Token 10 (model.layers.out comparison):
  1537. Original tensor sum: 105.275124
  1538. Converted tensor sum: 92.354050
  1539. Original tensor mean: 13.159390
  1540. Converted tensor mean: 11.544256
  1541. Mean difference: 2.89860010
  1542. Maximum pointwise difference: 6.96542978
  1543. Max difference location: (0, 0, 6)
  1544. Values at max diff - Original: 12.28927994, Converted: 5.32385015
  1545. Biggest difference in row (0, 0), sum 105.275124 vs 92.354050
  1546. Layer 9, Token 10 (model.layers.out comparison):
  1547. Original tensor sum: 89.282066
  1548. Converted tensor sum: 75.157639
  1549. Original tensor mean: 11.160258
  1550. Converted tensor mean: 9.394705
  1551. Mean difference: 2.89608860
  1552. Maximum pointwise difference: 7.40043926
  1553. Max difference location: (0, 0, 6)
  1554. Values at max diff - Original: 9.69557953, Converted: 2.29514027
  1555. Biggest difference in row (0, 0), sum 89.282066 vs 75.157639
  1556. Layer 10, Token 10 (model.layers.out comparison):
  1557. Original tensor sum: 87.814186
  1558. Converted tensor sum: 68.457840
  1559. Original tensor mean: 10.976773
  1560. Converted tensor mean: 8.557230
  1561. Mean difference: 3.06474447
  1562. Maximum pointwise difference: 8.03616142
  1563. Max difference location: (0, 0, 6)
  1564. Values at max diff - Original: 10.20811844, Converted: 2.17195749
  1565. Biggest difference in row (0, 0), sum 87.814186 vs 68.457840
  1566. Layer 11, Token 10 (model.layers.out comparison):
  1567. Original tensor sum: 184.781067
  1568. Converted tensor sum: 170.778610
  1569. Original tensor mean: 23.097633
  1570. Converted tensor mean: 21.347326
  1571. Mean difference: 2.85195446
  1572. Maximum pointwise difference: 6.81012630
  1573. Max difference location: (0, 0, 6)
  1574. Values at max diff - Original: 22.35528374, Converted: 15.54515743
  1575. Biggest difference in row (0, 0), sum 184.781067 vs 170.778610
  1576. Layer 12, Token 10 (model.layers.out comparison):
  1577. Original tensor sum: 187.157104
  1578. Converted tensor sum: 166.325562
  1579. Original tensor mean: 23.394638
  1580. Converted tensor mean: 20.790695
  1581. Mean difference: 3.00816154
  1582. Maximum pointwise difference: 8.29628849
  1583. Max difference location: (0, 0, 6)
  1584. Values at max diff - Original: 23.84814453, Converted: 15.55185604
  1585. Biggest difference in row (0, 0), sum 187.157104 vs 166.325562
  1586. Layer 13, Token 10 (model.layers.out comparison):
  1587. Original tensor sum: 180.577179
  1588. Converted tensor sum: 161.409668
  1589. Original tensor mean: 22.572147
  1590. Converted tensor mean: 20.176208
  1591. Mean difference: 3.22855854
  1592. Maximum pointwise difference: 8.27111149
  1593. Max difference location: (0, 0, 6)
  1594. Values at max diff - Original: 22.88647652, Converted: 14.61536503
  1595. Biggest difference in row (0, 0), sum 180.577179 vs 161.409668
  1596. Layer 14, Token 10 (model.layers.out comparison):
  1597. Original tensor sum: 176.409912
  1598. Converted tensor sum: 155.317413
  1599. Original tensor mean: 22.051239
  1600. Converted tensor mean: 19.414677
  1601. Mean difference: 3.30306578
  1602. Maximum pointwise difference: 8.51622581
  1603. Max difference location: (0, 0, 6)
  1604. Values at max diff - Original: 21.49407005, Converted: 12.97784424
  1605. Biggest difference in row (0, 0), sum 176.409912 vs 155.317413
  1606. Layer 15, Token 10 (model.layers.out comparison):
  1607. Original tensor sum: 303.652618
  1608. Converted tensor sum: 289.143890
  1609. Original tensor mean: 37.956577
  1610. Converted tensor mean: 36.142986
  1611. Mean difference: 3.20148277
  1612. Maximum pointwise difference: 7.65085030
  1613. Max difference location: (0, 0, 6)
  1614. Values at max diff - Original: 38.91091537, Converted: 31.26006508
  1615. Biggest difference in row (0, 0), sum 303.652618 vs 289.143890
  1616. Layer 0, Token 11 (model.layers.out comparison):
  1617. Original tensor sum: 3.868190
  1618. Converted tensor sum: -4.365316
  1619. Original tensor mean: 0.483524
  1620. Converted tensor mean: -0.545665
  1621. Mean difference: 1.47696412
  1622. Maximum pointwise difference: 3.49379730
  1623. Max difference location: (0, 0, 5)
  1624. Values at max diff - Original: 1.60926533, Converted: -1.88453186
  1625. Biggest difference in row (0, 0), sum 3.868190 vs -4.365316
  1626. Layer 1, Token 11 (model.layers.out comparison):
  1627. Original tensor sum: -4.763882
  1628. Converted tensor sum: -8.100720
  1629. Original tensor mean: -0.595485
  1630. Converted tensor mean: -1.012590
  1631. Mean difference: 2.60996270
  1632. Maximum pointwise difference: 4.04230022
  1633. Max difference location: (0, 0, 4)
  1634. Values at max diff - Original: -3.43199134, Converted: 0.61030883
  1635. Biggest difference in row (0, 0), sum -4.763882 vs -8.100720
  1636. Layer 2, Token 11 (model.layers.out comparison):
  1637. Original tensor sum: -8.837991
  1638. Converted tensor sum: -17.355688
  1639. Original tensor mean: -1.104749
  1640. Converted tensor mean: -2.169461
  1641. Mean difference: 3.57004023
  1642. Maximum pointwise difference: 7.78442717
  1643. Max difference location: (0, 0, 4)
  1644. Values at max diff - Original: -2.88003159, Converted: 4.90439558
  1645. Biggest difference in row (0, 0), sum -8.837991 vs -17.355688
  1646. Layer 3, Token 11 (model.layers.out comparison):
  1647. Original tensor sum: -151.825806
  1648. Converted tensor sum: -119.589157
  1649. Original tensor mean: -18.978226
  1650. Converted tensor mean: -14.948645
  1651. Mean difference: 4.57043171
  1652. Maximum pointwise difference: 10.22036552
  1653. Max difference location: (0, 0, 0)
  1654. Values at max diff - Original: -23.12115479, Converted: -12.90078926
  1655. Biggest difference in row (0, 0), sum -151.825806 vs -119.589157
  1656. Layer 4, Token 11 (model.layers.out comparison):
  1657. Original tensor sum: -87.672623
  1658. Converted tensor sum: -49.333626
  1659. Original tensor mean: -10.959078
  1660. Converted tensor mean: -6.166703
  1661. Mean difference: 5.28691673
  1662. Maximum pointwise difference: 12.21502209
  1663. Max difference location: (0, 0, 4)
  1664. Values at max diff - Original: -14.83695984, Converted: -2.62193775
  1665. Biggest difference in row (0, 0), sum -87.672623 vs -49.333626
  1666. Layer 5, Token 11 (model.layers.out comparison):
  1667. Original tensor sum: -19.529230
  1668. Converted tensor sum: 51.921982
  1669. Original tensor mean: -2.441154
  1670. Converted tensor mean: 6.490248
  1671. Mean difference: 8.93140125
  1672. Maximum pointwise difference: 17.95970917
  1673. Max difference location: (0, 0, 4)
  1674. Values at max diff - Original: 2.32367539, Converted: 20.28338432
  1675. Biggest difference in row (0, 0), sum -19.529230 vs 51.921982
  1676. Layer 6, Token 11 (model.layers.out comparison):
  1677. Original tensor sum: -34.699642
  1678. Converted tensor sum: 49.364166
  1679. Original tensor mean: -4.337455
  1680. Converted tensor mean: 6.170521
  1681. Mean difference: 10.50797558
  1682. Maximum pointwise difference: 19.14058685
  1683. Max difference location: (0, 0, 0)
  1684. Values at max diff - Original: -4.33303738, Converted: 14.80754948
  1685. Biggest difference in row (0, 0), sum -34.699642 vs 49.364166
  1686. Layer 7, Token 11 (model.layers.out comparison):
  1687. Original tensor sum: -174.093460
  1688. Converted tensor sum: 116.110802
  1689. Original tensor mean: -21.761683
  1690. Converted tensor mean: 14.513850
  1691. Mean difference: 36.27553177
  1692. Maximum pointwise difference: 45.40389252
  1693. Max difference location: (0, 0, 4)
  1694. Values at max diff - Original: -18.08833313, Converted: 27.31555748
  1695. Biggest difference in row (0, 0), sum -174.093460 vs 116.110802
  1696. Layer 8, Token 11 (model.layers.out comparison):
  1697. Original tensor sum: -95.914619
  1698. Converted tensor sum: 104.116745
  1699. Original tensor mean: -11.989327
  1700. Converted tensor mean: 13.014593
  1701. Mean difference: 25.00392151
  1702. Maximum pointwise difference: 39.39223480
  1703. Max difference location: (0, 0, 7)
  1704. Values at max diff - Original: -25.33579826, Converted: 14.05643463
  1705. Biggest difference in row (0, 0), sum -95.914619 vs 104.116745
  1706. Layer 9, Token 11 (model.layers.out comparison):
  1707. Original tensor sum: -76.038055
  1708. Converted tensor sum: 86.082336
  1709. Original tensor mean: -9.504757
  1710. Converted tensor mean: 10.760292
  1711. Mean difference: 20.92745209
  1712. Maximum pointwise difference: 40.40296555
  1713. Max difference location: (0, 0, 7)
  1714. Values at max diff - Original: -26.76908302, Converted: 13.63388157
  1715. Biggest difference in row (0, 0), sum -76.038055 vs 86.082336
  1716. Layer 10, Token 11 (model.layers.out comparison):
  1717. Original tensor sum: -62.967239
  1718. Converted tensor sum: 79.332596
  1719. Original tensor mean: -7.870905
  1720. Converted tensor mean: 9.916574
  1721. Mean difference: 18.64283180
  1722. Maximum pointwise difference: 40.29864883
  1723. Max difference location: (0, 0, 7)
  1724. Values at max diff - Original: -27.55656052, Converted: 12.74208832
  1725. Biggest difference in row (0, 0), sum -62.967239 vs 79.332596
  1726. Layer 11, Token 11 (model.layers.out comparison):
  1727. Original tensor sum: -348.172638
  1728. Converted tensor sum: 185.268341
  1729. Original tensor mean: -43.521580
  1730. Converted tensor mean: 23.158543
  1731. Mean difference: 66.68012238
  1732. Maximum pointwise difference: 90.25902557
  1733. Max difference location: (0, 0, 0)
  1734. Values at max diff - Original: -54.79597092, Converted: 35.46305466
  1735. Biggest difference in row (0, 0), sum -348.172638 vs 185.268341
  1736. Layer 12, Token 11 (model.layers.out comparison):
  1737. Original tensor sum: -380.460999
  1738. Converted tensor sum: 184.850082
  1739. Original tensor mean: -47.557625
  1740. Converted tensor mean: 23.106260
  1741. Mean difference: 70.66388702
  1742. Maximum pointwise difference: 91.58323669
  1743. Max difference location: (0, 0, 0)
  1744. Values at max diff - Original: -56.39131927, Converted: 35.19191360
  1745. Biggest difference in row (0, 0), sum -380.460999 vs 184.850082
  1746. Layer 13, Token 11 (model.layers.out comparison):
  1747. Original tensor sum: -387.549927
  1748. Converted tensor sum: 178.291550
  1749. Original tensor mean: -48.443741
  1750. Converted tensor mean: 22.286444
  1751. Mean difference: 70.73018646
  1752. Maximum pointwise difference: 92.60649109
  1753. Max difference location: (0, 0, 0)
  1754. Values at max diff - Original: -57.23683167, Converted: 35.36965561
  1755. Biggest difference in row (0, 0), sum -387.549927 vs 178.291550
  1756. Layer 14, Token 11 (model.layers.out comparison):
  1757. Original tensor sum: -381.615417
  1758. Converted tensor sum: 175.841187
  1759. Original tensor mean: -47.701927
  1760. Converted tensor mean: 21.980148
  1761. Mean difference: 69.68207550
  1762. Maximum pointwise difference: 95.39483643
  1763. Max difference location: (0, 0, 0)
  1764. Values at max diff - Original: -61.00698853, Converted: 34.38784409
  1765. Biggest difference in row (0, 0), sum -381.615417 vs 175.841187
  1766. Layer 15, Token 11 (model.layers.out comparison):
  1767. Original tensor sum: -791.898560
  1768. Converted tensor sum: 313.297852
  1769. Original tensor mean: -98.987320
  1770. Converted tensor mean: 39.162231
  1771. Mean difference: 138.14956665
  1772. Maximum pointwise difference: 174.31031799
  1773. Max difference location: (0, 0, 0)
  1774. Values at max diff - Original: -120.39865875, Converted: 53.91165924
  1775. Biggest difference in row (0, 0), sum -791.898560 vs 313.297852
  1776. Layer 0, Token 12 (model.layers.out comparison):
  1777. Original tensor sum: 17.494652
  1778. Converted tensor sum: -39.301899
  1779. Original tensor mean: 2.186831
  1780. Converted tensor mean: -4.912737
  1781. Mean difference: 8.11834240
  1782. Maximum pointwise difference: 15.19715595
  1783. Max difference location: (0, 0, 2)
  1784. Values at max diff - Original: 2.70196438, Converted: -12.49519157
  1785. Biggest difference in row (0, 0), sum 17.494652 vs -39.301899
  1786. Layer 1, Token 12 (model.layers.out comparison):
  1787. Original tensor sum: 11.314701
  1788. Converted tensor sum: -35.014473
  1789. Original tensor mean: 1.414338
  1790. Converted tensor mean: -4.376809
  1791. Mean difference: 7.67025709
  1792. Maximum pointwise difference: 15.05980301
  1793. Max difference location: (0, 0, 2)
  1794. Values at max diff - Original: 2.28716040, Converted: -12.77264309
  1795. Biggest difference in row (0, 0), sum 11.314701 vs -35.014473
  1796. Layer 2, Token 12 (model.layers.out comparison):
  1797. Original tensor sum: 3.520873
  1798. Converted tensor sum: -23.351210
  1799. Original tensor mean: 0.440109
  1800. Converted tensor mean: -2.918901
  1801. Mean difference: 7.09708309
  1802. Maximum pointwise difference: 10.56869507
  1803. Max difference location: (0, 0, 7)
  1804. Values at max diff - Original: 0.29396084, Converted: -10.27473450
  1805. Biggest difference in row (0, 0), sum 3.520873 vs -23.351210
  1806. Layer 3, Token 12 (model.layers.out comparison):
  1807. Original tensor sum: -38.507721
  1808. Converted tensor sum: -65.860725
  1809. Original tensor mean: -4.813465
  1810. Converted tensor mean: -8.232591
  1811. Mean difference: 6.29614639
  1812. Maximum pointwise difference: 10.23156357
  1813. Max difference location: (0, 0, 7)
  1814. Values at max diff - Original: -4.52744627, Converted: -14.75901031
  1815. Biggest difference in row (0, 0), sum -38.507721 vs -65.860725
  1816. Layer 4, Token 12 (model.layers.out comparison):
  1817. Original tensor sum: -25.538549
  1818. Converted tensor sum: -16.346577
  1819. Original tensor mean: -3.192319
  1820. Converted tensor mean: -2.043322
  1821. Mean difference: 5.56114197
  1822. Maximum pointwise difference: 11.51591301
  1823. Max difference location: (0, 0, 6)
  1824. Values at max diff - Original: -9.10746288, Converted: 2.40844989
  1825. Biggest difference in row (0, 0), sum -25.538549 vs -16.346577
  1826. Layer 5, Token 12 (model.layers.out comparison):
  1827. Original tensor sum: 5.103131
  1828. Converted tensor sum: -11.820143
  1829. Original tensor mean: 0.637891
  1830. Converted tensor mean: -1.477518
  1831. Mean difference: 6.80205250
  1832. Maximum pointwise difference: 11.26421928
  1833. Max difference location: (0, 0, 1)
  1834. Values at max diff - Original: 5.06476593, Converted: -6.19945335
  1835. Biggest difference in row (0, 0), sum 5.103131 vs -11.820143
  1836. Layer 6, Token 12 (model.layers.out comparison):
  1837. Original tensor sum: 1.231229
  1838. Converted tensor sum: -13.329983
  1839. Original tensor mean: 0.153904
  1840. Converted tensor mean: -1.666248
  1841. Mean difference: 7.36224794
  1842. Maximum pointwise difference: 11.85875893
  1843. Max difference location: (0, 0, 1)
  1844. Values at max diff - Original: 5.86865807, Converted: -5.99010086
  1845. Biggest difference in row (0, 0), sum 1.231229 vs -13.329983
  1846. Layer 7, Token 12 (model.layers.out comparison):
  1847. Original tensor sum: 49.883171
  1848. Converted tensor sum: -138.587738
  1849. Original tensor mean: 6.235396
  1850. Converted tensor mean: -17.323467
  1851. Mean difference: 23.55886269
  1852. Maximum pointwise difference: 38.93606567
  1853. Max difference location: (0, 0, 1)
  1854. Values at max diff - Original: 13.31498432, Converted: -25.62108231
  1855. Biggest difference in row (0, 0), sum 49.883171 vs -138.587738
  1856. Layer 8, Token 12 (model.layers.out comparison):
  1857. Original tensor sum: 32.997459
  1858. Converted tensor sum: -79.532417
  1859. Original tensor mean: 4.124682
  1860. Converted tensor mean: -9.941552
  1861. Mean difference: 15.04267120
  1862. Maximum pointwise difference: 28.15183258
  1863. Max difference location: (0, 0, 1)
  1864. Values at max diff - Original: 10.99009418, Converted: -17.16173744
  1865. Biggest difference in row (0, 0), sum 32.997459 vs -79.532417
  1866. Layer 9, Token 12 (model.layers.out comparison):
  1867. Original tensor sum: 30.462442
  1868. Converted tensor sum: -58.022911
  1869. Original tensor mean: 3.807805
  1870. Converted tensor mean: -7.252864
  1871. Mean difference: 13.06616974
  1872. Maximum pointwise difference: 26.93473625
  1873. Max difference location: (0, 0, 1)
  1874. Values at max diff - Original: 10.51771736, Converted: -16.41701889
  1875. Biggest difference in row (0, 0), sum 30.462442 vs -58.022911
  1876. Layer 10, Token 12 (model.layers.out comparison):
  1877. Original tensor sum: 31.758196
  1878. Converted tensor sum: -31.289818
  1879. Original tensor mean: 3.969774
  1880. Converted tensor mean: -3.911227
  1881. Mean difference: 11.64717674
  1882. Maximum pointwise difference: 25.19077682
  1883. Max difference location: (0, 0, 1)
  1884. Values at max diff - Original: 10.60759830, Converted: -14.58317757
  1885. Biggest difference in row (0, 0), sum 31.758196 vs -31.289818
  1886. Layer 11, Token 12 (model.layers.out comparison):
  1887. Original tensor sum: 64.195580
  1888. Converted tensor sum: -290.078918
  1889. Original tensor mean: 8.024447
  1890. Converted tensor mean: -36.259865
  1891. Mean difference: 44.28431320
  1892. Maximum pointwise difference: 58.32298279
  1893. Max difference location: (0, 0, 1)
  1894. Values at max diff - Original: 14.01799965, Converted: -44.30498123
  1895. Biggest difference in row (0, 0), sum 64.195580 vs -290.078918
  1896. Layer 12, Token 12 (model.layers.out comparison):
  1897. Original tensor sum: 65.652679
  1898. Converted tensor sum: -300.691650
  1899. Original tensor mean: 8.206585
  1900. Converted tensor mean: -37.586456
  1901. Mean difference: 45.79303741
  1902. Maximum pointwise difference: 64.50979614
  1903. Max difference location: (0, 0, 1)
  1904. Values at max diff - Original: 13.89292812, Converted: -50.61687088
  1905. Biggest difference in row (0, 0), sum 65.652679 vs -300.691650
  1906. Layer 13, Token 12 (model.layers.out comparison):
  1907. Original tensor sum: 64.880409
  1908. Converted tensor sum: -292.294403
  1909. Original tensor mean: 8.110051
  1910. Converted tensor mean: -36.536800
  1911. Mean difference: 44.64685059
  1912. Maximum pointwise difference: 61.03430176
  1913. Max difference location: (0, 0, 2)
  1914. Values at max diff - Original: 8.34301949, Converted: -52.69128418
  1915. Biggest difference in row (0, 0), sum 64.880409 vs -292.294403
  1916. Layer 14, Token 12 (model.layers.out comparison):
  1917. Original tensor sum: 55.352615
  1918. Converted tensor sum: -232.615005
  1919. Original tensor mean: 6.919077
  1920. Converted tensor mean: -29.076876
  1921. Mean difference: 35.99595261
  1922. Maximum pointwise difference: 69.32642365
  1923. Max difference location: (0, 0, 2)
  1924. Values at max diff - Original: 7.07370424, Converted: -62.25271606
  1925. Biggest difference in row (0, 0), sum 55.352615 vs -232.615005
  1926. Layer 15, Token 12 (model.layers.out comparison):
  1927. Original tensor sum: 191.366241
  1928. Converted tensor sum: -607.544556
  1929. Original tensor mean: 23.920780
  1930. Converted tensor mean: -75.943069
  1931. Mean difference: 99.86384583
  1932. Maximum pointwise difference: 121.99198914
  1933. Max difference location: (0, 0, 0)
  1934. Values at max diff - Original: 33.01739502, Converted: -88.97459412
  1935. Biggest difference in row (0, 0), sum 191.366241 vs -607.544556
  1936. Layer 0, Token 13 (model.layers.out comparison):
  1937. Original tensor sum: 28.716766
  1938. Converted tensor sum: 24.262428
  1939. Original tensor mean: 3.589596
  1940. Converted tensor mean: 3.032804
  1941. Mean difference: 2.20962214
  1942. Maximum pointwise difference: 5.77315617
  1943. Max difference location: (0, 0, 4)
  1944. Values at max diff - Original: 4.62014198, Converted: -1.15301442
  1945. Biggest difference in row (0, 0), sum 28.716766 vs 24.262428
  1946. Layer 1, Token 13 (model.layers.out comparison):
  1947. Original tensor sum: 18.283722
  1948. Converted tensor sum: 16.804958
  1949. Original tensor mean: 2.285465
  1950. Converted tensor mean: 2.100620
  1951. Mean difference: 2.44061017
  1952. Maximum pointwise difference: 5.48099232
  1953. Max difference location: (0, 0, 7)
  1954. Values at max diff - Original: -7.47550392, Converted: -1.99451160
  1955. Biggest difference in row (0, 0), sum 18.283722 vs 16.804958
  1956. Layer 2, Token 13 (model.layers.out comparison):
  1957. Original tensor sum: 14.973861
  1958. Converted tensor sum: 10.670280
  1959. Original tensor mean: 1.871733
  1960. Converted tensor mean: 1.333785
  1961. Mean difference: 2.94856715
  1962. Maximum pointwise difference: 6.09164524
  1963. Max difference location: (0, 0, 4)
  1964. Values at max diff - Original: 2.11467242, Converted: -3.97697282
  1965. Biggest difference in row (0, 0), sum 14.973861 vs 10.670280
  1966. Layer 3, Token 13 (model.layers.out comparison):
  1967. Original tensor sum: 62.116623
  1968. Converted tensor sum: 46.581398
  1969. Original tensor mean: 7.764578
  1970. Converted tensor mean: 5.822675
  1971. Mean difference: 3.59710693
  1972. Maximum pointwise difference: 6.89595842
  1973. Max difference location: (0, 0, 2)
  1974. Values at max diff - Original: 11.14201260, Converted: 4.24605417
  1975. Biggest difference in row (0, 0), sum 62.116623 vs 46.581398
  1976. Layer 4, Token 13 (model.layers.out comparison):
  1977. Original tensor sum: 65.792244
  1978. Converted tensor sum: 43.042854
  1979. Original tensor mean: 8.224030
  1980. Converted tensor mean: 5.380357
  1981. Mean difference: 3.63414001
  1982. Maximum pointwise difference: 8.06606770
  1983. Max difference location: (0, 0, 4)
  1984. Values at max diff - Original: 9.46925735, Converted: 1.40318930
  1985. Biggest difference in row (0, 0), sum 65.792244 vs 43.042854
  1986. Layer 5, Token 13 (model.layers.out comparison):
  1987. Original tensor sum: 60.294563
  1988. Converted tensor sum: 38.709320
  1989. Original tensor mean: 7.536820
  1990. Converted tensor mean: 4.838665
  1991. Mean difference: 4.29471397
  1992. Maximum pointwise difference: 9.28423500
  1993. Max difference location: (0, 0, 4)
  1994. Values at max diff - Original: 9.56281090, Converted: 0.27857587
  1995. Biggest difference in row (0, 0), sum 60.294563 vs 38.709320
  1996. Layer 6, Token 13 (model.layers.out comparison):
  1997. Original tensor sum: 60.864697
  1998. Converted tensor sum: 41.897995
  1999. Original tensor mean: 7.608087
  2000. Converted tensor mean: 5.237249
  2001. Mean difference: 4.15325356
  2002. Maximum pointwise difference: 7.30325747
  2003. Max difference location: (0, 0, 2)
  2004. Values at max diff - Original: 11.01063633, Converted: 3.70737886
  2005. Biggest difference in row (0, 0), sum 60.864697 vs 41.897995
  2006. Layer 7, Token 13 (model.layers.out comparison):
  2007. Original tensor sum: 124.166924
  2008. Converted tensor sum: 107.577675
  2009. Original tensor mean: 15.520865
  2010. Converted tensor mean: 13.447209
  2011. Mean difference: 4.08049011
  2012. Maximum pointwise difference: 7.30880928
  2013. Max difference location: (0, 0, 2)
  2014. Values at max diff - Original: 17.63167572, Converted: 10.32286644
  2015. Biggest difference in row (0, 0), sum 124.166924 vs 107.577675
  2016. Layer 8, Token 13 (model.layers.out comparison):
  2017. Original tensor sum: 114.534744
  2018. Converted tensor sum: 106.782104
  2019. Original tensor mean: 14.316843
  2020. Converted tensor mean: 13.347763
  2021. Mean difference: 3.79455638
  2022. Maximum pointwise difference: 8.56559753
  2023. Max difference location: (0, 0, 4)
  2024. Values at max diff - Original: 18.64526367, Converted: 10.07966614
  2025. Biggest difference in row (0, 0), sum 114.534744 vs 106.782104
  2026. Layer 9, Token 13 (model.layers.out comparison):
  2027. Original tensor sum: 111.904816
  2028. Converted tensor sum: 90.398567
  2029. Original tensor mean: 13.988102
  2030. Converted tensor mean: 11.299821
  2031. Mean difference: 4.39770985
  2032. Maximum pointwise difference: 12.01837921
  2033. Max difference location: (0, 0, 4)
  2034. Values at max diff - Original: 18.37693977, Converted: 6.35856009
  2035. Biggest difference in row (0, 0), sum 111.904816 vs 90.398567
  2036. Layer 10, Token 13 (model.layers.out comparison):
  2037. Original tensor sum: 106.496719
  2038. Converted tensor sum: 84.186646
  2039. Original tensor mean: 13.312090
  2040. Converted tensor mean: 10.523331
  2041. Mean difference: 4.35723734
  2042. Maximum pointwise difference: 11.76342964
  2043. Max difference location: (0, 0, 4)
  2044. Values at max diff - Original: 17.81115723, Converted: 6.04772711
  2045. Biggest difference in row (0, 0), sum 106.496719 vs 84.186646
  2046. Layer 11, Token 13 (model.layers.out comparison):
  2047. Original tensor sum: 197.848022
  2048. Converted tensor sum: 191.943436
  2049. Original tensor mean: 24.731003
  2050. Converted tensor mean: 23.992929
  2051. Mean difference: 3.31890941
  2052. Maximum pointwise difference: 10.13029099
  2053. Max difference location: (0, 0, 4)
  2054. Values at max diff - Original: 27.00849915, Converted: 16.87820816
  2055. Biggest difference in row (0, 0), sum 197.848022 vs 191.943436
  2056. Layer 12, Token 13 (model.layers.out comparison):
  2057. Original tensor sum: 197.513275
  2058. Converted tensor sum: 189.807312
  2059. Original tensor mean: 24.689159
  2060. Converted tensor mean: 23.725914
  2061. Mean difference: 3.50938702
  2062. Maximum pointwise difference: 10.66487598
  2063. Max difference location: (0, 0, 4)
  2064. Values at max diff - Original: 26.39979744, Converted: 15.73492146
  2065. Biggest difference in row (0, 0), sum 197.513275 vs 189.807312
  2066. Layer 13, Token 13 (model.layers.out comparison):
  2067. Original tensor sum: 193.055618
  2068. Converted tensor sum: 185.801392
  2069. Original tensor mean: 24.131952
  2070. Converted tensor mean: 23.225174
  2071. Mean difference: 3.32275867
  2072. Maximum pointwise difference: 10.17280674
  2073. Max difference location: (0, 0, 4)
  2074. Values at max diff - Original: 25.57653046, Converted: 15.40372372
  2075. Biggest difference in row (0, 0), sum 193.055618 vs 185.801392
  2076. Layer 14, Token 13 (model.layers.out comparison):
  2077. Original tensor sum: 190.084717
  2078. Converted tensor sum: 186.092697
  2079. Original tensor mean: 23.760590
  2080. Converted tensor mean: 23.261587
  2081. Mean difference: 3.19069362
  2082. Maximum pointwise difference: 9.42493057
  2083. Max difference location: (0, 0, 4)
  2084. Values at max diff - Original: 24.81001282, Converted: 15.38508224
  2085. Biggest difference in row (0, 0), sum 190.084717 vs 186.092697
  2086. Layer 15, Token 13 (model.layers.out comparison):
  2087. Original tensor sum: 319.170319
  2088. Converted tensor sum: 323.837036
  2089. Original tensor mean: 39.896290
  2090. Converted tensor mean: 40.479630
  2091. Mean difference: 3.55193925
  2092. Maximum pointwise difference: 8.15688324
  2093. Max difference location: (0, 0, 4)
  2094. Values at max diff - Original: 46.74212265, Converted: 38.58523941
  2095. Biggest difference in row (0, 0), sum 319.170319 vs 323.837036
  2096. Layer 0, Token 14 (model.layers.out comparison):
  2097. Original tensor sum: 60.062901
  2098. Converted tensor sum: 42.401054
  2099. Original tensor mean: 7.507863
  2100. Converted tensor mean: 5.300132
  2101. Mean difference: 2.97920632
  2102. Maximum pointwise difference: 7.75320148
  2103. Max difference location: (0, 0, 4)
  2104. Values at max diff - Original: 13.24933434, Converted: 5.49613285
  2105. Biggest difference in row (0, 0), sum 60.062901 vs 42.401054
  2106. Layer 1, Token 14 (model.layers.out comparison):
  2107. Original tensor sum: 48.843086
  2108. Converted tensor sum: 34.002205
  2109. Original tensor mean: 6.105386
  2110. Converted tensor mean: 4.250276
  2111. Mean difference: 2.82561874
  2112. Maximum pointwise difference: 7.41196299
  2113. Max difference location: (0, 0, 4)
  2114. Values at max diff - Original: 12.44728756, Converted: 5.03532457
  2115. Biggest difference in row (0, 0), sum 48.843086 vs 34.002205
  2116. Layer 2, Token 14 (model.layers.out comparison):
  2117. Original tensor sum: 49.100876
  2118. Converted tensor sum: 29.831078
  2119. Original tensor mean: 6.137609
  2120. Converted tensor mean: 3.728885
  2121. Mean difference: 3.44625640
  2122. Maximum pointwise difference: 8.00705624
  2123. Max difference location: (0, 0, 4)
  2124. Values at max diff - Original: 12.05760670, Converted: 4.05055046
  2125. Biggest difference in row (0, 0), sum 49.100876 vs 29.831078
  2126. Layer 3, Token 14 (model.layers.out comparison):
  2127. Original tensor sum: 94.051392
  2128. Converted tensor sum: 85.936119
  2129. Original tensor mean: 11.756424
  2130. Converted tensor mean: 10.742015
  2131. Mean difference: 3.43988085
  2132. Maximum pointwise difference: 6.90394783
  2133. Max difference location: (0, 0, 4)
  2134. Values at max diff - Original: 18.44681168, Converted: 11.54286385
  2135. Biggest difference in row (0, 0), sum 94.051392 vs 85.936119
  2136. Layer 4, Token 14 (model.layers.out comparison):
  2137. Original tensor sum: 90.357742
  2138. Converted tensor sum: 82.357994
  2139. Original tensor mean: 11.294718
  2140. Converted tensor mean: 10.294749
  2141. Mean difference: 3.55732656
  2142. Maximum pointwise difference: 7.83766174
  2143. Max difference location: (0, 0, 4)
  2144. Values at max diff - Original: 19.03264809, Converted: 11.19498634
  2145. Biggest difference in row (0, 0), sum 90.357742 vs 82.357994
  2146. Layer 5, Token 14 (model.layers.out comparison):
  2147. Original tensor sum: 84.158882
  2148. Converted tensor sum: 72.302864
  2149. Original tensor mean: 10.519860
  2150. Converted tensor mean: 9.037858
  2151. Mean difference: 3.79493260
  2152. Maximum pointwise difference: 9.27737904
  2153. Max difference location: (0, 0, 4)
  2154. Values at max diff - Original: 18.81698799, Converted: 9.53960896
  2155. Biggest difference in row (0, 0), sum 84.158882 vs 72.302864
  2156. Layer 6, Token 14 (model.layers.out comparison):
  2157. Original tensor sum: 82.342606
  2158. Converted tensor sum: 74.838448
  2159. Original tensor mean: 10.292826
  2160. Converted tensor mean: 9.354806
  2161. Mean difference: 3.72385550
  2162. Maximum pointwise difference: 8.27861023
  2163. Max difference location: (0, 0, 4)
  2164. Values at max diff - Original: 18.35614967, Converted: 10.07753944
  2165. Biggest difference in row (0, 0), sum 82.342606 vs 74.838448
  2166. Layer 7, Token 14 (model.layers.out comparison):
  2167. Original tensor sum: 152.811584
  2168. Converted tensor sum: 143.282593
  2169. Original tensor mean: 19.101448
  2170. Converted tensor mean: 17.910324
  2171. Mean difference: 3.79641771
  2172. Maximum pointwise difference: 8.94160843
  2173. Max difference location: (0, 0, 4)
  2174. Values at max diff - Original: 28.97978973, Converted: 20.03818130
  2175. Biggest difference in row (0, 0), sum 152.811584 vs 143.282593
  2176. Layer 8, Token 14 (model.layers.out comparison):
  2177. Original tensor sum: 134.962891
  2178. Converted tensor sum: 135.762573
  2179. Original tensor mean: 16.870361
  2180. Converted tensor mean: 16.970322
  2181. Mean difference: 3.42910838
  2182. Maximum pointwise difference: 6.22266769
  2183. Max difference location: (0, 0, 4)
  2184. Values at max diff - Original: 27.13297844, Converted: 20.91031075
  2185. Biggest difference in row (0, 0), sum 134.962891 vs 135.762573
  2186. Layer 9, Token 14 (model.layers.out comparison):
  2187. Original tensor sum: 131.262939
  2188. Converted tensor sum: 130.663895
  2189. Original tensor mean: 16.407867
  2190. Converted tensor mean: 16.332987
  2191. Mean difference: 3.14643574
  2192. Maximum pointwise difference: 6.41224289
  2193. Max difference location: (0, 0, 4)
  2194. Values at max diff - Original: 25.90853310, Converted: 19.49629021
  2195. Biggest difference in row (0, 0), sum 131.262939 vs 130.663895
  2196. Layer 10, Token 14 (model.layers.out comparison):
  2197. Original tensor sum: 130.994781
  2198. Converted tensor sum: 121.948547
  2199. Original tensor mean: 16.374348
  2200. Converted tensor mean: 15.243568
  2201. Mean difference: 3.14505911
  2202. Maximum pointwise difference: 6.92271805
  2203. Max difference location: (0, 0, 4)
  2204. Values at max diff - Original: 25.71545982, Converted: 18.79274178
  2205. Biggest difference in row (0, 0), sum 130.994781 vs 121.948547
  2206. Layer 11, Token 14 (model.layers.out comparison):
  2207. Original tensor sum: 227.322296
  2208. Converted tensor sum: 221.945038
  2209. Original tensor mean: 28.415287
  2210. Converted tensor mean: 27.743130
  2211. Mean difference: 2.92038918
  2212. Maximum pointwise difference: 6.72454262
  2213. Max difference location: (0, 0, 4)
  2214. Values at max diff - Original: 35.09742355, Converted: 28.37288094
  2215. Biggest difference in row (0, 0), sum 227.322296 vs 221.945038
  2216. Layer 12, Token 14 (model.layers.out comparison):
  2217. Original tensor sum: 226.411957
  2218. Converted tensor sum: 219.124207
  2219. Original tensor mean: 28.301495
  2220. Converted tensor mean: 27.390526
  2221. Mean difference: 3.00309324
  2222. Maximum pointwise difference: 5.31435776
  2223. Max difference location: (0, 0, 4)
  2224. Values at max diff - Original: 32.55270767, Converted: 27.23834991
  2225. Biggest difference in row (0, 0), sum 226.411957 vs 219.124207
  2226. Layer 13, Token 14 (model.layers.out comparison):
  2227. Original tensor sum: 222.480804
  2228. Converted tensor sum: 215.029236
  2229. Original tensor mean: 27.810101
  2230. Converted tensor mean: 26.878654
  2231. Mean difference: 3.01644969
  2232. Maximum pointwise difference: 5.75550079
  2233. Max difference location: (0, 0, 4)
  2234. Values at max diff - Original: 32.34063721, Converted: 26.58513641
  2235. Biggest difference in row (0, 0), sum 222.480804 vs 215.029236
  2236. Layer 14, Token 14 (model.layers.out comparison):
  2237. Original tensor sum: 217.584625
  2238. Converted tensor sum: 210.219940
  2239. Original tensor mean: 27.198078
  2240. Converted tensor mean: 26.277493
  2241. Mean difference: 3.42921877
  2242. Maximum pointwise difference: 5.59035873
  2243. Max difference location: (0, 0, 4)
  2244. Values at max diff - Original: 31.23370743, Converted: 25.64334869
  2245. Biggest difference in row (0, 0), sum 217.584625 vs 210.219940
  2246. Layer 15, Token 14 (model.layers.out comparison):
  2247. Original tensor sum: 347.902100
  2248. Converted tensor sum: 344.275635
  2249. Original tensor mean: 43.487762
  2250. Converted tensor mean: 43.034454
  2251. Mean difference: 3.27294016
  2252. Maximum pointwise difference: 5.50515747
  2253. Max difference location: (0, 0, 6)
  2254. Values at max diff - Original: 41.73074341, Converted: 47.23590088
  2255. Biggest difference in row (0, 0), sum 347.902100 vs 344.275635
  2256. Layer 0, Token 15 (model.layers.out comparison):
  2257. Original tensor sum: 2.268566
  2258. Converted tensor sum: -1.956201
  2259. Original tensor mean: 0.283571
  2260. Converted tensor mean: -0.244525
  2261. Mean difference: 1.30659735
  2262. Maximum pointwise difference: 3.65664506
  2263. Max difference location: (0, 0, 0)
  2264. Values at max diff - Original: 3.25675011, Converted: -0.39989486
  2265. Biggest difference in row (0, 0), sum 2.268566 vs -1.956201
  2266. Layer 1, Token 15 (model.layers.out comparison):
  2267. Original tensor sum: -3.244995
  2268. Converted tensor sum: -0.596967
  2269. Original tensor mean: -0.405624
  2270. Converted tensor mean: -0.074621
  2271. Mean difference: 1.73462176
  2272. Maximum pointwise difference: 3.99903250
  2273. Max difference location: (0, 0, 0)
  2274. Values at max diff - Original: 2.07227492, Converted: -1.92675745
  2275. Biggest difference in row (0, 0), sum -3.244995 vs -0.596967
  2276. Layer 2, Token 15 (model.layers.out comparison):
  2277. Original tensor sum: 18.643393
  2278. Converted tensor sum: -7.624215
  2279. Original tensor mean: 2.330424
  2280. Converted tensor mean: -0.953027
  2281. Mean difference: 3.99837518
  2282. Maximum pointwise difference: 9.85657215
  2283. Max difference location: (0, 0, 0)
  2284. Values at max diff - Original: 9.41628456, Converted: -0.44028741
  2285. Biggest difference in row (0, 0), sum 18.643393 vs -7.624215
  2286. Layer 3, Token 15 (model.layers.out comparison):
  2287. Original tensor sum: 77.711205
  2288. Converted tensor sum: -115.602707
  2289. Original tensor mean: 9.713901
  2290. Converted tensor mean: -14.450338
  2291. Mean difference: 24.16423798
  2292. Maximum pointwise difference: 33.14313507
  2293. Max difference location: (0, 0, 0)
  2294. Values at max diff - Original: 17.84219551, Converted: -15.30093956
  2295. Biggest difference in row (0, 0), sum 77.711205 vs -115.602707
  2296. Layer 4, Token 15 (model.layers.out comparison):
  2297. Original tensor sum: 71.264816
  2298. Converted tensor sum: -87.184593
  2299. Original tensor mean: 8.908102
  2300. Converted tensor mean: -10.898074
  2301. Mean difference: 19.80617714
  2302. Maximum pointwise difference: 27.60903931
  2303. Max difference location: (0, 0, 0)
  2304. Values at max diff - Original: 16.59056091, Converted: -11.01847839
  2305. Biggest difference in row (0, 0), sum 71.264816 vs -87.184593
  2306. Layer 5, Token 15 (model.layers.out comparison):
  2307. Original tensor sum: 65.154488
  2308. Converted tensor sum: -20.586208
  2309. Original tensor mean: 8.144311
  2310. Converted tensor mean: -2.573276
  2311. Mean difference: 11.36003971
  2312. Maximum pointwise difference: 17.89420700
  2313. Max difference location: (0, 0, 6)
  2314. Values at max diff - Original: 3.50937057, Converted: -14.38483620
  2315. Biggest difference in row (0, 0), sum 65.154488 vs -20.586208
  2316. Layer 6, Token 15 (model.layers.out comparison):
  2317. Original tensor sum: 62.447323
  2318. Converted tensor sum: -39.734089
  2319. Original tensor mean: 7.805915
  2320. Converted tensor mean: -4.966761
  2321. Mean difference: 12.77267647
  2322. Maximum pointwise difference: 22.75133705
  2323. Max difference location: (0, 0, 0)
  2324. Values at max diff - Original: 14.95188141, Converted: -7.79945612
  2325. Biggest difference in row (0, 0), sum 62.447323 vs -39.734089
  2326. Layer 7, Token 15 (model.layers.out comparison):
  2327. Original tensor sum: 127.895920
  2328. Converted tensor sum: -184.804230
  2329. Original tensor mean: 15.986990
  2330. Converted tensor mean: -23.100529
  2331. Mean difference: 39.08751678
  2332. Maximum pointwise difference: 51.54846191
  2333. Max difference location: (0, 0, 3)
  2334. Values at max diff - Original: 16.32706261, Converted: -35.22139740
  2335. Biggest difference in row (0, 0), sum 127.895920 vs -184.804230
  2336. Layer 8, Token 15 (model.layers.out comparison):
  2337. Original tensor sum: 109.946281
  2338. Converted tensor sum: -183.545380
  2339. Original tensor mean: 13.743285
  2340. Converted tensor mean: -22.943172
  2341. Mean difference: 36.68645859
  2342. Maximum pointwise difference: 44.14192963
  2343. Max difference location: (0, 0, 0)
  2344. Values at max diff - Original: 21.54407120, Converted: -22.59785843
  2345. Biggest difference in row (0, 0), sum 109.946281 vs -183.545380
  2346. Layer 9, Token 15 (model.layers.out comparison):
  2347. Original tensor sum: 100.719040
  2348. Converted tensor sum: -189.035889
  2349. Original tensor mean: 12.589880
  2350. Converted tensor mean: -23.629486
  2351. Mean difference: 36.21936798
  2352. Maximum pointwise difference: 49.71876526
  2353. Max difference location: (0, 0, 0)
  2354. Values at max diff - Original: 20.62917519, Converted: -29.08958817
  2355. Biggest difference in row (0, 0), sum 100.719040 vs -189.035889
  2356. Layer 10, Token 15 (model.layers.out comparison):
  2357. Original tensor sum: 94.437965
  2358. Converted tensor sum: -184.073608
  2359. Original tensor mean: 11.804746
  2360. Converted tensor mean: -23.009201
  2361. Mean difference: 34.81394577
  2362. Maximum pointwise difference: 49.50559998
  2363. Max difference location: (0, 0, 0)
  2364. Values at max diff - Original: 19.98403168, Converted: -29.52156830
  2365. Biggest difference in row (0, 0), sum 94.437965 vs -184.073608
  2366. Layer 11, Token 15 (model.layers.out comparison):
  2367. Original tensor sum: 187.329086
  2368. Converted tensor sum: -525.129150
  2369. Original tensor mean: 23.416136
  2370. Converted tensor mean: -65.641144
  2371. Mean difference: 89.05728149
  2372. Maximum pointwise difference: 114.85643005
  2373. Max difference location: (0, 0, 0)
  2374. Values at max diff - Original: 32.40055466, Converted: -82.45587158
  2375. Biggest difference in row (0, 0), sum 187.329086 vs -525.129150
  2376. Layer 12, Token 15 (model.layers.out comparison):
  2377. Original tensor sum: 189.391296
  2378. Converted tensor sum: -524.645203
  2379. Original tensor mean: 23.673912
  2380. Converted tensor mean: -65.580650
  2381. Mean difference: 89.25456238
  2382. Maximum pointwise difference: 119.02915955
  2383. Max difference location: (0, 0, 0)
  2384. Values at max diff - Original: 33.67853165, Converted: -85.35062408
  2385. Biggest difference in row (0, 0), sum 189.391296 vs -524.645203
  2386. Layer 13, Token 15 (model.layers.out comparison):
  2387. Original tensor sum: 183.008652
  2388. Converted tensor sum: -545.134033
  2389. Original tensor mean: 22.876081
  2390. Converted tensor mean: -68.141754
  2391. Mean difference: 91.01783752
  2392. Maximum pointwise difference: 119.28398132
  2393. Max difference location: (0, 0, 0)
  2394. Values at max diff - Original: 33.81208420, Converted: -85.47189331
  2395. Biggest difference in row (0, 0), sum 183.008652 vs -545.134033
  2396. Layer 14, Token 15 (model.layers.out comparison):
  2397. Original tensor sum: 179.184265
  2398. Converted tensor sum: -590.197998
  2399. Original tensor mean: 22.398033
  2400. Converted tensor mean: -73.774750
  2401. Mean difference: 96.17278290
  2402. Maximum pointwise difference: 126.14685059
  2403. Max difference location: (0, 0, 0)
  2404. Values at max diff - Original: 33.16656876, Converted: -92.98027802
  2405. Biggest difference in row (0, 0), sum 179.184265 vs -590.197998
  2406. Layer 15, Token 15 (model.layers.out comparison):
  2407. Original tensor sum: 315.300140
  2408. Converted tensor sum: -976.074097
  2409. Original tensor mean: 39.412518
  2410. Converted tensor mean: -122.009262
  2411. Mean difference: 161.42178345
  2412. Maximum pointwise difference: 201.52458191
  2413. Max difference location: (0, 0, 0)
  2414. Values at max diff - Original: 52.86392212, Converted: -148.66065979
  2415. Biggest difference in row (0, 0), sum 315.300140 vs -976.074097
  2416. Layer 0, Token 16 (model.layers.out comparison):
  2417. Original tensor sum: 12.044241
  2418. Converted tensor sum: 14.548074
  2419. Original tensor mean: 1.505530
  2420. Converted tensor mean: 1.818509
  2421. Mean difference: 3.51175261
  2422. Maximum pointwise difference: 7.44231224
  2423. Max difference location: (0, 0, 2)
  2424. Values at max diff - Original: -4.31869221, Converted: 3.12362027
  2425. Biggest difference in row (0, 0), sum 12.044241 vs 14.548074
  2426. Layer 1, Token 16 (model.layers.out comparison):
  2427. Original tensor sum: 7.660315
  2428. Converted tensor sum: 1.425261
  2429. Original tensor mean: 0.957539
  2430. Converted tensor mean: 0.178158
  2431. Mean difference: 4.00331783
  2432. Maximum pointwise difference: 8.79326248
  2433. Max difference location: (0, 0, 7)
  2434. Values at max diff - Original: 3.55122566, Converted: -5.24203634
  2435. Biggest difference in row (0, 0), sum 7.660315 vs 1.425261
  2436. Layer 2, Token 16 (model.layers.out comparison):
  2437. Original tensor sum: 5.985608
  2438. Converted tensor sum: -2.881522
  2439. Original tensor mean: 0.748201
  2440. Converted tensor mean: -0.360190
  2441. Mean difference: 6.00233269
  2442. Maximum pointwise difference: 9.75814056
  2443. Max difference location: (0, 0, 7)
  2444. Values at max diff - Original: 3.30634618, Converted: -6.45179462
  2445. Biggest difference in row (0, 0), sum 5.985608 vs -2.881522
  2446. Layer 3, Token 16 (model.layers.out comparison):
  2447. Original tensor sum: 66.644623
  2448. Converted tensor sum: 38.471397
  2449. Original tensor mean: 8.330578
  2450. Converted tensor mean: 4.808925
  2451. Mean difference: 5.99987411
  2452. Maximum pointwise difference: 11.70975304
  2453. Max difference location: (0, 0, 3)
  2454. Values at max diff - Original: 11.37678432, Converted: -0.33296829
  2455. Biggest difference in row (0, 0), sum 66.644623 vs 38.471397
  2456. Layer 4, Token 16 (model.layers.out comparison):
  2457. Original tensor sum: 55.084259
  2458. Converted tensor sum: 39.585022
  2459. Original tensor mean: 6.885532
  2460. Converted tensor mean: 4.948128
  2461. Mean difference: 5.54818344
  2462. Maximum pointwise difference: 10.42512989
  2463. Max difference location: (0, 0, 3)
  2464. Values at max diff - Original: 8.96806908, Converted: -1.45706093
  2465. Biggest difference in row (0, 0), sum 55.084259 vs 39.585022
  2466. Layer 5, Token 16 (model.layers.out comparison):
  2467. Original tensor sum: 47.768257
  2468. Converted tensor sum: 29.551674
  2469. Original tensor mean: 5.971032
  2470. Converted tensor mean: 3.693959
  2471. Mean difference: 5.40017319
  2472. Maximum pointwise difference: 11.83149147
  2473. Max difference location: (0, 0, 3)
  2474. Values at max diff - Original: 9.62209320, Converted: -2.20939875
  2475. Biggest difference in row (0, 0), sum 47.768257 vs 29.551674
  2476. Layer 6, Token 16 (model.layers.out comparison):
  2477. Original tensor sum: 47.378487
  2478. Converted tensor sum: 33.471664
  2479. Original tensor mean: 5.922311
  2480. Converted tensor mean: 4.183958
  2481. Mean difference: 5.35756683
  2482. Maximum pointwise difference: 11.70071220
  2483. Max difference location: (0, 0, 3)
  2484. Values at max diff - Original: 10.01993370, Converted: -1.68077850
  2485. Biggest difference in row (0, 0), sum 47.378487 vs 33.471664
  2486. Layer 7, Token 16 (model.layers.out comparison):
  2487. Original tensor sum: 121.329849
  2488. Converted tensor sum: 101.072693
  2489. Original tensor mean: 15.166231
  2490. Converted tensor mean: 12.634087
  2491. Mean difference: 4.85845757
  2492. Maximum pointwise difference: 11.92098331
  2493. Max difference location: (0, 0, 3)
  2494. Values at max diff - Original: 18.39835739, Converted: 6.47737408
  2495. Biggest difference in row (0, 0), sum 121.329849 vs 101.072693
  2496. Layer 8, Token 16 (model.layers.out comparison):
  2497. Original tensor sum: 105.626358
  2498. Converted tensor sum: 92.869370
  2499. Original tensor mean: 13.203295
  2500. Converted tensor mean: 11.608671
  2501. Mean difference: 5.01301622
  2502. Maximum pointwise difference: 11.09072685
  2503. Max difference location: (0, 0, 3)
  2504. Values at max diff - Original: 15.02331066, Converted: 3.93258405
  2505. Biggest difference in row (0, 0), sum 105.626358 vs 92.869370
  2506. Layer 9, Token 16 (model.layers.out comparison):
  2507. Original tensor sum: 94.886589
  2508. Converted tensor sum: 86.461792
  2509. Original tensor mean: 11.860824
  2510. Converted tensor mean: 10.807724
  2511. Mean difference: 5.16425228
  2512. Maximum pointwise difference: 10.79585648
  2513. Max difference location: (0, 0, 2)
  2514. Values at max diff - Original: 2.03169847, Converted: 12.82755470
  2515. Biggest difference in row (0, 0), sum 94.886589 vs 86.461792
  2516. Layer 10, Token 16 (model.layers.out comparison):
  2517. Original tensor sum: 93.657555
  2518. Converted tensor sum: 77.932861
  2519. Original tensor mean: 11.707194
  2520. Converted tensor mean: 9.741608
  2521. Mean difference: 5.07010078
  2522. Maximum pointwise difference: 11.53797054
  2523. Max difference location: (0, 0, 3)
  2524. Values at max diff - Original: 13.33782196, Converted: 1.79985178
  2525. Biggest difference in row (0, 0), sum 93.657555 vs 77.932861
  2526. Layer 11, Token 16 (model.layers.out comparison):
  2527. Original tensor sum: 186.086578
  2528. Converted tensor sum: 176.759811
  2529. Original tensor mean: 23.260822
  2530. Converted tensor mean: 22.094976
  2531. Mean difference: 4.87584686
  2532. Maximum pointwise difference: 10.12077332
  2533. Max difference location: (0, 0, 2)
  2534. Values at max diff - Original: 14.05643463, Converted: 24.17720795
  2535. Biggest difference in row (0, 0), sum 186.086578 vs 176.759811
  2536. Layer 12, Token 16 (model.layers.out comparison):
  2537. Original tensor sum: 188.253220
  2538. Converted tensor sum: 173.150467
  2539. Original tensor mean: 23.531652
  2540. Converted tensor mean: 21.643808
  2541. Mean difference: 5.08278847
  2542. Maximum pointwise difference: 9.91738033
  2543. Max difference location: (0, 0, 2)
  2544. Values at max diff - Original: 14.99966526, Converted: 24.91704559
  2545. Biggest difference in row (0, 0), sum 188.253220 vs 173.150467
  2546. Layer 13, Token 16 (model.layers.out comparison):
  2547. Original tensor sum: 181.761749
  2548. Converted tensor sum: 171.658249
  2549. Original tensor mean: 22.720219
  2550. Converted tensor mean: 21.457281
  2551. Mean difference: 4.79229736
  2552. Maximum pointwise difference: 9.82627106
  2553. Max difference location: (0, 0, 2)
  2554. Values at max diff - Original: 14.16268539, Converted: 23.98895645
  2555. Biggest difference in row (0, 0), sum 181.761749 vs 171.658249
  2556. Layer 14, Token 16 (model.layers.out comparison):
  2557. Original tensor sum: 176.198990
  2558. Converted tensor sum: 170.420898
  2559. Original tensor mean: 22.024874
  2560. Converted tensor mean: 21.302612
  2561. Mean difference: 4.28427029
  2562. Maximum pointwise difference: 9.05801964
  2563. Max difference location: (0, 0, 2)
  2564. Values at max diff - Original: 13.50310326, Converted: 22.56112289
  2565. Biggest difference in row (0, 0), sum 176.198990 vs 170.420898
  2566. Layer 15, Token 16 (model.layers.out comparison):
  2567. Original tensor sum: 314.888916
  2568. Converted tensor sum: 308.839905
  2569. Original tensor mean: 39.361115
  2570. Converted tensor mean: 38.604988
  2571. Mean difference: 4.36002254
  2572. Maximum pointwise difference: 9.44413185
  2573. Max difference location: (0, 0, 2)
  2574. Values at max diff - Original: 25.14219856, Converted: 34.58633041
  2575. Biggest difference in row (0, 0), sum 314.888916 vs 308.839905
  2576. Layer 0, Token 17 (model.layers.out comparison):
  2577. Original tensor sum: 6.615214
  2578. Converted tensor sum: -14.476066
  2579. Original tensor mean: 0.826902
  2580. Converted tensor mean: -1.809508
  2581. Mean difference: 4.01758480
  2582. Maximum pointwise difference: 12.95696259
  2583. Max difference location: (0, 0, 5)
  2584. Values at max diff - Original: 8.16467762, Converted: -4.79228544
  2585. Biggest difference in row (0, 0), sum 6.615214 vs -14.476066
  2586. Layer 1, Token 17 (model.layers.out comparison):
  2587. Original tensor sum: 4.332821
  2588. Converted tensor sum: -48.476418
  2589. Original tensor mean: 0.541603
  2590. Converted tensor mean: -6.059552
  2591. Mean difference: 8.00736046
  2592. Maximum pointwise difference: 13.83443928
  2593. Max difference location: (0, 0, 5)
  2594. Values at max diff - Original: 7.88728952, Converted: -5.94714975
  2595. Biggest difference in row (0, 0), sum 4.332821 vs -48.476418
  2596. Layer 2, Token 17 (model.layers.out comparison):
  2597. Original tensor sum: 13.631664
  2598. Converted tensor sum: -24.375608
  2599. Original tensor mean: 1.703958
  2600. Converted tensor mean: -3.046951
  2601. Mean difference: 9.48411465
  2602. Maximum pointwise difference: 15.28743267
  2603. Max difference location: (0, 0, 1)
  2604. Values at max diff - Original: 2.43811703, Converted: -12.84931564
  2605. Biggest difference in row (0, 0), sum 13.631664 vs -24.375608
  2606. Layer 3, Token 17 (model.layers.out comparison):
  2607. Original tensor sum: 59.143936
  2608. Converted tensor sum: -80.541725
  2609. Original tensor mean: 7.392992
  2610. Converted tensor mean: -10.067716
  2611. Mean difference: 17.46070862
  2612. Maximum pointwise difference: 28.83273697
  2613. Max difference location: (0, 0, 1)
  2614. Values at max diff - Original: 9.60771275, Converted: -19.22502327
  2615. Biggest difference in row (0, 0), sum 59.143936 vs -80.541725
  2616. Layer 4, Token 17 (model.layers.out comparison):
  2617. Original tensor sum: 51.750626
  2618. Converted tensor sum: -81.567123
  2619. Original tensor mean: 6.468828
  2620. Converted tensor mean: -10.195890
  2621. Mean difference: 17.13005066
  2622. Maximum pointwise difference: 30.73341751
  2623. Max difference location: (0, 0, 1)
  2624. Values at max diff - Original: 9.45896626, Converted: -21.27445221
  2625. Biggest difference in row (0, 0), sum 51.750626 vs -81.567123
  2626. Layer 5, Token 17 (model.layers.out comparison):
  2627. Original tensor sum: 33.377792
  2628. Converted tensor sum: -8.966677
  2629. Original tensor mean: 4.172224
  2630. Converted tensor mean: -1.120835
  2631. Mean difference: 11.87618256
  2632. Maximum pointwise difference: 19.17303848
  2633. Max difference location: (0, 0, 0)
  2634. Values at max diff - Original: 0.59302533, Converted: 19.76606369
  2635. Biggest difference in row (0, 0), sum 33.377792 vs -8.966677
  2636. Layer 6, Token 17 (model.layers.out comparison):
  2637. Original tensor sum: 34.373646
  2638. Converted tensor sum: -17.893101
  2639. Original tensor mean: 4.296706
  2640. Converted tensor mean: -2.236638
  2641. Mean difference: 12.44108009
  2642. Maximum pointwise difference: 21.66391373
  2643. Max difference location: (0, 0, 1)
  2644. Values at max diff - Original: 7.57746935, Converted: -14.08644485
  2645. Biggest difference in row (0, 0), sum 34.373646 vs -17.893101
  2646. Layer 7, Token 17 (model.layers.out comparison):
  2647. Original tensor sum: 117.899002
  2648. Converted tensor sum: -60.493092
  2649. Original tensor mean: 14.737375
  2650. Converted tensor mean: -7.561636
  2651. Mean difference: 22.75322723
  2652. Maximum pointwise difference: 41.73314667
  2653. Max difference location: (0, 0, 1)
  2654. Values at max diff - Original: 20.46781158, Converted: -21.26533699
  2655. Biggest difference in row (0, 0), sum 117.899002 vs -60.493092
  2656. Layer 8, Token 17 (model.layers.out comparison):
  2657. Original tensor sum: 102.151550
  2658. Converted tensor sum: -53.178627
  2659. Original tensor mean: 12.768944
  2660. Converted tensor mean: -6.647328
  2661. Mean difference: 21.35518456
  2662. Maximum pointwise difference: 40.89769745
  2663. Max difference location: (0, 0, 1)
  2664. Values at max diff - Original: 18.52126884, Converted: -22.37642860
  2665. Biggest difference in row (0, 0), sum 102.151550 vs -53.178627
  2666. Layer 9, Token 17 (model.layers.out comparison):
  2667. Original tensor sum: 90.451920
  2668. Converted tensor sum: -34.497658
  2669. Original tensor mean: 11.306490
  2670. Converted tensor mean: -4.312207
  2671. Mean difference: 18.82321548
  2672. Maximum pointwise difference: 37.83747864
  2673. Max difference location: (0, 0, 1)
  2674. Values at max diff - Original: 18.08675385, Converted: -19.75072479
  2675. Biggest difference in row (0, 0), sum 90.451920 vs -34.497658
  2676. Layer 10, Token 17 (model.layers.out comparison):
  2677. Original tensor sum: 87.881783
  2678. Converted tensor sum: -25.459152
  2679. Original tensor mean: 10.985223
  2680. Converted tensor mean: -3.182394
  2681. Mean difference: 17.43336678
  2682. Maximum pointwise difference: 35.29803467
  2683. Max difference location: (0, 0, 1)
  2684. Values at max diff - Original: 17.46567726, Converted: -17.83235931
  2685. Biggest difference in row (0, 0), sum 87.881783 vs -25.459152
  2686. Layer 11, Token 17 (model.layers.out comparison):
  2687. Original tensor sum: 185.306732
  2688. Converted tensor sum: -264.026886
  2689. Original tensor mean: 23.163342
  2690. Converted tensor mean: -33.003361
  2691. Mean difference: 56.16670227
  2692. Maximum pointwise difference: 73.40274048
  2693. Max difference location: (0, 0, 1)
  2694. Values at max diff - Original: 27.15820312, Converted: -46.24454117
  2695. Biggest difference in row (0, 0), sum 185.306732 vs -264.026886
  2696. Layer 12, Token 17 (model.layers.out comparison):
  2697. Original tensor sum: 186.018799
  2698. Converted tensor sum: -238.738007
  2699. Original tensor mean: 23.252350
  2700. Converted tensor mean: -29.842251
  2701. Mean difference: 53.09460068
  2702. Maximum pointwise difference: 71.14258575
  2703. Max difference location: (0, 0, 1)
  2704. Values at max diff - Original: 27.12987900, Converted: -44.01270676
  2705. Biggest difference in row (0, 0), sum 186.018799 vs -238.738007
  2706. Layer 13, Token 17 (model.layers.out comparison):
  2707. Original tensor sum: 178.633179
  2708. Converted tensor sum: -250.662323
  2709. Original tensor mean: 22.329147
  2710. Converted tensor mean: -31.332790
  2711. Mean difference: 53.66194153
  2712. Maximum pointwise difference: 72.33184814
  2713. Max difference location: (0, 0, 3)
  2714. Values at max diff - Original: 22.06610680, Converted: -50.26573944
  2715. Biggest difference in row (0, 0), sum 178.633179 vs -250.662323
  2716. Layer 14, Token 17 (model.layers.out comparison):
  2717. Original tensor sum: 171.761902
  2718. Converted tensor sum: -301.707916
  2719. Original tensor mean: 21.470238
  2720. Converted tensor mean: -37.713490
  2721. Mean difference: 59.18372726
  2722. Maximum pointwise difference: 84.33922577
  2723. Max difference location: (0, 0, 3)
  2724. Values at max diff - Original: 21.43586349, Converted: -62.90336227
  2725. Biggest difference in row (0, 0), sum 171.761902 vs -301.707916
  2726. Layer 15, Token 17 (model.layers.out comparison):
  2727. Original tensor sum: 313.503632
  2728. Converted tensor sum: -672.745667
  2729. Original tensor mean: 39.187954
  2730. Converted tensor mean: -84.093208
  2731. Mean difference: 123.28115845
  2732. Maximum pointwise difference: 153.27690125
  2733. Max difference location: (0, 0, 3)
  2734. Values at max diff - Original: 38.26152039, Converted: -115.01538086
  2735. Biggest difference in row (0, 0), sum 313.503632 vs -672.745667
  2736. Layer 0, Token 18 (model.layers.out comparison):
  2737. Original tensor sum: 37.370514
  2738. Converted tensor sum: 2.800200
  2739. Original tensor mean: 4.671314
  2740. Converted tensor mean: 0.350025
  2741. Mean difference: 5.54810905
  2742. Maximum pointwise difference: 9.22967339
  2743. Max difference location: (0, 0, 5)
  2744. Values at max diff - Original: 9.28797436, Converted: 0.05830121
  2745. Biggest difference in row (0, 0), sum 37.370514 vs 2.800200
  2746. Layer 1, Token 18 (model.layers.out comparison):
  2747. Original tensor sum: 27.386568
  2748. Converted tensor sum: -8.815313
  2749. Original tensor mean: 3.423321
  2750. Converted tensor mean: -1.101914
  2751. Mean difference: 5.46173763
  2752. Maximum pointwise difference: 11.23313618
  2753. Max difference location: (0, 0, 5)
  2754. Values at max diff - Original: 7.72619963, Converted: -3.50693655
  2755. Biggest difference in row (0, 0), sum 27.386568 vs -8.815313
  2756. Layer 2, Token 18 (model.layers.out comparison):
  2757. Original tensor sum: 22.950966
  2758. Converted tensor sum: -26.951405
  2759. Original tensor mean: 2.868871
  2760. Converted tensor mean: -3.368926
  2761. Mean difference: 7.41814232
  2762. Maximum pointwise difference: 14.15112782
  2763. Max difference location: (0, 0, 5)
  2764. Values at max diff - Original: 7.93941879, Converted: -6.21170902
  2765. Biggest difference in row (0, 0), sum 22.950966 vs -26.951405
  2766. Layer 3, Token 18 (model.layers.out comparison):
  2767. Original tensor sum: 75.358887
  2768. Converted tensor sum: -194.584152
  2769. Original tensor mean: 9.419861
  2770. Converted tensor mean: -24.323019
  2771. Mean difference: 33.74287796
  2772. Maximum pointwise difference: 39.03241730
  2773. Max difference location: (0, 0, 1)
  2774. Values at max diff - Original: 16.72500801, Converted: -22.30740929
  2775. Biggest difference in row (0, 0), sum 75.358887 vs -194.584152
  2776. Layer 4, Token 18 (model.layers.out comparison):
  2777. Original tensor sum: 63.885963
  2778. Converted tensor sum: -193.801666
  2779. Original tensor mean: 7.985745
  2780. Converted tensor mean: -24.225208
  2781. Mean difference: 32.21095276
  2782. Maximum pointwise difference: 39.04253769
  2783. Max difference location: (0, 0, 1)
  2784. Values at max diff - Original: 15.83776665, Converted: -23.20477104
  2785. Biggest difference in row (0, 0), sum 63.885963 vs -193.801666
  2786. Layer 5, Token 18 (model.layers.out comparison):
  2787. Original tensor sum: 51.427219
  2788. Converted tensor sum: -189.920349
  2789. Original tensor mean: 6.428402
  2790. Converted tensor mean: -23.740044
  2791. Mean difference: 30.16844559
  2792. Maximum pointwise difference: 35.64602280
  2793. Max difference location: (0, 0, 6)
  2794. Values at max diff - Original: 8.10052967, Converted: -27.54549408
  2795. Biggest difference in row (0, 0), sum 51.427219 vs -189.920349
  2796. Layer 6, Token 18 (model.layers.out comparison):
  2797. Original tensor sum: 52.837097
  2798. Converted tensor sum: -237.793671
  2799. Original tensor mean: 6.604637
  2800. Converted tensor mean: -29.724209
  2801. Mean difference: 36.32884598
  2802. Maximum pointwise difference: 41.40105438
  2803. Max difference location: (0, 0, 2)
  2804. Values at max diff - Original: 7.84163952, Converted: -33.55941391
  2805. Biggest difference in row (0, 0), sum 52.837097 vs -237.793671
  2806. Layer 7, Token 18 (model.layers.out comparison):
  2807. Original tensor sum: 129.848618
  2808. Converted tensor sum: -405.475128
  2809. Original tensor mean: 16.231077
  2810. Converted tensor mean: -50.684391
  2811. Mean difference: 66.91546631
  2812. Maximum pointwise difference: 75.46723938
  2813. Max difference location: (0, 0, 4)
  2814. Values at max diff - Original: 22.33297348, Converted: -53.13426590
  2815. Biggest difference in row (0, 0), sum 129.848618 vs -405.475128
  2816. Layer 8, Token 18 (model.layers.out comparison):
  2817. Original tensor sum: 112.813950
  2818. Converted tensor sum: -388.213379
  2819. Original tensor mean: 14.101744
  2820. Converted tensor mean: -48.526672
  2821. Mean difference: 62.62841415
  2822. Maximum pointwise difference: 74.58121490
  2823. Max difference location: (0, 0, 4)
  2824. Values at max diff - Original: 20.05025291, Converted: -54.53096390
  2825. Biggest difference in row (0, 0), sum 112.813950 vs -388.213379
  2826. Layer 9, Token 18 (model.layers.out comparison):
  2827. Original tensor sum: 98.625351
  2828. Converted tensor sum: -428.683411
  2829. Original tensor mean: 12.328169
  2830. Converted tensor mean: -53.585426
  2831. Mean difference: 65.91359711
  2832. Maximum pointwise difference: 78.76679230
  2833. Max difference location: (0, 0, 0)
  2834. Values at max diff - Original: 5.12599134, Converted: -73.64080048
  2835. Biggest difference in row (0, 0), sum 98.625351 vs -428.683411
  2836. Layer 10, Token 18 (model.layers.out comparison):
  2837. Original tensor sum: 93.009445
  2838. Converted tensor sum: -432.554626
  2839. Original tensor mean: 11.626181
  2840. Converted tensor mean: -54.069328
  2841. Mean difference: 65.69551086
  2842. Maximum pointwise difference: 76.13760376
  2843. Max difference location: (0, 0, 0)
  2844. Values at max diff - Original: 4.86473036, Converted: -71.27287292
  2845. Biggest difference in row (0, 0), sum 93.009445 vs -432.554626
  2846. Layer 11, Token 18 (model.layers.out comparison):
  2847. Original tensor sum: 188.645950
  2848. Converted tensor sum: -772.146790
  2849. Original tensor mean: 23.580744
  2850. Converted tensor mean: -96.518349
  2851. Mean difference: 120.09909058
  2852. Maximum pointwise difference: 140.57998657
  2853. Max difference location: (0, 0, 0)
  2854. Values at max diff - Original: 17.72886276, Converted: -122.85112000
  2855. Biggest difference in row (0, 0), sum 188.645950 vs -772.146790
  2856. Layer 12, Token 18 (model.layers.out comparison):
  2857. Original tensor sum: 191.028870
  2858. Converted tensor sum: -781.472900
  2859. Original tensor mean: 23.878609
  2860. Converted tensor mean: -97.684113
  2861. Mean difference: 121.56272888
  2862. Maximum pointwise difference: 143.88111877
  2863. Max difference location: (0, 0, 0)
  2864. Values at max diff - Original: 18.69833946, Converted: -125.18278503
  2865. Biggest difference in row (0, 0), sum 191.028870 vs -781.472900
  2866. Layer 13, Token 18 (model.layers.out comparison):
  2867. Original tensor sum: 183.829086
  2868. Converted tensor sum: -808.856689
  2869. Original tensor mean: 22.978636
  2870. Converted tensor mean: -101.107086
  2871. Mean difference: 124.08572388
  2872. Maximum pointwise difference: 147.60656738
  2873. Max difference location: (0, 0, 0)
  2874. Values at max diff - Original: 18.44003105, Converted: -129.16653442
  2875. Biggest difference in row (0, 0), sum 183.829086 vs -808.856689
  2876. Layer 14, Token 18 (model.layers.out comparison):
  2877. Original tensor sum: 177.643005
  2878. Converted tensor sum: -844.687622
  2879. Original tensor mean: 22.205376
  2880. Converted tensor mean: -105.585953
  2881. Mean difference: 127.79132843
  2882. Maximum pointwise difference: 148.00994873
  2883. Max difference location: (0, 0, 0)
  2884. Values at max diff - Original: 17.69933319, Converted: -130.31060791
  2885. Biggest difference in row (0, 0), sum 177.643005 vs -844.687622
  2886. Layer 15, Token 18 (model.layers.out comparison):
  2887. Original tensor sum: 320.725769
  2888. Converted tensor sum: -1234.242676
  2889. Original tensor mean: 40.090721
  2890. Converted tensor mean: -154.280334
  2891. Mean difference: 194.37104797
  2892. Maximum pointwise difference: 225.51652527
  2893. Max difference location: (0, 0, 0)
  2894. Values at max diff - Original: 39.54684830, Converted: -185.96968079
  2895. Biggest difference in row (0, 0), sum 320.725769 vs -1234.242676
  2896. Layer 0, Token 19 (model.layers.out comparison):
  2897. Original tensor sum: -9.932329
  2898. Converted tensor sum: -1.418950
  2899. Original tensor mean: -1.241541
  2900. Converted tensor mean: -0.177369
  2901. Mean difference: 1.91613591
  2902. Maximum pointwise difference: 5.37744808
  2903. Max difference location: (0, 0, 4)
  2904. Values at max diff - Original: -4.92564631, Converted: 0.45180166
  2905. Biggest difference in row (0, 0), sum -9.932329 vs -1.418950
  2906. Layer 1, Token 19 (model.layers.out comparison):
  2907. Original tensor sum: -28.079020
  2908. Converted tensor sum: 4.360578
  2909. Original tensor mean: -3.509877
  2910. Converted tensor mean: 0.545072
  2911. Mean difference: 4.81566954
  2912. Maximum pointwise difference: 12.93084526
  2913. Max difference location: (0, 0, 5)
  2914. Values at max diff - Original: -11.63085365, Converted: 1.29999185
  2915. Biggest difference in row (0, 0), sum -28.079020 vs 4.360578
  2916. Layer 2, Token 19 (model.layers.out comparison):
  2917. Original tensor sum: -9.719646
  2918. Converted tensor sum: 14.192688
  2919. Original tensor mean: -1.214956
  2920. Converted tensor mean: 1.774086
  2921. Mean difference: 5.83081627
  2922. Maximum pointwise difference: 15.01737213
  2923. Max difference location: (0, 0, 5)
  2924. Values at max diff - Original: -10.13109303, Converted: 4.88627911
  2925. Biggest difference in row (0, 0), sum -9.719646 vs 14.192688
  2926. Layer 3, Token 19 (model.layers.out comparison):
  2927. Original tensor sum: -78.071198
  2928. Converted tensor sum: 44.287003
  2929. Original tensor mean: -9.758900
  2930. Converted tensor mean: 5.535875
  2931. Mean difference: 15.29477501
  2932. Maximum pointwise difference: 25.90341759
  2933. Max difference location: (0, 0, 5)
  2934. Values at max diff - Original: -17.29398918, Converted: 8.60942841
  2935. Biggest difference in row (0, 0), sum -78.071198 vs 44.287003
  2936. Layer 4, Token 19 (model.layers.out comparison):
  2937. Original tensor sum: -17.936802
  2938. Converted tensor sum: 43.255585
  2939. Original tensor mean: -2.242100
  2940. Converted tensor mean: 5.406948
  2941. Mean difference: 9.52408981
  2942. Maximum pointwise difference: 16.11044312
  2943. Max difference location: (0, 0, 5)
  2944. Values at max diff - Original: -8.10731792, Converted: 8.00312424
  2945. Biggest difference in row (0, 0), sum -17.936802 vs 43.255585
  2946. Layer 5, Token 19 (model.layers.out comparison):
  2947. Original tensor sum: 14.270342
  2948. Converted tensor sum: 40.868690
  2949. Original tensor mean: 1.783793
  2950. Converted tensor mean: 5.108586
  2951. Mean difference: 6.39925480
  2952. Maximum pointwise difference: 13.00582123
  2953. Max difference location: (0, 0, 6)
  2954. Values at max diff - Original: -10.49264050, Converted: 2.51318097
  2955. Biggest difference in row (0, 0), sum 14.270342 vs 40.868690
  2956. Layer 6, Token 19 (model.layers.out comparison):
  2957. Original tensor sum: 8.770991
  2958. Converted tensor sum: 44.250122
  2959. Original tensor mean: 1.096374
  2960. Converted tensor mean: 5.531265
  2961. Mean difference: 7.05475235
  2962. Maximum pointwise difference: 14.57606697
  2963. Max difference location: (0, 0, 6)
  2964. Values at max diff - Original: -11.80261707, Converted: 2.77344990
  2965. Biggest difference in row (0, 0), sum 8.770991 vs 44.250122
  2966. Layer 7, Token 19 (model.layers.out comparison):
  2967. Original tensor sum: 27.567080
  2968. Converted tensor sum: 110.976578
  2969. Original tensor mean: 3.445885
  2970. Converted tensor mean: 13.872072
  2971. Mean difference: 11.90625381
  2972. Maximum pointwise difference: 20.18301392
  2973. Max difference location: (0, 0, 6)
  2974. Values at max diff - Original: -9.75880718, Converted: 10.42420769
  2975. Biggest difference in row (0, 0), sum 27.567080 vs 110.976578
  2976. Layer 8, Token 19 (model.layers.out comparison):
  2977. Original tensor sum: 12.723747
  2978. Converted tensor sum: 112.570312
  2979. Original tensor mean: 1.590468
  2980. Converted tensor mean: 14.071289
  2981. Mean difference: 12.89592552
  2982. Maximum pointwise difference: 20.84409904
  2983. Max difference location: (0, 0, 6)
  2984. Values at max diff - Original: -12.16371441, Converted: 8.68038464
  2985. Biggest difference in row (0, 0), sum 12.723747 vs 112.570312
  2986. Layer 9, Token 19 (model.layers.out comparison):
  2987. Original tensor sum: 10.056442
  2988. Converted tensor sum: 106.334442
  2989. Original tensor mean: 1.257055
  2990. Converted tensor mean: 13.291805
  2991. Mean difference: 12.47594643
  2992. Maximum pointwise difference: 22.08431053
  2993. Max difference location: (0, 0, 6)
  2994. Values at max diff - Original: -14.35114861, Converted: 7.73316193
  2995. Biggest difference in row (0, 0), sum 10.056442 vs 106.334442
  2996. Layer 10, Token 19 (model.layers.out comparison):
  2997. Original tensor sum: -1.989794
  2998. Converted tensor sum: 99.182007
  2999. Original tensor mean: -0.248724
  3000. Converted tensor mean: 12.397751
  3001. Mean difference: 13.17310143
  3002. Maximum pointwise difference: 24.05181694
  3003. Max difference location: (0, 0, 6)
  3004. Values at max diff - Original: -17.15268326, Converted: 6.89913368
  3005. Biggest difference in row (0, 0), sum -1.989794 vs 99.182007
  3006. Layer 11, Token 19 (model.layers.out comparison):
  3007. Original tensor sum: 67.349617
  3008. Converted tensor sum: 188.920929
  3009. Original tensor mean: 8.418702
  3010. Converted tensor mean: 23.615116
  3011. Mean difference: 15.19641399
  3012. Maximum pointwise difference: 29.17947769
  3013. Max difference location: (0, 0, 6)
  3014. Values at max diff - Original: -10.99394608, Converted: 18.18553162
  3015. Biggest difference in row (0, 0), sum 67.349617 vs 188.920929
  3016. Layer 12, Token 19 (model.layers.out comparison):
  3017. Original tensor sum: 65.645859
  3018. Converted tensor sum: 187.996002
  3019. Original tensor mean: 8.205732
  3020. Converted tensor mean: 23.499500
  3021. Mean difference: 15.29376984
  3022. Maximum pointwise difference: 29.97419739
  3023. Max difference location: (0, 0, 6)
  3024. Values at max diff - Original: -11.02998257, Converted: 18.94421577
  3025. Biggest difference in row (0, 0), sum 65.645859 vs 187.996002
  3026. Layer 13, Token 19 (model.layers.out comparison):
  3027. Original tensor sum: 62.775318
  3028. Converted tensor sum: 186.939407
  3029. Original tensor mean: 7.846915
  3030. Converted tensor mean: 23.367426
  3031. Mean difference: 15.52051163
  3032. Maximum pointwise difference: 30.78374863
  3033. Max difference location: (0, 0, 6)
  3034. Values at max diff - Original: -11.59408474, Converted: 19.18966293
  3035. Biggest difference in row (0, 0), sum 62.775318 vs 186.939407
  3036. Layer 14, Token 19 (model.layers.out comparison):
  3037. Original tensor sum: 66.572449
  3038. Converted tensor sum: 192.538483
  3039. Original tensor mean: 8.321556
  3040. Converted tensor mean: 24.067310
  3041. Mean difference: 15.74575615
  3042. Maximum pointwise difference: 32.26174927
  3043. Max difference location: (0, 0, 6)
  3044. Values at max diff - Original: -11.68817997, Converted: 20.57357025
  3045. Biggest difference in row (0, 0), sum 66.572449 vs 192.538483
  3046. Layer 15, Token 19 (model.layers.out comparison):
  3047. Original tensor sum: 224.145126
  3048. Converted tensor sum: 325.050964
  3049. Original tensor mean: 28.018141
  3050. Converted tensor mean: 40.631371
  3051. Mean difference: 13.45689964
  3052. Maximum pointwise difference: 28.69198799
  3053. Max difference location: (0, 0, 6)
  3054. Values at max diff - Original: 9.77963829, Converted: 38.47162628
  3055. Biggest difference in row (0, 0), sum 224.145126 vs 325.050964
  3056. Layer 0, Token 20 (model.layers.out comparison):
  3057. Original tensor sum: -29.569780
  3058. Converted tensor sum: 10.794893
  3059. Original tensor mean: -3.696223
  3060. Converted tensor mean: 1.349362
  3061. Mean difference: 6.44896221
  3062. Maximum pointwise difference: 13.91718292
  3063. Max difference location: (0, 0, 1)
  3064. Values at max diff - Original: -9.61637592, Converted: 4.30080748
  3065. Biggest difference in row (0, 0), sum -29.569780 vs 10.794893
  3066. Layer 1, Token 20 (model.layers.out comparison):
  3067. Original tensor sum: 1.025735
  3068. Converted tensor sum: 6.199029
  3069. Original tensor mean: 0.128217
  3070. Converted tensor mean: 0.774879
  3071. Mean difference: 7.59240437
  3072. Maximum pointwise difference: 14.00857544
  3073. Max difference location: (0, 0, 1)
  3074. Values at max diff - Original: -9.50434303, Converted: 4.50423241
  3075. Biggest difference in row (0, 0), sum 1.025735 vs 6.199029
  3076. Layer 2, Token 20 (model.layers.out comparison):
  3077. Original tensor sum: 17.293440
  3078. Converted tensor sum: 7.479863
  3079. Original tensor mean: 2.161680
  3080. Converted tensor mean: 0.934983
  3081. Mean difference: 7.88275719
  3082. Maximum pointwise difference: 14.18584061
  3083. Max difference location: (0, 0, 1)
  3084. Values at max diff - Original: -8.63929176, Converted: 5.54654837
  3085. Biggest difference in row (0, 0), sum 17.293440 vs 7.479863
  3086. Layer 3, Token 20 (model.layers.out comparison):
  3087. Original tensor sum: 36.610168
  3088. Converted tensor sum: 49.467545
  3089. Original tensor mean: 4.576271
  3090. Converted tensor mean: 6.183443
  3091. Mean difference: 6.93841553
  3092. Maximum pointwise difference: 18.18937302
  3093. Max difference location: (0, 0, 1)
  3094. Values at max diff - Original: -5.24265194, Converted: 12.94672108
  3095. Biggest difference in row (0, 0), sum 36.610168 vs 49.467545
  3096. Layer 4, Token 20 (model.layers.out comparison):
  3097. Original tensor sum: 29.254171
  3098. Converted tensor sum: 47.750710
  3099. Original tensor mean: 3.656771
  3100. Converted tensor mean: 5.968839
  3101. Mean difference: 7.21544361
  3102. Maximum pointwise difference: 18.54884338
  3103. Max difference location: (0, 0, 1)
  3104. Values at max diff - Original: -5.63391066, Converted: 12.91493225
  3105. Biggest difference in row (0, 0), sum 29.254171 vs 47.750710
  3106. Layer 5, Token 20 (model.layers.out comparison):
  3107. Original tensor sum: 35.151703
  3108. Converted tensor sum: 48.878067
  3109. Original tensor mean: 4.393963
  3110. Converted tensor mean: 6.109758
  3111. Mean difference: 6.99968100
  3112. Maximum pointwise difference: 14.96766090
  3113. Max difference location: (0, 0, 1)
  3114. Values at max diff - Original: -3.65913010, Converted: 11.30853081
  3115. Biggest difference in row (0, 0), sum 35.151703 vs 48.878067
  3116. Layer 6, Token 20 (model.layers.out comparison):
  3117. Original tensor sum: 30.034544
  3118. Converted tensor sum: 47.318748
  3119. Original tensor mean: 3.754318
  3120. Converted tensor mean: 5.914844
  3121. Mean difference: 7.24886227
  3122. Maximum pointwise difference: 14.76261425
  3123. Max difference location: (0, 0, 1)
  3124. Values at max diff - Original: -3.74199128, Converted: 11.02062321
  3125. Biggest difference in row (0, 0), sum 30.034544 vs 47.318748
  3126. Layer 7, Token 20 (model.layers.out comparison):
  3127. Original tensor sum: 93.501678
  3128. Converted tensor sum: 109.843590
  3129. Original tensor mean: 11.687710
  3130. Converted tensor mean: 13.730449
  3131. Mean difference: 6.95008612
  3132. Maximum pointwise difference: 15.00504684
  3133. Max difference location: (0, 0, 1)
  3134. Values at max diff - Original: 6.03040743, Converted: 21.03545380
  3135. Biggest difference in row (0, 0), sum 93.501678 vs 109.843590
  3136. Layer 8, Token 20 (model.layers.out comparison):
  3137. Original tensor sum: 79.472687
  3138. Converted tensor sum: 102.823357
  3139. Original tensor mean: 9.934086
  3140. Converted tensor mean: 12.852920
  3141. Mean difference: 7.54766369
  3142. Maximum pointwise difference: 16.02755737
  3143. Max difference location: (0, 0, 1)
  3144. Values at max diff - Original: 5.07132435, Converted: 21.09888077
  3145. Biggest difference in row (0, 0), sum 79.472687 vs 102.823357
  3146. Layer 9, Token 20 (model.layers.out comparison):
  3147. Original tensor sum: 67.706139
  3148. Converted tensor sum: 99.777931
  3149. Original tensor mean: 8.463267
  3150. Converted tensor mean: 12.472241
  3151. Mean difference: 8.86232471
  3152. Maximum pointwise difference: 16.78725052
  3153. Max difference location: (0, 0, 1)
  3154. Values at max diff - Original: 4.42850208, Converted: 21.21575165
  3155. Biggest difference in row (0, 0), sum 67.706139 vs 99.777931
  3156. Layer 10, Token 20 (model.layers.out comparison):
  3157. Original tensor sum: 63.760403
  3158. Converted tensor sum: 96.691109
  3159. Original tensor mean: 7.970050
  3160. Converted tensor mean: 12.086389
  3161. Mean difference: 9.02034378
  3162. Maximum pointwise difference: 16.24016762
  3163. Max difference location: (0, 0, 1)
  3164. Values at max diff - Original: 4.14136124, Converted: 20.38152885
  3165. Biggest difference in row (0, 0), sum 63.760403 vs 96.691109
  3166. Layer 11, Token 20 (model.layers.out comparison):
  3167. Original tensor sum: 158.635681
  3168. Converted tensor sum: 194.330322
  3169. Original tensor mean: 19.829460
  3170. Converted tensor mean: 24.291290
  3171. Mean difference: 8.75148964
  3172. Maximum pointwise difference: 16.25316620
  3173. Max difference location: (0, 0, 1)
  3174. Values at max diff - Original: 15.08195591, Converted: 31.33512306
  3175. Biggest difference in row (0, 0), sum 158.635681 vs 194.330322
  3176. Layer 12, Token 20 (model.layers.out comparison):
  3177. Original tensor sum: 159.106079
  3178. Converted tensor sum: 194.084503
  3179. Original tensor mean: 19.888260
  3180. Converted tensor mean: 24.260563
  3181. Mean difference: 8.75931835
  3182. Maximum pointwise difference: 16.29665756
  3183. Max difference location: (0, 0, 1)
  3184. Values at max diff - Original: 14.28990650, Converted: 30.58656502
  3185. Biggest difference in row (0, 0), sum 159.106079 vs 194.084503
  3186. Layer 13, Token 20 (model.layers.out comparison):
  3187. Original tensor sum: 153.442200
  3188. Converted tensor sum: 186.870270
  3189. Original tensor mean: 19.180275
  3190. Converted tensor mean: 23.358784
  3191. Mean difference: 8.66864204
  3192. Maximum pointwise difference: 15.29904747
  3193. Max difference location: (0, 0, 1)
  3194. Values at max diff - Original: 14.04843903, Converted: 29.34748650
  3195. Biggest difference in row (0, 0), sum 153.442200 vs 186.870270
  3196. Layer 14, Token 20 (model.layers.out comparison):
  3197. Original tensor sum: 147.691605
  3198. Converted tensor sum: 175.338470
  3199. Original tensor mean: 18.461451
  3200. Converted tensor mean: 21.917309
  3201. Mean difference: 8.84063625
  3202. Maximum pointwise difference: 15.63497734
  3203. Max difference location: (0, 0, 1)
  3204. Values at max diff - Original: 13.52752876, Converted: 29.16250610
  3205. Biggest difference in row (0, 0), sum 147.691605 vs 175.338470
  3206. Layer 15, Token 20 (model.layers.out comparison):
  3207. Original tensor sum: 294.136749
  3208. Converted tensor sum: 310.250946
  3209. Original tensor mean: 36.767094
  3210. Converted tensor mean: 38.781368
  3211. Mean difference: 9.18845367
  3212. Maximum pointwise difference: 14.23109627
  3213. Max difference location: (0, 0, 1)
  3214. Values at max diff - Original: 30.77650642, Converted: 45.00760269
  3215. Biggest difference in row (0, 0), sum 294.136749 vs 310.250946
  3216. Layer 0, Token 21 (model.layers.out comparison):
  3217. Original tensor sum: -18.838482
  3218. Converted tensor sum: -1.325968
  3219. Original tensor mean: -2.354810
  3220. Converted tensor mean: -0.165746
  3221. Mean difference: 2.79272628
  3222. Maximum pointwise difference: 6.95248222
  3223. Max difference location: (0, 0, 4)
  3224. Values at max diff - Original: -6.02015686, Converted: 0.93232512
  3225. Biggest difference in row (0, 0), sum -18.838482 vs -1.325968
  3226. Layer 1, Token 21 (model.layers.out comparison):
  3227. Original tensor sum: -6.250936
  3228. Converted tensor sum: -2.277201
  3229. Original tensor mean: -0.781367
  3230. Converted tensor mean: -0.284650
  3231. Mean difference: 5.05594349
  3232. Maximum pointwise difference: 9.99544907
  3233. Max difference location: (0, 0, 3)
  3234. Values at max diff - Original: 6.23908186, Converted: -3.75636768
  3235. Biggest difference in row (0, 0), sum -6.250936 vs -2.277201
  3236. Layer 2, Token 21 (model.layers.out comparison):
  3237. Original tensor sum: -2.587172
  3238. Converted tensor sum: 0.977817
  3239. Original tensor mean: -0.323396
  3240. Converted tensor mean: 0.122227
  3241. Mean difference: 3.66970563
  3242. Maximum pointwise difference: 9.02869225
  3243. Max difference location: (0, 0, 1)
  3244. Values at max diff - Original: -7.89160728, Converted: 1.13708520
  3245. Biggest difference in row (0, 0), sum -2.587172 vs 0.977817
  3246. Layer 3, Token 21 (model.layers.out comparison):
  3247. Original tensor sum: -37.525734
  3248. Converted tensor sum: 5.221979
  3249. Original tensor mean: -4.690717
  3250. Converted tensor mean: 0.652747
  3251. Mean difference: 6.04651690
  3252. Maximum pointwise difference: 12.38726807
  3253. Max difference location: (0, 0, 1)
  3254. Values at max diff - Original: -10.43804359, Converted: 1.94922423
  3255. Biggest difference in row (0, 0), sum -37.525734 vs 5.221979
  3256. Layer 4, Token 21 (model.layers.out comparison):
  3257. Original tensor sum: 4.066291
  3258. Converted tensor sum: 13.447447
  3259. Original tensor mean: 0.508286
  3260. Converted tensor mean: 1.680931
  3261. Mean difference: 5.62788296
  3262. Maximum pointwise difference: 14.04961491
  3263. Max difference location: (0, 0, 1)
  3264. Values at max diff - Original: -10.56051826, Converted: 3.48909688
  3265. Biggest difference in row (0, 0), sum 4.066291 vs 13.447447
  3266. Layer 5, Token 21 (model.layers.out comparison):
  3267. Original tensor sum: 22.123846
  3268. Converted tensor sum: 14.835675
  3269. Original tensor mean: 2.765481
  3270. Converted tensor mean: 1.854459
  3271. Mean difference: 5.25254917
  3272. Maximum pointwise difference: 11.90699482
  3273. Max difference location: (0, 0, 5)
  3274. Values at max diff - Original: 7.93798828, Converted: -3.96900630
  3275. Biggest difference in row (0, 0), sum 22.123846 vs 14.835675
  3276. Layer 6, Token 21 (model.layers.out comparison):
  3277. Original tensor sum: 22.319403
  3278. Converted tensor sum: 11.047790
  3279. Original tensor mean: 2.789925
  3280. Converted tensor mean: 1.380974
  3281. Mean difference: 5.50898457
  3282. Maximum pointwise difference: 13.00136471
  3283. Max difference location: (0, 0, 5)
  3284. Values at max diff - Original: 7.73285818, Converted: -5.26850653
  3285. Biggest difference in row (0, 0), sum 22.319403 vs 11.047790
  3286. Layer 7, Token 21 (model.layers.out comparison):
  3287. Original tensor sum: 55.420013
  3288. Converted tensor sum: 74.081238
  3289. Original tensor mean: 6.927502
  3290. Converted tensor mean: 9.260155
  3291. Mean difference: 5.90270138
  3292. Maximum pointwise difference: 12.46957588
  3293. Max difference location: (0, 0, 1)
  3294. Values at max diff - Original: -1.65777194, Converted: 10.81180382
  3295. Biggest difference in row (0, 0), sum 55.420013 vs 74.081238
  3296. Layer 8, Token 21 (model.layers.out comparison):
  3297. Original tensor sum: 39.922848
  3298. Converted tensor sum: 72.282196
  3299. Original tensor mean: 4.990356
  3300. Converted tensor mean: 9.035275
  3301. Mean difference: 6.12995577
  3302. Maximum pointwise difference: 13.29505730
  3303. Max difference location: (0, 0, 1)
  3304. Values at max diff - Original: -3.36732197, Converted: 9.92773533
  3305. Biggest difference in row (0, 0), sum 39.922848 vs 72.282196
  3306. Layer 9, Token 21 (model.layers.out comparison):
  3307. Original tensor sum: 29.193859
  3308. Converted tensor sum: 64.425896
  3309. Original tensor mean: 3.649232
  3310. Converted tensor mean: 8.053237
  3311. Mean difference: 6.22422409
  3312. Maximum pointwise difference: 14.81417084
  3313. Max difference location: (0, 0, 1)
  3314. Values at max diff - Original: -3.98170996, Converted: 10.83246040
  3315. Biggest difference in row (0, 0), sum 29.193859 vs 64.425896
  3316. Layer 10, Token 21 (model.layers.out comparison):
  3317. Original tensor sum: 23.706369
  3318. Converted tensor sum: 55.726307
  3319. Original tensor mean: 2.963296
  3320. Converted tensor mean: 6.965788
  3321. Mean difference: 6.04786444
  3322. Maximum pointwise difference: 14.39242363
  3323. Max difference location: (0, 0, 1)
  3324. Values at max diff - Original: -4.86538124, Converted: 9.52704239
  3325. Biggest difference in row (0, 0), sum 23.706369 vs 55.726307
  3326. Layer 11, Token 21 (model.layers.out comparison):
  3327. Original tensor sum: 123.990646
  3328. Converted tensor sum: 150.405350
  3329. Original tensor mean: 15.498831
  3330. Converted tensor mean: 18.800669
  3331. Mean difference: 5.61389732
  3332. Maximum pointwise difference: 14.70817947
  3333. Max difference location: (0, 0, 1)
  3334. Values at max diff - Original: 6.49463272, Converted: 21.20281219
  3335. Biggest difference in row (0, 0), sum 123.990646 vs 150.405350
  3336. Layer 12, Token 21 (model.layers.out comparison):
  3337. Original tensor sum: 120.701889
  3338. Converted tensor sum: 144.158798
  3339. Original tensor mean: 15.087736
  3340. Converted tensor mean: 18.019850
  3341. Mean difference: 5.24121237
  3342. Maximum pointwise difference: 14.31963730
  3343. Max difference location: (0, 0, 1)
  3344. Values at max diff - Original: 5.24581337, Converted: 19.56545067
  3345. Biggest difference in row (0, 0), sum 120.701889 vs 144.158798
  3346. Layer 13, Token 21 (model.layers.out comparison):
  3347. Original tensor sum: 114.196152
  3348. Converted tensor sum: 142.528229
  3349. Original tensor mean: 14.274519
  3350. Converted tensor mean: 17.816029
  3351. Mean difference: 5.27994871
  3352. Maximum pointwise difference: 14.28137684
  3353. Max difference location: (0, 0, 1)
  3354. Values at max diff - Original: 4.50468159, Converted: 18.78605843
  3355. Biggest difference in row (0, 0), sum 114.196152 vs 142.528229
  3356. Layer 14, Token 21 (model.layers.out comparison):
  3357. Original tensor sum: 109.654587
  3358. Converted tensor sum: 141.504807
  3359. Original tensor mean: 13.706823
  3360. Converted tensor mean: 17.688101
  3361. Mean difference: 5.26909733
  3362. Maximum pointwise difference: 15.04267311
  3363. Max difference location: (0, 0, 1)
  3364. Values at max diff - Original: 3.59476113, Converted: 18.63743401
  3365. Biggest difference in row (0, 0), sum 109.654587 vs 141.504807
  3366. Layer 15, Token 21 (model.layers.out comparison):
  3367. Original tensor sum: 258.799988
  3368. Converted tensor sum: 280.546570
  3369. Original tensor mean: 32.349998
  3370. Converted tensor mean: 35.068321
  3371. Mean difference: 5.38046169
  3372. Maximum pointwise difference: 13.79010963
  3373. Max difference location: (0, 0, 1)
  3374. Values at max diff - Original: 21.50290108, Converted: 35.29301071
  3375. Biggest difference in row (0, 0), sum 258.799988 vs 280.546570
  3376. Layer 0, Token 22 (model.layers.out comparison):
  3377. Original tensor sum: 22.958118
  3378. Converted tensor sum: -3.202849
  3379. Original tensor mean: 2.869765
  3380. Converted tensor mean: -0.400356
  3381. Mean difference: 4.91125917
  3382. Maximum pointwise difference: 8.36230850
  3383. Max difference location: (0, 0, 2)
  3384. Values at max diff - Original: 9.21100616, Converted: 0.84869760
  3385. Biggest difference in row (0, 0), sum 22.958118 vs -3.202849
  3386. Layer 1, Token 22 (model.layers.out comparison):
  3387. Original tensor sum: 25.125549
  3388. Converted tensor sum: -10.143456
  3389. Original tensor mean: 3.140694
  3390. Converted tensor mean: -1.267932
  3391. Mean difference: 5.89313412
  3392. Maximum pointwise difference: 9.59223843
  3393. Max difference location: (0, 0, 2)
  3394. Values at max diff - Original: 9.62790585, Converted: 0.03566782
  3395. Biggest difference in row (0, 0), sum 25.125549 vs -10.143456
  3396. Layer 2, Token 22 (model.layers.out comparison):
  3397. Original tensor sum: 27.315422
  3398. Converted tensor sum: -15.748328
  3399. Original tensor mean: 3.414428
  3400. Converted tensor mean: -1.968541
  3401. Mean difference: 8.97875500
  3402. Maximum pointwise difference: 14.55634785
  3403. Max difference location: (0, 0, 7)
  3404. Values at max diff - Original: 2.85774899, Converted: -11.69859886
  3405. Biggest difference in row (0, 0), sum 27.315422 vs -15.748328
  3406. Layer 3, Token 22 (model.layers.out comparison):
  3407. Original tensor sum: 65.650429
  3408. Converted tensor sum: -88.889626
  3409. Original tensor mean: 8.206304
  3410. Converted tensor mean: -11.111203
  3411. Mean difference: 19.31750679
  3412. Maximum pointwise difference: 27.03379250
  3413. Max difference location: (0, 0, 7)
  3414. Values at max diff - Original: 6.11478758, Converted: -20.91900444
  3415. Biggest difference in row (0, 0), sum 65.650429 vs -88.889626
  3416. Layer 4, Token 22 (model.layers.out comparison):
  3417. Original tensor sum: 61.788639
  3418. Converted tensor sum: -42.131989
  3419. Original tensor mean: 7.723580
  3420. Converted tensor mean: -5.266499
  3421. Mean difference: 12.99007797
  3422. Maximum pointwise difference: 18.81860924
  3423. Max difference location: (0, 0, 5)
  3424. Values at max diff - Original: 14.83176613, Converted: -3.98684263
  3425. Biggest difference in row (0, 0), sum 61.788639 vs -42.131989
  3426. Layer 5, Token 22 (model.layers.out comparison):
  3427. Original tensor sum: 57.004955
  3428. Converted tensor sum: 4.555844
  3429. Original tensor mean: 7.125619
  3430. Converted tensor mean: 0.569481
  3431. Mean difference: 8.63973427
  3432. Maximum pointwise difference: 19.13692093
  3433. Max difference location: (0, 0, 6)
  3434. Values at max diff - Original: 2.54869914, Converted: -16.58822250
  3435. Biggest difference in row (0, 0), sum 57.004955 vs 4.555844
  3436. Layer 6, Token 22 (model.layers.out comparison):
  3437. Original tensor sum: 54.908669
  3438. Converted tensor sum: -0.669161
  3439. Original tensor mean: 6.863584
  3440. Converted tensor mean: -0.083645
  3441. Mean difference: 8.70907402
  3442. Maximum pointwise difference: 18.54141235
  3443. Max difference location: (0, 0, 6)
  3444. Values at max diff - Original: 2.42641473, Converted: -16.11499786
  3445. Biggest difference in row (0, 0), sum 54.908669 vs -0.669161
  3446. Layer 7, Token 22 (model.layers.out comparison):
  3447. Original tensor sum: 125.605499
  3448. Converted tensor sum: -1.624224
  3449. Original tensor mean: 15.700687
  3450. Converted tensor mean: -0.203028
  3451. Mean difference: 15.90371513
  3452. Maximum pointwise difference: 27.27110672
  3453. Max difference location: (0, 0, 6)
  3454. Values at max diff - Original: 10.48501492, Converted: -16.78609276
  3455. Biggest difference in row (0, 0), sum 125.605499 vs -1.624224
  3456. Layer 8, Token 22 (model.layers.out comparison):
  3457. Original tensor sum: 109.340508
  3458. Converted tensor sum: -1.809371
  3459. Original tensor mean: 13.667563
  3460. Converted tensor mean: -0.226171
  3461. Mean difference: 14.19305420
  3462. Maximum pointwise difference: 24.19651794
  3463. Max difference location: (0, 0, 6)
  3464. Values at max diff - Original: 6.68867207, Converted: -17.50784492
  3465. Biggest difference in row (0, 0), sum 109.340508 vs -1.809371
  3466. Layer 9, Token 22 (model.layers.out comparison):
  3467. Original tensor sum: 93.036400
  3468. Converted tensor sum: -10.185041
  3469. Original tensor mean: 11.629550
  3470. Converted tensor mean: -1.273130
  3471. Mean difference: 13.22967815
  3472. Maximum pointwise difference: 22.35823822
  3473. Max difference location: (0, 0, 6)
  3474. Values at max diff - Original: 2.86865139, Converted: -19.48958588
  3475. Biggest difference in row (0, 0), sum 93.036400 vs -10.185041
  3476. Layer 10, Token 22 (model.layers.out comparison):
  3477. Original tensor sum: 85.756668
  3478. Converted tensor sum: -2.302891
  3479. Original tensor mean: 10.719584
  3480. Converted tensor mean: -0.287861
  3481. Mean difference: 11.49190331
  3482. Maximum pointwise difference: 20.63401985
  3483. Max difference location: (0, 0, 1)
  3484. Values at max diff - Original: 19.00829315, Converted: -1.62572634
  3485. Biggest difference in row (0, 0), sum 85.756668 vs -2.302891
  3486. Layer 11, Token 22 (model.layers.out comparison):
  3487. Original tensor sum: 182.162292
  3488. Converted tensor sum: -8.586711
  3489. Original tensor mean: 22.770287
  3490. Converted tensor mean: -1.073339
  3491. Mean difference: 23.84362602
  3492. Maximum pointwise difference: 34.19173050
  3493. Max difference location: (0, 0, 6)
  3494. Values at max diff - Original: 13.77398682, Converted: -20.41774368
  3495. Biggest difference in row (0, 0), sum 182.162292 vs -8.586711
  3496. Layer 12, Token 22 (model.layers.out comparison):
  3497. Original tensor sum: 182.096252
  3498. Converted tensor sum: -6.677206
  3499. Original tensor mean: 22.762032
  3500. Converted tensor mean: -0.834651
  3501. Mean difference: 23.59668159
  3502. Maximum pointwise difference: 35.20670319
  3503. Max difference location: (0, 0, 6)
  3504. Values at max diff - Original: 14.20073891, Converted: -21.00596619
  3505. Biggest difference in row (0, 0), sum 182.096252 vs -6.677206
  3506. Layer 13, Token 22 (model.layers.out comparison):
  3507. Original tensor sum: 176.400360
  3508. Converted tensor sum: -0.142300
  3509. Original tensor mean: 22.050045
  3510. Converted tensor mean: -0.017787
  3511. Mean difference: 22.06783295
  3512. Maximum pointwise difference: 34.37791824
  3513. Max difference location: (0, 0, 6)
  3514. Values at max diff - Original: 13.55050278, Converted: -20.82741547
  3515. Biggest difference in row (0, 0), sum 176.400360 vs -0.142300
  3516. Layer 14, Token 22 (model.layers.out comparison):
  3517. Original tensor sum: 169.308212
  3518. Converted tensor sum: 22.573196
  3519. Original tensor mean: 21.163527
  3520. Converted tensor mean: 2.821650
  3521. Mean difference: 18.34187508
  3522. Maximum pointwise difference: 31.75983810
  3523. Max difference location: (0, 0, 6)
  3524. Values at max diff - Original: 11.94197941, Converted: -19.81785965
  3525. Biggest difference in row (0, 0), sum 169.308212 vs 22.573196
  3526. Layer 15, Token 22 (model.layers.out comparison):
  3527. Original tensor sum: 321.080658
  3528. Converted tensor sum: 136.787018
  3529. Original tensor mean: 40.135082
  3530. Converted tensor mean: 17.098377
  3531. Mean difference: 23.03670502
  3532. Maximum pointwise difference: 37.83760452
  3533. Max difference location: (0, 0, 6)
  3534. Values at max diff - Original: 31.89689064, Converted: -5.94071388
  3535. Biggest difference in row (0, 0), sum 321.080658 vs 136.787018
  3536. Layer 0, Token 23 (model.layers.out comparison):
  3537. Original tensor sum: 3.588341
  3538. Converted tensor sum: 9.359616
  3539. Original tensor mean: 0.448543
  3540. Converted tensor mean: 1.169952
  3541. Mean difference: 3.70246077
  3542. Maximum pointwise difference: 5.65140629
  3543. Max difference location: (0, 0, 4)
  3544. Values at max diff - Original: -1.32952428, Converted: 4.32188225
  3545. Biggest difference in row (0, 0), sum 3.588341 vs 9.359616
  3546. Layer 1, Token 23 (model.layers.out comparison):
  3547. Original tensor sum: -13.513486
  3548. Converted tensor sum: 3.000220
  3549. Original tensor mean: -1.689186
  3550. Converted tensor mean: 0.375028
  3551. Mean difference: 3.49640799
  3552. Maximum pointwise difference: 8.52665997
  3553. Max difference location: (0, 0, 4)
  3554. Values at max diff - Original: -4.16102409, Converted: 4.36563587
  3555. Biggest difference in row (0, 0), sum -13.513486 vs 3.000220
  3556. Layer 2, Token 23 (model.layers.out comparison):
  3557. Original tensor sum: -19.782562
  3558. Converted tensor sum: 4.253428
  3559. Original tensor mean: -2.472820
  3560. Converted tensor mean: 0.531678
  3561. Mean difference: 5.22110939
  3562. Maximum pointwise difference: 11.62318039
  3563. Max difference location: (0, 0, 5)
  3564. Values at max diff - Original: -9.56802559, Converted: 2.05515456
  3565. Biggest difference in row (0, 0), sum -19.782562 vs 4.253428
  3566. Layer 3, Token 23 (model.layers.out comparison):
  3567. Original tensor sum: -117.794266
  3568. Converted tensor sum: 14.072861
  3569. Original tensor mean: -14.724283
  3570. Converted tensor mean: 1.759108
  3571. Mean difference: 16.48339081
  3572. Maximum pointwise difference: 22.75023079
  3573. Max difference location: (0, 0, 4)
  3574. Values at max diff - Original: -16.32844543, Converted: 6.42178583
  3575. Biggest difference in row (0, 0), sum -117.794266 vs 14.072861
  3576. Layer 4, Token 23 (model.layers.out comparison):
  3577. Original tensor sum: -73.092270
  3578. Converted tensor sum: 6.691208
  3579. Original tensor mean: -9.136534
  3580. Converted tensor mean: 0.836401
  3581. Mean difference: 10.72612858
  3582. Maximum pointwise difference: 19.55576324
  3583. Max difference location: (0, 0, 5)
  3584. Values at max diff - Original: -16.38935280, Converted: 3.16641092
  3585. Biggest difference in row (0, 0), sum -73.092270 vs 6.691208
  3586. Layer 5, Token 23 (model.layers.out comparison):
  3587. Original tensor sum: -37.015450
  3588. Converted tensor sum: 8.681388
  3589. Original tensor mean: -4.626931
  3590. Converted tensor mean: 1.085173
  3591. Mean difference: 7.61082363
  3592. Maximum pointwise difference: 20.55440712
  3593. Max difference location: (0, 0, 3)
  3594. Values at max diff - Original: -13.93057537, Converted: 6.62383223
  3595. Biggest difference in row (0, 0), sum -37.015450 vs 8.681388
  3596. Layer 6, Token 23 (model.layers.out comparison):
  3597. Original tensor sum: -90.333237
  3598. Converted tensor sum: 7.396842
  3599. Original tensor mean: -11.291655
  3600. Converted tensor mean: 0.924605
  3601. Mean difference: 12.82605839
  3602. Maximum pointwise difference: 34.69086456
  3603. Max difference location: (0, 0, 3)
  3604. Values at max diff - Original: -27.39507294, Converted: 7.29579258
  3605. Biggest difference in row (0, 0), sum -90.333237 vs 7.396842
  3606. Layer 7, Token 23 (model.layers.out comparison):
  3607. Original tensor sum: -214.526337
  3608. Converted tensor sum: 60.269241
  3609. Original tensor mean: -26.815792
  3610. Converted tensor mean: 7.533655
  3611. Mean difference: 34.34944916
  3612. Maximum pointwise difference: 55.60475159
  3613. Max difference location: (0, 0, 3)
  3614. Values at max diff - Original: -42.02355576, Converted: 13.58119678
  3615. Biggest difference in row (0, 0), sum -214.526337 vs 60.269241
  3616. Layer 8, Token 23 (model.layers.out comparison):
  3617. Original tensor sum: -138.238464
  3618. Converted tensor sum: 48.862061
  3619. Original tensor mean: -17.279808
  3620. Converted tensor mean: 6.107758
  3621. Mean difference: 23.38756561
  3622. Maximum pointwise difference: 37.09150314
  3623. Max difference location: (0, 0, 3)
  3624. Values at max diff - Original: -25.42422676, Converted: 11.66727638
  3625. Biggest difference in row (0, 0), sum -138.238464 vs 48.862061
  3626. Layer 9, Token 23 (model.layers.out comparison):
  3627. Original tensor sum: -129.366013
  3628. Converted tensor sum: 32.791050
  3629. Original tensor mean: -16.170752
  3630. Converted tensor mean: 4.098881
  3631. Mean difference: 20.26963234
  3632. Maximum pointwise difference: 31.74017334
  3633. Max difference location: (0, 0, 3)
  3634. Values at max diff - Original: -22.80648041, Converted: 8.93369293
  3635. Biggest difference in row (0, 0), sum -129.366013 vs 32.791050
  3636. Layer 10, Token 23 (model.layers.out comparison):
  3637. Original tensor sum: -112.076103
  3638. Converted tensor sum: 33.542336
  3639. Original tensor mean: -14.009513
  3640. Converted tensor mean: 4.192792
  3641. Mean difference: 18.20230484
  3642. Maximum pointwise difference: 30.71049118
  3643. Max difference location: (0, 0, 3)
  3644. Values at max diff - Original: -22.22323608, Converted: 8.48725605
  3645. Biggest difference in row (0, 0), sum -112.076103 vs 33.542336
  3646. Layer 11, Token 23 (model.layers.out comparison):
  3647. Original tensor sum: -392.294312
  3648. Converted tensor sum: 130.177963
  3649. Original tensor mean: -49.036789
  3650. Converted tensor mean: 16.272245
  3651. Mean difference: 65.30903625
  3652. Maximum pointwise difference: 80.34357452
  3653. Max difference location: (0, 0, 3)
  3654. Values at max diff - Original: -59.32800293, Converted: 21.01557350
  3655. Biggest difference in row (0, 0), sum -392.294312 vs 130.177963
  3656. Layer 12, Token 23 (model.layers.out comparison):
  3657. Original tensor sum: -416.741821
  3658. Converted tensor sum: 126.312363
  3659. Original tensor mean: -52.092728
  3660. Converted tensor mean: 15.789045
  3661. Mean difference: 67.88177490
  3662. Maximum pointwise difference: 87.29133606
  3663. Max difference location: (0, 0, 3)
  3664. Values at max diff - Original: -65.95119476, Converted: 21.34013939
  3665. Biggest difference in row (0, 0), sum -416.741821 vs 126.312363
  3666. Layer 13, Token 23 (model.layers.out comparison):
  3667. Original tensor sum: -420.622223
  3668. Converted tensor sum: 122.472458
  3669. Original tensor mean: -52.577778
  3670. Converted tensor mean: 15.309057
  3671. Mean difference: 67.88684082
  3672. Maximum pointwise difference: 89.02587891
  3673. Max difference location: (0, 0, 3)
  3674. Values at max diff - Original: -68.22624969, Converted: 20.79962921
  3675. Biggest difference in row (0, 0), sum -420.622223 vs 122.472458
  3676. Layer 14, Token 23 (model.layers.out comparison):
  3677. Original tensor sum: -398.408966
  3678. Converted tensor sum: 120.881279
  3679. Original tensor mean: -49.801121
  3680. Converted tensor mean: 15.110160
  3681. Mean difference: 64.91127777
  3682. Maximum pointwise difference: 91.32544708
  3683. Max difference location: (0, 0, 3)
  3684. Values at max diff - Original: -69.91543579, Converted: 21.41001320
  3685. Biggest difference in row (0, 0), sum -398.408966 vs 120.881279
  3686. Layer 15, Token 23 (model.layers.out comparison):
  3687. Original tensor sum: -754.637085
  3688. Converted tensor sum: 262.993530
  3689. Original tensor mean: -94.329636
  3690. Converted tensor mean: 32.874191
  3691. Mean difference: 127.20383453
  3692. Maximum pointwise difference: 157.75305176
  3693. Max difference location: (0, 0, 3)
  3694. Values at max diff - Original: -119.83902740, Converted: 37.91403198
  3695. Biggest difference in row (0, 0), sum -754.637085 vs 262.993530
  3696. Layer 0, Token 24 (model.layers.out comparison):
  3697. Original tensor sum: 14.859251
  3698. Converted tensor sum: 2.731961
  3699. Original tensor mean: 1.857406
  3700. Converted tensor mean: 0.341495
  3701. Mean difference: 4.21605587
  3702. Maximum pointwise difference: 9.80887794
  3703. Max difference location: (0, 0, 2)
  3704. Values at max diff - Original: 3.86449504, Converted: -5.94438314
  3705. Biggest difference in row (0, 0), sum 14.859251 vs 2.731961
  3706. Layer 1, Token 24 (model.layers.out comparison):
  3707. Original tensor sum: 13.986740
  3708. Converted tensor sum: -2.697716
  3709. Original tensor mean: 1.748343
  3710. Converted tensor mean: -0.337215
  3711. Mean difference: 4.70033360
  3712. Maximum pointwise difference: 10.86390495
  3713. Max difference location: (0, 0, 2)
  3714. Values at max diff - Original: 3.54197407, Converted: -7.32193136
  3715. Biggest difference in row (0, 0), sum 13.986740 vs -2.697716
  3716. Layer 2, Token 24 (model.layers.out comparison):
  3717. Original tensor sum: 13.856454
  3718. Converted tensor sum: -1.915652
  3719. Original tensor mean: 1.732057
  3720. Converted tensor mean: -0.239456
  3721. Mean difference: 5.10369968
  3722. Maximum pointwise difference: 13.13724899
  3723. Max difference location: (0, 0, 2)
  3724. Values at max diff - Original: 3.00663447, Converted: -10.13061428
  3725. Biggest difference in row (0, 0), sum 13.856454 vs -1.915652
  3726. Layer 3, Token 24 (model.layers.out comparison):
  3727. Original tensor sum: 63.979485
  3728. Converted tensor sum: -50.051231
  3729. Original tensor mean: 7.997436
  3730. Converted tensor mean: -6.256404
  3731. Mean difference: 14.25383949
  3732. Maximum pointwise difference: 25.59371948
  3733. Max difference location: (0, 0, 2)
  3734. Values at max diff - Original: 9.45009327, Converted: -16.14362717
  3735. Biggest difference in row (0, 0), sum 63.979485 vs -50.051231
  3736. Layer 4, Token 24 (model.layers.out comparison):
  3737. Original tensor sum: 60.174347
  3738. Converted tensor sum: -64.423790
  3739. Original tensor mean: 7.521793
  3740. Converted tensor mean: -8.052974
  3741. Mean difference: 15.57476616
  3742. Maximum pointwise difference: 27.42375755
  3743. Max difference location: (0, 0, 2)
  3744. Values at max diff - Original: 8.99039078, Converted: -18.43336678
  3745. Biggest difference in row (0, 0), sum 60.174347 vs -64.423790
  3746. Layer 5, Token 24 (model.layers.out comparison):
  3747. Original tensor sum: 53.195156
  3748. Converted tensor sum: -88.183350
  3749. Original tensor mean: 6.649395
  3750. Converted tensor mean: -11.022919
  3751. Mean difference: 17.67231369
  3752. Maximum pointwise difference: 35.18456650
  3753. Max difference location: (0, 0, 2)
  3754. Values at max diff - Original: 7.48332596, Converted: -27.70124054
  3755. Biggest difference in row (0, 0), sum 53.195156 vs -88.183350
  3756. Layer 6, Token 24 (model.layers.out comparison):
  3757. Original tensor sum: 55.262775
  3758. Converted tensor sum: -106.113434
  3759. Original tensor mean: 6.907847
  3760. Converted tensor mean: -13.264179
  3761. Mean difference: 20.17202759
  3762. Maximum pointwise difference: 40.46305084
  3763. Max difference location: (0, 0, 2)
  3764. Values at max diff - Original: 8.41111183, Converted: -32.05193710
  3765. Biggest difference in row (0, 0), sum 55.262775 vs -106.113434
  3766. Layer 7, Token 24 (model.layers.out comparison):
  3767. Original tensor sum: 120.454941
  3768. Converted tensor sum: -239.645325
  3769. Original tensor mean: 15.056868
  3770. Converted tensor mean: -29.955666
  3771. Mean difference: 45.01253128
  3772. Maximum pointwise difference: 65.79338074
  3773. Max difference location: (0, 0, 2)
  3774. Values at max diff - Original: 14.88038158, Converted: -50.91299820
  3775. Biggest difference in row (0, 0), sum 120.454941 vs -239.645325
  3776. Layer 8, Token 24 (model.layers.out comparison):
  3777. Original tensor sum: 103.648430
  3778. Converted tensor sum: -223.958084
  3779. Original tensor mean: 12.956054
  3780. Converted tensor mean: -27.994761
  3781. Mean difference: 40.95081329
  3782. Maximum pointwise difference: 71.07021332
  3783. Max difference location: (0, 0, 2)
  3784. Values at max diff - Original: 13.01342583, Converted: -58.05678558
  3785. Biggest difference in row (0, 0), sum 103.648430 vs -223.958084
  3786. Layer 9, Token 24 (model.layers.out comparison):
  3787. Original tensor sum: 90.361565
  3788. Converted tensor sum: -216.935654
  3789. Original tensor mean: 11.295196
  3790. Converted tensor mean: -27.116957
  3791. Mean difference: 38.41215515
  3792. Maximum pointwise difference: 69.46690369
  3793. Max difference location: (0, 0, 2)
  3794. Values at max diff - Original: 9.60147953, Converted: -59.86542511
  3795. Biggest difference in row (0, 0), sum 90.361565 vs -216.935654
  3796. Layer 10, Token 24 (model.layers.out comparison):
  3797. Original tensor sum: 83.880753
  3798. Converted tensor sum: -215.275970
  3799. Original tensor mean: 10.485094
  3800. Converted tensor mean: -26.909496
  3801. Mean difference: 37.39459229
  3802. Maximum pointwise difference: 70.35929108
  3803. Max difference location: (0, 0, 2)
  3804. Values at max diff - Original: 8.32141781, Converted: -62.03787613
  3805. Biggest difference in row (0, 0), sum 83.880753 vs -215.275970
  3806. Layer 11, Token 24 (model.layers.out comparison):
  3807. Original tensor sum: 169.893204
  3808. Converted tensor sum: -521.842712
  3809. Original tensor mean: 21.236650
  3810. Converted tensor mean: -65.230339
  3811. Mean difference: 86.46699524
  3812. Maximum pointwise difference: 124.57461548
  3813. Max difference location: (0, 0, 2)
  3814. Values at max diff - Original: 19.84806633, Converted: -104.72654724
  3815. Biggest difference in row (0, 0), sum 169.893204 vs -521.842712
  3816. Layer 12, Token 24 (model.layers.out comparison):
  3817. Original tensor sum: 170.650391
  3818. Converted tensor sum: -527.495605
  3819. Original tensor mean: 21.331299
  3820. Converted tensor mean: -65.936951
  3821. Mean difference: 87.26824951
  3822. Maximum pointwise difference: 124.01423645
  3823. Max difference location: (0, 0, 2)
  3824. Values at max diff - Original: 20.41718483, Converted: -103.59705353
  3825. Biggest difference in row (0, 0), sum 170.650391 vs -527.495605
  3826. Layer 13, Token 24 (model.layers.out comparison):
  3827. Original tensor sum: 167.707260
  3828. Converted tensor sum: -525.824341
  3829. Original tensor mean: 20.963408
  3830. Converted tensor mean: -65.728043
  3831. Mean difference: 86.69145203
  3832. Maximum pointwise difference: 120.31568909
  3833. Max difference location: (0, 0, 2)
  3834. Values at max diff - Original: 18.97763062, Converted: -101.33805847
  3835. Biggest difference in row (0, 0), sum 167.707260 vs -525.824341
  3836. Layer 14, Token 24 (model.layers.out comparison):
  3837. Original tensor sum: 160.910034
  3838. Converted tensor sum: -562.698975
  3839. Original tensor mean: 20.113754
  3840. Converted tensor mean: -70.337372
  3841. Mean difference: 90.45112610
  3842. Maximum pointwise difference: 127.80590057
  3843. Max difference location: (0, 0, 2)
  3844. Values at max diff - Original: 17.37784767, Converted: -110.42805481
  3845. Biggest difference in row (0, 0), sum 160.910034 vs -562.698975
  3846. Layer 15, Token 24 (model.layers.out comparison):
  3847. Original tensor sum: 306.123810
  3848. Converted tensor sum: -931.621094
  3849. Original tensor mean: 38.265476
  3850. Converted tensor mean: -116.452637
  3851. Mean difference: 154.71810913
  3852. Maximum pointwise difference: 176.81520081
  3853. Max difference location: (0, 0, 2)
  3854. Values at max diff - Original: 29.99453545, Converted: -146.82066345
  3855. Biggest difference in row (0, 0), sum 306.123810 vs -931.621094
  3856. Layer 0, Token 25 (model.layers.out comparison):
  3857. Original tensor sum: -6.641135
  3858. Converted tensor sum: -3.933383
  3859. Original tensor mean: -0.830142
  3860. Converted tensor mean: -0.491673
  3861. Mean difference: 3.03462601
  3862. Maximum pointwise difference: 5.75030708
  3863. Max difference location: (0, 0, 7)
  3864. Values at max diff - Original: -6.01051331, Converted: -0.26020634
  3865. Biggest difference in row (0, 0), sum -6.641135 vs -3.933383
  3866. Layer 1, Token 25 (model.layers.out comparison):
  3867. Original tensor sum: -1.642994
  3868. Converted tensor sum: -11.347046
  3869. Original tensor mean: -0.205374
  3870. Converted tensor mean: -1.418381
  3871. Mean difference: 2.82665110
  3872. Maximum pointwise difference: 5.44076443
  3873. Max difference location: (0, 0, 3)
  3874. Values at max diff - Original: -3.67477202, Converted: -9.11553669
  3875. Biggest difference in row (0, 0), sum -1.642994 vs -11.347046
  3876. Layer 2, Token 25 (model.layers.out comparison):
  3877. Original tensor sum: 6.404377
  3878. Converted tensor sum: -14.681939
  3879. Original tensor mean: 0.800547
  3880. Converted tensor mean: -1.835242
  3881. Mean difference: 3.35868859
  3882. Maximum pointwise difference: 7.97232580
  3883. Max difference location: (0, 0, 6)
  3884. Values at max diff - Original: 5.46229649, Converted: -2.51002932
  3885. Biggest difference in row (0, 0), sum 6.404377 vs -14.681939
  3886. Layer 3, Token 25 (model.layers.out comparison):
  3887. Original tensor sum: 73.178505
  3888. Converted tensor sum: -57.235046
  3889. Original tensor mean: 9.147313
  3890. Converted tensor mean: -7.154381
  3891. Mean difference: 16.30169487
  3892. Maximum pointwise difference: 20.31940651
  3893. Max difference location: (0, 0, 3)
  3894. Values at max diff - Original: 6.96401119, Converted: -13.35539532
  3895. Biggest difference in row (0, 0), sum 73.178505 vs -57.235046
  3896. Layer 4, Token 25 (model.layers.out comparison):
  3897. Original tensor sum: 65.662933
  3898. Converted tensor sum: -75.145912
  3899. Original tensor mean: 8.207867
  3900. Converted tensor mean: -9.393239
  3901. Mean difference: 17.60110474
  3902. Maximum pointwise difference: 25.96934509
  3903. Max difference location: (0, 0, 3)
  3904. Values at max diff - Original: 5.62515926, Converted: -20.34418488
  3905. Biggest difference in row (0, 0), sum 65.662933 vs -75.145912
  3906. Layer 5, Token 25 (model.layers.out comparison):
  3907. Original tensor sum: 54.107101
  3908. Converted tensor sum: -105.733917
  3909. Original tensor mean: 6.763388
  3910. Converted tensor mean: -13.216740
  3911. Mean difference: 19.98012924
  3912. Maximum pointwise difference: 28.99731064
  3913. Max difference location: (0, 0, 4)
  3914. Values at max diff - Original: 10.49883652, Converted: -18.49847412
  3915. Biggest difference in row (0, 0), sum 54.107101 vs -105.733917
  3916. Layer 6, Token 25 (model.layers.out comparison):
  3917. Original tensor sum: 48.177361
  3918. Converted tensor sum: -134.772308
  3919. Original tensor mean: 6.022170
  3920. Converted tensor mean: -16.846539
  3921. Mean difference: 22.86870766
  3922. Maximum pointwise difference: 36.34035110
  3923. Max difference location: (0, 0, 3)
  3924. Values at max diff - Original: 2.22752476, Converted: -34.11282730
  3925. Biggest difference in row (0, 0), sum 48.177361 vs -134.772308
  3926. Layer 7, Token 25 (model.layers.out comparison):
  3927. Original tensor sum: 111.839172
  3928. Converted tensor sum: -277.301056
  3929. Original tensor mean: 13.979897
  3930. Converted tensor mean: -34.662632
  3931. Mean difference: 48.64252853
  3932. Maximum pointwise difference: 62.89208221
  3933. Max difference location: (0, 0, 3)
  3934. Values at max diff - Original: 9.78997040, Converted: -53.10211182
  3935. Biggest difference in row (0, 0), sum 111.839172 vs -277.301056
  3936. Layer 8, Token 25 (model.layers.out comparison):
  3937. Original tensor sum: 104.861267
  3938. Converted tensor sum: -286.217560
  3939. Original tensor mean: 13.107658
  3940. Converted tensor mean: -35.777195
  3941. Mean difference: 48.88484955
  3942. Maximum pointwise difference: 65.30915833
  3943. Max difference location: (0, 0, 4)
  3944. Values at max diff - Original: 20.24993896, Converted: -45.05921936
  3945. Biggest difference in row (0, 0), sum 104.861267 vs -286.217560
  3946. Layer 9, Token 25 (model.layers.out comparison):
  3947. Original tensor sum: 96.630295
  3948. Converted tensor sum: -313.393005
  3949. Original tensor mean: 12.078787
  3950. Converted tensor mean: -39.174126
  3951. Mean difference: 51.25291061
  3952. Maximum pointwise difference: 67.83577728
  3953. Max difference location: (0, 0, 4)
  3954. Values at max diff - Original: 19.77431297, Converted: -48.06146622
  3955. Biggest difference in row (0, 0), sum 96.630295 vs -313.393005
  3956. Layer 10, Token 25 (model.layers.out comparison):
  3957. Original tensor sum: 89.098160
  3958. Converted tensor sum: -316.188721
  3959. Original tensor mean: 11.137270
  3960. Converted tensor mean: -39.523590
  3961. Mean difference: 50.66085815
  3962. Maximum pointwise difference: 63.01490784
  3963. Max difference location: (0, 0, 4)
  3964. Values at max diff - Original: 18.63522339, Converted: -44.37968445
  3965. Biggest difference in row (0, 0), sum 89.098160 vs -316.188721
  3966. Layer 11, Token 25 (model.layers.out comparison):
  3967. Original tensor sum: 183.329193
  3968. Converted tensor sum: -640.859741
  3969. Original tensor mean: 22.916149
  3970. Converted tensor mean: -80.107468
  3971. Mean difference: 103.02362061
  3972. Maximum pointwise difference: 123.61917114
  3973. Max difference location: (0, 0, 0)
  3974. Values at max diff - Original: 28.08130074, Converted: -95.53787231
  3975. Biggest difference in row (0, 0), sum 183.329193 vs -640.859741
  3976. Layer 12, Token 25 (model.layers.out comparison):
  3977. Original tensor sum: 183.012512
  3978. Converted tensor sum: -647.243774
  3979. Original tensor mean: 22.876564
  3980. Converted tensor mean: -80.905472
  3981. Mean difference: 103.78203583
  3982. Maximum pointwise difference: 121.95301819
  3983. Max difference location: (0, 0, 0)
  3984. Values at max diff - Original: 28.78862381, Converted: -93.16439056
  3985. Biggest difference in row (0, 0), sum 183.012512 vs -647.243774
  3986. Layer 13, Token 25 (model.layers.out comparison):
  3987. Original tensor sum: 179.038055
  3988. Converted tensor sum: -675.284363
  3989. Original tensor mean: 22.379757
  3990. Converted tensor mean: -84.410545
  3991. Mean difference: 106.79029846
  3992. Maximum pointwise difference: 124.18766785
  3993. Max difference location: (0, 0, 0)
  3994. Values at max diff - Original: 29.24967384, Converted: -94.93799591
  3995. Biggest difference in row (0, 0), sum 179.038055 vs -675.284363
  3996. Layer 14, Token 25 (model.layers.out comparison):
  3997. Original tensor sum: 177.600830
  3998. Converted tensor sum: -653.687622
  3999. Original tensor mean: 22.200104
  4000. Converted tensor mean: -81.710953
  4001. Mean difference: 103.91105652
  4002. Maximum pointwise difference: 120.82553864
  4003. Max difference location: (0, 0, 3)
  4004. Values at max diff - Original: 21.10656929, Converted: -99.71897125
  4005. Biggest difference in row (0, 0), sum 177.600830 vs -653.687622
  4006. Layer 15, Token 25 (model.layers.out comparison):
  4007. Original tensor sum: 323.013031
  4008. Converted tensor sum: -1030.671143
  4009. Original tensor mean: 40.376629
  4010. Converted tensor mean: -128.833893
  4011. Mean difference: 169.21054077
  4012. Maximum pointwise difference: 193.25675964
  4013. Max difference location: (0, 0, 0)
  4014. Values at max diff - Original: 50.33515549, Converted: -142.92160034
  4015. Biggest difference in row (0, 0), sum 323.013031 vs -1030.671143
  4016. Layer 0, Token 26 (model.layers.out comparison):
  4017. Original tensor sum: 65.941025
  4018. Converted tensor sum: -21.309677
  4019. Original tensor mean: 8.242628
  4020. Converted tensor mean: -2.663710
  4021. Mean difference: 10.92460823
  4022. Maximum pointwise difference: 22.60500336
  4023. Max difference location: (0, 0, 4)
  4024. Values at max diff - Original: 19.03843307, Converted: -3.56657028
  4025. Biggest difference in row (0, 0), sum 65.941025 vs -21.309677
  4026. Layer 1, Token 26 (model.layers.out comparison):
  4027. Original tensor sum: 52.076649
  4028. Converted tensor sum: -57.925156
  4029. Original tensor mean: 6.509581
  4030. Converted tensor mean: -7.240644
  4031. Mean difference: 14.23825073
  4032. Maximum pointwise difference: 19.17949104
  4033. Max difference location: (0, 0, 3)
  4034. Values at max diff - Original: 5.37531137, Converted: -13.80417919
  4035. Biggest difference in row (0, 0), sum 52.076649 vs -57.925156
  4036. Layer 2, Token 26 (model.layers.out comparison):
  4037. Original tensor sum: 51.231728
  4038. Converted tensor sum: -47.847797
  4039. Original tensor mean: 6.403966
  4040. Converted tensor mean: -5.980975
  4041. Mean difference: 12.38494110
  4042. Maximum pointwise difference: 23.60085297
  4043. Max difference location: (0, 0, 6)
  4044. Values at max diff - Original: 9.02445030, Converted: -14.57640362
  4045. Biggest difference in row (0, 0), sum 51.231728 vs -47.847797
  4046. Layer 3, Token 26 (model.layers.out comparison):
  4047. Original tensor sum: 107.302612
  4048. Converted tensor sum: -173.292923
  4049. Original tensor mean: 13.412827
  4050. Converted tensor mean: -21.661615
  4051. Mean difference: 35.07444000
  4052. Maximum pointwise difference: 43.60850143
  4053. Max difference location: (0, 0, 6)
  4054. Values at max diff - Original: 14.85190392, Converted: -28.75659752
  4055. Biggest difference in row (0, 0), sum 107.302612 vs -173.292923
  4056. Layer 4, Token 26 (model.layers.out comparison):
  4057. Original tensor sum: 97.273697
  4058. Converted tensor sum: -182.550171
  4059. Original tensor mean: 12.159212
  4060. Converted tensor mean: -22.818771
  4061. Mean difference: 34.97798157
  4062. Maximum pointwise difference: 46.59681320
  4063. Max difference location: (0, 0, 6)
  4064. Values at max diff - Original: 14.26772594, Converted: -32.32908630
  4065. Biggest difference in row (0, 0), sum 97.273697 vs -182.550171
  4066. Layer 5, Token 26 (model.layers.out comparison):
  4067. Original tensor sum: 85.259064
  4068. Converted tensor sum: -172.859528
  4069. Original tensor mean: 10.657383
  4070. Converted tensor mean: -21.607441
  4071. Mean difference: 32.26482391
  4072. Maximum pointwise difference: 44.72983170
  4073. Max difference location: (0, 0, 6)
  4074. Values at max diff - Original: 13.95336819, Converted: -30.77646255
  4075. Biggest difference in row (0, 0), sum 85.259064 vs -172.859528
  4076. Layer 6, Token 26 (model.layers.out comparison):
  4077. Original tensor sum: 87.096161
  4078. Converted tensor sum: -208.315033
  4079. Original tensor mean: 10.887020
  4080. Converted tensor mean: -26.039379
  4081. Mean difference: 36.92639923
  4082. Maximum pointwise difference: 45.54611206
  4083. Max difference location: (0, 0, 6)
  4084. Values at max diff - Original: 14.15797043, Converted: -31.38814354
  4085. Biggest difference in row (0, 0), sum 87.096161 vs -208.315033
  4086. Layer 7, Token 26 (model.layers.out comparison):
  4087. Original tensor sum: 160.905060
  4088. Converted tensor sum: -356.607910
  4089. Original tensor mean: 20.113132
  4090. Converted tensor mean: -44.575989
  4091. Mean difference: 64.68911743
  4092. Maximum pointwise difference: 73.27433014
  4093. Max difference location: (0, 0, 3)
  4094. Values at max diff - Original: 19.29874229, Converted: -53.97558594
  4095. Biggest difference in row (0, 0), sum 160.905060 vs -356.607910
  4096. Layer 8, Token 26 (model.layers.out comparison):
  4097. Original tensor sum: 147.546188
  4098. Converted tensor sum: -372.627655
  4099. Original tensor mean: 18.443274
  4100. Converted tensor mean: -46.578457
  4101. Mean difference: 65.02172852
  4102. Maximum pointwise difference: 75.06597900
  4103. Max difference location: (0, 0, 3)
  4104. Values at max diff - Original: 17.32047462, Converted: -57.74550629
  4105. Biggest difference in row (0, 0), sum 147.546188 vs -372.627655
  4106. Layer 9, Token 26 (model.layers.out comparison):
  4107. Original tensor sum: 142.108231
  4108. Converted tensor sum: -384.533997
  4109. Original tensor mean: 17.763529
  4110. Converted tensor mean: -48.066750
  4111. Mean difference: 65.83027649
  4112. Maximum pointwise difference: 80.39822388
  4113. Max difference location: (0, 0, 0)
  4114. Values at max diff - Original: 9.57865334, Converted: -70.81957245
  4115. Biggest difference in row (0, 0), sum 142.108231 vs -384.533997
  4116. Layer 10, Token 26 (model.layers.out comparison):
  4117. Original tensor sum: 136.597595
  4118. Converted tensor sum: -406.001617
  4119. Original tensor mean: 17.074699
  4120. Converted tensor mean: -50.750202
  4121. Mean difference: 67.82489777
  4122. Maximum pointwise difference: 83.06503296
  4123. Max difference location: (0, 0, 3)
  4124. Values at max diff - Original: 16.25280952, Converted: -66.81222534
  4125. Biggest difference in row (0, 0), sum 136.597595 vs -406.001617
  4126. Layer 11, Token 26 (model.layers.out comparison):
  4127. Original tensor sum: 234.238876
  4128. Converted tensor sum: -719.742371
  4129. Original tensor mean: 29.279860
  4130. Converted tensor mean: -89.967796
  4131. Mean difference: 119.24765778
  4132. Maximum pointwise difference: 144.35720825
  4133. Max difference location: (0, 0, 0)
  4134. Values at max diff - Original: 22.54579163, Converted: -121.81141663
  4135. Biggest difference in row (0, 0), sum 234.238876 vs -719.742371
  4136. Layer 12, Token 26 (model.layers.out comparison):
  4137. Original tensor sum: 230.967987
  4138. Converted tensor sum: -737.411499
  4139. Original tensor mean: 28.870998
  4140. Converted tensor mean: -92.176437
  4141. Mean difference: 121.04743958
  4142. Maximum pointwise difference: 145.76480103
  4143. Max difference location: (0, 0, 0)
  4144. Values at max diff - Original: 23.33647728, Converted: -122.42832184
  4145. Biggest difference in row (0, 0), sum 230.967987 vs -737.411499
  4146. Layer 13, Token 26 (model.layers.out comparison):
  4147. Original tensor sum: 225.836136
  4148. Converted tensor sum: -743.471008
  4149. Original tensor mean: 28.229517
  4150. Converted tensor mean: -92.933876
  4151. Mean difference: 121.16339111
  4152. Maximum pointwise difference: 141.17944336
  4153. Max difference location: (0, 0, 0)
  4154. Values at max diff - Original: 23.16177559, Converted: -118.01766205
  4155. Biggest difference in row (0, 0), sum 225.836136 vs -743.471008
  4156. Layer 14, Token 26 (model.layers.out comparison):
  4157. Original tensor sum: 222.057236
  4158. Converted tensor sum: -845.007874
  4159. Original tensor mean: 27.757154
  4160. Converted tensor mean: -105.625984
  4161. Mean difference: 133.38313293
  4162. Maximum pointwise difference: 164.57283020
  4163. Max difference location: (0, 0, 3)
  4164. Values at max diff - Original: 29.71310997, Converted: -134.85972595
  4165. Biggest difference in row (0, 0), sum 222.057236 vs -845.007874
  4166. Layer 15, Token 26 (model.layers.out comparison):
  4167. Original tensor sum: 366.139526
  4168. Converted tensor sum: -1227.681152
  4169. Original tensor mean: 45.767441
  4170. Converted tensor mean: -153.460144
  4171. Mean difference: 199.22756958
  4172. Maximum pointwise difference: 235.55526733
  4173. Max difference location: (0, 0, 3)
  4174. Values at max diff - Original: 46.65935516, Converted: -188.89590454
  4175. Biggest difference in row (0, 0), sum 366.139526 vs -1227.681152
  4176. Layer 0, Token 27 (model.layers.out comparison):
  4177. Original tensor sum: 0.538792
  4178. Converted tensor sum: -2.767126
  4179. Original tensor mean: 0.067349
  4180. Converted tensor mean: -0.345891
  4181. Mean difference: 1.04583490
  4182. Maximum pointwise difference: 4.03163290
  4183. Max difference location: (0, 0, 7)
  4184. Values at max diff - Original: 4.54428434, Converted: 0.51265144
  4185. Biggest difference in row (0, 0), sum 0.538792 vs -2.767126
  4186. Layer 1, Token 27 (model.layers.out comparison):
  4187. Original tensor sum: -13.666726
  4188. Converted tensor sum: 4.859785
  4189. Original tensor mean: -1.708341
  4190. Converted tensor mean: 0.607473
  4191. Mean difference: 3.73808312
  4192. Maximum pointwise difference: 11.04657841
  4193. Max difference location: (0, 0, 5)
  4194. Values at max diff - Original: -6.84830761, Converted: 4.19827080
  4195. Biggest difference in row (0, 0), sum -13.666726 vs 4.859785
  4196. Layer 2, Token 27 (model.layers.out comparison):
  4197. Original tensor sum: 19.892342
  4198. Converted tensor sum: 18.553621
  4199. Original tensor mean: 2.486543
  4200. Converted tensor mean: 2.319203
  4201. Mean difference: 3.86019540
  4202. Maximum pointwise difference: 12.85380554
  4203. Max difference location: (0, 0, 5)
  4204. Values at max diff - Original: -5.59446335, Converted: 7.25934219
  4205. Biggest difference in row (0, 0), sum 19.892342 vs 18.553621
  4206. Layer 3, Token 27 (model.layers.out comparison):
  4207. Original tensor sum: 84.246483
  4208. Converted tensor sum: 49.827652
  4209. Original tensor mean: 10.530810
  4210. Converted tensor mean: 6.228456
  4211. Mean difference: 6.56024361
  4212. Maximum pointwise difference: 11.30776882
  4213. Max difference location: (0, 0, 2)
  4214. Values at max diff - Original: 18.61387444, Converted: 7.30610561
  4215. Biggest difference in row (0, 0), sum 84.246483 vs 49.827652
  4216. Layer 4, Token 27 (model.layers.out comparison):
  4217. Original tensor sum: 72.374397
  4218. Converted tensor sum: 50.589382
  4219. Original tensor mean: 9.046800
  4220. Converted tensor mean: 6.323673
  4221. Mean difference: 5.51325321
  4222. Maximum pointwise difference: 11.16050529
  4223. Max difference location: (0, 0, 5)
  4224. Values at max diff - Original: -2.93389368, Converted: 8.22661209
  4225. Biggest difference in row (0, 0), sum 72.374397 vs 50.589382
  4226. Layer 5, Token 27 (model.layers.out comparison):
  4227. Original tensor sum: 68.200790
  4228. Converted tensor sum: 51.359711
  4229. Original tensor mean: 8.525099
  4230. Converted tensor mean: 6.419964
  4231. Mean difference: 4.32947350
  4232. Maximum pointwise difference: 8.89735222
  4233. Max difference location: (0, 0, 5)
  4234. Values at max diff - Original: -3.28547406, Converted: 5.61187792
  4235. Biggest difference in row (0, 0), sum 68.200790 vs 51.359711
  4236. Layer 6, Token 27 (model.layers.out comparison):
  4237. Original tensor sum: 70.421684
  4238. Converted tensor sum: 41.851700
  4239. Original tensor mean: 8.802711
  4240. Converted tensor mean: 5.231462
  4241. Mean difference: 5.60544014
  4242. Maximum pointwise difference: 9.42855549
  4243. Max difference location: (0, 0, 2)
  4244. Values at max diff - Original: 15.64872551, Converted: 6.22017002
  4245. Biggest difference in row (0, 0), sum 70.421684 vs 41.851700
  4246. Layer 7, Token 27 (model.layers.out comparison):
  4247. Original tensor sum: 138.012558
  4248. Converted tensor sum: 106.052734
  4249. Original tensor mean: 17.251570
  4250. Converted tensor mean: 13.256592
  4251. Mean difference: 5.83357430
  4252. Maximum pointwise difference: 9.46822166
  4253. Max difference location: (0, 0, 4)
  4254. Values at max diff - Original: 20.60037422, Converted: 11.13215256
  4255. Biggest difference in row (0, 0), sum 138.012558 vs 106.052734
  4256. Layer 8, Token 27 (model.layers.out comparison):
  4257. Original tensor sum: 124.592545
  4258. Converted tensor sum: 109.657555
  4259. Original tensor mean: 15.574068
  4260. Converted tensor mean: 13.707194
  4261. Mean difference: 4.43112850
  4262. Maximum pointwise difference: 10.25702190
  4263. Max difference location: (0, 0, 5)
  4264. Values at max diff - Original: 3.63314724, Converted: 13.89016914
  4265. Biggest difference in row (0, 0), sum 124.592545 vs 109.657555
  4266. Layer 9, Token 27 (model.layers.out comparison):
  4267. Original tensor sum: 110.794357
  4268. Converted tensor sum: 109.277565
  4269. Original tensor mean: 13.849295
  4270. Converted tensor mean: 13.659696
  4271. Mean difference: 4.23832560
  4272. Maximum pointwise difference: 11.81062031
  4273. Max difference location: (0, 0, 5)
  4274. Values at max diff - Original: 1.84443951, Converted: 13.65505981
  4275. Biggest difference in row (0, 0), sum 110.794357 vs 109.277565
  4276. Layer 10, Token 27 (model.layers.out comparison):
  4277. Original tensor sum: 104.034340
  4278. Converted tensor sum: 104.158554
  4279. Original tensor mean: 13.004292
  4280. Converted tensor mean: 13.019819
  4281. Mean difference: 4.04881191
  4282. Maximum pointwise difference: 12.64350224
  4283. Max difference location: (0, 0, 5)
  4284. Values at max diff - Original: 0.44486341, Converted: 13.08836555
  4285. Biggest difference in row (0, 0), sum 104.034340 vs 104.158554
  4286. Layer 11, Token 27 (model.layers.out comparison):
  4287. Original tensor sum: 194.747101
  4288. Converted tensor sum: 186.990341
  4289. Original tensor mean: 24.343388
  4290. Converted tensor mean: 23.373793
  4291. Mean difference: 4.42853832
  4292. Maximum pointwise difference: 11.92787266
  4293. Max difference location: (0, 0, 5)
  4294. Values at max diff - Original: 11.99839497, Converted: 23.92626762
  4295. Biggest difference in row (0, 0), sum 194.747101 vs 186.990341
  4296. Layer 12, Token 27 (model.layers.out comparison):
  4297. Original tensor sum: 195.014465
  4298. Converted tensor sum: 185.515793
  4299. Original tensor mean: 24.376808
  4300. Converted tensor mean: 23.189474
  4301. Mean difference: 4.49333429
  4302. Maximum pointwise difference: 11.10862160
  4303. Max difference location: (0, 0, 5)
  4304. Values at max diff - Original: 11.92906380, Converted: 23.03768539
  4305. Biggest difference in row (0, 0), sum 195.014465 vs 185.515793
  4306. Layer 13, Token 27 (model.layers.out comparison):
  4307. Original tensor sum: 187.897064
  4308. Converted tensor sum: 182.353088
  4309. Original tensor mean: 23.487133
  4310. Converted tensor mean: 22.794136
  4311. Mean difference: 4.64961338
  4312. Maximum pointwise difference: 12.63825989
  4313. Max difference location: (0, 0, 5)
  4314. Values at max diff - Original: 10.51706123, Converted: 23.15532112
  4315. Biggest difference in row (0, 0), sum 187.897064 vs 182.353088
  4316. Layer 14, Token 27 (model.layers.out comparison):
  4317. Original tensor sum: 182.226410
  4318. Converted tensor sum: 180.585373
  4319. Original tensor mean: 22.778301
  4320. Converted tensor mean: 22.573172
  4321. Mean difference: 4.70111561
  4322. Maximum pointwise difference: 12.44419956
  4323. Max difference location: (0, 0, 5)
  4324. Values at max diff - Original: 11.31790829, Converted: 23.76210785
  4325. Biggest difference in row (0, 0), sum 182.226410 vs 180.585373
  4326. Layer 15, Token 27 (model.layers.out comparison):
  4327. Original tensor sum: 333.560730
  4328. Converted tensor sum: 318.274811
  4329. Original tensor mean: 41.695091
  4330. Converted tensor mean: 39.784351
  4331. Mean difference: 4.67095470
  4332. Maximum pointwise difference: 11.04085732
  4333. Max difference location: (0, 0, 5)
  4334. Values at max diff - Original: 28.01206779, Converted: 39.05292511
  4335. Biggest difference in row (0, 0), sum 333.560730 vs 318.274811
  4336. Layer 0, Token 28 (model.layers.out comparison):
  4337. Original tensor sum: -40.607262
  4338. Converted tensor sum: 42.743095
  4339. Original tensor mean: -5.075908
  4340. Converted tensor mean: 5.342887
  4341. Mean difference: 11.17178345
  4342. Maximum pointwise difference: 22.58385468
  4343. Max difference location: (0, 0, 4)
  4344. Values at max diff - Original: -14.17651558, Converted: 8.40733814
  4345. Biggest difference in row (0, 0), sum -40.607262 vs 42.743095
  4346. Layer 1, Token 28 (model.layers.out comparison):
  4347. Original tensor sum: -43.333393
  4348. Converted tensor sum: 31.481144
  4349. Original tensor mean: -5.416674
  4350. Converted tensor mean: 3.935143
  4351. Mean difference: 11.11242485
  4352. Maximum pointwise difference: 18.85606575
  4353. Max difference location: (0, 0, 5)
  4354. Values at max diff - Original: -10.93557739, Converted: 7.92048883
  4355. Biggest difference in row (0, 0), sum -43.333393 vs 31.481144
  4356. Layer 2, Token 28 (model.layers.out comparison):
  4357. Original tensor sum: -67.416214
  4358. Converted tensor sum: 33.172539
  4359. Original tensor mean: -8.427027
  4360. Converted tensor mean: 4.146567
  4361. Mean difference: 14.60656548
  4362. Maximum pointwise difference: 20.67273331
  4363. Max difference location: (0, 0, 4)
  4364. Values at max diff - Original: -13.93512535, Converted: 6.73760748
  4365. Biggest difference in row (0, 0), sum -67.416214 vs 33.172539
  4366. Layer 3, Token 28 (model.layers.out comparison):
  4367. Original tensor sum: -199.361206
  4368. Converted tensor sum: 72.683899
  4369. Original tensor mean: -24.920151
  4370. Converted tensor mean: 9.085487
  4371. Mean difference: 34.00563812
  4372. Maximum pointwise difference: 41.37638092
  4373. Max difference location: (0, 0, 4)
  4374. Values at max diff - Original: -29.60864067, Converted: 11.76773930
  4375. Biggest difference in row (0, 0), sum -199.361206 vs 72.683899
  4376. Layer 4, Token 28 (model.layers.out comparison):
  4377. Original tensor sum: -137.055893
  4378. Converted tensor sum: 63.596687
  4379. Original tensor mean: -17.131987
  4380. Converted tensor mean: 7.949586
  4381. Mean difference: 25.75262260
  4382. Maximum pointwise difference: 40.96822739
  4383. Max difference location: (0, 0, 4)
  4384. Values at max diff - Original: -29.79143906, Converted: 11.17678833
  4385. Biggest difference in row (0, 0), sum -137.055893 vs 63.596687
  4386. Layer 5, Token 28 (model.layers.out comparison):
  4387. Original tensor sum: -73.715279
  4388. Converted tensor sum: 62.123581
  4389. Original tensor mean: -9.214410
  4390. Converted tensor mean: 7.765448
  4391. Mean difference: 17.84333420
  4392. Maximum pointwise difference: 31.90785027
  4393. Max difference location: (0, 0, 6)
  4394. Values at max diff - Original: -24.06629181, Converted: 7.84155846
  4395. Biggest difference in row (0, 0), sum -73.715279 vs 62.123581
  4396. Layer 6, Token 28 (model.layers.out comparison):
  4397. Original tensor sum: -126.770874
  4398. Converted tensor sum: 61.464096
  4399. Original tensor mean: -15.846359
  4400. Converted tensor mean: 7.683012
  4401. Mean difference: 23.61796379
  4402. Maximum pointwise difference: 36.02120209
  4403. Max difference location: (0, 0, 6)
  4404. Values at max diff - Original: -27.60279655, Converted: 8.41840744
  4405. Biggest difference in row (0, 0), sum -126.770874 vs 61.464096
  4406. Layer 7, Token 28 (model.layers.out comparison):
  4407. Original tensor sum: -254.607422
  4408. Converted tensor sum: 126.028885
  4409. Original tensor mean: -31.825928
  4410. Converted tensor mean: 15.753611
  4411. Mean difference: 47.57954025
  4412. Maximum pointwise difference: 61.42348480
  4413. Max difference location: (0, 0, 4)
  4414. Values at max diff - Original: -40.33562851, Converted: 21.08785439
  4415. Biggest difference in row (0, 0), sum -254.607422 vs 126.028885
  4416. Layer 8, Token 28 (model.layers.out comparison):
  4417. Original tensor sum: -198.536194
  4418. Converted tensor sum: 120.381157
  4419. Original tensor mean: -24.817024
  4420. Converted tensor mean: 15.047645
  4421. Mean difference: 39.86466980
  4422. Maximum pointwise difference: 52.24274063
  4423. Max difference location: (0, 0, 4)
  4424. Values at max diff - Original: -30.55666733, Converted: 21.68607330
  4425. Biggest difference in row (0, 0), sum -198.536194 vs 120.381157
  4426. Layer 9, Token 28 (model.layers.out comparison):
  4427. Original tensor sum: -203.318542
  4428. Converted tensor sum: 118.674896
  4429. Original tensor mean: -25.414818
  4430. Converted tensor mean: 14.834362
  4431. Mean difference: 40.24917984
  4432. Maximum pointwise difference: 51.74636078
  4433. Max difference location: (0, 0, 6)
  4434. Values at max diff - Original: -36.37534714, Converted: 15.37101555
  4435. Biggest difference in row (0, 0), sum -203.318542 vs 118.674896
  4436. Layer 10, Token 28 (model.layers.out comparison):
  4437. Original tensor sum: -173.929123
  4438. Converted tensor sum: 115.971573
  4439. Original tensor mean: -21.741140
  4440. Converted tensor mean: 14.496447
  4441. Mean difference: 36.23758698
  4442. Maximum pointwise difference: 47.99763489
  4443. Max difference location: (0, 0, 6)
  4444. Values at max diff - Original: -32.96516418, Converted: 15.03247166
  4445. Biggest difference in row (0, 0), sum -173.929123 vs 115.971573
  4446. Layer 11, Token 28 (model.layers.out comparison):
  4447. Original tensor sum: -450.842834
  4448. Converted tensor sum: 202.799988
  4449. Original tensor mean: -56.355354
  4450. Converted tensor mean: 25.349998
  4451. Mean difference: 81.70535278
  4452. Maximum pointwise difference: 92.55924988
  4453. Max difference location: (0, 0, 6)
  4454. Values at max diff - Original: -66.56226349, Converted: 25.99698830
  4455. Biggest difference in row (0, 0), sum -450.842834 vs 202.799988
  4456. Layer 12, Token 28 (model.layers.out comparison):
  4457. Original tensor sum: -483.456177
  4458. Converted tensor sum: 204.607147
  4459. Original tensor mean: -60.432022
  4460. Converted tensor mean: 25.575893
  4461. Mean difference: 86.00791931
  4462. Maximum pointwise difference: 97.30514526
  4463. Max difference location: (0, 0, 6)
  4464. Values at max diff - Original: -70.58811951, Converted: 26.71702957
  4465. Biggest difference in row (0, 0), sum -483.456177 vs 204.607147
  4466. Layer 13, Token 28 (model.layers.out comparison):
  4467. Original tensor sum: -487.978210
  4468. Converted tensor sum: 194.803741
  4469. Original tensor mean: -60.997276
  4470. Converted tensor mean: 24.350468
  4471. Mean difference: 85.34774780
  4472. Maximum pointwise difference: 97.00595093
  4473. Max difference location: (0, 0, 6)
  4474. Values at max diff - Original: -71.41757965, Converted: 25.58836937
  4475. Biggest difference in row (0, 0), sum -487.978210 vs 194.803741
  4476. Layer 14, Token 28 (model.layers.out comparison):
  4477. Original tensor sum: -487.676697
  4478. Converted tensor sum: 192.080292
  4479. Original tensor mean: -60.959587
  4480. Converted tensor mean: 24.010036
  4481. Mean difference: 84.96962738
  4482. Maximum pointwise difference: 101.62533569
  4483. Max difference location: (0, 0, 2)
  4484. Values at max diff - Original: -75.57343292, Converted: 26.05190277
  4485. Biggest difference in row (0, 0), sum -487.676697 vs 192.080292
  4486. Layer 15, Token 28 (model.layers.out comparison):
  4487. Original tensor sum: -826.685791
  4488. Converted tensor sum: 324.333130
  4489. Original tensor mean: -103.335724
  4490. Converted tensor mean: 40.541641
  4491. Mean difference: 143.87736511
  4492. Maximum pointwise difference: 160.84576416
  4493. Max difference location: (0, 0, 4)
  4494. Values at max diff - Original: -111.58706665, Converted: 49.25869751
  4495. Biggest difference in row (0, 0), sum -826.685791 vs 324.333130
  4496. Layer 0, Token 29 (model.layers.out comparison):
  4497. Original tensor sum: -7.335809
  4498. Converted tensor sum: 5.924038
  4499. Original tensor mean: -0.916976
  4500. Converted tensor mean: 0.740505
  4501. Mean difference: 2.81220579
  4502. Maximum pointwise difference: 5.74731255
  4503. Max difference location: (0, 0, 2)
  4504. Values at max diff - Original: -3.16068745, Converted: 2.58662534
  4505. Biggest difference in row (0, 0), sum -7.335809 vs 5.924038
  4506. Layer 1, Token 29 (model.layers.out comparison):
  4507. Original tensor sum: -4.554134
  4508. Converted tensor sum: 7.198357
  4509. Original tensor mean: -0.569267
  4510. Converted tensor mean: 0.899795
  4511. Mean difference: 4.59539890
  4512. Maximum pointwise difference: 12.22019768
  4513. Max difference location: (0, 0, 1)
  4514. Values at max diff - Original: -9.41592121, Converted: 2.80427670
  4515. Biggest difference in row (0, 0), sum -4.554134 vs 7.198357
  4516. Layer 2, Token 29 (model.layers.out comparison):
  4517. Original tensor sum: 18.821238
  4518. Converted tensor sum: -2.444355
  4519. Original tensor mean: 2.352655
  4520. Converted tensor mean: -0.305544
  4521. Mean difference: 5.75418472
  4522. Maximum pointwise difference: 9.27616215
  4523. Max difference location: (0, 0, 1)
  4524. Values at max diff - Original: -6.88754845, Converted: 2.38861346
  4525. Biggest difference in row (0, 0), sum 18.821238 vs -2.444355
  4526. Layer 3, Token 29 (model.layers.out comparison):
  4527. Original tensor sum: 70.965004
  4528. Converted tensor sum: -68.014175
  4529. Original tensor mean: 8.870625
  4530. Converted tensor mean: -8.501772
  4531. Mean difference: 17.37239647
  4532. Maximum pointwise difference: 24.10712433
  4533. Max difference location: (0, 0, 3)
  4534. Values at max diff - Original: 16.14313126, Converted: -7.96399307
  4535. Biggest difference in row (0, 0), sum 70.965004 vs -68.014175
  4536. Layer 4, Token 29 (model.layers.out comparison):
  4537. Original tensor sum: 62.607174
  4538. Converted tensor sum: -17.623362
  4539. Original tensor mean: 7.825897
  4540. Converted tensor mean: -2.202920
  4541. Mean difference: 10.34164429
  4542. Maximum pointwise difference: 18.22177315
  4543. Max difference location: (0, 0, 0)
  4544. Values at max diff - Original: 8.43466568, Converted: -9.78710747
  4545. Biggest difference in row (0, 0), sum 62.607174 vs -17.623362
  4546. Layer 5, Token 29 (model.layers.out comparison):
  4547. Original tensor sum: 52.727810
  4548. Converted tensor sum: 1.219590
  4549. Original tensor mean: 6.590976
  4550. Converted tensor mean: 0.152449
  4551. Mean difference: 7.65116024
  4552. Maximum pointwise difference: 18.62134933
  4553. Max difference location: (0, 0, 6)
  4554. Values at max diff - Original: 9.17143154, Converted: -9.44991875
  4555. Biggest difference in row (0, 0), sum 52.727810 vs 1.219590
  4556. Layer 6, Token 29 (model.layers.out comparison):
  4557. Original tensor sum: 56.382370
  4558. Converted tensor sum: 4.153158
  4559. Original tensor mean: 7.047796
  4560. Converted tensor mean: 0.519145
  4561. Mean difference: 7.41536808
  4562. Maximum pointwise difference: 17.59628677
  4563. Max difference location: (0, 0, 6)
  4564. Values at max diff - Original: 10.29856682, Converted: -7.29772091
  4565. Biggest difference in row (0, 0), sum 56.382370 vs 4.153158
  4566. Layer 7, Token 29 (model.layers.out comparison):
  4567. Original tensor sum: 136.310486
  4568. Converted tensor sum: 3.958838
  4569. Original tensor mean: 17.038811
  4570. Converted tensor mean: 0.494855
  4571. Mean difference: 16.54395676
  4572. Maximum pointwise difference: 26.66838837
  4573. Max difference location: (0, 0, 6)
  4574. Values at max diff - Original: 18.93185425, Converted: -7.73653507
  4575. Biggest difference in row (0, 0), sum 136.310486 vs 3.958838
  4576. Layer 8, Token 29 (model.layers.out comparison):
  4577. Original tensor sum: 119.467941
  4578. Converted tensor sum: 9.372761
  4579. Original tensor mean: 14.933493
  4580. Converted tensor mean: 1.171595
  4581. Mean difference: 13.76189804
  4582. Maximum pointwise difference: 22.09118652
  4583. Max difference location: (0, 0, 7)
  4584. Values at max diff - Original: 20.88569450, Converted: -1.20549154
  4585. Biggest difference in row (0, 0), sum 119.467941 vs 9.372761
  4586. Layer 9, Token 29 (model.layers.out comparison):
  4587. Original tensor sum: 111.468323
  4588. Converted tensor sum: 12.752249
  4589. Original tensor mean: 13.933540
  4590. Converted tensor mean: 1.594031
  4591. Mean difference: 12.36562347
  4592. Maximum pointwise difference: 19.99691391
  4593. Max difference location: (0, 0, 7)
  4594. Values at max diff - Original: 20.75084877, Converted: 0.75393468
  4595. Biggest difference in row (0, 0), sum 111.468323 vs 12.752249
  4596. Layer 10, Token 29 (model.layers.out comparison):
  4597. Original tensor sum: 103.290207
  4598. Converted tensor sum: 4.031506
  4599. Original tensor mean: 12.911276
  4600. Converted tensor mean: 0.503938
  4601. Mean difference: 12.90593433
  4602. Maximum pointwise difference: 20.97147560
  4603. Max difference location: (0, 0, 7)
  4604. Values at max diff - Original: 20.37113762, Converted: -0.60033715
  4605. Biggest difference in row (0, 0), sum 103.290207 vs 4.031506
  4606. Layer 11, Token 29 (model.layers.out comparison):
  4607. Original tensor sum: 195.291718
  4608. Converted tensor sum: 60.566498
  4609. Original tensor mean: 24.411465
  4610. Converted tensor mean: 7.570812
  4611. Mean difference: 16.84065247
  4612. Maximum pointwise difference: 26.14917755
  4613. Max difference location: (0, 0, 7)
  4614. Values at max diff - Original: 32.22053146, Converted: 6.07135296
  4615. Biggest difference in row (0, 0), sum 195.291718 vs 60.566498
  4616. Layer 12, Token 29 (model.layers.out comparison):
  4617. Original tensor sum: 193.868057
  4618. Converted tensor sum: 56.865105
  4619. Original tensor mean: 24.233507
  4620. Converted tensor mean: 7.108138
  4621. Mean difference: 17.12537003
  4622. Maximum pointwise difference: 27.68391991
  4623. Max difference location: (0, 0, 6)
  4624. Values at max diff - Original: 25.67190361, Converted: -2.01201606
  4625. Biggest difference in row (0, 0), sum 193.868057 vs 56.865105
  4626. Layer 13, Token 29 (model.layers.out comparison):
  4627. Original tensor sum: 191.697586
  4628. Converted tensor sum: 55.096077
  4629. Original tensor mean: 23.962198
  4630. Converted tensor mean: 6.887010
  4631. Mean difference: 17.07518768
  4632. Maximum pointwise difference: 27.05913162
  4633. Max difference location: (0, 0, 6)
  4634. Values at max diff - Original: 24.89521027, Converted: -2.16392159
  4635. Biggest difference in row (0, 0), sum 191.697586 vs 55.096077
  4636. Layer 14, Token 29 (model.layers.out comparison):
  4637. Original tensor sum: 188.843628
  4638. Converted tensor sum: 53.397236
  4639. Original tensor mean: 23.605453
  4640. Converted tensor mean: 6.674654
  4641. Mean difference: 16.93079758
  4642. Maximum pointwise difference: 25.94162941
  4643. Max difference location: (0, 0, 6)
  4644. Values at max diff - Original: 23.64732933, Converted: -2.29430056
  4645. Biggest difference in row (0, 0), sum 188.843628 vs 53.397236
  4646. Layer 15, Token 29 (model.layers.out comparison):
  4647. Original tensor sum: 336.074646
  4648. Converted tensor sum: 200.162903
  4649. Original tensor mean: 42.009331
  4650. Converted tensor mean: 25.020363
  4651. Mean difference: 16.98896790
  4652. Maximum pointwise difference: 25.90124702
  4653. Max difference location: (0, 0, 7)
  4654. Values at max diff - Original: 47.47709274, Converted: 21.57584572
  4655. Biggest difference in row (0, 0), sum 336.074646 vs 200.162903
  4656. Layer 0, Token 30 (model.layers.out comparison):
  4657. Original tensor sum: 17.017063
  4658. Converted tensor sum: 23.545963
  4659. Original tensor mean: 2.127133
  4660. Converted tensor mean: 2.943245
  4661. Mean difference: 2.51119232
  4662. Maximum pointwise difference: 4.74783516
  4663. Max difference location: (0, 0, 7)
  4664. Values at max diff - Original: -3.57869840, Converted: 1.16913700
  4665. Biggest difference in row (0, 0), sum 17.017063 vs 23.545963
  4666. Layer 1, Token 30 (model.layers.out comparison):
  4667. Original tensor sum: 20.432869
  4668. Converted tensor sum: 19.928423
  4669. Original tensor mean: 2.554109
  4670. Converted tensor mean: 2.491053
  4671. Mean difference: 3.21921587
  4672. Maximum pointwise difference: 5.61581087
  4673. Max difference location: (0, 0, 1)
  4674. Values at max diff - Original: 4.57620192, Converted: -1.03960896
  4675. Biggest difference in row (0, 0), sum 20.432869 vs 19.928423
  4676. Layer 2, Token 30 (model.layers.out comparison):
  4677. Original tensor sum: 28.017879
  4678. Converted tensor sum: 17.077301
  4679. Original tensor mean: 3.502235
  4680. Converted tensor mean: 2.134663
  4681. Mean difference: 3.63509035
  4682. Maximum pointwise difference: 8.41316605
  4683. Max difference location: (0, 0, 1)
  4684. Values at max diff - Original: 6.66613007, Converted: -1.74703574
  4685. Biggest difference in row (0, 0), sum 28.017879 vs 17.077301
  4686. Layer 3, Token 30 (model.layers.out comparison):
  4687. Original tensor sum: 85.620071
  4688. Converted tensor sum: 45.387245
  4689. Original tensor mean: 10.702509
  4690. Converted tensor mean: 5.673406
  4691. Mean difference: 5.25029612
  4692. Maximum pointwise difference: 14.27389336
  4693. Max difference location: (0, 0, 1)
  4694. Values at max diff - Original: 17.08827591, Converted: 2.81438255
  4695. Biggest difference in row (0, 0), sum 85.620071 vs 45.387245
  4696. Layer 4, Token 30 (model.layers.out comparison):
  4697. Original tensor sum: 76.943909
  4698. Converted tensor sum: 38.849068
  4699. Original tensor mean: 9.617989
  4700. Converted tensor mean: 4.856133
  4701. Mean difference: 5.60086536
  4702. Maximum pointwise difference: 14.69901657
  4703. Max difference location: (0, 0, 1)
  4704. Values at max diff - Original: 17.17034531, Converted: 2.47132850
  4705. Biggest difference in row (0, 0), sum 76.943909 vs 38.849068
  4706. Layer 5, Token 30 (model.layers.out comparison):
  4707. Original tensor sum: 59.381409
  4708. Converted tensor sum: 29.835991
  4709. Original tensor mean: 7.422676
  4710. Converted tensor mean: 3.729499
  4711. Mean difference: 4.96050739
  4712. Maximum pointwise difference: 12.96257687
  4713. Max difference location: (0, 0, 1)
  4714. Values at max diff - Original: 14.77257729, Converted: 1.81000042
  4715. Biggest difference in row (0, 0), sum 59.381409 vs 29.835991
  4716. Layer 6, Token 30 (model.layers.out comparison):
  4717. Original tensor sum: 59.339882
  4718. Converted tensor sum: 27.141592
  4719. Original tensor mean: 7.417485
  4720. Converted tensor mean: 3.392699
  4721. Mean difference: 4.87107563
  4722. Maximum pointwise difference: 14.00060558
  4723. Max difference location: (0, 0, 1)
  4724. Values at max diff - Original: 15.80483246, Converted: 1.80422711
  4725. Biggest difference in row (0, 0), sum 59.339882 vs 27.141592
  4726. Layer 7, Token 30 (model.layers.out comparison):
  4727. Original tensor sum: 131.503036
  4728. Converted tensor sum: 91.997757
  4729. Original tensor mean: 16.437880
  4730. Converted tensor mean: 11.499720
  4731. Mean difference: 5.33721828
  4732. Maximum pointwise difference: 14.37581253
  4733. Max difference location: (0, 0, 1)
  4734. Values at max diff - Original: 27.27588463, Converted: 12.90007210
  4735. Biggest difference in row (0, 0), sum 131.503036 vs 91.997757
  4736. Layer 8, Token 30 (model.layers.out comparison):
  4737. Original tensor sum: 123.886139
  4738. Converted tensor sum: 79.985909
  4739. Original tensor mean: 15.485767
  4740. Converted tensor mean: 9.998239
  4741. Mean difference: 6.03210974
  4742. Maximum pointwise difference: 16.31963348
  4743. Max difference location: (0, 0, 1)
  4744. Values at max diff - Original: 26.14530563, Converted: 9.82567215
  4745. Biggest difference in row (0, 0), sum 123.886139 vs 79.985909
  4746. Layer 9, Token 30 (model.layers.out comparison):
  4747. Original tensor sum: 118.487213
  4748. Converted tensor sum: 61.110474
  4749. Original tensor mean: 14.810902
  4750. Converted tensor mean: 7.638809
  4751. Mean difference: 7.17209244
  4752. Maximum pointwise difference: 17.08554077
  4753. Max difference location: (0, 0, 1)
  4754. Values at max diff - Original: 26.00649452, Converted: 8.92095280
  4755. Biggest difference in row (0, 0), sum 118.487213 vs 61.110474
  4756. Layer 10, Token 30 (model.layers.out comparison):
  4757. Original tensor sum: 110.301559
  4758. Converted tensor sum: 57.444092
  4759. Original tensor mean: 13.787695
  4760. Converted tensor mean: 7.180511
  4761. Mean difference: 6.69173956
  4762. Maximum pointwise difference: 18.18347359
  4763. Max difference location: (0, 0, 1)
  4764. Values at max diff - Original: 26.85584831, Converted: 8.67237473
  4765. Biggest difference in row (0, 0), sum 110.301559 vs 57.444092
  4766. Layer 11, Token 30 (model.layers.out comparison):
  4767. Original tensor sum: 209.603394
  4768. Converted tensor sum: 163.279968
  4769. Original tensor mean: 26.200424
  4770. Converted tensor mean: 20.409996
  4771. Mean difference: 6.23670197
  4772. Maximum pointwise difference: 18.06859207
  4773. Max difference location: (0, 0, 1)
  4774. Values at max diff - Original: 37.88209915, Converted: 19.81350708
  4775. Biggest difference in row (0, 0), sum 209.603394 vs 163.279968
  4776. Layer 12, Token 30 (model.layers.out comparison):
  4777. Original tensor sum: 210.341476
  4778. Converted tensor sum: 159.541199
  4779. Original tensor mean: 26.292685
  4780. Converted tensor mean: 19.942650
  4781. Mean difference: 6.62348843
  4782. Maximum pointwise difference: 17.79612160
  4783. Max difference location: (0, 0, 1)
  4784. Values at max diff - Original: 37.21872330, Converted: 19.42260170
  4785. Biggest difference in row (0, 0), sum 210.341476 vs 159.541199
  4786. Layer 13, Token 30 (model.layers.out comparison):
  4787. Original tensor sum: 206.045227
  4788. Converted tensor sum: 156.530212
  4789. Original tensor mean: 25.755653
  4790. Converted tensor mean: 19.566277
  4791. Mean difference: 6.46108055
  4792. Maximum pointwise difference: 17.11543655
  4793. Max difference location: (0, 0, 1)
  4794. Values at max diff - Original: 36.85726547, Converted: 19.74182892
  4795. Biggest difference in row (0, 0), sum 206.045227 vs 156.530212
  4796. Layer 14, Token 30 (model.layers.out comparison):
  4797. Original tensor sum: 204.884491
  4798. Converted tensor sum: 151.571396
  4799. Original tensor mean: 25.610561
  4800. Converted tensor mean: 18.946424
  4801. Mean difference: 6.66413498
  4802. Maximum pointwise difference: 18.41207695
  4803. Max difference location: (0, 0, 1)
  4804. Values at max diff - Original: 37.42459488, Converted: 19.01251793
  4805. Biggest difference in row (0, 0), sum 204.884491 vs 151.571396
  4806. Layer 15, Token 30 (model.layers.out comparison):
  4807. Original tensor sum: 358.352844
  4808. Converted tensor sum: 289.552582
  4809. Original tensor mean: 44.794106
  4810. Converted tensor mean: 36.194073
  4811. Mean difference: 8.60003757
  4812. Maximum pointwise difference: 19.94306946
  4813. Max difference location: (0, 0, 1)
  4814. Values at max diff - Original: 55.22903824, Converted: 35.28596878
  4815. Biggest difference in row (0, 0), sum 358.352844 vs 289.552582
  4816. ================================================================================
  4817. Comparing recurrent cache tensors...
  4818. ================================================================================
  4819. Layer 0, Token 1 (recurrent cache comparison):
  4820. Original tensor sum: -3.317356
  4821. Converted tensor sum: -3.317369
  4822. Original tensor mean: -0.001037
  4823. Converted tensor mean: -0.001037
  4824. Mean difference: 0.00000005
  4825. Maximum pointwise difference: 0.00000250
  4826. Max difference location: (0, 4, 8, 1)
  4827. Values at max diff - Original: -1.34675360, Converted: -1.34675610
  4828. Biggest difference in row (0, 4, 3), sum -1.531199 vs -1.531201
  4829. Original tensor:
  4830. [[[[-0.01188182 0.00870434 -0.00525597 ... 0.01664828 0.0042294
  4831. 0.01396134]
  4832. [-0.00601455 0.00372374 0.00119549 ... -0.00689575 0.00234476
  4833. -0.00023902]
  4834. [ 0.12993637 -0.07801484 0.03047845 ... -0.05703255 -0.06261977
  4835. -0.10933896]
  4836. ...
  4837. [-0.04649648 0.02312872 -0.00121024 ... -0.02114891 0.02579406
  4838. 0.02258455]
  4839. [-0.04175662 0.02266306 -0.0035618 ... -0.0084533 0.02211451
  4840. 0.02416236]
  4841. [ 0.02032246 -0.01281894 0.00930294 ... -0.02656155 -0.00984932
  4842. -0.02582185]]
  4843. [[ 0.01767723 0.01862493 0.00546727 ... 0.00556207 0.00562948
  4844. 0.02792829]
  4845. [ 0.00329595 0.00522457 0.00275346 ... 0.00801896 0.0103077
  4846. -0.00079376]
  4847. [-0.15666749 -0.19953263 -0.06468897 ... -0.12443222 -0.10325672
  4848. -0.20960501]
  4849. ...
  4850. [ 0.04138051 0.06359718 0.02354327 ... 0.06241166 0.05219408
  4851. 0.03928925]
  4852. [ 0.04164674 0.06036352 0.02137833 ... 0.05146553 0.04422566
  4853. 0.0441802 ]
  4854. [-0.03129916 -0.03683262 -0.01027868 ... -0.01391416 -0.00729654
  4855. -0.0505065 ]]
  4856. [[-0.12362282 0.10214025 -0.01907291 ... -0.06202121 -0.10286148
  4857. -0.04492377]
  4858. [-0.0150543 0.08293391 -0.00673187 ... -0.00035791 -0.01116562
  4859. -0.00036771]
  4860. [-0.02004597 0.00927652 -0.00294111 ... -0.01171783 -0.01758975
  4861. -0.00819483]
  4862. ...
  4863. [ 0.00270219 -0.04824698 0.00360209 ... -0.00234267 0.00216798
  4864. -0.00194733]
  4865. [ 0.01524375 -0.03120736 0.00455077 ... 0.00138342 0.01178958
  4866. 0.00394295]
  4867. [ 0.02191158 -0.03620601 0.00567079 ... 0.00745023 0.01862757
  4868. 0.00703449]]
  4869. ...
  4870. [[ 0.00741537 -0.04865595 -0.00886576 ... -0.02448454 0.01194548
  4871. -0.00861733]
  4872. [-0.00134769 0.01334649 0.01967893 ... 0.02112496 -0.01624596
  4873. 0.00516407]
  4874. [-0.0050677 0.02272661 0.01807955 ... 0.02094838 -0.01449073
  4875. 0.00967227]
  4876. ...
  4877. [-0.02633221 0.05768563 0.01628287 ... 0.0149423 -0.00576269
  4878. 0.04385136]
  4879. [-0.03326959 0.185886 -0.02219751 ... 0.04430137 -0.00146678
  4880. 0.02707055]
  4881. [-0.00715611 -0.00657876 -0.10976178 ... -0.09874185 0.08591411
  4882. -0.00940268]]
  4883. [[-0.03609058 -0.07579004 0.01501239 ... -0.00192132 -0.01605882
  4884. 0.00820769]
  4885. [-0.00521284 -0.03044076 0.01835437 ... -0.00124992 -0.01034386
  4886. 0.00627647]
  4887. [ 0.02380822 0.16997556 -0.04292414 ... 0.01702266 0.04020631
  4888. -0.03895959]
  4889. ...
  4890. [-0.00115029 -0.0217499 0.00398471 ... -0.00293407 -0.00470166
  4891. 0.00579625]
  4892. [-0.00415053 -0.03030142 0.02518196 ... -0.00043284 -0.01240897
  4893. 0.00634339]
  4894. [ 0.00861687 -0.01112233 -0.03039085 ... -0.00862329 0.00705495
  4895. 0.00750164]]
  4896. [[ 0.00614664 -0.01302179 -0.0609244 ... -0.05605923 -0.06379453
  4897. 0.01912303]
  4898. [ 0.01061937 -0.00787821 -0.02997783 ... -0.03494435 -0.04587581
  4899. 0.01142649]
  4900. [-0.04273459 0.08807568 0.18954179 ... 0.19141153 0.05976401
  4901. -0.01481191]
  4902. ...
  4903. [ 0.0059959 -0.01474381 -0.02677062 ... -0.02669823 0.00146604
  4904. -0.00064257]
  4905. [ 0.01313105 -0.0043188 -0.02868656 ... -0.03682106 -0.06574353
  4906. 0.01620813]
  4907. [-0.00286384 -0.03923091 -0.03224784 ... -0.01919729 0.12107897
  4908. -0.03120236]]]]
  4909. Converted tensor:
  4910. [[[[-0.01188182 0.00870434 -0.00525597 ... 0.0166483 0.0042294
  4911. 0.01396135]
  4912. [-0.00601455 0.00372375 0.00119549 ... -0.00689576 0.00234477
  4913. -0.00023903]
  4914. [ 0.12993638 -0.07801486 0.03047847 ... -0.05703259 -0.06261978
  4915. -0.10933899]
  4916. ...
  4917. [-0.04649651 0.02312873 -0.00121024 ... -0.02114895 0.02579408
  4918. 0.02258454]
  4919. [-0.04175663 0.02266307 -0.0035618 ... -0.00845332 0.02211452
  4920. 0.02416236]
  4921. [ 0.02032245 -0.01281894 0.00930295 ... -0.02656158 -0.00984932
  4922. -0.02582186]]
  4923. [[ 0.01767723 0.01862492 0.00546727 ... 0.00556206 0.00562947
  4924. 0.02792831]
  4925. [ 0.00329595 0.00522458 0.00275346 ... 0.00801897 0.01030772
  4926. -0.00079377]
  4927. [-0.15666753 -0.19953264 -0.06468898 ... -0.12443225 -0.10325674
  4928. -0.20960508]
  4929. ...
  4930. [ 0.04138052 0.06359721 0.02354329 ... 0.06241173 0.05219414
  4931. 0.03928925]
  4932. [ 0.04164676 0.06036354 0.02137835 ... 0.05146557 0.04422571
  4933. 0.0441802 ]
  4934. [-0.03129917 -0.03683261 -0.01027868 ... -0.01391415 -0.00729652
  4935. -0.05050653]]
  4936. [[-0.12362286 0.10214026 -0.01907291 ... -0.06202124 -0.10286151
  4937. -0.04492378]
  4938. [-0.01505431 0.08293395 -0.00673187 ... -0.00035791 -0.01116562
  4939. -0.00036771]
  4940. [-0.02004598 0.00927651 -0.00294111 ... -0.01171784 -0.01758976
  4941. -0.00819483]
  4942. ...
  4943. [ 0.00270219 -0.04824701 0.00360209 ... -0.00234266 0.00216798
  4944. -0.00194733]
  4945. [ 0.01524375 -0.03120738 0.00455077 ... 0.00138341 0.01178958
  4946. 0.00394295]
  4947. [ 0.02191159 -0.03620601 0.00567079 ... 0.00745023 0.01862758
  4948. 0.00703449]]
  4949. ...
  4950. [[ 0.00741537 -0.04865595 -0.00886576 ... -0.02448454 0.01194548
  4951. -0.00861733]
  4952. [-0.00134769 0.01334648 0.01967893 ... 0.02112496 -0.01624596
  4953. 0.00516407]
  4954. [-0.0050677 0.02272661 0.01807955 ... 0.02094838 -0.01449073
  4955. 0.00967227]
  4956. ...
  4957. [-0.02633222 0.05768563 0.01628287 ... 0.01494229 -0.00576268
  4958. 0.04385137]
  4959. [-0.03326959 0.18588606 -0.02219752 ... 0.04430138 -0.00146678
  4960. 0.02707056]
  4961. [-0.00715612 -0.00657868 -0.1097618 ... -0.09874186 0.08591412
  4962. -0.00940266]]
  4963. [[-0.03609059 -0.07579008 0.01501241 ... -0.00192132 -0.01605884
  4964. 0.00820769]
  4965. [-0.00521284 -0.03044078 0.01835438 ... -0.00124992 -0.01034387
  4966. 0.00627648]
  4967. [ 0.02380823 0.16997567 -0.04292417 ... 0.01702267 0.04020633
  4968. -0.03895961]
  4969. ...
  4970. [-0.00115029 -0.02174992 0.00398472 ... -0.00293407 -0.00470167
  4971. 0.00579625]
  4972. [-0.00415053 -0.03030144 0.02518198 ... -0.00043284 -0.01240898
  4973. 0.00634339]
  4974. [ 0.00861687 -0.01112236 -0.03039089 ... -0.0086233 0.00705496
  4975. 0.00750165]]
  4976. [[ 0.00614664 -0.0130218 -0.06092443 ... -0.05605926 -0.06379459
  4977. 0.01912304]
  4978. [ 0.01061938 -0.00787821 -0.02997785 ... -0.03494437 -0.04587585
  4979. 0.0114265 ]
  4980. [-0.04273462 0.08807574 0.18954192 ... 0.19141163 0.05976404
  4981. -0.01481192]
  4982. ...
  4983. [ 0.0059959 -0.01474382 -0.02677064 ... -0.02669825 0.00146605
  4984. -0.00064257]
  4985. [ 0.01313106 -0.00431879 -0.02868656 ... -0.03682107 -0.06574361
  4986. 0.01620815]
  4987. [-0.00286384 -0.03923097 -0.0322479 ... -0.01919733 0.12107915
  4988. -0.03120241]]]]
  4989. Layer 1, Token 1 (recurrent cache comparison):
  4990. Original tensor sum: 5.922648
  4991. Converted tensor sum: 5.922640
  4992. Original tensor mean: 0.001851
  4993. Converted tensor mean: 0.001851
  4994. Mean difference: 0.00000005
  4995. Maximum pointwise difference: 0.00000155
  4996. Max difference location: (0, 24, 4, 5)
  4997. Values at max diff - Original: -0.26876855, Converted: -0.26877010
  4998. Biggest difference in row (0, 14, 3), sum -0.918731 vs -0.918733
  4999. Layer 2, Token 1 (recurrent cache comparison):
  5000. Original tensor sum: 12.229185
  5001. Converted tensor sum: 12.229182
  5002. Original tensor mean: 0.003822
  5003. Converted tensor mean: 0.003822
  5004. Mean difference: 0.00000009
  5005. Maximum pointwise difference: 0.00000620
  5006. Max difference location: (0, 3, 6, 0)
  5007. Values at max diff - Original: 2.35518169, Converted: 2.35517550
  5008. Biggest difference in row (0, 3, 6), sum 3.961787 vs 3.961781
  5009. Layer 4, Token 1 (recurrent cache comparison):
  5010. Original tensor sum: 4.260600
  5011. Converted tensor sum: 4.260149
  5012. Original tensor mean: 0.001331
  5013. Converted tensor mean: 0.001331
  5014. Mean difference: 0.00000526
  5015. Maximum pointwise difference: 0.00011003
  5016. Max difference location: (0, 25, 2, 4)
  5017. Values at max diff - Original: 0.21691340, Converted: 0.21702343
  5018. Biggest difference in row (0, 3, 1), sum -0.358275 vs -0.358136
  5019. Layer 5, Token 1 (recurrent cache comparison):
  5020. Original tensor sum: 12.744413
  5021. Converted tensor sum: 12.744514
  5022. Original tensor mean: 0.003983
  5023. Converted tensor mean: 0.003983
  5024. Mean difference: 0.00000413
  5025. Maximum pointwise difference: 0.00011247
  5026. Max difference location: (0, 5, 2, 8)
  5027. Values at max diff - Original: 0.86490124, Converted: 0.86478877
  5028. Biggest difference in row (0, 5, 2), sum -0.456235 vs -0.456385
  5029. Layer 6, Token 1 (recurrent cache comparison):
  5030. Original tensor sum: -14.490761
  5031. Converted tensor sum: -14.493523
  5032. Original tensor mean: -0.004528
  5033. Converted tensor mean: -0.004529
  5034. Mean difference: 0.00002331
  5035. Maximum pointwise difference: 0.00149512
  5036. Max difference location: (0, 28, 9, 8)
  5037. Values at max diff - Original: 2.97030377, Converted: 2.96880865
  5038. Biggest difference in row (0, 8, 5), sum 5.080033 vs 5.077976
  5039. Layer 8, Token 1 (recurrent cache comparison):
  5040. Original tensor sum: -18.806082
  5041. Converted tensor sum: -18.808296
  5042. Original tensor mean: -0.005877
  5043. Converted tensor mean: -0.005878
  5044. Mean difference: 0.00002112
  5045. Maximum pointwise difference: 0.00074953
  5046. Max difference location: (0, 20, 1, 8)
  5047. Values at max diff - Original: 0.62514198, Converted: 0.62439245
  5048. Biggest difference in row (0, 25, 6), sum 1.048032 vs 1.047222
  5049. Layer 9, Token 1 (recurrent cache comparison):
  5050. Original tensor sum: 16.764290
  5051. Converted tensor sum: 16.760258
  5052. Original tensor mean: 0.005239
  5053. Converted tensor mean: 0.005238
  5054. Mean difference: 0.00002129
  5055. Maximum pointwise difference: 0.00044209
  5056. Max difference location: (0, 21, 5, 8)
  5057. Values at max diff - Original: 0.85285813, Converted: 0.85241604
  5058. Biggest difference in row (0, 0, 1), sum -0.046629 vs -0.046069
  5059. Layer 10, Token 1 (recurrent cache comparison):
  5060. Original tensor sum: 13.242327
  5061. Converted tensor sum: 13.242817
  5062. Original tensor mean: 0.004138
  5063. Converted tensor mean: 0.004138
  5064. Mean difference: 0.00002325
  5065. Maximum pointwise difference: 0.00070238
  5066. Max difference location: (0, 18, 5, 1)
  5067. Values at max diff - Original: 0.48423475, Converted: 0.48353237
  5068. Biggest difference in row (0, 10, 0), sum -0.502937 vs -0.502024
  5069. Layer 12, Token 1 (recurrent cache comparison):
  5070. Original tensor sum: 14.374599
  5071. Converted tensor sum: 14.372844
  5072. Original tensor mean: 0.004492
  5073. Converted tensor mean: 0.004492
  5074. Mean difference: 0.00002070
  5075. Maximum pointwise difference: 0.00084567
  5076. Max difference location: (0, 0, 3, 1)
  5077. Values at max diff - Original: 1.31967652, Converted: 1.31883085
  5078. Biggest difference in row (0, 0, 5), sum -0.676982 vs -0.676066
  5079. Layer 13, Token 1 (recurrent cache comparison):
  5080. Original tensor sum: 28.120127
  5081. Converted tensor sum: 28.128502
  5082. Original tensor mean: 0.008788
  5083. Converted tensor mean: 0.008790
  5084. Mean difference: 0.00001703
  5085. Maximum pointwise difference: 0.00037390
  5086. Max difference location: (0, 4, 2, 1)
  5087. Values at max diff - Original: -0.33164161, Converted: -0.33126771
  5088. Biggest difference in row (0, 24, 1), sum -0.030439 vs -0.029779
  5089. Layer 14, Token 1 (recurrent cache comparison):
  5090. Original tensor sum: 27.012432
  5091. Converted tensor sum: 27.011541
  5092. Original tensor mean: 0.008441
  5093. Converted tensor mean: 0.008441
  5094. Mean difference: 0.00002248
  5095. Maximum pointwise difference: 0.00121775
  5096. Max difference location: (0, 18, 0, 1)
  5097. Values at max diff - Original: 0.37722895, Converted: 0.37844670
  5098. Biggest difference in row (0, 28, 1), sum -0.493242 vs -0.492468
  5099. Layer 0, Token 2 (recurrent cache comparison):
  5100. Original tensor sum: 4.531467
  5101. Converted tensor sum: 4.531466
  5102. Original tensor mean: 0.001416
  5103. Converted tensor mean: 0.001416
  5104. Mean difference: 0.08359446
  5105. Maximum pointwise difference: 1.77142978
  5106. Max difference location: (0, 1, 3, 5)
  5107. Values at max diff - Original: -0.02699410, Converted: 1.74443567
  5108. Biggest difference in row (0, 25, 2), sum -0.057628 vs -2.844908
  5109. Layer 1, Token 2 (recurrent cache comparison):
  5110. Original tensor sum: 11.008316
  5111. Converted tensor sum: 11.008326
  5112. Original tensor mean: 0.003440
  5113. Converted tensor mean: 0.003440
  5114. Mean difference: 0.06277661
  5115. Maximum pointwise difference: 0.71243107
  5116. Max difference location: (0, 10, 0, 2)
  5117. Values at max diff - Original: 0.01163737, Converted: 0.72406846
  5118. Biggest difference in row (0, 12, 3), sum 0.228652 vs -1.768667
  5119. Layer 2, Token 2 (recurrent cache comparison):
  5120. Original tensor sum: 17.248280
  5121. Converted tensor sum: 17.248241
  5122. Original tensor mean: 0.005390
  5123. Converted tensor mean: 0.005390
  5124. Mean difference: 0.08558470
  5125. Maximum pointwise difference: 1.97508693
  5126. Max difference location: (0, 10, 7, 3)
  5127. Values at max diff - Original: 1.98190892, Converted: 0.00682194
  5128. Biggest difference in row (0, 27, 7), sum -0.594255 vs 3.191915
  5129. Layer 4, Token 2 (recurrent cache comparison):
  5130. Original tensor sum: 7.984356
  5131. Converted tensor sum: 7.983810
  5132. Original tensor mean: 0.002495
  5133. Converted tensor mean: 0.002495
  5134. Mean difference: 0.07671142
  5135. Maximum pointwise difference: 1.85330796
  5136. Max difference location: (0, 20, 4, 6)
  5137. Values at max diff - Original: 0.01886898, Converted: 1.87217689
  5138. Biggest difference in row (0, 20, 6), sum 2.845701 vs -0.305152
  5139. Layer 5, Token 2 (recurrent cache comparison):
  5140. Original tensor sum: 9.205366
  5141. Converted tensor sum: 9.205467
  5142. Original tensor mean: 0.002877
  5143. Converted tensor mean: 0.002877
  5144. Mean difference: 0.06804129
  5145. Maximum pointwise difference: 1.41803539
  5146. Max difference location: (0, 31, 6, 3)
  5147. Values at max diff - Original: 1.40662789, Converted: -0.01140754
  5148. Biggest difference in row (0, 24, 8), sum -0.372748 vs -2.656956
  5149. Layer 6, Token 2 (recurrent cache comparison):
  5150. Original tensor sum: -7.876884
  5151. Converted tensor sum: -7.873561
  5152. Original tensor mean: -0.002462
  5153. Converted tensor mean: -0.002460
  5154. Mean difference: 0.10029175
  5155. Maximum pointwise difference: 2.66715860
  5156. Max difference location: (0, 28, 9, 8)
  5157. Values at max diff - Original: 2.59401202, Converted: -0.07314663
  5158. Biggest difference in row (0, 19, 4), sum 0.710449 vs -5.203167
  5159. Layer 8, Token 2 (recurrent cache comparison):
  5160. Original tensor sum: -13.154655
  5161. Converted tensor sum: -13.156775
  5162. Original tensor mean: -0.004111
  5163. Converted tensor mean: -0.004111
  5164. Mean difference: 0.08601540
  5165. Maximum pointwise difference: 2.83156943
  5166. Max difference location: (0, 12, 4, 7)
  5167. Values at max diff - Original: 2.83582592, Converted: 0.00425647
  5168. Biggest difference in row (0, 30, 3), sum -0.848019 vs -4.754877
  5169. Layer 9, Token 2 (recurrent cache comparison):
  5170. Original tensor sum: 13.187357
  5171. Converted tensor sum: 13.181618
  5172. Original tensor mean: 0.004121
  5173. Converted tensor mean: 0.004119
  5174. Mean difference: 0.05544823
  5175. Maximum pointwise difference: 0.65544760
  5176. Max difference location: (0, 21, 5, 8)
  5177. Values at max diff - Original: 0.71338689, Converted: 0.05793926
  5178. Biggest difference in row (0, 19, 9), sum -0.054951 vs -1.900613
  5179. Layer 10, Token 2 (recurrent cache comparison):
  5180. Original tensor sum: 10.860550
  5181. Converted tensor sum: 10.860478
  5182. Original tensor mean: 0.003394
  5183. Converted tensor mean: 0.003394
  5184. Mean difference: 0.05739149
  5185. Maximum pointwise difference: 1.22302496
  5186. Max difference location: (0, 30, 4, 5)
  5187. Values at max diff - Original: -0.01519475, Converted: 1.20783019
  5188. Biggest difference in row (0, 23, 3), sum -0.221712 vs -3.841269
  5189. Layer 12, Token 2 (recurrent cache comparison):
  5190. Original tensor sum: 3.134315
  5191. Converted tensor sum: 3.132089
  5192. Original tensor mean: 0.000979
  5193. Converted tensor mean: 0.000979
  5194. Mean difference: 0.07305207
  5195. Maximum pointwise difference: 2.30649829
  5196. Max difference location: (0, 5, 4, 5)
  5197. Values at max diff - Original: 2.33141446, Converted: 0.02491626
  5198. Biggest difference in row (0, 0, 1), sum -1.541565 vs -6.179572
  5199. Layer 13, Token 2 (recurrent cache comparison):
  5200. Original tensor sum: 18.773312
  5201. Converted tensor sum: 18.779602
  5202. Original tensor mean: 0.005867
  5203. Converted tensor mean: 0.005869
  5204. Mean difference: 0.04688552
  5205. Maximum pointwise difference: 0.60163057
  5206. Max difference location: (0, 6, 1, 7)
  5207. Values at max diff - Original: 0.04654653, Converted: 0.64817709
  5208. Biggest difference in row (0, 4, 1), sum -0.566141 vs -2.623919
  5209. Layer 14, Token 2 (recurrent cache comparison):
  5210. Original tensor sum: 13.960938
  5211. Converted tensor sum: 13.964265
  5212. Original tensor mean: 0.004363
  5213. Converted tensor mean: 0.004364
  5214. Mean difference: 0.06759205
  5215. Maximum pointwise difference: 1.25844812
  5216. Max difference location: (0, 15, 8, 4)
  5217. Values at max diff - Original: 1.26228178, Converted: 0.00383368
  5218. Biggest difference in row (0, 31, 3), sum -0.096068 vs -5.326997
  5219. Layer 0, Token 3 (recurrent cache comparison):
  5220. Original tensor sum: 0.684784
  5221. Converted tensor sum: 0.422194
  5222. Original tensor mean: 0.000214
  5223. Converted tensor mean: 0.000132
  5224. Mean difference: 0.06314481
  5225. Maximum pointwise difference: 1.39332521
  5226. Max difference location: (0, 28, 5, 9)
  5227. Values at max diff - Original: -0.03651731, Converted: 1.35680795
  5228. Biggest difference in row (0, 4, 9), sum 2.498745 vs 0.335116
  5229. Layer 1, Token 3 (recurrent cache comparison):
  5230. Original tensor sum: 3.526195
  5231. Converted tensor sum: 7.632782
  5232. Original tensor mean: 0.001102
  5233. Converted tensor mean: 0.002385
  5234. Mean difference: 0.04427468
  5235. Maximum pointwise difference: 0.98676205
  5236. Max difference location: (0, 12, 3, 7)
  5237. Values at max diff - Original: 0.92044085, Converted: -0.06632122
  5238. Biggest difference in row (0, 24, 2), sum 0.609889 vs -0.814516
  5239. Layer 2, Token 3 (recurrent cache comparison):
  5240. Original tensor sum: 15.850447
  5241. Converted tensor sum: 14.785593
  5242. Original tensor mean: 0.004953
  5243. Converted tensor mean: 0.004620
  5244. Mean difference: 0.06092339
  5245. Maximum pointwise difference: 2.43390632
  5246. Max difference location: (0, 1, 0, 4)
  5247. Values at max diff - Original: 2.80371213, Converted: 0.36980587
  5248. Biggest difference in row (0, 1, 0), sum 4.370481 vs 0.423526
  5249. Layer 4, Token 3 (recurrent cache comparison):
  5250. Original tensor sum: 19.856752
  5251. Converted tensor sum: 11.778177
  5252. Original tensor mean: 0.006205
  5253. Converted tensor mean: 0.003681
  5254. Mean difference: 0.07194611
  5255. Maximum pointwise difference: 2.48742008
  5256. Max difference location: (0, 14, 3, 9)
  5257. Values at max diff - Original: 2.47506452, Converted: -0.01235563
  5258. Biggest difference in row (0, 19, 2), sum 0.372920 vs -2.973422
  5259. Layer 5, Token 3 (recurrent cache comparison):
  5260. Original tensor sum: 9.792118
  5261. Converted tensor sum: 9.138845
  5262. Original tensor mean: 0.003060
  5263. Converted tensor mean: 0.002856
  5264. Mean difference: 0.05089124
  5265. Maximum pointwise difference: 1.42593253
  5266. Max difference location: (0, 29, 0, 8)
  5267. Values at max diff - Original: 1.42089915, Converted: -0.00503340
  5268. Biggest difference in row (0, 29, 0), sum 2.198264 vs 0.182178
  5269. Layer 6, Token 3 (recurrent cache comparison):
  5270. Original tensor sum: 39.415325
  5271. Converted tensor sum: 64.451355
  5272. Original tensor mean: 0.012317
  5273. Converted tensor mean: 0.020141
  5274. Mean difference: 0.08079723
  5275. Maximum pointwise difference: 4.89647627
  5276. Max difference location: (0, 15, 3, 6)
  5277. Values at max diff - Original: -0.18732879, Converted: 4.70914745
  5278. Biggest difference in row (0, 6, 0), sum 0.836966 vs 8.447909
  5279. Layer 8, Token 3 (recurrent cache comparison):
  5280. Original tensor sum: 23.689789
  5281. Converted tensor sum: 12.321936
  5282. Original tensor mean: 0.007403
  5283. Converted tensor mean: 0.003851
  5284. Mean difference: 0.08749782
  5285. Maximum pointwise difference: 3.85876298
  5286. Max difference location: (0, 6, 4, 8)
  5287. Values at max diff - Original: 0.01438628, Converted: 3.87314916
  5288. Biggest difference in row (0, 6, 4), sum 0.119168 vs 5.376393
  5289. Layer 9, Token 3 (recurrent cache comparison):
  5290. Original tensor sum: 8.901470
  5291. Converted tensor sum: 4.339914
  5292. Original tensor mean: 0.002782
  5293. Converted tensor mean: 0.001356
  5294. Mean difference: 0.06287189
  5295. Maximum pointwise difference: 1.40262556
  5296. Max difference location: (0, 4, 0, 5)
  5297. Values at max diff - Original: -0.00241524, Converted: 1.40021038
  5298. Biggest difference in row (0, 18, 1), sum 1.268483 vs -0.696936
  5299. Layer 10, Token 3 (recurrent cache comparison):
  5300. Original tensor sum: 18.375820
  5301. Converted tensor sum: 3.348410
  5302. Original tensor mean: 0.005742
  5303. Converted tensor mean: 0.001046
  5304. Mean difference: 0.06042652
  5305. Maximum pointwise difference: 2.94567752
  5306. Max difference location: (0, 3, 8, 7)
  5307. Values at max diff - Original: -0.26693973, Converted: 2.67873788
  5308. Biggest difference in row (0, 3, 8), sum 0.004062 vs 2.562259
  5309. Layer 12, Token 3 (recurrent cache comparison):
  5310. Original tensor sum: 15.322770
  5311. Converted tensor sum: 2.674777
  5312. Original tensor mean: 0.004788
  5313. Converted tensor mean: 0.000836
  5314. Mean difference: 0.07379209
  5315. Maximum pointwise difference: 2.73401403
  5316. Max difference location: (0, 30, 4, 0)
  5317. Values at max diff - Original: -0.02140745, Converted: 2.71260667
  5318. Biggest difference in row (0, 7, 6), sum -0.113145 vs 2.612027
  5319. Layer 13, Token 3 (recurrent cache comparison):
  5320. Original tensor sum: 14.910538
  5321. Converted tensor sum: 8.724025
  5322. Original tensor mean: 0.004660
  5323. Converted tensor mean: 0.002726
  5324. Mean difference: 0.05616682
  5325. Maximum pointwise difference: 1.43021226
  5326. Max difference location: (0, 26, 5, 0)
  5327. Values at max diff - Original: -1.41802061, Converted: 0.01219167
  5328. Biggest difference in row (0, 3, 9), sum 0.002563 vs -3.030253
  5329. Layer 14, Token 3 (recurrent cache comparison):
  5330. Original tensor sum: 59.583878
  5331. Converted tensor sum: -2.192444
  5332. Original tensor mean: 0.018620
  5333. Converted tensor mean: -0.000685
  5334. Mean difference: 0.10199536
  5335. Maximum pointwise difference: 2.77383018
  5336. Max difference location: (0, 2, 0, 2)
  5337. Values at max diff - Original: 2.72142744, Converted: -0.05240267
  5338. Biggest difference in row (0, 16, 6), sum 0.145663 vs -8.295967
  5339. Layer 0, Token 4 (recurrent cache comparison):
  5340. Original tensor sum: 7.899137
  5341. Converted tensor sum: 4.783788
  5342. Original tensor mean: 0.002468
  5343. Converted tensor mean: 0.001495
  5344. Mean difference: 0.06620996
  5345. Maximum pointwise difference: 1.03156960
  5346. Max difference location: (0, 1, 3, 7)
  5347. Values at max diff - Original: -0.00703356, Converted: -1.03860319
  5348. Biggest difference in row (0, 21, 4), sum 0.038056 vs -1.875101
  5349. Layer 1, Token 4 (recurrent cache comparison):
  5350. Original tensor sum: 11.224692
  5351. Converted tensor sum: 15.232712
  5352. Original tensor mean: 0.003508
  5353. Converted tensor mean: 0.004760
  5354. Mean difference: 0.06535107
  5355. Maximum pointwise difference: 1.53891993
  5356. Max difference location: (0, 28, 3, 7)
  5357. Values at max diff - Original: 0.04949531, Converted: 1.58841527
  5358. Biggest difference in row (0, 28, 3), sum 0.880954 vs 3.297761
  5359. Layer 2, Token 4 (recurrent cache comparison):
  5360. Original tensor sum: 15.875578
  5361. Converted tensor sum: 5.908407
  5362. Original tensor mean: 0.004961
  5363. Converted tensor mean: 0.001846
  5364. Mean difference: 0.09298474
  5365. Maximum pointwise difference: 2.68871808
  5366. Max difference location: (0, 14, 3, 7)
  5367. Values at max diff - Original: 2.67706752, Converted: -0.01165051
  5368. Biggest difference in row (0, 27, 2), sum 3.910276 vs 0.204061
  5369. Layer 4, Token 4 (recurrent cache comparison):
  5370. Original tensor sum: 34.602001
  5371. Converted tensor sum: 14.365917
  5372. Original tensor mean: 0.010813
  5373. Converted tensor mean: 0.004489
  5374. Mean difference: 0.10193390
  5375. Maximum pointwise difference: 3.22817802
  5376. Max difference location: (0, 26, 6, 5)
  5377. Values at max diff - Original: -0.04091755, Converted: 3.18726039
  5378. Biggest difference in row (0, 26, 6), sum 0.735282 vs 4.516615
  5379. Layer 5, Token 4 (recurrent cache comparison):
  5380. Original tensor sum: 24.322514
  5381. Converted tensor sum: 18.418108
  5382. Original tensor mean: 0.007601
  5383. Converted tensor mean: 0.005756
  5384. Mean difference: 0.08364967
  5385. Maximum pointwise difference: 2.23648024
  5386. Max difference location: (0, 22, 6, 1)
  5387. Values at max diff - Original: 2.19508362, Converted: -0.04139667
  5388. Biggest difference in row (0, 3, 0), sum 3.424673 vs -0.163761
  5389. Layer 6, Token 4 (recurrent cache comparison):
  5390. Original tensor sum: 34.762104
  5391. Converted tensor sum: 77.105461
  5392. Original tensor mean: 0.010863
  5393. Converted tensor mean: 0.024095
  5394. Mean difference: 0.12376648
  5395. Maximum pointwise difference: 3.91498804
  5396. Max difference location: (0, 12, 5, 4)
  5397. Values at max diff - Original: -0.17797241, Converted: 3.73701572
  5398. Biggest difference in row (0, 10, 4), sum -0.207436 vs 6.811815
  5399. Layer 8, Token 4 (recurrent cache comparison):
  5400. Original tensor sum: 52.858780
  5401. Converted tensor sum: 5.570855
  5402. Original tensor mean: 0.016518
  5403. Converted tensor mean: 0.001741
  5404. Mean difference: 0.12005786
  5405. Maximum pointwise difference: 5.32569838
  5406. Max difference location: (0, 12, 3, 5)
  5407. Values at max diff - Original: 5.34859705, Converted: 0.02289869
  5408. Biggest difference in row (0, 20, 0), sum 8.008233 vs -0.003253
  5409. Layer 9, Token 4 (recurrent cache comparison):
  5410. Original tensor sum: 20.435345
  5411. Converted tensor sum: -2.045311
  5412. Original tensor mean: 0.006386
  5413. Converted tensor mean: -0.000639
  5414. Mean difference: 0.08372314
  5415. Maximum pointwise difference: 2.78602862
  5416. Max difference location: (0, 28, 2, 0)
  5417. Values at max diff - Original: 2.71785426, Converted: -0.06817436
  5418. Biggest difference in row (0, 28, 2), sum 4.726543 vs 1.302800
  5419. Layer 10, Token 4 (recurrent cache comparison):
  5420. Original tensor sum: 28.353613
  5421. Converted tensor sum: 12.385429
  5422. Original tensor mean: 0.008861
  5423. Converted tensor mean: 0.003870
  5424. Mean difference: 0.09276734
  5425. Maximum pointwise difference: 2.28980851
  5426. Max difference location: (0, 2, 9, 5)
  5427. Values at max diff - Original: -0.00412231, Converted: 2.28568625
  5428. Biggest difference in row (0, 13, 8), sum 3.624647 vs 0.020094
  5429. Layer 12, Token 4 (recurrent cache comparison):
  5430. Original tensor sum: 70.502647
  5431. Converted tensor sum: -11.005323
  5432. Original tensor mean: 0.022032
  5433. Converted tensor mean: -0.003439
  5434. Mean difference: 0.13381547
  5435. Maximum pointwise difference: 3.57928109
  5436. Max difference location: (0, 30, 0, 4)
  5437. Values at max diff - Original: 3.95710707, Converted: 0.37782601
  5438. Biggest difference in row (0, 21, 9), sum -1.532540 vs -12.302475
  5439. Layer 13, Token 4 (recurrent cache comparison):
  5440. Original tensor sum: 38.753532
  5441. Converted tensor sum: 5.437235
  5442. Original tensor mean: 0.012110
  5443. Converted tensor mean: 0.001699
  5444. Mean difference: 0.08178755
  5445. Maximum pointwise difference: 2.55715966
  5446. Max difference location: (0, 3, 4, 9)
  5447. Values at max diff - Original: 2.84962225, Converted: 0.29246253
  5448. Biggest difference in row (0, 3, 4), sum 4.266754 vs 0.847269
  5449. Layer 14, Token 4 (recurrent cache comparison):
  5450. Original tensor sum: 141.714035
  5451. Converted tensor sum: 3.640444
  5452. Original tensor mean: 0.044286
  5453. Converted tensor mean: 0.001138
  5454. Mean difference: 0.14463389
  5455. Maximum pointwise difference: 5.68939066
  5456. Max difference location: (0, 16, 7, 6)
  5457. Values at max diff - Original: 5.55827475, Converted: -0.13111581
  5458. Biggest difference in row (0, 28, 1), sum 11.271111 vs 0.609705
  5459. Layer 0, Token 5 (recurrent cache comparison):
  5460. Original tensor sum: 9.131315
  5461. Converted tensor sum: 12.396471
  5462. Original tensor mean: 0.002854
  5463. Converted tensor mean: 0.003874
  5464. Mean difference: 0.05539500
  5465. Maximum pointwise difference: 1.09641600
  5466. Max difference location: (0, 28, 9, 5)
  5467. Values at max diff - Original: 1.12920201, Converted: 0.03278603
  5468. Biggest difference in row (0, 4, 9), sum 1.258661 vs 0.175881
  5469. Layer 1, Token 5 (recurrent cache comparison):
  5470. Original tensor sum: 24.366199
  5471. Converted tensor sum: 10.052802
  5472. Original tensor mean: 0.007614
  5473. Converted tensor mean: 0.003142
  5474. Mean difference: 0.05824861
  5475. Maximum pointwise difference: 2.14620328
  5476. Max difference location: (0, 14, 2, 5)
  5477. Values at max diff - Original: 0.00643282, Converted: 2.15263605
  5478. Biggest difference in row (0, 6, 4), sum 1.356658 vs -0.156208
  5479. Layer 2, Token 5 (recurrent cache comparison):
  5480. Original tensor sum: 50.376324
  5481. Converted tensor sum: 20.166676
  5482. Original tensor mean: 0.015743
  5483. Converted tensor mean: 0.006302
  5484. Mean difference: 0.07966200
  5485. Maximum pointwise difference: 2.04463291
  5486. Max difference location: (0, 27, 4, 2)
  5487. Values at max diff - Original: 2.00972342, Converted: -0.03490951
  5488. Biggest difference in row (0, 27, 2), sum 5.745794 vs 1.959190
  5489. Layer 4, Token 5 (recurrent cache comparison):
  5490. Original tensor sum: 44.478531
  5491. Converted tensor sum: 48.696777
  5492. Original tensor mean: 0.013900
  5493. Converted tensor mean: 0.015218
  5494. Mean difference: 0.09315307
  5495. Maximum pointwise difference: 2.43060613
  5496. Max difference location: (0, 26, 5, 6)
  5497. Values at max diff - Original: 0.46136302, Converted: 2.89196920
  5498. Biggest difference in row (0, 8, 6), sum 0.054414 vs 4.076869
  5499. Layer 5, Token 5 (recurrent cache comparison):
  5500. Original tensor sum: 57.863758
  5501. Converted tensor sum: 66.390915
  5502. Original tensor mean: 0.018082
  5503. Converted tensor mean: 0.020747
  5504. Mean difference: 0.10497291
  5505. Maximum pointwise difference: 2.49356651
  5506. Max difference location: (0, 17, 3, 6)
  5507. Values at max diff - Original: 2.50974846, Converted: 0.01618202
  5508. Biggest difference in row (0, 28, 9), sum 3.771637 vs 0.053981
  5509. Layer 6, Token 5 (recurrent cache comparison):
  5510. Original tensor sum: 39.502037
  5511. Converted tensor sum: 161.817169
  5512. Original tensor mean: 0.012344
  5513. Converted tensor mean: 0.050568
  5514. Mean difference: 0.14194940
  5515. Maximum pointwise difference: 3.58584666
  5516. Max difference location: (0, 26, 3, 9)
  5517. Values at max diff - Original: 3.40417242, Converted: -0.18167432
  5518. Biggest difference in row (0, 12, 4), sum 1.168972 vs 7.813907
  5519. Layer 8, Token 5 (recurrent cache comparison):
  5520. Original tensor sum: 44.896149
  5521. Converted tensor sum: 38.246201
  5522. Original tensor mean: 0.014030
  5523. Converted tensor mean: 0.011952
  5524. Mean difference: 0.10806250
  5525. Maximum pointwise difference: 2.33007479
  5526. Max difference location: (0, 1, 6, 0)
  5527. Values at max diff - Original: 2.35027504, Converted: 0.02020025
  5528. Biggest difference in row (0, 1, 6), sum 5.246045 vs 0.247956
  5529. Layer 9, Token 5 (recurrent cache comparison):
  5530. Original tensor sum: 20.569098
  5531. Converted tensor sum: 11.688971
  5532. Original tensor mean: 0.006428
  5533. Converted tensor mean: 0.003653
  5534. Mean difference: 0.08318320
  5535. Maximum pointwise difference: 1.79917610
  5536. Max difference location: (0, 28, 0, 3)
  5537. Values at max diff - Original: 1.69918346, Converted: -0.09999267
  5538. Biggest difference in row (0, 3, 4), sum 3.283048 vs 0.225886
  5539. Layer 10, Token 5 (recurrent cache comparison):
  5540. Original tensor sum: 42.493145
  5541. Converted tensor sum: 26.750286
  5542. Original tensor mean: 0.013279
  5543. Converted tensor mean: 0.008359
  5544. Mean difference: 0.09709122
  5545. Maximum pointwise difference: 2.97919798
  5546. Max difference location: (0, 10, 0, 3)
  5547. Values at max diff - Original: 3.34914303, Converted: 0.36994517
  5548. Biggest difference in row (0, 10, 0), sum 5.613201 vs -0.079588
  5549. Layer 12, Token 5 (recurrent cache comparison):
  5550. Original tensor sum: 91.460236
  5551. Converted tensor sum: 14.637827
  5552. Original tensor mean: 0.028581
  5553. Converted tensor mean: 0.004574
  5554. Mean difference: 0.12184902
  5555. Maximum pointwise difference: 4.17300320
  5556. Max difference location: (0, 23, 2, 9)
  5557. Values at max diff - Original: 3.98550677, Converted: -0.18749636
  5558. Biggest difference in row (0, 28, 5), sum 5.243108 vs -0.797499
  5559. Layer 13, Token 5 (recurrent cache comparison):
  5560. Original tensor sum: 50.306297
  5561. Converted tensor sum: 16.367235
  5562. Original tensor mean: 0.015721
  5563. Converted tensor mean: 0.005115
  5564. Mean difference: 0.08688851
  5565. Maximum pointwise difference: 2.08200264
  5566. Max difference location: (0, 19, 9, 3)
  5567. Values at max diff - Original: -1.59057343, Converted: 0.49142930
  5568. Biggest difference in row (0, 19, 5), sum 3.595970 vs 0.049368
  5569. Layer 14, Token 5 (recurrent cache comparison):
  5570. Original tensor sum: 120.273888
  5571. Converted tensor sum: 44.449192
  5572. Original tensor mean: 0.037586
  5573. Converted tensor mean: 0.013890
  5574. Mean difference: 0.13929905
  5575. Maximum pointwise difference: 4.73129654
  5576. Max difference location: (0, 18, 5, 9)
  5577. Values at max diff - Original: 4.35292673, Converted: -0.37836996
  5578. Biggest difference in row (0, 18, 5), sum 8.950241 vs -0.746074
  5579. Layer 0, Token 6 (recurrent cache comparison):
  5580. Original tensor sum: 11.608546
  5581. Converted tensor sum: 10.627696
  5582. Original tensor mean: 0.003628
  5583. Converted tensor mean: 0.003321
  5584. Mean difference: 0.05484011
  5585. Maximum pointwise difference: 1.12371099
  5586. Max difference location: (0, 1, 2, 3)
  5587. Values at max diff - Original: 1.11502755, Converted: -0.00868344
  5588. Biggest difference in row (0, 28, 5), sum 0.118289 vs 2.332705
  5589. Layer 1, Token 6 (recurrent cache comparison):
  5590. Original tensor sum: 92.219727
  5591. Converted tensor sum: 28.768579
  5592. Original tensor mean: 0.028819
  5593. Converted tensor mean: 0.008990
  5594. Mean difference: 0.08724788
  5595. Maximum pointwise difference: 1.51144505
  5596. Max difference location: (0, 23, 0, 4)
  5597. Values at max diff - Original: 1.55765891, Converted: 0.04621384
  5598. Biggest difference in row (0, 14, 0), sum 2.954077 vs -0.012181
  5599. Layer 2, Token 6 (recurrent cache comparison):
  5600. Original tensor sum: 101.609215
  5601. Converted tensor sum: 93.242142
  5602. Original tensor mean: 0.031753
  5603. Converted tensor mean: 0.029138
  5604. Mean difference: 0.12457406
  5605. Maximum pointwise difference: 2.07845497
  5606. Max difference location: (0, 13, 1, 9)
  5607. Values at max diff - Original: 2.17026591, Converted: 0.09181103
  5608. Biggest difference in row (0, 5, 5), sum 4.805948 vs -0.569050
  5609. Layer 4, Token 6 (recurrent cache comparison):
  5610. Original tensor sum: 13.856092
  5611. Converted tensor sum: 22.610188
  5612. Original tensor mean: 0.004330
  5613. Converted tensor mean: 0.007066
  5614. Mean difference: 0.09440003
  5615. Maximum pointwise difference: 2.37087321
  5616. Max difference location: (0, 19, 2, 6)
  5617. Values at max diff - Original: -0.02839734, Converted: 2.34247589
  5618. Biggest difference in row (0, 28, 1), sum -0.280756 vs 2.458031
  5619. Layer 5, Token 6 (recurrent cache comparison):
  5620. Original tensor sum: 39.960052
  5621. Converted tensor sum: 41.437057
  5622. Original tensor mean: 0.012488
  5623. Converted tensor mean: 0.012949
  5624. Mean difference: 0.11209048
  5625. Maximum pointwise difference: 2.79378676
  5626. Max difference location: (0, 19, 8, 4)
  5627. Values at max diff - Original: 0.01245314, Converted: 2.80623984
  5628. Biggest difference in row (0, 13, 1), sum 6.005285 vs -0.085273
  5629. Layer 6, Token 6 (recurrent cache comparison):
  5630. Original tensor sum: -2.419616
  5631. Converted tensor sum: 156.977676
  5632. Original tensor mean: -0.000756
  5633. Converted tensor mean: 0.049056
  5634. Mean difference: 0.13894926
  5635. Maximum pointwise difference: 6.69993019
  5636. Max difference location: (0, 10, 3, 1)
  5637. Values at max diff - Original: -1.12109971, Converted: 5.57883024
  5638. Biggest difference in row (0, 12, 1), sum -0.201558 vs 10.382487
  5639. Layer 8, Token 6 (recurrent cache comparison):
  5640. Original tensor sum: 8.213539
  5641. Converted tensor sum: 18.368313
  5642. Original tensor mean: 0.002567
  5643. Converted tensor mean: 0.005740
  5644. Mean difference: 0.10382870
  5645. Maximum pointwise difference: 3.36055303
  5646. Max difference location: (0, 6, 4, 8)
  5647. Values at max diff - Original: 0.10355368, Converted: 3.46410680
  5648. Biggest difference in row (0, 6, 4), sum -0.613209 vs 4.409491
  5649. Layer 9, Token 6 (recurrent cache comparison):
  5650. Original tensor sum: 12.889297
  5651. Converted tensor sum: -0.411069
  5652. Original tensor mean: 0.004028
  5653. Converted tensor mean: -0.000128
  5654. Mean difference: 0.08612256
  5655. Maximum pointwise difference: 1.89322448
  5656. Max difference location: (0, 6, 4, 1)
  5657. Values at max diff - Original: -0.52327746, Converted: 1.36994708
  5658. Biggest difference in row (0, 21, 7), sum 0.245074 vs -2.764518
  5659. Layer 10, Token 6 (recurrent cache comparison):
  5660. Original tensor sum: 3.506564
  5661. Converted tensor sum: 11.408216
  5662. Original tensor mean: 0.001096
  5663. Converted tensor mean: 0.003565
  5664. Mean difference: 0.08594991
  5665. Maximum pointwise difference: 3.30037594
  5666. Max difference location: (0, 3, 8, 7)
  5667. Values at max diff - Original: -0.08371022, Converted: 3.21666574
  5668. Biggest difference in row (0, 0, 7), sum -0.426351 vs 3.218251
  5669. Layer 12, Token 6 (recurrent cache comparison):
  5670. Original tensor sum: 30.742065
  5671. Converted tensor sum: 1.932971
  5672. Original tensor mean: 0.009607
  5673. Converted tensor mean: 0.000604
  5674. Mean difference: 0.10983281
  5675. Maximum pointwise difference: 3.31334734
  5676. Max difference location: (0, 29, 5, 6)
  5677. Values at max diff - Original: 3.34788132, Converted: 0.03453401
  5678. Biggest difference in row (0, 29, 5), sum 6.176572 vs 0.072738
  5679. Layer 13, Token 6 (recurrent cache comparison):
  5680. Original tensor sum: 14.579787
  5681. Converted tensor sum: 9.630959
  5682. Original tensor mean: 0.004556
  5683. Converted tensor mean: 0.003010
  5684. Mean difference: 0.08181592
  5685. Maximum pointwise difference: 2.27647829
  5686. Max difference location: (0, 19, 1, 3)
  5687. Values at max diff - Original: 2.46903062, Converted: 0.19255245
  5688. Biggest difference in row (0, 19, 5), sum 2.241402 vs -0.017656
  5689. Layer 14, Token 6 (recurrent cache comparison):
  5690. Original tensor sum: 42.673443
  5691. Converted tensor sum: 13.958614
  5692. Original tensor mean: 0.013335
  5693. Converted tensor mean: 0.004362
  5694. Mean difference: 0.12478559
  5695. Maximum pointwise difference: 3.53676820
  5696. Max difference location: (0, 15, 8, 4)
  5697. Values at max diff - Original: 3.58003521, Converted: 0.04326708
  5698. Biggest difference in row (0, 16, 6), sum 0.056718 vs -5.064022
  5699. Layer 0, Token 7 (recurrent cache comparison):
  5700. Original tensor sum: 13.531075
  5701. Converted tensor sum: 7.895350
  5702. Original tensor mean: 0.004228
  5703. Converted tensor mean: 0.002467
  5704. Mean difference: 0.05525878
  5705. Maximum pointwise difference: 0.84158301
  5706. Max difference location: (0, 4, 1, 9)
  5707. Values at max diff - Original: -0.04387791, Converted: 0.79770511
  5708. Biggest difference in row (0, 11, 9), sum -0.221553 vs -1.606123
  5709. Layer 1, Token 7 (recurrent cache comparison):
  5710. Original tensor sum: 106.468651
  5711. Converted tensor sum: 29.931305
  5712. Original tensor mean: 0.033271
  5713. Converted tensor mean: 0.009354
  5714. Mean difference: 0.07464606
  5715. Maximum pointwise difference: 1.52088320
  5716. Max difference location: (0, 24, 0, 1)
  5717. Values at max diff - Original: 1.28372872, Converted: -0.23715444
  5718. Biggest difference in row (0, 31, 9), sum 2.350637 vs -0.270012
  5719. Layer 2, Token 7 (recurrent cache comparison):
  5720. Original tensor sum: 129.077255
  5721. Converted tensor sum: 124.290329
  5722. Original tensor mean: 0.040337
  5723. Converted tensor mean: 0.038841
  5724. Mean difference: 0.12615709
  5725. Maximum pointwise difference: 3.32020164
  5726. Max difference location: (0, 23, 3, 9)
  5727. Values at max diff - Original: 0.05276818, Converted: 3.37296987
  5728. Biggest difference in row (0, 5, 6), sum -1.585131 vs 2.877644
  5729. Layer 4, Token 7 (recurrent cache comparison):
  5730. Original tensor sum: 12.337616
  5731. Converted tensor sum: 29.998875
  5732. Original tensor mean: 0.003856
  5733. Converted tensor mean: 0.009375
  5734. Mean difference: 0.08588156
  5735. Maximum pointwise difference: 1.48782670
  5736. Max difference location: (0, 19, 6, 2)
  5737. Values at max diff - Original: -0.00142645, Converted: 1.48640025
  5738. Biggest difference in row (0, 8, 3), sum -0.318221 vs 2.809558
  5739. Layer 5, Token 7 (recurrent cache comparison):
  5740. Original tensor sum: 28.667000
  5741. Converted tensor sum: 37.180931
  5742. Original tensor mean: 0.008958
  5743. Converted tensor mean: 0.011619
  5744. Mean difference: 0.09552816
  5745. Maximum pointwise difference: 2.18750906
  5746. Max difference location: (0, 19, 4, 8)
  5747. Values at max diff - Original: 0.10599449, Converted: 2.29350352
  5748. Biggest difference in row (0, 28, 9), sum 2.464837 vs 0.175544
  5749. Layer 6, Token 7 (recurrent cache comparison):
  5750. Original tensor sum: -5.179218
  5751. Converted tensor sum: 165.798248
  5752. Original tensor mean: -0.001619
  5753. Converted tensor mean: 0.051812
  5754. Mean difference: 0.12655024
  5755. Maximum pointwise difference: 4.26992130
  5756. Max difference location: (0, 10, 1, 3)
  5757. Values at max diff - Original: -0.81827015, Converted: 3.45165110
  5758. Biggest difference in row (0, 12, 6), sum 2.458921 vs 9.472747
  5759. Layer 8, Token 7 (recurrent cache comparison):
  5760. Original tensor sum: 8.037577
  5761. Converted tensor sum: 36.050400
  5762. Original tensor mean: 0.002512
  5763. Converted tensor mean: 0.011266
  5764. Mean difference: 0.10181364
  5765. Maximum pointwise difference: 3.21224403
  5766. Max difference location: (0, 6, 8, 4)
  5767. Values at max diff - Original: 0.04581403, Converted: 3.25805807
  5768. Biggest difference in row (0, 6, 8), sum -0.710102 vs 2.858772
  5769. Layer 9, Token 7 (recurrent cache comparison):
  5770. Original tensor sum: 10.771255
  5771. Converted tensor sum: 9.047117
  5772. Original tensor mean: 0.003366
  5773. Converted tensor mean: 0.002827
  5774. Mean difference: 0.07432807
  5775. Maximum pointwise difference: 1.92723787
  5776. Max difference location: (0, 18, 5, 2)
  5777. Values at max diff - Original: 0.10259621, Converted: 2.02983403
  5778. Biggest difference in row (0, 14, 2), sum 0.009283 vs 3.088803
  5779. Layer 10, Token 7 (recurrent cache comparison):
  5780. Original tensor sum: 2.196672
  5781. Converted tensor sum: 31.273930
  5782. Original tensor mean: 0.000686
  5783. Converted tensor mean: 0.009773
  5784. Mean difference: 0.07749946
  5785. Maximum pointwise difference: 2.52166486
  5786. Max difference location: (0, 3, 7, 8)
  5787. Values at max diff - Original: 0.31132898, Converted: 2.83299375
  5788. Biggest difference in row (0, 20, 9), sum -0.957283 vs 1.748438
  5789. Layer 12, Token 7 (recurrent cache comparison):
  5790. Original tensor sum: 18.589321
  5791. Converted tensor sum: 5.047585
  5792. Original tensor mean: 0.005809
  5793. Converted tensor mean: 0.001577
  5794. Mean difference: 0.10475901
  5795. Maximum pointwise difference: 2.85224462
  5796. Max difference location: (0, 29, 5, 6)
  5797. Values at max diff - Original: 2.91423106, Converted: 0.06198643
  5798. Biggest difference in row (0, 29, 5), sum 5.378224 vs 0.028987
  5799. Layer 13, Token 7 (recurrent cache comparison):
  5800. Original tensor sum: 10.072084
  5801. Converted tensor sum: 22.447376
  5802. Original tensor mean: 0.003148
  5803. Converted tensor mean: 0.007015
  5804. Mean difference: 0.06809221
  5805. Maximum pointwise difference: 1.16759956
  5806. Max difference location: (0, 27, 3, 5)
  5807. Values at max diff - Original: -0.07106454, Converted: 1.09653497
  5808. Biggest difference in row (0, 27, 3), sum -0.724999 vs 1.414439
  5809. Layer 14, Token 7 (recurrent cache comparison):
  5810. Original tensor sum: 24.727911
  5811. Converted tensor sum: 26.743217
  5812. Original tensor mean: 0.007727
  5813. Converted tensor mean: 0.008357
  5814. Mean difference: 0.11743267
  5815. Maximum pointwise difference: 2.98747468
  5816. Max difference location: (0, 18, 5, 1)
  5817. Values at max diff - Original: 2.95096135, Converted: -0.03651327
  5818. Biggest difference in row (0, 28, 1), sum -0.138044 vs 7.456189
  5819. Layer 0, Token 8 (recurrent cache comparison):
  5820. Original tensor sum: 15.709320
  5821. Converted tensor sum: 12.209140
  5822. Original tensor mean: 0.004909
  5823. Converted tensor mean: 0.003815
  5824. Mean difference: 0.05364013
  5825. Maximum pointwise difference: 1.00742257
  5826. Max difference location: (0, 1, 3, 2)
  5827. Values at max diff - Original: 0.00399712, Converted: 1.01141965
  5828. Biggest difference in row (0, 28, 5), sum 0.102939 vs 1.531078
  5829. Layer 1, Token 8 (recurrent cache comparison):
  5830. Original tensor sum: 188.393356
  5831. Converted tensor sum: 69.447678
  5832. Original tensor mean: 0.058873
  5833. Converted tensor mean: 0.021702
  5834. Mean difference: 0.10494157
  5835. Maximum pointwise difference: 2.06318974
  5836. Max difference location: (0, 24, 6, 8)
  5837. Values at max diff - Original: 2.06102371, Converted: -0.00216593
  5838. Biggest difference in row (0, 14, 0), sum 8.656445 vs 0.053197
  5839. Layer 2, Token 8 (recurrent cache comparison):
  5840. Original tensor sum: 204.433716
  5841. Converted tensor sum: 228.728714
  5842. Original tensor mean: 0.063886
  5843. Converted tensor mean: 0.071478
  5844. Mean difference: 0.17672807
  5845. Maximum pointwise difference: 4.02747822
  5846. Max difference location: (0, 14, 7, 4)
  5847. Values at max diff - Original: -0.50838530, Converted: 3.51909280
  5848. Biggest difference in row (0, 14, 7), sum -0.459507 vs 8.282653
  5849. Layer 4, Token 8 (recurrent cache comparison):
  5850. Original tensor sum: 27.791477
  5851. Converted tensor sum: 81.184990
  5852. Original tensor mean: 0.008685
  5853. Converted tensor mean: 0.025370
  5854. Mean difference: 0.10353857
  5855. Maximum pointwise difference: 2.46198463
  5856. Max difference location: (0, 20, 0, 0)
  5857. Values at max diff - Original: -0.22187454, Converted: 2.24011016
  5858. Biggest difference in row (0, 20, 0), sum 0.256525 vs 5.813072
  5859. Layer 5, Token 8 (recurrent cache comparison):
  5860. Original tensor sum: 29.250452
  5861. Converted tensor sum: 93.253128
  5862. Original tensor mean: 0.009141
  5863. Converted tensor mean: 0.029142
  5864. Mean difference: 0.10660823
  5865. Maximum pointwise difference: 2.56040263
  5866. Max difference location: (0, 5, 9, 6)
  5867. Values at max diff - Original: 2.57331157, Converted: 0.01290902
  5868. Biggest difference in row (0, 6, 9), sum 0.078166 vs 4.415024
  5869. Layer 6, Token 8 (recurrent cache comparison):
  5870. Original tensor sum: 27.846973
  5871. Converted tensor sum: 254.006149
  5872. Original tensor mean: 0.008702
  5873. Converted tensor mean: 0.079377
  5874. Mean difference: 0.15745334
  5875. Maximum pointwise difference: 4.78712130
  5876. Max difference location: (0, 6, 0, 1)
  5877. Values at max diff - Original: -0.02898185, Converted: 4.75813961
  5878. Biggest difference in row (0, 6, 0), sum 0.390611 vs 12.429944
  5879. Layer 8, Token 8 (recurrent cache comparison):
  5880. Original tensor sum: 30.536982
  5881. Converted tensor sum: 101.827225
  5882. Original tensor mean: 0.009543
  5883. Converted tensor mean: 0.031821
  5884. Mean difference: 0.12039161
  5885. Maximum pointwise difference: 3.22662950
  5886. Max difference location: (0, 6, 4, 8)
  5887. Values at max diff - Original: 0.09277204, Converted: 3.31940150
  5888. Biggest difference in row (0, 6, 4), sum -0.525502 vs 4.532234
  5889. Layer 9, Token 8 (recurrent cache comparison):
  5890. Original tensor sum: 16.682407
  5891. Converted tensor sum: 55.948948
  5892. Original tensor mean: 0.005213
  5893. Converted tensor mean: 0.017484
  5894. Mean difference: 0.08395444
  5895. Maximum pointwise difference: 2.21269536
  5896. Max difference location: (0, 2, 6, 8)
  5897. Values at max diff - Original: -0.01177103, Converted: 2.20092440
  5898. Biggest difference in row (0, 2, 6), sum 0.250594 vs 2.860795
  5899. Layer 10, Token 8 (recurrent cache comparison):
  5900. Original tensor sum: 12.510189
  5901. Converted tensor sum: 82.301987
  5902. Original tensor mean: 0.003909
  5903. Converted tensor mean: 0.025719
  5904. Mean difference: 0.08603403
  5905. Maximum pointwise difference: 2.56086898
  5906. Max difference location: (0, 3, 8, 7)
  5907. Values at max diff - Original: -0.06791666, Converted: 2.49295235
  5908. Biggest difference in row (0, 27, 2), sum -0.661969 vs 2.579364
  5909. Layer 12, Token 8 (recurrent cache comparison):
  5910. Original tensor sum: 32.357769
  5911. Converted tensor sum: 70.608459
  5912. Original tensor mean: 0.010112
  5913. Converted tensor mean: 0.022065
  5914. Mean difference: 0.11435273
  5915. Maximum pointwise difference: 2.54995298
  5916. Max difference location: (0, 29, 5, 6)
  5917. Values at max diff - Original: 2.57914209, Converted: 0.02918900
  5918. Biggest difference in row (0, 24, 2), sum -0.360438 vs 5.434034
  5919. Layer 13, Token 8 (recurrent cache comparison):
  5920. Original tensor sum: 15.804648
  5921. Converted tensor sum: 72.853622
  5922. Original tensor mean: 0.004939
  5923. Converted tensor mean: 0.022767
  5924. Mean difference: 0.07997719
  5925. Maximum pointwise difference: 2.65385294
  5926. Max difference location: (0, 26, 0, 4)
  5927. Values at max diff - Original: -0.03116010, Converted: 2.62269282
  5928. Biggest difference in row (0, 26, 0), sum -1.206431 vs 2.459876
  5929. Layer 14, Token 8 (recurrent cache comparison):
  5930. Original tensor sum: 69.455246
  5931. Converted tensor sum: 167.620041
  5932. Original tensor mean: 0.021705
  5933. Converted tensor mean: 0.052381
  5934. Mean difference: 0.15660757
  5935. Maximum pointwise difference: 2.87237978
  5936. Max difference location: (0, 29, 9, 1)
  5937. Values at max diff - Original: -0.04621891, Converted: 2.82616091
  5938. Biggest difference in row (0, 20, 4), sum -0.064347 vs 6.085094
  5939. Layer 0, Token 9 (recurrent cache comparison):
  5940. Original tensor sum: 13.786104
  5941. Converted tensor sum: 5.261156
  5942. Original tensor mean: 0.004308
  5943. Converted tensor mean: 0.001644
  5944. Mean difference: 0.06277616
  5945. Maximum pointwise difference: 1.31032252
  5946. Max difference location: (0, 4, 1, 9)
  5947. Values at max diff - Original: -0.02821357, Converted: 1.28210890
  5948. Biggest difference in row (0, 11, 3), sum 0.289278 vs -0.836586
  5949. Layer 1, Token 9 (recurrent cache comparison):
  5950. Original tensor sum: 203.497635
  5951. Converted tensor sum: 111.110443
  5952. Original tensor mean: 0.063593
  5953. Converted tensor mean: 0.034722
  5954. Mean difference: 0.10077493
  5955. Maximum pointwise difference: 1.97459030
  5956. Max difference location: (0, 24, 0, 1)
  5957. Values at max diff - Original: 1.88861251, Converted: -0.08597784
  5958. Biggest difference in row (0, 14, 0), sum 9.054160 vs 0.974541
  5959. Layer 2, Token 9 (recurrent cache comparison):
  5960. Original tensor sum: 210.326843
  5961. Converted tensor sum: 237.847137
  5962. Original tensor mean: 0.065727
  5963. Converted tensor mean: 0.074327
  5964. Mean difference: 0.16504267
  5965. Maximum pointwise difference: 2.71314573
  5966. Max difference location: (0, 4, 8, 1)
  5967. Values at max diff - Original: -0.00067222, Converted: 2.71247363
  5968. Biggest difference in row (0, 1, 4), sum 2.414350 vs 7.828261
  5969. Layer 4, Token 9 (recurrent cache comparison):
  5970. Original tensor sum: 76.020309
  5971. Converted tensor sum: 125.208931
  5972. Original tensor mean: 0.023756
  5973. Converted tensor mean: 0.039128
  5974. Mean difference: 0.11094213
  5975. Maximum pointwise difference: 3.67572975
  5976. Max difference location: (0, 27, 7, 5)
  5977. Values at max diff - Original: 3.66171432, Converted: -0.01401533
  5978. Biggest difference in row (0, 3, 0), sum 4.612147 vs 0.005273
  5979. Layer 5, Token 9 (recurrent cache comparison):
  5980. Original tensor sum: 70.017532
  5981. Converted tensor sum: 128.789795
  5982. Original tensor mean: 0.021880
  5983. Converted tensor mean: 0.040247
  5984. Mean difference: 0.11726990
  5985. Maximum pointwise difference: 2.56784987
  5986. Max difference location: (0, 6, 7, 6)
  5987. Values at max diff - Original: 2.56954336, Converted: 0.00169344
  5988. Biggest difference in row (0, 6, 7), sum 5.224357 vs 0.050091
  5989. Layer 6, Token 9 (recurrent cache comparison):
  5990. Original tensor sum: 97.678406
  5991. Converted tensor sum: 298.968506
  5992. Original tensor mean: 0.030525
  5993. Converted tensor mean: 0.093428
  5994. Mean difference: 0.16553456
  5995. Maximum pointwise difference: 4.22000217
  5996. Max difference location: (0, 14, 1, 7)
  5997. Values at max diff - Original: -0.10210184, Converted: 4.11790037
  5998. Biggest difference in row (0, 14, 1), sum -0.198166 vs 10.807201
  5999. Layer 8, Token 9 (recurrent cache comparison):
  6000. Original tensor sum: 106.931870
  6001. Converted tensor sum: 173.151855
  6002. Original tensor mean: 0.033416
  6003. Converted tensor mean: 0.054110
  6004. Mean difference: 0.14065868
  6005. Maximum pointwise difference: 3.01797652
  6006. Max difference location: (0, 14, 9, 5)
  6007. Values at max diff - Original: -0.05490554, Converted: 2.96307087
  6008. Biggest difference in row (0, 20, 7), sum 0.154971 vs 7.357482
  6009. Layer 9, Token 9 (recurrent cache comparison):
  6010. Original tensor sum: 64.670883
  6011. Converted tensor sum: 92.657562
  6012. Original tensor mean: 0.020210
  6013. Converted tensor mean: 0.028955
  6014. Mean difference: 0.09020478
  6015. Maximum pointwise difference: 3.22673941
  6016. Max difference location: (0, 18, 5, 2)
  6017. Values at max diff - Original: 0.18116489, Converted: 3.40790439
  6018. Biggest difference in row (0, 18, 2), sum 6.946761 vs 1.273814
  6019. Layer 10, Token 9 (recurrent cache comparison):
  6020. Original tensor sum: 52.923912
  6021. Converted tensor sum: 104.621475
  6022. Original tensor mean: 0.016539
  6023. Converted tensor mean: 0.032694
  6024. Mean difference: 0.08354937
  6025. Maximum pointwise difference: 1.84956801
  6026. Max difference location: (0, 3, 7, 8)
  6027. Values at max diff - Original: 0.37758890, Converted: 2.22715688
  6028. Biggest difference in row (0, 20, 9), sum -1.298731 vs 2.479056
  6029. Layer 12, Token 9 (recurrent cache comparison):
  6030. Original tensor sum: 87.343620
  6031. Converted tensor sum: 117.516281
  6032. Original tensor mean: 0.027295
  6033. Converted tensor mean: 0.036724
  6034. Mean difference: 0.12288742
  6035. Maximum pointwise difference: 3.19170189
  6036. Max difference location: (0, 13, 2, 4)
  6037. Values at max diff - Original: -0.11148589, Converted: 3.08021593
  6038. Biggest difference in row (0, 13, 2), sum 0.993775 vs 6.040417
  6039. Layer 13, Token 9 (recurrent cache comparison):
  6040. Original tensor sum: 77.928635
  6041. Converted tensor sum: 116.695862
  6042. Original tensor mean: 0.024353
  6043. Converted tensor mean: 0.036467
  6044. Mean difference: 0.09447044
  6045. Maximum pointwise difference: 1.43028283
  6046. Max difference location: (0, 26, 0, 4)
  6047. Values at max diff - Original: -0.00879327, Converted: 1.42148960
  6048. Biggest difference in row (0, 25, 3), sum -0.128404 vs 3.423045
  6049. Layer 14, Token 9 (recurrent cache comparison):
  6050. Original tensor sum: 162.069077
  6051. Converted tensor sum: 247.590637
  6052. Original tensor mean: 0.050647
  6053. Converted tensor mean: 0.077372
  6054. Mean difference: 0.17534283
  6055. Maximum pointwise difference: 3.21209598
  6056. Max difference location: (0, 28, 1, 9)
  6057. Values at max diff - Original: -0.25805441, Converted: 2.95404148
  6058. Biggest difference in row (0, 28, 1), sum 1.364790 vs 9.833094
  6059. Layer 0, Token 10 (recurrent cache comparison):
  6060. Original tensor sum: 7.816267
  6061. Converted tensor sum: 1.466951
  6062. Original tensor mean: 0.002443
  6063. Converted tensor mean: 0.000458
  6064. Mean difference: 0.05842621
  6065. Maximum pointwise difference: 1.09208894
  6066. Max difference location: (0, 21, 4, 1)
  6067. Values at max diff - Original: 0.04324723, Converted: 1.13533616
  6068. Biggest difference in row (0, 28, 5), sum 0.301255 vs 2.364079
  6069. Layer 1, Token 10 (recurrent cache comparison):
  6070. Original tensor sum: 223.526520
  6071. Converted tensor sum: 135.921234
  6072. Original tensor mean: 0.069852
  6073. Converted tensor mean: 0.042475
  6074. Mean difference: 0.10827781
  6075. Maximum pointwise difference: 1.68770814
  6076. Max difference location: (0, 16, 6, 1)
  6077. Values at max diff - Original: 2.02958679, Converted: 0.34187865
  6078. Biggest difference in row (0, 14, 0), sum 5.745544 vs -0.048143
  6079. Layer 2, Token 10 (recurrent cache comparison):
  6080. Original tensor sum: 215.104584
  6081. Converted tensor sum: 227.212708
  6082. Original tensor mean: 0.067220
  6083. Converted tensor mean: 0.071004
  6084. Mean difference: 0.17289215
  6085. Maximum pointwise difference: 3.18850541
  6086. Max difference location: (0, 26, 3, 8)
  6087. Values at max diff - Original: 0.01985940, Converted: 3.20836473
  6088. Biggest difference in row (0, 12, 7), sum 8.279942 vs -0.264312
  6089. Layer 4, Token 10 (recurrent cache comparison):
  6090. Original tensor sum: 185.702744
  6091. Converted tensor sum: 211.499130
  6092. Original tensor mean: 0.058032
  6093. Converted tensor mean: 0.066093
  6094. Mean difference: 0.12541530
  6095. Maximum pointwise difference: 2.52001357
  6096. Max difference location: (0, 27, 5, 8)
  6097. Values at max diff - Original: 0.05403204, Converted: 2.57404566
  6098. Biggest difference in row (0, 27, 5), sum 0.682007 vs 7.443546
  6099. Layer 5, Token 10 (recurrent cache comparison):
  6100. Original tensor sum: 169.265594
  6101. Converted tensor sum: 227.449417
  6102. Original tensor mean: 0.052895
  6103. Converted tensor mean: 0.071078
  6104. Mean difference: 0.13289575
  6105. Maximum pointwise difference: 3.03736281
  6106. Max difference location: (0, 6, 2, 6)
  6107. Values at max diff - Original: 3.01727891, Converted: -0.02008397
  6108. Biggest difference in row (0, 6, 2), sum 9.659736 vs 0.153498
  6109. Layer 6, Token 10 (recurrent cache comparison):
  6110. Original tensor sum: 230.247437
  6111. Converted tensor sum: 418.704895
  6112. Original tensor mean: 0.071952
  6113. Converted tensor mean: 0.130845
  6114. Mean difference: 0.17921637
  6115. Maximum pointwise difference: 4.08086109
  6116. Max difference location: (0, 6, 0, 1)
  6117. Values at max diff - Original: 0.00348123, Converted: 4.08434248
  6118. Biggest difference in row (0, 6, 0), sum 0.879897 vs 15.160538
  6119. Layer 8, Token 10 (recurrent cache comparison):
  6120. Original tensor sum: 206.699799
  6121. Converted tensor sum: 283.296692
  6122. Original tensor mean: 0.064594
  6123. Converted tensor mean: 0.088530
  6124. Mean difference: 0.15303743
  6125. Maximum pointwise difference: 3.20992827
  6126. Max difference location: (0, 14, 4, 5)
  6127. Values at max diff - Original: 0.00341668, Converted: 3.21334505
  6128. Biggest difference in row (0, 2, 4), sum -0.470056 vs 8.175467
  6129. Layer 9, Token 10 (recurrent cache comparison):
  6130. Original tensor sum: 155.765579
  6131. Converted tensor sum: 185.697693
  6132. Original tensor mean: 0.048677
  6133. Converted tensor mean: 0.058031
  6134. Mean difference: 0.09974226
  6135. Maximum pointwise difference: 2.01155925
  6136. Max difference location: (0, 14, 1, 8)
  6137. Values at max diff - Original: -0.00813468, Converted: 2.00342464
  6138. Biggest difference in row (0, 18, 3), sum -0.273577 vs 5.096995
  6139. Layer 10, Token 10 (recurrent cache comparison):
  6140. Original tensor sum: 147.632782
  6141. Converted tensor sum: 177.473785
  6142. Original tensor mean: 0.046135
  6143. Converted tensor mean: 0.055461
  6144. Mean difference: 0.10073428
  6145. Maximum pointwise difference: 2.04938221
  6146. Max difference location: (0, 3, 8, 7)
  6147. Values at max diff - Original: -0.06264466, Converted: 1.98673749
  6148. Biggest difference in row (0, 24, 0), sum 0.061289 vs 4.106022
  6149. Layer 12, Token 10 (recurrent cache comparison):
  6150. Original tensor sum: 189.647308
  6151. Converted tensor sum: 212.602402
  6152. Original tensor mean: 0.059265
  6153. Converted tensor mean: 0.066438
  6154. Mean difference: 0.12409261
  6155. Maximum pointwise difference: 3.06548572
  6156. Max difference location: (0, 14, 1, 8)
  6157. Values at max diff - Original: -0.05504636, Converted: 3.01043940
  6158. Biggest difference in row (0, 14, 1), sum -1.444618 vs 6.230721
  6159. Layer 13, Token 10 (recurrent cache comparison):
  6160. Original tensor sum: 176.983215
  6161. Converted tensor sum: 204.426437
  6162. Original tensor mean: 0.055307
  6163. Converted tensor mean: 0.063883
  6164. Mean difference: 0.10065258
  6165. Maximum pointwise difference: 1.83688605
  6166. Max difference location: (0, 26, 0, 4)
  6167. Values at max diff - Original: -0.00286533, Converted: 1.83402073
  6168. Biggest difference in row (0, 17, 8), sum 4.395949 vs 0.724224
  6169. Layer 14, Token 10 (recurrent cache comparison):
  6170. Original tensor sum: 362.967407
  6171. Converted tensor sum: 429.969727
  6172. Original tensor mean: 0.113427
  6173. Converted tensor mean: 0.134366
  6174. Mean difference: 0.20180641
  6175. Maximum pointwise difference: 3.78999281
  6176. Max difference location: (0, 8, 9, 2)
  6177. Values at max diff - Original: -0.03249586, Converted: 3.75749683
  6178. Biggest difference in row (0, 8, 9), sum 0.437254 vs 14.025442
  6179. Layer 0, Token 11 (recurrent cache comparison):
  6180. Original tensor sum: 1.054740
  6181. Converted tensor sum: -4.912385
  6182. Original tensor mean: 0.000330
  6183. Converted tensor mean: -0.001535
  6184. Mean difference: 0.06330946
  6185. Maximum pointwise difference: 0.92195946
  6186. Max difference location: (0, 4, 9, 1)
  6187. Values at max diff - Original: 0.89514881, Converted: -0.02681063
  6188. Biggest difference in row (0, 4, 9), sum 1.999353 vs 0.163843
  6189. Layer 1, Token 11 (recurrent cache comparison):
  6190. Original tensor sum: 229.025497
  6191. Converted tensor sum: 120.378685
  6192. Original tensor mean: 0.071570
  6193. Converted tensor mean: 0.037618
  6194. Mean difference: 0.11386316
  6195. Maximum pointwise difference: 2.45059752
  6196. Max difference location: (0, 14, 7, 2)
  6197. Values at max diff - Original: 2.53569841, Converted: 0.08510098
  6198. Biggest difference in row (0, 16, 6), sum 5.812350 vs -0.022719
  6199. Layer 2, Token 11 (recurrent cache comparison):
  6200. Original tensor sum: 158.621384
  6201. Converted tensor sum: 133.457428
  6202. Original tensor mean: 0.049569
  6203. Converted tensor mean: 0.041705
  6204. Mean difference: 0.14393179
  6205. Maximum pointwise difference: 2.77776694
  6206. Max difference location: (0, 12, 7, 9)
  6207. Values at max diff - Original: 2.95237303, Converted: 0.17460610
  6208. Biggest difference in row (0, 12, 7), sum 8.065367 vs 1.687768
  6209. Layer 4, Token 11 (recurrent cache comparison):
  6210. Original tensor sum: 216.897552
  6211. Converted tensor sum: 241.688950
  6212. Original tensor mean: 0.067780
  6213. Converted tensor mean: 0.075528
  6214. Mean difference: 0.14223064
  6215. Maximum pointwise difference: 3.88969064
  6216. Max difference location: (0, 19, 2, 0)
  6217. Values at max diff - Original: 0.01694401, Converted: 3.90663457
  6218. Biggest difference in row (0, 19, 2), sum 0.437507 vs 8.962053
  6219. Layer 5, Token 11 (recurrent cache comparison):
  6220. Original tensor sum: 252.265610
  6221. Converted tensor sum: 322.771881
  6222. Original tensor mean: 0.078833
  6223. Converted tensor mean: 0.100866
  6224. Mean difference: 0.17598768
  6225. Maximum pointwise difference: 7.97533512
  6226. Max difference location: (0, 28, 6, 9)
  6227. Values at max diff - Original: 0.35858834, Converted: 8.33392334
  6228. Biggest difference in row (0, 28, 6), sum 5.014431 vs 26.334686
  6229. Layer 6, Token 11 (recurrent cache comparison):
  6230. Original tensor sum: 291.508423
  6231. Converted tensor sum: 433.311768
  6232. Original tensor mean: 0.091096
  6233. Converted tensor mean: 0.135410
  6234. Mean difference: 0.17094433
  6235. Maximum pointwise difference: 3.41666508
  6236. Max difference location: (0, 6, 4, 5)
  6237. Values at max diff - Original: 0.27297387, Converted: 3.68963885
  6238. Biggest difference in row (0, 14, 1), sum -0.165701 vs 10.544808
  6239. Layer 8, Token 11 (recurrent cache comparison):
  6240. Original tensor sum: 215.415359
  6241. Converted tensor sum: 351.092529
  6242. Original tensor mean: 0.067317
  6243. Converted tensor mean: 0.109716
  6244. Mean difference: 0.18807893
  6245. Maximum pointwise difference: 3.95769572
  6246. Max difference location: (0, 23, 4, 7)
  6247. Values at max diff - Original: 3.95293593, Converted: -0.00475990
  6248. Biggest difference in row (0, 2, 4), sum 0.017769 vs 8.146402
  6249. Layer 9, Token 11 (recurrent cache comparison):
  6250. Original tensor sum: 230.947296
  6251. Converted tensor sum: 244.599213
  6252. Original tensor mean: 0.072171
  6253. Converted tensor mean: 0.076437
  6254. Mean difference: 0.13342199
  6255. Maximum pointwise difference: 2.90320230
  6256. Max difference location: (0, 18, 3, 2)
  6257. Values at max diff - Original: -0.01862744, Converted: 2.88457489
  6258. Biggest difference in row (0, 28, 7), sum 8.403417 vs 1.460527
  6259. Layer 10, Token 11 (recurrent cache comparison):
  6260. Original tensor sum: 271.779785
  6261. Converted tensor sum: 241.771790
  6262. Original tensor mean: 0.084931
  6263. Converted tensor mean: 0.075554
  6264. Mean difference: 0.15158509
  6265. Maximum pointwise difference: 3.77889895
  6266. Max difference location: (0, 0, 3, 7)
  6267. Values at max diff - Original: 4.08713722, Converted: 0.30823818
  6268. Biggest difference in row (0, 10, 4), sum 7.732811 vs 0.603564
  6269. Layer 12, Token 11 (recurrent cache comparison):
  6270. Original tensor sum: 274.425629
  6271. Converted tensor sum: 286.277039
  6272. Original tensor mean: 0.085758
  6273. Converted tensor mean: 0.089462
  6274. Mean difference: 0.16393411
  6275. Maximum pointwise difference: 3.90725374
  6276. Max difference location: (0, 14, 1, 8)
  6277. Values at max diff - Original: 0.01574333, Converted: 3.92299700
  6278. Biggest difference in row (0, 23, 2), sum 10.560888 vs 1.081235
  6279. Layer 13, Token 11 (recurrent cache comparison):
  6280. Original tensor sum: 212.238953
  6281. Converted tensor sum: 260.726898
  6282. Original tensor mean: 0.066325
  6283. Converted tensor mean: 0.081477
  6284. Mean difference: 0.12856843
  6285. Maximum pointwise difference: 3.76317525
  6286. Max difference location: (0, 17, 8, 2)
  6287. Values at max diff - Original: 4.56109810, Converted: 0.79792279
  6288. Biggest difference in row (0, 19, 1), sum 10.229995 vs 2.908604
  6289. Layer 14, Token 11 (recurrent cache comparison):
  6290. Original tensor sum: 502.973511
  6291. Converted tensor sum: 568.935181
  6292. Original tensor mean: 0.157179
  6293. Converted tensor mean: 0.177792
  6294. Mean difference: 0.27989930
  6295. Maximum pointwise difference: 4.54578638
  6296. Max difference location: (0, 16, 7, 6)
  6297. Values at max diff - Original: 4.27132416, Converted: -0.27446240
  6298. Biggest difference in row (0, 21, 5), sum -0.168386 vs 13.477350
  6299. Layer 0, Token 12 (recurrent cache comparison):
  6300. Original tensor sum: 4.252830
  6301. Converted tensor sum: -0.731128
  6302. Original tensor mean: 0.001329
  6303. Converted tensor mean: -0.000228
  6304. Mean difference: 0.06294378
  6305. Maximum pointwise difference: 1.78251398
  6306. Max difference location: (0, 1, 3, 2)
  6307. Values at max diff - Original: -0.00792313, Converted: 1.77459085
  6308. Biggest difference in row (0, 28, 5), sum 0.238817 vs 2.175461
  6309. Layer 1, Token 12 (recurrent cache comparison):
  6310. Original tensor sum: 242.003052
  6311. Converted tensor sum: 66.457909
  6312. Original tensor mean: 0.075626
  6313. Converted tensor mean: 0.020768
  6314. Mean difference: 0.11966369
  6315. Maximum pointwise difference: 2.80864978
  6316. Max difference location: (0, 24, 0, 1)
  6317. Values at max diff - Original: 2.71780372, Converted: -0.09084603
  6318. Biggest difference in row (0, 14, 0), sum 5.513966 vs -0.057299
  6319. Layer 2, Token 12 (recurrent cache comparison):
  6320. Original tensor sum: 212.836731
  6321. Converted tensor sum: 76.092499
  6322. Original tensor mean: 0.066511
  6323. Converted tensor mean: 0.023779
  6324. Mean difference: 0.14941603
  6325. Maximum pointwise difference: 2.88118339
  6326. Max difference location: (0, 12, 7, 0)
  6327. Values at max diff - Original: 2.70842910, Converted: -0.17275429
  6328. Biggest difference in row (0, 12, 7), sum 7.969865 vs 0.167881
  6329. Layer 4, Token 12 (recurrent cache comparison):
  6330. Original tensor sum: 128.756699
  6331. Converted tensor sum: 154.911957
  6332. Original tensor mean: 0.040236
  6333. Converted tensor mean: 0.048410
  6334. Mean difference: 0.10618121
  6335. Maximum pointwise difference: 2.31433964
  6336. Max difference location: (0, 8, 1, 6)
  6337. Values at max diff - Original: 2.26328707, Converted: -0.05105254
  6338. Biggest difference in row (0, 25, 7), sum 3.269817 vs -0.397900
  6339. Layer 5, Token 12 (recurrent cache comparison):
  6340. Original tensor sum: 176.745117
  6341. Converted tensor sum: 232.734680
  6342. Original tensor mean: 0.055233
  6343. Converted tensor mean: 0.072730
  6344. Mean difference: 0.13117053
  6345. Maximum pointwise difference: 4.35398436
  6346. Max difference location: (0, 28, 6, 9)
  6347. Values at max diff - Original: 0.18738972, Converted: 4.54137421
  6348. Biggest difference in row (0, 28, 6), sum 3.095334 vs 9.516649
  6349. Layer 6, Token 12 (recurrent cache comparison):
  6350. Original tensor sum: 259.031647
  6351. Converted tensor sum: 428.069794
  6352. Original tensor mean: 0.080947
  6353. Converted tensor mean: 0.133772
  6354. Mean difference: 0.16942802
  6355. Maximum pointwise difference: 5.44846153
  6356. Max difference location: (0, 26, 9, 3)
  6357. Values at max diff - Original: -0.01164311, Converted: 5.43681860
  6358. Biggest difference in row (0, 6, 0), sum 0.994667 vs 12.910238
  6359. Layer 8, Token 12 (recurrent cache comparison):
  6360. Original tensor sum: 221.930222
  6361. Converted tensor sum: 262.522369
  6362. Original tensor mean: 0.069353
  6363. Converted tensor mean: 0.082038
  6364. Mean difference: 0.17785330
  6365. Maximum pointwise difference: 4.14597464
  6366. Max difference location: (0, 21, 9, 9)
  6367. Values at max diff - Original: -0.07410901, Converted: 4.07186556
  6368. Biggest difference in row (0, 21, 9), sum -0.204344 vs 10.075971
  6369. Layer 9, Token 12 (recurrent cache comparison):
  6370. Original tensor sum: 189.028931
  6371. Converted tensor sum: 238.029388
  6372. Original tensor mean: 0.059072
  6373. Converted tensor mean: 0.074384
  6374. Mean difference: 0.14264640
  6375. Maximum pointwise difference: 2.92814064
  6376. Max difference location: (0, 14, 1, 2)
  6377. Values at max diff - Original: -0.88447762, Converted: 2.04366302
  6378. Biggest difference in row (0, 28, 0), sum 1.806244 vs 7.562672
  6379. Layer 10, Token 12 (recurrent cache comparison):
  6380. Original tensor sum: 236.811234
  6381. Converted tensor sum: 260.771973
  6382. Original tensor mean: 0.074004
  6383. Converted tensor mean: 0.081491
  6384. Mean difference: 0.15943669
  6385. Maximum pointwise difference: 5.29651165
  6386. Max difference location: (0, 24, 0, 1)
  6387. Values at max diff - Original: 0.03258384, Converted: 5.32909536
  6388. Biggest difference in row (0, 24, 0), sum 0.082025 vs 10.949675
  6389. Layer 12, Token 12 (recurrent cache comparison):
  6390. Original tensor sum: 244.807922
  6391. Converted tensor sum: 314.705444
  6392. Original tensor mean: 0.076502
  6393. Converted tensor mean: 0.098345
  6394. Mean difference: 0.16864727
  6395. Maximum pointwise difference: 4.38556862
  6396. Max difference location: (0, 20, 3, 2)
  6397. Values at max diff - Original: -0.00896719, Converted: 4.37660122
  6398. Biggest difference in row (0, 28, 3), sum 10.509099 vs 0.169576
  6399. Layer 13, Token 12 (recurrent cache comparison):
  6400. Original tensor sum: 195.554291
  6401. Converted tensor sum: 222.348053
  6402. Original tensor mean: 0.061111
  6403. Converted tensor mean: 0.069484
  6404. Mean difference: 0.13128105
  6405. Maximum pointwise difference: 3.68478298
  6406. Max difference location: (0, 17, 2, 8)
  6407. Values at max diff - Original: 0.00859472, Converted: 3.69337773
  6408. Biggest difference in row (0, 17, 2), sum 0.146146 vs 8.692631
  6409. Layer 14, Token 12 (recurrent cache comparison):
  6410. Original tensor sum: 483.896393
  6411. Converted tensor sum: 527.955566
  6412. Original tensor mean: 0.151218
  6413. Converted tensor mean: 0.164986
  6414. Mean difference: 0.27409020
  6415. Maximum pointwise difference: 4.70396519
  6416. Max difference location: (0, 25, 4, 1)
  6417. Values at max diff - Original: -0.42079771, Converted: 4.28316736
  6418. Biggest difference in row (0, 16, 6), sum -0.041328 vs 13.549324
  6419. Layer 0, Token 13 (recurrent cache comparison):
  6420. Original tensor sum: 1.659033
  6421. Converted tensor sum: -7.970642
  6422. Original tensor mean: 0.000518
  6423. Converted tensor mean: -0.002491
  6424. Mean difference: 0.07536316
  6425. Maximum pointwise difference: 1.29645300
  6426. Max difference location: (0, 4, 9, 1)
  6427. Values at max diff - Original: 1.30392849, Converted: 0.00747545
  6428. Biggest difference in row (0, 26, 3), sum -0.329301 vs -3.374216
  6429. Layer 1, Token 13 (recurrent cache comparison):
  6430. Original tensor sum: 239.724915
  6431. Converted tensor sum: 79.675636
  6432. Original tensor mean: 0.074914
  6433. Converted tensor mean: 0.024899
  6434. Mean difference: 0.12407961
  6435. Maximum pointwise difference: 2.50358605
  6436. Max difference location: (0, 24, 0, 1)
  6437. Values at max diff - Original: 2.48077655, Converted: -0.02280946
  6438. Biggest difference in row (0, 14, 0), sum 6.016558 vs 0.013054
  6439. Layer 2, Token 13 (recurrent cache comparison):
  6440. Original tensor sum: 247.626099
  6441. Converted tensor sum: 106.589592
  6442. Original tensor mean: 0.077383
  6443. Converted tensor mean: 0.033309
  6444. Mean difference: 0.15574569
  6445. Maximum pointwise difference: 3.29841137
  6446. Max difference location: (0, 4, 2, 8)
  6447. Values at max diff - Original: 3.44825506, Converted: 0.14984375
  6448. Biggest difference in row (0, 12, 7), sum 7.714676 vs 0.758271
  6449. Layer 4, Token 13 (recurrent cache comparison):
  6450. Original tensor sum: 123.371284
  6451. Converted tensor sum: 126.859177
  6452. Original tensor mean: 0.038554
  6453. Converted tensor mean: 0.039643
  6454. Mean difference: 0.08389783
  6455. Maximum pointwise difference: 2.97862935
  6456. Max difference location: (0, 28, 2, 3)
  6457. Values at max diff - Original: 3.17326093, Converted: 0.19463167
  6458. Biggest difference in row (0, 28, 2), sum 4.464350 vs 0.493919
  6459. Layer 5, Token 13 (recurrent cache comparison):
  6460. Original tensor sum: 147.258102
  6461. Converted tensor sum: 184.070984
  6462. Original tensor mean: 0.046018
  6463. Converted tensor mean: 0.057522
  6464. Mean difference: 0.10195178
  6465. Maximum pointwise difference: 2.96551919
  6466. Max difference location: (0, 28, 6, 9)
  6467. Values at max diff - Original: 0.10930623, Converted: 3.07482553
  6468. Biggest difference in row (0, 28, 6), sum 1.825548 vs 9.674469
  6469. Layer 6, Token 13 (recurrent cache comparison):
  6470. Original tensor sum: 283.270142
  6471. Converted tensor sum: 448.314880
  6472. Original tensor mean: 0.088522
  6473. Converted tensor mean: 0.140098
  6474. Mean difference: 0.15905625
  6475. Maximum pointwise difference: 3.46541429
  6476. Max difference location: (0, 24, 8, 2)
  6477. Values at max diff - Original: -0.00599505, Converted: 3.45941925
  6478. Biggest difference in row (0, 6, 4), sum 3.774855 vs 11.804656
  6479. Layer 8, Token 13 (recurrent cache comparison):
  6480. Original tensor sum: 241.112183
  6481. Converted tensor sum: 274.130127
  6482. Original tensor mean: 0.075348
  6483. Converted tensor mean: 0.085666
  6484. Mean difference: 0.10883617
  6485. Maximum pointwise difference: 4.01715469
  6486. Max difference location: (0, 21, 9, 9)
  6487. Values at max diff - Original: -0.09944591, Converted: 3.91770887
  6488. Biggest difference in row (0, 20, 9), sum -0.234172 vs 5.020240
  6489. Layer 9, Token 13 (recurrent cache comparison):
  6490. Original tensor sum: 173.039688
  6491. Converted tensor sum: 214.021088
  6492. Original tensor mean: 0.054075
  6493. Converted tensor mean: 0.066882
  6494. Mean difference: 0.09634628
  6495. Maximum pointwise difference: 1.72028887
  6496. Max difference location: (0, 18, 6, 2)
  6497. Values at max diff - Original: 0.10816531, Converted: 1.82845414
  6498. Biggest difference in row (0, 18, 6), sum 0.705206 vs 4.642780
  6499. Layer 10, Token 13 (recurrent cache comparison):
  6500. Original tensor sum: 213.873550
  6501. Converted tensor sum: 242.753281
  6502. Original tensor mean: 0.066835
  6503. Converted tensor mean: 0.075860
  6504. Mean difference: 0.09029815
  6505. Maximum pointwise difference: 1.41950274
  6506. Max difference location: (0, 11, 2, 6)
  6507. Values at max diff - Original: 0.20659086, Converted: 1.62609363
  6508. Biggest difference in row (0, 23, 5), sum 0.448223 vs 3.806486
  6509. Layer 12, Token 13 (recurrent cache comparison):
  6510. Original tensor sum: 233.552292
  6511. Converted tensor sum: 296.583405
  6512. Original tensor mean: 0.072985
  6513. Converted tensor mean: 0.092682
  6514. Mean difference: 0.08977944
  6515. Maximum pointwise difference: 1.59837830
  6516. Max difference location: (0, 19, 7, 7)
  6517. Values at max diff - Original: 0.86349380, Converted: 2.46187210
  6518. Biggest difference in row (0, 4, 3), sum 5.997213 vs 0.558758
  6519. Layer 13, Token 13 (recurrent cache comparison):
  6520. Original tensor sum: 172.634430
  6521. Converted tensor sum: 190.998459
  6522. Original tensor mean: 0.053948
  6523. Converted tensor mean: 0.059687
  6524. Mean difference: 0.07964972
  6525. Maximum pointwise difference: 2.45006180
  6526. Max difference location: (0, 26, 4, 0)
  6527. Values at max diff - Original: 2.51385903, Converted: 0.06379732
  6528. Biggest difference in row (0, 26, 4), sum 5.078406 vs 0.298857
  6529. Layer 14, Token 13 (recurrent cache comparison):
  6530. Original tensor sum: 516.640808
  6531. Converted tensor sum: 514.890991
  6532. Original tensor mean: 0.161450
  6533. Converted tensor mean: 0.160903
  6534. Mean difference: 0.14294353
  6535. Maximum pointwise difference: 2.38266706
  6536. Max difference location: (0, 8, 9, 3)
  6537. Values at max diff - Original: 0.05516699, Converted: 2.43783402
  6538. Biggest difference in row (0, 8, 9), sum -0.157885 vs 10.688316
  6539. Layer 0, Token 14 (recurrent cache comparison):
  6540. Original tensor sum: 5.983342
  6541. Converted tensor sum: -8.715725
  6542. Original tensor mean: 0.001870
  6543. Converted tensor mean: -0.002724
  6544. Mean difference: 0.07516728
  6545. Maximum pointwise difference: 1.55751526
  6546. Max difference location: (0, 25, 8, 2)
  6547. Values at max diff - Original: 1.57396424, Converted: 0.01644893
  6548. Biggest difference in row (0, 7, 3), sum 0.124509 vs -1.539357
  6549. Layer 1, Token 14 (recurrent cache comparison):
  6550. Original tensor sum: 229.783936
  6551. Converted tensor sum: 87.531807
  6552. Original tensor mean: 0.071807
  6553. Converted tensor mean: 0.027354
  6554. Mean difference: 0.11559690
  6555. Maximum pointwise difference: 2.09234738
  6556. Max difference location: (0, 24, 0, 1)
  6557. Values at max diff - Original: 2.22317505, Converted: 0.13082767
  6558. Biggest difference in row (0, 14, 0), sum 5.592927 vs -0.441425
  6559. Layer 2, Token 14 (recurrent cache comparison):
  6560. Original tensor sum: 268.945923
  6561. Converted tensor sum: 133.786499
  6562. Original tensor mean: 0.084046
  6563. Converted tensor mean: 0.041808
  6564. Mean difference: 0.16352382
  6565. Maximum pointwise difference: 2.87041712
  6566. Max difference location: (0, 11, 9, 6)
  6567. Values at max diff - Original: 2.94916487, Converted: 0.07874785
  6568. Biggest difference in row (0, 23, 9), sum 8.145676 vs 0.297307
  6569. Layer 4, Token 14 (recurrent cache comparison):
  6570. Original tensor sum: 117.762733
  6571. Converted tensor sum: 118.469772
  6572. Original tensor mean: 0.036801
  6573. Converted tensor mean: 0.037022
  6574. Mean difference: 0.09650213
  6575. Maximum pointwise difference: 1.50842690
  6576. Max difference location: (0, 20, 6, 4)
  6577. Values at max diff - Original: 2.04159784, Converted: 0.53317100
  6578. Biggest difference in row (0, 20, 6), sum 4.022114 vs 0.957074
  6579. Layer 5, Token 14 (recurrent cache comparison):
  6580. Original tensor sum: 128.612335
  6581. Converted tensor sum: 157.030731
  6582. Original tensor mean: 0.040191
  6583. Converted tensor mean: 0.049072
  6584. Mean difference: 0.10371025
  6585. Maximum pointwise difference: 2.24814534
  6586. Max difference location: (0, 8, 5, 9)
  6587. Values at max diff - Original: -0.03905072, Converted: 2.20909452
  6588. Biggest difference in row (0, 2, 5), sum 3.689715 vs 0.515908
  6589. Layer 6, Token 14 (recurrent cache comparison):
  6590. Original tensor sum: 284.314667
  6591. Converted tensor sum: 446.866150
  6592. Original tensor mean: 0.088848
  6593. Converted tensor mean: 0.139646
  6594. Mean difference: 0.16138166
  6595. Maximum pointwise difference: 3.92217135
  6596. Max difference location: (0, 26, 9, 3)
  6597. Values at max diff - Original: -0.00824802, Converted: 3.91392326
  6598. Biggest difference in row (0, 6, 0), sum 1.467430 vs 14.063056
  6599. Layer 8, Token 14 (recurrent cache comparison):
  6600. Original tensor sum: 268.420227
  6601. Converted tensor sum: 298.094666
  6602. Original tensor mean: 0.083881
  6603. Converted tensor mean: 0.093155
  6604. Mean difference: 0.17274044
  6605. Maximum pointwise difference: 3.57632637
  6606. Max difference location: (0, 21, 9, 9)
  6607. Values at max diff - Original: -0.07158025, Converted: 3.50474620
  6608. Biggest difference in row (0, 21, 9), sum -0.176140 vs 9.883745
  6609. Layer 9, Token 14 (recurrent cache comparison):
  6610. Original tensor sum: 153.733398
  6611. Converted tensor sum: 197.629532
  6612. Original tensor mean: 0.048042
  6613. Converted tensor mean: 0.061759
  6614. Mean difference: 0.11230749
  6615. Maximum pointwise difference: 2.07441854
  6616. Max difference location: (0, 2, 6, 8)
  6617. Values at max diff - Original: -0.01318651, Converted: 2.06123209
  6618. Biggest difference in row (0, 28, 0), sum 1.515908 vs 6.081204
  6619. Layer 10, Token 14 (recurrent cache comparison):
  6620. Original tensor sum: 196.466980
  6621. Converted tensor sum: 228.325546
  6622. Original tensor mean: 0.061396
  6623. Converted tensor mean: 0.071352
  6624. Mean difference: 0.11859564
  6625. Maximum pointwise difference: 4.73182058
  6626. Max difference location: (0, 24, 0, 1)
  6627. Values at max diff - Original: 0.02647224, Converted: 4.75829268
  6628. Biggest difference in row (0, 24, 0), sum 0.182875 vs 9.743350
  6629. Layer 12, Token 14 (recurrent cache comparison):
  6630. Original tensor sum: 235.148682
  6631. Converted tensor sum: 279.831421
  6632. Original tensor mean: 0.073484
  6633. Converted tensor mean: 0.087447
  6634. Mean difference: 0.14844361
  6635. Maximum pointwise difference: 3.64688230
  6636. Max difference location: (0, 28, 4, 2)
  6637. Values at max diff - Original: 0.01143306, Converted: 3.65831542
  6638. Biggest difference in row (0, 28, 2), sum 9.410328 vs 0.407452
  6639. Layer 13, Token 14 (recurrent cache comparison):
  6640. Original tensor sum: 165.774078
  6641. Converted tensor sum: 179.691483
  6642. Original tensor mean: 0.051804
  6643. Converted tensor mean: 0.056154
  6644. Mean difference: 0.09881324
  6645. Maximum pointwise difference: 3.03563190
  6646. Max difference location: (0, 11, 0, 4)
  6647. Values at max diff - Original: -0.10383722, Converted: 2.93179464
  6648. Biggest difference in row (0, 11, 0), sum 0.092786 vs 5.614193
  6649. Layer 14, Token 14 (recurrent cache comparison):
  6650. Original tensor sum: 519.787109
  6651. Converted tensor sum: 539.567444
  6652. Original tensor mean: 0.162433
  6653. Converted tensor mean: 0.168615
  6654. Mean difference: 0.25360039
  6655. Maximum pointwise difference: 4.24835634
  6656. Max difference location: (0, 15, 8, 2)
  6657. Values at max diff - Original: -0.01945496, Converted: 4.22890139
  6658. Biggest difference in row (0, 16, 6), sum 0.069426 vs 10.617959
  6659. Layer 0, Token 15 (recurrent cache comparison):
  6660. Original tensor sum: 1.143128
  6661. Converted tensor sum: 1.955431
  6662. Original tensor mean: 0.000357
  6663. Converted tensor mean: 0.000611
  6664. Mean difference: 0.06554744
  6665. Maximum pointwise difference: 1.62353444
  6666. Max difference location: (0, 1, 3, 2)
  6667. Values at max diff - Original: -0.04374466, Converted: 1.57978976
  6668. Biggest difference in row (0, 28, 5), sum 0.256206 vs 2.700654
  6669. Layer 1, Token 15 (recurrent cache comparison):
  6670. Original tensor sum: 237.576813
  6671. Converted tensor sum: 84.227829
  6672. Original tensor mean: 0.074243
  6673. Converted tensor mean: 0.026321
  6674. Mean difference: 0.12017149
  6675. Maximum pointwise difference: 2.73136139
  6676. Max difference location: (0, 17, 6, 0)
  6677. Values at max diff - Original: 2.89759755, Converted: 0.16623622
  6678. Biggest difference in row (0, 16, 6), sum 6.557743 vs 0.035282
  6679. Layer 2, Token 15 (recurrent cache comparison):
  6680. Original tensor sum: 311.619568
  6681. Converted tensor sum: 155.972748
  6682. Original tensor mean: 0.097381
  6683. Converted tensor mean: 0.048741
  6684. Mean difference: 0.17912415
  6685. Maximum pointwise difference: 3.15524197
  6686. Max difference location: (0, 12, 0, 0)
  6687. Values at max diff - Original: 3.30613947, Converted: 0.15089758
  6688. Biggest difference in row (0, 12, 0), sum 9.937962 vs 2.002455
  6689. Layer 4, Token 15 (recurrent cache comparison):
  6690. Original tensor sum: 167.504608
  6691. Converted tensor sum: 97.213791
  6692. Original tensor mean: 0.052345
  6693. Converted tensor mean: 0.030379
  6694. Mean difference: 0.11675335
  6695. Maximum pointwise difference: 2.34569263
  6696. Max difference location: (0, 28, 2, 3)
  6697. Values at max diff - Original: 2.36823630, Converted: 0.02254373
  6698. Biggest difference in row (0, 24, 1), sum 4.970531 vs 0.552202
  6699. Layer 5, Token 15 (recurrent cache comparison):
  6700. Original tensor sum: 165.098206
  6701. Converted tensor sum: 106.835938
  6702. Original tensor mean: 0.051593
  6703. Converted tensor mean: 0.033386
  6704. Mean difference: 0.11981978
  6705. Maximum pointwise difference: 3.00254560
  6706. Max difference location: (0, 19, 0, 4)
  6707. Values at max diff - Original: -0.04975805, Converted: 2.95278764
  6708. Biggest difference in row (0, 6, 7), sum 6.529483 vs 0.743666
  6709. Layer 6, Token 15 (recurrent cache comparison):
  6710. Original tensor sum: 328.690277
  6711. Converted tensor sum: 322.121643
  6712. Original tensor mean: 0.102716
  6713. Converted tensor mean: 0.100663
  6714. Mean difference: 0.17106648
  6715. Maximum pointwise difference: 3.49930573
  6716. Max difference location: (0, 10, 4, 0)
  6717. Values at max diff - Original: -0.02206346, Converted: 3.47724223
  6718. Biggest difference in row (0, 10, 4), sum 0.599032 vs 10.579692
  6719. Layer 8, Token 15 (recurrent cache comparison):
  6720. Original tensor sum: 317.911224
  6721. Converted tensor sum: 119.034622
  6722. Original tensor mean: 0.099347
  6723. Converted tensor mean: 0.037198
  6724. Mean difference: 0.17545381
  6725. Maximum pointwise difference: 5.58166885
  6726. Max difference location: (0, 12, 5, 9)
  6727. Values at max diff - Original: -0.00163084, Converted: 5.58003807
  6728. Biggest difference in row (0, 12, 5), sum -0.115539 vs 9.864284
  6729. Layer 9, Token 15 (recurrent cache comparison):
  6730. Original tensor sum: 190.170853
  6731. Converted tensor sum: 99.272003
  6732. Original tensor mean: 0.059428
  6733. Converted tensor mean: 0.031023
  6734. Mean difference: 0.10875368
  6735. Maximum pointwise difference: 2.41038036
  6736. Max difference location: (0, 18, 2, 3)
  6737. Values at max diff - Original: 2.75146770, Converted: 0.34108725
  6738. Biggest difference in row (0, 18, 2), sum 8.039729 vs 1.441757
  6739. Layer 10, Token 15 (recurrent cache comparison):
  6740. Original tensor sum: 224.608826
  6741. Converted tensor sum: 140.291000
  6742. Original tensor mean: 0.070190
  6743. Converted tensor mean: 0.043841
  6744. Mean difference: 0.13173704
  6745. Maximum pointwise difference: 3.69921541
  6746. Max difference location: (0, 0, 7, 3)
  6747. Values at max diff - Original: -0.00459916, Converted: 3.69461632
  6748. Biggest difference in row (0, 18, 3), sum 0.045224 vs 5.212623
  6749. Layer 12, Token 15 (recurrent cache comparison):
  6750. Original tensor sum: 284.485657
  6751. Converted tensor sum: 158.051971
  6752. Original tensor mean: 0.088902
  6753. Converted tensor mean: 0.049391
  6754. Mean difference: 0.16240636
  6755. Maximum pointwise difference: 3.41311693
  6756. Max difference location: (0, 30, 4, 0)
  6757. Values at max diff - Original: -0.00378206, Converted: 3.40933490
  6758. Biggest difference in row (0, 28, 3), sum 10.288229 vs 0.646799
  6759. Layer 13, Token 15 (recurrent cache comparison):
  6760. Original tensor sum: 217.891571
  6761. Converted tensor sum: 114.440430
  6762. Original tensor mean: 0.068091
  6763. Converted tensor mean: 0.035763
  6764. Mean difference: 0.11250080
  6765. Maximum pointwise difference: 2.57714581
  6766. Max difference location: (0, 3, 9, 4)
  6767. Values at max diff - Original: -0.00322327, Converted: 2.57392263
  6768. Biggest difference in row (0, 8, 1), sum 5.657505 vs 0.025426
  6769. Layer 14, Token 15 (recurrent cache comparison):
  6770. Original tensor sum: 613.393188
  6771. Converted tensor sum: 259.209320
  6772. Original tensor mean: 0.191685
  6773. Converted tensor mean: 0.081003
  6774. Mean difference: 0.25669345
  6775. Maximum pointwise difference: 4.67302513
  6776. Max difference location: (0, 16, 6, 7)
  6777. Values at max diff - Original: 0.00394140, Converted: 4.67696667
  6778. Biggest difference in row (0, 16, 6), sum 0.113854 vs 11.389561
  6779. Layer 0, Token 16 (recurrent cache comparison):
  6780. Original tensor sum: -7.241831
  6781. Converted tensor sum: 6.292229
  6782. Original tensor mean: -0.002263
  6783. Converted tensor mean: 0.001966
  6784. Mean difference: 0.07260455
  6785. Maximum pointwise difference: 1.56294525
  6786. Max difference location: (0, 4, 9, 1)
  6787. Values at max diff - Original: 1.55768764, Converted: -0.00525762
  6788. Biggest difference in row (0, 4, 9), sum 3.422554 vs -0.082252
  6789. Layer 1, Token 16 (recurrent cache comparison):
  6790. Original tensor sum: 208.371277
  6791. Converted tensor sum: 121.900169
  6792. Original tensor mean: 0.065116
  6793. Converted tensor mean: 0.038094
  6794. Mean difference: 0.10988435
  6795. Maximum pointwise difference: 2.54077005
  6796. Max difference location: (0, 16, 6, 1)
  6797. Values at max diff - Original: 2.44506192, Converted: -0.09570823
  6798. Biggest difference in row (0, 16, 6), sum 5.495286 vs 0.369152
  6799. Layer 2, Token 16 (recurrent cache comparison):
  6800. Original tensor sum: 271.274109
  6801. Converted tensor sum: 250.062592
  6802. Original tensor mean: 0.084773
  6803. Converted tensor mean: 0.078145
  6804. Mean difference: 0.18668148
  6805. Maximum pointwise difference: 3.97749329
  6806. Max difference location: (0, 4, 8, 2)
  6807. Values at max diff - Original: 0.00367373, Converted: 3.98116708
  6808. Biggest difference in row (0, 4, 8), sum 0.084576 vs 8.366636
  6809. Layer 4, Token 16 (recurrent cache comparison):
  6810. Original tensor sum: 245.506393
  6811. Converted tensor sum: 128.042282
  6812. Original tensor mean: 0.076721
  6813. Converted tensor mean: 0.040013
  6814. Mean difference: 0.13813969
  6815. Maximum pointwise difference: 2.50754499
  6816. Max difference location: (0, 27, 2, 5)
  6817. Values at max diff - Original: 2.48510361, Converted: -0.02244142
  6818. Biggest difference in row (0, 30, 3), sum 6.143555 vs -0.003137
  6819. Layer 5, Token 16 (recurrent cache comparison):
  6820. Original tensor sum: 252.541031
  6821. Converted tensor sum: 153.491074
  6822. Original tensor mean: 0.078919
  6823. Converted tensor mean: 0.047966
  6824. Mean difference: 0.13783714
  6825. Maximum pointwise difference: 4.67899084
  6826. Max difference location: (0, 6, 2, 9)
  6827. Values at max diff - Original: 4.74959278, Converted: 0.07060210
  6828. Biggest difference in row (0, 6, 2), sum 15.435174 vs 0.669571
  6829. Layer 6, Token 16 (recurrent cache comparison):
  6830. Original tensor sum: 417.031616
  6831. Converted tensor sum: 302.490662
  6832. Original tensor mean: 0.130322
  6833. Converted tensor mean: 0.094528
  6834. Mean difference: 0.18095936
  6835. Maximum pointwise difference: 3.41091108
  6836. Max difference location: (0, 1, 9, 8)
  6837. Values at max diff - Original: 3.94837856, Converted: 0.53746736
  6838. Biggest difference in row (0, 17, 7), sum 10.598001 vs 1.553886
  6839. Layer 8, Token 16 (recurrent cache comparison):
  6840. Original tensor sum: 360.497803
  6841. Converted tensor sum: 167.798264
  6842. Original tensor mean: 0.112656
  6843. Converted tensor mean: 0.052437
  6844. Mean difference: 0.18179806
  6845. Maximum pointwise difference: 4.85258770
  6846. Max difference location: (0, 20, 6, 7)
  6847. Values at max diff - Original: 3.78496194, Converted: -1.06762552
  6848. Biggest difference in row (0, 20, 6), sum 7.293591 vs -2.448533
  6849. Layer 9, Token 16 (recurrent cache comparison):
  6850. Original tensor sum: 231.574097
  6851. Converted tensor sum: 117.788071
  6852. Original tensor mean: 0.072367
  6853. Converted tensor mean: 0.036809
  6854. Mean difference: 0.12296900
  6855. Maximum pointwise difference: 1.94617844
  6856. Max difference location: (0, 18, 2, 3)
  6857. Values at max diff - Original: 2.51620770, Converted: 0.57002932
  6858. Biggest difference in row (0, 18, 2), sum 7.408888 vs 2.509162
  6859. Layer 10, Token 16 (recurrent cache comparison):
  6860. Original tensor sum: 251.412247
  6861. Converted tensor sum: 167.548752
  6862. Original tensor mean: 0.078566
  6863. Converted tensor mean: 0.052359
  6864. Mean difference: 0.13002089
  6865. Maximum pointwise difference: 2.56599689
  6866. Max difference location: (0, 24, 1, 0)
  6867. Values at max diff - Original: 4.14129448, Converted: 1.57529759
  6868. Biggest difference in row (0, 14, 2), sum 5.702995 vs 0.022515
  6869. Layer 12, Token 16 (recurrent cache comparison):
  6870. Original tensor sum: 309.263367
  6871. Converted tensor sum: 172.743027
  6872. Original tensor mean: 0.096645
  6873. Converted tensor mean: 0.053982
  6874. Mean difference: 0.16015999
  6875. Maximum pointwise difference: 4.55992699
  6876. Max difference location: (0, 28, 3, 4)
  6877. Values at max diff - Original: 3.40088701, Converted: -1.15903974
  6878. Biggest difference in row (0, 28, 3), sum 10.782799 vs -1.738761
  6879. Layer 13, Token 16 (recurrent cache comparison):
  6880. Original tensor sum: 245.305267
  6881. Converted tensor sum: 135.343552
  6882. Original tensor mean: 0.076658
  6883. Converted tensor mean: 0.042295
  6884. Mean difference: 0.11650297
  6885. Maximum pointwise difference: 2.94789600
  6886. Max difference location: (0, 11, 4, 0)
  6887. Values at max diff - Original: 3.34942126, Converted: 0.40152529
  6888. Biggest difference in row (0, 27, 4), sum 6.619488 vs 0.377767
  6889. Layer 14, Token 16 (recurrent cache comparison):
  6890. Original tensor sum: 677.616821
  6891. Converted tensor sum: 309.657593
  6892. Original tensor mean: 0.211755
  6893. Converted tensor mean: 0.096768
  6894. Mean difference: 0.25261062
  6895. Maximum pointwise difference: 4.12457132
  6896. Max difference location: (0, 21, 3, 5)
  6897. Values at max diff - Original: 4.07018948, Converted: -0.05438172
  6898. Biggest difference in row (0, 21, 3), sum 12.550769 vs -0.320660
  6899. Layer 0, Token 17 (recurrent cache comparison):
  6900. Original tensor sum: 8.814422
  6901. Converted tensor sum: 2.569008
  6902. Original tensor mean: 0.002755
  6903. Converted tensor mean: 0.000803
  6904. Mean difference: 0.07054429
  6905. Maximum pointwise difference: 2.09221244
  6906. Max difference location: (0, 1, 2, 3)
  6907. Values at max diff - Original: 2.03968024, Converted: -0.05253213
  6908. Biggest difference in row (0, 17, 2), sum 2.854507 vs 0.425217
  6909. Layer 1, Token 17 (recurrent cache comparison):
  6910. Original tensor sum: 202.785217
  6911. Converted tensor sum: 127.821655
  6912. Original tensor mean: 0.063370
  6913. Converted tensor mean: 0.039944
  6914. Mean difference: 0.11817915
  6915. Maximum pointwise difference: 2.18196273
  6916. Max difference location: (0, 23, 4, 0)
  6917. Values at max diff - Original: 0.00466894, Converted: 2.18663168
  6918. Biggest difference in row (0, 23, 4), sum 1.189118 vs 6.664180
  6919. Layer 2, Token 17 (recurrent cache comparison):
  6920. Original tensor sum: 269.547241
  6921. Converted tensor sum: 202.949875
  6922. Original tensor mean: 0.084234
  6923. Converted tensor mean: 0.063422
  6924. Mean difference: 0.17686243
  6925. Maximum pointwise difference: 3.38580871
  6926. Max difference location: (0, 30, 3, 9)
  6927. Values at max diff - Original: -0.03989490, Converted: 3.34591389
  6928. Biggest difference in row (0, 23, 4), sum 0.959554 vs 6.602069
  6929. Layer 4, Token 17 (recurrent cache comparison):
  6930. Original tensor sum: 285.057709
  6931. Converted tensor sum: 90.890617
  6932. Original tensor mean: 0.089081
  6933. Converted tensor mean: 0.028403
  6934. Mean difference: 0.14633463
  6935. Maximum pointwise difference: 3.59569287
  6936. Max difference location: (0, 19, 2, 9)
  6937. Values at max diff - Original: 0.11129396, Converted: 3.70698690
  6938. Biggest difference in row (0, 24, 1), sum 6.665072 vs 0.069785
  6939. Layer 5, Token 17 (recurrent cache comparison):
  6940. Original tensor sum: 305.935303
  6941. Converted tensor sum: 101.421249
  6942. Original tensor mean: 0.095605
  6943. Converted tensor mean: 0.031694
  6944. Mean difference: 0.15904053
  6945. Maximum pointwise difference: 2.52599096
  6946. Max difference location: (0, 6, 2, 9)
  6947. Values at max diff - Original: 2.51262259, Converted: -0.01336834
  6948. Biggest difference in row (0, 6, 2), sum 10.206850 vs 1.778209
  6949. Layer 6, Token 17 (recurrent cache comparison):
  6950. Original tensor sum: 411.833740
  6951. Converted tensor sum: 250.492935
  6952. Original tensor mean: 0.128698
  6953. Converted tensor mean: 0.078279
  6954. Mean difference: 0.18581259
  6955. Maximum pointwise difference: 4.02491474
  6956. Max difference location: (0, 17, 7, 5)
  6957. Values at max diff - Original: 4.38884020, Converted: 0.36392546
  6958. Biggest difference in row (0, 17, 7), sum 11.349621 vs 0.846145
  6959. Layer 8, Token 17 (recurrent cache comparison):
  6960. Original tensor sum: 373.165680
  6961. Converted tensor sum: 136.027786
  6962. Original tensor mean: 0.116614
  6963. Converted tensor mean: 0.042509
  6964. Mean difference: 0.18740444
  6965. Maximum pointwise difference: 4.54259586
  6966. Max difference location: (0, 20, 0, 7)
  6967. Values at max diff - Original: 4.54873943, Converted: 0.00614343
  6968. Biggest difference in row (0, 7, 2), sum 8.000880 vs 0.043800
  6969. Layer 9, Token 17 (recurrent cache comparison):
  6970. Original tensor sum: 239.737335
  6971. Converted tensor sum: 91.044197
  6972. Original tensor mean: 0.074918
  6973. Converted tensor mean: 0.028451
  6974. Mean difference: 0.11736859
  6975. Maximum pointwise difference: 1.98427892
  6976. Max difference location: (0, 14, 2, 1)
  6977. Values at max diff - Original: 1.90727878, Converted: -0.07700008
  6978. Biggest difference in row (0, 28, 7), sum 5.596577 vs -0.058259
  6979. Layer 10, Token 17 (recurrent cache comparison):
  6980. Original tensor sum: 260.470673
  6981. Converted tensor sum: 162.895706
  6982. Original tensor mean: 0.081397
  6983. Converted tensor mean: 0.050905
  6984. Mean difference: 0.14167482
  6985. Maximum pointwise difference: 3.23060656
  6986. Max difference location: (0, 24, 1, 0)
  6987. Values at max diff - Original: 4.23022413, Converted: 0.99961770
  6988. Biggest difference in row (0, 24, 0), sum 0.125982 vs 9.195232
  6989. Layer 12, Token 17 (recurrent cache comparison):
  6990. Original tensor sum: 321.268158
  6991. Converted tensor sum: 134.452438
  6992. Original tensor mean: 0.100396
  6993. Converted tensor mean: 0.042016
  6994. Mean difference: 0.17344666
  6995. Maximum pointwise difference: 4.15682602
  6996. Max difference location: (0, 14, 8, 1)
  6997. Values at max diff - Original: 4.38615370, Converted: 0.22932746
  6998. Biggest difference in row (0, 28, 3), sum 11.304427 vs 0.427086
  6999. Layer 13, Token 17 (recurrent cache comparison):
  7000. Original tensor sum: 255.942596
  7001. Converted tensor sum: 107.501419
  7002. Original tensor mean: 0.079982
  7003. Converted tensor mean: 0.033594
  7004. Mean difference: 0.11964211
  7005. Maximum pointwise difference: 2.72310257
  7006. Max difference location: (0, 11, 4, 0)
  7007. Values at max diff - Original: 3.71963763, Converted: 0.99653512
  7008. Biggest difference in row (0, 27, 4), sum 6.949797 vs 0.279431
  7009. Layer 14, Token 17 (recurrent cache comparison):
  7010. Original tensor sum: 718.971008
  7011. Converted tensor sum: 252.775909
  7012. Original tensor mean: 0.224678
  7013. Converted tensor mean: 0.078992
  7014. Mean difference: 0.28457019
  7015. Maximum pointwise difference: 4.54859781
  7016. Max difference location: (0, 5, 8, 9)
  7017. Values at max diff - Original: -0.00566958, Converted: 4.54292822
  7018. Biggest difference in row (0, 6, 1), sum 11.820190 vs 0.083275
  7019. Layer 0, Token 18 (recurrent cache comparison):
  7020. Original tensor sum: -2.745796
  7021. Converted tensor sum: 11.596529
  7022. Original tensor mean: -0.000858
  7023. Converted tensor mean: 0.003624
  7024. Mean difference: 0.06698289
  7025. Maximum pointwise difference: 1.30398095
  7026. Max difference location: (0, 1, 2, 3)
  7027. Values at max diff - Original: 1.23424304, Converted: -0.06973789
  7028. Biggest difference in row (0, 25, 7), sum -1.491066 vs 0.107394
  7029. Layer 1, Token 18 (recurrent cache comparison):
  7030. Original tensor sum: 196.680084
  7031. Converted tensor sum: 112.820984
  7032. Original tensor mean: 0.061463
  7033. Converted tensor mean: 0.035257
  7034. Mean difference: 0.11424790
  7035. Maximum pointwise difference: 1.90677047
  7036. Max difference location: (0, 14, 2, 3)
  7037. Values at max diff - Original: 0.10773923, Converted: 2.01450968
  7038. Biggest difference in row (0, 24, 3), sum 5.388914 vs 0.084538
  7039. Layer 2, Token 18 (recurrent cache comparison):
  7040. Original tensor sum: 269.808228
  7041. Converted tensor sum: 106.268402
  7042. Original tensor mean: 0.084315
  7043. Converted tensor mean: 0.033209
  7044. Mean difference: 0.16576965
  7045. Maximum pointwise difference: 2.41004586
  7046. Max difference location: (0, 12, 0, 0)
  7047. Values at max diff - Original: 2.62151933, Converted: 0.21147355
  7048. Biggest difference in row (0, 12, 0), sum 7.396654 vs 0.148190
  7049. Layer 4, Token 18 (recurrent cache comparison):
  7050. Original tensor sum: 299.541138
  7051. Converted tensor sum: 34.684372
  7052. Original tensor mean: 0.093607
  7053. Converted tensor mean: 0.010839
  7054. Mean difference: 0.15344296
  7055. Maximum pointwise difference: 4.97097397
  7056. Max difference location: (0, 27, 8, 5)
  7057. Values at max diff - Original: 4.93650246, Converted: -0.03447145
  7058. Biggest difference in row (0, 27, 8), sum 10.168988 vs 0.095367
  7059. Layer 5, Token 18 (recurrent cache comparison):
  7060. Original tensor sum: 322.520721
  7061. Converted tensor sum: 32.353989
  7062. Original tensor mean: 0.100788
  7063. Converted tensor mean: 0.010111
  7064. Mean difference: 0.15857503
  7065. Maximum pointwise difference: 3.27807403
  7066. Max difference location: (0, 28, 9, 6)
  7067. Values at max diff - Original: 3.39260817, Converted: 0.11453414
  7068. Biggest difference in row (0, 31, 7), sum 9.317598 vs 0.060667
  7069. Layer 6, Token 18 (recurrent cache comparison):
  7070. Original tensor sum: 404.272705
  7071. Converted tensor sum: 105.430817
  7072. Original tensor mean: 0.126335
  7073. Converted tensor mean: 0.032947
  7074. Mean difference: 0.18362552
  7075. Maximum pointwise difference: 4.36808205
  7076. Max difference location: (0, 6, 5, 4)
  7077. Values at max diff - Original: 4.63004971, Converted: 0.26196742
  7078. Biggest difference in row (0, 30, 4), sum 12.429064 vs 1.549177
  7079. Layer 8, Token 18 (recurrent cache comparison):
  7080. Original tensor sum: 379.120117
  7081. Converted tensor sum: 49.316475
  7082. Original tensor mean: 0.118475
  7083. Converted tensor mean: 0.015411
  7084. Mean difference: 0.18690227
  7085. Maximum pointwise difference: 4.34863997
  7086. Max difference location: (0, 20, 0, 7)
  7087. Values at max diff - Original: 4.50196075, Converted: 0.15332088
  7088. Biggest difference in row (0, 7, 2), sum 8.701149 vs -1.880803
  7089. Layer 9, Token 18 (recurrent cache comparison):
  7090. Original tensor sum: 247.687454
  7091. Converted tensor sum: 31.604210
  7092. Original tensor mean: 0.077402
  7093. Converted tensor mean: 0.009876
  7094. Mean difference: 0.12334745
  7095. Maximum pointwise difference: 2.89748645
  7096. Max difference location: (0, 14, 2, 1)
  7097. Values at max diff - Original: 2.54342103, Converted: -0.35406536
  7098. Biggest difference in row (0, 9, 8), sum 5.984664 vs -0.341670
  7099. Layer 10, Token 18 (recurrent cache comparison):
  7100. Original tensor sum: 262.752014
  7101. Converted tensor sum: 52.628201
  7102. Original tensor mean: 0.082110
  7103. Converted tensor mean: 0.016446
  7104. Mean difference: 0.13161205
  7105. Maximum pointwise difference: 2.92723370
  7106. Max difference location: (0, 24, 1, 0)
  7107. Values at max diff - Original: 4.35996389, Converted: 1.43273032
  7108. Biggest difference in row (0, 11, 6), sum 6.418620 vs 0.589213
  7109. Layer 12, Token 18 (recurrent cache comparison):
  7110. Original tensor sum: 326.667419
  7111. Converted tensor sum: 31.792521
  7112. Original tensor mean: 0.102084
  7113. Converted tensor mean: 0.009935
  7114. Mean difference: 0.17550385
  7115. Maximum pointwise difference: 4.50774860
  7116. Max difference location: (0, 14, 8, 1)
  7117. Values at max diff - Original: 4.50715399, Converted: -0.00059444
  7118. Biggest difference in row (0, 21, 9), sum 0.167931 vs -15.009873
  7119. Layer 13, Token 18 (recurrent cache comparison):
  7120. Original tensor sum: 261.870972
  7121. Converted tensor sum: 53.651596
  7122. Original tensor mean: 0.081835
  7123. Converted tensor mean: 0.016766
  7124. Mean difference: 0.12234001
  7125. Maximum pointwise difference: 4.01087809
  7126. Max difference location: (0, 11, 4, 0)
  7127. Values at max diff - Original: 4.08570528, Converted: 0.07482710
  7128. Biggest difference in row (0, 20, 0), sum 2.293484 vs -4.637159
  7129. Layer 14, Token 18 (recurrent cache comparison):
  7130. Original tensor sum: 740.518921
  7131. Converted tensor sum: 84.074921
  7132. Original tensor mean: 0.231412
  7133. Converted tensor mean: 0.026273
  7134. Mean difference: 0.28319737
  7135. Maximum pointwise difference: 4.63366222
  7136. Max difference location: (0, 1, 4, 6)
  7137. Values at max diff - Original: -0.00748948, Converted: 4.62617254
  7138. Biggest difference in row (0, 6, 1), sum 12.889781 vs -0.226667
  7139. Layer 0, Token 19 (recurrent cache comparison):
  7140. Original tensor sum: 0.866719
  7141. Converted tensor sum: 13.915674
  7142. Original tensor mean: 0.000271
  7143. Converted tensor mean: 0.004349
  7144. Mean difference: 0.05563419
  7145. Maximum pointwise difference: 1.58602941
  7146. Max difference location: (0, 1, 5, 3)
  7147. Values at max diff - Original: 1.50699055, Converted: -0.07903884
  7148. Biggest difference in row (0, 28, 5), sum 0.233465 vs 1.374955
  7149. Layer 1, Token 19 (recurrent cache comparison):
  7150. Original tensor sum: 143.055450
  7151. Converted tensor sum: 84.285873
  7152. Original tensor mean: 0.044705
  7153. Converted tensor mean: 0.026339
  7154. Mean difference: 0.11436888
  7155. Maximum pointwise difference: 2.11188436
  7156. Max difference location: (0, 15, 8, 5)
  7157. Values at max diff - Original: -0.06675819, Converted: 2.04512620
  7158. Biggest difference in row (0, 23, 4), sum 0.445206 vs 4.313503
  7159. Layer 2, Token 19 (recurrent cache comparison):
  7160. Original tensor sum: 206.674835
  7161. Converted tensor sum: 69.739983
  7162. Original tensor mean: 0.064586
  7163. Converted tensor mean: 0.021794
  7164. Mean difference: 0.14624587
  7165. Maximum pointwise difference: 2.46052098
  7166. Max difference location: (0, 5, 4, 5)
  7167. Values at max diff - Original: 2.46177387, Converted: 0.00125289
  7168. Biggest difference in row (0, 23, 9), sum 5.872013 vs -0.147400
  7169. Layer 4, Token 19 (recurrent cache comparison):
  7170. Original tensor sum: 223.180557
  7171. Converted tensor sum: 57.034431
  7172. Original tensor mean: 0.069744
  7173. Converted tensor mean: 0.017823
  7174. Mean difference: 0.13700224
  7175. Maximum pointwise difference: 4.09037542
  7176. Max difference location: (0, 25, 1, 9)
  7177. Values at max diff - Original: 3.97389102, Converted: -0.11648450
  7178. Biggest difference in row (0, 24, 1), sum 6.574383 vs 0.271665
  7179. Layer 5, Token 19 (recurrent cache comparison):
  7180. Original tensor sum: 315.655853
  7181. Converted tensor sum: 48.647461
  7182. Original tensor mean: 0.098642
  7183. Converted tensor mean: 0.015202
  7184. Mean difference: 0.15315701
  7185. Maximum pointwise difference: 6.10414743
  7186. Max difference location: (0, 28, 9, 6)
  7187. Values at max diff - Original: 6.29615974, Converted: 0.19201221
  7188. Biggest difference in row (0, 28, 9), sum 11.702868 vs 0.168917
  7189. Layer 6, Token 19 (recurrent cache comparison):
  7190. Original tensor sum: 358.473572
  7191. Converted tensor sum: 101.158226
  7192. Original tensor mean: 0.112023
  7193. Converted tensor mean: 0.031612
  7194. Mean difference: 0.16535039
  7195. Maximum pointwise difference: 3.82374835
  7196. Max difference location: (0, 18, 1, 7)
  7197. Values at max diff - Original: 3.88149524, Converted: 0.05774695
  7198. Biggest difference in row (0, 20, 9), sum 9.851446 vs -0.224849
  7199. Layer 8, Token 19 (recurrent cache comparison):
  7200. Original tensor sum: 346.821899
  7201. Converted tensor sum: 80.751968
  7202. Original tensor mean: 0.108382
  7203. Converted tensor mean: 0.025235
  7204. Mean difference: 0.17743167
  7205. Maximum pointwise difference: 3.78403044
  7206. Max difference location: (0, 20, 0, 7)
  7207. Values at max diff - Original: 3.89911222, Converted: 0.11508182
  7208. Biggest difference in row (0, 7, 2), sum 9.025558 vs -0.167117
  7209. Layer 9, Token 19 (recurrent cache comparison):
  7210. Original tensor sum: 249.268311
  7211. Converted tensor sum: 43.202286
  7212. Original tensor mean: 0.077896
  7213. Converted tensor mean: 0.013501
  7214. Mean difference: 0.12318792
  7215. Maximum pointwise difference: 2.83834696
  7216. Max difference location: (0, 9, 8, 6)
  7217. Values at max diff - Original: 2.71989083, Converted: -0.11845621
  7218. Biggest difference in row (0, 9, 8), sum 8.573050 vs -0.169431
  7219. Layer 10, Token 19 (recurrent cache comparison):
  7220. Original tensor sum: 291.462646
  7221. Converted tensor sum: 66.798782
  7222. Original tensor mean: 0.091082
  7223. Converted tensor mean: 0.020875
  7224. Mean difference: 0.14087133
  7225. Maximum pointwise difference: 3.38042760
  7226. Max difference location: (0, 25, 1, 9)
  7227. Values at max diff - Original: 3.51948309, Converted: 0.13905543
  7228. Biggest difference in row (0, 25, 1), sum 8.942734 vs 0.065733
  7229. Layer 12, Token 19 (recurrent cache comparison):
  7230. Original tensor sum: 342.570038
  7231. Converted tensor sum: 48.484200
  7232. Original tensor mean: 0.107053
  7233. Converted tensor mean: 0.015151
  7234. Mean difference: 0.17410682
  7235. Maximum pointwise difference: 4.36208725
  7236. Max difference location: (0, 14, 8, 1)
  7237. Values at max diff - Original: 4.34435558, Converted: -0.01773176
  7238. Biggest difference in row (0, 28, 3), sum 11.628893 vs -0.798577
  7239. Layer 13, Token 19 (recurrent cache comparison):
  7240. Original tensor sum: 270.129211
  7241. Converted tensor sum: 52.121815
  7242. Original tensor mean: 0.084415
  7243. Converted tensor mean: 0.016288
  7244. Mean difference: 0.12223634
  7245. Maximum pointwise difference: 3.81266069
  7246. Max difference location: (0, 11, 4, 0)
  7247. Values at max diff - Original: 4.31173086, Converted: 0.49907014
  7248. Biggest difference in row (0, 27, 4), sum 5.975472 vs -0.026263
  7249. Layer 14, Token 19 (recurrent cache comparison):
  7250. Original tensor sum: 772.850342
  7251. Converted tensor sum: 107.083702
  7252. Original tensor mean: 0.241516
  7253. Converted tensor mean: 0.033464
  7254. Mean difference: 0.28851181
  7255. Maximum pointwise difference: 4.31482410
  7256. Max difference location: (0, 28, 4, 1)
  7257. Values at max diff - Original: 4.32322884, Converted: 0.00840468
  7258. Biggest difference in row (0, 14, 2), sum 14.072536 vs 0.377507
  7259. Layer 0, Token 20 (recurrent cache comparison):
  7260. Original tensor sum: 2.008890
  7261. Converted tensor sum: 12.614055
  7262. Original tensor mean: 0.000628
  7263. Converted tensor mean: 0.003942
  7264. Mean difference: 0.05576663
  7265. Maximum pointwise difference: 1.29991353
  7266. Max difference location: (0, 1, 5, 3)
  7267. Values at max diff - Original: 1.36800277, Converted: 0.06808926
  7268. Biggest difference in row (0, 23, 6), sum 0.260241 vs -1.370477
  7269. Layer 1, Token 20 (recurrent cache comparison):
  7270. Original tensor sum: 58.587276
  7271. Converted tensor sum: 76.507767
  7272. Original tensor mean: 0.018309
  7273. Converted tensor mean: 0.023909
  7274. Mean difference: 0.10026859
  7275. Maximum pointwise difference: 2.19443369
  7276. Max difference location: (0, 14, 2, 2)
  7277. Values at max diff - Original: -0.11835258, Converted: 2.07608104
  7278. Biggest difference in row (0, 14, 2), sum -0.449485 vs 3.433519
  7279. Layer 2, Token 20 (recurrent cache comparison):
  7280. Original tensor sum: 165.744568
  7281. Converted tensor sum: 64.695602
  7282. Original tensor mean: 0.051795
  7283. Converted tensor mean: 0.020217
  7284. Mean difference: 0.14529096
  7285. Maximum pointwise difference: 2.04155922
  7286. Max difference location: (0, 5, 4, 5)
  7287. Values at max diff - Original: 2.00637627, Converted: -0.03518293
  7288. Biggest difference in row (0, 5, 4), sum 4.793974 vs -0.065828
  7289. Layer 4, Token 20 (recurrent cache comparison):
  7290. Original tensor sum: 212.915298
  7291. Converted tensor sum: 76.568939
  7292. Original tensor mean: 0.066536
  7293. Converted tensor mean: 0.023928
  7294. Mean difference: 0.12807344
  7295. Maximum pointwise difference: 3.65112019
  7296. Max difference location: (0, 25, 1, 9)
  7297. Values at max diff - Original: 3.82295465, Converted: 0.17183457
  7298. Biggest difference in row (0, 20, 4), sum 0.743454 vs 6.637871
  7299. Layer 5, Token 20 (recurrent cache comparison):
  7300. Original tensor sum: 258.077209
  7301. Converted tensor sum: 49.652397
  7302. Original tensor mean: 0.080649
  7303. Converted tensor mean: 0.015516
  7304. Mean difference: 0.13090378
  7305. Maximum pointwise difference: 2.72355151
  7306. Max difference location: (0, 28, 9, 6)
  7307. Values at max diff - Original: 2.71506453, Converted: -0.00848696
  7308. Biggest difference in row (0, 28, 9), sum 6.250334 vs 0.160866
  7309. Layer 6, Token 20 (recurrent cache comparison):
  7310. Original tensor sum: 336.431519
  7311. Converted tensor sum: 141.819733
  7312. Original tensor mean: 0.105135
  7313. Converted tensor mean: 0.044319
  7314. Mean difference: 0.16430938
  7315. Maximum pointwise difference: 3.65949225
  7316. Max difference location: (0, 6, 5, 4)
  7317. Values at max diff - Original: 3.87317371, Converted: 0.21368141
  7318. Biggest difference in row (0, 12, 1), sum 12.053196 vs 2.254734
  7319. Layer 8, Token 20 (recurrent cache comparison):
  7320. Original tensor sum: 345.424561
  7321. Converted tensor sum: 112.814018
  7322. Original tensor mean: 0.107945
  7323. Converted tensor mean: 0.035254
  7324. Mean difference: 0.17631440
  7325. Maximum pointwise difference: 3.36074710
  7326. Max difference location: (0, 20, 0, 7)
  7327. Values at max diff - Original: 3.50376892, Converted: 0.14302187
  7328. Biggest difference in row (0, 21, 0), sum 8.499396 vs 0.480686
  7329. Layer 9, Token 20 (recurrent cache comparison):
  7330. Original tensor sum: 261.041870
  7331. Converted tensor sum: 41.182373
  7332. Original tensor mean: 0.081576
  7333. Converted tensor mean: 0.012869
  7334. Mean difference: 0.12376894
  7335. Maximum pointwise difference: 2.65249300
  7336. Max difference location: (0, 18, 2, 3)
  7337. Values at max diff - Original: 2.77233696, Converted: 0.11984408
  7338. Biggest difference in row (0, 9, 8), sum 7.305106 vs -0.383589
  7339. Layer 10, Token 20 (recurrent cache comparison):
  7340. Original tensor sum: 276.296692
  7341. Converted tensor sum: 75.410934
  7342. Original tensor mean: 0.086343
  7343. Converted tensor mean: 0.023566
  7344. Mean difference: 0.12437831
  7345. Maximum pointwise difference: 2.84117389
  7346. Max difference location: (0, 25, 1, 9)
  7347. Values at max diff - Original: 3.23575449, Converted: 0.39458057
  7348. Biggest difference in row (0, 25, 1), sum 8.240932 vs 1.044036
  7349. Layer 12, Token 20 (recurrent cache comparison):
  7350. Original tensor sum: 345.097260
  7351. Converted tensor sum: 53.731094
  7352. Original tensor mean: 0.107843
  7353. Converted tensor mean: 0.016791
  7354. Mean difference: 0.17168441
  7355. Maximum pointwise difference: 4.60863352
  7356. Max difference location: (0, 14, 8, 1)
  7357. Values at max diff - Original: 4.63144016, Converted: 0.02280665
  7358. Biggest difference in row (0, 28, 3), sum 11.591027 vs 0.333645
  7359. Layer 13, Token 20 (recurrent cache comparison):
  7360. Original tensor sum: 253.047394
  7361. Converted tensor sum: 40.628811
  7362. Original tensor mean: 0.079077
  7363. Converted tensor mean: 0.012697
  7364. Mean difference: 0.11498150
  7365. Maximum pointwise difference: 4.22373772
  7366. Max difference location: (0, 11, 4, 0)
  7367. Values at max diff - Original: 4.22381639, Converted: 0.00007845
  7368. Biggest difference in row (0, 27, 4), sum 6.304989 vs 0.143700
  7369. Layer 14, Token 20 (recurrent cache comparison):
  7370. Original tensor sum: 769.098083
  7371. Converted tensor sum: 130.283981
  7372. Original tensor mean: 0.240343
  7373. Converted tensor mean: 0.040714
  7374. Mean difference: 0.28381503
  7375. Maximum pointwise difference: 4.93393469
  7376. Max difference location: (0, 28, 4, 1)
  7377. Values at max diff - Original: 4.91371727, Converted: -0.02021729
  7378. Biggest difference in row (0, 6, 1), sum 14.151162 vs 0.315893
  7379. Layer 0, Token 21 (recurrent cache comparison):
  7380. Original tensor sum: 1.077594
  7381. Converted tensor sum: 15.438447
  7382. Original tensor mean: 0.000337
  7383. Converted tensor mean: 0.004825
  7384. Mean difference: 0.05193665
  7385. Maximum pointwise difference: 0.74260694
  7386. Max difference location: (0, 28, 8, 5)
  7387. Values at max diff - Original: 0.72446448, Converted: -0.01814246
  7388. Biggest difference in row (0, 23, 8), sum -0.100890 vs -1.090759
  7389. Layer 1, Token 21 (recurrent cache comparison):
  7390. Original tensor sum: 28.594997
  7391. Converted tensor sum: 89.290833
  7392. Original tensor mean: 0.008936
  7393. Converted tensor mean: 0.027903
  7394. Mean difference: 0.10794319
  7395. Maximum pointwise difference: 1.59959590
  7396. Max difference location: (0, 20, 2, 0)
  7397. Values at max diff - Original: 0.00296844, Converted: 1.60256433
  7398. Biggest difference in row (0, 20, 2), sum 0.038832 vs 3.326198
  7399. Layer 2, Token 21 (recurrent cache comparison):
  7400. Original tensor sum: 146.744446
  7401. Converted tensor sum: 85.128494
  7402. Original tensor mean: 0.045858
  7403. Converted tensor mean: 0.026603
  7404. Mean difference: 0.15625563
  7405. Maximum pointwise difference: 3.40082598
  7406. Max difference location: (0, 4, 2, 4)
  7407. Values at max diff - Original: 3.41796732, Converted: 0.01714140
  7408. Biggest difference in row (0, 4, 2), sum 7.012363 vs 0.098989
  7409. Layer 4, Token 21 (recurrent cache comparison):
  7410. Original tensor sum: 120.798615
  7411. Converted tensor sum: 143.282379
  7412. Original tensor mean: 0.037750
  7413. Converted tensor mean: 0.044776
  7414. Mean difference: 0.13425863
  7415. Maximum pointwise difference: 2.73616052
  7416. Max difference location: (0, 24, 9, 1)
  7417. Values at max diff - Original: 0.23530871, Converted: 2.97146916
  7418. Biggest difference in row (0, 30, 3), sum 1.329738 vs 6.360154
  7419. Layer 5, Token 21 (recurrent cache comparison):
  7420. Original tensor sum: 222.583710
  7421. Converted tensor sum: 86.326241
  7422. Original tensor mean: 0.069557
  7423. Converted tensor mean: 0.026977
  7424. Mean difference: 0.13234577
  7425. Maximum pointwise difference: 2.64859867
  7426. Max difference location: (0, 28, 7, 6)
  7427. Values at max diff - Original: 2.67573905, Converted: 0.02714031
  7428. Biggest difference in row (0, 26, 8), sum 3.963463 vs -0.648591
  7429. Layer 6, Token 21 (recurrent cache comparison):
  7430. Original tensor sum: 317.078064
  7431. Converted tensor sum: 162.595886
  7432. Original tensor mean: 0.099087
  7433. Converted tensor mean: 0.050811
  7434. Mean difference: 0.15550284
  7435. Maximum pointwise difference: 3.79531074
  7436. Max difference location: (0, 17, 7, 5)
  7437. Values at max diff - Original: 3.63465667, Converted: -0.16065404
  7438. Biggest difference in row (0, 20, 9), sum 8.606161 vs 0.369012
  7439. Layer 8, Token 21 (recurrent cache comparison):
  7440. Original tensor sum: 345.257385
  7441. Converted tensor sum: 184.546997
  7442. Original tensor mean: 0.107893
  7443. Converted tensor mean: 0.057671
  7444. Mean difference: 0.18574484
  7445. Maximum pointwise difference: 3.21210074
  7446. Max difference location: (0, 7, 2, 9)
  7447. Values at max diff - Original: 3.22117043, Converted: 0.00906963
  7448. Biggest difference in row (0, 21, 0), sum 7.649475 vs -0.057539
  7449. Layer 9, Token 21 (recurrent cache comparison):
  7450. Original tensor sum: 268.515228
  7451. Converted tensor sum: 95.449539
  7452. Original tensor mean: 0.083911
  7453. Converted tensor mean: 0.029828
  7454. Mean difference: 0.13116649
  7455. Maximum pointwise difference: 3.19655538
  7456. Max difference location: (0, 18, 2, 3)
  7457. Values at max diff - Original: 3.71445417, Converted: 0.51789874
  7458. Biggest difference in row (0, 9, 5), sum 9.129113 vs 0.079633
  7459. Layer 10, Token 21 (recurrent cache comparison):
  7460. Original tensor sum: 259.887024
  7461. Converted tensor sum: 104.823151
  7462. Original tensor mean: 0.081215
  7463. Converted tensor mean: 0.032757
  7464. Mean difference: 0.12396878
  7465. Maximum pointwise difference: 3.03640962
  7466. Max difference location: (0, 25, 1, 9)
  7467. Values at max diff - Original: 3.24910450, Converted: 0.21269491
  7468. Biggest difference in row (0, 25, 1), sum 8.045052 vs 0.162466
  7469. Layer 12, Token 21 (recurrent cache comparison):
  7470. Original tensor sum: 340.602814
  7471. Converted tensor sum: 113.082108
  7472. Original tensor mean: 0.106438
  7473. Converted tensor mean: 0.035338
  7474. Mean difference: 0.17276871
  7475. Maximum pointwise difference: 4.99602270
  7476. Max difference location: (0, 14, 8, 1)
  7477. Values at max diff - Original: 4.72621298, Converted: -0.26980966
  7478. Biggest difference in row (0, 28, 3), sum 11.259501 vs -0.695297
  7479. Layer 13, Token 21 (recurrent cache comparison):
  7480. Original tensor sum: 236.875137
  7481. Converted tensor sum: 95.429146
  7482. Original tensor mean: 0.074023
  7483. Converted tensor mean: 0.029822
  7484. Mean difference: 0.11990514
  7485. Maximum pointwise difference: 3.69410872
  7486. Max difference location: (0, 11, 4, 0)
  7487. Values at max diff - Original: 4.10646772, Converted: 0.41235897
  7488. Biggest difference in row (0, 23, 6), sum 5.276991 vs -0.187177
  7489. Layer 14, Token 21 (recurrent cache comparison):
  7490. Original tensor sum: 782.813049
  7491. Converted tensor sum: 216.654602
  7492. Original tensor mean: 0.244629
  7493. Converted tensor mean: 0.067705
  7494. Mean difference: 0.29570371
  7495. Maximum pointwise difference: 5.94400930
  7496. Max difference location: (0, 28, 4, 1)
  7497. Values at max diff - Original: 5.97852135, Converted: 0.03451204
  7498. Biggest difference in row (0, 6, 1), sum 14.360078 vs 0.533817
  7499. Layer 0, Token 22 (recurrent cache comparison):
  7500. Original tensor sum: 2.700914
  7501. Converted tensor sum: 8.066211
  7502. Original tensor mean: 0.000844
  7503. Converted tensor mean: 0.002521
  7504. Mean difference: 0.06021541
  7505. Maximum pointwise difference: 1.02617574
  7506. Max difference location: (0, 28, 9, 5)
  7507. Values at max diff - Original: 1.17021942, Converted: 0.14404365
  7508. Biggest difference in row (0, 4, 9), sum 1.758845 vs -0.049155
  7509. Layer 1, Token 22 (recurrent cache comparison):
  7510. Original tensor sum: 9.402251
  7511. Converted tensor sum: 79.292084
  7512. Original tensor mean: 0.002938
  7513. Converted tensor mean: 0.024779
  7514. Mean difference: 0.09312414
  7515. Maximum pointwise difference: 1.86848283
  7516. Max difference location: (0, 14, 2, 2)
  7517. Values at max diff - Original: -0.01261259, Converted: 1.85587025
  7518. Biggest difference in row (0, 1, 8), sum -0.144765 vs 2.729439
  7519. Layer 2, Token 22 (recurrent cache comparison):
  7520. Original tensor sum: 150.273865
  7521. Converted tensor sum: 102.280075
  7522. Original tensor mean: 0.046961
  7523. Converted tensor mean: 0.031963
  7524. Mean difference: 0.15655471
  7525. Maximum pointwise difference: 2.95679903
  7526. Max difference location: (0, 4, 8, 6)
  7527. Values at max diff - Original: -0.15384272, Converted: 2.80295634
  7528. Biggest difference in row (0, 10, 6), sum -0.449118 vs 3.435276
  7529. Layer 4, Token 22 (recurrent cache comparison):
  7530. Original tensor sum: 109.346573
  7531. Converted tensor sum: 167.629913
  7532. Original tensor mean: 0.034171
  7533. Converted tensor mean: 0.052384
  7534. Mean difference: 0.12662907
  7535. Maximum pointwise difference: 2.69411635
  7536. Max difference location: (0, 19, 2, 0)
  7537. Values at max diff - Original: 0.00617844, Converted: 2.70029473
  7538. Biggest difference in row (0, 19, 2), sum -0.222631 vs 5.908413
  7539. Layer 5, Token 22 (recurrent cache comparison):
  7540. Original tensor sum: 191.832321
  7541. Converted tensor sum: 202.874756
  7542. Original tensor mean: 0.059948
  7543. Converted tensor mean: 0.063398
  7544. Mean difference: 0.15467224
  7545. Maximum pointwise difference: 6.38972092
  7546. Max difference location: (0, 28, 6, 9)
  7547. Values at max diff - Original: 0.04361831, Converted: 6.43333912
  7548. Biggest difference in row (0, 28, 6), sum 0.738313 vs 17.286346
  7549. Layer 6, Token 22 (recurrent cache comparison):
  7550. Original tensor sum: 304.042816
  7551. Converted tensor sum: 238.043579
  7552. Original tensor mean: 0.095013
  7553. Converted tensor mean: 0.074389
  7554. Mean difference: 0.15846148
  7555. Maximum pointwise difference: 3.40163994
  7556. Max difference location: (0, 12, 2, 1)
  7557. Values at max diff - Original: 1.29805720, Converted: 4.69969702
  7558. Biggest difference in row (0, 17, 7), sum 7.862279 vs 0.254134
  7559. Layer 8, Token 22 (recurrent cache comparison):
  7560. Original tensor sum: 352.235718
  7561. Converted tensor sum: 277.930298
  7562. Original tensor mean: 0.110074
  7563. Converted tensor mean: 0.086853
  7564. Mean difference: 0.19249398
  7565. Maximum pointwise difference: 3.61912727
  7566. Max difference location: (0, 7, 2, 9)
  7567. Values at max diff - Original: 3.80060625, Converted: 0.18147889
  7568. Biggest difference in row (0, 21, 0), sum 9.710941 vs 0.797433
  7569. Layer 9, Token 22 (recurrent cache comparison):
  7570. Original tensor sum: 273.245667
  7571. Converted tensor sum: 226.375031
  7572. Original tensor mean: 0.085389
  7573. Converted tensor mean: 0.070742
  7574. Mean difference: 0.14207596
  7575. Maximum pointwise difference: 2.82711124
  7576. Max difference location: (0, 14, 1, 2)
  7577. Values at max diff - Original: 0.05765805, Converted: 2.88476920
  7578. Biggest difference in row (0, 9, 5), sum 9.348074 vs 1.880102
  7579. Layer 10, Token 22 (recurrent cache comparison):
  7580. Original tensor sum: 239.880463
  7581. Converted tensor sum: 275.399414
  7582. Original tensor mean: 0.074963
  7583. Converted tensor mean: 0.086062
  7584. Mean difference: 0.15027112
  7585. Maximum pointwise difference: 3.59689593
  7586. Max difference location: (0, 0, 7, 8)
  7587. Values at max diff - Original: -0.00771881, Converted: 3.58917713
  7588. Biggest difference in row (0, 24, 0), sum 0.303092 vs 7.643524
  7589. Layer 12, Token 22 (recurrent cache comparison):
  7590. Original tensor sum: 327.704742
  7591. Converted tensor sum: 271.485931
  7592. Original tensor mean: 0.102408
  7593. Converted tensor mean: 0.084839
  7594. Mean difference: 0.17104822
  7595. Maximum pointwise difference: 4.17193794
  7596. Max difference location: (0, 14, 8, 1)
  7597. Values at max diff - Original: 4.78667879, Converted: 0.61474097
  7598. Biggest difference in row (0, 28, 3), sum 10.929213 vs 0.205626
  7599. Layer 13, Token 22 (recurrent cache comparison):
  7600. Original tensor sum: 231.619003
  7601. Converted tensor sum: 232.506165
  7602. Original tensor mean: 0.072381
  7603. Converted tensor mean: 0.072658
  7604. Mean difference: 0.13752523
  7605. Maximum pointwise difference: 4.03583384
  7606. Max difference location: (0, 11, 4, 0)
  7607. Values at max diff - Original: 3.99545026, Converted: -0.04038341
  7608. Biggest difference in row (0, 11, 0), sum -0.083207 vs 6.010875
  7609. Layer 14, Token 22 (recurrent cache comparison):
  7610. Original tensor sum: 772.479431
  7611. Converted tensor sum: 607.419800
  7612. Original tensor mean: 0.241400
  7613. Converted tensor mean: 0.189819
  7614. Mean difference: 0.31881297
  7615. Maximum pointwise difference: 5.76619625
  7616. Max difference location: (0, 28, 4, 1)
  7617. Values at max diff - Original: 6.25043201, Converted: 0.48423576
  7618. Biggest difference in row (0, 6, 1), sum 14.585131 vs 0.928486
  7619. Layer 0, Token 23 (recurrent cache comparison):
  7620. Original tensor sum: 4.463778
  7621. Converted tensor sum: 4.492921
  7622. Original tensor mean: 0.001395
  7623. Converted tensor mean: 0.001404
  7624. Mean difference: 0.06506675
  7625. Maximum pointwise difference: 1.83452773
  7626. Max difference location: (0, 1, 3, 5)
  7627. Values at max diff - Original: -0.04470510, Converted: 1.78982258
  7628. Biggest difference in row (0, 8, 9), sum 0.088014 vs -1.806111
  7629. Layer 1, Token 23 (recurrent cache comparison):
  7630. Original tensor sum: 16.812580
  7631. Converted tensor sum: 109.310081
  7632. Original tensor mean: 0.005254
  7633. Converted tensor mean: 0.034159
  7634. Mean difference: 0.09598633
  7635. Maximum pointwise difference: 1.58349574
  7636. Max difference location: (0, 14, 2, 2)
  7637. Values at max diff - Original: -0.00151580, Converted: 1.58197999
  7638. Biggest difference in row (0, 31, 9), sum 0.029068 vs 3.659988
  7639. Layer 2, Token 23 (recurrent cache comparison):
  7640. Original tensor sum: 75.151047
  7641. Converted tensor sum: 119.211670
  7642. Original tensor mean: 0.023485
  7643. Converted tensor mean: 0.037254
  7644. Mean difference: 0.13861641
  7645. Maximum pointwise difference: 2.43731642
  7646. Max difference location: (0, 1, 3, 2)
  7647. Values at max diff - Original: 0.08128840, Converted: 2.51860476
  7648. Biggest difference in row (0, 1, 3), sum 0.598150 vs 6.365501
  7649. Layer 4, Token 23 (recurrent cache comparison):
  7650. Original tensor sum: 76.628754
  7651. Converted tensor sum: 155.459259
  7652. Original tensor mean: 0.023946
  7653. Converted tensor mean: 0.048581
  7654. Mean difference: 0.11704257
  7655. Maximum pointwise difference: 2.73834753
  7656. Max difference location: (0, 19, 9, 2)
  7657. Values at max diff - Original: 4.03167677, Converted: 1.29332936
  7658. Biggest difference in row (0, 8, 6), sum 0.016739 vs 3.865431
  7659. Layer 5, Token 23 (recurrent cache comparison):
  7660. Original tensor sum: 150.354111
  7661. Converted tensor sum: 169.511383
  7662. Original tensor mean: 0.046986
  7663. Converted tensor mean: 0.052972
  7664. Mean difference: 0.12414169
  7665. Maximum pointwise difference: 4.08761406
  7666. Max difference location: (0, 28, 8, 6)
  7667. Values at max diff - Original: 3.60962462, Converted: -0.47798958
  7668. Biggest difference in row (0, 28, 6), sum 0.154782 vs 5.205485
  7669. Layer 6, Token 23 (recurrent cache comparison):
  7670. Original tensor sum: 225.564255
  7671. Converted tensor sum: 262.560272
  7672. Original tensor mean: 0.070489
  7673. Converted tensor mean: 0.082050
  7674. Mean difference: 0.14410818
  7675. Maximum pointwise difference: 5.85085487
  7676. Max difference location: (0, 12, 6, 1)
  7677. Values at max diff - Original: 6.56323051, Converted: 0.71237558
  7678. Biggest difference in row (0, 12, 6), sum 15.410420 vs 3.070242
  7679. Layer 8, Token 23 (recurrent cache comparison):
  7680. Original tensor sum: 216.401703
  7681. Converted tensor sum: 306.942932
  7682. Original tensor mean: 0.067626
  7683. Converted tensor mean: 0.095920
  7684. Mean difference: 0.14416558
  7685. Maximum pointwise difference: 3.46720839
  7686. Max difference location: (0, 22, 4, 7)
  7687. Values at max diff - Original: 3.77501345, Converted: 0.30780506
  7688. Biggest difference in row (0, 22, 4), sum 7.765969 vs 0.982070
  7689. Layer 9, Token 23 (recurrent cache comparison):
  7690. Original tensor sum: 247.185196
  7691. Converted tensor sum: 250.177109
  7692. Original tensor mean: 0.077245
  7693. Converted tensor mean: 0.078180
  7694. Mean difference: 0.11487159
  7695. Maximum pointwise difference: 2.80121279
  7696. Max difference location: (0, 14, 2, 1)
  7697. Values at max diff - Original: 3.97450233, Converted: 1.17328954
  7698. Biggest difference in row (0, 28, 2), sum 8.960711 vs 2.242082
  7699. Layer 10, Token 23 (recurrent cache comparison):
  7700. Original tensor sum: 193.715546
  7701. Converted tensor sum: 271.413574
  7702. Original tensor mean: 0.060536
  7703. Converted tensor mean: 0.084817
  7704. Mean difference: 0.13006650
  7705. Maximum pointwise difference: 3.03568482
  7706. Max difference location: (0, 0, 3, 7)
  7707. Values at max diff - Original: 3.30636239, Converted: 0.27067760
  7708. Biggest difference in row (0, 23, 3), sum 6.103652 vs 1.165035
  7709. Layer 12, Token 23 (recurrent cache comparison):
  7710. Original tensor sum: 277.550171
  7711. Converted tensor sum: 296.251099
  7712. Original tensor mean: 0.086734
  7713. Converted tensor mean: 0.092578
  7714. Mean difference: 0.12569407
  7715. Maximum pointwise difference: 2.70571613
  7716. Max difference location: (0, 20, 2, 3)
  7717. Values at max diff - Original: 3.96422935, Converted: 1.25851309
  7718. Biggest difference in row (0, 30, 9), sum 8.374757 vs 2.683706
  7719. Layer 13, Token 23 (recurrent cache comparison):
  7720. Original tensor sum: 189.736130
  7721. Converted tensor sum: 235.426422
  7722. Original tensor mean: 0.059293
  7723. Converted tensor mean: 0.073571
  7724. Mean difference: 0.09623930
  7725. Maximum pointwise difference: 3.40506268
  7726. Max difference location: (0, 17, 8, 2)
  7727. Values at max diff - Original: 4.38167763, Converted: 0.97661489
  7728. Biggest difference in row (0, 17, 8), sum 7.328513 vs 2.010616
  7729. Layer 14, Token 23 (recurrent cache comparison):
  7730. Original tensor sum: 508.593140
  7731. Converted tensor sum: 650.881714
  7732. Original tensor mean: 0.158935
  7733. Converted tensor mean: 0.203401
  7734. Mean difference: 0.21357311
  7735. Maximum pointwise difference: 4.58951044
  7736. Max difference location: (0, 28, 4, 1)
  7737. Values at max diff - Original: 0.01861674, Converted: 4.60812712
  7738. Biggest difference in row (0, 27, 6), sum -2.170214 vs 13.869398
  7739. Layer 0, Token 24 (recurrent cache comparison):
  7740. Original tensor sum: 0.801011
  7741. Converted tensor sum: -1.634871
  7742. Original tensor mean: 0.000250
  7743. Converted tensor mean: -0.000511
  7744. Mean difference: 0.07332502
  7745. Maximum pointwise difference: 1.81247604
  7746. Max difference location: (0, 1, 3, 5)
  7747. Values at max diff - Original: -0.04395379, Converted: 1.76852226
  7748. Biggest difference in row (0, 25, 2), sum 0.205085 vs -3.009443
  7749. Layer 1, Token 24 (recurrent cache comparison):
  7750. Original tensor sum: 23.350971
  7751. Converted tensor sum: 88.090744
  7752. Original tensor mean: 0.007297
  7753. Converted tensor mean: 0.027528
  7754. Mean difference: 0.08751559
  7755. Maximum pointwise difference: 1.07916749
  7756. Max difference location: (0, 20, 7, 8)
  7757. Values at max diff - Original: 1.04420257, Converted: -0.03496487
  7758. Biggest difference in row (0, 31, 5), sum -0.311075 vs 1.779173
  7759. Layer 2, Token 24 (recurrent cache comparison):
  7760. Original tensor sum: 108.804047
  7761. Converted tensor sum: 87.620453
  7762. Original tensor mean: 0.034001
  7763. Converted tensor mean: 0.027381
  7764. Mean difference: 0.12934437
  7765. Maximum pointwise difference: 2.40617442
  7766. Max difference location: (0, 1, 2, 3)
  7767. Values at max diff - Original: 0.02315997, Converted: 2.42933440
  7768. Biggest difference in row (0, 27, 2), sum 3.832137 vs 0.454090
  7769. Layer 4, Token 24 (recurrent cache comparison):
  7770. Original tensor sum: 89.705452
  7771. Converted tensor sum: 61.452301
  7772. Original tensor mean: 0.028033
  7773. Converted tensor mean: 0.019204
  7774. Mean difference: 0.11625614
  7775. Maximum pointwise difference: 3.20758009
  7776. Max difference location: (0, 19, 2, 9)
  7777. Values at max diff - Original: -0.01131610, Converted: 3.19626403
  7778. Biggest difference in row (0, 19, 9), sum 3.560462 vs 0.155535
  7779. Layer 5, Token 24 (recurrent cache comparison):
  7780. Original tensor sum: 153.870117
  7781. Converted tensor sum: 79.160019
  7782. Original tensor mean: 0.048084
  7783. Converted tensor mean: 0.024738
  7784. Mean difference: 0.12364670
  7785. Maximum pointwise difference: 2.68913746
  7786. Max difference location: (0, 28, 3, 6)
  7787. Values at max diff - Original: 2.79144502, Converted: 0.10230768
  7788. Biggest difference in row (0, 28, 6), sum 0.099721 vs 5.709799
  7789. Layer 6, Token 24 (recurrent cache comparison):
  7790. Original tensor sum: 230.254852
  7791. Converted tensor sum: 174.787750
  7792. Original tensor mean: 0.071955
  7793. Converted tensor mean: 0.054621
  7794. Mean difference: 0.15056056
  7795. Maximum pointwise difference: 6.34924650
  7796. Max difference location: (0, 12, 6, 1)
  7797. Values at max diff - Original: 6.46217585, Converted: 0.11292921
  7798. Biggest difference in row (0, 12, 6), sum 15.171618 vs 0.722292
  7799. Layer 8, Token 24 (recurrent cache comparison):
  7800. Original tensor sum: 235.891174
  7801. Converted tensor sum: 145.097076
  7802. Original tensor mean: 0.073716
  7803. Converted tensor mean: 0.045343
  7804. Mean difference: 0.16653843
  7805. Maximum pointwise difference: 3.68727565
  7806. Max difference location: (0, 21, 9, 7)
  7807. Values at max diff - Original: -0.03629338, Converted: 3.65098238
  7808. Biggest difference in row (0, 22, 4), sum 6.845831 vs -0.173057
  7809. Layer 9, Token 24 (recurrent cache comparison):
  7810. Original tensor sum: 230.641953
  7811. Converted tensor sum: 158.276245
  7812. Original tensor mean: 0.072076
  7813. Converted tensor mean: 0.049461
  7814. Mean difference: 0.13344021
  7815. Maximum pointwise difference: 2.99997020
  7816. Max difference location: (0, 28, 7, 0)
  7817. Values at max diff - Original: 3.18566871, Converted: 0.18569851
  7818. Biggest difference in row (0, 28, 7), sum 10.468034 vs 1.164585
  7819. Layer 10, Token 24 (recurrent cache comparison):
  7820. Original tensor sum: 196.116974
  7821. Converted tensor sum: 120.883209
  7822. Original tensor mean: 0.061287
  7823. Converted tensor mean: 0.037776
  7824. Mean difference: 0.14279810
  7825. Maximum pointwise difference: 3.15166354
  7826. Max difference location: (0, 24, 0, 1)
  7827. Values at max diff - Original: 0.01040818, Converted: 3.16207170
  7828. Biggest difference in row (0, 24, 0), sum 0.920592 vs 9.820712
  7829. Layer 12, Token 24 (recurrent cache comparison):
  7830. Original tensor sum: 263.522400
  7831. Converted tensor sum: 204.364563
  7832. Original tensor mean: 0.082351
  7833. Converted tensor mean: 0.063864
  7834. Mean difference: 0.15161198
  7835. Maximum pointwise difference: 3.57106376
  7836. Max difference location: (0, 30, 4, 9)
  7837. Values at max diff - Original: 0.28180352, Converted: 3.85286736
  7838. Biggest difference in row (0, 28, 3), sum 8.790596 vs 0.101635
  7839. Layer 13, Token 24 (recurrent cache comparison):
  7840. Original tensor sum: 174.369919
  7841. Converted tensor sum: 135.311646
  7842. Original tensor mean: 0.054491
  7843. Converted tensor mean: 0.042285
  7844. Mean difference: 0.11190581
  7845. Maximum pointwise difference: 3.24499154
  7846. Max difference location: (0, 9, 2, 1)
  7847. Values at max diff - Original: -0.00551485, Converted: 3.23947668
  7848. Biggest difference in row (0, 9, 2), sum -0.041496 vs 5.267887
  7849. Layer 14, Token 24 (recurrent cache comparison):
  7850. Original tensor sum: 507.494324
  7851. Converted tensor sum: 360.428650
  7852. Original tensor mean: 0.158592
  7853. Converted tensor mean: 0.112634
  7854. Mean difference: 0.25558040
  7855. Maximum pointwise difference: 5.38855457
  7856. Max difference location: (0, 28, 6, 1)
  7857. Values at max diff - Original: 5.42326450, Converted: 0.03470971
  7858. Biggest difference in row (0, 28, 6), sum 14.975449 vs 0.420049
  7859. Layer 0, Token 25 (recurrent cache comparison):
  7860. Original tensor sum: 3.754472
  7861. Converted tensor sum: -0.036336
  7862. Original tensor mean: 0.001173
  7863. Converted tensor mean: -0.000011
  7864. Mean difference: 0.07934358
  7865. Maximum pointwise difference: 1.86529565
  7866. Max difference location: (0, 1, 2, 3)
  7867. Values at max diff - Original: 1.82291889, Converted: -0.04237675
  7868. Biggest difference in row (0, 26, 9), sum -0.049344 vs -1.991895
  7869. Layer 1, Token 25 (recurrent cache comparison):
  7870. Original tensor sum: 69.339890
  7871. Converted tensor sum: 74.604774
  7872. Original tensor mean: 0.021669
  7873. Converted tensor mean: 0.023314
  7874. Mean difference: 0.08100989
  7875. Maximum pointwise difference: 1.22147357
  7876. Max difference location: (0, 23, 0, 4)
  7877. Values at max diff - Original: 1.23978972, Converted: 0.01831620
  7878. Biggest difference in row (0, 20, 8), sum 2.595490 vs 0.385527
  7879. Layer 2, Token 25 (recurrent cache comparison):
  7880. Original tensor sum: 122.554489
  7881. Converted tensor sum: 59.594086
  7882. Original tensor mean: 0.038298
  7883. Converted tensor mean: 0.018623
  7884. Mean difference: 0.14621988
  7885. Maximum pointwise difference: 3.03828931
  7886. Max difference location: (0, 8, 9, 3)
  7887. Values at max diff - Original: 3.01308012, Converted: -0.02520920
  7888. Biggest difference in row (0, 6, 7), sum 4.544618 vs 0.330778
  7889. Layer 4, Token 25 (recurrent cache comparison):
  7890. Original tensor sum: 135.021027
  7891. Converted tensor sum: 31.374174
  7892. Original tensor mean: 0.042194
  7893. Converted tensor mean: 0.009804
  7894. Mean difference: 0.11780138
  7895. Maximum pointwise difference: 2.41319752
  7896. Max difference location: (0, 26, 6, 5)
  7897. Values at max diff - Original: -0.06945831, Converted: 2.34373927
  7898. Biggest difference in row (0, 20, 0), sum 5.919655 vs -0.125531
  7899. Layer 5, Token 25 (recurrent cache comparison):
  7900. Original tensor sum: 151.868256
  7901. Converted tensor sum: 37.756584
  7902. Original tensor mean: 0.047459
  7903. Converted tensor mean: 0.011799
  7904. Mean difference: 0.11239365
  7905. Maximum pointwise difference: 2.04250264
  7906. Max difference location: (0, 28, 8, 6)
  7907. Values at max diff - Original: 1.64249492, Converted: -0.40000769
  7908. Biggest difference in row (0, 23, 0), sum 3.497306 vs 0.162423
  7909. Layer 6, Token 25 (recurrent cache comparison):
  7910. Original tensor sum: 251.935211
  7911. Converted tensor sum: 40.890175
  7912. Original tensor mean: 0.078730
  7913. Converted tensor mean: 0.012778
  7914. Mean difference: 0.15190262
  7915. Maximum pointwise difference: 5.74138451
  7916. Max difference location: (0, 12, 6, 1)
  7917. Values at max diff - Original: 5.98834372, Converted: 0.24695921
  7918. Biggest difference in row (0, 12, 6), sum 13.863525 vs 0.418773
  7919. Layer 8, Token 25 (recurrent cache comparison):
  7920. Original tensor sum: 253.027832
  7921. Converted tensor sum: 38.795532
  7922. Original tensor mean: 0.079071
  7923. Converted tensor mean: 0.012124
  7924. Mean difference: 0.15110740
  7925. Maximum pointwise difference: 2.77147269
  7926. Max difference location: (0, 21, 8, 9)
  7927. Values at max diff - Original: 2.86136007, Converted: 0.08988741
  7928. Biggest difference in row (0, 6, 2), sum 5.609079 vs -2.170572
  7929. Layer 9, Token 25 (recurrent cache comparison):
  7930. Original tensor sum: 207.731750
  7931. Converted tensor sum: 52.985756
  7932. Original tensor mean: 0.064916
  7933. Converted tensor mean: 0.016558
  7934. Mean difference: 0.11516394
  7935. Maximum pointwise difference: 2.72221398
  7936. Max difference location: (0, 28, 7, 0)
  7937. Values at max diff - Original: 2.76798820, Converted: 0.04577418
  7938. Biggest difference in row (0, 28, 7), sum 9.256445 vs 0.484987
  7939. Layer 10, Token 25 (recurrent cache comparison):
  7940. Original tensor sum: 196.952515
  7941. Converted tensor sum: 54.152390
  7942. Original tensor mean: 0.061548
  7943. Converted tensor mean: 0.016923
  7944. Mean difference: 0.12454510
  7945. Maximum pointwise difference: 2.34993958
  7946. Max difference location: (0, 10, 3, 5)
  7947. Values at max diff - Original: -0.00316075, Converted: 2.34677887
  7948. Biggest difference in row (0, 11, 6), sum 5.878725 vs 0.250239
  7949. Layer 12, Token 25 (recurrent cache comparison):
  7950. Original tensor sum: 255.808289
  7951. Converted tensor sum: 65.224335
  7952. Original tensor mean: 0.079940
  7953. Converted tensor mean: 0.020383
  7954. Mean difference: 0.14238897
  7955. Maximum pointwise difference: 2.58750200
  7956. Max difference location: (0, 30, 8, 9)
  7957. Values at max diff - Original: -0.02865839, Converted: 2.55884361
  7958. Biggest difference in row (0, 28, 3), sum 8.769258 vs 0.354862
  7959. Layer 13, Token 25 (recurrent cache comparison):
  7960. Original tensor sum: 166.242828
  7961. Converted tensor sum: 63.081795
  7962. Original tensor mean: 0.051951
  7963. Converted tensor mean: 0.019713
  7964. Mean difference: 0.10068022
  7965. Maximum pointwise difference: 2.70444345
  7966. Max difference location: (0, 26, 4, 0)
  7967. Values at max diff - Original: 2.70685434, Converted: 0.00241077
  7968. Biggest difference in row (0, 26, 4), sum 5.351704 vs -0.105821
  7969. Layer 14, Token 25 (recurrent cache comparison):
  7970. Original tensor sum: 542.257324
  7971. Converted tensor sum: 126.161835
  7972. Original tensor mean: 0.169455
  7973. Converted tensor mean: 0.039426
  7974. Mean difference: 0.22693451
  7975. Maximum pointwise difference: 4.91657877
  7976. Max difference location: (0, 28, 6, 1)
  7977. Values at max diff - Original: 5.17964792, Converted: 0.26306900
  7978. Biggest difference in row (0, 28, 6), sum 14.244452 vs 1.160758
  7979. Layer 0, Token 26 (recurrent cache comparison):
  7980. Original tensor sum: 2.494154
  7981. Converted tensor sum: -0.022610
  7982. Original tensor mean: 0.000779
  7983. Converted tensor mean: -0.000007
  7984. Mean difference: 0.07249723
  7985. Maximum pointwise difference: 1.12537110
  7986. Max difference location: (0, 23, 8, 6)
  7987. Values at max diff - Original: -0.77736998, Converted: 0.34800115
  7988. Biggest difference in row (0, 25, 2), sum 0.139047 vs -2.260486
  7989. Layer 1, Token 26 (recurrent cache comparison):
  7990. Original tensor sum: 89.948196
  7991. Converted tensor sum: 28.472143
  7992. Original tensor mean: 0.028109
  7993. Converted tensor mean: 0.008898
  7994. Mean difference: 0.08773426
  7995. Maximum pointwise difference: 1.21594334
  7996. Max difference location: (0, 31, 9, 5)
  7997. Values at max diff - Original: 1.12476408, Converted: -0.09117921
  7998. Biggest difference in row (0, 3, 0), sum 2.359989 vs -0.070505
  7999. Layer 2, Token 26 (recurrent cache comparison):
  8000. Original tensor sum: 129.416809
  8001. Converted tensor sum: 41.503624
  8002. Original tensor mean: 0.040443
  8003. Converted tensor mean: 0.012970
  8004. Mean difference: 0.15461735
  8005. Maximum pointwise difference: 2.68493867
  8006. Max difference location: (0, 8, 8, 3)
  8007. Values at max diff - Original: 2.36720443, Converted: -0.31773427
  8008. Biggest difference in row (0, 27, 9), sum 4.510338 vs -0.361951
  8009. Layer 4, Token 26 (recurrent cache comparison):
  8010. Original tensor sum: 167.357330
  8011. Converted tensor sum: 22.416847
  8012. Original tensor mean: 0.052299
  8013. Converted tensor mean: 0.007005
  8014. Mean difference: 0.12134697
  8015. Maximum pointwise difference: 2.10167456
  8016. Max difference location: (0, 27, 2, 5)
  8017. Values at max diff - Original: 2.16418123, Converted: 0.06250665
  8018. Biggest difference in row (0, 20, 0), sum 5.742605 vs 0.074519
  8019. Layer 5, Token 26 (recurrent cache comparison):
  8020. Original tensor sum: 163.754578
  8021. Converted tensor sum: 25.965012
  8022. Original tensor mean: 0.051173
  8023. Converted tensor mean: 0.008114
  8024. Mean difference: 0.12282242
  8025. Maximum pointwise difference: 1.71204209
  8026. Max difference location: (0, 6, 7, 6)
  8027. Values at max diff - Original: 1.87962317, Converted: 0.16758111
  8028. Biggest difference in row (0, 6, 7), sum 6.898893 vs 0.817218
  8029. Layer 6, Token 26 (recurrent cache comparison):
  8030. Original tensor sum: 280.407990
  8031. Converted tensor sum: 7.497489
  8032. Original tensor mean: 0.087628
  8033. Converted tensor mean: 0.002343
  8034. Mean difference: 0.16469882
  8035. Maximum pointwise difference: 5.09109163
  8036. Max difference location: (0, 12, 6, 1)
  8037. Values at max diff - Original: 5.84504795, Converted: 0.75395638
  8038. Biggest difference in row (0, 12, 6), sum 13.522006 vs 2.690509
  8039. Layer 8, Token 26 (recurrent cache comparison):
  8040. Original tensor sum: 290.931335
  8041. Converted tensor sum: 24.817287
  8042. Original tensor mean: 0.090916
  8043. Converted tensor mean: 0.007755
  8044. Mean difference: 0.16735801
  8045. Maximum pointwise difference: 2.96624160
  8046. Max difference location: (0, 12, 7, 4)
  8047. Values at max diff - Original: 0.00615764, Converted: 2.97239923
  8048. Biggest difference in row (0, 6, 2), sum 5.174712 vs -3.075627
  8049. Layer 9, Token 26 (recurrent cache comparison):
  8050. Original tensor sum: 196.708160
  8051. Converted tensor sum: 30.441196
  8052. Original tensor mean: 0.061471
  8053. Converted tensor mean: 0.009513
  8054. Mean difference: 0.11019707
  8055. Maximum pointwise difference: 2.66847897
  8056. Max difference location: (0, 28, 7, 0)
  8057. Values at max diff - Original: 2.53971243, Converted: -0.12876646
  8058. Biggest difference in row (0, 28, 7), sum 8.254028 vs 0.381486
  8059. Layer 10, Token 26 (recurrent cache comparison):
  8060. Original tensor sum: 199.032516
  8061. Converted tensor sum: 15.679170
  8062. Original tensor mean: 0.062198
  8063. Converted tensor mean: 0.004900
  8064. Mean difference: 0.11978843
  8065. Maximum pointwise difference: 2.87448788
  8066. Max difference location: (0, 24, 1, 0)
  8067. Values at max diff - Original: 3.14507675, Converted: 0.27058893
  8068. Biggest difference in row (0, 25, 1), sum 5.510708 vs 0.187406
  8069. Layer 12, Token 26 (recurrent cache comparison):
  8070. Original tensor sum: 260.372742
  8071. Converted tensor sum: 27.850517
  8072. Original tensor mean: 0.081366
  8073. Converted tensor mean: 0.008703
  8074. Mean difference: 0.15131992
  8075. Maximum pointwise difference: 3.11937833
  8076. Max difference location: (0, 29, 6, 5)
  8077. Values at max diff - Original: -0.00478183, Converted: 3.11459661
  8078. Biggest difference in row (0, 28, 3), sum 8.629121 vs -0.241569
  8079. Layer 13, Token 26 (recurrent cache comparison):
  8080. Original tensor sum: 175.842209
  8081. Converted tensor sum: 31.150665
  8082. Original tensor mean: 0.054951
  8083. Converted tensor mean: 0.009735
  8084. Mean difference: 0.10132494
  8085. Maximum pointwise difference: 2.68282986
  8086. Max difference location: (0, 26, 4, 0)
  8087. Values at max diff - Original: 2.69746804, Converted: 0.01463811
  8088. Biggest difference in row (0, 26, 4), sum 5.839348 vs 0.118608
  8089. Layer 14, Token 26 (recurrent cache comparison):
  8090. Original tensor sum: 549.098877
  8091. Converted tensor sum: 57.239769
  8092. Original tensor mean: 0.171593
  8093. Converted tensor mean: 0.017887
  8094. Mean difference: 0.23359555
  8095. Maximum pointwise difference: 4.78898478
  8096. Max difference location: (0, 28, 6, 1)
  8097. Values at max diff - Original: 4.82380438, Converted: 0.03481963
  8098. Biggest difference in row (0, 28, 6), sum 13.322067 vs -0.096704
  8099. Layer 0, Token 27 (recurrent cache comparison):
  8100. Original tensor sum: 1.918821
  8101. Converted tensor sum: 4.296852
  8102. Original tensor mean: 0.000600
  8103. Converted tensor mean: 0.001343
  8104. Mean difference: 0.06445935
  8105. Maximum pointwise difference: 1.46873963
  8106. Max difference location: (0, 1, 3, 2)
  8107. Values at max diff - Original: -0.01301772, Converted: 1.45572186
  8108. Biggest difference in row (0, 28, 5), sum 0.223120 vs 1.905128
  8109. Layer 1, Token 27 (recurrent cache comparison):
  8110. Original tensor sum: 160.952576
  8111. Converted tensor sum: 15.469984
  8112. Original tensor mean: 0.050298
  8113. Converted tensor mean: 0.004834
  8114. Mean difference: 0.10194612
  8115. Maximum pointwise difference: 1.58813882
  8116. Max difference location: (0, 10, 6, 8)
  8117. Values at max diff - Original: 1.63966167, Converted: 0.05152279
  8118. Biggest difference in row (0, 16, 1), sum 4.988435 vs 0.628698
  8119. Layer 2, Token 27 (recurrent cache comparison):
  8120. Original tensor sum: 195.883148
  8121. Converted tensor sum: 23.802681
  8122. Original tensor mean: 0.061213
  8123. Converted tensor mean: 0.007438
  8124. Mean difference: 0.16412406
  8125. Maximum pointwise difference: 3.51121449
  8126. Max difference location: (0, 18, 2, 1)
  8127. Values at max diff - Original: 0.00709479, Converted: 3.51830935
  8128. Biggest difference in row (0, 0, 2), sum 7.858056 vs -0.148840
  8129. Layer 4, Token 27 (recurrent cache comparison):
  8130. Original tensor sum: 233.660095
  8131. Converted tensor sum: 13.142452
  8132. Original tensor mean: 0.073019
  8133. Converted tensor mean: 0.004107
  8134. Mean difference: 0.12733760
  8135. Maximum pointwise difference: 2.84240961
  8136. Max difference location: (0, 27, 8, 5)
  8137. Values at max diff - Original: 2.76694965, Converted: -0.07546007
  8138. Biggest difference in row (0, 24, 1), sum 6.535775 vs 0.658166
  8139. Layer 5, Token 27 (recurrent cache comparison):
  8140. Original tensor sum: 251.330231
  8141. Converted tensor sum: 21.526363
  8142. Original tensor mean: 0.078541
  8143. Converted tensor mean: 0.006727
  8144. Mean difference: 0.13129665
  8145. Maximum pointwise difference: 2.36431837
  8146. Max difference location: (0, 6, 2, 8)
  8147. Values at max diff - Original: 2.37356281, Converted: 0.00924453
  8148. Biggest difference in row (0, 6, 2), sum 8.637090 vs 0.102351
  8149. Layer 6, Token 27 (recurrent cache comparison):
  8150. Original tensor sum: 362.387848
  8151. Converted tensor sum: -2.171665
  8152. Original tensor mean: 0.113246
  8153. Converted tensor mean: -0.000679
  8154. Mean difference: 0.18160143
  8155. Maximum pointwise difference: 5.93641853
  8156. Max difference location: (0, 12, 6, 1)
  8157. Values at max diff - Original: 5.75199318, Converted: -0.18442529
  8158. Biggest difference in row (0, 12, 6), sum 13.466440 vs 2.236503
  8159. Layer 8, Token 27 (recurrent cache comparison):
  8160. Original tensor sum: 350.323914
  8161. Converted tensor sum: 19.725079
  8162. Original tensor mean: 0.109476
  8163. Converted tensor mean: 0.006164
  8164. Mean difference: 0.17721944
  8165. Maximum pointwise difference: 3.75930500
  8166. Max difference location: (0, 20, 0, 7)
  8167. Values at max diff - Original: 3.75676632, Converted: -0.00253879
  8168. Biggest difference in row (0, 13, 8), sum 9.584435 vs 0.181711
  8169. Layer 9, Token 27 (recurrent cache comparison):
  8170. Original tensor sum: 240.779663
  8171. Converted tensor sum: 24.165503
  8172. Original tensor mean: 0.075244
  8173. Converted tensor mean: 0.007552
  8174. Mean difference: 0.11309086
  8175. Maximum pointwise difference: 2.43383050
  8176. Max difference location: (0, 28, 7, 0)
  8177. Values at max diff - Original: 2.44759488, Converted: 0.01376434
  8178. Biggest difference in row (0, 28, 7), sum 8.022928 vs 0.225877
  8179. Layer 10, Token 27 (recurrent cache comparison):
  8180. Original tensor sum: 244.469070
  8181. Converted tensor sum: 12.286395
  8182. Original tensor mean: 0.076397
  8183. Converted tensor mean: 0.003839
  8184. Mean difference: 0.11746948
  8185. Maximum pointwise difference: 2.32974362
  8186. Max difference location: (0, 24, 1, 0)
  8187. Values at max diff - Original: 3.20926118, Converted: 0.87951756
  8188. Biggest difference in row (0, 11, 6), sum 6.942329 vs -0.007718
  8189. Layer 12, Token 27 (recurrent cache comparison):
  8190. Original tensor sum: 306.749817
  8191. Converted tensor sum: 12.790400
  8192. Original tensor mean: 0.095859
  8193. Converted tensor mean: 0.003997
  8194. Mean difference: 0.15706061
  8195. Maximum pointwise difference: 3.82620597
  8196. Max difference location: (0, 14, 8, 1)
  8197. Values at max diff - Original: 3.90818167, Converted: 0.08197562
  8198. Biggest difference in row (0, 28, 3), sum 9.139596 vs 0.006271
  8199. Layer 13, Token 27 (recurrent cache comparison):
  8200. Original tensor sum: 231.223206
  8201. Converted tensor sum: 21.992476
  8202. Original tensor mean: 0.072257
  8203. Converted tensor mean: 0.006873
  8204. Mean difference: 0.10150776
  8205. Maximum pointwise difference: 2.88272619
  8206. Max difference location: (0, 11, 4, 0)
  8207. Values at max diff - Original: 2.93226290, Converted: 0.04953665
  8208. Biggest difference in row (0, 26, 4), sum 5.809074 vs 0.116277
  8209. Layer 14, Token 27 (recurrent cache comparison):
  8210. Original tensor sum: 648.596985
  8211. Converted tensor sum: 37.038162
  8212. Original tensor mean: 0.202687
  8213. Converted tensor mean: 0.011574
  8214. Mean difference: 0.25362208
  8215. Maximum pointwise difference: 4.70936871
  8216. Max difference location: (0, 28, 6, 1)
  8217. Values at max diff - Original: 4.71021414, Converted: 0.00084528
  8218. Biggest difference in row (0, 27, 4), sum 11.931866 vs 0.099372
  8219. Layer 0, Token 28 (recurrent cache comparison):
  8220. Original tensor sum: 3.825253
  8221. Converted tensor sum: 10.656538
  8222. Original tensor mean: 0.001195
  8223. Converted tensor mean: 0.003330
  8224. Mean difference: 0.06744900
  8225. Maximum pointwise difference: 1.23786223
  8226. Max difference location: (0, 1, 5, 3)
  8227. Values at max diff - Original: 1.16935611, Converted: -0.06850608
  8228. Biggest difference in row (0, 23, 7), sum -1.495719 vs 0.880324
  8229. Layer 1, Token 28 (recurrent cache comparison):
  8230. Original tensor sum: 64.976830
  8231. Converted tensor sum: 30.582441
  8232. Original tensor mean: 0.020305
  8233. Converted tensor mean: 0.009557
  8234. Mean difference: 0.08607832
  8235. Maximum pointwise difference: 1.46864974
  8236. Max difference location: (0, 16, 2, 9)
  8237. Values at max diff - Original: 1.56847525, Converted: 0.09982550
  8238. Biggest difference in row (0, 1, 3), sum 2.236484 vs -0.154611
  8239. Layer 2, Token 28 (recurrent cache comparison):
  8240. Original tensor sum: 104.630646
  8241. Converted tensor sum: 53.524834
  8242. Original tensor mean: 0.032697
  8243. Converted tensor mean: 0.016727
  8244. Mean difference: 0.14054969
  8245. Maximum pointwise difference: 2.87744927
  8246. Max difference location: (0, 13, 1, 7)
  8247. Values at max diff - Original: 0.03716344, Converted: 2.91461277
  8248. Biggest difference in row (0, 23, 4), sum 0.081307 vs 4.303990
  8249. Layer 4, Token 28 (recurrent cache comparison):
  8250. Original tensor sum: 192.219788
  8251. Converted tensor sum: 29.228979
  8252. Original tensor mean: 0.060069
  8253. Converted tensor mean: 0.009134
  8254. Mean difference: 0.12325959
  8255. Maximum pointwise difference: 4.08833027
  8256. Max difference location: (0, 19, 0, 2)
  8257. Values at max diff - Original: 4.01820278, Converted: -0.07012761
  8258. Biggest difference in row (0, 19, 0), sum 6.219261 vs -0.327518
  8259. Layer 5, Token 28 (recurrent cache comparison):
  8260. Original tensor sum: 243.385864
  8261. Converted tensor sum: 23.409119
  8262. Original tensor mean: 0.076058
  8263. Converted tensor mean: 0.007315
  8264. Mean difference: 0.14056823
  8265. Maximum pointwise difference: 5.76254559
  8266. Max difference location: (0, 28, 9, 6)
  8267. Values at max diff - Original: 6.02726078, Converted: 0.26471528
  8268. Biggest difference in row (0, 28, 9), sum 9.890844 vs 0.406699
  8269. Layer 6, Token 28 (recurrent cache comparison):
  8270. Original tensor sum: 283.616272
  8271. Converted tensor sum: 40.143700
  8272. Original tensor mean: 0.088630
  8273. Converted tensor mean: 0.012545
  8274. Mean difference: 0.16413040
  8275. Maximum pointwise difference: 4.72735071
  8276. Max difference location: (0, 12, 1, 2)
  8277. Values at max diff - Original: 4.75247860, Converted: 0.02512792
  8278. Biggest difference in row (0, 12, 1), sum 13.120539 vs 0.133712
  8279. Layer 8, Token 28 (recurrent cache comparison):
  8280. Original tensor sum: 228.649261
  8281. Converted tensor sum: 44.837063
  8282. Original tensor mean: 0.071453
  8283. Converted tensor mean: 0.014012
  8284. Mean difference: 0.15679255
  8285. Maximum pointwise difference: 3.82907844
  8286. Max difference location: (0, 23, 4, 7)
  8287. Values at max diff - Original: 3.84108162, Converted: 0.01200324
  8288. Biggest difference in row (0, 1, 4), sum 8.901268 vs 0.416754
  8289. Layer 9, Token 28 (recurrent cache comparison):
  8290. Original tensor sum: 212.272324
  8291. Converted tensor sum: 21.536671
  8292. Original tensor mean: 0.066335
  8293. Converted tensor mean: 0.006730
  8294. Mean difference: 0.11465029
  8295. Maximum pointwise difference: 2.85586047
  8296. Max difference location: (0, 15, 2, 3)
  8297. Values at max diff - Original: 2.84589958, Converted: -0.00996090
  8298. Biggest difference in row (0, 15, 2), sum 8.293229 vs 0.275981
  8299. Layer 10, Token 28 (recurrent cache comparison):
  8300. Original tensor sum: 212.098206
  8301. Converted tensor sum: 19.835695
  8302. Original tensor mean: 0.066281
  8303. Converted tensor mean: 0.006199
  8304. Mean difference: 0.14262109
  8305. Maximum pointwise difference: 4.31178093
  8306. Max difference location: (0, 24, 1, 0)
  8307. Values at max diff - Original: 4.53196430, Converted: 0.22018313
  8308. Biggest difference in row (0, 10, 4), sum 9.766387 vs -0.072625
  8309. Layer 12, Token 28 (recurrent cache comparison):
  8310. Original tensor sum: 280.744019
  8311. Converted tensor sum: 26.187149
  8312. Original tensor mean: 0.087733
  8313. Converted tensor mean: 0.008183
  8314. Mean difference: 0.15264840
  8315. Maximum pointwise difference: 4.41812420
  8316. Max difference location: (0, 21, 2, 4)
  8317. Values at max diff - Original: 4.41481018, Converted: -0.00331383
  8318. Biggest difference in row (0, 23, 2), sum 10.581321 vs 0.608111
  8319. Layer 13, Token 28 (recurrent cache comparison):
  8320. Original tensor sum: 220.357834
  8321. Converted tensor sum: 20.228846
  8322. Original tensor mean: 0.068862
  8323. Converted tensor mean: 0.006322
  8324. Mean difference: 0.11583474
  8325. Maximum pointwise difference: 4.72553635
  8326. Max difference location: (0, 17, 8, 2)
  8327. Values at max diff - Original: 4.72810841, Converted: 0.00257226
  8328. Biggest difference in row (0, 19, 1), sum 9.879478 vs 0.388081
  8329. Layer 14, Token 28 (recurrent cache comparison):
  8330. Original tensor sum: 515.894897
  8331. Converted tensor sum: 74.440948
  8332. Original tensor mean: 0.161217
  8333. Converted tensor mean: 0.023263
  8334. Mean difference: 0.23548929
  8335. Maximum pointwise difference: 4.93366051
  8336. Max difference location: (0, 16, 7, 6)
  8337. Values at max diff - Original: 4.92017603, Converted: -0.01348470
  8338. Biggest difference in row (0, 28, 6), sum 14.032580 vs -0.061767
  8339. Layer 0, Token 29 (recurrent cache comparison):
  8340. Original tensor sum: 7.490709
  8341. Converted tensor sum: 13.732031
  8342. Original tensor mean: 0.002341
  8343. Converted tensor mean: 0.004291
  8344. Mean difference: 0.06082471
  8345. Maximum pointwise difference: 1.43740010
  8346. Max difference location: (0, 1, 3, 3)
  8347. Values at max diff - Original: 1.39118814, Converted: -0.04621201
  8348. Biggest difference in row (0, 23, 1), sum -0.646684 vs 1.039518
  8349. Layer 1, Token 29 (recurrent cache comparison):
  8350. Original tensor sum: 34.789967
  8351. Converted tensor sum: 32.546562
  8352. Original tensor mean: 0.010872
  8353. Converted tensor mean: 0.010171
  8354. Mean difference: 0.08757141
  8355. Maximum pointwise difference: 1.04371011
  8356. Max difference location: (0, 6, 1, 2)
  8357. Values at max diff - Original: 0.99249512, Converted: -0.05121503
  8358. Biggest difference in row (0, 3, 8), sum -0.976319 vs 2.533029
  8359. Layer 2, Token 29 (recurrent cache comparison):
  8360. Original tensor sum: 81.188293
  8361. Converted tensor sum: 110.873352
  8362. Original tensor mean: 0.025371
  8363. Converted tensor mean: 0.034648
  8364. Mean difference: 0.13966069
  8365. Maximum pointwise difference: 2.45380425
  8366. Max difference location: (0, 13, 7, 1)
  8367. Values at max diff - Original: 0.05114410, Converted: 2.50494838
  8368. Biggest difference in row (0, 12, 1), sum 5.281791 vs 0.538119
  8369. Layer 4, Token 29 (recurrent cache comparison):
  8370. Original tensor sum: 188.945206
  8371. Converted tensor sum: 82.802734
  8372. Original tensor mean: 0.059045
  8373. Converted tensor mean: 0.025876
  8374. Mean difference: 0.13653603
  8375. Maximum pointwise difference: 2.89840102
  8376. Max difference location: (0, 19, 0, 2)
  8377. Values at max diff - Original: 2.89151430, Converted: -0.00688672
  8378. Biggest difference in row (0, 19, 0), sum 4.444302 vs -0.202434
  8379. Layer 5, Token 29 (recurrent cache comparison):
  8380. Original tensor sum: 234.074219
  8381. Converted tensor sum: 65.914871
  8382. Original tensor mean: 0.073148
  8383. Converted tensor mean: 0.020598
  8384. Mean difference: 0.14784601
  8385. Maximum pointwise difference: 3.25614643
  8386. Max difference location: (0, 28, 9, 6)
  8387. Values at max diff - Original: 3.74669981, Converted: 0.49055350
  8388. Biggest difference in row (0, 28, 9), sum 6.683680 vs 1.130066
  8389. Layer 6, Token 29 (recurrent cache comparison):
  8390. Original tensor sum: 312.478729
  8391. Converted tensor sum: 136.998260
  8392. Original tensor mean: 0.097650
  8393. Converted tensor mean: 0.042812
  8394. Mean difference: 0.19563875
  8395. Maximum pointwise difference: 4.93519068
  8396. Max difference location: (0, 12, 6, 2)
  8397. Values at max diff - Original: 4.85506201, Converted: -0.08012870
  8398. Biggest difference in row (0, 12, 6), sum 14.484787 vs 2.203152
  8399. Layer 8, Token 29 (recurrent cache comparison):
  8400. Original tensor sum: 249.388092
  8401. Converted tensor sum: 124.562820
  8402. Original tensor mean: 0.077934
  8403. Converted tensor mean: 0.038926
  8404. Mean difference: 0.18382950
  8405. Maximum pointwise difference: 3.92004848
  8406. Max difference location: (0, 20, 7, 0)
  8407. Values at max diff - Original: 0.21650003, Converted: 4.13654852
  8408. Biggest difference in row (0, 23, 4), sum 6.951686 vs -0.318011
  8409. Layer 9, Token 29 (recurrent cache comparison):
  8410. Original tensor sum: 200.171021
  8411. Converted tensor sum: 82.927864
  8412. Original tensor mean: 0.062553
  8413. Converted tensor mean: 0.025915
  8414. Mean difference: 0.12187681
  8415. Maximum pointwise difference: 2.69074798
  8416. Max difference location: (0, 15, 2, 3)
  8417. Values at max diff - Original: 2.69794440, Converted: 0.00719635
  8418. Biggest difference in row (0, 15, 2), sum 7.941767 vs 0.050363
  8419. Layer 10, Token 29 (recurrent cache comparison):
  8420. Original tensor sum: 213.368591
  8421. Converted tensor sum: 77.427185
  8422. Original tensor mean: 0.066678
  8423. Converted tensor mean: 0.024196
  8424. Mean difference: 0.13651104
  8425. Maximum pointwise difference: 3.13308334
  8426. Max difference location: (0, 24, 1, 0)
  8427. Values at max diff - Original: 3.58547378, Converted: 0.45239034
  8428. Biggest difference in row (0, 10, 4), sum 6.818930 vs -0.155169
  8429. Layer 12, Token 29 (recurrent cache comparison):
  8430. Original tensor sum: 263.786377
  8431. Converted tensor sum: 92.682205
  8432. Original tensor mean: 0.082433
  8433. Converted tensor mean: 0.028963
  8434. Mean difference: 0.15690672
  8435. Maximum pointwise difference: 3.50486374
  8436. Max difference location: (0, 23, 2, 9)
  8437. Values at max diff - Original: 3.44645429, Converted: -0.05840937
  8438. Biggest difference in row (0, 23, 2), sum 9.830493 vs -0.231371
  8439. Layer 13, Token 29 (recurrent cache comparison):
  8440. Original tensor sum: 193.539474
  8441. Converted tensor sum: 79.679726
  8442. Original tensor mean: 0.060481
  8443. Converted tensor mean: 0.024900
  8444. Mean difference: 0.11795644
  8445. Maximum pointwise difference: 3.62266445
  8446. Max difference location: (0, 11, 4, 0)
  8447. Values at max diff - Original: 3.49508691, Converted: -0.12757748
  8448. Biggest difference in row (0, 18, 1), sum 5.632851 vs -0.122056
  8449. Layer 14, Token 29 (recurrent cache comparison):
  8450. Original tensor sum: 525.021179
  8451. Converted tensor sum: 197.845932
  8452. Original tensor mean: 0.164069
  8453. Converted tensor mean: 0.061827
  8454. Mean difference: 0.25022614
  8455. Maximum pointwise difference: 4.42602730
  8456. Max difference location: (0, 15, 2, 8)
  8457. Values at max diff - Original: 4.48904753, Converted: 0.06302036
  8458. Biggest difference in row (0, 28, 6), sum 13.769245 vs 2.199155
  8459. Layer 0, Token 30 (recurrent cache comparison):
  8460. Original tensor sum: 4.659326
  8461. Converted tensor sum: 10.953376
  8462. Original tensor mean: 0.001456
  8463. Converted tensor mean: 0.003423
  8464. Mean difference: 0.06142937
  8465. Maximum pointwise difference: 1.06926394
  8466. Max difference location: (0, 28, 5, 9)
  8467. Values at max diff - Original: -0.05087389, Converted: 1.01839006
  8468. Biggest difference in row (0, 4, 9), sum 2.534327 vs -0.105926
  8469. Layer 1, Token 30 (recurrent cache comparison):
  8470. Original tensor sum: 24.136578
  8471. Converted tensor sum: 96.968475
  8472. Original tensor mean: 0.007543
  8473. Converted tensor mean: 0.030303
  8474. Mean difference: 0.08820312
  8475. Maximum pointwise difference: 1.49761820
  8476. Max difference location: (0, 6, 4, 4)
  8477. Values at max diff - Original: 0.06953955, Converted: 1.56715775
  8478. Biggest difference in row (0, 14, 2), sum 0.115400 vs 3.481205
  8479. Layer 2, Token 30 (recurrent cache comparison):
  8480. Original tensor sum: 64.494400
  8481. Converted tensor sum: 246.552582
  8482. Original tensor mean: 0.020155
  8483. Converted tensor mean: 0.077048
  8484. Mean difference: 0.16151237
  8485. Maximum pointwise difference: 3.98919630
  8486. Max difference location: (0, 4, 8, 4)
  8487. Values at max diff - Original: -0.10013573, Converted: 3.88906050
  8488. Biggest difference in row (0, 23, 4), sum -0.108707 vs 7.892229
  8489. Layer 4, Token 30 (recurrent cache comparison):
  8490. Original tensor sum: 190.921097
  8491. Converted tensor sum: 126.537048
  8492. Original tensor mean: 0.059663
  8493. Converted tensor mean: 0.039543
  8494. Mean difference: 0.13220279
  8495. Maximum pointwise difference: 2.87259126
  8496. Max difference location: (0, 8, 6, 5)
  8497. Values at max diff - Original: 0.00449362, Converted: 2.87708497
  8498. Biggest difference in row (0, 17, 9), sum 0.710816 vs 6.274773
  8499. Layer 5, Token 30 (recurrent cache comparison):
  8500. Original tensor sum: 222.353195
  8501. Converted tensor sum: 164.720016
  8502. Original tensor mean: 0.069485
  8503. Converted tensor mean: 0.051475
  8504. Mean difference: 0.15598193
  8505. Maximum pointwise difference: 2.88562417
  8506. Max difference location: (0, 28, 9, 6)
  8507. Values at max diff - Original: 3.18444014, Converted: 0.29881600
  8508. Biggest difference in row (0, 30, 2), sum 0.004416 vs 6.153850
  8509. Layer 6, Token 30 (recurrent cache comparison):
  8510. Original tensor sum: 339.244141
  8511. Converted tensor sum: 317.588440
  8512. Original tensor mean: 0.106014
  8513. Converted tensor mean: 0.099246
  8514. Mean difference: 0.21152201
  8515. Maximum pointwise difference: 4.30255318
  8516. Max difference location: (0, 6, 4, 8)
  8517. Values at max diff - Original: -0.19493943, Converted: 4.10761356
  8518. Biggest difference in row (0, 12, 6), sum 13.503227 vs 2.285058
  8519. Layer 8, Token 30 (recurrent cache comparison):
  8520. Original tensor sum: 261.308044
  8521. Converted tensor sum: 204.488892
  8522. Original tensor mean: 0.081659
  8523. Converted tensor mean: 0.063903
  8524. Mean difference: 0.18225618
  8525. Maximum pointwise difference: 3.88148618
  8526. Max difference location: (0, 21, 7, 9)
  8527. Values at max diff - Original: 3.48627377, Converted: -0.39521238
  8528. Biggest difference in row (0, 2, 4), sum -0.009086 vs 6.555274
  8529. Layer 9, Token 30 (recurrent cache comparison):
  8530. Original tensor sum: 187.010895
  8531. Converted tensor sum: 173.659409
  8532. Original tensor mean: 0.058441
  8533. Converted tensor mean: 0.054269
  8534. Mean difference: 0.12517925
  8535. Maximum pointwise difference: 2.68900180
  8536. Max difference location: (0, 15, 2, 3)
  8537. Values at max diff - Original: 2.59999108, Converted: -0.08901066
  8538. Biggest difference in row (0, 15, 2), sum 7.543541 vs 0.209705
  8539. Layer 10, Token 30 (recurrent cache comparison):
  8540. Original tensor sum: 206.371735
  8541. Converted tensor sum: 145.950043
  8542. Original tensor mean: 0.064491
  8543. Converted tensor mean: 0.045609
  8544. Mean difference: 0.12893555
  8545. Maximum pointwise difference: 2.97875929
  8546. Max difference location: (0, 24, 1, 0)
  8547. Values at max diff - Original: 3.54119730, Converted: 0.56243801
  8548. Biggest difference in row (0, 11, 6), sum 5.982455 vs 0.632388
  8549. Layer 12, Token 30 (recurrent cache comparison):
  8550. Original tensor sum: 251.250732
  8551. Converted tensor sum: 193.503662
  8552. Original tensor mean: 0.078516
  8553. Converted tensor mean: 0.060470
  8554. Mean difference: 0.14629500
  8555. Maximum pointwise difference: 3.24942660
  8556. Max difference location: (0, 28, 2, 4)
  8557. Values at max diff - Original: 3.09908056, Converted: -0.15034601
  8558. Biggest difference in row (0, 28, 3), sum 9.363594 vs -0.017764
  8559. Layer 13, Token 30 (recurrent cache comparison):
  8560. Original tensor sum: 176.694855
  8561. Converted tensor sum: 165.849930
  8562. Original tensor mean: 0.055217
  8563. Converted tensor mean: 0.051828
  8564. Mean difference: 0.11395165
  8565. Maximum pointwise difference: 3.52955794
  8566. Max difference location: (0, 11, 4, 0)
  8567. Values at max diff - Original: 3.33610535, Converted: -0.19345257
  8568. Biggest difference in row (0, 8, 7), sum -0.009830 vs 4.540796
  8569. Layer 14, Token 30 (recurrent cache comparison):
  8570. Original tensor sum: 562.166748
  8571. Converted tensor sum: 408.797607
  8572. Original tensor mean: 0.175677
  8573. Converted tensor mean: 0.127749
  8574. Mean difference: 0.25758758
  8575. Maximum pointwise difference: 4.45499659
  8576. Max difference location: (0, 15, 2, 8)
  8577. Values at max diff - Original: 4.37386942, Converted: -0.08112720
  8578. Biggest difference in row (0, 28, 6), sum 13.013643 vs -0.161676
  8579. ================================================================================
  8580. Comparing q padded tensors...
  8581. ================================================================================
  8582. Layer 0, Token 1 (q padded comparison):
  8583. Original tensor sum: 7.958682
  8584. Converted tensor sum: 7.958661
  8585. Original tensor mean: 0.000389
  8586. Converted tensor mean: 0.000389
  8587. Mean difference: 0.00000000
  8588. Maximum pointwise difference: 0.00000076
  8589. Max difference location: (0, 0, 0, 6)
  8590. Values at max diff - Original: -0.22316068, Converted: -0.22316144
  8591. Biggest difference in row (0, 0, 0), sum -0.570113 vs -0.570115
  8592. Original tensor:
  8593. [[[[ 1.97370015e-02 -7.89398551e-02 2.40650475e-02 ... -3.46655026e-02
  8594. -1.84459373e-01 1.35031175e-02]
  8595. [-3.90069596e-02 -6.45441562e-02 -9.85123310e-03 ... -7.10528418e-02
  8596. 2.86484748e-01 -4.78143468e-02]
  8597. [-3.32845971e-02 8.48600932e-04 -1.83281749e-02 ... -3.60261202e-02
  8598. 1.16759300e-01 -3.79200131e-02]
  8599. ...
  8600. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8601. 0.00000000e+00 0.00000000e+00]
  8602. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8603. 0.00000000e+00 0.00000000e+00]
  8604. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8605. 0.00000000e+00 0.00000000e+00]]
  8606. [[ 1.97370015e-02 -7.89398551e-02 2.40650475e-02 ... -3.46655026e-02
  8607. -1.84459373e-01 1.35031175e-02]
  8608. [-3.90069596e-02 -6.45441562e-02 -9.85123310e-03 ... -7.10528418e-02
  8609. 2.86484748e-01 -4.78143468e-02]
  8610. [-3.32845971e-02 8.48600932e-04 -1.83281749e-02 ... -3.60261202e-02
  8611. 1.16759300e-01 -3.79200131e-02]
  8612. ...
  8613. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8614. 0.00000000e+00 0.00000000e+00]
  8615. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8616. 0.00000000e+00 0.00000000e+00]
  8617. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8618. 0.00000000e+00 0.00000000e+00]]
  8619. [[-6.58135489e-02 2.45508045e-01 -1.13810226e-02 ... 2.54544546e-03
  8620. 2.51089204e-02 2.86987983e-04]
  8621. [-1.25565156e-01 -7.94792548e-02 -9.97955501e-02 ... 7.12259486e-02
  8622. 9.36590508e-02 -1.65728614e-01]
  8623. [-1.35633466e-03 -9.60636213e-02 -8.94494876e-02 ... 1.94221988e-01
  8624. -4.70091067e-02 -9.31773186e-02]
  8625. ...
  8626. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8627. 0.00000000e+00 0.00000000e+00]
  8628. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8629. 0.00000000e+00 0.00000000e+00]
  8630. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8631. 0.00000000e+00 0.00000000e+00]]
  8632. ...
  8633. [[-1.80936769e-01 2.09823474e-02 -1.53481111e-01 ... -6.53458312e-02
  8634. 9.94268879e-02 8.78875237e-03]
  8635. [-1.07081555e-01 1.26294538e-01 -9.78934765e-02 ... -5.38439713e-02
  8636. -5.59990015e-03 1.52285740e-01]
  8637. [ 2.60844707e-01 8.11591521e-02 1.12913184e-01 ... -1.86833683e-02
  8638. -1.93844642e-02 -7.96004198e-03]
  8639. ...
  8640. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8641. 0.00000000e+00 0.00000000e+00]
  8642. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8643. 0.00000000e+00 0.00000000e+00]
  8644. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8645. 0.00000000e+00 0.00000000e+00]]
  8646. [[ 5.65589257e-02 -7.43661374e-02 -2.00723484e-01 ... 1.52545767e-02
  8647. 1.50462063e-02 2.35310309e-02]
  8648. [ 7.43804872e-02 -1.34884328e-01 2.01406017e-01 ... -9.13856328e-02
  8649. -5.48248030e-02 8.11865740e-03]
  8650. [ 1.52915101e-02 -1.20854350e-02 2.73873240e-01 ... -3.24299149e-02
  8651. -6.92289770e-02 -1.53110905e-05]
  8652. ...
  8653. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8654. 0.00000000e+00 0.00000000e+00]
  8655. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8656. 0.00000000e+00 0.00000000e+00]
  8657. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8658. 0.00000000e+00 0.00000000e+00]]
  8659. [[ 5.65589257e-02 -7.43661374e-02 -2.00723484e-01 ... 1.52545767e-02
  8660. 1.50462063e-02 2.35310309e-02]
  8661. [ 7.43804872e-02 -1.34884328e-01 2.01406017e-01 ... -9.13856328e-02
  8662. -5.48248030e-02 8.11865740e-03]
  8663. [ 1.52915101e-02 -1.20854350e-02 2.73873240e-01 ... -3.24299149e-02
  8664. -6.92289770e-02 -1.53110905e-05]
  8665. ...
  8666. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8667. 0.00000000e+00 0.00000000e+00]
  8668. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8669. 0.00000000e+00 0.00000000e+00]
  8670. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8671. 0.00000000e+00 0.00000000e+00]]]]
  8672. Converted tensor:
  8673. [[[[ 1.97370723e-02 -7.89401382e-02 2.40651332e-02 ... -3.46656255e-02
  8674. -1.84460029e-01 1.35031650e-02]
  8675. [-3.90069783e-02 -6.45441785e-02 -9.85123683e-03 ... -7.10528716e-02
  8676. 2.86484867e-01 -4.78143729e-02]
  8677. [-3.32845971e-02 8.48600990e-04 -1.83281731e-02 ... -3.60261202e-02
  8678. 1.16759300e-01 -3.79200131e-02]
  8679. ...
  8680. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8681. 0.00000000e+00 0.00000000e+00]
  8682. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8683. 0.00000000e+00 0.00000000e+00]
  8684. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8685. 0.00000000e+00 0.00000000e+00]]
  8686. [[ 1.97370723e-02 -7.89401382e-02 2.40651332e-02 ... -3.46656255e-02
  8687. -1.84460029e-01 1.35031650e-02]
  8688. [-3.90069783e-02 -6.45441785e-02 -9.85123683e-03 ... -7.10528716e-02
  8689. 2.86484867e-01 -4.78143729e-02]
  8690. [-3.32845971e-02 8.48600990e-04 -1.83281731e-02 ... -3.60261202e-02
  8691. 1.16759300e-01 -3.79200131e-02]
  8692. ...
  8693. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8694. 0.00000000e+00 0.00000000e+00]
  8695. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8696. 0.00000000e+00 0.00000000e+00]
  8697. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8698. 0.00000000e+00 0.00000000e+00]]
  8699. [[-6.58135936e-02 2.45508194e-01 -1.13810301e-02 ... 2.54544709e-03
  8700. 2.51089353e-02 2.86988186e-04]
  8701. [-1.25565395e-01 -7.94794038e-02 -9.97957364e-02 ... 7.12260827e-02
  8702. 9.36592296e-02 -1.65728927e-01]
  8703. [-1.35633559e-03 -9.60636735e-02 -8.94495398e-02 ... 1.94222078e-01
  8704. -4.70091291e-02 -9.31773633e-02]
  8705. ...
  8706. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8707. 0.00000000e+00 0.00000000e+00]
  8708. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8709. 0.00000000e+00 0.00000000e+00]
  8710. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8711. 0.00000000e+00 0.00000000e+00]]
  8712. ...
  8713. [[-1.80937156e-01 2.09823940e-02 -1.53481439e-01 ... -6.53459728e-02
  8714. 9.94271040e-02 8.78877100e-03]
  8715. [-1.07081644e-01 1.26294628e-01 -9.78935510e-02 ... -5.38440198e-02
  8716. -5.59990434e-03 1.52285874e-01]
  8717. [ 2.60844767e-01 8.11591670e-02 1.12913206e-01 ... -1.86833721e-02
  8718. -1.93844680e-02 -7.96004292e-03]
  8719. ...
  8720. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8721. 0.00000000e+00 0.00000000e+00]
  8722. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8723. 0.00000000e+00 0.00000000e+00]
  8724. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8725. 0.00000000e+00 0.00000000e+00]]
  8726. [[ 5.65591007e-02 -7.43663609e-02 -2.00724110e-01 ... 1.52546223e-02
  8727. 1.50462529e-02 2.35311035e-02]
  8728. [ 7.43805990e-02 -1.34884506e-01 2.01406300e-01 ... -9.13857669e-02
  8729. -5.48248850e-02 8.11866950e-03]
  8730. [ 1.52915157e-02 -1.20854378e-02 2.73873329e-01 ... -3.24299261e-02
  8731. -6.92289993e-02 -1.53110959e-05]
  8732. ...
  8733. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8734. 0.00000000e+00 0.00000000e+00]
  8735. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8736. 0.00000000e+00 0.00000000e+00]
  8737. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8738. 0.00000000e+00 0.00000000e+00]]
  8739. [[ 5.65591007e-02 -7.43663609e-02 -2.00724110e-01 ... 1.52546223e-02
  8740. 1.50462529e-02 2.35311035e-02]
  8741. [ 7.43805990e-02 -1.34884506e-01 2.01406300e-01 ... -9.13857669e-02
  8742. -5.48248850e-02 8.11866950e-03]
  8743. [ 1.52915157e-02 -1.20854378e-02 2.73873329e-01 ... -3.24299261e-02
  8744. -6.92289993e-02 -1.53110959e-05]
  8745. ...
  8746. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8747. 0.00000000e+00 0.00000000e+00]
  8748. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8749. 0.00000000e+00 0.00000000e+00]
  8750. [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  8751. 0.00000000e+00 0.00000000e+00]]]]
  8752. Layer 1, Token 1 (q padded comparison):
  8753. Original tensor sum: 8.938188
  8754. Converted tensor sum: 8.938201
  8755. Original tensor mean: 0.000436
  8756. Converted tensor mean: 0.000436
  8757. Mean difference: 0.00000001
  8758. Maximum pointwise difference: 0.00000305
  8759. Max difference location: (0, 16, 0, 8)
  8760. Values at max diff - Original: 0.24272950, Converted: 0.24273255
  8761. Biggest difference in row (0, 16, 0), sum 0.509919 vs 0.509925
  8762. Layer 2, Token 1 (q padded comparison):
  8763. Original tensor sum: -2.985352
  8764. Converted tensor sum: -2.985393
  8765. Original tensor mean: -0.000146
  8766. Converted tensor mean: -0.000146
  8767. Mean difference: 0.00000001
  8768. Maximum pointwise difference: 0.00000104
  8769. Max difference location: (0, 12, 3, 1)
  8770. Values at max diff - Original: -0.02719286, Converted: -0.02719390
  8771. Biggest difference in row (0, 12, 3), sum -0.530951 vs -0.530954
  8772. Layer 4, Token 1 (q padded comparison):
  8773. Original tensor sum: -31.644516
  8774. Converted tensor sum: -31.643524
  8775. Original tensor mean: -0.001545
  8776. Converted tensor mean: -0.001545
  8777. Mean difference: 0.00000028
  8778. Maximum pointwise difference: 0.00006898
  8779. Max difference location: (0, 6, 3, 7)
  8780. Values at max diff - Original: 0.07510993, Converted: 0.07517891
  8781. Biggest difference in row (0, 6, 3), sum -0.645874 vs -0.645761
  8782. Layer 5, Token 1 (q padded comparison):
  8783. Original tensor sum: -30.684572
  8784. Converted tensor sum: -30.685047
  8785. Original tensor mean: -0.001498
  8786. Converted tensor mean: -0.001498
  8787. Mean difference: 0.00000021
  8788. Maximum pointwise difference: 0.00003881
  8789. Max difference location: (0, 30, 3, 0)
  8790. Values at max diff - Original: 0.03456598, Converted: 0.03452717
  8791. Biggest difference in row (0, 30, 3), sum -0.428461 vs -0.428590
  8792. Layer 6, Token 1 (q padded comparison):
  8793. Original tensor sum: -10.008605
  8794. Converted tensor sum: -10.014137
  8795. Original tensor mean: -0.000489
  8796. Converted tensor mean: -0.000489
  8797. Mean difference: 0.00000105
  8798. Maximum pointwise difference: 0.00017181
  8799. Max difference location: (0, 6, 2, 7)
  8800. Values at max diff - Original: 0.01523990, Converted: 0.01506809
  8801. Biggest difference in row (0, 2, 1), sum -0.388271 vs -0.388545
  8802. Layer 8, Token 1 (q padded comparison):
  8803. Original tensor sum: -36.801449
  8804. Converted tensor sum: -36.801811
  8805. Original tensor mean: -0.001797
  8806. Converted tensor mean: -0.001797
  8807. Mean difference: 0.00000098
  8808. Maximum pointwise difference: 0.00025206
  8809. Max difference location: (0, 20, 3, 1)
  8810. Values at max diff - Original: 0.04204723, Converted: 0.04179518
  8811. Biggest difference in row (0, 2, 0), sum -0.275884 vs -0.275609
  8812. Layer 9, Token 1 (q padded comparison):
  8813. Original tensor sum: -37.401527
  8814. Converted tensor sum: -37.397404
  8815. Original tensor mean: -0.001826
  8816. Converted tensor mean: -0.001826
  8817. Mean difference: 0.00000135
  8818. Maximum pointwise difference: 0.00026937
  8819. Max difference location: (0, 20, 2, 2)
  8820. Values at max diff - Original: 0.14496517, Converted: 0.14469580
  8821. Biggest difference in row (0, 20, 3), sum -0.264264 vs -0.264851
  8822. Layer 10, Token 1 (q padded comparison):
  8823. Original tensor sum: -43.546944
  8824. Converted tensor sum: -43.543182
  8825. Original tensor mean: -0.002126
  8826. Converted tensor mean: -0.002126
  8827. Mean difference: 0.00000175
  8828. Maximum pointwise difference: 0.00031144
  8829. Max difference location: (0, 0, 2, 5)
  8830. Values at max diff - Original: -0.03211254, Converted: -0.03180110
  8831. Biggest difference in row (0, 24, 3), sum -0.476393 vs -0.475955
  8832. Layer 12, Token 1 (q padded comparison):
  8833. Original tensor sum: -19.226507
  8834. Converted tensor sum: -19.226831
  8835. Original tensor mean: -0.000939
  8836. Converted tensor mean: -0.000939
  8837. Mean difference: 0.00000116
  8838. Maximum pointwise difference: 0.00020705
  8839. Max difference location: (0, 28, 2, 7)
  8840. Values at max diff - Original: 0.06080329, Converted: 0.06101035
  8841. Biggest difference in row (0, 14, 3), sum -0.455543 vs -0.455054
  8842. Layer 13, Token 1 (q padded comparison):
  8843. Original tensor sum: -36.510368
  8844. Converted tensor sum: -36.510063
  8845. Original tensor mean: -0.001783
  8846. Converted tensor mean: -0.001783
  8847. Mean difference: 0.00000135
  8848. Maximum pointwise difference: 0.00022900
  8849. Max difference location: (0, 16, 2, 1)
  8850. Values at max diff - Original: -0.03357363, Converted: -0.03334463
  8851. Biggest difference in row (0, 18, 2), sum -0.183418 vs -0.183802
  8852. Layer 14, Token 1 (q padded comparison):
  8853. Original tensor sum: -15.543186
  8854. Converted tensor sum: -15.543753
  8855. Original tensor mean: -0.000759
  8856. Converted tensor mean: -0.000759
  8857. Mean difference: 0.00000116
  8858. Maximum pointwise difference: 0.00036725
  8859. Max difference location: (0, 4, 2, 2)
  8860. Values at max diff - Original: 0.05589651, Converted: 0.05552926
  8861. Biggest difference in row (0, 18, 1), sum -0.470654 vs -0.470283
  8862. ================================================================================
  8863. Comparing k padded tensors...
  8864. ================================================================================
  8865. Layer 0, Token 1 (k padded comparison):
  8866. Original tensor sum: -12.851240
  8867. Converted tensor sum: -12.851334
  8868. Original tensor mean: -0.000628
  8869. Converted tensor mean: -0.000628
  8870. Mean difference: 0.00000002
  8871. Maximum pointwise difference: 0.00000304
  8872. Max difference location: (0, 24, 0, 7)
  8873. Values at max diff - Original: -0.57623452, Converted: -0.57623756
  8874. Biggest difference in row (0, 24, 0), sum -1.467058 vs -1.467066
  8875. Original tensor:
  8876. [[[[-0.0023386 0.00352692 -0.13370702 ... -0.18872206 0.09370422
  8877. -0.04139194]
  8878. [ 0.09375711 0.09519143 0.04368615 ... -0.17057192 -0.09237721
  8879. 0.09026651]
  8880. [ 0.19408916 -0.1052211 -0.5198605 ... -0.35431755 -0.18219906
  8881. -0.31666332]
  8882. ...
  8883. [ 0. 0. 0. ... 0. 0.
  8884. 0. ]
  8885. [ 0. 0. 0. ... 0. 0.
  8886. 0. ]
  8887. [ 0. 0. 0. ... 0. 0.
  8888. 0. ]]
  8889. [[-0.0023386 0.00352692 -0.13370702 ... -0.18872206 0.09370422
  8890. -0.04139194]
  8891. [ 0.09375711 0.09519143 0.04368615 ... -0.17057192 -0.09237721
  8892. 0.09026651]
  8893. [ 0.19408916 -0.1052211 -0.5198605 ... -0.35431755 -0.18219906
  8894. -0.31666332]
  8895. ...
  8896. [ 0. 0. 0. ... 0. 0.
  8897. 0. ]
  8898. [ 0. 0. 0. ... 0. 0.
  8899. 0. ]
  8900. [ 0. 0. 0. ... 0. 0.
  8901. 0. ]]
  8902. [[-0.52186674 -0.28046784 -0.03100401 ... 0.12330638 -0.17640771
  8903. -0.10358577]
  8904. [-0.4391339 -0.25189647 0.12411524 ... -0.04670377 0.4796994
  8905. 0.13396528]
  8906. [ 0.80941254 0.33414015 0.10742755 ... -0.17197518 -0.16508798
  8907. -0.20685418]
  8908. ...
  8909. [ 0. 0. 0. ... 0. 0.
  8910. 0. ]
  8911. [ 0. 0. 0. ... 0. 0.
  8912. 0. ]
  8913. [ 0. 0. 0. ... 0. 0.
  8914. 0. ]]
  8915. ...
  8916. [[-0.02867949 0.05648347 0.01508509 ... 0.7403576 -0.30081272
  8917. 0.31962797]
  8918. [ 0.07382206 -0.05249733 0.05087741 ... 0.8205082 -0.03774351
  8919. 0.4122186 ]
  8920. [-0.10616651 -0.07183579 -0.02862857 ... 0.13253474 0.73543155
  8921. 0.63596827]
  8922. ...
  8923. [ 0. 0. 0. ... 0. 0.
  8924. 0. ]
  8925. [ 0. 0. 0. ... 0. 0.
  8926. 0. ]
  8927. [ 0. 0. 0. ... 0. 0.
  8928. 0. ]]
  8929. [[ 0.949689 -0.00939775 -0.0047697 ... -0.04689857 -0.0884609
  8930. -0.20121996]
  8931. [ 0.9700847 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
  8932. -0.14288443]
  8933. [-0.20942387 -0.21343033 -0.00624497 ... 0.05516734 -0.33565474
  8934. 0.75833493]
  8935. ...
  8936. [ 0. 0. 0. ... 0. 0.
  8937. 0. ]
  8938. [ 0. 0. 0. ... 0. 0.
  8939. 0. ]
  8940. [ 0. 0. 0. ... 0. 0.
  8941. 0. ]]
  8942. [[ 0.949689 -0.00939775 -0.0047697 ... -0.04689857 -0.0884609
  8943. -0.20121996]
  8944. [ 0.9700847 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
  8945. -0.14288443]
  8946. [-0.20942387 -0.21343033 -0.00624497 ... 0.05516734 -0.33565474
  8947. 0.75833493]
  8948. ...
  8949. [ 0. 0. 0. ... 0. 0.
  8950. 0. ]
  8951. [ 0. 0. 0. ... 0. 0.
  8952. 0. ]
  8953. [ 0. 0. 0. ... 0. 0.
  8954. 0. ]]]]
  8955. Converted tensor:
  8956. [[[[-0.0023386 0.00352692 -0.13370706 ... -0.18872213 0.09370426
  8957. -0.04139195]
  8958. [ 0.09375713 0.09519145 0.04368616 ... -0.17057195 -0.09237722
  8959. 0.09026653]
  8960. [ 0.19408953 -0.10522129 -0.5198614 ... -0.3543182 -0.18219939
  8961. -0.31666392]
  8962. ...
  8963. [ 0. 0. 0. ... 0. 0.
  8964. 0. ]
  8965. [ 0. 0. 0. ... 0. 0.
  8966. 0. ]
  8967. [ 0. 0. 0. ... 0. 0.
  8968. 0. ]]
  8969. [[-0.0023386 0.00352692 -0.13370706 ... -0.18872213 0.09370426
  8970. -0.04139195]
  8971. [ 0.09375713 0.09519145 0.04368616 ... -0.17057195 -0.09237722
  8972. 0.09026653]
  8973. [ 0.19408953 -0.10522129 -0.5198614 ... -0.3543182 -0.18219939
  8974. -0.31666392]
  8975. ...
  8976. [ 0. 0. 0. ... 0. 0.
  8977. 0. ]
  8978. [ 0. 0. 0. ... 0. 0.
  8979. 0. ]
  8980. [ 0. 0. 0. ... 0. 0.
  8981. 0. ]]
  8982. [[-0.5218679 -0.28046846 -0.03100408 ... 0.12330665 -0.1764081
  8983. -0.10358601]
  8984. [-0.43913472 -0.25189692 0.12411546 ... -0.04670386 0.47970027
  8985. 0.1339655 ]
  8986. [ 0.80941284 0.33414027 0.10742759 ... -0.17197524 -0.16508804
  8987. -0.20685425]
  8988. ...
  8989. [ 0. 0. 0. ... 0. 0.
  8990. 0. ]
  8991. [ 0. 0. 0. ... 0. 0.
  8992. 0. ]
  8993. [ 0. 0. 0. ... 0. 0.
  8994. 0. ]]
  8995. ...
  8996. [[-0.02867951 0.0564835 0.0150851 ... 0.74035805 -0.30081287
  8997. 0.31962818]
  8998. [ 0.07382207 -0.05249734 0.05087743 ... 0.82050836 -0.03774352
  8999. 0.41221875]
  9000. [-0.10616651 -0.07183579 -0.02862857 ... 0.13253474 0.73543155
  9001. 0.6359683 ]
  9002. ...
  9003. [ 0. 0. 0. ... 0. 0.
  9004. 0. ]
  9005. [ 0. 0. 0. ... 0. 0.
  9006. 0. ]
  9007. [ 0. 0. 0. ... 0. 0.
  9008. 0. ]]
  9009. [[ 0.94968927 -0.00939775 -0.0047697 ... -0.04689858 -0.08846093
  9010. -0.20122004]
  9011. [ 0.97008485 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
  9012. -0.14288445]
  9013. [-0.20942406 -0.21343052 -0.00624497 ... 0.05516739 -0.33565506
  9014. 0.7583357 ]
  9015. ...
  9016. [ 0. 0. 0. ... 0. 0.
  9017. 0. ]
  9018. [ 0. 0. 0. ... 0. 0.
  9019. 0. ]
  9020. [ 0. 0. 0. ... 0. 0.
  9021. 0. ]]
  9022. [[ 0.94968927 -0.00939775 -0.0047697 ... -0.04689858 -0.08846093
  9023. -0.20122004]
  9024. [ 0.97008485 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
  9025. -0.14288445]
  9026. [-0.20942406 -0.21343052 -0.00624497 ... 0.05516739 -0.33565506
  9027. 0.7583357 ]
  9028. ...
  9029. [ 0. 0. 0. ... 0. 0.
  9030. 0. ]
  9031. [ 0. 0. 0. ... 0. 0.
  9032. 0. ]
  9033. [ 0. 0. 0. ... 0. 0.
  9034. 0. ]]]]
  9035. Layer 1, Token 1 (k padded comparison):
  9036. Original tensor sum: 46.146324
  9037. Converted tensor sum: 46.146336
  9038. Original tensor mean: 0.002253
  9039. Converted tensor mean: 0.002253
  9040. Mean difference: 0.00000002
  9041. Maximum pointwise difference: 0.00001496
  9042. Max difference location: (0, 24, 0, 4)
  9043. Values at max diff - Original: -0.75322348, Converted: -0.75323844
  9044. Biggest difference in row (0, 4, 0), sum -1.893247 vs -1.893263
  9045. Layer 2, Token 1 (k padded comparison):
  9046. Original tensor sum: 38.402348
  9047. Converted tensor sum: 38.402321
  9048. Original tensor mean: 0.001875
  9049. Converted tensor mean: 0.001875
  9050. Mean difference: 0.00000002
  9051. Maximum pointwise difference: 0.00000370
  9052. Max difference location: (0, 4, 0, 1)
  9053. Values at max diff - Original: 0.75365573, Converted: 0.75365943
  9054. Biggest difference in row (0, 8, 0), sum -1.569355 vs -1.569358
  9055. Layer 4, Token 1 (k padded comparison):
  9056. Original tensor sum: -80.321693
  9057. Converted tensor sum: -80.319084
  9058. Original tensor mean: -0.003922
  9059. Converted tensor mean: -0.003922
  9060. Mean difference: 0.00000094
  9061. Maximum pointwise difference: 0.00016582
  9062. Max difference location: (0, 12, 1, 2)
  9063. Values at max diff - Original: 0.42303348, Converted: 0.42286766
  9064. Biggest difference in row (0, 4, 3), sum -0.373179 vs -0.372919
  9065. Layer 5, Token 1 (k padded comparison):
  9066. Original tensor sum: -101.494308
  9067. Converted tensor sum: -101.496490
  9068. Original tensor mean: -0.004956
  9069. Converted tensor mean: -0.004956
  9070. Mean difference: 0.00000073
  9071. Maximum pointwise difference: 0.00011382
  9072. Max difference location: (0, 8, 3, 4)
  9073. Values at max diff - Original: -0.06280152, Converted: -0.06291535
  9074. Biggest difference in row (0, 24, 2), sum -1.003613 vs -1.003973
  9075. Layer 6, Token 1 (k padded comparison):
  9076. Original tensor sum: -60.378914
  9077. Converted tensor sum: -60.399891
  9078. Original tensor mean: -0.002948
  9079. Converted tensor mean: -0.002949
  9080. Mean difference: 0.00000342
  9081. Maximum pointwise difference: 0.00096719
  9082. Max difference location: (0, 8, 1, 5)
  9083. Values at max diff - Original: 0.19049226, Converted: 0.19145945
  9084. Biggest difference in row (0, 20, 0), sum -1.118855 vs -1.120621
  9085. Layer 8, Token 1 (k padded comparison):
  9086. Original tensor sum: -61.474350
  9087. Converted tensor sum: -61.483994
  9088. Original tensor mean: -0.003002
  9089. Converted tensor mean: -0.003002
  9090. Mean difference: 0.00000346
  9091. Maximum pointwise difference: 0.00061786
  9092. Max difference location: (0, 8, 2, 7)
  9093. Values at max diff - Original: 0.35214049, Converted: 0.35275835
  9094. Biggest difference in row (0, 20, 3), sum -0.407597 vs -0.408426
  9095. Layer 9, Token 1 (k padded comparison):
  9096. Original tensor sum: -110.836624
  9097. Converted tensor sum: -110.841522
  9098. Original tensor mean: -0.005412
  9099. Converted tensor mean: -0.005412
  9100. Mean difference: 0.00000378
  9101. Maximum pointwise difference: 0.00051466
  9102. Max difference location: (0, 18, 1, 8)
  9103. Values at max diff - Original: 0.40876523, Converted: 0.40927988
  9104. Biggest difference in row (0, 28, 3), sum -0.911474 vs -0.910520
  9105. Layer 10, Token 1 (k padded comparison):
  9106. Original tensor sum: -90.985107
  9107. Converted tensor sum: -90.978966
  9108. Original tensor mean: -0.004443
  9109. Converted tensor mean: -0.004442
  9110. Mean difference: 0.00000465
  9111. Maximum pointwise difference: 0.00078443
  9112. Max difference location: (0, 18, 3, 6)
  9113. Values at max diff - Original: 0.38864151, Converted: 0.38785708
  9114. Biggest difference in row (0, 18, 3), sum -0.245571 vs -0.247415
  9115. Layer 12, Token 1 (k padded comparison):
  9116. Original tensor sum: -80.152397
  9117. Converted tensor sum: -80.143387
  9118. Original tensor mean: -0.003914
  9119. Converted tensor mean: -0.003913
  9120. Mean difference: 0.00000377
  9121. Maximum pointwise difference: 0.00053528
  9122. Max difference location: (0, 4, 2, 6)
  9123. Values at max diff - Original: 0.33732986, Converted: 0.33786514
  9124. Biggest difference in row (0, 26, 2), sum -2.083733 vs -2.084640
  9125. Layer 13, Token 1 (k padded comparison):
  9126. Original tensor sum: -149.692871
  9127. Converted tensor sum: -149.699692
  9128. Original tensor mean: -0.007309
  9129. Converted tensor mean: -0.007310
  9130. Mean difference: 0.00000382
  9131. Maximum pointwise difference: 0.00069700
  9132. Max difference location: (0, 24, 2, 1)
  9133. Values at max diff - Original: 0.03209215, Converted: 0.03139514
  9134. Biggest difference in row (0, 18, 3), sum -1.337807 vs -1.338803
  9135. Layer 14, Token 1 (k padded comparison):
  9136. Original tensor sum: -158.503815
  9137. Converted tensor sum: -158.505280
  9138. Original tensor mean: -0.007739
  9139. Converted tensor mean: -0.007740
  9140. Mean difference: 0.00000406
  9141. Maximum pointwise difference: 0.00088650
  9142. Max difference location: (0, 18, 3, 0)
  9143. Values at max diff - Original: 0.31103787, Converted: 0.31192437
  9144. Biggest difference in row (0, 24, 2), sum -2.245067 vs -2.246189
  9145. ================================================================================
  9146. Comparing v padded tensors...
  9147. ================================================================================
  9148. Layer 0, Token 1 (v padded comparison):
  9149. Original tensor sum: 43.396095
  9150. Converted tensor sum: 43.396103
  9151. Original tensor mean: 0.002119
  9152. Converted tensor mean: 0.002119
  9153. Mean difference: 0.00000000
  9154. Maximum pointwise difference: 0.00000024
  9155. Max difference location: (0, 4, 3, 1)
  9156. Values at max diff - Original: 3.02466559, Converted: 3.02466583
  9157. Biggest difference in row (0, 4, 3), sum 4.080367 vs 4.080368
  9158. Original tensor:
  9159. [[[[ 0.29945952 0.07364164 0.00633647 ... -0.03352018 -0.13518293
  9160. -0.24422395]
  9161. [-0.06384649 0.34527305 0.05128174 ... 0.10202903 -0.27791512
  9162. -0.26350227]
  9163. [ 0.32036152 -0.10731668 -0.13258429 ... 0.7373227 -0.21349299
  9164. 0.09487297]
  9165. ...
  9166. [ 0. 0. 0. ... 0. 0.
  9167. 0. ]
  9168. [ 0. 0. 0. ... 0. 0.
  9169. 0. ]
  9170. [ 0. 0. 0. ... 0. 0.
  9171. 0. ]]
  9172. [[ 0.09463742 0.3331761 0.04175158 ... -0.16139531 0.14495076
  9173. -0.23538315]
  9174. [ 0.0059099 -0.22937416 -0.01920018 ... -0.2725759 0.3779854
  9175. -0.25018957]
  9176. [-0.02874102 -0.1163442 -0.06129871 ... -0.24273473 -0.2218994
  9177. 0.09502672]
  9178. ...
  9179. [ 0. 0. 0. ... 0. 0.
  9180. 0. ]
  9181. [ 0. 0. 0. ... 0. 0.
  9182. 0. ]
  9183. [ 0. 0. 0. ... 0. 0.
  9184. 0. ]]
  9185. [[-0.01040334 -0.16231607 -0.19213551 ... 0.26839197 -0.14292948
  9186. -0.0833158 ]
  9187. [-0.22485131 -0.26889268 -0.03555897 ... -0.26755306 -0.27845183
  9188. -0.15565467]
  9189. [-0.27764964 2.820727 -0.24290419 ... 0.12924032 -0.22718066
  9190. 0.06345078]
  9191. ...
  9192. [ 0. 0. 0. ... 0. 0.
  9193. 0. ]
  9194. [ 0. 0. 0. ... 0. 0.
  9195. 0. ]
  9196. [ 0. 0. 0. ... 0. 0.
  9197. 0. ]]
  9198. ...
  9199. [[ 0.05224958 -0.27178496 0.02280007 ... -0.17813048 -0.00848302
  9200. 0.3436797 ]
  9201. [-0.23870829 0.0102903 0.09486482 ... -0.17058551 0.10059616
  9202. 0.45001176]
  9203. [-0.24846101 1.1912329 -0.26268318 ... 0.148858 0.10272522
  9204. 0.21719539]
  9205. ...
  9206. [ 0. 0. 0. ... 0. 0.
  9207. 0. ]
  9208. [ 0. 0. 0. ... 0. 0.
  9209. 0. ]
  9210. [ 0. 0. 0. ... 0. 0.
  9211. 0. ]]
  9212. [[-0.20595089 -0.02217443 0.01070492 ... 0.00675152 0.02506094
  9213. -0.0267982 ]
  9214. [-0.21499586 -0.25627282 -0.07001566 ... 0.00795406 -0.02202371
  9215. -0.01158573]
  9216. [ 0.04917984 -0.27141818 -0.26334 ... -0.09943416 0.03347556
  9217. 0.10718762]
  9218. ...
  9219. [ 0. 0. 0. ... 0. 0.
  9220. 0. ]
  9221. [ 0. 0. 0. ... 0. 0.
  9222. 0. ]
  9223. [ 0. 0. 0. ... 0. 0.
  9224. 0. ]]
  9225. [[ 0.1791143 -0.0034847 0.9858279 ... 0.19559488 -0.0804936
  9226. -0.01883564]
  9227. [-0.17319466 0.07188834 -0.26032022 ... -0.04845351 -0.24498041
  9228. 0.12539098]
  9229. [ 0.00640415 -0.22212675 -0.22916575 ... -0.170733 0.5452839
  9230. -0.14139794]
  9231. ...
  9232. [ 0. 0. 0. ... 0. 0.
  9233. 0. ]
  9234. [ 0. 0. 0. ... 0. 0.
  9235. 0. ]
  9236. [ 0. 0. 0. ... 0. 0.
  9237. 0. ]]]]
  9238. Converted tensor:
  9239. [[[[ 0.29945952 0.07364164 0.00633647 ... -0.03352018 -0.13518293
  9240. -0.24422395]
  9241. [-0.06384649 0.34527302 0.05128174 ... 0.10202905 -0.27791512
  9242. -0.26350227]
  9243. [ 0.3203615 -0.10731667 -0.13258429 ... 0.7373226 -0.213493
  9244. 0.09487297]
  9245. ...
  9246. [ 0. 0. 0. ... 0. 0.
  9247. 0. ]
  9248. [ 0. 0. 0. ... 0. 0.
  9249. 0. ]
  9250. [ 0. 0. 0. ... 0. 0.
  9251. 0. ]]
  9252. [[ 0.09463742 0.33317608 0.04175158 ... -0.16139533 0.14495076
  9253. -0.23538315]
  9254. [ 0.0059099 -0.22937416 -0.01920018 ... -0.27257589 0.3779854
  9255. -0.25018957]
  9256. [-0.02874102 -0.11634421 -0.06129871 ... -0.24273473 -0.22189939
  9257. 0.09502671]
  9258. ...
  9259. [ 0. 0. 0. ... 0. 0.
  9260. 0. ]
  9261. [ 0. 0. 0. ... 0. 0.
  9262. 0. ]
  9263. [ 0. 0. 0. ... 0. 0.
  9264. 0. ]]
  9265. [[-0.01040334 -0.16231604 -0.19213554 ... 0.268392 -0.14292948
  9266. -0.0833158 ]
  9267. [-0.22485131 -0.26889268 -0.03555898 ... -0.26755306 -0.27845183
  9268. -0.15565467]
  9269. [-0.27764964 2.820727 -0.24290417 ... 0.12924033 -0.22718067
  9270. 0.06345078]
  9271. ...
  9272. [ 0. 0. 0. ... 0. 0.
  9273. 0. ]
  9274. [ 0. 0. 0. ... 0. 0.
  9275. 0. ]
  9276. [ 0. 0. 0. ... 0. 0.
  9277. 0. ]]
  9278. ...
  9279. [[ 0.05224958 -0.27178493 0.02280007 ... -0.17813048 -0.00848302
  9280. 0.34367973]
  9281. [-0.23870829 0.0102903 0.09486482 ... -0.17058551 0.10059617
  9282. 0.45001176]
  9283. [-0.248461 1.1912329 -0.26268318 ... 0.148858 0.10272522
  9284. 0.21719539]
  9285. ...
  9286. [ 0. 0. 0. ... 0. 0.
  9287. 0. ]
  9288. [ 0. 0. 0. ... 0. 0.
  9289. 0. ]
  9290. [ 0. 0. 0. ... 0. 0.
  9291. 0. ]]
  9292. [[-0.20595089 -0.02217443 0.01070492 ... 0.00675152 0.02506094
  9293. -0.0267982 ]
  9294. [-0.21499586 -0.2562728 -0.07001566 ... 0.00795406 -0.02202371
  9295. -0.01158573]
  9296. [ 0.04917984 -0.27141815 -0.26334 ... -0.09943416 0.03347556
  9297. 0.10718761]
  9298. ...
  9299. [ 0. 0. 0. ... 0. 0.
  9300. 0. ]
  9301. [ 0. 0. 0. ... 0. 0.
  9302. 0. ]
  9303. [ 0. 0. 0. ... 0. 0.
  9304. 0. ]]
  9305. [[ 0.1791143 -0.0034847 0.985828 ... 0.19559486 -0.08049361
  9306. -0.01883564]
  9307. [-0.17319466 0.07188834 -0.2603202 ... -0.04845351 -0.24498038
  9308. 0.12539098]
  9309. [ 0.00640414 -0.22212675 -0.22916573 ... -0.17073299 0.5452839
  9310. -0.14139794]
  9311. ...
  9312. [ 0. 0. 0. ... 0. 0.
  9313. 0. ]
  9314. [ 0. 0. 0. ... 0. 0.
  9315. 0. ]
  9316. [ 0. 0. 0. ... 0. 0.
  9317. 0. ]]]]
  9318. Layer 1, Token 1 (v padded comparison):
  9319. Original tensor sum: 64.583611
  9320. Converted tensor sum: 64.583618
  9321. Original tensor mean: 0.003153
  9322. Converted tensor mean: 0.003153
  9323. Mean difference: 0.00000000
  9324. Maximum pointwise difference: 0.00000083
  9325. Max difference location: (0, 31, 2, 0)
  9326. Values at max diff - Original: 1.71371531, Converted: 1.71371615
  9327. Biggest difference in row (0, 14, 2), sum 3.047640 vs 3.047641
  9328. Layer 2, Token 1 (v padded comparison):
  9329. Original tensor sum: 79.718636
  9330. Converted tensor sum: 79.718628
  9331. Original tensor mean: 0.003893
  9332. Converted tensor mean: 0.003893
  9333. Mean difference: 0.00000002
  9334. Maximum pointwise difference: 0.00000691
  9335. Max difference location: (0, 3, 3, 0)
  9336. Values at max diff - Original: 3.08589840, Converted: 3.08589149
  9337. Biggest difference in row (0, 3, 3), sum 5.127280 vs 5.127275
  9338. Layer 4, Token 1 (v padded comparison):
  9339. Original tensor sum: -6.421658
  9340. Converted tensor sum: -6.417439
  9341. Original tensor mean: -0.000314
  9342. Converted tensor mean: -0.000313
  9343. Mean difference: 0.00000083
  9344. Maximum pointwise difference: 0.00020146
  9345. Max difference location: (0, 3, 3, 9)
  9346. Values at max diff - Original: 0.71459866, Converted: 0.71439719
  9347. Biggest difference in row (0, 2, 2), sum 1.330729 vs 1.330986
  9348. Layer 5, Token 1 (v padded comparison):
  9349. Original tensor sum: -22.732481
  9350. Converted tensor sum: -22.732681
  9351. Original tensor mean: -0.001110
  9352. Converted tensor mean: -0.001110
  9353. Mean difference: 0.00000057
  9354. Maximum pointwise difference: 0.00014561
  9355. Max difference location: (0, 5, 2, 8)
  9356. Values at max diff - Original: 0.86213899, Converted: 0.86199337
  9357. Biggest difference in row (0, 5, 2), sum 0.321165 vs 0.320951
  9358. Layer 6, Token 1 (v padded comparison):
  9359. Original tensor sum: 79.420486
  9360. Converted tensor sum: 79.392494
  9361. Original tensor mean: 0.003878
  9362. Converted tensor mean: 0.003877
  9363. Mean difference: 0.00000437
  9364. Maximum pointwise difference: 0.00160646
  9365. Max difference location: (0, 28, 3, 8)
  9366. Values at max diff - Original: 3.32436800, Converted: 3.32276154
  9367. Biggest difference in row (0, 8, 2), sum 5.307434 vs 5.305095
  9368. Layer 8, Token 1 (v padded comparison):
  9369. Original tensor sum: 56.337997
  9370. Converted tensor sum: 56.328655
  9371. Original tensor mean: 0.002751
  9372. Converted tensor mean: 0.002750
  9373. Mean difference: 0.00000345
  9374. Maximum pointwise difference: 0.00109446
  9375. Max difference location: (0, 27, 3, 8)
  9376. Values at max diff - Original: 1.29648387, Converted: 1.29538941
  9377. Biggest difference in row (0, 0, 2), sum 3.391128 vs 3.390095
  9378. Layer 9, Token 1 (v padded comparison):
  9379. Original tensor sum: -60.833374
  9380. Converted tensor sum: -60.822338
  9381. Original tensor mean: -0.002970
  9382. Converted tensor mean: -0.002970
  9383. Mean difference: 0.00000277
  9384. Maximum pointwise difference: 0.00082873
  9385. Max difference location: (0, 4, 2, 0)
  9386. Values at max diff - Original: 0.17745507, Converted: 0.17828380
  9387. Biggest difference in row (0, 29, 3), sum -0.619908 vs -0.618863
  9388. Layer 10, Token 1 (v padded comparison):
  9389. Original tensor sum: -61.881168
  9390. Converted tensor sum: -61.881893
  9391. Original tensor mean: -0.003022
  9392. Converted tensor mean: -0.003022
  9393. Mean difference: 0.00000326
  9394. Maximum pointwise difference: 0.00088513
  9395. Max difference location: (0, 18, 3, 1)
  9396. Values at max diff - Original: 0.75186056, Converted: 0.75097543
  9397. Biggest difference in row (0, 1, 2), sum -0.687588 vs -0.688463
  9398. Layer 12, Token 1 (v padded comparison):
  9399. Original tensor sum: -25.326912
  9400. Converted tensor sum: -25.328352
  9401. Original tensor mean: -0.001237
  9402. Converted tensor mean: -0.001237
  9403. Mean difference: 0.00000326
  9404. Maximum pointwise difference: 0.00108600
  9405. Max difference location: (0, 26, 1, 1)
  9406. Values at max diff - Original: 2.54334521, Converted: 2.54225922
  9407. Biggest difference in row (0, 16, 2), sum 1.421780 vs 1.420637
  9408. Layer 13, Token 1 (v padded comparison):
  9409. Original tensor sum: -76.935516
  9410. Converted tensor sum: -76.941040
  9411. Original tensor mean: -0.003757
  9412. Converted tensor mean: -0.003757
  9413. Mean difference: 0.00000263
  9414. Maximum pointwise difference: 0.00127554
  9415. Max difference location: (0, 19, 1, 3)
  9416. Values at max diff - Original: 2.36973763, Converted: 2.36846209
  9417. Biggest difference in row (0, 19, 1), sum 1.449438 vs 1.448400
  9418. Layer 14, Token 1 (v padded comparison):
  9419. Original tensor sum: -45.008949
  9420. Converted tensor sum: -45.003647
  9421. Original tensor mean: -0.002198
  9422. Converted tensor mean: -0.002197
  9423. Mean difference: 0.00000327
  9424. Maximum pointwise difference: 0.00136590
  9425. Max difference location: (0, 28, 3, 5)
  9426. Values at max diff - Original: 2.56902742, Converted: 2.56766152
  9427. Biggest difference in row (0, 28, 3), sum 1.363533 vs 1.361795
  9428. ================================================================================
  9429. SUMMARY:
  9430. Total comparisons attempted: 876
  9431. Successful comparisons: 875
  9432. Failed comparisons: 1
  9433. Maximum difference statistics:
  9434. Min max difference: 0.00000024
  9435. Max max difference: 235.55526733
  9436. Mean of max differences: 18.71273422
  9437. Median of max differences: 5.37744808
  9438. Comparisons with diff > 1e-5: 804/875