|
|
@@ -0,0 +1,10393 @@
|
|
|
+Comparing tensors between original and converted GGML models...
|
|
|
+Tokens: 30, Layers: 16
|
|
|
+================================================================================
|
|
|
+
|
|
|
+================================================================================
|
|
|
+Comparing model.layers.out tensors...
|
|
|
+================================================================================
|
|
|
+
|
|
|
+Layer 0, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -109.202682
|
|
|
+ Converted tensor sum: -109.202667
|
|
|
+ Original tensor mean: -3.412584
|
|
|
+ Converted tensor mean: -3.412583
|
|
|
+ Mean difference: 0.00000112
|
|
|
+ Maximum pointwise difference: 0.00000358
|
|
|
+ Max difference location: (0, 3, 2)
|
|
|
+ Values at max diff - Original: -3.23131371, Converted: -3.23131013
|
|
|
+ Biggest difference in row (0, 3), sum -70.622650 vs -70.622643
|
|
|
+Original tensor:
|
|
|
+
|
|
|
+[[[ 0.53282046 0.45114386 2.2156353 -0.5117184 -1.6482054
|
|
|
+ 4.6376505 -2.9421384 -3.4354253 ]
|
|
|
+ [ -8.487997 -5.323722 -4.790135 -8.482631 4.4259453
|
|
|
+ -0.7649012 -5.2080426 -3.5365663 ]
|
|
|
+ [ -2.8659308 -0.7302124 3.4494972 -0.7121358 -4.4744496
|
|
|
+ 1.4391303 -1.05655 -0.76109344]
|
|
|
+ [-10.8983 -11.325392 -3.2313137 -11.594204 -13.007862
|
|
|
+ -6.099822 -13.027901 -1.4378595 ]]]
|
|
|
+
|
|
|
+Converted tensor:
|
|
|
+
|
|
|
+[[[ 0.53281975 0.45114377 2.215636 -0.5117179 -1.6482062
|
|
|
+ 4.6376514 -2.942138 -3.4354265 ]
|
|
|
+ [ -8.487997 -5.323724 -4.7901373 -8.48263 4.425948
|
|
|
+ -0.7649009 -5.208041 -3.5365672 ]
|
|
|
+ [ -2.8659306 -0.7302135 3.4494982 -0.7121362 -4.474449
|
|
|
+ 1.4391313 -1.0565499 -0.7610918 ]
|
|
|
+ [-10.898299 -11.325391 -3.2313101 -11.594204 -13.00786
|
|
|
+ -6.099819 -13.027899 -1.437861 ]]]
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+Layer 1, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -132.672058
|
|
|
+ Converted tensor sum: -132.672043
|
|
|
+ Original tensor mean: -4.146002
|
|
|
+ Converted tensor mean: -4.146001
|
|
|
+ Mean difference: 0.00000322
|
|
|
+ Maximum pointwise difference: 0.00000763
|
|
|
+ Max difference location: (0, 1, 0)
|
|
|
+ Values at max diff - Original: -8.45331192, Converted: -8.45330429
|
|
|
+ Biggest difference in row (0, 2), sum 8.045303 vs 8.045274
|
|
|
+
|
|
|
+Layer 2, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -123.594589
|
|
|
+ Converted tensor sum: -123.594765
|
|
|
+ Original tensor mean: -3.862331
|
|
|
+ Converted tensor mean: -3.862336
|
|
|
+ Mean difference: 0.00001101
|
|
|
+ Maximum pointwise difference: 0.00005722
|
|
|
+ Max difference location: (0, 3, 0)
|
|
|
+ Values at max diff - Original: -14.73531914, Converted: -14.73537636
|
|
|
+ Biggest difference in row (0, 3), sum -100.578644 vs -100.578781
|
|
|
+
|
|
|
+Layer 3, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1014.197754
|
|
|
+ Converted tensor sum: -1014.208618
|
|
|
+ Original tensor mean: -31.693680
|
|
|
+ Converted tensor mean: -31.694019
|
|
|
+ Mean difference: 0.00261304
|
|
|
+ Maximum pointwise difference: 0.00854874
|
|
|
+ Max difference location: (0, 3, 4)
|
|
|
+ Values at max diff - Original: -47.59802246, Converted: -47.60657120
|
|
|
+ Biggest difference in row (0, 3), sum -413.478455 vs -413.514832
|
|
|
+
|
|
|
+Layer 4, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -974.648987
|
|
|
+ Converted tensor sum: -974.659424
|
|
|
+ Original tensor mean: -30.457781
|
|
|
+ Converted tensor mean: -30.458107
|
|
|
+ Mean difference: 0.00296569
|
|
|
+ Maximum pointwise difference: 0.00885773
|
|
|
+ Max difference location: (0, 3, 4)
|
|
|
+ Values at max diff - Original: -45.65669632, Converted: -45.66555405
|
|
|
+ Biggest difference in row (0, 3), sum -380.904694 vs -380.942291
|
|
|
+
|
|
|
+Layer 5, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -842.923950
|
|
|
+ Converted tensor sum: -842.923950
|
|
|
+ Original tensor mean: -26.341373
|
|
|
+ Converted tensor mean: -26.341373
|
|
|
+ Mean difference: 0.00327585
|
|
|
+ Maximum pointwise difference: 0.00857162
|
|
|
+ Max difference location: (0, 3, 4)
|
|
|
+ Values at max diff - Original: -47.09656525, Converted: -47.10513687
|
|
|
+ Biggest difference in row (0, 3), sum -366.704346 vs -366.739746
|
|
|
+
|
|
|
+Layer 6, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -940.556580
|
|
|
+ Converted tensor sum: -940.507812
|
|
|
+ Original tensor mean: -29.392393
|
|
|
+ Converted tensor mean: -29.390869
|
|
|
+ Mean difference: 0.00368834
|
|
|
+ Maximum pointwise difference: 0.00840378
|
|
|
+ Max difference location: (0, 3, 4)
|
|
|
+ Values at max diff - Original: -49.34116364, Converted: -49.34956741
|
|
|
+ Biggest difference in row (0, 2), sum -130.006729 vs -129.970612
|
|
|
+
|
|
|
+Layer 7, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1838.171143
|
|
|
+ Converted tensor sum: -1838.228271
|
|
|
+ Original tensor mean: -57.442848
|
|
|
+ Converted tensor mean: -57.444633
|
|
|
+ Mean difference: 0.00574541
|
|
|
+ Maximum pointwise difference: 0.01725769
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -91.24589539, Converted: -91.26315308
|
|
|
+ Biggest difference in row (0, 0), sum -622.551270 vs -622.626587
|
|
|
+
|
|
|
+Layer 8, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1890.751709
|
|
|
+ Converted tensor sum: -1890.670898
|
|
|
+ Original tensor mean: -59.085991
|
|
|
+ Converted tensor mean: -59.083466
|
|
|
+ Mean difference: 0.01148558
|
|
|
+ Maximum pointwise difference: 0.05082703
|
|
|
+ Max difference location: (0, 2, 6)
|
|
|
+ Values at max diff - Original: -49.12084961, Converted: -49.07002258
|
|
|
+ Biggest difference in row (0, 2), sum -356.818451 vs -356.663208
|
|
|
+
|
|
|
+Layer 9, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1949.811523
|
|
|
+ Converted tensor sum: -1949.711426
|
|
|
+ Original tensor mean: -60.931610
|
|
|
+ Converted tensor mean: -60.928482
|
|
|
+ Mean difference: 0.01115143
|
|
|
+ Maximum pointwise difference: 0.04758072
|
|
|
+ Max difference location: (0, 2, 6)
|
|
|
+ Values at max diff - Original: -49.22105789, Converted: -49.17347717
|
|
|
+ Biggest difference in row (0, 2), sum -367.878845 vs -367.720154
|
|
|
+
|
|
|
+Layer 10, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1955.402832
|
|
|
+ Converted tensor sum: -1955.281250
|
|
|
+ Original tensor mean: -61.106339
|
|
|
+ Converted tensor mean: -61.102539
|
|
|
+ Mean difference: 0.01230341
|
|
|
+ Maximum pointwise difference: 0.04833603
|
|
|
+ Max difference location: (0, 2, 6)
|
|
|
+ Values at max diff - Original: -43.91606140, Converted: -43.86772537
|
|
|
+ Biggest difference in row (0, 2), sum -370.409668 vs -370.259583
|
|
|
+
|
|
|
+Layer 11, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -3642.472900
|
|
|
+ Converted tensor sum: -3642.428711
|
|
|
+ Original tensor mean: -113.827278
|
|
|
+ Converted tensor mean: -113.825897
|
|
|
+ Mean difference: 0.01628518
|
|
|
+ Maximum pointwise difference: 0.05126190
|
|
|
+ Max difference location: (0, 2, 6)
|
|
|
+ Values at max diff - Original: -94.39852142, Converted: -94.34725952
|
|
|
+ Biggest difference in row (0, 2), sum -786.509460 vs -786.331726
|
|
|
+
|
|
|
+Layer 12, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -3739.976807
|
|
|
+ Converted tensor sum: -3739.936035
|
|
|
+ Original tensor mean: -116.874275
|
|
|
+ Converted tensor mean: -116.873001
|
|
|
+ Mean difference: 0.01711488
|
|
|
+ Maximum pointwise difference: 0.05059052
|
|
|
+ Max difference location: (0, 2, 6)
|
|
|
+ Values at max diff - Original: -95.09668732, Converted: -95.04609680
|
|
|
+ Biggest difference in row (0, 2), sum -816.550781 vs -816.352295
|
|
|
+
|
|
|
+Layer 13, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -3821.749268
|
|
|
+ Converted tensor sum: -3821.721680
|
|
|
+ Original tensor mean: -119.429665
|
|
|
+ Converted tensor mean: -119.428802
|
|
|
+ Mean difference: 0.01747012
|
|
|
+ Maximum pointwise difference: 0.05052948
|
|
|
+ Max difference location: (0, 2, 7)
|
|
|
+ Values at max diff - Original: -79.35634613, Converted: -79.30581665
|
|
|
+ Biggest difference in row (0, 2), sum -840.805908 vs -840.616699
|
|
|
+
|
|
|
+Layer 14, Token 1 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -4057.451904
|
|
|
+ Converted tensor sum: -4057.284668
|
|
|
+ Original tensor mean: -126.795372
|
|
|
+ Converted tensor mean: -126.790146
|
|
|
+ Mean difference: 0.01935625
|
|
|
+ Maximum pointwise difference: 0.07952881
|
|
|
+ Max difference location: (0, 2, 6)
|
|
|
+ Values at max diff - Original: -97.11465454, Converted: -97.03512573
|
|
|
+ Biggest difference in row (0, 2), sum -917.124573 vs -916.826172
|
|
|
+Error processing model.layers.out layer 15, token 1: cannot reshape array of size 8 into shape (1,4,8)
|
|
|
+
|
|
|
+Layer 0, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -7.280505
|
|
|
+ Converted tensor sum: -7.280507
|
|
|
+ Original tensor mean: -0.910063
|
|
|
+ Converted tensor mean: -0.910063
|
|
|
+ Mean difference: 0.00000097
|
|
|
+ Maximum pointwise difference: 0.00000179
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -1.49786282, Converted: -1.49786103
|
|
|
+ Biggest difference in row (0, 0), sum -7.280505 vs -7.280507
|
|
|
+
|
|
|
+Layer 1, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -7.318125
|
|
|
+ Converted tensor sum: -7.318151
|
|
|
+ Original tensor mean: -0.914766
|
|
|
+ Converted tensor mean: -0.914769
|
|
|
+ Mean difference: 0.00000331
|
|
|
+ Maximum pointwise difference: 0.00000930
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -3.41128922, Converted: -3.41129851
|
|
|
+ Biggest difference in row (0, 0), sum -7.318125 vs -7.318151
|
|
|
+
|
|
|
+Layer 2, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 14.344932
|
|
|
+ Converted tensor sum: 14.344961
|
|
|
+ Original tensor mean: 1.793116
|
|
|
+ Converted tensor mean: 1.793120
|
|
|
+ Mean difference: 0.00000746
|
|
|
+ Maximum pointwise difference: 0.00003266
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 3.23243976, Converted: 3.23247242
|
|
|
+ Biggest difference in row (0, 0), sum 14.344932 vs 14.344961
|
|
|
+
|
|
|
+Layer 3, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 46.801067
|
|
|
+ Converted tensor sum: 46.811996
|
|
|
+ Original tensor mean: 5.850133
|
|
|
+ Converted tensor mean: 5.851500
|
|
|
+ Mean difference: 0.00141515
|
|
|
+ Maximum pointwise difference: 0.00275421
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 6.65637064, Converted: 6.65912485
|
|
|
+ Biggest difference in row (0, 0), sum 46.801067 vs 46.811996
|
|
|
+
|
|
|
+Layer 4, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 47.891678
|
|
|
+ Converted tensor sum: 47.901840
|
|
|
+ Original tensor mean: 5.986460
|
|
|
+ Converted tensor mean: 5.987730
|
|
|
+ Mean difference: 0.00131346
|
|
|
+ Maximum pointwise difference: 0.00296640
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 7.13961887, Converted: 7.14258528
|
|
|
+ Biggest difference in row (0, 0), sum 47.891678 vs 47.901840
|
|
|
+
|
|
|
+Layer 5, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 45.815926
|
|
|
+ Converted tensor sum: 45.826260
|
|
|
+ Original tensor mean: 5.726991
|
|
|
+ Converted tensor mean: 5.728282
|
|
|
+ Mean difference: 0.00137006
|
|
|
+ Maximum pointwise difference: 0.00332642
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 7.94661283, Converted: 7.94993925
|
|
|
+ Biggest difference in row (0, 0), sum 45.815926 vs 45.826260
|
|
|
+
|
|
|
+Layer 6, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 40.223167
|
|
|
+ Converted tensor sum: 40.231720
|
|
|
+ Original tensor mean: 5.027896
|
|
|
+ Converted tensor mean: 5.028965
|
|
|
+ Mean difference: 0.00155937
|
|
|
+ Maximum pointwise difference: 0.00270462
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 7.07846451, Converted: 7.08116913
|
|
|
+ Biggest difference in row (0, 0), sum 40.223167 vs 40.231720
|
|
|
+
|
|
|
+Layer 7, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 84.588196
|
|
|
+ Converted tensor sum: 84.602402
|
|
|
+ Original tensor mean: 10.573524
|
|
|
+ Converted tensor mean: 10.575300
|
|
|
+ Mean difference: 0.00185513
|
|
|
+ Maximum pointwise difference: 0.00356102
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 13.31151009, Converted: 13.31507111
|
|
|
+ Biggest difference in row (0, 0), sum 84.588196 vs 84.602402
|
|
|
+
|
|
|
+Layer 8, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 85.737823
|
|
|
+ Converted tensor sum: 85.749390
|
|
|
+ Original tensor mean: 10.717228
|
|
|
+ Converted tensor mean: 10.718674
|
|
|
+ Mean difference: 0.00189817
|
|
|
+ Maximum pointwise difference: 0.00350094
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 13.90340519, Converted: 13.90690613
|
|
|
+ Biggest difference in row (0, 0), sum 85.737823 vs 85.749390
|
|
|
+
|
|
|
+Layer 9, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 83.069107
|
|
|
+ Converted tensor sum: 83.078979
|
|
|
+ Original tensor mean: 10.383638
|
|
|
+ Converted tensor mean: 10.384872
|
|
|
+ Mean difference: 0.00177890
|
|
|
+ Maximum pointwise difference: 0.00335407
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 13.79222488, Converted: 13.79557896
|
|
|
+ Biggest difference in row (0, 0), sum 83.069107 vs 83.078979
|
|
|
+
|
|
|
+Layer 10, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 80.782455
|
|
|
+ Converted tensor sum: 80.791588
|
|
|
+ Original tensor mean: 10.097807
|
|
|
+ Converted tensor mean: 10.098948
|
|
|
+ Mean difference: 0.00190949
|
|
|
+ Maximum pointwise difference: 0.00329256
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 6.64920282, Converted: 6.65249538
|
|
|
+ Biggest difference in row (0, 0), sum 80.782455 vs 80.791588
|
|
|
+
|
|
|
+Layer 11, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 124.938332
|
|
|
+ Converted tensor sum: 124.953712
|
|
|
+ Original tensor mean: 15.617291
|
|
|
+ Converted tensor mean: 15.619214
|
|
|
+ Mean difference: 0.00253391
|
|
|
+ Maximum pointwise difference: 0.00420666
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 12.86635590, Converted: 12.87056255
|
|
|
+ Biggest difference in row (0, 0), sum 124.938332 vs 124.953712
|
|
|
+
|
|
|
+Layer 12, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 124.466995
|
|
|
+ Converted tensor sum: 124.483871
|
|
|
+ Original tensor mean: 15.558374
|
|
|
+ Converted tensor mean: 15.560484
|
|
|
+ Mean difference: 0.00271881
|
|
|
+ Maximum pointwise difference: 0.00506878
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 12.41438103, Converted: 12.41944981
|
|
|
+ Biggest difference in row (0, 0), sum 124.466995 vs 124.483871
|
|
|
+
|
|
|
+Layer 13, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 121.646957
|
|
|
+ Converted tensor sum: 121.660385
|
|
|
+ Original tensor mean: 15.205870
|
|
|
+ Converted tensor mean: 15.207548
|
|
|
+ Mean difference: 0.00218880
|
|
|
+ Maximum pointwise difference: 0.00470448
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 12.02227020, Converted: 12.02697468
|
|
|
+ Biggest difference in row (0, 0), sum 121.646957 vs 121.660385
|
|
|
+
|
|
|
+Layer 14, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 116.636169
|
|
|
+ Converted tensor sum: 116.658142
|
|
|
+ Original tensor mean: 14.579521
|
|
|
+ Converted tensor mean: 14.582268
|
|
|
+ Mean difference: 0.00299489
|
|
|
+ Maximum pointwise difference: 0.00521469
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 18.26870537, Converted: 18.27392006
|
|
|
+ Biggest difference in row (0, 0), sum 116.636169 vs 116.658142
|
|
|
+
|
|
|
+Layer 15, Token 2 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 201.843384
|
|
|
+ Converted tensor sum: 201.865143
|
|
|
+ Original tensor mean: 25.230423
|
|
|
+ Converted tensor mean: 25.233143
|
|
|
+ Mean difference: 0.00346577
|
|
|
+ Maximum pointwise difference: 0.00746727
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 30.94509888, Converted: 30.95256615
|
|
|
+ Biggest difference in row (0, 0), sum 201.843384 vs 201.865143
|
|
|
+
|
|
|
+Layer 0, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 18.698099
|
|
|
+ Converted tensor sum: 18.475292
|
|
|
+ Original tensor mean: 2.337262
|
|
|
+ Converted tensor mean: 2.309412
|
|
|
+ Mean difference: 2.67848086
|
|
|
+ Maximum pointwise difference: 4.89963531
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 2.51813841, Converted: 7.41777372
|
|
|
+ Biggest difference in row (0, 0), sum 18.698099 vs 18.475292
|
|
|
+
|
|
|
+Layer 1, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 13.937105
|
|
|
+ Converted tensor sum: 11.538675
|
|
|
+ Original tensor mean: 1.742138
|
|
|
+ Converted tensor mean: 1.442334
|
|
|
+ Mean difference: 2.56903791
|
|
|
+ Maximum pointwise difference: 5.56039190
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 5.86116695, Converted: 0.30077514
|
|
|
+ Biggest difference in row (0, 0), sum 13.937105 vs 11.538675
|
|
|
+
|
|
|
+Layer 2, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.835873
|
|
|
+ Converted tensor sum: 9.065081
|
|
|
+ Original tensor mean: 2.229484
|
|
|
+ Converted tensor mean: 1.133135
|
|
|
+ Mean difference: 2.48439741
|
|
|
+ Maximum pointwise difference: 7.80053854
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 7.08156919, Converted: -0.71896935
|
|
|
+ Biggest difference in row (0, 0), sum 17.835873 vs 9.065081
|
|
|
+
|
|
|
+Layer 3, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 19.733971
|
|
|
+ Converted tensor sum: 0.388454
|
|
|
+ Original tensor mean: 2.466746
|
|
|
+ Converted tensor mean: 0.048557
|
|
|
+ Mean difference: 2.74538827
|
|
|
+ Maximum pointwise difference: 8.14173889
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 7.32600927, Converted: -0.81572962
|
|
|
+ Biggest difference in row (0, 0), sum 19.733971 vs 0.388454
|
|
|
+
|
|
|
+Layer 4, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.522738
|
|
|
+ Converted tensor sum: 7.885162
|
|
|
+ Original tensor mean: 2.190342
|
|
|
+ Converted tensor mean: 0.985645
|
|
|
+ Mean difference: 4.25575876
|
|
|
+ Maximum pointwise difference: 7.97597837
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 3.98348713, Converted: -3.99249125
|
|
|
+ Biggest difference in row (0, 0), sum 17.522738 vs 7.885162
|
|
|
+
|
|
|
+Layer 5, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 21.460897
|
|
|
+ Converted tensor sum: 15.969997
|
|
|
+ Original tensor mean: 2.682612
|
|
|
+ Converted tensor mean: 1.996250
|
|
|
+ Mean difference: 4.34595299
|
|
|
+ Maximum pointwise difference: 8.46822739
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 6.38704681, Converted: -2.08118057
|
|
|
+ Biggest difference in row (0, 0), sum 21.460897 vs 15.969997
|
|
|
+
|
|
|
+Layer 6, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 18.336536
|
|
|
+ Converted tensor sum: 9.128950
|
|
|
+ Original tensor mean: 2.292067
|
|
|
+ Converted tensor mean: 1.141119
|
|
|
+ Mean difference: 3.42625880
|
|
|
+ Maximum pointwise difference: 9.18005276
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 6.15963268, Converted: -3.02042007
|
|
|
+ Biggest difference in row (0, 0), sum 18.336536 vs 9.128950
|
|
|
+
|
|
|
+Layer 7, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 27.127436
|
|
|
+ Converted tensor sum: -91.853516
|
|
|
+ Original tensor mean: 3.390929
|
|
|
+ Converted tensor mean: -11.481689
|
|
|
+ Mean difference: 14.87261772
|
|
|
+ Maximum pointwise difference: 25.04354668
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 6.62252426, Converted: -18.42102242
|
|
|
+ Biggest difference in row (0, 0), sum 27.127436 vs -91.853516
|
|
|
+
|
|
|
+Layer 8, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 22.795490
|
|
|
+ Converted tensor sum: -94.016220
|
|
|
+ Original tensor mean: 2.849436
|
|
|
+ Converted tensor mean: -11.752028
|
|
|
+ Mean difference: 14.60146332
|
|
|
+ Maximum pointwise difference: 26.14372826
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 0.59730154, Converted: -25.54642677
|
|
|
+ Biggest difference in row (0, 0), sum 22.795490 vs -94.016220
|
|
|
+
|
|
|
+Layer 9, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 16.367466
|
|
|
+ Converted tensor sum: -159.747223
|
|
|
+ Original tensor mean: 2.045933
|
|
|
+ Converted tensor mean: -19.968403
|
|
|
+ Mean difference: 22.01433563
|
|
|
+ Maximum pointwise difference: 34.04418182
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -0.55563742, Converted: -34.59981918
|
|
|
+ Biggest difference in row (0, 0), sum 16.367466 vs -159.747223
|
|
|
+
|
|
|
+Layer 10, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 22.587862
|
|
|
+ Converted tensor sum: -171.457092
|
|
|
+ Original tensor mean: 2.823483
|
|
|
+ Converted tensor mean: -21.432137
|
|
|
+ Mean difference: 24.25561905
|
|
|
+ Maximum pointwise difference: 40.39982224
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -0.52963394, Converted: -40.92945480
|
|
|
+ Biggest difference in row (0, 0), sum 22.587862 vs -171.457092
|
|
|
+
|
|
|
+Layer 11, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 98.501198
|
|
|
+ Converted tensor sum: -580.205811
|
|
|
+ Original tensor mean: 12.312650
|
|
|
+ Converted tensor mean: -72.525726
|
|
|
+ Mean difference: 84.83837128
|
|
|
+ Maximum pointwise difference: 107.93860626
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.91925716, Converted: -98.01934814
|
|
|
+ Biggest difference in row (0, 0), sum 98.501198 vs -580.205811
|
|
|
+
|
|
|
+Layer 12, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 96.017456
|
|
|
+ Converted tensor sum: -599.130005
|
|
|
+ Original tensor mean: 12.002182
|
|
|
+ Converted tensor mean: -74.891251
|
|
|
+ Mean difference: 86.89343262
|
|
|
+ Maximum pointwise difference: 107.37790680
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 10.14877987, Converted: -97.22912598
|
|
|
+ Biggest difference in row (0, 0), sum 96.017456 vs -599.130005
|
|
|
+
|
|
|
+Layer 13, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 93.969711
|
|
|
+ Converted tensor sum: -604.221680
|
|
|
+ Original tensor mean: 11.746214
|
|
|
+ Converted tensor mean: -75.527710
|
|
|
+ Mean difference: 87.27392578
|
|
|
+ Maximum pointwise difference: 107.42771149
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.00540733, Converted: -98.42230225
|
|
|
+ Biggest difference in row (0, 0), sum 93.969711 vs -604.221680
|
|
|
+
|
|
|
+Layer 14, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 96.193565
|
|
|
+ Converted tensor sum: -675.267456
|
|
|
+ Original tensor mean: 12.024196
|
|
|
+ Converted tensor mean: -84.408432
|
|
|
+ Mean difference: 96.43263245
|
|
|
+ Maximum pointwise difference: 115.43507385
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.17813015, Converted: -106.25694275
|
|
|
+ Biggest difference in row (0, 0), sum 96.193565 vs -675.267456
|
|
|
+
|
|
|
+Layer 15, Token 3 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 203.967834
|
|
|
+ Converted tensor sum: -1113.465820
|
|
|
+ Original tensor mean: 25.495979
|
|
|
+ Converted tensor mean: -139.183228
|
|
|
+ Mean difference: 164.67919922
|
|
|
+ Maximum pointwise difference: 181.33709717
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 23.16110420, Converted: -158.17599487
|
|
|
+ Biggest difference in row (0, 0), sum 203.967834 vs -1113.465820
|
|
|
+
|
|
|
+Layer 0, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 1.060196
|
|
|
+ Converted tensor sum: -6.035928
|
|
|
+ Original tensor mean: 0.132525
|
|
|
+ Converted tensor mean: -0.754491
|
|
|
+ Mean difference: 1.11038423
|
|
|
+ Maximum pointwise difference: 2.90589857
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 1.75988472, Converted: -1.14601374
|
|
|
+ Biggest difference in row (0, 0), sum 1.060196 vs -6.035928
|
|
|
+
|
|
|
+Layer 1, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -45.922947
|
|
|
+ Converted tensor sum: -53.028908
|
|
|
+ Original tensor mean: -5.740368
|
|
|
+ Converted tensor mean: -6.628613
|
|
|
+ Mean difference: 1.58238363
|
|
|
+ Maximum pointwise difference: 3.98315811
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -7.45665455, Converted: -11.43981266
|
|
|
+ Biggest difference in row (0, 0), sum -45.922947 vs -53.028908
|
|
|
+
|
|
|
+Layer 2, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -47.965603
|
|
|
+ Converted tensor sum: -68.008888
|
|
|
+ Original tensor mean: -5.995700
|
|
|
+ Converted tensor mean: -8.501111
|
|
|
+ Mean difference: 4.45314884
|
|
|
+ Maximum pointwise difference: 12.72673607
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 10.93319416, Converted: -1.79354143
|
|
|
+ Biggest difference in row (0, 0), sum -47.965603 vs -68.008888
|
|
|
+
|
|
|
+Layer 3, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -224.689087
|
|
|
+ Converted tensor sum: -313.872162
|
|
|
+ Original tensor mean: -28.086136
|
|
|
+ Converted tensor mean: -39.234020
|
|
|
+ Mean difference: 11.14788437
|
|
|
+ Maximum pointwise difference: 20.76882172
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -9.44140339, Converted: -30.21022415
|
|
|
+ Biggest difference in row (0, 0), sum -224.689087 vs -313.872162
|
|
|
+
|
|
|
+Layer 4, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -207.206879
|
|
|
+ Converted tensor sum: -293.960205
|
|
|
+ Original tensor mean: -25.900860
|
|
|
+ Converted tensor mean: -36.745026
|
|
|
+ Mean difference: 10.84416676
|
|
|
+ Maximum pointwise difference: 23.99023056
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -13.00386524, Converted: -36.99409485
|
|
|
+ Biggest difference in row (0, 0), sum -207.206879 vs -293.960205
|
|
|
+
|
|
|
+Layer 5, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -185.594986
|
|
|
+ Converted tensor sum: -298.454895
|
|
|
+ Original tensor mean: -23.199373
|
|
|
+ Converted tensor mean: -37.306862
|
|
|
+ Mean difference: 14.10748863
|
|
|
+ Maximum pointwise difference: 27.16260529
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -13.13538361, Converted: -40.29798889
|
|
|
+ Biggest difference in row (0, 0), sum -185.594986 vs -298.454895
|
|
|
+
|
|
|
+Layer 6, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -226.913589
|
|
|
+ Converted tensor sum: -341.315369
|
|
|
+ Original tensor mean: -28.364199
|
|
|
+ Converted tensor mean: -42.664421
|
|
|
+ Mean difference: 14.30021858
|
|
|
+ Maximum pointwise difference: 27.83255386
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -20.03028870, Converted: -47.86284256
|
|
|
+ Biggest difference in row (0, 0), sum -226.913589 vs -341.315369
|
|
|
+
|
|
|
+Layer 7, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -431.530212
|
|
|
+ Converted tensor sum: -553.909912
|
|
|
+ Original tensor mean: -53.941277
|
|
|
+ Converted tensor mean: -69.238739
|
|
|
+ Mean difference: 15.29746723
|
|
|
+ Maximum pointwise difference: 28.98126602
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -44.07294083, Converted: -73.05420685
|
|
|
+ Biggest difference in row (0, 0), sum -431.530212 vs -553.909912
|
|
|
+
|
|
|
+Layer 8, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -442.906403
|
|
|
+ Converted tensor sum: -577.351807
|
|
|
+ Original tensor mean: -55.363300
|
|
|
+ Converted tensor mean: -72.168976
|
|
|
+ Mean difference: 16.80567932
|
|
|
+ Maximum pointwise difference: 24.00010681
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -44.65782166, Converted: -68.65792847
|
|
|
+ Biggest difference in row (0, 0), sum -442.906403 vs -577.351807
|
|
|
+
|
|
|
+Layer 9, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -457.224976
|
|
|
+ Converted tensor sum: -606.660400
|
|
|
+ Original tensor mean: -57.153122
|
|
|
+ Converted tensor mean: -75.832550
|
|
|
+ Mean difference: 18.67943192
|
|
|
+ Maximum pointwise difference: 31.74385834
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -37.91560364, Converted: -69.65946198
|
|
|
+ Biggest difference in row (0, 0), sum -457.224976 vs -606.660400
|
|
|
+
|
|
|
+Layer 10, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -464.368622
|
|
|
+ Converted tensor sum: -617.020081
|
|
|
+ Original tensor mean: -58.046078
|
|
|
+ Converted tensor mean: -77.127510
|
|
|
+ Mean difference: 19.08143044
|
|
|
+ Maximum pointwise difference: 31.15077591
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -40.76456833, Converted: -71.91534424
|
|
|
+ Biggest difference in row (0, 0), sum -464.368622 vs -617.020081
|
|
|
+
|
|
|
+Layer 11, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -848.365112
|
|
|
+ Converted tensor sum: -1029.810791
|
|
|
+ Original tensor mean: -106.045639
|
|
|
+ Converted tensor mean: -128.726349
|
|
|
+ Mean difference: 22.68070793
|
|
|
+ Maximum pointwise difference: 33.58893585
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -79.47626495, Converted: -113.06520081
|
|
|
+ Biggest difference in row (0, 0), sum -848.365112 vs -1029.810791
|
|
|
+
|
|
|
+Layer 12, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -856.364807
|
|
|
+ Converted tensor sum: -1034.875244
|
|
|
+ Original tensor mean: -107.045601
|
|
|
+ Converted tensor mean: -129.359406
|
|
|
+ Mean difference: 22.31380081
|
|
|
+ Maximum pointwise difference: 34.47047424
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -94.66131592, Converted: -129.13179016
|
|
|
+ Biggest difference in row (0, 0), sum -856.364807 vs -1034.875244
|
|
|
+
|
|
|
+Layer 13, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -876.941895
|
|
|
+ Converted tensor sum: -1070.547119
|
|
|
+ Original tensor mean: -109.617737
|
|
|
+ Converted tensor mean: -133.818390
|
|
|
+ Mean difference: 24.20065689
|
|
|
+ Maximum pointwise difference: 35.39904022
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -93.12728119, Converted: -128.52632141
|
|
|
+ Biggest difference in row (0, 0), sum -876.941895 vs -1070.547119
|
|
|
+
|
|
|
+Layer 14, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -914.061707
|
|
|
+ Converted tensor sum: -1087.587036
|
|
|
+ Original tensor mean: -114.257713
|
|
|
+ Converted tensor mean: -135.948380
|
|
|
+ Mean difference: 21.69067001
|
|
|
+ Maximum pointwise difference: 38.16375732
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -116.85905457, Converted: -155.02281189
|
|
|
+ Biggest difference in row (0, 0), sum -914.061707 vs -1087.587036
|
|
|
+
|
|
|
+Layer 15, Token 4 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1341.588623
|
|
|
+ Converted tensor sum: -1530.308838
|
|
|
+ Original tensor mean: -167.698578
|
|
|
+ Converted tensor mean: -191.288605
|
|
|
+ Mean difference: 23.59002495
|
|
|
+ Maximum pointwise difference: 40.53677368
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -178.01094055, Converted: -218.54771423
|
|
|
+ Biggest difference in row (0, 0), sum -1341.588623 vs -1530.308838
|
|
|
+
|
|
|
+Layer 0, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 12.113814
|
|
|
+ Converted tensor sum: 1.907211
|
|
|
+ Original tensor mean: 1.514227
|
|
|
+ Converted tensor mean: 0.238401
|
|
|
+ Mean difference: 1.41127276
|
|
|
+ Maximum pointwise difference: 3.03878593
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 1.65080941, Converted: -1.38797641
|
|
|
+ Biggest difference in row (0, 0), sum 12.113814 vs 1.907211
|
|
|
+
|
|
|
+Layer 1, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 3.328269
|
|
|
+ Converted tensor sum: 7.141708
|
|
|
+ Original tensor mean: 0.416034
|
|
|
+ Converted tensor mean: 0.892714
|
|
|
+ Mean difference: 1.73651075
|
|
|
+ Maximum pointwise difference: 4.59446096
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -0.09795946, Converted: 4.49650145
|
|
|
+ Biggest difference in row (0, 0), sum 3.328269 vs 7.141708
|
|
|
+
|
|
|
+Layer 2, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -7.901872
|
|
|
+ Converted tensor sum: 12.052417
|
|
|
+ Original tensor mean: -0.987734
|
|
|
+ Converted tensor mean: 1.506552
|
|
|
+ Mean difference: 2.91872406
|
|
|
+ Maximum pointwise difference: 6.22109556
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -1.61789608, Converted: 4.60319948
|
|
|
+ Biggest difference in row (0, 0), sum -7.901872 vs 12.052417
|
|
|
+
|
|
|
+Layer 3, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -206.706451
|
|
|
+ Converted tensor sum: 38.517872
|
|
|
+ Original tensor mean: -25.838306
|
|
|
+ Converted tensor mean: 4.814734
|
|
|
+ Mean difference: 30.65304184
|
|
|
+ Maximum pointwise difference: 36.99858475
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -30.05084610, Converted: 6.94773912
|
|
|
+ Biggest difference in row (0, 0), sum -206.706451 vs 38.517872
|
|
|
+
|
|
|
+Layer 4, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -190.520950
|
|
|
+ Converted tensor sum: 37.683086
|
|
|
+ Original tensor mean: -23.815119
|
|
|
+ Converted tensor mean: 4.710386
|
|
|
+ Mean difference: 28.52550507
|
|
|
+ Maximum pointwise difference: 36.21773911
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -29.42410278, Converted: 6.79363585
|
|
|
+ Biggest difference in row (0, 0), sum -190.520950 vs 37.683086
|
|
|
+
|
|
|
+Layer 5, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -129.615097
|
|
|
+ Converted tensor sum: 37.492149
|
|
|
+ Original tensor mean: -16.201887
|
|
|
+ Converted tensor mean: 4.686519
|
|
|
+ Mean difference: 20.88840675
|
|
|
+ Maximum pointwise difference: 30.11524200
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -22.47561646, Converted: 7.63962507
|
|
|
+ Biggest difference in row (0, 0), sum -129.615097 vs 37.492149
|
|
|
+
|
|
|
+Layer 6, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -168.733810
|
|
|
+ Converted tensor sum: 40.467735
|
|
|
+ Original tensor mean: -21.091726
|
|
|
+ Converted tensor mean: 5.058467
|
|
|
+ Mean difference: 26.15019226
|
|
|
+ Maximum pointwise difference: 35.40680313
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -27.34041214, Converted: 8.06639194
|
|
|
+ Biggest difference in row (0, 0), sum -168.733810 vs 40.467735
|
|
|
+
|
|
|
+Layer 7, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -375.952911
|
|
|
+ Converted tensor sum: 84.494781
|
|
|
+ Original tensor mean: -46.994114
|
|
|
+ Converted tensor mean: 10.561848
|
|
|
+ Mean difference: 57.55596161
|
|
|
+ Maximum pointwise difference: 65.51675415
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -54.48764038, Converted: 11.02911663
|
|
|
+ Biggest difference in row (0, 0), sum -375.952911 vs 84.494781
|
|
|
+
|
|
|
+Layer 8, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -386.335632
|
|
|
+ Converted tensor sum: 90.464653
|
|
|
+ Original tensor mean: -48.291954
|
|
|
+ Converted tensor mean: 11.308082
|
|
|
+ Mean difference: 59.60003662
|
|
|
+ Maximum pointwise difference: 70.12364197
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -54.78602219, Converted: 15.33761883
|
|
|
+ Biggest difference in row (0, 0), sum -386.335632 vs 90.464653
|
|
|
+
|
|
|
+Layer 9, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -407.643036
|
|
|
+ Converted tensor sum: 83.872604
|
|
|
+ Original tensor mean: -50.955379
|
|
|
+ Converted tensor mean: 10.484076
|
|
|
+ Mean difference: 61.43945694
|
|
|
+ Maximum pointwise difference: 73.87419128
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -62.38755035, Converted: 11.48663712
|
|
|
+ Biggest difference in row (0, 0), sum -407.643036 vs 83.872604
|
|
|
+
|
|
|
+Layer 10, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -398.133545
|
|
|
+ Converted tensor sum: 83.310257
|
|
|
+ Original tensor mean: -49.766693
|
|
|
+ Converted tensor mean: 10.413782
|
|
|
+ Mean difference: 60.18047714
|
|
|
+ Maximum pointwise difference: 71.93079376
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -61.05200958, Converted: 10.87878418
|
|
|
+ Biggest difference in row (0, 0), sum -398.133545 vs 83.310257
|
|
|
+
|
|
|
+Layer 11, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -795.896240
|
|
|
+ Converted tensor sum: 161.559113
|
|
|
+ Original tensor mean: -99.487030
|
|
|
+ Converted tensor mean: 20.194889
|
|
|
+ Mean difference: 119.68191528
|
|
|
+ Maximum pointwise difference: 136.52630615
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -112.33381653, Converted: 24.19249153
|
|
|
+ Biggest difference in row (0, 0), sum -795.896240 vs 161.559113
|
|
|
+
|
|
|
+Layer 12, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -795.492065
|
|
|
+ Converted tensor sum: 157.049652
|
|
|
+ Original tensor mean: -99.436508
|
|
|
+ Converted tensor mean: 19.631207
|
|
|
+ Mean difference: 119.06771088
|
|
|
+ Maximum pointwise difference: 138.69142151
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -115.85614014, Converted: 22.83527946
|
|
|
+ Biggest difference in row (0, 0), sum -795.492065 vs 157.049652
|
|
|
+
|
|
|
+Layer 13, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -816.679565
|
|
|
+ Converted tensor sum: 152.172302
|
|
|
+ Original tensor mean: -102.084946
|
|
|
+ Converted tensor mean: 19.021538
|
|
|
+ Mean difference: 121.10647583
|
|
|
+ Maximum pointwise difference: 142.45770264
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -120.28170013, Converted: 22.17600250
|
|
|
+ Biggest difference in row (0, 0), sum -816.679565 vs 152.172302
|
|
|
+
|
|
|
+Layer 14, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -858.712524
|
|
|
+ Converted tensor sum: 152.386047
|
|
|
+ Original tensor mean: -107.339066
|
|
|
+ Converted tensor mean: 19.048256
|
|
|
+ Mean difference: 126.38732147
|
|
|
+ Maximum pointwise difference: 150.80645752
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -129.48748779, Converted: 21.31897736
|
|
|
+ Biggest difference in row (0, 0), sum -858.712524 vs 152.386047
|
|
|
+
|
|
|
+Layer 15, Token 5 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1291.953247
|
|
|
+ Converted tensor sum: 244.354996
|
|
|
+ Original tensor mean: -161.494156
|
|
|
+ Converted tensor mean: 30.544374
|
|
|
+ Mean difference: 192.03852844
|
|
|
+ Maximum pointwise difference: 220.75814819
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -189.25143433, Converted: 31.50671959
|
|
|
+ Biggest difference in row (0, 0), sum -1291.953247 vs 244.354996
|
|
|
+
|
|
|
+Layer 0, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 4.713745
|
|
|
+ Converted tensor sum: 11.404326
|
|
|
+ Original tensor mean: 0.589218
|
|
|
+ Converted tensor mean: 1.425541
|
|
|
+ Mean difference: 1.39658785
|
|
|
+ Maximum pointwise difference: 3.99744058
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -2.16165113, Converted: 1.83578944
|
|
|
+ Biggest difference in row (0, 0), sum 4.713745 vs 11.404326
|
|
|
+
|
|
|
+Layer 1, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 2.484277
|
|
|
+ Converted tensor sum: 9.422175
|
|
|
+ Original tensor mean: 0.310535
|
|
|
+ Converted tensor mean: 1.177772
|
|
|
+ Mean difference: 1.56714785
|
|
|
+ Maximum pointwise difference: 3.13825679
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -2.85257578, Converted: 0.28568110
|
|
|
+ Biggest difference in row (0, 0), sum 2.484277 vs 9.422175
|
|
|
+
|
|
|
+Layer 2, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -4.950438
|
|
|
+ Converted tensor sum: -1.357174
|
|
|
+ Original tensor mean: -0.618805
|
|
|
+ Converted tensor mean: -0.169647
|
|
|
+ Mean difference: 1.71385837
|
|
|
+ Maximum pointwise difference: 3.88516402
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 3.02349472, Converted: -0.86166936
|
|
|
+ Biggest difference in row (0, 0), sum -4.950438 vs -1.357174
|
|
|
+
|
|
|
+Layer 3, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -125.927612
|
|
|
+ Converted tensor sum: -106.782318
|
|
|
+ Original tensor mean: -15.740952
|
|
|
+ Converted tensor mean: -13.347790
|
|
|
+ Mean difference: 3.11209679
|
|
|
+ Maximum pointwise difference: 4.75263119
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -17.46803665, Converted: -12.71540546
|
|
|
+ Biggest difference in row (0, 0), sum -125.927612 vs -106.782318
|
|
|
+
|
|
|
+Layer 4, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -139.830460
|
|
|
+ Converted tensor sum: -126.311844
|
|
|
+ Original tensor mean: -17.478807
|
|
|
+ Converted tensor mean: -15.788980
|
|
|
+ Mean difference: 3.15184307
|
|
|
+ Maximum pointwise difference: 5.99608994
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -25.84107971, Converted: -19.84498978
|
|
|
+ Biggest difference in row (0, 0), sum -139.830460 vs -126.311844
|
|
|
+
|
|
|
+Layer 5, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -142.974274
|
|
|
+ Converted tensor sum: -73.637054
|
|
|
+ Original tensor mean: -17.871784
|
|
|
+ Converted tensor mean: -9.204632
|
|
|
+ Mean difference: 10.37221718
|
|
|
+ Maximum pointwise difference: 16.99522591
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -22.96857643, Converted: -5.97335052
|
|
|
+ Biggest difference in row (0, 0), sum -142.974274 vs -73.637054
|
|
|
+
|
|
|
+Layer 6, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -180.967728
|
|
|
+ Converted tensor sum: -69.754128
|
|
|
+ Original tensor mean: -22.620966
|
|
|
+ Converted tensor mean: -8.719266
|
|
|
+ Mean difference: 14.33841133
|
|
|
+ Maximum pointwise difference: 25.72810745
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -36.46190262, Converted: -10.73379517
|
|
|
+ Biggest difference in row (0, 0), sum -180.967728 vs -69.754128
|
|
|
+
|
|
|
+Layer 7, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -390.468323
|
|
|
+ Converted tensor sum: -284.137634
|
|
|
+ Original tensor mean: -48.808540
|
|
|
+ Converted tensor mean: -35.517204
|
|
|
+ Mean difference: 14.31795502
|
|
|
+ Maximum pointwise difference: 25.91625977
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -61.98001099, Converted: -36.06375122
|
|
|
+ Biggest difference in row (0, 0), sum -390.468323 vs -284.137634
|
|
|
+
|
|
|
+Layer 8, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -325.042450
|
|
|
+ Converted tensor sum: -284.328186
|
|
|
+ Original tensor mean: -40.630306
|
|
|
+ Converted tensor mean: -35.541023
|
|
|
+ Mean difference: 6.66226053
|
|
|
+ Maximum pointwise difference: 16.25393486
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -47.66500854, Converted: -31.41107368
|
|
|
+ Biggest difference in row (0, 0), sum -325.042450 vs -284.328186
|
|
|
+
|
|
|
+Layer 9, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -350.015503
|
|
|
+ Converted tensor sum: -313.897308
|
|
|
+ Original tensor mean: -43.751938
|
|
|
+ Converted tensor mean: -39.237164
|
|
|
+ Mean difference: 9.32056522
|
|
|
+ Maximum pointwise difference: 23.60877037
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -54.44406891, Converted: -30.83529854
|
|
|
+ Biggest difference in row (0, 0), sum -350.015503 vs -313.897308
|
|
|
+
|
|
|
+Layer 10, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -375.606720
|
|
|
+ Converted tensor sum: -330.646790
|
|
|
+ Original tensor mean: -46.950840
|
|
|
+ Converted tensor mean: -41.330849
|
|
|
+ Mean difference: 8.38710022
|
|
|
+ Maximum pointwise difference: 27.84555435
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -60.66308594, Converted: -32.81753159
|
|
|
+ Biggest difference in row (0, 0), sum -375.606720 vs -330.646790
|
|
|
+
|
|
|
+Layer 11, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -764.285278
|
|
|
+ Converted tensor sum: -730.992798
|
|
|
+ Original tensor mean: -95.535660
|
|
|
+ Converted tensor mean: -91.374100
|
|
|
+ Mean difference: 7.89588118
|
|
|
+ Maximum pointwise difference: 26.59626007
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -118.78226471, Converted: -92.18600464
|
|
|
+ Biggest difference in row (0, 0), sum -764.285278 vs -730.992798
|
|
|
+
|
|
|
+Layer 12, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -777.147827
|
|
|
+ Converted tensor sum: -765.448669
|
|
|
+ Original tensor mean: -97.143478
|
|
|
+ Converted tensor mean: -95.681084
|
|
|
+ Mean difference: 6.33593750
|
|
|
+ Maximum pointwise difference: 19.02982330
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -119.55146790, Converted: -100.52164459
|
|
|
+ Biggest difference in row (0, 0), sum -777.147827 vs -765.448669
|
|
|
+
|
|
|
+Layer 13, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -787.772400
|
|
|
+ Converted tensor sum: -777.362915
|
|
|
+ Original tensor mean: -98.471550
|
|
|
+ Converted tensor mean: -97.170364
|
|
|
+ Mean difference: 7.69482183
|
|
|
+ Maximum pointwise difference: 19.15751648
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -120.39152527, Converted: -101.23400879
|
|
|
+ Biggest difference in row (0, 0), sum -787.772400 vs -777.362915
|
|
|
+
|
|
|
+Layer 14, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -883.013428
|
|
|
+ Converted tensor sum: -881.301514
|
|
|
+ Original tensor mean: -110.376678
|
|
|
+ Converted tensor mean: -110.162689
|
|
|
+ Mean difference: 12.85068035
|
|
|
+ Maximum pointwise difference: 28.13771820
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -129.54022217, Converted: -101.40250397
|
|
|
+ Biggest difference in row (0, 0), sum -883.013428 vs -881.301514
|
|
|
+
|
|
|
+Layer 15, Token 6 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1324.892822
|
|
|
+ Converted tensor sum: -1316.172363
|
|
|
+ Original tensor mean: -165.611603
|
|
|
+ Converted tensor mean: -164.521545
|
|
|
+ Mean difference: 12.77940941
|
|
|
+ Maximum pointwise difference: 29.43301392
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -192.78923035, Converted: -163.35621643
|
|
|
+ Biggest difference in row (0, 0), sum -1324.892822 vs -1316.172363
|
|
|
+
|
|
|
+Layer 0, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 16.302702
|
|
|
+ Converted tensor sum: 6.534010
|
|
|
+ Original tensor mean: 2.037838
|
|
|
+ Converted tensor mean: 0.816751
|
|
|
+ Mean difference: 1.39780235
|
|
|
+ Maximum pointwise difference: 4.86297131
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 4.45225191, Converted: -0.41071916
|
|
|
+ Biggest difference in row (0, 0), sum 16.302702 vs 6.534010
|
|
|
+
|
|
|
+Layer 1, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 7.949856
|
|
|
+ Converted tensor sum: 10.515163
|
|
|
+ Original tensor mean: 0.993732
|
|
|
+ Converted tensor mean: 1.314395
|
|
|
+ Mean difference: 1.91308641
|
|
|
+ Maximum pointwise difference: 3.92083621
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 1.42750001, Converted: 5.34833622
|
|
|
+ Biggest difference in row (0, 0), sum 7.949856 vs 10.515163
|
|
|
+
|
|
|
+Layer 2, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 5.224671
|
|
|
+ Converted tensor sum: 8.502550
|
|
|
+ Original tensor mean: 0.653084
|
|
|
+ Converted tensor mean: 1.062819
|
|
|
+ Mean difference: 2.38619947
|
|
|
+ Maximum pointwise difference: 6.21067238
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 4.76728964, Converted: -1.44338274
|
|
|
+ Biggest difference in row (0, 0), sum 5.224671 vs 8.502550
|
|
|
+
|
|
|
+Layer 3, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 13.283526
|
|
|
+ Converted tensor sum: 35.439297
|
|
|
+ Original tensor mean: 1.660441
|
|
|
+ Converted tensor mean: 4.429912
|
|
|
+ Mean difference: 3.47373605
|
|
|
+ Maximum pointwise difference: 5.22519779
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 1.58731771, Converted: 6.81251574
|
|
|
+ Biggest difference in row (0, 0), sum 13.283526 vs 35.439297
|
|
|
+
|
|
|
+Layer 4, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.744591
|
|
|
+ Converted tensor sum: 31.593395
|
|
|
+ Original tensor mean: 2.218074
|
|
|
+ Converted tensor mean: 3.949174
|
|
|
+ Mean difference: 2.68589926
|
|
|
+ Maximum pointwise difference: 4.57245827
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -2.52367592, Converted: 2.04878211
|
|
|
+ Biggest difference in row (0, 0), sum 17.744591 vs 31.593395
|
|
|
+
|
|
|
+Layer 5, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 23.343349
|
|
|
+ Converted tensor sum: 33.269924
|
|
|
+ Original tensor mean: 2.917919
|
|
|
+ Converted tensor mean: 4.158741
|
|
|
+ Mean difference: 2.63248682
|
|
|
+ Maximum pointwise difference: 5.37845278
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 5.39788294, Converted: 0.01943016
|
|
|
+ Biggest difference in row (0, 0), sum 23.343349 vs 33.269924
|
|
|
+
|
|
|
+Layer 6, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 23.346264
|
|
|
+ Converted tensor sum: 35.443920
|
|
|
+ Original tensor mean: 2.918283
|
|
|
+ Converted tensor mean: 4.430490
|
|
|
+ Mean difference: 2.67119837
|
|
|
+ Maximum pointwise difference: 4.63596630
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 6.03884697, Converted: 1.40288091
|
|
|
+ Biggest difference in row (0, 0), sum 23.346264 vs 35.443920
|
|
|
+
|
|
|
+Layer 7, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 64.039200
|
|
|
+ Converted tensor sum: 91.760284
|
|
|
+ Original tensor mean: 8.004900
|
|
|
+ Converted tensor mean: 11.470036
|
|
|
+ Mean difference: 4.01984978
|
|
|
+ Maximum pointwise difference: 7.18059826
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 4.17877483, Converted: 11.35937309
|
|
|
+ Biggest difference in row (0, 0), sum 64.039200 vs 91.760284
|
|
|
+
|
|
|
+Layer 8, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 72.276039
|
|
|
+ Converted tensor sum: 93.156998
|
|
|
+ Original tensor mean: 9.034505
|
|
|
+ Converted tensor mean: 11.644625
|
|
|
+ Mean difference: 3.85819149
|
|
|
+ Maximum pointwise difference: 7.09706306
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 6.97290230, Converted: 14.06996536
|
|
|
+ Biggest difference in row (0, 0), sum 72.276039 vs 93.156998
|
|
|
+
|
|
|
+Layer 9, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 77.303429
|
|
|
+ Converted tensor sum: 87.750015
|
|
|
+ Original tensor mean: 9.662929
|
|
|
+ Converted tensor mean: 10.968752
|
|
|
+ Mean difference: 3.21908855
|
|
|
+ Maximum pointwise difference: 7.22212887
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 7.19689465, Converted: 14.41902351
|
|
|
+ Biggest difference in row (0, 0), sum 77.303429 vs 87.750015
|
|
|
+
|
|
|
+Layer 10, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 75.555130
|
|
|
+ Converted tensor sum: 87.081650
|
|
|
+ Original tensor mean: 9.444391
|
|
|
+ Converted tensor mean: 10.885206
|
|
|
+ Mean difference: 3.37582994
|
|
|
+ Maximum pointwise difference: 7.74006128
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 13.60124302, Converted: 5.86118174
|
|
|
+ Biggest difference in row (0, 0), sum 75.555130 vs 87.081650
|
|
|
+
|
|
|
+Layer 11, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 156.940781
|
|
|
+ Converted tensor sum: 159.013306
|
|
|
+ Original tensor mean: 19.617598
|
|
|
+ Converted tensor mean: 19.876663
|
|
|
+ Mean difference: 3.38565111
|
|
|
+ Maximum pointwise difference: 8.84408474
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 24.20116806, Converted: 15.35708332
|
|
|
+ Biggest difference in row (0, 0), sum 156.940781 vs 159.013306
|
|
|
+
|
|
|
+Layer 12, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 154.763428
|
|
|
+ Converted tensor sum: 153.900482
|
|
|
+ Original tensor mean: 19.345428
|
|
|
+ Converted tensor mean: 19.237560
|
|
|
+ Mean difference: 3.46122217
|
|
|
+ Maximum pointwise difference: 9.50335789
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 24.17844582, Converted: 14.67508793
|
|
|
+ Biggest difference in row (0, 0), sum 154.763428 vs 153.900482
|
|
|
+
|
|
|
+Layer 13, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 153.990646
|
|
|
+ Converted tensor sum: 150.608353
|
|
|
+ Original tensor mean: 19.248831
|
|
|
+ Converted tensor mean: 18.826044
|
|
|
+ Mean difference: 3.53592730
|
|
|
+ Maximum pointwise difference: 9.36601925
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 23.90514946, Converted: 14.53913021
|
|
|
+ Biggest difference in row (0, 0), sum 153.990646 vs 150.608353
|
|
|
+
|
|
|
+Layer 14, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 153.169525
|
|
|
+ Converted tensor sum: 133.618896
|
|
|
+ Original tensor mean: 19.146191
|
|
|
+ Converted tensor mean: 16.702362
|
|
|
+ Mean difference: 4.84187126
|
|
|
+ Maximum pointwise difference: 11.02708149
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 24.07042313, Converted: 13.04334164
|
|
|
+ Biggest difference in row (0, 0), sum 153.169525 vs 133.618896
|
|
|
+
|
|
|
+Layer 15, Token 7 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 256.612762
|
|
|
+ Converted tensor sum: 236.694611
|
|
|
+ Original tensor mean: 32.076595
|
|
|
+ Converted tensor mean: 29.586826
|
|
|
+ Mean difference: 4.89619875
|
|
|
+ Maximum pointwise difference: 11.06676292
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 34.29892731, Converted: 23.23216438
|
|
|
+ Biggest difference in row (0, 0), sum 256.612762 vs 236.694611
|
|
|
+
|
|
|
+Layer 0, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 4.551975
|
|
|
+ Converted tensor sum: 1.348729
|
|
|
+ Original tensor mean: 0.568997
|
|
|
+ Converted tensor mean: 0.168591
|
|
|
+ Mean difference: 2.05911183
|
|
|
+ Maximum pointwise difference: 5.11385345
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 1.91795087, Converted: -3.19590235
|
|
|
+ Biggest difference in row (0, 0), sum 4.551975 vs 1.348729
|
|
|
+
|
|
|
+Layer 1, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -10.499850
|
|
|
+ Converted tensor sum: -11.510830
|
|
|
+ Original tensor mean: -1.312481
|
|
|
+ Converted tensor mean: -1.438854
|
|
|
+ Mean difference: 3.72058988
|
|
|
+ Maximum pointwise difference: 7.12741280
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -4.90886450, Converted: 2.21854830
|
|
|
+ Biggest difference in row (0, 0), sum -10.499850 vs -11.510830
|
|
|
+
|
|
|
+Layer 2, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 21.469618
|
|
|
+ Converted tensor sum: 13.045154
|
|
|
+ Original tensor mean: 2.683702
|
|
|
+ Converted tensor mean: 1.630644
|
|
|
+ Mean difference: 4.73055506
|
|
|
+ Maximum pointwise difference: 11.87027359
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 6.33750200, Converted: -5.53277111
|
|
|
+ Biggest difference in row (0, 0), sum 21.469618 vs 13.045154
|
|
|
+
|
|
|
+Layer 3, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 56.933716
|
|
|
+ Converted tensor sum: 65.067757
|
|
|
+ Original tensor mean: 7.116714
|
|
|
+ Converted tensor mean: 8.133470
|
|
|
+ Mean difference: 5.21158791
|
|
|
+ Maximum pointwise difference: 10.06817722
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 10.73284817, Converted: 0.66467106
|
|
|
+ Biggest difference in row (0, 0), sum 56.933716 vs 65.067757
|
|
|
+
|
|
|
+Layer 4, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 54.841175
|
|
|
+ Converted tensor sum: 58.977600
|
|
|
+ Original tensor mean: 6.855147
|
|
|
+ Converted tensor mean: 7.372200
|
|
|
+ Mean difference: 5.39579868
|
|
|
+ Maximum pointwise difference: 10.23285866
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 10.38635254, Converted: 0.15349340
|
|
|
+ Biggest difference in row (0, 0), sum 54.841175 vs 58.977600
|
|
|
+
|
|
|
+Layer 5, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 59.439285
|
|
|
+ Converted tensor sum: 59.979446
|
|
|
+ Original tensor mean: 7.429911
|
|
|
+ Converted tensor mean: 7.497431
|
|
|
+ Mean difference: 5.44655371
|
|
|
+ Maximum pointwise difference: 11.05043030
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.77372646, Converted: -1.27670395
|
|
|
+ Biggest difference in row (0, 0), sum 59.439285 vs 59.979446
|
|
|
+
|
|
|
+Layer 6, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 57.398651
|
|
|
+ Converted tensor sum: 56.296188
|
|
|
+ Original tensor mean: 7.174831
|
|
|
+ Converted tensor mean: 7.037024
|
|
|
+ Mean difference: 5.29393005
|
|
|
+ Maximum pointwise difference: 9.82726002
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.26543045, Converted: -0.56182986
|
|
|
+ Biggest difference in row (0, 0), sum 57.398651 vs 56.296188
|
|
|
+
|
|
|
+Layer 7, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 108.492706
|
|
|
+ Converted tensor sum: 119.552338
|
|
|
+ Original tensor mean: 13.561588
|
|
|
+ Converted tensor mean: 14.944042
|
|
|
+ Mean difference: 5.49957895
|
|
|
+ Maximum pointwise difference: 11.73512173
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 10.45698833, Converted: 22.19211006
|
|
|
+ Biggest difference in row (0, 0), sum 108.492706 vs 119.552338
|
|
|
+
|
|
|
+Layer 8, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 106.563354
|
|
|
+ Converted tensor sum: 119.608925
|
|
|
+ Original tensor mean: 13.320419
|
|
|
+ Converted tensor mean: 14.951116
|
|
|
+ Mean difference: 4.46781254
|
|
|
+ Maximum pointwise difference: 10.82487202
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 9.85585308, Converted: 20.68072510
|
|
|
+ Biggest difference in row (0, 0), sum 106.563354 vs 119.608925
|
|
|
+
|
|
|
+Layer 9, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 111.512817
|
|
|
+ Converted tensor sum: 109.928528
|
|
|
+ Original tensor mean: 13.939102
|
|
|
+ Converted tensor mean: 13.741066
|
|
|
+ Mean difference: 4.52381039
|
|
|
+ Maximum pointwise difference: 8.89503384
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 12.07329845, Converted: 20.96833229
|
|
|
+ Biggest difference in row (0, 0), sum 111.512817 vs 109.928528
|
|
|
+
|
|
|
+Layer 10, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 111.241730
|
|
|
+ Converted tensor sum: 103.886688
|
|
|
+ Original tensor mean: 13.905216
|
|
|
+ Converted tensor mean: 12.985836
|
|
|
+ Mean difference: 4.59785748
|
|
|
+ Maximum pointwise difference: 8.55565834
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 11.01864815, Converted: 19.57430649
|
|
|
+ Biggest difference in row (0, 0), sum 111.241730 vs 103.886688
|
|
|
+
|
|
|
+Layer 11, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 194.094177
|
|
|
+ Converted tensor sum: 193.564484
|
|
|
+ Original tensor mean: 24.261772
|
|
|
+ Converted tensor mean: 24.195560
|
|
|
+ Mean difference: 4.49120235
|
|
|
+ Maximum pointwise difference: 9.88864136
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 22.25957108, Converted: 32.14821243
|
|
|
+ Biggest difference in row (0, 0), sum 194.094177 vs 193.564484
|
|
|
+
|
|
|
+Layer 12, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 196.658234
|
|
|
+ Converted tensor sum: 189.827057
|
|
|
+ Original tensor mean: 24.582279
|
|
|
+ Converted tensor mean: 23.728382
|
|
|
+ Mean difference: 5.10350180
|
|
|
+ Maximum pointwise difference: 9.80338287
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 30.23954582, Converted: 20.43616295
|
|
|
+ Biggest difference in row (0, 0), sum 196.658234 vs 189.827057
|
|
|
+
|
|
|
+Layer 13, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 193.237976
|
|
|
+ Converted tensor sum: 184.223190
|
|
|
+ Original tensor mean: 24.154747
|
|
|
+ Converted tensor mean: 23.027899
|
|
|
+ Mean difference: 5.11390686
|
|
|
+ Maximum pointwise difference: 10.04300690
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 30.05261230, Converted: 20.00960541
|
|
|
+ Biggest difference in row (0, 0), sum 193.237976 vs 184.223190
|
|
|
+
|
|
|
+Layer 14, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 183.582977
|
|
|
+ Converted tensor sum: 183.402130
|
|
|
+ Original tensor mean: 22.947872
|
|
|
+ Converted tensor mean: 22.925266
|
|
|
+ Mean difference: 5.41123581
|
|
|
+ Maximum pointwise difference: 10.28223228
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 19.26763725, Converted: 29.54986954
|
|
|
+ Biggest difference in row (0, 0), sum 183.582977 vs 183.402130
|
|
|
+
|
|
|
+Layer 15, Token 8 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 297.650543
|
|
|
+ Converted tensor sum: 301.053558
|
|
|
+ Original tensor mean: 37.206318
|
|
|
+ Converted tensor mean: 37.631695
|
|
|
+ Mean difference: 5.31624222
|
|
|
+ Maximum pointwise difference: 10.28567123
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 36.54620743, Converted: 46.83187866
|
|
|
+ Biggest difference in row (0, 0), sum 297.650543 vs 301.053558
|
|
|
+
|
|
|
+Layer 0, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 27.724323
|
|
|
+ Converted tensor sum: 7.010333
|
|
|
+ Original tensor mean: 3.465540
|
|
|
+ Converted tensor mean: 0.876292
|
|
|
+ Mean difference: 3.55158758
|
|
|
+ Maximum pointwise difference: 7.14975357
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 4.03241444, Converted: -3.11733937
|
|
|
+ Biggest difference in row (0, 0), sum 27.724323 vs 7.010333
|
|
|
+
|
|
|
+Layer 1, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.384836
|
|
|
+ Converted tensor sum: 7.348456
|
|
|
+ Original tensor mean: 2.173105
|
|
|
+ Converted tensor mean: 0.918557
|
|
|
+ Mean difference: 3.79201698
|
|
|
+ Maximum pointwise difference: 8.55698013
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -0.55471849, Converted: 8.00226116
|
|
|
+ Biggest difference in row (0, 0), sum 17.384836 vs 7.348456
|
|
|
+
|
|
|
+Layer 2, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 20.318661
|
|
|
+ Converted tensor sum: 28.392349
|
|
|
+ Original tensor mean: 2.539833
|
|
|
+ Converted tensor mean: 3.549044
|
|
|
+ Mean difference: 2.94842267
|
|
|
+ Maximum pointwise difference: 9.89197159
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -1.05586541, Converted: 8.83610630
|
|
|
+ Biggest difference in row (0, 0), sum 20.318661 vs 28.392349
|
|
|
+
|
|
|
+Layer 3, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.513725
|
|
|
+ Converted tensor sum: 84.414536
|
|
|
+ Original tensor mean: 8.189216
|
|
|
+ Converted tensor mean: 10.551817
|
|
|
+ Mean difference: 4.41447163
|
|
|
+ Maximum pointwise difference: 10.74111176
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 6.86948347, Converted: 17.61059570
|
|
|
+ Biggest difference in row (0, 0), sum 65.513725 vs 84.414536
|
|
|
+
|
|
|
+Layer 4, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 61.603691
|
|
|
+ Converted tensor sum: 72.172562
|
|
|
+ Original tensor mean: 7.700461
|
|
|
+ Converted tensor mean: 9.021570
|
|
|
+ Mean difference: 4.32150173
|
|
|
+ Maximum pointwise difference: 10.51774502
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 4.71584320, Converted: 15.23358822
|
|
|
+ Biggest difference in row (0, 0), sum 61.603691 vs 72.172562
|
|
|
+
|
|
|
+Layer 5, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 61.554985
|
|
|
+ Converted tensor sum: 60.684212
|
|
|
+ Original tensor mean: 7.694373
|
|
|
+ Converted tensor mean: 7.585526
|
|
|
+ Mean difference: 4.84910297
|
|
|
+ Maximum pointwise difference: 9.77899742
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 5.03849173, Converted: 14.81748962
|
|
|
+ Biggest difference in row (0, 0), sum 61.554985 vs 60.684212
|
|
|
+
|
|
|
+Layer 6, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 60.121288
|
|
|
+ Converted tensor sum: 61.323517
|
|
|
+ Original tensor mean: 7.515161
|
|
|
+ Converted tensor mean: 7.665440
|
|
|
+ Mean difference: 4.61501122
|
|
|
+ Maximum pointwise difference: 10.19813538
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 4.46036100, Converted: 14.65849590
|
|
|
+ Biggest difference in row (0, 0), sum 60.121288 vs 61.323517
|
|
|
+
|
|
|
+Layer 7, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 120.854408
|
|
|
+ Converted tensor sum: 122.564323
|
|
|
+ Original tensor mean: 15.106801
|
|
|
+ Converted tensor mean: 15.320540
|
|
|
+ Mean difference: 4.58281326
|
|
|
+ Maximum pointwise difference: 10.81363106
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 11.19677925, Converted: 22.01041031
|
|
|
+ Biggest difference in row (0, 0), sum 120.854408 vs 122.564323
|
|
|
+
|
|
|
+Layer 8, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 111.411377
|
|
|
+ Converted tensor sum: 113.878586
|
|
|
+ Original tensor mean: 13.926422
|
|
|
+ Converted tensor mean: 14.234823
|
|
|
+ Mean difference: 4.80341482
|
|
|
+ Maximum pointwise difference: 8.54869747
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 10.95728207, Converted: 19.50597954
|
|
|
+ Biggest difference in row (0, 0), sum 111.411377 vs 113.878586
|
|
|
+
|
|
|
+Layer 9, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 104.621353
|
|
|
+ Converted tensor sum: 99.551331
|
|
|
+ Original tensor mean: 13.077669
|
|
|
+ Converted tensor mean: 12.443916
|
|
|
+ Mean difference: 4.94641495
|
|
|
+ Maximum pointwise difference: 7.18619919
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 10.17811966, Converted: 17.36431885
|
|
|
+ Biggest difference in row (0, 0), sum 104.621353 vs 99.551331
|
|
|
+
|
|
|
+Layer 10, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 105.495895
|
|
|
+ Converted tensor sum: 90.669807
|
|
|
+ Original tensor mean: 13.186987
|
|
|
+ Converted tensor mean: 11.333726
|
|
|
+ Mean difference: 4.88313580
|
|
|
+ Maximum pointwise difference: 8.44397736
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 11.21555328, Converted: 2.77157593
|
|
|
+ Biggest difference in row (0, 0), sum 105.495895 vs 90.669807
|
|
|
+
|
|
|
+Layer 11, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 198.914932
|
|
|
+ Converted tensor sum: 187.657013
|
|
|
+ Original tensor mean: 24.864367
|
|
|
+ Converted tensor mean: 23.457127
|
|
|
+ Mean difference: 4.87979174
|
|
|
+ Maximum pointwise difference: 8.17332649
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 22.94329643, Converted: 14.76996994
|
|
|
+ Biggest difference in row (0, 0), sum 198.914932 vs 187.657013
|
|
|
+
|
|
|
+Layer 12, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 197.781982
|
|
|
+ Converted tensor sum: 182.248840
|
|
|
+ Original tensor mean: 24.722748
|
|
|
+ Converted tensor mean: 22.781105
|
|
|
+ Mean difference: 5.16355371
|
|
|
+ Maximum pointwise difference: 9.60578632
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 23.29119301, Converted: 13.68540668
|
|
|
+ Biggest difference in row (0, 0), sum 197.781982 vs 182.248840
|
|
|
+
|
|
|
+Layer 13, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 191.909027
|
|
|
+ Converted tensor sum: 177.667252
|
|
|
+ Original tensor mean: 23.988628
|
|
|
+ Converted tensor mean: 22.208406
|
|
|
+ Mean difference: 5.14386559
|
|
|
+ Maximum pointwise difference: 9.20664406
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 22.88940430, Converted: 13.68276024
|
|
|
+ Biggest difference in row (0, 0), sum 191.909027 vs 177.667252
|
|
|
+
|
|
|
+Layer 14, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 193.112854
|
|
|
+ Converted tensor sum: 170.826324
|
|
|
+ Original tensor mean: 24.139107
|
|
|
+ Converted tensor mean: 21.353291
|
|
|
+ Mean difference: 5.67996836
|
|
|
+ Maximum pointwise difference: 10.54143143
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 29.37781715, Converted: 18.83638573
|
|
|
+ Biggest difference in row (0, 0), sum 193.112854 vs 170.826324
|
|
|
+
|
|
|
+Layer 15, Token 9 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 310.393738
|
|
|
+ Converted tensor sum: 295.392517
|
|
|
+ Original tensor mean: 38.799217
|
|
|
+ Converted tensor mean: 36.924065
|
|
|
+ Mean difference: 5.11053467
|
|
|
+ Maximum pointwise difference: 9.09804153
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 43.17533493, Converted: 34.07729340
|
|
|
+ Biggest difference in row (0, 0), sum 310.393738 vs 295.392517
|
|
|
+
|
|
|
+Layer 0, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 11.304202
|
|
|
+ Converted tensor sum: 14.919886
|
|
|
+ Original tensor mean: 1.413025
|
|
|
+ Converted tensor mean: 1.864986
|
|
|
+ Mean difference: 1.20558476
|
|
|
+ Maximum pointwise difference: 2.02042794
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -0.23466866, Converted: 1.78575933
|
|
|
+ Biggest difference in row (0, 0), sum 11.304202 vs 14.919886
|
|
|
+
|
|
|
+Layer 1, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 4.380467
|
|
|
+ Converted tensor sum: 1.448399
|
|
|
+ Original tensor mean: 0.547558
|
|
|
+ Converted tensor mean: 0.181050
|
|
|
+ Mean difference: 1.55803418
|
|
|
+ Maximum pointwise difference: 3.08950615
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 3.13031340, Converted: 0.04080731
|
|
|
+ Biggest difference in row (0, 0), sum 4.380467 vs 1.448399
|
|
|
+
|
|
|
+Layer 2, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 21.641123
|
|
|
+ Converted tensor sum: 18.135971
|
|
|
+ Original tensor mean: 2.705140
|
|
|
+ Converted tensor mean: 2.266996
|
|
|
+ Mean difference: 2.29236317
|
|
|
+ Maximum pointwise difference: 5.34974813
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 4.73606253, Converted: -0.61368543
|
|
|
+ Biggest difference in row (0, 0), sum 21.641123 vs 18.135971
|
|
|
+
|
|
|
+Layer 3, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 84.183029
|
|
|
+ Converted tensor sum: 75.554764
|
|
|
+ Original tensor mean: 10.522879
|
|
|
+ Converted tensor mean: 9.444345
|
|
|
+ Mean difference: 2.50477004
|
|
|
+ Maximum pointwise difference: 7.33609867
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 11.12465191, Converted: 3.78855324
|
|
|
+ Biggest difference in row (0, 0), sum 84.183029 vs 75.554764
|
|
|
+
|
|
|
+Layer 4, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 75.952011
|
|
|
+ Converted tensor sum: 63.684746
|
|
|
+ Original tensor mean: 9.494001
|
|
|
+ Converted tensor mean: 7.960593
|
|
|
+ Mean difference: 2.89978528
|
|
|
+ Maximum pointwise difference: 6.58637476
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 10.07624340, Converted: 3.48986864
|
|
|
+ Biggest difference in row (0, 0), sum 75.952011 vs 63.684746
|
|
|
+
|
|
|
+Layer 5, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 67.380692
|
|
|
+ Converted tensor sum: 51.477894
|
|
|
+ Original tensor mean: 8.422586
|
|
|
+ Converted tensor mean: 6.434737
|
|
|
+ Mean difference: 2.92978549
|
|
|
+ Maximum pointwise difference: 6.54403639
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 10.47875404, Converted: 3.93471766
|
|
|
+ Biggest difference in row (0, 0), sum 67.380692 vs 51.477894
|
|
|
+
|
|
|
+Layer 6, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 64.356155
|
|
|
+ Converted tensor sum: 44.292259
|
|
|
+ Original tensor mean: 8.044519
|
|
|
+ Converted tensor mean: 5.536532
|
|
|
+ Mean difference: 3.18394947
|
|
|
+ Maximum pointwise difference: 7.18761826
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 9.33854580, Converted: 2.15092754
|
|
|
+ Biggest difference in row (0, 0), sum 64.356155 vs 44.292259
|
|
|
+
|
|
|
+Layer 7, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 124.955261
|
|
|
+ Converted tensor sum: 105.713638
|
|
|
+ Original tensor mean: 15.619408
|
|
|
+ Converted tensor mean: 13.214205
|
|
|
+ Mean difference: 3.17375469
|
|
|
+ Maximum pointwise difference: 7.15706635
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 16.18268585, Converted: 9.02561951
|
|
|
+ Biggest difference in row (0, 0), sum 124.955261 vs 105.713638
|
|
|
+
|
|
|
+Layer 8, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 105.275124
|
|
|
+ Converted tensor sum: 92.354050
|
|
|
+ Original tensor mean: 13.159390
|
|
|
+ Converted tensor mean: 11.544256
|
|
|
+ Mean difference: 2.89860010
|
|
|
+ Maximum pointwise difference: 6.96542978
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 12.28927994, Converted: 5.32385015
|
|
|
+ Biggest difference in row (0, 0), sum 105.275124 vs 92.354050
|
|
|
+
|
|
|
+Layer 9, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 89.282066
|
|
|
+ Converted tensor sum: 75.157639
|
|
|
+ Original tensor mean: 11.160258
|
|
|
+ Converted tensor mean: 9.394705
|
|
|
+ Mean difference: 2.89608860
|
|
|
+ Maximum pointwise difference: 7.40043926
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 9.69557953, Converted: 2.29514027
|
|
|
+ Biggest difference in row (0, 0), sum 89.282066 vs 75.157639
|
|
|
+
|
|
|
+Layer 10, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 87.814186
|
|
|
+ Converted tensor sum: 68.457840
|
|
|
+ Original tensor mean: 10.976773
|
|
|
+ Converted tensor mean: 8.557230
|
|
|
+ Mean difference: 3.06474447
|
|
|
+ Maximum pointwise difference: 8.03616142
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 10.20811844, Converted: 2.17195749
|
|
|
+ Biggest difference in row (0, 0), sum 87.814186 vs 68.457840
|
|
|
+
|
|
|
+Layer 11, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 184.781067
|
|
|
+ Converted tensor sum: 170.778610
|
|
|
+ Original tensor mean: 23.097633
|
|
|
+ Converted tensor mean: 21.347326
|
|
|
+ Mean difference: 2.85195446
|
|
|
+ Maximum pointwise difference: 6.81012630
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 22.35528374, Converted: 15.54515743
|
|
|
+ Biggest difference in row (0, 0), sum 184.781067 vs 170.778610
|
|
|
+
|
|
|
+Layer 12, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 187.157104
|
|
|
+ Converted tensor sum: 166.325562
|
|
|
+ Original tensor mean: 23.394638
|
|
|
+ Converted tensor mean: 20.790695
|
|
|
+ Mean difference: 3.00816154
|
|
|
+ Maximum pointwise difference: 8.29628849
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 23.84814453, Converted: 15.55185604
|
|
|
+ Biggest difference in row (0, 0), sum 187.157104 vs 166.325562
|
|
|
+
|
|
|
+Layer 13, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 180.577179
|
|
|
+ Converted tensor sum: 161.409668
|
|
|
+ Original tensor mean: 22.572147
|
|
|
+ Converted tensor mean: 20.176208
|
|
|
+ Mean difference: 3.22855854
|
|
|
+ Maximum pointwise difference: 8.27111149
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 22.88647652, Converted: 14.61536503
|
|
|
+ Biggest difference in row (0, 0), sum 180.577179 vs 161.409668
|
|
|
+
|
|
|
+Layer 14, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 176.409912
|
|
|
+ Converted tensor sum: 155.317413
|
|
|
+ Original tensor mean: 22.051239
|
|
|
+ Converted tensor mean: 19.414677
|
|
|
+ Mean difference: 3.30306578
|
|
|
+ Maximum pointwise difference: 8.51622581
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 21.49407005, Converted: 12.97784424
|
|
|
+ Biggest difference in row (0, 0), sum 176.409912 vs 155.317413
|
|
|
+
|
|
|
+Layer 15, Token 10 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 303.652618
|
|
|
+ Converted tensor sum: 289.143890
|
|
|
+ Original tensor mean: 37.956577
|
|
|
+ Converted tensor mean: 36.142986
|
|
|
+ Mean difference: 3.20148277
|
|
|
+ Maximum pointwise difference: 7.65085030
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 38.91091537, Converted: 31.26006508
|
|
|
+ Biggest difference in row (0, 0), sum 303.652618 vs 289.143890
|
|
|
+
|
|
|
+Layer 0, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 3.868190
|
|
|
+ Converted tensor sum: -4.365316
|
|
|
+ Original tensor mean: 0.483524
|
|
|
+ Converted tensor mean: -0.545665
|
|
|
+ Mean difference: 1.47696412
|
|
|
+ Maximum pointwise difference: 3.49379730
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 1.60926533, Converted: -1.88453186
|
|
|
+ Biggest difference in row (0, 0), sum 3.868190 vs -4.365316
|
|
|
+
|
|
|
+Layer 1, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -4.763882
|
|
|
+ Converted tensor sum: -8.100720
|
|
|
+ Original tensor mean: -0.595485
|
|
|
+ Converted tensor mean: -1.012590
|
|
|
+ Mean difference: 2.60996270
|
|
|
+ Maximum pointwise difference: 4.04230022
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -3.43199134, Converted: 0.61030883
|
|
|
+ Biggest difference in row (0, 0), sum -4.763882 vs -8.100720
|
|
|
+
|
|
|
+Layer 2, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -8.837991
|
|
|
+ Converted tensor sum: -17.355688
|
|
|
+ Original tensor mean: -1.104749
|
|
|
+ Converted tensor mean: -2.169461
|
|
|
+ Mean difference: 3.57004023
|
|
|
+ Maximum pointwise difference: 7.78442717
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -2.88003159, Converted: 4.90439558
|
|
|
+ Biggest difference in row (0, 0), sum -8.837991 vs -17.355688
|
|
|
+
|
|
|
+Layer 3, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -151.825806
|
|
|
+ Converted tensor sum: -119.589157
|
|
|
+ Original tensor mean: -18.978226
|
|
|
+ Converted tensor mean: -14.948645
|
|
|
+ Mean difference: 4.57043171
|
|
|
+ Maximum pointwise difference: 10.22036552
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -23.12115479, Converted: -12.90078926
|
|
|
+ Biggest difference in row (0, 0), sum -151.825806 vs -119.589157
|
|
|
+
|
|
|
+Layer 4, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -87.672623
|
|
|
+ Converted tensor sum: -49.333626
|
|
|
+ Original tensor mean: -10.959078
|
|
|
+ Converted tensor mean: -6.166703
|
|
|
+ Mean difference: 5.28691673
|
|
|
+ Maximum pointwise difference: 12.21502209
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -14.83695984, Converted: -2.62193775
|
|
|
+ Biggest difference in row (0, 0), sum -87.672623 vs -49.333626
|
|
|
+
|
|
|
+Layer 5, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -19.529230
|
|
|
+ Converted tensor sum: 51.921982
|
|
|
+ Original tensor mean: -2.441154
|
|
|
+ Converted tensor mean: 6.490248
|
|
|
+ Mean difference: 8.93140125
|
|
|
+ Maximum pointwise difference: 17.95970917
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 2.32367539, Converted: 20.28338432
|
|
|
+ Biggest difference in row (0, 0), sum -19.529230 vs 51.921982
|
|
|
+
|
|
|
+Layer 6, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -34.699642
|
|
|
+ Converted tensor sum: 49.364166
|
|
|
+ Original tensor mean: -4.337455
|
|
|
+ Converted tensor mean: 6.170521
|
|
|
+ Mean difference: 10.50797558
|
|
|
+ Maximum pointwise difference: 19.14058685
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -4.33303738, Converted: 14.80754948
|
|
|
+ Biggest difference in row (0, 0), sum -34.699642 vs 49.364166
|
|
|
+
|
|
|
+Layer 7, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -174.093460
|
|
|
+ Converted tensor sum: 116.110802
|
|
|
+ Original tensor mean: -21.761683
|
|
|
+ Converted tensor mean: 14.513850
|
|
|
+ Mean difference: 36.27553177
|
|
|
+ Maximum pointwise difference: 45.40389252
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -18.08833313, Converted: 27.31555748
|
|
|
+ Biggest difference in row (0, 0), sum -174.093460 vs 116.110802
|
|
|
+
|
|
|
+Layer 8, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -95.914619
|
|
|
+ Converted tensor sum: 104.116745
|
|
|
+ Original tensor mean: -11.989327
|
|
|
+ Converted tensor mean: 13.014593
|
|
|
+ Mean difference: 25.00392151
|
|
|
+ Maximum pointwise difference: 39.39223480
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -25.33579826, Converted: 14.05643463
|
|
|
+ Biggest difference in row (0, 0), sum -95.914619 vs 104.116745
|
|
|
+
|
|
|
+Layer 9, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -76.038055
|
|
|
+ Converted tensor sum: 86.082336
|
|
|
+ Original tensor mean: -9.504757
|
|
|
+ Converted tensor mean: 10.760292
|
|
|
+ Mean difference: 20.92745209
|
|
|
+ Maximum pointwise difference: 40.40296555
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -26.76908302, Converted: 13.63388157
|
|
|
+ Biggest difference in row (0, 0), sum -76.038055 vs 86.082336
|
|
|
+
|
|
|
+Layer 10, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -62.967239
|
|
|
+ Converted tensor sum: 79.332596
|
|
|
+ Original tensor mean: -7.870905
|
|
|
+ Converted tensor mean: 9.916574
|
|
|
+ Mean difference: 18.64283180
|
|
|
+ Maximum pointwise difference: 40.29864883
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -27.55656052, Converted: 12.74208832
|
|
|
+ Biggest difference in row (0, 0), sum -62.967239 vs 79.332596
|
|
|
+
|
|
|
+Layer 11, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -348.172638
|
|
|
+ Converted tensor sum: 185.268341
|
|
|
+ Original tensor mean: -43.521580
|
|
|
+ Converted tensor mean: 23.158543
|
|
|
+ Mean difference: 66.68012238
|
|
|
+ Maximum pointwise difference: 90.25902557
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -54.79597092, Converted: 35.46305466
|
|
|
+ Biggest difference in row (0, 0), sum -348.172638 vs 185.268341
|
|
|
+
|
|
|
+Layer 12, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -380.460999
|
|
|
+ Converted tensor sum: 184.850082
|
|
|
+ Original tensor mean: -47.557625
|
|
|
+ Converted tensor mean: 23.106260
|
|
|
+ Mean difference: 70.66388702
|
|
|
+ Maximum pointwise difference: 91.58323669
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -56.39131927, Converted: 35.19191360
|
|
|
+ Biggest difference in row (0, 0), sum -380.460999 vs 184.850082
|
|
|
+
|
|
|
+Layer 13, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -387.549927
|
|
|
+ Converted tensor sum: 178.291550
|
|
|
+ Original tensor mean: -48.443741
|
|
|
+ Converted tensor mean: 22.286444
|
|
|
+ Mean difference: 70.73018646
|
|
|
+ Maximum pointwise difference: 92.60649109
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -57.23683167, Converted: 35.36965561
|
|
|
+ Biggest difference in row (0, 0), sum -387.549927 vs 178.291550
|
|
|
+
|
|
|
+Layer 14, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -381.615417
|
|
|
+ Converted tensor sum: 175.841187
|
|
|
+ Original tensor mean: -47.701927
|
|
|
+ Converted tensor mean: 21.980148
|
|
|
+ Mean difference: 69.68207550
|
|
|
+ Maximum pointwise difference: 95.39483643
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -61.00698853, Converted: 34.38784409
|
|
|
+ Biggest difference in row (0, 0), sum -381.615417 vs 175.841187
|
|
|
+
|
|
|
+Layer 15, Token 11 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -791.898560
|
|
|
+ Converted tensor sum: 313.297852
|
|
|
+ Original tensor mean: -98.987320
|
|
|
+ Converted tensor mean: 39.162231
|
|
|
+ Mean difference: 138.14956665
|
|
|
+ Maximum pointwise difference: 174.31031799
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: -120.39865875, Converted: 53.91165924
|
|
|
+ Biggest difference in row (0, 0), sum -791.898560 vs 313.297852
|
|
|
+
|
|
|
+Layer 0, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.494652
|
|
|
+ Converted tensor sum: -39.301899
|
|
|
+ Original tensor mean: 2.186831
|
|
|
+ Converted tensor mean: -4.912737
|
|
|
+ Mean difference: 8.11834240
|
|
|
+ Maximum pointwise difference: 15.19715595
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 2.70196438, Converted: -12.49519157
|
|
|
+ Biggest difference in row (0, 0), sum 17.494652 vs -39.301899
|
|
|
+
|
|
|
+Layer 1, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 11.314701
|
|
|
+ Converted tensor sum: -35.014473
|
|
|
+ Original tensor mean: 1.414338
|
|
|
+ Converted tensor mean: -4.376809
|
|
|
+ Mean difference: 7.67025709
|
|
|
+ Maximum pointwise difference: 15.05980301
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 2.28716040, Converted: -12.77264309
|
|
|
+ Biggest difference in row (0, 0), sum 11.314701 vs -35.014473
|
|
|
+
|
|
|
+Layer 2, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 3.520873
|
|
|
+ Converted tensor sum: -23.351210
|
|
|
+ Original tensor mean: 0.440109
|
|
|
+ Converted tensor mean: -2.918901
|
|
|
+ Mean difference: 7.09708309
|
|
|
+ Maximum pointwise difference: 10.56869507
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 0.29396084, Converted: -10.27473450
|
|
|
+ Biggest difference in row (0, 0), sum 3.520873 vs -23.351210
|
|
|
+
|
|
|
+Layer 3, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -38.507721
|
|
|
+ Converted tensor sum: -65.860725
|
|
|
+ Original tensor mean: -4.813465
|
|
|
+ Converted tensor mean: -8.232591
|
|
|
+ Mean difference: 6.29614639
|
|
|
+ Maximum pointwise difference: 10.23156357
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -4.52744627, Converted: -14.75901031
|
|
|
+ Biggest difference in row (0, 0), sum -38.507721 vs -65.860725
|
|
|
+
|
|
|
+Layer 4, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -25.538549
|
|
|
+ Converted tensor sum: -16.346577
|
|
|
+ Original tensor mean: -3.192319
|
|
|
+ Converted tensor mean: -2.043322
|
|
|
+ Mean difference: 5.56114197
|
|
|
+ Maximum pointwise difference: 11.51591301
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -9.10746288, Converted: 2.40844989
|
|
|
+ Biggest difference in row (0, 0), sum -25.538549 vs -16.346577
|
|
|
+
|
|
|
+Layer 5, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 5.103131
|
|
|
+ Converted tensor sum: -11.820143
|
|
|
+ Original tensor mean: 0.637891
|
|
|
+ Converted tensor mean: -1.477518
|
|
|
+ Mean difference: 6.80205250
|
|
|
+ Maximum pointwise difference: 11.26421928
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 5.06476593, Converted: -6.19945335
|
|
|
+ Biggest difference in row (0, 0), sum 5.103131 vs -11.820143
|
|
|
+
|
|
|
+Layer 6, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 1.231229
|
|
|
+ Converted tensor sum: -13.329983
|
|
|
+ Original tensor mean: 0.153904
|
|
|
+ Converted tensor mean: -1.666248
|
|
|
+ Mean difference: 7.36224794
|
|
|
+ Maximum pointwise difference: 11.85875893
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 5.86865807, Converted: -5.99010086
|
|
|
+ Biggest difference in row (0, 0), sum 1.231229 vs -13.329983
|
|
|
+
|
|
|
+Layer 7, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 49.883171
|
|
|
+ Converted tensor sum: -138.587738
|
|
|
+ Original tensor mean: 6.235396
|
|
|
+ Converted tensor mean: -17.323467
|
|
|
+ Mean difference: 23.55886269
|
|
|
+ Maximum pointwise difference: 38.93606567
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 13.31498432, Converted: -25.62108231
|
|
|
+ Biggest difference in row (0, 0), sum 49.883171 vs -138.587738
|
|
|
+
|
|
|
+Layer 8, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 32.997459
|
|
|
+ Converted tensor sum: -79.532417
|
|
|
+ Original tensor mean: 4.124682
|
|
|
+ Converted tensor mean: -9.941552
|
|
|
+ Mean difference: 15.04267120
|
|
|
+ Maximum pointwise difference: 28.15183258
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 10.99009418, Converted: -17.16173744
|
|
|
+ Biggest difference in row (0, 0), sum 32.997459 vs -79.532417
|
|
|
+
|
|
|
+Layer 9, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 30.462442
|
|
|
+ Converted tensor sum: -58.022911
|
|
|
+ Original tensor mean: 3.807805
|
|
|
+ Converted tensor mean: -7.252864
|
|
|
+ Mean difference: 13.06616974
|
|
|
+ Maximum pointwise difference: 26.93473625
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 10.51771736, Converted: -16.41701889
|
|
|
+ Biggest difference in row (0, 0), sum 30.462442 vs -58.022911
|
|
|
+
|
|
|
+Layer 10, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 31.758196
|
|
|
+ Converted tensor sum: -31.289818
|
|
|
+ Original tensor mean: 3.969774
|
|
|
+ Converted tensor mean: -3.911227
|
|
|
+ Mean difference: 11.64717674
|
|
|
+ Maximum pointwise difference: 25.19077682
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 10.60759830, Converted: -14.58317757
|
|
|
+ Biggest difference in row (0, 0), sum 31.758196 vs -31.289818
|
|
|
+
|
|
|
+Layer 11, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 64.195580
|
|
|
+ Converted tensor sum: -290.078918
|
|
|
+ Original tensor mean: 8.024447
|
|
|
+ Converted tensor mean: -36.259865
|
|
|
+ Mean difference: 44.28431320
|
|
|
+ Maximum pointwise difference: 58.32298279
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 14.01799965, Converted: -44.30498123
|
|
|
+ Biggest difference in row (0, 0), sum 64.195580 vs -290.078918
|
|
|
+
|
|
|
+Layer 12, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.652679
|
|
|
+ Converted tensor sum: -300.691650
|
|
|
+ Original tensor mean: 8.206585
|
|
|
+ Converted tensor mean: -37.586456
|
|
|
+ Mean difference: 45.79303741
|
|
|
+ Maximum pointwise difference: 64.50979614
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 13.89292812, Converted: -50.61687088
|
|
|
+ Biggest difference in row (0, 0), sum 65.652679 vs -300.691650
|
|
|
+
|
|
|
+Layer 13, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 64.880409
|
|
|
+ Converted tensor sum: -292.294403
|
|
|
+ Original tensor mean: 8.110051
|
|
|
+ Converted tensor mean: -36.536800
|
|
|
+ Mean difference: 44.64685059
|
|
|
+ Maximum pointwise difference: 61.03430176
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 8.34301949, Converted: -52.69128418
|
|
|
+ Biggest difference in row (0, 0), sum 64.880409 vs -292.294403
|
|
|
+
|
|
|
+Layer 14, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 55.352615
|
|
|
+ Converted tensor sum: -232.615005
|
|
|
+ Original tensor mean: 6.919077
|
|
|
+ Converted tensor mean: -29.076876
|
|
|
+ Mean difference: 35.99595261
|
|
|
+ Maximum pointwise difference: 69.32642365
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 7.07370424, Converted: -62.25271606
|
|
|
+ Biggest difference in row (0, 0), sum 55.352615 vs -232.615005
|
|
|
+
|
|
|
+Layer 15, Token 12 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 191.366241
|
|
|
+ Converted tensor sum: -607.544556
|
|
|
+ Original tensor mean: 23.920780
|
|
|
+ Converted tensor mean: -75.943069
|
|
|
+ Mean difference: 99.86384583
|
|
|
+ Maximum pointwise difference: 121.99198914
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 33.01739502, Converted: -88.97459412
|
|
|
+ Biggest difference in row (0, 0), sum 191.366241 vs -607.544556
|
|
|
+
|
|
|
+Layer 0, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 28.716766
|
|
|
+ Converted tensor sum: 24.262428
|
|
|
+ Original tensor mean: 3.589596
|
|
|
+ Converted tensor mean: 3.032804
|
|
|
+ Mean difference: 2.20962214
|
|
|
+ Maximum pointwise difference: 5.77315617
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 4.62014198, Converted: -1.15301442
|
|
|
+ Biggest difference in row (0, 0), sum 28.716766 vs 24.262428
|
|
|
+
|
|
|
+Layer 1, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 18.283722
|
|
|
+ Converted tensor sum: 16.804958
|
|
|
+ Original tensor mean: 2.285465
|
|
|
+ Converted tensor mean: 2.100620
|
|
|
+ Mean difference: 2.44061017
|
|
|
+ Maximum pointwise difference: 5.48099232
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -7.47550392, Converted: -1.99451160
|
|
|
+ Biggest difference in row (0, 0), sum 18.283722 vs 16.804958
|
|
|
+
|
|
|
+Layer 2, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 14.973861
|
|
|
+ Converted tensor sum: 10.670280
|
|
|
+ Original tensor mean: 1.871733
|
|
|
+ Converted tensor mean: 1.333785
|
|
|
+ Mean difference: 2.94856715
|
|
|
+ Maximum pointwise difference: 6.09164524
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 2.11467242, Converted: -3.97697282
|
|
|
+ Biggest difference in row (0, 0), sum 14.973861 vs 10.670280
|
|
|
+
|
|
|
+Layer 3, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 62.116623
|
|
|
+ Converted tensor sum: 46.581398
|
|
|
+ Original tensor mean: 7.764578
|
|
|
+ Converted tensor mean: 5.822675
|
|
|
+ Mean difference: 3.59710693
|
|
|
+ Maximum pointwise difference: 6.89595842
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 11.14201260, Converted: 4.24605417
|
|
|
+ Biggest difference in row (0, 0), sum 62.116623 vs 46.581398
|
|
|
+
|
|
|
+Layer 4, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.792244
|
|
|
+ Converted tensor sum: 43.042854
|
|
|
+ Original tensor mean: 8.224030
|
|
|
+ Converted tensor mean: 5.380357
|
|
|
+ Mean difference: 3.63414001
|
|
|
+ Maximum pointwise difference: 8.06606770
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 9.46925735, Converted: 1.40318930
|
|
|
+ Biggest difference in row (0, 0), sum 65.792244 vs 43.042854
|
|
|
+
|
|
|
+Layer 5, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 60.294563
|
|
|
+ Converted tensor sum: 38.709320
|
|
|
+ Original tensor mean: 7.536820
|
|
|
+ Converted tensor mean: 4.838665
|
|
|
+ Mean difference: 4.29471397
|
|
|
+ Maximum pointwise difference: 9.28423500
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 9.56281090, Converted: 0.27857587
|
|
|
+ Biggest difference in row (0, 0), sum 60.294563 vs 38.709320
|
|
|
+
|
|
|
+Layer 6, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 60.864697
|
|
|
+ Converted tensor sum: 41.897995
|
|
|
+ Original tensor mean: 7.608087
|
|
|
+ Converted tensor mean: 5.237249
|
|
|
+ Mean difference: 4.15325356
|
|
|
+ Maximum pointwise difference: 7.30325747
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 11.01063633, Converted: 3.70737886
|
|
|
+ Biggest difference in row (0, 0), sum 60.864697 vs 41.897995
|
|
|
+
|
|
|
+Layer 7, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 124.166924
|
|
|
+ Converted tensor sum: 107.577675
|
|
|
+ Original tensor mean: 15.520865
|
|
|
+ Converted tensor mean: 13.447209
|
|
|
+ Mean difference: 4.08049011
|
|
|
+ Maximum pointwise difference: 7.30880928
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 17.63167572, Converted: 10.32286644
|
|
|
+ Biggest difference in row (0, 0), sum 124.166924 vs 107.577675
|
|
|
+
|
|
|
+Layer 8, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 114.534744
|
|
|
+ Converted tensor sum: 106.782104
|
|
|
+ Original tensor mean: 14.316843
|
|
|
+ Converted tensor mean: 13.347763
|
|
|
+ Mean difference: 3.79455638
|
|
|
+ Maximum pointwise difference: 8.56559753
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 18.64526367, Converted: 10.07966614
|
|
|
+ Biggest difference in row (0, 0), sum 114.534744 vs 106.782104
|
|
|
+
|
|
|
+Layer 9, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 111.904816
|
|
|
+ Converted tensor sum: 90.398567
|
|
|
+ Original tensor mean: 13.988102
|
|
|
+ Converted tensor mean: 11.299821
|
|
|
+ Mean difference: 4.39770985
|
|
|
+ Maximum pointwise difference: 12.01837921
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 18.37693977, Converted: 6.35856009
|
|
|
+ Biggest difference in row (0, 0), sum 111.904816 vs 90.398567
|
|
|
+
|
|
|
+Layer 10, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 106.496719
|
|
|
+ Converted tensor sum: 84.186646
|
|
|
+ Original tensor mean: 13.312090
|
|
|
+ Converted tensor mean: 10.523331
|
|
|
+ Mean difference: 4.35723734
|
|
|
+ Maximum pointwise difference: 11.76342964
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 17.81115723, Converted: 6.04772711
|
|
|
+ Biggest difference in row (0, 0), sum 106.496719 vs 84.186646
|
|
|
+
|
|
|
+Layer 11, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 197.848022
|
|
|
+ Converted tensor sum: 191.943436
|
|
|
+ Original tensor mean: 24.731003
|
|
|
+ Converted tensor mean: 23.992929
|
|
|
+ Mean difference: 3.31890941
|
|
|
+ Maximum pointwise difference: 10.13029099
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 27.00849915, Converted: 16.87820816
|
|
|
+ Biggest difference in row (0, 0), sum 197.848022 vs 191.943436
|
|
|
+
|
|
|
+Layer 12, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 197.513275
|
|
|
+ Converted tensor sum: 189.807312
|
|
|
+ Original tensor mean: 24.689159
|
|
|
+ Converted tensor mean: 23.725914
|
|
|
+ Mean difference: 3.50938702
|
|
|
+ Maximum pointwise difference: 10.66487598
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 26.39979744, Converted: 15.73492146
|
|
|
+ Biggest difference in row (0, 0), sum 197.513275 vs 189.807312
|
|
|
+
|
|
|
+Layer 13, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 193.055618
|
|
|
+ Converted tensor sum: 185.801392
|
|
|
+ Original tensor mean: 24.131952
|
|
|
+ Converted tensor mean: 23.225174
|
|
|
+ Mean difference: 3.32275867
|
|
|
+ Maximum pointwise difference: 10.17280674
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 25.57653046, Converted: 15.40372372
|
|
|
+ Biggest difference in row (0, 0), sum 193.055618 vs 185.801392
|
|
|
+
|
|
|
+Layer 14, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 190.084717
|
|
|
+ Converted tensor sum: 186.092697
|
|
|
+ Original tensor mean: 23.760590
|
|
|
+ Converted tensor mean: 23.261587
|
|
|
+ Mean difference: 3.19069362
|
|
|
+ Maximum pointwise difference: 9.42493057
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 24.81001282, Converted: 15.38508224
|
|
|
+ Biggest difference in row (0, 0), sum 190.084717 vs 186.092697
|
|
|
+
|
|
|
+Layer 15, Token 13 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 319.170319
|
|
|
+ Converted tensor sum: 323.837036
|
|
|
+ Original tensor mean: 39.896290
|
|
|
+ Converted tensor mean: 40.479630
|
|
|
+ Mean difference: 3.55193925
|
|
|
+ Maximum pointwise difference: 8.15688324
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 46.74212265, Converted: 38.58523941
|
|
|
+ Biggest difference in row (0, 0), sum 319.170319 vs 323.837036
|
|
|
+
|
|
|
+Layer 0, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 60.062901
|
|
|
+ Converted tensor sum: 42.401054
|
|
|
+ Original tensor mean: 7.507863
|
|
|
+ Converted tensor mean: 5.300132
|
|
|
+ Mean difference: 2.97920632
|
|
|
+ Maximum pointwise difference: 7.75320148
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 13.24933434, Converted: 5.49613285
|
|
|
+ Biggest difference in row (0, 0), sum 60.062901 vs 42.401054
|
|
|
+
|
|
|
+Layer 1, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 48.843086
|
|
|
+ Converted tensor sum: 34.002205
|
|
|
+ Original tensor mean: 6.105386
|
|
|
+ Converted tensor mean: 4.250276
|
|
|
+ Mean difference: 2.82561874
|
|
|
+ Maximum pointwise difference: 7.41196299
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 12.44728756, Converted: 5.03532457
|
|
|
+ Biggest difference in row (0, 0), sum 48.843086 vs 34.002205
|
|
|
+
|
|
|
+Layer 2, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 49.100876
|
|
|
+ Converted tensor sum: 29.831078
|
|
|
+ Original tensor mean: 6.137609
|
|
|
+ Converted tensor mean: 3.728885
|
|
|
+ Mean difference: 3.44625640
|
|
|
+ Maximum pointwise difference: 8.00705624
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 12.05760670, Converted: 4.05055046
|
|
|
+ Biggest difference in row (0, 0), sum 49.100876 vs 29.831078
|
|
|
+
|
|
|
+Layer 3, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 94.051392
|
|
|
+ Converted tensor sum: 85.936119
|
|
|
+ Original tensor mean: 11.756424
|
|
|
+ Converted tensor mean: 10.742015
|
|
|
+ Mean difference: 3.43988085
|
|
|
+ Maximum pointwise difference: 6.90394783
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 18.44681168, Converted: 11.54286385
|
|
|
+ Biggest difference in row (0, 0), sum 94.051392 vs 85.936119
|
|
|
+
|
|
|
+Layer 4, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 90.357742
|
|
|
+ Converted tensor sum: 82.357994
|
|
|
+ Original tensor mean: 11.294718
|
|
|
+ Converted tensor mean: 10.294749
|
|
|
+ Mean difference: 3.55732656
|
|
|
+ Maximum pointwise difference: 7.83766174
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 19.03264809, Converted: 11.19498634
|
|
|
+ Biggest difference in row (0, 0), sum 90.357742 vs 82.357994
|
|
|
+
|
|
|
+Layer 5, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 84.158882
|
|
|
+ Converted tensor sum: 72.302864
|
|
|
+ Original tensor mean: 10.519860
|
|
|
+ Converted tensor mean: 9.037858
|
|
|
+ Mean difference: 3.79493260
|
|
|
+ Maximum pointwise difference: 9.27737904
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 18.81698799, Converted: 9.53960896
|
|
|
+ Biggest difference in row (0, 0), sum 84.158882 vs 72.302864
|
|
|
+
|
|
|
+Layer 6, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 82.342606
|
|
|
+ Converted tensor sum: 74.838448
|
|
|
+ Original tensor mean: 10.292826
|
|
|
+ Converted tensor mean: 9.354806
|
|
|
+ Mean difference: 3.72385550
|
|
|
+ Maximum pointwise difference: 8.27861023
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 18.35614967, Converted: 10.07753944
|
|
|
+ Biggest difference in row (0, 0), sum 82.342606 vs 74.838448
|
|
|
+
|
|
|
+Layer 7, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 152.811584
|
|
|
+ Converted tensor sum: 143.282593
|
|
|
+ Original tensor mean: 19.101448
|
|
|
+ Converted tensor mean: 17.910324
|
|
|
+ Mean difference: 3.79641771
|
|
|
+ Maximum pointwise difference: 8.94160843
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 28.97978973, Converted: 20.03818130
|
|
|
+ Biggest difference in row (0, 0), sum 152.811584 vs 143.282593
|
|
|
+
|
|
|
+Layer 8, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 134.962891
|
|
|
+ Converted tensor sum: 135.762573
|
|
|
+ Original tensor mean: 16.870361
|
|
|
+ Converted tensor mean: 16.970322
|
|
|
+ Mean difference: 3.42910838
|
|
|
+ Maximum pointwise difference: 6.22266769
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 27.13297844, Converted: 20.91031075
|
|
|
+ Biggest difference in row (0, 0), sum 134.962891 vs 135.762573
|
|
|
+
|
|
|
+Layer 9, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 131.262939
|
|
|
+ Converted tensor sum: 130.663895
|
|
|
+ Original tensor mean: 16.407867
|
|
|
+ Converted tensor mean: 16.332987
|
|
|
+ Mean difference: 3.14643574
|
|
|
+ Maximum pointwise difference: 6.41224289
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 25.90853310, Converted: 19.49629021
|
|
|
+ Biggest difference in row (0, 0), sum 131.262939 vs 130.663895
|
|
|
+
|
|
|
+Layer 10, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 130.994781
|
|
|
+ Converted tensor sum: 121.948547
|
|
|
+ Original tensor mean: 16.374348
|
|
|
+ Converted tensor mean: 15.243568
|
|
|
+ Mean difference: 3.14505911
|
|
|
+ Maximum pointwise difference: 6.92271805
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 25.71545982, Converted: 18.79274178
|
|
|
+ Biggest difference in row (0, 0), sum 130.994781 vs 121.948547
|
|
|
+
|
|
|
+Layer 11, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 227.322296
|
|
|
+ Converted tensor sum: 221.945038
|
|
|
+ Original tensor mean: 28.415287
|
|
|
+ Converted tensor mean: 27.743130
|
|
|
+ Mean difference: 2.92038918
|
|
|
+ Maximum pointwise difference: 6.72454262
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 35.09742355, Converted: 28.37288094
|
|
|
+ Biggest difference in row (0, 0), sum 227.322296 vs 221.945038
|
|
|
+
|
|
|
+Layer 12, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 226.411957
|
|
|
+ Converted tensor sum: 219.124207
|
|
|
+ Original tensor mean: 28.301495
|
|
|
+ Converted tensor mean: 27.390526
|
|
|
+ Mean difference: 3.00309324
|
|
|
+ Maximum pointwise difference: 5.31435776
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 32.55270767, Converted: 27.23834991
|
|
|
+ Biggest difference in row (0, 0), sum 226.411957 vs 219.124207
|
|
|
+
|
|
|
+Layer 13, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 222.480804
|
|
|
+ Converted tensor sum: 215.029236
|
|
|
+ Original tensor mean: 27.810101
|
|
|
+ Converted tensor mean: 26.878654
|
|
|
+ Mean difference: 3.01644969
|
|
|
+ Maximum pointwise difference: 5.75550079
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 32.34063721, Converted: 26.58513641
|
|
|
+ Biggest difference in row (0, 0), sum 222.480804 vs 215.029236
|
|
|
+
|
|
|
+Layer 14, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 217.584625
|
|
|
+ Converted tensor sum: 210.219940
|
|
|
+ Original tensor mean: 27.198078
|
|
|
+ Converted tensor mean: 26.277493
|
|
|
+ Mean difference: 3.42921877
|
|
|
+ Maximum pointwise difference: 5.59035873
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 31.23370743, Converted: 25.64334869
|
|
|
+ Biggest difference in row (0, 0), sum 217.584625 vs 210.219940
|
|
|
+
|
|
|
+Layer 15, Token 14 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 347.902100
|
|
|
+ Converted tensor sum: 344.275635
|
|
|
+ Original tensor mean: 43.487762
|
|
|
+ Converted tensor mean: 43.034454
|
|
|
+ Mean difference: 3.27294016
|
|
|
+ Maximum pointwise difference: 5.50515747
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 41.73074341, Converted: 47.23590088
|
|
|
+ Biggest difference in row (0, 0), sum 347.902100 vs 344.275635
|
|
|
+
|
|
|
+Layer 0, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 2.268566
|
|
|
+ Converted tensor sum: -1.956201
|
|
|
+ Original tensor mean: 0.283571
|
|
|
+ Converted tensor mean: -0.244525
|
|
|
+ Mean difference: 1.30659735
|
|
|
+ Maximum pointwise difference: 3.65664506
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 3.25675011, Converted: -0.39989486
|
|
|
+ Biggest difference in row (0, 0), sum 2.268566 vs -1.956201
|
|
|
+
|
|
|
+Layer 1, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -3.244995
|
|
|
+ Converted tensor sum: -0.596967
|
|
|
+ Original tensor mean: -0.405624
|
|
|
+ Converted tensor mean: -0.074621
|
|
|
+ Mean difference: 1.73462176
|
|
|
+ Maximum pointwise difference: 3.99903250
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 2.07227492, Converted: -1.92675745
|
|
|
+ Biggest difference in row (0, 0), sum -3.244995 vs -0.596967
|
|
|
+
|
|
|
+Layer 2, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 18.643393
|
|
|
+ Converted tensor sum: -7.624215
|
|
|
+ Original tensor mean: 2.330424
|
|
|
+ Converted tensor mean: -0.953027
|
|
|
+ Mean difference: 3.99837518
|
|
|
+ Maximum pointwise difference: 9.85657215
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 9.41628456, Converted: -0.44028741
|
|
|
+ Biggest difference in row (0, 0), sum 18.643393 vs -7.624215
|
|
|
+
|
|
|
+Layer 3, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 77.711205
|
|
|
+ Converted tensor sum: -115.602707
|
|
|
+ Original tensor mean: 9.713901
|
|
|
+ Converted tensor mean: -14.450338
|
|
|
+ Mean difference: 24.16423798
|
|
|
+ Maximum pointwise difference: 33.14313507
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 17.84219551, Converted: -15.30093956
|
|
|
+ Biggest difference in row (0, 0), sum 77.711205 vs -115.602707
|
|
|
+
|
|
|
+Layer 4, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 71.264816
|
|
|
+ Converted tensor sum: -87.184593
|
|
|
+ Original tensor mean: 8.908102
|
|
|
+ Converted tensor mean: -10.898074
|
|
|
+ Mean difference: 19.80617714
|
|
|
+ Maximum pointwise difference: 27.60903931
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 16.59056091, Converted: -11.01847839
|
|
|
+ Biggest difference in row (0, 0), sum 71.264816 vs -87.184593
|
|
|
+
|
|
|
+Layer 5, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.154488
|
|
|
+ Converted tensor sum: -20.586208
|
|
|
+ Original tensor mean: 8.144311
|
|
|
+ Converted tensor mean: -2.573276
|
|
|
+ Mean difference: 11.36003971
|
|
|
+ Maximum pointwise difference: 17.89420700
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 3.50937057, Converted: -14.38483620
|
|
|
+ Biggest difference in row (0, 0), sum 65.154488 vs -20.586208
|
|
|
+
|
|
|
+Layer 6, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 62.447323
|
|
|
+ Converted tensor sum: -39.734089
|
|
|
+ Original tensor mean: 7.805915
|
|
|
+ Converted tensor mean: -4.966761
|
|
|
+ Mean difference: 12.77267647
|
|
|
+ Maximum pointwise difference: 22.75133705
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 14.95188141, Converted: -7.79945612
|
|
|
+ Biggest difference in row (0, 0), sum 62.447323 vs -39.734089
|
|
|
+
|
|
|
+Layer 7, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 127.895920
|
|
|
+ Converted tensor sum: -184.804230
|
|
|
+ Original tensor mean: 15.986990
|
|
|
+ Converted tensor mean: -23.100529
|
|
|
+ Mean difference: 39.08751678
|
|
|
+ Maximum pointwise difference: 51.54846191
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 16.32706261, Converted: -35.22139740
|
|
|
+ Biggest difference in row (0, 0), sum 127.895920 vs -184.804230
|
|
|
+
|
|
|
+Layer 8, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 109.946281
|
|
|
+ Converted tensor sum: -183.545380
|
|
|
+ Original tensor mean: 13.743285
|
|
|
+ Converted tensor mean: -22.943172
|
|
|
+ Mean difference: 36.68645859
|
|
|
+ Maximum pointwise difference: 44.14192963
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 21.54407120, Converted: -22.59785843
|
|
|
+ Biggest difference in row (0, 0), sum 109.946281 vs -183.545380
|
|
|
+
|
|
|
+Layer 9, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 100.719040
|
|
|
+ Converted tensor sum: -189.035889
|
|
|
+ Original tensor mean: 12.589880
|
|
|
+ Converted tensor mean: -23.629486
|
|
|
+ Mean difference: 36.21936798
|
|
|
+ Maximum pointwise difference: 49.71876526
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 20.62917519, Converted: -29.08958817
|
|
|
+ Biggest difference in row (0, 0), sum 100.719040 vs -189.035889
|
|
|
+
|
|
|
+Layer 10, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 94.437965
|
|
|
+ Converted tensor sum: -184.073608
|
|
|
+ Original tensor mean: 11.804746
|
|
|
+ Converted tensor mean: -23.009201
|
|
|
+ Mean difference: 34.81394577
|
|
|
+ Maximum pointwise difference: 49.50559998
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 19.98403168, Converted: -29.52156830
|
|
|
+ Biggest difference in row (0, 0), sum 94.437965 vs -184.073608
|
|
|
+
|
|
|
+Layer 11, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 187.329086
|
|
|
+ Converted tensor sum: -525.129150
|
|
|
+ Original tensor mean: 23.416136
|
|
|
+ Converted tensor mean: -65.641144
|
|
|
+ Mean difference: 89.05728149
|
|
|
+ Maximum pointwise difference: 114.85643005
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 32.40055466, Converted: -82.45587158
|
|
|
+ Biggest difference in row (0, 0), sum 187.329086 vs -525.129150
|
|
|
+
|
|
|
+Layer 12, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 189.391296
|
|
|
+ Converted tensor sum: -524.645203
|
|
|
+ Original tensor mean: 23.673912
|
|
|
+ Converted tensor mean: -65.580650
|
|
|
+ Mean difference: 89.25456238
|
|
|
+ Maximum pointwise difference: 119.02915955
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 33.67853165, Converted: -85.35062408
|
|
|
+ Biggest difference in row (0, 0), sum 189.391296 vs -524.645203
|
|
|
+
|
|
|
+Layer 13, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 183.008652
|
|
|
+ Converted tensor sum: -545.134033
|
|
|
+ Original tensor mean: 22.876081
|
|
|
+ Converted tensor mean: -68.141754
|
|
|
+ Mean difference: 91.01783752
|
|
|
+ Maximum pointwise difference: 119.28398132
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 33.81208420, Converted: -85.47189331
|
|
|
+ Biggest difference in row (0, 0), sum 183.008652 vs -545.134033
|
|
|
+
|
|
|
+Layer 14, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 179.184265
|
|
|
+ Converted tensor sum: -590.197998
|
|
|
+ Original tensor mean: 22.398033
|
|
|
+ Converted tensor mean: -73.774750
|
|
|
+ Mean difference: 96.17278290
|
|
|
+ Maximum pointwise difference: 126.14685059
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 33.16656876, Converted: -92.98027802
|
|
|
+ Biggest difference in row (0, 0), sum 179.184265 vs -590.197998
|
|
|
+
|
|
|
+Layer 15, Token 15 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 315.300140
|
|
|
+ Converted tensor sum: -976.074097
|
|
|
+ Original tensor mean: 39.412518
|
|
|
+ Converted tensor mean: -122.009262
|
|
|
+ Mean difference: 161.42178345
|
|
|
+ Maximum pointwise difference: 201.52458191
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 52.86392212, Converted: -148.66065979
|
|
|
+ Biggest difference in row (0, 0), sum 315.300140 vs -976.074097
|
|
|
+
|
|
|
+Layer 0, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 12.044241
|
|
|
+ Converted tensor sum: 14.548074
|
|
|
+ Original tensor mean: 1.505530
|
|
|
+ Converted tensor mean: 1.818509
|
|
|
+ Mean difference: 3.51175261
|
|
|
+ Maximum pointwise difference: 7.44231224
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -4.31869221, Converted: 3.12362027
|
|
|
+ Biggest difference in row (0, 0), sum 12.044241 vs 14.548074
|
|
|
+
|
|
|
+Layer 1, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 7.660315
|
|
|
+ Converted tensor sum: 1.425261
|
|
|
+ Original tensor mean: 0.957539
|
|
|
+ Converted tensor mean: 0.178158
|
|
|
+ Mean difference: 4.00331783
|
|
|
+ Maximum pointwise difference: 8.79326248
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 3.55122566, Converted: -5.24203634
|
|
|
+ Biggest difference in row (0, 0), sum 7.660315 vs 1.425261
|
|
|
+
|
|
|
+Layer 2, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 5.985608
|
|
|
+ Converted tensor sum: -2.881522
|
|
|
+ Original tensor mean: 0.748201
|
|
|
+ Converted tensor mean: -0.360190
|
|
|
+ Mean difference: 6.00233269
|
|
|
+ Maximum pointwise difference: 9.75814056
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 3.30634618, Converted: -6.45179462
|
|
|
+ Biggest difference in row (0, 0), sum 5.985608 vs -2.881522
|
|
|
+
|
|
|
+Layer 3, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 66.644623
|
|
|
+ Converted tensor sum: 38.471397
|
|
|
+ Original tensor mean: 8.330578
|
|
|
+ Converted tensor mean: 4.808925
|
|
|
+ Mean difference: 5.99987411
|
|
|
+ Maximum pointwise difference: 11.70975304
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 11.37678432, Converted: -0.33296829
|
|
|
+ Biggest difference in row (0, 0), sum 66.644623 vs 38.471397
|
|
|
+
|
|
|
+Layer 4, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 55.084259
|
|
|
+ Converted tensor sum: 39.585022
|
|
|
+ Original tensor mean: 6.885532
|
|
|
+ Converted tensor mean: 4.948128
|
|
|
+ Mean difference: 5.54818344
|
|
|
+ Maximum pointwise difference: 10.42512989
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 8.96806908, Converted: -1.45706093
|
|
|
+ Biggest difference in row (0, 0), sum 55.084259 vs 39.585022
|
|
|
+
|
|
|
+Layer 5, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 47.768257
|
|
|
+ Converted tensor sum: 29.551674
|
|
|
+ Original tensor mean: 5.971032
|
|
|
+ Converted tensor mean: 3.693959
|
|
|
+ Mean difference: 5.40017319
|
|
|
+ Maximum pointwise difference: 11.83149147
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 9.62209320, Converted: -2.20939875
|
|
|
+ Biggest difference in row (0, 0), sum 47.768257 vs 29.551674
|
|
|
+
|
|
|
+Layer 6, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 47.378487
|
|
|
+ Converted tensor sum: 33.471664
|
|
|
+ Original tensor mean: 5.922311
|
|
|
+ Converted tensor mean: 4.183958
|
|
|
+ Mean difference: 5.35756683
|
|
|
+ Maximum pointwise difference: 11.70071220
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 10.01993370, Converted: -1.68077850
|
|
|
+ Biggest difference in row (0, 0), sum 47.378487 vs 33.471664
|
|
|
+
|
|
|
+Layer 7, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 121.329849
|
|
|
+ Converted tensor sum: 101.072693
|
|
|
+ Original tensor mean: 15.166231
|
|
|
+ Converted tensor mean: 12.634087
|
|
|
+ Mean difference: 4.85845757
|
|
|
+ Maximum pointwise difference: 11.92098331
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 18.39835739, Converted: 6.47737408
|
|
|
+ Biggest difference in row (0, 0), sum 121.329849 vs 101.072693
|
|
|
+
|
|
|
+Layer 8, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 105.626358
|
|
|
+ Converted tensor sum: 92.869370
|
|
|
+ Original tensor mean: 13.203295
|
|
|
+ Converted tensor mean: 11.608671
|
|
|
+ Mean difference: 5.01301622
|
|
|
+ Maximum pointwise difference: 11.09072685
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 15.02331066, Converted: 3.93258405
|
|
|
+ Biggest difference in row (0, 0), sum 105.626358 vs 92.869370
|
|
|
+
|
|
|
+Layer 9, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 94.886589
|
|
|
+ Converted tensor sum: 86.461792
|
|
|
+ Original tensor mean: 11.860824
|
|
|
+ Converted tensor mean: 10.807724
|
|
|
+ Mean difference: 5.16425228
|
|
|
+ Maximum pointwise difference: 10.79585648
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 2.03169847, Converted: 12.82755470
|
|
|
+ Biggest difference in row (0, 0), sum 94.886589 vs 86.461792
|
|
|
+
|
|
|
+Layer 10, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 93.657555
|
|
|
+ Converted tensor sum: 77.932861
|
|
|
+ Original tensor mean: 11.707194
|
|
|
+ Converted tensor mean: 9.741608
|
|
|
+ Mean difference: 5.07010078
|
|
|
+ Maximum pointwise difference: 11.53797054
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 13.33782196, Converted: 1.79985178
|
|
|
+ Biggest difference in row (0, 0), sum 93.657555 vs 77.932861
|
|
|
+
|
|
|
+Layer 11, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 186.086578
|
|
|
+ Converted tensor sum: 176.759811
|
|
|
+ Original tensor mean: 23.260822
|
|
|
+ Converted tensor mean: 22.094976
|
|
|
+ Mean difference: 4.87584686
|
|
|
+ Maximum pointwise difference: 10.12077332
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 14.05643463, Converted: 24.17720795
|
|
|
+ Biggest difference in row (0, 0), sum 186.086578 vs 176.759811
|
|
|
+
|
|
|
+Layer 12, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 188.253220
|
|
|
+ Converted tensor sum: 173.150467
|
|
|
+ Original tensor mean: 23.531652
|
|
|
+ Converted tensor mean: 21.643808
|
|
|
+ Mean difference: 5.08278847
|
|
|
+ Maximum pointwise difference: 9.91738033
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 14.99966526, Converted: 24.91704559
|
|
|
+ Biggest difference in row (0, 0), sum 188.253220 vs 173.150467
|
|
|
+
|
|
|
+Layer 13, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 181.761749
|
|
|
+ Converted tensor sum: 171.658249
|
|
|
+ Original tensor mean: 22.720219
|
|
|
+ Converted tensor mean: 21.457281
|
|
|
+ Mean difference: 4.79229736
|
|
|
+ Maximum pointwise difference: 9.82627106
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 14.16268539, Converted: 23.98895645
|
|
|
+ Biggest difference in row (0, 0), sum 181.761749 vs 171.658249
|
|
|
+
|
|
|
+Layer 14, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 176.198990
|
|
|
+ Converted tensor sum: 170.420898
|
|
|
+ Original tensor mean: 22.024874
|
|
|
+ Converted tensor mean: 21.302612
|
|
|
+ Mean difference: 4.28427029
|
|
|
+ Maximum pointwise difference: 9.05801964
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 13.50310326, Converted: 22.56112289
|
|
|
+ Biggest difference in row (0, 0), sum 176.198990 vs 170.420898
|
|
|
+
|
|
|
+Layer 15, Token 16 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 314.888916
|
|
|
+ Converted tensor sum: 308.839905
|
|
|
+ Original tensor mean: 39.361115
|
|
|
+ Converted tensor mean: 38.604988
|
|
|
+ Mean difference: 4.36002254
|
|
|
+ Maximum pointwise difference: 9.44413185
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 25.14219856, Converted: 34.58633041
|
|
|
+ Biggest difference in row (0, 0), sum 314.888916 vs 308.839905
|
|
|
+
|
|
|
+Layer 0, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 6.615214
|
|
|
+ Converted tensor sum: -14.476066
|
|
|
+ Original tensor mean: 0.826902
|
|
|
+ Converted tensor mean: -1.809508
|
|
|
+ Mean difference: 4.01758480
|
|
|
+ Maximum pointwise difference: 12.95696259
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 8.16467762, Converted: -4.79228544
|
|
|
+ Biggest difference in row (0, 0), sum 6.615214 vs -14.476066
|
|
|
+
|
|
|
+Layer 1, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 4.332821
|
|
|
+ Converted tensor sum: -48.476418
|
|
|
+ Original tensor mean: 0.541603
|
|
|
+ Converted tensor mean: -6.059552
|
|
|
+ Mean difference: 8.00736046
|
|
|
+ Maximum pointwise difference: 13.83443928
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 7.88728952, Converted: -5.94714975
|
|
|
+ Biggest difference in row (0, 0), sum 4.332821 vs -48.476418
|
|
|
+
|
|
|
+Layer 2, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 13.631664
|
|
|
+ Converted tensor sum: -24.375608
|
|
|
+ Original tensor mean: 1.703958
|
|
|
+ Converted tensor mean: -3.046951
|
|
|
+ Mean difference: 9.48411465
|
|
|
+ Maximum pointwise difference: 15.28743267
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 2.43811703, Converted: -12.84931564
|
|
|
+ Biggest difference in row (0, 0), sum 13.631664 vs -24.375608
|
|
|
+
|
|
|
+Layer 3, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 59.143936
|
|
|
+ Converted tensor sum: -80.541725
|
|
|
+ Original tensor mean: 7.392992
|
|
|
+ Converted tensor mean: -10.067716
|
|
|
+ Mean difference: 17.46070862
|
|
|
+ Maximum pointwise difference: 28.83273697
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 9.60771275, Converted: -19.22502327
|
|
|
+ Biggest difference in row (0, 0), sum 59.143936 vs -80.541725
|
|
|
+
|
|
|
+Layer 4, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 51.750626
|
|
|
+ Converted tensor sum: -81.567123
|
|
|
+ Original tensor mean: 6.468828
|
|
|
+ Converted tensor mean: -10.195890
|
|
|
+ Mean difference: 17.13005066
|
|
|
+ Maximum pointwise difference: 30.73341751
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 9.45896626, Converted: -21.27445221
|
|
|
+ Biggest difference in row (0, 0), sum 51.750626 vs -81.567123
|
|
|
+
|
|
|
+Layer 5, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 33.377792
|
|
|
+ Converted tensor sum: -8.966677
|
|
|
+ Original tensor mean: 4.172224
|
|
|
+ Converted tensor mean: -1.120835
|
|
|
+ Mean difference: 11.87618256
|
|
|
+ Maximum pointwise difference: 19.17303848
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 0.59302533, Converted: 19.76606369
|
|
|
+ Biggest difference in row (0, 0), sum 33.377792 vs -8.966677
|
|
|
+
|
|
|
+Layer 6, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 34.373646
|
|
|
+ Converted tensor sum: -17.893101
|
|
|
+ Original tensor mean: 4.296706
|
|
|
+ Converted tensor mean: -2.236638
|
|
|
+ Mean difference: 12.44108009
|
|
|
+ Maximum pointwise difference: 21.66391373
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 7.57746935, Converted: -14.08644485
|
|
|
+ Biggest difference in row (0, 0), sum 34.373646 vs -17.893101
|
|
|
+
|
|
|
+Layer 7, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 117.899002
|
|
|
+ Converted tensor sum: -60.493092
|
|
|
+ Original tensor mean: 14.737375
|
|
|
+ Converted tensor mean: -7.561636
|
|
|
+ Mean difference: 22.75322723
|
|
|
+ Maximum pointwise difference: 41.73314667
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 20.46781158, Converted: -21.26533699
|
|
|
+ Biggest difference in row (0, 0), sum 117.899002 vs -60.493092
|
|
|
+
|
|
|
+Layer 8, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 102.151550
|
|
|
+ Converted tensor sum: -53.178627
|
|
|
+ Original tensor mean: 12.768944
|
|
|
+ Converted tensor mean: -6.647328
|
|
|
+ Mean difference: 21.35518456
|
|
|
+ Maximum pointwise difference: 40.89769745
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 18.52126884, Converted: -22.37642860
|
|
|
+ Biggest difference in row (0, 0), sum 102.151550 vs -53.178627
|
|
|
+
|
|
|
+Layer 9, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 90.451920
|
|
|
+ Converted tensor sum: -34.497658
|
|
|
+ Original tensor mean: 11.306490
|
|
|
+ Converted tensor mean: -4.312207
|
|
|
+ Mean difference: 18.82321548
|
|
|
+ Maximum pointwise difference: 37.83747864
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 18.08675385, Converted: -19.75072479
|
|
|
+ Biggest difference in row (0, 0), sum 90.451920 vs -34.497658
|
|
|
+
|
|
|
+Layer 10, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 87.881783
|
|
|
+ Converted tensor sum: -25.459152
|
|
|
+ Original tensor mean: 10.985223
|
|
|
+ Converted tensor mean: -3.182394
|
|
|
+ Mean difference: 17.43336678
|
|
|
+ Maximum pointwise difference: 35.29803467
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 17.46567726, Converted: -17.83235931
|
|
|
+ Biggest difference in row (0, 0), sum 87.881783 vs -25.459152
|
|
|
+
|
|
|
+Layer 11, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 185.306732
|
|
|
+ Converted tensor sum: -264.026886
|
|
|
+ Original tensor mean: 23.163342
|
|
|
+ Converted tensor mean: -33.003361
|
|
|
+ Mean difference: 56.16670227
|
|
|
+ Maximum pointwise difference: 73.40274048
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 27.15820312, Converted: -46.24454117
|
|
|
+ Biggest difference in row (0, 0), sum 185.306732 vs -264.026886
|
|
|
+
|
|
|
+Layer 12, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 186.018799
|
|
|
+ Converted tensor sum: -238.738007
|
|
|
+ Original tensor mean: 23.252350
|
|
|
+ Converted tensor mean: -29.842251
|
|
|
+ Mean difference: 53.09460068
|
|
|
+ Maximum pointwise difference: 71.14258575
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 27.12987900, Converted: -44.01270676
|
|
|
+ Biggest difference in row (0, 0), sum 186.018799 vs -238.738007
|
|
|
+
|
|
|
+Layer 13, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 178.633179
|
|
|
+ Converted tensor sum: -250.662323
|
|
|
+ Original tensor mean: 22.329147
|
|
|
+ Converted tensor mean: -31.332790
|
|
|
+ Mean difference: 53.66194153
|
|
|
+ Maximum pointwise difference: 72.33184814
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 22.06610680, Converted: -50.26573944
|
|
|
+ Biggest difference in row (0, 0), sum 178.633179 vs -250.662323
|
|
|
+
|
|
|
+Layer 14, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 171.761902
|
|
|
+ Converted tensor sum: -301.707916
|
|
|
+ Original tensor mean: 21.470238
|
|
|
+ Converted tensor mean: -37.713490
|
|
|
+ Mean difference: 59.18372726
|
|
|
+ Maximum pointwise difference: 84.33922577
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 21.43586349, Converted: -62.90336227
|
|
|
+ Biggest difference in row (0, 0), sum 171.761902 vs -301.707916
|
|
|
+
|
|
|
+Layer 15, Token 17 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 313.503632
|
|
|
+ Converted tensor sum: -672.745667
|
|
|
+ Original tensor mean: 39.187954
|
|
|
+ Converted tensor mean: -84.093208
|
|
|
+ Mean difference: 123.28115845
|
|
|
+ Maximum pointwise difference: 153.27690125
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 38.26152039, Converted: -115.01538086
|
|
|
+ Biggest difference in row (0, 0), sum 313.503632 vs -672.745667
|
|
|
+
|
|
|
+Layer 0, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 37.370514
|
|
|
+ Converted tensor sum: 2.800200
|
|
|
+ Original tensor mean: 4.671314
|
|
|
+ Converted tensor mean: 0.350025
|
|
|
+ Mean difference: 5.54810905
|
|
|
+ Maximum pointwise difference: 9.22967339
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 9.28797436, Converted: 0.05830121
|
|
|
+ Biggest difference in row (0, 0), sum 37.370514 vs 2.800200
|
|
|
+
|
|
|
+Layer 1, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 27.386568
|
|
|
+ Converted tensor sum: -8.815313
|
|
|
+ Original tensor mean: 3.423321
|
|
|
+ Converted tensor mean: -1.101914
|
|
|
+ Mean difference: 5.46173763
|
|
|
+ Maximum pointwise difference: 11.23313618
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 7.72619963, Converted: -3.50693655
|
|
|
+ Biggest difference in row (0, 0), sum 27.386568 vs -8.815313
|
|
|
+
|
|
|
+Layer 2, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 22.950966
|
|
|
+ Converted tensor sum: -26.951405
|
|
|
+ Original tensor mean: 2.868871
|
|
|
+ Converted tensor mean: -3.368926
|
|
|
+ Mean difference: 7.41814232
|
|
|
+ Maximum pointwise difference: 14.15112782
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 7.93941879, Converted: -6.21170902
|
|
|
+ Biggest difference in row (0, 0), sum 22.950966 vs -26.951405
|
|
|
+
|
|
|
+Layer 3, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 75.358887
|
|
|
+ Converted tensor sum: -194.584152
|
|
|
+ Original tensor mean: 9.419861
|
|
|
+ Converted tensor mean: -24.323019
|
|
|
+ Mean difference: 33.74287796
|
|
|
+ Maximum pointwise difference: 39.03241730
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 16.72500801, Converted: -22.30740929
|
|
|
+ Biggest difference in row (0, 0), sum 75.358887 vs -194.584152
|
|
|
+
|
|
|
+Layer 4, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 63.885963
|
|
|
+ Converted tensor sum: -193.801666
|
|
|
+ Original tensor mean: 7.985745
|
|
|
+ Converted tensor mean: -24.225208
|
|
|
+ Mean difference: 32.21095276
|
|
|
+ Maximum pointwise difference: 39.04253769
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 15.83776665, Converted: -23.20477104
|
|
|
+ Biggest difference in row (0, 0), sum 63.885963 vs -193.801666
|
|
|
+
|
|
|
+Layer 5, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 51.427219
|
|
|
+ Converted tensor sum: -189.920349
|
|
|
+ Original tensor mean: 6.428402
|
|
|
+ Converted tensor mean: -23.740044
|
|
|
+ Mean difference: 30.16844559
|
|
|
+ Maximum pointwise difference: 35.64602280
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 8.10052967, Converted: -27.54549408
|
|
|
+ Biggest difference in row (0, 0), sum 51.427219 vs -189.920349
|
|
|
+
|
|
|
+Layer 6, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 52.837097
|
|
|
+ Converted tensor sum: -237.793671
|
|
|
+ Original tensor mean: 6.604637
|
|
|
+ Converted tensor mean: -29.724209
|
|
|
+ Mean difference: 36.32884598
|
|
|
+ Maximum pointwise difference: 41.40105438
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 7.84163952, Converted: -33.55941391
|
|
|
+ Biggest difference in row (0, 0), sum 52.837097 vs -237.793671
|
|
|
+
|
|
|
+Layer 7, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 129.848618
|
|
|
+ Converted tensor sum: -405.475128
|
|
|
+ Original tensor mean: 16.231077
|
|
|
+ Converted tensor mean: -50.684391
|
|
|
+ Mean difference: 66.91546631
|
|
|
+ Maximum pointwise difference: 75.46723938
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 22.33297348, Converted: -53.13426590
|
|
|
+ Biggest difference in row (0, 0), sum 129.848618 vs -405.475128
|
|
|
+
|
|
|
+Layer 8, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 112.813950
|
|
|
+ Converted tensor sum: -388.213379
|
|
|
+ Original tensor mean: 14.101744
|
|
|
+ Converted tensor mean: -48.526672
|
|
|
+ Mean difference: 62.62841415
|
|
|
+ Maximum pointwise difference: 74.58121490
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 20.05025291, Converted: -54.53096390
|
|
|
+ Biggest difference in row (0, 0), sum 112.813950 vs -388.213379
|
|
|
+
|
|
|
+Layer 9, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 98.625351
|
|
|
+ Converted tensor sum: -428.683411
|
|
|
+ Original tensor mean: 12.328169
|
|
|
+ Converted tensor mean: -53.585426
|
|
|
+ Mean difference: 65.91359711
|
|
|
+ Maximum pointwise difference: 78.76679230
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 5.12599134, Converted: -73.64080048
|
|
|
+ Biggest difference in row (0, 0), sum 98.625351 vs -428.683411
|
|
|
+
|
|
|
+Layer 10, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 93.009445
|
|
|
+ Converted tensor sum: -432.554626
|
|
|
+ Original tensor mean: 11.626181
|
|
|
+ Converted tensor mean: -54.069328
|
|
|
+ Mean difference: 65.69551086
|
|
|
+ Maximum pointwise difference: 76.13760376
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 4.86473036, Converted: -71.27287292
|
|
|
+ Biggest difference in row (0, 0), sum 93.009445 vs -432.554626
|
|
|
+
|
|
|
+Layer 11, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 188.645950
|
|
|
+ Converted tensor sum: -772.146790
|
|
|
+ Original tensor mean: 23.580744
|
|
|
+ Converted tensor mean: -96.518349
|
|
|
+ Mean difference: 120.09909058
|
|
|
+ Maximum pointwise difference: 140.57998657
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 17.72886276, Converted: -122.85112000
|
|
|
+ Biggest difference in row (0, 0), sum 188.645950 vs -772.146790
|
|
|
+
|
|
|
+Layer 12, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 191.028870
|
|
|
+ Converted tensor sum: -781.472900
|
|
|
+ Original tensor mean: 23.878609
|
|
|
+ Converted tensor mean: -97.684113
|
|
|
+ Mean difference: 121.56272888
|
|
|
+ Maximum pointwise difference: 143.88111877
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 18.69833946, Converted: -125.18278503
|
|
|
+ Biggest difference in row (0, 0), sum 191.028870 vs -781.472900
|
|
|
+
|
|
|
+Layer 13, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 183.829086
|
|
|
+ Converted tensor sum: -808.856689
|
|
|
+ Original tensor mean: 22.978636
|
|
|
+ Converted tensor mean: -101.107086
|
|
|
+ Mean difference: 124.08572388
|
|
|
+ Maximum pointwise difference: 147.60656738
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 18.44003105, Converted: -129.16653442
|
|
|
+ Biggest difference in row (0, 0), sum 183.829086 vs -808.856689
|
|
|
+
|
|
|
+Layer 14, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 177.643005
|
|
|
+ Converted tensor sum: -844.687622
|
|
|
+ Original tensor mean: 22.205376
|
|
|
+ Converted tensor mean: -105.585953
|
|
|
+ Mean difference: 127.79132843
|
|
|
+ Maximum pointwise difference: 148.00994873
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 17.69933319, Converted: -130.31060791
|
|
|
+ Biggest difference in row (0, 0), sum 177.643005 vs -844.687622
|
|
|
+
|
|
|
+Layer 15, Token 18 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 320.725769
|
|
|
+ Converted tensor sum: -1234.242676
|
|
|
+ Original tensor mean: 40.090721
|
|
|
+ Converted tensor mean: -154.280334
|
|
|
+ Mean difference: 194.37104797
|
|
|
+ Maximum pointwise difference: 225.51652527
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 39.54684830, Converted: -185.96968079
|
|
|
+ Biggest difference in row (0, 0), sum 320.725769 vs -1234.242676
|
|
|
+
|
|
|
+Layer 0, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -9.932329
|
|
|
+ Converted tensor sum: -1.418950
|
|
|
+ Original tensor mean: -1.241541
|
|
|
+ Converted tensor mean: -0.177369
|
|
|
+ Mean difference: 1.91613591
|
|
|
+ Maximum pointwise difference: 5.37744808
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -4.92564631, Converted: 0.45180166
|
|
|
+ Biggest difference in row (0, 0), sum -9.932329 vs -1.418950
|
|
|
+
|
|
|
+Layer 1, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -28.079020
|
|
|
+ Converted tensor sum: 4.360578
|
|
|
+ Original tensor mean: -3.509877
|
|
|
+ Converted tensor mean: 0.545072
|
|
|
+ Mean difference: 4.81566954
|
|
|
+ Maximum pointwise difference: 12.93084526
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -11.63085365, Converted: 1.29999185
|
|
|
+ Biggest difference in row (0, 0), sum -28.079020 vs 4.360578
|
|
|
+
|
|
|
+Layer 2, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -9.719646
|
|
|
+ Converted tensor sum: 14.192688
|
|
|
+ Original tensor mean: -1.214956
|
|
|
+ Converted tensor mean: 1.774086
|
|
|
+ Mean difference: 5.83081627
|
|
|
+ Maximum pointwise difference: 15.01737213
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -10.13109303, Converted: 4.88627911
|
|
|
+ Biggest difference in row (0, 0), sum -9.719646 vs 14.192688
|
|
|
+
|
|
|
+Layer 3, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -78.071198
|
|
|
+ Converted tensor sum: 44.287003
|
|
|
+ Original tensor mean: -9.758900
|
|
|
+ Converted tensor mean: 5.535875
|
|
|
+ Mean difference: 15.29477501
|
|
|
+ Maximum pointwise difference: 25.90341759
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -17.29398918, Converted: 8.60942841
|
|
|
+ Biggest difference in row (0, 0), sum -78.071198 vs 44.287003
|
|
|
+
|
|
|
+Layer 4, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -17.936802
|
|
|
+ Converted tensor sum: 43.255585
|
|
|
+ Original tensor mean: -2.242100
|
|
|
+ Converted tensor mean: 5.406948
|
|
|
+ Mean difference: 9.52408981
|
|
|
+ Maximum pointwise difference: 16.11044312
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -8.10731792, Converted: 8.00312424
|
|
|
+ Biggest difference in row (0, 0), sum -17.936802 vs 43.255585
|
|
|
+
|
|
|
+Layer 5, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 14.270342
|
|
|
+ Converted tensor sum: 40.868690
|
|
|
+ Original tensor mean: 1.783793
|
|
|
+ Converted tensor mean: 5.108586
|
|
|
+ Mean difference: 6.39925480
|
|
|
+ Maximum pointwise difference: 13.00582123
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -10.49264050, Converted: 2.51318097
|
|
|
+ Biggest difference in row (0, 0), sum 14.270342 vs 40.868690
|
|
|
+
|
|
|
+Layer 6, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 8.770991
|
|
|
+ Converted tensor sum: 44.250122
|
|
|
+ Original tensor mean: 1.096374
|
|
|
+ Converted tensor mean: 5.531265
|
|
|
+ Mean difference: 7.05475235
|
|
|
+ Maximum pointwise difference: 14.57606697
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -11.80261707, Converted: 2.77344990
|
|
|
+ Biggest difference in row (0, 0), sum 8.770991 vs 44.250122
|
|
|
+
|
|
|
+Layer 7, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 27.567080
|
|
|
+ Converted tensor sum: 110.976578
|
|
|
+ Original tensor mean: 3.445885
|
|
|
+ Converted tensor mean: 13.872072
|
|
|
+ Mean difference: 11.90625381
|
|
|
+ Maximum pointwise difference: 20.18301392
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -9.75880718, Converted: 10.42420769
|
|
|
+ Biggest difference in row (0, 0), sum 27.567080 vs 110.976578
|
|
|
+
|
|
|
+Layer 8, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 12.723747
|
|
|
+ Converted tensor sum: 112.570312
|
|
|
+ Original tensor mean: 1.590468
|
|
|
+ Converted tensor mean: 14.071289
|
|
|
+ Mean difference: 12.89592552
|
|
|
+ Maximum pointwise difference: 20.84409904
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -12.16371441, Converted: 8.68038464
|
|
|
+ Biggest difference in row (0, 0), sum 12.723747 vs 112.570312
|
|
|
+
|
|
|
+Layer 9, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 10.056442
|
|
|
+ Converted tensor sum: 106.334442
|
|
|
+ Original tensor mean: 1.257055
|
|
|
+ Converted tensor mean: 13.291805
|
|
|
+ Mean difference: 12.47594643
|
|
|
+ Maximum pointwise difference: 22.08431053
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -14.35114861, Converted: 7.73316193
|
|
|
+ Biggest difference in row (0, 0), sum 10.056442 vs 106.334442
|
|
|
+
|
|
|
+Layer 10, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1.989794
|
|
|
+ Converted tensor sum: 99.182007
|
|
|
+ Original tensor mean: -0.248724
|
|
|
+ Converted tensor mean: 12.397751
|
|
|
+ Mean difference: 13.17310143
|
|
|
+ Maximum pointwise difference: 24.05181694
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -17.15268326, Converted: 6.89913368
|
|
|
+ Biggest difference in row (0, 0), sum -1.989794 vs 99.182007
|
|
|
+
|
|
|
+Layer 11, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 67.349617
|
|
|
+ Converted tensor sum: 188.920929
|
|
|
+ Original tensor mean: 8.418702
|
|
|
+ Converted tensor mean: 23.615116
|
|
|
+ Mean difference: 15.19641399
|
|
|
+ Maximum pointwise difference: 29.17947769
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -10.99394608, Converted: 18.18553162
|
|
|
+ Biggest difference in row (0, 0), sum 67.349617 vs 188.920929
|
|
|
+
|
|
|
+Layer 12, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.645859
|
|
|
+ Converted tensor sum: 187.996002
|
|
|
+ Original tensor mean: 8.205732
|
|
|
+ Converted tensor mean: 23.499500
|
|
|
+ Mean difference: 15.29376984
|
|
|
+ Maximum pointwise difference: 29.97419739
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -11.02998257, Converted: 18.94421577
|
|
|
+ Biggest difference in row (0, 0), sum 65.645859 vs 187.996002
|
|
|
+
|
|
|
+Layer 13, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 62.775318
|
|
|
+ Converted tensor sum: 186.939407
|
|
|
+ Original tensor mean: 7.846915
|
|
|
+ Converted tensor mean: 23.367426
|
|
|
+ Mean difference: 15.52051163
|
|
|
+ Maximum pointwise difference: 30.78374863
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -11.59408474, Converted: 19.18966293
|
|
|
+ Biggest difference in row (0, 0), sum 62.775318 vs 186.939407
|
|
|
+
|
|
|
+Layer 14, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 66.572449
|
|
|
+ Converted tensor sum: 192.538483
|
|
|
+ Original tensor mean: 8.321556
|
|
|
+ Converted tensor mean: 24.067310
|
|
|
+ Mean difference: 15.74575615
|
|
|
+ Maximum pointwise difference: 32.26174927
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -11.68817997, Converted: 20.57357025
|
|
|
+ Biggest difference in row (0, 0), sum 66.572449 vs 192.538483
|
|
|
+
|
|
|
+Layer 15, Token 19 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 224.145126
|
|
|
+ Converted tensor sum: 325.050964
|
|
|
+ Original tensor mean: 28.018141
|
|
|
+ Converted tensor mean: 40.631371
|
|
|
+ Mean difference: 13.45689964
|
|
|
+ Maximum pointwise difference: 28.69198799
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 9.77963829, Converted: 38.47162628
|
|
|
+ Biggest difference in row (0, 0), sum 224.145126 vs 325.050964
|
|
|
+
|
|
|
+Layer 0, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -29.569780
|
|
|
+ Converted tensor sum: 10.794893
|
|
|
+ Original tensor mean: -3.696223
|
|
|
+ Converted tensor mean: 1.349362
|
|
|
+ Mean difference: 6.44896221
|
|
|
+ Maximum pointwise difference: 13.91718292
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -9.61637592, Converted: 4.30080748
|
|
|
+ Biggest difference in row (0, 0), sum -29.569780 vs 10.794893
|
|
|
+
|
|
|
+Layer 1, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 1.025735
|
|
|
+ Converted tensor sum: 6.199029
|
|
|
+ Original tensor mean: 0.128217
|
|
|
+ Converted tensor mean: 0.774879
|
|
|
+ Mean difference: 7.59240437
|
|
|
+ Maximum pointwise difference: 14.00857544
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -9.50434303, Converted: 4.50423241
|
|
|
+ Biggest difference in row (0, 0), sum 1.025735 vs 6.199029
|
|
|
+
|
|
|
+Layer 2, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.293440
|
|
|
+ Converted tensor sum: 7.479863
|
|
|
+ Original tensor mean: 2.161680
|
|
|
+ Converted tensor mean: 0.934983
|
|
|
+ Mean difference: 7.88275719
|
|
|
+ Maximum pointwise difference: 14.18584061
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -8.63929176, Converted: 5.54654837
|
|
|
+ Biggest difference in row (0, 0), sum 17.293440 vs 7.479863
|
|
|
+
|
|
|
+Layer 3, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 36.610168
|
|
|
+ Converted tensor sum: 49.467545
|
|
|
+ Original tensor mean: 4.576271
|
|
|
+ Converted tensor mean: 6.183443
|
|
|
+ Mean difference: 6.93841553
|
|
|
+ Maximum pointwise difference: 18.18937302
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -5.24265194, Converted: 12.94672108
|
|
|
+ Biggest difference in row (0, 0), sum 36.610168 vs 49.467545
|
|
|
+
|
|
|
+Layer 4, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 29.254171
|
|
|
+ Converted tensor sum: 47.750710
|
|
|
+ Original tensor mean: 3.656771
|
|
|
+ Converted tensor mean: 5.968839
|
|
|
+ Mean difference: 7.21544361
|
|
|
+ Maximum pointwise difference: 18.54884338
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -5.63391066, Converted: 12.91493225
|
|
|
+ Biggest difference in row (0, 0), sum 29.254171 vs 47.750710
|
|
|
+
|
|
|
+Layer 5, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 35.151703
|
|
|
+ Converted tensor sum: 48.878067
|
|
|
+ Original tensor mean: 4.393963
|
|
|
+ Converted tensor mean: 6.109758
|
|
|
+ Mean difference: 6.99968100
|
|
|
+ Maximum pointwise difference: 14.96766090
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -3.65913010, Converted: 11.30853081
|
|
|
+ Biggest difference in row (0, 0), sum 35.151703 vs 48.878067
|
|
|
+
|
|
|
+Layer 6, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 30.034544
|
|
|
+ Converted tensor sum: 47.318748
|
|
|
+ Original tensor mean: 3.754318
|
|
|
+ Converted tensor mean: 5.914844
|
|
|
+ Mean difference: 7.24886227
|
|
|
+ Maximum pointwise difference: 14.76261425
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -3.74199128, Converted: 11.02062321
|
|
|
+ Biggest difference in row (0, 0), sum 30.034544 vs 47.318748
|
|
|
+
|
|
|
+Layer 7, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 93.501678
|
|
|
+ Converted tensor sum: 109.843590
|
|
|
+ Original tensor mean: 11.687710
|
|
|
+ Converted tensor mean: 13.730449
|
|
|
+ Mean difference: 6.95008612
|
|
|
+ Maximum pointwise difference: 15.00504684
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 6.03040743, Converted: 21.03545380
|
|
|
+ Biggest difference in row (0, 0), sum 93.501678 vs 109.843590
|
|
|
+
|
|
|
+Layer 8, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 79.472687
|
|
|
+ Converted tensor sum: 102.823357
|
|
|
+ Original tensor mean: 9.934086
|
|
|
+ Converted tensor mean: 12.852920
|
|
|
+ Mean difference: 7.54766369
|
|
|
+ Maximum pointwise difference: 16.02755737
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 5.07132435, Converted: 21.09888077
|
|
|
+ Biggest difference in row (0, 0), sum 79.472687 vs 102.823357
|
|
|
+
|
|
|
+Layer 9, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 67.706139
|
|
|
+ Converted tensor sum: 99.777931
|
|
|
+ Original tensor mean: 8.463267
|
|
|
+ Converted tensor mean: 12.472241
|
|
|
+ Mean difference: 8.86232471
|
|
|
+ Maximum pointwise difference: 16.78725052
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 4.42850208, Converted: 21.21575165
|
|
|
+ Biggest difference in row (0, 0), sum 67.706139 vs 99.777931
|
|
|
+
|
|
|
+Layer 10, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 63.760403
|
|
|
+ Converted tensor sum: 96.691109
|
|
|
+ Original tensor mean: 7.970050
|
|
|
+ Converted tensor mean: 12.086389
|
|
|
+ Mean difference: 9.02034378
|
|
|
+ Maximum pointwise difference: 16.24016762
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 4.14136124, Converted: 20.38152885
|
|
|
+ Biggest difference in row (0, 0), sum 63.760403 vs 96.691109
|
|
|
+
|
|
|
+Layer 11, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 158.635681
|
|
|
+ Converted tensor sum: 194.330322
|
|
|
+ Original tensor mean: 19.829460
|
|
|
+ Converted tensor mean: 24.291290
|
|
|
+ Mean difference: 8.75148964
|
|
|
+ Maximum pointwise difference: 16.25316620
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 15.08195591, Converted: 31.33512306
|
|
|
+ Biggest difference in row (0, 0), sum 158.635681 vs 194.330322
|
|
|
+
|
|
|
+Layer 12, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 159.106079
|
|
|
+ Converted tensor sum: 194.084503
|
|
|
+ Original tensor mean: 19.888260
|
|
|
+ Converted tensor mean: 24.260563
|
|
|
+ Mean difference: 8.75931835
|
|
|
+ Maximum pointwise difference: 16.29665756
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 14.28990650, Converted: 30.58656502
|
|
|
+ Biggest difference in row (0, 0), sum 159.106079 vs 194.084503
|
|
|
+
|
|
|
+Layer 13, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 153.442200
|
|
|
+ Converted tensor sum: 186.870270
|
|
|
+ Original tensor mean: 19.180275
|
|
|
+ Converted tensor mean: 23.358784
|
|
|
+ Mean difference: 8.66864204
|
|
|
+ Maximum pointwise difference: 15.29904747
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 14.04843903, Converted: 29.34748650
|
|
|
+ Biggest difference in row (0, 0), sum 153.442200 vs 186.870270
|
|
|
+
|
|
|
+Layer 14, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 147.691605
|
|
|
+ Converted tensor sum: 175.338470
|
|
|
+ Original tensor mean: 18.461451
|
|
|
+ Converted tensor mean: 21.917309
|
|
|
+ Mean difference: 8.84063625
|
|
|
+ Maximum pointwise difference: 15.63497734
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 13.52752876, Converted: 29.16250610
|
|
|
+ Biggest difference in row (0, 0), sum 147.691605 vs 175.338470
|
|
|
+
|
|
|
+Layer 15, Token 20 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 294.136749
|
|
|
+ Converted tensor sum: 310.250946
|
|
|
+ Original tensor mean: 36.767094
|
|
|
+ Converted tensor mean: 38.781368
|
|
|
+ Mean difference: 9.18845367
|
|
|
+ Maximum pointwise difference: 14.23109627
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 30.77650642, Converted: 45.00760269
|
|
|
+ Biggest difference in row (0, 0), sum 294.136749 vs 310.250946
|
|
|
+
|
|
|
+Layer 0, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -18.838482
|
|
|
+ Converted tensor sum: -1.325968
|
|
|
+ Original tensor mean: -2.354810
|
|
|
+ Converted tensor mean: -0.165746
|
|
|
+ Mean difference: 2.79272628
|
|
|
+ Maximum pointwise difference: 6.95248222
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -6.02015686, Converted: 0.93232512
|
|
|
+ Biggest difference in row (0, 0), sum -18.838482 vs -1.325968
|
|
|
+
|
|
|
+Layer 1, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -6.250936
|
|
|
+ Converted tensor sum: -2.277201
|
|
|
+ Original tensor mean: -0.781367
|
|
|
+ Converted tensor mean: -0.284650
|
|
|
+ Mean difference: 5.05594349
|
|
|
+ Maximum pointwise difference: 9.99544907
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 6.23908186, Converted: -3.75636768
|
|
|
+ Biggest difference in row (0, 0), sum -6.250936 vs -2.277201
|
|
|
+
|
|
|
+Layer 2, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -2.587172
|
|
|
+ Converted tensor sum: 0.977817
|
|
|
+ Original tensor mean: -0.323396
|
|
|
+ Converted tensor mean: 0.122227
|
|
|
+ Mean difference: 3.66970563
|
|
|
+ Maximum pointwise difference: 9.02869225
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -7.89160728, Converted: 1.13708520
|
|
|
+ Biggest difference in row (0, 0), sum -2.587172 vs 0.977817
|
|
|
+
|
|
|
+Layer 3, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -37.525734
|
|
|
+ Converted tensor sum: 5.221979
|
|
|
+ Original tensor mean: -4.690717
|
|
|
+ Converted tensor mean: 0.652747
|
|
|
+ Mean difference: 6.04651690
|
|
|
+ Maximum pointwise difference: 12.38726807
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -10.43804359, Converted: 1.94922423
|
|
|
+ Biggest difference in row (0, 0), sum -37.525734 vs 5.221979
|
|
|
+
|
|
|
+Layer 4, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 4.066291
|
|
|
+ Converted tensor sum: 13.447447
|
|
|
+ Original tensor mean: 0.508286
|
|
|
+ Converted tensor mean: 1.680931
|
|
|
+ Mean difference: 5.62788296
|
|
|
+ Maximum pointwise difference: 14.04961491
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -10.56051826, Converted: 3.48909688
|
|
|
+ Biggest difference in row (0, 0), sum 4.066291 vs 13.447447
|
|
|
+
|
|
|
+Layer 5, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 22.123846
|
|
|
+ Converted tensor sum: 14.835675
|
|
|
+ Original tensor mean: 2.765481
|
|
|
+ Converted tensor mean: 1.854459
|
|
|
+ Mean difference: 5.25254917
|
|
|
+ Maximum pointwise difference: 11.90699482
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 7.93798828, Converted: -3.96900630
|
|
|
+ Biggest difference in row (0, 0), sum 22.123846 vs 14.835675
|
|
|
+
|
|
|
+Layer 6, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 22.319403
|
|
|
+ Converted tensor sum: 11.047790
|
|
|
+ Original tensor mean: 2.789925
|
|
|
+ Converted tensor mean: 1.380974
|
|
|
+ Mean difference: 5.50898457
|
|
|
+ Maximum pointwise difference: 13.00136471
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 7.73285818, Converted: -5.26850653
|
|
|
+ Biggest difference in row (0, 0), sum 22.319403 vs 11.047790
|
|
|
+
|
|
|
+Layer 7, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 55.420013
|
|
|
+ Converted tensor sum: 74.081238
|
|
|
+ Original tensor mean: 6.927502
|
|
|
+ Converted tensor mean: 9.260155
|
|
|
+ Mean difference: 5.90270138
|
|
|
+ Maximum pointwise difference: 12.46957588
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -1.65777194, Converted: 10.81180382
|
|
|
+ Biggest difference in row (0, 0), sum 55.420013 vs 74.081238
|
|
|
+
|
|
|
+Layer 8, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 39.922848
|
|
|
+ Converted tensor sum: 72.282196
|
|
|
+ Original tensor mean: 4.990356
|
|
|
+ Converted tensor mean: 9.035275
|
|
|
+ Mean difference: 6.12995577
|
|
|
+ Maximum pointwise difference: 13.29505730
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -3.36732197, Converted: 9.92773533
|
|
|
+ Biggest difference in row (0, 0), sum 39.922848 vs 72.282196
|
|
|
+
|
|
|
+Layer 9, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 29.193859
|
|
|
+ Converted tensor sum: 64.425896
|
|
|
+ Original tensor mean: 3.649232
|
|
|
+ Converted tensor mean: 8.053237
|
|
|
+ Mean difference: 6.22422409
|
|
|
+ Maximum pointwise difference: 14.81417084
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -3.98170996, Converted: 10.83246040
|
|
|
+ Biggest difference in row (0, 0), sum 29.193859 vs 64.425896
|
|
|
+
|
|
|
+Layer 10, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 23.706369
|
|
|
+ Converted tensor sum: 55.726307
|
|
|
+ Original tensor mean: 2.963296
|
|
|
+ Converted tensor mean: 6.965788
|
|
|
+ Mean difference: 6.04786444
|
|
|
+ Maximum pointwise difference: 14.39242363
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -4.86538124, Converted: 9.52704239
|
|
|
+ Biggest difference in row (0, 0), sum 23.706369 vs 55.726307
|
|
|
+
|
|
|
+Layer 11, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 123.990646
|
|
|
+ Converted tensor sum: 150.405350
|
|
|
+ Original tensor mean: 15.498831
|
|
|
+ Converted tensor mean: 18.800669
|
|
|
+ Mean difference: 5.61389732
|
|
|
+ Maximum pointwise difference: 14.70817947
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 6.49463272, Converted: 21.20281219
|
|
|
+ Biggest difference in row (0, 0), sum 123.990646 vs 150.405350
|
|
|
+
|
|
|
+Layer 12, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 120.701889
|
|
|
+ Converted tensor sum: 144.158798
|
|
|
+ Original tensor mean: 15.087736
|
|
|
+ Converted tensor mean: 18.019850
|
|
|
+ Mean difference: 5.24121237
|
|
|
+ Maximum pointwise difference: 14.31963730
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 5.24581337, Converted: 19.56545067
|
|
|
+ Biggest difference in row (0, 0), sum 120.701889 vs 144.158798
|
|
|
+
|
|
|
+Layer 13, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 114.196152
|
|
|
+ Converted tensor sum: 142.528229
|
|
|
+ Original tensor mean: 14.274519
|
|
|
+ Converted tensor mean: 17.816029
|
|
|
+ Mean difference: 5.27994871
|
|
|
+ Maximum pointwise difference: 14.28137684
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 4.50468159, Converted: 18.78605843
|
|
|
+ Biggest difference in row (0, 0), sum 114.196152 vs 142.528229
|
|
|
+
|
|
|
+Layer 14, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 109.654587
|
|
|
+ Converted tensor sum: 141.504807
|
|
|
+ Original tensor mean: 13.706823
|
|
|
+ Converted tensor mean: 17.688101
|
|
|
+ Mean difference: 5.26909733
|
|
|
+ Maximum pointwise difference: 15.04267311
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 3.59476113, Converted: 18.63743401
|
|
|
+ Biggest difference in row (0, 0), sum 109.654587 vs 141.504807
|
|
|
+
|
|
|
+Layer 15, Token 21 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 258.799988
|
|
|
+ Converted tensor sum: 280.546570
|
|
|
+ Original tensor mean: 32.349998
|
|
|
+ Converted tensor mean: 35.068321
|
|
|
+ Mean difference: 5.38046169
|
|
|
+ Maximum pointwise difference: 13.79010963
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 21.50290108, Converted: 35.29301071
|
|
|
+ Biggest difference in row (0, 0), sum 258.799988 vs 280.546570
|
|
|
+
|
|
|
+Layer 0, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 22.958118
|
|
|
+ Converted tensor sum: -3.202849
|
|
|
+ Original tensor mean: 2.869765
|
|
|
+ Converted tensor mean: -0.400356
|
|
|
+ Mean difference: 4.91125917
|
|
|
+ Maximum pointwise difference: 8.36230850
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.21100616, Converted: 0.84869760
|
|
|
+ Biggest difference in row (0, 0), sum 22.958118 vs -3.202849
|
|
|
+
|
|
|
+Layer 1, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 25.125549
|
|
|
+ Converted tensor sum: -10.143456
|
|
|
+ Original tensor mean: 3.140694
|
|
|
+ Converted tensor mean: -1.267932
|
|
|
+ Mean difference: 5.89313412
|
|
|
+ Maximum pointwise difference: 9.59223843
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.62790585, Converted: 0.03566782
|
|
|
+ Biggest difference in row (0, 0), sum 25.125549 vs -10.143456
|
|
|
+
|
|
|
+Layer 2, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 27.315422
|
|
|
+ Converted tensor sum: -15.748328
|
|
|
+ Original tensor mean: 3.414428
|
|
|
+ Converted tensor mean: -1.968541
|
|
|
+ Mean difference: 8.97875500
|
|
|
+ Maximum pointwise difference: 14.55634785
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 2.85774899, Converted: -11.69859886
|
|
|
+ Biggest difference in row (0, 0), sum 27.315422 vs -15.748328
|
|
|
+
|
|
|
+Layer 3, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.650429
|
|
|
+ Converted tensor sum: -88.889626
|
|
|
+ Original tensor mean: 8.206304
|
|
|
+ Converted tensor mean: -11.111203
|
|
|
+ Mean difference: 19.31750679
|
|
|
+ Maximum pointwise difference: 27.03379250
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 6.11478758, Converted: -20.91900444
|
|
|
+ Biggest difference in row (0, 0), sum 65.650429 vs -88.889626
|
|
|
+
|
|
|
+Layer 4, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 61.788639
|
|
|
+ Converted tensor sum: -42.131989
|
|
|
+ Original tensor mean: 7.723580
|
|
|
+ Converted tensor mean: -5.266499
|
|
|
+ Mean difference: 12.99007797
|
|
|
+ Maximum pointwise difference: 18.81860924
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 14.83176613, Converted: -3.98684263
|
|
|
+ Biggest difference in row (0, 0), sum 61.788639 vs -42.131989
|
|
|
+
|
|
|
+Layer 5, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 57.004955
|
|
|
+ Converted tensor sum: 4.555844
|
|
|
+ Original tensor mean: 7.125619
|
|
|
+ Converted tensor mean: 0.569481
|
|
|
+ Mean difference: 8.63973427
|
|
|
+ Maximum pointwise difference: 19.13692093
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 2.54869914, Converted: -16.58822250
|
|
|
+ Biggest difference in row (0, 0), sum 57.004955 vs 4.555844
|
|
|
+
|
|
|
+Layer 6, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 54.908669
|
|
|
+ Converted tensor sum: -0.669161
|
|
|
+ Original tensor mean: 6.863584
|
|
|
+ Converted tensor mean: -0.083645
|
|
|
+ Mean difference: 8.70907402
|
|
|
+ Maximum pointwise difference: 18.54141235
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 2.42641473, Converted: -16.11499786
|
|
|
+ Biggest difference in row (0, 0), sum 54.908669 vs -0.669161
|
|
|
+
|
|
|
+Layer 7, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 125.605499
|
|
|
+ Converted tensor sum: -1.624224
|
|
|
+ Original tensor mean: 15.700687
|
|
|
+ Converted tensor mean: -0.203028
|
|
|
+ Mean difference: 15.90371513
|
|
|
+ Maximum pointwise difference: 27.27110672
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 10.48501492, Converted: -16.78609276
|
|
|
+ Biggest difference in row (0, 0), sum 125.605499 vs -1.624224
|
|
|
+
|
|
|
+Layer 8, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 109.340508
|
|
|
+ Converted tensor sum: -1.809371
|
|
|
+ Original tensor mean: 13.667563
|
|
|
+ Converted tensor mean: -0.226171
|
|
|
+ Mean difference: 14.19305420
|
|
|
+ Maximum pointwise difference: 24.19651794
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 6.68867207, Converted: -17.50784492
|
|
|
+ Biggest difference in row (0, 0), sum 109.340508 vs -1.809371
|
|
|
+
|
|
|
+Layer 9, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 93.036400
|
|
|
+ Converted tensor sum: -10.185041
|
|
|
+ Original tensor mean: 11.629550
|
|
|
+ Converted tensor mean: -1.273130
|
|
|
+ Mean difference: 13.22967815
|
|
|
+ Maximum pointwise difference: 22.35823822
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 2.86865139, Converted: -19.48958588
|
|
|
+ Biggest difference in row (0, 0), sum 93.036400 vs -10.185041
|
|
|
+
|
|
|
+Layer 10, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 85.756668
|
|
|
+ Converted tensor sum: -2.302891
|
|
|
+ Original tensor mean: 10.719584
|
|
|
+ Converted tensor mean: -0.287861
|
|
|
+ Mean difference: 11.49190331
|
|
|
+ Maximum pointwise difference: 20.63401985
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 19.00829315, Converted: -1.62572634
|
|
|
+ Biggest difference in row (0, 0), sum 85.756668 vs -2.302891
|
|
|
+
|
|
|
+Layer 11, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 182.162292
|
|
|
+ Converted tensor sum: -8.586711
|
|
|
+ Original tensor mean: 22.770287
|
|
|
+ Converted tensor mean: -1.073339
|
|
|
+ Mean difference: 23.84362602
|
|
|
+ Maximum pointwise difference: 34.19173050
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 13.77398682, Converted: -20.41774368
|
|
|
+ Biggest difference in row (0, 0), sum 182.162292 vs -8.586711
|
|
|
+
|
|
|
+Layer 12, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 182.096252
|
|
|
+ Converted tensor sum: -6.677206
|
|
|
+ Original tensor mean: 22.762032
|
|
|
+ Converted tensor mean: -0.834651
|
|
|
+ Mean difference: 23.59668159
|
|
|
+ Maximum pointwise difference: 35.20670319
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 14.20073891, Converted: -21.00596619
|
|
|
+ Biggest difference in row (0, 0), sum 182.096252 vs -6.677206
|
|
|
+
|
|
|
+Layer 13, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 176.400360
|
|
|
+ Converted tensor sum: -0.142300
|
|
|
+ Original tensor mean: 22.050045
|
|
|
+ Converted tensor mean: -0.017787
|
|
|
+ Mean difference: 22.06783295
|
|
|
+ Maximum pointwise difference: 34.37791824
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 13.55050278, Converted: -20.82741547
|
|
|
+ Biggest difference in row (0, 0), sum 176.400360 vs -0.142300
|
|
|
+
|
|
|
+Layer 14, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 169.308212
|
|
|
+ Converted tensor sum: 22.573196
|
|
|
+ Original tensor mean: 21.163527
|
|
|
+ Converted tensor mean: 2.821650
|
|
|
+ Mean difference: 18.34187508
|
|
|
+ Maximum pointwise difference: 31.75983810
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 11.94197941, Converted: -19.81785965
|
|
|
+ Biggest difference in row (0, 0), sum 169.308212 vs 22.573196
|
|
|
+
|
|
|
+Layer 15, Token 22 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 321.080658
|
|
|
+ Converted tensor sum: 136.787018
|
|
|
+ Original tensor mean: 40.135082
|
|
|
+ Converted tensor mean: 17.098377
|
|
|
+ Mean difference: 23.03670502
|
|
|
+ Maximum pointwise difference: 37.83760452
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 31.89689064, Converted: -5.94071388
|
|
|
+ Biggest difference in row (0, 0), sum 321.080658 vs 136.787018
|
|
|
+
|
|
|
+Layer 0, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 3.588341
|
|
|
+ Converted tensor sum: 9.359616
|
|
|
+ Original tensor mean: 0.448543
|
|
|
+ Converted tensor mean: 1.169952
|
|
|
+ Mean difference: 3.70246077
|
|
|
+ Maximum pointwise difference: 5.65140629
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -1.32952428, Converted: 4.32188225
|
|
|
+ Biggest difference in row (0, 0), sum 3.588341 vs 9.359616
|
|
|
+
|
|
|
+Layer 1, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -13.513486
|
|
|
+ Converted tensor sum: 3.000220
|
|
|
+ Original tensor mean: -1.689186
|
|
|
+ Converted tensor mean: 0.375028
|
|
|
+ Mean difference: 3.49640799
|
|
|
+ Maximum pointwise difference: 8.52665997
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -4.16102409, Converted: 4.36563587
|
|
|
+ Biggest difference in row (0, 0), sum -13.513486 vs 3.000220
|
|
|
+
|
|
|
+Layer 2, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -19.782562
|
|
|
+ Converted tensor sum: 4.253428
|
|
|
+ Original tensor mean: -2.472820
|
|
|
+ Converted tensor mean: 0.531678
|
|
|
+ Mean difference: 5.22110939
|
|
|
+ Maximum pointwise difference: 11.62318039
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -9.56802559, Converted: 2.05515456
|
|
|
+ Biggest difference in row (0, 0), sum -19.782562 vs 4.253428
|
|
|
+
|
|
|
+Layer 3, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -117.794266
|
|
|
+ Converted tensor sum: 14.072861
|
|
|
+ Original tensor mean: -14.724283
|
|
|
+ Converted tensor mean: 1.759108
|
|
|
+ Mean difference: 16.48339081
|
|
|
+ Maximum pointwise difference: 22.75023079
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -16.32844543, Converted: 6.42178583
|
|
|
+ Biggest difference in row (0, 0), sum -117.794266 vs 14.072861
|
|
|
+
|
|
|
+Layer 4, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -73.092270
|
|
|
+ Converted tensor sum: 6.691208
|
|
|
+ Original tensor mean: -9.136534
|
|
|
+ Converted tensor mean: 0.836401
|
|
|
+ Mean difference: 10.72612858
|
|
|
+ Maximum pointwise difference: 19.55576324
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -16.38935280, Converted: 3.16641092
|
|
|
+ Biggest difference in row (0, 0), sum -73.092270 vs 6.691208
|
|
|
+
|
|
|
+Layer 5, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -37.015450
|
|
|
+ Converted tensor sum: 8.681388
|
|
|
+ Original tensor mean: -4.626931
|
|
|
+ Converted tensor mean: 1.085173
|
|
|
+ Mean difference: 7.61082363
|
|
|
+ Maximum pointwise difference: 20.55440712
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -13.93057537, Converted: 6.62383223
|
|
|
+ Biggest difference in row (0, 0), sum -37.015450 vs 8.681388
|
|
|
+
|
|
|
+Layer 6, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -90.333237
|
|
|
+ Converted tensor sum: 7.396842
|
|
|
+ Original tensor mean: -11.291655
|
|
|
+ Converted tensor mean: 0.924605
|
|
|
+ Mean difference: 12.82605839
|
|
|
+ Maximum pointwise difference: 34.69086456
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -27.39507294, Converted: 7.29579258
|
|
|
+ Biggest difference in row (0, 0), sum -90.333237 vs 7.396842
|
|
|
+
|
|
|
+Layer 7, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -214.526337
|
|
|
+ Converted tensor sum: 60.269241
|
|
|
+ Original tensor mean: -26.815792
|
|
|
+ Converted tensor mean: 7.533655
|
|
|
+ Mean difference: 34.34944916
|
|
|
+ Maximum pointwise difference: 55.60475159
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -42.02355576, Converted: 13.58119678
|
|
|
+ Biggest difference in row (0, 0), sum -214.526337 vs 60.269241
|
|
|
+
|
|
|
+Layer 8, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -138.238464
|
|
|
+ Converted tensor sum: 48.862061
|
|
|
+ Original tensor mean: -17.279808
|
|
|
+ Converted tensor mean: 6.107758
|
|
|
+ Mean difference: 23.38756561
|
|
|
+ Maximum pointwise difference: 37.09150314
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -25.42422676, Converted: 11.66727638
|
|
|
+ Biggest difference in row (0, 0), sum -138.238464 vs 48.862061
|
|
|
+
|
|
|
+Layer 9, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -129.366013
|
|
|
+ Converted tensor sum: 32.791050
|
|
|
+ Original tensor mean: -16.170752
|
|
|
+ Converted tensor mean: 4.098881
|
|
|
+ Mean difference: 20.26963234
|
|
|
+ Maximum pointwise difference: 31.74017334
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -22.80648041, Converted: 8.93369293
|
|
|
+ Biggest difference in row (0, 0), sum -129.366013 vs 32.791050
|
|
|
+
|
|
|
+Layer 10, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -112.076103
|
|
|
+ Converted tensor sum: 33.542336
|
|
|
+ Original tensor mean: -14.009513
|
|
|
+ Converted tensor mean: 4.192792
|
|
|
+ Mean difference: 18.20230484
|
|
|
+ Maximum pointwise difference: 30.71049118
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -22.22323608, Converted: 8.48725605
|
|
|
+ Biggest difference in row (0, 0), sum -112.076103 vs 33.542336
|
|
|
+
|
|
|
+Layer 11, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -392.294312
|
|
|
+ Converted tensor sum: 130.177963
|
|
|
+ Original tensor mean: -49.036789
|
|
|
+ Converted tensor mean: 16.272245
|
|
|
+ Mean difference: 65.30903625
|
|
|
+ Maximum pointwise difference: 80.34357452
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -59.32800293, Converted: 21.01557350
|
|
|
+ Biggest difference in row (0, 0), sum -392.294312 vs 130.177963
|
|
|
+
|
|
|
+Layer 12, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -416.741821
|
|
|
+ Converted tensor sum: 126.312363
|
|
|
+ Original tensor mean: -52.092728
|
|
|
+ Converted tensor mean: 15.789045
|
|
|
+ Mean difference: 67.88177490
|
|
|
+ Maximum pointwise difference: 87.29133606
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -65.95119476, Converted: 21.34013939
|
|
|
+ Biggest difference in row (0, 0), sum -416.741821 vs 126.312363
|
|
|
+
|
|
|
+Layer 13, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -420.622223
|
|
|
+ Converted tensor sum: 122.472458
|
|
|
+ Original tensor mean: -52.577778
|
|
|
+ Converted tensor mean: 15.309057
|
|
|
+ Mean difference: 67.88684082
|
|
|
+ Maximum pointwise difference: 89.02587891
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -68.22624969, Converted: 20.79962921
|
|
|
+ Biggest difference in row (0, 0), sum -420.622223 vs 122.472458
|
|
|
+
|
|
|
+Layer 14, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -398.408966
|
|
|
+ Converted tensor sum: 120.881279
|
|
|
+ Original tensor mean: -49.801121
|
|
|
+ Converted tensor mean: 15.110160
|
|
|
+ Mean difference: 64.91127777
|
|
|
+ Maximum pointwise difference: 91.32544708
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -69.91543579, Converted: 21.41001320
|
|
|
+ Biggest difference in row (0, 0), sum -398.408966 vs 120.881279
|
|
|
+
|
|
|
+Layer 15, Token 23 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -754.637085
|
|
|
+ Converted tensor sum: 262.993530
|
|
|
+ Original tensor mean: -94.329636
|
|
|
+ Converted tensor mean: 32.874191
|
|
|
+ Mean difference: 127.20383453
|
|
|
+ Maximum pointwise difference: 157.75305176
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -119.83902740, Converted: 37.91403198
|
|
|
+ Biggest difference in row (0, 0), sum -754.637085 vs 262.993530
|
|
|
+
|
|
|
+Layer 0, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 14.859251
|
|
|
+ Converted tensor sum: 2.731961
|
|
|
+ Original tensor mean: 1.857406
|
|
|
+ Converted tensor mean: 0.341495
|
|
|
+ Mean difference: 4.21605587
|
|
|
+ Maximum pointwise difference: 9.80887794
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 3.86449504, Converted: -5.94438314
|
|
|
+ Biggest difference in row (0, 0), sum 14.859251 vs 2.731961
|
|
|
+
|
|
|
+Layer 1, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 13.986740
|
|
|
+ Converted tensor sum: -2.697716
|
|
|
+ Original tensor mean: 1.748343
|
|
|
+ Converted tensor mean: -0.337215
|
|
|
+ Mean difference: 4.70033360
|
|
|
+ Maximum pointwise difference: 10.86390495
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 3.54197407, Converted: -7.32193136
|
|
|
+ Biggest difference in row (0, 0), sum 13.986740 vs -2.697716
|
|
|
+
|
|
|
+Layer 2, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 13.856454
|
|
|
+ Converted tensor sum: -1.915652
|
|
|
+ Original tensor mean: 1.732057
|
|
|
+ Converted tensor mean: -0.239456
|
|
|
+ Mean difference: 5.10369968
|
|
|
+ Maximum pointwise difference: 13.13724899
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 3.00663447, Converted: -10.13061428
|
|
|
+ Biggest difference in row (0, 0), sum 13.856454 vs -1.915652
|
|
|
+
|
|
|
+Layer 3, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 63.979485
|
|
|
+ Converted tensor sum: -50.051231
|
|
|
+ Original tensor mean: 7.997436
|
|
|
+ Converted tensor mean: -6.256404
|
|
|
+ Mean difference: 14.25383949
|
|
|
+ Maximum pointwise difference: 25.59371948
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.45009327, Converted: -16.14362717
|
|
|
+ Biggest difference in row (0, 0), sum 63.979485 vs -50.051231
|
|
|
+
|
|
|
+Layer 4, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 60.174347
|
|
|
+ Converted tensor sum: -64.423790
|
|
|
+ Original tensor mean: 7.521793
|
|
|
+ Converted tensor mean: -8.052974
|
|
|
+ Mean difference: 15.57476616
|
|
|
+ Maximum pointwise difference: 27.42375755
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 8.99039078, Converted: -18.43336678
|
|
|
+ Biggest difference in row (0, 0), sum 60.174347 vs -64.423790
|
|
|
+
|
|
|
+Layer 5, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 53.195156
|
|
|
+ Converted tensor sum: -88.183350
|
|
|
+ Original tensor mean: 6.649395
|
|
|
+ Converted tensor mean: -11.022919
|
|
|
+ Mean difference: 17.67231369
|
|
|
+ Maximum pointwise difference: 35.18456650
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 7.48332596, Converted: -27.70124054
|
|
|
+ Biggest difference in row (0, 0), sum 53.195156 vs -88.183350
|
|
|
+
|
|
|
+Layer 6, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 55.262775
|
|
|
+ Converted tensor sum: -106.113434
|
|
|
+ Original tensor mean: 6.907847
|
|
|
+ Converted tensor mean: -13.264179
|
|
|
+ Mean difference: 20.17202759
|
|
|
+ Maximum pointwise difference: 40.46305084
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 8.41111183, Converted: -32.05193710
|
|
|
+ Biggest difference in row (0, 0), sum 55.262775 vs -106.113434
|
|
|
+
|
|
|
+Layer 7, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 120.454941
|
|
|
+ Converted tensor sum: -239.645325
|
|
|
+ Original tensor mean: 15.056868
|
|
|
+ Converted tensor mean: -29.955666
|
|
|
+ Mean difference: 45.01253128
|
|
|
+ Maximum pointwise difference: 65.79338074
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 14.88038158, Converted: -50.91299820
|
|
|
+ Biggest difference in row (0, 0), sum 120.454941 vs -239.645325
|
|
|
+
|
|
|
+Layer 8, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 103.648430
|
|
|
+ Converted tensor sum: -223.958084
|
|
|
+ Original tensor mean: 12.956054
|
|
|
+ Converted tensor mean: -27.994761
|
|
|
+ Mean difference: 40.95081329
|
|
|
+ Maximum pointwise difference: 71.07021332
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 13.01342583, Converted: -58.05678558
|
|
|
+ Biggest difference in row (0, 0), sum 103.648430 vs -223.958084
|
|
|
+
|
|
|
+Layer 9, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 90.361565
|
|
|
+ Converted tensor sum: -216.935654
|
|
|
+ Original tensor mean: 11.295196
|
|
|
+ Converted tensor mean: -27.116957
|
|
|
+ Mean difference: 38.41215515
|
|
|
+ Maximum pointwise difference: 69.46690369
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 9.60147953, Converted: -59.86542511
|
|
|
+ Biggest difference in row (0, 0), sum 90.361565 vs -216.935654
|
|
|
+
|
|
|
+Layer 10, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 83.880753
|
|
|
+ Converted tensor sum: -215.275970
|
|
|
+ Original tensor mean: 10.485094
|
|
|
+ Converted tensor mean: -26.909496
|
|
|
+ Mean difference: 37.39459229
|
|
|
+ Maximum pointwise difference: 70.35929108
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 8.32141781, Converted: -62.03787613
|
|
|
+ Biggest difference in row (0, 0), sum 83.880753 vs -215.275970
|
|
|
+
|
|
|
+Layer 11, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 169.893204
|
|
|
+ Converted tensor sum: -521.842712
|
|
|
+ Original tensor mean: 21.236650
|
|
|
+ Converted tensor mean: -65.230339
|
|
|
+ Mean difference: 86.46699524
|
|
|
+ Maximum pointwise difference: 124.57461548
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 19.84806633, Converted: -104.72654724
|
|
|
+ Biggest difference in row (0, 0), sum 169.893204 vs -521.842712
|
|
|
+
|
|
|
+Layer 12, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 170.650391
|
|
|
+ Converted tensor sum: -527.495605
|
|
|
+ Original tensor mean: 21.331299
|
|
|
+ Converted tensor mean: -65.936951
|
|
|
+ Mean difference: 87.26824951
|
|
|
+ Maximum pointwise difference: 124.01423645
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 20.41718483, Converted: -103.59705353
|
|
|
+ Biggest difference in row (0, 0), sum 170.650391 vs -527.495605
|
|
|
+
|
|
|
+Layer 13, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 167.707260
|
|
|
+ Converted tensor sum: -525.824341
|
|
|
+ Original tensor mean: 20.963408
|
|
|
+ Converted tensor mean: -65.728043
|
|
|
+ Mean difference: 86.69145203
|
|
|
+ Maximum pointwise difference: 120.31568909
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 18.97763062, Converted: -101.33805847
|
|
|
+ Biggest difference in row (0, 0), sum 167.707260 vs -525.824341
|
|
|
+
|
|
|
+Layer 14, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 160.910034
|
|
|
+ Converted tensor sum: -562.698975
|
|
|
+ Original tensor mean: 20.113754
|
|
|
+ Converted tensor mean: -70.337372
|
|
|
+ Mean difference: 90.45112610
|
|
|
+ Maximum pointwise difference: 127.80590057
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 17.37784767, Converted: -110.42805481
|
|
|
+ Biggest difference in row (0, 0), sum 160.910034 vs -562.698975
|
|
|
+
|
|
|
+Layer 15, Token 24 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 306.123810
|
|
|
+ Converted tensor sum: -931.621094
|
|
|
+ Original tensor mean: 38.265476
|
|
|
+ Converted tensor mean: -116.452637
|
|
|
+ Mean difference: 154.71810913
|
|
|
+ Maximum pointwise difference: 176.81520081
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 29.99453545, Converted: -146.82066345
|
|
|
+ Biggest difference in row (0, 0), sum 306.123810 vs -931.621094
|
|
|
+
|
|
|
+Layer 0, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -6.641135
|
|
|
+ Converted tensor sum: -3.933383
|
|
|
+ Original tensor mean: -0.830142
|
|
|
+ Converted tensor mean: -0.491673
|
|
|
+ Mean difference: 3.03462601
|
|
|
+ Maximum pointwise difference: 5.75030708
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -6.01051331, Converted: -0.26020634
|
|
|
+ Biggest difference in row (0, 0), sum -6.641135 vs -3.933383
|
|
|
+
|
|
|
+Layer 1, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -1.642994
|
|
|
+ Converted tensor sum: -11.347046
|
|
|
+ Original tensor mean: -0.205374
|
|
|
+ Converted tensor mean: -1.418381
|
|
|
+ Mean difference: 2.82665110
|
|
|
+ Maximum pointwise difference: 5.44076443
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: -3.67477202, Converted: -9.11553669
|
|
|
+ Biggest difference in row (0, 0), sum -1.642994 vs -11.347046
|
|
|
+
|
|
|
+Layer 2, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 6.404377
|
|
|
+ Converted tensor sum: -14.681939
|
|
|
+ Original tensor mean: 0.800547
|
|
|
+ Converted tensor mean: -1.835242
|
|
|
+ Mean difference: 3.35868859
|
|
|
+ Maximum pointwise difference: 7.97232580
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 5.46229649, Converted: -2.51002932
|
|
|
+ Biggest difference in row (0, 0), sum 6.404377 vs -14.681939
|
|
|
+
|
|
|
+Layer 3, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 73.178505
|
|
|
+ Converted tensor sum: -57.235046
|
|
|
+ Original tensor mean: 9.147313
|
|
|
+ Converted tensor mean: -7.154381
|
|
|
+ Mean difference: 16.30169487
|
|
|
+ Maximum pointwise difference: 20.31940651
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 6.96401119, Converted: -13.35539532
|
|
|
+ Biggest difference in row (0, 0), sum 73.178505 vs -57.235046
|
|
|
+
|
|
|
+Layer 4, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.662933
|
|
|
+ Converted tensor sum: -75.145912
|
|
|
+ Original tensor mean: 8.207867
|
|
|
+ Converted tensor mean: -9.393239
|
|
|
+ Mean difference: 17.60110474
|
|
|
+ Maximum pointwise difference: 25.96934509
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 5.62515926, Converted: -20.34418488
|
|
|
+ Biggest difference in row (0, 0), sum 65.662933 vs -75.145912
|
|
|
+
|
|
|
+Layer 5, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 54.107101
|
|
|
+ Converted tensor sum: -105.733917
|
|
|
+ Original tensor mean: 6.763388
|
|
|
+ Converted tensor mean: -13.216740
|
|
|
+ Mean difference: 19.98012924
|
|
|
+ Maximum pointwise difference: 28.99731064
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 10.49883652, Converted: -18.49847412
|
|
|
+ Biggest difference in row (0, 0), sum 54.107101 vs -105.733917
|
|
|
+
|
|
|
+Layer 6, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 48.177361
|
|
|
+ Converted tensor sum: -134.772308
|
|
|
+ Original tensor mean: 6.022170
|
|
|
+ Converted tensor mean: -16.846539
|
|
|
+ Mean difference: 22.86870766
|
|
|
+ Maximum pointwise difference: 36.34035110
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 2.22752476, Converted: -34.11282730
|
|
|
+ Biggest difference in row (0, 0), sum 48.177361 vs -134.772308
|
|
|
+
|
|
|
+Layer 7, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 111.839172
|
|
|
+ Converted tensor sum: -277.301056
|
|
|
+ Original tensor mean: 13.979897
|
|
|
+ Converted tensor mean: -34.662632
|
|
|
+ Mean difference: 48.64252853
|
|
|
+ Maximum pointwise difference: 62.89208221
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 9.78997040, Converted: -53.10211182
|
|
|
+ Biggest difference in row (0, 0), sum 111.839172 vs -277.301056
|
|
|
+
|
|
|
+Layer 8, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 104.861267
|
|
|
+ Converted tensor sum: -286.217560
|
|
|
+ Original tensor mean: 13.107658
|
|
|
+ Converted tensor mean: -35.777195
|
|
|
+ Mean difference: 48.88484955
|
|
|
+ Maximum pointwise difference: 65.30915833
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 20.24993896, Converted: -45.05921936
|
|
|
+ Biggest difference in row (0, 0), sum 104.861267 vs -286.217560
|
|
|
+
|
|
|
+Layer 9, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 96.630295
|
|
|
+ Converted tensor sum: -313.393005
|
|
|
+ Original tensor mean: 12.078787
|
|
|
+ Converted tensor mean: -39.174126
|
|
|
+ Mean difference: 51.25291061
|
|
|
+ Maximum pointwise difference: 67.83577728
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 19.77431297, Converted: -48.06146622
|
|
|
+ Biggest difference in row (0, 0), sum 96.630295 vs -313.393005
|
|
|
+
|
|
|
+Layer 10, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 89.098160
|
|
|
+ Converted tensor sum: -316.188721
|
|
|
+ Original tensor mean: 11.137270
|
|
|
+ Converted tensor mean: -39.523590
|
|
|
+ Mean difference: 50.66085815
|
|
|
+ Maximum pointwise difference: 63.01490784
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 18.63522339, Converted: -44.37968445
|
|
|
+ Biggest difference in row (0, 0), sum 89.098160 vs -316.188721
|
|
|
+
|
|
|
+Layer 11, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 183.329193
|
|
|
+ Converted tensor sum: -640.859741
|
|
|
+ Original tensor mean: 22.916149
|
|
|
+ Converted tensor mean: -80.107468
|
|
|
+ Mean difference: 103.02362061
|
|
|
+ Maximum pointwise difference: 123.61917114
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 28.08130074, Converted: -95.53787231
|
|
|
+ Biggest difference in row (0, 0), sum 183.329193 vs -640.859741
|
|
|
+
|
|
|
+Layer 12, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 183.012512
|
|
|
+ Converted tensor sum: -647.243774
|
|
|
+ Original tensor mean: 22.876564
|
|
|
+ Converted tensor mean: -80.905472
|
|
|
+ Mean difference: 103.78203583
|
|
|
+ Maximum pointwise difference: 121.95301819
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 28.78862381, Converted: -93.16439056
|
|
|
+ Biggest difference in row (0, 0), sum 183.012512 vs -647.243774
|
|
|
+
|
|
|
+Layer 13, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 179.038055
|
|
|
+ Converted tensor sum: -675.284363
|
|
|
+ Original tensor mean: 22.379757
|
|
|
+ Converted tensor mean: -84.410545
|
|
|
+ Mean difference: 106.79029846
|
|
|
+ Maximum pointwise difference: 124.18766785
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 29.24967384, Converted: -94.93799591
|
|
|
+ Biggest difference in row (0, 0), sum 179.038055 vs -675.284363
|
|
|
+
|
|
|
+Layer 14, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 177.600830
|
|
|
+ Converted tensor sum: -653.687622
|
|
|
+ Original tensor mean: 22.200104
|
|
|
+ Converted tensor mean: -81.710953
|
|
|
+ Mean difference: 103.91105652
|
|
|
+ Maximum pointwise difference: 120.82553864
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 21.10656929, Converted: -99.71897125
|
|
|
+ Biggest difference in row (0, 0), sum 177.600830 vs -653.687622
|
|
|
+
|
|
|
+Layer 15, Token 25 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 323.013031
|
|
|
+ Converted tensor sum: -1030.671143
|
|
|
+ Original tensor mean: 40.376629
|
|
|
+ Converted tensor mean: -128.833893
|
|
|
+ Mean difference: 169.21054077
|
|
|
+ Maximum pointwise difference: 193.25675964
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 50.33515549, Converted: -142.92160034
|
|
|
+ Biggest difference in row (0, 0), sum 323.013031 vs -1030.671143
|
|
|
+
|
|
|
+Layer 0, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 65.941025
|
|
|
+ Converted tensor sum: -21.309677
|
|
|
+ Original tensor mean: 8.242628
|
|
|
+ Converted tensor mean: -2.663710
|
|
|
+ Mean difference: 10.92460823
|
|
|
+ Maximum pointwise difference: 22.60500336
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 19.03843307, Converted: -3.56657028
|
|
|
+ Biggest difference in row (0, 0), sum 65.941025 vs -21.309677
|
|
|
+
|
|
|
+Layer 1, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 52.076649
|
|
|
+ Converted tensor sum: -57.925156
|
|
|
+ Original tensor mean: 6.509581
|
|
|
+ Converted tensor mean: -7.240644
|
|
|
+ Mean difference: 14.23825073
|
|
|
+ Maximum pointwise difference: 19.17949104
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 5.37531137, Converted: -13.80417919
|
|
|
+ Biggest difference in row (0, 0), sum 52.076649 vs -57.925156
|
|
|
+
|
|
|
+Layer 2, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 51.231728
|
|
|
+ Converted tensor sum: -47.847797
|
|
|
+ Original tensor mean: 6.403966
|
|
|
+ Converted tensor mean: -5.980975
|
|
|
+ Mean difference: 12.38494110
|
|
|
+ Maximum pointwise difference: 23.60085297
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 9.02445030, Converted: -14.57640362
|
|
|
+ Biggest difference in row (0, 0), sum 51.231728 vs -47.847797
|
|
|
+
|
|
|
+Layer 3, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 107.302612
|
|
|
+ Converted tensor sum: -173.292923
|
|
|
+ Original tensor mean: 13.412827
|
|
|
+ Converted tensor mean: -21.661615
|
|
|
+ Mean difference: 35.07444000
|
|
|
+ Maximum pointwise difference: 43.60850143
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 14.85190392, Converted: -28.75659752
|
|
|
+ Biggest difference in row (0, 0), sum 107.302612 vs -173.292923
|
|
|
+
|
|
|
+Layer 4, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 97.273697
|
|
|
+ Converted tensor sum: -182.550171
|
|
|
+ Original tensor mean: 12.159212
|
|
|
+ Converted tensor mean: -22.818771
|
|
|
+ Mean difference: 34.97798157
|
|
|
+ Maximum pointwise difference: 46.59681320
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 14.26772594, Converted: -32.32908630
|
|
|
+ Biggest difference in row (0, 0), sum 97.273697 vs -182.550171
|
|
|
+
|
|
|
+Layer 5, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 85.259064
|
|
|
+ Converted tensor sum: -172.859528
|
|
|
+ Original tensor mean: 10.657383
|
|
|
+ Converted tensor mean: -21.607441
|
|
|
+ Mean difference: 32.26482391
|
|
|
+ Maximum pointwise difference: 44.72983170
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 13.95336819, Converted: -30.77646255
|
|
|
+ Biggest difference in row (0, 0), sum 85.259064 vs -172.859528
|
|
|
+
|
|
|
+Layer 6, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 87.096161
|
|
|
+ Converted tensor sum: -208.315033
|
|
|
+ Original tensor mean: 10.887020
|
|
|
+ Converted tensor mean: -26.039379
|
|
|
+ Mean difference: 36.92639923
|
|
|
+ Maximum pointwise difference: 45.54611206
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 14.15797043, Converted: -31.38814354
|
|
|
+ Biggest difference in row (0, 0), sum 87.096161 vs -208.315033
|
|
|
+
|
|
|
+Layer 7, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 160.905060
|
|
|
+ Converted tensor sum: -356.607910
|
|
|
+ Original tensor mean: 20.113132
|
|
|
+ Converted tensor mean: -44.575989
|
|
|
+ Mean difference: 64.68911743
|
|
|
+ Maximum pointwise difference: 73.27433014
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 19.29874229, Converted: -53.97558594
|
|
|
+ Biggest difference in row (0, 0), sum 160.905060 vs -356.607910
|
|
|
+
|
|
|
+Layer 8, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 147.546188
|
|
|
+ Converted tensor sum: -372.627655
|
|
|
+ Original tensor mean: 18.443274
|
|
|
+ Converted tensor mean: -46.578457
|
|
|
+ Mean difference: 65.02172852
|
|
|
+ Maximum pointwise difference: 75.06597900
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 17.32047462, Converted: -57.74550629
|
|
|
+ Biggest difference in row (0, 0), sum 147.546188 vs -372.627655
|
|
|
+
|
|
|
+Layer 9, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 142.108231
|
|
|
+ Converted tensor sum: -384.533997
|
|
|
+ Original tensor mean: 17.763529
|
|
|
+ Converted tensor mean: -48.066750
|
|
|
+ Mean difference: 65.83027649
|
|
|
+ Maximum pointwise difference: 80.39822388
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 9.57865334, Converted: -70.81957245
|
|
|
+ Biggest difference in row (0, 0), sum 142.108231 vs -384.533997
|
|
|
+
|
|
|
+Layer 10, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 136.597595
|
|
|
+ Converted tensor sum: -406.001617
|
|
|
+ Original tensor mean: 17.074699
|
|
|
+ Converted tensor mean: -50.750202
|
|
|
+ Mean difference: 67.82489777
|
|
|
+ Maximum pointwise difference: 83.06503296
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 16.25280952, Converted: -66.81222534
|
|
|
+ Biggest difference in row (0, 0), sum 136.597595 vs -406.001617
|
|
|
+
|
|
|
+Layer 11, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 234.238876
|
|
|
+ Converted tensor sum: -719.742371
|
|
|
+ Original tensor mean: 29.279860
|
|
|
+ Converted tensor mean: -89.967796
|
|
|
+ Mean difference: 119.24765778
|
|
|
+ Maximum pointwise difference: 144.35720825
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 22.54579163, Converted: -121.81141663
|
|
|
+ Biggest difference in row (0, 0), sum 234.238876 vs -719.742371
|
|
|
+
|
|
|
+Layer 12, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 230.967987
|
|
|
+ Converted tensor sum: -737.411499
|
|
|
+ Original tensor mean: 28.870998
|
|
|
+ Converted tensor mean: -92.176437
|
|
|
+ Mean difference: 121.04743958
|
|
|
+ Maximum pointwise difference: 145.76480103
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 23.33647728, Converted: -122.42832184
|
|
|
+ Biggest difference in row (0, 0), sum 230.967987 vs -737.411499
|
|
|
+
|
|
|
+Layer 13, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 225.836136
|
|
|
+ Converted tensor sum: -743.471008
|
|
|
+ Original tensor mean: 28.229517
|
|
|
+ Converted tensor mean: -92.933876
|
|
|
+ Mean difference: 121.16339111
|
|
|
+ Maximum pointwise difference: 141.17944336
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 23.16177559, Converted: -118.01766205
|
|
|
+ Biggest difference in row (0, 0), sum 225.836136 vs -743.471008
|
|
|
+
|
|
|
+Layer 14, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 222.057236
|
|
|
+ Converted tensor sum: -845.007874
|
|
|
+ Original tensor mean: 27.757154
|
|
|
+ Converted tensor mean: -105.625984
|
|
|
+ Mean difference: 133.38313293
|
|
|
+ Maximum pointwise difference: 164.57283020
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 29.71310997, Converted: -134.85972595
|
|
|
+ Biggest difference in row (0, 0), sum 222.057236 vs -845.007874
|
|
|
+
|
|
|
+Layer 15, Token 26 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 366.139526
|
|
|
+ Converted tensor sum: -1227.681152
|
|
|
+ Original tensor mean: 45.767441
|
|
|
+ Converted tensor mean: -153.460144
|
|
|
+ Mean difference: 199.22756958
|
|
|
+ Maximum pointwise difference: 235.55526733
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 46.65935516, Converted: -188.89590454
|
|
|
+ Biggest difference in row (0, 0), sum 366.139526 vs -1227.681152
|
|
|
+
|
|
|
+Layer 0, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 0.538792
|
|
|
+ Converted tensor sum: -2.767126
|
|
|
+ Original tensor mean: 0.067349
|
|
|
+ Converted tensor mean: -0.345891
|
|
|
+ Mean difference: 1.04583490
|
|
|
+ Maximum pointwise difference: 4.03163290
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 4.54428434, Converted: 0.51265144
|
|
|
+ Biggest difference in row (0, 0), sum 0.538792 vs -2.767126
|
|
|
+
|
|
|
+Layer 1, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -13.666726
|
|
|
+ Converted tensor sum: 4.859785
|
|
|
+ Original tensor mean: -1.708341
|
|
|
+ Converted tensor mean: 0.607473
|
|
|
+ Mean difference: 3.73808312
|
|
|
+ Maximum pointwise difference: 11.04657841
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -6.84830761, Converted: 4.19827080
|
|
|
+ Biggest difference in row (0, 0), sum -13.666726 vs 4.859785
|
|
|
+
|
|
|
+Layer 2, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 19.892342
|
|
|
+ Converted tensor sum: 18.553621
|
|
|
+ Original tensor mean: 2.486543
|
|
|
+ Converted tensor mean: 2.319203
|
|
|
+ Mean difference: 3.86019540
|
|
|
+ Maximum pointwise difference: 12.85380554
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -5.59446335, Converted: 7.25934219
|
|
|
+ Biggest difference in row (0, 0), sum 19.892342 vs 18.553621
|
|
|
+
|
|
|
+Layer 3, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 84.246483
|
|
|
+ Converted tensor sum: 49.827652
|
|
|
+ Original tensor mean: 10.530810
|
|
|
+ Converted tensor mean: 6.228456
|
|
|
+ Mean difference: 6.56024361
|
|
|
+ Maximum pointwise difference: 11.30776882
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 18.61387444, Converted: 7.30610561
|
|
|
+ Biggest difference in row (0, 0), sum 84.246483 vs 49.827652
|
|
|
+
|
|
|
+Layer 4, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 72.374397
|
|
|
+ Converted tensor sum: 50.589382
|
|
|
+ Original tensor mean: 9.046800
|
|
|
+ Converted tensor mean: 6.323673
|
|
|
+ Mean difference: 5.51325321
|
|
|
+ Maximum pointwise difference: 11.16050529
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -2.93389368, Converted: 8.22661209
|
|
|
+ Biggest difference in row (0, 0), sum 72.374397 vs 50.589382
|
|
|
+
|
|
|
+Layer 5, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 68.200790
|
|
|
+ Converted tensor sum: 51.359711
|
|
|
+ Original tensor mean: 8.525099
|
|
|
+ Converted tensor mean: 6.419964
|
|
|
+ Mean difference: 4.32947350
|
|
|
+ Maximum pointwise difference: 8.89735222
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -3.28547406, Converted: 5.61187792
|
|
|
+ Biggest difference in row (0, 0), sum 68.200790 vs 51.359711
|
|
|
+
|
|
|
+Layer 6, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 70.421684
|
|
|
+ Converted tensor sum: 41.851700
|
|
|
+ Original tensor mean: 8.802711
|
|
|
+ Converted tensor mean: 5.231462
|
|
|
+ Mean difference: 5.60544014
|
|
|
+ Maximum pointwise difference: 9.42855549
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: 15.64872551, Converted: 6.22017002
|
|
|
+ Biggest difference in row (0, 0), sum 70.421684 vs 41.851700
|
|
|
+
|
|
|
+Layer 7, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 138.012558
|
|
|
+ Converted tensor sum: 106.052734
|
|
|
+ Original tensor mean: 17.251570
|
|
|
+ Converted tensor mean: 13.256592
|
|
|
+ Mean difference: 5.83357430
|
|
|
+ Maximum pointwise difference: 9.46822166
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: 20.60037422, Converted: 11.13215256
|
|
|
+ Biggest difference in row (0, 0), sum 138.012558 vs 106.052734
|
|
|
+
|
|
|
+Layer 8, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 124.592545
|
|
|
+ Converted tensor sum: 109.657555
|
|
|
+ Original tensor mean: 15.574068
|
|
|
+ Converted tensor mean: 13.707194
|
|
|
+ Mean difference: 4.43112850
|
|
|
+ Maximum pointwise difference: 10.25702190
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 3.63314724, Converted: 13.89016914
|
|
|
+ Biggest difference in row (0, 0), sum 124.592545 vs 109.657555
|
|
|
+
|
|
|
+Layer 9, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 110.794357
|
|
|
+ Converted tensor sum: 109.277565
|
|
|
+ Original tensor mean: 13.849295
|
|
|
+ Converted tensor mean: 13.659696
|
|
|
+ Mean difference: 4.23832560
|
|
|
+ Maximum pointwise difference: 11.81062031
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 1.84443951, Converted: 13.65505981
|
|
|
+ Biggest difference in row (0, 0), sum 110.794357 vs 109.277565
|
|
|
+
|
|
|
+Layer 10, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 104.034340
|
|
|
+ Converted tensor sum: 104.158554
|
|
|
+ Original tensor mean: 13.004292
|
|
|
+ Converted tensor mean: 13.019819
|
|
|
+ Mean difference: 4.04881191
|
|
|
+ Maximum pointwise difference: 12.64350224
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 0.44486341, Converted: 13.08836555
|
|
|
+ Biggest difference in row (0, 0), sum 104.034340 vs 104.158554
|
|
|
+
|
|
|
+Layer 11, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 194.747101
|
|
|
+ Converted tensor sum: 186.990341
|
|
|
+ Original tensor mean: 24.343388
|
|
|
+ Converted tensor mean: 23.373793
|
|
|
+ Mean difference: 4.42853832
|
|
|
+ Maximum pointwise difference: 11.92787266
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 11.99839497, Converted: 23.92626762
|
|
|
+ Biggest difference in row (0, 0), sum 194.747101 vs 186.990341
|
|
|
+
|
|
|
+Layer 12, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 195.014465
|
|
|
+ Converted tensor sum: 185.515793
|
|
|
+ Original tensor mean: 24.376808
|
|
|
+ Converted tensor mean: 23.189474
|
|
|
+ Mean difference: 4.49333429
|
|
|
+ Maximum pointwise difference: 11.10862160
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 11.92906380, Converted: 23.03768539
|
|
|
+ Biggest difference in row (0, 0), sum 195.014465 vs 185.515793
|
|
|
+
|
|
|
+Layer 13, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 187.897064
|
|
|
+ Converted tensor sum: 182.353088
|
|
|
+ Original tensor mean: 23.487133
|
|
|
+ Converted tensor mean: 22.794136
|
|
|
+ Mean difference: 4.64961338
|
|
|
+ Maximum pointwise difference: 12.63825989
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 10.51706123, Converted: 23.15532112
|
|
|
+ Biggest difference in row (0, 0), sum 187.897064 vs 182.353088
|
|
|
+
|
|
|
+Layer 14, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 182.226410
|
|
|
+ Converted tensor sum: 180.585373
|
|
|
+ Original tensor mean: 22.778301
|
|
|
+ Converted tensor mean: 22.573172
|
|
|
+ Mean difference: 4.70111561
|
|
|
+ Maximum pointwise difference: 12.44419956
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 11.31790829, Converted: 23.76210785
|
|
|
+ Biggest difference in row (0, 0), sum 182.226410 vs 180.585373
|
|
|
+
|
|
|
+Layer 15, Token 27 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 333.560730
|
|
|
+ Converted tensor sum: 318.274811
|
|
|
+ Original tensor mean: 41.695091
|
|
|
+ Converted tensor mean: 39.784351
|
|
|
+ Mean difference: 4.67095470
|
|
|
+ Maximum pointwise difference: 11.04085732
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: 28.01206779, Converted: 39.05292511
|
|
|
+ Biggest difference in row (0, 0), sum 333.560730 vs 318.274811
|
|
|
+
|
|
|
+Layer 0, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -40.607262
|
|
|
+ Converted tensor sum: 42.743095
|
|
|
+ Original tensor mean: -5.075908
|
|
|
+ Converted tensor mean: 5.342887
|
|
|
+ Mean difference: 11.17178345
|
|
|
+ Maximum pointwise difference: 22.58385468
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -14.17651558, Converted: 8.40733814
|
|
|
+ Biggest difference in row (0, 0), sum -40.607262 vs 42.743095
|
|
|
+
|
|
|
+Layer 1, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -43.333393
|
|
|
+ Converted tensor sum: 31.481144
|
|
|
+ Original tensor mean: -5.416674
|
|
|
+ Converted tensor mean: 3.935143
|
|
|
+ Mean difference: 11.11242485
|
|
|
+ Maximum pointwise difference: 18.85606575
|
|
|
+ Max difference location: (0, 0, 5)
|
|
|
+ Values at max diff - Original: -10.93557739, Converted: 7.92048883
|
|
|
+ Biggest difference in row (0, 0), sum -43.333393 vs 31.481144
|
|
|
+
|
|
|
+Layer 2, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -67.416214
|
|
|
+ Converted tensor sum: 33.172539
|
|
|
+ Original tensor mean: -8.427027
|
|
|
+ Converted tensor mean: 4.146567
|
|
|
+ Mean difference: 14.60656548
|
|
|
+ Maximum pointwise difference: 20.67273331
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -13.93512535, Converted: 6.73760748
|
|
|
+ Biggest difference in row (0, 0), sum -67.416214 vs 33.172539
|
|
|
+
|
|
|
+Layer 3, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -199.361206
|
|
|
+ Converted tensor sum: 72.683899
|
|
|
+ Original tensor mean: -24.920151
|
|
|
+ Converted tensor mean: 9.085487
|
|
|
+ Mean difference: 34.00563812
|
|
|
+ Maximum pointwise difference: 41.37638092
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -29.60864067, Converted: 11.76773930
|
|
|
+ Biggest difference in row (0, 0), sum -199.361206 vs 72.683899
|
|
|
+
|
|
|
+Layer 4, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -137.055893
|
|
|
+ Converted tensor sum: 63.596687
|
|
|
+ Original tensor mean: -17.131987
|
|
|
+ Converted tensor mean: 7.949586
|
|
|
+ Mean difference: 25.75262260
|
|
|
+ Maximum pointwise difference: 40.96822739
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -29.79143906, Converted: 11.17678833
|
|
|
+ Biggest difference in row (0, 0), sum -137.055893 vs 63.596687
|
|
|
+
|
|
|
+Layer 5, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -73.715279
|
|
|
+ Converted tensor sum: 62.123581
|
|
|
+ Original tensor mean: -9.214410
|
|
|
+ Converted tensor mean: 7.765448
|
|
|
+ Mean difference: 17.84333420
|
|
|
+ Maximum pointwise difference: 31.90785027
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -24.06629181, Converted: 7.84155846
|
|
|
+ Biggest difference in row (0, 0), sum -73.715279 vs 62.123581
|
|
|
+
|
|
|
+Layer 6, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -126.770874
|
|
|
+ Converted tensor sum: 61.464096
|
|
|
+ Original tensor mean: -15.846359
|
|
|
+ Converted tensor mean: 7.683012
|
|
|
+ Mean difference: 23.61796379
|
|
|
+ Maximum pointwise difference: 36.02120209
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -27.60279655, Converted: 8.41840744
|
|
|
+ Biggest difference in row (0, 0), sum -126.770874 vs 61.464096
|
|
|
+
|
|
|
+Layer 7, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -254.607422
|
|
|
+ Converted tensor sum: 126.028885
|
|
|
+ Original tensor mean: -31.825928
|
|
|
+ Converted tensor mean: 15.753611
|
|
|
+ Mean difference: 47.57954025
|
|
|
+ Maximum pointwise difference: 61.42348480
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -40.33562851, Converted: 21.08785439
|
|
|
+ Biggest difference in row (0, 0), sum -254.607422 vs 126.028885
|
|
|
+
|
|
|
+Layer 8, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -198.536194
|
|
|
+ Converted tensor sum: 120.381157
|
|
|
+ Original tensor mean: -24.817024
|
|
|
+ Converted tensor mean: 15.047645
|
|
|
+ Mean difference: 39.86466980
|
|
|
+ Maximum pointwise difference: 52.24274063
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -30.55666733, Converted: 21.68607330
|
|
|
+ Biggest difference in row (0, 0), sum -198.536194 vs 120.381157
|
|
|
+
|
|
|
+Layer 9, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -203.318542
|
|
|
+ Converted tensor sum: 118.674896
|
|
|
+ Original tensor mean: -25.414818
|
|
|
+ Converted tensor mean: 14.834362
|
|
|
+ Mean difference: 40.24917984
|
|
|
+ Maximum pointwise difference: 51.74636078
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -36.37534714, Converted: 15.37101555
|
|
|
+ Biggest difference in row (0, 0), sum -203.318542 vs 118.674896
|
|
|
+
|
|
|
+Layer 10, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -173.929123
|
|
|
+ Converted tensor sum: 115.971573
|
|
|
+ Original tensor mean: -21.741140
|
|
|
+ Converted tensor mean: 14.496447
|
|
|
+ Mean difference: 36.23758698
|
|
|
+ Maximum pointwise difference: 47.99763489
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -32.96516418, Converted: 15.03247166
|
|
|
+ Biggest difference in row (0, 0), sum -173.929123 vs 115.971573
|
|
|
+
|
|
|
+Layer 11, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -450.842834
|
|
|
+ Converted tensor sum: 202.799988
|
|
|
+ Original tensor mean: -56.355354
|
|
|
+ Converted tensor mean: 25.349998
|
|
|
+ Mean difference: 81.70535278
|
|
|
+ Maximum pointwise difference: 92.55924988
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -66.56226349, Converted: 25.99698830
|
|
|
+ Biggest difference in row (0, 0), sum -450.842834 vs 202.799988
|
|
|
+
|
|
|
+Layer 12, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -483.456177
|
|
|
+ Converted tensor sum: 204.607147
|
|
|
+ Original tensor mean: -60.432022
|
|
|
+ Converted tensor mean: 25.575893
|
|
|
+ Mean difference: 86.00791931
|
|
|
+ Maximum pointwise difference: 97.30514526
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -70.58811951, Converted: 26.71702957
|
|
|
+ Biggest difference in row (0, 0), sum -483.456177 vs 204.607147
|
|
|
+
|
|
|
+Layer 13, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -487.978210
|
|
|
+ Converted tensor sum: 194.803741
|
|
|
+ Original tensor mean: -60.997276
|
|
|
+ Converted tensor mean: 24.350468
|
|
|
+ Mean difference: 85.34774780
|
|
|
+ Maximum pointwise difference: 97.00595093
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: -71.41757965, Converted: 25.58836937
|
|
|
+ Biggest difference in row (0, 0), sum -487.978210 vs 194.803741
|
|
|
+
|
|
|
+Layer 14, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -487.676697
|
|
|
+ Converted tensor sum: 192.080292
|
|
|
+ Original tensor mean: -60.959587
|
|
|
+ Converted tensor mean: 24.010036
|
|
|
+ Mean difference: 84.96962738
|
|
|
+ Maximum pointwise difference: 101.62533569
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -75.57343292, Converted: 26.05190277
|
|
|
+ Biggest difference in row (0, 0), sum -487.676697 vs 192.080292
|
|
|
+
|
|
|
+Layer 15, Token 28 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -826.685791
|
|
|
+ Converted tensor sum: 324.333130
|
|
|
+ Original tensor mean: -103.335724
|
|
|
+ Converted tensor mean: 40.541641
|
|
|
+ Mean difference: 143.87736511
|
|
|
+ Maximum pointwise difference: 160.84576416
|
|
|
+ Max difference location: (0, 0, 4)
|
|
|
+ Values at max diff - Original: -111.58706665, Converted: 49.25869751
|
|
|
+ Biggest difference in row (0, 0), sum -826.685791 vs 324.333130
|
|
|
+
|
|
|
+Layer 0, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -7.335809
|
|
|
+ Converted tensor sum: 5.924038
|
|
|
+ Original tensor mean: -0.916976
|
|
|
+ Converted tensor mean: 0.740505
|
|
|
+ Mean difference: 2.81220579
|
|
|
+ Maximum pointwise difference: 5.74731255
|
|
|
+ Max difference location: (0, 0, 2)
|
|
|
+ Values at max diff - Original: -3.16068745, Converted: 2.58662534
|
|
|
+ Biggest difference in row (0, 0), sum -7.335809 vs 5.924038
|
|
|
+
|
|
|
+Layer 1, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: -4.554134
|
|
|
+ Converted tensor sum: 7.198357
|
|
|
+ Original tensor mean: -0.569267
|
|
|
+ Converted tensor mean: 0.899795
|
|
|
+ Mean difference: 4.59539890
|
|
|
+ Maximum pointwise difference: 12.22019768
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -9.41592121, Converted: 2.80427670
|
|
|
+ Biggest difference in row (0, 0), sum -4.554134 vs 7.198357
|
|
|
+
|
|
|
+Layer 2, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 18.821238
|
|
|
+ Converted tensor sum: -2.444355
|
|
|
+ Original tensor mean: 2.352655
|
|
|
+ Converted tensor mean: -0.305544
|
|
|
+ Mean difference: 5.75418472
|
|
|
+ Maximum pointwise difference: 9.27616215
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: -6.88754845, Converted: 2.38861346
|
|
|
+ Biggest difference in row (0, 0), sum 18.821238 vs -2.444355
|
|
|
+
|
|
|
+Layer 3, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 70.965004
|
|
|
+ Converted tensor sum: -68.014175
|
|
|
+ Original tensor mean: 8.870625
|
|
|
+ Converted tensor mean: -8.501772
|
|
|
+ Mean difference: 17.37239647
|
|
|
+ Maximum pointwise difference: 24.10712433
|
|
|
+ Max difference location: (0, 0, 3)
|
|
|
+ Values at max diff - Original: 16.14313126, Converted: -7.96399307
|
|
|
+ Biggest difference in row (0, 0), sum 70.965004 vs -68.014175
|
|
|
+
|
|
|
+Layer 4, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 62.607174
|
|
|
+ Converted tensor sum: -17.623362
|
|
|
+ Original tensor mean: 7.825897
|
|
|
+ Converted tensor mean: -2.202920
|
|
|
+ Mean difference: 10.34164429
|
|
|
+ Maximum pointwise difference: 18.22177315
|
|
|
+ Max difference location: (0, 0, 0)
|
|
|
+ Values at max diff - Original: 8.43466568, Converted: -9.78710747
|
|
|
+ Biggest difference in row (0, 0), sum 62.607174 vs -17.623362
|
|
|
+
|
|
|
+Layer 5, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 52.727810
|
|
|
+ Converted tensor sum: 1.219590
|
|
|
+ Original tensor mean: 6.590976
|
|
|
+ Converted tensor mean: 0.152449
|
|
|
+ Mean difference: 7.65116024
|
|
|
+ Maximum pointwise difference: 18.62134933
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 9.17143154, Converted: -9.44991875
|
|
|
+ Biggest difference in row (0, 0), sum 52.727810 vs 1.219590
|
|
|
+
|
|
|
+Layer 6, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 56.382370
|
|
|
+ Converted tensor sum: 4.153158
|
|
|
+ Original tensor mean: 7.047796
|
|
|
+ Converted tensor mean: 0.519145
|
|
|
+ Mean difference: 7.41536808
|
|
|
+ Maximum pointwise difference: 17.59628677
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 10.29856682, Converted: -7.29772091
|
|
|
+ Biggest difference in row (0, 0), sum 56.382370 vs 4.153158
|
|
|
+
|
|
|
+Layer 7, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 136.310486
|
|
|
+ Converted tensor sum: 3.958838
|
|
|
+ Original tensor mean: 17.038811
|
|
|
+ Converted tensor mean: 0.494855
|
|
|
+ Mean difference: 16.54395676
|
|
|
+ Maximum pointwise difference: 26.66838837
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 18.93185425, Converted: -7.73653507
|
|
|
+ Biggest difference in row (0, 0), sum 136.310486 vs 3.958838
|
|
|
+
|
|
|
+Layer 8, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 119.467941
|
|
|
+ Converted tensor sum: 9.372761
|
|
|
+ Original tensor mean: 14.933493
|
|
|
+ Converted tensor mean: 1.171595
|
|
|
+ Mean difference: 13.76189804
|
|
|
+ Maximum pointwise difference: 22.09118652
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 20.88569450, Converted: -1.20549154
|
|
|
+ Biggest difference in row (0, 0), sum 119.467941 vs 9.372761
|
|
|
+
|
|
|
+Layer 9, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 111.468323
|
|
|
+ Converted tensor sum: 12.752249
|
|
|
+ Original tensor mean: 13.933540
|
|
|
+ Converted tensor mean: 1.594031
|
|
|
+ Mean difference: 12.36562347
|
|
|
+ Maximum pointwise difference: 19.99691391
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 20.75084877, Converted: 0.75393468
|
|
|
+ Biggest difference in row (0, 0), sum 111.468323 vs 12.752249
|
|
|
+
|
|
|
+Layer 10, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 103.290207
|
|
|
+ Converted tensor sum: 4.031506
|
|
|
+ Original tensor mean: 12.911276
|
|
|
+ Converted tensor mean: 0.503938
|
|
|
+ Mean difference: 12.90593433
|
|
|
+ Maximum pointwise difference: 20.97147560
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 20.37113762, Converted: -0.60033715
|
|
|
+ Biggest difference in row (0, 0), sum 103.290207 vs 4.031506
|
|
|
+
|
|
|
+Layer 11, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 195.291718
|
|
|
+ Converted tensor sum: 60.566498
|
|
|
+ Original tensor mean: 24.411465
|
|
|
+ Converted tensor mean: 7.570812
|
|
|
+ Mean difference: 16.84065247
|
|
|
+ Maximum pointwise difference: 26.14917755
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 32.22053146, Converted: 6.07135296
|
|
|
+ Biggest difference in row (0, 0), sum 195.291718 vs 60.566498
|
|
|
+
|
|
|
+Layer 12, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 193.868057
|
|
|
+ Converted tensor sum: 56.865105
|
|
|
+ Original tensor mean: 24.233507
|
|
|
+ Converted tensor mean: 7.108138
|
|
|
+ Mean difference: 17.12537003
|
|
|
+ Maximum pointwise difference: 27.68391991
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 25.67190361, Converted: -2.01201606
|
|
|
+ Biggest difference in row (0, 0), sum 193.868057 vs 56.865105
|
|
|
+
|
|
|
+Layer 13, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 191.697586
|
|
|
+ Converted tensor sum: 55.096077
|
|
|
+ Original tensor mean: 23.962198
|
|
|
+ Converted tensor mean: 6.887010
|
|
|
+ Mean difference: 17.07518768
|
|
|
+ Maximum pointwise difference: 27.05913162
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 24.89521027, Converted: -2.16392159
|
|
|
+ Biggest difference in row (0, 0), sum 191.697586 vs 55.096077
|
|
|
+
|
|
|
+Layer 14, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 188.843628
|
|
|
+ Converted tensor sum: 53.397236
|
|
|
+ Original tensor mean: 23.605453
|
|
|
+ Converted tensor mean: 6.674654
|
|
|
+ Mean difference: 16.93079758
|
|
|
+ Maximum pointwise difference: 25.94162941
|
|
|
+ Max difference location: (0, 0, 6)
|
|
|
+ Values at max diff - Original: 23.64732933, Converted: -2.29430056
|
|
|
+ Biggest difference in row (0, 0), sum 188.843628 vs 53.397236
|
|
|
+
|
|
|
+Layer 15, Token 29 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 336.074646
|
|
|
+ Converted tensor sum: 200.162903
|
|
|
+ Original tensor mean: 42.009331
|
|
|
+ Converted tensor mean: 25.020363
|
|
|
+ Mean difference: 16.98896790
|
|
|
+ Maximum pointwise difference: 25.90124702
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: 47.47709274, Converted: 21.57584572
|
|
|
+ Biggest difference in row (0, 0), sum 336.074646 vs 200.162903
|
|
|
+
|
|
|
+Layer 0, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 17.017063
|
|
|
+ Converted tensor sum: 23.545963
|
|
|
+ Original tensor mean: 2.127133
|
|
|
+ Converted tensor mean: 2.943245
|
|
|
+ Mean difference: 2.51119232
|
|
|
+ Maximum pointwise difference: 4.74783516
|
|
|
+ Max difference location: (0, 0, 7)
|
|
|
+ Values at max diff - Original: -3.57869840, Converted: 1.16913700
|
|
|
+ Biggest difference in row (0, 0), sum 17.017063 vs 23.545963
|
|
|
+
|
|
|
+Layer 1, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 20.432869
|
|
|
+ Converted tensor sum: 19.928423
|
|
|
+ Original tensor mean: 2.554109
|
|
|
+ Converted tensor mean: 2.491053
|
|
|
+ Mean difference: 3.21921587
|
|
|
+ Maximum pointwise difference: 5.61581087
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 4.57620192, Converted: -1.03960896
|
|
|
+ Biggest difference in row (0, 0), sum 20.432869 vs 19.928423
|
|
|
+
|
|
|
+Layer 2, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 28.017879
|
|
|
+ Converted tensor sum: 17.077301
|
|
|
+ Original tensor mean: 3.502235
|
|
|
+ Converted tensor mean: 2.134663
|
|
|
+ Mean difference: 3.63509035
|
|
|
+ Maximum pointwise difference: 8.41316605
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 6.66613007, Converted: -1.74703574
|
|
|
+ Biggest difference in row (0, 0), sum 28.017879 vs 17.077301
|
|
|
+
|
|
|
+Layer 3, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 85.620071
|
|
|
+ Converted tensor sum: 45.387245
|
|
|
+ Original tensor mean: 10.702509
|
|
|
+ Converted tensor mean: 5.673406
|
|
|
+ Mean difference: 5.25029612
|
|
|
+ Maximum pointwise difference: 14.27389336
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 17.08827591, Converted: 2.81438255
|
|
|
+ Biggest difference in row (0, 0), sum 85.620071 vs 45.387245
|
|
|
+
|
|
|
+Layer 4, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 76.943909
|
|
|
+ Converted tensor sum: 38.849068
|
|
|
+ Original tensor mean: 9.617989
|
|
|
+ Converted tensor mean: 4.856133
|
|
|
+ Mean difference: 5.60086536
|
|
|
+ Maximum pointwise difference: 14.69901657
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 17.17034531, Converted: 2.47132850
|
|
|
+ Biggest difference in row (0, 0), sum 76.943909 vs 38.849068
|
|
|
+
|
|
|
+Layer 5, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 59.381409
|
|
|
+ Converted tensor sum: 29.835991
|
|
|
+ Original tensor mean: 7.422676
|
|
|
+ Converted tensor mean: 3.729499
|
|
|
+ Mean difference: 4.96050739
|
|
|
+ Maximum pointwise difference: 12.96257687
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 14.77257729, Converted: 1.81000042
|
|
|
+ Biggest difference in row (0, 0), sum 59.381409 vs 29.835991
|
|
|
+
|
|
|
+Layer 6, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 59.339882
|
|
|
+ Converted tensor sum: 27.141592
|
|
|
+ Original tensor mean: 7.417485
|
|
|
+ Converted tensor mean: 3.392699
|
|
|
+ Mean difference: 4.87107563
|
|
|
+ Maximum pointwise difference: 14.00060558
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 15.80483246, Converted: 1.80422711
|
|
|
+ Biggest difference in row (0, 0), sum 59.339882 vs 27.141592
|
|
|
+
|
|
|
+Layer 7, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 131.503036
|
|
|
+ Converted tensor sum: 91.997757
|
|
|
+ Original tensor mean: 16.437880
|
|
|
+ Converted tensor mean: 11.499720
|
|
|
+ Mean difference: 5.33721828
|
|
|
+ Maximum pointwise difference: 14.37581253
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 27.27588463, Converted: 12.90007210
|
|
|
+ Biggest difference in row (0, 0), sum 131.503036 vs 91.997757
|
|
|
+
|
|
|
+Layer 8, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 123.886139
|
|
|
+ Converted tensor sum: 79.985909
|
|
|
+ Original tensor mean: 15.485767
|
|
|
+ Converted tensor mean: 9.998239
|
|
|
+ Mean difference: 6.03210974
|
|
|
+ Maximum pointwise difference: 16.31963348
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 26.14530563, Converted: 9.82567215
|
|
|
+ Biggest difference in row (0, 0), sum 123.886139 vs 79.985909
|
|
|
+
|
|
|
+Layer 9, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 118.487213
|
|
|
+ Converted tensor sum: 61.110474
|
|
|
+ Original tensor mean: 14.810902
|
|
|
+ Converted tensor mean: 7.638809
|
|
|
+ Mean difference: 7.17209244
|
|
|
+ Maximum pointwise difference: 17.08554077
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 26.00649452, Converted: 8.92095280
|
|
|
+ Biggest difference in row (0, 0), sum 118.487213 vs 61.110474
|
|
|
+
|
|
|
+Layer 10, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 110.301559
|
|
|
+ Converted tensor sum: 57.444092
|
|
|
+ Original tensor mean: 13.787695
|
|
|
+ Converted tensor mean: 7.180511
|
|
|
+ Mean difference: 6.69173956
|
|
|
+ Maximum pointwise difference: 18.18347359
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 26.85584831, Converted: 8.67237473
|
|
|
+ Biggest difference in row (0, 0), sum 110.301559 vs 57.444092
|
|
|
+
|
|
|
+Layer 11, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 209.603394
|
|
|
+ Converted tensor sum: 163.279968
|
|
|
+ Original tensor mean: 26.200424
|
|
|
+ Converted tensor mean: 20.409996
|
|
|
+ Mean difference: 6.23670197
|
|
|
+ Maximum pointwise difference: 18.06859207
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 37.88209915, Converted: 19.81350708
|
|
|
+ Biggest difference in row (0, 0), sum 209.603394 vs 163.279968
|
|
|
+
|
|
|
+Layer 12, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 210.341476
|
|
|
+ Converted tensor sum: 159.541199
|
|
|
+ Original tensor mean: 26.292685
|
|
|
+ Converted tensor mean: 19.942650
|
|
|
+ Mean difference: 6.62348843
|
|
|
+ Maximum pointwise difference: 17.79612160
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 37.21872330, Converted: 19.42260170
|
|
|
+ Biggest difference in row (0, 0), sum 210.341476 vs 159.541199
|
|
|
+
|
|
|
+Layer 13, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 206.045227
|
|
|
+ Converted tensor sum: 156.530212
|
|
|
+ Original tensor mean: 25.755653
|
|
|
+ Converted tensor mean: 19.566277
|
|
|
+ Mean difference: 6.46108055
|
|
|
+ Maximum pointwise difference: 17.11543655
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 36.85726547, Converted: 19.74182892
|
|
|
+ Biggest difference in row (0, 0), sum 206.045227 vs 156.530212
|
|
|
+
|
|
|
+Layer 14, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 204.884491
|
|
|
+ Converted tensor sum: 151.571396
|
|
|
+ Original tensor mean: 25.610561
|
|
|
+ Converted tensor mean: 18.946424
|
|
|
+ Mean difference: 6.66413498
|
|
|
+ Maximum pointwise difference: 18.41207695
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 37.42459488, Converted: 19.01251793
|
|
|
+ Biggest difference in row (0, 0), sum 204.884491 vs 151.571396
|
|
|
+
|
|
|
+Layer 15, Token 30 (model.layers.out comparison):
|
|
|
+ Original tensor sum: 358.352844
|
|
|
+ Converted tensor sum: 289.552582
|
|
|
+ Original tensor mean: 44.794106
|
|
|
+ Converted tensor mean: 36.194073
|
|
|
+ Mean difference: 8.60003757
|
|
|
+ Maximum pointwise difference: 19.94306946
|
|
|
+ Max difference location: (0, 0, 1)
|
|
|
+ Values at max diff - Original: 55.22903824, Converted: 35.28596878
|
|
|
+ Biggest difference in row (0, 0), sum 358.352844 vs 289.552582
|
|
|
+
|
|
|
+================================================================================
|
|
|
+Comparing recurrent cache tensors...
|
|
|
+================================================================================
|
|
|
+
|
|
|
+Layer 0, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -3.317356
|
|
|
+ Converted tensor sum: -3.317369
|
|
|
+ Original tensor mean: -0.001037
|
|
|
+ Converted tensor mean: -0.001037
|
|
|
+ Mean difference: 0.00000005
|
|
|
+ Maximum pointwise difference: 0.00000250
|
|
|
+ Max difference location: (0, 4, 8, 1)
|
|
|
+ Values at max diff - Original: -1.34675360, Converted: -1.34675610
|
|
|
+ Biggest difference in row (0, 4, 3), sum -1.531199 vs -1.531201
|
|
|
+Original tensor:
|
|
|
+
|
|
|
+[[[[-0.01188182 0.00870434 -0.00525597 ... 0.01664828 0.0042294
|
|
|
+ 0.01396134]
|
|
|
+ [-0.00601455 0.00372374 0.00119549 ... -0.00689575 0.00234476
|
|
|
+ -0.00023902]
|
|
|
+ [ 0.12993637 -0.07801484 0.03047845 ... -0.05703255 -0.06261977
|
|
|
+ -0.10933896]
|
|
|
+ ...
|
|
|
+ [-0.04649648 0.02312872 -0.00121024 ... -0.02114891 0.02579406
|
|
|
+ 0.02258455]
|
|
|
+ [-0.04175662 0.02266306 -0.0035618 ... -0.0084533 0.02211451
|
|
|
+ 0.02416236]
|
|
|
+ [ 0.02032246 -0.01281894 0.00930294 ... -0.02656155 -0.00984932
|
|
|
+ -0.02582185]]
|
|
|
+
|
|
|
+ [[ 0.01767723 0.01862493 0.00546727 ... 0.00556207 0.00562948
|
|
|
+ 0.02792829]
|
|
|
+ [ 0.00329595 0.00522457 0.00275346 ... 0.00801896 0.0103077
|
|
|
+ -0.00079376]
|
|
|
+ [-0.15666749 -0.19953263 -0.06468897 ... -0.12443222 -0.10325672
|
|
|
+ -0.20960501]
|
|
|
+ ...
|
|
|
+ [ 0.04138051 0.06359718 0.02354327 ... 0.06241166 0.05219408
|
|
|
+ 0.03928925]
|
|
|
+ [ 0.04164674 0.06036352 0.02137833 ... 0.05146553 0.04422566
|
|
|
+ 0.0441802 ]
|
|
|
+ [-0.03129916 -0.03683262 -0.01027868 ... -0.01391416 -0.00729654
|
|
|
+ -0.0505065 ]]
|
|
|
+
|
|
|
+ [[-0.12362282 0.10214025 -0.01907291 ... -0.06202121 -0.10286148
|
|
|
+ -0.04492377]
|
|
|
+ [-0.0150543 0.08293391 -0.00673187 ... -0.00035791 -0.01116562
|
|
|
+ -0.00036771]
|
|
|
+ [-0.02004597 0.00927652 -0.00294111 ... -0.01171783 -0.01758975
|
|
|
+ -0.00819483]
|
|
|
+ ...
|
|
|
+ [ 0.00270219 -0.04824698 0.00360209 ... -0.00234267 0.00216798
|
|
|
+ -0.00194733]
|
|
|
+ [ 0.01524375 -0.03120736 0.00455077 ... 0.00138342 0.01178958
|
|
|
+ 0.00394295]
|
|
|
+ [ 0.02191158 -0.03620601 0.00567079 ... 0.00745023 0.01862757
|
|
|
+ 0.00703449]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[ 0.00741537 -0.04865595 -0.00886576 ... -0.02448454 0.01194548
|
|
|
+ -0.00861733]
|
|
|
+ [-0.00134769 0.01334649 0.01967893 ... 0.02112496 -0.01624596
|
|
|
+ 0.00516407]
|
|
|
+ [-0.0050677 0.02272661 0.01807955 ... 0.02094838 -0.01449073
|
|
|
+ 0.00967227]
|
|
|
+ ...
|
|
|
+ [-0.02633221 0.05768563 0.01628287 ... 0.0149423 -0.00576269
|
|
|
+ 0.04385136]
|
|
|
+ [-0.03326959 0.185886 -0.02219751 ... 0.04430137 -0.00146678
|
|
|
+ 0.02707055]
|
|
|
+ [-0.00715611 -0.00657876 -0.10976178 ... -0.09874185 0.08591411
|
|
|
+ -0.00940268]]
|
|
|
+
|
|
|
+ [[-0.03609058 -0.07579004 0.01501239 ... -0.00192132 -0.01605882
|
|
|
+ 0.00820769]
|
|
|
+ [-0.00521284 -0.03044076 0.01835437 ... -0.00124992 -0.01034386
|
|
|
+ 0.00627647]
|
|
|
+ [ 0.02380822 0.16997556 -0.04292414 ... 0.01702266 0.04020631
|
|
|
+ -0.03895959]
|
|
|
+ ...
|
|
|
+ [-0.00115029 -0.0217499 0.00398471 ... -0.00293407 -0.00470166
|
|
|
+ 0.00579625]
|
|
|
+ [-0.00415053 -0.03030142 0.02518196 ... -0.00043284 -0.01240897
|
|
|
+ 0.00634339]
|
|
|
+ [ 0.00861687 -0.01112233 -0.03039085 ... -0.00862329 0.00705495
|
|
|
+ 0.00750164]]
|
|
|
+
|
|
|
+ [[ 0.00614664 -0.01302179 -0.0609244 ... -0.05605923 -0.06379453
|
|
|
+ 0.01912303]
|
|
|
+ [ 0.01061937 -0.00787821 -0.02997783 ... -0.03494435 -0.04587581
|
|
|
+ 0.01142649]
|
|
|
+ [-0.04273459 0.08807568 0.18954179 ... 0.19141153 0.05976401
|
|
|
+ -0.01481191]
|
|
|
+ ...
|
|
|
+ [ 0.0059959 -0.01474381 -0.02677062 ... -0.02669823 0.00146604
|
|
|
+ -0.00064257]
|
|
|
+ [ 0.01313105 -0.0043188 -0.02868656 ... -0.03682106 -0.06574353
|
|
|
+ 0.01620813]
|
|
|
+ [-0.00286384 -0.03923091 -0.03224784 ... -0.01919729 0.12107897
|
|
|
+ -0.03120236]]]]
|
|
|
+
|
|
|
+Converted tensor:
|
|
|
+
|
|
|
+[[[[-0.01188182 0.00870434 -0.00525597 ... 0.0166483 0.0042294
|
|
|
+ 0.01396135]
|
|
|
+ [-0.00601455 0.00372375 0.00119549 ... -0.00689576 0.00234477
|
|
|
+ -0.00023903]
|
|
|
+ [ 0.12993638 -0.07801486 0.03047847 ... -0.05703259 -0.06261978
|
|
|
+ -0.10933899]
|
|
|
+ ...
|
|
|
+ [-0.04649651 0.02312873 -0.00121024 ... -0.02114895 0.02579408
|
|
|
+ 0.02258454]
|
|
|
+ [-0.04175663 0.02266307 -0.0035618 ... -0.00845332 0.02211452
|
|
|
+ 0.02416236]
|
|
|
+ [ 0.02032245 -0.01281894 0.00930295 ... -0.02656158 -0.00984932
|
|
|
+ -0.02582186]]
|
|
|
+
|
|
|
+ [[ 0.01767723 0.01862492 0.00546727 ... 0.00556206 0.00562947
|
|
|
+ 0.02792831]
|
|
|
+ [ 0.00329595 0.00522458 0.00275346 ... 0.00801897 0.01030772
|
|
|
+ -0.00079377]
|
|
|
+ [-0.15666753 -0.19953264 -0.06468898 ... -0.12443225 -0.10325674
|
|
|
+ -0.20960508]
|
|
|
+ ...
|
|
|
+ [ 0.04138052 0.06359721 0.02354329 ... 0.06241173 0.05219414
|
|
|
+ 0.03928925]
|
|
|
+ [ 0.04164676 0.06036354 0.02137835 ... 0.05146557 0.04422571
|
|
|
+ 0.0441802 ]
|
|
|
+ [-0.03129917 -0.03683261 -0.01027868 ... -0.01391415 -0.00729652
|
|
|
+ -0.05050653]]
|
|
|
+
|
|
|
+ [[-0.12362286 0.10214026 -0.01907291 ... -0.06202124 -0.10286151
|
|
|
+ -0.04492378]
|
|
|
+ [-0.01505431 0.08293395 -0.00673187 ... -0.00035791 -0.01116562
|
|
|
+ -0.00036771]
|
|
|
+ [-0.02004598 0.00927651 -0.00294111 ... -0.01171784 -0.01758976
|
|
|
+ -0.00819483]
|
|
|
+ ...
|
|
|
+ [ 0.00270219 -0.04824701 0.00360209 ... -0.00234266 0.00216798
|
|
|
+ -0.00194733]
|
|
|
+ [ 0.01524375 -0.03120738 0.00455077 ... 0.00138341 0.01178958
|
|
|
+ 0.00394295]
|
|
|
+ [ 0.02191159 -0.03620601 0.00567079 ... 0.00745023 0.01862758
|
|
|
+ 0.00703449]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[ 0.00741537 -0.04865595 -0.00886576 ... -0.02448454 0.01194548
|
|
|
+ -0.00861733]
|
|
|
+ [-0.00134769 0.01334648 0.01967893 ... 0.02112496 -0.01624596
|
|
|
+ 0.00516407]
|
|
|
+ [-0.0050677 0.02272661 0.01807955 ... 0.02094838 -0.01449073
|
|
|
+ 0.00967227]
|
|
|
+ ...
|
|
|
+ [-0.02633222 0.05768563 0.01628287 ... 0.01494229 -0.00576268
|
|
|
+ 0.04385137]
|
|
|
+ [-0.03326959 0.18588606 -0.02219752 ... 0.04430138 -0.00146678
|
|
|
+ 0.02707056]
|
|
|
+ [-0.00715612 -0.00657868 -0.1097618 ... -0.09874186 0.08591412
|
|
|
+ -0.00940266]]
|
|
|
+
|
|
|
+ [[-0.03609059 -0.07579008 0.01501241 ... -0.00192132 -0.01605884
|
|
|
+ 0.00820769]
|
|
|
+ [-0.00521284 -0.03044078 0.01835438 ... -0.00124992 -0.01034387
|
|
|
+ 0.00627648]
|
|
|
+ [ 0.02380823 0.16997567 -0.04292417 ... 0.01702267 0.04020633
|
|
|
+ -0.03895961]
|
|
|
+ ...
|
|
|
+ [-0.00115029 -0.02174992 0.00398472 ... -0.00293407 -0.00470167
|
|
|
+ 0.00579625]
|
|
|
+ [-0.00415053 -0.03030144 0.02518198 ... -0.00043284 -0.01240898
|
|
|
+ 0.00634339]
|
|
|
+ [ 0.00861687 -0.01112236 -0.03039089 ... -0.0086233 0.00705496
|
|
|
+ 0.00750165]]
|
|
|
+
|
|
|
+ [[ 0.00614664 -0.0130218 -0.06092443 ... -0.05605926 -0.06379459
|
|
|
+ 0.01912304]
|
|
|
+ [ 0.01061938 -0.00787821 -0.02997785 ... -0.03494437 -0.04587585
|
|
|
+ 0.0114265 ]
|
|
|
+ [-0.04273462 0.08807574 0.18954192 ... 0.19141163 0.05976404
|
|
|
+ -0.01481192]
|
|
|
+ ...
|
|
|
+ [ 0.0059959 -0.01474382 -0.02677064 ... -0.02669825 0.00146605
|
|
|
+ -0.00064257]
|
|
|
+ [ 0.01313106 -0.00431879 -0.02868656 ... -0.03682107 -0.06574361
|
|
|
+ 0.01620815]
|
|
|
+ [-0.00286384 -0.03923097 -0.0322479 ... -0.01919733 0.12107915
|
|
|
+ -0.03120241]]]]
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+Layer 1, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 5.922648
|
|
|
+ Converted tensor sum: 5.922640
|
|
|
+ Original tensor mean: 0.001851
|
|
|
+ Converted tensor mean: 0.001851
|
|
|
+ Mean difference: 0.00000005
|
|
|
+ Maximum pointwise difference: 0.00000155
|
|
|
+ Max difference location: (0, 24, 4, 5)
|
|
|
+ Values at max diff - Original: -0.26876855, Converted: -0.26877010
|
|
|
+ Biggest difference in row (0, 14, 3), sum -0.918731 vs -0.918733
|
|
|
+
|
|
|
+Layer 2, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 12.229185
|
|
|
+ Converted tensor sum: 12.229182
|
|
|
+ Original tensor mean: 0.003822
|
|
|
+ Converted tensor mean: 0.003822
|
|
|
+ Mean difference: 0.00000009
|
|
|
+ Maximum pointwise difference: 0.00000620
|
|
|
+ Max difference location: (0, 3, 6, 0)
|
|
|
+ Values at max diff - Original: 2.35518169, Converted: 2.35517550
|
|
|
+ Biggest difference in row (0, 3, 6), sum 3.961787 vs 3.961781
|
|
|
+
|
|
|
+Layer 4, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 4.260600
|
|
|
+ Converted tensor sum: 4.260149
|
|
|
+ Original tensor mean: 0.001331
|
|
|
+ Converted tensor mean: 0.001331
|
|
|
+ Mean difference: 0.00000526
|
|
|
+ Maximum pointwise difference: 0.00011003
|
|
|
+ Max difference location: (0, 25, 2, 4)
|
|
|
+ Values at max diff - Original: 0.21691340, Converted: 0.21702343
|
|
|
+ Biggest difference in row (0, 3, 1), sum -0.358275 vs -0.358136
|
|
|
+
|
|
|
+Layer 5, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 12.744413
|
|
|
+ Converted tensor sum: 12.744514
|
|
|
+ Original tensor mean: 0.003983
|
|
|
+ Converted tensor mean: 0.003983
|
|
|
+ Mean difference: 0.00000413
|
|
|
+ Maximum pointwise difference: 0.00011247
|
|
|
+ Max difference location: (0, 5, 2, 8)
|
|
|
+ Values at max diff - Original: 0.86490124, Converted: 0.86478877
|
|
|
+ Biggest difference in row (0, 5, 2), sum -0.456235 vs -0.456385
|
|
|
+
|
|
|
+Layer 6, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -14.490761
|
|
|
+ Converted tensor sum: -14.493523
|
|
|
+ Original tensor mean: -0.004528
|
|
|
+ Converted tensor mean: -0.004529
|
|
|
+ Mean difference: 0.00002331
|
|
|
+ Maximum pointwise difference: 0.00149512
|
|
|
+ Max difference location: (0, 28, 9, 8)
|
|
|
+ Values at max diff - Original: 2.97030377, Converted: 2.96880865
|
|
|
+ Biggest difference in row (0, 8, 5), sum 5.080033 vs 5.077976
|
|
|
+
|
|
|
+Layer 8, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -18.806082
|
|
|
+ Converted tensor sum: -18.808296
|
|
|
+ Original tensor mean: -0.005877
|
|
|
+ Converted tensor mean: -0.005878
|
|
|
+ Mean difference: 0.00002112
|
|
|
+ Maximum pointwise difference: 0.00074953
|
|
|
+ Max difference location: (0, 20, 1, 8)
|
|
|
+ Values at max diff - Original: 0.62514198, Converted: 0.62439245
|
|
|
+ Biggest difference in row (0, 25, 6), sum 1.048032 vs 1.047222
|
|
|
+
|
|
|
+Layer 9, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 16.764290
|
|
|
+ Converted tensor sum: 16.760258
|
|
|
+ Original tensor mean: 0.005239
|
|
|
+ Converted tensor mean: 0.005238
|
|
|
+ Mean difference: 0.00002129
|
|
|
+ Maximum pointwise difference: 0.00044209
|
|
|
+ Max difference location: (0, 21, 5, 8)
|
|
|
+ Values at max diff - Original: 0.85285813, Converted: 0.85241604
|
|
|
+ Biggest difference in row (0, 0, 1), sum -0.046629 vs -0.046069
|
|
|
+
|
|
|
+Layer 10, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 13.242327
|
|
|
+ Converted tensor sum: 13.242817
|
|
|
+ Original tensor mean: 0.004138
|
|
|
+ Converted tensor mean: 0.004138
|
|
|
+ Mean difference: 0.00002325
|
|
|
+ Maximum pointwise difference: 0.00070238
|
|
|
+ Max difference location: (0, 18, 5, 1)
|
|
|
+ Values at max diff - Original: 0.48423475, Converted: 0.48353237
|
|
|
+ Biggest difference in row (0, 10, 0), sum -0.502937 vs -0.502024
|
|
|
+
|
|
|
+Layer 12, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 14.374599
|
|
|
+ Converted tensor sum: 14.372844
|
|
|
+ Original tensor mean: 0.004492
|
|
|
+ Converted tensor mean: 0.004492
|
|
|
+ Mean difference: 0.00002070
|
|
|
+ Maximum pointwise difference: 0.00084567
|
|
|
+ Max difference location: (0, 0, 3, 1)
|
|
|
+ Values at max diff - Original: 1.31967652, Converted: 1.31883085
|
|
|
+ Biggest difference in row (0, 0, 5), sum -0.676982 vs -0.676066
|
|
|
+
|
|
|
+Layer 13, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 28.120127
|
|
|
+ Converted tensor sum: 28.128502
|
|
|
+ Original tensor mean: 0.008788
|
|
|
+ Converted tensor mean: 0.008790
|
|
|
+ Mean difference: 0.00001703
|
|
|
+ Maximum pointwise difference: 0.00037390
|
|
|
+ Max difference location: (0, 4, 2, 1)
|
|
|
+ Values at max diff - Original: -0.33164161, Converted: -0.33126771
|
|
|
+ Biggest difference in row (0, 24, 1), sum -0.030439 vs -0.029779
|
|
|
+
|
|
|
+Layer 14, Token 1 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 27.012432
|
|
|
+ Converted tensor sum: 27.011541
|
|
|
+ Original tensor mean: 0.008441
|
|
|
+ Converted tensor mean: 0.008441
|
|
|
+ Mean difference: 0.00002248
|
|
|
+ Maximum pointwise difference: 0.00121775
|
|
|
+ Max difference location: (0, 18, 0, 1)
|
|
|
+ Values at max diff - Original: 0.37722895, Converted: 0.37844670
|
|
|
+ Biggest difference in row (0, 28, 1), sum -0.493242 vs -0.492468
|
|
|
+
|
|
|
+Layer 0, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 4.531467
|
|
|
+ Converted tensor sum: 4.531466
|
|
|
+ Original tensor mean: 0.001416
|
|
|
+ Converted tensor mean: 0.001416
|
|
|
+ Mean difference: 0.08359446
|
|
|
+ Maximum pointwise difference: 1.77142978
|
|
|
+ Max difference location: (0, 1, 3, 5)
|
|
|
+ Values at max diff - Original: -0.02699410, Converted: 1.74443567
|
|
|
+ Biggest difference in row (0, 25, 2), sum -0.057628 vs -2.844908
|
|
|
+
|
|
|
+Layer 1, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 11.008316
|
|
|
+ Converted tensor sum: 11.008326
|
|
|
+ Original tensor mean: 0.003440
|
|
|
+ Converted tensor mean: 0.003440
|
|
|
+ Mean difference: 0.06277661
|
|
|
+ Maximum pointwise difference: 0.71243107
|
|
|
+ Max difference location: (0, 10, 0, 2)
|
|
|
+ Values at max diff - Original: 0.01163737, Converted: 0.72406846
|
|
|
+ Biggest difference in row (0, 12, 3), sum 0.228652 vs -1.768667
|
|
|
+
|
|
|
+Layer 2, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 17.248280
|
|
|
+ Converted tensor sum: 17.248241
|
|
|
+ Original tensor mean: 0.005390
|
|
|
+ Converted tensor mean: 0.005390
|
|
|
+ Mean difference: 0.08558470
|
|
|
+ Maximum pointwise difference: 1.97508693
|
|
|
+ Max difference location: (0, 10, 7, 3)
|
|
|
+ Values at max diff - Original: 1.98190892, Converted: 0.00682194
|
|
|
+ Biggest difference in row (0, 27, 7), sum -0.594255 vs 3.191915
|
|
|
+
|
|
|
+Layer 4, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 7.984356
|
|
|
+ Converted tensor sum: 7.983810
|
|
|
+ Original tensor mean: 0.002495
|
|
|
+ Converted tensor mean: 0.002495
|
|
|
+ Mean difference: 0.07671142
|
|
|
+ Maximum pointwise difference: 1.85330796
|
|
|
+ Max difference location: (0, 20, 4, 6)
|
|
|
+ Values at max diff - Original: 0.01886898, Converted: 1.87217689
|
|
|
+ Biggest difference in row (0, 20, 6), sum 2.845701 vs -0.305152
|
|
|
+
|
|
|
+Layer 5, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 9.205366
|
|
|
+ Converted tensor sum: 9.205467
|
|
|
+ Original tensor mean: 0.002877
|
|
|
+ Converted tensor mean: 0.002877
|
|
|
+ Mean difference: 0.06804129
|
|
|
+ Maximum pointwise difference: 1.41803539
|
|
|
+ Max difference location: (0, 31, 6, 3)
|
|
|
+ Values at max diff - Original: 1.40662789, Converted: -0.01140754
|
|
|
+ Biggest difference in row (0, 24, 8), sum -0.372748 vs -2.656956
|
|
|
+
|
|
|
+Layer 6, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -7.876884
|
|
|
+ Converted tensor sum: -7.873561
|
|
|
+ Original tensor mean: -0.002462
|
|
|
+ Converted tensor mean: -0.002460
|
|
|
+ Mean difference: 0.10029175
|
|
|
+ Maximum pointwise difference: 2.66715860
|
|
|
+ Max difference location: (0, 28, 9, 8)
|
|
|
+ Values at max diff - Original: 2.59401202, Converted: -0.07314663
|
|
|
+ Biggest difference in row (0, 19, 4), sum 0.710449 vs -5.203167
|
|
|
+
|
|
|
+Layer 8, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -13.154655
|
|
|
+ Converted tensor sum: -13.156775
|
|
|
+ Original tensor mean: -0.004111
|
|
|
+ Converted tensor mean: -0.004111
|
|
|
+ Mean difference: 0.08601540
|
|
|
+ Maximum pointwise difference: 2.83156943
|
|
|
+ Max difference location: (0, 12, 4, 7)
|
|
|
+ Values at max diff - Original: 2.83582592, Converted: 0.00425647
|
|
|
+ Biggest difference in row (0, 30, 3), sum -0.848019 vs -4.754877
|
|
|
+
|
|
|
+Layer 9, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 13.187357
|
|
|
+ Converted tensor sum: 13.181618
|
|
|
+ Original tensor mean: 0.004121
|
|
|
+ Converted tensor mean: 0.004119
|
|
|
+ Mean difference: 0.05544823
|
|
|
+ Maximum pointwise difference: 0.65544760
|
|
|
+ Max difference location: (0, 21, 5, 8)
|
|
|
+ Values at max diff - Original: 0.71338689, Converted: 0.05793926
|
|
|
+ Biggest difference in row (0, 19, 9), sum -0.054951 vs -1.900613
|
|
|
+
|
|
|
+Layer 10, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 10.860550
|
|
|
+ Converted tensor sum: 10.860478
|
|
|
+ Original tensor mean: 0.003394
|
|
|
+ Converted tensor mean: 0.003394
|
|
|
+ Mean difference: 0.05739149
|
|
|
+ Maximum pointwise difference: 1.22302496
|
|
|
+ Max difference location: (0, 30, 4, 5)
|
|
|
+ Values at max diff - Original: -0.01519475, Converted: 1.20783019
|
|
|
+ Biggest difference in row (0, 23, 3), sum -0.221712 vs -3.841269
|
|
|
+
|
|
|
+Layer 12, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 3.134315
|
|
|
+ Converted tensor sum: 3.132089
|
|
|
+ Original tensor mean: 0.000979
|
|
|
+ Converted tensor mean: 0.000979
|
|
|
+ Mean difference: 0.07305207
|
|
|
+ Maximum pointwise difference: 2.30649829
|
|
|
+ Max difference location: (0, 5, 4, 5)
|
|
|
+ Values at max diff - Original: 2.33141446, Converted: 0.02491626
|
|
|
+ Biggest difference in row (0, 0, 1), sum -1.541565 vs -6.179572
|
|
|
+
|
|
|
+Layer 13, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 18.773312
|
|
|
+ Converted tensor sum: 18.779602
|
|
|
+ Original tensor mean: 0.005867
|
|
|
+ Converted tensor mean: 0.005869
|
|
|
+ Mean difference: 0.04688552
|
|
|
+ Maximum pointwise difference: 0.60163057
|
|
|
+ Max difference location: (0, 6, 1, 7)
|
|
|
+ Values at max diff - Original: 0.04654653, Converted: 0.64817709
|
|
|
+ Biggest difference in row (0, 4, 1), sum -0.566141 vs -2.623919
|
|
|
+
|
|
|
+Layer 14, Token 2 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 13.960938
|
|
|
+ Converted tensor sum: 13.964265
|
|
|
+ Original tensor mean: 0.004363
|
|
|
+ Converted tensor mean: 0.004364
|
|
|
+ Mean difference: 0.06759205
|
|
|
+ Maximum pointwise difference: 1.25844812
|
|
|
+ Max difference location: (0, 15, 8, 4)
|
|
|
+ Values at max diff - Original: 1.26228178, Converted: 0.00383368
|
|
|
+ Biggest difference in row (0, 31, 3), sum -0.096068 vs -5.326997
|
|
|
+
|
|
|
+Layer 0, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 0.684784
|
|
|
+ Converted tensor sum: 0.422194
|
|
|
+ Original tensor mean: 0.000214
|
|
|
+ Converted tensor mean: 0.000132
|
|
|
+ Mean difference: 0.06314481
|
|
|
+ Maximum pointwise difference: 1.39332521
|
|
|
+ Max difference location: (0, 28, 5, 9)
|
|
|
+ Values at max diff - Original: -0.03651731, Converted: 1.35680795
|
|
|
+ Biggest difference in row (0, 4, 9), sum 2.498745 vs 0.335116
|
|
|
+
|
|
|
+Layer 1, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 3.526195
|
|
|
+ Converted tensor sum: 7.632782
|
|
|
+ Original tensor mean: 0.001102
|
|
|
+ Converted tensor mean: 0.002385
|
|
|
+ Mean difference: 0.04427468
|
|
|
+ Maximum pointwise difference: 0.98676205
|
|
|
+ Max difference location: (0, 12, 3, 7)
|
|
|
+ Values at max diff - Original: 0.92044085, Converted: -0.06632122
|
|
|
+ Biggest difference in row (0, 24, 2), sum 0.609889 vs -0.814516
|
|
|
+
|
|
|
+Layer 2, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 15.850447
|
|
|
+ Converted tensor sum: 14.785593
|
|
|
+ Original tensor mean: 0.004953
|
|
|
+ Converted tensor mean: 0.004620
|
|
|
+ Mean difference: 0.06092339
|
|
|
+ Maximum pointwise difference: 2.43390632
|
|
|
+ Max difference location: (0, 1, 0, 4)
|
|
|
+ Values at max diff - Original: 2.80371213, Converted: 0.36980587
|
|
|
+ Biggest difference in row (0, 1, 0), sum 4.370481 vs 0.423526
|
|
|
+
|
|
|
+Layer 4, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 19.856752
|
|
|
+ Converted tensor sum: 11.778177
|
|
|
+ Original tensor mean: 0.006205
|
|
|
+ Converted tensor mean: 0.003681
|
|
|
+ Mean difference: 0.07194611
|
|
|
+ Maximum pointwise difference: 2.48742008
|
|
|
+ Max difference location: (0, 14, 3, 9)
|
|
|
+ Values at max diff - Original: 2.47506452, Converted: -0.01235563
|
|
|
+ Biggest difference in row (0, 19, 2), sum 0.372920 vs -2.973422
|
|
|
+
|
|
|
+Layer 5, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 9.792118
|
|
|
+ Converted tensor sum: 9.138845
|
|
|
+ Original tensor mean: 0.003060
|
|
|
+ Converted tensor mean: 0.002856
|
|
|
+ Mean difference: 0.05089124
|
|
|
+ Maximum pointwise difference: 1.42593253
|
|
|
+ Max difference location: (0, 29, 0, 8)
|
|
|
+ Values at max diff - Original: 1.42089915, Converted: -0.00503340
|
|
|
+ Biggest difference in row (0, 29, 0), sum 2.198264 vs 0.182178
|
|
|
+
|
|
|
+Layer 6, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 39.415325
|
|
|
+ Converted tensor sum: 64.451355
|
|
|
+ Original tensor mean: 0.012317
|
|
|
+ Converted tensor mean: 0.020141
|
|
|
+ Mean difference: 0.08079723
|
|
|
+ Maximum pointwise difference: 4.89647627
|
|
|
+ Max difference location: (0, 15, 3, 6)
|
|
|
+ Values at max diff - Original: -0.18732879, Converted: 4.70914745
|
|
|
+ Biggest difference in row (0, 6, 0), sum 0.836966 vs 8.447909
|
|
|
+
|
|
|
+Layer 8, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 23.689789
|
|
|
+ Converted tensor sum: 12.321936
|
|
|
+ Original tensor mean: 0.007403
|
|
|
+ Converted tensor mean: 0.003851
|
|
|
+ Mean difference: 0.08749782
|
|
|
+ Maximum pointwise difference: 3.85876298
|
|
|
+ Max difference location: (0, 6, 4, 8)
|
|
|
+ Values at max diff - Original: 0.01438628, Converted: 3.87314916
|
|
|
+ Biggest difference in row (0, 6, 4), sum 0.119168 vs 5.376393
|
|
|
+
|
|
|
+Layer 9, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 8.901470
|
|
|
+ Converted tensor sum: 4.339914
|
|
|
+ Original tensor mean: 0.002782
|
|
|
+ Converted tensor mean: 0.001356
|
|
|
+ Mean difference: 0.06287189
|
|
|
+ Maximum pointwise difference: 1.40262556
|
|
|
+ Max difference location: (0, 4, 0, 5)
|
|
|
+ Values at max diff - Original: -0.00241524, Converted: 1.40021038
|
|
|
+ Biggest difference in row (0, 18, 1), sum 1.268483 vs -0.696936
|
|
|
+
|
|
|
+Layer 10, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 18.375820
|
|
|
+ Converted tensor sum: 3.348410
|
|
|
+ Original tensor mean: 0.005742
|
|
|
+ Converted tensor mean: 0.001046
|
|
|
+ Mean difference: 0.06042652
|
|
|
+ Maximum pointwise difference: 2.94567752
|
|
|
+ Max difference location: (0, 3, 8, 7)
|
|
|
+ Values at max diff - Original: -0.26693973, Converted: 2.67873788
|
|
|
+ Biggest difference in row (0, 3, 8), sum 0.004062 vs 2.562259
|
|
|
+
|
|
|
+Layer 12, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 15.322770
|
|
|
+ Converted tensor sum: 2.674777
|
|
|
+ Original tensor mean: 0.004788
|
|
|
+ Converted tensor mean: 0.000836
|
|
|
+ Mean difference: 0.07379209
|
|
|
+ Maximum pointwise difference: 2.73401403
|
|
|
+ Max difference location: (0, 30, 4, 0)
|
|
|
+ Values at max diff - Original: -0.02140745, Converted: 2.71260667
|
|
|
+ Biggest difference in row (0, 7, 6), sum -0.113145 vs 2.612027
|
|
|
+
|
|
|
+Layer 13, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 14.910538
|
|
|
+ Converted tensor sum: 8.724025
|
|
|
+ Original tensor mean: 0.004660
|
|
|
+ Converted tensor mean: 0.002726
|
|
|
+ Mean difference: 0.05616682
|
|
|
+ Maximum pointwise difference: 1.43021226
|
|
|
+ Max difference location: (0, 26, 5, 0)
|
|
|
+ Values at max diff - Original: -1.41802061, Converted: 0.01219167
|
|
|
+ Biggest difference in row (0, 3, 9), sum 0.002563 vs -3.030253
|
|
|
+
|
|
|
+Layer 14, Token 3 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 59.583878
|
|
|
+ Converted tensor sum: -2.192444
|
|
|
+ Original tensor mean: 0.018620
|
|
|
+ Converted tensor mean: -0.000685
|
|
|
+ Mean difference: 0.10199536
|
|
|
+ Maximum pointwise difference: 2.77383018
|
|
|
+ Max difference location: (0, 2, 0, 2)
|
|
|
+ Values at max diff - Original: 2.72142744, Converted: -0.05240267
|
|
|
+ Biggest difference in row (0, 16, 6), sum 0.145663 vs -8.295967
|
|
|
+
|
|
|
+Layer 0, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 7.899137
|
|
|
+ Converted tensor sum: 4.783788
|
|
|
+ Original tensor mean: 0.002468
|
|
|
+ Converted tensor mean: 0.001495
|
|
|
+ Mean difference: 0.06620996
|
|
|
+ Maximum pointwise difference: 1.03156960
|
|
|
+ Max difference location: (0, 1, 3, 7)
|
|
|
+ Values at max diff - Original: -0.00703356, Converted: -1.03860319
|
|
|
+ Biggest difference in row (0, 21, 4), sum 0.038056 vs -1.875101
|
|
|
+
|
|
|
+Layer 1, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 11.224692
|
|
|
+ Converted tensor sum: 15.232712
|
|
|
+ Original tensor mean: 0.003508
|
|
|
+ Converted tensor mean: 0.004760
|
|
|
+ Mean difference: 0.06535107
|
|
|
+ Maximum pointwise difference: 1.53891993
|
|
|
+ Max difference location: (0, 28, 3, 7)
|
|
|
+ Values at max diff - Original: 0.04949531, Converted: 1.58841527
|
|
|
+ Biggest difference in row (0, 28, 3), sum 0.880954 vs 3.297761
|
|
|
+
|
|
|
+Layer 2, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 15.875578
|
|
|
+ Converted tensor sum: 5.908407
|
|
|
+ Original tensor mean: 0.004961
|
|
|
+ Converted tensor mean: 0.001846
|
|
|
+ Mean difference: 0.09298474
|
|
|
+ Maximum pointwise difference: 2.68871808
|
|
|
+ Max difference location: (0, 14, 3, 7)
|
|
|
+ Values at max diff - Original: 2.67706752, Converted: -0.01165051
|
|
|
+ Biggest difference in row (0, 27, 2), sum 3.910276 vs 0.204061
|
|
|
+
|
|
|
+Layer 4, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 34.602001
|
|
|
+ Converted tensor sum: 14.365917
|
|
|
+ Original tensor mean: 0.010813
|
|
|
+ Converted tensor mean: 0.004489
|
|
|
+ Mean difference: 0.10193390
|
|
|
+ Maximum pointwise difference: 3.22817802
|
|
|
+ Max difference location: (0, 26, 6, 5)
|
|
|
+ Values at max diff - Original: -0.04091755, Converted: 3.18726039
|
|
|
+ Biggest difference in row (0, 26, 6), sum 0.735282 vs 4.516615
|
|
|
+
|
|
|
+Layer 5, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 24.322514
|
|
|
+ Converted tensor sum: 18.418108
|
|
|
+ Original tensor mean: 0.007601
|
|
|
+ Converted tensor mean: 0.005756
|
|
|
+ Mean difference: 0.08364967
|
|
|
+ Maximum pointwise difference: 2.23648024
|
|
|
+ Max difference location: (0, 22, 6, 1)
|
|
|
+ Values at max diff - Original: 2.19508362, Converted: -0.04139667
|
|
|
+ Biggest difference in row (0, 3, 0), sum 3.424673 vs -0.163761
|
|
|
+
|
|
|
+Layer 6, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 34.762104
|
|
|
+ Converted tensor sum: 77.105461
|
|
|
+ Original tensor mean: 0.010863
|
|
|
+ Converted tensor mean: 0.024095
|
|
|
+ Mean difference: 0.12376648
|
|
|
+ Maximum pointwise difference: 3.91498804
|
|
|
+ Max difference location: (0, 12, 5, 4)
|
|
|
+ Values at max diff - Original: -0.17797241, Converted: 3.73701572
|
|
|
+ Biggest difference in row (0, 10, 4), sum -0.207436 vs 6.811815
|
|
|
+
|
|
|
+Layer 8, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 52.858780
|
|
|
+ Converted tensor sum: 5.570855
|
|
|
+ Original tensor mean: 0.016518
|
|
|
+ Converted tensor mean: 0.001741
|
|
|
+ Mean difference: 0.12005786
|
|
|
+ Maximum pointwise difference: 5.32569838
|
|
|
+ Max difference location: (0, 12, 3, 5)
|
|
|
+ Values at max diff - Original: 5.34859705, Converted: 0.02289869
|
|
|
+ Biggest difference in row (0, 20, 0), sum 8.008233 vs -0.003253
|
|
|
+
|
|
|
+Layer 9, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 20.435345
|
|
|
+ Converted tensor sum: -2.045311
|
|
|
+ Original tensor mean: 0.006386
|
|
|
+ Converted tensor mean: -0.000639
|
|
|
+ Mean difference: 0.08372314
|
|
|
+ Maximum pointwise difference: 2.78602862
|
|
|
+ Max difference location: (0, 28, 2, 0)
|
|
|
+ Values at max diff - Original: 2.71785426, Converted: -0.06817436
|
|
|
+ Biggest difference in row (0, 28, 2), sum 4.726543 vs 1.302800
|
|
|
+
|
|
|
+Layer 10, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 28.353613
|
|
|
+ Converted tensor sum: 12.385429
|
|
|
+ Original tensor mean: 0.008861
|
|
|
+ Converted tensor mean: 0.003870
|
|
|
+ Mean difference: 0.09276734
|
|
|
+ Maximum pointwise difference: 2.28980851
|
|
|
+ Max difference location: (0, 2, 9, 5)
|
|
|
+ Values at max diff - Original: -0.00412231, Converted: 2.28568625
|
|
|
+ Biggest difference in row (0, 13, 8), sum 3.624647 vs 0.020094
|
|
|
+
|
|
|
+Layer 12, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 70.502647
|
|
|
+ Converted tensor sum: -11.005323
|
|
|
+ Original tensor mean: 0.022032
|
|
|
+ Converted tensor mean: -0.003439
|
|
|
+ Mean difference: 0.13381547
|
|
|
+ Maximum pointwise difference: 3.57928109
|
|
|
+ Max difference location: (0, 30, 0, 4)
|
|
|
+ Values at max diff - Original: 3.95710707, Converted: 0.37782601
|
|
|
+ Biggest difference in row (0, 21, 9), sum -1.532540 vs -12.302475
|
|
|
+
|
|
|
+Layer 13, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 38.753532
|
|
|
+ Converted tensor sum: 5.437235
|
|
|
+ Original tensor mean: 0.012110
|
|
|
+ Converted tensor mean: 0.001699
|
|
|
+ Mean difference: 0.08178755
|
|
|
+ Maximum pointwise difference: 2.55715966
|
|
|
+ Max difference location: (0, 3, 4, 9)
|
|
|
+ Values at max diff - Original: 2.84962225, Converted: 0.29246253
|
|
|
+ Biggest difference in row (0, 3, 4), sum 4.266754 vs 0.847269
|
|
|
+
|
|
|
+Layer 14, Token 4 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 141.714035
|
|
|
+ Converted tensor sum: 3.640444
|
|
|
+ Original tensor mean: 0.044286
|
|
|
+ Converted tensor mean: 0.001138
|
|
|
+ Mean difference: 0.14463389
|
|
|
+ Maximum pointwise difference: 5.68939066
|
|
|
+ Max difference location: (0, 16, 7, 6)
|
|
|
+ Values at max diff - Original: 5.55827475, Converted: -0.13111581
|
|
|
+ Biggest difference in row (0, 28, 1), sum 11.271111 vs 0.609705
|
|
|
+
|
|
|
+Layer 0, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 9.131315
|
|
|
+ Converted tensor sum: 12.396471
|
|
|
+ Original tensor mean: 0.002854
|
|
|
+ Converted tensor mean: 0.003874
|
|
|
+ Mean difference: 0.05539500
|
|
|
+ Maximum pointwise difference: 1.09641600
|
|
|
+ Max difference location: (0, 28, 9, 5)
|
|
|
+ Values at max diff - Original: 1.12920201, Converted: 0.03278603
|
|
|
+ Biggest difference in row (0, 4, 9), sum 1.258661 vs 0.175881
|
|
|
+
|
|
|
+Layer 1, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 24.366199
|
|
|
+ Converted tensor sum: 10.052802
|
|
|
+ Original tensor mean: 0.007614
|
|
|
+ Converted tensor mean: 0.003142
|
|
|
+ Mean difference: 0.05824861
|
|
|
+ Maximum pointwise difference: 2.14620328
|
|
|
+ Max difference location: (0, 14, 2, 5)
|
|
|
+ Values at max diff - Original: 0.00643282, Converted: 2.15263605
|
|
|
+ Biggest difference in row (0, 6, 4), sum 1.356658 vs -0.156208
|
|
|
+
|
|
|
+Layer 2, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 50.376324
|
|
|
+ Converted tensor sum: 20.166676
|
|
|
+ Original tensor mean: 0.015743
|
|
|
+ Converted tensor mean: 0.006302
|
|
|
+ Mean difference: 0.07966200
|
|
|
+ Maximum pointwise difference: 2.04463291
|
|
|
+ Max difference location: (0, 27, 4, 2)
|
|
|
+ Values at max diff - Original: 2.00972342, Converted: -0.03490951
|
|
|
+ Biggest difference in row (0, 27, 2), sum 5.745794 vs 1.959190
|
|
|
+
|
|
|
+Layer 4, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 44.478531
|
|
|
+ Converted tensor sum: 48.696777
|
|
|
+ Original tensor mean: 0.013900
|
|
|
+ Converted tensor mean: 0.015218
|
|
|
+ Mean difference: 0.09315307
|
|
|
+ Maximum pointwise difference: 2.43060613
|
|
|
+ Max difference location: (0, 26, 5, 6)
|
|
|
+ Values at max diff - Original: 0.46136302, Converted: 2.89196920
|
|
|
+ Biggest difference in row (0, 8, 6), sum 0.054414 vs 4.076869
|
|
|
+
|
|
|
+Layer 5, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 57.863758
|
|
|
+ Converted tensor sum: 66.390915
|
|
|
+ Original tensor mean: 0.018082
|
|
|
+ Converted tensor mean: 0.020747
|
|
|
+ Mean difference: 0.10497291
|
|
|
+ Maximum pointwise difference: 2.49356651
|
|
|
+ Max difference location: (0, 17, 3, 6)
|
|
|
+ Values at max diff - Original: 2.50974846, Converted: 0.01618202
|
|
|
+ Biggest difference in row (0, 28, 9), sum 3.771637 vs 0.053981
|
|
|
+
|
|
|
+Layer 6, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 39.502037
|
|
|
+ Converted tensor sum: 161.817169
|
|
|
+ Original tensor mean: 0.012344
|
|
|
+ Converted tensor mean: 0.050568
|
|
|
+ Mean difference: 0.14194940
|
|
|
+ Maximum pointwise difference: 3.58584666
|
|
|
+ Max difference location: (0, 26, 3, 9)
|
|
|
+ Values at max diff - Original: 3.40417242, Converted: -0.18167432
|
|
|
+ Biggest difference in row (0, 12, 4), sum 1.168972 vs 7.813907
|
|
|
+
|
|
|
+Layer 8, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 44.896149
|
|
|
+ Converted tensor sum: 38.246201
|
|
|
+ Original tensor mean: 0.014030
|
|
|
+ Converted tensor mean: 0.011952
|
|
|
+ Mean difference: 0.10806250
|
|
|
+ Maximum pointwise difference: 2.33007479
|
|
|
+ Max difference location: (0, 1, 6, 0)
|
|
|
+ Values at max diff - Original: 2.35027504, Converted: 0.02020025
|
|
|
+ Biggest difference in row (0, 1, 6), sum 5.246045 vs 0.247956
|
|
|
+
|
|
|
+Layer 9, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 20.569098
|
|
|
+ Converted tensor sum: 11.688971
|
|
|
+ Original tensor mean: 0.006428
|
|
|
+ Converted tensor mean: 0.003653
|
|
|
+ Mean difference: 0.08318320
|
|
|
+ Maximum pointwise difference: 1.79917610
|
|
|
+ Max difference location: (0, 28, 0, 3)
|
|
|
+ Values at max diff - Original: 1.69918346, Converted: -0.09999267
|
|
|
+ Biggest difference in row (0, 3, 4), sum 3.283048 vs 0.225886
|
|
|
+
|
|
|
+Layer 10, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 42.493145
|
|
|
+ Converted tensor sum: 26.750286
|
|
|
+ Original tensor mean: 0.013279
|
|
|
+ Converted tensor mean: 0.008359
|
|
|
+ Mean difference: 0.09709122
|
|
|
+ Maximum pointwise difference: 2.97919798
|
|
|
+ Max difference location: (0, 10, 0, 3)
|
|
|
+ Values at max diff - Original: 3.34914303, Converted: 0.36994517
|
|
|
+ Biggest difference in row (0, 10, 0), sum 5.613201 vs -0.079588
|
|
|
+
|
|
|
+Layer 12, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 91.460236
|
|
|
+ Converted tensor sum: 14.637827
|
|
|
+ Original tensor mean: 0.028581
|
|
|
+ Converted tensor mean: 0.004574
|
|
|
+ Mean difference: 0.12184902
|
|
|
+ Maximum pointwise difference: 4.17300320
|
|
|
+ Max difference location: (0, 23, 2, 9)
|
|
|
+ Values at max diff - Original: 3.98550677, Converted: -0.18749636
|
|
|
+ Biggest difference in row (0, 28, 5), sum 5.243108 vs -0.797499
|
|
|
+
|
|
|
+Layer 13, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 50.306297
|
|
|
+ Converted tensor sum: 16.367235
|
|
|
+ Original tensor mean: 0.015721
|
|
|
+ Converted tensor mean: 0.005115
|
|
|
+ Mean difference: 0.08688851
|
|
|
+ Maximum pointwise difference: 2.08200264
|
|
|
+ Max difference location: (0, 19, 9, 3)
|
|
|
+ Values at max diff - Original: -1.59057343, Converted: 0.49142930
|
|
|
+ Biggest difference in row (0, 19, 5), sum 3.595970 vs 0.049368
|
|
|
+
|
|
|
+Layer 14, Token 5 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 120.273888
|
|
|
+ Converted tensor sum: 44.449192
|
|
|
+ Original tensor mean: 0.037586
|
|
|
+ Converted tensor mean: 0.013890
|
|
|
+ Mean difference: 0.13929905
|
|
|
+ Maximum pointwise difference: 4.73129654
|
|
|
+ Max difference location: (0, 18, 5, 9)
|
|
|
+ Values at max diff - Original: 4.35292673, Converted: -0.37836996
|
|
|
+ Biggest difference in row (0, 18, 5), sum 8.950241 vs -0.746074
|
|
|
+
|
|
|
+Layer 0, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 11.608546
|
|
|
+ Converted tensor sum: 10.627696
|
|
|
+ Original tensor mean: 0.003628
|
|
|
+ Converted tensor mean: 0.003321
|
|
|
+ Mean difference: 0.05484011
|
|
|
+ Maximum pointwise difference: 1.12371099
|
|
|
+ Max difference location: (0, 1, 2, 3)
|
|
|
+ Values at max diff - Original: 1.11502755, Converted: -0.00868344
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.118289 vs 2.332705
|
|
|
+
|
|
|
+Layer 1, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 92.219727
|
|
|
+ Converted tensor sum: 28.768579
|
|
|
+ Original tensor mean: 0.028819
|
|
|
+ Converted tensor mean: 0.008990
|
|
|
+ Mean difference: 0.08724788
|
|
|
+ Maximum pointwise difference: 1.51144505
|
|
|
+ Max difference location: (0, 23, 0, 4)
|
|
|
+ Values at max diff - Original: 1.55765891, Converted: 0.04621384
|
|
|
+ Biggest difference in row (0, 14, 0), sum 2.954077 vs -0.012181
|
|
|
+
|
|
|
+Layer 2, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 101.609215
|
|
|
+ Converted tensor sum: 93.242142
|
|
|
+ Original tensor mean: 0.031753
|
|
|
+ Converted tensor mean: 0.029138
|
|
|
+ Mean difference: 0.12457406
|
|
|
+ Maximum pointwise difference: 2.07845497
|
|
|
+ Max difference location: (0, 13, 1, 9)
|
|
|
+ Values at max diff - Original: 2.17026591, Converted: 0.09181103
|
|
|
+ Biggest difference in row (0, 5, 5), sum 4.805948 vs -0.569050
|
|
|
+
|
|
|
+Layer 4, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 13.856092
|
|
|
+ Converted tensor sum: 22.610188
|
|
|
+ Original tensor mean: 0.004330
|
|
|
+ Converted tensor mean: 0.007066
|
|
|
+ Mean difference: 0.09440003
|
|
|
+ Maximum pointwise difference: 2.37087321
|
|
|
+ Max difference location: (0, 19, 2, 6)
|
|
|
+ Values at max diff - Original: -0.02839734, Converted: 2.34247589
|
|
|
+ Biggest difference in row (0, 28, 1), sum -0.280756 vs 2.458031
|
|
|
+
|
|
|
+Layer 5, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 39.960052
|
|
|
+ Converted tensor sum: 41.437057
|
|
|
+ Original tensor mean: 0.012488
|
|
|
+ Converted tensor mean: 0.012949
|
|
|
+ Mean difference: 0.11209048
|
|
|
+ Maximum pointwise difference: 2.79378676
|
|
|
+ Max difference location: (0, 19, 8, 4)
|
|
|
+ Values at max diff - Original: 0.01245314, Converted: 2.80623984
|
|
|
+ Biggest difference in row (0, 13, 1), sum 6.005285 vs -0.085273
|
|
|
+
|
|
|
+Layer 6, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -2.419616
|
|
|
+ Converted tensor sum: 156.977676
|
|
|
+ Original tensor mean: -0.000756
|
|
|
+ Converted tensor mean: 0.049056
|
|
|
+ Mean difference: 0.13894926
|
|
|
+ Maximum pointwise difference: 6.69993019
|
|
|
+ Max difference location: (0, 10, 3, 1)
|
|
|
+ Values at max diff - Original: -1.12109971, Converted: 5.57883024
|
|
|
+ Biggest difference in row (0, 12, 1), sum -0.201558 vs 10.382487
|
|
|
+
|
|
|
+Layer 8, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 8.213539
|
|
|
+ Converted tensor sum: 18.368313
|
|
|
+ Original tensor mean: 0.002567
|
|
|
+ Converted tensor mean: 0.005740
|
|
|
+ Mean difference: 0.10382870
|
|
|
+ Maximum pointwise difference: 3.36055303
|
|
|
+ Max difference location: (0, 6, 4, 8)
|
|
|
+ Values at max diff - Original: 0.10355368, Converted: 3.46410680
|
|
|
+ Biggest difference in row (0, 6, 4), sum -0.613209 vs 4.409491
|
|
|
+
|
|
|
+Layer 9, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 12.889297
|
|
|
+ Converted tensor sum: -0.411069
|
|
|
+ Original tensor mean: 0.004028
|
|
|
+ Converted tensor mean: -0.000128
|
|
|
+ Mean difference: 0.08612256
|
|
|
+ Maximum pointwise difference: 1.89322448
|
|
|
+ Max difference location: (0, 6, 4, 1)
|
|
|
+ Values at max diff - Original: -0.52327746, Converted: 1.36994708
|
|
|
+ Biggest difference in row (0, 21, 7), sum 0.245074 vs -2.764518
|
|
|
+
|
|
|
+Layer 10, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 3.506564
|
|
|
+ Converted tensor sum: 11.408216
|
|
|
+ Original tensor mean: 0.001096
|
|
|
+ Converted tensor mean: 0.003565
|
|
|
+ Mean difference: 0.08594991
|
|
|
+ Maximum pointwise difference: 3.30037594
|
|
|
+ Max difference location: (0, 3, 8, 7)
|
|
|
+ Values at max diff - Original: -0.08371022, Converted: 3.21666574
|
|
|
+ Biggest difference in row (0, 0, 7), sum -0.426351 vs 3.218251
|
|
|
+
|
|
|
+Layer 12, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 30.742065
|
|
|
+ Converted tensor sum: 1.932971
|
|
|
+ Original tensor mean: 0.009607
|
|
|
+ Converted tensor mean: 0.000604
|
|
|
+ Mean difference: 0.10983281
|
|
|
+ Maximum pointwise difference: 3.31334734
|
|
|
+ Max difference location: (0, 29, 5, 6)
|
|
|
+ Values at max diff - Original: 3.34788132, Converted: 0.03453401
|
|
|
+ Biggest difference in row (0, 29, 5), sum 6.176572 vs 0.072738
|
|
|
+
|
|
|
+Layer 13, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 14.579787
|
|
|
+ Converted tensor sum: 9.630959
|
|
|
+ Original tensor mean: 0.004556
|
|
|
+ Converted tensor mean: 0.003010
|
|
|
+ Mean difference: 0.08181592
|
|
|
+ Maximum pointwise difference: 2.27647829
|
|
|
+ Max difference location: (0, 19, 1, 3)
|
|
|
+ Values at max diff - Original: 2.46903062, Converted: 0.19255245
|
|
|
+ Biggest difference in row (0, 19, 5), sum 2.241402 vs -0.017656
|
|
|
+
|
|
|
+Layer 14, Token 6 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 42.673443
|
|
|
+ Converted tensor sum: 13.958614
|
|
|
+ Original tensor mean: 0.013335
|
|
|
+ Converted tensor mean: 0.004362
|
|
|
+ Mean difference: 0.12478559
|
|
|
+ Maximum pointwise difference: 3.53676820
|
|
|
+ Max difference location: (0, 15, 8, 4)
|
|
|
+ Values at max diff - Original: 3.58003521, Converted: 0.04326708
|
|
|
+ Biggest difference in row (0, 16, 6), sum 0.056718 vs -5.064022
|
|
|
+
|
|
|
+Layer 0, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 13.531075
|
|
|
+ Converted tensor sum: 7.895350
|
|
|
+ Original tensor mean: 0.004228
|
|
|
+ Converted tensor mean: 0.002467
|
|
|
+ Mean difference: 0.05525878
|
|
|
+ Maximum pointwise difference: 0.84158301
|
|
|
+ Max difference location: (0, 4, 1, 9)
|
|
|
+ Values at max diff - Original: -0.04387791, Converted: 0.79770511
|
|
|
+ Biggest difference in row (0, 11, 9), sum -0.221553 vs -1.606123
|
|
|
+
|
|
|
+Layer 1, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 106.468651
|
|
|
+ Converted tensor sum: 29.931305
|
|
|
+ Original tensor mean: 0.033271
|
|
|
+ Converted tensor mean: 0.009354
|
|
|
+ Mean difference: 0.07464606
|
|
|
+ Maximum pointwise difference: 1.52088320
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 1.28372872, Converted: -0.23715444
|
|
|
+ Biggest difference in row (0, 31, 9), sum 2.350637 vs -0.270012
|
|
|
+
|
|
|
+Layer 2, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 129.077255
|
|
|
+ Converted tensor sum: 124.290329
|
|
|
+ Original tensor mean: 0.040337
|
|
|
+ Converted tensor mean: 0.038841
|
|
|
+ Mean difference: 0.12615709
|
|
|
+ Maximum pointwise difference: 3.32020164
|
|
|
+ Max difference location: (0, 23, 3, 9)
|
|
|
+ Values at max diff - Original: 0.05276818, Converted: 3.37296987
|
|
|
+ Biggest difference in row (0, 5, 6), sum -1.585131 vs 2.877644
|
|
|
+
|
|
|
+Layer 4, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 12.337616
|
|
|
+ Converted tensor sum: 29.998875
|
|
|
+ Original tensor mean: 0.003856
|
|
|
+ Converted tensor mean: 0.009375
|
|
|
+ Mean difference: 0.08588156
|
|
|
+ Maximum pointwise difference: 1.48782670
|
|
|
+ Max difference location: (0, 19, 6, 2)
|
|
|
+ Values at max diff - Original: -0.00142645, Converted: 1.48640025
|
|
|
+ Biggest difference in row (0, 8, 3), sum -0.318221 vs 2.809558
|
|
|
+
|
|
|
+Layer 5, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 28.667000
|
|
|
+ Converted tensor sum: 37.180931
|
|
|
+ Original tensor mean: 0.008958
|
|
|
+ Converted tensor mean: 0.011619
|
|
|
+ Mean difference: 0.09552816
|
|
|
+ Maximum pointwise difference: 2.18750906
|
|
|
+ Max difference location: (0, 19, 4, 8)
|
|
|
+ Values at max diff - Original: 0.10599449, Converted: 2.29350352
|
|
|
+ Biggest difference in row (0, 28, 9), sum 2.464837 vs 0.175544
|
|
|
+
|
|
|
+Layer 6, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -5.179218
|
|
|
+ Converted tensor sum: 165.798248
|
|
|
+ Original tensor mean: -0.001619
|
|
|
+ Converted tensor mean: 0.051812
|
|
|
+ Mean difference: 0.12655024
|
|
|
+ Maximum pointwise difference: 4.26992130
|
|
|
+ Max difference location: (0, 10, 1, 3)
|
|
|
+ Values at max diff - Original: -0.81827015, Converted: 3.45165110
|
|
|
+ Biggest difference in row (0, 12, 6), sum 2.458921 vs 9.472747
|
|
|
+
|
|
|
+Layer 8, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 8.037577
|
|
|
+ Converted tensor sum: 36.050400
|
|
|
+ Original tensor mean: 0.002512
|
|
|
+ Converted tensor mean: 0.011266
|
|
|
+ Mean difference: 0.10181364
|
|
|
+ Maximum pointwise difference: 3.21224403
|
|
|
+ Max difference location: (0, 6, 8, 4)
|
|
|
+ Values at max diff - Original: 0.04581403, Converted: 3.25805807
|
|
|
+ Biggest difference in row (0, 6, 8), sum -0.710102 vs 2.858772
|
|
|
+
|
|
|
+Layer 9, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 10.771255
|
|
|
+ Converted tensor sum: 9.047117
|
|
|
+ Original tensor mean: 0.003366
|
|
|
+ Converted tensor mean: 0.002827
|
|
|
+ Mean difference: 0.07432807
|
|
|
+ Maximum pointwise difference: 1.92723787
|
|
|
+ Max difference location: (0, 18, 5, 2)
|
|
|
+ Values at max diff - Original: 0.10259621, Converted: 2.02983403
|
|
|
+ Biggest difference in row (0, 14, 2), sum 0.009283 vs 3.088803
|
|
|
+
|
|
|
+Layer 10, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 2.196672
|
|
|
+ Converted tensor sum: 31.273930
|
|
|
+ Original tensor mean: 0.000686
|
|
|
+ Converted tensor mean: 0.009773
|
|
|
+ Mean difference: 0.07749946
|
|
|
+ Maximum pointwise difference: 2.52166486
|
|
|
+ Max difference location: (0, 3, 7, 8)
|
|
|
+ Values at max diff - Original: 0.31132898, Converted: 2.83299375
|
|
|
+ Biggest difference in row (0, 20, 9), sum -0.957283 vs 1.748438
|
|
|
+
|
|
|
+Layer 12, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 18.589321
|
|
|
+ Converted tensor sum: 5.047585
|
|
|
+ Original tensor mean: 0.005809
|
|
|
+ Converted tensor mean: 0.001577
|
|
|
+ Mean difference: 0.10475901
|
|
|
+ Maximum pointwise difference: 2.85224462
|
|
|
+ Max difference location: (0, 29, 5, 6)
|
|
|
+ Values at max diff - Original: 2.91423106, Converted: 0.06198643
|
|
|
+ Biggest difference in row (0, 29, 5), sum 5.378224 vs 0.028987
|
|
|
+
|
|
|
+Layer 13, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 10.072084
|
|
|
+ Converted tensor sum: 22.447376
|
|
|
+ Original tensor mean: 0.003148
|
|
|
+ Converted tensor mean: 0.007015
|
|
|
+ Mean difference: 0.06809221
|
|
|
+ Maximum pointwise difference: 1.16759956
|
|
|
+ Max difference location: (0, 27, 3, 5)
|
|
|
+ Values at max diff - Original: -0.07106454, Converted: 1.09653497
|
|
|
+ Biggest difference in row (0, 27, 3), sum -0.724999 vs 1.414439
|
|
|
+
|
|
|
+Layer 14, Token 7 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 24.727911
|
|
|
+ Converted tensor sum: 26.743217
|
|
|
+ Original tensor mean: 0.007727
|
|
|
+ Converted tensor mean: 0.008357
|
|
|
+ Mean difference: 0.11743267
|
|
|
+ Maximum pointwise difference: 2.98747468
|
|
|
+ Max difference location: (0, 18, 5, 1)
|
|
|
+ Values at max diff - Original: 2.95096135, Converted: -0.03651327
|
|
|
+ Biggest difference in row (0, 28, 1), sum -0.138044 vs 7.456189
|
|
|
+
|
|
|
+Layer 0, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 15.709320
|
|
|
+ Converted tensor sum: 12.209140
|
|
|
+ Original tensor mean: 0.004909
|
|
|
+ Converted tensor mean: 0.003815
|
|
|
+ Mean difference: 0.05364013
|
|
|
+ Maximum pointwise difference: 1.00742257
|
|
|
+ Max difference location: (0, 1, 3, 2)
|
|
|
+ Values at max diff - Original: 0.00399712, Converted: 1.01141965
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.102939 vs 1.531078
|
|
|
+
|
|
|
+Layer 1, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 188.393356
|
|
|
+ Converted tensor sum: 69.447678
|
|
|
+ Original tensor mean: 0.058873
|
|
|
+ Converted tensor mean: 0.021702
|
|
|
+ Mean difference: 0.10494157
|
|
|
+ Maximum pointwise difference: 2.06318974
|
|
|
+ Max difference location: (0, 24, 6, 8)
|
|
|
+ Values at max diff - Original: 2.06102371, Converted: -0.00216593
|
|
|
+ Biggest difference in row (0, 14, 0), sum 8.656445 vs 0.053197
|
|
|
+
|
|
|
+Layer 2, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 204.433716
|
|
|
+ Converted tensor sum: 228.728714
|
|
|
+ Original tensor mean: 0.063886
|
|
|
+ Converted tensor mean: 0.071478
|
|
|
+ Mean difference: 0.17672807
|
|
|
+ Maximum pointwise difference: 4.02747822
|
|
|
+ Max difference location: (0, 14, 7, 4)
|
|
|
+ Values at max diff - Original: -0.50838530, Converted: 3.51909280
|
|
|
+ Biggest difference in row (0, 14, 7), sum -0.459507 vs 8.282653
|
|
|
+
|
|
|
+Layer 4, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 27.791477
|
|
|
+ Converted tensor sum: 81.184990
|
|
|
+ Original tensor mean: 0.008685
|
|
|
+ Converted tensor mean: 0.025370
|
|
|
+ Mean difference: 0.10353857
|
|
|
+ Maximum pointwise difference: 2.46198463
|
|
|
+ Max difference location: (0, 20, 0, 0)
|
|
|
+ Values at max diff - Original: -0.22187454, Converted: 2.24011016
|
|
|
+ Biggest difference in row (0, 20, 0), sum 0.256525 vs 5.813072
|
|
|
+
|
|
|
+Layer 5, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 29.250452
|
|
|
+ Converted tensor sum: 93.253128
|
|
|
+ Original tensor mean: 0.009141
|
|
|
+ Converted tensor mean: 0.029142
|
|
|
+ Mean difference: 0.10660823
|
|
|
+ Maximum pointwise difference: 2.56040263
|
|
|
+ Max difference location: (0, 5, 9, 6)
|
|
|
+ Values at max diff - Original: 2.57331157, Converted: 0.01290902
|
|
|
+ Biggest difference in row (0, 6, 9), sum 0.078166 vs 4.415024
|
|
|
+
|
|
|
+Layer 6, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 27.846973
|
|
|
+ Converted tensor sum: 254.006149
|
|
|
+ Original tensor mean: 0.008702
|
|
|
+ Converted tensor mean: 0.079377
|
|
|
+ Mean difference: 0.15745334
|
|
|
+ Maximum pointwise difference: 4.78712130
|
|
|
+ Max difference location: (0, 6, 0, 1)
|
|
|
+ Values at max diff - Original: -0.02898185, Converted: 4.75813961
|
|
|
+ Biggest difference in row (0, 6, 0), sum 0.390611 vs 12.429944
|
|
|
+
|
|
|
+Layer 8, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 30.536982
|
|
|
+ Converted tensor sum: 101.827225
|
|
|
+ Original tensor mean: 0.009543
|
|
|
+ Converted tensor mean: 0.031821
|
|
|
+ Mean difference: 0.12039161
|
|
|
+ Maximum pointwise difference: 3.22662950
|
|
|
+ Max difference location: (0, 6, 4, 8)
|
|
|
+ Values at max diff - Original: 0.09277204, Converted: 3.31940150
|
|
|
+ Biggest difference in row (0, 6, 4), sum -0.525502 vs 4.532234
|
|
|
+
|
|
|
+Layer 9, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 16.682407
|
|
|
+ Converted tensor sum: 55.948948
|
|
|
+ Original tensor mean: 0.005213
|
|
|
+ Converted tensor mean: 0.017484
|
|
|
+ Mean difference: 0.08395444
|
|
|
+ Maximum pointwise difference: 2.21269536
|
|
|
+ Max difference location: (0, 2, 6, 8)
|
|
|
+ Values at max diff - Original: -0.01177103, Converted: 2.20092440
|
|
|
+ Biggest difference in row (0, 2, 6), sum 0.250594 vs 2.860795
|
|
|
+
|
|
|
+Layer 10, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 12.510189
|
|
|
+ Converted tensor sum: 82.301987
|
|
|
+ Original tensor mean: 0.003909
|
|
|
+ Converted tensor mean: 0.025719
|
|
|
+ Mean difference: 0.08603403
|
|
|
+ Maximum pointwise difference: 2.56086898
|
|
|
+ Max difference location: (0, 3, 8, 7)
|
|
|
+ Values at max diff - Original: -0.06791666, Converted: 2.49295235
|
|
|
+ Biggest difference in row (0, 27, 2), sum -0.661969 vs 2.579364
|
|
|
+
|
|
|
+Layer 12, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 32.357769
|
|
|
+ Converted tensor sum: 70.608459
|
|
|
+ Original tensor mean: 0.010112
|
|
|
+ Converted tensor mean: 0.022065
|
|
|
+ Mean difference: 0.11435273
|
|
|
+ Maximum pointwise difference: 2.54995298
|
|
|
+ Max difference location: (0, 29, 5, 6)
|
|
|
+ Values at max diff - Original: 2.57914209, Converted: 0.02918900
|
|
|
+ Biggest difference in row (0, 24, 2), sum -0.360438 vs 5.434034
|
|
|
+
|
|
|
+Layer 13, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 15.804648
|
|
|
+ Converted tensor sum: 72.853622
|
|
|
+ Original tensor mean: 0.004939
|
|
|
+ Converted tensor mean: 0.022767
|
|
|
+ Mean difference: 0.07997719
|
|
|
+ Maximum pointwise difference: 2.65385294
|
|
|
+ Max difference location: (0, 26, 0, 4)
|
|
|
+ Values at max diff - Original: -0.03116010, Converted: 2.62269282
|
|
|
+ Biggest difference in row (0, 26, 0), sum -1.206431 vs 2.459876
|
|
|
+
|
|
|
+Layer 14, Token 8 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 69.455246
|
|
|
+ Converted tensor sum: 167.620041
|
|
|
+ Original tensor mean: 0.021705
|
|
|
+ Converted tensor mean: 0.052381
|
|
|
+ Mean difference: 0.15660757
|
|
|
+ Maximum pointwise difference: 2.87237978
|
|
|
+ Max difference location: (0, 29, 9, 1)
|
|
|
+ Values at max diff - Original: -0.04621891, Converted: 2.82616091
|
|
|
+ Biggest difference in row (0, 20, 4), sum -0.064347 vs 6.085094
|
|
|
+
|
|
|
+Layer 0, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 13.786104
|
|
|
+ Converted tensor sum: 5.261156
|
|
|
+ Original tensor mean: 0.004308
|
|
|
+ Converted tensor mean: 0.001644
|
|
|
+ Mean difference: 0.06277616
|
|
|
+ Maximum pointwise difference: 1.31032252
|
|
|
+ Max difference location: (0, 4, 1, 9)
|
|
|
+ Values at max diff - Original: -0.02821357, Converted: 1.28210890
|
|
|
+ Biggest difference in row (0, 11, 3), sum 0.289278 vs -0.836586
|
|
|
+
|
|
|
+Layer 1, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 203.497635
|
|
|
+ Converted tensor sum: 111.110443
|
|
|
+ Original tensor mean: 0.063593
|
|
|
+ Converted tensor mean: 0.034722
|
|
|
+ Mean difference: 0.10077493
|
|
|
+ Maximum pointwise difference: 1.97459030
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 1.88861251, Converted: -0.08597784
|
|
|
+ Biggest difference in row (0, 14, 0), sum 9.054160 vs 0.974541
|
|
|
+
|
|
|
+Layer 2, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 210.326843
|
|
|
+ Converted tensor sum: 237.847137
|
|
|
+ Original tensor mean: 0.065727
|
|
|
+ Converted tensor mean: 0.074327
|
|
|
+ Mean difference: 0.16504267
|
|
|
+ Maximum pointwise difference: 2.71314573
|
|
|
+ Max difference location: (0, 4, 8, 1)
|
|
|
+ Values at max diff - Original: -0.00067222, Converted: 2.71247363
|
|
|
+ Biggest difference in row (0, 1, 4), sum 2.414350 vs 7.828261
|
|
|
+
|
|
|
+Layer 4, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 76.020309
|
|
|
+ Converted tensor sum: 125.208931
|
|
|
+ Original tensor mean: 0.023756
|
|
|
+ Converted tensor mean: 0.039128
|
|
|
+ Mean difference: 0.11094213
|
|
|
+ Maximum pointwise difference: 3.67572975
|
|
|
+ Max difference location: (0, 27, 7, 5)
|
|
|
+ Values at max diff - Original: 3.66171432, Converted: -0.01401533
|
|
|
+ Biggest difference in row (0, 3, 0), sum 4.612147 vs 0.005273
|
|
|
+
|
|
|
+Layer 5, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 70.017532
|
|
|
+ Converted tensor sum: 128.789795
|
|
|
+ Original tensor mean: 0.021880
|
|
|
+ Converted tensor mean: 0.040247
|
|
|
+ Mean difference: 0.11726990
|
|
|
+ Maximum pointwise difference: 2.56784987
|
|
|
+ Max difference location: (0, 6, 7, 6)
|
|
|
+ Values at max diff - Original: 2.56954336, Converted: 0.00169344
|
|
|
+ Biggest difference in row (0, 6, 7), sum 5.224357 vs 0.050091
|
|
|
+
|
|
|
+Layer 6, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 97.678406
|
|
|
+ Converted tensor sum: 298.968506
|
|
|
+ Original tensor mean: 0.030525
|
|
|
+ Converted tensor mean: 0.093428
|
|
|
+ Mean difference: 0.16553456
|
|
|
+ Maximum pointwise difference: 4.22000217
|
|
|
+ Max difference location: (0, 14, 1, 7)
|
|
|
+ Values at max diff - Original: -0.10210184, Converted: 4.11790037
|
|
|
+ Biggest difference in row (0, 14, 1), sum -0.198166 vs 10.807201
|
|
|
+
|
|
|
+Layer 8, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 106.931870
|
|
|
+ Converted tensor sum: 173.151855
|
|
|
+ Original tensor mean: 0.033416
|
|
|
+ Converted tensor mean: 0.054110
|
|
|
+ Mean difference: 0.14065868
|
|
|
+ Maximum pointwise difference: 3.01797652
|
|
|
+ Max difference location: (0, 14, 9, 5)
|
|
|
+ Values at max diff - Original: -0.05490554, Converted: 2.96307087
|
|
|
+ Biggest difference in row (0, 20, 7), sum 0.154971 vs 7.357482
|
|
|
+
|
|
|
+Layer 9, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 64.670883
|
|
|
+ Converted tensor sum: 92.657562
|
|
|
+ Original tensor mean: 0.020210
|
|
|
+ Converted tensor mean: 0.028955
|
|
|
+ Mean difference: 0.09020478
|
|
|
+ Maximum pointwise difference: 3.22673941
|
|
|
+ Max difference location: (0, 18, 5, 2)
|
|
|
+ Values at max diff - Original: 0.18116489, Converted: 3.40790439
|
|
|
+ Biggest difference in row (0, 18, 2), sum 6.946761 vs 1.273814
|
|
|
+
|
|
|
+Layer 10, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 52.923912
|
|
|
+ Converted tensor sum: 104.621475
|
|
|
+ Original tensor mean: 0.016539
|
|
|
+ Converted tensor mean: 0.032694
|
|
|
+ Mean difference: 0.08354937
|
|
|
+ Maximum pointwise difference: 1.84956801
|
|
|
+ Max difference location: (0, 3, 7, 8)
|
|
|
+ Values at max diff - Original: 0.37758890, Converted: 2.22715688
|
|
|
+ Biggest difference in row (0, 20, 9), sum -1.298731 vs 2.479056
|
|
|
+
|
|
|
+Layer 12, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 87.343620
|
|
|
+ Converted tensor sum: 117.516281
|
|
|
+ Original tensor mean: 0.027295
|
|
|
+ Converted tensor mean: 0.036724
|
|
|
+ Mean difference: 0.12288742
|
|
|
+ Maximum pointwise difference: 3.19170189
|
|
|
+ Max difference location: (0, 13, 2, 4)
|
|
|
+ Values at max diff - Original: -0.11148589, Converted: 3.08021593
|
|
|
+ Biggest difference in row (0, 13, 2), sum 0.993775 vs 6.040417
|
|
|
+
|
|
|
+Layer 13, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 77.928635
|
|
|
+ Converted tensor sum: 116.695862
|
|
|
+ Original tensor mean: 0.024353
|
|
|
+ Converted tensor mean: 0.036467
|
|
|
+ Mean difference: 0.09447044
|
|
|
+ Maximum pointwise difference: 1.43028283
|
|
|
+ Max difference location: (0, 26, 0, 4)
|
|
|
+ Values at max diff - Original: -0.00879327, Converted: 1.42148960
|
|
|
+ Biggest difference in row (0, 25, 3), sum -0.128404 vs 3.423045
|
|
|
+
|
|
|
+Layer 14, Token 9 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 162.069077
|
|
|
+ Converted tensor sum: 247.590637
|
|
|
+ Original tensor mean: 0.050647
|
|
|
+ Converted tensor mean: 0.077372
|
|
|
+ Mean difference: 0.17534283
|
|
|
+ Maximum pointwise difference: 3.21209598
|
|
|
+ Max difference location: (0, 28, 1, 9)
|
|
|
+ Values at max diff - Original: -0.25805441, Converted: 2.95404148
|
|
|
+ Biggest difference in row (0, 28, 1), sum 1.364790 vs 9.833094
|
|
|
+
|
|
|
+Layer 0, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 7.816267
|
|
|
+ Converted tensor sum: 1.466951
|
|
|
+ Original tensor mean: 0.002443
|
|
|
+ Converted tensor mean: 0.000458
|
|
|
+ Mean difference: 0.05842621
|
|
|
+ Maximum pointwise difference: 1.09208894
|
|
|
+ Max difference location: (0, 21, 4, 1)
|
|
|
+ Values at max diff - Original: 0.04324723, Converted: 1.13533616
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.301255 vs 2.364079
|
|
|
+
|
|
|
+Layer 1, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 223.526520
|
|
|
+ Converted tensor sum: 135.921234
|
|
|
+ Original tensor mean: 0.069852
|
|
|
+ Converted tensor mean: 0.042475
|
|
|
+ Mean difference: 0.10827781
|
|
|
+ Maximum pointwise difference: 1.68770814
|
|
|
+ Max difference location: (0, 16, 6, 1)
|
|
|
+ Values at max diff - Original: 2.02958679, Converted: 0.34187865
|
|
|
+ Biggest difference in row (0, 14, 0), sum 5.745544 vs -0.048143
|
|
|
+
|
|
|
+Layer 2, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 215.104584
|
|
|
+ Converted tensor sum: 227.212708
|
|
|
+ Original tensor mean: 0.067220
|
|
|
+ Converted tensor mean: 0.071004
|
|
|
+ Mean difference: 0.17289215
|
|
|
+ Maximum pointwise difference: 3.18850541
|
|
|
+ Max difference location: (0, 26, 3, 8)
|
|
|
+ Values at max diff - Original: 0.01985940, Converted: 3.20836473
|
|
|
+ Biggest difference in row (0, 12, 7), sum 8.279942 vs -0.264312
|
|
|
+
|
|
|
+Layer 4, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 185.702744
|
|
|
+ Converted tensor sum: 211.499130
|
|
|
+ Original tensor mean: 0.058032
|
|
|
+ Converted tensor mean: 0.066093
|
|
|
+ Mean difference: 0.12541530
|
|
|
+ Maximum pointwise difference: 2.52001357
|
|
|
+ Max difference location: (0, 27, 5, 8)
|
|
|
+ Values at max diff - Original: 0.05403204, Converted: 2.57404566
|
|
|
+ Biggest difference in row (0, 27, 5), sum 0.682007 vs 7.443546
|
|
|
+
|
|
|
+Layer 5, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 169.265594
|
|
|
+ Converted tensor sum: 227.449417
|
|
|
+ Original tensor mean: 0.052895
|
|
|
+ Converted tensor mean: 0.071078
|
|
|
+ Mean difference: 0.13289575
|
|
|
+ Maximum pointwise difference: 3.03736281
|
|
|
+ Max difference location: (0, 6, 2, 6)
|
|
|
+ Values at max diff - Original: 3.01727891, Converted: -0.02008397
|
|
|
+ Biggest difference in row (0, 6, 2), sum 9.659736 vs 0.153498
|
|
|
+
|
|
|
+Layer 6, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 230.247437
|
|
|
+ Converted tensor sum: 418.704895
|
|
|
+ Original tensor mean: 0.071952
|
|
|
+ Converted tensor mean: 0.130845
|
|
|
+ Mean difference: 0.17921637
|
|
|
+ Maximum pointwise difference: 4.08086109
|
|
|
+ Max difference location: (0, 6, 0, 1)
|
|
|
+ Values at max diff - Original: 0.00348123, Converted: 4.08434248
|
|
|
+ Biggest difference in row (0, 6, 0), sum 0.879897 vs 15.160538
|
|
|
+
|
|
|
+Layer 8, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 206.699799
|
|
|
+ Converted tensor sum: 283.296692
|
|
|
+ Original tensor mean: 0.064594
|
|
|
+ Converted tensor mean: 0.088530
|
|
|
+ Mean difference: 0.15303743
|
|
|
+ Maximum pointwise difference: 3.20992827
|
|
|
+ Max difference location: (0, 14, 4, 5)
|
|
|
+ Values at max diff - Original: 0.00341668, Converted: 3.21334505
|
|
|
+ Biggest difference in row (0, 2, 4), sum -0.470056 vs 8.175467
|
|
|
+
|
|
|
+Layer 9, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 155.765579
|
|
|
+ Converted tensor sum: 185.697693
|
|
|
+ Original tensor mean: 0.048677
|
|
|
+ Converted tensor mean: 0.058031
|
|
|
+ Mean difference: 0.09974226
|
|
|
+ Maximum pointwise difference: 2.01155925
|
|
|
+ Max difference location: (0, 14, 1, 8)
|
|
|
+ Values at max diff - Original: -0.00813468, Converted: 2.00342464
|
|
|
+ Biggest difference in row (0, 18, 3), sum -0.273577 vs 5.096995
|
|
|
+
|
|
|
+Layer 10, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 147.632782
|
|
|
+ Converted tensor sum: 177.473785
|
|
|
+ Original tensor mean: 0.046135
|
|
|
+ Converted tensor mean: 0.055461
|
|
|
+ Mean difference: 0.10073428
|
|
|
+ Maximum pointwise difference: 2.04938221
|
|
|
+ Max difference location: (0, 3, 8, 7)
|
|
|
+ Values at max diff - Original: -0.06264466, Converted: 1.98673749
|
|
|
+ Biggest difference in row (0, 24, 0), sum 0.061289 vs 4.106022
|
|
|
+
|
|
|
+Layer 12, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 189.647308
|
|
|
+ Converted tensor sum: 212.602402
|
|
|
+ Original tensor mean: 0.059265
|
|
|
+ Converted tensor mean: 0.066438
|
|
|
+ Mean difference: 0.12409261
|
|
|
+ Maximum pointwise difference: 3.06548572
|
|
|
+ Max difference location: (0, 14, 1, 8)
|
|
|
+ Values at max diff - Original: -0.05504636, Converted: 3.01043940
|
|
|
+ Biggest difference in row (0, 14, 1), sum -1.444618 vs 6.230721
|
|
|
+
|
|
|
+Layer 13, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 176.983215
|
|
|
+ Converted tensor sum: 204.426437
|
|
|
+ Original tensor mean: 0.055307
|
|
|
+ Converted tensor mean: 0.063883
|
|
|
+ Mean difference: 0.10065258
|
|
|
+ Maximum pointwise difference: 1.83688605
|
|
|
+ Max difference location: (0, 26, 0, 4)
|
|
|
+ Values at max diff - Original: -0.00286533, Converted: 1.83402073
|
|
|
+ Biggest difference in row (0, 17, 8), sum 4.395949 vs 0.724224
|
|
|
+
|
|
|
+Layer 14, Token 10 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 362.967407
|
|
|
+ Converted tensor sum: 429.969727
|
|
|
+ Original tensor mean: 0.113427
|
|
|
+ Converted tensor mean: 0.134366
|
|
|
+ Mean difference: 0.20180641
|
|
|
+ Maximum pointwise difference: 3.78999281
|
|
|
+ Max difference location: (0, 8, 9, 2)
|
|
|
+ Values at max diff - Original: -0.03249586, Converted: 3.75749683
|
|
|
+ Biggest difference in row (0, 8, 9), sum 0.437254 vs 14.025442
|
|
|
+
|
|
|
+Layer 0, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 1.054740
|
|
|
+ Converted tensor sum: -4.912385
|
|
|
+ Original tensor mean: 0.000330
|
|
|
+ Converted tensor mean: -0.001535
|
|
|
+ Mean difference: 0.06330946
|
|
|
+ Maximum pointwise difference: 0.92195946
|
|
|
+ Max difference location: (0, 4, 9, 1)
|
|
|
+ Values at max diff - Original: 0.89514881, Converted: -0.02681063
|
|
|
+ Biggest difference in row (0, 4, 9), sum 1.999353 vs 0.163843
|
|
|
+
|
|
|
+Layer 1, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 229.025497
|
|
|
+ Converted tensor sum: 120.378685
|
|
|
+ Original tensor mean: 0.071570
|
|
|
+ Converted tensor mean: 0.037618
|
|
|
+ Mean difference: 0.11386316
|
|
|
+ Maximum pointwise difference: 2.45059752
|
|
|
+ Max difference location: (0, 14, 7, 2)
|
|
|
+ Values at max diff - Original: 2.53569841, Converted: 0.08510098
|
|
|
+ Biggest difference in row (0, 16, 6), sum 5.812350 vs -0.022719
|
|
|
+
|
|
|
+Layer 2, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 158.621384
|
|
|
+ Converted tensor sum: 133.457428
|
|
|
+ Original tensor mean: 0.049569
|
|
|
+ Converted tensor mean: 0.041705
|
|
|
+ Mean difference: 0.14393179
|
|
|
+ Maximum pointwise difference: 2.77776694
|
|
|
+ Max difference location: (0, 12, 7, 9)
|
|
|
+ Values at max diff - Original: 2.95237303, Converted: 0.17460610
|
|
|
+ Biggest difference in row (0, 12, 7), sum 8.065367 vs 1.687768
|
|
|
+
|
|
|
+Layer 4, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 216.897552
|
|
|
+ Converted tensor sum: 241.688950
|
|
|
+ Original tensor mean: 0.067780
|
|
|
+ Converted tensor mean: 0.075528
|
|
|
+ Mean difference: 0.14223064
|
|
|
+ Maximum pointwise difference: 3.88969064
|
|
|
+ Max difference location: (0, 19, 2, 0)
|
|
|
+ Values at max diff - Original: 0.01694401, Converted: 3.90663457
|
|
|
+ Biggest difference in row (0, 19, 2), sum 0.437507 vs 8.962053
|
|
|
+
|
|
|
+Layer 5, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 252.265610
|
|
|
+ Converted tensor sum: 322.771881
|
|
|
+ Original tensor mean: 0.078833
|
|
|
+ Converted tensor mean: 0.100866
|
|
|
+ Mean difference: 0.17598768
|
|
|
+ Maximum pointwise difference: 7.97533512
|
|
|
+ Max difference location: (0, 28, 6, 9)
|
|
|
+ Values at max diff - Original: 0.35858834, Converted: 8.33392334
|
|
|
+ Biggest difference in row (0, 28, 6), sum 5.014431 vs 26.334686
|
|
|
+
|
|
|
+Layer 6, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 291.508423
|
|
|
+ Converted tensor sum: 433.311768
|
|
|
+ Original tensor mean: 0.091096
|
|
|
+ Converted tensor mean: 0.135410
|
|
|
+ Mean difference: 0.17094433
|
|
|
+ Maximum pointwise difference: 3.41666508
|
|
|
+ Max difference location: (0, 6, 4, 5)
|
|
|
+ Values at max diff - Original: 0.27297387, Converted: 3.68963885
|
|
|
+ Biggest difference in row (0, 14, 1), sum -0.165701 vs 10.544808
|
|
|
+
|
|
|
+Layer 8, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 215.415359
|
|
|
+ Converted tensor sum: 351.092529
|
|
|
+ Original tensor mean: 0.067317
|
|
|
+ Converted tensor mean: 0.109716
|
|
|
+ Mean difference: 0.18807893
|
|
|
+ Maximum pointwise difference: 3.95769572
|
|
|
+ Max difference location: (0, 23, 4, 7)
|
|
|
+ Values at max diff - Original: 3.95293593, Converted: -0.00475990
|
|
|
+ Biggest difference in row (0, 2, 4), sum 0.017769 vs 8.146402
|
|
|
+
|
|
|
+Layer 9, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 230.947296
|
|
|
+ Converted tensor sum: 244.599213
|
|
|
+ Original tensor mean: 0.072171
|
|
|
+ Converted tensor mean: 0.076437
|
|
|
+ Mean difference: 0.13342199
|
|
|
+ Maximum pointwise difference: 2.90320230
|
|
|
+ Max difference location: (0, 18, 3, 2)
|
|
|
+ Values at max diff - Original: -0.01862744, Converted: 2.88457489
|
|
|
+ Biggest difference in row (0, 28, 7), sum 8.403417 vs 1.460527
|
|
|
+
|
|
|
+Layer 10, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 271.779785
|
|
|
+ Converted tensor sum: 241.771790
|
|
|
+ Original tensor mean: 0.084931
|
|
|
+ Converted tensor mean: 0.075554
|
|
|
+ Mean difference: 0.15158509
|
|
|
+ Maximum pointwise difference: 3.77889895
|
|
|
+ Max difference location: (0, 0, 3, 7)
|
|
|
+ Values at max diff - Original: 4.08713722, Converted: 0.30823818
|
|
|
+ Biggest difference in row (0, 10, 4), sum 7.732811 vs 0.603564
|
|
|
+
|
|
|
+Layer 12, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 274.425629
|
|
|
+ Converted tensor sum: 286.277039
|
|
|
+ Original tensor mean: 0.085758
|
|
|
+ Converted tensor mean: 0.089462
|
|
|
+ Mean difference: 0.16393411
|
|
|
+ Maximum pointwise difference: 3.90725374
|
|
|
+ Max difference location: (0, 14, 1, 8)
|
|
|
+ Values at max diff - Original: 0.01574333, Converted: 3.92299700
|
|
|
+ Biggest difference in row (0, 23, 2), sum 10.560888 vs 1.081235
|
|
|
+
|
|
|
+Layer 13, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 212.238953
|
|
|
+ Converted tensor sum: 260.726898
|
|
|
+ Original tensor mean: 0.066325
|
|
|
+ Converted tensor mean: 0.081477
|
|
|
+ Mean difference: 0.12856843
|
|
|
+ Maximum pointwise difference: 3.76317525
|
|
|
+ Max difference location: (0, 17, 8, 2)
|
|
|
+ Values at max diff - Original: 4.56109810, Converted: 0.79792279
|
|
|
+ Biggest difference in row (0, 19, 1), sum 10.229995 vs 2.908604
|
|
|
+
|
|
|
+Layer 14, Token 11 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 502.973511
|
|
|
+ Converted tensor sum: 568.935181
|
|
|
+ Original tensor mean: 0.157179
|
|
|
+ Converted tensor mean: 0.177792
|
|
|
+ Mean difference: 0.27989930
|
|
|
+ Maximum pointwise difference: 4.54578638
|
|
|
+ Max difference location: (0, 16, 7, 6)
|
|
|
+ Values at max diff - Original: 4.27132416, Converted: -0.27446240
|
|
|
+ Biggest difference in row (0, 21, 5), sum -0.168386 vs 13.477350
|
|
|
+
|
|
|
+Layer 0, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 4.252830
|
|
|
+ Converted tensor sum: -0.731128
|
|
|
+ Original tensor mean: 0.001329
|
|
|
+ Converted tensor mean: -0.000228
|
|
|
+ Mean difference: 0.06294378
|
|
|
+ Maximum pointwise difference: 1.78251398
|
|
|
+ Max difference location: (0, 1, 3, 2)
|
|
|
+ Values at max diff - Original: -0.00792313, Converted: 1.77459085
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.238817 vs 2.175461
|
|
|
+
|
|
|
+Layer 1, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 242.003052
|
|
|
+ Converted tensor sum: 66.457909
|
|
|
+ Original tensor mean: 0.075626
|
|
|
+ Converted tensor mean: 0.020768
|
|
|
+ Mean difference: 0.11966369
|
|
|
+ Maximum pointwise difference: 2.80864978
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 2.71780372, Converted: -0.09084603
|
|
|
+ Biggest difference in row (0, 14, 0), sum 5.513966 vs -0.057299
|
|
|
+
|
|
|
+Layer 2, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 212.836731
|
|
|
+ Converted tensor sum: 76.092499
|
|
|
+ Original tensor mean: 0.066511
|
|
|
+ Converted tensor mean: 0.023779
|
|
|
+ Mean difference: 0.14941603
|
|
|
+ Maximum pointwise difference: 2.88118339
|
|
|
+ Max difference location: (0, 12, 7, 0)
|
|
|
+ Values at max diff - Original: 2.70842910, Converted: -0.17275429
|
|
|
+ Biggest difference in row (0, 12, 7), sum 7.969865 vs 0.167881
|
|
|
+
|
|
|
+Layer 4, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 128.756699
|
|
|
+ Converted tensor sum: 154.911957
|
|
|
+ Original tensor mean: 0.040236
|
|
|
+ Converted tensor mean: 0.048410
|
|
|
+ Mean difference: 0.10618121
|
|
|
+ Maximum pointwise difference: 2.31433964
|
|
|
+ Max difference location: (0, 8, 1, 6)
|
|
|
+ Values at max diff - Original: 2.26328707, Converted: -0.05105254
|
|
|
+ Biggest difference in row (0, 25, 7), sum 3.269817 vs -0.397900
|
|
|
+
|
|
|
+Layer 5, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 176.745117
|
|
|
+ Converted tensor sum: 232.734680
|
|
|
+ Original tensor mean: 0.055233
|
|
|
+ Converted tensor mean: 0.072730
|
|
|
+ Mean difference: 0.13117053
|
|
|
+ Maximum pointwise difference: 4.35398436
|
|
|
+ Max difference location: (0, 28, 6, 9)
|
|
|
+ Values at max diff - Original: 0.18738972, Converted: 4.54137421
|
|
|
+ Biggest difference in row (0, 28, 6), sum 3.095334 vs 9.516649
|
|
|
+
|
|
|
+Layer 6, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 259.031647
|
|
|
+ Converted tensor sum: 428.069794
|
|
|
+ Original tensor mean: 0.080947
|
|
|
+ Converted tensor mean: 0.133772
|
|
|
+ Mean difference: 0.16942802
|
|
|
+ Maximum pointwise difference: 5.44846153
|
|
|
+ Max difference location: (0, 26, 9, 3)
|
|
|
+ Values at max diff - Original: -0.01164311, Converted: 5.43681860
|
|
|
+ Biggest difference in row (0, 6, 0), sum 0.994667 vs 12.910238
|
|
|
+
|
|
|
+Layer 8, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 221.930222
|
|
|
+ Converted tensor sum: 262.522369
|
|
|
+ Original tensor mean: 0.069353
|
|
|
+ Converted tensor mean: 0.082038
|
|
|
+ Mean difference: 0.17785330
|
|
|
+ Maximum pointwise difference: 4.14597464
|
|
|
+ Max difference location: (0, 21, 9, 9)
|
|
|
+ Values at max diff - Original: -0.07410901, Converted: 4.07186556
|
|
|
+ Biggest difference in row (0, 21, 9), sum -0.204344 vs 10.075971
|
|
|
+
|
|
|
+Layer 9, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 189.028931
|
|
|
+ Converted tensor sum: 238.029388
|
|
|
+ Original tensor mean: 0.059072
|
|
|
+ Converted tensor mean: 0.074384
|
|
|
+ Mean difference: 0.14264640
|
|
|
+ Maximum pointwise difference: 2.92814064
|
|
|
+ Max difference location: (0, 14, 1, 2)
|
|
|
+ Values at max diff - Original: -0.88447762, Converted: 2.04366302
|
|
|
+ Biggest difference in row (0, 28, 0), sum 1.806244 vs 7.562672
|
|
|
+
|
|
|
+Layer 10, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 236.811234
|
|
|
+ Converted tensor sum: 260.771973
|
|
|
+ Original tensor mean: 0.074004
|
|
|
+ Converted tensor mean: 0.081491
|
|
|
+ Mean difference: 0.15943669
|
|
|
+ Maximum pointwise difference: 5.29651165
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 0.03258384, Converted: 5.32909536
|
|
|
+ Biggest difference in row (0, 24, 0), sum 0.082025 vs 10.949675
|
|
|
+
|
|
|
+Layer 12, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 244.807922
|
|
|
+ Converted tensor sum: 314.705444
|
|
|
+ Original tensor mean: 0.076502
|
|
|
+ Converted tensor mean: 0.098345
|
|
|
+ Mean difference: 0.16864727
|
|
|
+ Maximum pointwise difference: 4.38556862
|
|
|
+ Max difference location: (0, 20, 3, 2)
|
|
|
+ Values at max diff - Original: -0.00896719, Converted: 4.37660122
|
|
|
+ Biggest difference in row (0, 28, 3), sum 10.509099 vs 0.169576
|
|
|
+
|
|
|
+Layer 13, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 195.554291
|
|
|
+ Converted tensor sum: 222.348053
|
|
|
+ Original tensor mean: 0.061111
|
|
|
+ Converted tensor mean: 0.069484
|
|
|
+ Mean difference: 0.13128105
|
|
|
+ Maximum pointwise difference: 3.68478298
|
|
|
+ Max difference location: (0, 17, 2, 8)
|
|
|
+ Values at max diff - Original: 0.00859472, Converted: 3.69337773
|
|
|
+ Biggest difference in row (0, 17, 2), sum 0.146146 vs 8.692631
|
|
|
+
|
|
|
+Layer 14, Token 12 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 483.896393
|
|
|
+ Converted tensor sum: 527.955566
|
|
|
+ Original tensor mean: 0.151218
|
|
|
+ Converted tensor mean: 0.164986
|
|
|
+ Mean difference: 0.27409020
|
|
|
+ Maximum pointwise difference: 4.70396519
|
|
|
+ Max difference location: (0, 25, 4, 1)
|
|
|
+ Values at max diff - Original: -0.42079771, Converted: 4.28316736
|
|
|
+ Biggest difference in row (0, 16, 6), sum -0.041328 vs 13.549324
|
|
|
+
|
|
|
+Layer 0, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 1.659033
|
|
|
+ Converted tensor sum: -7.970642
|
|
|
+ Original tensor mean: 0.000518
|
|
|
+ Converted tensor mean: -0.002491
|
|
|
+ Mean difference: 0.07536316
|
|
|
+ Maximum pointwise difference: 1.29645300
|
|
|
+ Max difference location: (0, 4, 9, 1)
|
|
|
+ Values at max diff - Original: 1.30392849, Converted: 0.00747545
|
|
|
+ Biggest difference in row (0, 26, 3), sum -0.329301 vs -3.374216
|
|
|
+
|
|
|
+Layer 1, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 239.724915
|
|
|
+ Converted tensor sum: 79.675636
|
|
|
+ Original tensor mean: 0.074914
|
|
|
+ Converted tensor mean: 0.024899
|
|
|
+ Mean difference: 0.12407961
|
|
|
+ Maximum pointwise difference: 2.50358605
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 2.48077655, Converted: -0.02280946
|
|
|
+ Biggest difference in row (0, 14, 0), sum 6.016558 vs 0.013054
|
|
|
+
|
|
|
+Layer 2, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 247.626099
|
|
|
+ Converted tensor sum: 106.589592
|
|
|
+ Original tensor mean: 0.077383
|
|
|
+ Converted tensor mean: 0.033309
|
|
|
+ Mean difference: 0.15574569
|
|
|
+ Maximum pointwise difference: 3.29841137
|
|
|
+ Max difference location: (0, 4, 2, 8)
|
|
|
+ Values at max diff - Original: 3.44825506, Converted: 0.14984375
|
|
|
+ Biggest difference in row (0, 12, 7), sum 7.714676 vs 0.758271
|
|
|
+
|
|
|
+Layer 4, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 123.371284
|
|
|
+ Converted tensor sum: 126.859177
|
|
|
+ Original tensor mean: 0.038554
|
|
|
+ Converted tensor mean: 0.039643
|
|
|
+ Mean difference: 0.08389783
|
|
|
+ Maximum pointwise difference: 2.97862935
|
|
|
+ Max difference location: (0, 28, 2, 3)
|
|
|
+ Values at max diff - Original: 3.17326093, Converted: 0.19463167
|
|
|
+ Biggest difference in row (0, 28, 2), sum 4.464350 vs 0.493919
|
|
|
+
|
|
|
+Layer 5, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 147.258102
|
|
|
+ Converted tensor sum: 184.070984
|
|
|
+ Original tensor mean: 0.046018
|
|
|
+ Converted tensor mean: 0.057522
|
|
|
+ Mean difference: 0.10195178
|
|
|
+ Maximum pointwise difference: 2.96551919
|
|
|
+ Max difference location: (0, 28, 6, 9)
|
|
|
+ Values at max diff - Original: 0.10930623, Converted: 3.07482553
|
|
|
+ Biggest difference in row (0, 28, 6), sum 1.825548 vs 9.674469
|
|
|
+
|
|
|
+Layer 6, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 283.270142
|
|
|
+ Converted tensor sum: 448.314880
|
|
|
+ Original tensor mean: 0.088522
|
|
|
+ Converted tensor mean: 0.140098
|
|
|
+ Mean difference: 0.15905625
|
|
|
+ Maximum pointwise difference: 3.46541429
|
|
|
+ Max difference location: (0, 24, 8, 2)
|
|
|
+ Values at max diff - Original: -0.00599505, Converted: 3.45941925
|
|
|
+ Biggest difference in row (0, 6, 4), sum 3.774855 vs 11.804656
|
|
|
+
|
|
|
+Layer 8, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 241.112183
|
|
|
+ Converted tensor sum: 274.130127
|
|
|
+ Original tensor mean: 0.075348
|
|
|
+ Converted tensor mean: 0.085666
|
|
|
+ Mean difference: 0.10883617
|
|
|
+ Maximum pointwise difference: 4.01715469
|
|
|
+ Max difference location: (0, 21, 9, 9)
|
|
|
+ Values at max diff - Original: -0.09944591, Converted: 3.91770887
|
|
|
+ Biggest difference in row (0, 20, 9), sum -0.234172 vs 5.020240
|
|
|
+
|
|
|
+Layer 9, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 173.039688
|
|
|
+ Converted tensor sum: 214.021088
|
|
|
+ Original tensor mean: 0.054075
|
|
|
+ Converted tensor mean: 0.066882
|
|
|
+ Mean difference: 0.09634628
|
|
|
+ Maximum pointwise difference: 1.72028887
|
|
|
+ Max difference location: (0, 18, 6, 2)
|
|
|
+ Values at max diff - Original: 0.10816531, Converted: 1.82845414
|
|
|
+ Biggest difference in row (0, 18, 6), sum 0.705206 vs 4.642780
|
|
|
+
|
|
|
+Layer 10, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 213.873550
|
|
|
+ Converted tensor sum: 242.753281
|
|
|
+ Original tensor mean: 0.066835
|
|
|
+ Converted tensor mean: 0.075860
|
|
|
+ Mean difference: 0.09029815
|
|
|
+ Maximum pointwise difference: 1.41950274
|
|
|
+ Max difference location: (0, 11, 2, 6)
|
|
|
+ Values at max diff - Original: 0.20659086, Converted: 1.62609363
|
|
|
+ Biggest difference in row (0, 23, 5), sum 0.448223 vs 3.806486
|
|
|
+
|
|
|
+Layer 12, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 233.552292
|
|
|
+ Converted tensor sum: 296.583405
|
|
|
+ Original tensor mean: 0.072985
|
|
|
+ Converted tensor mean: 0.092682
|
|
|
+ Mean difference: 0.08977944
|
|
|
+ Maximum pointwise difference: 1.59837830
|
|
|
+ Max difference location: (0, 19, 7, 7)
|
|
|
+ Values at max diff - Original: 0.86349380, Converted: 2.46187210
|
|
|
+ Biggest difference in row (0, 4, 3), sum 5.997213 vs 0.558758
|
|
|
+
|
|
|
+Layer 13, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 172.634430
|
|
|
+ Converted tensor sum: 190.998459
|
|
|
+ Original tensor mean: 0.053948
|
|
|
+ Converted tensor mean: 0.059687
|
|
|
+ Mean difference: 0.07964972
|
|
|
+ Maximum pointwise difference: 2.45006180
|
|
|
+ Max difference location: (0, 26, 4, 0)
|
|
|
+ Values at max diff - Original: 2.51385903, Converted: 0.06379732
|
|
|
+ Biggest difference in row (0, 26, 4), sum 5.078406 vs 0.298857
|
|
|
+
|
|
|
+Layer 14, Token 13 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 516.640808
|
|
|
+ Converted tensor sum: 514.890991
|
|
|
+ Original tensor mean: 0.161450
|
|
|
+ Converted tensor mean: 0.160903
|
|
|
+ Mean difference: 0.14294353
|
|
|
+ Maximum pointwise difference: 2.38266706
|
|
|
+ Max difference location: (0, 8, 9, 3)
|
|
|
+ Values at max diff - Original: 0.05516699, Converted: 2.43783402
|
|
|
+ Biggest difference in row (0, 8, 9), sum -0.157885 vs 10.688316
|
|
|
+
|
|
|
+Layer 0, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 5.983342
|
|
|
+ Converted tensor sum: -8.715725
|
|
|
+ Original tensor mean: 0.001870
|
|
|
+ Converted tensor mean: -0.002724
|
|
|
+ Mean difference: 0.07516728
|
|
|
+ Maximum pointwise difference: 1.55751526
|
|
|
+ Max difference location: (0, 25, 8, 2)
|
|
|
+ Values at max diff - Original: 1.57396424, Converted: 0.01644893
|
|
|
+ Biggest difference in row (0, 7, 3), sum 0.124509 vs -1.539357
|
|
|
+
|
|
|
+Layer 1, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 229.783936
|
|
|
+ Converted tensor sum: 87.531807
|
|
|
+ Original tensor mean: 0.071807
|
|
|
+ Converted tensor mean: 0.027354
|
|
|
+ Mean difference: 0.11559690
|
|
|
+ Maximum pointwise difference: 2.09234738
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 2.22317505, Converted: 0.13082767
|
|
|
+ Biggest difference in row (0, 14, 0), sum 5.592927 vs -0.441425
|
|
|
+
|
|
|
+Layer 2, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 268.945923
|
|
|
+ Converted tensor sum: 133.786499
|
|
|
+ Original tensor mean: 0.084046
|
|
|
+ Converted tensor mean: 0.041808
|
|
|
+ Mean difference: 0.16352382
|
|
|
+ Maximum pointwise difference: 2.87041712
|
|
|
+ Max difference location: (0, 11, 9, 6)
|
|
|
+ Values at max diff - Original: 2.94916487, Converted: 0.07874785
|
|
|
+ Biggest difference in row (0, 23, 9), sum 8.145676 vs 0.297307
|
|
|
+
|
|
|
+Layer 4, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 117.762733
|
|
|
+ Converted tensor sum: 118.469772
|
|
|
+ Original tensor mean: 0.036801
|
|
|
+ Converted tensor mean: 0.037022
|
|
|
+ Mean difference: 0.09650213
|
|
|
+ Maximum pointwise difference: 1.50842690
|
|
|
+ Max difference location: (0, 20, 6, 4)
|
|
|
+ Values at max diff - Original: 2.04159784, Converted: 0.53317100
|
|
|
+ Biggest difference in row (0, 20, 6), sum 4.022114 vs 0.957074
|
|
|
+
|
|
|
+Layer 5, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 128.612335
|
|
|
+ Converted tensor sum: 157.030731
|
|
|
+ Original tensor mean: 0.040191
|
|
|
+ Converted tensor mean: 0.049072
|
|
|
+ Mean difference: 0.10371025
|
|
|
+ Maximum pointwise difference: 2.24814534
|
|
|
+ Max difference location: (0, 8, 5, 9)
|
|
|
+ Values at max diff - Original: -0.03905072, Converted: 2.20909452
|
|
|
+ Biggest difference in row (0, 2, 5), sum 3.689715 vs 0.515908
|
|
|
+
|
|
|
+Layer 6, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 284.314667
|
|
|
+ Converted tensor sum: 446.866150
|
|
|
+ Original tensor mean: 0.088848
|
|
|
+ Converted tensor mean: 0.139646
|
|
|
+ Mean difference: 0.16138166
|
|
|
+ Maximum pointwise difference: 3.92217135
|
|
|
+ Max difference location: (0, 26, 9, 3)
|
|
|
+ Values at max diff - Original: -0.00824802, Converted: 3.91392326
|
|
|
+ Biggest difference in row (0, 6, 0), sum 1.467430 vs 14.063056
|
|
|
+
|
|
|
+Layer 8, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 268.420227
|
|
|
+ Converted tensor sum: 298.094666
|
|
|
+ Original tensor mean: 0.083881
|
|
|
+ Converted tensor mean: 0.093155
|
|
|
+ Mean difference: 0.17274044
|
|
|
+ Maximum pointwise difference: 3.57632637
|
|
|
+ Max difference location: (0, 21, 9, 9)
|
|
|
+ Values at max diff - Original: -0.07158025, Converted: 3.50474620
|
|
|
+ Biggest difference in row (0, 21, 9), sum -0.176140 vs 9.883745
|
|
|
+
|
|
|
+Layer 9, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 153.733398
|
|
|
+ Converted tensor sum: 197.629532
|
|
|
+ Original tensor mean: 0.048042
|
|
|
+ Converted tensor mean: 0.061759
|
|
|
+ Mean difference: 0.11230749
|
|
|
+ Maximum pointwise difference: 2.07441854
|
|
|
+ Max difference location: (0, 2, 6, 8)
|
|
|
+ Values at max diff - Original: -0.01318651, Converted: 2.06123209
|
|
|
+ Biggest difference in row (0, 28, 0), sum 1.515908 vs 6.081204
|
|
|
+
|
|
|
+Layer 10, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 196.466980
|
|
|
+ Converted tensor sum: 228.325546
|
|
|
+ Original tensor mean: 0.061396
|
|
|
+ Converted tensor mean: 0.071352
|
|
|
+ Mean difference: 0.11859564
|
|
|
+ Maximum pointwise difference: 4.73182058
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 0.02647224, Converted: 4.75829268
|
|
|
+ Biggest difference in row (0, 24, 0), sum 0.182875 vs 9.743350
|
|
|
+
|
|
|
+Layer 12, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 235.148682
|
|
|
+ Converted tensor sum: 279.831421
|
|
|
+ Original tensor mean: 0.073484
|
|
|
+ Converted tensor mean: 0.087447
|
|
|
+ Mean difference: 0.14844361
|
|
|
+ Maximum pointwise difference: 3.64688230
|
|
|
+ Max difference location: (0, 28, 4, 2)
|
|
|
+ Values at max diff - Original: 0.01143306, Converted: 3.65831542
|
|
|
+ Biggest difference in row (0, 28, 2), sum 9.410328 vs 0.407452
|
|
|
+
|
|
|
+Layer 13, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 165.774078
|
|
|
+ Converted tensor sum: 179.691483
|
|
|
+ Original tensor mean: 0.051804
|
|
|
+ Converted tensor mean: 0.056154
|
|
|
+ Mean difference: 0.09881324
|
|
|
+ Maximum pointwise difference: 3.03563190
|
|
|
+ Max difference location: (0, 11, 0, 4)
|
|
|
+ Values at max diff - Original: -0.10383722, Converted: 2.93179464
|
|
|
+ Biggest difference in row (0, 11, 0), sum 0.092786 vs 5.614193
|
|
|
+
|
|
|
+Layer 14, Token 14 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 519.787109
|
|
|
+ Converted tensor sum: 539.567444
|
|
|
+ Original tensor mean: 0.162433
|
|
|
+ Converted tensor mean: 0.168615
|
|
|
+ Mean difference: 0.25360039
|
|
|
+ Maximum pointwise difference: 4.24835634
|
|
|
+ Max difference location: (0, 15, 8, 2)
|
|
|
+ Values at max diff - Original: -0.01945496, Converted: 4.22890139
|
|
|
+ Biggest difference in row (0, 16, 6), sum 0.069426 vs 10.617959
|
|
|
+
|
|
|
+Layer 0, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 1.143128
|
|
|
+ Converted tensor sum: 1.955431
|
|
|
+ Original tensor mean: 0.000357
|
|
|
+ Converted tensor mean: 0.000611
|
|
|
+ Mean difference: 0.06554744
|
|
|
+ Maximum pointwise difference: 1.62353444
|
|
|
+ Max difference location: (0, 1, 3, 2)
|
|
|
+ Values at max diff - Original: -0.04374466, Converted: 1.57978976
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.256206 vs 2.700654
|
|
|
+
|
|
|
+Layer 1, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 237.576813
|
|
|
+ Converted tensor sum: 84.227829
|
|
|
+ Original tensor mean: 0.074243
|
|
|
+ Converted tensor mean: 0.026321
|
|
|
+ Mean difference: 0.12017149
|
|
|
+ Maximum pointwise difference: 2.73136139
|
|
|
+ Max difference location: (0, 17, 6, 0)
|
|
|
+ Values at max diff - Original: 2.89759755, Converted: 0.16623622
|
|
|
+ Biggest difference in row (0, 16, 6), sum 6.557743 vs 0.035282
|
|
|
+
|
|
|
+Layer 2, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 311.619568
|
|
|
+ Converted tensor sum: 155.972748
|
|
|
+ Original tensor mean: 0.097381
|
|
|
+ Converted tensor mean: 0.048741
|
|
|
+ Mean difference: 0.17912415
|
|
|
+ Maximum pointwise difference: 3.15524197
|
|
|
+ Max difference location: (0, 12, 0, 0)
|
|
|
+ Values at max diff - Original: 3.30613947, Converted: 0.15089758
|
|
|
+ Biggest difference in row (0, 12, 0), sum 9.937962 vs 2.002455
|
|
|
+
|
|
|
+Layer 4, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 167.504608
|
|
|
+ Converted tensor sum: 97.213791
|
|
|
+ Original tensor mean: 0.052345
|
|
|
+ Converted tensor mean: 0.030379
|
|
|
+ Mean difference: 0.11675335
|
|
|
+ Maximum pointwise difference: 2.34569263
|
|
|
+ Max difference location: (0, 28, 2, 3)
|
|
|
+ Values at max diff - Original: 2.36823630, Converted: 0.02254373
|
|
|
+ Biggest difference in row (0, 24, 1), sum 4.970531 vs 0.552202
|
|
|
+
|
|
|
+Layer 5, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 165.098206
|
|
|
+ Converted tensor sum: 106.835938
|
|
|
+ Original tensor mean: 0.051593
|
|
|
+ Converted tensor mean: 0.033386
|
|
|
+ Mean difference: 0.11981978
|
|
|
+ Maximum pointwise difference: 3.00254560
|
|
|
+ Max difference location: (0, 19, 0, 4)
|
|
|
+ Values at max diff - Original: -0.04975805, Converted: 2.95278764
|
|
|
+ Biggest difference in row (0, 6, 7), sum 6.529483 vs 0.743666
|
|
|
+
|
|
|
+Layer 6, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 328.690277
|
|
|
+ Converted tensor sum: 322.121643
|
|
|
+ Original tensor mean: 0.102716
|
|
|
+ Converted tensor mean: 0.100663
|
|
|
+ Mean difference: 0.17106648
|
|
|
+ Maximum pointwise difference: 3.49930573
|
|
|
+ Max difference location: (0, 10, 4, 0)
|
|
|
+ Values at max diff - Original: -0.02206346, Converted: 3.47724223
|
|
|
+ Biggest difference in row (0, 10, 4), sum 0.599032 vs 10.579692
|
|
|
+
|
|
|
+Layer 8, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 317.911224
|
|
|
+ Converted tensor sum: 119.034622
|
|
|
+ Original tensor mean: 0.099347
|
|
|
+ Converted tensor mean: 0.037198
|
|
|
+ Mean difference: 0.17545381
|
|
|
+ Maximum pointwise difference: 5.58166885
|
|
|
+ Max difference location: (0, 12, 5, 9)
|
|
|
+ Values at max diff - Original: -0.00163084, Converted: 5.58003807
|
|
|
+ Biggest difference in row (0, 12, 5), sum -0.115539 vs 9.864284
|
|
|
+
|
|
|
+Layer 9, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 190.170853
|
|
|
+ Converted tensor sum: 99.272003
|
|
|
+ Original tensor mean: 0.059428
|
|
|
+ Converted tensor mean: 0.031023
|
|
|
+ Mean difference: 0.10875368
|
|
|
+ Maximum pointwise difference: 2.41038036
|
|
|
+ Max difference location: (0, 18, 2, 3)
|
|
|
+ Values at max diff - Original: 2.75146770, Converted: 0.34108725
|
|
|
+ Biggest difference in row (0, 18, 2), sum 8.039729 vs 1.441757
|
|
|
+
|
|
|
+Layer 10, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 224.608826
|
|
|
+ Converted tensor sum: 140.291000
|
|
|
+ Original tensor mean: 0.070190
|
|
|
+ Converted tensor mean: 0.043841
|
|
|
+ Mean difference: 0.13173704
|
|
|
+ Maximum pointwise difference: 3.69921541
|
|
|
+ Max difference location: (0, 0, 7, 3)
|
|
|
+ Values at max diff - Original: -0.00459916, Converted: 3.69461632
|
|
|
+ Biggest difference in row (0, 18, 3), sum 0.045224 vs 5.212623
|
|
|
+
|
|
|
+Layer 12, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 284.485657
|
|
|
+ Converted tensor sum: 158.051971
|
|
|
+ Original tensor mean: 0.088902
|
|
|
+ Converted tensor mean: 0.049391
|
|
|
+ Mean difference: 0.16240636
|
|
|
+ Maximum pointwise difference: 3.41311693
|
|
|
+ Max difference location: (0, 30, 4, 0)
|
|
|
+ Values at max diff - Original: -0.00378206, Converted: 3.40933490
|
|
|
+ Biggest difference in row (0, 28, 3), sum 10.288229 vs 0.646799
|
|
|
+
|
|
|
+Layer 13, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 217.891571
|
|
|
+ Converted tensor sum: 114.440430
|
|
|
+ Original tensor mean: 0.068091
|
|
|
+ Converted tensor mean: 0.035763
|
|
|
+ Mean difference: 0.11250080
|
|
|
+ Maximum pointwise difference: 2.57714581
|
|
|
+ Max difference location: (0, 3, 9, 4)
|
|
|
+ Values at max diff - Original: -0.00322327, Converted: 2.57392263
|
|
|
+ Biggest difference in row (0, 8, 1), sum 5.657505 vs 0.025426
|
|
|
+
|
|
|
+Layer 14, Token 15 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 613.393188
|
|
|
+ Converted tensor sum: 259.209320
|
|
|
+ Original tensor mean: 0.191685
|
|
|
+ Converted tensor mean: 0.081003
|
|
|
+ Mean difference: 0.25669345
|
|
|
+ Maximum pointwise difference: 4.67302513
|
|
|
+ Max difference location: (0, 16, 6, 7)
|
|
|
+ Values at max diff - Original: 0.00394140, Converted: 4.67696667
|
|
|
+ Biggest difference in row (0, 16, 6), sum 0.113854 vs 11.389561
|
|
|
+
|
|
|
+Layer 0, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -7.241831
|
|
|
+ Converted tensor sum: 6.292229
|
|
|
+ Original tensor mean: -0.002263
|
|
|
+ Converted tensor mean: 0.001966
|
|
|
+ Mean difference: 0.07260455
|
|
|
+ Maximum pointwise difference: 1.56294525
|
|
|
+ Max difference location: (0, 4, 9, 1)
|
|
|
+ Values at max diff - Original: 1.55768764, Converted: -0.00525762
|
|
|
+ Biggest difference in row (0, 4, 9), sum 3.422554 vs -0.082252
|
|
|
+
|
|
|
+Layer 1, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 208.371277
|
|
|
+ Converted tensor sum: 121.900169
|
|
|
+ Original tensor mean: 0.065116
|
|
|
+ Converted tensor mean: 0.038094
|
|
|
+ Mean difference: 0.10988435
|
|
|
+ Maximum pointwise difference: 2.54077005
|
|
|
+ Max difference location: (0, 16, 6, 1)
|
|
|
+ Values at max diff - Original: 2.44506192, Converted: -0.09570823
|
|
|
+ Biggest difference in row (0, 16, 6), sum 5.495286 vs 0.369152
|
|
|
+
|
|
|
+Layer 2, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 271.274109
|
|
|
+ Converted tensor sum: 250.062592
|
|
|
+ Original tensor mean: 0.084773
|
|
|
+ Converted tensor mean: 0.078145
|
|
|
+ Mean difference: 0.18668148
|
|
|
+ Maximum pointwise difference: 3.97749329
|
|
|
+ Max difference location: (0, 4, 8, 2)
|
|
|
+ Values at max diff - Original: 0.00367373, Converted: 3.98116708
|
|
|
+ Biggest difference in row (0, 4, 8), sum 0.084576 vs 8.366636
|
|
|
+
|
|
|
+Layer 4, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 245.506393
|
|
|
+ Converted tensor sum: 128.042282
|
|
|
+ Original tensor mean: 0.076721
|
|
|
+ Converted tensor mean: 0.040013
|
|
|
+ Mean difference: 0.13813969
|
|
|
+ Maximum pointwise difference: 2.50754499
|
|
|
+ Max difference location: (0, 27, 2, 5)
|
|
|
+ Values at max diff - Original: 2.48510361, Converted: -0.02244142
|
|
|
+ Biggest difference in row (0, 30, 3), sum 6.143555 vs -0.003137
|
|
|
+
|
|
|
+Layer 5, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 252.541031
|
|
|
+ Converted tensor sum: 153.491074
|
|
|
+ Original tensor mean: 0.078919
|
|
|
+ Converted tensor mean: 0.047966
|
|
|
+ Mean difference: 0.13783714
|
|
|
+ Maximum pointwise difference: 4.67899084
|
|
|
+ Max difference location: (0, 6, 2, 9)
|
|
|
+ Values at max diff - Original: 4.74959278, Converted: 0.07060210
|
|
|
+ Biggest difference in row (0, 6, 2), sum 15.435174 vs 0.669571
|
|
|
+
|
|
|
+Layer 6, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 417.031616
|
|
|
+ Converted tensor sum: 302.490662
|
|
|
+ Original tensor mean: 0.130322
|
|
|
+ Converted tensor mean: 0.094528
|
|
|
+ Mean difference: 0.18095936
|
|
|
+ Maximum pointwise difference: 3.41091108
|
|
|
+ Max difference location: (0, 1, 9, 8)
|
|
|
+ Values at max diff - Original: 3.94837856, Converted: 0.53746736
|
|
|
+ Biggest difference in row (0, 17, 7), sum 10.598001 vs 1.553886
|
|
|
+
|
|
|
+Layer 8, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 360.497803
|
|
|
+ Converted tensor sum: 167.798264
|
|
|
+ Original tensor mean: 0.112656
|
|
|
+ Converted tensor mean: 0.052437
|
|
|
+ Mean difference: 0.18179806
|
|
|
+ Maximum pointwise difference: 4.85258770
|
|
|
+ Max difference location: (0, 20, 6, 7)
|
|
|
+ Values at max diff - Original: 3.78496194, Converted: -1.06762552
|
|
|
+ Biggest difference in row (0, 20, 6), sum 7.293591 vs -2.448533
|
|
|
+
|
|
|
+Layer 9, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 231.574097
|
|
|
+ Converted tensor sum: 117.788071
|
|
|
+ Original tensor mean: 0.072367
|
|
|
+ Converted tensor mean: 0.036809
|
|
|
+ Mean difference: 0.12296900
|
|
|
+ Maximum pointwise difference: 1.94617844
|
|
|
+ Max difference location: (0, 18, 2, 3)
|
|
|
+ Values at max diff - Original: 2.51620770, Converted: 0.57002932
|
|
|
+ Biggest difference in row (0, 18, 2), sum 7.408888 vs 2.509162
|
|
|
+
|
|
|
+Layer 10, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 251.412247
|
|
|
+ Converted tensor sum: 167.548752
|
|
|
+ Original tensor mean: 0.078566
|
|
|
+ Converted tensor mean: 0.052359
|
|
|
+ Mean difference: 0.13002089
|
|
|
+ Maximum pointwise difference: 2.56599689
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 4.14129448, Converted: 1.57529759
|
|
|
+ Biggest difference in row (0, 14, 2), sum 5.702995 vs 0.022515
|
|
|
+
|
|
|
+Layer 12, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 309.263367
|
|
|
+ Converted tensor sum: 172.743027
|
|
|
+ Original tensor mean: 0.096645
|
|
|
+ Converted tensor mean: 0.053982
|
|
|
+ Mean difference: 0.16015999
|
|
|
+ Maximum pointwise difference: 4.55992699
|
|
|
+ Max difference location: (0, 28, 3, 4)
|
|
|
+ Values at max diff - Original: 3.40088701, Converted: -1.15903974
|
|
|
+ Biggest difference in row (0, 28, 3), sum 10.782799 vs -1.738761
|
|
|
+
|
|
|
+Layer 13, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 245.305267
|
|
|
+ Converted tensor sum: 135.343552
|
|
|
+ Original tensor mean: 0.076658
|
|
|
+ Converted tensor mean: 0.042295
|
|
|
+ Mean difference: 0.11650297
|
|
|
+ Maximum pointwise difference: 2.94789600
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 3.34942126, Converted: 0.40152529
|
|
|
+ Biggest difference in row (0, 27, 4), sum 6.619488 vs 0.377767
|
|
|
+
|
|
|
+Layer 14, Token 16 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 677.616821
|
|
|
+ Converted tensor sum: 309.657593
|
|
|
+ Original tensor mean: 0.211755
|
|
|
+ Converted tensor mean: 0.096768
|
|
|
+ Mean difference: 0.25261062
|
|
|
+ Maximum pointwise difference: 4.12457132
|
|
|
+ Max difference location: (0, 21, 3, 5)
|
|
|
+ Values at max diff - Original: 4.07018948, Converted: -0.05438172
|
|
|
+ Biggest difference in row (0, 21, 3), sum 12.550769 vs -0.320660
|
|
|
+
|
|
|
+Layer 0, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 8.814422
|
|
|
+ Converted tensor sum: 2.569008
|
|
|
+ Original tensor mean: 0.002755
|
|
|
+ Converted tensor mean: 0.000803
|
|
|
+ Mean difference: 0.07054429
|
|
|
+ Maximum pointwise difference: 2.09221244
|
|
|
+ Max difference location: (0, 1, 2, 3)
|
|
|
+ Values at max diff - Original: 2.03968024, Converted: -0.05253213
|
|
|
+ Biggest difference in row (0, 17, 2), sum 2.854507 vs 0.425217
|
|
|
+
|
|
|
+Layer 1, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 202.785217
|
|
|
+ Converted tensor sum: 127.821655
|
|
|
+ Original tensor mean: 0.063370
|
|
|
+ Converted tensor mean: 0.039944
|
|
|
+ Mean difference: 0.11817915
|
|
|
+ Maximum pointwise difference: 2.18196273
|
|
|
+ Max difference location: (0, 23, 4, 0)
|
|
|
+ Values at max diff - Original: 0.00466894, Converted: 2.18663168
|
|
|
+ Biggest difference in row (0, 23, 4), sum 1.189118 vs 6.664180
|
|
|
+
|
|
|
+Layer 2, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 269.547241
|
|
|
+ Converted tensor sum: 202.949875
|
|
|
+ Original tensor mean: 0.084234
|
|
|
+ Converted tensor mean: 0.063422
|
|
|
+ Mean difference: 0.17686243
|
|
|
+ Maximum pointwise difference: 3.38580871
|
|
|
+ Max difference location: (0, 30, 3, 9)
|
|
|
+ Values at max diff - Original: -0.03989490, Converted: 3.34591389
|
|
|
+ Biggest difference in row (0, 23, 4), sum 0.959554 vs 6.602069
|
|
|
+
|
|
|
+Layer 4, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 285.057709
|
|
|
+ Converted tensor sum: 90.890617
|
|
|
+ Original tensor mean: 0.089081
|
|
|
+ Converted tensor mean: 0.028403
|
|
|
+ Mean difference: 0.14633463
|
|
|
+ Maximum pointwise difference: 3.59569287
|
|
|
+ Max difference location: (0, 19, 2, 9)
|
|
|
+ Values at max diff - Original: 0.11129396, Converted: 3.70698690
|
|
|
+ Biggest difference in row (0, 24, 1), sum 6.665072 vs 0.069785
|
|
|
+
|
|
|
+Layer 5, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 305.935303
|
|
|
+ Converted tensor sum: 101.421249
|
|
|
+ Original tensor mean: 0.095605
|
|
|
+ Converted tensor mean: 0.031694
|
|
|
+ Mean difference: 0.15904053
|
|
|
+ Maximum pointwise difference: 2.52599096
|
|
|
+ Max difference location: (0, 6, 2, 9)
|
|
|
+ Values at max diff - Original: 2.51262259, Converted: -0.01336834
|
|
|
+ Biggest difference in row (0, 6, 2), sum 10.206850 vs 1.778209
|
|
|
+
|
|
|
+Layer 6, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 411.833740
|
|
|
+ Converted tensor sum: 250.492935
|
|
|
+ Original tensor mean: 0.128698
|
|
|
+ Converted tensor mean: 0.078279
|
|
|
+ Mean difference: 0.18581259
|
|
|
+ Maximum pointwise difference: 4.02491474
|
|
|
+ Max difference location: (0, 17, 7, 5)
|
|
|
+ Values at max diff - Original: 4.38884020, Converted: 0.36392546
|
|
|
+ Biggest difference in row (0, 17, 7), sum 11.349621 vs 0.846145
|
|
|
+
|
|
|
+Layer 8, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 373.165680
|
|
|
+ Converted tensor sum: 136.027786
|
|
|
+ Original tensor mean: 0.116614
|
|
|
+ Converted tensor mean: 0.042509
|
|
|
+ Mean difference: 0.18740444
|
|
|
+ Maximum pointwise difference: 4.54259586
|
|
|
+ Max difference location: (0, 20, 0, 7)
|
|
|
+ Values at max diff - Original: 4.54873943, Converted: 0.00614343
|
|
|
+ Biggest difference in row (0, 7, 2), sum 8.000880 vs 0.043800
|
|
|
+
|
|
|
+Layer 9, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 239.737335
|
|
|
+ Converted tensor sum: 91.044197
|
|
|
+ Original tensor mean: 0.074918
|
|
|
+ Converted tensor mean: 0.028451
|
|
|
+ Mean difference: 0.11736859
|
|
|
+ Maximum pointwise difference: 1.98427892
|
|
|
+ Max difference location: (0, 14, 2, 1)
|
|
|
+ Values at max diff - Original: 1.90727878, Converted: -0.07700008
|
|
|
+ Biggest difference in row (0, 28, 7), sum 5.596577 vs -0.058259
|
|
|
+
|
|
|
+Layer 10, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 260.470673
|
|
|
+ Converted tensor sum: 162.895706
|
|
|
+ Original tensor mean: 0.081397
|
|
|
+ Converted tensor mean: 0.050905
|
|
|
+ Mean difference: 0.14167482
|
|
|
+ Maximum pointwise difference: 3.23060656
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 4.23022413, Converted: 0.99961770
|
|
|
+ Biggest difference in row (0, 24, 0), sum 0.125982 vs 9.195232
|
|
|
+
|
|
|
+Layer 12, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 321.268158
|
|
|
+ Converted tensor sum: 134.452438
|
|
|
+ Original tensor mean: 0.100396
|
|
|
+ Converted tensor mean: 0.042016
|
|
|
+ Mean difference: 0.17344666
|
|
|
+ Maximum pointwise difference: 4.15682602
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 4.38615370, Converted: 0.22932746
|
|
|
+ Biggest difference in row (0, 28, 3), sum 11.304427 vs 0.427086
|
|
|
+
|
|
|
+Layer 13, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 255.942596
|
|
|
+ Converted tensor sum: 107.501419
|
|
|
+ Original tensor mean: 0.079982
|
|
|
+ Converted tensor mean: 0.033594
|
|
|
+ Mean difference: 0.11964211
|
|
|
+ Maximum pointwise difference: 2.72310257
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 3.71963763, Converted: 0.99653512
|
|
|
+ Biggest difference in row (0, 27, 4), sum 6.949797 vs 0.279431
|
|
|
+
|
|
|
+Layer 14, Token 17 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 718.971008
|
|
|
+ Converted tensor sum: 252.775909
|
|
|
+ Original tensor mean: 0.224678
|
|
|
+ Converted tensor mean: 0.078992
|
|
|
+ Mean difference: 0.28457019
|
|
|
+ Maximum pointwise difference: 4.54859781
|
|
|
+ Max difference location: (0, 5, 8, 9)
|
|
|
+ Values at max diff - Original: -0.00566958, Converted: 4.54292822
|
|
|
+ Biggest difference in row (0, 6, 1), sum 11.820190 vs 0.083275
|
|
|
+
|
|
|
+Layer 0, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: -2.745796
|
|
|
+ Converted tensor sum: 11.596529
|
|
|
+ Original tensor mean: -0.000858
|
|
|
+ Converted tensor mean: 0.003624
|
|
|
+ Mean difference: 0.06698289
|
|
|
+ Maximum pointwise difference: 1.30398095
|
|
|
+ Max difference location: (0, 1, 2, 3)
|
|
|
+ Values at max diff - Original: 1.23424304, Converted: -0.06973789
|
|
|
+ Biggest difference in row (0, 25, 7), sum -1.491066 vs 0.107394
|
|
|
+
|
|
|
+Layer 1, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 196.680084
|
|
|
+ Converted tensor sum: 112.820984
|
|
|
+ Original tensor mean: 0.061463
|
|
|
+ Converted tensor mean: 0.035257
|
|
|
+ Mean difference: 0.11424790
|
|
|
+ Maximum pointwise difference: 1.90677047
|
|
|
+ Max difference location: (0, 14, 2, 3)
|
|
|
+ Values at max diff - Original: 0.10773923, Converted: 2.01450968
|
|
|
+ Biggest difference in row (0, 24, 3), sum 5.388914 vs 0.084538
|
|
|
+
|
|
|
+Layer 2, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 269.808228
|
|
|
+ Converted tensor sum: 106.268402
|
|
|
+ Original tensor mean: 0.084315
|
|
|
+ Converted tensor mean: 0.033209
|
|
|
+ Mean difference: 0.16576965
|
|
|
+ Maximum pointwise difference: 2.41004586
|
|
|
+ Max difference location: (0, 12, 0, 0)
|
|
|
+ Values at max diff - Original: 2.62151933, Converted: 0.21147355
|
|
|
+ Biggest difference in row (0, 12, 0), sum 7.396654 vs 0.148190
|
|
|
+
|
|
|
+Layer 4, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 299.541138
|
|
|
+ Converted tensor sum: 34.684372
|
|
|
+ Original tensor mean: 0.093607
|
|
|
+ Converted tensor mean: 0.010839
|
|
|
+ Mean difference: 0.15344296
|
|
|
+ Maximum pointwise difference: 4.97097397
|
|
|
+ Max difference location: (0, 27, 8, 5)
|
|
|
+ Values at max diff - Original: 4.93650246, Converted: -0.03447145
|
|
|
+ Biggest difference in row (0, 27, 8), sum 10.168988 vs 0.095367
|
|
|
+
|
|
|
+Layer 5, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 322.520721
|
|
|
+ Converted tensor sum: 32.353989
|
|
|
+ Original tensor mean: 0.100788
|
|
|
+ Converted tensor mean: 0.010111
|
|
|
+ Mean difference: 0.15857503
|
|
|
+ Maximum pointwise difference: 3.27807403
|
|
|
+ Max difference location: (0, 28, 9, 6)
|
|
|
+ Values at max diff - Original: 3.39260817, Converted: 0.11453414
|
|
|
+ Biggest difference in row (0, 31, 7), sum 9.317598 vs 0.060667
|
|
|
+
|
|
|
+Layer 6, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 404.272705
|
|
|
+ Converted tensor sum: 105.430817
|
|
|
+ Original tensor mean: 0.126335
|
|
|
+ Converted tensor mean: 0.032947
|
|
|
+ Mean difference: 0.18362552
|
|
|
+ Maximum pointwise difference: 4.36808205
|
|
|
+ Max difference location: (0, 6, 5, 4)
|
|
|
+ Values at max diff - Original: 4.63004971, Converted: 0.26196742
|
|
|
+ Biggest difference in row (0, 30, 4), sum 12.429064 vs 1.549177
|
|
|
+
|
|
|
+Layer 8, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 379.120117
|
|
|
+ Converted tensor sum: 49.316475
|
|
|
+ Original tensor mean: 0.118475
|
|
|
+ Converted tensor mean: 0.015411
|
|
|
+ Mean difference: 0.18690227
|
|
|
+ Maximum pointwise difference: 4.34863997
|
|
|
+ Max difference location: (0, 20, 0, 7)
|
|
|
+ Values at max diff - Original: 4.50196075, Converted: 0.15332088
|
|
|
+ Biggest difference in row (0, 7, 2), sum 8.701149 vs -1.880803
|
|
|
+
|
|
|
+Layer 9, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 247.687454
|
|
|
+ Converted tensor sum: 31.604210
|
|
|
+ Original tensor mean: 0.077402
|
|
|
+ Converted tensor mean: 0.009876
|
|
|
+ Mean difference: 0.12334745
|
|
|
+ Maximum pointwise difference: 2.89748645
|
|
|
+ Max difference location: (0, 14, 2, 1)
|
|
|
+ Values at max diff - Original: 2.54342103, Converted: -0.35406536
|
|
|
+ Biggest difference in row (0, 9, 8), sum 5.984664 vs -0.341670
|
|
|
+
|
|
|
+Layer 10, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 262.752014
|
|
|
+ Converted tensor sum: 52.628201
|
|
|
+ Original tensor mean: 0.082110
|
|
|
+ Converted tensor mean: 0.016446
|
|
|
+ Mean difference: 0.13161205
|
|
|
+ Maximum pointwise difference: 2.92723370
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 4.35996389, Converted: 1.43273032
|
|
|
+ Biggest difference in row (0, 11, 6), sum 6.418620 vs 0.589213
|
|
|
+
|
|
|
+Layer 12, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 326.667419
|
|
|
+ Converted tensor sum: 31.792521
|
|
|
+ Original tensor mean: 0.102084
|
|
|
+ Converted tensor mean: 0.009935
|
|
|
+ Mean difference: 0.17550385
|
|
|
+ Maximum pointwise difference: 4.50774860
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 4.50715399, Converted: -0.00059444
|
|
|
+ Biggest difference in row (0, 21, 9), sum 0.167931 vs -15.009873
|
|
|
+
|
|
|
+Layer 13, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 261.870972
|
|
|
+ Converted tensor sum: 53.651596
|
|
|
+ Original tensor mean: 0.081835
|
|
|
+ Converted tensor mean: 0.016766
|
|
|
+ Mean difference: 0.12234001
|
|
|
+ Maximum pointwise difference: 4.01087809
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 4.08570528, Converted: 0.07482710
|
|
|
+ Biggest difference in row (0, 20, 0), sum 2.293484 vs -4.637159
|
|
|
+
|
|
|
+Layer 14, Token 18 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 740.518921
|
|
|
+ Converted tensor sum: 84.074921
|
|
|
+ Original tensor mean: 0.231412
|
|
|
+ Converted tensor mean: 0.026273
|
|
|
+ Mean difference: 0.28319737
|
|
|
+ Maximum pointwise difference: 4.63366222
|
|
|
+ Max difference location: (0, 1, 4, 6)
|
|
|
+ Values at max diff - Original: -0.00748948, Converted: 4.62617254
|
|
|
+ Biggest difference in row (0, 6, 1), sum 12.889781 vs -0.226667
|
|
|
+
|
|
|
+Layer 0, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 0.866719
|
|
|
+ Converted tensor sum: 13.915674
|
|
|
+ Original tensor mean: 0.000271
|
|
|
+ Converted tensor mean: 0.004349
|
|
|
+ Mean difference: 0.05563419
|
|
|
+ Maximum pointwise difference: 1.58602941
|
|
|
+ Max difference location: (0, 1, 5, 3)
|
|
|
+ Values at max diff - Original: 1.50699055, Converted: -0.07903884
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.233465 vs 1.374955
|
|
|
+
|
|
|
+Layer 1, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 143.055450
|
|
|
+ Converted tensor sum: 84.285873
|
|
|
+ Original tensor mean: 0.044705
|
|
|
+ Converted tensor mean: 0.026339
|
|
|
+ Mean difference: 0.11436888
|
|
|
+ Maximum pointwise difference: 2.11188436
|
|
|
+ Max difference location: (0, 15, 8, 5)
|
|
|
+ Values at max diff - Original: -0.06675819, Converted: 2.04512620
|
|
|
+ Biggest difference in row (0, 23, 4), sum 0.445206 vs 4.313503
|
|
|
+
|
|
|
+Layer 2, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 206.674835
|
|
|
+ Converted tensor sum: 69.739983
|
|
|
+ Original tensor mean: 0.064586
|
|
|
+ Converted tensor mean: 0.021794
|
|
|
+ Mean difference: 0.14624587
|
|
|
+ Maximum pointwise difference: 2.46052098
|
|
|
+ Max difference location: (0, 5, 4, 5)
|
|
|
+ Values at max diff - Original: 2.46177387, Converted: 0.00125289
|
|
|
+ Biggest difference in row (0, 23, 9), sum 5.872013 vs -0.147400
|
|
|
+
|
|
|
+Layer 4, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 223.180557
|
|
|
+ Converted tensor sum: 57.034431
|
|
|
+ Original tensor mean: 0.069744
|
|
|
+ Converted tensor mean: 0.017823
|
|
|
+ Mean difference: 0.13700224
|
|
|
+ Maximum pointwise difference: 4.09037542
|
|
|
+ Max difference location: (0, 25, 1, 9)
|
|
|
+ Values at max diff - Original: 3.97389102, Converted: -0.11648450
|
|
|
+ Biggest difference in row (0, 24, 1), sum 6.574383 vs 0.271665
|
|
|
+
|
|
|
+Layer 5, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 315.655853
|
|
|
+ Converted tensor sum: 48.647461
|
|
|
+ Original tensor mean: 0.098642
|
|
|
+ Converted tensor mean: 0.015202
|
|
|
+ Mean difference: 0.15315701
|
|
|
+ Maximum pointwise difference: 6.10414743
|
|
|
+ Max difference location: (0, 28, 9, 6)
|
|
|
+ Values at max diff - Original: 6.29615974, Converted: 0.19201221
|
|
|
+ Biggest difference in row (0, 28, 9), sum 11.702868 vs 0.168917
|
|
|
+
|
|
|
+Layer 6, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 358.473572
|
|
|
+ Converted tensor sum: 101.158226
|
|
|
+ Original tensor mean: 0.112023
|
|
|
+ Converted tensor mean: 0.031612
|
|
|
+ Mean difference: 0.16535039
|
|
|
+ Maximum pointwise difference: 3.82374835
|
|
|
+ Max difference location: (0, 18, 1, 7)
|
|
|
+ Values at max diff - Original: 3.88149524, Converted: 0.05774695
|
|
|
+ Biggest difference in row (0, 20, 9), sum 9.851446 vs -0.224849
|
|
|
+
|
|
|
+Layer 8, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 346.821899
|
|
|
+ Converted tensor sum: 80.751968
|
|
|
+ Original tensor mean: 0.108382
|
|
|
+ Converted tensor mean: 0.025235
|
|
|
+ Mean difference: 0.17743167
|
|
|
+ Maximum pointwise difference: 3.78403044
|
|
|
+ Max difference location: (0, 20, 0, 7)
|
|
|
+ Values at max diff - Original: 3.89911222, Converted: 0.11508182
|
|
|
+ Biggest difference in row (0, 7, 2), sum 9.025558 vs -0.167117
|
|
|
+
|
|
|
+Layer 9, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 249.268311
|
|
|
+ Converted tensor sum: 43.202286
|
|
|
+ Original tensor mean: 0.077896
|
|
|
+ Converted tensor mean: 0.013501
|
|
|
+ Mean difference: 0.12318792
|
|
|
+ Maximum pointwise difference: 2.83834696
|
|
|
+ Max difference location: (0, 9, 8, 6)
|
|
|
+ Values at max diff - Original: 2.71989083, Converted: -0.11845621
|
|
|
+ Biggest difference in row (0, 9, 8), sum 8.573050 vs -0.169431
|
|
|
+
|
|
|
+Layer 10, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 291.462646
|
|
|
+ Converted tensor sum: 66.798782
|
|
|
+ Original tensor mean: 0.091082
|
|
|
+ Converted tensor mean: 0.020875
|
|
|
+ Mean difference: 0.14087133
|
|
|
+ Maximum pointwise difference: 3.38042760
|
|
|
+ Max difference location: (0, 25, 1, 9)
|
|
|
+ Values at max diff - Original: 3.51948309, Converted: 0.13905543
|
|
|
+ Biggest difference in row (0, 25, 1), sum 8.942734 vs 0.065733
|
|
|
+
|
|
|
+Layer 12, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 342.570038
|
|
|
+ Converted tensor sum: 48.484200
|
|
|
+ Original tensor mean: 0.107053
|
|
|
+ Converted tensor mean: 0.015151
|
|
|
+ Mean difference: 0.17410682
|
|
|
+ Maximum pointwise difference: 4.36208725
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 4.34435558, Converted: -0.01773176
|
|
|
+ Biggest difference in row (0, 28, 3), sum 11.628893 vs -0.798577
|
|
|
+
|
|
|
+Layer 13, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 270.129211
|
|
|
+ Converted tensor sum: 52.121815
|
|
|
+ Original tensor mean: 0.084415
|
|
|
+ Converted tensor mean: 0.016288
|
|
|
+ Mean difference: 0.12223634
|
|
|
+ Maximum pointwise difference: 3.81266069
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 4.31173086, Converted: 0.49907014
|
|
|
+ Biggest difference in row (0, 27, 4), sum 5.975472 vs -0.026263
|
|
|
+
|
|
|
+Layer 14, Token 19 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 772.850342
|
|
|
+ Converted tensor sum: 107.083702
|
|
|
+ Original tensor mean: 0.241516
|
|
|
+ Converted tensor mean: 0.033464
|
|
|
+ Mean difference: 0.28851181
|
|
|
+ Maximum pointwise difference: 4.31482410
|
|
|
+ Max difference location: (0, 28, 4, 1)
|
|
|
+ Values at max diff - Original: 4.32322884, Converted: 0.00840468
|
|
|
+ Biggest difference in row (0, 14, 2), sum 14.072536 vs 0.377507
|
|
|
+
|
|
|
+Layer 0, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 2.008890
|
|
|
+ Converted tensor sum: 12.614055
|
|
|
+ Original tensor mean: 0.000628
|
|
|
+ Converted tensor mean: 0.003942
|
|
|
+ Mean difference: 0.05576663
|
|
|
+ Maximum pointwise difference: 1.29991353
|
|
|
+ Max difference location: (0, 1, 5, 3)
|
|
|
+ Values at max diff - Original: 1.36800277, Converted: 0.06808926
|
|
|
+ Biggest difference in row (0, 23, 6), sum 0.260241 vs -1.370477
|
|
|
+
|
|
|
+Layer 1, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 58.587276
|
|
|
+ Converted tensor sum: 76.507767
|
|
|
+ Original tensor mean: 0.018309
|
|
|
+ Converted tensor mean: 0.023909
|
|
|
+ Mean difference: 0.10026859
|
|
|
+ Maximum pointwise difference: 2.19443369
|
|
|
+ Max difference location: (0, 14, 2, 2)
|
|
|
+ Values at max diff - Original: -0.11835258, Converted: 2.07608104
|
|
|
+ Biggest difference in row (0, 14, 2), sum -0.449485 vs 3.433519
|
|
|
+
|
|
|
+Layer 2, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 165.744568
|
|
|
+ Converted tensor sum: 64.695602
|
|
|
+ Original tensor mean: 0.051795
|
|
|
+ Converted tensor mean: 0.020217
|
|
|
+ Mean difference: 0.14529096
|
|
|
+ Maximum pointwise difference: 2.04155922
|
|
|
+ Max difference location: (0, 5, 4, 5)
|
|
|
+ Values at max diff - Original: 2.00637627, Converted: -0.03518293
|
|
|
+ Biggest difference in row (0, 5, 4), sum 4.793974 vs -0.065828
|
|
|
+
|
|
|
+Layer 4, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 212.915298
|
|
|
+ Converted tensor sum: 76.568939
|
|
|
+ Original tensor mean: 0.066536
|
|
|
+ Converted tensor mean: 0.023928
|
|
|
+ Mean difference: 0.12807344
|
|
|
+ Maximum pointwise difference: 3.65112019
|
|
|
+ Max difference location: (0, 25, 1, 9)
|
|
|
+ Values at max diff - Original: 3.82295465, Converted: 0.17183457
|
|
|
+ Biggest difference in row (0, 20, 4), sum 0.743454 vs 6.637871
|
|
|
+
|
|
|
+Layer 5, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 258.077209
|
|
|
+ Converted tensor sum: 49.652397
|
|
|
+ Original tensor mean: 0.080649
|
|
|
+ Converted tensor mean: 0.015516
|
|
|
+ Mean difference: 0.13090378
|
|
|
+ Maximum pointwise difference: 2.72355151
|
|
|
+ Max difference location: (0, 28, 9, 6)
|
|
|
+ Values at max diff - Original: 2.71506453, Converted: -0.00848696
|
|
|
+ Biggest difference in row (0, 28, 9), sum 6.250334 vs 0.160866
|
|
|
+
|
|
|
+Layer 6, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 336.431519
|
|
|
+ Converted tensor sum: 141.819733
|
|
|
+ Original tensor mean: 0.105135
|
|
|
+ Converted tensor mean: 0.044319
|
|
|
+ Mean difference: 0.16430938
|
|
|
+ Maximum pointwise difference: 3.65949225
|
|
|
+ Max difference location: (0, 6, 5, 4)
|
|
|
+ Values at max diff - Original: 3.87317371, Converted: 0.21368141
|
|
|
+ Biggest difference in row (0, 12, 1), sum 12.053196 vs 2.254734
|
|
|
+
|
|
|
+Layer 8, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 345.424561
|
|
|
+ Converted tensor sum: 112.814018
|
|
|
+ Original tensor mean: 0.107945
|
|
|
+ Converted tensor mean: 0.035254
|
|
|
+ Mean difference: 0.17631440
|
|
|
+ Maximum pointwise difference: 3.36074710
|
|
|
+ Max difference location: (0, 20, 0, 7)
|
|
|
+ Values at max diff - Original: 3.50376892, Converted: 0.14302187
|
|
|
+ Biggest difference in row (0, 21, 0), sum 8.499396 vs 0.480686
|
|
|
+
|
|
|
+Layer 9, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 261.041870
|
|
|
+ Converted tensor sum: 41.182373
|
|
|
+ Original tensor mean: 0.081576
|
|
|
+ Converted tensor mean: 0.012869
|
|
|
+ Mean difference: 0.12376894
|
|
|
+ Maximum pointwise difference: 2.65249300
|
|
|
+ Max difference location: (0, 18, 2, 3)
|
|
|
+ Values at max diff - Original: 2.77233696, Converted: 0.11984408
|
|
|
+ Biggest difference in row (0, 9, 8), sum 7.305106 vs -0.383589
|
|
|
+
|
|
|
+Layer 10, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 276.296692
|
|
|
+ Converted tensor sum: 75.410934
|
|
|
+ Original tensor mean: 0.086343
|
|
|
+ Converted tensor mean: 0.023566
|
|
|
+ Mean difference: 0.12437831
|
|
|
+ Maximum pointwise difference: 2.84117389
|
|
|
+ Max difference location: (0, 25, 1, 9)
|
|
|
+ Values at max diff - Original: 3.23575449, Converted: 0.39458057
|
|
|
+ Biggest difference in row (0, 25, 1), sum 8.240932 vs 1.044036
|
|
|
+
|
|
|
+Layer 12, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 345.097260
|
|
|
+ Converted tensor sum: 53.731094
|
|
|
+ Original tensor mean: 0.107843
|
|
|
+ Converted tensor mean: 0.016791
|
|
|
+ Mean difference: 0.17168441
|
|
|
+ Maximum pointwise difference: 4.60863352
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 4.63144016, Converted: 0.02280665
|
|
|
+ Biggest difference in row (0, 28, 3), sum 11.591027 vs 0.333645
|
|
|
+
|
|
|
+Layer 13, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 253.047394
|
|
|
+ Converted tensor sum: 40.628811
|
|
|
+ Original tensor mean: 0.079077
|
|
|
+ Converted tensor mean: 0.012697
|
|
|
+ Mean difference: 0.11498150
|
|
|
+ Maximum pointwise difference: 4.22373772
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 4.22381639, Converted: 0.00007845
|
|
|
+ Biggest difference in row (0, 27, 4), sum 6.304989 vs 0.143700
|
|
|
+
|
|
|
+Layer 14, Token 20 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 769.098083
|
|
|
+ Converted tensor sum: 130.283981
|
|
|
+ Original tensor mean: 0.240343
|
|
|
+ Converted tensor mean: 0.040714
|
|
|
+ Mean difference: 0.28381503
|
|
|
+ Maximum pointwise difference: 4.93393469
|
|
|
+ Max difference location: (0, 28, 4, 1)
|
|
|
+ Values at max diff - Original: 4.91371727, Converted: -0.02021729
|
|
|
+ Biggest difference in row (0, 6, 1), sum 14.151162 vs 0.315893
|
|
|
+
|
|
|
+Layer 0, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 1.077594
|
|
|
+ Converted tensor sum: 15.438447
|
|
|
+ Original tensor mean: 0.000337
|
|
|
+ Converted tensor mean: 0.004825
|
|
|
+ Mean difference: 0.05193665
|
|
|
+ Maximum pointwise difference: 0.74260694
|
|
|
+ Max difference location: (0, 28, 8, 5)
|
|
|
+ Values at max diff - Original: 0.72446448, Converted: -0.01814246
|
|
|
+ Biggest difference in row (0, 23, 8), sum -0.100890 vs -1.090759
|
|
|
+
|
|
|
+Layer 1, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 28.594997
|
|
|
+ Converted tensor sum: 89.290833
|
|
|
+ Original tensor mean: 0.008936
|
|
|
+ Converted tensor mean: 0.027903
|
|
|
+ Mean difference: 0.10794319
|
|
|
+ Maximum pointwise difference: 1.59959590
|
|
|
+ Max difference location: (0, 20, 2, 0)
|
|
|
+ Values at max diff - Original: 0.00296844, Converted: 1.60256433
|
|
|
+ Biggest difference in row (0, 20, 2), sum 0.038832 vs 3.326198
|
|
|
+
|
|
|
+Layer 2, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 146.744446
|
|
|
+ Converted tensor sum: 85.128494
|
|
|
+ Original tensor mean: 0.045858
|
|
|
+ Converted tensor mean: 0.026603
|
|
|
+ Mean difference: 0.15625563
|
|
|
+ Maximum pointwise difference: 3.40082598
|
|
|
+ Max difference location: (0, 4, 2, 4)
|
|
|
+ Values at max diff - Original: 3.41796732, Converted: 0.01714140
|
|
|
+ Biggest difference in row (0, 4, 2), sum 7.012363 vs 0.098989
|
|
|
+
|
|
|
+Layer 4, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 120.798615
|
|
|
+ Converted tensor sum: 143.282379
|
|
|
+ Original tensor mean: 0.037750
|
|
|
+ Converted tensor mean: 0.044776
|
|
|
+ Mean difference: 0.13425863
|
|
|
+ Maximum pointwise difference: 2.73616052
|
|
|
+ Max difference location: (0, 24, 9, 1)
|
|
|
+ Values at max diff - Original: 0.23530871, Converted: 2.97146916
|
|
|
+ Biggest difference in row (0, 30, 3), sum 1.329738 vs 6.360154
|
|
|
+
|
|
|
+Layer 5, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 222.583710
|
|
|
+ Converted tensor sum: 86.326241
|
|
|
+ Original tensor mean: 0.069557
|
|
|
+ Converted tensor mean: 0.026977
|
|
|
+ Mean difference: 0.13234577
|
|
|
+ Maximum pointwise difference: 2.64859867
|
|
|
+ Max difference location: (0, 28, 7, 6)
|
|
|
+ Values at max diff - Original: 2.67573905, Converted: 0.02714031
|
|
|
+ Biggest difference in row (0, 26, 8), sum 3.963463 vs -0.648591
|
|
|
+
|
|
|
+Layer 6, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 317.078064
|
|
|
+ Converted tensor sum: 162.595886
|
|
|
+ Original tensor mean: 0.099087
|
|
|
+ Converted tensor mean: 0.050811
|
|
|
+ Mean difference: 0.15550284
|
|
|
+ Maximum pointwise difference: 3.79531074
|
|
|
+ Max difference location: (0, 17, 7, 5)
|
|
|
+ Values at max diff - Original: 3.63465667, Converted: -0.16065404
|
|
|
+ Biggest difference in row (0, 20, 9), sum 8.606161 vs 0.369012
|
|
|
+
|
|
|
+Layer 8, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 345.257385
|
|
|
+ Converted tensor sum: 184.546997
|
|
|
+ Original tensor mean: 0.107893
|
|
|
+ Converted tensor mean: 0.057671
|
|
|
+ Mean difference: 0.18574484
|
|
|
+ Maximum pointwise difference: 3.21210074
|
|
|
+ Max difference location: (0, 7, 2, 9)
|
|
|
+ Values at max diff - Original: 3.22117043, Converted: 0.00906963
|
|
|
+ Biggest difference in row (0, 21, 0), sum 7.649475 vs -0.057539
|
|
|
+
|
|
|
+Layer 9, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 268.515228
|
|
|
+ Converted tensor sum: 95.449539
|
|
|
+ Original tensor mean: 0.083911
|
|
|
+ Converted tensor mean: 0.029828
|
|
|
+ Mean difference: 0.13116649
|
|
|
+ Maximum pointwise difference: 3.19655538
|
|
|
+ Max difference location: (0, 18, 2, 3)
|
|
|
+ Values at max diff - Original: 3.71445417, Converted: 0.51789874
|
|
|
+ Biggest difference in row (0, 9, 5), sum 9.129113 vs 0.079633
|
|
|
+
|
|
|
+Layer 10, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 259.887024
|
|
|
+ Converted tensor sum: 104.823151
|
|
|
+ Original tensor mean: 0.081215
|
|
|
+ Converted tensor mean: 0.032757
|
|
|
+ Mean difference: 0.12396878
|
|
|
+ Maximum pointwise difference: 3.03640962
|
|
|
+ Max difference location: (0, 25, 1, 9)
|
|
|
+ Values at max diff - Original: 3.24910450, Converted: 0.21269491
|
|
|
+ Biggest difference in row (0, 25, 1), sum 8.045052 vs 0.162466
|
|
|
+
|
|
|
+Layer 12, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 340.602814
|
|
|
+ Converted tensor sum: 113.082108
|
|
|
+ Original tensor mean: 0.106438
|
|
|
+ Converted tensor mean: 0.035338
|
|
|
+ Mean difference: 0.17276871
|
|
|
+ Maximum pointwise difference: 4.99602270
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 4.72621298, Converted: -0.26980966
|
|
|
+ Biggest difference in row (0, 28, 3), sum 11.259501 vs -0.695297
|
|
|
+
|
|
|
+Layer 13, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 236.875137
|
|
|
+ Converted tensor sum: 95.429146
|
|
|
+ Original tensor mean: 0.074023
|
|
|
+ Converted tensor mean: 0.029822
|
|
|
+ Mean difference: 0.11990514
|
|
|
+ Maximum pointwise difference: 3.69410872
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 4.10646772, Converted: 0.41235897
|
|
|
+ Biggest difference in row (0, 23, 6), sum 5.276991 vs -0.187177
|
|
|
+
|
|
|
+Layer 14, Token 21 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 782.813049
|
|
|
+ Converted tensor sum: 216.654602
|
|
|
+ Original tensor mean: 0.244629
|
|
|
+ Converted tensor mean: 0.067705
|
|
|
+ Mean difference: 0.29570371
|
|
|
+ Maximum pointwise difference: 5.94400930
|
|
|
+ Max difference location: (0, 28, 4, 1)
|
|
|
+ Values at max diff - Original: 5.97852135, Converted: 0.03451204
|
|
|
+ Biggest difference in row (0, 6, 1), sum 14.360078 vs 0.533817
|
|
|
+
|
|
|
+Layer 0, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 2.700914
|
|
|
+ Converted tensor sum: 8.066211
|
|
|
+ Original tensor mean: 0.000844
|
|
|
+ Converted tensor mean: 0.002521
|
|
|
+ Mean difference: 0.06021541
|
|
|
+ Maximum pointwise difference: 1.02617574
|
|
|
+ Max difference location: (0, 28, 9, 5)
|
|
|
+ Values at max diff - Original: 1.17021942, Converted: 0.14404365
|
|
|
+ Biggest difference in row (0, 4, 9), sum 1.758845 vs -0.049155
|
|
|
+
|
|
|
+Layer 1, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 9.402251
|
|
|
+ Converted tensor sum: 79.292084
|
|
|
+ Original tensor mean: 0.002938
|
|
|
+ Converted tensor mean: 0.024779
|
|
|
+ Mean difference: 0.09312414
|
|
|
+ Maximum pointwise difference: 1.86848283
|
|
|
+ Max difference location: (0, 14, 2, 2)
|
|
|
+ Values at max diff - Original: -0.01261259, Converted: 1.85587025
|
|
|
+ Biggest difference in row (0, 1, 8), sum -0.144765 vs 2.729439
|
|
|
+
|
|
|
+Layer 2, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 150.273865
|
|
|
+ Converted tensor sum: 102.280075
|
|
|
+ Original tensor mean: 0.046961
|
|
|
+ Converted tensor mean: 0.031963
|
|
|
+ Mean difference: 0.15655471
|
|
|
+ Maximum pointwise difference: 2.95679903
|
|
|
+ Max difference location: (0, 4, 8, 6)
|
|
|
+ Values at max diff - Original: -0.15384272, Converted: 2.80295634
|
|
|
+ Biggest difference in row (0, 10, 6), sum -0.449118 vs 3.435276
|
|
|
+
|
|
|
+Layer 4, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 109.346573
|
|
|
+ Converted tensor sum: 167.629913
|
|
|
+ Original tensor mean: 0.034171
|
|
|
+ Converted tensor mean: 0.052384
|
|
|
+ Mean difference: 0.12662907
|
|
|
+ Maximum pointwise difference: 2.69411635
|
|
|
+ Max difference location: (0, 19, 2, 0)
|
|
|
+ Values at max diff - Original: 0.00617844, Converted: 2.70029473
|
|
|
+ Biggest difference in row (0, 19, 2), sum -0.222631 vs 5.908413
|
|
|
+
|
|
|
+Layer 5, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 191.832321
|
|
|
+ Converted tensor sum: 202.874756
|
|
|
+ Original tensor mean: 0.059948
|
|
|
+ Converted tensor mean: 0.063398
|
|
|
+ Mean difference: 0.15467224
|
|
|
+ Maximum pointwise difference: 6.38972092
|
|
|
+ Max difference location: (0, 28, 6, 9)
|
|
|
+ Values at max diff - Original: 0.04361831, Converted: 6.43333912
|
|
|
+ Biggest difference in row (0, 28, 6), sum 0.738313 vs 17.286346
|
|
|
+
|
|
|
+Layer 6, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 304.042816
|
|
|
+ Converted tensor sum: 238.043579
|
|
|
+ Original tensor mean: 0.095013
|
|
|
+ Converted tensor mean: 0.074389
|
|
|
+ Mean difference: 0.15846148
|
|
|
+ Maximum pointwise difference: 3.40163994
|
|
|
+ Max difference location: (0, 12, 2, 1)
|
|
|
+ Values at max diff - Original: 1.29805720, Converted: 4.69969702
|
|
|
+ Biggest difference in row (0, 17, 7), sum 7.862279 vs 0.254134
|
|
|
+
|
|
|
+Layer 8, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 352.235718
|
|
|
+ Converted tensor sum: 277.930298
|
|
|
+ Original tensor mean: 0.110074
|
|
|
+ Converted tensor mean: 0.086853
|
|
|
+ Mean difference: 0.19249398
|
|
|
+ Maximum pointwise difference: 3.61912727
|
|
|
+ Max difference location: (0, 7, 2, 9)
|
|
|
+ Values at max diff - Original: 3.80060625, Converted: 0.18147889
|
|
|
+ Biggest difference in row (0, 21, 0), sum 9.710941 vs 0.797433
|
|
|
+
|
|
|
+Layer 9, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 273.245667
|
|
|
+ Converted tensor sum: 226.375031
|
|
|
+ Original tensor mean: 0.085389
|
|
|
+ Converted tensor mean: 0.070742
|
|
|
+ Mean difference: 0.14207596
|
|
|
+ Maximum pointwise difference: 2.82711124
|
|
|
+ Max difference location: (0, 14, 1, 2)
|
|
|
+ Values at max diff - Original: 0.05765805, Converted: 2.88476920
|
|
|
+ Biggest difference in row (0, 9, 5), sum 9.348074 vs 1.880102
|
|
|
+
|
|
|
+Layer 10, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 239.880463
|
|
|
+ Converted tensor sum: 275.399414
|
|
|
+ Original tensor mean: 0.074963
|
|
|
+ Converted tensor mean: 0.086062
|
|
|
+ Mean difference: 0.15027112
|
|
|
+ Maximum pointwise difference: 3.59689593
|
|
|
+ Max difference location: (0, 0, 7, 8)
|
|
|
+ Values at max diff - Original: -0.00771881, Converted: 3.58917713
|
|
|
+ Biggest difference in row (0, 24, 0), sum 0.303092 vs 7.643524
|
|
|
+
|
|
|
+Layer 12, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 327.704742
|
|
|
+ Converted tensor sum: 271.485931
|
|
|
+ Original tensor mean: 0.102408
|
|
|
+ Converted tensor mean: 0.084839
|
|
|
+ Mean difference: 0.17104822
|
|
|
+ Maximum pointwise difference: 4.17193794
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 4.78667879, Converted: 0.61474097
|
|
|
+ Biggest difference in row (0, 28, 3), sum 10.929213 vs 0.205626
|
|
|
+
|
|
|
+Layer 13, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 231.619003
|
|
|
+ Converted tensor sum: 232.506165
|
|
|
+ Original tensor mean: 0.072381
|
|
|
+ Converted tensor mean: 0.072658
|
|
|
+ Mean difference: 0.13752523
|
|
|
+ Maximum pointwise difference: 4.03583384
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 3.99545026, Converted: -0.04038341
|
|
|
+ Biggest difference in row (0, 11, 0), sum -0.083207 vs 6.010875
|
|
|
+
|
|
|
+Layer 14, Token 22 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 772.479431
|
|
|
+ Converted tensor sum: 607.419800
|
|
|
+ Original tensor mean: 0.241400
|
|
|
+ Converted tensor mean: 0.189819
|
|
|
+ Mean difference: 0.31881297
|
|
|
+ Maximum pointwise difference: 5.76619625
|
|
|
+ Max difference location: (0, 28, 4, 1)
|
|
|
+ Values at max diff - Original: 6.25043201, Converted: 0.48423576
|
|
|
+ Biggest difference in row (0, 6, 1), sum 14.585131 vs 0.928486
|
|
|
+
|
|
|
+Layer 0, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 4.463778
|
|
|
+ Converted tensor sum: 4.492921
|
|
|
+ Original tensor mean: 0.001395
|
|
|
+ Converted tensor mean: 0.001404
|
|
|
+ Mean difference: 0.06506675
|
|
|
+ Maximum pointwise difference: 1.83452773
|
|
|
+ Max difference location: (0, 1, 3, 5)
|
|
|
+ Values at max diff - Original: -0.04470510, Converted: 1.78982258
|
|
|
+ Biggest difference in row (0, 8, 9), sum 0.088014 vs -1.806111
|
|
|
+
|
|
|
+Layer 1, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 16.812580
|
|
|
+ Converted tensor sum: 109.310081
|
|
|
+ Original tensor mean: 0.005254
|
|
|
+ Converted tensor mean: 0.034159
|
|
|
+ Mean difference: 0.09598633
|
|
|
+ Maximum pointwise difference: 1.58349574
|
|
|
+ Max difference location: (0, 14, 2, 2)
|
|
|
+ Values at max diff - Original: -0.00151580, Converted: 1.58197999
|
|
|
+ Biggest difference in row (0, 31, 9), sum 0.029068 vs 3.659988
|
|
|
+
|
|
|
+Layer 2, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 75.151047
|
|
|
+ Converted tensor sum: 119.211670
|
|
|
+ Original tensor mean: 0.023485
|
|
|
+ Converted tensor mean: 0.037254
|
|
|
+ Mean difference: 0.13861641
|
|
|
+ Maximum pointwise difference: 2.43731642
|
|
|
+ Max difference location: (0, 1, 3, 2)
|
|
|
+ Values at max diff - Original: 0.08128840, Converted: 2.51860476
|
|
|
+ Biggest difference in row (0, 1, 3), sum 0.598150 vs 6.365501
|
|
|
+
|
|
|
+Layer 4, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 76.628754
|
|
|
+ Converted tensor sum: 155.459259
|
|
|
+ Original tensor mean: 0.023946
|
|
|
+ Converted tensor mean: 0.048581
|
|
|
+ Mean difference: 0.11704257
|
|
|
+ Maximum pointwise difference: 2.73834753
|
|
|
+ Max difference location: (0, 19, 9, 2)
|
|
|
+ Values at max diff - Original: 4.03167677, Converted: 1.29332936
|
|
|
+ Biggest difference in row (0, 8, 6), sum 0.016739 vs 3.865431
|
|
|
+
|
|
|
+Layer 5, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 150.354111
|
|
|
+ Converted tensor sum: 169.511383
|
|
|
+ Original tensor mean: 0.046986
|
|
|
+ Converted tensor mean: 0.052972
|
|
|
+ Mean difference: 0.12414169
|
|
|
+ Maximum pointwise difference: 4.08761406
|
|
|
+ Max difference location: (0, 28, 8, 6)
|
|
|
+ Values at max diff - Original: 3.60962462, Converted: -0.47798958
|
|
|
+ Biggest difference in row (0, 28, 6), sum 0.154782 vs 5.205485
|
|
|
+
|
|
|
+Layer 6, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 225.564255
|
|
|
+ Converted tensor sum: 262.560272
|
|
|
+ Original tensor mean: 0.070489
|
|
|
+ Converted tensor mean: 0.082050
|
|
|
+ Mean difference: 0.14410818
|
|
|
+ Maximum pointwise difference: 5.85085487
|
|
|
+ Max difference location: (0, 12, 6, 1)
|
|
|
+ Values at max diff - Original: 6.56323051, Converted: 0.71237558
|
|
|
+ Biggest difference in row (0, 12, 6), sum 15.410420 vs 3.070242
|
|
|
+
|
|
|
+Layer 8, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 216.401703
|
|
|
+ Converted tensor sum: 306.942932
|
|
|
+ Original tensor mean: 0.067626
|
|
|
+ Converted tensor mean: 0.095920
|
|
|
+ Mean difference: 0.14416558
|
|
|
+ Maximum pointwise difference: 3.46720839
|
|
|
+ Max difference location: (0, 22, 4, 7)
|
|
|
+ Values at max diff - Original: 3.77501345, Converted: 0.30780506
|
|
|
+ Biggest difference in row (0, 22, 4), sum 7.765969 vs 0.982070
|
|
|
+
|
|
|
+Layer 9, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 247.185196
|
|
|
+ Converted tensor sum: 250.177109
|
|
|
+ Original tensor mean: 0.077245
|
|
|
+ Converted tensor mean: 0.078180
|
|
|
+ Mean difference: 0.11487159
|
|
|
+ Maximum pointwise difference: 2.80121279
|
|
|
+ Max difference location: (0, 14, 2, 1)
|
|
|
+ Values at max diff - Original: 3.97450233, Converted: 1.17328954
|
|
|
+ Biggest difference in row (0, 28, 2), sum 8.960711 vs 2.242082
|
|
|
+
|
|
|
+Layer 10, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 193.715546
|
|
|
+ Converted tensor sum: 271.413574
|
|
|
+ Original tensor mean: 0.060536
|
|
|
+ Converted tensor mean: 0.084817
|
|
|
+ Mean difference: 0.13006650
|
|
|
+ Maximum pointwise difference: 3.03568482
|
|
|
+ Max difference location: (0, 0, 3, 7)
|
|
|
+ Values at max diff - Original: 3.30636239, Converted: 0.27067760
|
|
|
+ Biggest difference in row (0, 23, 3), sum 6.103652 vs 1.165035
|
|
|
+
|
|
|
+Layer 12, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 277.550171
|
|
|
+ Converted tensor sum: 296.251099
|
|
|
+ Original tensor mean: 0.086734
|
|
|
+ Converted tensor mean: 0.092578
|
|
|
+ Mean difference: 0.12569407
|
|
|
+ Maximum pointwise difference: 2.70571613
|
|
|
+ Max difference location: (0, 20, 2, 3)
|
|
|
+ Values at max diff - Original: 3.96422935, Converted: 1.25851309
|
|
|
+ Biggest difference in row (0, 30, 9), sum 8.374757 vs 2.683706
|
|
|
+
|
|
|
+Layer 13, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 189.736130
|
|
|
+ Converted tensor sum: 235.426422
|
|
|
+ Original tensor mean: 0.059293
|
|
|
+ Converted tensor mean: 0.073571
|
|
|
+ Mean difference: 0.09623930
|
|
|
+ Maximum pointwise difference: 3.40506268
|
|
|
+ Max difference location: (0, 17, 8, 2)
|
|
|
+ Values at max diff - Original: 4.38167763, Converted: 0.97661489
|
|
|
+ Biggest difference in row (0, 17, 8), sum 7.328513 vs 2.010616
|
|
|
+
|
|
|
+Layer 14, Token 23 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 508.593140
|
|
|
+ Converted tensor sum: 650.881714
|
|
|
+ Original tensor mean: 0.158935
|
|
|
+ Converted tensor mean: 0.203401
|
|
|
+ Mean difference: 0.21357311
|
|
|
+ Maximum pointwise difference: 4.58951044
|
|
|
+ Max difference location: (0, 28, 4, 1)
|
|
|
+ Values at max diff - Original: 0.01861674, Converted: 4.60812712
|
|
|
+ Biggest difference in row (0, 27, 6), sum -2.170214 vs 13.869398
|
|
|
+
|
|
|
+Layer 0, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 0.801011
|
|
|
+ Converted tensor sum: -1.634871
|
|
|
+ Original tensor mean: 0.000250
|
|
|
+ Converted tensor mean: -0.000511
|
|
|
+ Mean difference: 0.07332502
|
|
|
+ Maximum pointwise difference: 1.81247604
|
|
|
+ Max difference location: (0, 1, 3, 5)
|
|
|
+ Values at max diff - Original: -0.04395379, Converted: 1.76852226
|
|
|
+ Biggest difference in row (0, 25, 2), sum 0.205085 vs -3.009443
|
|
|
+
|
|
|
+Layer 1, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 23.350971
|
|
|
+ Converted tensor sum: 88.090744
|
|
|
+ Original tensor mean: 0.007297
|
|
|
+ Converted tensor mean: 0.027528
|
|
|
+ Mean difference: 0.08751559
|
|
|
+ Maximum pointwise difference: 1.07916749
|
|
|
+ Max difference location: (0, 20, 7, 8)
|
|
|
+ Values at max diff - Original: 1.04420257, Converted: -0.03496487
|
|
|
+ Biggest difference in row (0, 31, 5), sum -0.311075 vs 1.779173
|
|
|
+
|
|
|
+Layer 2, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 108.804047
|
|
|
+ Converted tensor sum: 87.620453
|
|
|
+ Original tensor mean: 0.034001
|
|
|
+ Converted tensor mean: 0.027381
|
|
|
+ Mean difference: 0.12934437
|
|
|
+ Maximum pointwise difference: 2.40617442
|
|
|
+ Max difference location: (0, 1, 2, 3)
|
|
|
+ Values at max diff - Original: 0.02315997, Converted: 2.42933440
|
|
|
+ Biggest difference in row (0, 27, 2), sum 3.832137 vs 0.454090
|
|
|
+
|
|
|
+Layer 4, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 89.705452
|
|
|
+ Converted tensor sum: 61.452301
|
|
|
+ Original tensor mean: 0.028033
|
|
|
+ Converted tensor mean: 0.019204
|
|
|
+ Mean difference: 0.11625614
|
|
|
+ Maximum pointwise difference: 3.20758009
|
|
|
+ Max difference location: (0, 19, 2, 9)
|
|
|
+ Values at max diff - Original: -0.01131610, Converted: 3.19626403
|
|
|
+ Biggest difference in row (0, 19, 9), sum 3.560462 vs 0.155535
|
|
|
+
|
|
|
+Layer 5, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 153.870117
|
|
|
+ Converted tensor sum: 79.160019
|
|
|
+ Original tensor mean: 0.048084
|
|
|
+ Converted tensor mean: 0.024738
|
|
|
+ Mean difference: 0.12364670
|
|
|
+ Maximum pointwise difference: 2.68913746
|
|
|
+ Max difference location: (0, 28, 3, 6)
|
|
|
+ Values at max diff - Original: 2.79144502, Converted: 0.10230768
|
|
|
+ Biggest difference in row (0, 28, 6), sum 0.099721 vs 5.709799
|
|
|
+
|
|
|
+Layer 6, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 230.254852
|
|
|
+ Converted tensor sum: 174.787750
|
|
|
+ Original tensor mean: 0.071955
|
|
|
+ Converted tensor mean: 0.054621
|
|
|
+ Mean difference: 0.15056056
|
|
|
+ Maximum pointwise difference: 6.34924650
|
|
|
+ Max difference location: (0, 12, 6, 1)
|
|
|
+ Values at max diff - Original: 6.46217585, Converted: 0.11292921
|
|
|
+ Biggest difference in row (0, 12, 6), sum 15.171618 vs 0.722292
|
|
|
+
|
|
|
+Layer 8, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 235.891174
|
|
|
+ Converted tensor sum: 145.097076
|
|
|
+ Original tensor mean: 0.073716
|
|
|
+ Converted tensor mean: 0.045343
|
|
|
+ Mean difference: 0.16653843
|
|
|
+ Maximum pointwise difference: 3.68727565
|
|
|
+ Max difference location: (0, 21, 9, 7)
|
|
|
+ Values at max diff - Original: -0.03629338, Converted: 3.65098238
|
|
|
+ Biggest difference in row (0, 22, 4), sum 6.845831 vs -0.173057
|
|
|
+
|
|
|
+Layer 9, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 230.641953
|
|
|
+ Converted tensor sum: 158.276245
|
|
|
+ Original tensor mean: 0.072076
|
|
|
+ Converted tensor mean: 0.049461
|
|
|
+ Mean difference: 0.13344021
|
|
|
+ Maximum pointwise difference: 2.99997020
|
|
|
+ Max difference location: (0, 28, 7, 0)
|
|
|
+ Values at max diff - Original: 3.18566871, Converted: 0.18569851
|
|
|
+ Biggest difference in row (0, 28, 7), sum 10.468034 vs 1.164585
|
|
|
+
|
|
|
+Layer 10, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 196.116974
|
|
|
+ Converted tensor sum: 120.883209
|
|
|
+ Original tensor mean: 0.061287
|
|
|
+ Converted tensor mean: 0.037776
|
|
|
+ Mean difference: 0.14279810
|
|
|
+ Maximum pointwise difference: 3.15166354
|
|
|
+ Max difference location: (0, 24, 0, 1)
|
|
|
+ Values at max diff - Original: 0.01040818, Converted: 3.16207170
|
|
|
+ Biggest difference in row (0, 24, 0), sum 0.920592 vs 9.820712
|
|
|
+
|
|
|
+Layer 12, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 263.522400
|
|
|
+ Converted tensor sum: 204.364563
|
|
|
+ Original tensor mean: 0.082351
|
|
|
+ Converted tensor mean: 0.063864
|
|
|
+ Mean difference: 0.15161198
|
|
|
+ Maximum pointwise difference: 3.57106376
|
|
|
+ Max difference location: (0, 30, 4, 9)
|
|
|
+ Values at max diff - Original: 0.28180352, Converted: 3.85286736
|
|
|
+ Biggest difference in row (0, 28, 3), sum 8.790596 vs 0.101635
|
|
|
+
|
|
|
+Layer 13, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 174.369919
|
|
|
+ Converted tensor sum: 135.311646
|
|
|
+ Original tensor mean: 0.054491
|
|
|
+ Converted tensor mean: 0.042285
|
|
|
+ Mean difference: 0.11190581
|
|
|
+ Maximum pointwise difference: 3.24499154
|
|
|
+ Max difference location: (0, 9, 2, 1)
|
|
|
+ Values at max diff - Original: -0.00551485, Converted: 3.23947668
|
|
|
+ Biggest difference in row (0, 9, 2), sum -0.041496 vs 5.267887
|
|
|
+
|
|
|
+Layer 14, Token 24 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 507.494324
|
|
|
+ Converted tensor sum: 360.428650
|
|
|
+ Original tensor mean: 0.158592
|
|
|
+ Converted tensor mean: 0.112634
|
|
|
+ Mean difference: 0.25558040
|
|
|
+ Maximum pointwise difference: 5.38855457
|
|
|
+ Max difference location: (0, 28, 6, 1)
|
|
|
+ Values at max diff - Original: 5.42326450, Converted: 0.03470971
|
|
|
+ Biggest difference in row (0, 28, 6), sum 14.975449 vs 0.420049
|
|
|
+
|
|
|
+Layer 0, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 3.754472
|
|
|
+ Converted tensor sum: -0.036336
|
|
|
+ Original tensor mean: 0.001173
|
|
|
+ Converted tensor mean: -0.000011
|
|
|
+ Mean difference: 0.07934358
|
|
|
+ Maximum pointwise difference: 1.86529565
|
|
|
+ Max difference location: (0, 1, 2, 3)
|
|
|
+ Values at max diff - Original: 1.82291889, Converted: -0.04237675
|
|
|
+ Biggest difference in row (0, 26, 9), sum -0.049344 vs -1.991895
|
|
|
+
|
|
|
+Layer 1, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 69.339890
|
|
|
+ Converted tensor sum: 74.604774
|
|
|
+ Original tensor mean: 0.021669
|
|
|
+ Converted tensor mean: 0.023314
|
|
|
+ Mean difference: 0.08100989
|
|
|
+ Maximum pointwise difference: 1.22147357
|
|
|
+ Max difference location: (0, 23, 0, 4)
|
|
|
+ Values at max diff - Original: 1.23978972, Converted: 0.01831620
|
|
|
+ Biggest difference in row (0, 20, 8), sum 2.595490 vs 0.385527
|
|
|
+
|
|
|
+Layer 2, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 122.554489
|
|
|
+ Converted tensor sum: 59.594086
|
|
|
+ Original tensor mean: 0.038298
|
|
|
+ Converted tensor mean: 0.018623
|
|
|
+ Mean difference: 0.14621988
|
|
|
+ Maximum pointwise difference: 3.03828931
|
|
|
+ Max difference location: (0, 8, 9, 3)
|
|
|
+ Values at max diff - Original: 3.01308012, Converted: -0.02520920
|
|
|
+ Biggest difference in row (0, 6, 7), sum 4.544618 vs 0.330778
|
|
|
+
|
|
|
+Layer 4, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 135.021027
|
|
|
+ Converted tensor sum: 31.374174
|
|
|
+ Original tensor mean: 0.042194
|
|
|
+ Converted tensor mean: 0.009804
|
|
|
+ Mean difference: 0.11780138
|
|
|
+ Maximum pointwise difference: 2.41319752
|
|
|
+ Max difference location: (0, 26, 6, 5)
|
|
|
+ Values at max diff - Original: -0.06945831, Converted: 2.34373927
|
|
|
+ Biggest difference in row (0, 20, 0), sum 5.919655 vs -0.125531
|
|
|
+
|
|
|
+Layer 5, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 151.868256
|
|
|
+ Converted tensor sum: 37.756584
|
|
|
+ Original tensor mean: 0.047459
|
|
|
+ Converted tensor mean: 0.011799
|
|
|
+ Mean difference: 0.11239365
|
|
|
+ Maximum pointwise difference: 2.04250264
|
|
|
+ Max difference location: (0, 28, 8, 6)
|
|
|
+ Values at max diff - Original: 1.64249492, Converted: -0.40000769
|
|
|
+ Biggest difference in row (0, 23, 0), sum 3.497306 vs 0.162423
|
|
|
+
|
|
|
+Layer 6, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 251.935211
|
|
|
+ Converted tensor sum: 40.890175
|
|
|
+ Original tensor mean: 0.078730
|
|
|
+ Converted tensor mean: 0.012778
|
|
|
+ Mean difference: 0.15190262
|
|
|
+ Maximum pointwise difference: 5.74138451
|
|
|
+ Max difference location: (0, 12, 6, 1)
|
|
|
+ Values at max diff - Original: 5.98834372, Converted: 0.24695921
|
|
|
+ Biggest difference in row (0, 12, 6), sum 13.863525 vs 0.418773
|
|
|
+
|
|
|
+Layer 8, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 253.027832
|
|
|
+ Converted tensor sum: 38.795532
|
|
|
+ Original tensor mean: 0.079071
|
|
|
+ Converted tensor mean: 0.012124
|
|
|
+ Mean difference: 0.15110740
|
|
|
+ Maximum pointwise difference: 2.77147269
|
|
|
+ Max difference location: (0, 21, 8, 9)
|
|
|
+ Values at max diff - Original: 2.86136007, Converted: 0.08988741
|
|
|
+ Biggest difference in row (0, 6, 2), sum 5.609079 vs -2.170572
|
|
|
+
|
|
|
+Layer 9, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 207.731750
|
|
|
+ Converted tensor sum: 52.985756
|
|
|
+ Original tensor mean: 0.064916
|
|
|
+ Converted tensor mean: 0.016558
|
|
|
+ Mean difference: 0.11516394
|
|
|
+ Maximum pointwise difference: 2.72221398
|
|
|
+ Max difference location: (0, 28, 7, 0)
|
|
|
+ Values at max diff - Original: 2.76798820, Converted: 0.04577418
|
|
|
+ Biggest difference in row (0, 28, 7), sum 9.256445 vs 0.484987
|
|
|
+
|
|
|
+Layer 10, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 196.952515
|
|
|
+ Converted tensor sum: 54.152390
|
|
|
+ Original tensor mean: 0.061548
|
|
|
+ Converted tensor mean: 0.016923
|
|
|
+ Mean difference: 0.12454510
|
|
|
+ Maximum pointwise difference: 2.34993958
|
|
|
+ Max difference location: (0, 10, 3, 5)
|
|
|
+ Values at max diff - Original: -0.00316075, Converted: 2.34677887
|
|
|
+ Biggest difference in row (0, 11, 6), sum 5.878725 vs 0.250239
|
|
|
+
|
|
|
+Layer 12, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 255.808289
|
|
|
+ Converted tensor sum: 65.224335
|
|
|
+ Original tensor mean: 0.079940
|
|
|
+ Converted tensor mean: 0.020383
|
|
|
+ Mean difference: 0.14238897
|
|
|
+ Maximum pointwise difference: 2.58750200
|
|
|
+ Max difference location: (0, 30, 8, 9)
|
|
|
+ Values at max diff - Original: -0.02865839, Converted: 2.55884361
|
|
|
+ Biggest difference in row (0, 28, 3), sum 8.769258 vs 0.354862
|
|
|
+
|
|
|
+Layer 13, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 166.242828
|
|
|
+ Converted tensor sum: 63.081795
|
|
|
+ Original tensor mean: 0.051951
|
|
|
+ Converted tensor mean: 0.019713
|
|
|
+ Mean difference: 0.10068022
|
|
|
+ Maximum pointwise difference: 2.70444345
|
|
|
+ Max difference location: (0, 26, 4, 0)
|
|
|
+ Values at max diff - Original: 2.70685434, Converted: 0.00241077
|
|
|
+ Biggest difference in row (0, 26, 4), sum 5.351704 vs -0.105821
|
|
|
+
|
|
|
+Layer 14, Token 25 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 542.257324
|
|
|
+ Converted tensor sum: 126.161835
|
|
|
+ Original tensor mean: 0.169455
|
|
|
+ Converted tensor mean: 0.039426
|
|
|
+ Mean difference: 0.22693451
|
|
|
+ Maximum pointwise difference: 4.91657877
|
|
|
+ Max difference location: (0, 28, 6, 1)
|
|
|
+ Values at max diff - Original: 5.17964792, Converted: 0.26306900
|
|
|
+ Biggest difference in row (0, 28, 6), sum 14.244452 vs 1.160758
|
|
|
+
|
|
|
+Layer 0, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 2.494154
|
|
|
+ Converted tensor sum: -0.022610
|
|
|
+ Original tensor mean: 0.000779
|
|
|
+ Converted tensor mean: -0.000007
|
|
|
+ Mean difference: 0.07249723
|
|
|
+ Maximum pointwise difference: 1.12537110
|
|
|
+ Max difference location: (0, 23, 8, 6)
|
|
|
+ Values at max diff - Original: -0.77736998, Converted: 0.34800115
|
|
|
+ Biggest difference in row (0, 25, 2), sum 0.139047 vs -2.260486
|
|
|
+
|
|
|
+Layer 1, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 89.948196
|
|
|
+ Converted tensor sum: 28.472143
|
|
|
+ Original tensor mean: 0.028109
|
|
|
+ Converted tensor mean: 0.008898
|
|
|
+ Mean difference: 0.08773426
|
|
|
+ Maximum pointwise difference: 1.21594334
|
|
|
+ Max difference location: (0, 31, 9, 5)
|
|
|
+ Values at max diff - Original: 1.12476408, Converted: -0.09117921
|
|
|
+ Biggest difference in row (0, 3, 0), sum 2.359989 vs -0.070505
|
|
|
+
|
|
|
+Layer 2, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 129.416809
|
|
|
+ Converted tensor sum: 41.503624
|
|
|
+ Original tensor mean: 0.040443
|
|
|
+ Converted tensor mean: 0.012970
|
|
|
+ Mean difference: 0.15461735
|
|
|
+ Maximum pointwise difference: 2.68493867
|
|
|
+ Max difference location: (0, 8, 8, 3)
|
|
|
+ Values at max diff - Original: 2.36720443, Converted: -0.31773427
|
|
|
+ Biggest difference in row (0, 27, 9), sum 4.510338 vs -0.361951
|
|
|
+
|
|
|
+Layer 4, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 167.357330
|
|
|
+ Converted tensor sum: 22.416847
|
|
|
+ Original tensor mean: 0.052299
|
|
|
+ Converted tensor mean: 0.007005
|
|
|
+ Mean difference: 0.12134697
|
|
|
+ Maximum pointwise difference: 2.10167456
|
|
|
+ Max difference location: (0, 27, 2, 5)
|
|
|
+ Values at max diff - Original: 2.16418123, Converted: 0.06250665
|
|
|
+ Biggest difference in row (0, 20, 0), sum 5.742605 vs 0.074519
|
|
|
+
|
|
|
+Layer 5, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 163.754578
|
|
|
+ Converted tensor sum: 25.965012
|
|
|
+ Original tensor mean: 0.051173
|
|
|
+ Converted tensor mean: 0.008114
|
|
|
+ Mean difference: 0.12282242
|
|
|
+ Maximum pointwise difference: 1.71204209
|
|
|
+ Max difference location: (0, 6, 7, 6)
|
|
|
+ Values at max diff - Original: 1.87962317, Converted: 0.16758111
|
|
|
+ Biggest difference in row (0, 6, 7), sum 6.898893 vs 0.817218
|
|
|
+
|
|
|
+Layer 6, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 280.407990
|
|
|
+ Converted tensor sum: 7.497489
|
|
|
+ Original tensor mean: 0.087628
|
|
|
+ Converted tensor mean: 0.002343
|
|
|
+ Mean difference: 0.16469882
|
|
|
+ Maximum pointwise difference: 5.09109163
|
|
|
+ Max difference location: (0, 12, 6, 1)
|
|
|
+ Values at max diff - Original: 5.84504795, Converted: 0.75395638
|
|
|
+ Biggest difference in row (0, 12, 6), sum 13.522006 vs 2.690509
|
|
|
+
|
|
|
+Layer 8, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 290.931335
|
|
|
+ Converted tensor sum: 24.817287
|
|
|
+ Original tensor mean: 0.090916
|
|
|
+ Converted tensor mean: 0.007755
|
|
|
+ Mean difference: 0.16735801
|
|
|
+ Maximum pointwise difference: 2.96624160
|
|
|
+ Max difference location: (0, 12, 7, 4)
|
|
|
+ Values at max diff - Original: 0.00615764, Converted: 2.97239923
|
|
|
+ Biggest difference in row (0, 6, 2), sum 5.174712 vs -3.075627
|
|
|
+
|
|
|
+Layer 9, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 196.708160
|
|
|
+ Converted tensor sum: 30.441196
|
|
|
+ Original tensor mean: 0.061471
|
|
|
+ Converted tensor mean: 0.009513
|
|
|
+ Mean difference: 0.11019707
|
|
|
+ Maximum pointwise difference: 2.66847897
|
|
|
+ Max difference location: (0, 28, 7, 0)
|
|
|
+ Values at max diff - Original: 2.53971243, Converted: -0.12876646
|
|
|
+ Biggest difference in row (0, 28, 7), sum 8.254028 vs 0.381486
|
|
|
+
|
|
|
+Layer 10, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 199.032516
|
|
|
+ Converted tensor sum: 15.679170
|
|
|
+ Original tensor mean: 0.062198
|
|
|
+ Converted tensor mean: 0.004900
|
|
|
+ Mean difference: 0.11978843
|
|
|
+ Maximum pointwise difference: 2.87448788
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 3.14507675, Converted: 0.27058893
|
|
|
+ Biggest difference in row (0, 25, 1), sum 5.510708 vs 0.187406
|
|
|
+
|
|
|
+Layer 12, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 260.372742
|
|
|
+ Converted tensor sum: 27.850517
|
|
|
+ Original tensor mean: 0.081366
|
|
|
+ Converted tensor mean: 0.008703
|
|
|
+ Mean difference: 0.15131992
|
|
|
+ Maximum pointwise difference: 3.11937833
|
|
|
+ Max difference location: (0, 29, 6, 5)
|
|
|
+ Values at max diff - Original: -0.00478183, Converted: 3.11459661
|
|
|
+ Biggest difference in row (0, 28, 3), sum 8.629121 vs -0.241569
|
|
|
+
|
|
|
+Layer 13, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 175.842209
|
|
|
+ Converted tensor sum: 31.150665
|
|
|
+ Original tensor mean: 0.054951
|
|
|
+ Converted tensor mean: 0.009735
|
|
|
+ Mean difference: 0.10132494
|
|
|
+ Maximum pointwise difference: 2.68282986
|
|
|
+ Max difference location: (0, 26, 4, 0)
|
|
|
+ Values at max diff - Original: 2.69746804, Converted: 0.01463811
|
|
|
+ Biggest difference in row (0, 26, 4), sum 5.839348 vs 0.118608
|
|
|
+
|
|
|
+Layer 14, Token 26 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 549.098877
|
|
|
+ Converted tensor sum: 57.239769
|
|
|
+ Original tensor mean: 0.171593
|
|
|
+ Converted tensor mean: 0.017887
|
|
|
+ Mean difference: 0.23359555
|
|
|
+ Maximum pointwise difference: 4.78898478
|
|
|
+ Max difference location: (0, 28, 6, 1)
|
|
|
+ Values at max diff - Original: 4.82380438, Converted: 0.03481963
|
|
|
+ Biggest difference in row (0, 28, 6), sum 13.322067 vs -0.096704
|
|
|
+
|
|
|
+Layer 0, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 1.918821
|
|
|
+ Converted tensor sum: 4.296852
|
|
|
+ Original tensor mean: 0.000600
|
|
|
+ Converted tensor mean: 0.001343
|
|
|
+ Mean difference: 0.06445935
|
|
|
+ Maximum pointwise difference: 1.46873963
|
|
|
+ Max difference location: (0, 1, 3, 2)
|
|
|
+ Values at max diff - Original: -0.01301772, Converted: 1.45572186
|
|
|
+ Biggest difference in row (0, 28, 5), sum 0.223120 vs 1.905128
|
|
|
+
|
|
|
+Layer 1, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 160.952576
|
|
|
+ Converted tensor sum: 15.469984
|
|
|
+ Original tensor mean: 0.050298
|
|
|
+ Converted tensor mean: 0.004834
|
|
|
+ Mean difference: 0.10194612
|
|
|
+ Maximum pointwise difference: 1.58813882
|
|
|
+ Max difference location: (0, 10, 6, 8)
|
|
|
+ Values at max diff - Original: 1.63966167, Converted: 0.05152279
|
|
|
+ Biggest difference in row (0, 16, 1), sum 4.988435 vs 0.628698
|
|
|
+
|
|
|
+Layer 2, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 195.883148
|
|
|
+ Converted tensor sum: 23.802681
|
|
|
+ Original tensor mean: 0.061213
|
|
|
+ Converted tensor mean: 0.007438
|
|
|
+ Mean difference: 0.16412406
|
|
|
+ Maximum pointwise difference: 3.51121449
|
|
|
+ Max difference location: (0, 18, 2, 1)
|
|
|
+ Values at max diff - Original: 0.00709479, Converted: 3.51830935
|
|
|
+ Biggest difference in row (0, 0, 2), sum 7.858056 vs -0.148840
|
|
|
+
|
|
|
+Layer 4, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 233.660095
|
|
|
+ Converted tensor sum: 13.142452
|
|
|
+ Original tensor mean: 0.073019
|
|
|
+ Converted tensor mean: 0.004107
|
|
|
+ Mean difference: 0.12733760
|
|
|
+ Maximum pointwise difference: 2.84240961
|
|
|
+ Max difference location: (0, 27, 8, 5)
|
|
|
+ Values at max diff - Original: 2.76694965, Converted: -0.07546007
|
|
|
+ Biggest difference in row (0, 24, 1), sum 6.535775 vs 0.658166
|
|
|
+
|
|
|
+Layer 5, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 251.330231
|
|
|
+ Converted tensor sum: 21.526363
|
|
|
+ Original tensor mean: 0.078541
|
|
|
+ Converted tensor mean: 0.006727
|
|
|
+ Mean difference: 0.13129665
|
|
|
+ Maximum pointwise difference: 2.36431837
|
|
|
+ Max difference location: (0, 6, 2, 8)
|
|
|
+ Values at max diff - Original: 2.37356281, Converted: 0.00924453
|
|
|
+ Biggest difference in row (0, 6, 2), sum 8.637090 vs 0.102351
|
|
|
+
|
|
|
+Layer 6, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 362.387848
|
|
|
+ Converted tensor sum: -2.171665
|
|
|
+ Original tensor mean: 0.113246
|
|
|
+ Converted tensor mean: -0.000679
|
|
|
+ Mean difference: 0.18160143
|
|
|
+ Maximum pointwise difference: 5.93641853
|
|
|
+ Max difference location: (0, 12, 6, 1)
|
|
|
+ Values at max diff - Original: 5.75199318, Converted: -0.18442529
|
|
|
+ Biggest difference in row (0, 12, 6), sum 13.466440 vs 2.236503
|
|
|
+
|
|
|
+Layer 8, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 350.323914
|
|
|
+ Converted tensor sum: 19.725079
|
|
|
+ Original tensor mean: 0.109476
|
|
|
+ Converted tensor mean: 0.006164
|
|
|
+ Mean difference: 0.17721944
|
|
|
+ Maximum pointwise difference: 3.75930500
|
|
|
+ Max difference location: (0, 20, 0, 7)
|
|
|
+ Values at max diff - Original: 3.75676632, Converted: -0.00253879
|
|
|
+ Biggest difference in row (0, 13, 8), sum 9.584435 vs 0.181711
|
|
|
+
|
|
|
+Layer 9, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 240.779663
|
|
|
+ Converted tensor sum: 24.165503
|
|
|
+ Original tensor mean: 0.075244
|
|
|
+ Converted tensor mean: 0.007552
|
|
|
+ Mean difference: 0.11309086
|
|
|
+ Maximum pointwise difference: 2.43383050
|
|
|
+ Max difference location: (0, 28, 7, 0)
|
|
|
+ Values at max diff - Original: 2.44759488, Converted: 0.01376434
|
|
|
+ Biggest difference in row (0, 28, 7), sum 8.022928 vs 0.225877
|
|
|
+
|
|
|
+Layer 10, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 244.469070
|
|
|
+ Converted tensor sum: 12.286395
|
|
|
+ Original tensor mean: 0.076397
|
|
|
+ Converted tensor mean: 0.003839
|
|
|
+ Mean difference: 0.11746948
|
|
|
+ Maximum pointwise difference: 2.32974362
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 3.20926118, Converted: 0.87951756
|
|
|
+ Biggest difference in row (0, 11, 6), sum 6.942329 vs -0.007718
|
|
|
+
|
|
|
+Layer 12, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 306.749817
|
|
|
+ Converted tensor sum: 12.790400
|
|
|
+ Original tensor mean: 0.095859
|
|
|
+ Converted tensor mean: 0.003997
|
|
|
+ Mean difference: 0.15706061
|
|
|
+ Maximum pointwise difference: 3.82620597
|
|
|
+ Max difference location: (0, 14, 8, 1)
|
|
|
+ Values at max diff - Original: 3.90818167, Converted: 0.08197562
|
|
|
+ Biggest difference in row (0, 28, 3), sum 9.139596 vs 0.006271
|
|
|
+
|
|
|
+Layer 13, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 231.223206
|
|
|
+ Converted tensor sum: 21.992476
|
|
|
+ Original tensor mean: 0.072257
|
|
|
+ Converted tensor mean: 0.006873
|
|
|
+ Mean difference: 0.10150776
|
|
|
+ Maximum pointwise difference: 2.88272619
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 2.93226290, Converted: 0.04953665
|
|
|
+ Biggest difference in row (0, 26, 4), sum 5.809074 vs 0.116277
|
|
|
+
|
|
|
+Layer 14, Token 27 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 648.596985
|
|
|
+ Converted tensor sum: 37.038162
|
|
|
+ Original tensor mean: 0.202687
|
|
|
+ Converted tensor mean: 0.011574
|
|
|
+ Mean difference: 0.25362208
|
|
|
+ Maximum pointwise difference: 4.70936871
|
|
|
+ Max difference location: (0, 28, 6, 1)
|
|
|
+ Values at max diff - Original: 4.71021414, Converted: 0.00084528
|
|
|
+ Biggest difference in row (0, 27, 4), sum 11.931866 vs 0.099372
|
|
|
+
|
|
|
+Layer 0, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 3.825253
|
|
|
+ Converted tensor sum: 10.656538
|
|
|
+ Original tensor mean: 0.001195
|
|
|
+ Converted tensor mean: 0.003330
|
|
|
+ Mean difference: 0.06744900
|
|
|
+ Maximum pointwise difference: 1.23786223
|
|
|
+ Max difference location: (0, 1, 5, 3)
|
|
|
+ Values at max diff - Original: 1.16935611, Converted: -0.06850608
|
|
|
+ Biggest difference in row (0, 23, 7), sum -1.495719 vs 0.880324
|
|
|
+
|
|
|
+Layer 1, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 64.976830
|
|
|
+ Converted tensor sum: 30.582441
|
|
|
+ Original tensor mean: 0.020305
|
|
|
+ Converted tensor mean: 0.009557
|
|
|
+ Mean difference: 0.08607832
|
|
|
+ Maximum pointwise difference: 1.46864974
|
|
|
+ Max difference location: (0, 16, 2, 9)
|
|
|
+ Values at max diff - Original: 1.56847525, Converted: 0.09982550
|
|
|
+ Biggest difference in row (0, 1, 3), sum 2.236484 vs -0.154611
|
|
|
+
|
|
|
+Layer 2, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 104.630646
|
|
|
+ Converted tensor sum: 53.524834
|
|
|
+ Original tensor mean: 0.032697
|
|
|
+ Converted tensor mean: 0.016727
|
|
|
+ Mean difference: 0.14054969
|
|
|
+ Maximum pointwise difference: 2.87744927
|
|
|
+ Max difference location: (0, 13, 1, 7)
|
|
|
+ Values at max diff - Original: 0.03716344, Converted: 2.91461277
|
|
|
+ Biggest difference in row (0, 23, 4), sum 0.081307 vs 4.303990
|
|
|
+
|
|
|
+Layer 4, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 192.219788
|
|
|
+ Converted tensor sum: 29.228979
|
|
|
+ Original tensor mean: 0.060069
|
|
|
+ Converted tensor mean: 0.009134
|
|
|
+ Mean difference: 0.12325959
|
|
|
+ Maximum pointwise difference: 4.08833027
|
|
|
+ Max difference location: (0, 19, 0, 2)
|
|
|
+ Values at max diff - Original: 4.01820278, Converted: -0.07012761
|
|
|
+ Biggest difference in row (0, 19, 0), sum 6.219261 vs -0.327518
|
|
|
+
|
|
|
+Layer 5, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 243.385864
|
|
|
+ Converted tensor sum: 23.409119
|
|
|
+ Original tensor mean: 0.076058
|
|
|
+ Converted tensor mean: 0.007315
|
|
|
+ Mean difference: 0.14056823
|
|
|
+ Maximum pointwise difference: 5.76254559
|
|
|
+ Max difference location: (0, 28, 9, 6)
|
|
|
+ Values at max diff - Original: 6.02726078, Converted: 0.26471528
|
|
|
+ Biggest difference in row (0, 28, 9), sum 9.890844 vs 0.406699
|
|
|
+
|
|
|
+Layer 6, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 283.616272
|
|
|
+ Converted tensor sum: 40.143700
|
|
|
+ Original tensor mean: 0.088630
|
|
|
+ Converted tensor mean: 0.012545
|
|
|
+ Mean difference: 0.16413040
|
|
|
+ Maximum pointwise difference: 4.72735071
|
|
|
+ Max difference location: (0, 12, 1, 2)
|
|
|
+ Values at max diff - Original: 4.75247860, Converted: 0.02512792
|
|
|
+ Biggest difference in row (0, 12, 1), sum 13.120539 vs 0.133712
|
|
|
+
|
|
|
+Layer 8, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 228.649261
|
|
|
+ Converted tensor sum: 44.837063
|
|
|
+ Original tensor mean: 0.071453
|
|
|
+ Converted tensor mean: 0.014012
|
|
|
+ Mean difference: 0.15679255
|
|
|
+ Maximum pointwise difference: 3.82907844
|
|
|
+ Max difference location: (0, 23, 4, 7)
|
|
|
+ Values at max diff - Original: 3.84108162, Converted: 0.01200324
|
|
|
+ Biggest difference in row (0, 1, 4), sum 8.901268 vs 0.416754
|
|
|
+
|
|
|
+Layer 9, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 212.272324
|
|
|
+ Converted tensor sum: 21.536671
|
|
|
+ Original tensor mean: 0.066335
|
|
|
+ Converted tensor mean: 0.006730
|
|
|
+ Mean difference: 0.11465029
|
|
|
+ Maximum pointwise difference: 2.85586047
|
|
|
+ Max difference location: (0, 15, 2, 3)
|
|
|
+ Values at max diff - Original: 2.84589958, Converted: -0.00996090
|
|
|
+ Biggest difference in row (0, 15, 2), sum 8.293229 vs 0.275981
|
|
|
+
|
|
|
+Layer 10, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 212.098206
|
|
|
+ Converted tensor sum: 19.835695
|
|
|
+ Original tensor mean: 0.066281
|
|
|
+ Converted tensor mean: 0.006199
|
|
|
+ Mean difference: 0.14262109
|
|
|
+ Maximum pointwise difference: 4.31178093
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 4.53196430, Converted: 0.22018313
|
|
|
+ Biggest difference in row (0, 10, 4), sum 9.766387 vs -0.072625
|
|
|
+
|
|
|
+Layer 12, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 280.744019
|
|
|
+ Converted tensor sum: 26.187149
|
|
|
+ Original tensor mean: 0.087733
|
|
|
+ Converted tensor mean: 0.008183
|
|
|
+ Mean difference: 0.15264840
|
|
|
+ Maximum pointwise difference: 4.41812420
|
|
|
+ Max difference location: (0, 21, 2, 4)
|
|
|
+ Values at max diff - Original: 4.41481018, Converted: -0.00331383
|
|
|
+ Biggest difference in row (0, 23, 2), sum 10.581321 vs 0.608111
|
|
|
+
|
|
|
+Layer 13, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 220.357834
|
|
|
+ Converted tensor sum: 20.228846
|
|
|
+ Original tensor mean: 0.068862
|
|
|
+ Converted tensor mean: 0.006322
|
|
|
+ Mean difference: 0.11583474
|
|
|
+ Maximum pointwise difference: 4.72553635
|
|
|
+ Max difference location: (0, 17, 8, 2)
|
|
|
+ Values at max diff - Original: 4.72810841, Converted: 0.00257226
|
|
|
+ Biggest difference in row (0, 19, 1), sum 9.879478 vs 0.388081
|
|
|
+
|
|
|
+Layer 14, Token 28 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 515.894897
|
|
|
+ Converted tensor sum: 74.440948
|
|
|
+ Original tensor mean: 0.161217
|
|
|
+ Converted tensor mean: 0.023263
|
|
|
+ Mean difference: 0.23548929
|
|
|
+ Maximum pointwise difference: 4.93366051
|
|
|
+ Max difference location: (0, 16, 7, 6)
|
|
|
+ Values at max diff - Original: 4.92017603, Converted: -0.01348470
|
|
|
+ Biggest difference in row (0, 28, 6), sum 14.032580 vs -0.061767
|
|
|
+
|
|
|
+Layer 0, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 7.490709
|
|
|
+ Converted tensor sum: 13.732031
|
|
|
+ Original tensor mean: 0.002341
|
|
|
+ Converted tensor mean: 0.004291
|
|
|
+ Mean difference: 0.06082471
|
|
|
+ Maximum pointwise difference: 1.43740010
|
|
|
+ Max difference location: (0, 1, 3, 3)
|
|
|
+ Values at max diff - Original: 1.39118814, Converted: -0.04621201
|
|
|
+ Biggest difference in row (0, 23, 1), sum -0.646684 vs 1.039518
|
|
|
+
|
|
|
+Layer 1, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 34.789967
|
|
|
+ Converted tensor sum: 32.546562
|
|
|
+ Original tensor mean: 0.010872
|
|
|
+ Converted tensor mean: 0.010171
|
|
|
+ Mean difference: 0.08757141
|
|
|
+ Maximum pointwise difference: 1.04371011
|
|
|
+ Max difference location: (0, 6, 1, 2)
|
|
|
+ Values at max diff - Original: 0.99249512, Converted: -0.05121503
|
|
|
+ Biggest difference in row (0, 3, 8), sum -0.976319 vs 2.533029
|
|
|
+
|
|
|
+Layer 2, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 81.188293
|
|
|
+ Converted tensor sum: 110.873352
|
|
|
+ Original tensor mean: 0.025371
|
|
|
+ Converted tensor mean: 0.034648
|
|
|
+ Mean difference: 0.13966069
|
|
|
+ Maximum pointwise difference: 2.45380425
|
|
|
+ Max difference location: (0, 13, 7, 1)
|
|
|
+ Values at max diff - Original: 0.05114410, Converted: 2.50494838
|
|
|
+ Biggest difference in row (0, 12, 1), sum 5.281791 vs 0.538119
|
|
|
+
|
|
|
+Layer 4, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 188.945206
|
|
|
+ Converted tensor sum: 82.802734
|
|
|
+ Original tensor mean: 0.059045
|
|
|
+ Converted tensor mean: 0.025876
|
|
|
+ Mean difference: 0.13653603
|
|
|
+ Maximum pointwise difference: 2.89840102
|
|
|
+ Max difference location: (0, 19, 0, 2)
|
|
|
+ Values at max diff - Original: 2.89151430, Converted: -0.00688672
|
|
|
+ Biggest difference in row (0, 19, 0), sum 4.444302 vs -0.202434
|
|
|
+
|
|
|
+Layer 5, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 234.074219
|
|
|
+ Converted tensor sum: 65.914871
|
|
|
+ Original tensor mean: 0.073148
|
|
|
+ Converted tensor mean: 0.020598
|
|
|
+ Mean difference: 0.14784601
|
|
|
+ Maximum pointwise difference: 3.25614643
|
|
|
+ Max difference location: (0, 28, 9, 6)
|
|
|
+ Values at max diff - Original: 3.74669981, Converted: 0.49055350
|
|
|
+ Biggest difference in row (0, 28, 9), sum 6.683680 vs 1.130066
|
|
|
+
|
|
|
+Layer 6, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 312.478729
|
|
|
+ Converted tensor sum: 136.998260
|
|
|
+ Original tensor mean: 0.097650
|
|
|
+ Converted tensor mean: 0.042812
|
|
|
+ Mean difference: 0.19563875
|
|
|
+ Maximum pointwise difference: 4.93519068
|
|
|
+ Max difference location: (0, 12, 6, 2)
|
|
|
+ Values at max diff - Original: 4.85506201, Converted: -0.08012870
|
|
|
+ Biggest difference in row (0, 12, 6), sum 14.484787 vs 2.203152
|
|
|
+
|
|
|
+Layer 8, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 249.388092
|
|
|
+ Converted tensor sum: 124.562820
|
|
|
+ Original tensor mean: 0.077934
|
|
|
+ Converted tensor mean: 0.038926
|
|
|
+ Mean difference: 0.18382950
|
|
|
+ Maximum pointwise difference: 3.92004848
|
|
|
+ Max difference location: (0, 20, 7, 0)
|
|
|
+ Values at max diff - Original: 0.21650003, Converted: 4.13654852
|
|
|
+ Biggest difference in row (0, 23, 4), sum 6.951686 vs -0.318011
|
|
|
+
|
|
|
+Layer 9, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 200.171021
|
|
|
+ Converted tensor sum: 82.927864
|
|
|
+ Original tensor mean: 0.062553
|
|
|
+ Converted tensor mean: 0.025915
|
|
|
+ Mean difference: 0.12187681
|
|
|
+ Maximum pointwise difference: 2.69074798
|
|
|
+ Max difference location: (0, 15, 2, 3)
|
|
|
+ Values at max diff - Original: 2.69794440, Converted: 0.00719635
|
|
|
+ Biggest difference in row (0, 15, 2), sum 7.941767 vs 0.050363
|
|
|
+
|
|
|
+Layer 10, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 213.368591
|
|
|
+ Converted tensor sum: 77.427185
|
|
|
+ Original tensor mean: 0.066678
|
|
|
+ Converted tensor mean: 0.024196
|
|
|
+ Mean difference: 0.13651104
|
|
|
+ Maximum pointwise difference: 3.13308334
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 3.58547378, Converted: 0.45239034
|
|
|
+ Biggest difference in row (0, 10, 4), sum 6.818930 vs -0.155169
|
|
|
+
|
|
|
+Layer 12, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 263.786377
|
|
|
+ Converted tensor sum: 92.682205
|
|
|
+ Original tensor mean: 0.082433
|
|
|
+ Converted tensor mean: 0.028963
|
|
|
+ Mean difference: 0.15690672
|
|
|
+ Maximum pointwise difference: 3.50486374
|
|
|
+ Max difference location: (0, 23, 2, 9)
|
|
|
+ Values at max diff - Original: 3.44645429, Converted: -0.05840937
|
|
|
+ Biggest difference in row (0, 23, 2), sum 9.830493 vs -0.231371
|
|
|
+
|
|
|
+Layer 13, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 193.539474
|
|
|
+ Converted tensor sum: 79.679726
|
|
|
+ Original tensor mean: 0.060481
|
|
|
+ Converted tensor mean: 0.024900
|
|
|
+ Mean difference: 0.11795644
|
|
|
+ Maximum pointwise difference: 3.62266445
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 3.49508691, Converted: -0.12757748
|
|
|
+ Biggest difference in row (0, 18, 1), sum 5.632851 vs -0.122056
|
|
|
+
|
|
|
+Layer 14, Token 29 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 525.021179
|
|
|
+ Converted tensor sum: 197.845932
|
|
|
+ Original tensor mean: 0.164069
|
|
|
+ Converted tensor mean: 0.061827
|
|
|
+ Mean difference: 0.25022614
|
|
|
+ Maximum pointwise difference: 4.42602730
|
|
|
+ Max difference location: (0, 15, 2, 8)
|
|
|
+ Values at max diff - Original: 4.48904753, Converted: 0.06302036
|
|
|
+ Biggest difference in row (0, 28, 6), sum 13.769245 vs 2.199155
|
|
|
+
|
|
|
+Layer 0, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 4.659326
|
|
|
+ Converted tensor sum: 10.953376
|
|
|
+ Original tensor mean: 0.001456
|
|
|
+ Converted tensor mean: 0.003423
|
|
|
+ Mean difference: 0.06142937
|
|
|
+ Maximum pointwise difference: 1.06926394
|
|
|
+ Max difference location: (0, 28, 5, 9)
|
|
|
+ Values at max diff - Original: -0.05087389, Converted: 1.01839006
|
|
|
+ Biggest difference in row (0, 4, 9), sum 2.534327 vs -0.105926
|
|
|
+
|
|
|
+Layer 1, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 24.136578
|
|
|
+ Converted tensor sum: 96.968475
|
|
|
+ Original tensor mean: 0.007543
|
|
|
+ Converted tensor mean: 0.030303
|
|
|
+ Mean difference: 0.08820312
|
|
|
+ Maximum pointwise difference: 1.49761820
|
|
|
+ Max difference location: (0, 6, 4, 4)
|
|
|
+ Values at max diff - Original: 0.06953955, Converted: 1.56715775
|
|
|
+ Biggest difference in row (0, 14, 2), sum 0.115400 vs 3.481205
|
|
|
+
|
|
|
+Layer 2, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 64.494400
|
|
|
+ Converted tensor sum: 246.552582
|
|
|
+ Original tensor mean: 0.020155
|
|
|
+ Converted tensor mean: 0.077048
|
|
|
+ Mean difference: 0.16151237
|
|
|
+ Maximum pointwise difference: 3.98919630
|
|
|
+ Max difference location: (0, 4, 8, 4)
|
|
|
+ Values at max diff - Original: -0.10013573, Converted: 3.88906050
|
|
|
+ Biggest difference in row (0, 23, 4), sum -0.108707 vs 7.892229
|
|
|
+
|
|
|
+Layer 4, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 190.921097
|
|
|
+ Converted tensor sum: 126.537048
|
|
|
+ Original tensor mean: 0.059663
|
|
|
+ Converted tensor mean: 0.039543
|
|
|
+ Mean difference: 0.13220279
|
|
|
+ Maximum pointwise difference: 2.87259126
|
|
|
+ Max difference location: (0, 8, 6, 5)
|
|
|
+ Values at max diff - Original: 0.00449362, Converted: 2.87708497
|
|
|
+ Biggest difference in row (0, 17, 9), sum 0.710816 vs 6.274773
|
|
|
+
|
|
|
+Layer 5, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 222.353195
|
|
|
+ Converted tensor sum: 164.720016
|
|
|
+ Original tensor mean: 0.069485
|
|
|
+ Converted tensor mean: 0.051475
|
|
|
+ Mean difference: 0.15598193
|
|
|
+ Maximum pointwise difference: 2.88562417
|
|
|
+ Max difference location: (0, 28, 9, 6)
|
|
|
+ Values at max diff - Original: 3.18444014, Converted: 0.29881600
|
|
|
+ Biggest difference in row (0, 30, 2), sum 0.004416 vs 6.153850
|
|
|
+
|
|
|
+Layer 6, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 339.244141
|
|
|
+ Converted tensor sum: 317.588440
|
|
|
+ Original tensor mean: 0.106014
|
|
|
+ Converted tensor mean: 0.099246
|
|
|
+ Mean difference: 0.21152201
|
|
|
+ Maximum pointwise difference: 4.30255318
|
|
|
+ Max difference location: (0, 6, 4, 8)
|
|
|
+ Values at max diff - Original: -0.19493943, Converted: 4.10761356
|
|
|
+ Biggest difference in row (0, 12, 6), sum 13.503227 vs 2.285058
|
|
|
+
|
|
|
+Layer 8, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 261.308044
|
|
|
+ Converted tensor sum: 204.488892
|
|
|
+ Original tensor mean: 0.081659
|
|
|
+ Converted tensor mean: 0.063903
|
|
|
+ Mean difference: 0.18225618
|
|
|
+ Maximum pointwise difference: 3.88148618
|
|
|
+ Max difference location: (0, 21, 7, 9)
|
|
|
+ Values at max diff - Original: 3.48627377, Converted: -0.39521238
|
|
|
+ Biggest difference in row (0, 2, 4), sum -0.009086 vs 6.555274
|
|
|
+
|
|
|
+Layer 9, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 187.010895
|
|
|
+ Converted tensor sum: 173.659409
|
|
|
+ Original tensor mean: 0.058441
|
|
|
+ Converted tensor mean: 0.054269
|
|
|
+ Mean difference: 0.12517925
|
|
|
+ Maximum pointwise difference: 2.68900180
|
|
|
+ Max difference location: (0, 15, 2, 3)
|
|
|
+ Values at max diff - Original: 2.59999108, Converted: -0.08901066
|
|
|
+ Biggest difference in row (0, 15, 2), sum 7.543541 vs 0.209705
|
|
|
+
|
|
|
+Layer 10, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 206.371735
|
|
|
+ Converted tensor sum: 145.950043
|
|
|
+ Original tensor mean: 0.064491
|
|
|
+ Converted tensor mean: 0.045609
|
|
|
+ Mean difference: 0.12893555
|
|
|
+ Maximum pointwise difference: 2.97875929
|
|
|
+ Max difference location: (0, 24, 1, 0)
|
|
|
+ Values at max diff - Original: 3.54119730, Converted: 0.56243801
|
|
|
+ Biggest difference in row (0, 11, 6), sum 5.982455 vs 0.632388
|
|
|
+
|
|
|
+Layer 12, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 251.250732
|
|
|
+ Converted tensor sum: 193.503662
|
|
|
+ Original tensor mean: 0.078516
|
|
|
+ Converted tensor mean: 0.060470
|
|
|
+ Mean difference: 0.14629500
|
|
|
+ Maximum pointwise difference: 3.24942660
|
|
|
+ Max difference location: (0, 28, 2, 4)
|
|
|
+ Values at max diff - Original: 3.09908056, Converted: -0.15034601
|
|
|
+ Biggest difference in row (0, 28, 3), sum 9.363594 vs -0.017764
|
|
|
+
|
|
|
+Layer 13, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 176.694855
|
|
|
+ Converted tensor sum: 165.849930
|
|
|
+ Original tensor mean: 0.055217
|
|
|
+ Converted tensor mean: 0.051828
|
|
|
+ Mean difference: 0.11395165
|
|
|
+ Maximum pointwise difference: 3.52955794
|
|
|
+ Max difference location: (0, 11, 4, 0)
|
|
|
+ Values at max diff - Original: 3.33610535, Converted: -0.19345257
|
|
|
+ Biggest difference in row (0, 8, 7), sum -0.009830 vs 4.540796
|
|
|
+
|
|
|
+Layer 14, Token 30 (recurrent cache comparison):
|
|
|
+ Original tensor sum: 562.166748
|
|
|
+ Converted tensor sum: 408.797607
|
|
|
+ Original tensor mean: 0.175677
|
|
|
+ Converted tensor mean: 0.127749
|
|
|
+ Mean difference: 0.25758758
|
|
|
+ Maximum pointwise difference: 4.45499659
|
|
|
+ Max difference location: (0, 15, 2, 8)
|
|
|
+ Values at max diff - Original: 4.37386942, Converted: -0.08112720
|
|
|
+ Biggest difference in row (0, 28, 6), sum 13.013643 vs -0.161676
|
|
|
+
|
|
|
+================================================================================
|
|
|
+Comparing q padded tensors...
|
|
|
+================================================================================
|
|
|
+
|
|
|
+Layer 0, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: 7.958682
|
|
|
+ Converted tensor sum: 7.958661
|
|
|
+ Original tensor mean: 0.000389
|
|
|
+ Converted tensor mean: 0.000389
|
|
|
+ Mean difference: 0.00000000
|
|
|
+ Maximum pointwise difference: 0.00000076
|
|
|
+ Max difference location: (0, 0, 0, 6)
|
|
|
+ Values at max diff - Original: -0.22316068, Converted: -0.22316144
|
|
|
+ Biggest difference in row (0, 0, 0), sum -0.570113 vs -0.570115
|
|
|
+Original tensor:
|
|
|
+
|
|
|
+[[[[ 1.97370015e-02 -7.89398551e-02 2.40650475e-02 ... -3.46655026e-02
|
|
|
+ -1.84459373e-01 1.35031175e-02]
|
|
|
+ [-3.90069596e-02 -6.45441562e-02 -9.85123310e-03 ... -7.10528418e-02
|
|
|
+ 2.86484748e-01 -4.78143468e-02]
|
|
|
+ [-3.32845971e-02 8.48600932e-04 -1.83281749e-02 ... -3.60261202e-02
|
|
|
+ 1.16759300e-01 -3.79200131e-02]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[ 1.97370015e-02 -7.89398551e-02 2.40650475e-02 ... -3.46655026e-02
|
|
|
+ -1.84459373e-01 1.35031175e-02]
|
|
|
+ [-3.90069596e-02 -6.45441562e-02 -9.85123310e-03 ... -7.10528418e-02
|
|
|
+ 2.86484748e-01 -4.78143468e-02]
|
|
|
+ [-3.32845971e-02 8.48600932e-04 -1.83281749e-02 ... -3.60261202e-02
|
|
|
+ 1.16759300e-01 -3.79200131e-02]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[-6.58135489e-02 2.45508045e-01 -1.13810226e-02 ... 2.54544546e-03
|
|
|
+ 2.51089204e-02 2.86987983e-04]
|
|
|
+ [-1.25565156e-01 -7.94792548e-02 -9.97955501e-02 ... 7.12259486e-02
|
|
|
+ 9.36590508e-02 -1.65728614e-01]
|
|
|
+ [-1.35633466e-03 -9.60636213e-02 -8.94494876e-02 ... 1.94221988e-01
|
|
|
+ -4.70091067e-02 -9.31773186e-02]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[-1.80936769e-01 2.09823474e-02 -1.53481111e-01 ... -6.53458312e-02
|
|
|
+ 9.94268879e-02 8.78875237e-03]
|
|
|
+ [-1.07081555e-01 1.26294538e-01 -9.78934765e-02 ... -5.38439713e-02
|
|
|
+ -5.59990015e-03 1.52285740e-01]
|
|
|
+ [ 2.60844707e-01 8.11591521e-02 1.12913184e-01 ... -1.86833683e-02
|
|
|
+ -1.93844642e-02 -7.96004198e-03]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[ 5.65589257e-02 -7.43661374e-02 -2.00723484e-01 ... 1.52545767e-02
|
|
|
+ 1.50462063e-02 2.35310309e-02]
|
|
|
+ [ 7.43804872e-02 -1.34884328e-01 2.01406017e-01 ... -9.13856328e-02
|
|
|
+ -5.48248030e-02 8.11865740e-03]
|
|
|
+ [ 1.52915101e-02 -1.20854350e-02 2.73873240e-01 ... -3.24299149e-02
|
|
|
+ -6.92289770e-02 -1.53110905e-05]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[ 5.65589257e-02 -7.43661374e-02 -2.00723484e-01 ... 1.52545767e-02
|
|
|
+ 1.50462063e-02 2.35310309e-02]
|
|
|
+ [ 7.43804872e-02 -1.34884328e-01 2.01406017e-01 ... -9.13856328e-02
|
|
|
+ -5.48248030e-02 8.11865740e-03]
|
|
|
+ [ 1.52915101e-02 -1.20854350e-02 2.73873240e-01 ... -3.24299149e-02
|
|
|
+ -6.92289770e-02 -1.53110905e-05]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]]]
|
|
|
+
|
|
|
+Converted tensor:
|
|
|
+
|
|
|
+[[[[ 1.97370723e-02 -7.89401382e-02 2.40651332e-02 ... -3.46656255e-02
|
|
|
+ -1.84460029e-01 1.35031650e-02]
|
|
|
+ [-3.90069783e-02 -6.45441785e-02 -9.85123683e-03 ... -7.10528716e-02
|
|
|
+ 2.86484867e-01 -4.78143729e-02]
|
|
|
+ [-3.32845971e-02 8.48600990e-04 -1.83281731e-02 ... -3.60261202e-02
|
|
|
+ 1.16759300e-01 -3.79200131e-02]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[ 1.97370723e-02 -7.89401382e-02 2.40651332e-02 ... -3.46656255e-02
|
|
|
+ -1.84460029e-01 1.35031650e-02]
|
|
|
+ [-3.90069783e-02 -6.45441785e-02 -9.85123683e-03 ... -7.10528716e-02
|
|
|
+ 2.86484867e-01 -4.78143729e-02]
|
|
|
+ [-3.32845971e-02 8.48600990e-04 -1.83281731e-02 ... -3.60261202e-02
|
|
|
+ 1.16759300e-01 -3.79200131e-02]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[-6.58135936e-02 2.45508194e-01 -1.13810301e-02 ... 2.54544709e-03
|
|
|
+ 2.51089353e-02 2.86988186e-04]
|
|
|
+ [-1.25565395e-01 -7.94794038e-02 -9.97957364e-02 ... 7.12260827e-02
|
|
|
+ 9.36592296e-02 -1.65728927e-01]
|
|
|
+ [-1.35633559e-03 -9.60636735e-02 -8.94495398e-02 ... 1.94222078e-01
|
|
|
+ -4.70091291e-02 -9.31773633e-02]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[-1.80937156e-01 2.09823940e-02 -1.53481439e-01 ... -6.53459728e-02
|
|
|
+ 9.94271040e-02 8.78877100e-03]
|
|
|
+ [-1.07081644e-01 1.26294628e-01 -9.78935510e-02 ... -5.38440198e-02
|
|
|
+ -5.59990434e-03 1.52285874e-01]
|
|
|
+ [ 2.60844767e-01 8.11591670e-02 1.12913206e-01 ... -1.86833721e-02
|
|
|
+ -1.93844680e-02 -7.96004292e-03]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[ 5.65591007e-02 -7.43663609e-02 -2.00724110e-01 ... 1.52546223e-02
|
|
|
+ 1.50462529e-02 2.35311035e-02]
|
|
|
+ [ 7.43805990e-02 -1.34884506e-01 2.01406300e-01 ... -9.13857669e-02
|
|
|
+ -5.48248850e-02 8.11866950e-03]
|
|
|
+ [ 1.52915157e-02 -1.20854378e-02 2.73873329e-01 ... -3.24299261e-02
|
|
|
+ -6.92289993e-02 -1.53110959e-05]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]
|
|
|
+
|
|
|
+ [[ 5.65591007e-02 -7.43663609e-02 -2.00724110e-01 ... 1.52546223e-02
|
|
|
+ 1.50462529e-02 2.35311035e-02]
|
|
|
+ [ 7.43805990e-02 -1.34884506e-01 2.01406300e-01 ... -9.13857669e-02
|
|
|
+ -5.48248850e-02 8.11866950e-03]
|
|
|
+ [ 1.52915157e-02 -1.20854378e-02 2.73873329e-01 ... -3.24299261e-02
|
|
|
+ -6.92289993e-02 -1.53110959e-05]
|
|
|
+ ...
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]
|
|
|
+ [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
|
|
|
+ 0.00000000e+00 0.00000000e+00]]]]
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+Layer 1, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: 8.938188
|
|
|
+ Converted tensor sum: 8.938201
|
|
|
+ Original tensor mean: 0.000436
|
|
|
+ Converted tensor mean: 0.000436
|
|
|
+ Mean difference: 0.00000001
|
|
|
+ Maximum pointwise difference: 0.00000305
|
|
|
+ Max difference location: (0, 16, 0, 8)
|
|
|
+ Values at max diff - Original: 0.24272950, Converted: 0.24273255
|
|
|
+ Biggest difference in row (0, 16, 0), sum 0.509919 vs 0.509925
|
|
|
+
|
|
|
+Layer 2, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -2.985352
|
|
|
+ Converted tensor sum: -2.985393
|
|
|
+ Original tensor mean: -0.000146
|
|
|
+ Converted tensor mean: -0.000146
|
|
|
+ Mean difference: 0.00000001
|
|
|
+ Maximum pointwise difference: 0.00000104
|
|
|
+ Max difference location: (0, 12, 3, 1)
|
|
|
+ Values at max diff - Original: -0.02719286, Converted: -0.02719390
|
|
|
+ Biggest difference in row (0, 12, 3), sum -0.530951 vs -0.530954
|
|
|
+
|
|
|
+Layer 4, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -31.644516
|
|
|
+ Converted tensor sum: -31.643524
|
|
|
+ Original tensor mean: -0.001545
|
|
|
+ Converted tensor mean: -0.001545
|
|
|
+ Mean difference: 0.00000028
|
|
|
+ Maximum pointwise difference: 0.00006898
|
|
|
+ Max difference location: (0, 6, 3, 7)
|
|
|
+ Values at max diff - Original: 0.07510993, Converted: 0.07517891
|
|
|
+ Biggest difference in row (0, 6, 3), sum -0.645874 vs -0.645761
|
|
|
+
|
|
|
+Layer 5, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -30.684572
|
|
|
+ Converted tensor sum: -30.685047
|
|
|
+ Original tensor mean: -0.001498
|
|
|
+ Converted tensor mean: -0.001498
|
|
|
+ Mean difference: 0.00000021
|
|
|
+ Maximum pointwise difference: 0.00003881
|
|
|
+ Max difference location: (0, 30, 3, 0)
|
|
|
+ Values at max diff - Original: 0.03456598, Converted: 0.03452717
|
|
|
+ Biggest difference in row (0, 30, 3), sum -0.428461 vs -0.428590
|
|
|
+
|
|
|
+Layer 6, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -10.008605
|
|
|
+ Converted tensor sum: -10.014137
|
|
|
+ Original tensor mean: -0.000489
|
|
|
+ Converted tensor mean: -0.000489
|
|
|
+ Mean difference: 0.00000105
|
|
|
+ Maximum pointwise difference: 0.00017181
|
|
|
+ Max difference location: (0, 6, 2, 7)
|
|
|
+ Values at max diff - Original: 0.01523990, Converted: 0.01506809
|
|
|
+ Biggest difference in row (0, 2, 1), sum -0.388271 vs -0.388545
|
|
|
+
|
|
|
+Layer 8, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -36.801449
|
|
|
+ Converted tensor sum: -36.801811
|
|
|
+ Original tensor mean: -0.001797
|
|
|
+ Converted tensor mean: -0.001797
|
|
|
+ Mean difference: 0.00000098
|
|
|
+ Maximum pointwise difference: 0.00025206
|
|
|
+ Max difference location: (0, 20, 3, 1)
|
|
|
+ Values at max diff - Original: 0.04204723, Converted: 0.04179518
|
|
|
+ Biggest difference in row (0, 2, 0), sum -0.275884 vs -0.275609
|
|
|
+
|
|
|
+Layer 9, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -37.401527
|
|
|
+ Converted tensor sum: -37.397404
|
|
|
+ Original tensor mean: -0.001826
|
|
|
+ Converted tensor mean: -0.001826
|
|
|
+ Mean difference: 0.00000135
|
|
|
+ Maximum pointwise difference: 0.00026937
|
|
|
+ Max difference location: (0, 20, 2, 2)
|
|
|
+ Values at max diff - Original: 0.14496517, Converted: 0.14469580
|
|
|
+ Biggest difference in row (0, 20, 3), sum -0.264264 vs -0.264851
|
|
|
+
|
|
|
+Layer 10, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -43.546944
|
|
|
+ Converted tensor sum: -43.543182
|
|
|
+ Original tensor mean: -0.002126
|
|
|
+ Converted tensor mean: -0.002126
|
|
|
+ Mean difference: 0.00000175
|
|
|
+ Maximum pointwise difference: 0.00031144
|
|
|
+ Max difference location: (0, 0, 2, 5)
|
|
|
+ Values at max diff - Original: -0.03211254, Converted: -0.03180110
|
|
|
+ Biggest difference in row (0, 24, 3), sum -0.476393 vs -0.475955
|
|
|
+
|
|
|
+Layer 12, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -19.226507
|
|
|
+ Converted tensor sum: -19.226831
|
|
|
+ Original tensor mean: -0.000939
|
|
|
+ Converted tensor mean: -0.000939
|
|
|
+ Mean difference: 0.00000116
|
|
|
+ Maximum pointwise difference: 0.00020705
|
|
|
+ Max difference location: (0, 28, 2, 7)
|
|
|
+ Values at max diff - Original: 0.06080329, Converted: 0.06101035
|
|
|
+ Biggest difference in row (0, 14, 3), sum -0.455543 vs -0.455054
|
|
|
+
|
|
|
+Layer 13, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -36.510368
|
|
|
+ Converted tensor sum: -36.510063
|
|
|
+ Original tensor mean: -0.001783
|
|
|
+ Converted tensor mean: -0.001783
|
|
|
+ Mean difference: 0.00000135
|
|
|
+ Maximum pointwise difference: 0.00022900
|
|
|
+ Max difference location: (0, 16, 2, 1)
|
|
|
+ Values at max diff - Original: -0.03357363, Converted: -0.03334463
|
|
|
+ Biggest difference in row (0, 18, 2), sum -0.183418 vs -0.183802
|
|
|
+
|
|
|
+Layer 14, Token 1 (q padded comparison):
|
|
|
+ Original tensor sum: -15.543186
|
|
|
+ Converted tensor sum: -15.543753
|
|
|
+ Original tensor mean: -0.000759
|
|
|
+ Converted tensor mean: -0.000759
|
|
|
+ Mean difference: 0.00000116
|
|
|
+ Maximum pointwise difference: 0.00036725
|
|
|
+ Max difference location: (0, 4, 2, 2)
|
|
|
+ Values at max diff - Original: 0.05589651, Converted: 0.05552926
|
|
|
+ Biggest difference in row (0, 18, 1), sum -0.470654 vs -0.470283
|
|
|
+
|
|
|
+================================================================================
|
|
|
+Comparing k padded tensors...
|
|
|
+================================================================================
|
|
|
+
|
|
|
+Layer 0, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -12.851240
|
|
|
+ Converted tensor sum: -12.851334
|
|
|
+ Original tensor mean: -0.000628
|
|
|
+ Converted tensor mean: -0.000628
|
|
|
+ Mean difference: 0.00000002
|
|
|
+ Maximum pointwise difference: 0.00000304
|
|
|
+ Max difference location: (0, 24, 0, 7)
|
|
|
+ Values at max diff - Original: -0.57623452, Converted: -0.57623756
|
|
|
+ Biggest difference in row (0, 24, 0), sum -1.467058 vs -1.467066
|
|
|
+Original tensor:
|
|
|
+
|
|
|
+[[[[-0.0023386 0.00352692 -0.13370702 ... -0.18872206 0.09370422
|
|
|
+ -0.04139194]
|
|
|
+ [ 0.09375711 0.09519143 0.04368615 ... -0.17057192 -0.09237721
|
|
|
+ 0.09026651]
|
|
|
+ [ 0.19408916 -0.1052211 -0.5198605 ... -0.35431755 -0.18219906
|
|
|
+ -0.31666332]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.0023386 0.00352692 -0.13370702 ... -0.18872206 0.09370422
|
|
|
+ -0.04139194]
|
|
|
+ [ 0.09375711 0.09519143 0.04368615 ... -0.17057192 -0.09237721
|
|
|
+ 0.09026651]
|
|
|
+ [ 0.19408916 -0.1052211 -0.5198605 ... -0.35431755 -0.18219906
|
|
|
+ -0.31666332]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.52186674 -0.28046784 -0.03100401 ... 0.12330638 -0.17640771
|
|
|
+ -0.10358577]
|
|
|
+ [-0.4391339 -0.25189647 0.12411524 ... -0.04670377 0.4796994
|
|
|
+ 0.13396528]
|
|
|
+ [ 0.80941254 0.33414015 0.10742755 ... -0.17197518 -0.16508798
|
|
|
+ -0.20685418]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[-0.02867949 0.05648347 0.01508509 ... 0.7403576 -0.30081272
|
|
|
+ 0.31962797]
|
|
|
+ [ 0.07382206 -0.05249733 0.05087741 ... 0.8205082 -0.03774351
|
|
|
+ 0.4122186 ]
|
|
|
+ [-0.10616651 -0.07183579 -0.02862857 ... 0.13253474 0.73543155
|
|
|
+ 0.63596827]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.949689 -0.00939775 -0.0047697 ... -0.04689857 -0.0884609
|
|
|
+ -0.20121996]
|
|
|
+ [ 0.9700847 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
|
|
|
+ -0.14288443]
|
|
|
+ [-0.20942387 -0.21343033 -0.00624497 ... 0.05516734 -0.33565474
|
|
|
+ 0.75833493]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.949689 -0.00939775 -0.0047697 ... -0.04689857 -0.0884609
|
|
|
+ -0.20121996]
|
|
|
+ [ 0.9700847 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
|
|
|
+ -0.14288443]
|
|
|
+ [-0.20942387 -0.21343033 -0.00624497 ... 0.05516734 -0.33565474
|
|
|
+ 0.75833493]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]]]
|
|
|
+
|
|
|
+Converted tensor:
|
|
|
+
|
|
|
+[[[[-0.0023386 0.00352692 -0.13370706 ... -0.18872213 0.09370426
|
|
|
+ -0.04139195]
|
|
|
+ [ 0.09375713 0.09519145 0.04368616 ... -0.17057195 -0.09237722
|
|
|
+ 0.09026653]
|
|
|
+ [ 0.19408953 -0.10522129 -0.5198614 ... -0.3543182 -0.18219939
|
|
|
+ -0.31666392]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.0023386 0.00352692 -0.13370706 ... -0.18872213 0.09370426
|
|
|
+ -0.04139195]
|
|
|
+ [ 0.09375713 0.09519145 0.04368616 ... -0.17057195 -0.09237722
|
|
|
+ 0.09026653]
|
|
|
+ [ 0.19408953 -0.10522129 -0.5198614 ... -0.3543182 -0.18219939
|
|
|
+ -0.31666392]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.5218679 -0.28046846 -0.03100408 ... 0.12330665 -0.1764081
|
|
|
+ -0.10358601]
|
|
|
+ [-0.43913472 -0.25189692 0.12411546 ... -0.04670386 0.47970027
|
|
|
+ 0.1339655 ]
|
|
|
+ [ 0.80941284 0.33414027 0.10742759 ... -0.17197524 -0.16508804
|
|
|
+ -0.20685425]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[-0.02867951 0.0564835 0.0150851 ... 0.74035805 -0.30081287
|
|
|
+ 0.31962818]
|
|
|
+ [ 0.07382207 -0.05249734 0.05087743 ... 0.82050836 -0.03774352
|
|
|
+ 0.41221875]
|
|
|
+ [-0.10616651 -0.07183579 -0.02862857 ... 0.13253474 0.73543155
|
|
|
+ 0.6359683 ]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.94968927 -0.00939775 -0.0047697 ... -0.04689858 -0.08846093
|
|
|
+ -0.20122004]
|
|
|
+ [ 0.97008485 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
|
|
|
+ -0.14288445]
|
|
|
+ [-0.20942406 -0.21343052 -0.00624497 ... 0.05516739 -0.33565506
|
|
|
+ 0.7583357 ]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.94968927 -0.00939775 -0.0047697 ... -0.04689858 -0.08846093
|
|
|
+ -0.20122004]
|
|
|
+ [ 0.97008485 -0.03739532 -0.04046015 ... -0.0640891 -0.11664858
|
|
|
+ -0.14288445]
|
|
|
+ [-0.20942406 -0.21343052 -0.00624497 ... 0.05516739 -0.33565506
|
|
|
+ 0.7583357 ]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]]]
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+Layer 1, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: 46.146324
|
|
|
+ Converted tensor sum: 46.146336
|
|
|
+ Original tensor mean: 0.002253
|
|
|
+ Converted tensor mean: 0.002253
|
|
|
+ Mean difference: 0.00000002
|
|
|
+ Maximum pointwise difference: 0.00001496
|
|
|
+ Max difference location: (0, 24, 0, 4)
|
|
|
+ Values at max diff - Original: -0.75322348, Converted: -0.75323844
|
|
|
+ Biggest difference in row (0, 4, 0), sum -1.893247 vs -1.893263
|
|
|
+
|
|
|
+Layer 2, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: 38.402348
|
|
|
+ Converted tensor sum: 38.402321
|
|
|
+ Original tensor mean: 0.001875
|
|
|
+ Converted tensor mean: 0.001875
|
|
|
+ Mean difference: 0.00000002
|
|
|
+ Maximum pointwise difference: 0.00000370
|
|
|
+ Max difference location: (0, 4, 0, 1)
|
|
|
+ Values at max diff - Original: 0.75365573, Converted: 0.75365943
|
|
|
+ Biggest difference in row (0, 8, 0), sum -1.569355 vs -1.569358
|
|
|
+
|
|
|
+Layer 4, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -80.321693
|
|
|
+ Converted tensor sum: -80.319084
|
|
|
+ Original tensor mean: -0.003922
|
|
|
+ Converted tensor mean: -0.003922
|
|
|
+ Mean difference: 0.00000094
|
|
|
+ Maximum pointwise difference: 0.00016582
|
|
|
+ Max difference location: (0, 12, 1, 2)
|
|
|
+ Values at max diff - Original: 0.42303348, Converted: 0.42286766
|
|
|
+ Biggest difference in row (0, 4, 3), sum -0.373179 vs -0.372919
|
|
|
+
|
|
|
+Layer 5, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -101.494308
|
|
|
+ Converted tensor sum: -101.496490
|
|
|
+ Original tensor mean: -0.004956
|
|
|
+ Converted tensor mean: -0.004956
|
|
|
+ Mean difference: 0.00000073
|
|
|
+ Maximum pointwise difference: 0.00011382
|
|
|
+ Max difference location: (0, 8, 3, 4)
|
|
|
+ Values at max diff - Original: -0.06280152, Converted: -0.06291535
|
|
|
+ Biggest difference in row (0, 24, 2), sum -1.003613 vs -1.003973
|
|
|
+
|
|
|
+Layer 6, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -60.378914
|
|
|
+ Converted tensor sum: -60.399891
|
|
|
+ Original tensor mean: -0.002948
|
|
|
+ Converted tensor mean: -0.002949
|
|
|
+ Mean difference: 0.00000342
|
|
|
+ Maximum pointwise difference: 0.00096719
|
|
|
+ Max difference location: (0, 8, 1, 5)
|
|
|
+ Values at max diff - Original: 0.19049226, Converted: 0.19145945
|
|
|
+ Biggest difference in row (0, 20, 0), sum -1.118855 vs -1.120621
|
|
|
+
|
|
|
+Layer 8, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -61.474350
|
|
|
+ Converted tensor sum: -61.483994
|
|
|
+ Original tensor mean: -0.003002
|
|
|
+ Converted tensor mean: -0.003002
|
|
|
+ Mean difference: 0.00000346
|
|
|
+ Maximum pointwise difference: 0.00061786
|
|
|
+ Max difference location: (0, 8, 2, 7)
|
|
|
+ Values at max diff - Original: 0.35214049, Converted: 0.35275835
|
|
|
+ Biggest difference in row (0, 20, 3), sum -0.407597 vs -0.408426
|
|
|
+
|
|
|
+Layer 9, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -110.836624
|
|
|
+ Converted tensor sum: -110.841522
|
|
|
+ Original tensor mean: -0.005412
|
|
|
+ Converted tensor mean: -0.005412
|
|
|
+ Mean difference: 0.00000378
|
|
|
+ Maximum pointwise difference: 0.00051466
|
|
|
+ Max difference location: (0, 18, 1, 8)
|
|
|
+ Values at max diff - Original: 0.40876523, Converted: 0.40927988
|
|
|
+ Biggest difference in row (0, 28, 3), sum -0.911474 vs -0.910520
|
|
|
+
|
|
|
+Layer 10, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -90.985107
|
|
|
+ Converted tensor sum: -90.978966
|
|
|
+ Original tensor mean: -0.004443
|
|
|
+ Converted tensor mean: -0.004442
|
|
|
+ Mean difference: 0.00000465
|
|
|
+ Maximum pointwise difference: 0.00078443
|
|
|
+ Max difference location: (0, 18, 3, 6)
|
|
|
+ Values at max diff - Original: 0.38864151, Converted: 0.38785708
|
|
|
+ Biggest difference in row (0, 18, 3), sum -0.245571 vs -0.247415
|
|
|
+
|
|
|
+Layer 12, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -80.152397
|
|
|
+ Converted tensor sum: -80.143387
|
|
|
+ Original tensor mean: -0.003914
|
|
|
+ Converted tensor mean: -0.003913
|
|
|
+ Mean difference: 0.00000377
|
|
|
+ Maximum pointwise difference: 0.00053528
|
|
|
+ Max difference location: (0, 4, 2, 6)
|
|
|
+ Values at max diff - Original: 0.33732986, Converted: 0.33786514
|
|
|
+ Biggest difference in row (0, 26, 2), sum -2.083733 vs -2.084640
|
|
|
+
|
|
|
+Layer 13, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -149.692871
|
|
|
+ Converted tensor sum: -149.699692
|
|
|
+ Original tensor mean: -0.007309
|
|
|
+ Converted tensor mean: -0.007310
|
|
|
+ Mean difference: 0.00000382
|
|
|
+ Maximum pointwise difference: 0.00069700
|
|
|
+ Max difference location: (0, 24, 2, 1)
|
|
|
+ Values at max diff - Original: 0.03209215, Converted: 0.03139514
|
|
|
+ Biggest difference in row (0, 18, 3), sum -1.337807 vs -1.338803
|
|
|
+
|
|
|
+Layer 14, Token 1 (k padded comparison):
|
|
|
+ Original tensor sum: -158.503815
|
|
|
+ Converted tensor sum: -158.505280
|
|
|
+ Original tensor mean: -0.007739
|
|
|
+ Converted tensor mean: -0.007740
|
|
|
+ Mean difference: 0.00000406
|
|
|
+ Maximum pointwise difference: 0.00088650
|
|
|
+ Max difference location: (0, 18, 3, 0)
|
|
|
+ Values at max diff - Original: 0.31103787, Converted: 0.31192437
|
|
|
+ Biggest difference in row (0, 24, 2), sum -2.245067 vs -2.246189
|
|
|
+
|
|
|
+================================================================================
|
|
|
+Comparing v padded tensors...
|
|
|
+================================================================================
|
|
|
+
|
|
|
+Layer 0, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: 43.396095
|
|
|
+ Converted tensor sum: 43.396103
|
|
|
+ Original tensor mean: 0.002119
|
|
|
+ Converted tensor mean: 0.002119
|
|
|
+ Mean difference: 0.00000000
|
|
|
+ Maximum pointwise difference: 0.00000024
|
|
|
+ Max difference location: (0, 4, 3, 1)
|
|
|
+ Values at max diff - Original: 3.02466559, Converted: 3.02466583
|
|
|
+ Biggest difference in row (0, 4, 3), sum 4.080367 vs 4.080368
|
|
|
+Original tensor:
|
|
|
+
|
|
|
+[[[[ 0.29945952 0.07364164 0.00633647 ... -0.03352018 -0.13518293
|
|
|
+ -0.24422395]
|
|
|
+ [-0.06384649 0.34527305 0.05128174 ... 0.10202903 -0.27791512
|
|
|
+ -0.26350227]
|
|
|
+ [ 0.32036152 -0.10731668 -0.13258429 ... 0.7373227 -0.21349299
|
|
|
+ 0.09487297]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.09463742 0.3331761 0.04175158 ... -0.16139531 0.14495076
|
|
|
+ -0.23538315]
|
|
|
+ [ 0.0059099 -0.22937416 -0.01920018 ... -0.2725759 0.3779854
|
|
|
+ -0.25018957]
|
|
|
+ [-0.02874102 -0.1163442 -0.06129871 ... -0.24273473 -0.2218994
|
|
|
+ 0.09502672]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.01040334 -0.16231607 -0.19213551 ... 0.26839197 -0.14292948
|
|
|
+ -0.0833158 ]
|
|
|
+ [-0.22485131 -0.26889268 -0.03555897 ... -0.26755306 -0.27845183
|
|
|
+ -0.15565467]
|
|
|
+ [-0.27764964 2.820727 -0.24290419 ... 0.12924032 -0.22718066
|
|
|
+ 0.06345078]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[ 0.05224958 -0.27178496 0.02280007 ... -0.17813048 -0.00848302
|
|
|
+ 0.3436797 ]
|
|
|
+ [-0.23870829 0.0102903 0.09486482 ... -0.17058551 0.10059616
|
|
|
+ 0.45001176]
|
|
|
+ [-0.24846101 1.1912329 -0.26268318 ... 0.148858 0.10272522
|
|
|
+ 0.21719539]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.20595089 -0.02217443 0.01070492 ... 0.00675152 0.02506094
|
|
|
+ -0.0267982 ]
|
|
|
+ [-0.21499586 -0.25627282 -0.07001566 ... 0.00795406 -0.02202371
|
|
|
+ -0.01158573]
|
|
|
+ [ 0.04917984 -0.27141818 -0.26334 ... -0.09943416 0.03347556
|
|
|
+ 0.10718762]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.1791143 -0.0034847 0.9858279 ... 0.19559488 -0.0804936
|
|
|
+ -0.01883564]
|
|
|
+ [-0.17319466 0.07188834 -0.26032022 ... -0.04845351 -0.24498041
|
|
|
+ 0.12539098]
|
|
|
+ [ 0.00640415 -0.22212675 -0.22916575 ... -0.170733 0.5452839
|
|
|
+ -0.14139794]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]]]
|
|
|
+
|
|
|
+Converted tensor:
|
|
|
+
|
|
|
+[[[[ 0.29945952 0.07364164 0.00633647 ... -0.03352018 -0.13518293
|
|
|
+ -0.24422395]
|
|
|
+ [-0.06384649 0.34527302 0.05128174 ... 0.10202905 -0.27791512
|
|
|
+ -0.26350227]
|
|
|
+ [ 0.3203615 -0.10731667 -0.13258429 ... 0.7373226 -0.213493
|
|
|
+ 0.09487297]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.09463742 0.33317608 0.04175158 ... -0.16139533 0.14495076
|
|
|
+ -0.23538315]
|
|
|
+ [ 0.0059099 -0.22937416 -0.01920018 ... -0.27257589 0.3779854
|
|
|
+ -0.25018957]
|
|
|
+ [-0.02874102 -0.11634421 -0.06129871 ... -0.24273473 -0.22189939
|
|
|
+ 0.09502671]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.01040334 -0.16231604 -0.19213554 ... 0.268392 -0.14292948
|
|
|
+ -0.0833158 ]
|
|
|
+ [-0.22485131 -0.26889268 -0.03555898 ... -0.26755306 -0.27845183
|
|
|
+ -0.15565467]
|
|
|
+ [-0.27764964 2.820727 -0.24290417 ... 0.12924033 -0.22718067
|
|
|
+ 0.06345078]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ ...
|
|
|
+
|
|
|
+ [[ 0.05224958 -0.27178493 0.02280007 ... -0.17813048 -0.00848302
|
|
|
+ 0.34367973]
|
|
|
+ [-0.23870829 0.0102903 0.09486482 ... -0.17058551 0.10059617
|
|
|
+ 0.45001176]
|
|
|
+ [-0.248461 1.1912329 -0.26268318 ... 0.148858 0.10272522
|
|
|
+ 0.21719539]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[-0.20595089 -0.02217443 0.01070492 ... 0.00675152 0.02506094
|
|
|
+ -0.0267982 ]
|
|
|
+ [-0.21499586 -0.2562728 -0.07001566 ... 0.00795406 -0.02202371
|
|
|
+ -0.01158573]
|
|
|
+ [ 0.04917984 -0.27141815 -0.26334 ... -0.09943416 0.03347556
|
|
|
+ 0.10718761]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]
|
|
|
+
|
|
|
+ [[ 0.1791143 -0.0034847 0.985828 ... 0.19559486 -0.08049361
|
|
|
+ -0.01883564]
|
|
|
+ [-0.17319466 0.07188834 -0.2603202 ... -0.04845351 -0.24498038
|
|
|
+ 0.12539098]
|
|
|
+ [ 0.00640414 -0.22212675 -0.22916573 ... -0.17073299 0.5452839
|
|
|
+ -0.14139794]
|
|
|
+ ...
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]
|
|
|
+ [ 0. 0. 0. ... 0. 0.
|
|
|
+ 0. ]]]]
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+Layer 1, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: 64.583611
|
|
|
+ Converted tensor sum: 64.583618
|
|
|
+ Original tensor mean: 0.003153
|
|
|
+ Converted tensor mean: 0.003153
|
|
|
+ Mean difference: 0.00000000
|
|
|
+ Maximum pointwise difference: 0.00000083
|
|
|
+ Max difference location: (0, 31, 2, 0)
|
|
|
+ Values at max diff - Original: 1.71371531, Converted: 1.71371615
|
|
|
+ Biggest difference in row (0, 14, 2), sum 3.047640 vs 3.047641
|
|
|
+
|
|
|
+Layer 2, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: 79.718636
|
|
|
+ Converted tensor sum: 79.718628
|
|
|
+ Original tensor mean: 0.003893
|
|
|
+ Converted tensor mean: 0.003893
|
|
|
+ Mean difference: 0.00000002
|
|
|
+ Maximum pointwise difference: 0.00000691
|
|
|
+ Max difference location: (0, 3, 3, 0)
|
|
|
+ Values at max diff - Original: 3.08589840, Converted: 3.08589149
|
|
|
+ Biggest difference in row (0, 3, 3), sum 5.127280 vs 5.127275
|
|
|
+
|
|
|
+Layer 4, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -6.421658
|
|
|
+ Converted tensor sum: -6.417439
|
|
|
+ Original tensor mean: -0.000314
|
|
|
+ Converted tensor mean: -0.000313
|
|
|
+ Mean difference: 0.00000083
|
|
|
+ Maximum pointwise difference: 0.00020146
|
|
|
+ Max difference location: (0, 3, 3, 9)
|
|
|
+ Values at max diff - Original: 0.71459866, Converted: 0.71439719
|
|
|
+ Biggest difference in row (0, 2, 2), sum 1.330729 vs 1.330986
|
|
|
+
|
|
|
+Layer 5, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -22.732481
|
|
|
+ Converted tensor sum: -22.732681
|
|
|
+ Original tensor mean: -0.001110
|
|
|
+ Converted tensor mean: -0.001110
|
|
|
+ Mean difference: 0.00000057
|
|
|
+ Maximum pointwise difference: 0.00014561
|
|
|
+ Max difference location: (0, 5, 2, 8)
|
|
|
+ Values at max diff - Original: 0.86213899, Converted: 0.86199337
|
|
|
+ Biggest difference in row (0, 5, 2), sum 0.321165 vs 0.320951
|
|
|
+
|
|
|
+Layer 6, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: 79.420486
|
|
|
+ Converted tensor sum: 79.392494
|
|
|
+ Original tensor mean: 0.003878
|
|
|
+ Converted tensor mean: 0.003877
|
|
|
+ Mean difference: 0.00000437
|
|
|
+ Maximum pointwise difference: 0.00160646
|
|
|
+ Max difference location: (0, 28, 3, 8)
|
|
|
+ Values at max diff - Original: 3.32436800, Converted: 3.32276154
|
|
|
+ Biggest difference in row (0, 8, 2), sum 5.307434 vs 5.305095
|
|
|
+
|
|
|
+Layer 8, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: 56.337997
|
|
|
+ Converted tensor sum: 56.328655
|
|
|
+ Original tensor mean: 0.002751
|
|
|
+ Converted tensor mean: 0.002750
|
|
|
+ Mean difference: 0.00000345
|
|
|
+ Maximum pointwise difference: 0.00109446
|
|
|
+ Max difference location: (0, 27, 3, 8)
|
|
|
+ Values at max diff - Original: 1.29648387, Converted: 1.29538941
|
|
|
+ Biggest difference in row (0, 0, 2), sum 3.391128 vs 3.390095
|
|
|
+
|
|
|
+Layer 9, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -60.833374
|
|
|
+ Converted tensor sum: -60.822338
|
|
|
+ Original tensor mean: -0.002970
|
|
|
+ Converted tensor mean: -0.002970
|
|
|
+ Mean difference: 0.00000277
|
|
|
+ Maximum pointwise difference: 0.00082873
|
|
|
+ Max difference location: (0, 4, 2, 0)
|
|
|
+ Values at max diff - Original: 0.17745507, Converted: 0.17828380
|
|
|
+ Biggest difference in row (0, 29, 3), sum -0.619908 vs -0.618863
|
|
|
+
|
|
|
+Layer 10, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -61.881168
|
|
|
+ Converted tensor sum: -61.881893
|
|
|
+ Original tensor mean: -0.003022
|
|
|
+ Converted tensor mean: -0.003022
|
|
|
+ Mean difference: 0.00000326
|
|
|
+ Maximum pointwise difference: 0.00088513
|
|
|
+ Max difference location: (0, 18, 3, 1)
|
|
|
+ Values at max diff - Original: 0.75186056, Converted: 0.75097543
|
|
|
+ Biggest difference in row (0, 1, 2), sum -0.687588 vs -0.688463
|
|
|
+
|
|
|
+Layer 12, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -25.326912
|
|
|
+ Converted tensor sum: -25.328352
|
|
|
+ Original tensor mean: -0.001237
|
|
|
+ Converted tensor mean: -0.001237
|
|
|
+ Mean difference: 0.00000326
|
|
|
+ Maximum pointwise difference: 0.00108600
|
|
|
+ Max difference location: (0, 26, 1, 1)
|
|
|
+ Values at max diff - Original: 2.54334521, Converted: 2.54225922
|
|
|
+ Biggest difference in row (0, 16, 2), sum 1.421780 vs 1.420637
|
|
|
+
|
|
|
+Layer 13, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -76.935516
|
|
|
+ Converted tensor sum: -76.941040
|
|
|
+ Original tensor mean: -0.003757
|
|
|
+ Converted tensor mean: -0.003757
|
|
|
+ Mean difference: 0.00000263
|
|
|
+ Maximum pointwise difference: 0.00127554
|
|
|
+ Max difference location: (0, 19, 1, 3)
|
|
|
+ Values at max diff - Original: 2.36973763, Converted: 2.36846209
|
|
|
+ Biggest difference in row (0, 19, 1), sum 1.449438 vs 1.448400
|
|
|
+
|
|
|
+Layer 14, Token 1 (v padded comparison):
|
|
|
+ Original tensor sum: -45.008949
|
|
|
+ Converted tensor sum: -45.003647
|
|
|
+ Original tensor mean: -0.002198
|
|
|
+ Converted tensor mean: -0.002197
|
|
|
+ Mean difference: 0.00000327
|
|
|
+ Maximum pointwise difference: 0.00136590
|
|
|
+ Max difference location: (0, 28, 3, 5)
|
|
|
+ Values at max diff - Original: 2.56902742, Converted: 2.56766152
|
|
|
+ Biggest difference in row (0, 28, 3), sum 1.363533 vs 1.361795
|
|
|
+
|
|
|
+================================================================================
|
|
|
+SUMMARY:
|
|
|
+Total comparisons attempted: 876
|
|
|
+Successful comparisons: 875
|
|
|
+Failed comparisons: 1
|
|
|
+
|
|
|
+Maximum difference statistics:
|
|
|
+ Min max difference: 0.00000024
|
|
|
+ Max max difference: 235.55526733
|
|
|
+ Mean of max differences: 18.71273422
|
|
|
+ Median of max differences: 5.37744808
|
|
|
+ Comparisons with diff > 1e-5: 804/875
|