Qubitium's picture
Add files using upload-large-folder tool
9256224 verified
layer,module,loss,damp,time
0,mamba.in_proj,0.0001258380,0.05000,0.286
0,mamba.out_proj,0.0000173402,0.05000,0.455
0,shared_mlp.input_linear,0.0000861064,0.05000,0.240
0,shared_mlp.output_linear,0.0000102538,0.05000,0.674
1,mamba.in_proj,0.0002307769,0.05000,0.227
1,mamba.out_proj,0.0000034924,0.05000,0.458
1,shared_mlp.input_linear,0.0000974452,0.05000,0.205
1,shared_mlp.output_linear,0.0000052490,0.05000,0.540
2,mamba.in_proj,0.0001410381,0.05000,0.231
2,mamba.out_proj,0.0000031530,0.05000,0.450
2,shared_mlp.input_linear,0.0000965323,0.05000,0.210
2,shared_mlp.output_linear,0.0000053682,0.05000,0.543
3,mamba.in_proj,0.0001270221,0.05000,0.202
3,mamba.out_proj,0.0000033837,0.05000,0.400
3,shared_mlp.input_linear,0.0001021678,0.05000,0.199
3,shared_mlp.output_linear,0.0000057879,0.05000,0.521
4,mamba.in_proj,0.0002071996,0.05000,0.195
4,mamba.out_proj,0.0000036334,0.05000,0.388
4,shared_mlp.input_linear,0.0001022260,0.05000,0.194
4,shared_mlp.output_linear,0.0000063899,0.05000,0.512
5,mamba.in_proj,0.0002033731,0.05000,0.193
5,mamba.out_proj,0.0000034330,0.05000,0.401
5,shared_mlp.input_linear,0.0001129610,0.05000,0.191
5,shared_mlp.output_linear,0.0000063258,0.05000,0.545
6,mamba.in_proj,0.0003654321,0.05000,0.230
6,mamba.out_proj,0.0000033775,0.05000,0.486
6,shared_mlp.input_linear,0.0001360559,0.05000,0.213
6,shared_mlp.output_linear,0.0000060412,0.05000,0.545
7,mamba.in_proj,0.0004053373,0.05000,0.216
7,mamba.out_proj,0.0000055126,0.05000,0.454
7,shared_mlp.input_linear,0.0001280382,0.05000,0.226
7,shared_mlp.output_linear,0.0000071024,0.05000,0.620
8,mamba.in_proj,0.0002919914,0.05000,0.202
8,mamba.out_proj,0.0000050447,0.05000,0.418
8,shared_mlp.input_linear,0.0001211710,0.05000,0.222
8,shared_mlp.output_linear,0.0000083565,0.05000,0.506
9,mamba.in_proj,0.0002230314,0.05000,0.199
9,mamba.out_proj,0.0000084445,0.05000,0.387
9,shared_mlp.input_linear,0.0001267507,0.05000,0.204
9,shared_mlp.output_linear,0.0004081527,0.05000,0.488
10,self_attn.k_proj,0.0001623139,0.05000,0.703
10,self_attn.q_proj,0.0004302992,0.05000,0.708
10,self_attn.v_proj,0.0000837649,0.05000,0.728
10,self_attn.o_proj,0.0000061797,0.05000,0.189
10,shared_mlp.input_linear,0.0001317226,0.05000,0.245
10,shared_mlp.output_linear,0.0000118062,0.05000,0.508
11,mamba.in_proj,0.0002191654,0.05000,0.250
11,mamba.out_proj,0.0000069709,0.05000,0.387
11,shared_mlp.input_linear,0.0001327564,0.05000,0.191
11,shared_mlp.output_linear,0.0000129761,0.05000,0.504
12,mamba.in_proj,0.0001583058,0.05000,0.192
12,mamba.out_proj,0.0000090464,0.05000,0.366
12,shared_mlp.input_linear,0.0001088442,0.05000,0.186
12,shared_mlp.output_linear,0.0000085337,0.05000,0.509
13,self_attn.q_proj,0.0004986466,0.05000,0.665
13,self_attn.v_proj,0.0001023489,0.05000,0.674
13,self_attn.k_proj,0.0001610719,0.05000,0.686
13,self_attn.o_proj,0.0000056521,0.05000,0.182
13,shared_mlp.input_linear,0.0001161080,0.05000,0.187
13,shared_mlp.output_linear,0.0006525996,0.05000,0.515
14,mamba.in_proj,0.0001599891,0.05000,0.183
14,mamba.out_proj,0.0000064345,0.05000,0.369
14,shared_mlp.input_linear,0.0001056227,0.05000,0.195
14,shared_mlp.output_linear,0.0000098842,0.05000,0.515
15,mamba.in_proj,0.0002849224,0.05000,0.214
15,mamba.out_proj,0.0000060616,0.05000,0.371
15,shared_mlp.input_linear,0.0001007611,0.05000,0.191
15,shared_mlp.output_linear,0.0000087755,0.05000,0.517
16,mamba.in_proj,0.0001854963,0.05000,0.206
16,mamba.out_proj,0.0000095851,0.05000,0.388
16,shared_mlp.input_linear,0.0001092095,0.05000,0.205
16,shared_mlp.output_linear,0.0000084820,0.05000,0.522
17,self_attn.q_proj,0.0003317289,0.05000,0.671
17,self_attn.k_proj,0.0001080381,0.05000,0.674
17,self_attn.v_proj,0.0001242850,0.05000,0.683
17,self_attn.o_proj,0.0000111055,0.05000,0.204
17,shared_mlp.input_linear,0.0001189071,0.05000,0.217
17,shared_mlp.output_linear,0.0000128278,0.05000,0.530
18,mamba.in_proj,0.0002076188,0.05000,0.203
18,mamba.out_proj,0.0000061952,0.05000,0.415
18,shared_mlp.input_linear,0.0001260789,0.05000,0.211
18,shared_mlp.output_linear,0.0000106376,0.05000,0.512
19,mamba.in_proj,0.0001372509,0.05000,0.192
19,mamba.out_proj,0.0000068760,0.05000,0.411
19,shared_mlp.input_linear,0.0001471088,0.05000,0.220
19,shared_mlp.output_linear,0.0000125081,0.05000,0.585
20,mamba.in_proj,0.0002687636,0.05000,0.212
20,mamba.out_proj,0.0000089296,0.05000,0.442
20,shared_mlp.input_linear,0.0001799153,0.05000,0.206
20,shared_mlp.output_linear,0.0000169340,0.05000,0.531
21,mamba.in_proj,0.0006006442,0.05000,0.202
21,mamba.out_proj,0.0000078080,0.05000,0.382
21,shared_mlp.input_linear,0.0001918114,0.05000,0.210
21,shared_mlp.output_linear,0.0000210194,0.05000,0.540
22,mamba.in_proj,0.0005133608,0.05000,0.189
22,mamba.out_proj,0.0000163215,0.05000,0.368
22,shared_mlp.input_linear,0.0002294023,0.05000,0.194
22,shared_mlp.output_linear,0.0000293422,0.05000,0.518
23,mamba.in_proj,0.0004116132,0.05000,0.209
23,mamba.out_proj,0.0000160722,0.05000,0.371
23,shared_mlp.input_linear,0.0002273323,0.05000,0.213
23,shared_mlp.output_linear,0.0000252019,0.05000,0.522
24,mamba.in_proj,0.0004354424,0.05000,0.205
24,mamba.out_proj,0.0000135037,0.05000,0.377
24,shared_mlp.input_linear,0.0002166403,0.05000,0.202
24,shared_mlp.output_linear,0.0000253154,0.05000,0.520
25,mamba.in_proj,0.0003224494,0.05000,0.206
25,mamba.out_proj,0.0000250325,0.05000,0.394
25,shared_mlp.input_linear,0.0002728266,0.05000,0.200
25,shared_mlp.output_linear,0.0000373538,0.05000,0.484
26,mamba.in_proj,0.0003227580,0.05000,0.202
26,mamba.out_proj,0.0000455537,0.05000,0.364
26,shared_mlp.input_linear,0.0002892227,0.05000,0.198
26,shared_mlp.output_linear,0.0000530557,0.05000,0.521
27,self_attn.v_proj,0.0006113269,0.05000,0.535
27,self_attn.q_proj,0.0005311922,0.05000,0.576
27,self_attn.k_proj,0.0001448675,0.05000,0.588
27,self_attn.o_proj,0.0000536515,0.05000,0.189
27,shared_mlp.input_linear,0.0003787509,0.05000,0.190
27,shared_mlp.output_linear,0.0001093897,0.05000,0.516
28,mamba.in_proj,0.0019077307,0.05000,0.193
28,mamba.out_proj,0.0001343735,0.05000,0.371
28,shared_mlp.input_linear,0.0002967288,0.05000,0.192
28,shared_mlp.output_linear,0.0000957567,0.05000,0.513
29,mamba.in_proj,0.0001653794,0.05000,0.190
29,mamba.out_proj,0.0000795585,0.05000,0.366
29,shared_mlp.input_linear,0.0002558183,0.05000,0.192
29,shared_mlp.output_linear,0.0000595038,0.05000,0.520
30,mamba.in_proj,0.0000580291,0.05000,0.190
30,mamba.out_proj,0.0000578531,0.05000,0.372
30,shared_mlp.input_linear,0.0002475183,0.05000,0.193
30,shared_mlp.output_linear,0.0000483499,0.05000,0.514
31,mamba.in_proj,0.0000680402,0.05000,0.198
31,mamba.out_proj,0.0000516371,0.05000,0.372
31,shared_mlp.input_linear,0.0003959683,0.05000,0.197
31,shared_mlp.output_linear,0.0002103811,0.05000,0.511