| layer,module,loss,samples,damp,time | |
| 0,mamba.in_proj,0.0001258380,0.05000,0.286 | |
| 0,mamba.out_proj,0.0000173402,0.05000,0.455 | |
| 0,shared_mlp.input_linear,0.0000861064,0.05000,0.240 | |
| 0,shared_mlp.output_linear,0.0000102538,0.05000,0.674 | |
| 1,mamba.in_proj,0.0002307769,0.05000,0.227 | |
| 1,mamba.out_proj,0.0000034924,0.05000,0.458 | |
| 1,shared_mlp.input_linear,0.0000974452,0.05000,0.205 | |
| 1,shared_mlp.output_linear,0.0000052490,0.05000,0.540 | |
| 2,mamba.in_proj,0.0001410381,0.05000,0.231 | |
| 2,mamba.out_proj,0.0000031530,0.05000,0.450 | |
| 2,shared_mlp.input_linear,0.0000965323,0.05000,0.210 | |
| 2,shared_mlp.output_linear,0.0000053682,0.05000,0.543 | |
| 3,mamba.in_proj,0.0001270221,0.05000,0.202 | |
| 3,mamba.out_proj,0.0000033837,0.05000,0.400 | |
| 3,shared_mlp.input_linear,0.0001021678,0.05000,0.199 | |
| 3,shared_mlp.output_linear,0.0000057879,0.05000,0.521 | |
| 4,mamba.in_proj,0.0002071996,0.05000,0.195 | |
| 4,mamba.out_proj,0.0000036334,0.05000,0.388 | |
| 4,shared_mlp.input_linear,0.0001022260,0.05000,0.194 | |
| 4,shared_mlp.output_linear,0.0000063899,0.05000,0.512 | |
| 5,mamba.in_proj,0.0002033731,0.05000,0.193 | |
| 5,mamba.out_proj,0.0000034330,0.05000,0.401 | |
| 5,shared_mlp.input_linear,0.0001129610,0.05000,0.191 | |
| 5,shared_mlp.output_linear,0.0000063258,0.05000,0.545 | |
| 6,mamba.in_proj,0.0003654321,0.05000,0.230 | |
| 6,mamba.out_proj,0.0000033775,0.05000,0.486 | |
| 6,shared_mlp.input_linear,0.0001360559,0.05000,0.213 | |
| 6,shared_mlp.output_linear,0.0000060412,0.05000,0.545 | |
| 7,mamba.in_proj,0.0004053373,0.05000,0.216 | |
| 7,mamba.out_proj,0.0000055126,0.05000,0.454 | |
| 7,shared_mlp.input_linear,0.0001280382,0.05000,0.226 | |
| 7,shared_mlp.output_linear,0.0000071024,0.05000,0.620 | |
| 8,mamba.in_proj,0.0002919914,0.05000,0.202 | |
| 8,mamba.out_proj,0.0000050447,0.05000,0.418 | |
| 8,shared_mlp.input_linear,0.0001211710,0.05000,0.222 | |
| 8,shared_mlp.output_linear,0.0000083565,0.05000,0.506 | |
| 9,mamba.in_proj,0.0002230314,0.05000,0.199 | |
| 9,mamba.out_proj,0.0000084445,0.05000,0.387 | |
| 9,shared_mlp.input_linear,0.0001267507,0.05000,0.204 | |
| 9,shared_mlp.output_linear,0.0004081527,0.05000,0.488 | |
| 10,self_attn.k_proj,0.0001623139,0.05000,0.703 | |
| 10,self_attn.q_proj,0.0004302992,0.05000,0.708 | |
| 10,self_attn.v_proj,0.0000837649,0.05000,0.728 | |
| 10,self_attn.o_proj,0.0000061797,0.05000,0.189 | |
| 10,shared_mlp.input_linear,0.0001317226,0.05000,0.245 | |
| 10,shared_mlp.output_linear,0.0000118062,0.05000,0.508 | |
| 11,mamba.in_proj,0.0002191654,0.05000,0.250 | |
| 11,mamba.out_proj,0.0000069709,0.05000,0.387 | |
| 11,shared_mlp.input_linear,0.0001327564,0.05000,0.191 | |
| 11,shared_mlp.output_linear,0.0000129761,0.05000,0.504 | |
| 12,mamba.in_proj,0.0001583058,0.05000,0.192 | |
| 12,mamba.out_proj,0.0000090464,0.05000,0.366 | |
| 12,shared_mlp.input_linear,0.0001088442,0.05000,0.186 | |
| 12,shared_mlp.output_linear,0.0000085337,0.05000,0.509 | |
| 13,self_attn.q_proj,0.0004986466,0.05000,0.665 | |
| 13,self_attn.v_proj,0.0001023489,0.05000,0.674 | |
| 13,self_attn.k_proj,0.0001610719,0.05000,0.686 | |
| 13,self_attn.o_proj,0.0000056521,0.05000,0.182 | |
| 13,shared_mlp.input_linear,0.0001161080,0.05000,0.187 | |
| 13,shared_mlp.output_linear,0.0006525996,0.05000,0.515 | |
| 14,mamba.in_proj,0.0001599891,0.05000,0.183 | |
| 14,mamba.out_proj,0.0000064345,0.05000,0.369 | |
| 14,shared_mlp.input_linear,0.0001056227,0.05000,0.195 | |
| 14,shared_mlp.output_linear,0.0000098842,0.05000,0.515 | |
| 15,mamba.in_proj,0.0002849224,0.05000,0.214 | |
| 15,mamba.out_proj,0.0000060616,0.05000,0.371 | |
| 15,shared_mlp.input_linear,0.0001007611,0.05000,0.191 | |
| 15,shared_mlp.output_linear,0.0000087755,0.05000,0.517 | |
| 16,mamba.in_proj,0.0001854963,0.05000,0.206 | |
| 16,mamba.out_proj,0.0000095851,0.05000,0.388 | |
| 16,shared_mlp.input_linear,0.0001092095,0.05000,0.205 | |
| 16,shared_mlp.output_linear,0.0000084820,0.05000,0.522 | |
| 17,self_attn.q_proj,0.0003317289,0.05000,0.671 | |
| 17,self_attn.k_proj,0.0001080381,0.05000,0.674 | |
| 17,self_attn.v_proj,0.0001242850,0.05000,0.683 | |
| 17,self_attn.o_proj,0.0000111055,0.05000,0.204 | |
| 17,shared_mlp.input_linear,0.0001189071,0.05000,0.217 | |
| 17,shared_mlp.output_linear,0.0000128278,0.05000,0.530 | |
| 18,mamba.in_proj,0.0002076188,0.05000,0.203 | |
| 18,mamba.out_proj,0.0000061952,0.05000,0.415 | |
| 18,shared_mlp.input_linear,0.0001260789,0.05000,0.211 | |
| 18,shared_mlp.output_linear,0.0000106376,0.05000,0.512 | |
| 19,mamba.in_proj,0.0001372509,0.05000,0.192 | |
| 19,mamba.out_proj,0.0000068760,0.05000,0.411 | |
| 19,shared_mlp.input_linear,0.0001471088,0.05000,0.220 | |
| 19,shared_mlp.output_linear,0.0000125081,0.05000,0.585 | |
| 20,mamba.in_proj,0.0002687636,0.05000,0.212 | |
| 20,mamba.out_proj,0.0000089296,0.05000,0.442 | |
| 20,shared_mlp.input_linear,0.0001799153,0.05000,0.206 | |
| 20,shared_mlp.output_linear,0.0000169340,0.05000,0.531 | |
| 21,mamba.in_proj,0.0006006442,0.05000,0.202 | |
| 21,mamba.out_proj,0.0000078080,0.05000,0.382 | |
| 21,shared_mlp.input_linear,0.0001918114,0.05000,0.210 | |
| 21,shared_mlp.output_linear,0.0000210194,0.05000,0.540 | |
| 22,mamba.in_proj,0.0005133608,0.05000,0.189 | |
| 22,mamba.out_proj,0.0000163215,0.05000,0.368 | |
| 22,shared_mlp.input_linear,0.0002294023,0.05000,0.194 | |
| 22,shared_mlp.output_linear,0.0000293422,0.05000,0.518 | |
| 23,mamba.in_proj,0.0004116132,0.05000,0.209 | |
| 23,mamba.out_proj,0.0000160722,0.05000,0.371 | |
| 23,shared_mlp.input_linear,0.0002273323,0.05000,0.213 | |
| 23,shared_mlp.output_linear,0.0000252019,0.05000,0.522 | |
| 24,mamba.in_proj,0.0004354424,0.05000,0.205 | |
| 24,mamba.out_proj,0.0000135037,0.05000,0.377 | |
| 24,shared_mlp.input_linear,0.0002166403,0.05000,0.202 | |
| 24,shared_mlp.output_linear,0.0000253154,0.05000,0.520 | |
| 25,mamba.in_proj,0.0003224494,0.05000,0.206 | |
| 25,mamba.out_proj,0.0000250325,0.05000,0.394 | |
| 25,shared_mlp.input_linear,0.0002728266,0.05000,0.200 | |
| 25,shared_mlp.output_linear,0.0000373538,0.05000,0.484 | |
| 26,mamba.in_proj,0.0003227580,0.05000,0.202 | |
| 26,mamba.out_proj,0.0000455537,0.05000,0.364 | |
| 26,shared_mlp.input_linear,0.0002892227,0.05000,0.198 | |
| 26,shared_mlp.output_linear,0.0000530557,0.05000,0.521 | |
| 27,self_attn.v_proj,0.0006113269,0.05000,0.535 | |
| 27,self_attn.q_proj,0.0005311922,0.05000,0.576 | |
| 27,self_attn.k_proj,0.0001448675,0.05000,0.588 | |
| 27,self_attn.o_proj,0.0000536515,0.05000,0.189 | |
| 27,shared_mlp.input_linear,0.0003787509,0.05000,0.190 | |
| 27,shared_mlp.output_linear,0.0001093897,0.05000,0.516 | |
| 28,mamba.in_proj,0.0019077307,0.05000,0.193 | |
| 28,mamba.out_proj,0.0001343735,0.05000,0.371 | |
| 28,shared_mlp.input_linear,0.0002967288,0.05000,0.192 | |
| 28,shared_mlp.output_linear,0.0000957567,0.05000,0.513 | |
| 29,mamba.in_proj,0.0001653794,0.05000,0.190 | |
| 29,mamba.out_proj,0.0000795585,0.05000,0.366 | |
| 29,shared_mlp.input_linear,0.0002558183,0.05000,0.192 | |
| 29,shared_mlp.output_linear,0.0000595038,0.05000,0.520 | |
| 30,mamba.in_proj,0.0000580291,0.05000,0.190 | |
| 30,mamba.out_proj,0.0000578531,0.05000,0.372 | |
| 30,shared_mlp.input_linear,0.0002475183,0.05000,0.193 | |
| 30,shared_mlp.output_linear,0.0000483499,0.05000,0.514 | |
| 31,mamba.in_proj,0.0000680402,0.05000,0.198 | |
| 31,mamba.out_proj,0.0000516371,0.05000,0.372 | |
| 31,shared_mlp.input_linear,0.0003959683,0.05000,0.197 | |
| 31,shared_mlp.output_linear,0.0002103811,0.05000,0.511 | |