{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.998681608437706,
  "eval_steps": 500,
  "global_step": 7583,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0065919578114700065,
      "grad_norm": 11.249381065368652,
      "learning_rate": 5.263157894736842e-06,
      "loss": 1.324,
      "step": 10
    },
    {
      "epoch": 0.013183915622940013,
      "grad_norm": 8.759961128234863,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 1.1761,
      "step": 20
    },
    {
      "epoch": 0.01977587343441002,
      "grad_norm": 2.233778953552246,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.6049,
      "step": 30
    },
    {
      "epoch": 0.026367831245880026,
      "grad_norm": 3.505425453186035,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.3419,
      "step": 40
    },
    {
      "epoch": 0.03295978905735003,
      "grad_norm": 1.7327282428741455,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.2418,
      "step": 50
    },
    {
      "epoch": 0.03955174686882004,
      "grad_norm": 1.4897282123565674,
      "learning_rate": 3.157894736842105e-05,
      "loss": 0.2017,
      "step": 60
    },
    {
      "epoch": 0.04614370468029005,
      "grad_norm": 0.8921314477920532,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 0.1668,
      "step": 70
    },
    {
      "epoch": 0.05273566249176005,
      "grad_norm": 1.3826392889022827,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.1532,
      "step": 80
    },
    {
      "epoch": 0.05932762030323006,
      "grad_norm": 1.489062786102295,
      "learning_rate": 4.736842105263158e-05,
      "loss": 0.1084,
      "step": 90
    },
    {
      "epoch": 0.06591957811470006,
      "grad_norm": 1.280565619468689,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 0.1128,
      "step": 100
    },
    {
      "epoch": 0.07251153592617007,
      "grad_norm": 1.2948462963104248,
      "learning_rate": 5.789473684210527e-05,
      "loss": 0.1044,
      "step": 110
    },
    {
      "epoch": 0.07910349373764008,
      "grad_norm": 1.5762895345687866,
      "learning_rate": 6.31578947368421e-05,
      "loss": 0.1034,
      "step": 120
    },
    {
      "epoch": 0.08569545154911008,
      "grad_norm": 1.0561785697937012,
      "learning_rate": 6.842105263157895e-05,
      "loss": 0.0798,
      "step": 130
    },
    {
      "epoch": 0.0922874093605801,
      "grad_norm": 0.9102309346199036,
      "learning_rate": 7.368421052631579e-05,
      "loss": 0.0752,
      "step": 140
    },
    {
      "epoch": 0.09887936717205009,
      "grad_norm": 1.4243663549423218,
      "learning_rate": 7.894736842105263e-05,
      "loss": 0.0863,
      "step": 150
    },
    {
      "epoch": 0.1054713249835201,
      "grad_norm": 0.7150789499282837,
      "learning_rate": 8.421052631578948e-05,
      "loss": 0.0778,
      "step": 160
    },
    {
      "epoch": 0.11206328279499012,
      "grad_norm": 0.9231832027435303,
      "learning_rate": 8.947368421052632e-05,
      "loss": 0.0796,
      "step": 170
    },
    {
      "epoch": 0.11865524060646011,
      "grad_norm": 0.5305670499801636,
      "learning_rate": 9.473684210526316e-05,
      "loss": 0.0733,
      "step": 180
    },
    {
      "epoch": 0.12524719841793014,
      "grad_norm": 1.0431275367736816,
      "learning_rate": 0.0001,
      "loss": 0.0713,
      "step": 190
    },
    {
      "epoch": 0.13183915622940012,
      "grad_norm": 1.0667047500610352,
      "learning_rate": 0.00010526315789473685,
      "loss": 0.0738,
      "step": 200
    },
    {
      "epoch": 0.13843111404087013,
      "grad_norm": 0.9431530833244324,
      "learning_rate": 0.0001105263157894737,
      "loss": 0.0695,
      "step": 210
    },
    {
      "epoch": 0.14502307185234015,
      "grad_norm": 1.231911063194275,
      "learning_rate": 0.00011578947368421053,
      "loss": 0.0707,
      "step": 220
    },
    {
      "epoch": 0.15161502966381016,
      "grad_norm": 0.5772905945777893,
      "learning_rate": 0.00012105263157894738,
      "loss": 0.0642,
      "step": 230
    },
    {
      "epoch": 0.15820698747528017,
      "grad_norm": 0.6241514086723328,
      "learning_rate": 0.0001263157894736842,
      "loss": 0.0621,
      "step": 240
    },
    {
      "epoch": 0.16479894528675015,
      "grad_norm": 0.7449037432670593,
      "learning_rate": 0.00013157894736842108,
      "loss": 0.0639,
      "step": 250
    },
    {
      "epoch": 0.17139090309822017,
      "grad_norm": 0.9040747880935669,
      "learning_rate": 0.0001368421052631579,
      "loss": 0.0595,
      "step": 260
    },
    {
      "epoch": 0.17798286090969018,
      "grad_norm": 0.6246598958969116,
      "learning_rate": 0.00014210526315789474,
      "loss": 0.0612,
      "step": 270
    },
    {
      "epoch": 0.1845748187211602,
      "grad_norm": 0.6300843358039856,
      "learning_rate": 0.00014736842105263158,
      "loss": 0.0574,
      "step": 280
    },
    {
      "epoch": 0.1911667765326302,
      "grad_norm": 0.7051455974578857,
      "learning_rate": 0.00015263157894736845,
      "loss": 0.0489,
      "step": 290
    },
    {
      "epoch": 0.19775873434410018,
      "grad_norm": 0.8903814554214478,
      "learning_rate": 0.00015789473684210527,
      "loss": 0.0588,
      "step": 300
    },
    {
      "epoch": 0.2043506921555702,
      "grad_norm": 0.8815051317214966,
      "learning_rate": 0.0001631578947368421,
      "loss": 0.0605,
      "step": 310
    },
    {
      "epoch": 0.2109426499670402,
      "grad_norm": 0.7266796231269836,
      "learning_rate": 0.00016842105263157895,
      "loss": 0.0555,
      "step": 320
    },
    {
      "epoch": 0.21753460777851022,
      "grad_norm": 1.033163070678711,
      "learning_rate": 0.0001736842105263158,
      "loss": 0.056,
      "step": 330
    },
    {
      "epoch": 0.22412656558998023,
      "grad_norm": 1.339528203010559,
      "learning_rate": 0.00017894736842105264,
      "loss": 0.0513,
      "step": 340
    },
    {
      "epoch": 0.23071852340145024,
      "grad_norm": 1.1713142395019531,
      "learning_rate": 0.00018421052631578948,
      "loss": 0.0604,
      "step": 350
    },
    {
      "epoch": 0.23731048121292023,
      "grad_norm": 0.7305978536605835,
      "learning_rate": 0.00018947368421052632,
      "loss": 0.061,
      "step": 360
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.6867638826370239,
      "learning_rate": 0.00019473684210526317,
      "loss": 0.0446,
      "step": 370
    },
    {
      "epoch": 0.2504943968358603,
      "grad_norm": 0.480622798204422,
      "learning_rate": 0.0002,
      "loss": 0.0507,
      "step": 380
    },
    {
      "epoch": 0.25708635464733026,
      "grad_norm": 0.6892393827438354,
      "learning_rate": 0.00019999904886484996,
      "loss": 0.0562,
      "step": 390
    },
    {
      "epoch": 0.26367831245880025,
      "grad_norm": 0.8014799952507019,
      "learning_rate": 0.00019999619547749294,
      "loss": 0.0407,
      "step": 400
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 0.8931164741516113,
      "learning_rate": 0.0001999914398922081,
      "loss": 0.0488,
      "step": 410
    },
    {
      "epoch": 0.27686222808174027,
      "grad_norm": 0.5557290315628052,
      "learning_rate": 0.00019998478219945958,
      "loss": 0.0533,
      "step": 420
    },
    {
      "epoch": 0.2834541858932103,
      "grad_norm": 0.9810464978218079,
      "learning_rate": 0.00019997622252589464,
      "loss": 0.052,
      "step": 430
    },
    {
      "epoch": 0.2900461437046803,
      "grad_norm": 0.6797704696655273,
      "learning_rate": 0.00019996576103434137,
      "loss": 0.0514,
      "step": 440
    },
    {
      "epoch": 0.2966381015161503,
      "grad_norm": 1.141650915145874,
      "learning_rate": 0.0001999533979238057,
      "loss": 0.0489,
      "step": 450
    },
    {
      "epoch": 0.3032300593276203,
      "grad_norm": 0.6689559817314148,
      "learning_rate": 0.00019993913342946734,
      "loss": 0.0441,
      "step": 460
    },
    {
      "epoch": 0.3098220171390903,
      "grad_norm": 0.524917721748352,
      "learning_rate": 0.0001999229678226756,
      "loss": 0.0457,
      "step": 470
    },
    {
      "epoch": 0.31641397495056034,
      "grad_norm": 0.7408258318901062,
      "learning_rate": 0.00019990490141094392,
      "loss": 0.0428,
      "step": 480
    },
    {
      "epoch": 0.3230059327620303,
      "grad_norm": 0.5927634835243225,
      "learning_rate": 0.0001998849345379444,
      "loss": 0.0431,
      "step": 490
    },
    {
      "epoch": 0.3295978905735003,
      "grad_norm": 0.4574936628341675,
      "learning_rate": 0.00019986306758350083,
      "loss": 0.038,
      "step": 500
    },
    {
      "epoch": 0.33618984838497035,
      "grad_norm": 0.6031479835510254,
      "learning_rate": 0.00019983930096358188,
      "loss": 0.0442,
      "step": 510
    },
    {
      "epoch": 0.34278180619644033,
      "grad_norm": 0.4019775688648224,
      "learning_rate": 0.00019981363513029283,
      "loss": 0.0336,
      "step": 520
    },
    {
      "epoch": 0.34937376400791037,
      "grad_norm": 0.6691102981567383,
      "learning_rate": 0.00019978607057186725,
      "loss": 0.0387,
      "step": 530
    },
    {
      "epoch": 0.35596572181938035,
      "grad_norm": 0.39324843883514404,
      "learning_rate": 0.00019975660781265753,
      "loss": 0.0449,
      "step": 540
    },
    {
      "epoch": 0.36255767963085034,
      "grad_norm": 0.5069633722305298,
      "learning_rate": 0.00019972524741312497,
      "loss": 0.0319,
      "step": 550
    },
    {
      "epoch": 0.3691496374423204,
      "grad_norm": 0.5699636936187744,
      "learning_rate": 0.00019969198996982917,
      "loss": 0.0402,
      "step": 560
    },
    {
      "epoch": 0.37574159525379036,
      "grad_norm": 1.0686895847320557,
      "learning_rate": 0.00019965683611541655,
      "loss": 0.0542,
      "step": 570
    },
    {
      "epoch": 0.3823335530652604,
      "grad_norm": 0.4853604733943939,
      "learning_rate": 0.00019961978651860854,
      "loss": 0.0476,
      "step": 580
    },
    {
      "epoch": 0.3889255108767304,
      "grad_norm": 0.8250619173049927,
      "learning_rate": 0.0001995808418841885,
      "loss": 0.034,
      "step": 590
    },
    {
      "epoch": 0.39551746868820037,
      "grad_norm": 0.6085853576660156,
      "learning_rate": 0.00019954000295298871,
      "loss": 0.0389,
      "step": 600
    },
    {
      "epoch": 0.4021094264996704,
      "grad_norm": 3.688549041748047,
      "learning_rate": 0.000199497270501876,
      "loss": 0.0511,
      "step": 610
    },
    {
      "epoch": 0.4087013843111404,
      "grad_norm": 1.5635132789611816,
      "learning_rate": 0.00019945264534373714,
      "loss": 0.1116,
      "step": 620
    },
    {
      "epoch": 0.41529334212261043,
      "grad_norm": 0.7884135246276855,
      "learning_rate": 0.00019940612832746322,
      "loss": 0.0737,
      "step": 630
    },
    {
      "epoch": 0.4218852999340804,
      "grad_norm": 0.9017935395240784,
      "learning_rate": 0.0001993577203379336,
      "loss": 0.0789,
      "step": 640
    },
    {
      "epoch": 0.42847725774555045,
      "grad_norm": 0.8649272918701172,
      "learning_rate": 0.00019930742229599914,
      "loss": 0.0728,
      "step": 650
    },
    {
      "epoch": 0.43506921555702044,
      "grad_norm": 0.772191047668457,
      "learning_rate": 0.00019925523515846455,
      "loss": 0.0697,
      "step": 660
    },
    {
      "epoch": 0.4416611733684904,
      "grad_norm": 0.5265079140663147,
      "learning_rate": 0.00019920115991807022,
      "loss": 0.0622,
      "step": 670
    },
    {
      "epoch": 0.44825313117996046,
      "grad_norm": 0.8318515419960022,
      "learning_rate": 0.0001991451976034734,
      "loss": 0.0786,
      "step": 680
    },
    {
      "epoch": 0.45484508899143045,
      "grad_norm": 0.7197186946868896,
      "learning_rate": 0.0001990873492792286,
      "loss": 0.059,
      "step": 690
    },
    {
      "epoch": 0.4614370468029005,
      "grad_norm": 0.9418641328811646,
      "learning_rate": 0.00019902761604576725,
      "loss": 0.078,
      "step": 700
    },
    {
      "epoch": 0.46802900461437047,
      "grad_norm": 0.7985256314277649,
      "learning_rate": 0.00019896599903937697,
      "loss": 0.0834,
      "step": 710
    },
    {
      "epoch": 0.47462096242584045,
      "grad_norm": 0.6049144268035889,
      "learning_rate": 0.00019890249943217976,
      "loss": 0.0656,
      "step": 720
    },
    {
      "epoch": 0.4812129202373105,
      "grad_norm": 0.6395105719566345,
      "learning_rate": 0.0001988371184321098,
      "loss": 0.0764,
      "step": 730
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.58722984790802,
      "learning_rate": 0.00019876985728289038,
      "loss": 0.0588,
      "step": 740
    },
    {
      "epoch": 0.4943968358602505,
      "grad_norm": 0.4679464101791382,
      "learning_rate": 0.00019870071726401043,
      "loss": 0.0638,
      "step": 750
    },
    {
      "epoch": 0.5009887936717206,
      "grad_norm": 0.509775698184967,
      "learning_rate": 0.00019862969969069996,
      "loss": 0.0602,
      "step": 760
    },
    {
      "epoch": 0.5075807514831905,
      "grad_norm": 0.8126184344291687,
      "learning_rate": 0.00019855680591390518,
      "loss": 0.069,
      "step": 770
    },
    {
      "epoch": 0.5141727092946605,
      "grad_norm": 0.7676377892494202,
      "learning_rate": 0.00019848203732026275,
      "loss": 0.0704,
      "step": 780
    },
    {
      "epoch": 0.5207646671061306,
      "grad_norm": 1.0301965475082397,
      "learning_rate": 0.00019840539533207344,
      "loss": 0.0666,
      "step": 790
    },
    {
      "epoch": 0.5273566249176005,
      "grad_norm": 0.6810826063156128,
      "learning_rate": 0.000198326881407275,
      "loss": 0.0698,
      "step": 800
    },
    {
      "epoch": 0.5339485827290705,
      "grad_norm": 0.4939572513103485,
      "learning_rate": 0.00019824649703941455,
      "loss": 0.0548,
      "step": 810
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 0.6614457964897156,
      "learning_rate": 0.00019816424375762001,
      "loss": 0.0748,
      "step": 820
    },
    {
      "epoch": 0.5471324983520105,
      "grad_norm": 0.7715848088264465,
      "learning_rate": 0.00019808012312657114,
      "loss": 0.0653,
      "step": 830
    },
    {
      "epoch": 0.5537244561634805,
      "grad_norm": 0.5254570245742798,
      "learning_rate": 0.00019799413674646973,
      "loss": 0.0537,
      "step": 840
    },
    {
      "epoch": 0.5603164139749506,
      "grad_norm": 0.7626491785049438,
      "learning_rate": 0.0001979062862530091,
      "loss": 0.0599,
      "step": 850
    },
    {
      "epoch": 0.5669083717864206,
      "grad_norm": 0.6767850518226624,
      "learning_rate": 0.00019781657331734316,
      "loss": 0.0644,
      "step": 860
    },
    {
      "epoch": 0.5735003295978905,
      "grad_norm": 0.4016531705856323,
      "learning_rate": 0.0001977249996460544,
      "loss": 0.0543,
      "step": 870
    },
    {
      "epoch": 0.5800922874093606,
      "grad_norm": 1.0104889869689941,
      "learning_rate": 0.0001976315669811216,
      "loss": 0.0681,
      "step": 880
    },
    {
      "epoch": 0.5866842452208306,
      "grad_norm": 0.7674484252929688,
      "learning_rate": 0.00019753627709988658,
      "loss": 0.0562,
      "step": 890
    },
    {
      "epoch": 0.5932762030323006,
      "grad_norm": 1.2781016826629639,
      "learning_rate": 0.00019743913181502048,
      "loss": 0.0602,
      "step": 900
    },
    {
      "epoch": 0.5998681608437706,
      "grad_norm": 0.5540818572044373,
      "learning_rate": 0.00019734013297448914,
      "loss": 0.0631,
      "step": 910
    },
    {
      "epoch": 0.6064601186552406,
      "grad_norm": 0.7823266386985779,
      "learning_rate": 0.00019723928246151814,
      "loss": 0.0637,
      "step": 920
    },
    {
      "epoch": 0.6130520764667106,
      "grad_norm": 0.6756680607795715,
      "learning_rate": 0.00019713658219455685,
      "loss": 0.0684,
      "step": 930
    },
    {
      "epoch": 0.6196440342781806,
      "grad_norm": 0.8224459290504456,
      "learning_rate": 0.0001970320341272419,
      "loss": 0.0512,
      "step": 940
    },
    {
      "epoch": 0.6262359920896506,
      "grad_norm": 0.8429596424102783,
      "learning_rate": 0.00019692564024836016,
      "loss": 0.0516,
      "step": 950
    },
    {
      "epoch": 0.6328279499011207,
      "grad_norm": 0.7025866508483887,
      "learning_rate": 0.0001968174025818108,
      "loss": 0.0667,
      "step": 960
    },
    {
      "epoch": 0.6394199077125906,
      "grad_norm": 0.624162495136261,
      "learning_rate": 0.00019670732318656677,
      "loss": 0.0575,
      "step": 970
    },
    {
      "epoch": 0.6460118655240606,
      "grad_norm": 0.5887486338615417,
      "learning_rate": 0.00019659540415663571,
      "loss": 0.0488,
      "step": 980
    },
    {
      "epoch": 0.6526038233355307,
      "grad_norm": 0.45346468687057495,
      "learning_rate": 0.00019648164762102013,
      "loss": 0.0483,
      "step": 990
    },
    {
      "epoch": 0.6591957811470006,
      "grad_norm": 0.6038155555725098,
      "learning_rate": 0.0001963660557436768,
      "loss": 0.054,
      "step": 1000
    },
    {
      "epoch": 0.6657877389584707,
      "grad_norm": 0.5043258666992188,
      "learning_rate": 0.00019624863072347564,
      "loss": 0.0631,
      "step": 1010
    },
    {
      "epoch": 0.6723796967699407,
      "grad_norm": 0.6452742218971252,
      "learning_rate": 0.000196129374794158,
      "loss": 0.0551,
      "step": 1020
    },
    {
      "epoch": 0.6789716545814107,
      "grad_norm": 0.6438404321670532,
      "learning_rate": 0.0001960082902242939,
      "loss": 0.0501,
      "step": 1030
    },
    {
      "epoch": 0.6855636123928807,
      "grad_norm": 0.8768063187599182,
      "learning_rate": 0.00019588537931723927,
      "loss": 0.0516,
      "step": 1040
    },
    {
      "epoch": 0.6921555702043507,
      "grad_norm": 0.767848014831543,
      "learning_rate": 0.00019576064441109172,
      "loss": 0.0501,
      "step": 1050
    },
    {
      "epoch": 0.6987475280158207,
      "grad_norm": 0.6131387948989868,
      "learning_rate": 0.00019563408787864634,
      "loss": 0.0595,
      "step": 1060
    },
    {
      "epoch": 0.7053394858272907,
      "grad_norm": 0.4806978404521942,
      "learning_rate": 0.00019550571212735048,
      "loss": 0.0475,
      "step": 1070
    },
    {
      "epoch": 0.7119314436387607,
      "grad_norm": 0.4950248897075653,
      "learning_rate": 0.00019537551959925787,
      "loss": 0.048,
      "step": 1080
    },
    {
      "epoch": 0.7185234014502307,
      "grad_norm": 0.5537814497947693,
      "learning_rate": 0.0001952435127709824,
      "loss": 0.046,
      "step": 1090
    },
    {
      "epoch": 0.7251153592617007,
      "grad_norm": 0.4151875078678131,
      "learning_rate": 0.00019510969415365063,
      "loss": 0.0429,
      "step": 1100
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 0.42159780859947205,
      "learning_rate": 0.0001949740662928545,
      "loss": 0.0434,
      "step": 1110
    },
    {
      "epoch": 0.7382992748846408,
      "grad_norm": 0.454226016998291,
      "learning_rate": 0.00019483663176860248,
      "loss": 0.0421,
      "step": 1120
    },
    {
      "epoch": 0.7448912326961108,
      "grad_norm": 0.37481585144996643,
      "learning_rate": 0.00019469739319527064,
      "loss": 0.043,
      "step": 1130
    },
    {
      "epoch": 0.7514831905075807,
      "grad_norm": 0.6487095952033997,
      "learning_rate": 0.00019455635322155313,
      "loss": 0.0433,
      "step": 1140
    },
    {
      "epoch": 0.7580751483190508,
      "grad_norm": 0.44085580110549927,
      "learning_rate": 0.00019441351453041138,
      "loss": 0.0492,
      "step": 1150
    },
    {
      "epoch": 0.7646671061305208,
      "grad_norm": 0.49984055757522583,
      "learning_rate": 0.00019426887983902343,
      "loss": 0.0431,
      "step": 1160
    },
    {
      "epoch": 0.7712590639419907,
      "grad_norm": 0.5114363431930542,
      "learning_rate": 0.00019412245189873203,
      "loss": 0.0448,
      "step": 1170
    },
    {
      "epoch": 0.7778510217534608,
      "grad_norm": 0.5482351779937744,
      "learning_rate": 0.00019397423349499246,
      "loss": 0.0481,
      "step": 1180
    },
    {
      "epoch": 0.7844429795649308,
      "grad_norm": 0.7064313888549805,
      "learning_rate": 0.00019382422744731933,
      "loss": 0.0476,
      "step": 1190
    },
    {
      "epoch": 0.7910349373764007,
      "grad_norm": 0.5201088190078735,
      "learning_rate": 0.0001936724366092332,
      "loss": 0.0596,
      "step": 1200
    },
    {
      "epoch": 0.7976268951878708,
      "grad_norm": 0.794978678226471,
      "learning_rate": 0.000193518863868206,
      "loss": 0.0484,
      "step": 1210
    },
    {
      "epoch": 0.8042188529993408,
      "grad_norm": 0.5086749196052551,
      "learning_rate": 0.00019336351214560647,
      "loss": 0.0482,
      "step": 1220
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 0.5501623749732971,
      "learning_rate": 0.00019320638439664426,
      "loss": 0.0417,
      "step": 1230
    },
    {
      "epoch": 0.8174027686222808,
      "grad_norm": 0.4340960383415222,
      "learning_rate": 0.0001930474836103138,
      "loss": 0.0406,
      "step": 1240
    },
    {
      "epoch": 0.8239947264337508,
      "grad_norm": 0.5098422169685364,
      "learning_rate": 0.00019288681280933768,
      "loss": 0.0485,
      "step": 1250
    },
    {
      "epoch": 0.8305866842452209,
      "grad_norm": 0.4968768358230591,
      "learning_rate": 0.00019272437505010877,
      "loss": 0.0412,
      "step": 1260
    },
    {
      "epoch": 0.8371786420566908,
      "grad_norm": 0.46997663378715515,
      "learning_rate": 0.00019256017342263228,
      "loss": 0.0388,
      "step": 1270
    },
    {
      "epoch": 0.8437705998681608,
      "grad_norm": 0.5510318279266357,
      "learning_rate": 0.00019239421105046706,
      "loss": 0.056,
      "step": 1280
    },
    {
      "epoch": 0.8503625576796309,
      "grad_norm": 0.47607627511024475,
      "learning_rate": 0.000192226491090666,
      "loss": 0.0462,
      "step": 1290
    },
    {
      "epoch": 0.8569545154911009,
      "grad_norm": 0.4591579735279083,
      "learning_rate": 0.00019205701673371606,
      "loss": 0.0456,
      "step": 1300
    },
    {
      "epoch": 0.8635464733025708,
      "grad_norm": 0.45051664113998413,
      "learning_rate": 0.00019188579120347766,
      "loss": 0.0402,
      "step": 1310
    },
    {
      "epoch": 0.8701384311140409,
      "grad_norm": 0.3680923283100128,
      "learning_rate": 0.00019171281775712316,
      "loss": 0.0378,
      "step": 1320
    },
    {
      "epoch": 0.8767303889255109,
      "grad_norm": 0.4515272080898285,
      "learning_rate": 0.00019153809968507505,
      "loss": 0.0439,
      "step": 1330
    },
    {
      "epoch": 0.8833223467369808,
      "grad_norm": 0.5114394426345825,
      "learning_rate": 0.00019136164031094337,
      "loss": 0.0522,
      "step": 1340
    },
    {
      "epoch": 0.8899143045484509,
      "grad_norm": 0.6060967445373535,
      "learning_rate": 0.00019118344299146235,
      "loss": 0.04,
      "step": 1350
    },
    {
      "epoch": 0.8965062623599209,
      "grad_norm": 0.7507016658782959,
      "learning_rate": 0.00019100351111642666,
      "loss": 0.0557,
      "step": 1360
    },
    {
      "epoch": 0.9030982201713909,
      "grad_norm": 0.4493657648563385,
      "learning_rate": 0.00019082184810862698,
      "loss": 0.0424,
      "step": 1370
    },
    {
      "epoch": 0.9096901779828609,
      "grad_norm": 0.5429974794387817,
      "learning_rate": 0.00019063845742378467,
      "loss": 0.0441,
      "step": 1380
    },
    {
      "epoch": 0.9162821357943309,
      "grad_norm": 0.43085166811943054,
      "learning_rate": 0.00019045334255048634,
      "loss": 0.046,
      "step": 1390
    },
    {
      "epoch": 0.922874093605801,
      "grad_norm": 0.41755935549736023,
      "learning_rate": 0.0001902665070101172,
      "loss": 0.0461,
      "step": 1400
    },
    {
      "epoch": 0.9294660514172709,
      "grad_norm": 0.44052428007125854,
      "learning_rate": 0.00019007795435679428,
      "loss": 0.052,
      "step": 1410
    },
    {
      "epoch": 0.9360580092287409,
      "grad_norm": 0.4310389757156372,
      "learning_rate": 0.00018988768817729864,
      "loss": 0.0442,
      "step": 1420
    },
    {
      "epoch": 0.942649967040211,
      "grad_norm": 0.3892590403556824,
      "learning_rate": 0.0001896957120910074,
      "loss": 0.0416,
      "step": 1430
    },
    {
      "epoch": 0.9492419248516809,
      "grad_norm": 0.7788804769515991,
      "learning_rate": 0.00018950202974982454,
      "loss": 0.0339,
      "step": 1440
    },
    {
      "epoch": 0.955833882663151,
      "grad_norm": 0.5524693727493286,
      "learning_rate": 0.00018930664483811173,
      "loss": 0.045,
      "step": 1450
    },
    {
      "epoch": 0.962425840474621,
      "grad_norm": 0.41249391436576843,
      "learning_rate": 0.00018910956107261816,
      "loss": 0.0381,
      "step": 1460
    },
    {
      "epoch": 0.9690177982860909,
      "grad_norm": 0.3245869576931,
      "learning_rate": 0.00018891078220240973,
      "loss": 0.0277,
      "step": 1470
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.28615134954452515,
      "learning_rate": 0.0001887103120087979,
      "loss": 0.0365,
      "step": 1480
    },
    {
      "epoch": 0.982201713909031,
      "grad_norm": 0.32258233428001404,
      "learning_rate": 0.00018850815430526758,
      "loss": 0.0339,
      "step": 1490
    },
    {
      "epoch": 0.988793671720501,
      "grad_norm": 0.4749410152435303,
      "learning_rate": 0.00018830431293740473,
      "loss": 0.0414,
      "step": 1500
    },
    {
      "epoch": 0.995385629531971,
      "grad_norm": 0.44143855571746826,
      "learning_rate": 0.00018809879178282313,
      "loss": 0.0288,
      "step": 1510
    },
    {
      "epoch": 1.0019775873434411,
      "grad_norm": 0.4565713107585907,
      "learning_rate": 0.00018789159475109067,
      "loss": 0.0343,
      "step": 1520
    },
    {
      "epoch": 1.008569545154911,
      "grad_norm": 0.5609179735183716,
      "learning_rate": 0.000187682725783655,
      "loss": 0.0423,
      "step": 1530
    },
    {
      "epoch": 1.015161502966381,
      "grad_norm": 0.4169975221157074,
      "learning_rate": 0.00018747218885376842,
      "loss": 0.0341,
      "step": 1540
    },
    {
      "epoch": 1.0217534607778511,
      "grad_norm": 0.44291096925735474,
      "learning_rate": 0.0001872599879664124,
      "loss": 0.0435,
      "step": 1550
    },
    {
      "epoch": 1.028345418589321,
      "grad_norm": 0.31878435611724854,
      "learning_rate": 0.00018704612715822144,
      "loss": 0.0402,
      "step": 1560
    },
    {
      "epoch": 1.034937376400791,
      "grad_norm": 0.4876072406768799,
      "learning_rate": 0.0001868306104974061,
      "loss": 0.0298,
      "step": 1570
    },
    {
      "epoch": 1.0415293342122611,
      "grad_norm": 0.4452480375766754,
      "learning_rate": 0.0001866134420836759,
      "loss": 0.042,
      "step": 1580
    },
    {
      "epoch": 1.048121292023731,
      "grad_norm": 0.5295068025588989,
      "learning_rate": 0.00018639462604816103,
      "loss": 0.0408,
      "step": 1590
    },
    {
      "epoch": 1.054713249835201,
      "grad_norm": 0.349461168050766,
      "learning_rate": 0.00018617416655333395,
      "loss": 0.037,
      "step": 1600
    },
    {
      "epoch": 1.0613052076466711,
      "grad_norm": 0.39832666516304016,
      "learning_rate": 0.00018595206779293015,
      "loss": 0.0406,
      "step": 1610
    },
    {
      "epoch": 1.067897165458141,
      "grad_norm": 0.5740079283714294,
      "learning_rate": 0.00018572833399186836,
      "loss": 0.0411,
      "step": 1620
    },
    {
      "epoch": 1.074489123269611,
      "grad_norm": 0.20162849128246307,
      "learning_rate": 0.00018550296940617034,
      "loss": 0.0333,
      "step": 1630
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 0.40781688690185547,
      "learning_rate": 0.00018527597832287954,
      "loss": 0.036,
      "step": 1640
    },
    {
      "epoch": 1.087673038892551,
      "grad_norm": 0.2796386182308197,
      "learning_rate": 0.00018504736505997997,
      "loss": 0.0313,
      "step": 1650
    },
    {
      "epoch": 1.094264996704021,
      "grad_norm": 0.6502156853675842,
      "learning_rate": 0.00018481713396631383,
      "loss": 0.0428,
      "step": 1660
    },
    {
      "epoch": 1.1008569545154911,
      "grad_norm": 0.3565762937068939,
      "learning_rate": 0.00018458528942149886,
      "loss": 0.0363,
      "step": 1670
    },
    {
      "epoch": 1.107448912326961,
      "grad_norm": 0.2560652792453766,
      "learning_rate": 0.00018435183583584498,
      "loss": 0.0404,
      "step": 1680
    },
    {
      "epoch": 1.1140408701384312,
      "grad_norm": 0.4972442388534546,
      "learning_rate": 0.00018411677765027036,
      "loss": 0.053,
      "step": 1690
    },
    {
      "epoch": 1.1206328279499012,
      "grad_norm": 0.36633139848709106,
      "learning_rate": 0.0001838801193362171,
      "loss": 0.0363,
      "step": 1700
    },
    {
      "epoch": 1.127224785761371,
      "grad_norm": 0.4480843245983124,
      "learning_rate": 0.000183641865395566,
      "loss": 0.031,
      "step": 1710
    },
    {
      "epoch": 1.133816743572841,
      "grad_norm": 0.42788198590278625,
      "learning_rate": 0.00018340202036055102,
      "loss": 0.0408,
      "step": 1720
    },
    {
      "epoch": 1.1404087013843112,
      "grad_norm": 0.3363877534866333,
      "learning_rate": 0.00018316058879367303,
      "loss": 0.0431,
      "step": 1730
    },
    {
      "epoch": 1.147000659195781,
      "grad_norm": 0.48484691977500916,
      "learning_rate": 0.000182917575287613,
      "loss": 0.0497,
      "step": 1740
    },
    {
      "epoch": 1.1535926170072512,
      "grad_norm": 0.4944576025009155,
      "learning_rate": 0.00018267298446514473,
      "loss": 0.0381,
      "step": 1750
    },
    {
      "epoch": 1.1601845748187212,
      "grad_norm": 0.31334227323532104,
      "learning_rate": 0.00018242682097904673,
      "loss": 0.0374,
      "step": 1760
    },
    {
      "epoch": 1.166776532630191,
      "grad_norm": 0.4245593845844269,
      "learning_rate": 0.00018217908951201394,
      "loss": 0.0384,
      "step": 1770
    },
    {
      "epoch": 1.1733684904416612,
      "grad_norm": 0.3156047463417053,
      "learning_rate": 0.00018192979477656845,
      "loss": 0.0375,
      "step": 1780
    },
    {
      "epoch": 1.1799604482531312,
      "grad_norm": 0.38936617970466614,
      "learning_rate": 0.00018167894151497,
      "loss": 0.0383,
      "step": 1790
    },
    {
      "epoch": 1.186552406064601,
      "grad_norm": 0.39287203550338745,
      "learning_rate": 0.00018142653449912564,
      "loss": 0.0384,
      "step": 1800
    },
    {
      "epoch": 1.1931443638760713,
      "grad_norm": 0.4132576882839203,
      "learning_rate": 0.0001811725785304991,
      "loss": 0.0333,
      "step": 1810
    },
    {
      "epoch": 1.1997363216875412,
      "grad_norm": 0.42320823669433594,
      "learning_rate": 0.00018091707844001935,
      "loss": 0.0282,
      "step": 1820
    },
    {
      "epoch": 1.2063282794990111,
      "grad_norm": 0.4071812927722931,
      "learning_rate": 0.00018066003908798873,
      "loss": 0.0315,
      "step": 1830
    },
    {
      "epoch": 1.2129202373104813,
      "grad_norm": 0.40392544865608215,
      "learning_rate": 0.0001804014653639904,
      "loss": 0.0331,
      "step": 1840
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 0.4608232080936432,
      "learning_rate": 0.00018014136218679567,
      "loss": 0.0327,
      "step": 1850
    },
    {
      "epoch": 1.2261041529334213,
      "grad_norm": 0.5048249959945679,
      "learning_rate": 0.00017987973450426994,
      "loss": 0.0334,
      "step": 1860
    },
    {
      "epoch": 1.2326961107448913,
      "grad_norm": 0.5134670734405518,
      "learning_rate": 0.0001796165872932789,
      "loss": 0.0361,
      "step": 1870
    },
    {
      "epoch": 1.2392880685563612,
      "grad_norm": 0.339224249124527,
      "learning_rate": 0.00017935192555959385,
      "loss": 0.0336,
      "step": 1880
    },
    {
      "epoch": 1.2458800263678311,
      "grad_norm": 0.5917630195617676,
      "learning_rate": 0.0001790857543377963,
      "loss": 0.0447,
      "step": 1890
    },
    {
      "epoch": 1.2524719841793013,
      "grad_norm": 0.641945481300354,
      "learning_rate": 0.00017881807869118234,
      "loss": 0.0546,
      "step": 1900
    },
    {
      "epoch": 1.2590639419907712,
      "grad_norm": 0.4399726986885071,
      "learning_rate": 0.00017854890371166637,
      "loss": 0.0358,
      "step": 1910
    },
    {
      "epoch": 1.2656558998022414,
      "grad_norm": 0.32603511214256287,
      "learning_rate": 0.00017827823451968398,
      "loss": 0.0342,
      "step": 1920
    },
    {
      "epoch": 1.2722478576137113,
      "grad_norm": 0.659220814704895,
      "learning_rate": 0.0001780060762640949,
      "loss": 0.039,
      "step": 1930
    },
    {
      "epoch": 1.2788398154251812,
      "grad_norm": 0.4240771234035492,
      "learning_rate": 0.00017773243412208474,
      "loss": 0.035,
      "step": 1940
    },
    {
      "epoch": 1.2854317732366514,
      "grad_norm": 0.4172196090221405,
      "learning_rate": 0.0001774573132990667,
      "loss": 0.0379,
      "step": 1950
    },
    {
      "epoch": 1.2920237310481213,
      "grad_norm": 0.42398178577423096,
      "learning_rate": 0.00017718071902858256,
      "loss": 0.0373,
      "step": 1960
    },
    {
      "epoch": 1.2986156888595912,
      "grad_norm": 0.5154095888137817,
      "learning_rate": 0.00017690265657220288,
      "loss": 0.0403,
      "step": 1970
    },
    {
      "epoch": 1.3052076466710614,
      "grad_norm": 0.396801233291626,
      "learning_rate": 0.00017662313121942727,
      "loss": 0.0391,
      "step": 1980
    },
    {
      "epoch": 1.3117996044825313,
      "grad_norm": 0.4826532006263733,
      "learning_rate": 0.00017634214828758342,
      "loss": 0.0297,
      "step": 1990
    },
    {
      "epoch": 1.3183915622940012,
      "grad_norm": 0.508990466594696,
      "learning_rate": 0.00017605971312172622,
      "loss": 0.0378,
      "step": 2000
    },
    {
      "epoch": 1.3249835201054714,
      "grad_norm": 0.3308925926685333,
      "learning_rate": 0.000175775831094536,
      "loss": 0.0379,
      "step": 2010
    },
    {
      "epoch": 1.3315754779169413,
      "grad_norm": 0.4720020294189453,
      "learning_rate": 0.00017549050760621614,
      "loss": 0.0392,
      "step": 2020
    },
    {
      "epoch": 1.3381674357284115,
      "grad_norm": 0.6246912479400635,
      "learning_rate": 0.00017520374808439076,
      "loss": 0.0363,
      "step": 2030
    },
    {
      "epoch": 1.3447593935398814,
      "grad_norm": 0.33079174160957336,
      "learning_rate": 0.00017491555798400095,
      "loss": 0.0316,
      "step": 2040
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 0.2520120143890381,
      "learning_rate": 0.00017462594278720145,
      "loss": 0.0325,
      "step": 2050
    },
    {
      "epoch": 1.3579433091628212,
      "grad_norm": 0.23862145841121674,
      "learning_rate": 0.00017433490800325614,
      "loss": 0.0351,
      "step": 2060
    },
    {
      "epoch": 1.3645352669742914,
      "grad_norm": 0.3477911353111267,
      "learning_rate": 0.00017404245916843324,
      "loss": 0.0389,
      "step": 2070
    },
    {
      "epoch": 1.3711272247857613,
      "grad_norm": 0.5003520846366882,
      "learning_rate": 0.00017374860184590015,
      "loss": 0.0368,
      "step": 2080
    },
    {
      "epoch": 1.3777191825972315,
      "grad_norm": 0.3755623698234558,
      "learning_rate": 0.00017345334162561734,
      "loss": 0.0341,
      "step": 2090
    },
    {
      "epoch": 1.3843111404087014,
      "grad_norm": 0.5258712768554688,
      "learning_rate": 0.00017315668412423238,
      "loss": 0.0334,
      "step": 2100
    },
    {
      "epoch": 1.3909030982201713,
      "grad_norm": 0.567348062992096,
      "learning_rate": 0.0001728586349849728,
      "loss": 0.0366,
      "step": 2110
    },
    {
      "epoch": 1.3974950560316415,
      "grad_norm": 0.4541948139667511,
      "learning_rate": 0.00017255919987753878,
      "loss": 0.0503,
      "step": 2120
    },
    {
      "epoch": 1.4040870138431114,
      "grad_norm": 0.44722017645835876,
      "learning_rate": 0.0001722583844979955,
      "loss": 0.0433,
      "step": 2130
    },
    {
      "epoch": 1.4106789716545813,
      "grad_norm": 0.25077545642852783,
      "learning_rate": 0.0001719561945686646,
      "loss": 0.0345,
      "step": 2140
    },
    {
      "epoch": 1.4172709294660515,
      "grad_norm": 0.3619667887687683,
      "learning_rate": 0.00017165263583801535,
      "loss": 0.0325,
      "step": 2150
    },
    {
      "epoch": 1.4238628872775214,
      "grad_norm": 0.6268120408058167,
      "learning_rate": 0.0001713477140805553,
      "loss": 0.0364,
      "step": 2160
    },
    {
      "epoch": 1.4304548450889913,
      "grad_norm": 0.5806043148040771,
      "learning_rate": 0.0001710414350967204,
      "loss": 0.037,
      "step": 2170
    },
    {
      "epoch": 1.4370468029004615,
      "grad_norm": 0.3783499002456665,
      "learning_rate": 0.00017073380471276496,
      "loss": 0.0318,
      "step": 2180
    },
    {
      "epoch": 1.4436387607119314,
      "grad_norm": 0.45143669843673706,
      "learning_rate": 0.0001704248287806503,
      "loss": 0.0344,
      "step": 2190
    },
    {
      "epoch": 1.4502307185234016,
      "grad_norm": 0.3384231626987457,
      "learning_rate": 0.00017011451317793384,
      "loss": 0.0306,
      "step": 2200
    },
    {
      "epoch": 1.4568226763348715,
      "grad_norm": 0.45972728729248047,
      "learning_rate": 0.00016980286380765714,
      "loss": 0.0394,
      "step": 2210
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 0.31935372948646545,
      "learning_rate": 0.0001694898865982336,
      "loss": 0.0327,
      "step": 2220
    },
    {
      "epoch": 1.4700065919578114,
      "grad_norm": 0.3758127689361572,
      "learning_rate": 0.0001691755875033357,
      "loss": 0.0376,
      "step": 2230
    },
    {
      "epoch": 1.4765985497692815,
      "grad_norm": 0.7778825759887695,
      "learning_rate": 0.00016885997250178184,
      "loss": 0.0346,
      "step": 2240
    },
    {
      "epoch": 1.4831905075807514,
      "grad_norm": 0.7735721468925476,
      "learning_rate": 0.00016854304759742237,
      "loss": 0.038,
      "step": 2250
    },
    {
      "epoch": 1.4897824653922216,
      "grad_norm": 0.6678999662399292,
      "learning_rate": 0.00016822481881902568,
      "loss": 0.0488,
      "step": 2260
    },
    {
      "epoch": 1.4963744232036915,
      "grad_norm": 0.5145410895347595,
      "learning_rate": 0.00016790529222016328,
      "loss": 0.0423,
      "step": 2270
    },
    {
      "epoch": 1.5029663810151614,
      "grad_norm": 1.2216230630874634,
      "learning_rate": 0.00016758447387909474,
      "loss": 0.0435,
      "step": 2280
    },
    {
      "epoch": 1.5095583388266314,
      "grad_norm": 0.46562644839286804,
      "learning_rate": 0.00016726236989865213,
      "loss": 0.0329,
      "step": 2290
    },
    {
      "epoch": 1.5161502966381015,
      "grad_norm": 0.552429735660553,
      "learning_rate": 0.00016693898640612382,
      "loss": 0.041,
      "step": 2300
    },
    {
      "epoch": 1.5227422544495717,
      "grad_norm": 0.4718281328678131,
      "learning_rate": 0.00016661432955313789,
      "loss": 0.0317,
      "step": 2310
    },
    {
      "epoch": 1.5293342122610416,
      "grad_norm": 0.5447438955307007,
      "learning_rate": 0.00016628840551554522,
      "loss": 0.0365,
      "step": 2320
    },
    {
      "epoch": 1.5359261700725115,
      "grad_norm": 0.5384830236434937,
      "learning_rate": 0.00016596122049330206,
      "loss": 0.0365,
      "step": 2330
    },
    {
      "epoch": 1.5425181278839815,
      "grad_norm": 0.48313167691230774,
      "learning_rate": 0.0001656327807103518,
      "loss": 0.0381,
      "step": 2340
    },
    {
      "epoch": 1.5491100856954514,
      "grad_norm": 0.4898654520511627,
      "learning_rate": 0.000165303092414507,
      "loss": 0.0343,
      "step": 2350
    },
    {
      "epoch": 1.5557020435069215,
      "grad_norm": 0.47862598299980164,
      "learning_rate": 0.00016497216187733016,
      "loss": 0.0333,
      "step": 2360
    },
    {
      "epoch": 1.5622940013183917,
      "grad_norm": 0.4709709584712982,
      "learning_rate": 0.00016463999539401454,
      "loss": 0.0351,
      "step": 2370
    },
    {
      "epoch": 1.5688859591298616,
      "grad_norm": 0.5032598972320557,
      "learning_rate": 0.00016430659928326458,
      "loss": 0.0306,
      "step": 2380
    },
    {
      "epoch": 1.5754779169413315,
      "grad_norm": 0.9953115582466125,
      "learning_rate": 0.00016397197988717542,
      "loss": 0.0388,
      "step": 2390
    },
    {
      "epoch": 1.5820698747528015,
      "grad_norm": 0.5729079246520996,
      "learning_rate": 0.00016363614357111245,
      "loss": 0.0336,
      "step": 2400
    },
    {
      "epoch": 1.5886618325642716,
      "grad_norm": 0.8332236409187317,
      "learning_rate": 0.0001632990967235902,
      "loss": 0.0414,
      "step": 2410
    },
    {
      "epoch": 1.5952537903757416,
      "grad_norm": 1.0546754598617554,
      "learning_rate": 0.00016296084575615077,
      "loss": 0.0383,
      "step": 2420
    },
    {
      "epoch": 1.6018457481872117,
      "grad_norm": 0.546684205532074,
      "learning_rate": 0.0001626213971032418,
      "loss": 0.0382,
      "step": 2430
    },
    {
      "epoch": 1.6084377059986816,
      "grad_norm": 0.6224532723426819,
      "learning_rate": 0.00016228075722209422,
      "loss": 0.0379,
      "step": 2440
    },
    {
      "epoch": 1.6150296638101516,
      "grad_norm": 0.39089900255203247,
      "learning_rate": 0.00016193893259259934,
      "loss": 0.0364,
      "step": 2450
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 0.5209794044494629,
      "learning_rate": 0.00016159592971718548,
      "loss": 0.0329,
      "step": 2460
    },
    {
      "epoch": 1.6282135794330916,
      "grad_norm": 0.45939525961875916,
      "learning_rate": 0.0001612517551206946,
      "loss": 0.0316,
      "step": 2470
    },
    {
      "epoch": 1.6348055372445618,
      "grad_norm": 0.4331035614013672,
      "learning_rate": 0.00016090641535025774,
      "loss": 0.0424,
      "step": 2480
    },
    {
      "epoch": 1.6413974950560317,
      "grad_norm": 0.447710782289505,
      "learning_rate": 0.0001605599169751708,
      "loss": 0.0387,
      "step": 2490
    },
    {
      "epoch": 1.6479894528675016,
      "grad_norm": 0.4073365330696106,
      "learning_rate": 0.00016021226658676947,
      "loss": 0.0404,
      "step": 2500
    },
    {
      "epoch": 1.6545814106789716,
      "grad_norm": 0.36032500863075256,
      "learning_rate": 0.00015986347079830382,
      "loss": 0.0311,
      "step": 2510
    },
    {
      "epoch": 1.6611733684904415,
      "grad_norm": 0.23349802196025848,
      "learning_rate": 0.00015951353624481257,
      "loss": 0.0248,
      "step": 2520
    },
    {
      "epoch": 1.6677653263019117,
      "grad_norm": 0.3381997048854828,
      "learning_rate": 0.0001591624695829968,
      "loss": 0.0316,
      "step": 2530
    },
    {
      "epoch": 1.6743572841133818,
      "grad_norm": 0.39666473865509033,
      "learning_rate": 0.0001588102774910933,
      "loss": 0.0399,
      "step": 2540
    },
    {
      "epoch": 1.6809492419248517,
      "grad_norm": 0.38981807231903076,
      "learning_rate": 0.00015845696666874772,
      "loss": 0.0325,
      "step": 2550
    },
    {
      "epoch": 1.6875411997363217,
      "grad_norm": 0.614475667476654,
      "learning_rate": 0.00015810254383688682,
      "loss": 0.0386,
      "step": 2560
    },
    {
      "epoch": 1.6941331575477916,
      "grad_norm": 0.6012241244316101,
      "learning_rate": 0.0001577470157375909,
      "loss": 0.0426,
      "step": 2570
    },
    {
      "epoch": 1.7007251153592617,
      "grad_norm": 0.8984513878822327,
      "learning_rate": 0.00015739038913396546,
      "loss": 0.0385,
      "step": 2580
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 0.5758917331695557,
      "learning_rate": 0.00015703267081001237,
      "loss": 0.0327,
      "step": 2590
    },
    {
      "epoch": 1.7139090309822018,
      "grad_norm": 0.39728182554244995,
      "learning_rate": 0.00015667386757050106,
      "loss": 0.0359,
      "step": 2600
    },
    {
      "epoch": 1.7205009887936717,
      "grad_norm": 0.44694146513938904,
      "learning_rate": 0.00015631398624083907,
      "loss": 0.032,
      "step": 2610
    },
    {
      "epoch": 1.7270929466051417,
      "grad_norm": 0.5872260332107544,
      "learning_rate": 0.000155953033666942,
      "loss": 0.0307,
      "step": 2620
    },
    {
      "epoch": 1.7336849044166116,
      "grad_norm": 0.5661513209342957,
      "learning_rate": 0.00015559101671510349,
      "loss": 0.0326,
      "step": 2630
    },
    {
      "epoch": 1.7402768622280818,
      "grad_norm": 0.3842809796333313,
      "learning_rate": 0.00015522794227186443,
      "loss": 0.0326,
      "step": 2640
    },
    {
      "epoch": 1.746868820039552,
      "grad_norm": 0.24816927313804626,
      "learning_rate": 0.00015486381724388222,
      "loss": 0.0251,
      "step": 2650
    },
    {
      "epoch": 1.7534607778510218,
      "grad_norm": 0.2353767305612564,
      "learning_rate": 0.00015449864855779903,
      "loss": 0.0272,
      "step": 2660
    },
    {
      "epoch": 1.7600527356624918,
      "grad_norm": 0.25328564643859863,
      "learning_rate": 0.00015413244316011038,
      "loss": 0.0338,
      "step": 2670
    },
    {
      "epoch": 1.7666446934739617,
      "grad_norm": 0.37852951884269714,
      "learning_rate": 0.0001537652080170328,
      "loss": 0.0308,
      "step": 2680
    },
    {
      "epoch": 1.7732366512854316,
      "grad_norm": 0.294085294008255,
      "learning_rate": 0.00015339695011437127,
      "loss": 0.0236,
      "step": 2690
    },
    {
      "epoch": 1.7798286090969018,
      "grad_norm": 0.3499051034450531,
      "learning_rate": 0.00015302767645738655,
      "loss": 0.0305,
      "step": 2700
    },
    {
      "epoch": 1.786420566908372,
      "grad_norm": 0.4269741177558899,
      "learning_rate": 0.00015265739407066176,
      "loss": 0.0279,
      "step": 2710
    },
    {
      "epoch": 1.7930125247198418,
      "grad_norm": 0.3368455767631531,
      "learning_rate": 0.00015228610999796875,
      "loss": 0.0306,
      "step": 2720
    },
    {
      "epoch": 1.7996044825313118,
      "grad_norm": 0.36064472794532776,
      "learning_rate": 0.00015191383130213417,
      "loss": 0.0281,
      "step": 2730
    },
    {
      "epoch": 1.8061964403427817,
      "grad_norm": 0.42101433873176575,
      "learning_rate": 0.00015154056506490505,
      "loss": 0.0299,
      "step": 2740
    },
    {
      "epoch": 1.8127883981542519,
      "grad_norm": 0.3719172179698944,
      "learning_rate": 0.0001511663183868142,
      "loss": 0.0323,
      "step": 2750
    },
    {
      "epoch": 1.8193803559657218,
      "grad_norm": 0.3902226984500885,
      "learning_rate": 0.00015079109838704504,
      "loss": 0.0327,
      "step": 2760
    },
    {
      "epoch": 1.825972313777192,
      "grad_norm": 0.36405107378959656,
      "learning_rate": 0.00015041491220329616,
      "loss": 0.0278,
      "step": 2770
    },
    {
      "epoch": 1.8325642715886619,
      "grad_norm": 0.31391507387161255,
      "learning_rate": 0.0001500377669916456,
      "loss": 0.0325,
      "step": 2780
    },
    {
      "epoch": 1.8391562294001318,
      "grad_norm": 0.4089469611644745,
      "learning_rate": 0.0001496596699264147,
      "loss": 0.0253,
      "step": 2790
    },
    {
      "epoch": 1.8457481872116017,
      "grad_norm": 0.5822712779045105,
      "learning_rate": 0.00014928062820003166,
      "loss": 0.0337,
      "step": 2800
    },
    {
      "epoch": 1.8523401450230719,
      "grad_norm": 0.5532752275466919,
      "learning_rate": 0.00014890064902289466,
      "loss": 0.0316,
      "step": 2810
    },
    {
      "epoch": 1.858932102834542,
      "grad_norm": 0.39222195744514465,
      "learning_rate": 0.0001485197396232348,
      "loss": 0.0304,
      "step": 2820
    },
    {
      "epoch": 1.865524060646012,
      "grad_norm": 0.3746655285358429,
      "learning_rate": 0.00014813790724697832,
      "loss": 0.0361,
      "step": 2830
    },
    {
      "epoch": 1.8721160184574819,
      "grad_norm": 0.5020349621772766,
      "learning_rate": 0.0001477551591576092,
      "loss": 0.0351,
      "step": 2840
    },
    {
      "epoch": 1.8787079762689518,
      "grad_norm": 0.40259358286857605,
      "learning_rate": 0.00014737150263603063,
      "loss": 0.027,
      "step": 2850
    },
    {
      "epoch": 1.8852999340804217,
      "grad_norm": 0.6693785190582275,
      "learning_rate": 0.00014698694498042675,
      "loss": 0.0345,
      "step": 2860
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 0.6384851932525635,
      "learning_rate": 0.00014660149350612353,
      "loss": 0.0315,
      "step": 2870
    },
    {
      "epoch": 1.898483849703362,
      "grad_norm": 0.5224544405937195,
      "learning_rate": 0.00014621515554544997,
      "loss": 0.0259,
      "step": 2880
    },
    {
      "epoch": 1.905075807514832,
      "grad_norm": 0.5825631022453308,
      "learning_rate": 0.0001458279384475983,
      "loss": 0.0415,
      "step": 2890
    },
    {
      "epoch": 1.911667765326302,
      "grad_norm": 0.36511966586112976,
      "learning_rate": 0.0001454398495784844,
      "loss": 0.033,
      "step": 2900
    },
    {
      "epoch": 1.9182597231377718,
      "grad_norm": 0.4093778431415558,
      "learning_rate": 0.00014505089632060753,
      "loss": 0.0309,
      "step": 2910
    },
    {
      "epoch": 1.924851680949242,
      "grad_norm": 0.4290638566017151,
      "learning_rate": 0.00014466108607291003,
      "loss": 0.0309,
      "step": 2920
    },
    {
      "epoch": 1.931443638760712,
      "grad_norm": 0.6213640570640564,
      "learning_rate": 0.00014427042625063646,
      "loss": 0.0358,
      "step": 2930
    },
    {
      "epoch": 1.938035596572182,
      "grad_norm": 0.6244672536849976,
      "learning_rate": 0.00014387892428519258,
      "loss": 0.0387,
      "step": 2940
    },
    {
      "epoch": 1.944627554383652,
      "grad_norm": 0.380691796541214,
      "learning_rate": 0.000143486587624004,
      "loss": 0.0464,
      "step": 2950
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.4133692979812622,
      "learning_rate": 0.00014309342373037455,
      "loss": 0.0329,
      "step": 2960
    },
    {
      "epoch": 1.9578114700065918,
      "grad_norm": 0.4502374529838562,
      "learning_rate": 0.00014269944008334418,
      "loss": 0.0334,
      "step": 2970
    },
    {
      "epoch": 1.964403427818062,
      "grad_norm": 0.5235921740531921,
      "learning_rate": 0.00014230464417754675,
      "loss": 0.033,
      "step": 2980
    },
    {
      "epoch": 1.9709953856295321,
      "grad_norm": 0.5345565676689148,
      "learning_rate": 0.00014190904352306757,
      "loss": 0.0371,
      "step": 2990
    },
    {
      "epoch": 1.977587343441002,
      "grad_norm": 0.34067875146865845,
      "learning_rate": 0.0001415126456453004,
      "loss": 0.0408,
      "step": 3000
    },
    {
      "epoch": 1.984179301252472,
      "grad_norm": 0.36922353506088257,
      "learning_rate": 0.00014111545808480434,
      "loss": 0.0315,
      "step": 3010
    },
    {
      "epoch": 1.990771259063942,
      "grad_norm": 0.36315643787384033,
      "learning_rate": 0.0001407174883971604,
      "loss": 0.0311,
      "step": 3020
    },
    {
      "epoch": 1.9973632168754119,
      "grad_norm": 0.35053545236587524,
      "learning_rate": 0.0001403187441528277,
      "loss": 0.0367,
      "step": 3030
    },
    {
      "epoch": 2.0039551746868822,
      "grad_norm": 0.5017916560173035,
      "learning_rate": 0.00013991923293699956,
      "loss": 0.0353,
      "step": 3040
    },
    {
      "epoch": 2.010547132498352,
      "grad_norm": 0.3657391667366028,
      "learning_rate": 0.00013951896234945925,
      "loss": 0.0404,
      "step": 3050
    },
    {
      "epoch": 2.017139090309822,
      "grad_norm": 0.5382429957389832,
      "learning_rate": 0.00013911794000443528,
      "loss": 0.0346,
      "step": 3060
    },
    {
      "epoch": 2.023731048121292,
      "grad_norm": 0.5115209221839905,
      "learning_rate": 0.0001387161735304566,
      "loss": 0.0288,
      "step": 3070
    },
    {
      "epoch": 2.030323005932762,
      "grad_norm": 0.5078955888748169,
      "learning_rate": 0.00013831367057020748,
      "loss": 0.0323,
      "step": 3080
    },
    {
      "epoch": 2.036914963744232,
      "grad_norm": 0.4034331440925598,
      "learning_rate": 0.00013791043878038224,
      "loss": 0.0397,
      "step": 3090
    },
    {
      "epoch": 2.0435069215557022,
      "grad_norm": 0.23669302463531494,
      "learning_rate": 0.0001375064858315394,
      "loss": 0.0314,
      "step": 3100
    },
    {
      "epoch": 2.050098879367172,
      "grad_norm": 0.3059588074684143,
      "learning_rate": 0.000137101819407956,
      "loss": 0.0276,
      "step": 3110
    },
    {
      "epoch": 2.056690837178642,
      "grad_norm": 0.5819403529167175,
      "learning_rate": 0.00013669644720748118,
      "loss": 0.0285,
      "step": 3120
    },
    {
      "epoch": 2.063282794990112,
      "grad_norm": 0.6815973520278931,
      "learning_rate": 0.00013629037694138995,
      "loss": 0.0329,
      "step": 3130
    },
    {
      "epoch": 2.069874752801582,
      "grad_norm": 0.28361934423446655,
      "learning_rate": 0.0001358836163342364,
      "loss": 0.0271,
      "step": 3140
    },
    {
      "epoch": 2.076466710613052,
      "grad_norm": 0.2907734513282776,
      "learning_rate": 0.00013547617312370663,
      "loss": 0.0309,
      "step": 3150
    },
    {
      "epoch": 2.0830586684245223,
      "grad_norm": 0.5272607207298279,
      "learning_rate": 0.00013506805506047198,
      "loss": 0.0308,
      "step": 3160
    },
    {
      "epoch": 2.089650626235992,
      "grad_norm": 0.23821255564689636,
      "learning_rate": 0.00013465926990804107,
      "loss": 0.0341,
      "step": 3170
    },
    {
      "epoch": 2.096242584047462,
      "grad_norm": 0.5370649099349976,
      "learning_rate": 0.00013424982544261248,
      "loss": 0.0316,
      "step": 3180
    },
    {
      "epoch": 2.102834541858932,
      "grad_norm": 0.3361760675907135,
      "learning_rate": 0.00013383972945292665,
      "loss": 0.0248,
      "step": 3190
    },
    {
      "epoch": 2.109426499670402,
      "grad_norm": 0.48819541931152344,
      "learning_rate": 0.00013342898974011774,
      "loss": 0.0347,
      "step": 3200
    },
    {
      "epoch": 2.1160184574818723,
      "grad_norm": 0.24430608749389648,
      "learning_rate": 0.00013301761411756543,
      "loss": 0.0269,
      "step": 3210
    },
    {
      "epoch": 2.1226104152933423,
      "grad_norm": 0.4588664770126343,
      "learning_rate": 0.00013260561041074598,
      "loss": 0.0276,
      "step": 3220
    },
    {
      "epoch": 2.129202373104812,
      "grad_norm": 0.5559895634651184,
      "learning_rate": 0.0001321929864570835,
      "loss": 0.0257,
      "step": 3230
    },
    {
      "epoch": 2.135794330916282,
      "grad_norm": 0.547458827495575,
      "learning_rate": 0.00013177975010580085,
      "loss": 0.0223,
      "step": 3240
    },
    {
      "epoch": 2.142386288727752,
      "grad_norm": 0.3017808198928833,
      "learning_rate": 0.00013136590921777053,
      "loss": 0.031,
      "step": 3250
    },
    {
      "epoch": 2.148978246539222,
      "grad_norm": 0.44043952226638794,
      "learning_rate": 0.00013095147166536486,
      "loss": 0.0276,
      "step": 3260
    },
    {
      "epoch": 2.1555702043506924,
      "grad_norm": 0.4227822422981262,
      "learning_rate": 0.0001305364453323062,
      "loss": 0.0296,
      "step": 3270
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 0.4026118516921997,
      "learning_rate": 0.0001301208381135173,
      "loss": 0.0301,
      "step": 3280
    },
    {
      "epoch": 2.168754119973632,
      "grad_norm": 0.5354869961738586,
      "learning_rate": 0.0001297046579149708,
      "loss": 0.0286,
      "step": 3290
    },
    {
      "epoch": 2.175346077785102,
      "grad_norm": 0.42211246490478516,
      "learning_rate": 0.00012928791265353902,
      "loss": 0.0336,
      "step": 3300
    },
    {
      "epoch": 2.181938035596572,
      "grad_norm": 0.3645992577075958,
      "learning_rate": 0.00012887061025684333,
      "loss": 0.0242,
      "step": 3310
    },
    {
      "epoch": 2.188529993408042,
      "grad_norm": 0.2105298638343811,
      "learning_rate": 0.00012845275866310324,
      "loss": 0.0228,
      "step": 3320
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 0.25215044617652893,
      "learning_rate": 0.00012803436582098558,
      "loss": 0.0243,
      "step": 3330
    },
    {
      "epoch": 2.2017139090309823,
      "grad_norm": 0.4196263253688812,
      "learning_rate": 0.00012761543968945306,
      "loss": 0.0282,
      "step": 3340
    },
    {
      "epoch": 2.2083058668424522,
      "grad_norm": 0.1937485933303833,
      "learning_rate": 0.00012719598823761308,
      "loss": 0.0278,
      "step": 3350
    },
    {
      "epoch": 2.214897824653922,
      "grad_norm": 0.5221042037010193,
      "learning_rate": 0.00012677601944456604,
      "loss": 0.0311,
      "step": 3360
    },
    {
      "epoch": 2.221489782465392,
      "grad_norm": 0.2941031754016876,
      "learning_rate": 0.0001263555412992535,
      "loss": 0.0303,
      "step": 3370
    },
    {
      "epoch": 2.2280817402768625,
      "grad_norm": 0.31689217686653137,
      "learning_rate": 0.00012593456180030646,
| "loss": 0.0252, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.2346736980883324, | |
| "grad_norm": 0.42106205224990845, | |
| "learning_rate": 0.0001255130889558928, | |
| "loss": 0.0249, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.2412656558998023, | |
| "grad_norm": 0.576701283454895, | |
| "learning_rate": 0.0001250911307835653, | |
| "loss": 0.0303, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.2478576137112722, | |
| "grad_norm": 0.49954476952552795, | |
| "learning_rate": 0.00012466869531010895, | |
| "loss": 0.0323, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.254449571522742, | |
| "grad_norm": 0.4963241517543793, | |
| "learning_rate": 0.0001242457905713883, | |
| "loss": 0.0316, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.261041529334212, | |
| "grad_norm": 0.23066122829914093, | |
| "learning_rate": 0.00012382242461219452, | |
| "loss": 0.0226, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.267633487145682, | |
| "grad_norm": 0.540354311466217, | |
| "learning_rate": 0.00012339860548609262, | |
| "loss": 0.0365, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.2742254449571524, | |
| "grad_norm": 0.48116335272789, | |
| "learning_rate": 0.0001229743412552679, | |
| "loss": 0.0268, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.2808174027686223, | |
| "grad_norm": 0.4430583417415619, | |
| "learning_rate": 0.00012254963999037285, | |
| "loss": 0.0263, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.2874093605800923, | |
| "grad_norm": 0.42470598220825195, | |
| "learning_rate": 0.0001221245097703735, | |
| "loss": 0.0354, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.294001318391562, | |
| "grad_norm": 0.31455087661743164, | |
| "learning_rate": 0.00012169895868239574, | |
| "loss": 0.0241, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.300593276203032, | |
| "grad_norm": 0.3215204179286957, | |
| "learning_rate": 0.00012127299482157149, | |
| "loss": 0.0332, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.3071852340145025, | |
| "grad_norm": 0.3963293135166168, | |
| "learning_rate": 0.00012084662629088481, | |
| "loss": 0.025, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.3137771918259724, | |
| "grad_norm": 0.4304813742637634, | |
| "learning_rate": 0.00012041986120101764, | |
| "loss": 0.0354, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.3203691496374423, | |
| "grad_norm": 0.3873739242553711, | |
| "learning_rate": 0.00011999270767019553, | |
| "loss": 0.0277, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.3269611074489123, | |
| "grad_norm": 0.4315703809261322, | |
| "learning_rate": 0.00011956517382403321, | |
| "loss": 0.0301, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.333553065260382, | |
| "grad_norm": 0.4416598081588745, | |
| "learning_rate": 0.00011913726779538008, | |
| "loss": 0.0283, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.3401450230718526, | |
| "grad_norm": 0.3677782416343689, | |
| "learning_rate": 0.0001187089977241654, | |
| "loss": 0.0355, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.3467369808833225, | |
| "grad_norm": 0.4988672733306885, | |
| "learning_rate": 0.00011828037175724356, | |
| "loss": 0.0314, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.3533289386947924, | |
| "grad_norm": 0.4604177474975586, | |
| "learning_rate": 0.00011785139804823906, | |
| "loss": 0.0337, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.3599208965062624, | |
| "grad_norm": 0.3596359193325043, | |
| "learning_rate": 0.00011742208475739133, | |
| "loss": 0.0295, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.3665128543177323, | |
| "grad_norm": 0.16485251486301422, | |
| "learning_rate": 0.0001169924400513996, | |
| "loss": 0.0275, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.373104812129202, | |
| "grad_norm": 0.3272377550601959, | |
| "learning_rate": 0.00011656247210326748, | |
| "loss": 0.0305, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.379696769940672, | |
| "grad_norm": 0.32883545756340027, | |
| "learning_rate": 0.0001161321890921476, | |
| "loss": 0.0314, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.3862887277521425, | |
| "grad_norm": 0.49502697587013245, | |
| "learning_rate": 0.00011570159920318584, | |
| "loss": 0.0323, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.3928806855636124, | |
| "grad_norm": 0.3317064344882965, | |
| "learning_rate": 0.00011527071062736583, | |
| "loss": 0.0284, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.3994726433750824, | |
| "grad_norm": 0.29318150877952576, | |
| "learning_rate": 0.00011483953156135292, | |
| "loss": 0.0226, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.4060646011865523, | |
| "grad_norm": 0.48932701349258423, | |
| "learning_rate": 0.00011440807020733843, | |
| "loss": 0.0287, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.4126565589980222, | |
| "grad_norm": 0.358005166053772, | |
| "learning_rate": 0.00011397633477288359, | |
| "loss": 0.0235, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.4192485168094926, | |
| "grad_norm": 0.3554854691028595, | |
| "learning_rate": 0.00011354433347076331, | |
| "loss": 0.0269, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.4258404746209625, | |
| "grad_norm": 0.3954286277294159, | |
| "learning_rate": 0.00011311207451881008, | |
| "loss": 0.0264, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.4324324324324325, | |
| "grad_norm": 0.3300182819366455, | |
| "learning_rate": 0.00011267956613975752, | |
| "loss": 0.0291, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 0.22343868017196655, | |
| "learning_rate": 0.00011224681656108411, | |
| "loss": 0.0251, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.4456163480553723, | |
| "grad_norm": 0.3663915991783142, | |
| "learning_rate": 0.00011181383401485656, | |
| "loss": 0.0295, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.4522083058668427, | |
| "grad_norm": 0.39715585112571716, | |
| "learning_rate": 0.00011138062673757325, | |
| "loss": 0.0299, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.4588002636783126, | |
| "grad_norm": 0.3747979402542114, | |
| "learning_rate": 0.00011094720297000753, | |
| "loss": 0.0295, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.4653922214897825, | |
| "grad_norm": 0.2834596037864685, | |
| "learning_rate": 0.00011051357095705101, | |
| "loss": 0.0284, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.4719841793012525, | |
| "grad_norm": 0.3044513165950775, | |
| "learning_rate": 0.0001100797389475567, | |
| "loss": 0.0272, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.4785761371127224, | |
| "grad_norm": 0.39235764741897583, | |
| "learning_rate": 0.00010964571519418207, | |
| "loss": 0.024, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.4851680949241923, | |
| "grad_norm": 0.31392836570739746, | |
| "learning_rate": 0.00010921150795323207, | |
| "loss": 0.0229, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.4917600527356623, | |
| "grad_norm": 0.3227923512458801, | |
| "learning_rate": 0.00010877712548450207, | |
| "loss": 0.0235, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.4983520105471326, | |
| "grad_norm": 0.35434576869010925, | |
| "learning_rate": 0.00010834257605112079, | |
| "loss": 0.0265, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.5049439683586026, | |
| "grad_norm": 0.3610621988773346, | |
| "learning_rate": 0.00010790786791939301, | |
| "loss": 0.0286, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.5115359261700725, | |
| "grad_norm": 0.26061367988586426, | |
| "learning_rate": 0.00010747300935864243, | |
| "loss": 0.0302, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.5181278839815424, | |
| "grad_norm": 0.3455495536327362, | |
| "learning_rate": 0.00010703800864105429, | |
| "loss": 0.0283, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.5247198417930123, | |
| "grad_norm": 0.5354321002960205, | |
| "learning_rate": 0.00010660287404151807, | |
| "loss": 0.0279, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.5313117996044827, | |
| "grad_norm": 0.23394666612148285, | |
| "learning_rate": 0.00010616761383747, | |
| "loss": 0.0318, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.5379037574159526, | |
| "grad_norm": 0.3995780348777771, | |
| "learning_rate": 0.00010573223630873565, | |
| "loss": 0.0265, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.5444957152274226, | |
| "grad_norm": 0.4800235331058502, | |
| "learning_rate": 0.00010529674973737252, | |
| "loss": 0.0281, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.5510876730388925, | |
| "grad_norm": 0.2611030042171478, | |
| "learning_rate": 0.00010486116240751223, | |
| "loss": 0.0297, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.5576796308503624, | |
| "grad_norm": 0.3945279121398926, | |
| "learning_rate": 0.0001044254826052032, | |
| "loss": 0.025, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.564271588661833, | |
| "grad_norm": 0.5326240658760071, | |
| "learning_rate": 0.00010398971861825297, | |
| "loss": 0.0264, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.5708635464733027, | |
| "grad_norm": 0.3610016703605652, | |
| "learning_rate": 0.00010355387873607036, | |
| "loss": 0.0259, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.5774555042847727, | |
| "grad_norm": 0.3786564767360687, | |
| "learning_rate": 0.0001031179712495081, | |
| "loss": 0.0253, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.5840474620962426, | |
| "grad_norm": 0.5698022246360779, | |
| "learning_rate": 0.0001026820044507048, | |
| "loss": 0.021, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.5906394199077125, | |
| "grad_norm": 0.4795434772968292, | |
| "learning_rate": 0.00010224598663292737, | |
| "loss": 0.0267, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.5972313777191824, | |
| "grad_norm": 0.4011961817741394, | |
| "learning_rate": 0.00010180992609041325, | |
| "loss": 0.035, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.6038233355306524, | |
| "grad_norm": 0.5173267126083374, | |
| "learning_rate": 0.00010137383111821266, | |
| "loss": 0.0298, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.6104152933421227, | |
| "grad_norm": 0.47045668959617615, | |
| "learning_rate": 0.00010093771001203076, | |
| "loss": 0.0296, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.6170072511535927, | |
| "grad_norm": 0.5313148498535156, | |
| "learning_rate": 0.0001005015710680698, | |
| "loss": 0.026, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.6235992089650626, | |
| "grad_norm": 0.40992313623428345, | |
| "learning_rate": 0.00010006542258287139, | |
| "loss": 0.0213, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.6301911667765325, | |
| "grad_norm": 0.2713076174259186, | |
| "learning_rate": 9.96292728531586e-05, | |
| "loss": 0.0238, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.6367831245880025, | |
| "grad_norm": 0.41798898577690125, | |
| "learning_rate": 9.919313017567822e-05, | |
| "loss": 0.0269, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.643375082399473, | |
| "grad_norm": 0.26005855202674866, | |
| "learning_rate": 9.875700284704286e-05, | |
| "loss": 0.0262, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.6499670402109428, | |
| "grad_norm": 0.24366049468517303, | |
| "learning_rate": 9.83208991635732e-05, | |
| "loss": 0.0234, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.6565589980224127, | |
| "grad_norm": 0.424334317445755, | |
| "learning_rate": 9.788482742114003e-05, | |
| "loss": 0.0296, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.6631509558338826, | |
| "grad_norm": 0.3093094229698181, | |
| "learning_rate": 9.744879591500662e-05, | |
| "loss": 0.0282, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.6697429136453525, | |
| "grad_norm": 0.42985987663269043, | |
| "learning_rate": 9.701281293967083e-05, | |
| "loss": 0.031, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.676334871456823, | |
| "grad_norm": 0.3328607380390167, | |
| "learning_rate": 9.657688678870728e-05, | |
| "loss": 0.0318, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.682926829268293, | |
| "grad_norm": 0.35078462958335876, | |
| "learning_rate": 9.614102575460973e-05, | |
| "loss": 0.0268, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.6895187870797628, | |
| "grad_norm": 0.4191462993621826, | |
| "learning_rate": 9.57052381286331e-05, | |
| "loss": 0.03, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.6961107448912327, | |
| "grad_norm": 0.4283992648124695, | |
| "learning_rate": 9.526953220063603e-05, | |
| "loss": 0.0235, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.7027027027027026, | |
| "grad_norm": 0.35658934712409973, | |
| "learning_rate": 9.483391625892293e-05, | |
| "loss": 0.0243, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.7092946605141726, | |
| "grad_norm": 0.2613814175128937, | |
| "learning_rate": 9.439839859008653e-05, | |
| "loss": 0.0232, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.7158866183256425, | |
| "grad_norm": 0.24698810279369354, | |
| "learning_rate": 9.396298747885013e-05, | |
| "loss": 0.0232, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.722478576137113, | |
| "grad_norm": 0.25733861327171326, | |
| "learning_rate": 9.352769120790988e-05, | |
| "loss": 0.0231, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.729070533948583, | |
| "grad_norm": 0.288001149892807, | |
| "learning_rate": 9.309251805777754e-05, | |
| "loss": 0.0247, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.7356624917600527, | |
| "grad_norm": 0.47979527711868286, | |
| "learning_rate": 9.265747630662265e-05, | |
| "loss": 0.0315, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.7422544495715226, | |
| "grad_norm": 0.5932050943374634, | |
| "learning_rate": 9.22225742301153e-05, | |
| "loss": 0.0252, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.7488464073829926, | |
| "grad_norm": 0.3525910973548889, | |
| "learning_rate": 9.178782010126844e-05, | |
| "loss": 0.0249, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.755438365194463, | |
| "grad_norm": 0.27204054594039917, | |
| "learning_rate": 9.135322219028079e-05, | |
| "loss": 0.025, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.762030323005933, | |
| "grad_norm": 0.3478144407272339, | |
| "learning_rate": 9.091878876437933e-05, | |
| "loss": 0.0216, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.768622280817403, | |
| "grad_norm": 0.29393240809440613, | |
| "learning_rate": 9.04845280876621e-05, | |
| "loss": 0.0214, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.7752142386288727, | |
| "grad_norm": 0.21876759827136993, | |
| "learning_rate": 9.005044842094101e-05, | |
| "loss": 0.0245, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.7818061964403427, | |
| "grad_norm": 0.423742413520813, | |
| "learning_rate": 8.961655802158456e-05, | |
| "loss": 0.0241, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.788398154251813, | |
| "grad_norm": 0.38848140835762024, | |
| "learning_rate": 8.918286514336099e-05, | |
| "loss": 0.0238, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.794990112063283, | |
| "grad_norm": 0.28686466813087463, | |
| "learning_rate": 8.874937803628115e-05, | |
| "loss": 0.022, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.801582069874753, | |
| "grad_norm": 0.3457236588001251, | |
| "learning_rate": 8.831610494644148e-05, | |
| "loss": 0.0345, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.808174027686223, | |
| "grad_norm": 0.339136004447937, | |
| "learning_rate": 8.788305411586736e-05, | |
| "loss": 0.0194, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.8147659854976927, | |
| "grad_norm": 0.3297877907752991, | |
| "learning_rate": 8.745023378235602e-05, | |
| "loss": 0.0199, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.8213579433091627, | |
| "grad_norm": 0.39552271366119385, | |
| "learning_rate": 8.701765217932022e-05, | |
| "loss": 0.0266, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.8279499011206326, | |
| "grad_norm": 0.40580829977989197, | |
| "learning_rate": 8.658531753563122e-05, | |
| "loss": 0.0367, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.834541858932103, | |
| "grad_norm": 0.3342481553554535, | |
| "learning_rate": 8.615323807546258e-05, | |
| "loss": 0.0223, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.841133816743573, | |
| "grad_norm": 0.25729164481163025, | |
| "learning_rate": 8.572142201813363e-05, | |
| "loss": 0.023, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.847725774555043, | |
| "grad_norm": 0.3168254792690277, | |
| "learning_rate": 8.528987757795286e-05, | |
| "loss": 0.0237, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.8543177323665128, | |
| "grad_norm": 0.4179421365261078, | |
| "learning_rate": 8.485861296406207e-05, | |
| "loss": 0.0268, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.8609096901779827, | |
| "grad_norm": 0.46458080410957336, | |
| "learning_rate": 8.442763638027985e-05, | |
| "loss": 0.0216, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.867501647989453, | |
| "grad_norm": 0.35828524827957153, | |
| "learning_rate": 8.399695602494581e-05, | |
| "loss": 0.0204, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.874093605800923, | |
| "grad_norm": 0.34387773275375366, | |
| "learning_rate": 8.356658009076441e-05, | |
| "loss": 0.0239, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.880685563612393, | |
| "grad_norm": 0.3083021342754364, | |
| "learning_rate": 8.313651676464923e-05, | |
| "loss": 0.0228, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.887277521423863, | |
| "grad_norm": 0.2175825834274292, | |
| "learning_rate": 8.270677422756725e-05, | |
| "loss": 0.0201, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.8938694792353328, | |
| "grad_norm": 0.2774793803691864, | |
| "learning_rate": 8.227736065438302e-05, | |
| "loss": 0.0234, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.900461437046803, | |
| "grad_norm": 0.2598700523376465, | |
| "learning_rate": 8.184828421370348e-05, | |
| "loss": 0.0241, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.9070533948582726, | |
| "grad_norm": 0.3586549460887909, | |
| "learning_rate": 8.141955306772229e-05, | |
| "loss": 0.0162, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.913645352669743, | |
| "grad_norm": 0.26286324858665466, | |
| "learning_rate": 8.099117537206477e-05, | |
| "loss": 0.0212, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.920237310481213, | |
| "grad_norm": 0.4125373661518097, | |
| "learning_rate": 8.05631592756325e-05, | |
| "loss": 0.0202, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 0.29703447222709656, | |
| "learning_rate": 8.013551292044859e-05, | |
| "loss": 0.0213, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.933421226104153, | |
| "grad_norm": 0.3580416738986969, | |
| "learning_rate": 7.97082444415027e-05, | |
| "loss": 0.0226, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.9400131839156227, | |
| "grad_norm": 0.4119264781475067, | |
| "learning_rate": 7.928136196659614e-05, | |
| "loss": 0.0242, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.946605141727093, | |
| "grad_norm": 0.5699878931045532, | |
| "learning_rate": 7.885487361618754e-05, | |
| "loss": 0.0262, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.953197099538563, | |
| "grad_norm": 0.4126439094543457, | |
| "learning_rate": 7.842878750323801e-05, | |
| "loss": 0.021, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.959789057350033, | |
| "grad_norm": 0.42604967951774597, | |
| "learning_rate": 7.800311173305718e-05, | |
| "loss": 0.0219, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.966381015161503, | |
| "grad_norm": 0.19208472967147827, | |
| "learning_rate": 7.757785440314882e-05, | |
| "loss": 0.0284, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.972972972972973, | |
| "grad_norm": 0.43162015080451965, | |
| "learning_rate": 7.715302360305678e-05, | |
| "loss": 0.0192, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.979564930784443, | |
| "grad_norm": 0.7263951301574707, | |
| "learning_rate": 7.672862741421126e-05, | |
| "loss": 0.0299, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.986156888595913, | |
| "grad_norm": 0.3890402615070343, | |
| "learning_rate": 7.63046739097748e-05, | |
| "loss": 0.0222, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.992748846407383, | |
| "grad_norm": 0.25311848521232605, | |
| "learning_rate": 7.588117115448911e-05, | |
| "loss": 0.0208, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.999340804218853, | |
| "grad_norm": 0.33752700686454773, | |
| "learning_rate": 7.545812720452127e-05, | |
| "loss": 0.0263, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.005932762030323, | |
| "grad_norm": 0.2610788345336914, | |
| "learning_rate": 7.50355501073107e-05, | |
| "loss": 0.0246, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 3.012524719841793, | |
| "grad_norm": 0.32036837935447693, | |
| "learning_rate": 7.461344790141607e-05, | |
| "loss": 0.0283, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 3.019116677653263, | |
| "grad_norm": 0.4340413212776184, | |
| "learning_rate": 7.419182861636218e-05, | |
| "loss": 0.0293, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 3.025708635464733, | |
| "grad_norm": 0.39858514070510864, | |
| "learning_rate": 7.377070027248756e-05, | |
| "loss": 0.0186, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 3.032300593276203, | |
| "grad_norm": 0.26919031143188477, | |
| "learning_rate": 7.335007088079156e-05, | |
| "loss": 0.0208, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.038892551087673, | |
| "grad_norm": 0.4067997634410858, | |
| "learning_rate": 7.292994844278223e-05, | |
| "loss": 0.0261, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 3.045484508899143, | |
| "grad_norm": 0.4950489103794098, | |
| "learning_rate": 7.251034095032388e-05, | |
| "loss": 0.0292, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 3.052076466710613, | |
| "grad_norm": 0.2269221693277359, | |
| "learning_rate": 7.20912563854852e-05, | |
| "loss": 0.0175, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 3.058668424522083, | |
| "grad_norm": 0.32157209515571594, | |
| "learning_rate": 7.167270272038747e-05, | |
| "loss": 0.0187, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 3.065260382333553, | |
| "grad_norm": 0.2660551369190216, | |
| "learning_rate": 7.12546879170527e-05, | |
| "loss": 0.023, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.071852340145023, | |
| "grad_norm": 0.29758307337760925, | |
| "learning_rate": 7.08372199272524e-05, | |
| "loss": 0.0291, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 3.078444297956493, | |
| "grad_norm": 0.32291552424430847, | |
| "learning_rate": 7.042030669235606e-05, | |
| "loss": 0.0334, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 3.085036255767963, | |
| "grad_norm": 0.481623113155365, | |
| "learning_rate": 7.000395614318038e-05, | |
| "loss": 0.0192, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 3.0916282135794333, | |
| "grad_norm": 0.36292940378189087, | |
| "learning_rate": 6.958817619983822e-05, | |
| "loss": 0.0279, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 3.098220171390903, | |
| "grad_norm": 0.34903573989868164, | |
| "learning_rate": 6.917297477158792e-05, | |
| "loss": 0.0219, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.104812129202373, | |
| "grad_norm": 0.290768563747406, | |
| "learning_rate": 6.875835975668298e-05, | |
| "loss": 0.0245, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 3.111404087013843, | |
| "grad_norm": 0.4250969886779785, | |
| "learning_rate": 6.834433904222162e-05, | |
| "loss": 0.0239, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 3.117996044825313, | |
| "grad_norm": 0.31465357542037964, | |
| "learning_rate": 6.793092050399698e-05, | |
| "loss": 0.0227, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 3.124588002636783, | |
| "grad_norm": 0.46385765075683594, | |
| "learning_rate": 6.75181120063471e-05, | |
| "loss": 0.0271, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 3.1311799604482533, | |
| "grad_norm": 0.37862929701805115, | |
| "learning_rate": 6.710592140200542e-05, | |
| "loss": 0.0227, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.1377719182597232, | |
| "grad_norm": 0.49200916290283203, | |
| "learning_rate": 6.669435653195146e-05, | |
| "loss": 0.0201, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 3.144363876071193, | |
| "grad_norm": 0.4198756217956543, | |
| "learning_rate": 6.628342522526143e-05, | |
| "loss": 0.0216, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 3.150955833882663, | |
| "grad_norm": 0.5533847212791443, | |
| "learning_rate": 6.587313529895957e-05, | |
| "loss": 0.034, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 3.157547791694133, | |
| "grad_norm": 0.37719669938087463, | |
| "learning_rate": 6.546349455786926e-05, | |
| "loss": 0.0282, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 3.164139749505603, | |
| "grad_norm": 0.6606992483139038, | |
| "learning_rate": 6.505451079446467e-05, | |
| "loss": 0.0217, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.1707317073170733, | |
| "grad_norm": 0.20845943689346313, | |
| "learning_rate": 6.464619178872247e-05, | |
| "loss": 0.023, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 3.1773236651285433, | |
| "grad_norm": 0.23495689034461975, | |
| "learning_rate": 6.42385453079738e-05, | |
| "loss": 0.0256, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 3.183915622940013, | |
| "grad_norm": 0.1919371336698532, | |
| "learning_rate": 6.38315791067567e-05, | |
| "loss": 0.019, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 3.190507580751483, | |
| "grad_norm": 0.3485127091407776, | |
| "learning_rate": 6.342530092666821e-05, | |
| "loss": 0.0205, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 3.197099538562953, | |
| "grad_norm": 0.2419605702161789, | |
| "learning_rate": 6.301971849621757e-05, | |
| "loss": 0.0197, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.2036914963744234, | |
| "grad_norm": 0.23359638452529907, | |
| "learning_rate": 6.261483953067886e-05, | |
| "loss": 0.0215, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 3.2102834541858933, | |
| "grad_norm": 0.4236893355846405, | |
| "learning_rate": 6.221067173194442e-05, | |
| "loss": 0.0259, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 3.2168754119973633, | |
| "grad_norm": 0.35271692276000977, | |
| "learning_rate": 6.180722278837825e-05, | |
| "loss": 0.0229, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 3.223467369808833, | |
| "grad_norm": 0.5368591547012329, | |
| "learning_rate": 6.140450037466974e-05, | |
| "loss": 0.0227, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 3.230059327620303, | |
| "grad_norm": 0.3813161849975586, | |
| "learning_rate": 6.1002512151687796e-05, | |
| "loss": 0.0175, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.236651285431773, | |
| "grad_norm": 0.40781912207603455, | |
| "learning_rate": 6.060126576633497e-05, | |
| "loss": 0.0278, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 3.2432432432432434, | |
| "grad_norm": 0.3028331398963928, | |
| "learning_rate": 6.0200768851402133e-05, | |
| "loss": 0.0212, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 3.2498352010547134, | |
| "grad_norm": 0.20801442861557007, | |
| "learning_rate": 5.980102902542306e-05, | |
| "loss": 0.0244, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 3.2564271588661833, | |
| "grad_norm": 0.3236633241176605, | |
| "learning_rate": 5.9402053892529794e-05, | |
| "loss": 0.023, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 3.263019116677653, | |
| "grad_norm": 0.3075791895389557, | |
| "learning_rate": 5.9003851042307804e-05, | |
| "loss": 0.0193, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.269611074489123, | |
| "grad_norm": 0.33486539125442505, | |
| "learning_rate": 5.86064280496516e-05, | |
| "loss": 0.0212, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 3.276203032300593, | |
| "grad_norm": 0.4018231928348541, | |
| "learning_rate": 5.8209792474620815e-05, | |
| "loss": 0.0215, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 3.2827949901120634, | |
| "grad_norm": 0.35829004645347595, | |
| "learning_rate": 5.78139518622961e-05, | |
| "loss": 0.0228, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 3.2893869479235334, | |
| "grad_norm": 0.2682739496231079, | |
| "learning_rate": 5.741891374263593e-05, | |
| "loss": 0.0255, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 3.2959789057350033, | |
| "grad_norm": 0.3929627537727356, | |
| "learning_rate": 5.702468563033306e-05, | |
| "loss": 0.0228, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.3025708635464732, | |
| "grad_norm": 0.2807949483394623, | |
| "learning_rate": 5.663127502467184e-05, | |
| "loss": 0.0207, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 3.309162821357943, | |
| "grad_norm": 0.33235079050064087, | |
| "learning_rate": 5.6238689409385346e-05, | |
| "loss": 0.0243, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 3.3157547791694135, | |
| "grad_norm": 0.28995218873023987, | |
| "learning_rate": 5.5846936252513174e-05, | |
| "loss": 0.017, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 3.3223467369808835, | |
| "grad_norm": 0.2601809799671173, | |
| "learning_rate": 5.54560230062593e-05, | |
| "loss": 0.0166, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 3.3289386947923534, | |
| "grad_norm": 0.3650406301021576, | |
| "learning_rate": 5.5065957106850204e-05, | |
| "loss": 0.021, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.3355306526038233, | |
| "grad_norm": 0.48497456312179565, | |
| "learning_rate": 5.4676745974393764e-05, | |
| "loss": 0.0173, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 3.3421226104152932, | |
| "grad_norm": 0.3954178988933563, | |
| "learning_rate": 5.4288397012737646e-05, | |
| "loss": 0.02, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 3.348714568226763, | |
| "grad_norm": 0.21555176377296448, | |
| "learning_rate": 5.390091760932887e-05, | |
| "loss": 0.0208, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 3.3553065260382335, | |
| "grad_norm": 0.4477789103984833, | |
| "learning_rate": 5.3514315135073076e-05, | |
| "loss": 0.023, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 3.3618984838497035, | |
| "grad_norm": 0.4595910906791687, | |
| "learning_rate": 5.3128596944194234e-05, | |
| "loss": 0.027, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.3684904416611734, | |
| "grad_norm": 0.3426424264907837, | |
| "learning_rate": 5.274377037409497e-05, | |
| "loss": 0.0224, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 3.3750823994726433, | |
| "grad_norm": 0.2647363841533661, | |
| "learning_rate": 5.235984274521684e-05, | |
| "loss": 0.0238, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 3.3816743572841133, | |
| "grad_norm": 0.21992464363574982, | |
| "learning_rate": 5.197682136090107e-05, | |
| "loss": 0.0163, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 3.388266315095583, | |
| "grad_norm": 0.6907774209976196, | |
| "learning_rate": 5.159471350724978e-05, | |
| "loss": 0.0223, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 3.3948582729070536, | |
| "grad_norm": 0.44378501176834106, | |
| "learning_rate": 5.121352645298708e-05, | |
| "loss": 0.0245, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.4014502307185235, | |
| "grad_norm": 0.25844740867614746, | |
| "learning_rate": 5.083326744932117e-05, | |
| "loss": 0.0211, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 3.4080421885299934, | |
| "grad_norm": 0.3211382031440735, | |
| "learning_rate": 5.0453943729806094e-05, | |
| "loss": 0.0207, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 0.25202128291130066, | |
| "learning_rate": 5.007556251020434e-05, | |
| "loss": 0.0215, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 3.4212261041529333, | |
| "grad_norm": 0.3003428876399994, | |
| "learning_rate": 4.9698130988349424e-05, | |
| "loss": 0.0207, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 3.4278180619644036, | |
| "grad_norm": 0.32026761770248413, | |
| "learning_rate": 4.9321656344009115e-05, | |
| "loss": 0.0196, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.4344100197758736, | |
| "grad_norm": 0.26623809337615967, | |
| "learning_rate": 4.894614573874877e-05, | |
| "loss": 0.0219, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 3.4410019775873435, | |
| "grad_norm": 0.35238540172576904, | |
| "learning_rate": 4.857160631579509e-05, | |
| "loss": 0.0152, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 3.4475939353988134, | |
| "grad_norm": 0.3443749248981476, | |
| "learning_rate": 4.819804519990033e-05, | |
| "loss": 0.0232, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 3.4541858932102834, | |
| "grad_norm": 0.35800328850746155, | |
| "learning_rate": 4.782546949720658e-05, | |
| "loss": 0.0217, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 3.4607778510217533, | |
| "grad_norm": 0.37850216031074524, | |
| "learning_rate": 4.745388629511084e-05, | |
| "loss": 0.0167, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.4673698088332237, | |
| "grad_norm": 0.24581514298915863, | |
| "learning_rate": 4.708330266212993e-05, | |
| "loss": 0.0179, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 3.4739617666446936, | |
| "grad_norm": 0.16642197966575623, | |
| "learning_rate": 4.671372564776629e-05, | |
| "loss": 0.0169, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 3.4805537244561635, | |
| "grad_norm": 0.32910865545272827, | |
| "learning_rate": 4.634516228237372e-05, | |
| "loss": 0.019, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 3.4871456822676334, | |
| "grad_norm": 0.21662920713424683, | |
| "learning_rate": 4.59776195770236e-05, | |
| "loss": 0.0162, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 3.4937376400791034, | |
| "grad_norm": 0.3485572934150696, | |
| "learning_rate": 4.561110452337171e-05, | |
| "loss": 0.0217, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.5003295978905733, | |
| "grad_norm": 0.20581798255443573, | |
| "learning_rate": 4.5245624093525e-05, | |
| "loss": 0.0296, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 3.5069215557020437, | |
| "grad_norm": 0.35009968280792236, | |
| "learning_rate": 4.488118523990915e-05, | |
| "loss": 0.0208, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 3.5135135135135136, | |
| "grad_norm": 0.39382439851760864, | |
| "learning_rate": 4.451779489513628e-05, | |
| "loss": 0.0217, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 3.5201054713249835, | |
| "grad_norm": 0.348563551902771, | |
| "learning_rate": 4.415545997187296e-05, | |
| "loss": 0.0165, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 3.5266974291364535, | |
| "grad_norm": 0.494354784488678, | |
| "learning_rate": 4.379418736270886e-05, | |
| "loss": 0.0232, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.5332893869479234, | |
| "grad_norm": 0.1578008085489273, | |
| "learning_rate": 4.343398394002547e-05, | |
| "loss": 0.0226, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 3.5398813447593938, | |
| "grad_norm": 0.3410768210887909, | |
| "learning_rate": 4.307485655586557e-05, | |
| "loss": 0.0219, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 3.5464733025708637, | |
| "grad_norm": 0.20960773527622223, | |
| "learning_rate": 4.271681204180268e-05, | |
| "loss": 0.0209, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 3.5530652603823336, | |
| "grad_norm": 0.22281195223331451, | |
| "learning_rate": 4.2359857208811284e-05, | |
| "loss": 0.0233, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 3.5596572181938035, | |
| "grad_norm": 0.3393511474132538, | |
| "learning_rate": 4.2003998847137174e-05, | |
| "loss": 0.0209, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.5662491760052735, | |
| "grad_norm": 0.6712432503700256, | |
| "learning_rate": 4.164924372616821e-05, | |
| "loss": 0.0249, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 3.572841133816744, | |
| "grad_norm": 0.18807201087474823, | |
| "learning_rate": 4.129559859430573e-05, | |
| "loss": 0.024, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 3.5794330916282133, | |
| "grad_norm": 0.4251366853713989, | |
| "learning_rate": 4.094307017883606e-05, | |
| "loss": 0.0174, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 3.5860250494396837, | |
| "grad_norm": 0.2247576266527176, | |
| "learning_rate": 4.0591665185802576e-05, | |
| "loss": 0.0214, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 3.5926170072511536, | |
| "grad_norm": 0.643822968006134, | |
| "learning_rate": 4.0241390299878e-05, | |
| "loss": 0.0222, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.5992089650626236, | |
| "grad_norm": 0.37506723403930664, | |
| "learning_rate": 3.989225218423753e-05, | |
| "loss": 0.0147, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 3.6058009228740935, | |
| "grad_norm": 0.3052820861339569, | |
| "learning_rate": 3.954425748043186e-05, | |
| "loss": 0.0191, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 3.6123928806855634, | |
| "grad_norm": 0.3424012362957001, | |
| "learning_rate": 3.9197412808260805e-05, | |
| "loss": 0.0214, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 3.618984838497034, | |
| "grad_norm": 0.24967588484287262, | |
| "learning_rate": 3.885172476564765e-05, | |
| "loss": 0.0157, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 3.6255767963085037, | |
| "grad_norm": 0.2771139442920685, | |
| "learning_rate": 3.850719992851326e-05, | |
| "loss": 0.0198, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.6321687541199736, | |
| "grad_norm": 0.3275032043457031, | |
| "learning_rate": 3.8163844850651346e-05, | |
| "loss": 0.0204, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 3.6387607119314436, | |
| "grad_norm": 0.3696538507938385, | |
| "learning_rate": 3.7821666063603566e-05, | |
| "loss": 0.0172, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 3.6453526697429135, | |
| "grad_norm": 0.43786558508872986, | |
| "learning_rate": 3.748067007653536e-05, | |
| "loss": 0.0199, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 3.651944627554384, | |
| "grad_norm": 0.15298739075660706, | |
| "learning_rate": 3.714086337611217e-05, | |
| "loss": 0.0118, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 0.2643417716026306, | |
| "learning_rate": 3.680225242637583e-05, | |
| "loss": 0.0217, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.6651285431773237, | |
| "grad_norm": 0.29987242817878723, | |
| "learning_rate": 3.646484366862197e-05, | |
| "loss": 0.0218, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 3.6717205009887937, | |
| "grad_norm": 0.2553282678127289, | |
| "learning_rate": 3.6128643521277096e-05, | |
| "loss": 0.0192, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 3.6783124588002636, | |
| "grad_norm": 0.24411100149154663, | |
| "learning_rate": 3.57936583797768e-05, | |
| "loss": 0.0156, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 3.684904416611734, | |
| "grad_norm": 0.2638270854949951, | |
| "learning_rate": 3.5459894616443954e-05, | |
| "loss": 0.0188, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 3.6914963744232034, | |
| "grad_norm": 0.19742664694786072, | |
| "learning_rate": 3.5127358580367463e-05, | |
| "loss": 0.021, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.698088332234674, | |
| "grad_norm": 0.3131982386112213, | |
| "learning_rate": 3.479605659728159e-05, | |
| "loss": 0.0176, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 3.7046802900461437, | |
| "grad_norm": 0.24199941754341125, | |
| "learning_rate": 3.446599496944557e-05, | |
| "loss": 0.0178, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 3.7112722478576137, | |
| "grad_norm": 0.18790839612483978, | |
| "learning_rate": 3.413717997552376e-05, | |
| "loss": 0.012, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 3.7178642056690836, | |
| "grad_norm": 0.4031229317188263, | |
| "learning_rate": 3.380961787046605e-05, | |
| "loss": 0.022, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 3.7244561634805535, | |
| "grad_norm": 0.3094145357608795, | |
| "learning_rate": 3.348331488538913e-05, | |
| "loss": 0.0207, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.731048121292024, | |
| "grad_norm": 0.31893035769462585, | |
| "learning_rate": 3.315827722745779e-05, | |
| "loss": 0.0195, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 3.737640079103494, | |
| "grad_norm": 0.2687014639377594, | |
| "learning_rate": 3.28345110797668e-05, | |
| "loss": 0.0152, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 3.7442320369149638, | |
| "grad_norm": 0.3952026963233948, | |
| "learning_rate": 3.2512022601223515e-05, | |
| "loss": 0.0247, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 3.7508239947264337, | |
| "grad_norm": 0.25332149863243103, | |
| "learning_rate": 3.21908179264304e-05, | |
| "loss": 0.0142, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 3.7574159525379036, | |
| "grad_norm": 0.4335060119628906, | |
| "learning_rate": 3.187090316556861e-05, | |
| "loss": 0.0202, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.764007910349374, | |
| "grad_norm": 0.25930336117744446, | |
| "learning_rate": 3.155228440428164e-05, | |
| "loss": 0.0208, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 3.770599868160844, | |
| "grad_norm": 0.6695492267608643, | |
| "learning_rate": 3.123496770355956e-05, | |
| "loss": 0.0153, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 3.777191825972314, | |
| "grad_norm": 0.3357510566711426, | |
| "learning_rate": 3.091895909962375e-05, | |
| "loss": 0.021, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 3.7837837837837838, | |
| "grad_norm": 0.4220266342163086, | |
| "learning_rate": 3.060426460381195e-05, | |
| "loss": 0.0155, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 3.7903757415952537, | |
| "grad_norm": 0.2396579086780548, | |
| "learning_rate": 3.0290890202464182e-05, | |
| "loss": 0.017, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.796967699406724, | |
| "grad_norm": 0.4336076080799103, | |
| "learning_rate": 2.9978841856808525e-05, | |
| "loss": 0.0193, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 3.8035596572181936, | |
| "grad_norm": 0.4535181224346161, | |
| "learning_rate": 2.966812550284803e-05, | |
| "loss": 0.0151, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 3.810151615029664, | |
| "grad_norm": 0.2847338020801544, | |
| "learning_rate": 2.9358747051247637e-05, | |
| "loss": 0.0164, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 3.816743572841134, | |
| "grad_norm": 0.33757925033569336, | |
| "learning_rate": 2.905071238722169e-05, | |
| "loss": 0.0173, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 3.823335530652604, | |
| "grad_norm": 0.21222251653671265, | |
| "learning_rate": 2.8744027370422167e-05, | |
| "loss": 0.0186, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.8299274884640737, | |
| "grad_norm": 0.8053876757621765, | |
| "learning_rate": 2.843869783482701e-05, | |
| "loss": 0.0189, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 3.8365194462755436, | |
| "grad_norm": 0.2711152732372284, | |
| "learning_rate": 2.8134729588629303e-05, | |
| "loss": 0.0281, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 3.843111404087014, | |
| "grad_norm": 0.24810029566287994, | |
| "learning_rate": 2.7832128414126735e-05, | |
| "loss": 0.0169, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 3.849703361898484, | |
| "grad_norm": 0.3628500998020172, | |
| "learning_rate": 2.7530900067611577e-05, | |
| "loss": 0.0138, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 3.856295319709954, | |
| "grad_norm": 0.1820344775915146, | |
| "learning_rate": 2.7231050279261217e-05, | |
| "loss": 0.0201, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.862887277521424, | |
| "grad_norm": 0.5230331420898438, | |
| "learning_rate": 2.6932584753029068e-05, | |
| "loss": 0.0162, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 3.8694792353328937, | |
| "grad_norm": 0.27183738350868225, | |
| "learning_rate": 2.6635509166536243e-05, | |
| "loss": 0.0173, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 3.876071193144364, | |
| "grad_norm": 0.19195932149887085, | |
| "learning_rate": 2.633982917096335e-05, | |
| "loss": 0.0207, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 3.882663150955834, | |
| "grad_norm": 0.42282554507255554, | |
| "learning_rate": 2.6045550390943185e-05, | |
| "loss": 0.0159, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 3.889255108767304, | |
| "grad_norm": 0.2981650233268738, | |
| "learning_rate": 2.5752678424453514e-05, | |
| "loss": 0.0173, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.895847066578774, | |
| "grad_norm": 0.32203352451324463, | |
| "learning_rate": 2.5461218842710798e-05, | |
| "loss": 0.021, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 0.2388588786125183, | |
| "learning_rate": 2.517117719006411e-05, | |
| "loss": 0.0219, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 3.9090309822017137, | |
| "grad_norm": 0.40328285098075867, | |
| "learning_rate": 2.488255898388966e-05, | |
| "loss": 0.0169, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 3.9156229400131837, | |
| "grad_norm": 0.14190708100795746, | |
| "learning_rate": 2.4595369714485895e-05, | |
| "loss": 0.0167, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 3.922214897824654, | |
| "grad_norm": 0.418643593788147, | |
| "learning_rate": 2.430961484496893e-05, | |
| "loss": 0.0187, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.928806855636124, | |
| "grad_norm": 0.2280479073524475, | |
| "learning_rate": 2.4025299811168843e-05, | |
| "loss": 0.0151, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 3.935398813447594, | |
| "grad_norm": 0.5002431869506836, | |
| "learning_rate": 2.3742430021526018e-05, | |
| "loss": 0.019, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 3.941990771259064, | |
| "grad_norm": 0.22551734745502472, | |
| "learning_rate": 2.3461010856988473e-05, | |
| "loss": 0.013, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 3.9485827290705338, | |
| "grad_norm": 0.3069497048854828, | |
| "learning_rate": 2.318104767090944e-05, | |
| "loss": 0.018, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 3.955174686882004, | |
| "grad_norm": 0.36286690831184387, | |
| "learning_rate": 2.2902545788945396e-05, | |
| "loss": 0.024, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.961766644693474, | |
| "grad_norm": 0.2421414703130722, | |
| "learning_rate": 2.2625510508954952e-05, | |
| "loss": 0.0212, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 3.968358602504944, | |
| "grad_norm": 0.23019398748874664, | |
| "learning_rate": 2.234994710089795e-05, | |
| "loss": 0.0188, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 3.974950560316414, | |
| "grad_norm": 0.2802564203739166, | |
| "learning_rate": 2.207586080673528e-05, | |
| "loss": 0.0192, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 3.981542518127884, | |
| "grad_norm": 0.2667250633239746, | |
| "learning_rate": 2.1803256840329134e-05, | |
| "loss": 0.0213, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 3.988134475939354, | |
| "grad_norm": 0.4056625962257385, | |
| "learning_rate": 2.1532140387343735e-05, | |
| "loss": 0.0169, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.994726433750824, | |
| "grad_norm": 0.1790419965982437, | |
| "learning_rate": 2.126251660514691e-05, | |
| "loss": 0.0185, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 4.001318391562294, | |
| "grad_norm": 0.2861385941505432, | |
| "learning_rate": 2.0994390622711734e-05, | |
| "loss": 0.0191, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 4.0079103493737644, | |
| "grad_norm": 0.20970335602760315, | |
| "learning_rate": 2.0727767540519193e-05, | |
| "loss": 0.0171, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 4.014502307185234, | |
| "grad_norm": 0.2126467227935791, | |
| "learning_rate": 2.046265243046094e-05, | |
| "loss": 0.0175, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 4.021094264996704, | |
| "grad_norm": 0.4862785339355469, | |
| "learning_rate": 2.0199050335743007e-05, | |
| "loss": 0.0212, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 4.027686222808174, | |
| "grad_norm": 0.36454570293426514, | |
| "learning_rate": 1.9936966270789738e-05, | |
| "loss": 0.0159, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 4.034278180619644, | |
| "grad_norm": 0.1897134780883789, | |
| "learning_rate": 1.9676405221148475e-05, | |
| "loss": 0.0172, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 4.040870138431114, | |
| "grad_norm": 0.2542422115802765, | |
| "learning_rate": 1.9417372143394697e-05, | |
| "loss": 0.0251, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 4.047462096242584, | |
| "grad_norm": 0.20512335002422333, | |
| "learning_rate": 1.9159871965037657e-05, | |
| "loss": 0.0172, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 4.054054054054054, | |
| "grad_norm": 0.21565409004688263, | |
| "learning_rate": 1.8903909584426826e-05, | |
| "loss": 0.018, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 4.060646011865524, | |
| "grad_norm": 0.3546988368034363, | |
| "learning_rate": 1.86494898706585e-05, | |
| "loss": 0.0169, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 4.067237969676994, | |
| "grad_norm": 0.5294975638389587, | |
| "learning_rate": 1.8396617663483363e-05, | |
| "loss": 0.0159, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 4.073829927488464, | |
| "grad_norm": 0.2470693439245224, | |
| "learning_rate": 1.814529777321432e-05, | |
| "loss": 0.0211, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 4.080421885299934, | |
| "grad_norm": 0.4331272542476654, | |
| "learning_rate": 1.7895534980634954e-05, | |
| "loss": 0.0176, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 4.0870138431114045, | |
| "grad_norm": 0.3057391941547394, | |
| "learning_rate": 1.764733403690875e-05, | |
| "loss": 0.0203, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 4.093605800922874, | |
| "grad_norm": 0.11541125923395157, | |
| "learning_rate": 1.740069966348846e-05, | |
| "loss": 0.0193, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 4.100197758734344, | |
| "grad_norm": 0.28473731875419617, | |
| "learning_rate": 1.71556365520266e-05, | |
| "loss": 0.0196, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 4.106789716545814, | |
| "grad_norm": 0.14990141987800598, | |
| "learning_rate": 1.6912149364285958e-05, | |
| "loss": 0.0147, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 4.113381674357284, | |
| "grad_norm": 0.33358579874038696, | |
| "learning_rate": 1.667024273205092e-05, | |
| "loss": 0.02, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 4.119973632168755, | |
| "grad_norm": 0.2164691537618637, | |
| "learning_rate": 1.6429921257039592e-05, | |
| "loss": 0.0171, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 4.126565589980224, | |
| "grad_norm": 0.29503509402275085, | |
| "learning_rate": 1.619118951081594e-05, | |
| "loss": 0.0156, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 4.133157547791694, | |
| "grad_norm": 0.29893797636032104, | |
| "learning_rate": 1.5954052034703125e-05, | |
| "loss": 0.016, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 4.139749505603164, | |
| "grad_norm": 0.3970952033996582, | |
| "learning_rate": 1.5718513339696883e-05, | |
| "loss": 0.0191, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 4.146341463414634, | |
| "grad_norm": 0.2718060612678528, | |
| "learning_rate": 1.548457790637987e-05, | |
| "loss": 0.014, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 4.152933421226104, | |
| "grad_norm": 0.3720945119857788, | |
| "learning_rate": 1.525225018483638e-05, | |
| "loss": 0.0168, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 4.159525379037574, | |
| "grad_norm": 0.21513940393924713, | |
| "learning_rate": 1.5021534594567621e-05, | |
| "loss": 0.0159, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 4.1661173368490445, | |
| "grad_norm": 0.30618909001350403, | |
| "learning_rate": 1.4792435524407755e-05, | |
| "loss": 0.0151, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 4.172709294660514, | |
| "grad_norm": 0.409757524728775, | |
| "learning_rate": 1.4564957332440365e-05, | |
| "loss": 0.0177, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 4.179301252471984, | |
| "grad_norm": 0.2687203884124756, | |
| "learning_rate": 1.4339104345915554e-05, | |
| "loss": 0.0202, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 4.185893210283454, | |
| "grad_norm": 0.25398269295692444, | |
| "learning_rate": 1.4114880861167557e-05, | |
| "loss": 0.0189, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 4.192485168094924, | |
| "grad_norm": 0.2254013866186142, | |
| "learning_rate": 1.3892291143533154e-05, | |
| "loss": 0.0144, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 4.199077125906395, | |
| "grad_norm": 0.32205384969711304, | |
| "learning_rate": 1.3671339427270458e-05, | |
| "loss": 0.0161, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 4.205669083717864, | |
| "grad_norm": 0.3406763970851898, | |
| "learning_rate": 1.3452029915478304e-05, | |
| "loss": 0.02, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 4.2122610415293344, | |
| "grad_norm": 0.31815874576568604, | |
| "learning_rate": 1.3234366780016438e-05, | |
| "loss": 0.0185, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 4.218852999340804, | |
| "grad_norm": 0.1224733293056488, | |
| "learning_rate": 1.3018354161425994e-05, | |
| "loss": 0.0181, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 4.225444957152274, | |
| "grad_norm": 0.42326441407203674, | |
| "learning_rate": 1.2803996168850896e-05, | |
| "loss": 0.016, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 4.232036914963745, | |
| "grad_norm": 0.2917204797267914, | |
| "learning_rate": 1.2591296879959557e-05, | |
| "loss": 0.0146, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 4.238628872775214, | |
| "grad_norm": 0.27973493933677673, | |
| "learning_rate": 1.238026034086739e-05, | |
| "loss": 0.0167, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 4.2452208305866845, | |
| "grad_norm": 0.13871712982654572, | |
| "learning_rate": 1.2170890566059811e-05, | |
| "loss": 0.0161, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 4.251812788398154, | |
| "grad_norm": 0.2724437713623047, | |
| "learning_rate": 1.1963191538315833e-05, | |
| "loss": 0.0188, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 4.258404746209624, | |
| "grad_norm": 0.24582289159297943, | |
| "learning_rate": 1.1757167208632414e-05, | |
| "loss": 0.0142, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 4.264996704021094, | |
| "grad_norm": 0.6128583550453186, | |
| "learning_rate": 1.1552821496149135e-05, | |
| "loss": 0.015, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 4.271588661832564, | |
| "grad_norm": 0.38243502378463745, | |
| "learning_rate": 1.135015828807382e-05, | |
| "loss": 0.0135, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 4.278180619644035, | |
| "grad_norm": 0.22540901601314545, | |
| "learning_rate": 1.1149181439608514e-05, | |
| "loss": 0.0156, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 4.284772577455504, | |
| "grad_norm": 0.4100974500179291, | |
| "learning_rate": 1.0949894773876079e-05, | |
| "loss": 0.0156, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.2913645352669745, | |
| "grad_norm": 0.1929452121257782, | |
| "learning_rate": 1.0752302081847565e-05, | |
| "loss": 0.0184, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 4.297956493078444, | |
| "grad_norm": 0.27612316608428955, | |
| "learning_rate": 1.0556407122270096e-05, | |
| "loss": 0.0192, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 4.304548450889914, | |
| "grad_norm": 0.20837433636188507, | |
| "learning_rate": 1.0362213621595307e-05, | |
| "loss": 0.0135, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 4.311140408701385, | |
| "grad_norm": 0.38383790850639343, | |
| "learning_rate": 1.016972527390846e-05, | |
| "loss": 0.0186, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 4.317732366512854, | |
| "grad_norm": 0.3808279037475586, | |
| "learning_rate": 9.978945740858226e-06, | |
| "loss": 0.0172, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 4.324324324324325, | |
| "grad_norm": 0.12612776458263397, | |
| "learning_rate": 9.789878651587036e-06, | |
| "loss": 0.0131, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 4.330916282135794, | |
| "grad_norm": 0.47806084156036377, | |
| "learning_rate": 9.602527602661949e-06, | |
| "loss": 0.0175, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 4.337508239947264, | |
| "grad_norm": 0.5602189302444458, | |
| "learning_rate": 9.416896158006328e-06, | |
| "loss": 0.0161, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 4.344100197758735, | |
| "grad_norm": 0.5258492231369019, | |
| "learning_rate": 9.232987848832009e-06, | |
| "loss": 0.0151, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 4.350692155570204, | |
| "grad_norm": 0.18115440011024475, | |
| "learning_rate": 9.050806173572134e-06, | |
| "loss": 0.0115, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 4.357284113381675, | |
| "grad_norm": 0.2673959732055664, | |
| "learning_rate": 8.870354597814622e-06, | |
| "loss": 0.013, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 4.363876071193144, | |
| "grad_norm": 0.4614759385585785, | |
| "learning_rate": 8.691636554236182e-06, | |
| "loss": 0.0179, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 4.3704680290046145, | |
| "grad_norm": 0.31257471442222595, | |
| "learning_rate": 8.514655442537122e-06, | |
| "loss": 0.0152, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 4.377059986816084, | |
| "grad_norm": 0.1402910202741623, | |
| "learning_rate": 8.339414629376507e-06, | |
| "loss": 0.0155, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 4.383651944627554, | |
| "grad_norm": 0.19149114191532135, | |
| "learning_rate": 8.165917448308324e-06, | |
| "loss": 0.0132, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 4.390243902439025, | |
| "grad_norm": 0.31132665276527405, | |
| "learning_rate": 7.994167199717894e-06, | |
| "loss": 0.0159, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 4.396835860250494, | |
| "grad_norm": 0.30715203285217285, | |
| "learning_rate": 7.824167150759188e-06, | |
| "loss": 0.022, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 4.403427818061965, | |
| "grad_norm": 0.23801127076148987, | |
| "learning_rate": 7.655920535292682e-06, | |
| "loss": 0.0123, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 4.410019775873434, | |
| "grad_norm": 0.3437555730342865, | |
| "learning_rate": 7.4894305538237285e-06, | |
| "loss": 0.0154, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 4.4166117336849045, | |
| "grad_norm": 0.23300838470458984, | |
| "learning_rate": 7.324700373441828e-06, | |
| "loss": 0.0188, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 4.423203691496375, | |
| "grad_norm": 0.2827889621257782, | |
| "learning_rate": 7.161733127760228e-06, | |
| "loss": 0.0151, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 4.429795649307844, | |
| "grad_norm": 0.2165522575378418, | |
| "learning_rate": 7.000531916856512e-06, | |
| "loss": 0.0145, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 4.436387607119315, | |
| "grad_norm": 0.3993603587150574, | |
| "learning_rate": 6.841099807213392e-06, | |
| "loss": 0.024, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 4.442979564930784, | |
| "grad_norm": 0.21347716450691223, | |
| "learning_rate": 6.683439831660554e-06, | |
| "loss": 0.0254, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 4.4495715227422545, | |
| "grad_norm": 0.4783138036727905, | |
| "learning_rate": 6.527554989316897e-06, | |
| "loss": 0.0141, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 4.456163480553725, | |
| "grad_norm": 0.2551850378513336, | |
| "learning_rate": 6.373448245533464e-06, | |
| "loss": 0.0203, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 4.462755438365194, | |
| "grad_norm": 0.22933778166770935, | |
| "learning_rate": 6.221122531837076e-06, | |
| "loss": 0.0193, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 4.469347396176665, | |
| "grad_norm": 0.1832355260848999, | |
| "learning_rate": 6.070580745874544e-06, | |
| "loss": 0.0134, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 4.475939353988134, | |
| "grad_norm": 0.3792283535003662, | |
| "learning_rate": 5.921825751357557e-06, | |
| "loss": 0.0159, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 4.482531311799605, | |
| "grad_norm": 0.18225885927677155, | |
| "learning_rate": 5.7748603780081735e-06, | |
| "loss": 0.0217, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.489123269611074, | |
| "grad_norm": 0.49436914920806885, | |
| "learning_rate": 5.62968742150507e-06, | |
| "loss": 0.0158, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 4.4957152274225445, | |
| "grad_norm": 0.2793099582195282, | |
| "learning_rate": 5.4863096434302655e-06, | |
| "loss": 0.016, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 4.502307185234015, | |
| "grad_norm": 0.2998494505882263, | |
| "learning_rate": 5.344729771216661e-06, | |
| "loss": 0.0174, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 4.508899143045484, | |
| "grad_norm": 0.45131003856658936, | |
| "learning_rate": 5.204950498096117e-06, | |
| "loss": 0.0196, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 4.515491100856955, | |
| "grad_norm": 0.37397655844688416, | |
| "learning_rate": 5.066974483048215e-06, | |
| "loss": 0.0158, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 4.522083058668424, | |
| "grad_norm": 0.5381725430488586, | |
| "learning_rate": 4.930804350749729e-06, | |
| "loss": 0.016, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 4.528675016479895, | |
| "grad_norm": 0.2811379134654999, | |
| "learning_rate": 4.796442691524638e-06, | |
| "loss": 0.013, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 4.535266974291364, | |
| "grad_norm": 0.205452561378479, | |
| "learning_rate": 4.663892061294872e-06, | |
| "loss": 0.0165, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 4.541858932102834, | |
| "grad_norm": 0.2746995687484741, | |
| "learning_rate": 4.5331549815317174e-06, | |
| "loss": 0.0227, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 4.548450889914305, | |
| "grad_norm": 0.30904215574264526, | |
| "learning_rate": 4.404233939207791e-06, | |
| "loss": 0.0153, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.555042847725774, | |
| "grad_norm": 0.42725998163223267, | |
| "learning_rate": 4.2771313867498e-06, | |
| "loss": 0.0192, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 4.561634805537245, | |
| "grad_norm": 0.18472789227962494, | |
| "learning_rate": 4.151849741991864e-06, | |
| "loss": 0.025, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 4.568226763348715, | |
| "grad_norm": 0.3807401955127716, | |
| "learning_rate": 4.0283913881294935e-06, | |
| "loss": 0.0181, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 4.5748187211601845, | |
| "grad_norm": 0.17289142310619354, | |
| "learning_rate": 3.906758673674293e-06, | |
| "loss": 0.0148, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 4.581410678971655, | |
| "grad_norm": 0.32773271203041077, | |
| "learning_rate": 3.7869539124092525e-06, | |
| "loss": 0.0173, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 4.588002636783124, | |
| "grad_norm": 0.2213710993528366, | |
| "learning_rate": 3.6689793833447837e-06, | |
| "loss": 0.0137, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 4.594594594594595, | |
| "grad_norm": 0.17836393415927887, | |
| "learning_rate": 3.552837330675296e-06, | |
| "loss": 0.0184, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 4.601186552406064, | |
| "grad_norm": 0.2593984603881836, | |
| "learning_rate": 3.43852996373657e-06, | |
| "loss": 0.0138, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 4.607778510217535, | |
| "grad_norm": 0.2913285195827484, | |
| "learning_rate": 3.3260594569636928e-06, | |
| "loss": 0.0212, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 4.614370468029005, | |
| "grad_norm": 0.18963216245174408, | |
| "learning_rate": 3.215427949849714e-06, | |
| "loss": 0.0155, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.6209624258404745, | |
| "grad_norm": 0.30186694860458374, | |
| "learning_rate": 3.1066375469049337e-06, | |
| "loss": 0.0185, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 4.627554383651945, | |
| "grad_norm": 0.3594430685043335, | |
| "learning_rate": 2.9996903176168765e-06, | |
| "loss": 0.0157, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 4.634146341463414, | |
| "grad_norm": 0.407387912273407, | |
| "learning_rate": 2.8945882964109496e-06, | |
| "loss": 0.0155, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 4.640738299274885, | |
| "grad_norm": 0.1670001596212387, | |
| "learning_rate": 2.7913334826116357e-06, | |
| "loss": 0.0156, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 4.647330257086354, | |
| "grad_norm": 0.3461068272590637, | |
| "learning_rate": 2.689927840404638e-06, | |
| "loss": 0.0155, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 4.6539222148978245, | |
| "grad_norm": 0.1870720386505127, | |
| "learning_rate": 2.590373298799342e-06, | |
| "loss": 0.0137, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 4.660514172709295, | |
| "grad_norm": 0.5297737717628479, | |
| "learning_rate": 2.492671751592235e-06, | |
| "loss": 0.021, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 4.667106130520764, | |
| "grad_norm": 0.41437268257141113, | |
| "learning_rate": 2.3968250573308424e-06, | |
| "loss": 0.0166, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 4.673698088332235, | |
| "grad_norm": 0.2162405252456665, | |
| "learning_rate": 2.302835039278339e-06, | |
| "loss": 0.0163, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 4.680290046143705, | |
| "grad_norm": 0.3162844479084015, | |
| "learning_rate": 2.2107034853789288e-06, | |
| "loss": 0.0184, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.686882003955175, | |
| "grad_norm": 0.23974072933197021, | |
| "learning_rate": 2.1204321482238164e-06, | |
| "loss": 0.0187, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 4.693473961766645, | |
| "grad_norm": 0.24216875433921814, | |
| "learning_rate": 2.0320227450178254e-06, | |
| "loss": 0.0145, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 4.7000659195781145, | |
| "grad_norm": 0.3286508023738861, | |
| "learning_rate": 1.945476957546788e-06, | |
| "loss": 0.0189, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 4.706657877389585, | |
| "grad_norm": 0.22018277645111084, | |
| "learning_rate": 1.860796432145495e-06, | |
| "loss": 0.0164, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 4.713249835201054, | |
| "grad_norm": 0.18138107657432556, | |
| "learning_rate": 1.7779827796664538e-06, | |
| "loss": 0.0173, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 4.719841793012525, | |
| "grad_norm": 0.3609893321990967, | |
| "learning_rate": 1.6970375754491562e-06, | |
| "loss": 0.0291, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 4.726433750823995, | |
| "grad_norm": 0.31565043330192566, | |
| "learning_rate": 1.6179623592901926e-06, | |
| "loss": 0.014, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 4.733025708635465, | |
| "grad_norm": 0.27240124344825745, | |
| "learning_rate": 1.5407586354139193e-06, | |
| "loss": 0.0167, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 4.739617666446935, | |
| "grad_norm": 0.3199063837528229, | |
| "learning_rate": 1.4654278724438364e-06, | |
| "loss": 0.0164, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 4.746209624258404, | |
| "grad_norm": 0.23247933387756348, | |
| "learning_rate": 1.3919715033746893e-06, | |
| "loss": 0.0195, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.752801582069875, | |
| "grad_norm": 0.26770317554473877, | |
| "learning_rate": 1.3203909255451452e-06, | |
| "loss": 0.0125, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 4.759393539881344, | |
| "grad_norm": 0.2076646387577057, | |
| "learning_rate": 1.2506875006113027e-06, | |
| "loss": 0.0162, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 4.765985497692815, | |
| "grad_norm": 0.1567927598953247, | |
| "learning_rate": 1.1828625545207027e-06, | |
| "loss": 0.0142, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 4.772577455504285, | |
| "grad_norm": 0.3224427402019501, | |
| "learning_rate": 1.1169173774871478e-06, | |
| "loss": 0.0161, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 4.7791694133157545, | |
| "grad_norm": 0.5948562622070312, | |
| "learning_rate": 1.0528532239661547e-06, | |
| "loss": 0.0164, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 4.785761371127225, | |
| "grad_norm": 0.30895039439201355, | |
| "learning_rate": 9.906713126310974e-07, | |
| "loss": 0.0171, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 4.792353328938695, | |
| "grad_norm": 0.14259961247444153, | |
| "learning_rate": 9.303728263500011e-07, | |
| "loss": 0.0194, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 4.798945286750165, | |
| "grad_norm": 0.15019071102142334, | |
| "learning_rate": 8.719589121630622e-07, | |
| "loss": 0.0163, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 4.805537244561635, | |
| "grad_norm": 0.2892571687698364, | |
| "learning_rate": 8.154306812608315e-07, | |
| "loss": 0.0173, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 4.812129202373105, | |
| "grad_norm": 0.2563762962818146, | |
| "learning_rate": 7.607892089630308e-07, | |
| "loss": 0.0168, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 4.818721160184575, | |
| "grad_norm": 0.2222357541322708, | |
| "learning_rate": 7.080355346981815e-07, | |
| "loss": 0.014, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 4.8253131179960445, | |
| "grad_norm": 0.22898784279823303, | |
| "learning_rate": 6.571706619837526e-07, | |
| "loss": 0.0135, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 4.831905075807515, | |
| "grad_norm": 0.23187340795993805, | |
| "learning_rate": 6.081955584071097e-07, | |
| "loss": 0.0142, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 4.838497033618985, | |
| "grad_norm": 0.3049458861351013, | |
| "learning_rate": 5.61111155607108e-07, | |
| "loss": 0.0199, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 4.845088991430455, | |
| "grad_norm": 0.17564386129379272, | |
| "learning_rate": 5.159183492563613e-07, | |
| "loss": 0.0151, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 4.851680949241925, | |
| "grad_norm": 0.3510572016239166, | |
| "learning_rate": 4.7261799904420035e-07, | |
| "loss": 0.0164, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 4.8582729070533945, | |
| "grad_norm": 0.31466346979141235, | |
| "learning_rate": 4.3121092866031945e-07, | |
| "loss": 0.0176, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 4.864864864864865, | |
| "grad_norm": 0.2005147635936737, | |
| "learning_rate": 3.91697925779122e-07, | |
| "loss": 0.0168, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 4.871456822676334, | |
| "grad_norm": 0.1678527295589447, | |
| "learning_rate": 3.5407974204473284e-07, | |
| "loss": 0.0175, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 4.878048780487805, | |
| "grad_norm": 0.21754373610019684, | |
| "learning_rate": 3.1835709305668703e-07, | |
| "loss": 0.0127, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 4.884640738299275, | |
| "grad_norm": 0.21587257087230682, | |
| "learning_rate": 2.84530658356319e-07, | |
| "loss": 0.017, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 4.891232696110745, | |
| "grad_norm": 0.31447526812553406, | |
| "learning_rate": 2.526010814138613e-07, | |
| "loss": 0.0217, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 4.897824653922215, | |
| "grad_norm": 0.30843478441238403, | |
| "learning_rate": 2.2256896961616592e-07, | |
| "loss": 0.0181, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 4.904416611733685, | |
| "grad_norm": 0.29951369762420654, | |
| "learning_rate": 1.9443489425517992e-07, | |
| "loss": 0.0152, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 4.911008569545155, | |
| "grad_norm": 0.4117021858692169, | |
| "learning_rate": 1.6819939051706535e-07, | |
| "loss": 0.0127, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 4.917600527356625, | |
| "grad_norm": 0.11666778475046158, | |
| "learning_rate": 1.438629574720074e-07, | |
| "loss": 0.0144, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 4.924192485168095, | |
| "grad_norm": 0.3991844356060028, | |
| "learning_rate": 1.2142605806474417e-07, | |
| "loss": 0.0162, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 4.930784442979565, | |
| "grad_norm": 0.09675031900405884, | |
| "learning_rate": 1.0088911910576259e-07, | |
| "loss": 0.0223, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 4.937376400791035, | |
| "grad_norm": 0.3356577157974243, | |
| "learning_rate": 8.225253126314947e-08, | |
| "loss": 0.0168, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 4.943968358602505, | |
| "grad_norm": 0.27056625485420227, | |
| "learning_rate": 6.551664905517508e-08, | |
| "loss": 0.0166, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.950560316413975, | |
| "grad_norm": 0.24081185460090637, | |
| "learning_rate": 5.068179084355418e-08, | |
| "loss": 0.0164, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 4.957152274225445, | |
| "grad_norm": 0.3618698716163635, | |
| "learning_rate": 3.774823882738421e-08, | |
| "loss": 0.0176, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 4.963744232036915, | |
| "grad_norm": 0.20548762381076813, | |
| "learning_rate": 2.6716239037805068e-08, | |
| "loss": 0.0183, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 4.970336189848385, | |
| "grad_norm": 0.24806766211986542, | |
| "learning_rate": 1.7586001333258495e-08, | |
| "loss": 0.0156, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 4.976928147659855, | |
| "grad_norm": 0.3018137216567993, | |
| "learning_rate": 1.0357699395535658e-08, | |
| "loss": 0.0196, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 4.9835201054713245, | |
| "grad_norm": 0.24933604896068573, | |
| "learning_rate": 5.031470726490906e-09, | |
| "loss": 0.015, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 4.990112063282795, | |
| "grad_norm": 0.23485144972801208, | |
| "learning_rate": 1.6074166453883266e-09, | |
| "loss": 0.0103, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 4.996704021094265, | |
| "grad_norm": 0.4469901919364929, | |
| "learning_rate": 8.560228699217021e-11, | |
| "loss": 0.0147, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 4.998681608437706, | |
| "step": 7583, | |
| "total_flos": 2.658519488376864e+17, | |
| "train_loss": 0.03622536294503214, | |
| "train_runtime": 3445.8975, | |
| "train_samples_per_second": 35.209, | |
| "train_steps_per_second": 2.201 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 7583, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.658519488376864e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |