{
  "best_metric": 0.7994034079615475,
  "best_model_checkpoint": "hf-importance-production/checkpoint-674",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 674,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02967359050445104,
      "grad_norm": 168.60842895507812,
      "learning_rate": 2.9761904761904765e-07,
      "loss": 1.824,
      "step": 10
    },
    {
      "epoch": 0.05934718100890208,
      "grad_norm": 197.9370574951172,
      "learning_rate": 5.952380952380953e-07,
      "loss": 1.9581,
      "step": 20
    },
    {
      "epoch": 0.08902077151335312,
      "grad_norm": 18.61470603942871,
      "learning_rate": 8.928571428571429e-07,
      "loss": 1.3577,
      "step": 30
    },
    {
      "epoch": 0.11869436201780416,
      "grad_norm": 171.209228515625,
      "learning_rate": 1.1904761904761906e-06,
      "loss": 1.5439,
      "step": 40
    },
    {
      "epoch": 0.14836795252225518,
      "grad_norm": 30.70751953125,
      "learning_rate": 1.4880952380952381e-06,
      "loss": 0.935,
      "step": 50
    },
    {
      "epoch": 0.17804154302670624,
      "grad_norm": 1.3179957866668701,
      "learning_rate": 1.7857142857142859e-06,
      "loss": 0.863,
      "step": 60
    },
    {
      "epoch": 0.20771513353115728,
      "grad_norm": 46.17071533203125,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 0.9118,
      "step": 70
    },
    {
      "epoch": 0.23738872403560832,
      "grad_norm": 136.59942626953125,
      "learning_rate": 2.380952380952381e-06,
      "loss": 1.3016,
      "step": 80
    },
    {
      "epoch": 0.26706231454005935,
      "grad_norm": 164.12306213378906,
      "learning_rate": 2.6785714285714285e-06,
      "loss": 1.3362,
      "step": 90
    },
    {
      "epoch": 0.29673590504451036,
      "grad_norm": 72.49063110351562,
      "learning_rate": 2.9761904761904763e-06,
      "loss": 0.7472,
      "step": 100
    },
    {
      "epoch": 0.3264094955489614,
      "grad_norm": 63.815528869628906,
      "learning_rate": 3.273809523809524e-06,
      "loss": 0.5805,
      "step": 110
    },
    {
      "epoch": 0.3560830860534125,
      "grad_norm": 97.9521255493164,
      "learning_rate": 3.5714285714285718e-06,
      "loss": 0.6095,
      "step": 120
    },
    {
      "epoch": 0.3857566765578635,
      "grad_norm": 46.075660705566406,
      "learning_rate": 3.869047619047619e-06,
      "loss": 0.9813,
      "step": 130
    },
    {
      "epoch": 0.41543026706231456,
      "grad_norm": 56.08775329589844,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.8756,
      "step": 140
    },
    {
      "epoch": 0.44510385756676557,
      "grad_norm": 41.81604766845703,
      "learning_rate": 4.464285714285715e-06,
      "loss": 1.0463,
      "step": 150
    },
    {
      "epoch": 0.47477744807121663,
      "grad_norm": 42.02016067504883,
      "learning_rate": 4.761904761904762e-06,
      "loss": 1.1095,
      "step": 160
    },
    {
      "epoch": 0.5044510385756676,
      "grad_norm": 92.67354583740234,
      "learning_rate": 5.05952380952381e-06,
      "loss": 1.0643,
      "step": 170
    },
    {
      "epoch": 0.5341246290801187,
      "grad_norm": 92.83062744140625,
      "learning_rate": 5.357142857142857e-06,
      "loss": 0.6986,
      "step": 180
    },
    {
      "epoch": 0.5637982195845698,
      "grad_norm": 37.39644241333008,
      "learning_rate": 5.654761904761905e-06,
      "loss": 1.1433,
      "step": 190
    },
    {
      "epoch": 0.5934718100890207,
      "grad_norm": 50.91009521484375,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.7077,
      "step": 200
    },
    {
      "epoch": 0.6231454005934718,
      "grad_norm": 38.2639045715332,
      "learning_rate": 6.25e-06,
      "loss": 0.7619,
      "step": 210
    },
    {
      "epoch": 0.6528189910979229,
      "grad_norm": 2.9356582164764404,
      "learning_rate": 6.547619047619048e-06,
      "loss": 0.4398,
      "step": 220
    },
    {
      "epoch": 0.6824925816023739,
      "grad_norm": 49.96760940551758,
      "learning_rate": 6.845238095238096e-06,
      "loss": 1.1917,
      "step": 230
    },
    {
      "epoch": 0.712166172106825,
      "grad_norm": 73.6959228515625,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.4473,
      "step": 240
    },
    {
      "epoch": 0.7418397626112759,
      "grad_norm": 52.25584411621094,
      "learning_rate": 7.440476190476191e-06,
      "loss": 1.3084,
      "step": 250
    },
    {
      "epoch": 0.771513353115727,
      "grad_norm": 66.60286712646484,
      "learning_rate": 7.738095238095238e-06,
      "loss": 0.9924,
      "step": 260
    },
    {
      "epoch": 0.8011869436201781,
      "grad_norm": 61.88084411621094,
      "learning_rate": 8.035714285714286e-06,
      "loss": 0.9188,
      "step": 270
    },
    {
      "epoch": 0.8308605341246291,
      "grad_norm": 32.82881546020508,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.5267,
      "step": 280
    },
    {
      "epoch": 0.8605341246290801,
      "grad_norm": 50.554412841796875,
      "learning_rate": 8.630952380952381e-06,
      "loss": 0.8353,
      "step": 290
    },
    {
      "epoch": 0.8902077151335311,
      "grad_norm": 74.67025756835938,
      "learning_rate": 8.92857142857143e-06,
      "loss": 0.7204,
      "step": 300
    },
    {
      "epoch": 0.9198813056379822,
      "grad_norm": 73.39923095703125,
      "learning_rate": 9.226190476190477e-06,
      "loss": 1.5639,
      "step": 310
    },
    {
      "epoch": 0.9495548961424333,
      "grad_norm": 43.86273193359375,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.6084,
      "step": 320
    },
    {
      "epoch": 0.9792284866468842,
      "grad_norm": 60.18267822265625,
      "learning_rate": 9.821428571428573e-06,
      "loss": 0.8311,
      "step": 330
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.72,
      "eval_f1": 0.6758345853065657,
      "eval_f1_macro": 0.5593865060621974,
      "eval_loss": 1.2557945251464844,
      "eval_precision": 0.7713116348671903,
      "eval_precision_macro": 0.7813692480359148,
      "eval_recall": 0.72,
      "eval_recall_macro": 0.5248060178139268,
      "eval_runtime": 247.1233,
      "eval_samples_per_second": 1.821,
      "eval_steps_per_second": 0.457,
      "step": 337
    },
    {
      "epoch": 1.0089020771513353,
      "grad_norm": 86.58915710449219,
      "learning_rate": 9.88165680473373e-06,
      "loss": 0.9621,
      "step": 340
    },
    {
      "epoch": 1.0385756676557865,
      "grad_norm": 46.461692810058594,
      "learning_rate": 9.585798816568049e-06,
      "loss": 0.5792,
      "step": 350
    },
    {
      "epoch": 1.0682492581602374,
      "grad_norm": 49.093719482421875,
      "learning_rate": 9.289940828402368e-06,
      "loss": 0.6038,
      "step": 360
    },
    {
      "epoch": 1.0979228486646884,
      "grad_norm": 43.62152862548828,
      "learning_rate": 8.994082840236687e-06,
      "loss": 0.6391,
      "step": 370
    },
    {
      "epoch": 1.1275964391691395,
      "grad_norm": 14.27550983428955,
      "learning_rate": 8.698224852071006e-06,
      "loss": 0.573,
      "step": 380
    },
    {
      "epoch": 1.1572700296735905,
      "grad_norm": 67.1692123413086,
      "learning_rate": 8.402366863905327e-06,
      "loss": 0.409,
      "step": 390
    },
    {
      "epoch": 1.1869436201780414,
      "grad_norm": 43.99798583984375,
      "learning_rate": 8.106508875739646e-06,
      "loss": 0.4652,
      "step": 400
    },
    {
      "epoch": 1.2166172106824926,
      "grad_norm": 29.33543586730957,
      "learning_rate": 7.810650887573965e-06,
      "loss": 0.2783,
      "step": 410
    },
    {
      "epoch": 1.2462908011869436,
      "grad_norm": 61.13819885253906,
      "learning_rate": 7.5147928994082845e-06,
      "loss": 0.8814,
      "step": 420
    },
    {
      "epoch": 1.2759643916913945,
      "grad_norm": 37.44241714477539,
      "learning_rate": 7.218934911242604e-06,
      "loss": 0.4763,
      "step": 430
    },
    {
      "epoch": 1.3056379821958457,
      "grad_norm": 58.24061965942383,
      "learning_rate": 6.923076923076923e-06,
      "loss": 0.9232,
      "step": 440
    },
    {
      "epoch": 1.3353115727002967,
      "grad_norm": 0.6983101963996887,
      "learning_rate": 6.627218934911244e-06,
      "loss": 0.3548,
      "step": 450
    },
    {
      "epoch": 1.3649851632047478,
      "grad_norm": 81.1714096069336,
      "learning_rate": 6.331360946745563e-06,
      "loss": 1.1273,
      "step": 460
    },
    {
      "epoch": 1.3946587537091988,
      "grad_norm": 9.78393840789795,
      "learning_rate": 6.035502958579882e-06,
      "loss": 0.5734,
      "step": 470
    },
    {
      "epoch": 1.4243323442136497,
      "grad_norm": 3.354196071624756,
      "learning_rate": 5.739644970414202e-06,
      "loss": 0.4616,
      "step": 480
    },
    {
      "epoch": 1.454005934718101,
      "grad_norm": 30.13648796081543,
      "learning_rate": 5.443786982248521e-06,
      "loss": 1.4066,
      "step": 490
    },
    {
      "epoch": 1.4836795252225519,
      "grad_norm": 56.0758056640625,
      "learning_rate": 5.14792899408284e-06,
      "loss": 0.5536,
      "step": 500
    },
    {
      "epoch": 1.513353115727003,
      "grad_norm": 41.43285369873047,
      "learning_rate": 4.85207100591716e-06,
      "loss": 0.3679,
      "step": 510
    },
    {
      "epoch": 1.543026706231454,
      "grad_norm": 45.90950012207031,
      "learning_rate": 4.55621301775148e-06,
      "loss": 0.4564,
      "step": 520
    },
    {
      "epoch": 1.572700296735905,
      "grad_norm": 75.42010498046875,
      "learning_rate": 4.2603550295858e-06,
      "loss": 0.6049,
      "step": 530
    },
    {
      "epoch": 1.6023738872403561,
      "grad_norm": 115.4093246459961,
      "learning_rate": 3.964497041420119e-06,
      "loss": 0.4721,
      "step": 540
    },
    {
      "epoch": 1.632047477744807,
      "grad_norm": 105.15296936035156,
      "learning_rate": 3.668639053254438e-06,
      "loss": 0.5965,
      "step": 550
    },
    {
      "epoch": 1.6617210682492582,
      "grad_norm": 28.678022384643555,
      "learning_rate": 3.3727810650887576e-06,
      "loss": 0.629,
      "step": 560
    },
    {
      "epoch": 1.6913946587537092,
      "grad_norm": 8.347318649291992,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 0.8679,
      "step": 570
    },
    {
      "epoch": 1.7210682492581602,
      "grad_norm": 78.90312194824219,
      "learning_rate": 2.7810650887573965e-06,
      "loss": 0.6902,
      "step": 580
    },
    {
      "epoch": 1.7507418397626113,
      "grad_norm": 58.25022506713867,
      "learning_rate": 2.485207100591716e-06,
      "loss": 0.2924,
      "step": 590
    },
    {
      "epoch": 1.7804154302670623,
      "grad_norm": 6.382408618927002,
      "learning_rate": 2.1893491124260358e-06,
      "loss": 0.5099,
      "step": 600
    },
    {
      "epoch": 1.8100890207715135,
      "grad_norm": 42.28348159790039,
      "learning_rate": 1.8934911242603552e-06,
      "loss": 0.2495,
      "step": 610
    },
    {
      "epoch": 1.8397626112759644,
      "grad_norm": 53.895606994628906,
      "learning_rate": 1.5976331360946749e-06,
      "loss": 0.3869,
      "step": 620
    },
    {
      "epoch": 1.8694362017804154,
      "grad_norm": 19.67365074157715,
      "learning_rate": 1.301775147928994e-06,
      "loss": 0.4416,
      "step": 630
    },
    {
      "epoch": 1.8991097922848663,
      "grad_norm": 6.208206653594971,
      "learning_rate": 1.0059171597633138e-06,
      "loss": 0.2573,
      "step": 640
    },
    {
      "epoch": 1.9287833827893175,
      "grad_norm": 76.81669616699219,
      "learning_rate": 7.100591715976332e-07,
      "loss": 0.9684,
      "step": 650
    },
    {
      "epoch": 1.9584569732937687,
      "grad_norm": 70.443115234375,
      "learning_rate": 4.1420118343195276e-07,
      "loss": 0.7257,
      "step": 660
    },
    {
      "epoch": 1.9881305637982196,
      "grad_norm": 18.408775329589844,
      "learning_rate": 1.183431952662722e-07,
      "loss": 0.4501,
      "step": 670
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8044444444444444,
      "eval_f1": 0.7994034079615475,
      "eval_f1_macro": 0.7226974575811784,
      "eval_loss": 0.6364149451255798,
      "eval_precision": 0.7989471347051534,
      "eval_precision_macro": 0.7605190105677089,
      "eval_recall": 0.8044444444444444,
      "eval_recall_macro": 0.697208985138428,
      "eval_runtime": 247.0192,
      "eval_samples_per_second": 1.822,
      "eval_steps_per_second": 0.457,
      "step": 674
    }
  ],
  "logging_steps": 10,
  "max_steps": 674,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 708827547359232.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}