{ "best_metric": 0.7994034079615475, "best_model_checkpoint": "hf-importance-production/checkpoint-674", "epoch": 2.0, "eval_steps": 500, "global_step": 674, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02967359050445104, "grad_norm": 168.60842895507812, "learning_rate": 2.9761904761904765e-07, "loss": 1.824, "step": 10 }, { "epoch": 0.05934718100890208, "grad_norm": 197.9370574951172, "learning_rate": 5.952380952380953e-07, "loss": 1.9581, "step": 20 }, { "epoch": 0.08902077151335312, "grad_norm": 18.61470603942871, "learning_rate": 8.928571428571429e-07, "loss": 1.3577, "step": 30 }, { "epoch": 0.11869436201780416, "grad_norm": 171.209228515625, "learning_rate": 1.1904761904761906e-06, "loss": 1.5439, "step": 40 }, { "epoch": 0.14836795252225518, "grad_norm": 30.70751953125, "learning_rate": 1.4880952380952381e-06, "loss": 0.935, "step": 50 }, { "epoch": 0.17804154302670624, "grad_norm": 1.3179957866668701, "learning_rate": 1.7857142857142859e-06, "loss": 0.863, "step": 60 }, { "epoch": 0.20771513353115728, "grad_norm": 46.17071533203125, "learning_rate": 2.0833333333333334e-06, "loss": 0.9118, "step": 70 }, { "epoch": 0.23738872403560832, "grad_norm": 136.59942626953125, "learning_rate": 2.380952380952381e-06, "loss": 1.3016, "step": 80 }, { "epoch": 0.26706231454005935, "grad_norm": 164.12306213378906, "learning_rate": 2.6785714285714285e-06, "loss": 1.3362, "step": 90 }, { "epoch": 0.29673590504451036, "grad_norm": 72.49063110351562, "learning_rate": 2.9761904761904763e-06, "loss": 0.7472, "step": 100 }, { "epoch": 0.3264094955489614, "grad_norm": 63.815528869628906, "learning_rate": 3.273809523809524e-06, "loss": 0.5805, "step": 110 }, { "epoch": 0.3560830860534125, "grad_norm": 97.9521255493164, "learning_rate": 3.5714285714285718e-06, "loss": 0.6095, "step": 120 }, { "epoch": 0.3857566765578635, "grad_norm": 46.075660705566406, "learning_rate": 3.869047619047619e-06, "loss": 0.9813, "step": 130 }, { "epoch": 0.41543026706231456, "grad_norm": 56.08775329589844, "learning_rate": 4.166666666666667e-06, "loss": 0.8756, "step": 140 }, { "epoch": 0.44510385756676557, "grad_norm": 41.81604766845703, "learning_rate": 4.464285714285715e-06, "loss": 1.0463, "step": 150 }, { "epoch": 0.47477744807121663, "grad_norm": 42.02016067504883, "learning_rate": 4.761904761904762e-06, "loss": 1.1095, "step": 160 }, { "epoch": 0.5044510385756676, "grad_norm": 92.67354583740234, "learning_rate": 5.05952380952381e-06, "loss": 1.0643, "step": 170 }, { "epoch": 0.5341246290801187, "grad_norm": 92.83062744140625, "learning_rate": 5.357142857142857e-06, "loss": 0.6986, "step": 180 }, { "epoch": 0.5637982195845698, "grad_norm": 37.39644241333008, "learning_rate": 5.654761904761905e-06, "loss": 1.1433, "step": 190 }, { "epoch": 0.5934718100890207, "grad_norm": 50.91009521484375, "learning_rate": 5.9523809523809525e-06, "loss": 0.7077, "step": 200 }, { "epoch": 0.6231454005934718, "grad_norm": 38.2639045715332, "learning_rate": 6.25e-06, "loss": 0.7619, "step": 210 }, { "epoch": 0.6528189910979229, "grad_norm": 2.9356582164764404, "learning_rate": 6.547619047619048e-06, "loss": 0.4398, "step": 220 }, { "epoch": 0.6824925816023739, "grad_norm": 49.96760940551758, "learning_rate": 6.845238095238096e-06, "loss": 1.1917, "step": 230 }, { "epoch": 0.712166172106825, "grad_norm": 73.6959228515625, "learning_rate": 7.1428571428571436e-06, "loss": 0.4473, "step": 240 }, { "epoch": 0.7418397626112759, "grad_norm": 52.25584411621094, "learning_rate": 7.440476190476191e-06, "loss": 1.3084, "step": 250 }, { "epoch": 0.771513353115727, "grad_norm": 66.60286712646484, "learning_rate": 7.738095238095238e-06, "loss": 0.9924, "step": 260 }, { "epoch": 0.8011869436201781, "grad_norm": 61.88084411621094, "learning_rate": 8.035714285714286e-06, "loss": 0.9188, "step": 270 }, { "epoch": 0.8308605341246291, "grad_norm": 32.82881546020508, "learning_rate": 8.333333333333334e-06, "loss": 0.5267, "step": 280 }, { "epoch": 0.8605341246290801, "grad_norm": 50.554412841796875, "learning_rate": 8.630952380952381e-06, "loss": 0.8353, "step": 290 }, { "epoch": 0.8902077151335311, "grad_norm": 74.67025756835938, "learning_rate": 8.92857142857143e-06, "loss": 0.7204, "step": 300 }, { "epoch": 0.9198813056379822, "grad_norm": 73.39923095703125, "learning_rate": 9.226190476190477e-06, "loss": 1.5639, "step": 310 }, { "epoch": 0.9495548961424333, "grad_norm": 43.86273193359375, "learning_rate": 9.523809523809525e-06, "loss": 0.6084, "step": 320 }, { "epoch": 0.9792284866468842, "grad_norm": 60.18267822265625, "learning_rate": 9.821428571428573e-06, "loss": 0.8311, "step": 330 }, { "epoch": 1.0, "eval_accuracy": 0.72, "eval_f1": 0.6758345853065657, "eval_f1_macro": 0.5593865060621974, "eval_loss": 1.2557945251464844, "eval_precision": 0.7713116348671903, "eval_precision_macro": 0.7813692480359148, "eval_recall": 0.72, "eval_recall_macro": 0.5248060178139268, "eval_runtime": 247.1233, "eval_samples_per_second": 1.821, "eval_steps_per_second": 0.457, "step": 337 }, { "epoch": 1.0089020771513353, "grad_norm": 86.58915710449219, "learning_rate": 9.88165680473373e-06, "loss": 0.9621, "step": 340 }, { "epoch": 1.0385756676557865, "grad_norm": 46.461692810058594, "learning_rate": 9.585798816568049e-06, "loss": 0.5792, "step": 350 }, { "epoch": 1.0682492581602374, "grad_norm": 49.093719482421875, "learning_rate": 9.289940828402368e-06, "loss": 0.6038, "step": 360 }, { "epoch": 1.0979228486646884, "grad_norm": 43.62152862548828, "learning_rate": 8.994082840236687e-06, "loss": 0.6391, "step": 370 }, { "epoch": 1.1275964391691395, "grad_norm": 14.27550983428955, "learning_rate": 8.698224852071006e-06, "loss": 0.573, "step": 380 }, { "epoch": 1.1572700296735905, "grad_norm": 67.1692123413086, "learning_rate": 8.402366863905327e-06, "loss": 0.409, "step": 390 }, { "epoch": 1.1869436201780414, "grad_norm": 43.99798583984375, "learning_rate": 8.106508875739646e-06, "loss": 0.4652, "step": 400 }, { "epoch": 1.2166172106824926, "grad_norm": 29.33543586730957, "learning_rate": 7.810650887573965e-06, "loss": 0.2783, "step": 410 }, { "epoch": 1.2462908011869436, "grad_norm": 61.13819885253906, "learning_rate": 7.5147928994082845e-06, "loss": 0.8814, "step": 420 }, { "epoch": 1.2759643916913945, "grad_norm": 37.44241714477539, "learning_rate": 7.218934911242604e-06, "loss": 0.4763, "step": 430 }, { "epoch": 1.3056379821958457, "grad_norm": 58.24061965942383, "learning_rate": 6.923076923076923e-06, "loss": 0.9232, "step": 440 }, { "epoch": 1.3353115727002967, "grad_norm": 0.6983101963996887, "learning_rate": 6.627218934911244e-06, "loss": 0.3548, "step": 450 }, { "epoch": 1.3649851632047478, "grad_norm": 81.1714096069336, "learning_rate": 6.331360946745563e-06, "loss": 1.1273, "step": 460 }, { "epoch": 1.3946587537091988, "grad_norm": 9.78393840789795, "learning_rate": 6.035502958579882e-06, "loss": 0.5734, "step": 470 }, { "epoch": 1.4243323442136497, "grad_norm": 3.354196071624756, "learning_rate": 5.739644970414202e-06, "loss": 0.4616, "step": 480 }, { "epoch": 1.454005934718101, "grad_norm": 30.13648796081543, "learning_rate": 5.443786982248521e-06, "loss": 1.4066, "step": 490 }, { "epoch": 1.4836795252225519, "grad_norm": 56.0758056640625, "learning_rate": 5.14792899408284e-06, "loss": 0.5536, "step": 500 }, { "epoch": 1.513353115727003, "grad_norm": 41.43285369873047, "learning_rate": 4.85207100591716e-06, "loss": 0.3679, "step": 510 }, { "epoch": 1.543026706231454, "grad_norm": 45.90950012207031, "learning_rate": 4.55621301775148e-06, "loss": 0.4564, "step": 520 }, { "epoch": 1.572700296735905, "grad_norm": 75.42010498046875, "learning_rate": 4.2603550295858e-06, "loss": 0.6049, "step": 530 }, { "epoch": 1.6023738872403561, "grad_norm": 115.4093246459961, "learning_rate": 3.964497041420119e-06, "loss": 0.4721, "step": 540 }, { "epoch": 1.632047477744807, "grad_norm": 105.15296936035156, "learning_rate": 3.668639053254438e-06, "loss": 0.5965, "step": 550 }, { "epoch": 1.6617210682492582, "grad_norm": 28.678022384643555, "learning_rate": 3.3727810650887576e-06, "loss": 0.629, "step": 560 }, { "epoch": 1.6913946587537092, "grad_norm": 8.347318649291992, "learning_rate": 3.0769230769230774e-06, "loss": 0.8679, "step": 570 }, { "epoch": 1.7210682492581602, "grad_norm": 78.90312194824219, "learning_rate": 2.7810650887573965e-06, "loss": 0.6902, "step": 580 }, { "epoch": 1.7507418397626113, "grad_norm": 58.25022506713867, "learning_rate": 2.485207100591716e-06, "loss": 0.2924, "step": 590 }, { "epoch": 1.7804154302670623, "grad_norm": 6.382408618927002, "learning_rate": 2.1893491124260358e-06, "loss": 0.5099, "step": 600 }, { "epoch": 1.8100890207715135, "grad_norm": 42.28348159790039, "learning_rate": 1.8934911242603552e-06, "loss": 0.2495, "step": 610 }, { "epoch": 1.8397626112759644, "grad_norm": 53.895606994628906, "learning_rate": 1.5976331360946749e-06, "loss": 0.3869, "step": 620 }, { "epoch": 1.8694362017804154, "grad_norm": 19.67365074157715, "learning_rate": 1.301775147928994e-06, "loss": 0.4416, "step": 630 }, { "epoch": 1.8991097922848663, "grad_norm": 6.208206653594971, "learning_rate": 1.0059171597633138e-06, "loss": 0.2573, "step": 640 }, { "epoch": 1.9287833827893175, "grad_norm": 76.81669616699219, "learning_rate": 7.100591715976332e-07, "loss": 0.9684, "step": 650 }, { "epoch": 1.9584569732937687, "grad_norm": 70.443115234375, "learning_rate": 4.1420118343195276e-07, "loss": 0.7257, "step": 660 }, { "epoch": 1.9881305637982196, "grad_norm": 18.408775329589844, "learning_rate": 1.183431952662722e-07, "loss": 0.4501, "step": 670 }, { "epoch": 2.0, "eval_accuracy": 0.8044444444444444, "eval_f1": 0.7994034079615475, "eval_f1_macro": 0.7226974575811784, "eval_loss": 0.6364149451255798, "eval_precision": 0.7989471347051534, "eval_precision_macro": 0.7605190105677089, "eval_recall": 0.8044444444444444, "eval_recall_macro": 0.697208985138428, "eval_runtime": 247.0192, "eval_samples_per_second": 1.822, "eval_steps_per_second": 0.457, "step": 674 } ], "logging_steps": 10, "max_steps": 674, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 708827547359232.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }