{ "best_metric": 0.007734560873359442, "best_model_checkpoint": "autotrain-5zkp2-pa5ot/checkpoint-2871", "epoch": 3.0, "eval_steps": 500, "global_step": 2871, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02612330198537095, "grad_norm": 3.338355302810669, "learning_rate": 4.340277777777778e-06, "loss": 1.1122, "step": 25 }, { "epoch": 0.0522466039707419, "grad_norm": 1.7776278257369995, "learning_rate": 8.680555555555556e-06, "loss": 1.0796, "step": 50 }, { "epoch": 0.07836990595611286, "grad_norm": 3.9048593044281006, "learning_rate": 1.3020833333333334e-05, "loss": 1.0438, "step": 75 }, { "epoch": 0.1044932079414838, "grad_norm": 4.985559940338135, "learning_rate": 1.736111111111111e-05, "loss": 0.944, "step": 100 }, { "epoch": 0.13061650992685475, "grad_norm": 6.691493988037109, "learning_rate": 2.170138888888889e-05, "loss": 0.8765, "step": 125 }, { "epoch": 0.15673981191222572, "grad_norm": 15.063920974731445, "learning_rate": 2.604166666666667e-05, "loss": 0.5913, "step": 150 }, { "epoch": 0.18286311389759666, "grad_norm": 55.3206901550293, "learning_rate": 3.0381944444444444e-05, "loss": 0.3799, "step": 175 }, { "epoch": 0.2089864158829676, "grad_norm": 8.026313781738281, "learning_rate": 3.472222222222222e-05, "loss": 0.3776, "step": 200 }, { "epoch": 0.23510971786833856, "grad_norm": 3.5273993015289307, "learning_rate": 3.90625e-05, "loss": 0.2117, "step": 225 }, { "epoch": 0.2612330198537095, "grad_norm": 0.12634535133838654, "learning_rate": 4.340277777777778e-05, "loss": 0.2089, "step": 250 }, { "epoch": 0.28735632183908044, "grad_norm": 0.2070770412683487, "learning_rate": 4.774305555555556e-05, "loss": 0.1633, "step": 275 }, { "epoch": 0.31347962382445144, "grad_norm": 62.55227279663086, "learning_rate": 4.9767711962833916e-05, "loss": 0.1345, "step": 300 }, { "epoch": 0.3396029258098224, "grad_norm": 0.017617134377360344, "learning_rate": 4.928377855207124e-05, "loss": 0.0022, "step": 325 }, { "epoch": 0.3657262277951933, "grad_norm": 0.02495918981730938, "learning_rate": 4.879984514130856e-05, "loss": 0.0762, "step": 350 }, { "epoch": 0.39184952978056425, "grad_norm": 23.060834884643555, "learning_rate": 4.831591173054588e-05, "loss": 0.0773, "step": 375 }, { "epoch": 0.4179728317659352, "grad_norm": 0.018387485295534134, "learning_rate": 4.78319783197832e-05, "loss": 0.0877, "step": 400 }, { "epoch": 0.4440961337513062, "grad_norm": 0.08322528004646301, "learning_rate": 4.734804490902052e-05, "loss": 0.1489, "step": 425 }, { "epoch": 0.4702194357366771, "grad_norm": 0.018301822245121002, "learning_rate": 4.686411149825784e-05, "loss": 0.1955, "step": 450 }, { "epoch": 0.49634273772204807, "grad_norm": 159.80633544921875, "learning_rate": 4.638017808749516e-05, "loss": 0.1085, "step": 475 }, { "epoch": 0.522466039707419, "grad_norm": 0.010405668057501316, "learning_rate": 4.5896244676732484e-05, "loss": 0.0424, "step": 500 }, { "epoch": 0.54858934169279, "grad_norm": 0.016250956803560257, "learning_rate": 4.5412311265969805e-05, "loss": 0.0995, "step": 525 }, { "epoch": 0.5747126436781609, "grad_norm": 6.8752760887146, "learning_rate": 4.4928377855207126e-05, "loss": 0.1536, "step": 550 }, { "epoch": 0.6008359456635318, "grad_norm": 0.012155416421592236, "learning_rate": 4.4444444444444447e-05, "loss": 0.0141, "step": 575 }, { "epoch": 0.6269592476489029, "grad_norm": 0.008480357006192207, "learning_rate": 4.396051103368177e-05, "loss": 0.002, "step": 600 }, { "epoch": 0.6530825496342738, "grad_norm": 0.010472940281033516, "learning_rate": 4.347657762291909e-05, "loss": 0.0437, "step": 625 }, { "epoch": 0.6792058516196448, "grad_norm": 0.021664993837475777, "learning_rate": 4.299264421215641e-05, "loss": 0.0272, "step": 650 }, { "epoch": 0.7053291536050157, "grad_norm": 0.006474316120147705, "learning_rate": 4.250871080139373e-05, "loss": 0.0324, "step": 675 }, { "epoch": 0.7314524555903866, "grad_norm": 0.0071400972083210945, "learning_rate": 4.202477739063105e-05, "loss": 0.0674, "step": 700 }, { "epoch": 0.7575757575757576, "grad_norm": 0.05537139251828194, "learning_rate": 4.154084397986837e-05, "loss": 0.1479, "step": 725 }, { "epoch": 0.7836990595611285, "grad_norm": 0.03653930872678757, "learning_rate": 4.105691056910569e-05, "loss": 0.0746, "step": 750 }, { "epoch": 0.8098223615464994, "grad_norm": 0.030840527266263962, "learning_rate": 4.0572977158343014e-05, "loss": 0.0381, "step": 775 }, { "epoch": 0.8359456635318704, "grad_norm": 0.02931591309607029, "learning_rate": 4.0089043747580335e-05, "loss": 0.0491, "step": 800 }, { "epoch": 0.8620689655172413, "grad_norm": 0.01176014170050621, "learning_rate": 3.9605110336817656e-05, "loss": 0.0348, "step": 825 }, { "epoch": 0.8881922675026124, "grad_norm": 0.03229213505983353, "learning_rate": 3.912117692605498e-05, "loss": 0.0834, "step": 850 }, { "epoch": 0.9143155694879833, "grad_norm": 0.015275160782039165, "learning_rate": 3.86372435152923e-05, "loss": 0.05, "step": 875 }, { "epoch": 0.9404388714733543, "grad_norm": 0.013074109330773354, "learning_rate": 3.815331010452962e-05, "loss": 0.0012, "step": 900 }, { "epoch": 0.9665621734587252, "grad_norm": 0.007205578964203596, "learning_rate": 3.766937669376694e-05, "loss": 0.0006, "step": 925 }, { "epoch": 0.9926854754440961, "grad_norm": 0.023728761821985245, "learning_rate": 3.718544328300426e-05, "loss": 0.0409, "step": 950 }, { "epoch": 1.0, "eval_accuracy": 0.9973863042341872, "eval_f1_macro": 0.9973126862743199, "eval_f1_micro": 0.9973863042341872, "eval_f1_weighted": 0.9973863333834622, "eval_loss": 0.020341886207461357, "eval_precision_macro": 0.9963851515122331, "eval_precision_micro": 0.9973863042341872, "eval_precision_weighted": 0.9974001038510532, "eval_recall_macro": 0.9982547993019196, "eval_recall_micro": 0.9973863042341872, "eval_recall_weighted": 0.9973863042341872, "eval_runtime": 209.7986, "eval_samples_per_second": 9.118, "eval_steps_per_second": 0.572, "step": 957 }, { "epoch": 1.0188087774294672, "grad_norm": 0.01665070652961731, "learning_rate": 3.670150987224158e-05, "loss": 0.0121, "step": 975 }, { "epoch": 1.044932079414838, "grad_norm": 0.00787193700671196, "learning_rate": 3.62175764614789e-05, "loss": 0.0374, "step": 1000 }, { "epoch": 1.071055381400209, "grad_norm": 0.004602901637554169, "learning_rate": 3.5733643050716224e-05, "loss": 0.0029, "step": 1025 }, { "epoch": 1.09717868338558, "grad_norm": 0.0053739468567073345, "learning_rate": 3.5249709639953545e-05, "loss": 0.0004, "step": 1050 }, { "epoch": 1.123301985370951, "grad_norm": 0.0077377124689519405, "learning_rate": 3.4765776229190865e-05, "loss": 0.0003, "step": 1075 }, { "epoch": 1.1494252873563218, "grad_norm": 0.005279259290546179, "learning_rate": 3.4281842818428186e-05, "loss": 0.0798, "step": 1100 }, { "epoch": 1.1755485893416928, "grad_norm": 0.005335587542504072, "learning_rate": 3.379790940766551e-05, "loss": 0.0008, "step": 1125 }, { "epoch": 1.2016718913270636, "grad_norm": 0.013302076607942581, "learning_rate": 3.331397599690283e-05, "loss": 0.0007, "step": 1150 }, { "epoch": 1.2277951933124347, "grad_norm": 0.060083452612161636, "learning_rate": 3.283004258614015e-05, "loss": 0.1174, "step": 1175 }, { "epoch": 1.2539184952978055, "grad_norm": 0.005654782988131046, "learning_rate": 3.234610917537747e-05, "loss": 0.0008, "step": 1200 }, { "epoch": 1.2800417972831766, "grad_norm": 0.004458857234567404, "learning_rate": 3.186217576461479e-05, "loss": 0.0004, "step": 1225 }, { "epoch": 1.3061650992685476, "grad_norm": 0.005284965503960848, "learning_rate": 3.137824235385211e-05, "loss": 0.0396, "step": 1250 }, { "epoch": 1.3322884012539185, "grad_norm": 0.008645354770123959, "learning_rate": 3.089430894308943e-05, "loss": 0.0007, "step": 1275 }, { "epoch": 1.3584117032392895, "grad_norm": 0.0050517115741968155, "learning_rate": 3.041037553232675e-05, "loss": 0.0126, "step": 1300 }, { "epoch": 1.3845350052246603, "grad_norm": 0.013066472485661507, "learning_rate": 2.9926442121564075e-05, "loss": 0.0831, "step": 1325 }, { "epoch": 1.4106583072100314, "grad_norm": 0.017604809254407883, "learning_rate": 2.9442508710801396e-05, "loss": 0.0012, "step": 1350 }, { "epoch": 1.4367816091954024, "grad_norm": 0.006513866595923901, "learning_rate": 2.8958575300038713e-05, "loss": 0.0416, "step": 1375 }, { "epoch": 1.4629049111807733, "grad_norm": 0.012942259199917316, "learning_rate": 2.8474641889276038e-05, "loss": 0.0007, "step": 1400 }, { "epoch": 1.489028213166144, "grad_norm": 344.988037109375, "learning_rate": 2.7990708478513355e-05, "loss": 0.0425, "step": 1425 }, { "epoch": 1.5151515151515151, "grad_norm": 0.03896530717611313, "learning_rate": 2.750677506775068e-05, "loss": 0.064, "step": 1450 }, { "epoch": 1.5412748171368862, "grad_norm": 0.009162936359643936, "learning_rate": 2.7022841656988e-05, "loss": 0.001, "step": 1475 }, { "epoch": 1.567398119122257, "grad_norm": 0.010370401665568352, "learning_rate": 2.6538908246225318e-05, "loss": 0.0365, "step": 1500 }, { "epoch": 1.5935214211076278, "grad_norm": 0.009396117180585861, "learning_rate": 2.6054974835462642e-05, "loss": 0.0466, "step": 1525 }, { "epoch": 1.619644723092999, "grad_norm": 0.016944007948040962, "learning_rate": 2.5571041424699967e-05, "loss": 0.0008, "step": 1550 }, { "epoch": 1.64576802507837, "grad_norm": 0.008113248273730278, "learning_rate": 2.5087108013937284e-05, "loss": 0.034, "step": 1575 }, { "epoch": 1.671891327063741, "grad_norm": 0.008707555942237377, "learning_rate": 2.4603174603174602e-05, "loss": 0.0007, "step": 1600 }, { "epoch": 1.6980146290491118, "grad_norm": 0.06956545263528824, "learning_rate": 2.4119241192411926e-05, "loss": 0.0005, "step": 1625 }, { "epoch": 1.7241379310344827, "grad_norm": 0.010159909725189209, "learning_rate": 2.3635307781649247e-05, "loss": 0.0822, "step": 1650 }, { "epoch": 1.7502612330198537, "grad_norm": 0.007952134124934673, "learning_rate": 2.3151374370886568e-05, "loss": 0.0768, "step": 1675 }, { "epoch": 1.7763845350052248, "grad_norm": 0.022700520232319832, "learning_rate": 2.2667440960123886e-05, "loss": 0.0325, "step": 1700 }, { "epoch": 1.8025078369905956, "grad_norm": 0.04925369843840599, "learning_rate": 2.218350754936121e-05, "loss": 0.0832, "step": 1725 }, { "epoch": 1.8286311389759664, "grad_norm": 0.010277110151946545, "learning_rate": 2.169957413859853e-05, "loss": 0.0007, "step": 1750 }, { "epoch": 1.8547544409613375, "grad_norm": 0.009189656004309654, "learning_rate": 2.1215640727835852e-05, "loss": 0.0004, "step": 1775 }, { "epoch": 1.8808777429467085, "grad_norm": 0.00968814454972744, "learning_rate": 2.073170731707317e-05, "loss": 0.0429, "step": 1800 }, { "epoch": 1.9070010449320796, "grad_norm": 0.009455765597522259, "learning_rate": 2.0247773906310494e-05, "loss": 0.0874, "step": 1825 }, { "epoch": 1.9331243469174504, "grad_norm": 0.00541004678234458, "learning_rate": 1.9763840495547815e-05, "loss": 0.0019, "step": 1850 }, { "epoch": 1.9592476489028212, "grad_norm": 0.027987977489829063, "learning_rate": 1.9279907084785136e-05, "loss": 0.0375, "step": 1875 }, { "epoch": 1.9853709508881923, "grad_norm": 0.012382814660668373, "learning_rate": 1.8795973674022453e-05, "loss": 0.0006, "step": 1900 }, { "epoch": 2.0, "eval_accuracy": 0.9952953476215368, "eval_f1_macro": 0.9958184083774263, "eval_f1_micro": 0.9952953476215368, "eval_f1_weighted": 0.995297723057706, "eval_loss": 0.03401019424200058, "eval_precision_macro": 0.9948206725776819, "eval_precision_micro": 0.9952953476215368, "eval_precision_weighted": 0.9953490671179285, "eval_recall_macro": 0.9968586387434555, "eval_recall_micro": 0.9952953476215368, "eval_recall_weighted": 0.9952953476215368, "eval_runtime": 204.9069, "eval_samples_per_second": 9.336, "eval_steps_per_second": 0.586, "step": 1914 }, { "epoch": 2.0114942528735633, "grad_norm": 0.01057450845837593, "learning_rate": 1.8312040263259778e-05, "loss": 0.0421, "step": 1925 }, { "epoch": 2.0376175548589344, "grad_norm": 0.005483025684952736, "learning_rate": 1.78281068524971e-05, "loss": 0.0004, "step": 1950 }, { "epoch": 2.063740856844305, "grad_norm": 0.0031655074562877417, "learning_rate": 1.734417344173442e-05, "loss": 0.0065, "step": 1975 }, { "epoch": 2.089864158829676, "grad_norm": 0.0039079682901501656, "learning_rate": 1.6860240030971737e-05, "loss": 0.0003, "step": 2000 }, { "epoch": 2.115987460815047, "grad_norm": 6.877926826477051, "learning_rate": 1.6376306620209058e-05, "loss": 0.0447, "step": 2025 }, { "epoch": 2.142110762800418, "grad_norm": 0.006742693949490786, "learning_rate": 1.5892373209446382e-05, "loss": 0.0387, "step": 2050 }, { "epoch": 2.1682340647857887, "grad_norm": 0.005568367429077625, "learning_rate": 1.5408439798683703e-05, "loss": 0.0004, "step": 2075 }, { "epoch": 2.19435736677116, "grad_norm": 0.010062599554657936, "learning_rate": 1.4924506387921023e-05, "loss": 0.0003, "step": 2100 }, { "epoch": 2.220480668756531, "grad_norm": 0.0033714643213897943, "learning_rate": 1.4440572977158342e-05, "loss": 0.0002, "step": 2125 }, { "epoch": 2.246603970741902, "grad_norm": 0.0053630974143743515, "learning_rate": 1.3956639566395666e-05, "loss": 0.0002, "step": 2150 }, { "epoch": 2.2727272727272725, "grad_norm": 0.012313129380345345, "learning_rate": 1.3472706155632985e-05, "loss": 0.0803, "step": 2175 }, { "epoch": 2.2988505747126435, "grad_norm": 0.010815066285431385, "learning_rate": 1.2988772744870306e-05, "loss": 0.0014, "step": 2200 }, { "epoch": 2.3249738766980146, "grad_norm": 0.002910745795816183, "learning_rate": 1.2504839334107627e-05, "loss": 0.0014, "step": 2225 }, { "epoch": 2.3510971786833856, "grad_norm": 0.003235210431739688, "learning_rate": 1.2020905923344948e-05, "loss": 0.0004, "step": 2250 }, { "epoch": 2.3772204806687567, "grad_norm": 0.002846105257049203, "learning_rate": 1.1536972512582269e-05, "loss": 0.0002, "step": 2275 }, { "epoch": 2.4033437826541273, "grad_norm": 0.0029369164258241653, "learning_rate": 1.105303910181959e-05, "loss": 0.0326, "step": 2300 }, { "epoch": 2.4294670846394983, "grad_norm": 0.002867381554096937, "learning_rate": 1.0569105691056911e-05, "loss": 0.0547, "step": 2325 }, { "epoch": 2.4555903866248694, "grad_norm": 0.0030547629576176405, "learning_rate": 1.0085172280294232e-05, "loss": 0.0002, "step": 2350 }, { "epoch": 2.4817136886102404, "grad_norm": 0.003066838486120105, "learning_rate": 9.601238869531553e-06, "loss": 0.0002, "step": 2375 }, { "epoch": 2.507836990595611, "grad_norm": 0.003202056046575308, "learning_rate": 9.117305458768874e-06, "loss": 0.0002, "step": 2400 }, { "epoch": 2.533960292580982, "grad_norm": 0.002831035992130637, "learning_rate": 8.633372048006195e-06, "loss": 0.0002, "step": 2425 }, { "epoch": 2.560083594566353, "grad_norm": 0.0025005133356899023, "learning_rate": 8.149438637243516e-06, "loss": 0.0002, "step": 2450 }, { "epoch": 2.586206896551724, "grad_norm": 0.0023419370409101248, "learning_rate": 7.665505226480837e-06, "loss": 0.0002, "step": 2475 }, { "epoch": 2.6123301985370952, "grad_norm": 0.004286649636924267, "learning_rate": 7.181571815718158e-06, "loss": 0.022, "step": 2500 }, { "epoch": 2.6384535005224663, "grad_norm": 0.0027705898974090815, "learning_rate": 6.697638404955478e-06, "loss": 0.0002, "step": 2525 }, { "epoch": 2.664576802507837, "grad_norm": 0.002411492168903351, "learning_rate": 6.2137049941927995e-06, "loss": 0.0301, "step": 2550 }, { "epoch": 2.690700104493208, "grad_norm": 0.003571214620023966, "learning_rate": 5.7297715834301205e-06, "loss": 0.0002, "step": 2575 }, { "epoch": 2.716823406478579, "grad_norm": 0.002616529120132327, "learning_rate": 5.245838172667441e-06, "loss": 0.0002, "step": 2600 }, { "epoch": 2.7429467084639496, "grad_norm": 0.002348940121009946, "learning_rate": 4.7619047619047615e-06, "loss": 0.0454, "step": 2625 }, { "epoch": 2.7690700104493207, "grad_norm": 0.0026359122712165117, "learning_rate": 4.2779713511420825e-06, "loss": 0.0195, "step": 2650 }, { "epoch": 2.7951933124346917, "grad_norm": 0.002935645403340459, "learning_rate": 3.794037940379404e-06, "loss": 0.0508, "step": 2675 }, { "epoch": 2.8213166144200628, "grad_norm": 0.009833462536334991, "learning_rate": 3.3101045296167248e-06, "loss": 0.0002, "step": 2700 }, { "epoch": 2.847439916405434, "grad_norm": 6.756618976593018, "learning_rate": 2.8261711188540457e-06, "loss": 0.1122, "step": 2725 }, { "epoch": 2.873563218390805, "grad_norm": 0.005804854445159435, "learning_rate": 2.3422377080913667e-06, "loss": 0.0002, "step": 2750 }, { "epoch": 2.8996865203761755, "grad_norm": 0.005187211558222771, "learning_rate": 1.8583042973286876e-06, "loss": 0.0003, "step": 2775 }, { "epoch": 2.9258098223615465, "grad_norm": 0.0601598359644413, "learning_rate": 1.3743708865660086e-06, "loss": 0.0002, "step": 2800 }, { "epoch": 2.9519331243469176, "grad_norm": 0.0032336723525077105, "learning_rate": 8.904374758033296e-07, "loss": 0.0318, "step": 2825 }, { "epoch": 2.978056426332288, "grad_norm": 0.003166941227391362, "learning_rate": 4.0650406504065046e-07, "loss": 0.0004, "step": 2850 }, { "epoch": 3.0, "eval_accuracy": 0.9989545216936748, "eval_f1_macro": 0.9991256596070146, "eval_f1_micro": 0.9989545216936748, "eval_f1_weighted": 0.9989547969603347, "eval_loss": 0.007734560873359442, "eval_precision_macro": 0.9989517819706499, "eval_precision_micro": 0.9989545216936748, "eval_precision_weighted": 0.9989578093613047, "eval_recall_macro": 0.9993019197207679, "eval_recall_micro": 0.9989545216936748, "eval_recall_weighted": 0.9989545216936748, "eval_runtime": 176.5223, "eval_samples_per_second": 10.837, "eval_steps_per_second": 0.68, "step": 2871 } ], "logging_steps": 25, "max_steps": 2871, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1509810569277696.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }