{
"best_metric": 0.007734560873359442,
"best_model_checkpoint": "autotrain-5zkp2-pa5ot/checkpoint-2871",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2871,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02612330198537095,
"grad_norm": 3.338355302810669,
"learning_rate": 4.340277777777778e-06,
"loss": 1.1122,
"step": 25
},
{
"epoch": 0.0522466039707419,
"grad_norm": 1.7776278257369995,
"learning_rate": 8.680555555555556e-06,
"loss": 1.0796,
"step": 50
},
{
"epoch": 0.07836990595611286,
"grad_norm": 3.9048593044281006,
"learning_rate": 1.3020833333333334e-05,
"loss": 1.0438,
"step": 75
},
{
"epoch": 0.1044932079414838,
"grad_norm": 4.985559940338135,
"learning_rate": 1.736111111111111e-05,
"loss": 0.944,
"step": 100
},
{
"epoch": 0.13061650992685475,
"grad_norm": 6.691493988037109,
"learning_rate": 2.170138888888889e-05,
"loss": 0.8765,
"step": 125
},
{
"epoch": 0.15673981191222572,
"grad_norm": 15.063920974731445,
"learning_rate": 2.604166666666667e-05,
"loss": 0.5913,
"step": 150
},
{
"epoch": 0.18286311389759666,
"grad_norm": 55.3206901550293,
"learning_rate": 3.0381944444444444e-05,
"loss": 0.3799,
"step": 175
},
{
"epoch": 0.2089864158829676,
"grad_norm": 8.026313781738281,
"learning_rate": 3.472222222222222e-05,
"loss": 0.3776,
"step": 200
},
{
"epoch": 0.23510971786833856,
"grad_norm": 3.5273993015289307,
"learning_rate": 3.90625e-05,
"loss": 0.2117,
"step": 225
},
{
"epoch": 0.2612330198537095,
"grad_norm": 0.12634535133838654,
"learning_rate": 4.340277777777778e-05,
"loss": 0.2089,
"step": 250
},
{
"epoch": 0.28735632183908044,
"grad_norm": 0.2070770412683487,
"learning_rate": 4.774305555555556e-05,
"loss": 0.1633,
"step": 275
},
{
"epoch": 0.31347962382445144,
"grad_norm": 62.55227279663086,
"learning_rate": 4.9767711962833916e-05,
"loss": 0.1345,
"step": 300
},
{
"epoch": 0.3396029258098224,
"grad_norm": 0.017617134377360344,
"learning_rate": 4.928377855207124e-05,
"loss": 0.0022,
"step": 325
},
{
"epoch": 0.3657262277951933,
"grad_norm": 0.02495918981730938,
"learning_rate": 4.879984514130856e-05,
"loss": 0.0762,
"step": 350
},
{
"epoch": 0.39184952978056425,
"grad_norm": 23.060834884643555,
"learning_rate": 4.831591173054588e-05,
"loss": 0.0773,
"step": 375
},
{
"epoch": 0.4179728317659352,
"grad_norm": 0.018387485295534134,
"learning_rate": 4.78319783197832e-05,
"loss": 0.0877,
"step": 400
},
{
"epoch": 0.4440961337513062,
"grad_norm": 0.08322528004646301,
"learning_rate": 4.734804490902052e-05,
"loss": 0.1489,
"step": 425
},
{
"epoch": 0.4702194357366771,
"grad_norm": 0.018301822245121002,
"learning_rate": 4.686411149825784e-05,
"loss": 0.1955,
"step": 450
},
{
"epoch": 0.49634273772204807,
"grad_norm": 159.80633544921875,
"learning_rate": 4.638017808749516e-05,
"loss": 0.1085,
"step": 475
},
{
"epoch": 0.522466039707419,
"grad_norm": 0.010405668057501316,
"learning_rate": 4.5896244676732484e-05,
"loss": 0.0424,
"step": 500
},
{
"epoch": 0.54858934169279,
"grad_norm": 0.016250956803560257,
"learning_rate": 4.5412311265969805e-05,
"loss": 0.0995,
"step": 525
},
{
"epoch": 0.5747126436781609,
"grad_norm": 6.8752760887146,
"learning_rate": 4.4928377855207126e-05,
"loss": 0.1536,
"step": 550
},
{
"epoch": 0.6008359456635318,
"grad_norm": 0.012155416421592236,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.0141,
"step": 575
},
{
"epoch": 0.6269592476489029,
"grad_norm": 0.008480357006192207,
"learning_rate": 4.396051103368177e-05,
"loss": 0.002,
"step": 600
},
{
"epoch": 0.6530825496342738,
"grad_norm": 0.010472940281033516,
"learning_rate": 4.347657762291909e-05,
"loss": 0.0437,
"step": 625
},
{
"epoch": 0.6792058516196448,
"grad_norm": 0.021664993837475777,
"learning_rate": 4.299264421215641e-05,
"loss": 0.0272,
"step": 650
},
{
"epoch": 0.7053291536050157,
"grad_norm": 0.006474316120147705,
"learning_rate": 4.250871080139373e-05,
"loss": 0.0324,
"step": 675
},
{
"epoch": 0.7314524555903866,
"grad_norm": 0.0071400972083210945,
"learning_rate": 4.202477739063105e-05,
"loss": 0.0674,
"step": 700
},
{
"epoch": 0.7575757575757576,
"grad_norm": 0.05537139251828194,
"learning_rate": 4.154084397986837e-05,
"loss": 0.1479,
"step": 725
},
{
"epoch": 0.7836990595611285,
"grad_norm": 0.03653930872678757,
"learning_rate": 4.105691056910569e-05,
"loss": 0.0746,
"step": 750
},
{
"epoch": 0.8098223615464994,
"grad_norm": 0.030840527266263962,
"learning_rate": 4.0572977158343014e-05,
"loss": 0.0381,
"step": 775
},
{
"epoch": 0.8359456635318704,
"grad_norm": 0.02931591309607029,
"learning_rate": 4.0089043747580335e-05,
"loss": 0.0491,
"step": 800
},
{
"epoch": 0.8620689655172413,
"grad_norm": 0.01176014170050621,
"learning_rate": 3.9605110336817656e-05,
"loss": 0.0348,
"step": 825
},
{
"epoch": 0.8881922675026124,
"grad_norm": 0.03229213505983353,
"learning_rate": 3.912117692605498e-05,
"loss": 0.0834,
"step": 850
},
{
"epoch": 0.9143155694879833,
"grad_norm": 0.015275160782039165,
"learning_rate": 3.86372435152923e-05,
"loss": 0.05,
"step": 875
},
{
"epoch": 0.9404388714733543,
"grad_norm": 0.013074109330773354,
"learning_rate": 3.815331010452962e-05,
"loss": 0.0012,
"step": 900
},
{
"epoch": 0.9665621734587252,
"grad_norm": 0.007205578964203596,
"learning_rate": 3.766937669376694e-05,
"loss": 0.0006,
"step": 925
},
{
"epoch": 0.9926854754440961,
"grad_norm": 0.023728761821985245,
"learning_rate": 3.718544328300426e-05,
"loss": 0.0409,
"step": 950
},
{
"epoch": 1.0,
"eval_accuracy": 0.9973863042341872,
"eval_f1_macro": 0.9973126862743199,
"eval_f1_micro": 0.9973863042341872,
"eval_f1_weighted": 0.9973863333834622,
"eval_loss": 0.020341886207461357,
"eval_precision_macro": 0.9963851515122331,
"eval_precision_micro": 0.9973863042341872,
"eval_precision_weighted": 0.9974001038510532,
"eval_recall_macro": 0.9982547993019196,
"eval_recall_micro": 0.9973863042341872,
"eval_recall_weighted": 0.9973863042341872,
"eval_runtime": 209.7986,
"eval_samples_per_second": 9.118,
"eval_steps_per_second": 0.572,
"step": 957
},
{
"epoch": 1.0188087774294672,
"grad_norm": 0.01665070652961731,
"learning_rate": 3.670150987224158e-05,
"loss": 0.0121,
"step": 975
},
{
"epoch": 1.044932079414838,
"grad_norm": 0.00787193700671196,
"learning_rate": 3.62175764614789e-05,
"loss": 0.0374,
"step": 1000
},
{
"epoch": 1.071055381400209,
"grad_norm": 0.004602901637554169,
"learning_rate": 3.5733643050716224e-05,
"loss": 0.0029,
"step": 1025
},
{
"epoch": 1.09717868338558,
"grad_norm": 0.0053739468567073345,
"learning_rate": 3.5249709639953545e-05,
"loss": 0.0004,
"step": 1050
},
{
"epoch": 1.123301985370951,
"grad_norm": 0.0077377124689519405,
"learning_rate": 3.4765776229190865e-05,
"loss": 0.0003,
"step": 1075
},
{
"epoch": 1.1494252873563218,
"grad_norm": 0.005279259290546179,
"learning_rate": 3.4281842818428186e-05,
"loss": 0.0798,
"step": 1100
},
{
"epoch": 1.1755485893416928,
"grad_norm": 0.005335587542504072,
"learning_rate": 3.379790940766551e-05,
"loss": 0.0008,
"step": 1125
},
{
"epoch": 1.2016718913270636,
"grad_norm": 0.013302076607942581,
"learning_rate": 3.331397599690283e-05,
"loss": 0.0007,
"step": 1150
},
{
"epoch": 1.2277951933124347,
"grad_norm": 0.060083452612161636,
"learning_rate": 3.283004258614015e-05,
"loss": 0.1174,
"step": 1175
},
{
"epoch": 1.2539184952978055,
"grad_norm": 0.005654782988131046,
"learning_rate": 3.234610917537747e-05,
"loss": 0.0008,
"step": 1200
},
{
"epoch": 1.2800417972831766,
"grad_norm": 0.004458857234567404,
"learning_rate": 3.186217576461479e-05,
"loss": 0.0004,
"step": 1225
},
{
"epoch": 1.3061650992685476,
"grad_norm": 0.005284965503960848,
"learning_rate": 3.137824235385211e-05,
"loss": 0.0396,
"step": 1250
},
{
"epoch": 1.3322884012539185,
"grad_norm": 0.008645354770123959,
"learning_rate": 3.089430894308943e-05,
"loss": 0.0007,
"step": 1275
},
{
"epoch": 1.3584117032392895,
"grad_norm": 0.0050517115741968155,
"learning_rate": 3.041037553232675e-05,
"loss": 0.0126,
"step": 1300
},
{
"epoch": 1.3845350052246603,
"grad_norm": 0.013066472485661507,
"learning_rate": 2.9926442121564075e-05,
"loss": 0.0831,
"step": 1325
},
{
"epoch": 1.4106583072100314,
"grad_norm": 0.017604809254407883,
"learning_rate": 2.9442508710801396e-05,
"loss": 0.0012,
"step": 1350
},
{
"epoch": 1.4367816091954024,
"grad_norm": 0.006513866595923901,
"learning_rate": 2.8958575300038713e-05,
"loss": 0.0416,
"step": 1375
},
{
"epoch": 1.4629049111807733,
"grad_norm": 0.012942259199917316,
"learning_rate": 2.8474641889276038e-05,
"loss": 0.0007,
"step": 1400
},
{
"epoch": 1.489028213166144,
"grad_norm": 344.988037109375,
"learning_rate": 2.7990708478513355e-05,
"loss": 0.0425,
"step": 1425
},
{
"epoch": 1.5151515151515151,
"grad_norm": 0.03896530717611313,
"learning_rate": 2.750677506775068e-05,
"loss": 0.064,
"step": 1450
},
{
"epoch": 1.5412748171368862,
"grad_norm": 0.009162936359643936,
"learning_rate": 2.7022841656988e-05,
"loss": 0.001,
"step": 1475
},
{
"epoch": 1.567398119122257,
"grad_norm": 0.010370401665568352,
"learning_rate": 2.6538908246225318e-05,
"loss": 0.0365,
"step": 1500
},
{
"epoch": 1.5935214211076278,
"grad_norm": 0.009396117180585861,
"learning_rate": 2.6054974835462642e-05,
"loss": 0.0466,
"step": 1525
},
{
"epoch": 1.619644723092999,
"grad_norm": 0.016944007948040962,
"learning_rate": 2.5571041424699967e-05,
"loss": 0.0008,
"step": 1550
},
{
"epoch": 1.64576802507837,
"grad_norm": 0.008113248273730278,
"learning_rate": 2.5087108013937284e-05,
"loss": 0.034,
"step": 1575
},
{
"epoch": 1.671891327063741,
"grad_norm": 0.008707555942237377,
"learning_rate": 2.4603174603174602e-05,
"loss": 0.0007,
"step": 1600
},
{
"epoch": 1.6980146290491118,
"grad_norm": 0.06956545263528824,
"learning_rate": 2.4119241192411926e-05,
"loss": 0.0005,
"step": 1625
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.010159909725189209,
"learning_rate": 2.3635307781649247e-05,
"loss": 0.0822,
"step": 1650
},
{
"epoch": 1.7502612330198537,
"grad_norm": 0.007952134124934673,
"learning_rate": 2.3151374370886568e-05,
"loss": 0.0768,
"step": 1675
},
{
"epoch": 1.7763845350052248,
"grad_norm": 0.022700520232319832,
"learning_rate": 2.2667440960123886e-05,
"loss": 0.0325,
"step": 1700
},
{
"epoch": 1.8025078369905956,
"grad_norm": 0.04925369843840599,
"learning_rate": 2.218350754936121e-05,
"loss": 0.0832,
"step": 1725
},
{
"epoch": 1.8286311389759664,
"grad_norm": 0.010277110151946545,
"learning_rate": 2.169957413859853e-05,
"loss": 0.0007,
"step": 1750
},
{
"epoch": 1.8547544409613375,
"grad_norm": 0.009189656004309654,
"learning_rate": 2.1215640727835852e-05,
"loss": 0.0004,
"step": 1775
},
{
"epoch": 1.8808777429467085,
"grad_norm": 0.00968814454972744,
"learning_rate": 2.073170731707317e-05,
"loss": 0.0429,
"step": 1800
},
{
"epoch": 1.9070010449320796,
"grad_norm": 0.009455765597522259,
"learning_rate": 2.0247773906310494e-05,
"loss": 0.0874,
"step": 1825
},
{
"epoch": 1.9331243469174504,
"grad_norm": 0.00541004678234458,
"learning_rate": 1.9763840495547815e-05,
"loss": 0.0019,
"step": 1850
},
{
"epoch": 1.9592476489028212,
"grad_norm": 0.027987977489829063,
"learning_rate": 1.9279907084785136e-05,
"loss": 0.0375,
"step": 1875
},
{
"epoch": 1.9853709508881923,
"grad_norm": 0.012382814660668373,
"learning_rate": 1.8795973674022453e-05,
"loss": 0.0006,
"step": 1900
},
{
"epoch": 2.0,
"eval_accuracy": 0.9952953476215368,
"eval_f1_macro": 0.9958184083774263,
"eval_f1_micro": 0.9952953476215368,
"eval_f1_weighted": 0.995297723057706,
"eval_loss": 0.03401019424200058,
"eval_precision_macro": 0.9948206725776819,
"eval_precision_micro": 0.9952953476215368,
"eval_precision_weighted": 0.9953490671179285,
"eval_recall_macro": 0.9968586387434555,
"eval_recall_micro": 0.9952953476215368,
"eval_recall_weighted": 0.9952953476215368,
"eval_runtime": 204.9069,
"eval_samples_per_second": 9.336,
"eval_steps_per_second": 0.586,
"step": 1914
},
{
"epoch": 2.0114942528735633,
"grad_norm": 0.01057450845837593,
"learning_rate": 1.8312040263259778e-05,
"loss": 0.0421,
"step": 1925
},
{
"epoch": 2.0376175548589344,
"grad_norm": 0.005483025684952736,
"learning_rate": 1.78281068524971e-05,
"loss": 0.0004,
"step": 1950
},
{
"epoch": 2.063740856844305,
"grad_norm": 0.0031655074562877417,
"learning_rate": 1.734417344173442e-05,
"loss": 0.0065,
"step": 1975
},
{
"epoch": 2.089864158829676,
"grad_norm": 0.0039079682901501656,
"learning_rate": 1.6860240030971737e-05,
"loss": 0.0003,
"step": 2000
},
{
"epoch": 2.115987460815047,
"grad_norm": 6.877926826477051,
"learning_rate": 1.6376306620209058e-05,
"loss": 0.0447,
"step": 2025
},
{
"epoch": 2.142110762800418,
"grad_norm": 0.006742693949490786,
"learning_rate": 1.5892373209446382e-05,
"loss": 0.0387,
"step": 2050
},
{
"epoch": 2.1682340647857887,
"grad_norm": 0.005568367429077625,
"learning_rate": 1.5408439798683703e-05,
"loss": 0.0004,
"step": 2075
},
{
"epoch": 2.19435736677116,
"grad_norm": 0.010062599554657936,
"learning_rate": 1.4924506387921023e-05,
"loss": 0.0003,
"step": 2100
},
{
"epoch": 2.220480668756531,
"grad_norm": 0.0033714643213897943,
"learning_rate": 1.4440572977158342e-05,
"loss": 0.0002,
"step": 2125
},
{
"epoch": 2.246603970741902,
"grad_norm": 0.0053630974143743515,
"learning_rate": 1.3956639566395666e-05,
"loss": 0.0002,
"step": 2150
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.012313129380345345,
"learning_rate": 1.3472706155632985e-05,
"loss": 0.0803,
"step": 2175
},
{
"epoch": 2.2988505747126435,
"grad_norm": 0.010815066285431385,
"learning_rate": 1.2988772744870306e-05,
"loss": 0.0014,
"step": 2200
},
{
"epoch": 2.3249738766980146,
"grad_norm": 0.002910745795816183,
"learning_rate": 1.2504839334107627e-05,
"loss": 0.0014,
"step": 2225
},
{
"epoch": 2.3510971786833856,
"grad_norm": 0.003235210431739688,
"learning_rate": 1.2020905923344948e-05,
"loss": 0.0004,
"step": 2250
},
{
"epoch": 2.3772204806687567,
"grad_norm": 0.002846105257049203,
"learning_rate": 1.1536972512582269e-05,
"loss": 0.0002,
"step": 2275
},
{
"epoch": 2.4033437826541273,
"grad_norm": 0.0029369164258241653,
"learning_rate": 1.105303910181959e-05,
"loss": 0.0326,
"step": 2300
},
{
"epoch": 2.4294670846394983,
"grad_norm": 0.002867381554096937,
"learning_rate": 1.0569105691056911e-05,
"loss": 0.0547,
"step": 2325
},
{
"epoch": 2.4555903866248694,
"grad_norm": 0.0030547629576176405,
"learning_rate": 1.0085172280294232e-05,
"loss": 0.0002,
"step": 2350
},
{
"epoch": 2.4817136886102404,
"grad_norm": 0.003066838486120105,
"learning_rate": 9.601238869531553e-06,
"loss": 0.0002,
"step": 2375
},
{
"epoch": 2.507836990595611,
"grad_norm": 0.003202056046575308,
"learning_rate": 9.117305458768874e-06,
"loss": 0.0002,
"step": 2400
},
{
"epoch": 2.533960292580982,
"grad_norm": 0.002831035992130637,
"learning_rate": 8.633372048006195e-06,
"loss": 0.0002,
"step": 2425
},
{
"epoch": 2.560083594566353,
"grad_norm": 0.0025005133356899023,
"learning_rate": 8.149438637243516e-06,
"loss": 0.0002,
"step": 2450
},
{
"epoch": 2.586206896551724,
"grad_norm": 0.0023419370409101248,
"learning_rate": 7.665505226480837e-06,
"loss": 0.0002,
"step": 2475
},
{
"epoch": 2.6123301985370952,
"grad_norm": 0.004286649636924267,
"learning_rate": 7.181571815718158e-06,
"loss": 0.022,
"step": 2500
},
{
"epoch": 2.6384535005224663,
"grad_norm": 0.0027705898974090815,
"learning_rate": 6.697638404955478e-06,
"loss": 0.0002,
"step": 2525
},
{
"epoch": 2.664576802507837,
"grad_norm": 0.002411492168903351,
"learning_rate": 6.2137049941927995e-06,
"loss": 0.0301,
"step": 2550
},
{
"epoch": 2.690700104493208,
"grad_norm": 0.003571214620023966,
"learning_rate": 5.7297715834301205e-06,
"loss": 0.0002,
"step": 2575
},
{
"epoch": 2.716823406478579,
"grad_norm": 0.002616529120132327,
"learning_rate": 5.245838172667441e-06,
"loss": 0.0002,
"step": 2600
},
{
"epoch": 2.7429467084639496,
"grad_norm": 0.002348940121009946,
"learning_rate": 4.7619047619047615e-06,
"loss": 0.0454,
"step": 2625
},
{
"epoch": 2.7690700104493207,
"grad_norm": 0.0026359122712165117,
"learning_rate": 4.2779713511420825e-06,
"loss": 0.0195,
"step": 2650
},
{
"epoch": 2.7951933124346917,
"grad_norm": 0.002935645403340459,
"learning_rate": 3.794037940379404e-06,
"loss": 0.0508,
"step": 2675
},
{
"epoch": 2.8213166144200628,
"grad_norm": 0.009833462536334991,
"learning_rate": 3.3101045296167248e-06,
"loss": 0.0002,
"step": 2700
},
{
"epoch": 2.847439916405434,
"grad_norm": 6.756618976593018,
"learning_rate": 2.8261711188540457e-06,
"loss": 0.1122,
"step": 2725
},
{
"epoch": 2.873563218390805,
"grad_norm": 0.005804854445159435,
"learning_rate": 2.3422377080913667e-06,
"loss": 0.0002,
"step": 2750
},
{
"epoch": 2.8996865203761755,
"grad_norm": 0.005187211558222771,
"learning_rate": 1.8583042973286876e-06,
"loss": 0.0003,
"step": 2775
},
{
"epoch": 2.9258098223615465,
"grad_norm": 0.0601598359644413,
"learning_rate": 1.3743708865660086e-06,
"loss": 0.0002,
"step": 2800
},
{
"epoch": 2.9519331243469176,
"grad_norm": 0.0032336723525077105,
"learning_rate": 8.904374758033296e-07,
"loss": 0.0318,
"step": 2825
},
{
"epoch": 2.978056426332288,
"grad_norm": 0.003166941227391362,
"learning_rate": 4.0650406504065046e-07,
"loss": 0.0004,
"step": 2850
},
{
"epoch": 3.0,
"eval_accuracy": 0.9989545216936748,
"eval_f1_macro": 0.9991256596070146,
"eval_f1_micro": 0.9989545216936748,
"eval_f1_weighted": 0.9989547969603347,
"eval_loss": 0.007734560873359442,
"eval_precision_macro": 0.9989517819706499,
"eval_precision_micro": 0.9989545216936748,
"eval_precision_weighted": 0.9989578093613047,
"eval_recall_macro": 0.9993019197207679,
"eval_recall_micro": 0.9989545216936748,
"eval_recall_weighted": 0.9989545216936748,
"eval_runtime": 176.5223,
"eval_samples_per_second": 10.837,
"eval_steps_per_second": 0.68,
"step": 2871
}
],
"logging_steps": 25,
"max_steps": 2871,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1509810569277696.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}