multi-label-email-classifier / trainer_state.json
imnim's picture
Upload folder using huggingface_hub
890a33d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 711,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.042238648363252376,
"grad_norm": 1.2689597606658936,
"learning_rate": 0.00019746835443037975,
"loss": 1.5151,
"mean_token_accuracy": 0.6356319591403008,
"num_tokens": 8259.0,
"step": 10
},
{
"epoch": 0.08447729672650475,
"grad_norm": 1.168426513671875,
"learning_rate": 0.00019465541490857948,
"loss": 0.9503,
"mean_token_accuracy": 0.7329184293746949,
"num_tokens": 16580.0,
"step": 20
},
{
"epoch": 0.12671594508975711,
"grad_norm": 1.2102173566818237,
"learning_rate": 0.0001918424753867792,
"loss": 0.7998,
"mean_token_accuracy": 0.7558803096413612,
"num_tokens": 24912.0,
"step": 30
},
{
"epoch": 0.1689545934530095,
"grad_norm": 1.0103662014007568,
"learning_rate": 0.00018902953586497892,
"loss": 0.7087,
"mean_token_accuracy": 0.7875036194920539,
"num_tokens": 33031.0,
"step": 40
},
{
"epoch": 0.21119324181626187,
"grad_norm": 1.1300240755081177,
"learning_rate": 0.00018621659634317862,
"loss": 0.6805,
"mean_token_accuracy": 0.8003556072711945,
"num_tokens": 41072.0,
"step": 50
},
{
"epoch": 0.25343189017951423,
"grad_norm": 1.1537259817123413,
"learning_rate": 0.00018340365682137835,
"loss": 0.6349,
"mean_token_accuracy": 0.8001961380243301,
"num_tokens": 49265.0,
"step": 60
},
{
"epoch": 0.29567053854276665,
"grad_norm": 1.1879968643188477,
"learning_rate": 0.00018059071729957806,
"loss": 0.6231,
"mean_token_accuracy": 0.8075164943933487,
"num_tokens": 57420.0,
"step": 70
},
{
"epoch": 0.337909186906019,
"grad_norm": 0.9328457713127136,
"learning_rate": 0.00017777777777777779,
"loss": 0.6012,
"mean_token_accuracy": 0.8100895985960961,
"num_tokens": 65688.0,
"step": 80
},
{
"epoch": 0.3801478352692714,
"grad_norm": 1.1767158508300781,
"learning_rate": 0.00017496483825597752,
"loss": 0.6067,
"mean_token_accuracy": 0.806154166162014,
"num_tokens": 73786.0,
"step": 90
},
{
"epoch": 0.42238648363252373,
"grad_norm": 1.0586782693862915,
"learning_rate": 0.00017215189873417722,
"loss": 0.5681,
"mean_token_accuracy": 0.8188158735632897,
"num_tokens": 81919.0,
"step": 100
},
{
"epoch": 0.46462513199577615,
"grad_norm": 1.148360013961792,
"learning_rate": 0.00016933895921237695,
"loss": 0.5803,
"mean_token_accuracy": 0.8167036339640618,
"num_tokens": 90088.0,
"step": 110
},
{
"epoch": 0.5068637803590285,
"grad_norm": 1.1444052457809448,
"learning_rate": 0.00016652601969057665,
"loss": 0.5345,
"mean_token_accuracy": 0.8276747301220894,
"num_tokens": 98076.0,
"step": 120
},
{
"epoch": 0.5491024287222809,
"grad_norm": 1.2006137371063232,
"learning_rate": 0.00016371308016877638,
"loss": 0.5088,
"mean_token_accuracy": 0.8310476973652839,
"num_tokens": 105900.0,
"step": 130
},
{
"epoch": 0.5913410770855333,
"grad_norm": 1.1461126804351807,
"learning_rate": 0.0001609001406469761,
"loss": 0.5117,
"mean_token_accuracy": 0.8274188995361328,
"num_tokens": 113946.0,
"step": 140
},
{
"epoch": 0.6335797254487856,
"grad_norm": 1.0241153240203857,
"learning_rate": 0.00015808720112517582,
"loss": 0.5327,
"mean_token_accuracy": 0.8250815704464912,
"num_tokens": 122100.0,
"step": 150
},
{
"epoch": 0.675818373812038,
"grad_norm": 1.1967337131500244,
"learning_rate": 0.00015527426160337552,
"loss": 0.5077,
"mean_token_accuracy": 0.840242950618267,
"num_tokens": 130278.0,
"step": 160
},
{
"epoch": 0.7180570221752904,
"grad_norm": 1.1159100532531738,
"learning_rate": 0.00015246132208157525,
"loss": 0.4862,
"mean_token_accuracy": 0.846737214922905,
"num_tokens": 138447.0,
"step": 170
},
{
"epoch": 0.7602956705385427,
"grad_norm": 1.1775243282318115,
"learning_rate": 0.00014964838255977498,
"loss": 0.4907,
"mean_token_accuracy": 0.8381337329745293,
"num_tokens": 146615.0,
"step": 180
},
{
"epoch": 0.8025343189017952,
"grad_norm": 1.4861679077148438,
"learning_rate": 0.0001468354430379747,
"loss": 0.4589,
"mean_token_accuracy": 0.8465609878301621,
"num_tokens": 154622.0,
"step": 190
},
{
"epoch": 0.8447729672650475,
"grad_norm": 1.2809723615646362,
"learning_rate": 0.00014402250351617442,
"loss": 0.454,
"mean_token_accuracy": 0.8467179164290428,
"num_tokens": 162759.0,
"step": 200
},
{
"epoch": 0.8870116156282999,
"grad_norm": 1.182682752609253,
"learning_rate": 0.00014120956399437412,
"loss": 0.489,
"mean_token_accuracy": 0.8346109226346016,
"num_tokens": 171037.0,
"step": 210
},
{
"epoch": 0.9292502639915523,
"grad_norm": 1.338064193725586,
"learning_rate": 0.00013839662447257385,
"loss": 0.4654,
"mean_token_accuracy": 0.8418598353862763,
"num_tokens": 179014.0,
"step": 220
},
{
"epoch": 0.9714889123548046,
"grad_norm": 1.2925671339035034,
"learning_rate": 0.00013558368495077356,
"loss": 0.4689,
"mean_token_accuracy": 0.8409796461462975,
"num_tokens": 187072.0,
"step": 230
},
{
"epoch": 1.0,
"eval_loss": 0.4723573327064514,
"eval_mean_token_accuracy": 0.8419052379311256,
"eval_num_tokens": 192612.0,
"eval_runtime": 119.4772,
"eval_samples_per_second": 1.766,
"eval_steps_per_second": 0.887,
"step": 237
},
{
"epoch": 1.0126715945089757,
"grad_norm": 1.0667223930358887,
"learning_rate": 0.00013277074542897329,
"loss": 0.4289,
"mean_token_accuracy": 0.8503327415539668,
"num_tokens": 195001.0,
"step": 240
},
{
"epoch": 1.0549102428722281,
"grad_norm": 1.223859429359436,
"learning_rate": 0.000129957805907173,
"loss": 0.4351,
"mean_token_accuracy": 0.8469812393188476,
"num_tokens": 203291.0,
"step": 250
},
{
"epoch": 1.0971488912354805,
"grad_norm": 1.1872385740280151,
"learning_rate": 0.00012714486638537272,
"loss": 0.4228,
"mean_token_accuracy": 0.8564460396766662,
"num_tokens": 211464.0,
"step": 260
},
{
"epoch": 1.139387539598733,
"grad_norm": 1.1780558824539185,
"learning_rate": 0.00012433192686357245,
"loss": 0.4309,
"mean_token_accuracy": 0.8535082414746284,
"num_tokens": 219545.0,
"step": 270
},
{
"epoch": 1.1816261879619852,
"grad_norm": 1.3616076707839966,
"learning_rate": 0.00012151898734177217,
"loss": 0.4322,
"mean_token_accuracy": 0.849582402408123,
"num_tokens": 227716.0,
"step": 280
},
{
"epoch": 1.2238648363252376,
"grad_norm": 1.237313151359558,
"learning_rate": 0.00011870604781997187,
"loss": 0.4261,
"mean_token_accuracy": 0.8547895699739456,
"num_tokens": 235994.0,
"step": 290
},
{
"epoch": 1.26610348468849,
"grad_norm": 1.2718459367752075,
"learning_rate": 0.00011589310829817159,
"loss": 0.4226,
"mean_token_accuracy": 0.8583998143672943,
"num_tokens": 244225.0,
"step": 300
},
{
"epoch": 1.3083421330517424,
"grad_norm": 1.1994160413742065,
"learning_rate": 0.0001130801687763713,
"loss": 0.4115,
"mean_token_accuracy": 0.8600013121962548,
"num_tokens": 252316.0,
"step": 310
},
{
"epoch": 1.3505807814149948,
"grad_norm": 1.270212173461914,
"learning_rate": 0.00011026722925457102,
"loss": 0.4444,
"mean_token_accuracy": 0.8437218397855759,
"num_tokens": 260537.0,
"step": 320
},
{
"epoch": 1.392819429778247,
"grad_norm": 1.3856836557388306,
"learning_rate": 0.00010745428973277074,
"loss": 0.4027,
"mean_token_accuracy": 0.8568239450454712,
"num_tokens": 268657.0,
"step": 330
},
{
"epoch": 1.4350580781414994,
"grad_norm": 1.132204294204712,
"learning_rate": 0.00010464135021097048,
"loss": 0.4209,
"mean_token_accuracy": 0.858132703602314,
"num_tokens": 276899.0,
"step": 340
},
{
"epoch": 1.4772967265047519,
"grad_norm": 1.1543930768966675,
"learning_rate": 0.0001018284106891702,
"loss": 0.4242,
"mean_token_accuracy": 0.852642023563385,
"num_tokens": 285106.0,
"step": 350
},
{
"epoch": 1.5195353748680043,
"grad_norm": 1.2410894632339478,
"learning_rate": 9.901547116736992e-05,
"loss": 0.4219,
"mean_token_accuracy": 0.855118528008461,
"num_tokens": 293091.0,
"step": 360
},
{
"epoch": 1.5617740232312567,
"grad_norm": 1.2626174688339233,
"learning_rate": 9.620253164556962e-05,
"loss": 0.4199,
"mean_token_accuracy": 0.853211036324501,
"num_tokens": 301179.0,
"step": 370
},
{
"epoch": 1.6040126715945089,
"grad_norm": 1.2617233991622925,
"learning_rate": 9.338959212376934e-05,
"loss": 0.4435,
"mean_token_accuracy": 0.8477905824780464,
"num_tokens": 309179.0,
"step": 380
},
{
"epoch": 1.6462513199577613,
"grad_norm": 1.3220487833023071,
"learning_rate": 9.057665260196905e-05,
"loss": 0.4654,
"mean_token_accuracy": 0.8420799180865288,
"num_tokens": 317186.0,
"step": 390
},
{
"epoch": 1.6884899683210137,
"grad_norm": 1.3132396936416626,
"learning_rate": 8.776371308016879e-05,
"loss": 0.4116,
"mean_token_accuracy": 0.8604853063821792,
"num_tokens": 325180.0,
"step": 400
},
{
"epoch": 1.7307286166842661,
"grad_norm": 1.2874078750610352,
"learning_rate": 8.49507735583685e-05,
"loss": 0.4218,
"mean_token_accuracy": 0.8567224040627479,
"num_tokens": 333261.0,
"step": 410
},
{
"epoch": 1.7729672650475186,
"grad_norm": 1.3787081241607666,
"learning_rate": 8.213783403656822e-05,
"loss": 0.3923,
"mean_token_accuracy": 0.8700995787978172,
"num_tokens": 341158.0,
"step": 420
},
{
"epoch": 1.8152059134107708,
"grad_norm": 1.1558738946914673,
"learning_rate": 7.932489451476794e-05,
"loss": 0.4156,
"mean_token_accuracy": 0.8640454620122909,
"num_tokens": 349185.0,
"step": 430
},
{
"epoch": 1.8574445617740234,
"grad_norm": 1.1682510375976562,
"learning_rate": 7.651195499296765e-05,
"loss": 0.4269,
"mean_token_accuracy": 0.8545186176896096,
"num_tokens": 357356.0,
"step": 440
},
{
"epoch": 1.8996832101372756,
"grad_norm": 1.2466729879379272,
"learning_rate": 7.369901547116737e-05,
"loss": 0.4119,
"mean_token_accuracy": 0.8519671753048896,
"num_tokens": 365850.0,
"step": 450
},
{
"epoch": 1.941921858500528,
"grad_norm": 1.0788018703460693,
"learning_rate": 7.088607594936709e-05,
"loss": 0.422,
"mean_token_accuracy": 0.8588810846209526,
"num_tokens": 374078.0,
"step": 460
},
{
"epoch": 1.9841605068637804,
"grad_norm": 1.2191482782363892,
"learning_rate": 6.80731364275668e-05,
"loss": 0.4069,
"mean_token_accuracy": 0.8567217096686364,
"num_tokens": 382229.0,
"step": 470
},
{
"epoch": 2.0,
"eval_loss": 0.44175705313682556,
"eval_mean_token_accuracy": 0.8499138732001467,
"eval_num_tokens": 385224.0,
"eval_runtime": 119.4721,
"eval_samples_per_second": 1.766,
"eval_steps_per_second": 0.887,
"step": 474
},
{
"epoch": 2.0253431890179514,
"grad_norm": 1.3298813104629517,
"learning_rate": 6.526019690576652e-05,
"loss": 0.3798,
"mean_token_accuracy": 0.8719363472400568,
"num_tokens": 390030.0,
"step": 480
},
{
"epoch": 2.0675818373812036,
"grad_norm": 1.2408016920089722,
"learning_rate": 6.244725738396625e-05,
"loss": 0.3816,
"mean_token_accuracy": 0.8682785838842392,
"num_tokens": 398313.0,
"step": 490
},
{
"epoch": 2.1098204857444562,
"grad_norm": 1.4436272382736206,
"learning_rate": 5.963431786216597e-05,
"loss": 0.3732,
"mean_token_accuracy": 0.8659846156835556,
"num_tokens": 406525.0,
"step": 500
},
{
"epoch": 2.1520591341077084,
"grad_norm": 1.330967903137207,
"learning_rate": 5.6821378340365686e-05,
"loss": 0.3679,
"mean_token_accuracy": 0.8739217355847358,
"num_tokens": 414827.0,
"step": 510
},
{
"epoch": 2.194297782470961,
"grad_norm": 1.227726697921753,
"learning_rate": 5.4008438818565396e-05,
"loss": 0.3867,
"mean_token_accuracy": 0.863666070997715,
"num_tokens": 422858.0,
"step": 520
},
{
"epoch": 2.2365364308342133,
"grad_norm": 1.287386417388916,
"learning_rate": 5.119549929676513e-05,
"loss": 0.3993,
"mean_token_accuracy": 0.8624689444899559,
"num_tokens": 430999.0,
"step": 530
},
{
"epoch": 2.278775079197466,
"grad_norm": 1.3982223272323608,
"learning_rate": 4.8382559774964844e-05,
"loss": 0.4098,
"mean_token_accuracy": 0.8576759606599808,
"num_tokens": 438940.0,
"step": 540
},
{
"epoch": 2.321013727560718,
"grad_norm": 1.378894329071045,
"learning_rate": 4.556962025316456e-05,
"loss": 0.3804,
"mean_token_accuracy": 0.869555501639843,
"num_tokens": 447201.0,
"step": 550
},
{
"epoch": 2.3632523759239703,
"grad_norm": 1.3545656204223633,
"learning_rate": 4.275668073136428e-05,
"loss": 0.3977,
"mean_token_accuracy": 0.8603558391332626,
"num_tokens": 455394.0,
"step": 560
},
{
"epoch": 2.405491024287223,
"grad_norm": 1.2987319231033325,
"learning_rate": 3.9943741209563995e-05,
"loss": 0.375,
"mean_token_accuracy": 0.8725894778966904,
"num_tokens": 463673.0,
"step": 570
},
{
"epoch": 2.447729672650475,
"grad_norm": 1.4550727605819702,
"learning_rate": 3.713080168776372e-05,
"loss": 0.373,
"mean_token_accuracy": 0.8659988775849342,
"num_tokens": 471691.0,
"step": 580
},
{
"epoch": 2.489968321013728,
"grad_norm": 1.3944754600524902,
"learning_rate": 3.431786216596343e-05,
"loss": 0.3965,
"mean_token_accuracy": 0.8628205105662345,
"num_tokens": 479994.0,
"step": 590
},
{
"epoch": 2.53220696937698,
"grad_norm": 1.268272042274475,
"learning_rate": 3.150492264416315e-05,
"loss": 0.3682,
"mean_token_accuracy": 0.8687554150819778,
"num_tokens": 487969.0,
"step": 600
},
{
"epoch": 2.574445617740232,
"grad_norm": 1.2889764308929443,
"learning_rate": 2.869198312236287e-05,
"loss": 0.3716,
"mean_token_accuracy": 0.8728931903839111,
"num_tokens": 496072.0,
"step": 610
},
{
"epoch": 2.616684266103485,
"grad_norm": 1.4896411895751953,
"learning_rate": 2.587904360056259e-05,
"loss": 0.3861,
"mean_token_accuracy": 0.8661490485072136,
"num_tokens": 504253.0,
"step": 620
},
{
"epoch": 2.658922914466737,
"grad_norm": 1.460020899772644,
"learning_rate": 2.3066104078762308e-05,
"loss": 0.3798,
"mean_token_accuracy": 0.8687096312642097,
"num_tokens": 512506.0,
"step": 630
},
{
"epoch": 2.7011615628299896,
"grad_norm": 1.4051485061645508,
"learning_rate": 2.0253164556962025e-05,
"loss": 0.4031,
"mean_token_accuracy": 0.8599234834313393,
"num_tokens": 520753.0,
"step": 640
},
{
"epoch": 2.743400211193242,
"grad_norm": 1.3228349685668945,
"learning_rate": 1.7440225035161745e-05,
"loss": 0.3696,
"mean_token_accuracy": 0.8746302232146264,
"num_tokens": 528954.0,
"step": 650
},
{
"epoch": 2.785638859556494,
"grad_norm": 1.2899895906448364,
"learning_rate": 1.4627285513361464e-05,
"loss": 0.384,
"mean_token_accuracy": 0.8671007707715035,
"num_tokens": 537170.0,
"step": 660
},
{
"epoch": 2.8278775079197467,
"grad_norm": 1.2739366292953491,
"learning_rate": 1.1814345991561182e-05,
"loss": 0.3864,
"mean_token_accuracy": 0.8667002618312836,
"num_tokens": 545043.0,
"step": 670
},
{
"epoch": 2.870116156282999,
"grad_norm": 1.4002952575683594,
"learning_rate": 9.001406469760901e-06,
"loss": 0.3929,
"mean_token_accuracy": 0.8623571470379829,
"num_tokens": 553068.0,
"step": 680
},
{
"epoch": 2.9123548046462515,
"grad_norm": 1.4770135879516602,
"learning_rate": 6.18846694796062e-06,
"loss": 0.3755,
"mean_token_accuracy": 0.871557529270649,
"num_tokens": 561129.0,
"step": 690
},
{
"epoch": 2.9545934530095037,
"grad_norm": 1.4194457530975342,
"learning_rate": 3.3755274261603373e-06,
"loss": 0.3649,
"mean_token_accuracy": 0.875392484664917,
"num_tokens": 569189.0,
"step": 700
},
{
"epoch": 2.996832101372756,
"grad_norm": 1.3790485858917236,
"learning_rate": 5.625879043600563e-07,
"loss": 0.3891,
"mean_token_accuracy": 0.8654240190982818,
"num_tokens": 577201.0,
"step": 710
},
{
"epoch": 3.0,
"eval_loss": 0.43017664551734924,
"eval_mean_token_accuracy": 0.8548184953770548,
"eval_num_tokens": 577836.0,
"eval_runtime": 119.5201,
"eval_samples_per_second": 1.765,
"eval_steps_per_second": 0.887,
"step": 711
}
],
"logging_steps": 10,
"max_steps": 711,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.791342756552704e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}