{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.272984441301273,
"eval_steps": 500,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014144271570014143,
"grad_norm": 85.7519302368164,
"learning_rate": 4.976426214049977e-05,
"loss": 9.3898,
"step": 10
},
{
"epoch": 0.028288543140028287,
"grad_norm": 33.235904693603516,
"learning_rate": 4.952852428099953e-05,
"loss": 5.3413,
"step": 20
},
{
"epoch": 0.042432814710042434,
"grad_norm": 5.113602161407471,
"learning_rate": 4.9292786421499294e-05,
"loss": 4.854,
"step": 30
},
{
"epoch": 0.056577086280056574,
"grad_norm": 36.069705963134766,
"learning_rate": 4.9057048561999055e-05,
"loss": 5.0425,
"step": 40
},
{
"epoch": 0.07072135785007072,
"grad_norm": 33.71372604370117,
"learning_rate": 4.882131070249882e-05,
"loss": 5.04,
"step": 50
},
{
"epoch": 0.08486562942008487,
"grad_norm": 10.95967960357666,
"learning_rate": 4.858557284299859e-05,
"loss": 4.5402,
"step": 60
},
{
"epoch": 0.09900990099009901,
"grad_norm": 57.9084358215332,
"learning_rate": 4.834983498349835e-05,
"loss": 4.8505,
"step": 70
},
{
"epoch": 0.11315417256011315,
"grad_norm": 34.007869720458984,
"learning_rate": 4.8114097123998114e-05,
"loss": 4.3305,
"step": 80
},
{
"epoch": 0.1272984441301273,
"grad_norm": 15.245634078979492,
"learning_rate": 4.787835926449788e-05,
"loss": 4.4707,
"step": 90
},
{
"epoch": 0.14144271570014144,
"grad_norm": 24.47016143798828,
"learning_rate": 4.7642621404997644e-05,
"loss": 4.5379,
"step": 100
},
{
"epoch": 0.15558698727015557,
"grad_norm": 14.591358184814453,
"learning_rate": 4.740688354549741e-05,
"loss": 4.5205,
"step": 110
},
{
"epoch": 0.16973125884016974,
"grad_norm": 18.776493072509766,
"learning_rate": 4.7171145685997174e-05,
"loss": 4.6578,
"step": 120
},
{
"epoch": 0.18387553041018387,
"grad_norm": 4.018069267272949,
"learning_rate": 4.6935407826496935e-05,
"loss": 4.1864,
"step": 130
},
{
"epoch": 0.19801980198019803,
"grad_norm": 8.038480758666992,
"learning_rate": 4.6699669966996704e-05,
"loss": 4.5046,
"step": 140
},
{
"epoch": 0.21216407355021216,
"grad_norm": 2.9573426246643066,
"learning_rate": 4.6463932107496465e-05,
"loss": 4.2317,
"step": 150
},
{
"epoch": 0.2263083451202263,
"grad_norm": 3.0037927627563477,
"learning_rate": 4.622819424799623e-05,
"loss": 4.0951,
"step": 160
},
{
"epoch": 0.24045261669024046,
"grad_norm": 9.886232376098633,
"learning_rate": 4.5992456388495995e-05,
"loss": 4.0168,
"step": 170
},
{
"epoch": 0.2545968882602546,
"grad_norm": 2.463179588317871,
"learning_rate": 4.5756718528995756e-05,
"loss": 4.0372,
"step": 180
},
{
"epoch": 0.26874115983026875,
"grad_norm": 13.546555519104004,
"learning_rate": 4.5520980669495525e-05,
"loss": 4.1929,
"step": 190
},
{
"epoch": 0.2828854314002829,
"grad_norm": 2.4467103481292725,
"learning_rate": 4.5285242809995286e-05,
"loss": 4.2146,
"step": 200
},
{
"epoch": 0.297029702970297,
"grad_norm": 5.936313152313232,
"learning_rate": 4.5049504950495054e-05,
"loss": 4.6069,
"step": 210
},
{
"epoch": 0.31117397454031115,
"grad_norm": 6.5272536277771,
"learning_rate": 4.4813767090994816e-05,
"loss": 4.3263,
"step": 220
},
{
"epoch": 0.32531824611032534,
"grad_norm": 4.2881598472595215,
"learning_rate": 4.457802923149458e-05,
"loss": 3.8218,
"step": 230
},
{
"epoch": 0.33946251768033947,
"grad_norm": 3.9945058822631836,
"learning_rate": 4.4342291371994345e-05,
"loss": 4.1532,
"step": 240
},
{
"epoch": 0.3536067892503536,
"grad_norm": 4.577730655670166,
"learning_rate": 4.410655351249411e-05,
"loss": 4.0727,
"step": 250
},
{
"epoch": 0.36775106082036774,
"grad_norm": 2.6052353382110596,
"learning_rate": 4.3870815652993875e-05,
"loss": 3.9074,
"step": 260
},
{
"epoch": 0.38189533239038187,
"grad_norm": 15.787618637084961,
"learning_rate": 4.363507779349364e-05,
"loss": 4.2402,
"step": 270
},
{
"epoch": 0.39603960396039606,
"grad_norm": 23.970670700073242,
"learning_rate": 4.33993399339934e-05,
"loss": 4.3765,
"step": 280
},
{
"epoch": 0.4101838755304102,
"grad_norm": 2.5313973426818848,
"learning_rate": 4.3163602074493166e-05,
"loss": 3.9863,
"step": 290
},
{
"epoch": 0.4243281471004243,
"grad_norm": 21.020267486572266,
"learning_rate": 4.292786421499293e-05,
"loss": 4.0341,
"step": 300
},
{
"epoch": 0.43847241867043846,
"grad_norm": 9.731268882751465,
"learning_rate": 4.2692126355492696e-05,
"loss": 4.1089,
"step": 310
},
{
"epoch": 0.4526166902404526,
"grad_norm": 4.240326881408691,
"learning_rate": 4.245638849599246e-05,
"loss": 3.9737,
"step": 320
},
{
"epoch": 0.4667609618104668,
"grad_norm": 15.72867202758789,
"learning_rate": 4.222065063649222e-05,
"loss": 4.454,
"step": 330
},
{
"epoch": 0.4809052333804809,
"grad_norm": 10.669405937194824,
"learning_rate": 4.198491277699199e-05,
"loss": 4.1596,
"step": 340
},
{
"epoch": 0.49504950495049505,
"grad_norm": 11.927492141723633,
"learning_rate": 4.174917491749175e-05,
"loss": 4.0485,
"step": 350
},
{
"epoch": 0.5091937765205092,
"grad_norm": 7.629958629608154,
"learning_rate": 4.151343705799152e-05,
"loss": 4.1567,
"step": 360
},
{
"epoch": 0.5233380480905233,
"grad_norm": 32.22209930419922,
"learning_rate": 4.1277699198491285e-05,
"loss": 4.9187,
"step": 370
},
{
"epoch": 0.5374823196605375,
"grad_norm": 7.841526985168457,
"learning_rate": 4.104196133899104e-05,
"loss": 4.0989,
"step": 380
},
{
"epoch": 0.5516265912305516,
"grad_norm": 3.8099868297576904,
"learning_rate": 4.080622347949081e-05,
"loss": 4.4586,
"step": 390
},
{
"epoch": 0.5657708628005658,
"grad_norm": 11.720135688781738,
"learning_rate": 4.057048561999057e-05,
"loss": 4.1915,
"step": 400
},
{
"epoch": 0.57991513437058,
"grad_norm": 5.8960280418396,
"learning_rate": 4.033474776049034e-05,
"loss": 4.3458,
"step": 410
},
{
"epoch": 0.594059405940594,
"grad_norm": 3.532780885696411,
"learning_rate": 4.0099009900990106e-05,
"loss": 4.2094,
"step": 420
},
{
"epoch": 0.6082036775106082,
"grad_norm": 6.77498722076416,
"learning_rate": 3.986327204148986e-05,
"loss": 4.3139,
"step": 430
},
{
"epoch": 0.6223479490806223,
"grad_norm": 23.035005569458008,
"learning_rate": 3.962753418198963e-05,
"loss": 4.0213,
"step": 440
},
{
"epoch": 0.6364922206506365,
"grad_norm": 3.033621311187744,
"learning_rate": 3.939179632248939e-05,
"loss": 4.0612,
"step": 450
},
{
"epoch": 0.6506364922206507,
"grad_norm": 32.6967887878418,
"learning_rate": 3.915605846298916e-05,
"loss": 4.1847,
"step": 460
},
{
"epoch": 0.6647807637906648,
"grad_norm": 9.779464721679688,
"learning_rate": 3.892032060348893e-05,
"loss": 4.1741,
"step": 470
},
{
"epoch": 0.6789250353606789,
"grad_norm": 14.904414176940918,
"learning_rate": 3.868458274398868e-05,
"loss": 4.1909,
"step": 480
},
{
"epoch": 0.693069306930693,
"grad_norm": 34.94367218017578,
"learning_rate": 3.844884488448845e-05,
"loss": 4.9592,
"step": 490
},
{
"epoch": 0.7072135785007072,
"grad_norm": 6.339000701904297,
"learning_rate": 3.821310702498822e-05,
"loss": 3.7726,
"step": 500
},
{
"epoch": 0.7213578500707214,
"grad_norm": 17.672000885009766,
"learning_rate": 3.797736916548798e-05,
"loss": 3.9599,
"step": 510
},
{
"epoch": 0.7355021216407355,
"grad_norm": 13.348356246948242,
"learning_rate": 3.774163130598775e-05,
"loss": 3.8342,
"step": 520
},
{
"epoch": 0.7496463932107497,
"grad_norm": 1.8930085897445679,
"learning_rate": 3.75058934464875e-05,
"loss": 4.0049,
"step": 530
},
{
"epoch": 0.7637906647807637,
"grad_norm": 35.62409210205078,
"learning_rate": 3.727015558698727e-05,
"loss": 3.9412,
"step": 540
},
{
"epoch": 0.7779349363507779,
"grad_norm": 2.246541738510132,
"learning_rate": 3.703441772748704e-05,
"loss": 3.893,
"step": 550
},
{
"epoch": 0.7920792079207921,
"grad_norm": 57.89748001098633,
"learning_rate": 3.67986798679868e-05,
"loss": 4.2004,
"step": 560
},
{
"epoch": 0.8062234794908062,
"grad_norm": 13.958605766296387,
"learning_rate": 3.656294200848657e-05,
"loss": 4.5987,
"step": 570
},
{
"epoch": 0.8203677510608204,
"grad_norm": 7.963130950927734,
"learning_rate": 3.6327204148986324e-05,
"loss": 3.8688,
"step": 580
},
{
"epoch": 0.8345120226308345,
"grad_norm": 12.124194145202637,
"learning_rate": 3.609146628948609e-05,
"loss": 3.8787,
"step": 590
},
{
"epoch": 0.8486562942008486,
"grad_norm": 19.39701271057129,
"learning_rate": 3.585572842998586e-05,
"loss": 3.8274,
"step": 600
},
{
"epoch": 0.8628005657708628,
"grad_norm": 7.561882495880127,
"learning_rate": 3.561999057048562e-05,
"loss": 3.917,
"step": 610
},
{
"epoch": 0.8769448373408769,
"grad_norm": 8.699311256408691,
"learning_rate": 3.538425271098539e-05,
"loss": 3.8819,
"step": 620
},
{
"epoch": 0.8910891089108911,
"grad_norm": 10.60632038116455,
"learning_rate": 3.514851485148515e-05,
"loss": 4.3288,
"step": 630
},
{
"epoch": 0.9052333804809052,
"grad_norm": 5.851240634918213,
"learning_rate": 3.491277699198491e-05,
"loss": 3.7157,
"step": 640
},
{
"epoch": 0.9193776520509194,
"grad_norm": 12.624049186706543,
"learning_rate": 3.467703913248468e-05,
"loss": 4.0151,
"step": 650
},
{
"epoch": 0.9335219236209336,
"grad_norm": 10.379075050354004,
"learning_rate": 3.444130127298444e-05,
"loss": 4.1419,
"step": 660
},
{
"epoch": 0.9476661951909476,
"grad_norm": 11.247940063476562,
"learning_rate": 3.420556341348421e-05,
"loss": 3.5958,
"step": 670
},
{
"epoch": 0.9618104667609618,
"grad_norm": 10.014704704284668,
"learning_rate": 3.396982555398397e-05,
"loss": 3.7822,
"step": 680
},
{
"epoch": 0.9759547383309759,
"grad_norm": 8.791955947875977,
"learning_rate": 3.3734087694483734e-05,
"loss": 3.9809,
"step": 690
},
{
"epoch": 0.9900990099009901,
"grad_norm": 30.620357513427734,
"learning_rate": 3.34983498349835e-05,
"loss": 3.9702,
"step": 700
},
{
"epoch": 1.0042432814710043,
"grad_norm": 23.29230499267578,
"learning_rate": 3.326261197548326e-05,
"loss": 3.8758,
"step": 710
},
{
"epoch": 1.0183875530410185,
"grad_norm": 6.364682674407959,
"learning_rate": 3.302687411598303e-05,
"loss": 3.6476,
"step": 720
},
{
"epoch": 1.0325318246110324,
"grad_norm": 22.594091415405273,
"learning_rate": 3.279113625648279e-05,
"loss": 4.2427,
"step": 730
},
{
"epoch": 1.0466760961810466,
"grad_norm": 31.865617752075195,
"learning_rate": 3.2555398396982555e-05,
"loss": 3.8927,
"step": 740
},
{
"epoch": 1.0608203677510608,
"grad_norm": 2.553858757019043,
"learning_rate": 3.231966053748232e-05,
"loss": 3.6406,
"step": 750
},
{
"epoch": 1.074964639321075,
"grad_norm": 24.558555603027344,
"learning_rate": 3.2083922677982084e-05,
"loss": 3.9391,
"step": 760
},
{
"epoch": 1.0891089108910892,
"grad_norm": 3.9098362922668457,
"learning_rate": 3.184818481848185e-05,
"loss": 4.2241,
"step": 770
},
{
"epoch": 1.1032531824611032,
"grad_norm": 14.435652732849121,
"learning_rate": 3.1612446958981614e-05,
"loss": 3.5212,
"step": 780
},
{
"epoch": 1.1173974540311173,
"grad_norm": 4.891509056091309,
"learning_rate": 3.1376709099481375e-05,
"loss": 3.3049,
"step": 790
},
{
"epoch": 1.1315417256011315,
"grad_norm": 10.893304824829102,
"learning_rate": 3.1140971239981144e-05,
"loss": 3.7428,
"step": 800
},
{
"epoch": 1.1456859971711457,
"grad_norm": 3.631542921066284,
"learning_rate": 3.0905233380480905e-05,
"loss": 3.7109,
"step": 810
},
{
"epoch": 1.15983026874116,
"grad_norm": 17.292734146118164,
"learning_rate": 3.0669495520980673e-05,
"loss": 3.748,
"step": 820
},
{
"epoch": 1.1739745403111739,
"grad_norm": 12.438305854797363,
"learning_rate": 3.043375766148043e-05,
"loss": 4.5263,
"step": 830
},
{
"epoch": 1.188118811881188,
"grad_norm": 7.694697380065918,
"learning_rate": 3.01980198019802e-05,
"loss": 3.9575,
"step": 840
},
{
"epoch": 1.2022630834512023,
"grad_norm": 9.88021469116211,
"learning_rate": 2.9962281942479965e-05,
"loss": 3.5793,
"step": 850
},
{
"epoch": 1.2164073550212164,
"grad_norm": 18.16057586669922,
"learning_rate": 2.972654408297973e-05,
"loss": 3.7951,
"step": 860
},
{
"epoch": 1.2305516265912306,
"grad_norm": 3.5214946269989014,
"learning_rate": 2.9490806223479494e-05,
"loss": 4.0263,
"step": 870
},
{
"epoch": 1.2446958981612446,
"grad_norm": 20.135046005249023,
"learning_rate": 2.9255068363979256e-05,
"loss": 3.678,
"step": 880
},
{
"epoch": 1.2588401697312588,
"grad_norm": 35.220733642578125,
"learning_rate": 2.901933050447902e-05,
"loss": 4.1981,
"step": 890
},
{
"epoch": 1.272984441301273,
"grad_norm": 39.2838134765625,
"learning_rate": 2.8783592644978786e-05,
"loss": 3.9173,
"step": 900
}
],
"logging_steps": 10,
"max_steps": 2121,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3469133437927424e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}