sft-zephyr-7b-beta-v4 / trainer_state.json
hllj's picture
Model save
f78d5e5
raw
history blame
7.05 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9801295896328295,
"eval_steps": 1000,
"global_step": 4538,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.1551724137931036e-07,
"loss": 0.9021,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 2.0905172413793102e-05,
"loss": 0.7496,
"step": 100
},
{
"epoch": 0.09,
"learning_rate": 4.245689655172414e-05,
"loss": 0.44,
"step": 200
},
{
"epoch": 0.13,
"learning_rate": 4.997305687302146e-05,
"loss": 0.4213,
"step": 300
},
{
"epoch": 0.17,
"learning_rate": 4.982655396386503e-05,
"loss": 0.4123,
"step": 400
},
{
"epoch": 0.22,
"learning_rate": 4.95534255874619e-05,
"loss": 0.3963,
"step": 500
},
{
"epoch": 0.26,
"learning_rate": 4.9155064809008765e-05,
"loss": 0.3985,
"step": 600
},
{
"epoch": 0.3,
"learning_rate": 4.863350342960769e-05,
"loss": 0.3956,
"step": 700
},
{
"epoch": 0.35,
"learning_rate": 4.799140162325867e-05,
"loss": 0.3949,
"step": 800
},
{
"epoch": 0.39,
"learning_rate": 4.723203436889627e-05,
"loss": 0.3912,
"step": 900
},
{
"epoch": 0.43,
"learning_rate": 4.6359274746672286e-05,
"loss": 0.3913,
"step": 1000
},
{
"epoch": 0.43,
"eval_loss": 0.5562438368797302,
"eval_runtime": 160.9876,
"eval_samples_per_second": 6.392,
"eval_steps_per_second": 1.603,
"step": 1000
},
{
"epoch": 0.48,
"learning_rate": 4.5377574183680436e-05,
"loss": 0.3881,
"step": 1100
},
{
"epoch": 0.52,
"learning_rate": 4.42919397498776e-05,
"loss": 0.38,
"step": 1200
},
{
"epoch": 0.56,
"learning_rate": 4.310790862000173e-05,
"loss": 0.387,
"step": 1300
},
{
"epoch": 0.6,
"learning_rate": 4.183151983174099e-05,
"loss": 0.3746,
"step": 1400
},
{
"epoch": 0.65,
"learning_rate": 4.0469283484198845e-05,
"loss": 0.3793,
"step": 1500
},
{
"epoch": 0.69,
"learning_rate": 3.902814753375534e-05,
"loss": 0.3728,
"step": 1600
},
{
"epoch": 0.73,
"learning_rate": 3.7515462356679224e-05,
"loss": 0.3735,
"step": 1700
},
{
"epoch": 0.78,
"learning_rate": 3.5938943259235857e-05,
"loss": 0.3624,
"step": 1800
},
{
"epoch": 0.82,
"learning_rate": 3.430663112650447e-05,
"loss": 0.3723,
"step": 1900
},
{
"epoch": 0.86,
"learning_rate": 3.2626851410611876e-05,
"loss": 0.3639,
"step": 2000
},
{
"epoch": 0.86,
"eval_loss": 0.5547167658805847,
"eval_runtime": 161.1485,
"eval_samples_per_second": 6.385,
"eval_steps_per_second": 1.601,
"step": 2000
},
{
"epoch": 0.91,
"learning_rate": 3.090817166755903e-05,
"loss": 0.3735,
"step": 2100
},
{
"epoch": 0.95,
"learning_rate": 2.9159357859219982e-05,
"loss": 0.3628,
"step": 2200
},
{
"epoch": 1.01,
"learning_rate": 2.7389329643390623e-05,
"loss": 0.3724,
"step": 2300
},
{
"epoch": 1.06,
"learning_rate": 2.560711487992597e-05,
"loss": 0.3534,
"step": 2400
},
{
"epoch": 1.1,
"learning_rate": 2.382180358500331e-05,
"loss": 0.3501,
"step": 2500
},
{
"epoch": 1.14,
"learning_rate": 2.2042501568362862e-05,
"loss": 0.363,
"step": 2600
},
{
"epoch": 1.19,
"learning_rate": 2.0278283989994943e-05,
"loss": 0.3522,
"step": 2700
},
{
"epoch": 1.23,
"learning_rate": 1.8538149073153196e-05,
"loss": 0.3586,
"step": 2800
},
{
"epoch": 1.27,
"learning_rate": 1.6830972209776355e-05,
"loss": 0.354,
"step": 2900
},
{
"epoch": 1.32,
"learning_rate": 1.5165460692399357e-05,
"loss": 0.3545,
"step": 3000
},
{
"epoch": 1.32,
"eval_loss": 0.5536707639694214,
"eval_runtime": 161.1649,
"eval_samples_per_second": 6.385,
"eval_steps_per_second": 1.601,
"step": 3000
},
{
"epoch": 1.36,
"learning_rate": 1.3550109303439428e-05,
"loss": 0.3467,
"step": 3100
},
{
"epoch": 1.4,
"learning_rate": 1.1993156988369722e-05,
"loss": 0.3421,
"step": 3200
},
{
"epoch": 1.45,
"learning_rate": 1.0502544833764878e-05,
"loss": 0.3565,
"step": 3300
},
{
"epoch": 1.49,
"learning_rate": 9.08587556454735e-06,
"loss": 0.3444,
"step": 3400
},
{
"epoch": 1.53,
"learning_rate": 7.750374767015183e-06,
"loss": 0.3536,
"step": 3500
},
{
"epoch": 1.57,
"learning_rate": 6.502854035429184e-06,
"loss": 0.348,
"step": 3600
},
{
"epoch": 1.62,
"learning_rate": 5.349676230126996e-06,
"loss": 0.3396,
"step": 3700
},
{
"epoch": 1.66,
"learning_rate": 4.296723024361665e-06,
"loss": 0.3375,
"step": 3800
},
{
"epoch": 1.7,
"learning_rate": 3.3493649053890326e-06,
"loss": 0.3453,
"step": 3900
},
{
"epoch": 1.75,
"learning_rate": 2.512433782810378e-06,
"loss": 0.3411,
"step": 4000
},
{
"epoch": 1.75,
"eval_loss": 0.557060718536377,
"eval_runtime": 161.2179,
"eval_samples_per_second": 6.383,
"eval_steps_per_second": 1.6,
"step": 4000
},
{
"epoch": 1.79,
"learning_rate": 1.7901983438790348e-06,
"loss": 0.3423,
"step": 4100
},
{
"epoch": 1.83,
"learning_rate": 1.1863422814687664e-06,
"loss": 0.3473,
"step": 4200
},
{
"epoch": 1.88,
"learning_rate": 7.039455057501032e-07,
"loss": 0.341,
"step": 4300
},
{
"epoch": 1.92,
"learning_rate": 3.454684354026766e-07,
"loss": 0.3471,
"step": 4400
},
{
"epoch": 1.96,
"learning_rate": 1.1273944848475826e-07,
"loss": 0.3381,
"step": 4500
},
{
"epoch": 1.98,
"step": 4538,
"total_flos": 3.994638135454597e+17,
"train_loss": 0.3754801067185854,
"train_runtime": 9090.1825,
"train_samples_per_second": 2.037,
"train_steps_per_second": 0.509
}
],
"logging_steps": 100,
"max_steps": 4630,
"num_train_epochs": 2,
"save_steps": 1000,
"total_flos": 3.994638135454597e+17,
"trial_name": null,
"trial_params": null
}