| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999220394480393, | |
| "global_step": 6413, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.610166848588804e-05, | |
| "loss": 1.501, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_gen_len": 38.430038252466275, | |
| "eval_loss": 0.11725818365812302, | |
| "eval_runtime": 1525.1588, | |
| "eval_samples_per_second": 3.257, | |
| "eval_steps_per_second": 0.407, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.220333697177608e-05, | |
| "loss": 0.0972, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_gen_len": 38.57358566539158, | |
| "eval_loss": 0.10414853692054749, | |
| "eval_runtime": 1374.4335, | |
| "eval_samples_per_second": 3.614, | |
| "eval_steps_per_second": 0.452, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.830500545766412e-05, | |
| "loss": 0.0871, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_gen_len": 41.80269780551641, | |
| "eval_loss": 0.09756787866353989, | |
| "eval_runtime": 1416.1217, | |
| "eval_samples_per_second": 3.507, | |
| "eval_steps_per_second": 0.439, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.440667394355216e-05, | |
| "loss": 0.0821, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_gen_len": 38.83108516206966, | |
| "eval_loss": 0.09272466599941254, | |
| "eval_runtime": 1386.2597, | |
| "eval_samples_per_second": 3.583, | |
| "eval_steps_per_second": 0.448, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.05083424294402e-05, | |
| "loss": 0.0796, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_gen_len": 42.82927320314073, | |
| "eval_loss": 0.09029995650053024, | |
| "eval_runtime": 1431.1493, | |
| "eval_samples_per_second": 3.471, | |
| "eval_steps_per_second": 0.434, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6610010915328243e-05, | |
| "loss": 0.0766, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_gen_len": 41.111938796053956, | |
| "eval_loss": 0.08799975365400314, | |
| "eval_runtime": 1401.8123, | |
| "eval_samples_per_second": 3.543, | |
| "eval_steps_per_second": 0.443, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.271167940121628e-05, | |
| "loss": 0.0741, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_gen_len": 40.73605798268573, | |
| "eval_loss": 0.08718982338905334, | |
| "eval_runtime": 1373.6325, | |
| "eval_samples_per_second": 3.616, | |
| "eval_steps_per_second": 0.452, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.881334788710432e-05, | |
| "loss": 0.0729, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_gen_len": 39.60056372055567, | |
| "eval_loss": 0.08510363847017288, | |
| "eval_runtime": 1369.314, | |
| "eval_samples_per_second": 3.627, | |
| "eval_steps_per_second": 0.454, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.491501637299236e-05, | |
| "loss": 0.0723, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_gen_len": 40.53654117173344, | |
| "eval_loss": 0.08480597287416458, | |
| "eval_runtime": 1394.4001, | |
| "eval_samples_per_second": 3.562, | |
| "eval_steps_per_second": 0.445, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1016684858880399e-05, | |
| "loss": 0.0729, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_gen_len": 41.251258304811756, | |
| "eval_loss": 0.08257019519805908, | |
| "eval_runtime": 1406.7799, | |
| "eval_samples_per_second": 3.531, | |
| "eval_steps_per_second": 0.441, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.11835334476844e-06, | |
| "loss": 0.071, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_gen_len": 41.85141936782766, | |
| "eval_loss": 0.08206828683614731, | |
| "eval_runtime": 1411.1175, | |
| "eval_samples_per_second": 3.52, | |
| "eval_steps_per_second": 0.44, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.220021830656479e-06, | |
| "loss": 0.0699, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_gen_len": 41.36098248439702, | |
| "eval_loss": 0.08196299523115158, | |
| "eval_runtime": 1408.2137, | |
| "eval_samples_per_second": 3.527, | |
| "eval_steps_per_second": 0.441, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 6413, | |
| "total_flos": 1.2273483536252928e+17, | |
| "train_loss": 0.18819899583558072, | |
| "train_runtime": 25349.1542, | |
| "train_samples_per_second": 4.048, | |
| "train_steps_per_second": 0.253 | |
| } | |
| ], | |
| "max_steps": 6413, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.2273483536252928e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |