| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4153686396677051, | |
| "eval_steps": 50, | |
| "global_step": 400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_entropy": 1.202019446364073, | |
| "eval_loss": 1.0483194589614868, | |
| "eval_mean_token_accuracy": 0.7420558578500124, | |
| "eval_num_tokens": 0.0, | |
| "eval_runtime": 129.0837, | |
| "eval_samples_per_second": 0.829, | |
| "eval_steps_per_second": 0.829, | |
| "step": 0 | |
| }, | |
| { | |
| "entropy": 0.8833672893047333, | |
| "epoch": 0.05192107995846314, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 9.785867237687366e-06, | |
| "loss": 0.7974, | |
| "mean_token_accuracy": 0.7731659519672394, | |
| "num_tokens": 117579.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05192107995846314, | |
| "eval_entropy": 0.6945631228317725, | |
| "eval_loss": 0.661672055721283, | |
| "eval_mean_token_accuracy": 0.7917334231260781, | |
| "eval_num_tokens": 117579.0, | |
| "eval_runtime": 3670.0079, | |
| "eval_samples_per_second": 0.029, | |
| "eval_steps_per_second": 0.029, | |
| "step": 50 | |
| }, | |
| { | |
| "entropy": 0.6754305803775787, | |
| "epoch": 0.10384215991692627, | |
| "grad_norm": 10.375, | |
| "learning_rate": 9.250535331905782e-06, | |
| "loss": 0.6539, | |
| "mean_token_accuracy": 0.7942240250110626, | |
| "num_tokens": 234762.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10384215991692627, | |
| "eval_entropy": 0.6633974996125587, | |
| "eval_loss": 0.6503159999847412, | |
| "eval_mean_token_accuracy": 0.7945662413802103, | |
| "eval_num_tokens": 234762.0, | |
| "eval_runtime": 3639.4734, | |
| "eval_samples_per_second": 0.029, | |
| "eval_steps_per_second": 0.029, | |
| "step": 100 | |
| }, | |
| { | |
| "entropy": 0.673185322880745, | |
| "epoch": 0.1557632398753894, | |
| "grad_norm": 10.75, | |
| "learning_rate": 8.715203426124197e-06, | |
| "loss": 0.654, | |
| "mean_token_accuracy": 0.7920358991622924, | |
| "num_tokens": 340474.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1557632398753894, | |
| "eval_entropy": 0.6714856841853846, | |
| "eval_loss": 0.6473907232284546, | |
| "eval_mean_token_accuracy": 0.7945645585238377, | |
| "eval_num_tokens": 340474.0, | |
| "eval_runtime": 3648.7713, | |
| "eval_samples_per_second": 0.029, | |
| "eval_steps_per_second": 0.029, | |
| "step": 150 | |
| }, | |
| { | |
| "entropy": 0.6565037113428116, | |
| "epoch": 0.20768431983385255, | |
| "grad_norm": 15.875, | |
| "learning_rate": 8.179871520342612e-06, | |
| "loss": 0.6425, | |
| "mean_token_accuracy": 0.7962384045124054, | |
| "num_tokens": 457598.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20768431983385255, | |
| "eval_entropy": 0.6561593619462486, | |
| "eval_loss": 0.6449708342552185, | |
| "eval_mean_token_accuracy": 0.795283340405081, | |
| "eval_num_tokens": 457598.0, | |
| "eval_runtime": 3483.4231, | |
| "eval_samples_per_second": 0.031, | |
| "eval_steps_per_second": 0.031, | |
| "step": 200 | |
| }, | |
| { | |
| "entropy": 0.6419710898399353, | |
| "epoch": 0.25960539979231567, | |
| "grad_norm": 12.0, | |
| "learning_rate": 7.644539614561029e-06, | |
| "loss": 0.6278, | |
| "mean_token_accuracy": 0.7990661442279816, | |
| "num_tokens": 575217.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.25960539979231567, | |
| "eval_entropy": 0.6565199853660905, | |
| "eval_loss": 0.6442670226097107, | |
| "eval_mean_token_accuracy": 0.7952453271250859, | |
| "eval_num_tokens": 575217.0, | |
| "eval_runtime": 3485.0746, | |
| "eval_samples_per_second": 0.031, | |
| "eval_steps_per_second": 0.031, | |
| "step": 250 | |
| }, | |
| { | |
| "entropy": 0.6925940608978272, | |
| "epoch": 0.3115264797507788, | |
| "grad_norm": 14.125, | |
| "learning_rate": 7.109207708779444e-06, | |
| "loss": 0.6782, | |
| "mean_token_accuracy": 0.7854274523258209, | |
| "num_tokens": 690541.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3115264797507788, | |
| "eval_entropy": 0.64524694051698, | |
| "eval_loss": 0.6422281265258789, | |
| "eval_mean_token_accuracy": 0.7960561396919679, | |
| "eval_num_tokens": 690541.0, | |
| "eval_runtime": 3645.789, | |
| "eval_samples_per_second": 0.029, | |
| "eval_steps_per_second": 0.029, | |
| "step": 300 | |
| }, | |
| { | |
| "entropy": 0.6548466223478318, | |
| "epoch": 0.363447559709242, | |
| "grad_norm": 10.375, | |
| "learning_rate": 6.573875802997859e-06, | |
| "loss": 0.6461, | |
| "mean_token_accuracy": 0.7949914515018464, | |
| "num_tokens": 811130.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.363447559709242, | |
| "eval_entropy": 0.6484908422577047, | |
| "eval_loss": 0.6409919261932373, | |
| "eval_mean_token_accuracy": 0.7962207621503099, | |
| "eval_num_tokens": 811130.0, | |
| "eval_runtime": 3645.1814, | |
| "eval_samples_per_second": 0.029, | |
| "eval_steps_per_second": 0.029, | |
| "step": 350 | |
| }, | |
| { | |
| "entropy": 0.6180817884206772, | |
| "epoch": 0.4153686396677051, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 6.038543897216275e-06, | |
| "loss": 0.6089, | |
| "mean_token_accuracy": 0.8053069579601287, | |
| "num_tokens": 923200.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4153686396677051, | |
| "eval_entropy": 0.6479496045090328, | |
| "eval_loss": 0.6389999985694885, | |
| "eval_mean_token_accuracy": 0.7966728973611493, | |
| "eval_num_tokens": 923200.0, | |
| "eval_runtime": 3638.2153, | |
| "eval_samples_per_second": 0.029, | |
| "eval_steps_per_second": 0.029, | |
| "step": 400 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 963, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.126430870427648e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |