| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.316831683168317, | |
| "eval_steps": 500, | |
| "global_step": 150, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08800880088008801, | |
| "grad_norm": 0.008909749798476696, | |
| "learning_rate": 8.18181797512807e-05, | |
| "loss": 12.2157, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.17601760176017603, | |
| "grad_norm": 0.007974537089467049, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2154, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.264026402640264, | |
| "grad_norm": 0.010009167715907097, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2152, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.35203520352035206, | |
| "grad_norm": 0.010353786870837212, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2155, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.44004400440044006, | |
| "grad_norm": 0.01487517450004816, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2151, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.528052805280528, | |
| "grad_norm": 0.011968914419412613, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.215, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.6160616061606161, | |
| "grad_norm": 0.015747424215078354, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2146, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.7040704070407041, | |
| "grad_norm": 0.01620076224207878, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2132, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 0.016200672835111618, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2131, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.8800880088008801, | |
| "grad_norm": 0.01671593263745308, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2126, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.9680968096809681, | |
| "grad_norm": 0.019221968948841095, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2124, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.0528052805280528, | |
| "grad_norm": 0.02141665853559971, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2117, | |
| "memory/device_mem_reserved(gib)": 2.73, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.140814081408141, | |
| "grad_norm": 0.015679990872740746, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2111, | |
| "memory/device_mem_reserved(gib)": 2.74, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.2288228822882288, | |
| "grad_norm": 0.022765284404158592, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2093, | |
| "memory/device_mem_reserved(gib)": 2.74, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.316831683168317, | |
| "grad_norm": 0.01766524650156498, | |
| "learning_rate": 9.999999747378752e-05, | |
| "loss": 12.2088, | |
| "memory/device_mem_reserved(gib)": 2.74, | |
| "memory/max_mem_active(gib)": 2.34, | |
| "memory/max_mem_allocated(gib)": 2.34, | |
| "step": 150 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 150, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3064545861120.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |