{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.019436345966958212, "eval_steps": 500, "global_step": 5, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003887269193391642, "grad_norm": 8.51073697097684, "learning_rate": 0.0, "log_odds_chosen": 0.4093017578125, "log_odds_ratio": -0.6591796875, "logits/chosen": 1.3271484375, "logits/rejected": -1.109375, "logps/chosen": -2.451171875, "logps/rejected": -2.826171875, "loss": 2.5861, "nll_loss": 2.521484375, "rewards/accuracies": 0.661458432674408, "rewards/chosen": -0.2449951171875, "rewards/margins": 0.03772735595703125, "rewards/rejected": -0.2825927734375, "step": 1 }, { "epoch": 0.007774538386783284, "grad_norm": 8.185303894379656, "learning_rate": 2e-05, "log_odds_chosen": 0.496826171875, "log_odds_ratio": -0.69140625, "logits/chosen": 1.3232421875, "logits/rejected": -1.0576171875, "logps/chosen": -2.431640625, "logps/rejected": -2.9140625, "loss": 2.5667, "nll_loss": 2.49609375, "rewards/accuracies": 0.6458333134651184, "rewards/chosen": -0.2430419921875, "rewards/margins": 0.048309326171875, "rewards/rejected": -0.291259765625, "step": 2 }, { "epoch": 0.011661807580174927, "grad_norm": 5.074940327572319, "learning_rate": 1.736396103067893e-05, "log_odds_chosen": 0.789306640625, "log_odds_ratio": -0.4697265625, "logits/chosen": 0.429443359375, "logits/rejected": -1.03173828125, "logps/chosen": -1.962890625, "logps/rejected": -2.6796875, "loss": 2.128, "nll_loss": 2.0810546875, "rewards/accuracies": 0.7864583730697632, "rewards/chosen": -0.1962890625, "rewards/margins": 0.071533203125, "rewards/rejected": -0.267822265625, "step": 3 }, { "epoch": 0.015549076773566569, "grad_norm": 3.76757860778528, "learning_rate": 1.1000000000000001e-05, "log_odds_chosen": 1.40673828125, "log_odds_ratio": -0.316162109375, "logits/chosen": 1.06201171875, "logits/rejected": -1.263671875, "logps/chosen": -1.5263671875, "logps/rejected": -2.7578125, "loss": 1.6897, "nll_loss": 1.6591796875, "rewards/accuracies": 0.9166667461395264, "rewards/chosen": -0.152587890625, "rewards/margins": 0.12322998046875, "rewards/rejected": -0.2757568359375, "step": 4 }, { "epoch": 0.019436345966958212, "grad_norm": 3.555226780695627, "learning_rate": 4.636038969321073e-06, "log_odds_chosen": 1.5322265625, "log_odds_ratio": -0.2637939453125, "logits/chosen": 0.80712890625, "logits/rejected": -1.6494140625, "logps/chosen": -1.4091796875, "logps/rejected": -2.720703125, "loss": 1.5929, "nll_loss": 1.56640625, "rewards/accuracies": 0.96875, "rewards/chosen": -0.14105224609375, "rewards/margins": 0.131591796875, "rewards/rejected": -0.2725830078125, "step": 5 }, { "epoch": 0.019436345966958212, "step": 5, "total_flos": 0.0, "train_loss": 2.112666893005371, "train_runtime": 260.1894, "train_samples_per_second": 3.69, "train_steps_per_second": 0.019 } ], "logging_steps": 1, "max_steps": 5, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }