| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9992122883024814, | |
| "eval_steps": 500, | |
| "global_step": 2538, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.010698237247525, | |
| "learning_rate": 1.968503937007874e-08, | |
| "log_odds_chosen": 0.27912598848342896, | |
| "log_odds_ratio": -0.7284179925918579, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -2.03125, | |
| "logps/rejected": -2.28125, | |
| "loss": 1.5763, | |
| "nll_loss": 1.4375, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.2041015625, | |
| "rewards/margins": 0.023681640625, | |
| "rewards/rejected": -0.2275390625, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 5.135758173057069, | |
| "learning_rate": 3.937007874015748e-08, | |
| "log_odds_chosen": 0.36018067598342896, | |
| "log_odds_ratio": -0.750683605670929, | |
| "logits/chosen": -2.109375, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -1.9765625, | |
| "logps/rejected": -2.296875, | |
| "loss": 1.5927, | |
| "nll_loss": 1.5, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.197265625, | |
| "rewards/margins": 0.0322265625, | |
| "rewards/rejected": -0.2294921875, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 6.927967353705024, | |
| "learning_rate": 5.9055118110236216e-08, | |
| "log_odds_chosen": 0.17539063096046448, | |
| "log_odds_ratio": -0.812207043170929, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -2.0625, | |
| "logps/rejected": -2.1875, | |
| "loss": 1.5598, | |
| "nll_loss": 1.5, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.2060546875, | |
| "rewards/margins": 0.013427734375, | |
| "rewards/rejected": -0.2197265625, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 9.514842032339935, | |
| "learning_rate": 7.874015748031496e-08, | |
| "log_odds_chosen": 0.3271545469760895, | |
| "log_odds_ratio": -0.705859363079071, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -1.9921875, | |
| "logps/rejected": -2.296875, | |
| "loss": 1.6148, | |
| "nll_loss": 1.546875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.19921875, | |
| "rewards/margins": 0.031005859375, | |
| "rewards/rejected": -0.23046875, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.878873460342644, | |
| "learning_rate": 9.84251968503937e-08, | |
| "log_odds_chosen": 0.1710205078125, | |
| "log_odds_ratio": -0.77587890625, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.9609375, | |
| "logps/rejected": -2.09375, | |
| "loss": 1.5496, | |
| "nll_loss": 1.484375, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.1962890625, | |
| "rewards/margins": 0.0137939453125, | |
| "rewards/rejected": -0.2099609375, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.390918916154321, | |
| "learning_rate": 1.1811023622047243e-07, | |
| "log_odds_chosen": 0.21818237006664276, | |
| "log_odds_ratio": -0.7723633050918579, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -2.0625, | |
| "logps/rejected": -2.25, | |
| "loss": 1.6078, | |
| "nll_loss": 1.5625, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.2060546875, | |
| "rewards/margins": 0.0185546875, | |
| "rewards/rejected": -0.224609375, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7.289138170054862, | |
| "learning_rate": 1.3779527559055117e-07, | |
| "log_odds_chosen": 0.34990233182907104, | |
| "log_odds_ratio": -0.7339843511581421, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -2.0625, | |
| "logps/rejected": -2.375, | |
| "loss": 1.5634, | |
| "nll_loss": 1.5078125, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.20703125, | |
| "rewards/margins": 0.0311279296875, | |
| "rewards/rejected": -0.23828125, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.612859448964533, | |
| "learning_rate": 1.5748031496062992e-07, | |
| "log_odds_chosen": 0.4515624940395355, | |
| "log_odds_ratio": -0.640917956829071, | |
| "logits/chosen": -1.9609375, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.7734375, | |
| "logps/rejected": -2.171875, | |
| "loss": 1.532, | |
| "nll_loss": 1.421875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.177734375, | |
| "rewards/margins": 0.039794921875, | |
| "rewards/rejected": -0.2177734375, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 8.397857351399276, | |
| "learning_rate": 1.7716535433070863e-07, | |
| "log_odds_chosen": 0.17760619521141052, | |
| "log_odds_ratio": -0.7791992425918579, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -2.0625, | |
| "logps/rejected": -2.21875, | |
| "loss": 1.5648, | |
| "nll_loss": 1.546875, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.2060546875, | |
| "rewards/margins": 0.0166015625, | |
| "rewards/rejected": -0.22265625, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.586832559599352, | |
| "learning_rate": 1.968503937007874e-07, | |
| "log_odds_chosen": 0.2771240174770355, | |
| "log_odds_ratio": -0.6792968511581421, | |
| "logits/chosen": -2.140625, | |
| "logits/rejected": -2.171875, | |
| "logps/chosen": -1.890625, | |
| "logps/rejected": -2.125, | |
| "loss": 1.5286, | |
| "nll_loss": 1.484375, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.189453125, | |
| "rewards/margins": 0.02392578125, | |
| "rewards/rejected": -0.212890625, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 5.07791601254699, | |
| "learning_rate": 2.1653543307086615e-07, | |
| "log_odds_chosen": 0.214080810546875, | |
| "log_odds_ratio": -0.7261718511581421, | |
| "logits/chosen": -2.09375, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.890625, | |
| "logps/rejected": -2.0625, | |
| "loss": 1.4891, | |
| "nll_loss": 1.4140625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.189453125, | |
| "rewards/margins": 0.017333984375, | |
| "rewards/rejected": -0.2060546875, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 5.712206786453907, | |
| "learning_rate": 2.3622047244094486e-07, | |
| "log_odds_chosen": 0.14680786430835724, | |
| "log_odds_ratio": -0.7562500238418579, | |
| "logits/chosen": -2.078125, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.9375, | |
| "logps/rejected": -2.03125, | |
| "loss": 1.4901, | |
| "nll_loss": 1.34375, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.193359375, | |
| "rewards/margins": 0.00994873046875, | |
| "rewards/rejected": -0.2041015625, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 5.959474046553222, | |
| "learning_rate": 2.559055118110236e-07, | |
| "log_odds_chosen": 0.2553772032260895, | |
| "log_odds_ratio": -0.6973632574081421, | |
| "logits/chosen": -2.15625, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -1.7890625, | |
| "logps/rejected": -1.9921875, | |
| "loss": 1.497, | |
| "nll_loss": 1.3828125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.1787109375, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": -0.19921875, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 5.960730803694658, | |
| "learning_rate": 2.7559055118110235e-07, | |
| "log_odds_chosen": 0.16423340141773224, | |
| "log_odds_ratio": -0.718457043170929, | |
| "logits/chosen": -2.15625, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -1.765625, | |
| "logps/rejected": -1.8984375, | |
| "loss": 1.5228, | |
| "nll_loss": 1.453125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.1767578125, | |
| "rewards/margins": 0.0133056640625, | |
| "rewards/rejected": -0.189453125, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 4.558212884083645, | |
| "learning_rate": 2.9527559055118104e-07, | |
| "log_odds_chosen": 0.24582520127296448, | |
| "log_odds_ratio": -0.671191394329071, | |
| "logits/chosen": -2.125, | |
| "logits/rejected": -2.1875, | |
| "logps/chosen": -1.546875, | |
| "logps/rejected": -1.7421875, | |
| "loss": 1.4124, | |
| "nll_loss": 1.3125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.154296875, | |
| "rewards/margins": 0.0191650390625, | |
| "rewards/rejected": -0.173828125, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 3.4831822067708686, | |
| "learning_rate": 3.1496062992125984e-07, | |
| "log_odds_chosen": 0.17824706435203552, | |
| "log_odds_ratio": -0.69384765625, | |
| "logits/chosen": -2.25, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -1.671875, | |
| "logps/rejected": -1.8203125, | |
| "loss": 1.4297, | |
| "nll_loss": 1.390625, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.1669921875, | |
| "rewards/margins": 0.0145263671875, | |
| "rewards/rejected": -0.181640625, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 4.034138231637428, | |
| "learning_rate": 3.346456692913386e-07, | |
| "log_odds_chosen": 0.12167968600988388, | |
| "log_odds_ratio": -0.7186523675918579, | |
| "logits/chosen": -2.1875, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -1.5625, | |
| "logps/rejected": -1.6640625, | |
| "loss": 1.3835, | |
| "nll_loss": 1.3203125, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.15625, | |
| "rewards/margins": 0.01007080078125, | |
| "rewards/rejected": -0.166015625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 4.146506823609489, | |
| "learning_rate": 3.5433070866141727e-07, | |
| "log_odds_chosen": 0.13695068657398224, | |
| "log_odds_ratio": -0.719433605670929, | |
| "logits/chosen": -2.28125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -1.5546875, | |
| "logps/rejected": -1.671875, | |
| "loss": 1.4352, | |
| "nll_loss": 1.3828125, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.1552734375, | |
| "rewards/margins": 0.0115966796875, | |
| "rewards/rejected": -0.1669921875, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 3.5266117677319087, | |
| "learning_rate": 3.7401574803149606e-07, | |
| "log_odds_chosen": 0.17273560166358948, | |
| "log_odds_ratio": -0.681640625, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -1.4765625, | |
| "logps/rejected": -1.625, | |
| "loss": 1.3599, | |
| "nll_loss": 1.3046875, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.1474609375, | |
| "rewards/margins": 0.01495361328125, | |
| "rewards/rejected": -0.162109375, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 3.4327683095111072, | |
| "learning_rate": 3.937007874015748e-07, | |
| "log_odds_chosen": 0.11888428032398224, | |
| "log_odds_ratio": -0.711230456829071, | |
| "logits/chosen": -2.296875, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -1.3828125, | |
| "logps/rejected": -1.484375, | |
| "loss": 1.3592, | |
| "nll_loss": 1.28125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.138671875, | |
| "rewards/margins": 0.0093994140625, | |
| "rewards/rejected": -0.1484375, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 2.811761812417915, | |
| "learning_rate": 4.133858267716535e-07, | |
| "log_odds_chosen": 0.13620606064796448, | |
| "log_odds_ratio": -0.692089855670929, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -1.359375, | |
| "logps/rejected": -1.453125, | |
| "loss": 1.3822, | |
| "nll_loss": 1.2421875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1357421875, | |
| "rewards/margins": 0.010009765625, | |
| "rewards/rejected": -0.1455078125, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 3.1783123040584775, | |
| "learning_rate": 4.330708661417323e-07, | |
| "log_odds_chosen": 0.23652343451976776, | |
| "log_odds_ratio": -0.656054675579071, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -1.296875, | |
| "logps/rejected": -1.46875, | |
| "loss": 1.3022, | |
| "nll_loss": 1.2265625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.12890625, | |
| "rewards/margins": 0.017578125, | |
| "rewards/rejected": -0.146484375, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 3.398094645144472, | |
| "learning_rate": 4.52755905511811e-07, | |
| "log_odds_chosen": 0.10042724758386612, | |
| "log_odds_ratio": -0.737500011920929, | |
| "logits/chosen": -2.421875, | |
| "logits/rejected": -2.546875, | |
| "logps/chosen": -1.265625, | |
| "logps/rejected": -1.3359375, | |
| "loss": 1.3118, | |
| "nll_loss": 1.21875, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.126953125, | |
| "rewards/margins": 0.007049560546875, | |
| "rewards/rejected": -0.1337890625, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 3.0602456337138735, | |
| "learning_rate": 4.7244094488188973e-07, | |
| "log_odds_chosen": 0.05767212063074112, | |
| "log_odds_ratio": -0.7372070550918579, | |
| "logits/chosen": -2.328125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -1.359375, | |
| "logps/rejected": -1.40625, | |
| "loss": 1.3639, | |
| "nll_loss": 1.3359375, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.1357421875, | |
| "rewards/margins": 0.0050048828125, | |
| "rewards/rejected": -0.140625, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 3.1390537608793543, | |
| "learning_rate": 4.921259842519685e-07, | |
| "log_odds_chosen": 0.16054077446460724, | |
| "log_odds_ratio": -0.6732422113418579, | |
| "logits/chosen": -2.34375, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -1.2421875, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.3024, | |
| "nll_loss": 1.234375, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1240234375, | |
| "rewards/margins": 0.01104736328125, | |
| "rewards/rejected": -0.134765625, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 2.518997318792438, | |
| "learning_rate": 4.999914863146575e-07, | |
| "log_odds_chosen": 0.16718749701976776, | |
| "log_odds_ratio": -0.6884765625, | |
| "logits/chosen": -2.359375, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -1.234375, | |
| "logps/rejected": -1.3359375, | |
| "loss": 1.3314, | |
| "nll_loss": 1.234375, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.12353515625, | |
| "rewards/margins": 0.01019287109375, | |
| "rewards/rejected": -0.1337890625, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 2.798537540317038, | |
| "learning_rate": 4.999394603374641e-07, | |
| "log_odds_chosen": 0.19011840224266052, | |
| "log_odds_ratio": -0.676562488079071, | |
| "logits/chosen": -2.359375, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -1.203125, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.2872, | |
| "nll_loss": 1.1875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.12060546875, | |
| "rewards/margins": 0.01422119140625, | |
| "rewards/rejected": -0.134765625, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 2.8758259093431437, | |
| "learning_rate": 4.99840148039188e-07, | |
| "log_odds_chosen": 0.29682618379592896, | |
| "log_odds_ratio": -0.637890636920929, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -2.5, | |
| "logps/chosen": -1.203125, | |
| "logps/rejected": -1.4140625, | |
| "loss": 1.2201, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.1201171875, | |
| "rewards/margins": 0.021484375, | |
| "rewards/rejected": -0.1416015625, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 2.695897803134525, | |
| "learning_rate": 4.996935682088318e-07, | |
| "log_odds_chosen": 0.22941894829273224, | |
| "log_odds_ratio": -0.6490234136581421, | |
| "logits/chosen": -2.359375, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -1.21875, | |
| "logps/rejected": -1.375, | |
| "loss": 1.2819, | |
| "nll_loss": 1.1875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1220703125, | |
| "rewards/margins": 0.015380859375, | |
| "rewards/rejected": -0.1376953125, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.8982419601141585, | |
| "learning_rate": 4.994997485779947e-07, | |
| "log_odds_chosen": 0.23259887099266052, | |
| "log_odds_ratio": -0.666796863079071, | |
| "logits/chosen": -2.25, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -1.1796875, | |
| "logps/rejected": -1.34375, | |
| "loss": 1.2759, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.1181640625, | |
| "rewards/margins": 0.015869140625, | |
| "rewards/rejected": -0.1337890625, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.9056748531961585, | |
| "learning_rate": 4.992587258156258e-07, | |
| "log_odds_chosen": 0.17786864936351776, | |
| "log_odds_ratio": -0.6656249761581421, | |
| "logits/chosen": -2.265625, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -1.21875, | |
| "logps/rejected": -1.34375, | |
| "loss": 1.2812, | |
| "nll_loss": 1.2109375, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.12158203125, | |
| "rewards/margins": 0.01324462890625, | |
| "rewards/rejected": -0.134765625, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.596709437423632, | |
| "learning_rate": 4.989705455210862e-07, | |
| "log_odds_chosen": 0.21816405653953552, | |
| "log_odds_ratio": -0.662890613079071, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -1.1875, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.2184, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.11865234375, | |
| "rewards/margins": 0.016357421875, | |
| "rewards/rejected": -0.134765625, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 2.476759669633908, | |
| "learning_rate": 4.986352622155222e-07, | |
| "log_odds_chosen": 0.17100830376148224, | |
| "log_odds_ratio": -0.6903320550918579, | |
| "logits/chosen": -2.359375, | |
| "logits/rejected": -2.46875, | |
| "logps/chosen": -1.1953125, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.2865, | |
| "nll_loss": 1.21875, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.119140625, | |
| "rewards/margins": 0.01129150390625, | |
| "rewards/rejected": -0.130859375, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 2.628189970289334, | |
| "learning_rate": 4.98252939331551e-07, | |
| "log_odds_chosen": 0.17416992783546448, | |
| "log_odds_ratio": -0.697070300579071, | |
| "logits/chosen": -2.328125, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -1.21875, | |
| "logps/rejected": -1.34375, | |
| "loss": 1.2797, | |
| "nll_loss": 1.2109375, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.12158203125, | |
| "rewards/margins": 0.01318359375, | |
| "rewards/rejected": -0.134765625, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 2.6295659881126943, | |
| "learning_rate": 4.978236492012589e-07, | |
| "log_odds_chosen": 0.02346191368997097, | |
| "log_odds_ratio": -0.755078136920929, | |
| "logits/chosen": -2.421875, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -1.2578125, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.3077, | |
| "nll_loss": 1.25, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1259765625, | |
| "rewards/margins": 0.00180816650390625, | |
| "rewards/rejected": -0.126953125, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 2.959773704803729, | |
| "learning_rate": 4.973474730425173e-07, | |
| "log_odds_chosen": 0.19099120795726776, | |
| "log_odds_ratio": -0.6749023199081421, | |
| "logits/chosen": -2.25, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -1.1875, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2568, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.11865234375, | |
| "rewards/margins": 0.0125732421875, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 2.613182586833654, | |
| "learning_rate": 4.968245009436167e-07, | |
| "log_odds_chosen": 0.10064697265625, | |
| "log_odds_ratio": -0.72216796875, | |
| "logits/chosen": -2.375, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -1.28125, | |
| "logps/rejected": -1.3671875, | |
| "loss": 1.2944, | |
| "nll_loss": 1.2890625, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.1279296875, | |
| "rewards/margins": 0.00860595703125, | |
| "rewards/rejected": -0.13671875, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.8683189618015126, | |
| "learning_rate": 4.962548318462231e-07, | |
| "log_odds_chosen": 0.19755859673023224, | |
| "log_odds_ratio": -0.6724609136581421, | |
| "logits/chosen": -2.296875, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -1.171875, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.2778, | |
| "nll_loss": 1.2265625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.11669921875, | |
| "rewards/margins": 0.01373291015625, | |
| "rewards/rejected": -0.130859375, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 2.9807597681868305, | |
| "learning_rate": 4.95638573526659e-07, | |
| "log_odds_chosen": 0.14707031846046448, | |
| "log_odds_ratio": -0.7005859613418579, | |
| "logits/chosen": -2.28125, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -1.171875, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.2844, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.11767578125, | |
| "rewards/margins": 0.0098876953125, | |
| "rewards/rejected": -0.126953125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.655124275329291, | |
| "learning_rate": 4.949758425755127e-07, | |
| "log_odds_chosen": 0.10791015625, | |
| "log_odds_ratio": -0.7230468988418579, | |
| "logits/chosen": -2.1875, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -1.203125, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.2902, | |
| "nll_loss": 1.1875, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1201171875, | |
| "rewards/margins": 0.00762939453125, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.4371959032830293, | |
| "learning_rate": 4.94266764375581e-07, | |
| "log_odds_chosen": 0.20887450873851776, | |
| "log_odds_ratio": -0.6651366949081421, | |
| "logits/chosen": -2.21875, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -1.1328125, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.2674, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.11376953125, | |
| "rewards/margins": 0.01544189453125, | |
| "rewards/rejected": -0.12890625, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 2.729848906556158, | |
| "learning_rate": 4.935114730781475e-07, | |
| "log_odds_chosen": 0.27691650390625, | |
| "log_odds_ratio": -0.6527343988418579, | |
| "logits/chosen": -2.140625, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -1.1953125, | |
| "logps/rejected": -1.3828125, | |
| "loss": 1.2544, | |
| "nll_loss": 1.1875, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.11962890625, | |
| "rewards/margins": 0.018798828125, | |
| "rewards/rejected": -0.138671875, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.50393555238819, | |
| "learning_rate": 4.927101115776026e-07, | |
| "log_odds_chosen": 0.14921875298023224, | |
| "log_odds_ratio": -0.702343761920929, | |
| "logits/chosen": -2.28125, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -1.203125, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.2471, | |
| "nll_loss": 1.2265625, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.12060546875, | |
| "rewards/margins": 0.01129150390625, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.7483117165130744, | |
| "learning_rate": 4.918628314844088e-07, | |
| "log_odds_chosen": 0.04735717922449112, | |
| "log_odds_ratio": -0.749218761920929, | |
| "logits/chosen": -2.265625, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -1.2109375, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2351, | |
| "nll_loss": 1.21875, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.12060546875, | |
| "rewards/margins": 0.0040283203125, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.9881422727710887, | |
| "learning_rate": 4.909697930964179e-07, | |
| "log_odds_chosen": 0.16976317763328552, | |
| "log_odds_ratio": -0.6986328363418579, | |
| "logits/chosen": -2.25, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -1.1953125, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2467, | |
| "nll_loss": 1.2421875, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.11962890625, | |
| "rewards/margins": 0.01153564453125, | |
| "rewards/rejected": -0.130859375, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 2.6175879243996363, | |
| "learning_rate": 4.900311653685437e-07, | |
| "log_odds_chosen": 0.20297852158546448, | |
| "log_odds_ratio": -0.669140636920929, | |
| "logits/chosen": -2.296875, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -1.1640625, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2102, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1162109375, | |
| "rewards/margins": 0.01513671875, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 2.2300783745527317, | |
| "learning_rate": 4.890471258807968e-07, | |
| "log_odds_chosen": 0.19609375298023224, | |
| "log_odds_ratio": -0.6773437261581421, | |
| "logits/chosen": -2.1875, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -1.1875, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.2387, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.11865234375, | |
| "rewards/margins": 0.01116943359375, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 2.9471805049826094, | |
| "learning_rate": 4.880178608046894e-07, | |
| "log_odds_chosen": 0.14970703423023224, | |
| "log_odds_ratio": -0.7040039300918579, | |
| "logits/chosen": -2.171875, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2675, | |
| "nll_loss": 1.2109375, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1142578125, | |
| "rewards/margins": 0.0108642578125, | |
| "rewards/rejected": -0.125, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 2.7162956655728623, | |
| "learning_rate": 4.869435648680116e-07, | |
| "log_odds_chosen": 0.15129394829273224, | |
| "log_odds_ratio": -0.6802734136581421, | |
| "logits/chosen": -2.1875, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1889, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.01043701171875, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 4.459550453771863, | |
| "learning_rate": 4.858244413179923e-07, | |
| "log_odds_chosen": 0.2993102967739105, | |
| "log_odds_ratio": -0.64111328125, | |
| "logits/chosen": -2.171875, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2095, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.111328125, | |
| "rewards/margins": 0.0198974609375, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 3.2619422051015836, | |
| "learning_rate": 4.846607018828449e-07, | |
| "log_odds_chosen": 0.210205078125, | |
| "log_odds_ratio": -0.673632800579071, | |
| "logits/chosen": -2.21875, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -1.1875, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.2653, | |
| "nll_loss": 1.21875, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.119140625, | |
| "rewards/margins": 0.013427734375, | |
| "rewards/rejected": -0.1328125, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 2.7715133106941576, | |
| "learning_rate": 4.834525667317121e-07, | |
| "log_odds_chosen": 0.22309570014476776, | |
| "log_odds_ratio": -0.66357421875, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.234375, | |
| "logps/chosen": -1.15625, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.2614, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.115234375, | |
| "rewards/margins": 0.0150146484375, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.4609772540284593, | |
| "learning_rate": 4.822002644330101e-07, | |
| "log_odds_chosen": 0.19017334282398224, | |
| "log_odds_ratio": -0.711718738079071, | |
| "logits/chosen": -2.171875, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -1.21875, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.2548, | |
| "nll_loss": 1.2421875, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.12158203125, | |
| "rewards/margins": 0.0137939453125, | |
| "rewards/rejected": -0.1357421875, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.645873831673924, | |
| "learning_rate": 4.809040319111865e-07, | |
| "log_odds_chosen": 0.12646484375, | |
| "log_odds_ratio": -0.713671863079071, | |
| "logits/chosen": -2.09375, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -1.1484375, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.2402, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.11474609375, | |
| "rewards/margins": 0.0096435546875, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.45398616162251, | |
| "learning_rate": 4.795641144018965e-07, | |
| "log_odds_chosen": 0.09213867038488388, | |
| "log_odds_ratio": -0.74267578125, | |
| "logits/chosen": -2.203125, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -1.2109375, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.2755, | |
| "nll_loss": 1.25, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.12109375, | |
| "rewards/margins": 0.006805419921875, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 2.5512768402172683, | |
| "learning_rate": 4.781807654056053e-07, | |
| "log_odds_chosen": 0.214599609375, | |
| "log_odds_ratio": -0.692675769329071, | |
| "logits/chosen": -2.109375, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.2303, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.11279296875, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": -0.12890625, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.411346604585139, | |
| "learning_rate": 4.7675424663962933e-07, | |
| "log_odds_chosen": 0.165435791015625, | |
| "log_odds_ratio": -0.700390636920929, | |
| "logits/chosen": -2.140625, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -1.2109375, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2571, | |
| "nll_loss": 1.234375, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.12109375, | |
| "rewards/margins": 0.010498046875, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.48077566767054, | |
| "learning_rate": 4.752848279886212e-07, | |
| "log_odds_chosen": 0.207489013671875, | |
| "log_odds_ratio": -0.66748046875, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.234375, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1978, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.0145263671875, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.7897141919738786, | |
| "learning_rate": 4.7377278745350984e-07, | |
| "log_odds_chosen": 0.2662353515625, | |
| "log_odds_ratio": -0.663867175579071, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.2309, | |
| "nll_loss": 1.0859375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.1044921875, | |
| "rewards/margins": 0.0147705078125, | |
| "rewards/rejected": -0.119140625, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 2.758617809500896, | |
| "learning_rate": 4.7221841109890506e-07, | |
| "log_odds_chosen": 0.22445067763328552, | |
| "log_odds_ratio": -0.695117175579071, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2281, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 2.6514784587831204, | |
| "learning_rate": 4.706219929989771e-07, | |
| "log_odds_chosen": 0.2147216796875, | |
| "log_odds_ratio": -0.660937488079071, | |
| "logits/chosen": -2.078125, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.2094, | |
| "nll_loss": 1.0859375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.0140380859375, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 2.630207221232529, | |
| "learning_rate": 4.6898383518182007e-07, | |
| "log_odds_chosen": 0.19202271103858948, | |
| "log_odds_ratio": -0.6786133050918579, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.2307, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.014404296875, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.9225816829730427, | |
| "learning_rate": 4.67304247572311e-07, | |
| "log_odds_chosen": 0.2799316346645355, | |
| "log_odds_ratio": -0.659960925579071, | |
| "logits/chosen": -2.09375, | |
| "logits/rejected": -2.1875, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.2257, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.018310546875, | |
| "rewards/rejected": -0.130859375, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.331606515139032, | |
| "learning_rate": 4.65583547933475e-07, | |
| "log_odds_chosen": 0.16041259467601776, | |
| "log_odds_ratio": -0.713085949420929, | |
| "logits/chosen": -2.0, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.21, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.01031494140625, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.541498557632385, | |
| "learning_rate": 4.6382206180636705e-07, | |
| "log_odds_chosen": 0.12631836533546448, | |
| "log_odds_ratio": -0.7256835699081421, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.1875, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2675, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1142578125, | |
| "rewards/margins": 0.01019287109375, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 3.2675127960880586, | |
| "learning_rate": 4.620201224484827e-07, | |
| "log_odds_chosen": 0.2113037109375, | |
| "log_odds_ratio": -0.658496081829071, | |
| "logits/chosen": -2.0, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.209, | |
| "nll_loss": 1.0703125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.01470947265625, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 2.5735852092457248, | |
| "learning_rate": 4.601780707707087e-07, | |
| "log_odds_chosen": 0.25184327363967896, | |
| "log_odds_ratio": -0.679394543170929, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.1888, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.11083984375, | |
| "rewards/margins": 0.017333984375, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 2.678233631526468, | |
| "learning_rate": 4.5829625527282554e-07, | |
| "log_odds_chosen": 0.15609130263328552, | |
| "log_odds_ratio": -0.700488269329071, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.2431, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.11279296875, | |
| "rewards/margins": 0.01080322265625, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 2.423777152319806, | |
| "learning_rate": 4.5637503197757474e-07, | |
| "log_odds_chosen": 0.089111328125, | |
| "log_odds_ratio": -0.746874988079071, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.1964, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.11376953125, | |
| "rewards/margins": 0.007568359375, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 3.0765189053391633, | |
| "learning_rate": 4.5441476436330204e-07, | |
| "log_odds_chosen": 0.27679443359375, | |
| "log_odds_ratio": -0.677929699420929, | |
| "logits/chosen": -2.078125, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.2492, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.111328125, | |
| "rewards/margins": 0.018310546875, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 2.6130205345904334, | |
| "learning_rate": 4.5241582329519105e-07, | |
| "log_odds_chosen": 0.150299072265625, | |
| "log_odds_ratio": -0.7164062261581421, | |
| "logits/chosen": -1.984375, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.2128, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.11279296875, | |
| "rewards/margins": 0.0115966796875, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.710305930916119, | |
| "learning_rate": 4.503785869550984e-07, | |
| "log_odds_chosen": 0.17982177436351776, | |
| "log_odds_ratio": -0.708300769329071, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.1796875, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.2557, | |
| "nll_loss": 1.2109375, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.1181640625, | |
| "rewards/margins": 0.01446533203125, | |
| "rewards/rejected": -0.1328125, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.784125203819912, | |
| "learning_rate": 4.4830344077000535e-07, | |
| "log_odds_chosen": 0.17173461616039276, | |
| "log_odds_ratio": -0.70556640625, | |
| "logits/chosen": -1.921875, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -1.1328125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2264, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.11376953125, | |
| "rewards/margins": 0.012451171875, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.969932216303278, | |
| "learning_rate": 4.461907773390984e-07, | |
| "log_odds_chosen": 0.24876098334789276, | |
| "log_odds_ratio": -0.671191394329071, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.171875, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.2521, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.0169677734375, | |
| "rewards/rejected": -0.12890625, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.3389098001594553, | |
| "learning_rate": 4.4404099635949297e-07, | |
| "log_odds_chosen": 0.20144042372703552, | |
| "log_odds_ratio": -0.679394543170929, | |
| "logits/chosen": -1.984375, | |
| "logits/rejected": -2.0625, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1809, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.012939453125, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 3.0086699300119872, | |
| "learning_rate": 4.418545045506144e-07, | |
| "log_odds_chosen": 0.10061035305261612, | |
| "log_odds_ratio": -0.7476562261581421, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.140625, | |
| "loss": 1.239, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.007415771484375, | |
| "rewards/rejected": -0.1142578125, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 2.2320233236738143, | |
| "learning_rate": 4.3963171557725004e-07, | |
| "log_odds_chosen": 0.0516357421875, | |
| "log_odds_ratio": -0.776171863079071, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.1796875, | |
| "loss": 1.2253, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.00567626953125, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 2.5006901009181877, | |
| "learning_rate": 4.3737304997128765e-07, | |
| "log_odds_chosen": 0.2787841856479645, | |
| "log_odds_ratio": -0.668652355670929, | |
| "logits/chosen": -1.96875, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2413, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.11279296875, | |
| "rewards/margins": 0.0184326171875, | |
| "rewards/rejected": -0.130859375, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.7387563755483, | |
| "learning_rate": 4.350789350521548e-07, | |
| "log_odds_chosen": 0.19570311903953552, | |
| "log_odds_ratio": -0.71337890625, | |
| "logits/chosen": -1.9921875, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.2163, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.014892578125, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 2.898794879634056, | |
| "learning_rate": 4.32749804845973e-07, | |
| "log_odds_chosen": 0.16457518935203552, | |
| "log_odds_ratio": -0.7186523675918579, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.1484375, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2604, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.115234375, | |
| "rewards/margins": 0.0098876953125, | |
| "rewards/rejected": -0.125, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.5762145094524973, | |
| "learning_rate": 4.303861000034449e-07, | |
| "log_odds_chosen": 0.17528076469898224, | |
| "log_odds_ratio": -0.6820312738418579, | |
| "logits/chosen": -1.953125, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.1942, | |
| "nll_loss": 1.0625, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.01123046875, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.865127283376686, | |
| "learning_rate": 4.2798826771648635e-07, | |
| "log_odds_chosen": 0.24028930068016052, | |
| "log_odds_ratio": -0.6766601800918579, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.2299, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.017333984375, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.446066437808379, | |
| "learning_rate": 4.2555676163362205e-07, | |
| "log_odds_chosen": 0.21907348930835724, | |
| "log_odds_ratio": -0.6822265386581421, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -1.1640625, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.2354, | |
| "nll_loss": 1.21875, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1162109375, | |
| "rewards/margins": 0.0133056640625, | |
| "rewards/rejected": -0.12890625, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.372183587847385, | |
| "learning_rate": 4.230920417741589e-07, | |
| "log_odds_chosen": 0.27910155057907104, | |
| "log_odds_ratio": -0.666308581829071, | |
| "logits/chosen": -2.0625, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.2168, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.020263671875, | |
| "rewards/rejected": -0.126953125, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 2.9436571486068623, | |
| "learning_rate": 4.205945744411551e-07, | |
| "log_odds_chosen": 0.12534180283546448, | |
| "log_odds_ratio": -0.73974609375, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.1640625, | |
| "loss": 1.2815, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.00909423828125, | |
| "rewards/rejected": -0.11669921875, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 2.8833427572143133, | |
| "learning_rate": 4.1806483213319877e-07, | |
| "log_odds_chosen": 0.23845215141773224, | |
| "log_odds_ratio": -0.6595703363418579, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.265625, | |
| "loss": 1.2544, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.0167236328125, | |
| "rewards/rejected": -0.126953125, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.715581746962796, | |
| "learning_rate": 4.155032934550165e-07, | |
| "log_odds_chosen": 0.16794434189796448, | |
| "log_odds_ratio": -0.6874023675918579, | |
| "logits/chosen": -1.9296875, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.1640625, | |
| "loss": 1.1984, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.00909423828125, | |
| "rewards/rejected": -0.11669921875, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.784822109898019, | |
| "learning_rate": 4.129104430269248e-07, | |
| "log_odds_chosen": 0.15845946967601776, | |
| "log_odds_ratio": -0.693359375, | |
| "logits/chosen": -1.9453125, | |
| "logits/rejected": -2.09375, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.2345, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.11083984375, | |
| "rewards/margins": 0.0106201171875, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.5161836223908263, | |
| "learning_rate": 4.102867713931448e-07, | |
| "log_odds_chosen": 0.16597899794578552, | |
| "log_odds_ratio": -0.6788085699081421, | |
| "logits/chosen": -1.9765625, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.1953125, | |
| "loss": 1.2663, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.0096435546875, | |
| "rewards/rejected": -0.119140625, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 2.644860041118969, | |
| "learning_rate": 4.0763277492899504e-07, | |
| "log_odds_chosen": 0.23768310248851776, | |
| "log_odds_ratio": -0.6807616949081421, | |
| "logits/chosen": -1.984375, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -1.1328125, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.2307, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.11328125, | |
| "rewards/margins": 0.0157470703125, | |
| "rewards/rejected": -0.12890625, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 2.7157553266494503, | |
| "learning_rate": 4.049489557469824e-07, | |
| "log_odds_chosen": 0.15152588486671448, | |
| "log_odds_ratio": -0.7015625238418579, | |
| "logits/chosen": -1.7890625, | |
| "logits/rejected": -1.96875, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.1640625, | |
| "loss": 1.168, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.01025390625, | |
| "rewards/rejected": -0.1162109375, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 2.302289432995534, | |
| "learning_rate": 4.0223582160180623e-07, | |
| "log_odds_chosen": 0.13297119736671448, | |
| "log_odds_ratio": -0.725781261920929, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.1737, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.00994873046875, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 2.6556730641084543, | |
| "learning_rate": 3.9949388579429614e-07, | |
| "log_odds_chosen": 0.00870361365377903, | |
| "log_odds_ratio": -0.7632812261581421, | |
| "logits/chosen": -1.8828125, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.1015625, | |
| "loss": 1.2113, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": -0.0003070831298828125, | |
| "rewards/rejected": -0.10986328125, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 2.3947626659116406, | |
| "learning_rate": 3.967236670742998e-07, | |
| "log_odds_chosen": 0.22456054389476776, | |
| "log_odds_ratio": -0.681347668170929, | |
| "logits/chosen": -2.0, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -1.1796875, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.2084, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.11767578125, | |
| "rewards/margins": 0.0174560546875, | |
| "rewards/rejected": -0.134765625, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 2.7830252945871896, | |
| "learning_rate": 3.9392568954254023e-07, | |
| "log_odds_chosen": 0.2349853515625, | |
| "log_odds_ratio": -0.6742187738418579, | |
| "logits/chosen": -1.90625, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2185, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.11376953125, | |
| "rewards/margins": 0.01708984375, | |
| "rewards/rejected": -0.130859375, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.4217959208998723, | |
| "learning_rate": 3.9110048255146043e-07, | |
| "log_odds_chosen": 0.16409912705421448, | |
| "log_odds_ratio": -0.693164050579071, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.2102, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.0107421875, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.7469080448706706, | |
| "learning_rate": 3.882485806050748e-07, | |
| "log_odds_chosen": 0.31447142362594604, | |
| "log_odds_ratio": -0.639355480670929, | |
| "logits/chosen": -1.9609375, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2235, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.0194091796875, | |
| "rewards/rejected": -0.125, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 2.596832510754079, | |
| "learning_rate": 3.8537052325784573e-07, | |
| "log_odds_chosen": 0.2929016053676605, | |
| "log_odds_ratio": -0.650585949420929, | |
| "logits/chosen": -1.96875, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.1857, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": -0.130859375, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.7575190212441383, | |
| "learning_rate": 3.824668550126046e-07, | |
| "log_odds_chosen": 0.19545897841453552, | |
| "log_odds_ratio": -0.6885741949081421, | |
| "logits/chosen": -1.96875, | |
| "logits/rejected": -2.0625, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.1953125, | |
| "loss": 1.1889, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.0123291015625, | |
| "rewards/rejected": -0.119140625, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 3.592994219979355, | |
| "learning_rate": 3.7953812521753643e-07, | |
| "log_odds_chosen": 0.16755370795726776, | |
| "log_odds_ratio": -0.6943359375, | |
| "logits/chosen": -1.921875, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.171875, | |
| "loss": 1.1494, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.0098876953125, | |
| "rewards/rejected": -0.1171875, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.5202564213089405, | |
| "learning_rate": 3.7658488796224885e-07, | |
| "log_odds_chosen": 0.11643066257238388, | |
| "log_odds_ratio": -0.716796875, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -1.1484375, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.2212, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.11474609375, | |
| "rewards/margins": 0.00860595703125, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.724799909308137, | |
| "learning_rate": 3.736077019729425e-07, | |
| "log_odds_chosen": 0.302978515625, | |
| "log_odds_ratio": -0.6385742425918579, | |
| "logits/chosen": -1.9296875, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1893, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.10400390625, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.4835614341515053, | |
| "learning_rate": 3.7060713050670546e-07, | |
| "log_odds_chosen": 0.2666015625, | |
| "log_odds_ratio": -0.6908203363418579, | |
| "logits/chosen": -1.875, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.328125, | |
| "loss": 1.2376, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.020751953125, | |
| "rewards/rejected": -0.1328125, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 2.709722079150454, | |
| "learning_rate": 3.6758374124494973e-07, | |
| "log_odds_chosen": 0.185791015625, | |
| "log_odds_ratio": -0.6966797113418579, | |
| "logits/chosen": -1.8984375, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.2082, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.01458740234375, | |
| "rewards/rejected": -0.12109375, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 2.8331342756102167, | |
| "learning_rate": 3.645381061860113e-07, | |
| "log_odds_chosen": 0.3631835877895355, | |
| "log_odds_ratio": -0.6460937261581421, | |
| "logits/chosen": -1.921875, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -1.0078125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1933, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.1005859375, | |
| "rewards/margins": 0.025146484375, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 2.6269186805524143, | |
| "learning_rate": 3.61470801536933e-07, | |
| "log_odds_chosen": 0.12788085639476776, | |
| "log_odds_ratio": -0.7337890863418579, | |
| "logits/chosen": -2.0, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.1328125, | |
| "loss": 1.2153, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.10400390625, | |
| "rewards/margins": 0.00909423828125, | |
| "rewards/rejected": -0.11279296875, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.60712425422802, | |
| "learning_rate": 3.583824076044508e-07, | |
| "log_odds_chosen": 0.08272705227136612, | |
| "log_odds_ratio": -0.7518554925918579, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -1.984375, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.2114, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1142578125, | |
| "rewards/margins": 0.0078125, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 2.742344457324174, | |
| "learning_rate": 3.55273508685206e-07, | |
| "log_odds_chosen": 0.11997070163488388, | |
| "log_odds_ratio": -0.7144531011581421, | |
| "logits/chosen": -1.8828125, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.2194, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.11376953125, | |
| "rewards/margins": 0.0084228515625, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 2.677923442608537, | |
| "learning_rate": 3.5214469295520033e-07, | |
| "log_odds_chosen": 0.2944091856479645, | |
| "log_odds_ratio": -0.6474609375, | |
| "logits/chosen": -1.953125, | |
| "logits/rejected": -2.0625, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.1926, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0206298828125, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 2.614103984779814, | |
| "learning_rate": 3.4899655235851903e-07, | |
| "log_odds_chosen": 0.15128174424171448, | |
| "log_odds_ratio": -0.692187488079071, | |
| "logits/chosen": -1.875, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.2353, | |
| "nll_loss": 1.2265625, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.11376953125, | |
| "rewards/margins": 0.01055908203125, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 2.661524044558228, | |
| "learning_rate": 3.458296824953403e-07, | |
| "log_odds_chosen": 0.19251708686351776, | |
| "log_odds_ratio": -0.681445300579071, | |
| "logits/chosen": -1.8125, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2002, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.01312255859375, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.479788982713935, | |
| "learning_rate": 3.426446825092525e-07, | |
| "log_odds_chosen": 0.30213624238967896, | |
| "log_odds_ratio": -0.6465820074081421, | |
| "logits/chosen": -1.875, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.2165, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.10546875, | |
| "rewards/margins": 0.021728515625, | |
| "rewards/rejected": -0.126953125, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 3.266264486839817, | |
| "learning_rate": 3.3944215497390197e-07, | |
| "log_odds_chosen": 0.12014160305261612, | |
| "log_odds_ratio": -0.707812488079071, | |
| "logits/chosen": -1.875, | |
| "logits/rejected": -1.9375, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.2284, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.00848388671875, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 2.397641031210895, | |
| "learning_rate": 3.362227057789915e-07, | |
| "log_odds_chosen": 0.3463378846645355, | |
| "log_odds_ratio": -0.619335949420929, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.1821, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.0220947265625, | |
| "rewards/rejected": -0.12890625, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 2.4645155740078617, | |
| "learning_rate": 3.329869440156512e-07, | |
| "log_odds_chosen": 0.357086181640625, | |
| "log_odds_ratio": -0.63232421875, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.176, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.024658203125, | |
| "rewards/rejected": -0.130859375, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 2.8912394279639084, | |
| "learning_rate": 3.297354818612037e-07, | |
| "log_odds_chosen": 0.05325927585363388, | |
| "log_odds_ratio": -0.7728515863418579, | |
| "logits/chosen": -1.9453125, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.125, | |
| "loss": 1.2402, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.0030975341796875, | |
| "rewards/rejected": -0.11279296875, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 2.563419103608563, | |
| "learning_rate": 3.264689344633461e-07, | |
| "log_odds_chosen": 0.14066162705421448, | |
| "log_odds_ratio": -0.6943359375, | |
| "logits/chosen": -1.8125, | |
| "logits/rejected": -1.859375, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1959, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1142578125, | |
| "rewards/margins": 0.0084228515625, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 2.8288693775232643, | |
| "learning_rate": 3.2318791982376923e-07, | |
| "log_odds_chosen": 0.19826659560203552, | |
| "log_odds_ratio": -0.6885741949081421, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -2.09375, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.2221, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.01275634765625, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 2.9337235954606844, | |
| "learning_rate": 3.198930586812372e-07, | |
| "log_odds_chosen": 0.3016296327114105, | |
| "log_odds_ratio": -0.67626953125, | |
| "logits/chosen": -1.8671875, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.1805, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.02197265625, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 2.815544385281363, | |
| "learning_rate": 3.1658497439414935e-07, | |
| "log_odds_chosen": 0.18316039443016052, | |
| "log_odds_ratio": -0.6927734613418579, | |
| "logits/chosen": -1.96875, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.203125, | |
| "loss": 1.2118, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.01153564453125, | |
| "rewards/rejected": -0.1201171875, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 2.806645073099231, | |
| "learning_rate": 3.132642928226061e-07, | |
| "log_odds_chosen": 0.33399659395217896, | |
| "log_odds_ratio": -0.6348632574081421, | |
| "logits/chosen": -1.8828125, | |
| "logits/rejected": -1.9921875, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.1911, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.0223388671875, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 2.4581782453300884, | |
| "learning_rate": 3.0993164221000207e-07, | |
| "log_odds_chosen": 0.215545654296875, | |
| "log_odds_ratio": -0.672070324420929, | |
| "logits/chosen": -1.9140625, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.182, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.11083984375, | |
| "rewards/margins": 0.0145263671875, | |
| "rewards/rejected": -0.125, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 5.263613381972474, | |
| "learning_rate": 3.0658765306416794e-07, | |
| "log_odds_chosen": 0.166778564453125, | |
| "log_odds_ratio": -0.6953125, | |
| "logits/chosen": -1.8359375, | |
| "logits/rejected": -1.921875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.2193, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.0118408203125, | |
| "rewards/rejected": -0.12109375, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 2.5240280415155723, | |
| "learning_rate": 3.032329580380838e-07, | |
| "log_odds_chosen": 0.28306883573532104, | |
| "log_odds_ratio": -0.6612304449081421, | |
| "logits/chosen": -1.90625, | |
| "logits/rejected": -1.96875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.1956, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.01904296875, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 2.743773542575128, | |
| "learning_rate": 2.998681918101871e-07, | |
| "log_odds_chosen": 0.3384033143520355, | |
| "log_odds_ratio": -0.6493164300918579, | |
| "logits/chosen": -1.8828125, | |
| "logits/rejected": -1.9453125, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.328125, | |
| "loss": 1.206, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.025390625, | |
| "rewards/rejected": -0.1328125, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 2.8074211611598066, | |
| "learning_rate": 2.9649399096429714e-07, | |
| "log_odds_chosen": 0.23601074516773224, | |
| "log_odds_ratio": -0.6533203125, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -1.9140625, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.188, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.015625, | |
| "rewards/rejected": -0.123046875, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.525044784627154, | |
| "learning_rate": 2.931109938691786e-07, | |
| "log_odds_chosen": 0.16881103813648224, | |
| "log_odds_ratio": -0.684277355670929, | |
| "logits/chosen": -1.8515625, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.188, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0128173828125, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 2.668512392567912, | |
| "learning_rate": 2.8971984055776853e-07, | |
| "log_odds_chosen": 0.21584472060203552, | |
| "log_odds_ratio": -0.672558605670929, | |
| "logits/chosen": -1.84375, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.2336, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.01470947265625, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 2.659729033509314, | |
| "learning_rate": 2.863211726060875e-07, | |
| "log_odds_chosen": 0.2547973692417145, | |
| "log_odds_ratio": -0.6659179925918579, | |
| "logits/chosen": -1.96875, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.140625, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.2367, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1142578125, | |
| "rewards/margins": 0.0167236328125, | |
| "rewards/rejected": -0.130859375, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 2.470961884835421, | |
| "learning_rate": 2.829156330118589e-07, | |
| "log_odds_chosen": 0.24007567763328552, | |
| "log_odds_ratio": -0.65283203125, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.2008, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 2.5904466369333026, | |
| "learning_rate": 2.7950386607286e-07, | |
| "log_odds_chosen": 0.28740233182907104, | |
| "log_odds_ratio": -0.6572265625, | |
| "logits/chosen": -1.8671875, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2003, | |
| "nll_loss": 1.0859375, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.019775390625, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 2.8679276152227726, | |
| "learning_rate": 2.7608651726502607e-07, | |
| "log_odds_chosen": 0.29725342988967896, | |
| "log_odds_ratio": -0.6602538824081421, | |
| "logits/chosen": -1.84375, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.2296, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.0213623046875, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 2.5150772854856243, | |
| "learning_rate": 2.7266423312033226e-07, | |
| "log_odds_chosen": 0.2159423828125, | |
| "log_odds_ratio": -0.7059570550918579, | |
| "logits/chosen": -1.9140625, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.25, | |
| "loss": 1.185, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.017333984375, | |
| "rewards/rejected": -0.125, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 2.737219590030928, | |
| "learning_rate": 2.692376611044757e-07, | |
| "log_odds_chosen": 0.3914794921875, | |
| "log_odds_ratio": -0.640332043170929, | |
| "logits/chosen": -1.796875, | |
| "logits/rejected": -1.8671875, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.2041, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.1025390625, | |
| "rewards/margins": 0.026123046875, | |
| "rewards/rejected": -0.12890625, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 2.7769961907081293, | |
| "learning_rate": 2.6580744949438045e-07, | |
| "log_odds_chosen": 0.08111572265625, | |
| "log_odds_ratio": -0.731249988079071, | |
| "logits/chosen": -1.8828125, | |
| "logits/rejected": -1.9921875, | |
| "logps/chosen": -1.1484375, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.2605, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.11474609375, | |
| "rewards/margins": 0.0037078857421875, | |
| "rewards/rejected": -0.11865234375, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 2.9775601305183463, | |
| "learning_rate": 2.6237424725554935e-07, | |
| "log_odds_chosen": 0.3329834043979645, | |
| "log_odds_ratio": -0.635937511920929, | |
| "logits/chosen": -1.8359375, | |
| "logits/rejected": -1.921875, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.2152, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.0233154296875, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 2.279068955006949, | |
| "learning_rate": 2.589387039192858e-07, | |
| "log_odds_chosen": 0.20733642578125, | |
| "log_odds_ratio": -0.667675793170929, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2064, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.013671875, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 3.0021514828628746, | |
| "learning_rate": 2.555014694598077e-07, | |
| "log_odds_chosen": 0.23118896782398224, | |
| "log_odds_ratio": -0.6884765625, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.9453125, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.2152, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.0145263671875, | |
| "rewards/rejected": -0.123046875, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 2.735522050073968, | |
| "learning_rate": 2.5206319417127873e-07, | |
| "log_odds_chosen": 0.3378845155239105, | |
| "log_odds_ratio": -0.632128894329071, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -1.8984375, | |
| "logps/chosen": -1.0234375, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1638, | |
| "nll_loss": 1.0703125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.1025390625, | |
| "rewards/margins": 0.0228271484375, | |
| "rewards/rejected": -0.125, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 2.5736254747923923, | |
| "learning_rate": 2.4862452854477784e-07, | |
| "log_odds_chosen": 0.3209228515625, | |
| "log_odds_ratio": -0.65576171875, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.8984375, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.166, | |
| "nll_loss": 1.046875, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.0225830078125, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 2.7098667746876073, | |
| "learning_rate": 2.4518612314523265e-07, | |
| "log_odds_chosen": 0.08408202975988388, | |
| "log_odds_ratio": -0.732421875, | |
| "logits/chosen": -1.84375, | |
| "logits/rejected": -1.953125, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.140625, | |
| "loss": 1.1805, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.00457763671875, | |
| "rewards/rejected": -0.1142578125, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 2.7073252776256966, | |
| "learning_rate": 2.4174862848833806e-07, | |
| "log_odds_chosen": 0.20045165717601776, | |
| "log_odds_ratio": -0.67236328125, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -1.8359375, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.2051, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.0118408203125, | |
| "rewards/rejected": -0.119140625, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 2.7514653552282233, | |
| "learning_rate": 2.3831269491748467e-07, | |
| "log_odds_chosen": 0.22596435248851776, | |
| "log_odds_ratio": -0.708984375, | |
| "logits/chosen": -1.796875, | |
| "logits/rejected": -1.8828125, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.217, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.017333984375, | |
| "rewards/rejected": -0.12890625, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 2.8823498677475183, | |
| "learning_rate": 2.3487897248071941e-07, | |
| "log_odds_chosen": 0.2939697206020355, | |
| "log_odds_ratio": -0.664257824420929, | |
| "logits/chosen": -1.7890625, | |
| "logits/rejected": -1.9375, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.25, | |
| "loss": 1.1892, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 2.69332509317782, | |
| "learning_rate": 2.314481108077624e-07, | |
| "log_odds_chosen": 0.1607666015625, | |
| "log_odds_ratio": -0.6968749761581421, | |
| "logits/chosen": -1.8203125, | |
| "logits/rejected": -1.8515625, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.1796875, | |
| "loss": 1.1978, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.01068115234375, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 2.5989208277674356, | |
| "learning_rate": 2.280207589871026e-07, | |
| "log_odds_chosen": 0.3521362245082855, | |
| "log_odds_ratio": -0.642382800579071, | |
| "logits/chosen": -1.8125, | |
| "logits/rejected": -1.9375, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.1628, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.0242919921875, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 2.5631030942900805, | |
| "learning_rate": 2.2459756544319627e-07, | |
| "log_odds_chosen": 0.1890869140625, | |
| "log_odds_ratio": -0.696972668170929, | |
| "logits/chosen": -1.796875, | |
| "logits/rejected": -1.890625, | |
| "logps/chosen": -1.015625, | |
| "logps/rejected": -1.1328125, | |
| "loss": 1.1771, | |
| "nll_loss": 1.0546875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.10205078125, | |
| "rewards/margins": 0.01129150390625, | |
| "rewards/rejected": -0.11328125, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 2.7548023973263613, | |
| "learning_rate": 2.2117917781379067e-07, | |
| "log_odds_chosen": 0.19255371391773224, | |
| "log_odds_ratio": -0.679394543170929, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.859375, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.2441, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0123291015625, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 2.6382486056871177, | |
| "learning_rate": 2.177662428273968e-07, | |
| "log_odds_chosen": 0.23670653998851776, | |
| "log_odds_ratio": -0.67626953125, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -1.8984375, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.203125, | |
| "loss": 1.1895, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.0150146484375, | |
| "rewards/rejected": -0.1201171875, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 2.5099170844954317, | |
| "learning_rate": 2.1435940618093414e-07, | |
| "log_odds_chosen": 0.19310303032398224, | |
| "log_odds_ratio": -0.690625011920929, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1881, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.0140380859375, | |
| "rewards/rejected": -0.123046875, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 2.7950237991583493, | |
| "learning_rate": 2.1095931241757062e-07, | |
| "log_odds_chosen": 0.2502685487270355, | |
| "log_odds_ratio": -0.680957019329071, | |
| "logits/chosen": -1.7734375, | |
| "logits/rejected": -1.8515625, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1906, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.10400390625, | |
| "rewards/margins": 0.0185546875, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 2.609790265054367, | |
| "learning_rate": 2.075666048047806e-07, | |
| "log_odds_chosen": 0.15053710341453552, | |
| "log_odds_ratio": -0.698437511920929, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -1.828125, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.221, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.01171875, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 2.5467686003601697, | |
| "learning_rate": 2.0418192521264454e-07, | |
| "log_odds_chosen": 0.23857422173023224, | |
| "log_odds_ratio": -0.659863293170929, | |
| "logits/chosen": -1.7734375, | |
| "logits/rejected": -1.8515625, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.1898, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.01397705078125, | |
| "rewards/rejected": -0.12060546875, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 2.459650956326835, | |
| "learning_rate": 2.0080591399241292e-07, | |
| "log_odds_chosen": 0.23247070610523224, | |
| "log_odds_ratio": -0.6712890863418579, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1708, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.014892578125, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 2.47537989067237, | |
| "learning_rate": 1.9743920985535729e-07, | |
| "log_odds_chosen": 0.3998779356479645, | |
| "log_odds_ratio": -0.625781238079071, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -0.98828125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1589, | |
| "nll_loss": 1.0390625, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.09912109375, | |
| "rewards/margins": 0.0269775390625, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 2.7703541098291455, | |
| "learning_rate": 1.94082449751932e-07, | |
| "log_odds_chosen": 0.2127685546875, | |
| "log_odds_ratio": -0.6846679449081421, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.25, | |
| "loss": 1.1794, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.0157470703125, | |
| "rewards/rejected": -0.125, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 2.885795668675382, | |
| "learning_rate": 1.9073626875126874e-07, | |
| "log_odds_chosen": 0.26057130098342896, | |
| "log_odds_ratio": -0.649707019329071, | |
| "logits/chosen": -1.7734375, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.0234375, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.1671, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.1025390625, | |
| "rewards/margins": 0.01611328125, | |
| "rewards/rejected": -0.11865234375, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 2.722691086755302, | |
| "learning_rate": 1.874012999210271e-07, | |
| "log_odds_chosen": 0.19356079399585724, | |
| "log_odds_ratio": -0.694140613079071, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -1.8828125, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.25, | |
| "loss": 1.1779, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.111328125, | |
| "rewards/margins": 0.01385498046875, | |
| "rewards/rejected": -0.125, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 2.604921389210434, | |
| "learning_rate": 1.8407817420762383e-07, | |
| "log_odds_chosen": 0.26337891817092896, | |
| "log_odds_ratio": -0.6572265625, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.184, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.01806640625, | |
| "rewards/rejected": -0.126953125, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 2.732739073707677, | |
| "learning_rate": 1.8076752031686343e-07, | |
| "log_odds_chosen": 0.14312133193016052, | |
| "log_odds_ratio": -0.704882800579071, | |
| "logits/chosen": -1.7734375, | |
| "logits/rejected": -1.8671875, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.140625, | |
| "loss": 1.188, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.010498046875, | |
| "rewards/rejected": -0.1142578125, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 2.395290977769547, | |
| "learning_rate": 1.7746996459499254e-07, | |
| "log_odds_chosen": 0.242431640625, | |
| "log_odds_ratio": -0.6644531488418579, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.84375, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.1618, | |
| "nll_loss": 1.046875, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10546875, | |
| "rewards/margins": 0.016357421875, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.547454975163186, | |
| "learning_rate": 1.741861309102009e-07, | |
| "log_odds_chosen": 0.26506346464157104, | |
| "log_odds_ratio": -0.6689453125, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.1665, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": -0.12109375, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.396584161009059, | |
| "learning_rate": 1.7091664053459088e-07, | |
| "log_odds_chosen": 0.18143311142921448, | |
| "log_odds_ratio": -0.6865234375, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.8359375, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.2118, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.01043701171875, | |
| "rewards/rejected": -0.11865234375, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 2.279515658884766, | |
| "learning_rate": 1.6766211202663844e-07, | |
| "log_odds_chosen": 0.05356445163488388, | |
| "log_odds_ratio": -0.739453136920929, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.140625, | |
| "loss": 1.2049, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.111328125, | |
| "rewards/margins": 0.0027008056640625, | |
| "rewards/rejected": -0.1142578125, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 2.4004304534033265, | |
| "learning_rate": 1.6442316111416743e-07, | |
| "log_odds_chosen": 0.26105958223342896, | |
| "log_odds_ratio": -0.6631835699081421, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.1816, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.0184326171875, | |
| "rewards/rejected": -0.126953125, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 2.763579524745402, | |
| "learning_rate": 1.6120040057785928e-07, | |
| "log_odds_chosen": 0.29625242948532104, | |
| "log_odds_ratio": -0.6499999761581421, | |
| "logits/chosen": -1.8125, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.1727, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.0194091796875, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 2.8036257747935154, | |
| "learning_rate": 1.5799444013532038e-07, | |
| "log_odds_chosen": 0.23708495497703552, | |
| "log_odds_ratio": -0.67431640625, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.1238, | |
| "nll_loss": 1.0859375, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.01556396484375, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 2.6570430804218566, | |
| "learning_rate": 1.5480588632572885e-07, | |
| "log_odds_chosen": 0.37006837129592896, | |
| "log_odds_ratio": -0.635937511920929, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -1.8046875, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.2890625, | |
| "loss": 1.1907, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.025390625, | |
| "rewards/rejected": -0.12890625, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 2.590262402608133, | |
| "learning_rate": 1.516353423950829e-07, | |
| "log_odds_chosen": 0.3837524354457855, | |
| "log_odds_ratio": -0.625, | |
| "logits/chosen": -1.796875, | |
| "logits/rejected": -1.9140625, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.1898, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.1044921875, | |
| "rewards/margins": 0.0235595703125, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 2.568483644438331, | |
| "learning_rate": 1.4848340818207184e-07, | |
| "log_odds_chosen": 0.26896971464157104, | |
| "log_odds_ratio": -0.66455078125, | |
| "logits/chosen": -1.75, | |
| "logits/rejected": -1.859375, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1799, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.019287109375, | |
| "rewards/rejected": -0.1240234375, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 3.318714021827115, | |
| "learning_rate": 1.453506800045921e-07, | |
| "log_odds_chosen": 0.12944336235523224, | |
| "log_odds_ratio": -0.7064453363418579, | |
| "logits/chosen": -1.75, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.1796875, | |
| "loss": 1.2096, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.00970458984375, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 2.7195091239689426, | |
| "learning_rate": 1.422377505469293e-07, | |
| "log_odds_chosen": 0.14760741591453552, | |
| "log_odds_ratio": -0.72802734375, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.75, | |
| "logps/chosen": -1.1328125, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.2695, | |
| "nll_loss": 1.234375, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.11328125, | |
| "rewards/margins": 0.00823974609375, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 2.4116745200019696, | |
| "learning_rate": 1.3914520874762726e-07, | |
| "log_odds_chosen": 0.2623352110385895, | |
| "log_odds_ratio": -0.6844726800918579, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.2307, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.111328125, | |
| "rewards/margins": 0.0191650390625, | |
| "rewards/rejected": -0.130859375, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 2.6643696734974127, | |
| "learning_rate": 1.3607363968806645e-07, | |
| "log_odds_chosen": 0.3259033262729645, | |
| "log_odds_ratio": -0.623046875, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.78125, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2087, | |
| "nll_loss": 1.046875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.10302734375, | |
| "rewards/margins": 0.022216796875, | |
| "rewards/rejected": -0.125, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 2.734224521952181, | |
| "learning_rate": 1.3302362448177167e-07, | |
| "log_odds_chosen": 0.30589598417282104, | |
| "log_odds_ratio": -0.6283203363418579, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.1853, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.0198974609375, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 3.213949669653505, | |
| "learning_rate": 1.2999574016447056e-07, | |
| "log_odds_chosen": 0.3102783262729645, | |
| "log_odds_ratio": -0.6421874761581421, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.196, | |
| "nll_loss": 1.0703125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1044921875, | |
| "rewards/margins": 0.0211181640625, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 2.3847902873067492, | |
| "learning_rate": 1.2699055958492344e-07, | |
| "log_odds_chosen": 0.19971923530101776, | |
| "log_odds_ratio": -0.6748046875, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.171875, | |
| "loss": 1.2064, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1044921875, | |
| "rewards/margins": 0.01287841796875, | |
| "rewards/rejected": -0.11767578125, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 2.5039350155364573, | |
| "learning_rate": 1.2400865129654567e-07, | |
| "log_odds_chosen": 0.27821046113967896, | |
| "log_odds_ratio": -0.652050793170929, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1371, | |
| "nll_loss": 1.0703125, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.10400390625, | |
| "rewards/margins": 0.018310546875, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 2.5879591956281995, | |
| "learning_rate": 1.210505794498422e-07, | |
| "log_odds_chosen": 0.23630371689796448, | |
| "log_odds_ratio": -0.673828125, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1706, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0164794921875, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 2.752234308496576, | |
| "learning_rate": 1.1811690368567545e-07, | |
| "log_odds_chosen": 0.14584961533546448, | |
| "log_odds_ratio": -0.6947265863418579, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.1328125, | |
| "loss": 1.236, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.1044921875, | |
| "rewards/margins": 0.00872802734375, | |
| "rewards/rejected": -0.11328125, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 2.573963727957766, | |
| "learning_rate": 1.1520817902938618e-07, | |
| "log_odds_chosen": 0.07918091118335724, | |
| "log_odds_ratio": -0.7347656488418579, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.78125, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.1915, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.11279296875, | |
| "rewards/margins": 0.00604248046875, | |
| "rewards/rejected": -0.11865234375, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 2.7806823611081177, | |
| "learning_rate": 1.1232495578578755e-07, | |
| "log_odds_chosen": 0.15264892578125, | |
| "log_odds_ratio": -0.6976562738418579, | |
| "logits/chosen": -1.75, | |
| "logits/rejected": -1.78125, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.1171875, | |
| "loss": 1.1626, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.00823974609375, | |
| "rewards/rejected": -0.11181640625, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 2.436201094808605, | |
| "learning_rate": 1.0946777943505254e-07, | |
| "log_odds_chosen": 0.23690184950828552, | |
| "log_odds_ratio": -0.6917968988418579, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.78125, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1971, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.0167236328125, | |
| "rewards/rejected": -0.123046875, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 2.417259454035703, | |
| "learning_rate": 1.0663719052951381e-07, | |
| "log_odds_chosen": 0.19016113877296448, | |
| "log_odds_ratio": -0.699414074420929, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.1953125, | |
| "loss": 1.1861, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.0107421875, | |
| "rewards/rejected": -0.119140625, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 2.340681686492189, | |
| "learning_rate": 1.0383372459139608e-07, | |
| "log_odds_chosen": 0.30018919706344604, | |
| "log_odds_ratio": -0.6387695074081421, | |
| "logits/chosen": -1.78125, | |
| "logits/rejected": -1.921875, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1958, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.0191650390625, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 2.530421254724575, | |
| "learning_rate": 1.0105791201150002e-07, | |
| "log_odds_chosen": 0.3886962831020355, | |
| "log_odds_ratio": -0.620312511920929, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.3359375, | |
| "loss": 1.16, | |
| "nll_loss": 1.046875, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.027587890625, | |
| "rewards/rejected": -0.1337890625, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 2.6492828085260225, | |
| "learning_rate": 9.831027794885713e-08, | |
| "log_odds_chosen": 0.34185791015625, | |
| "log_odds_ratio": -0.6444336175918579, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1.0234375, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.1779, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.1025390625, | |
| "rewards/margins": 0.0211181640625, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 2.6971126252475286, | |
| "learning_rate": 9.559134223137424e-08, | |
| "log_odds_chosen": 0.2640136778354645, | |
| "log_odds_ratio": -0.673046886920929, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -1.8046875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.2186, | |
| "nll_loss": 1.171875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.016845703125, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 2.768346463128813, | |
| "learning_rate": 9.290161925748674e-08, | |
| "log_odds_chosen": 0.333740234375, | |
| "log_odds_ratio": -0.6434570550918579, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.328125, | |
| "loss": 1.1758, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.0224609375, | |
| "rewards/rejected": -0.1328125, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 2.4131576506486168, | |
| "learning_rate": 9.024161789883897e-08, | |
| "log_odds_chosen": 0.13895873725414276, | |
| "log_odds_ratio": -0.695507824420929, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.1171875, | |
| "loss": 1.1368, | |
| "nll_loss": 1.0625, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10302734375, | |
| "rewards/margins": 0.009033203125, | |
| "rewards/rejected": -0.1123046875, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 2.5654624061461253, | |
| "learning_rate": 8.761184140401023e-08, | |
| "log_odds_chosen": 0.25886231660842896, | |
| "log_odds_ratio": -0.662109375, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.203125, | |
| "loss": 1.1906, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.10400390625, | |
| "rewards/margins": 0.0164794921875, | |
| "rewards/rejected": -0.12060546875, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 2.6681020896294676, | |
| "learning_rate": 8.501278730330463e-08, | |
| "log_odds_chosen": 0.36528319120407104, | |
| "log_odds_ratio": -0.626269519329071, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.1977, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.02490234375, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 2.7519831354278512, | |
| "learning_rate": 8.244494731462279e-08, | |
| "log_odds_chosen": 0.24447020888328552, | |
| "log_odds_ratio": -0.6788085699081421, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.703125, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.1953125, | |
| "loss": 1.164, | |
| "nll_loss": 1.0546875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.015869140625, | |
| "rewards/rejected": -0.119140625, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 2.870452087544285, | |
| "learning_rate": 7.990880725043322e-08, | |
| "log_odds_chosen": 0.2567138671875, | |
| "log_odds_ratio": -0.670703113079071, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.1622, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.0167236328125, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 2.6968676817822645, | |
| "learning_rate": 7.740484692586074e-08, | |
| "log_odds_chosen": 0.2530761659145355, | |
| "log_odds_ratio": -0.680371105670929, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -1.1796875, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.2242, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.11767578125, | |
| "rewards/margins": 0.0174560546875, | |
| "rewards/rejected": -0.1357421875, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 2.735947390317322, | |
| "learning_rate": 7.493354006791006e-08, | |
| "log_odds_chosen": 0.24350586533546448, | |
| "log_odds_ratio": -0.682421863079071, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1973, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.10546875, | |
| "rewards/margins": 0.0169677734375, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 2.4135041559261885, | |
| "learning_rate": 7.249535422584055e-08, | |
| "log_odds_chosen": 0.19566650688648224, | |
| "log_odds_ratio": -0.6849609613418579, | |
| "logits/chosen": -1.8125, | |
| "logits/rejected": -1.84375, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.1835, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.01129150390625, | |
| "rewards/rejected": -0.119140625, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 2.3578015206878575, | |
| "learning_rate": 7.009075068271031e-08, | |
| "log_odds_chosen": 0.12241820991039276, | |
| "log_odds_ratio": -0.7289062738418579, | |
| "logits/chosen": -1.578125, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.1796875, | |
| "loss": 1.1747, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0087890625, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 2.7517093669068933, | |
| "learning_rate": 6.772018436810525e-08, | |
| "log_odds_chosen": 0.34681397676467896, | |
| "log_odds_ratio": -0.616992175579071, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.34375, | |
| "loss": 1.1863, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.0240478515625, | |
| "rewards/rejected": -0.134765625, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 2.758447250920097, | |
| "learning_rate": 6.538410377207082e-08, | |
| "log_odds_chosen": 0.03367309644818306, | |
| "log_odds_ratio": -0.773144543170929, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.15625, | |
| "loss": 1.2376, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.0030364990234375, | |
| "rewards/rejected": -0.115234375, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 2.880552308468262, | |
| "learning_rate": 6.308295086026133e-08, | |
| "log_odds_chosen": 0.17825928330421448, | |
| "log_odds_ratio": -0.696972668170929, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2262, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1123046875, | |
| "rewards/margins": 0.01220703125, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 2.439317791546686, | |
| "learning_rate": 6.081716099032417e-08, | |
| "log_odds_chosen": 0.3602050840854645, | |
| "log_odds_ratio": -0.6319335699081421, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -0.99609375, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1792, | |
| "nll_loss": 1.0625, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.099609375, | |
| "rewards/margins": 0.024169921875, | |
| "rewards/rejected": -0.1240234375, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 2.810922238332508, | |
| "learning_rate": 5.858716282953407e-08, | |
| "log_odds_chosen": 0.24152831733226776, | |
| "log_odds_ratio": -0.654492199420929, | |
| "logits/chosen": -1.6328125, | |
| "logits/rejected": -1.75, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1982, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0166015625, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 2.5961073589782466, | |
| "learning_rate": 5.639337827369289e-08, | |
| "log_odds_chosen": 0.17608642578125, | |
| "log_odds_ratio": -0.702832043170929, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.1171875, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1879, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.111328125, | |
| "rewards/margins": 0.0111083984375, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 2.3150779833374266, | |
| "learning_rate": 5.4236222367310816e-08, | |
| "log_odds_chosen": 0.29583740234375, | |
| "log_odds_ratio": -0.653124988079071, | |
| "logits/chosen": -1.578125, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.1965, | |
| "nll_loss": 1.0859375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.020263671875, | |
| "rewards/rejected": -0.126953125, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 2.1606096674823068, | |
| "learning_rate": 5.211610322508364e-08, | |
| "log_odds_chosen": 0.15689697861671448, | |
| "log_odds_ratio": -0.70263671875, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.1640625, | |
| "loss": 1.1801, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.0098876953125, | |
| "rewards/rejected": -0.11669921875, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 2.547538507688686, | |
| "learning_rate": 5.003342195468102e-08, | |
| "log_odds_chosen": 0.221527099609375, | |
| "log_odds_ratio": -0.66357421875, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.640625, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.203125, | |
| "loss": 1.2133, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.01263427734375, | |
| "rewards/rejected": -0.11962890625, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 2.8414624562639546, | |
| "learning_rate": 4.798857258086053e-08, | |
| "log_odds_chosen": 0.25762939453125, | |
| "log_odds_ratio": -0.6714843511581421, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2074, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.0186767578125, | |
| "rewards/rejected": -0.125, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 3.120660574019042, | |
| "learning_rate": 4.5981941970921646e-08, | |
| "log_odds_chosen": 0.48786622285842896, | |
| "log_odds_ratio": -0.591992199420929, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.375, | |
| "loss": 1.1441, | |
| "nll_loss": 1.0390625, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.10302734375, | |
| "rewards/margins": 0.034423828125, | |
| "rewards/rejected": -0.1376953125, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 2.635871013914355, | |
| "learning_rate": 4.4013909761513894e-08, | |
| "log_odds_chosen": 0.2707275450229645, | |
| "log_odds_ratio": -0.649218738079071, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.265625, | |
| "loss": 1.1887, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0169677734375, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 3.1511666169638346, | |
| "learning_rate": 4.2084848286813105e-08, | |
| "log_odds_chosen": 0.3526855409145355, | |
| "log_odds_ratio": -0.6600586175918579, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.34375, | |
| "loss": 1.1851, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.0281982421875, | |
| "rewards/rejected": -0.134765625, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 2.7305670197672747, | |
| "learning_rate": 4.0195122508078886e-08, | |
| "log_odds_chosen": 0.27125245332717896, | |
| "log_odds_ratio": -0.65625, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1709, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.017822265625, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 3.2312418757529726, | |
| "learning_rate": 3.834508994460736e-08, | |
| "log_odds_chosen": 0.23995360732078552, | |
| "log_odds_ratio": -0.654589831829071, | |
| "logits/chosen": -1.578125, | |
| "logits/rejected": -1.640625, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.1875, | |
| "loss": 1.1604, | |
| "nll_loss": 1.046875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.103515625, | |
| "rewards/margins": 0.0150146484375, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 2.874032565275268, | |
| "learning_rate": 3.653510060609166e-08, | |
| "log_odds_chosen": 0.13707275688648224, | |
| "log_odds_ratio": -0.7138671875, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.1484375, | |
| "loss": 1.173, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.00848388671875, | |
| "rewards/rejected": -0.1142578125, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 2.6604754366861822, | |
| "learning_rate": 3.476549692640316e-08, | |
| "log_odds_chosen": 0.34288328886032104, | |
| "log_odds_ratio": -0.620312511920929, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -1.0, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.1585, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.099609375, | |
| "rewards/margins": 0.0218505859375, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 2.71892900942932, | |
| "learning_rate": 3.3036613698806085e-08, | |
| "log_odds_chosen": 0.21519775688648224, | |
| "log_odds_ratio": -0.691601574420929, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.2288, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.01434326171875, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 2.878603239597823, | |
| "learning_rate": 3.134877801261765e-08, | |
| "log_odds_chosen": 0.3372802734375, | |
| "log_odds_ratio": -0.642285168170929, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.328125, | |
| "loss": 1.2136, | |
| "nll_loss": 1.1640625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.11083984375, | |
| "rewards/margins": 0.0218505859375, | |
| "rewards/rejected": -0.1328125, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 2.6992535601969085, | |
| "learning_rate": 2.9702309191325492e-08, | |
| "log_odds_chosen": 0.24042968451976776, | |
| "log_odds_ratio": -0.675585925579071, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.2242, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.0159912109375, | |
| "rewards/rejected": -0.123046875, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 2.971420026998493, | |
| "learning_rate": 2.809751873217478e-08, | |
| "log_odds_chosen": 0.32117921113967896, | |
| "log_odds_ratio": -0.6463867425918579, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.1702, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.0218505859375, | |
| "rewards/rejected": -0.130859375, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 2.311036000971507, | |
| "learning_rate": 2.653471024723547e-08, | |
| "log_odds_chosen": 0.43181151151657104, | |
| "log_odds_ratio": -0.5894531011581421, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1.015625, | |
| "logps/rejected": -1.3046875, | |
| "loss": 1.1592, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.1015625, | |
| "rewards/margins": 0.0289306640625, | |
| "rewards/rejected": -0.130859375, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 2.5416101230783363, | |
| "learning_rate": 2.501417940596168e-08, | |
| "log_odds_chosen": 0.02521972730755806, | |
| "log_odds_ratio": -0.75, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.109375, | |
| "loss": 1.1748, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.000507354736328125, | |
| "rewards/rejected": -0.11083984375, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 2.471954369214634, | |
| "learning_rate": 2.353621387925375e-08, | |
| "log_odds_chosen": 0.3322509825229645, | |
| "log_odds_ratio": -0.6597656011581421, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1634, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.10302734375, | |
| "rewards/margins": 0.022705078125, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 2.801308375939805, | |
| "learning_rate": 2.2101093285033373e-08, | |
| "log_odds_chosen": 0.3058715760707855, | |
| "log_odds_ratio": -0.6502929925918579, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.25, | |
| "loss": 1.1307, | |
| "nll_loss": 1.03125, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.020263671875, | |
| "rewards/rejected": -0.125, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 2.6457419438315233, | |
| "learning_rate": 2.070908913534236e-08, | |
| "log_odds_chosen": 0.24928589165210724, | |
| "log_odds_ratio": -0.6776367425918579, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.2119, | |
| "nll_loss": 1.1953125, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.10791015625, | |
| "rewards/margins": 0.015869140625, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 2.8172476905649764, | |
| "learning_rate": 1.9360464784975024e-08, | |
| "log_odds_chosen": 0.21148681640625, | |
| "log_odds_ratio": -0.6849609613418579, | |
| "logits/chosen": -1.5625, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.1771, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.0146484375, | |
| "rewards/rejected": -0.1220703125, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 2.963550785069561, | |
| "learning_rate": 1.8055475381653807e-08, | |
| "log_odds_chosen": 0.27608031034469604, | |
| "log_odds_ratio": -0.6513671875, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.046875, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.2014, | |
| "nll_loss": 1.109375, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.1044921875, | |
| "rewards/margins": 0.018798828125, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 3.1613513759896534, | |
| "learning_rate": 1.679436781775759e-08, | |
| "log_odds_chosen": 0.31138914823532104, | |
| "log_odds_ratio": -0.675976574420929, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.1978, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.11083984375, | |
| "rewards/margins": 0.0213623046875, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 2.92153711849868, | |
| "learning_rate": 1.5577380683611807e-08, | |
| "log_odds_chosen": 0.2562316954135895, | |
| "log_odds_ratio": -0.6595703363418579, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.21875, | |
| "loss": 1.198, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.10546875, | |
| "rewards/margins": 0.0164794921875, | |
| "rewards/rejected": -0.12158203125, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 3.486180847986093, | |
| "learning_rate": 1.4404744222349358e-08, | |
| "log_odds_chosen": 0.48161619901657104, | |
| "log_odds_ratio": -0.605664074420929, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.359375, | |
| "loss": 1.172, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.10302734375, | |
| "rewards/margins": 0.032470703125, | |
| "rewards/rejected": -0.1357421875, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 2.50535325154869, | |
| "learning_rate": 1.3276680286350594e-08, | |
| "log_odds_chosen": 0.31635743379592896, | |
| "log_odds_ratio": -0.641796886920929, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.204, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.0203857421875, | |
| "rewards/rejected": -0.126953125, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 2.6381410134685392, | |
| "learning_rate": 1.2193402295270854e-08, | |
| "log_odds_chosen": 0.2996459901332855, | |
| "log_odds_ratio": -0.6700195074081421, | |
| "logits/chosen": -1.5234375, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1.03125, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.1512, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.10302734375, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 2.344444293869932, | |
| "learning_rate": 1.115511519566334e-08, | |
| "log_odds_chosen": 0.3412719666957855, | |
| "log_odds_ratio": -0.657519519329071, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -0.98828125, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.1698, | |
| "nll_loss": 1.03125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.0986328125, | |
| "rewards/margins": 0.0244140625, | |
| "rewards/rejected": -0.123046875, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 2.277202951820151, | |
| "learning_rate": 1.01620154222051e-08, | |
| "log_odds_chosen": 0.14781494438648224, | |
| "log_odds_ratio": -0.708984375, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.6953125, | |
| "logps/chosen": -1.125, | |
| "logps/rejected": -1.203125, | |
| "loss": 1.1759, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.11181640625, | |
| "rewards/margins": 0.0079345703125, | |
| "rewards/rejected": -0.1201171875, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 2.5811302552961943, | |
| "learning_rate": 9.214290860533242e-09, | |
| "log_odds_chosen": 0.22308655083179474, | |
| "log_odds_ratio": -0.6734374761581421, | |
| "logits/chosen": -1.6640625, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.205, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1103515625, | |
| "rewards/margins": 0.0133056640625, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 2.4190439831152326, | |
| "learning_rate": 8.312120811698798e-09, | |
| "log_odds_chosen": 0.24127808213233948, | |
| "log_odds_ratio": -0.6958984136581421, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.1753, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1064453125, | |
| "rewards/margins": 0.017333984375, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 2.3733902102708897, | |
| "learning_rate": 7.455675958244422e-09, | |
| "log_odds_chosen": 0.2683349549770355, | |
| "log_odds_ratio": -0.660449206829071, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -1.078125, | |
| "logps/rejected": -1.2578125, | |
| "loss": 1.1939, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.0184326171875, | |
| "rewards/rejected": -0.1259765625, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 2.581589417454669, | |
| "learning_rate": 6.64511833191278e-09, | |
| "log_odds_chosen": 0.16912230849266052, | |
| "log_odds_ratio": -0.703125, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.1796875, | |
| "loss": 1.1997, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.01220703125, | |
| "rewards/rejected": -0.1181640625, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 2.8853418036737297, | |
| "learning_rate": 5.8806012829916985e-09, | |
| "log_odds_chosen": 0.3464111387729645, | |
| "log_odds_ratio": -0.615234375, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -1.0078125, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.1569, | |
| "nll_loss": 1.078125, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.10107421875, | |
| "rewards/margins": 0.02294921875, | |
| "rewards/rejected": -0.1240234375, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 2.5807272249096913, | |
| "learning_rate": 5.162269451301576e-09, | |
| "log_odds_chosen": 0.16444091498851776, | |
| "log_odds_ratio": -0.6973632574081421, | |
| "logits/chosen": -1.78125, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.2152, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.01202392578125, | |
| "rewards/rejected": -0.12060546875, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 2.6925182997680515, | |
| "learning_rate": 4.490258738830771e-09, | |
| "log_odds_chosen": 0.2374267578125, | |
| "log_odds_ratio": -0.6766601800918579, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1.15625, | |
| "logps/rejected": -1.3203125, | |
| "loss": 1.2008, | |
| "nll_loss": 1.1796875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.115234375, | |
| "rewards/margins": 0.016357421875, | |
| "rewards/rejected": -0.1318359375, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 3.01082383722649, | |
| "learning_rate": 3.864696284024249e-09, | |
| "log_odds_chosen": 0.38875120878219604, | |
| "log_odds_ratio": -0.6109374761581421, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.3359375, | |
| "loss": 1.1823, | |
| "nll_loss": 1.0859375, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.028076171875, | |
| "rewards/rejected": -0.1337890625, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 2.7770288933270755, | |
| "learning_rate": 3.285700437730077e-09, | |
| "log_odds_chosen": 0.35822755098342896, | |
| "log_odds_ratio": -0.6693359613418579, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -1.0859375, | |
| "logps/rejected": -1.3515625, | |
| "loss": 1.1699, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1083984375, | |
| "rewards/margins": 0.0264892578125, | |
| "rewards/rejected": -0.134765625, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 2.5907890754339262, | |
| "learning_rate": 2.7533807408084973e-09, | |
| "log_odds_chosen": 0.17686156928539276, | |
| "log_odds_ratio": -0.698535144329071, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -1.1015625, | |
| "logps/rejected": -1.2109375, | |
| "loss": 1.1861, | |
| "nll_loss": 1.140625, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.10986328125, | |
| "rewards/margins": 0.01153564453125, | |
| "rewards/rejected": -0.12109375, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 2.4966778392886955, | |
| "learning_rate": 2.2678379034077877e-09, | |
| "log_odds_chosen": 0.2807373106479645, | |
| "log_odds_ratio": -0.6429687738418579, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.2285, | |
| "nll_loss": 1.1484375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.10888671875, | |
| "rewards/margins": 0.018310546875, | |
| "rewards/rejected": -0.126953125, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 2.3101513660116466, | |
| "learning_rate": 1.82916378591072e-09, | |
| "log_odds_chosen": 0.35594481229782104, | |
| "log_odds_ratio": -0.6319335699081421, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.1772, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.10546875, | |
| "rewards/margins": 0.0225830078125, | |
| "rewards/rejected": -0.1279296875, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 2.550820385872949, | |
| "learning_rate": 1.4374413815555763e-09, | |
| "log_odds_chosen": 0.21019287407398224, | |
| "log_odds_ratio": -0.6913086175918579, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.25, | |
| "loss": 1.2096, | |
| "nll_loss": 1.15625, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0157470703125, | |
| "rewards/rejected": -0.125, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 2.3418264361304293, | |
| "learning_rate": 1.0927448007343188e-09, | |
| "log_odds_chosen": 0.2827392518520355, | |
| "log_odds_ratio": -0.6519531011581421, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.2734375, | |
| "loss": 1.2057, | |
| "nll_loss": 1.1328125, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.10693359375, | |
| "rewards/margins": 0.0203857421875, | |
| "rewards/rejected": -0.126953125, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 3.0181770379881936, | |
| "learning_rate": 7.951392569717774e-10, | |
| "log_odds_chosen": 0.32861328125, | |
| "log_odds_ratio": -0.641406238079071, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1.09375, | |
| "logps/rejected": -1.296875, | |
| "loss": 1.2017, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.109375, | |
| "rewards/margins": 0.0206298828125, | |
| "rewards/rejected": -0.1298828125, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 2.6598066518808965, | |
| "learning_rate": 5.446810545877423e-10, | |
| "log_odds_chosen": 0.29930418729782104, | |
| "log_odds_ratio": -0.6373046636581421, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1.0703125, | |
| "logps/rejected": -1.28125, | |
| "loss": 1.1938, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.107421875, | |
| "rewards/margins": 0.021240234375, | |
| "rewards/rejected": -0.12890625, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 2.612456535550154, | |
| "learning_rate": 3.414175780446227e-10, | |
| "log_odds_chosen": 0.27032470703125, | |
| "log_odds_ratio": -0.64501953125, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1.0625, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.2178, | |
| "nll_loss": 1.125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.10595703125, | |
| "rewards/margins": 0.0167236328125, | |
| "rewards/rejected": -0.12255859375, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 2.5663855569599123, | |
| "learning_rate": 1.8538728298292395e-10, | |
| "log_odds_chosen": 0.25556641817092896, | |
| "log_odds_ratio": -0.6698242425918579, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -1.0546875, | |
| "logps/rejected": -1.234375, | |
| "loss": 1.198, | |
| "nll_loss": 1.1015625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.10546875, | |
| "rewards/margins": 0.01806640625, | |
| "rewards/rejected": -0.12353515625, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 2.5581951001300336, | |
| "learning_rate": 7.661968894551174e-11, | |
| "log_odds_chosen": 0.3158630430698395, | |
| "log_odds_ratio": -0.636425793170929, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.828125, | |
| "logps/chosen": -1.0234375, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.1801, | |
| "nll_loss": 1.09375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.1025390625, | |
| "rewards/margins": 0.0205078125, | |
| "rewards/rejected": -0.123046875, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 2.6003805241560958, | |
| "learning_rate": 1.513537379305152e-11, | |
| "log_odds_chosen": 0.20820312201976776, | |
| "log_odds_ratio": -0.664843738079071, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -1.109375, | |
| "logps/rejected": -1.2421875, | |
| "loss": 1.2092, | |
| "nll_loss": 1.1171875, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.11083984375, | |
| "rewards/margins": 0.0135498046875, | |
| "rewards/rejected": -0.12451171875, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 2538, | |
| "total_flos": 0.0, | |
| "train_loss": 1.2334148878183206, | |
| "train_runtime": 17382.4906, | |
| "train_samples_per_second": 7.009, | |
| "train_steps_per_second": 0.146 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2538, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |