{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7843137254901961,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0392156862745098,
      "grad_norm": 8.043305397033691,
      "learning_rate": 0.0,
      "loss": 3.2159,
      "step": 5
    },
    {
      "epoch": 0.0784313725490196,
      "grad_norm": 13.835046768188477,
      "learning_rate": 0.0001859375,
      "loss": 2.8785,
      "step": 10
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 9.165813446044922,
      "learning_rate": 0.000178125,
      "loss": 1.897,
      "step": 15
    },
    {
      "epoch": 0.1568627450980392,
      "grad_norm": 10.37258243560791,
      "learning_rate": 0.0001703125,
      "loss": 1.3101,
      "step": 20
    },
    {
      "epoch": 0.19607843137254902,
      "grad_norm": 22.90947914123535,
      "learning_rate": 0.00016250000000000002,
      "loss": 1.1758,
      "step": 25
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 12.246542930603027,
      "learning_rate": 0.0001546875,
      "loss": 0.9672,
      "step": 30
    },
    {
      "epoch": 0.27450980392156865,
      "grad_norm": 7.604257583618164,
      "learning_rate": 0.000146875,
      "loss": 0.869,
      "step": 35
    },
    {
      "epoch": 0.3137254901960784,
      "grad_norm": 8.037153244018555,
      "learning_rate": 0.0001390625,
      "loss": 0.7768,
      "step": 40
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 10.111647605895996,
      "learning_rate": 0.00013125000000000002,
      "loss": 0.7666,
      "step": 45
    },
    {
      "epoch": 0.39215686274509803,
      "grad_norm": 11.63117504119873,
      "learning_rate": 0.0001234375,
      "loss": 0.6542,
      "step": 50
    },
    {
      "epoch": 0.43137254901960786,
      "grad_norm": 8.213428497314453,
      "learning_rate": 0.000115625,
      "loss": 0.5614,
      "step": 55
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 7.769505500793457,
      "learning_rate": 0.00010781250000000001,
      "loss": 0.6109,
      "step": 60
    },
    {
      "epoch": 0.5098039215686274,
      "grad_norm": 8.566787719726562,
      "learning_rate": 0.0001,
      "loss": 0.6365,
      "step": 65
    },
    {
      "epoch": 0.5490196078431373,
      "grad_norm": 11.48520278930664,
      "learning_rate": 9.21875e-05,
      "loss": 0.6551,
      "step": 70
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 8.8996000289917,
      "learning_rate": 8.4375e-05,
      "loss": 0.5576,
      "step": 75
    },
    {
      "epoch": 0.6274509803921569,
      "grad_norm": 7.0039896965026855,
      "learning_rate": 7.65625e-05,
      "loss": 0.5148,
      "step": 80
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 8.207042694091797,
      "learning_rate": 6.875e-05,
      "loss": 0.586,
      "step": 85
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 9.059426307678223,
      "learning_rate": 6.0937500000000004e-05,
      "loss": 0.5019,
      "step": 90
    },
    {
      "epoch": 0.7450980392156863,
      "grad_norm": 7.633181095123291,
      "learning_rate": 5.3125000000000004e-05,
      "loss": 0.4628,
      "step": 95
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 10.623108863830566,
      "learning_rate": 4.5312500000000004e-05,
      "loss": 0.5328,
      "step": 100
    }
  ],
  "logging_steps": 5,
  "max_steps": 128,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0342495381942272e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}