Llama-3.1-8B-SFT / trainer_state.json
rshwndsz's picture
Upload folder using huggingface_hub
ae4f17e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 6644,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015051173991571343,
"grad_norm": 1.8453121109105117,
"learning_rate": 1.9972907886815173e-05,
"loss": 1.1924,
"mean_token_accuracy": 0.7275555655360222,
"num_tokens": 655360.0,
"step": 10
},
{
"epoch": 0.0030102347983142685,
"grad_norm": 1.4854652689978918,
"learning_rate": 1.9942805538832032e-05,
"loss": 1.093,
"mean_token_accuracy": 0.7368938684463501,
"num_tokens": 1310497.0,
"step": 20
},
{
"epoch": 0.004515352197471403,
"grad_norm": 1.457367925139778,
"learning_rate": 1.9912703190848888e-05,
"loss": 0.9763,
"mean_token_accuracy": 0.757565951347351,
"num_tokens": 1965857.0,
"step": 30
},
{
"epoch": 0.006020469596628537,
"grad_norm": 1.336380809253597,
"learning_rate": 1.9882600842865743e-05,
"loss": 0.9562,
"mean_token_accuracy": 0.7597963467240334,
"num_tokens": 2621217.0,
"step": 40
},
{
"epoch": 0.007525586995785672,
"grad_norm": 1.271042190976678,
"learning_rate": 1.9852498494882602e-05,
"loss": 0.9111,
"mean_token_accuracy": 0.7664208248257637,
"num_tokens": 3275522.0,
"step": 50
},
{
"epoch": 0.009030704394942806,
"grad_norm": 1.601127132462518,
"learning_rate": 1.982239614689946e-05,
"loss": 0.9061,
"mean_token_accuracy": 0.7658738359808922,
"num_tokens": 3930882.0,
"step": 60
},
{
"epoch": 0.01053582179409994,
"grad_norm": 1.2565022237536327,
"learning_rate": 1.9792293798916317e-05,
"loss": 0.9142,
"mean_token_accuracy": 0.7641694605350494,
"num_tokens": 4583016.0,
"step": 70
},
{
"epoch": 0.012040939193257074,
"grad_norm": 1.151400416011175,
"learning_rate": 1.9762191450933176e-05,
"loss": 0.8912,
"mean_token_accuracy": 0.7694499135017395,
"num_tokens": 5238022.0,
"step": 80
},
{
"epoch": 0.013546056592414209,
"grad_norm": 1.2919672366499955,
"learning_rate": 1.973208910295003e-05,
"loss": 0.8648,
"mean_token_accuracy": 0.7729788482189178,
"num_tokens": 5891895.0,
"step": 90
},
{
"epoch": 0.015051173991571343,
"grad_norm": 1.1577649641155117,
"learning_rate": 1.970198675496689e-05,
"loss": 0.8343,
"mean_token_accuracy": 0.7795944586396217,
"num_tokens": 6544042.0,
"step": 100
},
{
"epoch": 0.016556291390728478,
"grad_norm": 1.1650145577708801,
"learning_rate": 1.9671884406983746e-05,
"loss": 0.8326,
"mean_token_accuracy": 0.7799147427082062,
"num_tokens": 7197586.0,
"step": 110
},
{
"epoch": 0.018061408789885613,
"grad_norm": 1.042071712058231,
"learning_rate": 1.9641782059000605e-05,
"loss": 0.8124,
"mean_token_accuracy": 0.783945745229721,
"num_tokens": 7852585.0,
"step": 120
},
{
"epoch": 0.019566526189042744,
"grad_norm": 1.0693315253462947,
"learning_rate": 1.961167971101746e-05,
"loss": 0.8597,
"mean_token_accuracy": 0.7736718013882637,
"num_tokens": 8504289.0,
"step": 130
},
{
"epoch": 0.02107164358819988,
"grad_norm": 1.0566333790719504,
"learning_rate": 1.958157736303432e-05,
"loss": 0.8939,
"mean_token_accuracy": 0.7663755238056182,
"num_tokens": 9159625.0,
"step": 140
},
{
"epoch": 0.022576760987357013,
"grad_norm": 1.1053130861967317,
"learning_rate": 1.9551475015051175e-05,
"loss": 0.8191,
"mean_token_accuracy": 0.7820270523428917,
"num_tokens": 9814985.0,
"step": 150
},
{
"epoch": 0.024081878386514148,
"grad_norm": 1.1367777908835162,
"learning_rate": 1.9521372667068034e-05,
"loss": 0.8075,
"mean_token_accuracy": 0.7839680761098862,
"num_tokens": 10468781.0,
"step": 160
},
{
"epoch": 0.025586995785671283,
"grad_norm": 1.1232727757104992,
"learning_rate": 1.949127031908489e-05,
"loss": 0.8044,
"mean_token_accuracy": 0.7850238159298897,
"num_tokens": 11124141.0,
"step": 170
},
{
"epoch": 0.027092113184828417,
"grad_norm": 1.0465176815873063,
"learning_rate": 1.9461167971101745e-05,
"loss": 0.8152,
"mean_token_accuracy": 0.783750994503498,
"num_tokens": 11776430.0,
"step": 180
},
{
"epoch": 0.028597230583985552,
"grad_norm": 1.2945946368996537,
"learning_rate": 1.9431065623118607e-05,
"loss": 0.8485,
"mean_token_accuracy": 0.7770387619733811,
"num_tokens": 12427616.0,
"step": 190
},
{
"epoch": 0.030102347983142687,
"grad_norm": 0.9842224206674687,
"learning_rate": 1.9400963275135463e-05,
"loss": 0.8264,
"mean_token_accuracy": 0.779655097424984,
"num_tokens": 13079739.0,
"step": 200
},
{
"epoch": 0.03160746538229982,
"grad_norm": 1.0666414615153936,
"learning_rate": 1.9370860927152318e-05,
"loss": 0.8162,
"mean_token_accuracy": 0.782223354279995,
"num_tokens": 13734917.0,
"step": 210
},
{
"epoch": 0.033112582781456956,
"grad_norm": 0.9945764345827092,
"learning_rate": 1.9340758579169177e-05,
"loss": 0.8704,
"mean_token_accuracy": 0.7713055685162544,
"num_tokens": 14390277.0,
"step": 220
},
{
"epoch": 0.03461770018061409,
"grad_norm": 1.1032502230662455,
"learning_rate": 1.9310656231186033e-05,
"loss": 0.8162,
"mean_token_accuracy": 0.7820397764444351,
"num_tokens": 15044520.0,
"step": 230
},
{
"epoch": 0.036122817579771226,
"grad_norm": 0.9162140424517378,
"learning_rate": 1.9280553883202892e-05,
"loss": 0.7813,
"mean_token_accuracy": 0.7926082789897919,
"num_tokens": 15698080.0,
"step": 240
},
{
"epoch": 0.03762793497892836,
"grad_norm": 0.9769358855408936,
"learning_rate": 1.925045153521975e-05,
"loss": 0.8643,
"mean_token_accuracy": 0.7731036424636841,
"num_tokens": 16349750.0,
"step": 250
},
{
"epoch": 0.03913305237808549,
"grad_norm": 1.0281959693887788,
"learning_rate": 1.9220349187236606e-05,
"loss": 0.807,
"mean_token_accuracy": 0.785991695523262,
"num_tokens": 17005110.0,
"step": 260
},
{
"epoch": 0.040638169777242626,
"grad_norm": 1.1457279024679698,
"learning_rate": 1.9190246839253465e-05,
"loss": 0.8576,
"mean_token_accuracy": 0.7716442331671715,
"num_tokens": 17658497.0,
"step": 270
},
{
"epoch": 0.04214328717639976,
"grad_norm": 0.9884584269395141,
"learning_rate": 1.916014449127032e-05,
"loss": 0.853,
"mean_token_accuracy": 0.7742137908935547,
"num_tokens": 18312553.0,
"step": 280
},
{
"epoch": 0.043648404575556896,
"grad_norm": 1.0510970671462876,
"learning_rate": 1.9130042143287176e-05,
"loss": 0.8107,
"mean_token_accuracy": 0.7836480632424354,
"num_tokens": 18967136.0,
"step": 290
},
{
"epoch": 0.04515352197471403,
"grad_norm": 0.9766990463983991,
"learning_rate": 1.9099939795304035e-05,
"loss": 0.7804,
"mean_token_accuracy": 0.7905746832489967,
"num_tokens": 19620255.0,
"step": 300
},
{
"epoch": 0.046658639373871165,
"grad_norm": 0.9709386691440339,
"learning_rate": 1.9069837447320894e-05,
"loss": 0.8228,
"mean_token_accuracy": 0.7801810503005981,
"num_tokens": 20274486.0,
"step": 310
},
{
"epoch": 0.048163756773028296,
"grad_norm": 0.9797976606519577,
"learning_rate": 1.903973509933775e-05,
"loss": 0.7985,
"mean_token_accuracy": 0.7887556836009025,
"num_tokens": 20926124.0,
"step": 320
},
{
"epoch": 0.04966887417218543,
"grad_norm": 0.8691866193641657,
"learning_rate": 1.900963275135461e-05,
"loss": 0.7733,
"mean_token_accuracy": 0.7907425567507744,
"num_tokens": 21581484.0,
"step": 330
},
{
"epoch": 0.051173991571342566,
"grad_norm": 0.8353763531765348,
"learning_rate": 1.8979530403371464e-05,
"loss": 0.8049,
"mean_token_accuracy": 0.7866038784384728,
"num_tokens": 22236844.0,
"step": 340
},
{
"epoch": 0.0526791089704997,
"grad_norm": 1.047787346678463,
"learning_rate": 1.894942805538832e-05,
"loss": 0.801,
"mean_token_accuracy": 0.7864298403263092,
"num_tokens": 22892204.0,
"step": 350
},
{
"epoch": 0.054184226369656835,
"grad_norm": 0.9624940693468708,
"learning_rate": 1.891932570740518e-05,
"loss": 0.7777,
"mean_token_accuracy": 0.7911544889211655,
"num_tokens": 23546108.0,
"step": 360
},
{
"epoch": 0.055689343768813966,
"grad_norm": 0.9304563256474666,
"learning_rate": 1.8889223359422038e-05,
"loss": 0.7793,
"mean_token_accuracy": 0.7903302207589149,
"num_tokens": 24198050.0,
"step": 370
},
{
"epoch": 0.057194461167971104,
"grad_norm": 0.9586403555276852,
"learning_rate": 1.8859121011438893e-05,
"loss": 0.8184,
"mean_token_accuracy": 0.7825526088476181,
"num_tokens": 24850653.0,
"step": 380
},
{
"epoch": 0.058699578567128236,
"grad_norm": 0.9862346398087346,
"learning_rate": 1.8829018663455752e-05,
"loss": 0.8419,
"mean_token_accuracy": 0.7795390293002129,
"num_tokens": 25502964.0,
"step": 390
},
{
"epoch": 0.060204695966285374,
"grad_norm": 0.9988458761226531,
"learning_rate": 1.8798916315472608e-05,
"loss": 0.8113,
"mean_token_accuracy": 0.7838620856404305,
"num_tokens": 26157878.0,
"step": 400
},
{
"epoch": 0.061709813365442505,
"grad_norm": 0.9487172389927636,
"learning_rate": 1.8768813967489467e-05,
"loss": 0.7814,
"mean_token_accuracy": 0.789660955965519,
"num_tokens": 26811933.0,
"step": 410
},
{
"epoch": 0.06321493076459964,
"grad_norm": 0.9851037653184612,
"learning_rate": 1.8738711619506322e-05,
"loss": 0.8332,
"mean_token_accuracy": 0.7792101621627807,
"num_tokens": 27465720.0,
"step": 420
},
{
"epoch": 0.06472004816375677,
"grad_norm": 1.0042540429584381,
"learning_rate": 1.8708609271523178e-05,
"loss": 0.7903,
"mean_token_accuracy": 0.7871855169534683,
"num_tokens": 28121080.0,
"step": 430
},
{
"epoch": 0.06622516556291391,
"grad_norm": 0.9927572758373544,
"learning_rate": 1.867850692354004e-05,
"loss": 0.7727,
"mean_token_accuracy": 0.7941063031554222,
"num_tokens": 28774438.0,
"step": 440
},
{
"epoch": 0.06773028296207104,
"grad_norm": 0.8834285403649499,
"learning_rate": 1.8648404575556896e-05,
"loss": 0.8085,
"mean_token_accuracy": 0.7830948889255523,
"num_tokens": 29428023.0,
"step": 450
},
{
"epoch": 0.06923540036122817,
"grad_norm": 0.9113434494758214,
"learning_rate": 1.861830222757375e-05,
"loss": 0.7767,
"mean_token_accuracy": 0.7919638514518738,
"num_tokens": 30083383.0,
"step": 460
},
{
"epoch": 0.07074051776038531,
"grad_norm": 0.9341036882318858,
"learning_rate": 1.858819987959061e-05,
"loss": 0.7916,
"mean_token_accuracy": 0.7892049625515938,
"num_tokens": 30737589.0,
"step": 470
},
{
"epoch": 0.07224563515954245,
"grad_norm": 1.4816757132686853,
"learning_rate": 1.8558097531607466e-05,
"loss": 0.8253,
"mean_token_accuracy": 0.7808192431926727,
"num_tokens": 31391356.0,
"step": 480
},
{
"epoch": 0.07375075255869958,
"grad_norm": 0.9551440074457215,
"learning_rate": 1.8527995183624325e-05,
"loss": 0.7825,
"mean_token_accuracy": 0.7899382427334786,
"num_tokens": 32045281.0,
"step": 490
},
{
"epoch": 0.07525586995785671,
"grad_norm": 0.957693500309248,
"learning_rate": 1.8497892835641184e-05,
"loss": 0.8357,
"mean_token_accuracy": 0.7793286591768265,
"num_tokens": 32699618.0,
"step": 500
},
{
"epoch": 0.07676098735701385,
"grad_norm": 1.0341152031457617,
"learning_rate": 1.846779048765804e-05,
"loss": 0.7896,
"mean_token_accuracy": 0.7893010467290879,
"num_tokens": 33351567.0,
"step": 510
},
{
"epoch": 0.07826610475617098,
"grad_norm": 0.9549231501402184,
"learning_rate": 1.8437688139674898e-05,
"loss": 0.8599,
"mean_token_accuracy": 0.7733965054154396,
"num_tokens": 34003033.0,
"step": 520
},
{
"epoch": 0.07977122215532811,
"grad_norm": 0.9923234542168496,
"learning_rate": 1.8407585791691754e-05,
"loss": 0.8205,
"mean_token_accuracy": 0.7820205718278885,
"num_tokens": 34653481.0,
"step": 530
},
{
"epoch": 0.08127633955448525,
"grad_norm": 0.9524724869535042,
"learning_rate": 1.837748344370861e-05,
"loss": 0.8063,
"mean_token_accuracy": 0.7859901934862137,
"num_tokens": 35306197.0,
"step": 540
},
{
"epoch": 0.08278145695364239,
"grad_norm": 0.9976717179204758,
"learning_rate": 1.8347381095725468e-05,
"loss": 0.8214,
"mean_token_accuracy": 0.7831726789474487,
"num_tokens": 35957015.0,
"step": 550
},
{
"epoch": 0.08428657435279951,
"grad_norm": 0.9085860126433437,
"learning_rate": 1.8317278747742327e-05,
"loss": 0.7929,
"mean_token_accuracy": 0.7867233589291572,
"num_tokens": 36611115.0,
"step": 560
},
{
"epoch": 0.08579169175195665,
"grad_norm": 0.9266872864888698,
"learning_rate": 1.8287176399759183e-05,
"loss": 0.7896,
"mean_token_accuracy": 0.7881813287734986,
"num_tokens": 37266182.0,
"step": 570
},
{
"epoch": 0.08729680915111379,
"grad_norm": 0.8724843087705262,
"learning_rate": 1.825707405177604e-05,
"loss": 0.7713,
"mean_token_accuracy": 0.7928503587841987,
"num_tokens": 37920028.0,
"step": 580
},
{
"epoch": 0.08880192655027092,
"grad_norm": 0.9053691052273664,
"learning_rate": 1.8226971703792897e-05,
"loss": 0.7898,
"mean_token_accuracy": 0.789909017086029,
"num_tokens": 38575388.0,
"step": 590
},
{
"epoch": 0.09030704394942805,
"grad_norm": 0.8479670947647756,
"learning_rate": 1.8196869355809753e-05,
"loss": 0.7627,
"mean_token_accuracy": 0.7941003635525703,
"num_tokens": 39228780.0,
"step": 600
},
{
"epoch": 0.09181216134858519,
"grad_norm": 0.8878872189126786,
"learning_rate": 1.816676700782661e-05,
"loss": 0.7863,
"mean_token_accuracy": 0.7889234691858291,
"num_tokens": 39882466.0,
"step": 610
},
{
"epoch": 0.09331727874774233,
"grad_norm": 0.9378936695301893,
"learning_rate": 1.813666465984347e-05,
"loss": 0.8074,
"mean_token_accuracy": 0.7841841742396355,
"num_tokens": 40537826.0,
"step": 620
},
{
"epoch": 0.09482239614689945,
"grad_norm": 0.8704173755978687,
"learning_rate": 1.8106562311860326e-05,
"loss": 0.8057,
"mean_token_accuracy": 0.7853535667061806,
"num_tokens": 41193186.0,
"step": 630
},
{
"epoch": 0.09632751354605659,
"grad_norm": 0.980576606661472,
"learning_rate": 1.8076459963877185e-05,
"loss": 0.8312,
"mean_token_accuracy": 0.781511053442955,
"num_tokens": 41848546.0,
"step": 640
},
{
"epoch": 0.09783263094521373,
"grad_norm": 1.054602428103673,
"learning_rate": 1.804635761589404e-05,
"loss": 0.802,
"mean_token_accuracy": 0.7860093146562577,
"num_tokens": 42503565.0,
"step": 650
},
{
"epoch": 0.09933774834437085,
"grad_norm": 0.9031118355106363,
"learning_rate": 1.80162552679109e-05,
"loss": 0.771,
"mean_token_accuracy": 0.7914280131459236,
"num_tokens": 43158925.0,
"step": 660
},
{
"epoch": 0.10084286574352799,
"grad_norm": 0.8409564496447067,
"learning_rate": 1.7986152919927755e-05,
"loss": 0.8108,
"mean_token_accuracy": 0.7854543194174767,
"num_tokens": 43814285.0,
"step": 670
},
{
"epoch": 0.10234798314268513,
"grad_norm": 0.8977180215627701,
"learning_rate": 1.795605057194461e-05,
"loss": 0.7866,
"mean_token_accuracy": 0.790862138569355,
"num_tokens": 44468193.0,
"step": 680
},
{
"epoch": 0.10385310054184227,
"grad_norm": 0.9113767976777215,
"learning_rate": 1.7925948223961473e-05,
"loss": 0.7674,
"mean_token_accuracy": 0.7942569464445114,
"num_tokens": 45119814.0,
"step": 690
},
{
"epoch": 0.1053582179409994,
"grad_norm": 0.9283532440136132,
"learning_rate": 1.789584587597833e-05,
"loss": 0.788,
"mean_token_accuracy": 0.7885141059756279,
"num_tokens": 45773988.0,
"step": 700
},
{
"epoch": 0.10686333534015653,
"grad_norm": 0.8961049192838797,
"learning_rate": 1.7865743527995184e-05,
"loss": 0.7746,
"mean_token_accuracy": 0.7916218876838684,
"num_tokens": 46429348.0,
"step": 710
},
{
"epoch": 0.10836845273931367,
"grad_norm": 0.8940233613616361,
"learning_rate": 1.7835641180012043e-05,
"loss": 0.7571,
"mean_token_accuracy": 0.7955988764762878,
"num_tokens": 47082837.0,
"step": 720
},
{
"epoch": 0.1098735701384708,
"grad_norm": 0.9041335332517931,
"learning_rate": 1.78055388320289e-05,
"loss": 0.7784,
"mean_token_accuracy": 0.7905792102217675,
"num_tokens": 47738197.0,
"step": 730
},
{
"epoch": 0.11137868753762793,
"grad_norm": 0.8763038167699705,
"learning_rate": 1.7775436484045757e-05,
"loss": 0.8217,
"mean_token_accuracy": 0.7831670209765434,
"num_tokens": 48391556.0,
"step": 740
},
{
"epoch": 0.11288380493678507,
"grad_norm": 0.9585333018523788,
"learning_rate": 1.7745334136062616e-05,
"loss": 0.8045,
"mean_token_accuracy": 0.7855499908328056,
"num_tokens": 49046501.0,
"step": 750
},
{
"epoch": 0.11438892233594221,
"grad_norm": 0.9581547193027541,
"learning_rate": 1.7715231788079472e-05,
"loss": 0.7987,
"mean_token_accuracy": 0.7847492977976799,
"num_tokens": 49700398.0,
"step": 760
},
{
"epoch": 0.11589403973509933,
"grad_norm": 0.876463264934873,
"learning_rate": 1.7685129440096327e-05,
"loss": 0.7759,
"mean_token_accuracy": 0.7922845646739006,
"num_tokens": 50353303.0,
"step": 770
},
{
"epoch": 0.11739915713425647,
"grad_norm": 0.7912414198907308,
"learning_rate": 1.7655027092113186e-05,
"loss": 0.7635,
"mean_token_accuracy": 0.7954950258135796,
"num_tokens": 51004008.0,
"step": 780
},
{
"epoch": 0.11890427453341361,
"grad_norm": 0.8567434891484385,
"learning_rate": 1.7624924744130042e-05,
"loss": 0.786,
"mean_token_accuracy": 0.7885375574231148,
"num_tokens": 51657183.0,
"step": 790
},
{
"epoch": 0.12040939193257075,
"grad_norm": 0.8674077034489452,
"learning_rate": 1.75948223961469e-05,
"loss": 0.7491,
"mean_token_accuracy": 0.7960452109575271,
"num_tokens": 52306165.0,
"step": 800
},
{
"epoch": 0.12191450933172787,
"grad_norm": 0.9532040218592741,
"learning_rate": 1.756472004816376e-05,
"loss": 0.8002,
"mean_token_accuracy": 0.7860918015241622,
"num_tokens": 52961289.0,
"step": 810
},
{
"epoch": 0.12341962673088501,
"grad_norm": 0.9311440870377063,
"learning_rate": 1.7534617700180615e-05,
"loss": 0.7658,
"mean_token_accuracy": 0.7935622245073318,
"num_tokens": 53616649.0,
"step": 820
},
{
"epoch": 0.12492474413004215,
"grad_norm": 0.8105073832679359,
"learning_rate": 1.7504515352197474e-05,
"loss": 0.7367,
"mean_token_accuracy": 0.7998397067189217,
"num_tokens": 54272009.0,
"step": 830
},
{
"epoch": 0.12642986152919927,
"grad_norm": 0.8683619505144872,
"learning_rate": 1.747441300421433e-05,
"loss": 0.7428,
"mean_token_accuracy": 0.8006275922060013,
"num_tokens": 54925539.0,
"step": 840
},
{
"epoch": 0.12793497892835642,
"grad_norm": 0.8155490993503403,
"learning_rate": 1.7444310656231185e-05,
"loss": 0.7924,
"mean_token_accuracy": 0.7886917501688003,
"num_tokens": 55579974.0,
"step": 850
},
{
"epoch": 0.12944009632751355,
"grad_norm": 0.8994872501062333,
"learning_rate": 1.7414208308248044e-05,
"loss": 0.7884,
"mean_token_accuracy": 0.7879066273570061,
"num_tokens": 56234346.0,
"step": 860
},
{
"epoch": 0.13094521372667067,
"grad_norm": 0.944052673697117,
"learning_rate": 1.7384105960264903e-05,
"loss": 0.7644,
"mean_token_accuracy": 0.7921950727701187,
"num_tokens": 56888265.0,
"step": 870
},
{
"epoch": 0.13245033112582782,
"grad_norm": 0.8348805534260644,
"learning_rate": 1.735400361228176e-05,
"loss": 0.7852,
"mean_token_accuracy": 0.7882557049393654,
"num_tokens": 57542064.0,
"step": 880
},
{
"epoch": 0.13395544852498495,
"grad_norm": 0.8685556781874473,
"learning_rate": 1.7323901264298618e-05,
"loss": 0.7643,
"mean_token_accuracy": 0.7916644081473351,
"num_tokens": 58191468.0,
"step": 890
},
{
"epoch": 0.13546056592414207,
"grad_norm": 0.8819964488751374,
"learning_rate": 1.7293798916315473e-05,
"loss": 0.7804,
"mean_token_accuracy": 0.7900642126798629,
"num_tokens": 58845375.0,
"step": 900
},
{
"epoch": 0.13696568332329923,
"grad_norm": 0.7706719597380793,
"learning_rate": 1.7263696568332332e-05,
"loss": 0.7468,
"mean_token_accuracy": 0.7974156990647316,
"num_tokens": 59499003.0,
"step": 910
},
{
"epoch": 0.13847080072245635,
"grad_norm": 0.8949708540928332,
"learning_rate": 1.7233594220349188e-05,
"loss": 0.7697,
"mean_token_accuracy": 0.7942784354090691,
"num_tokens": 60153143.0,
"step": 920
},
{
"epoch": 0.13997591812161347,
"grad_norm": 0.8938471826741746,
"learning_rate": 1.7203491872366043e-05,
"loss": 0.7565,
"mean_token_accuracy": 0.7957235768437385,
"num_tokens": 60808223.0,
"step": 930
},
{
"epoch": 0.14148103552077063,
"grad_norm": 0.8358286535449414,
"learning_rate": 1.7173389524382902e-05,
"loss": 0.767,
"mean_token_accuracy": 0.7933147415518761,
"num_tokens": 61461734.0,
"step": 940
},
{
"epoch": 0.14298615291992775,
"grad_norm": 0.8712281153910671,
"learning_rate": 1.714328717639976e-05,
"loss": 0.7638,
"mean_token_accuracy": 0.7921429499983788,
"num_tokens": 62116634.0,
"step": 950
},
{
"epoch": 0.1444912703190849,
"grad_norm": 0.8684270174776013,
"learning_rate": 1.7113184828416617e-05,
"loss": 0.7897,
"mean_token_accuracy": 0.7875265553593636,
"num_tokens": 62771245.0,
"step": 960
},
{
"epoch": 0.14599638771824203,
"grad_norm": 0.7955045486378506,
"learning_rate": 1.7083082480433476e-05,
"loss": 0.7261,
"mean_token_accuracy": 0.8014863416552543,
"num_tokens": 63424706.0,
"step": 970
},
{
"epoch": 0.14750150511739915,
"grad_norm": 0.8899697106487319,
"learning_rate": 1.705298013245033e-05,
"loss": 0.7632,
"mean_token_accuracy": 0.7941789865493775,
"num_tokens": 64080066.0,
"step": 980
},
{
"epoch": 0.1490066225165563,
"grad_norm": 0.804285362005701,
"learning_rate": 1.702287778446719e-05,
"loss": 0.7726,
"mean_token_accuracy": 0.7943894654512406,
"num_tokens": 64733226.0,
"step": 990
},
{
"epoch": 0.15051173991571343,
"grad_norm": 0.8787668654860644,
"learning_rate": 1.699277543648405e-05,
"loss": 0.7698,
"mean_token_accuracy": 0.7918394789099693,
"num_tokens": 65387161.0,
"step": 1000
},
{
"epoch": 0.15201685731487055,
"grad_norm": 0.8352868459629883,
"learning_rate": 1.6962673088500905e-05,
"loss": 0.7785,
"mean_token_accuracy": 0.79125065356493,
"num_tokens": 66041447.0,
"step": 1010
},
{
"epoch": 0.1535219747140277,
"grad_norm": 0.7673967975443655,
"learning_rate": 1.693257074051776e-05,
"loss": 0.757,
"mean_token_accuracy": 0.7955381035804748,
"num_tokens": 66695643.0,
"step": 1020
},
{
"epoch": 0.15502709211318483,
"grad_norm": 0.9619399874547646,
"learning_rate": 1.690246839253462e-05,
"loss": 0.8063,
"mean_token_accuracy": 0.7852592051029206,
"num_tokens": 67349265.0,
"step": 1030
},
{
"epoch": 0.15653220951234195,
"grad_norm": 0.862700449720151,
"learning_rate": 1.6872366044551475e-05,
"loss": 0.7515,
"mean_token_accuracy": 0.7977708280086517,
"num_tokens": 68004324.0,
"step": 1040
},
{
"epoch": 0.1580373269114991,
"grad_norm": 0.8137469077952871,
"learning_rate": 1.6842263696568334e-05,
"loss": 0.7265,
"mean_token_accuracy": 0.8019616097211838,
"num_tokens": 68657707.0,
"step": 1050
},
{
"epoch": 0.15954244431065623,
"grad_norm": 0.8704908427956669,
"learning_rate": 1.6812161348585193e-05,
"loss": 0.7652,
"mean_token_accuracy": 0.7943032309412956,
"num_tokens": 69309982.0,
"step": 1060
},
{
"epoch": 0.16104756170981335,
"grad_norm": 0.8721161816172613,
"learning_rate": 1.6782059000602048e-05,
"loss": 0.7702,
"mean_token_accuracy": 0.7941267043352127,
"num_tokens": 69964695.0,
"step": 1070
},
{
"epoch": 0.1625526791089705,
"grad_norm": 0.8189061107425057,
"learning_rate": 1.6751956652618907e-05,
"loss": 0.7685,
"mean_token_accuracy": 0.7943713366985321,
"num_tokens": 70620055.0,
"step": 1080
},
{
"epoch": 0.16405779650812763,
"grad_norm": 0.8436846845627259,
"learning_rate": 1.6721854304635763e-05,
"loss": 0.7319,
"mean_token_accuracy": 0.8026227414608001,
"num_tokens": 71275415.0,
"step": 1090
},
{
"epoch": 0.16556291390728478,
"grad_norm": 0.8274262661586408,
"learning_rate": 1.669175195665262e-05,
"loss": 0.7545,
"mean_token_accuracy": 0.7971039965748787,
"num_tokens": 71930775.0,
"step": 1100
},
{
"epoch": 0.1670680313064419,
"grad_norm": 0.8750608846258383,
"learning_rate": 1.6661649608669477e-05,
"loss": 0.7463,
"mean_token_accuracy": 0.7971311554312706,
"num_tokens": 72584266.0,
"step": 1110
},
{
"epoch": 0.16857314870559903,
"grad_norm": 0.829213357340937,
"learning_rate": 1.6631547260686336e-05,
"loss": 0.7532,
"mean_token_accuracy": 0.7960872635245323,
"num_tokens": 73239626.0,
"step": 1120
},
{
"epoch": 0.17007826610475618,
"grad_norm": 0.8094988426657819,
"learning_rate": 1.6601444912703192e-05,
"loss": 0.7601,
"mean_token_accuracy": 0.7956007704138756,
"num_tokens": 73892202.0,
"step": 1130
},
{
"epoch": 0.1715833835039133,
"grad_norm": 0.8040984765694252,
"learning_rate": 1.657134256472005e-05,
"loss": 0.7277,
"mean_token_accuracy": 0.8024196982383728,
"num_tokens": 74547562.0,
"step": 1140
},
{
"epoch": 0.17308850090307043,
"grad_norm": 0.9302568035038656,
"learning_rate": 1.6541240216736906e-05,
"loss": 0.7692,
"mean_token_accuracy": 0.7915791377425194,
"num_tokens": 75202922.0,
"step": 1150
},
{
"epoch": 0.17459361830222758,
"grad_norm": 0.8529288739205021,
"learning_rate": 1.6511137868753765e-05,
"loss": 0.7633,
"mean_token_accuracy": 0.794321759045124,
"num_tokens": 75855977.0,
"step": 1160
},
{
"epoch": 0.1760987357013847,
"grad_norm": 0.9492954962518395,
"learning_rate": 1.648103552077062e-05,
"loss": 0.7658,
"mean_token_accuracy": 0.7943087443709373,
"num_tokens": 76511337.0,
"step": 1170
},
{
"epoch": 0.17760385310054183,
"grad_norm": 0.8701282778319047,
"learning_rate": 1.6450933172787476e-05,
"loss": 0.8226,
"mean_token_accuracy": 0.780410946905613,
"num_tokens": 77166563.0,
"step": 1180
},
{
"epoch": 0.17910897049969898,
"grad_norm": 0.8747754872764684,
"learning_rate": 1.6420830824804335e-05,
"loss": 0.7607,
"mean_token_accuracy": 0.7948632016777992,
"num_tokens": 77821721.0,
"step": 1190
},
{
"epoch": 0.1806140878988561,
"grad_norm": 0.7831429169639552,
"learning_rate": 1.6390728476821194e-05,
"loss": 0.7541,
"mean_token_accuracy": 0.7957193419337273,
"num_tokens": 78477081.0,
"step": 1200
},
{
"epoch": 0.18211920529801323,
"grad_norm": 0.8432752703822518,
"learning_rate": 1.636062612883805e-05,
"loss": 0.7925,
"mean_token_accuracy": 0.7887899950146675,
"num_tokens": 79132441.0,
"step": 1210
},
{
"epoch": 0.18362432269717038,
"grad_norm": 0.8504998453679667,
"learning_rate": 1.633052378085491e-05,
"loss": 0.7648,
"mean_token_accuracy": 0.7930545896291733,
"num_tokens": 79787010.0,
"step": 1220
},
{
"epoch": 0.1851294400963275,
"grad_norm": 0.9010706298818607,
"learning_rate": 1.6300421432871764e-05,
"loss": 0.7745,
"mean_token_accuracy": 0.7909213706851006,
"num_tokens": 80440325.0,
"step": 1230
},
{
"epoch": 0.18663455749548466,
"grad_norm": 0.8884532116760039,
"learning_rate": 1.6270319084888623e-05,
"loss": 0.7582,
"mean_token_accuracy": 0.7969711780548095,
"num_tokens": 81095685.0,
"step": 1240
},
{
"epoch": 0.18813967489464178,
"grad_norm": 0.8037962066802894,
"learning_rate": 1.6240216736905482e-05,
"loss": 0.7608,
"mean_token_accuracy": 0.7947753235697746,
"num_tokens": 81750400.0,
"step": 1250
},
{
"epoch": 0.1896447922937989,
"grad_norm": 0.8812056476404384,
"learning_rate": 1.6210114388922338e-05,
"loss": 0.7723,
"mean_token_accuracy": 0.794083659350872,
"num_tokens": 82405478.0,
"step": 1260
},
{
"epoch": 0.19114990969295606,
"grad_norm": 0.9960349572295231,
"learning_rate": 1.6180012040939193e-05,
"loss": 0.7704,
"mean_token_accuracy": 0.7944280609488488,
"num_tokens": 83058284.0,
"step": 1270
},
{
"epoch": 0.19265502709211318,
"grad_norm": 0.8677664262970063,
"learning_rate": 1.6149909692956052e-05,
"loss": 0.7835,
"mean_token_accuracy": 0.7911761164665222,
"num_tokens": 83713644.0,
"step": 1280
},
{
"epoch": 0.1941601444912703,
"grad_norm": 0.7916221716489966,
"learning_rate": 1.6119807344972908e-05,
"loss": 0.7442,
"mean_token_accuracy": 0.7980069324374199,
"num_tokens": 84366958.0,
"step": 1290
},
{
"epoch": 0.19566526189042746,
"grad_norm": 0.8445952572495113,
"learning_rate": 1.6089704996989767e-05,
"loss": 0.7765,
"mean_token_accuracy": 0.7921729102730751,
"num_tokens": 85021614.0,
"step": 1300
},
{
"epoch": 0.19717037928958459,
"grad_norm": 0.8925383428599953,
"learning_rate": 1.6059602649006626e-05,
"loss": 0.761,
"mean_token_accuracy": 0.7955117270350456,
"num_tokens": 85676974.0,
"step": 1310
},
{
"epoch": 0.1986754966887417,
"grad_norm": 0.8841497219076043,
"learning_rate": 1.602950030102348e-05,
"loss": 0.7856,
"mean_token_accuracy": 0.7890258222818375,
"num_tokens": 86329344.0,
"step": 1320
},
{
"epoch": 0.20018061408789886,
"grad_norm": 0.8277609911959379,
"learning_rate": 1.599939795304034e-05,
"loss": 0.7176,
"mean_token_accuracy": 0.8031543210148812,
"num_tokens": 86983588.0,
"step": 1330
},
{
"epoch": 0.20168573148705599,
"grad_norm": 0.7762617521752686,
"learning_rate": 1.5969295605057196e-05,
"loss": 0.7262,
"mean_token_accuracy": 0.8033408597111702,
"num_tokens": 87638093.0,
"step": 1340
},
{
"epoch": 0.20319084888621314,
"grad_norm": 0.8574797249960955,
"learning_rate": 1.593919325707405e-05,
"loss": 0.7721,
"mean_token_accuracy": 0.7914051085710525,
"num_tokens": 88293453.0,
"step": 1350
},
{
"epoch": 0.20469596628537026,
"grad_norm": 0.8525932598240934,
"learning_rate": 1.590909090909091e-05,
"loss": 0.7692,
"mean_token_accuracy": 0.7919760629534721,
"num_tokens": 88947192.0,
"step": 1360
},
{
"epoch": 0.2062010836845274,
"grad_norm": 0.8470054492079515,
"learning_rate": 1.587898856110777e-05,
"loss": 0.7487,
"mean_token_accuracy": 0.7974001586437225,
"num_tokens": 89602552.0,
"step": 1370
},
{
"epoch": 0.20770620108368454,
"grad_norm": 0.7728762356874295,
"learning_rate": 1.5848886213124625e-05,
"loss": 0.7861,
"mean_token_accuracy": 0.7887239217758178,
"num_tokens": 90256955.0,
"step": 1380
},
{
"epoch": 0.20921131848284166,
"grad_norm": 0.8411401203292108,
"learning_rate": 1.5818783865141484e-05,
"loss": 0.7432,
"mean_token_accuracy": 0.800141978263855,
"num_tokens": 90912315.0,
"step": 1390
},
{
"epoch": 0.2107164358819988,
"grad_norm": 0.8028334181793002,
"learning_rate": 1.578868151715834e-05,
"loss": 0.727,
"mean_token_accuracy": 0.8017411068081856,
"num_tokens": 91566721.0,
"step": 1400
},
{
"epoch": 0.21222155328115594,
"grad_norm": 0.9408129549604158,
"learning_rate": 1.5758579169175198e-05,
"loss": 0.7753,
"mean_token_accuracy": 0.79281265437603,
"num_tokens": 92222081.0,
"step": 1410
},
{
"epoch": 0.21372667068031306,
"grad_norm": 0.870622637546394,
"learning_rate": 1.5728476821192054e-05,
"loss": 0.7391,
"mean_token_accuracy": 0.7998394921422005,
"num_tokens": 92873611.0,
"step": 1420
},
{
"epoch": 0.2152317880794702,
"grad_norm": 0.9912659604734557,
"learning_rate": 1.569837447320891e-05,
"loss": 0.7608,
"mean_token_accuracy": 0.7938985392451287,
"num_tokens": 93526682.0,
"step": 1430
},
{
"epoch": 0.21673690547862734,
"grad_norm": 0.8130531325261461,
"learning_rate": 1.5668272125225768e-05,
"loss": 0.7768,
"mean_token_accuracy": 0.7910584717988968,
"num_tokens": 94180965.0,
"step": 1440
},
{
"epoch": 0.21824202287778446,
"grad_norm": 0.9191216718462245,
"learning_rate": 1.5638169777242627e-05,
"loss": 0.7246,
"mean_token_accuracy": 0.8026754096150398,
"num_tokens": 94835088.0,
"step": 1450
},
{
"epoch": 0.2197471402769416,
"grad_norm": 0.8300249028967778,
"learning_rate": 1.5608067429259483e-05,
"loss": 0.7511,
"mean_token_accuracy": 0.7983420848846435,
"num_tokens": 95490448.0,
"step": 1460
},
{
"epoch": 0.22125225767609874,
"grad_norm": 0.7951346672855284,
"learning_rate": 1.557796508127634e-05,
"loss": 0.7579,
"mean_token_accuracy": 0.7955086678266525,
"num_tokens": 96145808.0,
"step": 1470
},
{
"epoch": 0.22275737507525586,
"grad_norm": 0.88803863878293,
"learning_rate": 1.5547862733293197e-05,
"loss": 0.7456,
"mean_token_accuracy": 0.7965317487716674,
"num_tokens": 96799113.0,
"step": 1480
},
{
"epoch": 0.22426249247441302,
"grad_norm": 0.8643113106520561,
"learning_rate": 1.5517760385310056e-05,
"loss": 0.7592,
"mean_token_accuracy": 0.794936190545559,
"num_tokens": 97454473.0,
"step": 1490
},
{
"epoch": 0.22576760987357014,
"grad_norm": 0.7656311573450891,
"learning_rate": 1.5487658037326915e-05,
"loss": 0.744,
"mean_token_accuracy": 0.798383304476738,
"num_tokens": 98109833.0,
"step": 1500
},
{
"epoch": 0.22727272727272727,
"grad_norm": 0.7807264440245045,
"learning_rate": 1.545755568934377e-05,
"loss": 0.7632,
"mean_token_accuracy": 0.7929225742816925,
"num_tokens": 98765193.0,
"step": 1510
},
{
"epoch": 0.22877784467188442,
"grad_norm": 0.8577954072341681,
"learning_rate": 1.5427453341360626e-05,
"loss": 0.7809,
"mean_token_accuracy": 0.7900413990020752,
"num_tokens": 99417533.0,
"step": 1520
},
{
"epoch": 0.23028296207104154,
"grad_norm": 0.7745693995209237,
"learning_rate": 1.5397350993377485e-05,
"loss": 0.7324,
"mean_token_accuracy": 0.8006270915269852,
"num_tokens": 100071717.0,
"step": 1530
},
{
"epoch": 0.23178807947019867,
"grad_norm": 0.8269975042567316,
"learning_rate": 1.536724864539434e-05,
"loss": 0.7562,
"mean_token_accuracy": 0.7966322675347328,
"num_tokens": 100727077.0,
"step": 1540
},
{
"epoch": 0.23329319686935582,
"grad_norm": 0.8059920867147564,
"learning_rate": 1.53371462974112e-05,
"loss": 0.7915,
"mean_token_accuracy": 0.7891182228922844,
"num_tokens": 101382437.0,
"step": 1550
},
{
"epoch": 0.23479831426851294,
"grad_norm": 0.8646678410794064,
"learning_rate": 1.530704394942806e-05,
"loss": 0.7897,
"mean_token_accuracy": 0.7905606806278229,
"num_tokens": 102033756.0,
"step": 1560
},
{
"epoch": 0.23630343166767007,
"grad_norm": 0.8547304753227982,
"learning_rate": 1.5276941601444914e-05,
"loss": 0.7752,
"mean_token_accuracy": 0.790402115881443,
"num_tokens": 102689116.0,
"step": 1570
},
{
"epoch": 0.23780854906682722,
"grad_norm": 1.4190184748774233,
"learning_rate": 1.5246839253461771e-05,
"loss": 0.7318,
"mean_token_accuracy": 0.8021197319030762,
"num_tokens": 103342715.0,
"step": 1580
},
{
"epoch": 0.23931366646598434,
"grad_norm": 0.7737584676524992,
"learning_rate": 1.5216736905478629e-05,
"loss": 0.7635,
"mean_token_accuracy": 0.7938489958643913,
"num_tokens": 103997145.0,
"step": 1590
},
{
"epoch": 0.2408187838651415,
"grad_norm": 0.9280215259490882,
"learning_rate": 1.5186634557495486e-05,
"loss": 0.7452,
"mean_token_accuracy": 0.7994018048048019,
"num_tokens": 104649816.0,
"step": 1600
},
{
"epoch": 0.24232390126429862,
"grad_norm": 0.8257952543204938,
"learning_rate": 1.5156532209512343e-05,
"loss": 0.7342,
"mean_token_accuracy": 0.8016293570399284,
"num_tokens": 105302893.0,
"step": 1610
},
{
"epoch": 0.24382901866345574,
"grad_norm": 0.7807229346437049,
"learning_rate": 1.5126429861529202e-05,
"loss": 0.7469,
"mean_token_accuracy": 0.7980134293437005,
"num_tokens": 105956619.0,
"step": 1620
},
{
"epoch": 0.2453341360626129,
"grad_norm": 0.9502338247595454,
"learning_rate": 1.509632751354606e-05,
"loss": 0.722,
"mean_token_accuracy": 0.8015266269445419,
"num_tokens": 106611979.0,
"step": 1630
},
{
"epoch": 0.24683925346177002,
"grad_norm": 0.9054067925446354,
"learning_rate": 1.5066225165562915e-05,
"loss": 0.7834,
"mean_token_accuracy": 0.7897364005446434,
"num_tokens": 107266458.0,
"step": 1640
},
{
"epoch": 0.24834437086092714,
"grad_norm": 0.8096932012825973,
"learning_rate": 1.5036122817579772e-05,
"loss": 0.7141,
"mean_token_accuracy": 0.8049699932336807,
"num_tokens": 107919891.0,
"step": 1650
},
{
"epoch": 0.2498494882600843,
"grad_norm": 0.8111660189851574,
"learning_rate": 1.500602046959663e-05,
"loss": 0.7389,
"mean_token_accuracy": 0.7994946867227555,
"num_tokens": 108575251.0,
"step": 1660
},
{
"epoch": 0.25135460565924145,
"grad_norm": 0.872485698341458,
"learning_rate": 1.4975918121613487e-05,
"loss": 0.7638,
"mean_token_accuracy": 0.793247839808464,
"num_tokens": 109229397.0,
"step": 1670
},
{
"epoch": 0.25285972305839854,
"grad_norm": 0.7989740529438174,
"learning_rate": 1.4945815773630344e-05,
"loss": 0.7406,
"mean_token_accuracy": 0.8004824161529541,
"num_tokens": 109884757.0,
"step": 1680
},
{
"epoch": 0.2543648404575557,
"grad_norm": 0.9962226601760668,
"learning_rate": 1.4915713425647203e-05,
"loss": 0.7389,
"mean_token_accuracy": 0.8010228395462036,
"num_tokens": 110539326.0,
"step": 1690
},
{
"epoch": 0.25586995785671285,
"grad_norm": 0.8780144542163605,
"learning_rate": 1.488561107766406e-05,
"loss": 0.7597,
"mean_token_accuracy": 0.7951804459095001,
"num_tokens": 111194686.0,
"step": 1700
},
{
"epoch": 0.25737507525586995,
"grad_norm": 0.843043910756463,
"learning_rate": 1.4855508729680917e-05,
"loss": 0.7545,
"mean_token_accuracy": 0.7975330114364624,
"num_tokens": 111849236.0,
"step": 1710
},
{
"epoch": 0.2588801926550271,
"grad_norm": 0.8853179593156032,
"learning_rate": 1.4825406381697773e-05,
"loss": 0.7509,
"mean_token_accuracy": 0.7967573747038841,
"num_tokens": 112502230.0,
"step": 1720
},
{
"epoch": 0.26038531005418425,
"grad_norm": 1.5017248864744905,
"learning_rate": 1.479530403371463e-05,
"loss": 0.7542,
"mean_token_accuracy": 0.7963590011000633,
"num_tokens": 113157590.0,
"step": 1730
},
{
"epoch": 0.26189042745334135,
"grad_norm": 0.7446024351040114,
"learning_rate": 1.4765201685731487e-05,
"loss": 0.712,
"mean_token_accuracy": 0.8053502306342125,
"num_tokens": 113812483.0,
"step": 1740
},
{
"epoch": 0.2633955448524985,
"grad_norm": 0.7781825583653845,
"learning_rate": 1.4735099337748346e-05,
"loss": 0.7794,
"mean_token_accuracy": 0.7910980373620987,
"num_tokens": 114467211.0,
"step": 1750
},
{
"epoch": 0.26490066225165565,
"grad_norm": 0.8906574727322736,
"learning_rate": 1.4704996989765203e-05,
"loss": 0.7804,
"mean_token_accuracy": 0.7923104777932167,
"num_tokens": 115122474.0,
"step": 1760
},
{
"epoch": 0.26640577965081275,
"grad_norm": 0.8521902207969323,
"learning_rate": 1.467489464178206e-05,
"loss": 0.7482,
"mean_token_accuracy": 0.7965755835175514,
"num_tokens": 115775617.0,
"step": 1770
},
{
"epoch": 0.2679108970499699,
"grad_norm": 0.8391702368301299,
"learning_rate": 1.4644792293798918e-05,
"loss": 0.7342,
"mean_token_accuracy": 0.8010420575737953,
"num_tokens": 116427646.0,
"step": 1780
},
{
"epoch": 0.26941601444912705,
"grad_norm": 0.7571804485530916,
"learning_rate": 1.4614689945815773e-05,
"loss": 0.7439,
"mean_token_accuracy": 0.7991481438279152,
"num_tokens": 117083006.0,
"step": 1790
},
{
"epoch": 0.27092113184828415,
"grad_norm": 0.8948756751173403,
"learning_rate": 1.458458759783263e-05,
"loss": 0.7035,
"mean_token_accuracy": 0.8078163161873817,
"num_tokens": 117738366.0,
"step": 1800
},
{
"epoch": 0.2724262492474413,
"grad_norm": 0.8189089996058311,
"learning_rate": 1.455448524984949e-05,
"loss": 0.7333,
"mean_token_accuracy": 0.8022503554821014,
"num_tokens": 118390911.0,
"step": 1810
},
{
"epoch": 0.27393136664659845,
"grad_norm": 0.8562867154390081,
"learning_rate": 1.4524382901866347e-05,
"loss": 0.6998,
"mean_token_accuracy": 0.8098207741975785,
"num_tokens": 119046271.0,
"step": 1820
},
{
"epoch": 0.27543648404575555,
"grad_norm": 0.8000412239036848,
"learning_rate": 1.4494280553883204e-05,
"loss": 0.7593,
"mean_token_accuracy": 0.7960475966334343,
"num_tokens": 119700872.0,
"step": 1830
},
{
"epoch": 0.2769416014449127,
"grad_norm": 0.7022083363095177,
"learning_rate": 1.4464178205900061e-05,
"loss": 0.7187,
"mean_token_accuracy": 0.8039081588387489,
"num_tokens": 120356232.0,
"step": 1840
},
{
"epoch": 0.27844671884406985,
"grad_norm": 0.7330810854396546,
"learning_rate": 1.4434075857916919e-05,
"loss": 0.6802,
"mean_token_accuracy": 0.811624014377594,
"num_tokens": 121008241.0,
"step": 1850
},
{
"epoch": 0.27995183624322695,
"grad_norm": 0.8709735932681155,
"learning_rate": 1.4403973509933776e-05,
"loss": 0.7587,
"mean_token_accuracy": 0.7946249365806579,
"num_tokens": 121662181.0,
"step": 1860
},
{
"epoch": 0.2814569536423841,
"grad_norm": 0.7271993838315252,
"learning_rate": 1.4373871161950635e-05,
"loss": 0.7252,
"mean_token_accuracy": 0.801104761660099,
"num_tokens": 122316062.0,
"step": 1870
},
{
"epoch": 0.28296207104154125,
"grad_norm": 0.9081751258700796,
"learning_rate": 1.4343768813967492e-05,
"loss": 0.7551,
"mean_token_accuracy": 0.7959865048527718,
"num_tokens": 122971422.0,
"step": 1880
},
{
"epoch": 0.28446718844069835,
"grad_norm": 0.816281030997641,
"learning_rate": 1.4313666465984348e-05,
"loss": 0.74,
"mean_token_accuracy": 0.797758474946022,
"num_tokens": 123624695.0,
"step": 1890
},
{
"epoch": 0.2859723058398555,
"grad_norm": 1.0641385504831533,
"learning_rate": 1.4283564118001205e-05,
"loss": 0.7221,
"mean_token_accuracy": 0.8031127870082855,
"num_tokens": 124280055.0,
"step": 1900
},
{
"epoch": 0.28747742323901265,
"grad_norm": 0.8275815728201844,
"learning_rate": 1.4253461770018062e-05,
"loss": 0.7253,
"mean_token_accuracy": 0.8021907031536102,
"num_tokens": 124935415.0,
"step": 1910
},
{
"epoch": 0.2889825406381698,
"grad_norm": 0.8414855000219864,
"learning_rate": 1.422335942203492e-05,
"loss": 0.7182,
"mean_token_accuracy": 0.8061159908771515,
"num_tokens": 125588888.0,
"step": 1920
},
{
"epoch": 0.2904876580373269,
"grad_norm": 0.9158958808999972,
"learning_rate": 1.4193257074051777e-05,
"loss": 0.7462,
"mean_token_accuracy": 0.7974677577614784,
"num_tokens": 126241947.0,
"step": 1930
},
{
"epoch": 0.29199277543648405,
"grad_norm": 0.8185830922905779,
"learning_rate": 1.4163154726068636e-05,
"loss": 0.7372,
"mean_token_accuracy": 0.8008592411875725,
"num_tokens": 126895869.0,
"step": 1940
},
{
"epoch": 0.2934978928356412,
"grad_norm": 0.8125313384818498,
"learning_rate": 1.4133052378085493e-05,
"loss": 0.7335,
"mean_token_accuracy": 0.7997892886400223,
"num_tokens": 127549296.0,
"step": 1950
},
{
"epoch": 0.2950030102347983,
"grad_norm": 0.8853228359721746,
"learning_rate": 1.4102950030102348e-05,
"loss": 0.7578,
"mean_token_accuracy": 0.7951950415968895,
"num_tokens": 128199122.0,
"step": 1960
},
{
"epoch": 0.29650812763395545,
"grad_norm": 0.8312902050411218,
"learning_rate": 1.4072847682119206e-05,
"loss": 0.7349,
"mean_token_accuracy": 0.8013306766748428,
"num_tokens": 128852084.0,
"step": 1970
},
{
"epoch": 0.2980132450331126,
"grad_norm": 0.9400229441738973,
"learning_rate": 1.4042745334136063e-05,
"loss": 0.7709,
"mean_token_accuracy": 0.792121222615242,
"num_tokens": 129505684.0,
"step": 1980
},
{
"epoch": 0.2995183624322697,
"grad_norm": 0.767457194540183,
"learning_rate": 1.401264298615292e-05,
"loss": 0.7439,
"mean_token_accuracy": 0.7998887673020363,
"num_tokens": 130159751.0,
"step": 1990
},
{
"epoch": 0.30102347983142685,
"grad_norm": 0.8181485482754092,
"learning_rate": 1.3982540638169779e-05,
"loss": 0.7695,
"mean_token_accuracy": 0.793725848197937,
"num_tokens": 130814051.0,
"step": 2000
},
{
"epoch": 0.302528597230584,
"grad_norm": 0.8817939411380016,
"learning_rate": 1.3952438290186636e-05,
"loss": 0.7635,
"mean_token_accuracy": 0.7946731060743332,
"num_tokens": 131469233.0,
"step": 2010
},
{
"epoch": 0.3040337146297411,
"grad_norm": 0.8545142349530216,
"learning_rate": 1.3922335942203494e-05,
"loss": 0.7649,
"mean_token_accuracy": 0.7935700654983521,
"num_tokens": 132124451.0,
"step": 2020
},
{
"epoch": 0.30553883202889826,
"grad_norm": 0.7376149691050216,
"learning_rate": 1.389223359422035e-05,
"loss": 0.7016,
"mean_token_accuracy": 0.8074377551674843,
"num_tokens": 132779246.0,
"step": 2030
},
{
"epoch": 0.3070439494280554,
"grad_norm": 0.8034691098129949,
"learning_rate": 1.3862131246237206e-05,
"loss": 0.7396,
"mean_token_accuracy": 0.7991329744458199,
"num_tokens": 133430264.0,
"step": 2040
},
{
"epoch": 0.3085490668272125,
"grad_norm": 0.8410179177737847,
"learning_rate": 1.3832028898254064e-05,
"loss": 0.7354,
"mean_token_accuracy": 0.800162672996521,
"num_tokens": 134084988.0,
"step": 2050
},
{
"epoch": 0.31005418422636966,
"grad_norm": 0.7567872331068635,
"learning_rate": 1.3801926550270923e-05,
"loss": 0.7419,
"mean_token_accuracy": 0.8004732549190521,
"num_tokens": 134740348.0,
"step": 2060
},
{
"epoch": 0.3115593016255268,
"grad_norm": 0.8016571016171221,
"learning_rate": 1.377182420228778e-05,
"loss": 0.7298,
"mean_token_accuracy": 0.8019894018769265,
"num_tokens": 135392922.0,
"step": 2070
},
{
"epoch": 0.3130644190246839,
"grad_norm": 0.719704414551152,
"learning_rate": 1.3741721854304637e-05,
"loss": 0.7449,
"mean_token_accuracy": 0.7986428335309028,
"num_tokens": 136048282.0,
"step": 2080
},
{
"epoch": 0.31456953642384106,
"grad_norm": 0.8542849260567408,
"learning_rate": 1.3711619506321494e-05,
"loss": 0.7375,
"mean_token_accuracy": 0.799374783039093,
"num_tokens": 136702532.0,
"step": 2090
},
{
"epoch": 0.3160746538229982,
"grad_norm": 0.8615669462955476,
"learning_rate": 1.3681517158338352e-05,
"loss": 0.7581,
"mean_token_accuracy": 0.7959140941500664,
"num_tokens": 137354096.0,
"step": 2100
},
{
"epoch": 0.3175797712221553,
"grad_norm": 0.8623192413813401,
"learning_rate": 1.3651414810355209e-05,
"loss": 0.7677,
"mean_token_accuracy": 0.7939656764268875,
"num_tokens": 138008866.0,
"step": 2110
},
{
"epoch": 0.31908488862131246,
"grad_norm": 0.8638683869743261,
"learning_rate": 1.3621312462372068e-05,
"loss": 0.7854,
"mean_token_accuracy": 0.7892706394195557,
"num_tokens": 138662243.0,
"step": 2120
},
{
"epoch": 0.3205900060204696,
"grad_norm": 0.7788945368357936,
"learning_rate": 1.3591210114388925e-05,
"loss": 0.7033,
"mean_token_accuracy": 0.8076969027519226,
"num_tokens": 139315867.0,
"step": 2130
},
{
"epoch": 0.3220951234196267,
"grad_norm": 0.7810837509877583,
"learning_rate": 1.356110776640578e-05,
"loss": 0.7191,
"mean_token_accuracy": 0.8021959617733956,
"num_tokens": 139968814.0,
"step": 2140
},
{
"epoch": 0.32360024081878386,
"grad_norm": 0.8230298462354105,
"learning_rate": 1.3531005418422638e-05,
"loss": 0.7472,
"mean_token_accuracy": 0.7974795445799827,
"num_tokens": 140624174.0,
"step": 2150
},
{
"epoch": 0.325105358217941,
"grad_norm": 0.8121478510778044,
"learning_rate": 1.3500903070439495e-05,
"loss": 0.7275,
"mean_token_accuracy": 0.8007212415337562,
"num_tokens": 141278692.0,
"step": 2160
},
{
"epoch": 0.3266104756170981,
"grad_norm": 0.7615542679915625,
"learning_rate": 1.3470800722456352e-05,
"loss": 0.695,
"mean_token_accuracy": 0.8095154449343681,
"num_tokens": 141932698.0,
"step": 2170
},
{
"epoch": 0.32811559301625526,
"grad_norm": 0.8364388460827363,
"learning_rate": 1.344069837447321e-05,
"loss": 0.7535,
"mean_token_accuracy": 0.7957231163978576,
"num_tokens": 142583403.0,
"step": 2180
},
{
"epoch": 0.3296207104154124,
"grad_norm": 0.8118011063357086,
"learning_rate": 1.3410596026490068e-05,
"loss": 0.7265,
"mean_token_accuracy": 0.8028008803725243,
"num_tokens": 143236811.0,
"step": 2190
},
{
"epoch": 0.33112582781456956,
"grad_norm": 0.7816385932744847,
"learning_rate": 1.3380493678506926e-05,
"loss": 0.7087,
"mean_token_accuracy": 0.8058973535895347,
"num_tokens": 143892171.0,
"step": 2200
},
{
"epoch": 0.33263094521372666,
"grad_norm": 0.7980286746369547,
"learning_rate": 1.3350391330523781e-05,
"loss": 0.7001,
"mean_token_accuracy": 0.8083798885345459,
"num_tokens": 144547279.0,
"step": 2210
},
{
"epoch": 0.3341360626128838,
"grad_norm": 0.8103280863411036,
"learning_rate": 1.3320288982540638e-05,
"loss": 0.7147,
"mean_token_accuracy": 0.8047249019145966,
"num_tokens": 145202639.0,
"step": 2220
},
{
"epoch": 0.33564118001204096,
"grad_norm": 0.7817312732868804,
"learning_rate": 1.3290186634557496e-05,
"loss": 0.7547,
"mean_token_accuracy": 0.7974317491054534,
"num_tokens": 145856181.0,
"step": 2230
},
{
"epoch": 0.33714629741119806,
"grad_norm": 0.809969017978389,
"learning_rate": 1.3260084286574353e-05,
"loss": 0.7386,
"mean_token_accuracy": 0.7985619261860848,
"num_tokens": 146511541.0,
"step": 2240
},
{
"epoch": 0.3386514148103552,
"grad_norm": 0.8423961245555179,
"learning_rate": 1.3229981938591212e-05,
"loss": 0.7413,
"mean_token_accuracy": 0.8008396446704864,
"num_tokens": 147166901.0,
"step": 2250
},
{
"epoch": 0.34015653220951236,
"grad_norm": 0.8490396844604676,
"learning_rate": 1.319987959060807e-05,
"loss": 0.7428,
"mean_token_accuracy": 0.7985736206173897,
"num_tokens": 147821517.0,
"step": 2260
},
{
"epoch": 0.34166164960866946,
"grad_norm": 0.9544565589432954,
"learning_rate": 1.3169777242624926e-05,
"loss": 0.71,
"mean_token_accuracy": 0.8067162126302719,
"num_tokens": 148475213.0,
"step": 2270
},
{
"epoch": 0.3431667670078266,
"grad_norm": 0.8532463095029145,
"learning_rate": 1.3139674894641784e-05,
"loss": 0.7629,
"mean_token_accuracy": 0.7937496155500412,
"num_tokens": 149129302.0,
"step": 2280
},
{
"epoch": 0.34467188440698376,
"grad_norm": 0.8697931847348629,
"learning_rate": 1.310957254665864e-05,
"loss": 0.7561,
"mean_token_accuracy": 0.7952659383416176,
"num_tokens": 149784662.0,
"step": 2290
},
{
"epoch": 0.34617700180614086,
"grad_norm": 0.8108775998481546,
"learning_rate": 1.3079470198675496e-05,
"loss": 0.7367,
"mean_token_accuracy": 0.800705449283123,
"num_tokens": 150438278.0,
"step": 2300
},
{
"epoch": 0.347682119205298,
"grad_norm": 0.8438997210922776,
"learning_rate": 1.3049367850692355e-05,
"loss": 0.7235,
"mean_token_accuracy": 0.8016945570707321,
"num_tokens": 151093638.0,
"step": 2310
},
{
"epoch": 0.34918723660445516,
"grad_norm": 0.7471884183215057,
"learning_rate": 1.3019265502709213e-05,
"loss": 0.6997,
"mean_token_accuracy": 0.8057456374168396,
"num_tokens": 151748101.0,
"step": 2320
},
{
"epoch": 0.35069235400361226,
"grad_norm": 0.8204306141235812,
"learning_rate": 1.298916315472607e-05,
"loss": 0.7182,
"mean_token_accuracy": 0.8028990581631661,
"num_tokens": 152403461.0,
"step": 2330
},
{
"epoch": 0.3521974714027694,
"grad_norm": 0.7380655579017532,
"learning_rate": 1.2959060806742927e-05,
"loss": 0.7054,
"mean_token_accuracy": 0.8069384634494782,
"num_tokens": 153055319.0,
"step": 2340
},
{
"epoch": 0.35370258880192657,
"grad_norm": 0.8820967882531356,
"learning_rate": 1.2928958458759784e-05,
"loss": 0.7534,
"mean_token_accuracy": 0.7969329059123993,
"num_tokens": 153707035.0,
"step": 2350
},
{
"epoch": 0.35520770620108366,
"grad_norm": 0.8315059717289699,
"learning_rate": 1.289885611077664e-05,
"loss": 0.7187,
"mean_token_accuracy": 0.8047081142663955,
"num_tokens": 154362395.0,
"step": 2360
},
{
"epoch": 0.3567128236002408,
"grad_norm": 0.8123195898096921,
"learning_rate": 1.28687537627935e-05,
"loss": 0.7531,
"mean_token_accuracy": 0.7959761649370194,
"num_tokens": 155016494.0,
"step": 2370
},
{
"epoch": 0.35821794099939797,
"grad_norm": 0.8707921062012124,
"learning_rate": 1.2838651414810356e-05,
"loss": 0.7309,
"mean_token_accuracy": 0.8021174252033234,
"num_tokens": 155671854.0,
"step": 2380
},
{
"epoch": 0.35972305839855506,
"grad_norm": 0.8506282480491654,
"learning_rate": 1.2808549066827213e-05,
"loss": 0.7901,
"mean_token_accuracy": 0.7879240825772286,
"num_tokens": 156324314.0,
"step": 2390
},
{
"epoch": 0.3612281757977122,
"grad_norm": 0.8426621488681493,
"learning_rate": 1.277844671884407e-05,
"loss": 0.7249,
"mean_token_accuracy": 0.8012265384197235,
"num_tokens": 156979374.0,
"step": 2400
},
{
"epoch": 0.36273329319686937,
"grad_norm": 0.8133510365826947,
"learning_rate": 1.2748344370860928e-05,
"loss": 0.7323,
"mean_token_accuracy": 0.8001759797334671,
"num_tokens": 157632571.0,
"step": 2410
},
{
"epoch": 0.36423841059602646,
"grad_norm": 0.915090508369899,
"learning_rate": 1.2718242022877785e-05,
"loss": 0.7696,
"mean_token_accuracy": 0.7921423956751823,
"num_tokens": 158287219.0,
"step": 2420
},
{
"epoch": 0.3657435279951836,
"grad_norm": 0.8328754560870167,
"learning_rate": 1.2688139674894642e-05,
"loss": 0.7001,
"mean_token_accuracy": 0.80736443400383,
"num_tokens": 158942579.0,
"step": 2430
},
{
"epoch": 0.36724864539434077,
"grad_norm": 0.7874069005602204,
"learning_rate": 1.2658037326911501e-05,
"loss": 0.7054,
"mean_token_accuracy": 0.8079196363687515,
"num_tokens": 159596538.0,
"step": 2440
},
{
"epoch": 0.3687537627934979,
"grad_norm": 0.8371364466108957,
"learning_rate": 1.2627934978928359e-05,
"loss": 0.7674,
"mean_token_accuracy": 0.793084391951561,
"num_tokens": 160251898.0,
"step": 2450
},
{
"epoch": 0.370258880192655,
"grad_norm": 0.8051959753781694,
"learning_rate": 1.2597832630945214e-05,
"loss": 0.7008,
"mean_token_accuracy": 0.8092971444129944,
"num_tokens": 160907258.0,
"step": 2460
},
{
"epoch": 0.37176399759181217,
"grad_norm": 0.7887258704761677,
"learning_rate": 1.2567730282962071e-05,
"loss": 0.7584,
"mean_token_accuracy": 0.7960580214858055,
"num_tokens": 161562543.0,
"step": 2470
},
{
"epoch": 0.3732691149909693,
"grad_norm": 0.8142522899450826,
"learning_rate": 1.2537627934978929e-05,
"loss": 0.7244,
"mean_token_accuracy": 0.8027479246258735,
"num_tokens": 162217903.0,
"step": 2480
},
{
"epoch": 0.3747742323901264,
"grad_norm": 0.7897562813440631,
"learning_rate": 1.2507525586995786e-05,
"loss": 0.7025,
"mean_token_accuracy": 0.8074427857995033,
"num_tokens": 162871372.0,
"step": 2490
},
{
"epoch": 0.37627934978928357,
"grad_norm": 0.852644589558439,
"learning_rate": 1.2477423239012645e-05,
"loss": 0.6673,
"mean_token_accuracy": 0.816796886920929,
"num_tokens": 163523919.0,
"step": 2500
},
{
"epoch": 0.3777844671884407,
"grad_norm": 0.7204306408273152,
"learning_rate": 1.2447320891029502e-05,
"loss": 0.7047,
"mean_token_accuracy": 0.806460677087307,
"num_tokens": 164179279.0,
"step": 2510
},
{
"epoch": 0.3792895845875978,
"grad_norm": 0.7560565919266367,
"learning_rate": 1.241721854304636e-05,
"loss": 0.7379,
"mean_token_accuracy": 0.7992819055914879,
"num_tokens": 164834053.0,
"step": 2520
},
{
"epoch": 0.38079470198675497,
"grad_norm": 0.7735931141038619,
"learning_rate": 1.2387116195063217e-05,
"loss": 0.7063,
"mean_token_accuracy": 0.8051950618624687,
"num_tokens": 165486971.0,
"step": 2530
},
{
"epoch": 0.3822998193859121,
"grad_norm": 0.841653983169311,
"learning_rate": 1.2357013847080072e-05,
"loss": 0.7876,
"mean_token_accuracy": 0.7882312595844269,
"num_tokens": 166142331.0,
"step": 2540
},
{
"epoch": 0.3838049367850692,
"grad_norm": 0.7434911792385135,
"learning_rate": 1.232691149909693e-05,
"loss": 0.7465,
"mean_token_accuracy": 0.7978531375527382,
"num_tokens": 166797089.0,
"step": 2550
},
{
"epoch": 0.38531005418422637,
"grad_norm": 0.7726472609806933,
"learning_rate": 1.2296809151113788e-05,
"loss": 0.7462,
"mean_token_accuracy": 0.797703605890274,
"num_tokens": 167451719.0,
"step": 2560
},
{
"epoch": 0.3868151715833835,
"grad_norm": 0.9560931797627115,
"learning_rate": 1.2266706803130646e-05,
"loss": 0.7366,
"mean_token_accuracy": 0.7992733284831047,
"num_tokens": 168107079.0,
"step": 2570
},
{
"epoch": 0.3883202889825406,
"grad_norm": 0.879404934750527,
"learning_rate": 1.2236604455147503e-05,
"loss": 0.7255,
"mean_token_accuracy": 0.8031265258789062,
"num_tokens": 168762439.0,
"step": 2580
},
{
"epoch": 0.38982540638169777,
"grad_norm": 0.8202229177395133,
"learning_rate": 1.220650210716436e-05,
"loss": 0.7146,
"mean_token_accuracy": 0.8043844655156136,
"num_tokens": 169417799.0,
"step": 2590
},
{
"epoch": 0.3913305237808549,
"grad_norm": 0.8098944443556049,
"learning_rate": 1.2176399759181217e-05,
"loss": 0.7035,
"mean_token_accuracy": 0.8080252036452293,
"num_tokens": 170070400.0,
"step": 2600
},
{
"epoch": 0.392835641180012,
"grad_norm": 0.8047659285544517,
"learning_rate": 1.2146297411198073e-05,
"loss": 0.7066,
"mean_token_accuracy": 0.8052042603492737,
"num_tokens": 170725760.0,
"step": 2610
},
{
"epoch": 0.39434075857916917,
"grad_norm": 0.8226203658827208,
"learning_rate": 1.2116195063214933e-05,
"loss": 0.7544,
"mean_token_accuracy": 0.7957366958260537,
"num_tokens": 171378113.0,
"step": 2620
},
{
"epoch": 0.3958458759783263,
"grad_norm": 0.8099664607141047,
"learning_rate": 1.2086092715231789e-05,
"loss": 0.7307,
"mean_token_accuracy": 0.802658586204052,
"num_tokens": 172031920.0,
"step": 2630
},
{
"epoch": 0.3973509933774834,
"grad_norm": 0.7010950454683768,
"learning_rate": 1.2055990367248646e-05,
"loss": 0.7235,
"mean_token_accuracy": 0.8023072630167007,
"num_tokens": 172687010.0,
"step": 2640
},
{
"epoch": 0.39885611077664057,
"grad_norm": 0.7112213084350201,
"learning_rate": 1.2025888019265504e-05,
"loss": 0.7487,
"mean_token_accuracy": 0.7985242143273353,
"num_tokens": 173341760.0,
"step": 2650
},
{
"epoch": 0.4003612281757977,
"grad_norm": 0.7866431133317365,
"learning_rate": 1.199578567128236e-05,
"loss": 0.7124,
"mean_token_accuracy": 0.8047997072339058,
"num_tokens": 173997120.0,
"step": 2660
},
{
"epoch": 0.4018663455749548,
"grad_norm": 0.7958686340138259,
"learning_rate": 1.1965683323299218e-05,
"loss": 0.7264,
"mean_token_accuracy": 0.8047920733690261,
"num_tokens": 174652480.0,
"step": 2670
},
{
"epoch": 0.40337146297411197,
"grad_norm": 0.7931937228548085,
"learning_rate": 1.1935580975316077e-05,
"loss": 0.7166,
"mean_token_accuracy": 0.804453332722187,
"num_tokens": 175307051.0,
"step": 2680
},
{
"epoch": 0.4048765803732691,
"grad_norm": 0.8414047133010927,
"learning_rate": 1.1905478627332934e-05,
"loss": 0.722,
"mean_token_accuracy": 0.803479178249836,
"num_tokens": 175962411.0,
"step": 2690
},
{
"epoch": 0.4063816977724263,
"grad_norm": 0.7700579867503355,
"learning_rate": 1.1875376279349791e-05,
"loss": 0.7377,
"mean_token_accuracy": 0.7987645655870438,
"num_tokens": 176615394.0,
"step": 2700
},
{
"epoch": 0.4078868151715834,
"grad_norm": 0.8302573541607223,
"learning_rate": 1.1845273931366647e-05,
"loss": 0.7472,
"mean_token_accuracy": 0.7968945801258087,
"num_tokens": 177269102.0,
"step": 2710
},
{
"epoch": 0.4093919325707405,
"grad_norm": 0.8765674423163995,
"learning_rate": 1.1815171583383504e-05,
"loss": 0.7945,
"mean_token_accuracy": 0.7873334854841232,
"num_tokens": 177922233.0,
"step": 2720
},
{
"epoch": 0.4108970499698977,
"grad_norm": 0.8043233967618197,
"learning_rate": 1.1785069235400361e-05,
"loss": 0.686,
"mean_token_accuracy": 0.8096742391586303,
"num_tokens": 178575911.0,
"step": 2730
},
{
"epoch": 0.4124021673690548,
"grad_norm": 0.8721399678669114,
"learning_rate": 1.1754966887417219e-05,
"loss": 0.717,
"mean_token_accuracy": 0.8032816737890244,
"num_tokens": 179229495.0,
"step": 2740
},
{
"epoch": 0.4139072847682119,
"grad_norm": 0.8099202933603213,
"learning_rate": 1.1724864539434078e-05,
"loss": 0.6934,
"mean_token_accuracy": 0.810499781370163,
"num_tokens": 179883115.0,
"step": 2750
},
{
"epoch": 0.4154124021673691,
"grad_norm": 0.7882042117138398,
"learning_rate": 1.1694762191450935e-05,
"loss": 0.7385,
"mean_token_accuracy": 0.800557217001915,
"num_tokens": 180538475.0,
"step": 2760
},
{
"epoch": 0.4169175195665262,
"grad_norm": 0.8264936012665707,
"learning_rate": 1.1664659843467792e-05,
"loss": 0.7101,
"mean_token_accuracy": 0.8055298551917076,
"num_tokens": 181193709.0,
"step": 2770
},
{
"epoch": 0.4184226369656833,
"grad_norm": 0.7635453349114087,
"learning_rate": 1.1634557495484648e-05,
"loss": 0.7189,
"mean_token_accuracy": 0.8022925585508347,
"num_tokens": 181848502.0,
"step": 2780
},
{
"epoch": 0.4199277543648405,
"grad_norm": 0.8004600385192592,
"learning_rate": 1.1604455147501505e-05,
"loss": 0.7308,
"mean_token_accuracy": 0.8010586395859718,
"num_tokens": 182502476.0,
"step": 2790
},
{
"epoch": 0.4214328717639976,
"grad_norm": 0.7193388253714986,
"learning_rate": 1.1574352799518362e-05,
"loss": 0.7066,
"mean_token_accuracy": 0.8055385872721672,
"num_tokens": 183157836.0,
"step": 2800
},
{
"epoch": 0.4229379891631547,
"grad_norm": 0.9009713817079101,
"learning_rate": 1.1544250451535221e-05,
"loss": 0.7463,
"mean_token_accuracy": 0.7980786472558975,
"num_tokens": 183811497.0,
"step": 2810
},
{
"epoch": 0.4244431065623119,
"grad_norm": 0.776187136186914,
"learning_rate": 1.1514148103552078e-05,
"loss": 0.6957,
"mean_token_accuracy": 0.8083595156669616,
"num_tokens": 184463896.0,
"step": 2820
},
{
"epoch": 0.425948223961469,
"grad_norm": 0.8130038795159226,
"learning_rate": 1.1484045755568936e-05,
"loss": 0.7635,
"mean_token_accuracy": 0.7954803004860878,
"num_tokens": 185117476.0,
"step": 2830
},
{
"epoch": 0.4274533413606261,
"grad_norm": 0.7564234601113903,
"learning_rate": 1.1453943407585793e-05,
"loss": 0.7116,
"mean_token_accuracy": 0.8060019329190254,
"num_tokens": 185771283.0,
"step": 2840
},
{
"epoch": 0.4289584587597833,
"grad_norm": 0.906398593106907,
"learning_rate": 1.142384105960265e-05,
"loss": 0.7296,
"mean_token_accuracy": 0.8021451219916343,
"num_tokens": 186425549.0,
"step": 2850
},
{
"epoch": 0.4304635761589404,
"grad_norm": 0.8200395489339062,
"learning_rate": 1.1393738711619506e-05,
"loss": 0.7595,
"mean_token_accuracy": 0.7947599649429321,
"num_tokens": 187078321.0,
"step": 2860
},
{
"epoch": 0.4319686935580975,
"grad_norm": 0.8290871333202676,
"learning_rate": 1.1363636363636366e-05,
"loss": 0.7565,
"mean_token_accuracy": 0.7961515039205551,
"num_tokens": 187731135.0,
"step": 2870
},
{
"epoch": 0.4334738109572547,
"grad_norm": 0.7911587392676075,
"learning_rate": 1.1333534015653222e-05,
"loss": 0.7449,
"mean_token_accuracy": 0.7980863243341446,
"num_tokens": 188384784.0,
"step": 2880
},
{
"epoch": 0.4349789283564118,
"grad_norm": 0.711973469569701,
"learning_rate": 1.1303431667670079e-05,
"loss": 0.7251,
"mean_token_accuracy": 0.8034043282270431,
"num_tokens": 189039542.0,
"step": 2890
},
{
"epoch": 0.43648404575556893,
"grad_norm": 0.8341450761931715,
"learning_rate": 1.1273329319686936e-05,
"loss": 0.7024,
"mean_token_accuracy": 0.8091023206710816,
"num_tokens": 189690067.0,
"step": 2900
},
{
"epoch": 0.4379891631547261,
"grad_norm": 0.8160268015867358,
"learning_rate": 1.1243226971703794e-05,
"loss": 0.7087,
"mean_token_accuracy": 0.8077363967895508,
"num_tokens": 190344885.0,
"step": 2910
},
{
"epoch": 0.4394942805538832,
"grad_norm": 0.835780708497786,
"learning_rate": 1.1213124623720651e-05,
"loss": 0.7091,
"mean_token_accuracy": 0.8066539317369461,
"num_tokens": 190997515.0,
"step": 2920
},
{
"epoch": 0.44099939795304033,
"grad_norm": 0.7730508882826491,
"learning_rate": 1.118302227573751e-05,
"loss": 0.7519,
"mean_token_accuracy": 0.7966817542910576,
"num_tokens": 191652638.0,
"step": 2930
},
{
"epoch": 0.4425045153521975,
"grad_norm": 0.7989264828087985,
"learning_rate": 1.1152919927754367e-05,
"loss": 0.7084,
"mean_token_accuracy": 0.8050470232963562,
"num_tokens": 192307998.0,
"step": 2940
},
{
"epoch": 0.44400963275135463,
"grad_norm": 0.7475676253652657,
"learning_rate": 1.1122817579771223e-05,
"loss": 0.7064,
"mean_token_accuracy": 0.804878756403923,
"num_tokens": 192961266.0,
"step": 2950
},
{
"epoch": 0.44551475015051173,
"grad_norm": 0.7886808863615066,
"learning_rate": 1.109271523178808e-05,
"loss": 0.717,
"mean_token_accuracy": 0.8057780176401138,
"num_tokens": 193615763.0,
"step": 2960
},
{
"epoch": 0.4470198675496689,
"grad_norm": 0.8125274746428679,
"learning_rate": 1.1062612883804937e-05,
"loss": 0.7114,
"mean_token_accuracy": 0.8053019717335701,
"num_tokens": 194271123.0,
"step": 2970
},
{
"epoch": 0.44852498494882603,
"grad_norm": 0.8260302921388075,
"learning_rate": 1.1032510535821794e-05,
"loss": 0.7267,
"mean_token_accuracy": 0.8016424849629402,
"num_tokens": 194925375.0,
"step": 2980
},
{
"epoch": 0.45003010234798313,
"grad_norm": 0.8130548390682162,
"learning_rate": 1.1002408187838652e-05,
"loss": 0.7174,
"mean_token_accuracy": 0.8041437566280365,
"num_tokens": 195579677.0,
"step": 2990
},
{
"epoch": 0.4515352197471403,
"grad_norm": 0.7150220204150168,
"learning_rate": 1.097230583985551e-05,
"loss": 0.7132,
"mean_token_accuracy": 0.8052812933921814,
"num_tokens": 196233104.0,
"step": 3000
},
{
"epoch": 0.45304033714629743,
"grad_norm": 0.8186467059744715,
"learning_rate": 1.0942203491872368e-05,
"loss": 0.7536,
"mean_token_accuracy": 0.796061310172081,
"num_tokens": 196888464.0,
"step": 3010
},
{
"epoch": 0.45454545454545453,
"grad_norm": 0.8030232699110134,
"learning_rate": 1.0912101143889225e-05,
"loss": 0.7156,
"mean_token_accuracy": 0.8049913555383682,
"num_tokens": 197542844.0,
"step": 3020
},
{
"epoch": 0.4560505719446117,
"grad_norm": 0.7613073600567963,
"learning_rate": 1.088199879590608e-05,
"loss": 0.7041,
"mean_token_accuracy": 0.8063686951994896,
"num_tokens": 198197017.0,
"step": 3030
},
{
"epoch": 0.45755568934376883,
"grad_norm": 0.8366714650181802,
"learning_rate": 1.0851896447922938e-05,
"loss": 0.7616,
"mean_token_accuracy": 0.7939637005329132,
"num_tokens": 198851028.0,
"step": 3040
},
{
"epoch": 0.45906080674292593,
"grad_norm": 0.8638117040840707,
"learning_rate": 1.0821794099939795e-05,
"loss": 0.7173,
"mean_token_accuracy": 0.8043005108833313,
"num_tokens": 199504777.0,
"step": 3050
},
{
"epoch": 0.4605659241420831,
"grad_norm": 0.8203104992903841,
"learning_rate": 1.0791691751956654e-05,
"loss": 0.7358,
"mean_token_accuracy": 0.8005827903747559,
"num_tokens": 200157604.0,
"step": 3060
},
{
"epoch": 0.46207104154124023,
"grad_norm": 0.8144542650002136,
"learning_rate": 1.0761589403973511e-05,
"loss": 0.7416,
"mean_token_accuracy": 0.7981523618102073,
"num_tokens": 200809091.0,
"step": 3070
},
{
"epoch": 0.46357615894039733,
"grad_norm": 0.7945409434106105,
"learning_rate": 1.0731487055990369e-05,
"loss": 0.6875,
"mean_token_accuracy": 0.8094358786940574,
"num_tokens": 201464171.0,
"step": 3080
},
{
"epoch": 0.4650812763395545,
"grad_norm": 0.8730517194559562,
"learning_rate": 1.0701384708007226e-05,
"loss": 0.7183,
"mean_token_accuracy": 0.8042961657047272,
"num_tokens": 202119072.0,
"step": 3090
},
{
"epoch": 0.46658639373871164,
"grad_norm": 0.8626403160499244,
"learning_rate": 1.0671282360024083e-05,
"loss": 0.7167,
"mean_token_accuracy": 0.8044699609279633,
"num_tokens": 202774432.0,
"step": 3100
},
{
"epoch": 0.46809151113786873,
"grad_norm": 0.8523186755289315,
"learning_rate": 1.0641180012040939e-05,
"loss": 0.6908,
"mean_token_accuracy": 0.8096819415688514,
"num_tokens": 203427227.0,
"step": 3110
},
{
"epoch": 0.4695966285370259,
"grad_norm": 0.8645519988255309,
"learning_rate": 1.06110776640578e-05,
"loss": 0.7216,
"mean_token_accuracy": 0.8030843511223793,
"num_tokens": 204081611.0,
"step": 3120
},
{
"epoch": 0.47110174593618304,
"grad_norm": 0.7653421808114811,
"learning_rate": 1.0580975316074655e-05,
"loss": 0.6535,
"mean_token_accuracy": 0.819001467525959,
"num_tokens": 204736028.0,
"step": 3130
},
{
"epoch": 0.47260686333534013,
"grad_norm": 0.7263321192874824,
"learning_rate": 1.0550872968091512e-05,
"loss": 0.7242,
"mean_token_accuracy": 0.8027195662260056,
"num_tokens": 205387916.0,
"step": 3140
},
{
"epoch": 0.4741119807344973,
"grad_norm": 0.7601599258449856,
"learning_rate": 1.052077062010837e-05,
"loss": 0.7141,
"mean_token_accuracy": 0.8052065283060074,
"num_tokens": 206042288.0,
"step": 3150
},
{
"epoch": 0.47561709813365444,
"grad_norm": 0.8000199112319787,
"learning_rate": 1.0490668272125227e-05,
"loss": 0.7296,
"mean_token_accuracy": 0.8023619994521141,
"num_tokens": 206696451.0,
"step": 3160
},
{
"epoch": 0.47712221553281153,
"grad_norm": 0.8129807447364599,
"learning_rate": 1.0460565924142084e-05,
"loss": 0.7576,
"mean_token_accuracy": 0.7973424136638642,
"num_tokens": 207351750.0,
"step": 3170
},
{
"epoch": 0.4786273329319687,
"grad_norm": 0.7658578926533476,
"learning_rate": 1.0430463576158943e-05,
"loss": 0.7211,
"mean_token_accuracy": 0.8033246964216232,
"num_tokens": 208005316.0,
"step": 3180
},
{
"epoch": 0.48013245033112584,
"grad_norm": 0.8168158779578062,
"learning_rate": 1.04003612281758e-05,
"loss": 0.6937,
"mean_token_accuracy": 0.8084788709878922,
"num_tokens": 208660676.0,
"step": 3190
},
{
"epoch": 0.481637567730283,
"grad_norm": 1.0052382018557156,
"learning_rate": 1.0370258880192655e-05,
"loss": 0.7248,
"mean_token_accuracy": 0.8030228197574616,
"num_tokens": 209312522.0,
"step": 3200
},
{
"epoch": 0.4831426851294401,
"grad_norm": 0.7976089434726377,
"learning_rate": 1.0340156532209513e-05,
"loss": 0.6868,
"mean_token_accuracy": 0.8124389365315438,
"num_tokens": 209967882.0,
"step": 3210
},
{
"epoch": 0.48464780252859724,
"grad_norm": 0.8229870659165623,
"learning_rate": 1.031005418422637e-05,
"loss": 0.7505,
"mean_token_accuracy": 0.7961588889360428,
"num_tokens": 210622270.0,
"step": 3220
},
{
"epoch": 0.4861529199277544,
"grad_norm": 0.7747505536267031,
"learning_rate": 1.0279951836243227e-05,
"loss": 0.7026,
"mean_token_accuracy": 0.807074373960495,
"num_tokens": 211277630.0,
"step": 3230
},
{
"epoch": 0.4876580373269115,
"grad_norm": 0.8871086206356764,
"learning_rate": 1.0249849488260084e-05,
"loss": 0.6974,
"mean_token_accuracy": 0.8071722269058228,
"num_tokens": 211930855.0,
"step": 3240
},
{
"epoch": 0.48916315472606864,
"grad_norm": 0.8155925425450639,
"learning_rate": 1.0219747140276943e-05,
"loss": 0.7116,
"mean_token_accuracy": 0.8057066261768341,
"num_tokens": 212585259.0,
"step": 3250
},
{
"epoch": 0.4906682721252258,
"grad_norm": 0.7849356966516002,
"learning_rate": 1.01896447922938e-05,
"loss": 0.7529,
"mean_token_accuracy": 0.7978712409734726,
"num_tokens": 213238834.0,
"step": 3260
},
{
"epoch": 0.4921733895243829,
"grad_norm": 0.7736249961666276,
"learning_rate": 1.0159542444310658e-05,
"loss": 0.7183,
"mean_token_accuracy": 0.8035428315401077,
"num_tokens": 213892454.0,
"step": 3270
},
{
"epoch": 0.49367850692354004,
"grad_norm": 0.8871576564844669,
"learning_rate": 1.0129440096327513e-05,
"loss": 0.7175,
"mean_token_accuracy": 0.8037295624613762,
"num_tokens": 214546735.0,
"step": 3280
},
{
"epoch": 0.4951836243226972,
"grad_norm": 0.7820627438145885,
"learning_rate": 1.009933774834437e-05,
"loss": 0.704,
"mean_token_accuracy": 0.806598387658596,
"num_tokens": 215199700.0,
"step": 3290
},
{
"epoch": 0.4966887417218543,
"grad_norm": 0.8144225434992196,
"learning_rate": 1.0069235400361228e-05,
"loss": 0.7196,
"mean_token_accuracy": 0.8028386145830154,
"num_tokens": 215854392.0,
"step": 3300
},
{
"epoch": 0.49819385912101144,
"grad_norm": 0.8302919269226839,
"learning_rate": 1.0039133052378087e-05,
"loss": 0.6989,
"mean_token_accuracy": 0.8083430036902428,
"num_tokens": 216509752.0,
"step": 3310
},
{
"epoch": 0.4996989765201686,
"grad_norm": 0.859098671961538,
"learning_rate": 1.0009030704394944e-05,
"loss": 0.6834,
"mean_token_accuracy": 0.8109419390559196,
"num_tokens": 217162720.0,
"step": 3320
},
{
"epoch": 0.5012040939193257,
"grad_norm": 0.9124368923411915,
"learning_rate": 9.978928356411801e-06,
"loss": 0.753,
"mean_token_accuracy": 0.7990192532539367,
"num_tokens": 217816899.0,
"step": 3330
},
{
"epoch": 0.5027092113184829,
"grad_norm": 0.876541714838967,
"learning_rate": 9.948826008428659e-06,
"loss": 0.6949,
"mean_token_accuracy": 0.8079330369830131,
"num_tokens": 218469392.0,
"step": 3340
},
{
"epoch": 0.5042143287176399,
"grad_norm": 0.8237912305252111,
"learning_rate": 9.918723660445514e-06,
"loss": 0.7008,
"mean_token_accuracy": 0.8087765663862229,
"num_tokens": 219124752.0,
"step": 3350
},
{
"epoch": 0.5057194461167971,
"grad_norm": 0.8805985124073732,
"learning_rate": 9.888621312462373e-06,
"loss": 0.737,
"mean_token_accuracy": 0.7990552827715873,
"num_tokens": 219778166.0,
"step": 3360
},
{
"epoch": 0.5072245635159542,
"grad_norm": 0.8021126229570424,
"learning_rate": 9.85851896447923e-06,
"loss": 0.6971,
"mean_token_accuracy": 0.8074719130992889,
"num_tokens": 220431930.0,
"step": 3370
},
{
"epoch": 0.5087296809151114,
"grad_norm": 0.7972712654677118,
"learning_rate": 9.828416616496088e-06,
"loss": 0.6956,
"mean_token_accuracy": 0.8087294608354568,
"num_tokens": 221085830.0,
"step": 3380
},
{
"epoch": 0.5102347983142685,
"grad_norm": 0.8594868396631411,
"learning_rate": 9.798314268512945e-06,
"loss": 0.6783,
"mean_token_accuracy": 0.8133060529828071,
"num_tokens": 221741190.0,
"step": 3390
},
{
"epoch": 0.5117399157134257,
"grad_norm": 0.8195675779575067,
"learning_rate": 9.768211920529802e-06,
"loss": 0.7259,
"mean_token_accuracy": 0.8023128375411034,
"num_tokens": 222396550.0,
"step": 3400
},
{
"epoch": 0.5132450331125827,
"grad_norm": 0.8239469426553997,
"learning_rate": 9.73810957254666e-06,
"loss": 0.6731,
"mean_token_accuracy": 0.8141262501478195,
"num_tokens": 223050376.0,
"step": 3410
},
{
"epoch": 0.5147501505117399,
"grad_norm": 0.7983847311346082,
"learning_rate": 9.708007224563517e-06,
"loss": 0.7026,
"mean_token_accuracy": 0.8072569638490676,
"num_tokens": 223704579.0,
"step": 3420
},
{
"epoch": 0.516255267910897,
"grad_norm": 0.7960686997526522,
"learning_rate": 9.677904876580374e-06,
"loss": 0.7221,
"mean_token_accuracy": 0.8030448406934738,
"num_tokens": 224359850.0,
"step": 3430
},
{
"epoch": 0.5177603853100542,
"grad_norm": 0.8914834941832228,
"learning_rate": 9.647802528597231e-06,
"loss": 0.7401,
"mean_token_accuracy": 0.7997129946947098,
"num_tokens": 225015210.0,
"step": 3440
},
{
"epoch": 0.5192655027092113,
"grad_norm": 0.8819413244629241,
"learning_rate": 9.617700180614088e-06,
"loss": 0.7101,
"mean_token_accuracy": 0.806117182970047,
"num_tokens": 225670570.0,
"step": 3450
},
{
"epoch": 0.5207706201083685,
"grad_norm": 0.7500759430414385,
"learning_rate": 9.587597832630946e-06,
"loss": 0.7091,
"mean_token_accuracy": 0.8057623341679573,
"num_tokens": 226324513.0,
"step": 3460
},
{
"epoch": 0.5222757375075255,
"grad_norm": 0.8674975027620144,
"learning_rate": 9.557495484647803e-06,
"loss": 0.7347,
"mean_token_accuracy": 0.8012763857841492,
"num_tokens": 226979111.0,
"step": 3470
},
{
"epoch": 0.5237808549066827,
"grad_norm": 0.7763764417344521,
"learning_rate": 9.527393136664662e-06,
"loss": 0.6999,
"mean_token_accuracy": 0.8071465358138085,
"num_tokens": 227632704.0,
"step": 3480
},
{
"epoch": 0.5252859723058398,
"grad_norm": 0.7645444118571866,
"learning_rate": 9.497290788681517e-06,
"loss": 0.7076,
"mean_token_accuracy": 0.8069016903638839,
"num_tokens": 228286371.0,
"step": 3490
},
{
"epoch": 0.526791089704997,
"grad_norm": 0.7792334177211975,
"learning_rate": 9.467188440698375e-06,
"loss": 0.696,
"mean_token_accuracy": 0.8077277734875679,
"num_tokens": 228941731.0,
"step": 3500
},
{
"epoch": 0.5282962071041541,
"grad_norm": 0.7424501126051348,
"learning_rate": 9.437086092715234e-06,
"loss": 0.6985,
"mean_token_accuracy": 0.8082936689257622,
"num_tokens": 229595789.0,
"step": 3510
},
{
"epoch": 0.5298013245033113,
"grad_norm": 0.7130346276764096,
"learning_rate": 9.40698374473209e-06,
"loss": 0.7345,
"mean_token_accuracy": 0.8005663812160492,
"num_tokens": 230251149.0,
"step": 3520
},
{
"epoch": 0.5313064419024683,
"grad_norm": 0.8376943633378543,
"learning_rate": 9.376881396748946e-06,
"loss": 0.7263,
"mean_token_accuracy": 0.8031929656863213,
"num_tokens": 230902981.0,
"step": 3530
},
{
"epoch": 0.5328115593016255,
"grad_norm": 0.7293887942764449,
"learning_rate": 9.346779048765805e-06,
"loss": 0.7432,
"mean_token_accuracy": 0.7978022322058678,
"num_tokens": 231557254.0,
"step": 3540
},
{
"epoch": 0.5343166767007826,
"grad_norm": 0.7222945165317631,
"learning_rate": 9.316676700782663e-06,
"loss": 0.7092,
"mean_token_accuracy": 0.8053798228502274,
"num_tokens": 232212614.0,
"step": 3550
},
{
"epoch": 0.5358217940999398,
"grad_norm": 0.8062132119932597,
"learning_rate": 9.286574352799518e-06,
"loss": 0.6995,
"mean_token_accuracy": 0.8085808470845223,
"num_tokens": 232866874.0,
"step": 3560
},
{
"epoch": 0.537326911499097,
"grad_norm": 0.8382257521288697,
"learning_rate": 9.256472004816377e-06,
"loss": 0.716,
"mean_token_accuracy": 0.805452823638916,
"num_tokens": 233522182.0,
"step": 3570
},
{
"epoch": 0.5388320288982541,
"grad_norm": 0.9108274302981122,
"learning_rate": 9.226369656833234e-06,
"loss": 0.7359,
"mean_token_accuracy": 0.799033597111702,
"num_tokens": 234175081.0,
"step": 3580
},
{
"epoch": 0.5403371462974113,
"grad_norm": 0.7577515749647386,
"learning_rate": 9.196267308850092e-06,
"loss": 0.7382,
"mean_token_accuracy": 0.8003885015845299,
"num_tokens": 234830286.0,
"step": 3590
},
{
"epoch": 0.5418422636965683,
"grad_norm": 0.8170263948595179,
"learning_rate": 9.166164960866947e-06,
"loss": 0.7093,
"mean_token_accuracy": 0.8067599460482597,
"num_tokens": 235482479.0,
"step": 3600
},
{
"epoch": 0.5433473810957254,
"grad_norm": 0.8089345629962644,
"learning_rate": 9.136062612883806e-06,
"loss": 0.7013,
"mean_token_accuracy": 0.8066691935062409,
"num_tokens": 236134271.0,
"step": 3610
},
{
"epoch": 0.5448524984948826,
"grad_norm": 0.8096489243491711,
"learning_rate": 9.105960264900663e-06,
"loss": 0.671,
"mean_token_accuracy": 0.8146300122141839,
"num_tokens": 236788343.0,
"step": 3620
},
{
"epoch": 0.5463576158940397,
"grad_norm": 0.7712590758344209,
"learning_rate": 9.07585791691752e-06,
"loss": 0.6936,
"mean_token_accuracy": 0.8090818926692009,
"num_tokens": 237443703.0,
"step": 3630
},
{
"epoch": 0.5478627332931969,
"grad_norm": 0.7331289652614967,
"learning_rate": 9.045755568934378e-06,
"loss": 0.6879,
"mean_token_accuracy": 0.8104536637663842,
"num_tokens": 238097543.0,
"step": 3640
},
{
"epoch": 0.549367850692354,
"grad_norm": 0.7894029957389098,
"learning_rate": 9.015653220951235e-06,
"loss": 0.746,
"mean_token_accuracy": 0.7971894860267639,
"num_tokens": 238752903.0,
"step": 3650
},
{
"epoch": 0.5508729680915111,
"grad_norm": 0.7337667549435172,
"learning_rate": 8.985550872968092e-06,
"loss": 0.6906,
"mean_token_accuracy": 0.8105040520429612,
"num_tokens": 239407909.0,
"step": 3660
},
{
"epoch": 0.5523780854906682,
"grad_norm": 0.7114551170726308,
"learning_rate": 8.95544852498495e-06,
"loss": 0.7012,
"mean_token_accuracy": 0.8078293934464454,
"num_tokens": 240062694.0,
"step": 3670
},
{
"epoch": 0.5538832028898254,
"grad_norm": 0.8053101136590101,
"learning_rate": 8.925346177001807e-06,
"loss": 0.7128,
"mean_token_accuracy": 0.8046775788068772,
"num_tokens": 240718054.0,
"step": 3680
},
{
"epoch": 0.5553883202889826,
"grad_norm": 0.7743261506938821,
"learning_rate": 8.895243829018664e-06,
"loss": 0.6771,
"mean_token_accuracy": 0.8148664027452469,
"num_tokens": 241372573.0,
"step": 3690
},
{
"epoch": 0.5568934376881397,
"grad_norm": 0.7698400139152973,
"learning_rate": 8.865141481035521e-06,
"loss": 0.7031,
"mean_token_accuracy": 0.8072331473231316,
"num_tokens": 242027933.0,
"step": 3700
},
{
"epoch": 0.5583985550872969,
"grad_norm": 0.8364424915545474,
"learning_rate": 8.835039133052378e-06,
"loss": 0.7197,
"mean_token_accuracy": 0.8043941155076026,
"num_tokens": 242682455.0,
"step": 3710
},
{
"epoch": 0.5599036724864539,
"grad_norm": 0.7810243244440305,
"learning_rate": 8.804936785069236e-06,
"loss": 0.7323,
"mean_token_accuracy": 0.800259268283844,
"num_tokens": 243337005.0,
"step": 3720
},
{
"epoch": 0.561408789885611,
"grad_norm": 0.7361666809162144,
"learning_rate": 8.774834437086095e-06,
"loss": 0.6941,
"mean_token_accuracy": 0.8084026902914048,
"num_tokens": 243990915.0,
"step": 3730
},
{
"epoch": 0.5629139072847682,
"grad_norm": 0.7513727479514392,
"learning_rate": 8.74473208910295e-06,
"loss": 0.6985,
"mean_token_accuracy": 0.8086724117398262,
"num_tokens": 244645790.0,
"step": 3740
},
{
"epoch": 0.5644190246839254,
"grad_norm": 0.8119539552530328,
"learning_rate": 8.714629741119807e-06,
"loss": 0.7099,
"mean_token_accuracy": 0.806215389072895,
"num_tokens": 245300587.0,
"step": 3750
},
{
"epoch": 0.5659241420830825,
"grad_norm": 0.8230131090315613,
"learning_rate": 8.684527393136666e-06,
"loss": 0.6961,
"mean_token_accuracy": 0.807398022711277,
"num_tokens": 245955947.0,
"step": 3760
},
{
"epoch": 0.5674292594822397,
"grad_norm": 0.8657958087347106,
"learning_rate": 8.654425045153522e-06,
"loss": 0.6862,
"mean_token_accuracy": 0.8127268105745316,
"num_tokens": 246608889.0,
"step": 3770
},
{
"epoch": 0.5689343768813967,
"grad_norm": 0.8509981229148196,
"learning_rate": 8.62432269717038e-06,
"loss": 0.7104,
"mean_token_accuracy": 0.8052561670541764,
"num_tokens": 247264249.0,
"step": 3780
},
{
"epoch": 0.5704394942805538,
"grad_norm": 0.7620198197373849,
"learning_rate": 8.594220349187238e-06,
"loss": 0.7051,
"mean_token_accuracy": 0.8070444941520691,
"num_tokens": 247918973.0,
"step": 3790
},
{
"epoch": 0.571944611679711,
"grad_norm": 0.7589070350681284,
"learning_rate": 8.564118001204095e-06,
"loss": 0.7077,
"mean_token_accuracy": 0.8067975386977195,
"num_tokens": 248573625.0,
"step": 3800
},
{
"epoch": 0.5734497290788682,
"grad_norm": 0.8182641750295068,
"learning_rate": 8.534015653220951e-06,
"loss": 0.6953,
"mean_token_accuracy": 0.8103018119931221,
"num_tokens": 249226405.0,
"step": 3810
},
{
"epoch": 0.5749548464780253,
"grad_norm": 0.8830083523333203,
"learning_rate": 8.50391330523781e-06,
"loss": 0.7299,
"mean_token_accuracy": 0.8012143760919571,
"num_tokens": 249880176.0,
"step": 3820
},
{
"epoch": 0.5764599638771825,
"grad_norm": 0.8146412671634994,
"learning_rate": 8.473810957254667e-06,
"loss": 0.6632,
"mean_token_accuracy": 0.8136916980147362,
"num_tokens": 250532586.0,
"step": 3830
},
{
"epoch": 0.5779650812763396,
"grad_norm": 0.8528906066399552,
"learning_rate": 8.443708609271524e-06,
"loss": 0.6806,
"mean_token_accuracy": 0.8144933164119721,
"num_tokens": 251186836.0,
"step": 3840
},
{
"epoch": 0.5794701986754967,
"grad_norm": 0.8330221415506783,
"learning_rate": 8.41360626128838e-06,
"loss": 0.7665,
"mean_token_accuracy": 0.7930653065443038,
"num_tokens": 251840226.0,
"step": 3850
},
{
"epoch": 0.5809753160746538,
"grad_norm": 0.8745918315876232,
"learning_rate": 8.383503913305239e-06,
"loss": 0.709,
"mean_token_accuracy": 0.8056306362152099,
"num_tokens": 252494390.0,
"step": 3860
},
{
"epoch": 0.582480433473811,
"grad_norm": 0.9080680717665254,
"learning_rate": 8.353401565322096e-06,
"loss": 0.7128,
"mean_token_accuracy": 0.8059083595871925,
"num_tokens": 253147168.0,
"step": 3870
},
{
"epoch": 0.5839855508729681,
"grad_norm": 0.9177323805657555,
"learning_rate": 8.323299217338953e-06,
"loss": 0.7202,
"mean_token_accuracy": 0.8017226129770278,
"num_tokens": 253801671.0,
"step": 3880
},
{
"epoch": 0.5854906682721253,
"grad_norm": 0.7990780628042131,
"learning_rate": 8.29319686935581e-06,
"loss": 0.6931,
"mean_token_accuracy": 0.8101041629910469,
"num_tokens": 254455490.0,
"step": 3890
},
{
"epoch": 0.5869957856712824,
"grad_norm": 0.8161176055240229,
"learning_rate": 8.263094521372668e-06,
"loss": 0.7182,
"mean_token_accuracy": 0.8037081718444824,
"num_tokens": 255110850.0,
"step": 3900
},
{
"epoch": 0.5885009030704395,
"grad_norm": 0.7603871762926178,
"learning_rate": 8.232992173389525e-06,
"loss": 0.7476,
"mean_token_accuracy": 0.7982825547456741,
"num_tokens": 255764719.0,
"step": 3910
},
{
"epoch": 0.5900060204695966,
"grad_norm": 0.7820111562993072,
"learning_rate": 8.202889825406382e-06,
"loss": 0.7109,
"mean_token_accuracy": 0.806066806614399,
"num_tokens": 256420079.0,
"step": 3920
},
{
"epoch": 0.5915111378687538,
"grad_norm": 0.7059140890336615,
"learning_rate": 8.17278747742324e-06,
"loss": 0.7196,
"mean_token_accuracy": 0.8032822415232659,
"num_tokens": 257075439.0,
"step": 3930
},
{
"epoch": 0.5930162552679109,
"grad_norm": 0.7949546362150071,
"learning_rate": 8.142685129440097e-06,
"loss": 0.7478,
"mean_token_accuracy": 0.797232711315155,
"num_tokens": 257727872.0,
"step": 3940
},
{
"epoch": 0.5945213726670681,
"grad_norm": 0.7485881361967065,
"learning_rate": 8.112582781456954e-06,
"loss": 0.687,
"mean_token_accuracy": 0.8098583161830902,
"num_tokens": 258381731.0,
"step": 3950
},
{
"epoch": 0.5960264900662252,
"grad_norm": 0.7946141713531891,
"learning_rate": 8.082480433473811e-06,
"loss": 0.7007,
"mean_token_accuracy": 0.8085067048668861,
"num_tokens": 259035946.0,
"step": 3960
},
{
"epoch": 0.5975316074653823,
"grad_norm": 0.8230729638127956,
"learning_rate": 8.052378085490669e-06,
"loss": 0.7079,
"mean_token_accuracy": 0.8059585183858872,
"num_tokens": 259689357.0,
"step": 3970
},
{
"epoch": 0.5990367248645394,
"grad_norm": 0.8325239572087008,
"learning_rate": 8.022275737507526e-06,
"loss": 0.7097,
"mean_token_accuracy": 0.8045219138264657,
"num_tokens": 260343427.0,
"step": 3980
},
{
"epoch": 0.6005418422636966,
"grad_norm": 0.8434092525407488,
"learning_rate": 7.992173389524383e-06,
"loss": 0.7327,
"mean_token_accuracy": 0.8014701396226883,
"num_tokens": 260998787.0,
"step": 3990
},
{
"epoch": 0.6020469596628537,
"grad_norm": 0.7986843262365315,
"learning_rate": 7.96207104154124e-06,
"loss": 0.6969,
"mean_token_accuracy": 0.8094117864966393,
"num_tokens": 261652843.0,
"step": 4000
},
{
"epoch": 0.6035520770620109,
"grad_norm": 0.942507818939871,
"learning_rate": 7.9319686935581e-06,
"loss": 0.7051,
"mean_token_accuracy": 0.807870452105999,
"num_tokens": 262307531.0,
"step": 4010
},
{
"epoch": 0.605057194461168,
"grad_norm": 0.7691027289067521,
"learning_rate": 7.901866345574955e-06,
"loss": 0.6997,
"mean_token_accuracy": 0.8096072554588318,
"num_tokens": 262959593.0,
"step": 4020
},
{
"epoch": 0.606562311860325,
"grad_norm": 0.7411382069273657,
"learning_rate": 7.871763997591812e-06,
"loss": 0.6795,
"mean_token_accuracy": 0.8132504016160965,
"num_tokens": 263613258.0,
"step": 4030
},
{
"epoch": 0.6080674292594822,
"grad_norm": 0.7606334503663869,
"learning_rate": 7.841661649608671e-06,
"loss": 0.71,
"mean_token_accuracy": 0.8049027249217033,
"num_tokens": 264267363.0,
"step": 4040
},
{
"epoch": 0.6095725466586394,
"grad_norm": 0.740979735281585,
"learning_rate": 7.811559301625528e-06,
"loss": 0.6776,
"mean_token_accuracy": 0.811868742108345,
"num_tokens": 264919791.0,
"step": 4050
},
{
"epoch": 0.6110776640577965,
"grad_norm": 0.7287167342458135,
"learning_rate": 7.781456953642384e-06,
"loss": 0.6714,
"mean_token_accuracy": 0.8140159368515014,
"num_tokens": 265575151.0,
"step": 4060
},
{
"epoch": 0.6125827814569537,
"grad_norm": 0.769291891701047,
"learning_rate": 7.751354605659243e-06,
"loss": 0.6741,
"mean_token_accuracy": 0.8136517152190208,
"num_tokens": 266229797.0,
"step": 4070
},
{
"epoch": 0.6140878988561108,
"grad_norm": 0.8750720681891121,
"learning_rate": 7.7212522576761e-06,
"loss": 0.662,
"mean_token_accuracy": 0.8171534687280655,
"num_tokens": 266877923.0,
"step": 4080
},
{
"epoch": 0.615593016255268,
"grad_norm": 0.7883482239875798,
"learning_rate": 7.691149909692957e-06,
"loss": 0.6787,
"mean_token_accuracy": 0.8132388830184937,
"num_tokens": 267533283.0,
"step": 4090
},
{
"epoch": 0.617098133654425,
"grad_norm": 0.8284914998217778,
"learning_rate": 7.661047561709813e-06,
"loss": 0.7326,
"mean_token_accuracy": 0.8027213707566261,
"num_tokens": 268187229.0,
"step": 4100
},
{
"epoch": 0.6186032510535822,
"grad_norm": 0.7764151172700101,
"learning_rate": 7.630945213726672e-06,
"loss": 0.7139,
"mean_token_accuracy": 0.8039753317832947,
"num_tokens": 268842589.0,
"step": 4110
},
{
"epoch": 0.6201083684527393,
"grad_norm": 0.8181806298737985,
"learning_rate": 7.600842865743528e-06,
"loss": 0.6847,
"mean_token_accuracy": 0.810582558810711,
"num_tokens": 269497949.0,
"step": 4120
},
{
"epoch": 0.6216134858518965,
"grad_norm": 0.7990406941932666,
"learning_rate": 7.570740517760385e-06,
"loss": 0.6725,
"mean_token_accuracy": 0.8125614732503891,
"num_tokens": 270152948.0,
"step": 4130
},
{
"epoch": 0.6231186032510536,
"grad_norm": 0.8679862562642979,
"learning_rate": 7.5406381697772435e-06,
"loss": 0.7086,
"mean_token_accuracy": 0.806426303088665,
"num_tokens": 270808241.0,
"step": 4140
},
{
"epoch": 0.6246237206502108,
"grad_norm": 0.9042351682289728,
"learning_rate": 7.510535821794101e-06,
"loss": 0.7317,
"mean_token_accuracy": 0.8006155714392662,
"num_tokens": 271461947.0,
"step": 4150
},
{
"epoch": 0.6261288380493678,
"grad_norm": 0.8428199817462217,
"learning_rate": 7.480433473810957e-06,
"loss": 0.6875,
"mean_token_accuracy": 0.8104069977998734,
"num_tokens": 272117307.0,
"step": 4160
},
{
"epoch": 0.627633955448525,
"grad_norm": 0.7636935493635175,
"learning_rate": 7.450331125827815e-06,
"loss": 0.711,
"mean_token_accuracy": 0.8058418169617653,
"num_tokens": 272771056.0,
"step": 4170
},
{
"epoch": 0.6291390728476821,
"grad_norm": 0.7308292189453819,
"learning_rate": 7.4202287778446725e-06,
"loss": 0.6874,
"mean_token_accuracy": 0.8109762862324714,
"num_tokens": 273425542.0,
"step": 4180
},
{
"epoch": 0.6306441902468393,
"grad_norm": 0.7974301033969676,
"learning_rate": 7.39012642986153e-06,
"loss": 0.7122,
"mean_token_accuracy": 0.8060316890478134,
"num_tokens": 274080902.0,
"step": 4190
},
{
"epoch": 0.6321493076459964,
"grad_norm": 0.8281909503451766,
"learning_rate": 7.360024081878388e-06,
"loss": 0.6789,
"mean_token_accuracy": 0.8130032166838645,
"num_tokens": 274734247.0,
"step": 4200
},
{
"epoch": 0.6336544250451536,
"grad_norm": 0.8469223277668484,
"learning_rate": 7.329921733895244e-06,
"loss": 0.7208,
"mean_token_accuracy": 0.8034349054098129,
"num_tokens": 275389607.0,
"step": 4210
},
{
"epoch": 0.6351595424443106,
"grad_norm": 0.8659132521429633,
"learning_rate": 7.2998193859121015e-06,
"loss": 0.7191,
"mean_token_accuracy": 0.8031769096851349,
"num_tokens": 276044967.0,
"step": 4220
},
{
"epoch": 0.6366646598434678,
"grad_norm": 0.8420814798005133,
"learning_rate": 7.2697170379289596e-06,
"loss": 0.7292,
"mean_token_accuracy": 0.8008877292275429,
"num_tokens": 276699487.0,
"step": 4230
},
{
"epoch": 0.6381697772426249,
"grad_norm": 0.8518963463964698,
"learning_rate": 7.239614689945817e-06,
"loss": 0.7155,
"mean_token_accuracy": 0.8045470133423805,
"num_tokens": 277353875.0,
"step": 4240
},
{
"epoch": 0.6396748946417821,
"grad_norm": 0.8352911877693019,
"learning_rate": 7.209512341962673e-06,
"loss": 0.668,
"mean_token_accuracy": 0.8157885015010834,
"num_tokens": 278007391.0,
"step": 4250
},
{
"epoch": 0.6411800120409392,
"grad_norm": 0.7890199230141322,
"learning_rate": 7.179409993979531e-06,
"loss": 0.7163,
"mean_token_accuracy": 0.8045294061303139,
"num_tokens": 278660967.0,
"step": 4260
},
{
"epoch": 0.6426851294400964,
"grad_norm": 0.7907907383344028,
"learning_rate": 7.1493076459963886e-06,
"loss": 0.6951,
"mean_token_accuracy": 0.8099789813160896,
"num_tokens": 279315319.0,
"step": 4270
},
{
"epoch": 0.6441902468392534,
"grad_norm": 0.7752324901881567,
"learning_rate": 7.119205298013245e-06,
"loss": 0.6796,
"mean_token_accuracy": 0.8136557549238205,
"num_tokens": 279970002.0,
"step": 4280
},
{
"epoch": 0.6456953642384106,
"grad_norm": 0.7904244583768335,
"learning_rate": 7.089102950030103e-06,
"loss": 0.6932,
"mean_token_accuracy": 0.8082010626792908,
"num_tokens": 280621981.0,
"step": 4290
},
{
"epoch": 0.6472004816375677,
"grad_norm": 0.7571107948166566,
"learning_rate": 7.05900060204696e-06,
"loss": 0.6714,
"mean_token_accuracy": 0.8147674828767777,
"num_tokens": 281275796.0,
"step": 4300
},
{
"epoch": 0.6487055990367249,
"grad_norm": 0.7519819004545625,
"learning_rate": 7.0288982540638175e-06,
"loss": 0.6843,
"mean_token_accuracy": 0.8124752193689346,
"num_tokens": 281930076.0,
"step": 4310
},
{
"epoch": 0.650210716435882,
"grad_norm": 0.8136271130646697,
"learning_rate": 6.998795906080676e-06,
"loss": 0.6902,
"mean_token_accuracy": 0.8109596386551857,
"num_tokens": 282585436.0,
"step": 4320
},
{
"epoch": 0.6517158338350392,
"grad_norm": 0.7542024433510576,
"learning_rate": 6.968693558097532e-06,
"loss": 0.6853,
"mean_token_accuracy": 0.8105558544397354,
"num_tokens": 283239814.0,
"step": 4330
},
{
"epoch": 0.6532209512341962,
"grad_norm": 0.8232441623305137,
"learning_rate": 6.938591210114389e-06,
"loss": 0.6962,
"mean_token_accuracy": 0.8088379830121994,
"num_tokens": 283893844.0,
"step": 4340
},
{
"epoch": 0.6547260686333534,
"grad_norm": 0.8848354735677997,
"learning_rate": 6.9084888621312465e-06,
"loss": 0.7614,
"mean_token_accuracy": 0.7953187227249146,
"num_tokens": 284548116.0,
"step": 4350
},
{
"epoch": 0.6562311860325105,
"grad_norm": 0.7946687427901056,
"learning_rate": 6.878386514148105e-06,
"loss": 0.6849,
"mean_token_accuracy": 0.809443698823452,
"num_tokens": 285203476.0,
"step": 4360
},
{
"epoch": 0.6577363034316677,
"grad_norm": 0.8589342176440701,
"learning_rate": 6.848284166164961e-06,
"loss": 0.7309,
"mean_token_accuracy": 0.8002122029662132,
"num_tokens": 285858836.0,
"step": 4370
},
{
"epoch": 0.6592414208308248,
"grad_norm": 0.7808127559059683,
"learning_rate": 6.818181818181818e-06,
"loss": 0.6558,
"mean_token_accuracy": 0.8172375440597535,
"num_tokens": 286513731.0,
"step": 4380
},
{
"epoch": 0.660746538229982,
"grad_norm": 0.7929713144978754,
"learning_rate": 6.788079470198676e-06,
"loss": 0.6725,
"mean_token_accuracy": 0.8145379722118378,
"num_tokens": 287166163.0,
"step": 4390
},
{
"epoch": 0.6622516556291391,
"grad_norm": 0.7853169035829088,
"learning_rate": 6.757977122215534e-06,
"loss": 0.7032,
"mean_token_accuracy": 0.8071265637874603,
"num_tokens": 287819834.0,
"step": 4400
},
{
"epoch": 0.6637567730282962,
"grad_norm": 0.7581464532781553,
"learning_rate": 6.72787477423239e-06,
"loss": 0.6989,
"mean_token_accuracy": 0.8081767991185188,
"num_tokens": 288474034.0,
"step": 4410
},
{
"epoch": 0.6652618904274533,
"grad_norm": 0.815968078478177,
"learning_rate": 6.697772426249248e-06,
"loss": 0.6781,
"mean_token_accuracy": 0.8142720222473144,
"num_tokens": 289128588.0,
"step": 4420
},
{
"epoch": 0.6667670078266105,
"grad_norm": 0.6938381862941432,
"learning_rate": 6.667670078266105e-06,
"loss": 0.6601,
"mean_token_accuracy": 0.8173514276742935,
"num_tokens": 289782786.0,
"step": 4430
},
{
"epoch": 0.6682721252257676,
"grad_norm": 0.732123530660249,
"learning_rate": 6.637567730282963e-06,
"loss": 0.7348,
"mean_token_accuracy": 0.8012821659445762,
"num_tokens": 290437574.0,
"step": 4440
},
{
"epoch": 0.6697772426249248,
"grad_norm": 0.8281427123488975,
"learning_rate": 6.60746538229982e-06,
"loss": 0.6719,
"mean_token_accuracy": 0.813536812365055,
"num_tokens": 291091267.0,
"step": 4450
},
{
"epoch": 0.6712823600240819,
"grad_norm": 0.779706260867014,
"learning_rate": 6.577363034316677e-06,
"loss": 0.6524,
"mean_token_accuracy": 0.8184206783771515,
"num_tokens": 291745182.0,
"step": 4460
},
{
"epoch": 0.672787477423239,
"grad_norm": 0.8908264453869105,
"learning_rate": 6.547260686333534e-06,
"loss": 0.7097,
"mean_token_accuracy": 0.8062851145863533,
"num_tokens": 292400542.0,
"step": 4470
},
{
"epoch": 0.6742925948223961,
"grad_norm": 0.7475757803731065,
"learning_rate": 6.5171583383503924e-06,
"loss": 0.7158,
"mean_token_accuracy": 0.8046679839491844,
"num_tokens": 293053114.0,
"step": 4480
},
{
"epoch": 0.6757977122215533,
"grad_norm": 0.7390432299118334,
"learning_rate": 6.487055990367249e-06,
"loss": 0.674,
"mean_token_accuracy": 0.8137899979948997,
"num_tokens": 293708474.0,
"step": 4490
},
{
"epoch": 0.6773028296207104,
"grad_norm": 0.803312400596195,
"learning_rate": 6.456953642384106e-06,
"loss": 0.7021,
"mean_token_accuracy": 0.8084595799446106,
"num_tokens": 294361973.0,
"step": 4500
},
{
"epoch": 0.6788079470198676,
"grad_norm": 0.7831651665240028,
"learning_rate": 6.426851294400964e-06,
"loss": 0.669,
"mean_token_accuracy": 0.8149545326828956,
"num_tokens": 295015304.0,
"step": 4510
},
{
"epoch": 0.6803130644190247,
"grad_norm": 0.7698329146574737,
"learning_rate": 6.3967489464178214e-06,
"loss": 0.6704,
"mean_token_accuracy": 0.8154127985239029,
"num_tokens": 295670664.0,
"step": 4520
},
{
"epoch": 0.6818181818181818,
"grad_norm": 0.7685548489665934,
"learning_rate": 6.366646598434678e-06,
"loss": 0.6691,
"mean_token_accuracy": 0.8155076310038567,
"num_tokens": 296324909.0,
"step": 4530
},
{
"epoch": 0.6833232992173389,
"grad_norm": 0.953070332907545,
"learning_rate": 6.336544250451536e-06,
"loss": 0.7428,
"mean_token_accuracy": 0.7991679951548576,
"num_tokens": 296980269.0,
"step": 4540
},
{
"epoch": 0.6848284166164961,
"grad_norm": 0.8190164713090095,
"learning_rate": 6.306441902468393e-06,
"loss": 0.678,
"mean_token_accuracy": 0.8134281873703003,
"num_tokens": 297635629.0,
"step": 4550
},
{
"epoch": 0.6863335340156532,
"grad_norm": 0.7992478641110825,
"learning_rate": 6.27633955448525e-06,
"loss": 0.6584,
"mean_token_accuracy": 0.8175561770796775,
"num_tokens": 298290989.0,
"step": 4560
},
{
"epoch": 0.6878386514148104,
"grad_norm": 0.755402637130093,
"learning_rate": 6.2462372065021085e-06,
"loss": 0.6935,
"mean_token_accuracy": 0.8109642148017884,
"num_tokens": 298946349.0,
"step": 4570
},
{
"epoch": 0.6893437688139675,
"grad_norm": 0.8090465130369789,
"learning_rate": 6.216134858518965e-06,
"loss": 0.6756,
"mean_token_accuracy": 0.8144601851701736,
"num_tokens": 299601709.0,
"step": 4580
},
{
"epoch": 0.6908488862131246,
"grad_norm": 0.7051718017563176,
"learning_rate": 6.186032510535822e-06,
"loss": 0.6944,
"mean_token_accuracy": 0.8100238159298897,
"num_tokens": 300256713.0,
"step": 4590
},
{
"epoch": 0.6923540036122817,
"grad_norm": 0.8023442611978678,
"learning_rate": 6.15593016255268e-06,
"loss": 0.7102,
"mean_token_accuracy": 0.8061564579606056,
"num_tokens": 300910315.0,
"step": 4600
},
{
"epoch": 0.6938591210114389,
"grad_norm": 0.6592248864741374,
"learning_rate": 6.1258278145695375e-06,
"loss": 0.6394,
"mean_token_accuracy": 0.8232551902532578,
"num_tokens": 301563354.0,
"step": 4610
},
{
"epoch": 0.695364238410596,
"grad_norm": 0.8216711999766739,
"learning_rate": 6.095725466586394e-06,
"loss": 0.7033,
"mean_token_accuracy": 0.8067058518528938,
"num_tokens": 302218621.0,
"step": 4620
},
{
"epoch": 0.6968693558097532,
"grad_norm": 0.7450543082358431,
"learning_rate": 6.065623118603251e-06,
"loss": 0.6414,
"mean_token_accuracy": 0.8213235855102539,
"num_tokens": 302872247.0,
"step": 4630
},
{
"epoch": 0.6983744732089103,
"grad_norm": 0.7184327185317253,
"learning_rate": 6.035520770620109e-06,
"loss": 0.6882,
"mean_token_accuracy": 0.8115840300917625,
"num_tokens": 303527607.0,
"step": 4640
},
{
"epoch": 0.6998795906080675,
"grad_norm": 0.6945338421799874,
"learning_rate": 6.005418422636966e-06,
"loss": 0.6713,
"mean_token_accuracy": 0.81469986140728,
"num_tokens": 304182967.0,
"step": 4650
},
{
"epoch": 0.7013847080072245,
"grad_norm": 0.840530737970437,
"learning_rate": 5.975316074653823e-06,
"loss": 0.7247,
"mean_token_accuracy": 0.802819675207138,
"num_tokens": 304838327.0,
"step": 4660
},
{
"epoch": 0.7028898254063817,
"grad_norm": 0.7290847678467367,
"learning_rate": 5.945213726670681e-06,
"loss": 0.6556,
"mean_token_accuracy": 0.8193713694810867,
"num_tokens": 305492953.0,
"step": 4670
},
{
"epoch": 0.7043949428055388,
"grad_norm": 0.7877331459001613,
"learning_rate": 5.915111378687538e-06,
"loss": 0.6846,
"mean_token_accuracy": 0.8122801646590233,
"num_tokens": 306148313.0,
"step": 4680
},
{
"epoch": 0.705900060204696,
"grad_norm": 0.7764906463226239,
"learning_rate": 5.885009030704395e-06,
"loss": 0.6865,
"mean_token_accuracy": 0.810577142238617,
"num_tokens": 306801862.0,
"step": 4690
},
{
"epoch": 0.7074051776038531,
"grad_norm": 0.7484928423002017,
"learning_rate": 5.854906682721253e-06,
"loss": 0.6511,
"mean_token_accuracy": 0.818084391951561,
"num_tokens": 307457222.0,
"step": 4700
},
{
"epoch": 0.7089102950030103,
"grad_norm": 0.8535454844390912,
"learning_rate": 5.82480433473811e-06,
"loss": 0.6695,
"mean_token_accuracy": 0.814570102095604,
"num_tokens": 308112582.0,
"step": 4710
},
{
"epoch": 0.7104154124021673,
"grad_norm": 0.835114125014516,
"learning_rate": 5.794701986754967e-06,
"loss": 0.7017,
"mean_token_accuracy": 0.8075006246566773,
"num_tokens": 308765597.0,
"step": 4720
},
{
"epoch": 0.7119205298013245,
"grad_norm": 0.8008764952393119,
"learning_rate": 5.764599638771825e-06,
"loss": 0.6593,
"mean_token_accuracy": 0.8166110992431641,
"num_tokens": 309420016.0,
"step": 4730
},
{
"epoch": 0.7134256472004816,
"grad_norm": 0.7196592850104424,
"learning_rate": 5.734497290788682e-06,
"loss": 0.6586,
"mean_token_accuracy": 0.8172907829284668,
"num_tokens": 310073088.0,
"step": 4740
},
{
"epoch": 0.7149307645996388,
"grad_norm": 0.7935183204776026,
"learning_rate": 5.704394942805539e-06,
"loss": 0.7041,
"mean_token_accuracy": 0.8055432423949241,
"num_tokens": 310727014.0,
"step": 4750
},
{
"epoch": 0.7164358819987959,
"grad_norm": 0.7208961075633612,
"learning_rate": 5.674292594822397e-06,
"loss": 0.7052,
"mean_token_accuracy": 0.8065466269850731,
"num_tokens": 311382141.0,
"step": 4760
},
{
"epoch": 0.7179409993979531,
"grad_norm": 0.8241431117457545,
"learning_rate": 5.644190246839254e-06,
"loss": 0.7042,
"mean_token_accuracy": 0.8074371844530106,
"num_tokens": 312034916.0,
"step": 4770
},
{
"epoch": 0.7194461167971101,
"grad_norm": 0.7811410686185015,
"learning_rate": 5.614087898856111e-06,
"loss": 0.6767,
"mean_token_accuracy": 0.8120527639985085,
"num_tokens": 312689580.0,
"step": 4780
},
{
"epoch": 0.7209512341962673,
"grad_norm": 0.7065153170908021,
"learning_rate": 5.583985550872969e-06,
"loss": 0.7057,
"mean_token_accuracy": 0.8083510205149651,
"num_tokens": 313342728.0,
"step": 4790
},
{
"epoch": 0.7224563515954244,
"grad_norm": 0.8288220684385291,
"learning_rate": 5.553883202889826e-06,
"loss": 0.7298,
"mean_token_accuracy": 0.8011138662695885,
"num_tokens": 313995093.0,
"step": 4800
},
{
"epoch": 0.7239614689945816,
"grad_norm": 0.7676652284446706,
"learning_rate": 5.523780854906683e-06,
"loss": 0.6931,
"mean_token_accuracy": 0.811854538321495,
"num_tokens": 314648881.0,
"step": 4810
},
{
"epoch": 0.7254665863937387,
"grad_norm": 0.7885991111534025,
"learning_rate": 5.493678506923541e-06,
"loss": 0.6705,
"mean_token_accuracy": 0.8141166970133782,
"num_tokens": 315304241.0,
"step": 4820
},
{
"epoch": 0.7269717037928959,
"grad_norm": 0.8161757673596582,
"learning_rate": 5.463576158940398e-06,
"loss": 0.6762,
"mean_token_accuracy": 0.814816965162754,
"num_tokens": 315958535.0,
"step": 4830
},
{
"epoch": 0.7284768211920529,
"grad_norm": 0.7968418972928049,
"learning_rate": 5.433473810957255e-06,
"loss": 0.6881,
"mean_token_accuracy": 0.8102676823735238,
"num_tokens": 316613446.0,
"step": 4840
},
{
"epoch": 0.7299819385912101,
"grad_norm": 0.8092373608084555,
"learning_rate": 5.403371462974113e-06,
"loss": 0.6976,
"mean_token_accuracy": 0.8103517308831215,
"num_tokens": 317268325.0,
"step": 4850
},
{
"epoch": 0.7314870559903672,
"grad_norm": 0.8801000450815974,
"learning_rate": 5.3732691149909695e-06,
"loss": 0.6517,
"mean_token_accuracy": 0.819042882323265,
"num_tokens": 317921988.0,
"step": 4860
},
{
"epoch": 0.7329921733895244,
"grad_norm": 0.8380758783209445,
"learning_rate": 5.343166767007827e-06,
"loss": 0.6978,
"mean_token_accuracy": 0.8100692957639695,
"num_tokens": 318575553.0,
"step": 4870
},
{
"epoch": 0.7344972907886815,
"grad_norm": 0.7584475333652385,
"learning_rate": 5.313064419024684e-06,
"loss": 0.6857,
"mean_token_accuracy": 0.8126893028616905,
"num_tokens": 319230913.0,
"step": 4880
},
{
"epoch": 0.7360024081878387,
"grad_norm": 0.7631070843965636,
"learning_rate": 5.282962071041542e-06,
"loss": 0.6765,
"mean_token_accuracy": 0.8138569176197052,
"num_tokens": 319883805.0,
"step": 4890
},
{
"epoch": 0.7375075255869958,
"grad_norm": 0.718262307466775,
"learning_rate": 5.2528597230583985e-06,
"loss": 0.6454,
"mean_token_accuracy": 0.820010906457901,
"num_tokens": 320538065.0,
"step": 4900
},
{
"epoch": 0.7390126429861529,
"grad_norm": 0.7493033725192411,
"learning_rate": 5.222757375075256e-06,
"loss": 0.6806,
"mean_token_accuracy": 0.8114639312028885,
"num_tokens": 321193353.0,
"step": 4910
},
{
"epoch": 0.74051776038531,
"grad_norm": 0.7653187902059273,
"learning_rate": 5.192655027092114e-06,
"loss": 0.7073,
"mean_token_accuracy": 0.8058588966727257,
"num_tokens": 321847074.0,
"step": 4920
},
{
"epoch": 0.7420228777844672,
"grad_norm": 0.8171689434630046,
"learning_rate": 5.162552679108971e-06,
"loss": 0.6806,
"mean_token_accuracy": 0.8125156402587891,
"num_tokens": 322498780.0,
"step": 4930
},
{
"epoch": 0.7435279951836243,
"grad_norm": 0.8542271561789201,
"learning_rate": 5.1324503311258275e-06,
"loss": 0.7199,
"mean_token_accuracy": 0.8045185759663582,
"num_tokens": 323151125.0,
"step": 4940
},
{
"epoch": 0.7450331125827815,
"grad_norm": 0.7132356469162935,
"learning_rate": 5.102347983142686e-06,
"loss": 0.6765,
"mean_token_accuracy": 0.8129563733935357,
"num_tokens": 323806449.0,
"step": 4950
},
{
"epoch": 0.7465382299819386,
"grad_norm": 0.7779913945784402,
"learning_rate": 5.072245635159543e-06,
"loss": 0.675,
"mean_token_accuracy": 0.8126250460743905,
"num_tokens": 324460173.0,
"step": 4960
},
{
"epoch": 0.7480433473810957,
"grad_norm": 0.7915503234573145,
"learning_rate": 5.0421432871764e-06,
"loss": 0.7458,
"mean_token_accuracy": 0.7976020961999893,
"num_tokens": 325113568.0,
"step": 4970
},
{
"epoch": 0.7495484647802528,
"grad_norm": 0.7769387021454827,
"learning_rate": 5.012040939193258e-06,
"loss": 0.6686,
"mean_token_accuracy": 0.8162689179182052,
"num_tokens": 325767952.0,
"step": 4980
},
{
"epoch": 0.75105358217941,
"grad_norm": 0.8144124018793544,
"learning_rate": 4.981938591210115e-06,
"loss": 0.7011,
"mean_token_accuracy": 0.8070224747061729,
"num_tokens": 326423312.0,
"step": 4990
},
{
"epoch": 0.7525586995785671,
"grad_norm": 0.7720856727719556,
"learning_rate": 4.951836243226973e-06,
"loss": 0.677,
"mean_token_accuracy": 0.8127442598342896,
"num_tokens": 327078672.0,
"step": 5000
},
{
"epoch": 0.7540638169777243,
"grad_norm": 0.8464803974508197,
"learning_rate": 4.921733895243829e-06,
"loss": 0.7111,
"mean_token_accuracy": 0.8053447112441063,
"num_tokens": 327734032.0,
"step": 5010
},
{
"epoch": 0.7555689343768814,
"grad_norm": 0.8002088077987107,
"learning_rate": 4.891631547260687e-06,
"loss": 0.7123,
"mean_token_accuracy": 0.8058378130197525,
"num_tokens": 328389392.0,
"step": 5020
},
{
"epoch": 0.7570740517760385,
"grad_norm": 0.7468644534166601,
"learning_rate": 4.861529199277544e-06,
"loss": 0.6742,
"mean_token_accuracy": 0.8150235041975975,
"num_tokens": 329044752.0,
"step": 5030
},
{
"epoch": 0.7585791691751956,
"grad_norm": 0.8015537666197009,
"learning_rate": 4.831426851294402e-06,
"loss": 0.6862,
"mean_token_accuracy": 0.811229458451271,
"num_tokens": 329700020.0,
"step": 5040
},
{
"epoch": 0.7600842865743528,
"grad_norm": 0.7838531070629737,
"learning_rate": 4.801324503311259e-06,
"loss": 0.6858,
"mean_token_accuracy": 0.8106863215565682,
"num_tokens": 330354156.0,
"step": 5050
},
{
"epoch": 0.7615894039735099,
"grad_norm": 0.7591951467548631,
"learning_rate": 4.771222155328115e-06,
"loss": 0.7081,
"mean_token_accuracy": 0.806295795738697,
"num_tokens": 331009516.0,
"step": 5060
},
{
"epoch": 0.7630945213726671,
"grad_norm": 0.7881977216420744,
"learning_rate": 4.741119807344973e-06,
"loss": 0.6688,
"mean_token_accuracy": 0.8141529381275177,
"num_tokens": 331664516.0,
"step": 5070
},
{
"epoch": 0.7645996387718242,
"grad_norm": 0.8045258451588279,
"learning_rate": 4.711017459361831e-06,
"loss": 0.6753,
"mean_token_accuracy": 0.8127610564231873,
"num_tokens": 332319876.0,
"step": 5080
},
{
"epoch": 0.7661047561709813,
"grad_norm": 0.7950453594601595,
"learning_rate": 4.680915111378688e-06,
"loss": 0.6449,
"mean_token_accuracy": 0.8206124827265739,
"num_tokens": 332975236.0,
"step": 5090
},
{
"epoch": 0.7676098735701384,
"grad_norm": 0.83782481780607,
"learning_rate": 4.650812763395545e-06,
"loss": 0.6954,
"mean_token_accuracy": 0.8100070223212242,
"num_tokens": 333630596.0,
"step": 5100
},
{
"epoch": 0.7691149909692956,
"grad_norm": 0.765461042500271,
"learning_rate": 4.620710415412402e-06,
"loss": 0.6878,
"mean_token_accuracy": 0.8108726218342781,
"num_tokens": 334285956.0,
"step": 5110
},
{
"epoch": 0.7706201083684527,
"grad_norm": 0.8595361462765182,
"learning_rate": 4.59060806742926e-06,
"loss": 0.6743,
"mean_token_accuracy": 0.8132791504263878,
"num_tokens": 334940764.0,
"step": 5120
},
{
"epoch": 0.7721252257676099,
"grad_norm": 0.7951576328256589,
"learning_rate": 4.560505719446117e-06,
"loss": 0.6788,
"mean_token_accuracy": 0.8132970303297042,
"num_tokens": 335592227.0,
"step": 5130
},
{
"epoch": 0.773630343166767,
"grad_norm": 0.8314496227848391,
"learning_rate": 4.530403371462975e-06,
"loss": 0.6944,
"mean_token_accuracy": 0.8085516512393951,
"num_tokens": 336244385.0,
"step": 5140
},
{
"epoch": 0.7751354605659242,
"grad_norm": 0.7849604988449984,
"learning_rate": 4.500301023479831e-06,
"loss": 0.6596,
"mean_token_accuracy": 0.8182275414466857,
"num_tokens": 336899083.0,
"step": 5150
},
{
"epoch": 0.7766405779650812,
"grad_norm": 0.7905666003585319,
"learning_rate": 4.4701986754966895e-06,
"loss": 0.6735,
"mean_token_accuracy": 0.8137266218662262,
"num_tokens": 337553265.0,
"step": 5160
},
{
"epoch": 0.7781456953642384,
"grad_norm": 0.795479794053426,
"learning_rate": 4.440096327513547e-06,
"loss": 0.6717,
"mean_token_accuracy": 0.8137852057814599,
"num_tokens": 338207675.0,
"step": 5170
},
{
"epoch": 0.7796508127633955,
"grad_norm": 0.7289284088287575,
"learning_rate": 4.409993979530404e-06,
"loss": 0.652,
"mean_token_accuracy": 0.8170636877417564,
"num_tokens": 338861544.0,
"step": 5180
},
{
"epoch": 0.7811559301625527,
"grad_norm": 0.7196535550987029,
"learning_rate": 4.379891631547261e-06,
"loss": 0.6521,
"mean_token_accuracy": 0.8188467666506767,
"num_tokens": 339516863.0,
"step": 5190
},
{
"epoch": 0.7826610475617098,
"grad_norm": 0.7830400455502452,
"learning_rate": 4.3497892835641185e-06,
"loss": 0.7021,
"mean_token_accuracy": 0.8078183531761169,
"num_tokens": 340167656.0,
"step": 5200
},
{
"epoch": 0.784166164960867,
"grad_norm": 0.7603191091855747,
"learning_rate": 4.319686935580976e-06,
"loss": 0.659,
"mean_token_accuracy": 0.8158310890197754,
"num_tokens": 340823016.0,
"step": 5210
},
{
"epoch": 0.785671282360024,
"grad_norm": 0.7480631602536956,
"learning_rate": 4.289584587597833e-06,
"loss": 0.6682,
"mean_token_accuracy": 0.8159776479005814,
"num_tokens": 341478376.0,
"step": 5220
},
{
"epoch": 0.7871763997591812,
"grad_norm": 0.7270903162865268,
"learning_rate": 4.25948223961469e-06,
"loss": 0.634,
"mean_token_accuracy": 0.8241108819842339,
"num_tokens": 342132186.0,
"step": 5230
},
{
"epoch": 0.7886815171583383,
"grad_norm": 0.8122645136251377,
"learning_rate": 4.2293798916315475e-06,
"loss": 0.6751,
"mean_token_accuracy": 0.8132022470235825,
"num_tokens": 342787546.0,
"step": 5240
},
{
"epoch": 0.7901866345574955,
"grad_norm": 0.7814481654035551,
"learning_rate": 4.199277543648405e-06,
"loss": 0.6769,
"mean_token_accuracy": 0.8136052757501602,
"num_tokens": 343442906.0,
"step": 5250
},
{
"epoch": 0.7916917519566526,
"grad_norm": 0.7611291029949178,
"learning_rate": 4.169175195665262e-06,
"loss": 0.7009,
"mean_token_accuracy": 0.8087088346481324,
"num_tokens": 344096020.0,
"step": 5260
},
{
"epoch": 0.7931968693558098,
"grad_norm": 0.7700160646970695,
"learning_rate": 4.139072847682119e-06,
"loss": 0.6854,
"mean_token_accuracy": 0.8107923865318298,
"num_tokens": 344749548.0,
"step": 5270
},
{
"epoch": 0.7947019867549668,
"grad_norm": 0.7887413891550367,
"learning_rate": 4.108970499698977e-06,
"loss": 0.6733,
"mean_token_accuracy": 0.8150668799877167,
"num_tokens": 345402893.0,
"step": 5280
},
{
"epoch": 0.796207104154124,
"grad_norm": 0.7829705708780775,
"learning_rate": 4.078868151715834e-06,
"loss": 0.6863,
"mean_token_accuracy": 0.8105948135256767,
"num_tokens": 346056278.0,
"step": 5290
},
{
"epoch": 0.7977122215532811,
"grad_norm": 0.8031666958040956,
"learning_rate": 4.048765803732692e-06,
"loss": 0.649,
"mean_token_accuracy": 0.8193706855177879,
"num_tokens": 346709619.0,
"step": 5300
},
{
"epoch": 0.7992173389524383,
"grad_norm": 0.802978454866161,
"learning_rate": 4.018663455749548e-06,
"loss": 0.6909,
"mean_token_accuracy": 0.8110115423798561,
"num_tokens": 347364979.0,
"step": 5310
},
{
"epoch": 0.8007224563515954,
"grad_norm": 0.6952179924863313,
"learning_rate": 3.988561107766406e-06,
"loss": 0.6595,
"mean_token_accuracy": 0.8168508380651474,
"num_tokens": 348019759.0,
"step": 5320
},
{
"epoch": 0.8022275737507526,
"grad_norm": 0.7738986823200219,
"learning_rate": 3.9584587597832635e-06,
"loss": 0.6847,
"mean_token_accuracy": 0.8109239622950554,
"num_tokens": 348673508.0,
"step": 5330
},
{
"epoch": 0.8037326911499096,
"grad_norm": 0.7412260862715365,
"learning_rate": 3.928356411800121e-06,
"loss": 0.695,
"mean_token_accuracy": 0.8071655169129371,
"num_tokens": 349327321.0,
"step": 5340
},
{
"epoch": 0.8052378085490668,
"grad_norm": 0.8010096276041234,
"learning_rate": 3.898254063816978e-06,
"loss": 0.6978,
"mean_token_accuracy": 0.8087977394461632,
"num_tokens": 349982286.0,
"step": 5350
},
{
"epoch": 0.8067429259482239,
"grad_norm": 0.8731170774854645,
"learning_rate": 3.868151715833835e-06,
"loss": 0.7326,
"mean_token_accuracy": 0.8022829025983811,
"num_tokens": 350634416.0,
"step": 5360
},
{
"epoch": 0.8082480433473811,
"grad_norm": 0.7093730462307066,
"learning_rate": 3.8380493678506925e-06,
"loss": 0.6596,
"mean_token_accuracy": 0.8181972727179527,
"num_tokens": 351287751.0,
"step": 5370
},
{
"epoch": 0.8097531607465382,
"grad_norm": 0.8137793008351559,
"learning_rate": 3.80794701986755e-06,
"loss": 0.6859,
"mean_token_accuracy": 0.8102050065994263,
"num_tokens": 351940648.0,
"step": 5380
},
{
"epoch": 0.8112582781456954,
"grad_norm": 0.7904273712123535,
"learning_rate": 3.7778446718844074e-06,
"loss": 0.6795,
"mean_token_accuracy": 0.8127697423100472,
"num_tokens": 352594701.0,
"step": 5390
},
{
"epoch": 0.8127633955448526,
"grad_norm": 0.8285781285803187,
"learning_rate": 3.7477423239012643e-06,
"loss": 0.671,
"mean_token_accuracy": 0.8142693549394607,
"num_tokens": 353250061.0,
"step": 5400
},
{
"epoch": 0.8142685129440096,
"grad_norm": 0.8011770165014716,
"learning_rate": 3.717639975918122e-06,
"loss": 0.7228,
"mean_token_accuracy": 0.8017405867576599,
"num_tokens": 353905112.0,
"step": 5410
},
{
"epoch": 0.8157736303431667,
"grad_norm": 0.8672249780085222,
"learning_rate": 3.6875376279349796e-06,
"loss": 0.6968,
"mean_token_accuracy": 0.8080266386270523,
"num_tokens": 354557456.0,
"step": 5420
},
{
"epoch": 0.8172787477423239,
"grad_norm": 0.8120117834431909,
"learning_rate": 3.6574352799518364e-06,
"loss": 0.6925,
"mean_token_accuracy": 0.8095671758055687,
"num_tokens": 355210797.0,
"step": 5430
},
{
"epoch": 0.818783865141481,
"grad_norm": 0.863727834809856,
"learning_rate": 3.627332931968694e-06,
"loss": 0.7268,
"mean_token_accuracy": 0.8012472525238991,
"num_tokens": 355866157.0,
"step": 5440
},
{
"epoch": 0.8202889825406382,
"grad_norm": 0.781104146631108,
"learning_rate": 3.597230583985551e-06,
"loss": 0.6399,
"mean_token_accuracy": 0.8211751446127892,
"num_tokens": 356519491.0,
"step": 5450
},
{
"epoch": 0.8217940999397954,
"grad_norm": 0.79463590801861,
"learning_rate": 3.5671282360024086e-06,
"loss": 0.6979,
"mean_token_accuracy": 0.8080309733748436,
"num_tokens": 357173455.0,
"step": 5460
},
{
"epoch": 0.8232992173389524,
"grad_norm": 0.7213195682522814,
"learning_rate": 3.537025888019266e-06,
"loss": 0.6714,
"mean_token_accuracy": 0.8130217432975769,
"num_tokens": 357828204.0,
"step": 5470
},
{
"epoch": 0.8248043347381095,
"grad_norm": 0.8689083991804347,
"learning_rate": 3.506923540036123e-06,
"loss": 0.687,
"mean_token_accuracy": 0.8101729631423951,
"num_tokens": 358480367.0,
"step": 5480
},
{
"epoch": 0.8263094521372667,
"grad_norm": 0.689761338219419,
"learning_rate": 3.4768211920529803e-06,
"loss": 0.7044,
"mean_token_accuracy": 0.8075317278504371,
"num_tokens": 359134857.0,
"step": 5490
},
{
"epoch": 0.8278145695364238,
"grad_norm": 0.8806667703934201,
"learning_rate": 3.446718844069838e-06,
"loss": 0.6945,
"mean_token_accuracy": 0.8091181218624115,
"num_tokens": 359789043.0,
"step": 5500
},
{
"epoch": 0.829319686935581,
"grad_norm": 0.7766566151875849,
"learning_rate": 3.416616496086695e-06,
"loss": 0.6394,
"mean_token_accuracy": 0.8194618910551071,
"num_tokens": 360442479.0,
"step": 5510
},
{
"epoch": 0.8308248043347382,
"grad_norm": 0.7831351840930362,
"learning_rate": 3.3865141481035525e-06,
"loss": 0.6729,
"mean_token_accuracy": 0.8144933819770813,
"num_tokens": 361097254.0,
"step": 5520
},
{
"epoch": 0.8323299217338952,
"grad_norm": 0.8509168580247635,
"learning_rate": 3.3564118001204097e-06,
"loss": 0.6533,
"mean_token_accuracy": 0.819504565000534,
"num_tokens": 361750574.0,
"step": 5530
},
{
"epoch": 0.8338350391330523,
"grad_norm": 0.7719049692325927,
"learning_rate": 3.326309452137267e-06,
"loss": 0.6956,
"mean_token_accuracy": 0.8098696261644364,
"num_tokens": 362405934.0,
"step": 5540
},
{
"epoch": 0.8353401565322095,
"grad_norm": 0.8709709923622949,
"learning_rate": 3.2962071041541242e-06,
"loss": 0.6851,
"mean_token_accuracy": 0.8118442639708519,
"num_tokens": 363060679.0,
"step": 5550
},
{
"epoch": 0.8368452739313667,
"grad_norm": 0.724128820806475,
"learning_rate": 3.2661047561709815e-06,
"loss": 0.7068,
"mean_token_accuracy": 0.8077720448374748,
"num_tokens": 363716039.0,
"step": 5560
},
{
"epoch": 0.8383503913305238,
"grad_norm": 0.7771783891240635,
"learning_rate": 3.2360024081878387e-06,
"loss": 0.6975,
"mean_token_accuracy": 0.8087139695882797,
"num_tokens": 364371134.0,
"step": 5570
},
{
"epoch": 0.839855508729681,
"grad_norm": 0.7469591339916538,
"learning_rate": 3.2059000602046964e-06,
"loss": 0.7087,
"mean_token_accuracy": 0.806692723929882,
"num_tokens": 365026494.0,
"step": 5580
},
{
"epoch": 0.841360626128838,
"grad_norm": 0.6846928405954478,
"learning_rate": 3.1757977122215532e-06,
"loss": 0.6524,
"mean_token_accuracy": 0.8203337088227272,
"num_tokens": 365678047.0,
"step": 5590
},
{
"epoch": 0.8428657435279951,
"grad_norm": 0.8165391902454103,
"learning_rate": 3.145695364238411e-06,
"loss": 0.7091,
"mean_token_accuracy": 0.8084227561950683,
"num_tokens": 366332246.0,
"step": 5600
},
{
"epoch": 0.8443708609271523,
"grad_norm": 0.7661797869924849,
"learning_rate": 3.115593016255268e-06,
"loss": 0.6657,
"mean_token_accuracy": 0.814234085381031,
"num_tokens": 366987566.0,
"step": 5610
},
{
"epoch": 0.8458759783263095,
"grad_norm": 0.8544597822050432,
"learning_rate": 3.0854906682721254e-06,
"loss": 0.6731,
"mean_token_accuracy": 0.8137959286570549,
"num_tokens": 367641959.0,
"step": 5620
},
{
"epoch": 0.8473810957254666,
"grad_norm": 0.7163611803006235,
"learning_rate": 3.0553883202889826e-06,
"loss": 0.6752,
"mean_token_accuracy": 0.814427162706852,
"num_tokens": 368294664.0,
"step": 5630
},
{
"epoch": 0.8488862131246238,
"grad_norm": 0.6787078659303751,
"learning_rate": 3.0252859723058403e-06,
"loss": 0.6763,
"mean_token_accuracy": 0.8122038081288337,
"num_tokens": 368948750.0,
"step": 5640
},
{
"epoch": 0.8503913305237809,
"grad_norm": 0.7479345264165043,
"learning_rate": 2.995183624322697e-06,
"loss": 0.6783,
"mean_token_accuracy": 0.8135086625814438,
"num_tokens": 369602071.0,
"step": 5650
},
{
"epoch": 0.851896447922938,
"grad_norm": 0.8040713418680959,
"learning_rate": 2.965081276339555e-06,
"loss": 0.6833,
"mean_token_accuracy": 0.8125656425952912,
"num_tokens": 370257431.0,
"step": 5660
},
{
"epoch": 0.8534015653220951,
"grad_norm": 0.7144369120802795,
"learning_rate": 2.9349789283564125e-06,
"loss": 0.633,
"mean_token_accuracy": 0.8221362918615341,
"num_tokens": 370912305.0,
"step": 5670
},
{
"epoch": 0.8549066827212523,
"grad_norm": 0.7067024966715704,
"learning_rate": 2.9048765803732693e-06,
"loss": 0.6501,
"mean_token_accuracy": 0.8193739414215088,
"num_tokens": 371565909.0,
"step": 5680
},
{
"epoch": 0.8564118001204094,
"grad_norm": 0.8244129429342957,
"learning_rate": 2.874774232390127e-06,
"loss": 0.6643,
"mean_token_accuracy": 0.8159393966197968,
"num_tokens": 372221054.0,
"step": 5690
},
{
"epoch": 0.8579169175195666,
"grad_norm": 0.6788719585230156,
"learning_rate": 2.844671884406984e-06,
"loss": 0.676,
"mean_token_accuracy": 0.814545676112175,
"num_tokens": 372876414.0,
"step": 5700
},
{
"epoch": 0.8594220349187237,
"grad_norm": 0.8130384688506709,
"learning_rate": 2.814569536423841e-06,
"loss": 0.6965,
"mean_token_accuracy": 0.808952122926712,
"num_tokens": 373531774.0,
"step": 5710
},
{
"epoch": 0.8609271523178808,
"grad_norm": 0.7567910527858757,
"learning_rate": 2.7844671884406987e-06,
"loss": 0.6561,
"mean_token_accuracy": 0.8194347232580185,
"num_tokens": 374183754.0,
"step": 5720
},
{
"epoch": 0.8624322697170379,
"grad_norm": 0.8119398632578556,
"learning_rate": 2.7543648404575555e-06,
"loss": 0.7265,
"mean_token_accuracy": 0.8022975295782089,
"num_tokens": 374837993.0,
"step": 5730
},
{
"epoch": 0.863937387116195,
"grad_norm": 0.7200406785036352,
"learning_rate": 2.724262492474413e-06,
"loss": 0.6936,
"mean_token_accuracy": 0.8087088257074356,
"num_tokens": 375491417.0,
"step": 5740
},
{
"epoch": 0.8654425045153522,
"grad_norm": 0.8095096343416092,
"learning_rate": 2.694160144491271e-06,
"loss": 0.6638,
"mean_token_accuracy": 0.815724229812622,
"num_tokens": 376146777.0,
"step": 5750
},
{
"epoch": 0.8669476219145094,
"grad_norm": 0.7838435257134089,
"learning_rate": 2.6640577965081277e-06,
"loss": 0.6436,
"mean_token_accuracy": 0.8215755537152291,
"num_tokens": 376801681.0,
"step": 5760
},
{
"epoch": 0.8684527393136665,
"grad_norm": 0.7514700742048708,
"learning_rate": 2.6339554485249854e-06,
"loss": 0.6746,
"mean_token_accuracy": 0.8149273306131363,
"num_tokens": 377457041.0,
"step": 5770
},
{
"epoch": 0.8699578567128236,
"grad_norm": 0.8210292942650229,
"learning_rate": 2.6038531005418426e-06,
"loss": 0.7044,
"mean_token_accuracy": 0.8063116610050202,
"num_tokens": 378107635.0,
"step": 5780
},
{
"epoch": 0.8714629741119807,
"grad_norm": 0.7396056378113063,
"learning_rate": 2.5737507525587e-06,
"loss": 0.6691,
"mean_token_accuracy": 0.8137347057461739,
"num_tokens": 378761375.0,
"step": 5790
},
{
"epoch": 0.8729680915111379,
"grad_norm": 0.8708274255602088,
"learning_rate": 2.543648404575557e-06,
"loss": 0.6323,
"mean_token_accuracy": 0.8234889656305313,
"num_tokens": 379415908.0,
"step": 5800
},
{
"epoch": 0.874473208910295,
"grad_norm": 0.7943943272899322,
"learning_rate": 2.5135460565924148e-06,
"loss": 0.659,
"mean_token_accuracy": 0.8163654163479805,
"num_tokens": 380071268.0,
"step": 5810
},
{
"epoch": 0.8759783263094522,
"grad_norm": 0.8493004790291228,
"learning_rate": 2.4834437086092716e-06,
"loss": 0.6711,
"mean_token_accuracy": 0.8139961987733841,
"num_tokens": 380724321.0,
"step": 5820
},
{
"epoch": 0.8774834437086093,
"grad_norm": 0.8183163337114058,
"learning_rate": 2.4533413606261293e-06,
"loss": 0.6835,
"mean_token_accuracy": 0.8111695215106011,
"num_tokens": 381379080.0,
"step": 5830
},
{
"epoch": 0.8789885611077664,
"grad_norm": 0.8344949442851441,
"learning_rate": 2.4232390126429865e-06,
"loss": 0.6557,
"mean_token_accuracy": 0.8170079737901688,
"num_tokens": 382032396.0,
"step": 5840
},
{
"epoch": 0.8804936785069235,
"grad_norm": 0.8101785451764406,
"learning_rate": 2.3931366646598438e-06,
"loss": 0.6413,
"mean_token_accuracy": 0.8225623875856399,
"num_tokens": 382685435.0,
"step": 5850
},
{
"epoch": 0.8819987959060807,
"grad_norm": 0.8931720749252036,
"learning_rate": 2.363034316676701e-06,
"loss": 0.6567,
"mean_token_accuracy": 0.8178651258349419,
"num_tokens": 383339721.0,
"step": 5860
},
{
"epoch": 0.8835039133052378,
"grad_norm": 0.8064725902594196,
"learning_rate": 2.3329319686935583e-06,
"loss": 0.6808,
"mean_token_accuracy": 0.8125994563102722,
"num_tokens": 383993935.0,
"step": 5870
},
{
"epoch": 0.885009030704395,
"grad_norm": 0.7727113900390351,
"learning_rate": 2.3028296207104155e-06,
"loss": 0.6607,
"mean_token_accuracy": 0.8179493889212608,
"num_tokens": 384648353.0,
"step": 5880
},
{
"epoch": 0.8865141481035521,
"grad_norm": 0.8692477292585057,
"learning_rate": 2.2727272727272728e-06,
"loss": 0.6836,
"mean_token_accuracy": 0.8113144382834434,
"num_tokens": 385302928.0,
"step": 5890
},
{
"epoch": 0.8880192655027093,
"grad_norm": 0.8390504027322997,
"learning_rate": 2.2426249247441304e-06,
"loss": 0.7002,
"mean_token_accuracy": 0.8089157208800316,
"num_tokens": 385956340.0,
"step": 5900
},
{
"epoch": 0.8895243829018663,
"grad_norm": 0.8150721235314988,
"learning_rate": 2.2125225767609877e-06,
"loss": 0.6929,
"mean_token_accuracy": 0.8088451266288758,
"num_tokens": 386610619.0,
"step": 5910
},
{
"epoch": 0.8910295003010235,
"grad_norm": 0.8046328603226446,
"learning_rate": 2.182420228777845e-06,
"loss": 0.6519,
"mean_token_accuracy": 0.81864313185215,
"num_tokens": 387265979.0,
"step": 5920
},
{
"epoch": 0.8925346177001806,
"grad_norm": 0.756159894396759,
"learning_rate": 2.152317880794702e-06,
"loss": 0.6786,
"mean_token_accuracy": 0.8141127720475196,
"num_tokens": 387919662.0,
"step": 5930
},
{
"epoch": 0.8940397350993378,
"grad_norm": 0.7776430394692136,
"learning_rate": 2.1222155328115594e-06,
"loss": 0.6698,
"mean_token_accuracy": 0.8145410984754562,
"num_tokens": 388575022.0,
"step": 5940
},
{
"epoch": 0.8955448524984949,
"grad_norm": 0.7865527176037842,
"learning_rate": 2.0921131848284167e-06,
"loss": 0.6801,
"mean_token_accuracy": 0.8134633019566536,
"num_tokens": 389230382.0,
"step": 5950
},
{
"epoch": 0.8970499698976521,
"grad_norm": 0.7925365048007713,
"learning_rate": 2.062010836845274e-06,
"loss": 0.6873,
"mean_token_accuracy": 0.8116836905479431,
"num_tokens": 389883695.0,
"step": 5960
},
{
"epoch": 0.8985550872968091,
"grad_norm": 0.8058845590867396,
"learning_rate": 2.0319084888621316e-06,
"loss": 0.682,
"mean_token_accuracy": 0.8133537322282791,
"num_tokens": 390537525.0,
"step": 5970
},
{
"epoch": 0.9000602046959663,
"grad_norm": 0.820259998347803,
"learning_rate": 2.001806140878989e-06,
"loss": 0.6751,
"mean_token_accuracy": 0.8127815544605255,
"num_tokens": 391191825.0,
"step": 5980
},
{
"epoch": 0.9015653220951234,
"grad_norm": 0.7974896272888695,
"learning_rate": 1.971703792895846e-06,
"loss": 0.6484,
"mean_token_accuracy": 0.8203530460596085,
"num_tokens": 391846060.0,
"step": 5990
},
{
"epoch": 0.9030704394942806,
"grad_norm": 0.7428984425282655,
"learning_rate": 1.9416014449127033e-06,
"loss": 0.6588,
"mean_token_accuracy": 0.8173960685729981,
"num_tokens": 392500211.0,
"step": 6000
},
{
"epoch": 0.9045755568934377,
"grad_norm": 0.8040708292776918,
"learning_rate": 1.9114990969295606e-06,
"loss": 0.7021,
"mean_token_accuracy": 0.8088914528489113,
"num_tokens": 393150561.0,
"step": 6010
},
{
"epoch": 0.9060806742925949,
"grad_norm": 0.8807619038953524,
"learning_rate": 1.881396748946418e-06,
"loss": 0.6416,
"mean_token_accuracy": 0.8225369155406952,
"num_tokens": 393805719.0,
"step": 6020
},
{
"epoch": 0.9075857916917519,
"grad_norm": 0.8563259131265296,
"learning_rate": 1.8512944009632753e-06,
"loss": 0.6864,
"mean_token_accuracy": 0.8107398003339767,
"num_tokens": 394461079.0,
"step": 6030
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.8386967646599575,
"learning_rate": 1.8211920529801325e-06,
"loss": 0.6979,
"mean_token_accuracy": 0.8091868460178375,
"num_tokens": 395114814.0,
"step": 6040
},
{
"epoch": 0.9105960264900662,
"grad_norm": 0.782498683599273,
"learning_rate": 1.79108970499699e-06,
"loss": 0.6561,
"mean_token_accuracy": 0.8184771955013275,
"num_tokens": 395766884.0,
"step": 6050
},
{
"epoch": 0.9121011438892234,
"grad_norm": 0.7709652287388354,
"learning_rate": 1.7609873570138472e-06,
"loss": 0.6416,
"mean_token_accuracy": 0.8183257013559342,
"num_tokens": 396422148.0,
"step": 6060
},
{
"epoch": 0.9136062612883805,
"grad_norm": 0.6976938880223098,
"learning_rate": 1.7308850090307045e-06,
"loss": 0.6577,
"mean_token_accuracy": 0.8160424426198005,
"num_tokens": 397075791.0,
"step": 6070
},
{
"epoch": 0.9151113786875377,
"grad_norm": 0.7892179303260687,
"learning_rate": 1.700782661047562e-06,
"loss": 0.7052,
"mean_token_accuracy": 0.8071174398064613,
"num_tokens": 397730862.0,
"step": 6080
},
{
"epoch": 0.9166164960866947,
"grad_norm": 0.7715099994986613,
"learning_rate": 1.6706803130644192e-06,
"loss": 0.6989,
"mean_token_accuracy": 0.808410356938839,
"num_tokens": 398383816.0,
"step": 6090
},
{
"epoch": 0.9181216134858519,
"grad_norm": 0.7866013965422197,
"learning_rate": 1.6405779650812764e-06,
"loss": 0.6853,
"mean_token_accuracy": 0.8117044195532799,
"num_tokens": 399036130.0,
"step": 6100
},
{
"epoch": 0.919626730885009,
"grad_norm": 0.71269595335957,
"learning_rate": 1.6104756170981337e-06,
"loss": 0.6567,
"mean_token_accuracy": 0.8164850741624832,
"num_tokens": 399689802.0,
"step": 6110
},
{
"epoch": 0.9211318482841662,
"grad_norm": 0.7488056490664857,
"learning_rate": 1.5803732691149911e-06,
"loss": 0.6799,
"mean_token_accuracy": 0.8110664993524551,
"num_tokens": 400345162.0,
"step": 6120
},
{
"epoch": 0.9226369656833233,
"grad_norm": 0.7458014489415327,
"learning_rate": 1.5502709211318484e-06,
"loss": 0.681,
"mean_token_accuracy": 0.811719287931919,
"num_tokens": 400999661.0,
"step": 6130
},
{
"epoch": 0.9241420830824805,
"grad_norm": 0.7886363158801694,
"learning_rate": 1.5201685731487056e-06,
"loss": 0.6862,
"mean_token_accuracy": 0.8114742293953896,
"num_tokens": 401653950.0,
"step": 6140
},
{
"epoch": 0.9256472004816376,
"grad_norm": 0.7391585776509119,
"learning_rate": 1.490066225165563e-06,
"loss": 0.696,
"mean_token_accuracy": 0.8081017956137657,
"num_tokens": 402309310.0,
"step": 6150
},
{
"epoch": 0.9271523178807947,
"grad_norm": 0.770098349627336,
"learning_rate": 1.4599638771824203e-06,
"loss": 0.6457,
"mean_token_accuracy": 0.8203165084123611,
"num_tokens": 402964109.0,
"step": 6160
},
{
"epoch": 0.9286574352799518,
"grad_norm": 0.7730099532771224,
"learning_rate": 1.4298615291992776e-06,
"loss": 0.6808,
"mean_token_accuracy": 0.8133900195360184,
"num_tokens": 403619469.0,
"step": 6170
},
{
"epoch": 0.930162552679109,
"grad_norm": 0.7197758247011444,
"learning_rate": 1.3997591812161348e-06,
"loss": 0.6609,
"mean_token_accuracy": 0.8174248903989791,
"num_tokens": 404271956.0,
"step": 6180
},
{
"epoch": 0.9316676700782661,
"grad_norm": 0.7643788659356211,
"learning_rate": 1.3696568332329923e-06,
"loss": 0.6629,
"mean_token_accuracy": 0.8163714617490768,
"num_tokens": 404926972.0,
"step": 6190
},
{
"epoch": 0.9331727874774233,
"grad_norm": 0.8003243746970174,
"learning_rate": 1.3395544852498495e-06,
"loss": 0.6571,
"mean_token_accuracy": 0.8182735562324523,
"num_tokens": 405581226.0,
"step": 6200
},
{
"epoch": 0.9346779048765804,
"grad_norm": 0.8390202133619289,
"learning_rate": 1.3094521372667068e-06,
"loss": 0.6646,
"mean_token_accuracy": 0.8168047055602073,
"num_tokens": 406236010.0,
"step": 6210
},
{
"epoch": 0.9361830222757375,
"grad_norm": 0.8218023276195088,
"learning_rate": 1.2793497892835642e-06,
"loss": 0.6798,
"mean_token_accuracy": 0.813007053732872,
"num_tokens": 406889781.0,
"step": 6220
},
{
"epoch": 0.9376881396748946,
"grad_norm": 0.8049728095384475,
"learning_rate": 1.2492474413004215e-06,
"loss": 0.6977,
"mean_token_accuracy": 0.8095493704080582,
"num_tokens": 407542794.0,
"step": 6230
},
{
"epoch": 0.9391932570740518,
"grad_norm": 0.7952147884853261,
"learning_rate": 1.2191450933172787e-06,
"loss": 0.671,
"mean_token_accuracy": 0.8143075197935105,
"num_tokens": 408198154.0,
"step": 6240
},
{
"epoch": 0.9406983744732089,
"grad_norm": 0.7599989329771313,
"learning_rate": 1.1890427453341362e-06,
"loss": 0.6488,
"mean_token_accuracy": 0.8212261810898781,
"num_tokens": 408853514.0,
"step": 6250
},
{
"epoch": 0.9422034918723661,
"grad_norm": 0.7216867579887762,
"learning_rate": 1.1589403973509934e-06,
"loss": 0.6347,
"mean_token_accuracy": 0.8228215038776397,
"num_tokens": 409508874.0,
"step": 6260
},
{
"epoch": 0.9437086092715232,
"grad_norm": 0.7067922141116312,
"learning_rate": 1.1288380493678507e-06,
"loss": 0.659,
"mean_token_accuracy": 0.8190646037459374,
"num_tokens": 410163248.0,
"step": 6270
},
{
"epoch": 0.9452137266706803,
"grad_norm": 0.948728528316048,
"learning_rate": 1.098735701384708e-06,
"loss": 0.643,
"mean_token_accuracy": 0.8224123731255532,
"num_tokens": 410818608.0,
"step": 6280
},
{
"epoch": 0.9467188440698374,
"grad_norm": 0.7892350481303253,
"learning_rate": 1.0686333534015654e-06,
"loss": 0.6608,
"mean_token_accuracy": 0.8173749342560768,
"num_tokens": 411471107.0,
"step": 6290
},
{
"epoch": 0.9482239614689946,
"grad_norm": 0.8083478104312511,
"learning_rate": 1.0385310054184229e-06,
"loss": 0.6872,
"mean_token_accuracy": 0.811252748966217,
"num_tokens": 412126467.0,
"step": 6300
},
{
"epoch": 0.9497290788681517,
"grad_norm": 0.8938476500962467,
"learning_rate": 1.00842865743528e-06,
"loss": 0.6601,
"mean_token_accuracy": 0.8179592058062554,
"num_tokens": 412781827.0,
"step": 6310
},
{
"epoch": 0.9512341962673089,
"grad_norm": 0.8596759733933999,
"learning_rate": 9.783263094521374e-07,
"loss": 0.6571,
"mean_token_accuracy": 0.8197621509432793,
"num_tokens": 413436727.0,
"step": 6320
},
{
"epoch": 0.952739313666466,
"grad_norm": 0.7467533643516227,
"learning_rate": 9.482239614689946e-07,
"loss": 0.6792,
"mean_token_accuracy": 0.8140778690576553,
"num_tokens": 414088041.0,
"step": 6330
},
{
"epoch": 0.9542444310656231,
"grad_norm": 0.7518313373444699,
"learning_rate": 9.18121613485852e-07,
"loss": 0.6671,
"mean_token_accuracy": 0.8171729937195777,
"num_tokens": 414743401.0,
"step": 6340
},
{
"epoch": 0.9557495484647802,
"grad_norm": 0.7147981832977893,
"learning_rate": 8.880192655027092e-07,
"loss": 0.6693,
"mean_token_accuracy": 0.8162096992135048,
"num_tokens": 415398761.0,
"step": 6350
},
{
"epoch": 0.9572546658639374,
"grad_norm": 0.7787214649354509,
"learning_rate": 8.579169175195666e-07,
"loss": 0.6587,
"mean_token_accuracy": 0.8199209123849869,
"num_tokens": 416054121.0,
"step": 6360
},
{
"epoch": 0.9587597832630945,
"grad_norm": 0.7557139318657753,
"learning_rate": 8.27814569536424e-07,
"loss": 0.6562,
"mean_token_accuracy": 0.8186782419681549,
"num_tokens": 416709481.0,
"step": 6370
},
{
"epoch": 0.9602649006622517,
"grad_norm": 0.8281637709136592,
"learning_rate": 7.977122215532813e-07,
"loss": 0.6656,
"mean_token_accuracy": 0.8177010849118233,
"num_tokens": 417363825.0,
"step": 6380
},
{
"epoch": 0.9617700180614088,
"grad_norm": 0.8103144240736111,
"learning_rate": 7.676098735701386e-07,
"loss": 0.6987,
"mean_token_accuracy": 0.8090856596827507,
"num_tokens": 418018667.0,
"step": 6390
},
{
"epoch": 0.963275135460566,
"grad_norm": 0.8780690778093717,
"learning_rate": 7.375075255869959e-07,
"loss": 0.6565,
"mean_token_accuracy": 0.8164542749524116,
"num_tokens": 418673242.0,
"step": 6400
},
{
"epoch": 0.964780252859723,
"grad_norm": 0.8301073825181444,
"learning_rate": 7.074051776038532e-07,
"loss": 0.7105,
"mean_token_accuracy": 0.8061650216579437,
"num_tokens": 419328467.0,
"step": 6410
},
{
"epoch": 0.9662853702588802,
"grad_norm": 0.7743398747402613,
"learning_rate": 6.773028296207105e-07,
"loss": 0.6952,
"mean_token_accuracy": 0.8110766768455505,
"num_tokens": 419983412.0,
"step": 6420
},
{
"epoch": 0.9677904876580373,
"grad_norm": 0.7729942676504536,
"learning_rate": 6.472004816375678e-07,
"loss": 0.6842,
"mean_token_accuracy": 0.8116374552249909,
"num_tokens": 420638772.0,
"step": 6430
},
{
"epoch": 0.9692956050571945,
"grad_norm": 0.8396932408066435,
"learning_rate": 6.170981336544251e-07,
"loss": 0.624,
"mean_token_accuracy": 0.8242242723703385,
"num_tokens": 421293329.0,
"step": 6440
},
{
"epoch": 0.9708007224563516,
"grad_norm": 0.7456802986717396,
"learning_rate": 5.869957856712824e-07,
"loss": 0.6733,
"mean_token_accuracy": 0.8136846616864204,
"num_tokens": 421948689.0,
"step": 6450
},
{
"epoch": 0.9723058398555088,
"grad_norm": 0.7987936501010671,
"learning_rate": 5.568934376881397e-07,
"loss": 0.6318,
"mean_token_accuracy": 0.8227405115962029,
"num_tokens": 422603207.0,
"step": 6460
},
{
"epoch": 0.9738109572546658,
"grad_norm": 0.7936383192328358,
"learning_rate": 5.26791089704997e-07,
"loss": 0.6563,
"mean_token_accuracy": 0.8187880471348763,
"num_tokens": 423254732.0,
"step": 6470
},
{
"epoch": 0.975316074653823,
"grad_norm": 0.7432652222612889,
"learning_rate": 4.966887417218544e-07,
"loss": 0.6487,
"mean_token_accuracy": 0.818960678577423,
"num_tokens": 423910092.0,
"step": 6480
},
{
"epoch": 0.9768211920529801,
"grad_norm": 0.8511981353686215,
"learning_rate": 4.6658639373871166e-07,
"loss": 0.6799,
"mean_token_accuracy": 0.8121935516595841,
"num_tokens": 424564826.0,
"step": 6490
},
{
"epoch": 0.9783263094521373,
"grad_norm": 0.7313398696799408,
"learning_rate": 4.36484045755569e-07,
"loss": 0.7109,
"mean_token_accuracy": 0.8065851047635079,
"num_tokens": 425219708.0,
"step": 6500
},
{
"epoch": 0.9798314268512944,
"grad_norm": 0.7750629777088383,
"learning_rate": 4.063816977724263e-07,
"loss": 0.6841,
"mean_token_accuracy": 0.8126775458455086,
"num_tokens": 425873155.0,
"step": 6510
},
{
"epoch": 0.9813365442504516,
"grad_norm": 0.7744993603101556,
"learning_rate": 3.762793497892836e-07,
"loss": 0.6323,
"mean_token_accuracy": 0.8231482058763504,
"num_tokens": 426528515.0,
"step": 6520
},
{
"epoch": 0.9828416616496086,
"grad_norm": 0.7716391137183645,
"learning_rate": 3.461770018061409e-07,
"loss": 0.6716,
"mean_token_accuracy": 0.814750799536705,
"num_tokens": 427183304.0,
"step": 6530
},
{
"epoch": 0.9843467790487658,
"grad_norm": 0.8256084259353021,
"learning_rate": 3.160746538229982e-07,
"loss": 0.638,
"mean_token_accuracy": 0.8222943916916847,
"num_tokens": 427835957.0,
"step": 6540
},
{
"epoch": 0.9858518964479229,
"grad_norm": 0.7470228300092132,
"learning_rate": 2.8597230583985557e-07,
"loss": 0.6029,
"mean_token_accuracy": 0.8287707567214966,
"num_tokens": 428491317.0,
"step": 6550
},
{
"epoch": 0.9873570138470801,
"grad_norm": 0.7560617660239669,
"learning_rate": 2.5586995785671287e-07,
"loss": 0.6709,
"mean_token_accuracy": 0.8148993030190468,
"num_tokens": 429142202.0,
"step": 6560
},
{
"epoch": 0.9888621312462372,
"grad_norm": 0.7086279970183982,
"learning_rate": 2.2576760987357014e-07,
"loss": 0.6567,
"mean_token_accuracy": 0.8180391117930412,
"num_tokens": 429796213.0,
"step": 6570
},
{
"epoch": 0.9903672486453944,
"grad_norm": 0.7788087082450003,
"learning_rate": 1.9566526189042744e-07,
"loss": 0.701,
"mean_token_accuracy": 0.8085399270057678,
"num_tokens": 430447380.0,
"step": 6580
},
{
"epoch": 0.9918723660445514,
"grad_norm": 0.7878945507101885,
"learning_rate": 1.655629139072848e-07,
"loss": 0.6506,
"mean_token_accuracy": 0.8179336041212082,
"num_tokens": 431101296.0,
"step": 6590
},
{
"epoch": 0.9933774834437086,
"grad_norm": 0.7121310866638965,
"learning_rate": 1.354605659241421e-07,
"loss": 0.6688,
"mean_token_accuracy": 0.8157944530248642,
"num_tokens": 431756656.0,
"step": 6600
},
{
"epoch": 0.9948826008428657,
"grad_norm": 0.8306725658530479,
"learning_rate": 1.053582179409994e-07,
"loss": 0.6956,
"mean_token_accuracy": 0.8097556039690972,
"num_tokens": 432411158.0,
"step": 6610
},
{
"epoch": 0.9963877182420229,
"grad_norm": 0.8661659471981039,
"learning_rate": 7.525586995785672e-08,
"loss": 0.6507,
"mean_token_accuracy": 0.8198384776711464,
"num_tokens": 433066518.0,
"step": 6620
},
{
"epoch": 0.99789283564118,
"grad_norm": 0.8034747470725212,
"learning_rate": 4.5153521974714037e-08,
"loss": 0.7082,
"mean_token_accuracy": 0.8069040149450302,
"num_tokens": 433720023.0,
"step": 6630
},
{
"epoch": 0.9993979530403372,
"grad_norm": 0.7599732131076518,
"learning_rate": 1.5051173991571343e-08,
"loss": 0.6537,
"mean_token_accuracy": 0.8202048733830452,
"num_tokens": 434375383.0,
"step": 6640
}
],
"logging_steps": 10,
"max_steps": 6644,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 695533043712000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}