2020-Q3-90p-filtered-random / trainer_state.json
DouglasPontes's picture
End of training
0a0ecee verified
raw
history blame contribute delete
No virus
80.9 kB
{
"best_metric": 2.244624614715576,
"best_model_checkpoint": "./model_tweets_2020_Q3_90/checkpoint-768000",
"epoch": 50.52525209995579,
"eval_steps": 8000,
"global_step": 2400000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"eval_loss": 2.5349206924438477,
"eval_runtime": 47.928,
"eval_samples_per_second": 834.585,
"eval_steps_per_second": 52.162,
"step": 8000
},
{
"epoch": 0.34,
"learning_rate": 4.0726666666666665e-07,
"loss": 2.7955,
"step": 16000
},
{
"epoch": 0.34,
"eval_loss": 2.444758892059326,
"eval_runtime": 48.5664,
"eval_samples_per_second": 823.615,
"eval_steps_per_second": 51.476,
"step": 16000
},
{
"epoch": 0.51,
"eval_loss": 2.3948748111724854,
"eval_runtime": 48.544,
"eval_samples_per_second": 823.995,
"eval_steps_per_second": 51.5,
"step": 24000
},
{
"epoch": 0.67,
"learning_rate": 4.0453333333333336e-07,
"loss": 2.5335,
"step": 32000
},
{
"epoch": 0.67,
"eval_loss": 2.3699398040771484,
"eval_runtime": 48.2115,
"eval_samples_per_second": 829.678,
"eval_steps_per_second": 51.855,
"step": 32000
},
{
"epoch": 0.84,
"eval_loss": 2.354438066482544,
"eval_runtime": 48.5047,
"eval_samples_per_second": 824.662,
"eval_steps_per_second": 51.541,
"step": 40000
},
{
"epoch": 1.01,
"learning_rate": 4.018e-07,
"loss": 2.4757,
"step": 48000
},
{
"epoch": 1.01,
"eval_loss": 2.347705364227295,
"eval_runtime": 48.6892,
"eval_samples_per_second": 821.537,
"eval_steps_per_second": 51.346,
"step": 48000
},
{
"epoch": 1.18,
"eval_loss": 2.3280608654022217,
"eval_runtime": 48.0175,
"eval_samples_per_second": 833.029,
"eval_steps_per_second": 52.064,
"step": 56000
},
{
"epoch": 1.35,
"learning_rate": 3.9906666666666667e-07,
"loss": 2.446,
"step": 64000
},
{
"epoch": 1.35,
"eval_loss": 2.317082166671753,
"eval_runtime": 48.561,
"eval_samples_per_second": 823.706,
"eval_steps_per_second": 51.482,
"step": 64000
},
{
"epoch": 1.52,
"eval_loss": 2.308218240737915,
"eval_runtime": 48.3682,
"eval_samples_per_second": 826.989,
"eval_steps_per_second": 51.687,
"step": 72000
},
{
"epoch": 1.68,
"learning_rate": 3.963333333333333e-07,
"loss": 2.4291,
"step": 80000
},
{
"epoch": 1.68,
"eval_loss": 2.316983222961426,
"eval_runtime": 48.1155,
"eval_samples_per_second": 831.333,
"eval_steps_per_second": 51.958,
"step": 80000
},
{
"epoch": 1.85,
"eval_loss": 2.2962071895599365,
"eval_runtime": 48.0395,
"eval_samples_per_second": 832.648,
"eval_steps_per_second": 52.041,
"step": 88000
},
{
"epoch": 2.02,
"learning_rate": 3.936e-07,
"loss": 2.4275,
"step": 96000
},
{
"epoch": 2.02,
"eval_loss": 2.304356575012207,
"eval_runtime": 48.8826,
"eval_samples_per_second": 818.287,
"eval_steps_per_second": 51.143,
"step": 96000
},
{
"epoch": 2.19,
"eval_loss": 2.2738351821899414,
"eval_runtime": 48.096,
"eval_samples_per_second": 831.671,
"eval_steps_per_second": 51.979,
"step": 104000
},
{
"epoch": 2.36,
"learning_rate": 3.908666666666667e-07,
"loss": 2.4148,
"step": 112000
},
{
"epoch": 2.36,
"eval_loss": 2.2927024364471436,
"eval_runtime": 48.7643,
"eval_samples_per_second": 820.272,
"eval_steps_per_second": 51.267,
"step": 112000
},
{
"epoch": 2.53,
"eval_loss": 2.2684385776519775,
"eval_runtime": 48.701,
"eval_samples_per_second": 821.338,
"eval_steps_per_second": 51.334,
"step": 120000
},
{
"epoch": 2.69,
"learning_rate": 3.8813333333333334e-07,
"loss": 2.4062,
"step": 128000
},
{
"epoch": 2.69,
"eval_loss": 2.289085626602173,
"eval_runtime": 48.1247,
"eval_samples_per_second": 831.174,
"eval_steps_per_second": 51.948,
"step": 128000
},
{
"epoch": 2.86,
"eval_loss": 2.278874397277832,
"eval_runtime": 48.1786,
"eval_samples_per_second": 830.245,
"eval_steps_per_second": 51.89,
"step": 136000
},
{
"epoch": 3.03,
"learning_rate": 3.854e-07,
"loss": 2.4022,
"step": 144000
},
{
"epoch": 3.03,
"eval_loss": 2.2659101486206055,
"eval_runtime": 48.6348,
"eval_samples_per_second": 822.457,
"eval_steps_per_second": 51.404,
"step": 144000
},
{
"epoch": 3.2,
"eval_loss": 2.282435894012451,
"eval_runtime": 48.1725,
"eval_samples_per_second": 830.349,
"eval_steps_per_second": 51.897,
"step": 152000
},
{
"epoch": 3.37,
"learning_rate": 3.8266666666666665e-07,
"loss": 2.3943,
"step": 160000
},
{
"epoch": 3.37,
"eval_loss": 2.268383026123047,
"eval_runtime": 48.1548,
"eval_samples_per_second": 830.655,
"eval_steps_per_second": 51.916,
"step": 160000
},
{
"epoch": 3.54,
"eval_loss": 2.268256187438965,
"eval_runtime": 48.6456,
"eval_samples_per_second": 822.274,
"eval_steps_per_second": 51.392,
"step": 168000
},
{
"epoch": 3.71,
"learning_rate": 3.799333333333333e-07,
"loss": 2.3957,
"step": 176000
},
{
"epoch": 3.71,
"eval_loss": 2.2737274169921875,
"eval_runtime": 48.1068,
"eval_samples_per_second": 831.483,
"eval_steps_per_second": 51.968,
"step": 176000
},
{
"epoch": 3.87,
"eval_loss": 2.2779204845428467,
"eval_runtime": 48.6234,
"eval_samples_per_second": 822.65,
"eval_steps_per_second": 51.416,
"step": 184000
},
{
"epoch": 4.04,
"learning_rate": 3.772e-07,
"loss": 2.3976,
"step": 192000
},
{
"epoch": 4.04,
"eval_loss": 2.2710366249084473,
"eval_runtime": 48.2531,
"eval_samples_per_second": 828.961,
"eval_steps_per_second": 51.81,
"step": 192000
},
{
"epoch": 4.21,
"eval_loss": 2.249539852142334,
"eval_runtime": 48.0877,
"eval_samples_per_second": 831.813,
"eval_steps_per_second": 51.988,
"step": 200000
},
{
"epoch": 4.38,
"learning_rate": 3.7446666666666667e-07,
"loss": 2.3933,
"step": 208000
},
{
"epoch": 4.38,
"eval_loss": 2.266042947769165,
"eval_runtime": 48.7475,
"eval_samples_per_second": 820.554,
"eval_steps_per_second": 51.285,
"step": 208000
},
{
"epoch": 4.55,
"eval_loss": 2.2686843872070312,
"eval_runtime": 48.6373,
"eval_samples_per_second": 822.414,
"eval_steps_per_second": 51.401,
"step": 216000
},
{
"epoch": 4.72,
"learning_rate": 3.7173333333333333e-07,
"loss": 2.4039,
"step": 224000
},
{
"epoch": 4.72,
"eval_loss": 2.258059024810791,
"eval_runtime": 48.8049,
"eval_samples_per_second": 819.591,
"eval_steps_per_second": 51.224,
"step": 224000
},
{
"epoch": 4.88,
"eval_loss": 2.265613079071045,
"eval_runtime": 48.1482,
"eval_samples_per_second": 830.769,
"eval_steps_per_second": 51.923,
"step": 232000
},
{
"epoch": 5.05,
"learning_rate": 3.69e-07,
"loss": 2.3966,
"step": 240000
},
{
"epoch": 5.05,
"eval_loss": 2.254256248474121,
"eval_runtime": 48.4211,
"eval_samples_per_second": 826.087,
"eval_steps_per_second": 51.63,
"step": 240000
},
{
"epoch": 5.22,
"eval_loss": 2.2767629623413086,
"eval_runtime": 48.0495,
"eval_samples_per_second": 832.475,
"eval_steps_per_second": 52.03,
"step": 248000
},
{
"epoch": 5.39,
"learning_rate": 3.6626666666666664e-07,
"loss": 2.3902,
"step": 256000
},
{
"epoch": 5.39,
"eval_loss": 2.255053997039795,
"eval_runtime": 49.0256,
"eval_samples_per_second": 815.901,
"eval_steps_per_second": 50.994,
"step": 256000
},
{
"epoch": 5.56,
"eval_loss": 2.278200626373291,
"eval_runtime": 48.1284,
"eval_samples_per_second": 831.11,
"eval_steps_per_second": 51.944,
"step": 264000
},
{
"epoch": 5.73,
"learning_rate": 3.6353333333333335e-07,
"loss": 2.3906,
"step": 272000
},
{
"epoch": 5.73,
"eval_loss": 2.2639424800872803,
"eval_runtime": 48.2031,
"eval_samples_per_second": 829.822,
"eval_steps_per_second": 51.864,
"step": 272000
},
{
"epoch": 5.89,
"eval_loss": 2.2584660053253174,
"eval_runtime": 48.1979,
"eval_samples_per_second": 829.911,
"eval_steps_per_second": 51.869,
"step": 280000
},
{
"epoch": 6.06,
"learning_rate": 3.608e-07,
"loss": 2.3849,
"step": 288000
},
{
"epoch": 6.06,
"eval_loss": 2.254004955291748,
"eval_runtime": 48.3611,
"eval_samples_per_second": 827.11,
"eval_steps_per_second": 51.694,
"step": 288000
},
{
"epoch": 6.23,
"eval_loss": 2.274897813796997,
"eval_runtime": 48.7282,
"eval_samples_per_second": 820.88,
"eval_steps_per_second": 51.305,
"step": 296000
},
{
"epoch": 6.4,
"learning_rate": 3.5806666666666666e-07,
"loss": 2.3805,
"step": 304000
},
{
"epoch": 6.4,
"eval_loss": 2.250277042388916,
"eval_runtime": 48.3341,
"eval_samples_per_second": 827.574,
"eval_steps_per_second": 51.723,
"step": 304000
},
{
"epoch": 6.57,
"eval_loss": 2.273930311203003,
"eval_runtime": 47.7431,
"eval_samples_per_second": 837.818,
"eval_steps_per_second": 52.364,
"step": 312000
},
{
"epoch": 6.74,
"learning_rate": 3.553333333333333e-07,
"loss": 2.3873,
"step": 320000
},
{
"epoch": 6.74,
"eval_loss": 2.254091739654541,
"eval_runtime": 48.6038,
"eval_samples_per_second": 822.98,
"eval_steps_per_second": 51.436,
"step": 320000
},
{
"epoch": 6.91,
"eval_loss": 2.2511613368988037,
"eval_runtime": 48.0895,
"eval_samples_per_second": 831.783,
"eval_steps_per_second": 51.986,
"step": 328000
},
{
"epoch": 7.07,
"learning_rate": 3.5259999999999997e-07,
"loss": 2.3942,
"step": 336000
},
{
"epoch": 7.07,
"eval_loss": 2.2594754695892334,
"eval_runtime": 48.3413,
"eval_samples_per_second": 827.45,
"eval_steps_per_second": 51.716,
"step": 336000
},
{
"epoch": 7.24,
"eval_loss": 2.260305166244507,
"eval_runtime": 48.7128,
"eval_samples_per_second": 821.14,
"eval_steps_per_second": 51.321,
"step": 344000
},
{
"epoch": 7.41,
"learning_rate": 3.498666666666667e-07,
"loss": 2.386,
"step": 352000
},
{
"epoch": 7.41,
"eval_loss": 2.257478713989258,
"eval_runtime": 48.858,
"eval_samples_per_second": 818.698,
"eval_steps_per_second": 51.169,
"step": 352000
},
{
"epoch": 7.58,
"eval_loss": 2.2788634300231934,
"eval_runtime": 48.3058,
"eval_samples_per_second": 828.059,
"eval_steps_per_second": 51.754,
"step": 360000
},
{
"epoch": 7.75,
"learning_rate": 3.4713333333333333e-07,
"loss": 2.3806,
"step": 368000
},
{
"epoch": 7.75,
"eval_loss": 2.265003204345703,
"eval_runtime": 48.6794,
"eval_samples_per_second": 821.703,
"eval_steps_per_second": 51.356,
"step": 368000
},
{
"epoch": 7.92,
"eval_loss": 2.2706291675567627,
"eval_runtime": 48.2232,
"eval_samples_per_second": 829.476,
"eval_steps_per_second": 51.842,
"step": 376000
},
{
"epoch": 8.08,
"learning_rate": 3.444e-07,
"loss": 2.3883,
"step": 384000
},
{
"epoch": 8.08,
"eval_loss": 2.2652194499969482,
"eval_runtime": 48.2989,
"eval_samples_per_second": 828.177,
"eval_steps_per_second": 51.761,
"step": 384000
},
{
"epoch": 8.25,
"eval_loss": 2.2540171146392822,
"eval_runtime": 48.1211,
"eval_samples_per_second": 831.236,
"eval_steps_per_second": 51.952,
"step": 392000
},
{
"epoch": 8.42,
"learning_rate": 3.416666666666667e-07,
"loss": 2.3922,
"step": 400000
},
{
"epoch": 8.42,
"eval_loss": 2.268253803253174,
"eval_runtime": 48.5611,
"eval_samples_per_second": 823.704,
"eval_steps_per_second": 51.481,
"step": 400000
},
{
"epoch": 8.59,
"eval_loss": 2.263794422149658,
"eval_runtime": 48.7481,
"eval_samples_per_second": 820.544,
"eval_steps_per_second": 51.284,
"step": 408000
},
{
"epoch": 8.76,
"learning_rate": 3.3893333333333335e-07,
"loss": 2.3887,
"step": 416000
},
{
"epoch": 8.76,
"eval_loss": 2.253530502319336,
"eval_runtime": 48.7981,
"eval_samples_per_second": 819.704,
"eval_steps_per_second": 51.232,
"step": 416000
},
{
"epoch": 8.93,
"eval_loss": 2.2529492378234863,
"eval_runtime": 48.6536,
"eval_samples_per_second": 822.139,
"eval_steps_per_second": 51.384,
"step": 424000
},
{
"epoch": 9.09,
"learning_rate": 3.3619999999999995e-07,
"loss": 2.3818,
"step": 432000
},
{
"epoch": 9.09,
"eval_loss": 2.248337507247925,
"eval_runtime": 48.2543,
"eval_samples_per_second": 828.942,
"eval_steps_per_second": 51.809,
"step": 432000
},
{
"epoch": 9.26,
"eval_loss": 2.2573952674865723,
"eval_runtime": 48.2027,
"eval_samples_per_second": 829.829,
"eval_steps_per_second": 51.864,
"step": 440000
},
{
"epoch": 9.43,
"learning_rate": 3.3346666666666666e-07,
"loss": 2.387,
"step": 448000
},
{
"epoch": 9.43,
"eval_loss": 2.2624008655548096,
"eval_runtime": 48.2217,
"eval_samples_per_second": 829.502,
"eval_steps_per_second": 51.844,
"step": 448000
},
{
"epoch": 9.6,
"eval_loss": 2.26644229888916,
"eval_runtime": 48.04,
"eval_samples_per_second": 832.64,
"eval_steps_per_second": 52.04,
"step": 456000
},
{
"epoch": 9.77,
"learning_rate": 3.307333333333333e-07,
"loss": 2.3839,
"step": 464000
},
{
"epoch": 9.77,
"eval_loss": 2.257237195968628,
"eval_runtime": 48.887,
"eval_samples_per_second": 818.214,
"eval_steps_per_second": 51.138,
"step": 464000
},
{
"epoch": 9.94,
"eval_loss": 2.252383232116699,
"eval_runtime": 48.1755,
"eval_samples_per_second": 830.298,
"eval_steps_per_second": 51.894,
"step": 472000
},
{
"epoch": 10.11,
"learning_rate": 3.28e-07,
"loss": 2.3901,
"step": 480000
},
{
"epoch": 10.11,
"eval_loss": 2.2532765865325928,
"eval_runtime": 48.1778,
"eval_samples_per_second": 830.259,
"eval_steps_per_second": 51.891,
"step": 480000
},
{
"epoch": 10.27,
"eval_loss": 2.250092029571533,
"eval_runtime": 48.8687,
"eval_samples_per_second": 818.52,
"eval_steps_per_second": 51.157,
"step": 488000
},
{
"epoch": 10.44,
"learning_rate": 3.252666666666667e-07,
"loss": 2.382,
"step": 496000
},
{
"epoch": 10.44,
"eval_loss": 2.266896963119507,
"eval_runtime": 48.918,
"eval_samples_per_second": 817.695,
"eval_steps_per_second": 51.106,
"step": 496000
},
{
"epoch": 10.61,
"eval_loss": 2.259631395339966,
"eval_runtime": 48.6846,
"eval_samples_per_second": 821.614,
"eval_steps_per_second": 51.351,
"step": 504000
},
{
"epoch": 10.78,
"learning_rate": 3.2253333333333334e-07,
"loss": 2.3829,
"step": 512000
},
{
"epoch": 10.78,
"eval_loss": 2.270488977432251,
"eval_runtime": 49.0149,
"eval_samples_per_second": 816.079,
"eval_steps_per_second": 51.005,
"step": 512000
},
{
"epoch": 10.95,
"eval_loss": 2.2553160190582275,
"eval_runtime": 48.2406,
"eval_samples_per_second": 829.177,
"eval_steps_per_second": 51.824,
"step": 520000
},
{
"epoch": 11.12,
"learning_rate": 3.198e-07,
"loss": 2.3963,
"step": 528000
},
{
"epoch": 11.12,
"eval_loss": 2.2741470336914062,
"eval_runtime": 48.1623,
"eval_samples_per_second": 830.524,
"eval_steps_per_second": 51.908,
"step": 528000
},
{
"epoch": 11.28,
"eval_loss": 2.266427993774414,
"eval_runtime": 48.1335,
"eval_samples_per_second": 831.022,
"eval_steps_per_second": 51.939,
"step": 536000
},
{
"epoch": 11.45,
"learning_rate": 3.1706666666666665e-07,
"loss": 2.3843,
"step": 544000
},
{
"epoch": 11.45,
"eval_loss": 2.2532401084899902,
"eval_runtime": 48.2869,
"eval_samples_per_second": 828.382,
"eval_steps_per_second": 51.774,
"step": 544000
},
{
"epoch": 11.62,
"eval_loss": 2.2719969749450684,
"eval_runtime": 48.3879,
"eval_samples_per_second": 826.653,
"eval_steps_per_second": 51.666,
"step": 552000
},
{
"epoch": 11.79,
"learning_rate": 3.1433333333333336e-07,
"loss": 2.3853,
"step": 560000
},
{
"epoch": 11.79,
"eval_loss": 2.2532143592834473,
"eval_runtime": 48.6521,
"eval_samples_per_second": 822.164,
"eval_steps_per_second": 51.385,
"step": 560000
},
{
"epoch": 11.96,
"eval_loss": 2.2699735164642334,
"eval_runtime": 48.0759,
"eval_samples_per_second": 832.017,
"eval_steps_per_second": 52.001,
"step": 568000
},
{
"epoch": 12.13,
"learning_rate": 3.116e-07,
"loss": 2.3907,
"step": 576000
},
{
"epoch": 12.13,
"eval_loss": 2.257100820541382,
"eval_runtime": 48.264,
"eval_samples_per_second": 828.776,
"eval_steps_per_second": 51.798,
"step": 576000
},
{
"epoch": 12.29,
"eval_loss": 2.252300500869751,
"eval_runtime": 48.085,
"eval_samples_per_second": 831.86,
"eval_steps_per_second": 51.991,
"step": 584000
},
{
"epoch": 12.46,
"learning_rate": 3.0886666666666667e-07,
"loss": 2.3865,
"step": 592000
},
{
"epoch": 12.46,
"eval_loss": 2.245786428451538,
"eval_runtime": 48.9081,
"eval_samples_per_second": 817.861,
"eval_steps_per_second": 51.116,
"step": 592000
},
{
"epoch": 12.63,
"eval_loss": 2.264732599258423,
"eval_runtime": 48.1262,
"eval_samples_per_second": 831.149,
"eval_steps_per_second": 51.947,
"step": 600000
},
{
"epoch": 12.8,
"learning_rate": 3.061333333333333e-07,
"loss": 2.3827,
"step": 608000
},
{
"epoch": 12.8,
"eval_loss": 2.2490034103393555,
"eval_runtime": 48.1786,
"eval_samples_per_second": 830.244,
"eval_steps_per_second": 51.89,
"step": 608000
},
{
"epoch": 12.97,
"eval_loss": 2.2623653411865234,
"eval_runtime": 48.6642,
"eval_samples_per_second": 821.959,
"eval_steps_per_second": 51.372,
"step": 616000
},
{
"epoch": 13.14,
"learning_rate": 3.034e-07,
"loss": 2.3869,
"step": 624000
},
{
"epoch": 13.14,
"eval_loss": 2.253758430480957,
"eval_runtime": 48.2352,
"eval_samples_per_second": 829.269,
"eval_steps_per_second": 51.829,
"step": 624000
},
{
"epoch": 13.3,
"eval_loss": 2.2357494831085205,
"eval_runtime": 47.7426,
"eval_samples_per_second": 837.826,
"eval_steps_per_second": 52.364,
"step": 632000
},
{
"epoch": 13.47,
"learning_rate": 3.0066666666666663e-07,
"loss": 2.3958,
"step": 640000
},
{
"epoch": 13.47,
"eval_loss": 2.2508862018585205,
"eval_runtime": 48.884,
"eval_samples_per_second": 818.263,
"eval_steps_per_second": 51.141,
"step": 640000
},
{
"epoch": 13.64,
"eval_loss": 2.2690088748931885,
"eval_runtime": 48.5211,
"eval_samples_per_second": 824.384,
"eval_steps_per_second": 51.524,
"step": 648000
},
{
"epoch": 13.81,
"learning_rate": 2.9793333333333334e-07,
"loss": 2.3852,
"step": 656000
},
{
"epoch": 13.81,
"eval_loss": 2.247575283050537,
"eval_runtime": 48.4402,
"eval_samples_per_second": 825.761,
"eval_steps_per_second": 51.61,
"step": 656000
},
{
"epoch": 13.98,
"eval_loss": 2.272088050842285,
"eval_runtime": 48.8783,
"eval_samples_per_second": 818.359,
"eval_steps_per_second": 51.147,
"step": 664000
},
{
"epoch": 14.15,
"learning_rate": 2.952e-07,
"loss": 2.3889,
"step": 672000
},
{
"epoch": 14.15,
"eval_loss": 2.253678560256958,
"eval_runtime": 48.7246,
"eval_samples_per_second": 820.94,
"eval_steps_per_second": 51.309,
"step": 672000
},
{
"epoch": 14.32,
"eval_loss": 2.2723231315612793,
"eval_runtime": 48.1926,
"eval_samples_per_second": 830.003,
"eval_steps_per_second": 51.875,
"step": 680000
},
{
"epoch": 14.48,
"learning_rate": 2.9246666666666665e-07,
"loss": 2.3839,
"step": 688000
},
{
"epoch": 14.48,
"eval_loss": 2.2664077281951904,
"eval_runtime": 48.493,
"eval_samples_per_second": 824.861,
"eval_steps_per_second": 51.554,
"step": 688000
},
{
"epoch": 14.65,
"eval_loss": 2.2725985050201416,
"eval_runtime": 49.24,
"eval_samples_per_second": 812.348,
"eval_steps_per_second": 50.772,
"step": 696000
},
{
"epoch": 14.82,
"learning_rate": 2.897333333333333e-07,
"loss": 2.3884,
"step": 704000
},
{
"epoch": 14.82,
"eval_loss": 2.265206813812256,
"eval_runtime": 49.5431,
"eval_samples_per_second": 807.377,
"eval_steps_per_second": 50.461,
"step": 704000
},
{
"epoch": 14.99,
"eval_loss": 2.2633461952209473,
"eval_runtime": 48.6148,
"eval_samples_per_second": 822.795,
"eval_steps_per_second": 51.425,
"step": 712000
},
{
"epoch": 15.16,
"learning_rate": 2.8699999999999996e-07,
"loss": 2.3827,
"step": 720000
},
{
"epoch": 15.16,
"eval_loss": 2.268095016479492,
"eval_runtime": 49.156,
"eval_samples_per_second": 813.737,
"eval_steps_per_second": 50.859,
"step": 720000
},
{
"epoch": 15.33,
"eval_loss": 2.2542901039123535,
"eval_runtime": 48.9808,
"eval_samples_per_second": 816.646,
"eval_steps_per_second": 51.04,
"step": 728000
},
{
"epoch": 15.49,
"learning_rate": 2.8426666666666667e-07,
"loss": 2.3861,
"step": 736000
},
{
"epoch": 15.49,
"eval_loss": 2.2634222507476807,
"eval_runtime": 48.9041,
"eval_samples_per_second": 817.927,
"eval_steps_per_second": 51.12,
"step": 736000
},
{
"epoch": 15.66,
"eval_loss": 2.2706964015960693,
"eval_runtime": 49.2314,
"eval_samples_per_second": 812.49,
"eval_steps_per_second": 50.781,
"step": 744000
},
{
"epoch": 15.83,
"learning_rate": 2.815333333333333e-07,
"loss": 2.3812,
"step": 752000
},
{
"epoch": 15.83,
"eval_loss": 2.2575085163116455,
"eval_runtime": 48.6081,
"eval_samples_per_second": 822.908,
"eval_steps_per_second": 51.432,
"step": 752000
},
{
"epoch": 16.0,
"eval_loss": 2.2549245357513428,
"eval_runtime": 48.9973,
"eval_samples_per_second": 816.371,
"eval_steps_per_second": 51.023,
"step": 760000
},
{
"epoch": 16.17,
"learning_rate": 2.7880000000000003e-07,
"loss": 2.3862,
"step": 768000
},
{
"epoch": 16.17,
"eval_loss": 2.244624614715576,
"eval_runtime": 49.0122,
"eval_samples_per_second": 816.123,
"eval_steps_per_second": 51.008,
"step": 768000
},
{
"epoch": 16.34,
"eval_loss": 2.261697769165039,
"eval_runtime": 48.9467,
"eval_samples_per_second": 817.216,
"eval_steps_per_second": 51.076,
"step": 776000
},
{
"epoch": 16.5,
"learning_rate": 2.7606666666666664e-07,
"loss": 2.3859,
"step": 784000
},
{
"epoch": 16.5,
"eval_loss": 2.250509023666382,
"eval_runtime": 49.5476,
"eval_samples_per_second": 807.304,
"eval_steps_per_second": 50.457,
"step": 784000
},
{
"epoch": 16.67,
"eval_loss": 2.271986722946167,
"eval_runtime": 48.2956,
"eval_samples_per_second": 828.232,
"eval_steps_per_second": 51.765,
"step": 792000
},
{
"epoch": 16.84,
"learning_rate": 2.733333333333333e-07,
"loss": 2.3873,
"step": 800000
},
{
"epoch": 16.84,
"eval_loss": 2.252095937728882,
"eval_runtime": 48.4511,
"eval_samples_per_second": 825.575,
"eval_steps_per_second": 51.598,
"step": 800000
},
{
"epoch": 17.01,
"eval_loss": 2.254298448562622,
"eval_runtime": 49.0762,
"eval_samples_per_second": 815.06,
"eval_steps_per_second": 50.941,
"step": 808000
},
{
"epoch": 17.18,
"learning_rate": 2.706e-07,
"loss": 2.381,
"step": 816000
},
{
"epoch": 17.18,
"eval_loss": 2.267543077468872,
"eval_runtime": 48.6635,
"eval_samples_per_second": 821.971,
"eval_steps_per_second": 51.373,
"step": 816000
},
{
"epoch": 17.35,
"eval_loss": 2.254502296447754,
"eval_runtime": 49.6829,
"eval_samples_per_second": 805.106,
"eval_steps_per_second": 50.319,
"step": 824000
},
{
"epoch": 17.52,
"learning_rate": 2.6786666666666666e-07,
"loss": 2.3851,
"step": 832000
},
{
"epoch": 17.52,
"eval_loss": 2.2488667964935303,
"eval_runtime": 48.7936,
"eval_samples_per_second": 819.78,
"eval_steps_per_second": 51.236,
"step": 832000
},
{
"epoch": 17.68,
"eval_loss": 2.2605700492858887,
"eval_runtime": 49.2213,
"eval_samples_per_second": 812.656,
"eval_steps_per_second": 50.791,
"step": 840000
},
{
"epoch": 17.85,
"learning_rate": 2.651333333333333e-07,
"loss": 2.3878,
"step": 848000
},
{
"epoch": 17.85,
"eval_loss": 2.2579894065856934,
"eval_runtime": 49.0096,
"eval_samples_per_second": 816.167,
"eval_steps_per_second": 51.01,
"step": 848000
},
{
"epoch": 18.02,
"eval_loss": 2.2604382038116455,
"eval_runtime": 48.2778,
"eval_samples_per_second": 828.538,
"eval_steps_per_second": 51.784,
"step": 856000
},
{
"epoch": 18.19,
"learning_rate": 2.624e-07,
"loss": 2.3812,
"step": 864000
},
{
"epoch": 18.19,
"eval_loss": 2.2630739212036133,
"eval_runtime": 48.321,
"eval_samples_per_second": 827.798,
"eval_steps_per_second": 51.737,
"step": 864000
},
{
"epoch": 18.36,
"eval_loss": 2.250539541244507,
"eval_runtime": 48.9155,
"eval_samples_per_second": 817.737,
"eval_steps_per_second": 51.109,
"step": 872000
},
{
"epoch": 18.53,
"learning_rate": 2.596666666666667e-07,
"loss": 2.3849,
"step": 880000
},
{
"epoch": 18.53,
"eval_loss": 2.2657594680786133,
"eval_runtime": 48.513,
"eval_samples_per_second": 824.521,
"eval_steps_per_second": 51.533,
"step": 880000
},
{
"epoch": 18.69,
"eval_loss": 2.2566869258880615,
"eval_runtime": 49.2473,
"eval_samples_per_second": 812.227,
"eval_steps_per_second": 50.764,
"step": 888000
},
{
"epoch": 18.86,
"learning_rate": 2.5693333333333333e-07,
"loss": 2.3833,
"step": 896000
},
{
"epoch": 18.86,
"eval_loss": 2.2533113956451416,
"eval_runtime": 48.9908,
"eval_samples_per_second": 816.479,
"eval_steps_per_second": 51.03,
"step": 896000
},
{
"epoch": 19.03,
"eval_loss": 2.2455687522888184,
"eval_runtime": 49.5558,
"eval_samples_per_second": 807.17,
"eval_steps_per_second": 50.448,
"step": 904000
},
{
"epoch": 19.2,
"learning_rate": 2.542e-07,
"loss": 2.3847,
"step": 912000
},
{
"epoch": 19.2,
"eval_loss": 2.253338098526001,
"eval_runtime": 49.2167,
"eval_samples_per_second": 812.733,
"eval_steps_per_second": 50.796,
"step": 912000
},
{
"epoch": 19.37,
"eval_loss": 2.257462739944458,
"eval_runtime": 48.4436,
"eval_samples_per_second": 825.703,
"eval_steps_per_second": 51.606,
"step": 920000
},
{
"epoch": 19.54,
"learning_rate": 2.5146666666666664e-07,
"loss": 2.3869,
"step": 928000
},
{
"epoch": 19.54,
"eval_loss": 2.2667601108551025,
"eval_runtime": 49.3388,
"eval_samples_per_second": 810.721,
"eval_steps_per_second": 50.67,
"step": 928000
},
{
"epoch": 19.7,
"eval_loss": 2.2598884105682373,
"eval_runtime": 48.9927,
"eval_samples_per_second": 816.449,
"eval_steps_per_second": 51.028,
"step": 936000
},
{
"epoch": 19.87,
"learning_rate": 2.4873333333333335e-07,
"loss": 2.3867,
"step": 944000
},
{
"epoch": 19.87,
"eval_loss": 2.2680182456970215,
"eval_runtime": 48.4318,
"eval_samples_per_second": 825.904,
"eval_steps_per_second": 51.619,
"step": 944000
},
{
"epoch": 20.04,
"eval_loss": 2.2669413089752197,
"eval_runtime": 50.205,
"eval_samples_per_second": 796.733,
"eval_steps_per_second": 49.796,
"step": 952000
},
{
"epoch": 20.21,
"learning_rate": 2.46e-07,
"loss": 2.3942,
"step": 960000
},
{
"epoch": 20.21,
"eval_loss": 2.2482852935791016,
"eval_runtime": 48.9393,
"eval_samples_per_second": 817.34,
"eval_steps_per_second": 51.084,
"step": 960000
},
{
"epoch": 20.38,
"eval_loss": 2.273371934890747,
"eval_runtime": 48.5937,
"eval_samples_per_second": 823.152,
"eval_steps_per_second": 51.447,
"step": 968000
},
{
"epoch": 20.55,
"learning_rate": 2.4326666666666666e-07,
"loss": 2.3863,
"step": 976000
},
{
"epoch": 20.55,
"eval_loss": 2.262270212173462,
"eval_runtime": 48.5495,
"eval_samples_per_second": 823.902,
"eval_steps_per_second": 51.494,
"step": 976000
},
{
"epoch": 20.72,
"eval_loss": 2.264986038208008,
"eval_runtime": 48.8999,
"eval_samples_per_second": 817.998,
"eval_steps_per_second": 51.125,
"step": 984000
},
{
"epoch": 20.88,
"learning_rate": 2.405333333333333e-07,
"loss": 2.3924,
"step": 992000
},
{
"epoch": 20.88,
"eval_loss": 2.2602696418762207,
"eval_runtime": 49.2404,
"eval_samples_per_second": 812.341,
"eval_steps_per_second": 50.771,
"step": 992000
},
{
"epoch": 21.05,
"eval_loss": 2.2708377838134766,
"eval_runtime": 49.0675,
"eval_samples_per_second": 815.204,
"eval_steps_per_second": 50.95,
"step": 1000000
},
{
"epoch": 21.22,
"learning_rate": 2.3779999999999997e-07,
"loss": 2.3871,
"step": 1008000
},
{
"epoch": 21.22,
"eval_loss": 2.2512402534484863,
"eval_runtime": 48.7719,
"eval_samples_per_second": 820.144,
"eval_steps_per_second": 51.259,
"step": 1008000
},
{
"epoch": 21.39,
"eval_loss": 2.2567834854125977,
"eval_runtime": 49.1119,
"eval_samples_per_second": 814.466,
"eval_steps_per_second": 50.904,
"step": 1016000
},
{
"epoch": 21.56,
"learning_rate": 2.3506666666666668e-07,
"loss": 2.3827,
"step": 1024000
},
{
"epoch": 21.56,
"eval_loss": 2.2676033973693848,
"eval_runtime": 49.0505,
"eval_samples_per_second": 815.486,
"eval_steps_per_second": 50.968,
"step": 1024000
},
{
"epoch": 21.73,
"eval_loss": 2.271024465560913,
"eval_runtime": 49.4407,
"eval_samples_per_second": 809.05,
"eval_steps_per_second": 50.566,
"step": 1032000
},
{
"epoch": 21.89,
"learning_rate": 2.3233333333333334e-07,
"loss": 2.3799,
"step": 1040000
},
{
"epoch": 21.89,
"eval_loss": 2.2804324626922607,
"eval_runtime": 49.138,
"eval_samples_per_second": 814.034,
"eval_steps_per_second": 50.877,
"step": 1040000
},
{
"epoch": 22.06,
"eval_loss": 2.2498600482940674,
"eval_runtime": 48.6186,
"eval_samples_per_second": 822.731,
"eval_steps_per_second": 51.421,
"step": 1048000
},
{
"epoch": 22.23,
"learning_rate": 2.2960000000000002e-07,
"loss": 2.3863,
"step": 1056000
},
{
"epoch": 22.23,
"eval_loss": 2.2556710243225098,
"eval_runtime": 49.7999,
"eval_samples_per_second": 803.214,
"eval_steps_per_second": 50.201,
"step": 1056000
},
{
"epoch": 22.4,
"eval_loss": 2.2603883743286133,
"eval_runtime": 49.3365,
"eval_samples_per_second": 810.759,
"eval_steps_per_second": 50.672,
"step": 1064000
},
{
"epoch": 22.57,
"learning_rate": 2.2686666666666667e-07,
"loss": 2.3858,
"step": 1072000
},
{
"epoch": 22.57,
"eval_loss": 2.2832398414611816,
"eval_runtime": 48.6796,
"eval_samples_per_second": 821.699,
"eval_steps_per_second": 51.356,
"step": 1072000
},
{
"epoch": 22.74,
"eval_loss": 2.244276285171509,
"eval_runtime": 48.7816,
"eval_samples_per_second": 819.982,
"eval_steps_per_second": 51.249,
"step": 1080000
},
{
"epoch": 22.9,
"learning_rate": 2.2413333333333333e-07,
"loss": 2.3859,
"step": 1088000
},
{
"epoch": 22.9,
"eval_loss": 2.260357141494751,
"eval_runtime": 49.4857,
"eval_samples_per_second": 808.315,
"eval_steps_per_second": 50.52,
"step": 1088000
},
{
"epoch": 23.07,
"eval_loss": 2.263144016265869,
"eval_runtime": 48.7635,
"eval_samples_per_second": 820.285,
"eval_steps_per_second": 51.268,
"step": 1096000
},
{
"epoch": 23.24,
"learning_rate": 2.214e-07,
"loss": 2.3846,
"step": 1104000
},
{
"epoch": 23.24,
"eval_loss": 2.2689881324768066,
"eval_runtime": 48.7943,
"eval_samples_per_second": 819.768,
"eval_steps_per_second": 51.235,
"step": 1104000
},
{
"epoch": 23.41,
"eval_loss": 2.25949764251709,
"eval_runtime": 48.4368,
"eval_samples_per_second": 825.819,
"eval_steps_per_second": 51.614,
"step": 1112000
},
{
"epoch": 23.58,
"learning_rate": 2.1866666666666667e-07,
"loss": 2.3887,
"step": 1120000
},
{
"epoch": 23.58,
"eval_loss": 2.2500855922698975,
"eval_runtime": 48.8931,
"eval_samples_per_second": 818.111,
"eval_steps_per_second": 51.132,
"step": 1120000
},
{
"epoch": 23.75,
"eval_loss": 2.2532594203948975,
"eval_runtime": 48.868,
"eval_samples_per_second": 818.532,
"eval_steps_per_second": 51.158,
"step": 1128000
},
{
"epoch": 23.92,
"learning_rate": 2.1593333333333332e-07,
"loss": 2.3856,
"step": 1136000
},
{
"epoch": 23.92,
"eval_loss": 2.252855062484741,
"eval_runtime": 49.6155,
"eval_samples_per_second": 806.199,
"eval_steps_per_second": 50.387,
"step": 1136000
},
{
"epoch": 24.08,
"eval_loss": 2.2455570697784424,
"eval_runtime": 48.8925,
"eval_samples_per_second": 818.121,
"eval_steps_per_second": 51.133,
"step": 1144000
},
{
"epoch": 24.25,
"learning_rate": 2.132e-07,
"loss": 2.3856,
"step": 1152000
},
{
"epoch": 24.25,
"eval_loss": 2.254368782043457,
"eval_runtime": 49.1923,
"eval_samples_per_second": 813.135,
"eval_steps_per_second": 50.821,
"step": 1152000
},
{
"epoch": 24.42,
"eval_loss": 2.25541090965271,
"eval_runtime": 48.5868,
"eval_samples_per_second": 823.268,
"eval_steps_per_second": 51.454,
"step": 1160000
},
{
"epoch": 24.59,
"learning_rate": 2.1046666666666666e-07,
"loss": 2.3867,
"step": 1168000
},
{
"epoch": 24.59,
"eval_loss": 2.2595579624176025,
"eval_runtime": 49.0442,
"eval_samples_per_second": 815.591,
"eval_steps_per_second": 50.974,
"step": 1168000
},
{
"epoch": 24.76,
"eval_loss": 2.252202033996582,
"eval_runtime": 49.5166,
"eval_samples_per_second": 807.81,
"eval_steps_per_second": 50.488,
"step": 1176000
},
{
"epoch": 24.93,
"learning_rate": 2.0773333333333334e-07,
"loss": 2.3795,
"step": 1184000
},
{
"epoch": 24.93,
"eval_loss": 2.249300241470337,
"eval_runtime": 49.7952,
"eval_samples_per_second": 803.29,
"eval_steps_per_second": 50.206,
"step": 1184000
},
{
"epoch": 25.09,
"eval_loss": 2.2608890533447266,
"eval_runtime": 48.681,
"eval_samples_per_second": 821.675,
"eval_steps_per_second": 51.355,
"step": 1192000
},
{
"epoch": 25.26,
"learning_rate": 2.05e-07,
"loss": 2.3926,
"step": 1200000
},
{
"epoch": 25.26,
"eval_loss": 2.2658445835113525,
"eval_runtime": 48.9256,
"eval_samples_per_second": 817.569,
"eval_steps_per_second": 51.098,
"step": 1200000
},
{
"epoch": 25.43,
"eval_loss": 2.2592995166778564,
"eval_runtime": 48.8774,
"eval_samples_per_second": 818.374,
"eval_steps_per_second": 51.148,
"step": 1208000
},
{
"epoch": 25.6,
"learning_rate": 2.0226666666666668e-07,
"loss": 2.3887,
"step": 1216000
},
{
"epoch": 25.6,
"eval_loss": 2.2703697681427,
"eval_runtime": 48.9056,
"eval_samples_per_second": 817.902,
"eval_steps_per_second": 51.119,
"step": 1216000
},
{
"epoch": 25.77,
"eval_loss": 2.263197183609009,
"eval_runtime": 49.6098,
"eval_samples_per_second": 806.292,
"eval_steps_per_second": 50.393,
"step": 1224000
},
{
"epoch": 25.94,
"learning_rate": 1.9953333333333333e-07,
"loss": 2.3926,
"step": 1232000
},
{
"epoch": 25.94,
"eval_loss": 2.2628068923950195,
"eval_runtime": 49.6731,
"eval_samples_per_second": 805.265,
"eval_steps_per_second": 50.329,
"step": 1232000
},
{
"epoch": 26.1,
"eval_loss": 2.2656562328338623,
"eval_runtime": 49.8004,
"eval_samples_per_second": 803.207,
"eval_steps_per_second": 50.2,
"step": 1240000
},
{
"epoch": 26.27,
"learning_rate": 1.968e-07,
"loss": 2.3809,
"step": 1248000
},
{
"epoch": 26.27,
"eval_loss": 2.2545762062072754,
"eval_runtime": 49.3811,
"eval_samples_per_second": 810.026,
"eval_steps_per_second": 50.627,
"step": 1248000
},
{
"epoch": 26.44,
"eval_loss": 2.259634017944336,
"eval_runtime": 49.4215,
"eval_samples_per_second": 809.365,
"eval_steps_per_second": 50.585,
"step": 1256000
},
{
"epoch": 26.61,
"learning_rate": 1.9406666666666667e-07,
"loss": 2.3878,
"step": 1264000
},
{
"epoch": 26.61,
"eval_loss": 2.254516124725342,
"eval_runtime": 48.3,
"eval_samples_per_second": 828.158,
"eval_steps_per_second": 51.76,
"step": 1264000
},
{
"epoch": 26.78,
"eval_loss": 2.2667646408081055,
"eval_runtime": 49.742,
"eval_samples_per_second": 804.149,
"eval_steps_per_second": 50.259,
"step": 1272000
},
{
"epoch": 26.95,
"learning_rate": 1.9133333333333333e-07,
"loss": 2.3861,
"step": 1280000
},
{
"epoch": 26.95,
"eval_loss": 2.2534382343292236,
"eval_runtime": 48.5643,
"eval_samples_per_second": 823.65,
"eval_steps_per_second": 51.478,
"step": 1280000
},
{
"epoch": 27.12,
"eval_loss": 2.261183738708496,
"eval_runtime": 49.4288,
"eval_samples_per_second": 809.246,
"eval_steps_per_second": 50.578,
"step": 1288000
},
{
"epoch": 27.28,
"learning_rate": 1.886e-07,
"loss": 2.3815,
"step": 1296000
},
{
"epoch": 27.28,
"eval_loss": 2.2441422939300537,
"eval_runtime": 48.9537,
"eval_samples_per_second": 817.099,
"eval_steps_per_second": 51.069,
"step": 1296000
},
{
"epoch": 27.45,
"eval_loss": 2.271397590637207,
"eval_runtime": 49.5135,
"eval_samples_per_second": 807.861,
"eval_steps_per_second": 50.491,
"step": 1304000
},
{
"epoch": 27.62,
"learning_rate": 1.8586666666666666e-07,
"loss": 2.3861,
"step": 1312000
},
{
"epoch": 27.62,
"eval_loss": 2.2604434490203857,
"eval_runtime": 48.816,
"eval_samples_per_second": 819.403,
"eval_steps_per_second": 51.213,
"step": 1312000
},
{
"epoch": 27.79,
"eval_loss": 2.2535157203674316,
"eval_runtime": 49.1172,
"eval_samples_per_second": 814.378,
"eval_steps_per_second": 50.899,
"step": 1320000
},
{
"epoch": 27.96,
"learning_rate": 1.8313333333333332e-07,
"loss": 2.388,
"step": 1328000
},
{
"epoch": 27.96,
"eval_loss": 2.2466070652008057,
"eval_runtime": 48.6499,
"eval_samples_per_second": 822.201,
"eval_steps_per_second": 51.388,
"step": 1328000
},
{
"epoch": 28.13,
"eval_loss": 2.258121967315674,
"eval_runtime": 48.6067,
"eval_samples_per_second": 822.932,
"eval_steps_per_second": 51.433,
"step": 1336000
},
{
"epoch": 28.29,
"learning_rate": 1.804e-07,
"loss": 2.3864,
"step": 1344000
},
{
"epoch": 28.29,
"eval_loss": 2.257232904434204,
"eval_runtime": 49.3463,
"eval_samples_per_second": 810.598,
"eval_steps_per_second": 50.662,
"step": 1344000
},
{
"epoch": 28.46,
"eval_loss": 2.238109827041626,
"eval_runtime": 48.9872,
"eval_samples_per_second": 816.539,
"eval_steps_per_second": 51.034,
"step": 1352000
},
{
"epoch": 28.63,
"learning_rate": 1.7766666666666666e-07,
"loss": 2.39,
"step": 1360000
},
{
"epoch": 28.63,
"eval_loss": 2.23980712890625,
"eval_runtime": 49.3711,
"eval_samples_per_second": 810.191,
"eval_steps_per_second": 50.637,
"step": 1360000
},
{
"epoch": 28.8,
"eval_loss": 2.269519805908203,
"eval_runtime": 49.3307,
"eval_samples_per_second": 810.853,
"eval_steps_per_second": 50.678,
"step": 1368000
},
{
"epoch": 28.97,
"learning_rate": 1.7493333333333334e-07,
"loss": 2.39,
"step": 1376000
},
{
"epoch": 28.97,
"eval_loss": 2.262801170349121,
"eval_runtime": 49.0301,
"eval_samples_per_second": 815.825,
"eval_steps_per_second": 50.989,
"step": 1376000
},
{
"epoch": 29.14,
"eval_loss": 2.2599363327026367,
"eval_runtime": 49.1614,
"eval_samples_per_second": 813.647,
"eval_steps_per_second": 50.853,
"step": 1384000
},
{
"epoch": 29.3,
"learning_rate": 1.722e-07,
"loss": 2.3804,
"step": 1392000
},
{
"epoch": 29.3,
"eval_loss": 2.262774705886841,
"eval_runtime": 49.576,
"eval_samples_per_second": 806.842,
"eval_steps_per_second": 50.428,
"step": 1392000
},
{
"epoch": 29.47,
"eval_loss": 2.2721939086914062,
"eval_runtime": 48.6201,
"eval_samples_per_second": 822.705,
"eval_steps_per_second": 51.419,
"step": 1400000
},
{
"epoch": 29.64,
"learning_rate": 1.6946666666666668e-07,
"loss": 2.3858,
"step": 1408000
},
{
"epoch": 29.64,
"eval_loss": 2.24898099899292,
"eval_runtime": 49.0807,
"eval_samples_per_second": 814.983,
"eval_steps_per_second": 50.936,
"step": 1408000
},
{
"epoch": 29.81,
"eval_loss": 2.262730360031128,
"eval_runtime": 49.565,
"eval_samples_per_second": 807.021,
"eval_steps_per_second": 50.439,
"step": 1416000
},
{
"epoch": 29.98,
"learning_rate": 1.6673333333333333e-07,
"loss": 2.3804,
"step": 1424000
},
{
"epoch": 29.98,
"eval_loss": 2.262303113937378,
"eval_runtime": 48.6465,
"eval_samples_per_second": 822.258,
"eval_steps_per_second": 51.391,
"step": 1424000
},
{
"epoch": 30.15,
"eval_loss": 2.252244472503662,
"eval_runtime": 49.1968,
"eval_samples_per_second": 813.061,
"eval_steps_per_second": 50.816,
"step": 1432000
},
{
"epoch": 30.32,
"learning_rate": 1.64e-07,
"loss": 2.3834,
"step": 1440000
},
{
"epoch": 30.32,
"eval_loss": 2.2633419036865234,
"eval_runtime": 48.625,
"eval_samples_per_second": 822.622,
"eval_steps_per_second": 51.414,
"step": 1440000
},
{
"epoch": 30.48,
"eval_loss": 2.255260467529297,
"eval_runtime": 48.5565,
"eval_samples_per_second": 823.782,
"eval_steps_per_second": 51.486,
"step": 1448000
},
{
"epoch": 30.65,
"learning_rate": 1.6126666666666667e-07,
"loss": 2.3853,
"step": 1456000
},
{
"epoch": 30.65,
"eval_loss": 2.239067554473877,
"eval_runtime": 49.1758,
"eval_samples_per_second": 813.408,
"eval_steps_per_second": 50.838,
"step": 1456000
},
{
"epoch": 30.82,
"eval_loss": 2.2615532875061035,
"eval_runtime": 48.6338,
"eval_samples_per_second": 822.473,
"eval_steps_per_second": 51.405,
"step": 1464000
},
{
"epoch": 30.99,
"learning_rate": 1.5853333333333332e-07,
"loss": 2.3946,
"step": 1472000
},
{
"epoch": 30.99,
"eval_loss": 2.2630956172943115,
"eval_runtime": 48.6486,
"eval_samples_per_second": 822.224,
"eval_steps_per_second": 51.389,
"step": 1472000
},
{
"epoch": 31.16,
"eval_loss": 2.2638938426971436,
"eval_runtime": 48.6689,
"eval_samples_per_second": 821.88,
"eval_steps_per_second": 51.367,
"step": 1480000
},
{
"epoch": 31.33,
"learning_rate": 1.558e-07,
"loss": 2.385,
"step": 1488000
},
{
"epoch": 31.33,
"eval_loss": 2.27362060546875,
"eval_runtime": 49.1717,
"eval_samples_per_second": 813.476,
"eval_steps_per_second": 50.842,
"step": 1488000
},
{
"epoch": 31.49,
"eval_loss": 2.2715282440185547,
"eval_runtime": 48.6068,
"eval_samples_per_second": 822.931,
"eval_steps_per_second": 51.433,
"step": 1496000
},
{
"epoch": 31.66,
"learning_rate": 1.5306666666666666e-07,
"loss": 2.387,
"step": 1504000
},
{
"epoch": 31.66,
"eval_loss": 2.255669116973877,
"eval_runtime": 49.2692,
"eval_samples_per_second": 811.866,
"eval_steps_per_second": 50.742,
"step": 1504000
},
{
"epoch": 31.83,
"eval_loss": 2.258305311203003,
"eval_runtime": 49.3922,
"eval_samples_per_second": 809.845,
"eval_steps_per_second": 50.615,
"step": 1512000
},
{
"epoch": 32.0,
"learning_rate": 1.5033333333333332e-07,
"loss": 2.3831,
"step": 1520000
},
{
"epoch": 32.0,
"eval_loss": 2.2543575763702393,
"eval_runtime": 48.8562,
"eval_samples_per_second": 818.73,
"eval_steps_per_second": 51.171,
"step": 1520000
},
{
"epoch": 32.17,
"eval_loss": 2.2756261825561523,
"eval_runtime": 48.8463,
"eval_samples_per_second": 818.895,
"eval_steps_per_second": 51.181,
"step": 1528000
},
{
"epoch": 32.34,
"learning_rate": 1.476e-07,
"loss": 2.3835,
"step": 1536000
},
{
"epoch": 32.34,
"eval_loss": 2.2793610095977783,
"eval_runtime": 48.7275,
"eval_samples_per_second": 820.891,
"eval_steps_per_second": 51.306,
"step": 1536000
},
{
"epoch": 32.5,
"eval_loss": 2.2648372650146484,
"eval_runtime": 49.3109,
"eval_samples_per_second": 811.179,
"eval_steps_per_second": 50.699,
"step": 1544000
},
{
"epoch": 32.67,
"learning_rate": 1.4486666666666665e-07,
"loss": 2.3857,
"step": 1552000
},
{
"epoch": 32.67,
"eval_loss": 2.2563135623931885,
"eval_runtime": 49.7733,
"eval_samples_per_second": 803.643,
"eval_steps_per_second": 50.228,
"step": 1552000
},
{
"epoch": 32.84,
"eval_loss": 2.2537479400634766,
"eval_runtime": 49.2356,
"eval_samples_per_second": 812.42,
"eval_steps_per_second": 50.776,
"step": 1560000
},
{
"epoch": 33.01,
"learning_rate": 1.4213333333333334e-07,
"loss": 2.3856,
"step": 1568000
},
{
"epoch": 33.01,
"eval_loss": 2.261024236679077,
"eval_runtime": 49.2157,
"eval_samples_per_second": 812.749,
"eval_steps_per_second": 50.797,
"step": 1568000
},
{
"epoch": 33.18,
"eval_loss": 2.264604330062866,
"eval_runtime": 49.1631,
"eval_samples_per_second": 813.618,
"eval_steps_per_second": 50.851,
"step": 1576000
},
{
"epoch": 33.35,
"learning_rate": 1.3940000000000002e-07,
"loss": 2.3902,
"step": 1584000
},
{
"epoch": 33.35,
"eval_loss": 2.2544610500335693,
"eval_runtime": 48.8485,
"eval_samples_per_second": 818.858,
"eval_steps_per_second": 51.179,
"step": 1584000
},
{
"epoch": 33.52,
"eval_loss": 2.271030902862549,
"eval_runtime": 48.3697,
"eval_samples_per_second": 826.964,
"eval_steps_per_second": 51.685,
"step": 1592000
},
{
"epoch": 33.68,
"learning_rate": 1.3666666666666665e-07,
"loss": 2.3897,
"step": 1600000
},
{
"epoch": 33.68,
"eval_loss": 2.2601163387298584,
"eval_runtime": 48.8818,
"eval_samples_per_second": 818.3,
"eval_steps_per_second": 51.144,
"step": 1600000
},
{
"epoch": 33.85,
"eval_loss": 2.2542924880981445,
"eval_runtime": 49.0945,
"eval_samples_per_second": 814.754,
"eval_steps_per_second": 50.922,
"step": 1608000
},
{
"epoch": 34.02,
"learning_rate": 1.3393333333333333e-07,
"loss": 2.3866,
"step": 1616000
},
{
"epoch": 34.02,
"eval_loss": 2.2525877952575684,
"eval_runtime": 49.2321,
"eval_samples_per_second": 812.478,
"eval_steps_per_second": 50.78,
"step": 1616000
},
{
"epoch": 34.19,
"eval_loss": 2.262938976287842,
"eval_runtime": 49.3213,
"eval_samples_per_second": 811.009,
"eval_steps_per_second": 50.688,
"step": 1624000
},
{
"epoch": 34.36,
"learning_rate": 1.312e-07,
"loss": 2.3823,
"step": 1632000
},
{
"epoch": 34.36,
"eval_loss": 2.2616801261901855,
"eval_runtime": 48.9485,
"eval_samples_per_second": 817.185,
"eval_steps_per_second": 51.074,
"step": 1632000
},
{
"epoch": 34.53,
"eval_loss": 2.2519824504852295,
"eval_runtime": 48.6687,
"eval_samples_per_second": 821.883,
"eval_steps_per_second": 51.368,
"step": 1640000
},
{
"epoch": 34.69,
"learning_rate": 1.2846666666666667e-07,
"loss": 2.3874,
"step": 1648000
},
{
"epoch": 34.69,
"eval_loss": 2.261162042617798,
"eval_runtime": 48.7567,
"eval_samples_per_second": 820.4,
"eval_steps_per_second": 51.275,
"step": 1648000
},
{
"epoch": 34.86,
"eval_loss": 2.2568650245666504,
"eval_runtime": 48.8018,
"eval_samples_per_second": 819.641,
"eval_steps_per_second": 51.228,
"step": 1656000
},
{
"epoch": 35.03,
"learning_rate": 1.2573333333333332e-07,
"loss": 2.3895,
"step": 1664000
},
{
"epoch": 35.03,
"eval_loss": 2.2633254528045654,
"eval_runtime": 48.7101,
"eval_samples_per_second": 821.185,
"eval_steps_per_second": 51.324,
"step": 1664000
},
{
"epoch": 35.2,
"eval_loss": 2.259277820587158,
"eval_runtime": 49.2378,
"eval_samples_per_second": 812.384,
"eval_steps_per_second": 50.774,
"step": 1672000
},
{
"epoch": 35.37,
"learning_rate": 1.23e-07,
"loss": 2.3857,
"step": 1680000
},
{
"epoch": 35.37,
"eval_loss": 2.2650630474090576,
"eval_runtime": 48.8949,
"eval_samples_per_second": 818.082,
"eval_steps_per_second": 51.13,
"step": 1680000
},
{
"epoch": 35.54,
"eval_loss": 2.256744623184204,
"eval_runtime": 48.7371,
"eval_samples_per_second": 820.73,
"eval_steps_per_second": 51.296,
"step": 1688000
},
{
"epoch": 35.7,
"learning_rate": 1.2026666666666666e-07,
"loss": 2.3811,
"step": 1696000
},
{
"epoch": 35.7,
"eval_loss": 2.253361701965332,
"eval_runtime": 49.5532,
"eval_samples_per_second": 807.214,
"eval_steps_per_second": 50.451,
"step": 1696000
},
{
"epoch": 35.87,
"eval_loss": 2.263338088989258,
"eval_runtime": 49.1414,
"eval_samples_per_second": 813.978,
"eval_steps_per_second": 50.874,
"step": 1704000
},
{
"epoch": 36.04,
"learning_rate": 1.1753333333333334e-07,
"loss": 2.3944,
"step": 1712000
},
{
"epoch": 36.04,
"eval_loss": 2.2504327297210693,
"eval_runtime": 49.4998,
"eval_samples_per_second": 808.084,
"eval_steps_per_second": 50.505,
"step": 1712000
},
{
"epoch": 36.21,
"eval_loss": 2.2518932819366455,
"eval_runtime": 49.3816,
"eval_samples_per_second": 810.018,
"eval_steps_per_second": 50.626,
"step": 1720000
},
{
"epoch": 36.38,
"learning_rate": 1.1480000000000001e-07,
"loss": 2.3883,
"step": 1728000
},
{
"epoch": 36.38,
"eval_loss": 2.2571768760681152,
"eval_runtime": 49.5997,
"eval_samples_per_second": 806.456,
"eval_steps_per_second": 50.404,
"step": 1728000
},
{
"epoch": 36.55,
"eval_loss": 2.2575507164001465,
"eval_runtime": 49.3457,
"eval_samples_per_second": 810.607,
"eval_steps_per_second": 50.663,
"step": 1736000
},
{
"epoch": 36.72,
"learning_rate": 1.1206666666666666e-07,
"loss": 2.3859,
"step": 1744000
},
{
"epoch": 36.72,
"eval_loss": 2.2719168663024902,
"eval_runtime": 48.91,
"eval_samples_per_second": 817.828,
"eval_steps_per_second": 51.114,
"step": 1744000
},
{
"epoch": 36.88,
"eval_loss": 2.2667555809020996,
"eval_runtime": 48.8267,
"eval_samples_per_second": 819.223,
"eval_steps_per_second": 51.201,
"step": 1752000
},
{
"epoch": 37.05,
"learning_rate": 1.0933333333333333e-07,
"loss": 2.3914,
"step": 1760000
},
{
"epoch": 37.05,
"eval_loss": 2.250850200653076,
"eval_runtime": 48.8892,
"eval_samples_per_second": 818.176,
"eval_steps_per_second": 51.136,
"step": 1760000
},
{
"epoch": 37.22,
"eval_loss": 2.2601399421691895,
"eval_runtime": 48.8589,
"eval_samples_per_second": 818.684,
"eval_steps_per_second": 51.168,
"step": 1768000
},
{
"epoch": 37.39,
"learning_rate": 1.066e-07,
"loss": 2.3848,
"step": 1776000
},
{
"epoch": 37.39,
"eval_loss": 2.2686824798583984,
"eval_runtime": 48.9048,
"eval_samples_per_second": 817.915,
"eval_steps_per_second": 51.12,
"step": 1776000
},
{
"epoch": 37.56,
"eval_loss": 2.2513012886047363,
"eval_runtime": 48.7112,
"eval_samples_per_second": 821.166,
"eval_steps_per_second": 51.323,
"step": 1784000
},
{
"epoch": 37.73,
"learning_rate": 1.0386666666666667e-07,
"loss": 2.3903,
"step": 1792000
},
{
"epoch": 37.73,
"eval_loss": 2.2519407272338867,
"eval_runtime": 48.938,
"eval_samples_per_second": 817.361,
"eval_steps_per_second": 51.085,
"step": 1792000
},
{
"epoch": 37.89,
"eval_loss": 2.259387731552124,
"eval_runtime": 49.4041,
"eval_samples_per_second": 809.65,
"eval_steps_per_second": 50.603,
"step": 1800000
},
{
"epoch": 38.06,
"learning_rate": 1.0113333333333334e-07,
"loss": 2.3822,
"step": 1808000
},
{
"epoch": 38.06,
"eval_loss": 2.256521701812744,
"eval_runtime": 48.8635,
"eval_samples_per_second": 818.606,
"eval_steps_per_second": 51.163,
"step": 1808000
},
{
"epoch": 38.23,
"eval_loss": 2.2812252044677734,
"eval_runtime": 49.3349,
"eval_samples_per_second": 810.785,
"eval_steps_per_second": 50.674,
"step": 1816000
},
{
"epoch": 38.4,
"learning_rate": 9.84e-08,
"loss": 2.383,
"step": 1824000
},
{
"epoch": 38.4,
"eval_loss": 2.2589097023010254,
"eval_runtime": 48.8806,
"eval_samples_per_second": 818.32,
"eval_steps_per_second": 51.145,
"step": 1824000
},
{
"epoch": 38.57,
"eval_loss": 2.2560157775878906,
"eval_runtime": 48.961,
"eval_samples_per_second": 816.977,
"eval_steps_per_second": 51.061,
"step": 1832000
},
{
"epoch": 38.74,
"learning_rate": 9.566666666666666e-08,
"loss": 2.3868,
"step": 1840000
},
{
"epoch": 38.74,
"eval_loss": 2.264800548553467,
"eval_runtime": 49.3795,
"eval_samples_per_second": 810.053,
"eval_steps_per_second": 50.628,
"step": 1840000
},
{
"epoch": 38.9,
"eval_loss": 2.2506866455078125,
"eval_runtime": 48.8976,
"eval_samples_per_second": 818.037,
"eval_steps_per_second": 51.127,
"step": 1848000
},
{
"epoch": 39.07,
"learning_rate": 9.293333333333333e-08,
"loss": 2.3775,
"step": 1856000
},
{
"epoch": 39.07,
"eval_loss": 2.2569808959960938,
"eval_runtime": 48.9247,
"eval_samples_per_second": 817.584,
"eval_steps_per_second": 51.099,
"step": 1856000
},
{
"epoch": 39.24,
"eval_loss": 2.2549405097961426,
"eval_runtime": 49.5363,
"eval_samples_per_second": 807.488,
"eval_steps_per_second": 50.468,
"step": 1864000
},
{
"epoch": 39.41,
"learning_rate": 9.02e-08,
"loss": 2.3818,
"step": 1872000
},
{
"epoch": 39.41,
"eval_loss": 2.2583167552948,
"eval_runtime": 49.5006,
"eval_samples_per_second": 808.071,
"eval_steps_per_second": 50.504,
"step": 1872000
},
{
"epoch": 39.58,
"eval_loss": 2.261044502258301,
"eval_runtime": 49.628,
"eval_samples_per_second": 805.997,
"eval_steps_per_second": 50.375,
"step": 1880000
},
{
"epoch": 39.75,
"learning_rate": 8.746666666666667e-08,
"loss": 2.3887,
"step": 1888000
},
{
"epoch": 39.75,
"eval_loss": 2.262882947921753,
"eval_runtime": 49.6245,
"eval_samples_per_second": 806.053,
"eval_steps_per_second": 50.378,
"step": 1888000
},
{
"epoch": 39.91,
"eval_loss": 2.273881435394287,
"eval_runtime": 49.4491,
"eval_samples_per_second": 808.913,
"eval_steps_per_second": 50.557,
"step": 1896000
},
{
"epoch": 40.08,
"learning_rate": 8.473333333333334e-08,
"loss": 2.3893,
"step": 1904000
},
{
"epoch": 40.08,
"eval_loss": 2.2657415866851807,
"eval_runtime": 49.1269,
"eval_samples_per_second": 814.217,
"eval_steps_per_second": 50.889,
"step": 1904000
},
{
"epoch": 40.25,
"eval_loss": 2.2507264614105225,
"eval_runtime": 49.5404,
"eval_samples_per_second": 807.422,
"eval_steps_per_second": 50.464,
"step": 1912000
},
{
"epoch": 40.42,
"learning_rate": 8.2e-08,
"loss": 2.3826,
"step": 1920000
},
{
"epoch": 40.42,
"eval_loss": 2.2505505084991455,
"eval_runtime": 49.5643,
"eval_samples_per_second": 807.033,
"eval_steps_per_second": 50.44,
"step": 1920000
},
{
"epoch": 40.59,
"eval_loss": 2.2630043029785156,
"eval_runtime": 48.8805,
"eval_samples_per_second": 818.322,
"eval_steps_per_second": 51.145,
"step": 1928000
},
{
"epoch": 40.76,
"learning_rate": 7.926666666666666e-08,
"loss": 2.3842,
"step": 1936000
},
{
"epoch": 40.76,
"eval_loss": 2.27164363861084,
"eval_runtime": 49.401,
"eval_samples_per_second": 809.701,
"eval_steps_per_second": 50.606,
"step": 1936000
},
{
"epoch": 40.93,
"eval_loss": 2.264181613922119,
"eval_runtime": 49.4342,
"eval_samples_per_second": 809.156,
"eval_steps_per_second": 50.572,
"step": 1944000
},
{
"epoch": 41.09,
"learning_rate": 7.653333333333333e-08,
"loss": 2.3866,
"step": 1952000
},
{
"epoch": 41.09,
"eval_loss": 2.245126485824585,
"eval_runtime": 49.626,
"eval_samples_per_second": 806.029,
"eval_steps_per_second": 50.377,
"step": 1952000
},
{
"epoch": 41.26,
"eval_loss": 2.2520625591278076,
"eval_runtime": 49.5425,
"eval_samples_per_second": 807.388,
"eval_steps_per_second": 50.462,
"step": 1960000
},
{
"epoch": 41.43,
"learning_rate": 7.38e-08,
"loss": 2.3857,
"step": 1968000
},
{
"epoch": 41.43,
"eval_loss": 2.2457118034362793,
"eval_runtime": 48.9259,
"eval_samples_per_second": 817.562,
"eval_steps_per_second": 51.098,
"step": 1968000
},
{
"epoch": 41.6,
"eval_loss": 2.2574808597564697,
"eval_runtime": 49.7757,
"eval_samples_per_second": 803.605,
"eval_steps_per_second": 50.225,
"step": 1976000
},
{
"epoch": 41.77,
"learning_rate": 7.106666666666667e-08,
"loss": 2.3943,
"step": 1984000
},
{
"epoch": 41.77,
"eval_loss": 2.265901565551758,
"eval_runtime": 49.5814,
"eval_samples_per_second": 806.755,
"eval_steps_per_second": 50.422,
"step": 1984000
},
{
"epoch": 41.94,
"eval_loss": 2.260754346847534,
"eval_runtime": 48.9543,
"eval_samples_per_second": 817.089,
"eval_steps_per_second": 51.068,
"step": 1992000
},
{
"epoch": 42.1,
"learning_rate": 6.833333333333332e-08,
"loss": 2.387,
"step": 2000000
},
{
"epoch": 42.1,
"eval_loss": 2.2687227725982666,
"eval_runtime": 49.4379,
"eval_samples_per_second": 809.096,
"eval_steps_per_second": 50.569,
"step": 2000000
},
{
"epoch": 42.27,
"eval_loss": 2.271784543991089,
"eval_runtime": 49.2241,
"eval_samples_per_second": 812.611,
"eval_steps_per_second": 50.788,
"step": 2008000
},
{
"epoch": 42.44,
"learning_rate": 6.56e-08,
"loss": 2.387,
"step": 2016000
},
{
"epoch": 42.44,
"eval_loss": 2.262915849685669,
"eval_runtime": 49.9903,
"eval_samples_per_second": 800.156,
"eval_steps_per_second": 50.01,
"step": 2016000
},
{
"epoch": 42.61,
"eval_loss": 2.2282731533050537,
"eval_runtime": 50.2394,
"eval_samples_per_second": 796.188,
"eval_steps_per_second": 49.762,
"step": 2024000
},
{
"epoch": 42.78,
"learning_rate": 6.286666666666666e-08,
"loss": 2.3804,
"step": 2032000
},
{
"epoch": 42.78,
"eval_loss": 2.2422289848327637,
"eval_runtime": 49.3037,
"eval_samples_per_second": 811.298,
"eval_steps_per_second": 50.706,
"step": 2032000
},
{
"epoch": 42.95,
"eval_loss": 2.243112802505493,
"eval_runtime": 50.264,
"eval_samples_per_second": 795.798,
"eval_steps_per_second": 49.737,
"step": 2040000
},
{
"epoch": 43.11,
"learning_rate": 6.013333333333333e-08,
"loss": 2.3842,
"step": 2048000
},
{
"epoch": 43.11,
"eval_loss": 2.268855094909668,
"eval_runtime": 49.5735,
"eval_samples_per_second": 806.883,
"eval_steps_per_second": 50.43,
"step": 2048000
},
{
"epoch": 43.28,
"eval_loss": 2.2586092948913574,
"eval_runtime": 49.7496,
"eval_samples_per_second": 804.027,
"eval_steps_per_second": 50.252,
"step": 2056000
},
{
"epoch": 43.45,
"learning_rate": 5.7400000000000004e-08,
"loss": 2.3856,
"step": 2064000
},
{
"epoch": 43.45,
"eval_loss": 2.259028434753418,
"eval_runtime": 49.8461,
"eval_samples_per_second": 802.47,
"eval_steps_per_second": 50.154,
"step": 2064000
},
{
"epoch": 43.62,
"eval_loss": 2.2602360248565674,
"eval_runtime": 50.6446,
"eval_samples_per_second": 789.817,
"eval_steps_per_second": 49.364,
"step": 2072000
},
{
"epoch": 43.79,
"learning_rate": 5.4666666666666666e-08,
"loss": 2.3843,
"step": 2080000
},
{
"epoch": 43.79,
"eval_loss": 2.255709648132324,
"eval_runtime": 49.1117,
"eval_samples_per_second": 814.469,
"eval_steps_per_second": 50.904,
"step": 2080000
},
{
"epoch": 43.96,
"eval_loss": 2.2776308059692383,
"eval_runtime": 49.152,
"eval_samples_per_second": 813.801,
"eval_steps_per_second": 50.863,
"step": 2088000
},
{
"epoch": 44.13,
"learning_rate": 5.1933333333333335e-08,
"loss": 2.3891,
"step": 2096000
},
{
"epoch": 44.13,
"eval_loss": 2.255366325378418,
"eval_runtime": 49.1689,
"eval_samples_per_second": 813.522,
"eval_steps_per_second": 50.845,
"step": 2096000
},
{
"epoch": 44.29,
"eval_loss": 2.2615368366241455,
"eval_runtime": 49.9655,
"eval_samples_per_second": 800.553,
"eval_steps_per_second": 50.035,
"step": 2104000
},
{
"epoch": 44.46,
"learning_rate": 4.92e-08,
"loss": 2.3811,
"step": 2112000
},
{
"epoch": 44.46,
"eval_loss": 2.259124517440796,
"eval_runtime": 49.6149,
"eval_samples_per_second": 806.21,
"eval_steps_per_second": 50.388,
"step": 2112000
},
{
"epoch": 44.63,
"eval_loss": 2.259974718093872,
"eval_runtime": 48.8534,
"eval_samples_per_second": 818.777,
"eval_steps_per_second": 51.174,
"step": 2120000
},
{
"epoch": 44.8,
"learning_rate": 4.6466666666666666e-08,
"loss": 2.3874,
"step": 2128000
},
{
"epoch": 44.8,
"eval_loss": 2.259488582611084,
"eval_runtime": 49.484,
"eval_samples_per_second": 808.342,
"eval_steps_per_second": 50.521,
"step": 2128000
},
{
"epoch": 44.97,
"eval_loss": 2.2761764526367188,
"eval_runtime": 49.5444,
"eval_samples_per_second": 807.357,
"eval_steps_per_second": 50.46,
"step": 2136000
},
{
"epoch": 45.14,
"learning_rate": 4.3733333333333335e-08,
"loss": 2.3822,
"step": 2144000
},
{
"epoch": 45.14,
"eval_loss": 2.2516047954559326,
"eval_runtime": 49.0692,
"eval_samples_per_second": 815.176,
"eval_steps_per_second": 50.948,
"step": 2144000
},
{
"epoch": 45.3,
"eval_loss": 2.2529869079589844,
"eval_runtime": 49.0866,
"eval_samples_per_second": 814.886,
"eval_steps_per_second": 50.93,
"step": 2152000
},
{
"epoch": 45.47,
"learning_rate": 4.1e-08,
"loss": 2.3933,
"step": 2160000
},
{
"epoch": 45.47,
"eval_loss": 2.265183210372925,
"eval_runtime": 48.9694,
"eval_samples_per_second": 816.837,
"eval_steps_per_second": 51.052,
"step": 2160000
},
{
"epoch": 45.64,
"eval_loss": 2.2480199337005615,
"eval_runtime": 48.9374,
"eval_samples_per_second": 817.371,
"eval_steps_per_second": 51.086,
"step": 2168000
},
{
"epoch": 45.81,
"learning_rate": 3.8266666666666665e-08,
"loss": 2.3853,
"step": 2176000
},
{
"epoch": 45.81,
"eval_loss": 2.2716729640960693,
"eval_runtime": 49.0277,
"eval_samples_per_second": 815.865,
"eval_steps_per_second": 50.992,
"step": 2176000
},
{
"epoch": 45.98,
"eval_loss": 2.2568676471710205,
"eval_runtime": 49.4939,
"eval_samples_per_second": 808.181,
"eval_steps_per_second": 50.511,
"step": 2184000
},
{
"epoch": 46.15,
"learning_rate": 3.5533333333333334e-08,
"loss": 2.3917,
"step": 2192000
},
{
"epoch": 46.15,
"eval_loss": 2.2564427852630615,
"eval_runtime": 49.3075,
"eval_samples_per_second": 811.235,
"eval_steps_per_second": 50.702,
"step": 2192000
},
{
"epoch": 46.31,
"eval_loss": 2.2512264251708984,
"eval_runtime": 49.6159,
"eval_samples_per_second": 806.193,
"eval_steps_per_second": 50.387,
"step": 2200000
},
{
"epoch": 46.48,
"learning_rate": 3.28e-08,
"loss": 2.3859,
"step": 2208000
},
{
"epoch": 46.48,
"eval_loss": 2.2611992359161377,
"eval_runtime": 49.8507,
"eval_samples_per_second": 802.396,
"eval_steps_per_second": 50.15,
"step": 2208000
},
{
"epoch": 46.65,
"eval_loss": 2.2609057426452637,
"eval_runtime": 49.439,
"eval_samples_per_second": 809.077,
"eval_steps_per_second": 50.567,
"step": 2216000
},
{
"epoch": 46.82,
"learning_rate": 3.0066666666666665e-08,
"loss": 2.3879,
"step": 2224000
},
{
"epoch": 46.82,
"eval_loss": 2.255183219909668,
"eval_runtime": 50.0243,
"eval_samples_per_second": 799.611,
"eval_steps_per_second": 49.976,
"step": 2224000
},
{
"epoch": 46.99,
"eval_loss": 2.2568256855010986,
"eval_runtime": 48.9637,
"eval_samples_per_second": 816.932,
"eval_steps_per_second": 51.058,
"step": 2232000
},
{
"epoch": 47.16,
"learning_rate": 2.7333333333333333e-08,
"loss": 2.3823,
"step": 2240000
},
{
"epoch": 47.16,
"eval_loss": 2.250671148300171,
"eval_runtime": 49.1163,
"eval_samples_per_second": 814.393,
"eval_steps_per_second": 50.9,
"step": 2240000
},
{
"epoch": 47.33,
"eval_loss": 2.2761969566345215,
"eval_runtime": 49.444,
"eval_samples_per_second": 808.995,
"eval_steps_per_second": 50.562,
"step": 2248000
},
{
"epoch": 47.49,
"learning_rate": 2.46e-08,
"loss": 2.388,
"step": 2256000
},
{
"epoch": 47.49,
"eval_loss": 2.252157211303711,
"eval_runtime": 48.9857,
"eval_samples_per_second": 816.565,
"eval_steps_per_second": 51.035,
"step": 2256000
},
{
"epoch": 47.66,
"eval_loss": 2.2531578540802,
"eval_runtime": 48.9438,
"eval_samples_per_second": 817.263,
"eval_steps_per_second": 51.079,
"step": 2264000
},
{
"epoch": 47.83,
"learning_rate": 2.1866666666666667e-08,
"loss": 2.3773,
"step": 2272000
},
{
"epoch": 47.83,
"eval_loss": 2.2489843368530273,
"eval_runtime": 49.0407,
"eval_samples_per_second": 815.65,
"eval_steps_per_second": 50.978,
"step": 2272000
},
{
"epoch": 48.0,
"eval_loss": 2.2648425102233887,
"eval_runtime": 49.5178,
"eval_samples_per_second": 807.791,
"eval_steps_per_second": 50.487,
"step": 2280000
},
{
"epoch": 48.17,
"learning_rate": 1.9133333333333333e-08,
"loss": 2.3828,
"step": 2288000
},
{
"epoch": 48.17,
"eval_loss": 2.25002384185791,
"eval_runtime": 49.6812,
"eval_samples_per_second": 805.134,
"eval_steps_per_second": 50.321,
"step": 2288000
},
{
"epoch": 48.34,
"eval_loss": 2.253399610519409,
"eval_runtime": 49.4626,
"eval_samples_per_second": 808.692,
"eval_steps_per_second": 50.543,
"step": 2296000
},
{
"epoch": 48.5,
"learning_rate": 1.64e-08,
"loss": 2.3816,
"step": 2304000
},
{
"epoch": 48.5,
"eval_loss": 2.251549482345581,
"eval_runtime": 49.4033,
"eval_samples_per_second": 809.663,
"eval_steps_per_second": 50.604,
"step": 2304000
},
{
"epoch": 48.67,
"eval_loss": 2.2701914310455322,
"eval_runtime": 49.577,
"eval_samples_per_second": 806.826,
"eval_steps_per_second": 50.427,
"step": 2312000
},
{
"epoch": 48.84,
"learning_rate": 1.3666666666666667e-08,
"loss": 2.3784,
"step": 2320000
},
{
"epoch": 48.84,
"eval_loss": 2.2583844661712646,
"eval_runtime": 49.2469,
"eval_samples_per_second": 812.233,
"eval_steps_per_second": 50.765,
"step": 2320000
},
{
"epoch": 49.01,
"eval_loss": 2.23818039894104,
"eval_runtime": 48.9834,
"eval_samples_per_second": 816.603,
"eval_steps_per_second": 51.038,
"step": 2328000
},
{
"epoch": 49.18,
"learning_rate": 1.0933333333333334e-08,
"loss": 2.3863,
"step": 2336000
},
{
"epoch": 49.18,
"eval_loss": 2.260406732559204,
"eval_runtime": 49.569,
"eval_samples_per_second": 806.956,
"eval_steps_per_second": 50.435,
"step": 2336000
},
{
"epoch": 49.35,
"eval_loss": 2.2607226371765137,
"eval_runtime": 49.5423,
"eval_samples_per_second": 807.39,
"eval_steps_per_second": 50.462,
"step": 2344000
},
{
"epoch": 49.51,
"learning_rate": 8.2e-09,
"loss": 2.3863,
"step": 2352000
},
{
"epoch": 49.51,
"eval_loss": 2.26461124420166,
"eval_runtime": 48.9942,
"eval_samples_per_second": 816.423,
"eval_steps_per_second": 51.026,
"step": 2352000
},
{
"epoch": 49.68,
"eval_loss": 2.2533907890319824,
"eval_runtime": 49.998,
"eval_samples_per_second": 800.033,
"eval_steps_per_second": 50.002,
"step": 2360000
},
{
"epoch": 49.85,
"learning_rate": 5.466666666666667e-09,
"loss": 2.3873,
"step": 2368000
},
{
"epoch": 49.85,
"eval_loss": 2.2741761207580566,
"eval_runtime": 49.1748,
"eval_samples_per_second": 813.425,
"eval_steps_per_second": 50.839,
"step": 2368000
},
{
"epoch": 50.02,
"eval_loss": 2.2686805725097656,
"eval_runtime": 49.0988,
"eval_samples_per_second": 814.684,
"eval_steps_per_second": 50.918,
"step": 2376000
},
{
"epoch": 50.19,
"learning_rate": 2.7333333333333334e-09,
"loss": 2.39,
"step": 2384000
},
{
"epoch": 50.19,
"eval_loss": 2.2580976486206055,
"eval_runtime": 49.688,
"eval_samples_per_second": 805.024,
"eval_steps_per_second": 50.314,
"step": 2384000
},
{
"epoch": 50.36,
"eval_loss": 2.2459537982940674,
"eval_runtime": 49.7136,
"eval_samples_per_second": 804.608,
"eval_steps_per_second": 50.288,
"step": 2392000
},
{
"epoch": 50.53,
"learning_rate": 0.0,
"loss": 2.3937,
"step": 2400000
},
{
"epoch": 50.53,
"eval_loss": 2.264155387878418,
"eval_runtime": 49.6839,
"eval_samples_per_second": 805.089,
"eval_steps_per_second": 50.318,
"step": 2400000
},
{
"epoch": 50.53,
"step": 2400000,
"total_flos": 8.417954735470524e+17,
"train_loss": 2.392315192057292,
"train_runtime": 173434.012,
"train_samples_per_second": 221.41,
"train_steps_per_second": 13.838
}
],
"logging_steps": 16000,
"max_steps": 2400000,
"num_train_epochs": 51,
"save_steps": 32000,
"total_flos": 8.417954735470524e+17,
"trial_name": null,
"trial_params": null
}