{ "best_metric": 2.244624614715576, "best_model_checkpoint": "./model_tweets_2020_Q3_90/checkpoint-768000", "epoch": 50.52525209995579, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_loss": 2.5349206924438477, "eval_runtime": 47.928, "eval_samples_per_second": 834.585, "eval_steps_per_second": 52.162, "step": 8000 }, { "epoch": 0.34, "learning_rate": 4.0726666666666665e-07, "loss": 2.7955, "step": 16000 }, { "epoch": 0.34, "eval_loss": 2.444758892059326, "eval_runtime": 48.5664, "eval_samples_per_second": 823.615, "eval_steps_per_second": 51.476, "step": 16000 }, { "epoch": 0.51, "eval_loss": 2.3948748111724854, "eval_runtime": 48.544, "eval_samples_per_second": 823.995, "eval_steps_per_second": 51.5, "step": 24000 }, { "epoch": 0.67, "learning_rate": 4.0453333333333336e-07, "loss": 2.5335, "step": 32000 }, { "epoch": 0.67, "eval_loss": 2.3699398040771484, "eval_runtime": 48.2115, "eval_samples_per_second": 829.678, "eval_steps_per_second": 51.855, "step": 32000 }, { "epoch": 0.84, "eval_loss": 2.354438066482544, "eval_runtime": 48.5047, "eval_samples_per_second": 824.662, "eval_steps_per_second": 51.541, "step": 40000 }, { "epoch": 1.01, "learning_rate": 4.018e-07, "loss": 2.4757, "step": 48000 }, { "epoch": 1.01, "eval_loss": 2.347705364227295, "eval_runtime": 48.6892, "eval_samples_per_second": 821.537, "eval_steps_per_second": 51.346, "step": 48000 }, { "epoch": 1.18, "eval_loss": 2.3280608654022217, "eval_runtime": 48.0175, "eval_samples_per_second": 833.029, "eval_steps_per_second": 52.064, "step": 56000 }, { "epoch": 1.35, "learning_rate": 3.9906666666666667e-07, "loss": 2.446, "step": 64000 }, { "epoch": 1.35, "eval_loss": 2.317082166671753, "eval_runtime": 48.561, "eval_samples_per_second": 823.706, "eval_steps_per_second": 51.482, "step": 64000 }, { "epoch": 1.52, "eval_loss": 2.308218240737915, "eval_runtime": 48.3682, "eval_samples_per_second": 826.989, "eval_steps_per_second": 51.687, "step": 72000 }, { "epoch": 1.68, "learning_rate": 3.963333333333333e-07, "loss": 2.4291, "step": 80000 }, { "epoch": 1.68, "eval_loss": 2.316983222961426, "eval_runtime": 48.1155, "eval_samples_per_second": 831.333, "eval_steps_per_second": 51.958, "step": 80000 }, { "epoch": 1.85, "eval_loss": 2.2962071895599365, "eval_runtime": 48.0395, "eval_samples_per_second": 832.648, "eval_steps_per_second": 52.041, "step": 88000 }, { "epoch": 2.02, "learning_rate": 3.936e-07, "loss": 2.4275, "step": 96000 }, { "epoch": 2.02, "eval_loss": 2.304356575012207, "eval_runtime": 48.8826, "eval_samples_per_second": 818.287, "eval_steps_per_second": 51.143, "step": 96000 }, { "epoch": 2.19, "eval_loss": 2.2738351821899414, "eval_runtime": 48.096, "eval_samples_per_second": 831.671, "eval_steps_per_second": 51.979, "step": 104000 }, { "epoch": 2.36, "learning_rate": 3.908666666666667e-07, "loss": 2.4148, "step": 112000 }, { "epoch": 2.36, "eval_loss": 2.2927024364471436, "eval_runtime": 48.7643, "eval_samples_per_second": 820.272, "eval_steps_per_second": 51.267, "step": 112000 }, { "epoch": 2.53, "eval_loss": 2.2684385776519775, "eval_runtime": 48.701, "eval_samples_per_second": 821.338, "eval_steps_per_second": 51.334, "step": 120000 }, { "epoch": 2.69, "learning_rate": 3.8813333333333334e-07, "loss": 2.4062, "step": 128000 }, { "epoch": 2.69, "eval_loss": 2.289085626602173, "eval_runtime": 48.1247, "eval_samples_per_second": 831.174, "eval_steps_per_second": 51.948, "step": 128000 }, { "epoch": 2.86, "eval_loss": 2.278874397277832, "eval_runtime": 48.1786, "eval_samples_per_second": 830.245, "eval_steps_per_second": 51.89, "step": 136000 }, { "epoch": 3.03, "learning_rate": 3.854e-07, "loss": 2.4022, "step": 144000 }, { "epoch": 3.03, "eval_loss": 2.2659101486206055, "eval_runtime": 48.6348, "eval_samples_per_second": 822.457, "eval_steps_per_second": 51.404, "step": 144000 }, { "epoch": 3.2, "eval_loss": 2.282435894012451, "eval_runtime": 48.1725, "eval_samples_per_second": 830.349, "eval_steps_per_second": 51.897, "step": 152000 }, { "epoch": 3.37, "learning_rate": 3.8266666666666665e-07, "loss": 2.3943, "step": 160000 }, { "epoch": 3.37, "eval_loss": 2.268383026123047, "eval_runtime": 48.1548, "eval_samples_per_second": 830.655, "eval_steps_per_second": 51.916, "step": 160000 }, { "epoch": 3.54, "eval_loss": 2.268256187438965, "eval_runtime": 48.6456, "eval_samples_per_second": 822.274, "eval_steps_per_second": 51.392, "step": 168000 }, { "epoch": 3.71, "learning_rate": 3.799333333333333e-07, "loss": 2.3957, "step": 176000 }, { "epoch": 3.71, "eval_loss": 2.2737274169921875, "eval_runtime": 48.1068, "eval_samples_per_second": 831.483, "eval_steps_per_second": 51.968, "step": 176000 }, { "epoch": 3.87, "eval_loss": 2.2779204845428467, "eval_runtime": 48.6234, "eval_samples_per_second": 822.65, "eval_steps_per_second": 51.416, "step": 184000 }, { "epoch": 4.04, "learning_rate": 3.772e-07, "loss": 2.3976, "step": 192000 }, { "epoch": 4.04, "eval_loss": 2.2710366249084473, "eval_runtime": 48.2531, "eval_samples_per_second": 828.961, "eval_steps_per_second": 51.81, "step": 192000 }, { "epoch": 4.21, "eval_loss": 2.249539852142334, "eval_runtime": 48.0877, "eval_samples_per_second": 831.813, "eval_steps_per_second": 51.988, "step": 200000 }, { "epoch": 4.38, "learning_rate": 3.7446666666666667e-07, "loss": 2.3933, "step": 208000 }, { "epoch": 4.38, "eval_loss": 2.266042947769165, "eval_runtime": 48.7475, "eval_samples_per_second": 820.554, "eval_steps_per_second": 51.285, "step": 208000 }, { "epoch": 4.55, "eval_loss": 2.2686843872070312, "eval_runtime": 48.6373, "eval_samples_per_second": 822.414, "eval_steps_per_second": 51.401, "step": 216000 }, { "epoch": 4.72, "learning_rate": 3.7173333333333333e-07, "loss": 2.4039, "step": 224000 }, { "epoch": 4.72, "eval_loss": 2.258059024810791, "eval_runtime": 48.8049, "eval_samples_per_second": 819.591, "eval_steps_per_second": 51.224, "step": 224000 }, { "epoch": 4.88, "eval_loss": 2.265613079071045, "eval_runtime": 48.1482, "eval_samples_per_second": 830.769, "eval_steps_per_second": 51.923, "step": 232000 }, { "epoch": 5.05, "learning_rate": 3.69e-07, "loss": 2.3966, "step": 240000 }, { "epoch": 5.05, "eval_loss": 2.254256248474121, "eval_runtime": 48.4211, "eval_samples_per_second": 826.087, "eval_steps_per_second": 51.63, "step": 240000 }, { "epoch": 5.22, "eval_loss": 2.2767629623413086, "eval_runtime": 48.0495, "eval_samples_per_second": 832.475, "eval_steps_per_second": 52.03, "step": 248000 }, { "epoch": 5.39, "learning_rate": 3.6626666666666664e-07, "loss": 2.3902, "step": 256000 }, { "epoch": 5.39, "eval_loss": 2.255053997039795, "eval_runtime": 49.0256, "eval_samples_per_second": 815.901, "eval_steps_per_second": 50.994, "step": 256000 }, { "epoch": 5.56, "eval_loss": 2.278200626373291, "eval_runtime": 48.1284, "eval_samples_per_second": 831.11, "eval_steps_per_second": 51.944, "step": 264000 }, { "epoch": 5.73, "learning_rate": 3.6353333333333335e-07, "loss": 2.3906, "step": 272000 }, { "epoch": 5.73, "eval_loss": 2.2639424800872803, "eval_runtime": 48.2031, "eval_samples_per_second": 829.822, "eval_steps_per_second": 51.864, "step": 272000 }, { "epoch": 5.89, "eval_loss": 2.2584660053253174, "eval_runtime": 48.1979, "eval_samples_per_second": 829.911, "eval_steps_per_second": 51.869, "step": 280000 }, { "epoch": 6.06, "learning_rate": 3.608e-07, "loss": 2.3849, "step": 288000 }, { "epoch": 6.06, "eval_loss": 2.254004955291748, "eval_runtime": 48.3611, "eval_samples_per_second": 827.11, "eval_steps_per_second": 51.694, "step": 288000 }, { "epoch": 6.23, "eval_loss": 2.274897813796997, "eval_runtime": 48.7282, "eval_samples_per_second": 820.88, "eval_steps_per_second": 51.305, "step": 296000 }, { "epoch": 6.4, "learning_rate": 3.5806666666666666e-07, "loss": 2.3805, "step": 304000 }, { "epoch": 6.4, "eval_loss": 2.250277042388916, "eval_runtime": 48.3341, "eval_samples_per_second": 827.574, "eval_steps_per_second": 51.723, "step": 304000 }, { "epoch": 6.57, "eval_loss": 2.273930311203003, "eval_runtime": 47.7431, "eval_samples_per_second": 837.818, "eval_steps_per_second": 52.364, "step": 312000 }, { "epoch": 6.74, "learning_rate": 3.553333333333333e-07, "loss": 2.3873, "step": 320000 }, { "epoch": 6.74, "eval_loss": 2.254091739654541, "eval_runtime": 48.6038, "eval_samples_per_second": 822.98, "eval_steps_per_second": 51.436, "step": 320000 }, { "epoch": 6.91, "eval_loss": 2.2511613368988037, "eval_runtime": 48.0895, "eval_samples_per_second": 831.783, "eval_steps_per_second": 51.986, "step": 328000 }, { "epoch": 7.07, "learning_rate": 3.5259999999999997e-07, "loss": 2.3942, "step": 336000 }, { "epoch": 7.07, "eval_loss": 2.2594754695892334, "eval_runtime": 48.3413, "eval_samples_per_second": 827.45, "eval_steps_per_second": 51.716, "step": 336000 }, { "epoch": 7.24, "eval_loss": 2.260305166244507, "eval_runtime": 48.7128, "eval_samples_per_second": 821.14, "eval_steps_per_second": 51.321, "step": 344000 }, { "epoch": 7.41, "learning_rate": 3.498666666666667e-07, "loss": 2.386, "step": 352000 }, { "epoch": 7.41, "eval_loss": 2.257478713989258, "eval_runtime": 48.858, "eval_samples_per_second": 818.698, "eval_steps_per_second": 51.169, "step": 352000 }, { "epoch": 7.58, "eval_loss": 2.2788634300231934, "eval_runtime": 48.3058, "eval_samples_per_second": 828.059, "eval_steps_per_second": 51.754, "step": 360000 }, { "epoch": 7.75, "learning_rate": 3.4713333333333333e-07, "loss": 2.3806, "step": 368000 }, { "epoch": 7.75, "eval_loss": 2.265003204345703, "eval_runtime": 48.6794, "eval_samples_per_second": 821.703, "eval_steps_per_second": 51.356, "step": 368000 }, { "epoch": 7.92, "eval_loss": 2.2706291675567627, "eval_runtime": 48.2232, "eval_samples_per_second": 829.476, "eval_steps_per_second": 51.842, "step": 376000 }, { "epoch": 8.08, "learning_rate": 3.444e-07, "loss": 2.3883, "step": 384000 }, { "epoch": 8.08, "eval_loss": 2.2652194499969482, "eval_runtime": 48.2989, "eval_samples_per_second": 828.177, "eval_steps_per_second": 51.761, "step": 384000 }, { "epoch": 8.25, "eval_loss": 2.2540171146392822, "eval_runtime": 48.1211, "eval_samples_per_second": 831.236, "eval_steps_per_second": 51.952, "step": 392000 }, { "epoch": 8.42, "learning_rate": 3.416666666666667e-07, "loss": 2.3922, "step": 400000 }, { "epoch": 8.42, "eval_loss": 2.268253803253174, "eval_runtime": 48.5611, "eval_samples_per_second": 823.704, "eval_steps_per_second": 51.481, "step": 400000 }, { "epoch": 8.59, "eval_loss": 2.263794422149658, "eval_runtime": 48.7481, "eval_samples_per_second": 820.544, "eval_steps_per_second": 51.284, "step": 408000 }, { "epoch": 8.76, "learning_rate": 3.3893333333333335e-07, "loss": 2.3887, "step": 416000 }, { "epoch": 8.76, "eval_loss": 2.253530502319336, "eval_runtime": 48.7981, "eval_samples_per_second": 819.704, "eval_steps_per_second": 51.232, "step": 416000 }, { "epoch": 8.93, "eval_loss": 2.2529492378234863, "eval_runtime": 48.6536, "eval_samples_per_second": 822.139, "eval_steps_per_second": 51.384, "step": 424000 }, { "epoch": 9.09, "learning_rate": 3.3619999999999995e-07, "loss": 2.3818, "step": 432000 }, { "epoch": 9.09, "eval_loss": 2.248337507247925, "eval_runtime": 48.2543, "eval_samples_per_second": 828.942, "eval_steps_per_second": 51.809, "step": 432000 }, { "epoch": 9.26, "eval_loss": 2.2573952674865723, "eval_runtime": 48.2027, "eval_samples_per_second": 829.829, "eval_steps_per_second": 51.864, "step": 440000 }, { "epoch": 9.43, "learning_rate": 3.3346666666666666e-07, "loss": 2.387, "step": 448000 }, { "epoch": 9.43, "eval_loss": 2.2624008655548096, "eval_runtime": 48.2217, "eval_samples_per_second": 829.502, "eval_steps_per_second": 51.844, "step": 448000 }, { "epoch": 9.6, "eval_loss": 2.26644229888916, "eval_runtime": 48.04, "eval_samples_per_second": 832.64, "eval_steps_per_second": 52.04, "step": 456000 }, { "epoch": 9.77, "learning_rate": 3.307333333333333e-07, "loss": 2.3839, "step": 464000 }, { "epoch": 9.77, "eval_loss": 2.257237195968628, "eval_runtime": 48.887, "eval_samples_per_second": 818.214, "eval_steps_per_second": 51.138, "step": 464000 }, { "epoch": 9.94, "eval_loss": 2.252383232116699, "eval_runtime": 48.1755, "eval_samples_per_second": 830.298, "eval_steps_per_second": 51.894, "step": 472000 }, { "epoch": 10.11, "learning_rate": 3.28e-07, "loss": 2.3901, "step": 480000 }, { "epoch": 10.11, "eval_loss": 2.2532765865325928, "eval_runtime": 48.1778, "eval_samples_per_second": 830.259, "eval_steps_per_second": 51.891, "step": 480000 }, { "epoch": 10.27, "eval_loss": 2.250092029571533, "eval_runtime": 48.8687, "eval_samples_per_second": 818.52, "eval_steps_per_second": 51.157, "step": 488000 }, { "epoch": 10.44, "learning_rate": 3.252666666666667e-07, "loss": 2.382, "step": 496000 }, { "epoch": 10.44, "eval_loss": 2.266896963119507, "eval_runtime": 48.918, "eval_samples_per_second": 817.695, "eval_steps_per_second": 51.106, "step": 496000 }, { "epoch": 10.61, "eval_loss": 2.259631395339966, "eval_runtime": 48.6846, "eval_samples_per_second": 821.614, "eval_steps_per_second": 51.351, "step": 504000 }, { "epoch": 10.78, "learning_rate": 3.2253333333333334e-07, "loss": 2.3829, "step": 512000 }, { "epoch": 10.78, "eval_loss": 2.270488977432251, "eval_runtime": 49.0149, "eval_samples_per_second": 816.079, "eval_steps_per_second": 51.005, "step": 512000 }, { "epoch": 10.95, "eval_loss": 2.2553160190582275, "eval_runtime": 48.2406, "eval_samples_per_second": 829.177, "eval_steps_per_second": 51.824, "step": 520000 }, { "epoch": 11.12, "learning_rate": 3.198e-07, "loss": 2.3963, "step": 528000 }, { "epoch": 11.12, "eval_loss": 2.2741470336914062, "eval_runtime": 48.1623, "eval_samples_per_second": 830.524, "eval_steps_per_second": 51.908, "step": 528000 }, { "epoch": 11.28, "eval_loss": 2.266427993774414, "eval_runtime": 48.1335, "eval_samples_per_second": 831.022, "eval_steps_per_second": 51.939, "step": 536000 }, { "epoch": 11.45, "learning_rate": 3.1706666666666665e-07, "loss": 2.3843, "step": 544000 }, { "epoch": 11.45, "eval_loss": 2.2532401084899902, "eval_runtime": 48.2869, "eval_samples_per_second": 828.382, "eval_steps_per_second": 51.774, "step": 544000 }, { "epoch": 11.62, "eval_loss": 2.2719969749450684, "eval_runtime": 48.3879, "eval_samples_per_second": 826.653, "eval_steps_per_second": 51.666, "step": 552000 }, { "epoch": 11.79, "learning_rate": 3.1433333333333336e-07, "loss": 2.3853, "step": 560000 }, { "epoch": 11.79, "eval_loss": 2.2532143592834473, "eval_runtime": 48.6521, "eval_samples_per_second": 822.164, "eval_steps_per_second": 51.385, "step": 560000 }, { "epoch": 11.96, "eval_loss": 2.2699735164642334, "eval_runtime": 48.0759, "eval_samples_per_second": 832.017, "eval_steps_per_second": 52.001, "step": 568000 }, { "epoch": 12.13, "learning_rate": 3.116e-07, "loss": 2.3907, "step": 576000 }, { "epoch": 12.13, "eval_loss": 2.257100820541382, "eval_runtime": 48.264, "eval_samples_per_second": 828.776, "eval_steps_per_second": 51.798, "step": 576000 }, { "epoch": 12.29, "eval_loss": 2.252300500869751, "eval_runtime": 48.085, "eval_samples_per_second": 831.86, "eval_steps_per_second": 51.991, "step": 584000 }, { "epoch": 12.46, "learning_rate": 3.0886666666666667e-07, "loss": 2.3865, "step": 592000 }, { "epoch": 12.46, "eval_loss": 2.245786428451538, "eval_runtime": 48.9081, "eval_samples_per_second": 817.861, "eval_steps_per_second": 51.116, "step": 592000 }, { "epoch": 12.63, "eval_loss": 2.264732599258423, "eval_runtime": 48.1262, "eval_samples_per_second": 831.149, "eval_steps_per_second": 51.947, "step": 600000 }, { "epoch": 12.8, "learning_rate": 3.061333333333333e-07, "loss": 2.3827, "step": 608000 }, { "epoch": 12.8, "eval_loss": 2.2490034103393555, "eval_runtime": 48.1786, "eval_samples_per_second": 830.244, "eval_steps_per_second": 51.89, "step": 608000 }, { "epoch": 12.97, "eval_loss": 2.2623653411865234, "eval_runtime": 48.6642, "eval_samples_per_second": 821.959, "eval_steps_per_second": 51.372, "step": 616000 }, { "epoch": 13.14, "learning_rate": 3.034e-07, "loss": 2.3869, "step": 624000 }, { "epoch": 13.14, "eval_loss": 2.253758430480957, "eval_runtime": 48.2352, "eval_samples_per_second": 829.269, "eval_steps_per_second": 51.829, "step": 624000 }, { "epoch": 13.3, "eval_loss": 2.2357494831085205, "eval_runtime": 47.7426, "eval_samples_per_second": 837.826, "eval_steps_per_second": 52.364, "step": 632000 }, { "epoch": 13.47, "learning_rate": 3.0066666666666663e-07, "loss": 2.3958, "step": 640000 }, { "epoch": 13.47, "eval_loss": 2.2508862018585205, "eval_runtime": 48.884, "eval_samples_per_second": 818.263, "eval_steps_per_second": 51.141, "step": 640000 }, { "epoch": 13.64, "eval_loss": 2.2690088748931885, "eval_runtime": 48.5211, "eval_samples_per_second": 824.384, "eval_steps_per_second": 51.524, "step": 648000 }, { "epoch": 13.81, "learning_rate": 2.9793333333333334e-07, "loss": 2.3852, "step": 656000 }, { "epoch": 13.81, "eval_loss": 2.247575283050537, "eval_runtime": 48.4402, "eval_samples_per_second": 825.761, "eval_steps_per_second": 51.61, "step": 656000 }, { "epoch": 13.98, "eval_loss": 2.272088050842285, "eval_runtime": 48.8783, "eval_samples_per_second": 818.359, "eval_steps_per_second": 51.147, "step": 664000 }, { "epoch": 14.15, "learning_rate": 2.952e-07, "loss": 2.3889, "step": 672000 }, { "epoch": 14.15, "eval_loss": 2.253678560256958, "eval_runtime": 48.7246, "eval_samples_per_second": 820.94, "eval_steps_per_second": 51.309, "step": 672000 }, { "epoch": 14.32, "eval_loss": 2.2723231315612793, "eval_runtime": 48.1926, "eval_samples_per_second": 830.003, "eval_steps_per_second": 51.875, "step": 680000 }, { "epoch": 14.48, "learning_rate": 2.9246666666666665e-07, "loss": 2.3839, "step": 688000 }, { "epoch": 14.48, "eval_loss": 2.2664077281951904, "eval_runtime": 48.493, "eval_samples_per_second": 824.861, "eval_steps_per_second": 51.554, "step": 688000 }, { "epoch": 14.65, "eval_loss": 2.2725985050201416, "eval_runtime": 49.24, "eval_samples_per_second": 812.348, "eval_steps_per_second": 50.772, "step": 696000 }, { "epoch": 14.82, "learning_rate": 2.897333333333333e-07, "loss": 2.3884, "step": 704000 }, { "epoch": 14.82, "eval_loss": 2.265206813812256, "eval_runtime": 49.5431, "eval_samples_per_second": 807.377, "eval_steps_per_second": 50.461, "step": 704000 }, { "epoch": 14.99, "eval_loss": 2.2633461952209473, "eval_runtime": 48.6148, "eval_samples_per_second": 822.795, "eval_steps_per_second": 51.425, "step": 712000 }, { "epoch": 15.16, "learning_rate": 2.8699999999999996e-07, "loss": 2.3827, "step": 720000 }, { "epoch": 15.16, "eval_loss": 2.268095016479492, "eval_runtime": 49.156, "eval_samples_per_second": 813.737, "eval_steps_per_second": 50.859, "step": 720000 }, { "epoch": 15.33, "eval_loss": 2.2542901039123535, "eval_runtime": 48.9808, "eval_samples_per_second": 816.646, "eval_steps_per_second": 51.04, "step": 728000 }, { "epoch": 15.49, "learning_rate": 2.8426666666666667e-07, "loss": 2.3861, "step": 736000 }, { "epoch": 15.49, "eval_loss": 2.2634222507476807, "eval_runtime": 48.9041, "eval_samples_per_second": 817.927, "eval_steps_per_second": 51.12, "step": 736000 }, { "epoch": 15.66, "eval_loss": 2.2706964015960693, "eval_runtime": 49.2314, "eval_samples_per_second": 812.49, "eval_steps_per_second": 50.781, "step": 744000 }, { "epoch": 15.83, "learning_rate": 2.815333333333333e-07, "loss": 2.3812, "step": 752000 }, { "epoch": 15.83, "eval_loss": 2.2575085163116455, "eval_runtime": 48.6081, "eval_samples_per_second": 822.908, "eval_steps_per_second": 51.432, "step": 752000 }, { "epoch": 16.0, "eval_loss": 2.2549245357513428, "eval_runtime": 48.9973, "eval_samples_per_second": 816.371, "eval_steps_per_second": 51.023, "step": 760000 }, { "epoch": 16.17, "learning_rate": 2.7880000000000003e-07, "loss": 2.3862, "step": 768000 }, { "epoch": 16.17, "eval_loss": 2.244624614715576, "eval_runtime": 49.0122, "eval_samples_per_second": 816.123, "eval_steps_per_second": 51.008, "step": 768000 }, { "epoch": 16.34, "eval_loss": 2.261697769165039, "eval_runtime": 48.9467, "eval_samples_per_second": 817.216, "eval_steps_per_second": 51.076, "step": 776000 }, { "epoch": 16.5, "learning_rate": 2.7606666666666664e-07, "loss": 2.3859, "step": 784000 }, { "epoch": 16.5, "eval_loss": 2.250509023666382, "eval_runtime": 49.5476, "eval_samples_per_second": 807.304, "eval_steps_per_second": 50.457, "step": 784000 }, { "epoch": 16.67, "eval_loss": 2.271986722946167, "eval_runtime": 48.2956, "eval_samples_per_second": 828.232, "eval_steps_per_second": 51.765, "step": 792000 }, { "epoch": 16.84, "learning_rate": 2.733333333333333e-07, "loss": 2.3873, "step": 800000 }, { "epoch": 16.84, "eval_loss": 2.252095937728882, "eval_runtime": 48.4511, "eval_samples_per_second": 825.575, "eval_steps_per_second": 51.598, "step": 800000 }, { "epoch": 17.01, "eval_loss": 2.254298448562622, "eval_runtime": 49.0762, "eval_samples_per_second": 815.06, "eval_steps_per_second": 50.941, "step": 808000 }, { "epoch": 17.18, "learning_rate": 2.706e-07, "loss": 2.381, "step": 816000 }, { "epoch": 17.18, "eval_loss": 2.267543077468872, "eval_runtime": 48.6635, "eval_samples_per_second": 821.971, "eval_steps_per_second": 51.373, "step": 816000 }, { "epoch": 17.35, "eval_loss": 2.254502296447754, "eval_runtime": 49.6829, "eval_samples_per_second": 805.106, "eval_steps_per_second": 50.319, "step": 824000 }, { "epoch": 17.52, "learning_rate": 2.6786666666666666e-07, "loss": 2.3851, "step": 832000 }, { "epoch": 17.52, "eval_loss": 2.2488667964935303, "eval_runtime": 48.7936, "eval_samples_per_second": 819.78, "eval_steps_per_second": 51.236, "step": 832000 }, { "epoch": 17.68, "eval_loss": 2.2605700492858887, "eval_runtime": 49.2213, "eval_samples_per_second": 812.656, "eval_steps_per_second": 50.791, "step": 840000 }, { "epoch": 17.85, "learning_rate": 2.651333333333333e-07, "loss": 2.3878, "step": 848000 }, { "epoch": 17.85, "eval_loss": 2.2579894065856934, "eval_runtime": 49.0096, "eval_samples_per_second": 816.167, "eval_steps_per_second": 51.01, "step": 848000 }, { "epoch": 18.02, "eval_loss": 2.2604382038116455, "eval_runtime": 48.2778, "eval_samples_per_second": 828.538, "eval_steps_per_second": 51.784, "step": 856000 }, { "epoch": 18.19, "learning_rate": 2.624e-07, "loss": 2.3812, "step": 864000 }, { "epoch": 18.19, "eval_loss": 2.2630739212036133, "eval_runtime": 48.321, "eval_samples_per_second": 827.798, "eval_steps_per_second": 51.737, "step": 864000 }, { "epoch": 18.36, "eval_loss": 2.250539541244507, "eval_runtime": 48.9155, "eval_samples_per_second": 817.737, "eval_steps_per_second": 51.109, "step": 872000 }, { "epoch": 18.53, "learning_rate": 2.596666666666667e-07, "loss": 2.3849, "step": 880000 }, { "epoch": 18.53, "eval_loss": 2.2657594680786133, "eval_runtime": 48.513, "eval_samples_per_second": 824.521, "eval_steps_per_second": 51.533, "step": 880000 }, { "epoch": 18.69, "eval_loss": 2.2566869258880615, "eval_runtime": 49.2473, "eval_samples_per_second": 812.227, "eval_steps_per_second": 50.764, "step": 888000 }, { "epoch": 18.86, "learning_rate": 2.5693333333333333e-07, "loss": 2.3833, "step": 896000 }, { "epoch": 18.86, "eval_loss": 2.2533113956451416, "eval_runtime": 48.9908, "eval_samples_per_second": 816.479, "eval_steps_per_second": 51.03, "step": 896000 }, { "epoch": 19.03, "eval_loss": 2.2455687522888184, "eval_runtime": 49.5558, "eval_samples_per_second": 807.17, "eval_steps_per_second": 50.448, "step": 904000 }, { "epoch": 19.2, "learning_rate": 2.542e-07, "loss": 2.3847, "step": 912000 }, { "epoch": 19.2, "eval_loss": 2.253338098526001, "eval_runtime": 49.2167, "eval_samples_per_second": 812.733, "eval_steps_per_second": 50.796, "step": 912000 }, { "epoch": 19.37, "eval_loss": 2.257462739944458, "eval_runtime": 48.4436, "eval_samples_per_second": 825.703, "eval_steps_per_second": 51.606, "step": 920000 }, { "epoch": 19.54, "learning_rate": 2.5146666666666664e-07, "loss": 2.3869, "step": 928000 }, { "epoch": 19.54, "eval_loss": 2.2667601108551025, "eval_runtime": 49.3388, "eval_samples_per_second": 810.721, "eval_steps_per_second": 50.67, "step": 928000 }, { "epoch": 19.7, "eval_loss": 2.2598884105682373, "eval_runtime": 48.9927, "eval_samples_per_second": 816.449, "eval_steps_per_second": 51.028, "step": 936000 }, { "epoch": 19.87, "learning_rate": 2.4873333333333335e-07, "loss": 2.3867, "step": 944000 }, { "epoch": 19.87, "eval_loss": 2.2680182456970215, "eval_runtime": 48.4318, "eval_samples_per_second": 825.904, "eval_steps_per_second": 51.619, "step": 944000 }, { "epoch": 20.04, "eval_loss": 2.2669413089752197, "eval_runtime": 50.205, "eval_samples_per_second": 796.733, "eval_steps_per_second": 49.796, "step": 952000 }, { "epoch": 20.21, "learning_rate": 2.46e-07, "loss": 2.3942, "step": 960000 }, { "epoch": 20.21, "eval_loss": 2.2482852935791016, "eval_runtime": 48.9393, "eval_samples_per_second": 817.34, "eval_steps_per_second": 51.084, "step": 960000 }, { "epoch": 20.38, "eval_loss": 2.273371934890747, "eval_runtime": 48.5937, "eval_samples_per_second": 823.152, "eval_steps_per_second": 51.447, "step": 968000 }, { "epoch": 20.55, "learning_rate": 2.4326666666666666e-07, "loss": 2.3863, "step": 976000 }, { "epoch": 20.55, "eval_loss": 2.262270212173462, "eval_runtime": 48.5495, "eval_samples_per_second": 823.902, "eval_steps_per_second": 51.494, "step": 976000 }, { "epoch": 20.72, "eval_loss": 2.264986038208008, "eval_runtime": 48.8999, "eval_samples_per_second": 817.998, "eval_steps_per_second": 51.125, "step": 984000 }, { "epoch": 20.88, "learning_rate": 2.405333333333333e-07, "loss": 2.3924, "step": 992000 }, { "epoch": 20.88, "eval_loss": 2.2602696418762207, "eval_runtime": 49.2404, "eval_samples_per_second": 812.341, "eval_steps_per_second": 50.771, "step": 992000 }, { "epoch": 21.05, "eval_loss": 2.2708377838134766, "eval_runtime": 49.0675, "eval_samples_per_second": 815.204, "eval_steps_per_second": 50.95, "step": 1000000 }, { "epoch": 21.22, "learning_rate": 2.3779999999999997e-07, "loss": 2.3871, "step": 1008000 }, { "epoch": 21.22, "eval_loss": 2.2512402534484863, "eval_runtime": 48.7719, "eval_samples_per_second": 820.144, "eval_steps_per_second": 51.259, "step": 1008000 }, { "epoch": 21.39, "eval_loss": 2.2567834854125977, "eval_runtime": 49.1119, "eval_samples_per_second": 814.466, "eval_steps_per_second": 50.904, "step": 1016000 }, { "epoch": 21.56, "learning_rate": 2.3506666666666668e-07, "loss": 2.3827, "step": 1024000 }, { "epoch": 21.56, "eval_loss": 2.2676033973693848, "eval_runtime": 49.0505, "eval_samples_per_second": 815.486, "eval_steps_per_second": 50.968, "step": 1024000 }, { "epoch": 21.73, "eval_loss": 2.271024465560913, "eval_runtime": 49.4407, "eval_samples_per_second": 809.05, "eval_steps_per_second": 50.566, "step": 1032000 }, { "epoch": 21.89, "learning_rate": 2.3233333333333334e-07, "loss": 2.3799, "step": 1040000 }, { "epoch": 21.89, "eval_loss": 2.2804324626922607, "eval_runtime": 49.138, "eval_samples_per_second": 814.034, "eval_steps_per_second": 50.877, "step": 1040000 }, { "epoch": 22.06, "eval_loss": 2.2498600482940674, "eval_runtime": 48.6186, "eval_samples_per_second": 822.731, "eval_steps_per_second": 51.421, "step": 1048000 }, { "epoch": 22.23, "learning_rate": 2.2960000000000002e-07, "loss": 2.3863, "step": 1056000 }, { "epoch": 22.23, "eval_loss": 2.2556710243225098, "eval_runtime": 49.7999, "eval_samples_per_second": 803.214, "eval_steps_per_second": 50.201, "step": 1056000 }, { "epoch": 22.4, "eval_loss": 2.2603883743286133, "eval_runtime": 49.3365, "eval_samples_per_second": 810.759, "eval_steps_per_second": 50.672, "step": 1064000 }, { "epoch": 22.57, "learning_rate": 2.2686666666666667e-07, "loss": 2.3858, "step": 1072000 }, { "epoch": 22.57, "eval_loss": 2.2832398414611816, "eval_runtime": 48.6796, "eval_samples_per_second": 821.699, "eval_steps_per_second": 51.356, "step": 1072000 }, { "epoch": 22.74, "eval_loss": 2.244276285171509, "eval_runtime": 48.7816, "eval_samples_per_second": 819.982, "eval_steps_per_second": 51.249, "step": 1080000 }, { "epoch": 22.9, "learning_rate": 2.2413333333333333e-07, "loss": 2.3859, "step": 1088000 }, { "epoch": 22.9, "eval_loss": 2.260357141494751, "eval_runtime": 49.4857, "eval_samples_per_second": 808.315, "eval_steps_per_second": 50.52, "step": 1088000 }, { "epoch": 23.07, "eval_loss": 2.263144016265869, "eval_runtime": 48.7635, "eval_samples_per_second": 820.285, "eval_steps_per_second": 51.268, "step": 1096000 }, { "epoch": 23.24, "learning_rate": 2.214e-07, "loss": 2.3846, "step": 1104000 }, { "epoch": 23.24, "eval_loss": 2.2689881324768066, "eval_runtime": 48.7943, "eval_samples_per_second": 819.768, "eval_steps_per_second": 51.235, "step": 1104000 }, { "epoch": 23.41, "eval_loss": 2.25949764251709, "eval_runtime": 48.4368, "eval_samples_per_second": 825.819, "eval_steps_per_second": 51.614, "step": 1112000 }, { "epoch": 23.58, "learning_rate": 2.1866666666666667e-07, "loss": 2.3887, "step": 1120000 }, { "epoch": 23.58, "eval_loss": 2.2500855922698975, "eval_runtime": 48.8931, "eval_samples_per_second": 818.111, "eval_steps_per_second": 51.132, "step": 1120000 }, { "epoch": 23.75, "eval_loss": 2.2532594203948975, "eval_runtime": 48.868, "eval_samples_per_second": 818.532, "eval_steps_per_second": 51.158, "step": 1128000 }, { "epoch": 23.92, "learning_rate": 2.1593333333333332e-07, "loss": 2.3856, "step": 1136000 }, { "epoch": 23.92, "eval_loss": 2.252855062484741, "eval_runtime": 49.6155, "eval_samples_per_second": 806.199, "eval_steps_per_second": 50.387, "step": 1136000 }, { "epoch": 24.08, "eval_loss": 2.2455570697784424, "eval_runtime": 48.8925, "eval_samples_per_second": 818.121, "eval_steps_per_second": 51.133, "step": 1144000 }, { "epoch": 24.25, "learning_rate": 2.132e-07, "loss": 2.3856, "step": 1152000 }, { "epoch": 24.25, "eval_loss": 2.254368782043457, "eval_runtime": 49.1923, "eval_samples_per_second": 813.135, "eval_steps_per_second": 50.821, "step": 1152000 }, { "epoch": 24.42, "eval_loss": 2.25541090965271, "eval_runtime": 48.5868, "eval_samples_per_second": 823.268, "eval_steps_per_second": 51.454, "step": 1160000 }, { "epoch": 24.59, "learning_rate": 2.1046666666666666e-07, "loss": 2.3867, "step": 1168000 }, { "epoch": 24.59, "eval_loss": 2.2595579624176025, "eval_runtime": 49.0442, "eval_samples_per_second": 815.591, "eval_steps_per_second": 50.974, "step": 1168000 }, { "epoch": 24.76, "eval_loss": 2.252202033996582, "eval_runtime": 49.5166, "eval_samples_per_second": 807.81, "eval_steps_per_second": 50.488, "step": 1176000 }, { "epoch": 24.93, "learning_rate": 2.0773333333333334e-07, "loss": 2.3795, "step": 1184000 }, { "epoch": 24.93, "eval_loss": 2.249300241470337, "eval_runtime": 49.7952, "eval_samples_per_second": 803.29, "eval_steps_per_second": 50.206, "step": 1184000 }, { "epoch": 25.09, "eval_loss": 2.2608890533447266, "eval_runtime": 48.681, "eval_samples_per_second": 821.675, "eval_steps_per_second": 51.355, "step": 1192000 }, { "epoch": 25.26, "learning_rate": 2.05e-07, "loss": 2.3926, "step": 1200000 }, { "epoch": 25.26, "eval_loss": 2.2658445835113525, "eval_runtime": 48.9256, "eval_samples_per_second": 817.569, "eval_steps_per_second": 51.098, "step": 1200000 }, { "epoch": 25.43, "eval_loss": 2.2592995166778564, "eval_runtime": 48.8774, "eval_samples_per_second": 818.374, "eval_steps_per_second": 51.148, "step": 1208000 }, { "epoch": 25.6, "learning_rate": 2.0226666666666668e-07, "loss": 2.3887, "step": 1216000 }, { "epoch": 25.6, "eval_loss": 2.2703697681427, "eval_runtime": 48.9056, "eval_samples_per_second": 817.902, "eval_steps_per_second": 51.119, "step": 1216000 }, { "epoch": 25.77, "eval_loss": 2.263197183609009, "eval_runtime": 49.6098, "eval_samples_per_second": 806.292, "eval_steps_per_second": 50.393, "step": 1224000 }, { "epoch": 25.94, "learning_rate": 1.9953333333333333e-07, "loss": 2.3926, "step": 1232000 }, { "epoch": 25.94, "eval_loss": 2.2628068923950195, "eval_runtime": 49.6731, "eval_samples_per_second": 805.265, "eval_steps_per_second": 50.329, "step": 1232000 }, { "epoch": 26.1, "eval_loss": 2.2656562328338623, "eval_runtime": 49.8004, "eval_samples_per_second": 803.207, "eval_steps_per_second": 50.2, "step": 1240000 }, { "epoch": 26.27, "learning_rate": 1.968e-07, "loss": 2.3809, "step": 1248000 }, { "epoch": 26.27, "eval_loss": 2.2545762062072754, "eval_runtime": 49.3811, "eval_samples_per_second": 810.026, "eval_steps_per_second": 50.627, "step": 1248000 }, { "epoch": 26.44, "eval_loss": 2.259634017944336, "eval_runtime": 49.4215, "eval_samples_per_second": 809.365, "eval_steps_per_second": 50.585, "step": 1256000 }, { "epoch": 26.61, "learning_rate": 1.9406666666666667e-07, "loss": 2.3878, "step": 1264000 }, { "epoch": 26.61, "eval_loss": 2.254516124725342, "eval_runtime": 48.3, "eval_samples_per_second": 828.158, "eval_steps_per_second": 51.76, "step": 1264000 }, { "epoch": 26.78, "eval_loss": 2.2667646408081055, "eval_runtime": 49.742, "eval_samples_per_second": 804.149, "eval_steps_per_second": 50.259, "step": 1272000 }, { "epoch": 26.95, "learning_rate": 1.9133333333333333e-07, "loss": 2.3861, "step": 1280000 }, { "epoch": 26.95, "eval_loss": 2.2534382343292236, "eval_runtime": 48.5643, "eval_samples_per_second": 823.65, "eval_steps_per_second": 51.478, "step": 1280000 }, { "epoch": 27.12, "eval_loss": 2.261183738708496, "eval_runtime": 49.4288, "eval_samples_per_second": 809.246, "eval_steps_per_second": 50.578, "step": 1288000 }, { "epoch": 27.28, "learning_rate": 1.886e-07, "loss": 2.3815, "step": 1296000 }, { "epoch": 27.28, "eval_loss": 2.2441422939300537, "eval_runtime": 48.9537, "eval_samples_per_second": 817.099, "eval_steps_per_second": 51.069, "step": 1296000 }, { "epoch": 27.45, "eval_loss": 2.271397590637207, "eval_runtime": 49.5135, "eval_samples_per_second": 807.861, "eval_steps_per_second": 50.491, "step": 1304000 }, { "epoch": 27.62, "learning_rate": 1.8586666666666666e-07, "loss": 2.3861, "step": 1312000 }, { "epoch": 27.62, "eval_loss": 2.2604434490203857, "eval_runtime": 48.816, "eval_samples_per_second": 819.403, "eval_steps_per_second": 51.213, "step": 1312000 }, { "epoch": 27.79, "eval_loss": 2.2535157203674316, "eval_runtime": 49.1172, "eval_samples_per_second": 814.378, "eval_steps_per_second": 50.899, "step": 1320000 }, { "epoch": 27.96, "learning_rate": 1.8313333333333332e-07, "loss": 2.388, "step": 1328000 }, { "epoch": 27.96, "eval_loss": 2.2466070652008057, "eval_runtime": 48.6499, "eval_samples_per_second": 822.201, "eval_steps_per_second": 51.388, "step": 1328000 }, { "epoch": 28.13, "eval_loss": 2.258121967315674, "eval_runtime": 48.6067, "eval_samples_per_second": 822.932, "eval_steps_per_second": 51.433, "step": 1336000 }, { "epoch": 28.29, "learning_rate": 1.804e-07, "loss": 2.3864, "step": 1344000 }, { "epoch": 28.29, "eval_loss": 2.257232904434204, "eval_runtime": 49.3463, "eval_samples_per_second": 810.598, "eval_steps_per_second": 50.662, "step": 1344000 }, { "epoch": 28.46, "eval_loss": 2.238109827041626, "eval_runtime": 48.9872, "eval_samples_per_second": 816.539, "eval_steps_per_second": 51.034, "step": 1352000 }, { "epoch": 28.63, "learning_rate": 1.7766666666666666e-07, "loss": 2.39, "step": 1360000 }, { "epoch": 28.63, "eval_loss": 2.23980712890625, "eval_runtime": 49.3711, "eval_samples_per_second": 810.191, "eval_steps_per_second": 50.637, "step": 1360000 }, { "epoch": 28.8, "eval_loss": 2.269519805908203, "eval_runtime": 49.3307, "eval_samples_per_second": 810.853, "eval_steps_per_second": 50.678, "step": 1368000 }, { "epoch": 28.97, "learning_rate": 1.7493333333333334e-07, "loss": 2.39, "step": 1376000 }, { "epoch": 28.97, "eval_loss": 2.262801170349121, "eval_runtime": 49.0301, "eval_samples_per_second": 815.825, "eval_steps_per_second": 50.989, "step": 1376000 }, { "epoch": 29.14, "eval_loss": 2.2599363327026367, "eval_runtime": 49.1614, "eval_samples_per_second": 813.647, "eval_steps_per_second": 50.853, "step": 1384000 }, { "epoch": 29.3, "learning_rate": 1.722e-07, "loss": 2.3804, "step": 1392000 }, { "epoch": 29.3, "eval_loss": 2.262774705886841, "eval_runtime": 49.576, "eval_samples_per_second": 806.842, "eval_steps_per_second": 50.428, "step": 1392000 }, { "epoch": 29.47, "eval_loss": 2.2721939086914062, "eval_runtime": 48.6201, "eval_samples_per_second": 822.705, "eval_steps_per_second": 51.419, "step": 1400000 }, { "epoch": 29.64, "learning_rate": 1.6946666666666668e-07, "loss": 2.3858, "step": 1408000 }, { "epoch": 29.64, "eval_loss": 2.24898099899292, "eval_runtime": 49.0807, "eval_samples_per_second": 814.983, "eval_steps_per_second": 50.936, "step": 1408000 }, { "epoch": 29.81, "eval_loss": 2.262730360031128, "eval_runtime": 49.565, "eval_samples_per_second": 807.021, "eval_steps_per_second": 50.439, "step": 1416000 }, { "epoch": 29.98, "learning_rate": 1.6673333333333333e-07, "loss": 2.3804, "step": 1424000 }, { "epoch": 29.98, "eval_loss": 2.262303113937378, "eval_runtime": 48.6465, "eval_samples_per_second": 822.258, "eval_steps_per_second": 51.391, "step": 1424000 }, { "epoch": 30.15, "eval_loss": 2.252244472503662, "eval_runtime": 49.1968, "eval_samples_per_second": 813.061, "eval_steps_per_second": 50.816, "step": 1432000 }, { "epoch": 30.32, "learning_rate": 1.64e-07, "loss": 2.3834, "step": 1440000 }, { "epoch": 30.32, "eval_loss": 2.2633419036865234, "eval_runtime": 48.625, "eval_samples_per_second": 822.622, "eval_steps_per_second": 51.414, "step": 1440000 }, { "epoch": 30.48, "eval_loss": 2.255260467529297, "eval_runtime": 48.5565, "eval_samples_per_second": 823.782, "eval_steps_per_second": 51.486, "step": 1448000 }, { "epoch": 30.65, "learning_rate": 1.6126666666666667e-07, "loss": 2.3853, "step": 1456000 }, { "epoch": 30.65, "eval_loss": 2.239067554473877, "eval_runtime": 49.1758, "eval_samples_per_second": 813.408, "eval_steps_per_second": 50.838, "step": 1456000 }, { "epoch": 30.82, "eval_loss": 2.2615532875061035, "eval_runtime": 48.6338, "eval_samples_per_second": 822.473, "eval_steps_per_second": 51.405, "step": 1464000 }, { "epoch": 30.99, "learning_rate": 1.5853333333333332e-07, "loss": 2.3946, "step": 1472000 }, { "epoch": 30.99, "eval_loss": 2.2630956172943115, "eval_runtime": 48.6486, "eval_samples_per_second": 822.224, "eval_steps_per_second": 51.389, "step": 1472000 }, { "epoch": 31.16, "eval_loss": 2.2638938426971436, "eval_runtime": 48.6689, "eval_samples_per_second": 821.88, "eval_steps_per_second": 51.367, "step": 1480000 }, { "epoch": 31.33, "learning_rate": 1.558e-07, "loss": 2.385, "step": 1488000 }, { "epoch": 31.33, "eval_loss": 2.27362060546875, "eval_runtime": 49.1717, "eval_samples_per_second": 813.476, "eval_steps_per_second": 50.842, "step": 1488000 }, { "epoch": 31.49, "eval_loss": 2.2715282440185547, "eval_runtime": 48.6068, "eval_samples_per_second": 822.931, "eval_steps_per_second": 51.433, "step": 1496000 }, { "epoch": 31.66, "learning_rate": 1.5306666666666666e-07, "loss": 2.387, "step": 1504000 }, { "epoch": 31.66, "eval_loss": 2.255669116973877, "eval_runtime": 49.2692, "eval_samples_per_second": 811.866, "eval_steps_per_second": 50.742, "step": 1504000 }, { "epoch": 31.83, "eval_loss": 2.258305311203003, "eval_runtime": 49.3922, "eval_samples_per_second": 809.845, "eval_steps_per_second": 50.615, "step": 1512000 }, { "epoch": 32.0, "learning_rate": 1.5033333333333332e-07, "loss": 2.3831, "step": 1520000 }, { "epoch": 32.0, "eval_loss": 2.2543575763702393, "eval_runtime": 48.8562, "eval_samples_per_second": 818.73, "eval_steps_per_second": 51.171, "step": 1520000 }, { "epoch": 32.17, "eval_loss": 2.2756261825561523, "eval_runtime": 48.8463, "eval_samples_per_second": 818.895, "eval_steps_per_second": 51.181, "step": 1528000 }, { "epoch": 32.34, "learning_rate": 1.476e-07, "loss": 2.3835, "step": 1536000 }, { "epoch": 32.34, "eval_loss": 2.2793610095977783, "eval_runtime": 48.7275, "eval_samples_per_second": 820.891, "eval_steps_per_second": 51.306, "step": 1536000 }, { "epoch": 32.5, "eval_loss": 2.2648372650146484, "eval_runtime": 49.3109, "eval_samples_per_second": 811.179, "eval_steps_per_second": 50.699, "step": 1544000 }, { "epoch": 32.67, "learning_rate": 1.4486666666666665e-07, "loss": 2.3857, "step": 1552000 }, { "epoch": 32.67, "eval_loss": 2.2563135623931885, "eval_runtime": 49.7733, "eval_samples_per_second": 803.643, "eval_steps_per_second": 50.228, "step": 1552000 }, { "epoch": 32.84, "eval_loss": 2.2537479400634766, "eval_runtime": 49.2356, "eval_samples_per_second": 812.42, "eval_steps_per_second": 50.776, "step": 1560000 }, { "epoch": 33.01, "learning_rate": 1.4213333333333334e-07, "loss": 2.3856, "step": 1568000 }, { "epoch": 33.01, "eval_loss": 2.261024236679077, "eval_runtime": 49.2157, "eval_samples_per_second": 812.749, "eval_steps_per_second": 50.797, "step": 1568000 }, { "epoch": 33.18, "eval_loss": 2.264604330062866, "eval_runtime": 49.1631, "eval_samples_per_second": 813.618, "eval_steps_per_second": 50.851, "step": 1576000 }, { "epoch": 33.35, "learning_rate": 1.3940000000000002e-07, "loss": 2.3902, "step": 1584000 }, { "epoch": 33.35, "eval_loss": 2.2544610500335693, "eval_runtime": 48.8485, "eval_samples_per_second": 818.858, "eval_steps_per_second": 51.179, "step": 1584000 }, { "epoch": 33.52, "eval_loss": 2.271030902862549, "eval_runtime": 48.3697, "eval_samples_per_second": 826.964, "eval_steps_per_second": 51.685, "step": 1592000 }, { "epoch": 33.68, "learning_rate": 1.3666666666666665e-07, "loss": 2.3897, "step": 1600000 }, { "epoch": 33.68, "eval_loss": 2.2601163387298584, "eval_runtime": 48.8818, "eval_samples_per_second": 818.3, "eval_steps_per_second": 51.144, "step": 1600000 }, { "epoch": 33.85, "eval_loss": 2.2542924880981445, "eval_runtime": 49.0945, "eval_samples_per_second": 814.754, "eval_steps_per_second": 50.922, "step": 1608000 }, { "epoch": 34.02, "learning_rate": 1.3393333333333333e-07, "loss": 2.3866, "step": 1616000 }, { "epoch": 34.02, "eval_loss": 2.2525877952575684, "eval_runtime": 49.2321, "eval_samples_per_second": 812.478, "eval_steps_per_second": 50.78, "step": 1616000 }, { "epoch": 34.19, "eval_loss": 2.262938976287842, "eval_runtime": 49.3213, "eval_samples_per_second": 811.009, "eval_steps_per_second": 50.688, "step": 1624000 }, { "epoch": 34.36, "learning_rate": 1.312e-07, "loss": 2.3823, "step": 1632000 }, { "epoch": 34.36, "eval_loss": 2.2616801261901855, "eval_runtime": 48.9485, "eval_samples_per_second": 817.185, "eval_steps_per_second": 51.074, "step": 1632000 }, { "epoch": 34.53, "eval_loss": 2.2519824504852295, "eval_runtime": 48.6687, "eval_samples_per_second": 821.883, "eval_steps_per_second": 51.368, "step": 1640000 }, { "epoch": 34.69, "learning_rate": 1.2846666666666667e-07, "loss": 2.3874, "step": 1648000 }, { "epoch": 34.69, "eval_loss": 2.261162042617798, "eval_runtime": 48.7567, "eval_samples_per_second": 820.4, "eval_steps_per_second": 51.275, "step": 1648000 }, { "epoch": 34.86, "eval_loss": 2.2568650245666504, "eval_runtime": 48.8018, "eval_samples_per_second": 819.641, "eval_steps_per_second": 51.228, "step": 1656000 }, { "epoch": 35.03, "learning_rate": 1.2573333333333332e-07, "loss": 2.3895, "step": 1664000 }, { "epoch": 35.03, "eval_loss": 2.2633254528045654, "eval_runtime": 48.7101, "eval_samples_per_second": 821.185, "eval_steps_per_second": 51.324, "step": 1664000 }, { "epoch": 35.2, "eval_loss": 2.259277820587158, "eval_runtime": 49.2378, "eval_samples_per_second": 812.384, "eval_steps_per_second": 50.774, "step": 1672000 }, { "epoch": 35.37, "learning_rate": 1.23e-07, "loss": 2.3857, "step": 1680000 }, { "epoch": 35.37, "eval_loss": 2.2650630474090576, "eval_runtime": 48.8949, "eval_samples_per_second": 818.082, "eval_steps_per_second": 51.13, "step": 1680000 }, { "epoch": 35.54, "eval_loss": 2.256744623184204, "eval_runtime": 48.7371, "eval_samples_per_second": 820.73, "eval_steps_per_second": 51.296, "step": 1688000 }, { "epoch": 35.7, "learning_rate": 1.2026666666666666e-07, "loss": 2.3811, "step": 1696000 }, { "epoch": 35.7, "eval_loss": 2.253361701965332, "eval_runtime": 49.5532, "eval_samples_per_second": 807.214, "eval_steps_per_second": 50.451, "step": 1696000 }, { "epoch": 35.87, "eval_loss": 2.263338088989258, "eval_runtime": 49.1414, "eval_samples_per_second": 813.978, "eval_steps_per_second": 50.874, "step": 1704000 }, { "epoch": 36.04, "learning_rate": 1.1753333333333334e-07, "loss": 2.3944, "step": 1712000 }, { "epoch": 36.04, "eval_loss": 2.2504327297210693, "eval_runtime": 49.4998, "eval_samples_per_second": 808.084, "eval_steps_per_second": 50.505, "step": 1712000 }, { "epoch": 36.21, "eval_loss": 2.2518932819366455, "eval_runtime": 49.3816, "eval_samples_per_second": 810.018, "eval_steps_per_second": 50.626, "step": 1720000 }, { "epoch": 36.38, "learning_rate": 1.1480000000000001e-07, "loss": 2.3883, "step": 1728000 }, { "epoch": 36.38, "eval_loss": 2.2571768760681152, "eval_runtime": 49.5997, "eval_samples_per_second": 806.456, "eval_steps_per_second": 50.404, "step": 1728000 }, { "epoch": 36.55, "eval_loss": 2.2575507164001465, "eval_runtime": 49.3457, "eval_samples_per_second": 810.607, "eval_steps_per_second": 50.663, "step": 1736000 }, { "epoch": 36.72, "learning_rate": 1.1206666666666666e-07, "loss": 2.3859, "step": 1744000 }, { "epoch": 36.72, "eval_loss": 2.2719168663024902, "eval_runtime": 48.91, "eval_samples_per_second": 817.828, "eval_steps_per_second": 51.114, "step": 1744000 }, { "epoch": 36.88, "eval_loss": 2.2667555809020996, "eval_runtime": 48.8267, "eval_samples_per_second": 819.223, "eval_steps_per_second": 51.201, "step": 1752000 }, { "epoch": 37.05, "learning_rate": 1.0933333333333333e-07, "loss": 2.3914, "step": 1760000 }, { "epoch": 37.05, "eval_loss": 2.250850200653076, "eval_runtime": 48.8892, "eval_samples_per_second": 818.176, "eval_steps_per_second": 51.136, "step": 1760000 }, { "epoch": 37.22, "eval_loss": 2.2601399421691895, "eval_runtime": 48.8589, "eval_samples_per_second": 818.684, "eval_steps_per_second": 51.168, "step": 1768000 }, { "epoch": 37.39, "learning_rate": 1.066e-07, "loss": 2.3848, "step": 1776000 }, { "epoch": 37.39, "eval_loss": 2.2686824798583984, "eval_runtime": 48.9048, "eval_samples_per_second": 817.915, "eval_steps_per_second": 51.12, "step": 1776000 }, { "epoch": 37.56, "eval_loss": 2.2513012886047363, "eval_runtime": 48.7112, "eval_samples_per_second": 821.166, "eval_steps_per_second": 51.323, "step": 1784000 }, { "epoch": 37.73, "learning_rate": 1.0386666666666667e-07, "loss": 2.3903, "step": 1792000 }, { "epoch": 37.73, "eval_loss": 2.2519407272338867, "eval_runtime": 48.938, "eval_samples_per_second": 817.361, "eval_steps_per_second": 51.085, "step": 1792000 }, { "epoch": 37.89, "eval_loss": 2.259387731552124, "eval_runtime": 49.4041, "eval_samples_per_second": 809.65, "eval_steps_per_second": 50.603, "step": 1800000 }, { "epoch": 38.06, "learning_rate": 1.0113333333333334e-07, "loss": 2.3822, "step": 1808000 }, { "epoch": 38.06, "eval_loss": 2.256521701812744, "eval_runtime": 48.8635, "eval_samples_per_second": 818.606, "eval_steps_per_second": 51.163, "step": 1808000 }, { "epoch": 38.23, "eval_loss": 2.2812252044677734, "eval_runtime": 49.3349, "eval_samples_per_second": 810.785, "eval_steps_per_second": 50.674, "step": 1816000 }, { "epoch": 38.4, "learning_rate": 9.84e-08, "loss": 2.383, "step": 1824000 }, { "epoch": 38.4, "eval_loss": 2.2589097023010254, "eval_runtime": 48.8806, "eval_samples_per_second": 818.32, "eval_steps_per_second": 51.145, "step": 1824000 }, { "epoch": 38.57, "eval_loss": 2.2560157775878906, "eval_runtime": 48.961, "eval_samples_per_second": 816.977, "eval_steps_per_second": 51.061, "step": 1832000 }, { "epoch": 38.74, "learning_rate": 9.566666666666666e-08, "loss": 2.3868, "step": 1840000 }, { "epoch": 38.74, "eval_loss": 2.264800548553467, "eval_runtime": 49.3795, "eval_samples_per_second": 810.053, "eval_steps_per_second": 50.628, "step": 1840000 }, { "epoch": 38.9, "eval_loss": 2.2506866455078125, "eval_runtime": 48.8976, "eval_samples_per_second": 818.037, "eval_steps_per_second": 51.127, "step": 1848000 }, { "epoch": 39.07, "learning_rate": 9.293333333333333e-08, "loss": 2.3775, "step": 1856000 }, { "epoch": 39.07, "eval_loss": 2.2569808959960938, "eval_runtime": 48.9247, "eval_samples_per_second": 817.584, "eval_steps_per_second": 51.099, "step": 1856000 }, { "epoch": 39.24, "eval_loss": 2.2549405097961426, "eval_runtime": 49.5363, "eval_samples_per_second": 807.488, "eval_steps_per_second": 50.468, "step": 1864000 }, { "epoch": 39.41, "learning_rate": 9.02e-08, "loss": 2.3818, "step": 1872000 }, { "epoch": 39.41, "eval_loss": 2.2583167552948, "eval_runtime": 49.5006, "eval_samples_per_second": 808.071, "eval_steps_per_second": 50.504, "step": 1872000 }, { "epoch": 39.58, "eval_loss": 2.261044502258301, "eval_runtime": 49.628, "eval_samples_per_second": 805.997, "eval_steps_per_second": 50.375, "step": 1880000 }, { "epoch": 39.75, "learning_rate": 8.746666666666667e-08, "loss": 2.3887, "step": 1888000 }, { "epoch": 39.75, "eval_loss": 2.262882947921753, "eval_runtime": 49.6245, "eval_samples_per_second": 806.053, "eval_steps_per_second": 50.378, "step": 1888000 }, { "epoch": 39.91, "eval_loss": 2.273881435394287, "eval_runtime": 49.4491, "eval_samples_per_second": 808.913, "eval_steps_per_second": 50.557, "step": 1896000 }, { "epoch": 40.08, "learning_rate": 8.473333333333334e-08, "loss": 2.3893, "step": 1904000 }, { "epoch": 40.08, "eval_loss": 2.2657415866851807, "eval_runtime": 49.1269, "eval_samples_per_second": 814.217, "eval_steps_per_second": 50.889, "step": 1904000 }, { "epoch": 40.25, "eval_loss": 2.2507264614105225, "eval_runtime": 49.5404, "eval_samples_per_second": 807.422, "eval_steps_per_second": 50.464, "step": 1912000 }, { "epoch": 40.42, "learning_rate": 8.2e-08, "loss": 2.3826, "step": 1920000 }, { "epoch": 40.42, "eval_loss": 2.2505505084991455, "eval_runtime": 49.5643, "eval_samples_per_second": 807.033, "eval_steps_per_second": 50.44, "step": 1920000 }, { "epoch": 40.59, "eval_loss": 2.2630043029785156, "eval_runtime": 48.8805, "eval_samples_per_second": 818.322, "eval_steps_per_second": 51.145, "step": 1928000 }, { "epoch": 40.76, "learning_rate": 7.926666666666666e-08, "loss": 2.3842, "step": 1936000 }, { "epoch": 40.76, "eval_loss": 2.27164363861084, "eval_runtime": 49.401, "eval_samples_per_second": 809.701, "eval_steps_per_second": 50.606, "step": 1936000 }, { "epoch": 40.93, "eval_loss": 2.264181613922119, "eval_runtime": 49.4342, "eval_samples_per_second": 809.156, "eval_steps_per_second": 50.572, "step": 1944000 }, { "epoch": 41.09, "learning_rate": 7.653333333333333e-08, "loss": 2.3866, "step": 1952000 }, { "epoch": 41.09, "eval_loss": 2.245126485824585, "eval_runtime": 49.626, "eval_samples_per_second": 806.029, "eval_steps_per_second": 50.377, "step": 1952000 }, { "epoch": 41.26, "eval_loss": 2.2520625591278076, "eval_runtime": 49.5425, "eval_samples_per_second": 807.388, "eval_steps_per_second": 50.462, "step": 1960000 }, { "epoch": 41.43, "learning_rate": 7.38e-08, "loss": 2.3857, "step": 1968000 }, { "epoch": 41.43, "eval_loss": 2.2457118034362793, "eval_runtime": 48.9259, "eval_samples_per_second": 817.562, "eval_steps_per_second": 51.098, "step": 1968000 }, { "epoch": 41.6, "eval_loss": 2.2574808597564697, "eval_runtime": 49.7757, "eval_samples_per_second": 803.605, "eval_steps_per_second": 50.225, "step": 1976000 }, { "epoch": 41.77, "learning_rate": 7.106666666666667e-08, "loss": 2.3943, "step": 1984000 }, { "epoch": 41.77, "eval_loss": 2.265901565551758, "eval_runtime": 49.5814, "eval_samples_per_second": 806.755, "eval_steps_per_second": 50.422, "step": 1984000 }, { "epoch": 41.94, "eval_loss": 2.260754346847534, "eval_runtime": 48.9543, "eval_samples_per_second": 817.089, "eval_steps_per_second": 51.068, "step": 1992000 }, { "epoch": 42.1, "learning_rate": 6.833333333333332e-08, "loss": 2.387, "step": 2000000 }, { "epoch": 42.1, "eval_loss": 2.2687227725982666, "eval_runtime": 49.4379, "eval_samples_per_second": 809.096, "eval_steps_per_second": 50.569, "step": 2000000 }, { "epoch": 42.27, "eval_loss": 2.271784543991089, "eval_runtime": 49.2241, "eval_samples_per_second": 812.611, "eval_steps_per_second": 50.788, "step": 2008000 }, { "epoch": 42.44, "learning_rate": 6.56e-08, "loss": 2.387, "step": 2016000 }, { "epoch": 42.44, "eval_loss": 2.262915849685669, "eval_runtime": 49.9903, "eval_samples_per_second": 800.156, "eval_steps_per_second": 50.01, "step": 2016000 }, { "epoch": 42.61, "eval_loss": 2.2282731533050537, "eval_runtime": 50.2394, "eval_samples_per_second": 796.188, "eval_steps_per_second": 49.762, "step": 2024000 }, { "epoch": 42.78, "learning_rate": 6.286666666666666e-08, "loss": 2.3804, "step": 2032000 }, { "epoch": 42.78, "eval_loss": 2.2422289848327637, "eval_runtime": 49.3037, "eval_samples_per_second": 811.298, "eval_steps_per_second": 50.706, "step": 2032000 }, { "epoch": 42.95, "eval_loss": 2.243112802505493, "eval_runtime": 50.264, "eval_samples_per_second": 795.798, "eval_steps_per_second": 49.737, "step": 2040000 }, { "epoch": 43.11, "learning_rate": 6.013333333333333e-08, "loss": 2.3842, "step": 2048000 }, { "epoch": 43.11, "eval_loss": 2.268855094909668, "eval_runtime": 49.5735, "eval_samples_per_second": 806.883, "eval_steps_per_second": 50.43, "step": 2048000 }, { "epoch": 43.28, "eval_loss": 2.2586092948913574, "eval_runtime": 49.7496, "eval_samples_per_second": 804.027, "eval_steps_per_second": 50.252, "step": 2056000 }, { "epoch": 43.45, "learning_rate": 5.7400000000000004e-08, "loss": 2.3856, "step": 2064000 }, { "epoch": 43.45, "eval_loss": 2.259028434753418, "eval_runtime": 49.8461, "eval_samples_per_second": 802.47, "eval_steps_per_second": 50.154, "step": 2064000 }, { "epoch": 43.62, "eval_loss": 2.2602360248565674, "eval_runtime": 50.6446, "eval_samples_per_second": 789.817, "eval_steps_per_second": 49.364, "step": 2072000 }, { "epoch": 43.79, "learning_rate": 5.4666666666666666e-08, "loss": 2.3843, "step": 2080000 }, { "epoch": 43.79, "eval_loss": 2.255709648132324, "eval_runtime": 49.1117, "eval_samples_per_second": 814.469, "eval_steps_per_second": 50.904, "step": 2080000 }, { "epoch": 43.96, "eval_loss": 2.2776308059692383, "eval_runtime": 49.152, "eval_samples_per_second": 813.801, "eval_steps_per_second": 50.863, "step": 2088000 }, { "epoch": 44.13, "learning_rate": 5.1933333333333335e-08, "loss": 2.3891, "step": 2096000 }, { "epoch": 44.13, "eval_loss": 2.255366325378418, "eval_runtime": 49.1689, "eval_samples_per_second": 813.522, "eval_steps_per_second": 50.845, "step": 2096000 }, { "epoch": 44.29, "eval_loss": 2.2615368366241455, "eval_runtime": 49.9655, "eval_samples_per_second": 800.553, "eval_steps_per_second": 50.035, "step": 2104000 }, { "epoch": 44.46, "learning_rate": 4.92e-08, "loss": 2.3811, "step": 2112000 }, { "epoch": 44.46, "eval_loss": 2.259124517440796, "eval_runtime": 49.6149, "eval_samples_per_second": 806.21, "eval_steps_per_second": 50.388, "step": 2112000 }, { "epoch": 44.63, "eval_loss": 2.259974718093872, "eval_runtime": 48.8534, "eval_samples_per_second": 818.777, "eval_steps_per_second": 51.174, "step": 2120000 }, { "epoch": 44.8, "learning_rate": 4.6466666666666666e-08, "loss": 2.3874, "step": 2128000 }, { "epoch": 44.8, "eval_loss": 2.259488582611084, "eval_runtime": 49.484, "eval_samples_per_second": 808.342, "eval_steps_per_second": 50.521, "step": 2128000 }, { "epoch": 44.97, "eval_loss": 2.2761764526367188, "eval_runtime": 49.5444, "eval_samples_per_second": 807.357, "eval_steps_per_second": 50.46, "step": 2136000 }, { "epoch": 45.14, "learning_rate": 4.3733333333333335e-08, "loss": 2.3822, "step": 2144000 }, { "epoch": 45.14, "eval_loss": 2.2516047954559326, "eval_runtime": 49.0692, "eval_samples_per_second": 815.176, "eval_steps_per_second": 50.948, "step": 2144000 }, { "epoch": 45.3, "eval_loss": 2.2529869079589844, "eval_runtime": 49.0866, "eval_samples_per_second": 814.886, "eval_steps_per_second": 50.93, "step": 2152000 }, { "epoch": 45.47, "learning_rate": 4.1e-08, "loss": 2.3933, "step": 2160000 }, { "epoch": 45.47, "eval_loss": 2.265183210372925, "eval_runtime": 48.9694, "eval_samples_per_second": 816.837, "eval_steps_per_second": 51.052, "step": 2160000 }, { "epoch": 45.64, "eval_loss": 2.2480199337005615, "eval_runtime": 48.9374, "eval_samples_per_second": 817.371, "eval_steps_per_second": 51.086, "step": 2168000 }, { "epoch": 45.81, "learning_rate": 3.8266666666666665e-08, "loss": 2.3853, "step": 2176000 }, { "epoch": 45.81, "eval_loss": 2.2716729640960693, "eval_runtime": 49.0277, "eval_samples_per_second": 815.865, "eval_steps_per_second": 50.992, "step": 2176000 }, { "epoch": 45.98, "eval_loss": 2.2568676471710205, "eval_runtime": 49.4939, "eval_samples_per_second": 808.181, "eval_steps_per_second": 50.511, "step": 2184000 }, { "epoch": 46.15, "learning_rate": 3.5533333333333334e-08, "loss": 2.3917, "step": 2192000 }, { "epoch": 46.15, "eval_loss": 2.2564427852630615, "eval_runtime": 49.3075, "eval_samples_per_second": 811.235, "eval_steps_per_second": 50.702, "step": 2192000 }, { "epoch": 46.31, "eval_loss": 2.2512264251708984, "eval_runtime": 49.6159, "eval_samples_per_second": 806.193, "eval_steps_per_second": 50.387, "step": 2200000 }, { "epoch": 46.48, "learning_rate": 3.28e-08, "loss": 2.3859, "step": 2208000 }, { "epoch": 46.48, "eval_loss": 2.2611992359161377, "eval_runtime": 49.8507, "eval_samples_per_second": 802.396, "eval_steps_per_second": 50.15, "step": 2208000 }, { "epoch": 46.65, "eval_loss": 2.2609057426452637, "eval_runtime": 49.439, "eval_samples_per_second": 809.077, "eval_steps_per_second": 50.567, "step": 2216000 }, { "epoch": 46.82, "learning_rate": 3.0066666666666665e-08, "loss": 2.3879, "step": 2224000 }, { "epoch": 46.82, "eval_loss": 2.255183219909668, "eval_runtime": 50.0243, "eval_samples_per_second": 799.611, "eval_steps_per_second": 49.976, "step": 2224000 }, { "epoch": 46.99, "eval_loss": 2.2568256855010986, "eval_runtime": 48.9637, "eval_samples_per_second": 816.932, "eval_steps_per_second": 51.058, "step": 2232000 }, { "epoch": 47.16, "learning_rate": 2.7333333333333333e-08, "loss": 2.3823, "step": 2240000 }, { "epoch": 47.16, "eval_loss": 2.250671148300171, "eval_runtime": 49.1163, "eval_samples_per_second": 814.393, "eval_steps_per_second": 50.9, "step": 2240000 }, { "epoch": 47.33, "eval_loss": 2.2761969566345215, "eval_runtime": 49.444, "eval_samples_per_second": 808.995, "eval_steps_per_second": 50.562, "step": 2248000 }, { "epoch": 47.49, "learning_rate": 2.46e-08, "loss": 2.388, "step": 2256000 }, { "epoch": 47.49, "eval_loss": 2.252157211303711, "eval_runtime": 48.9857, "eval_samples_per_second": 816.565, "eval_steps_per_second": 51.035, "step": 2256000 }, { "epoch": 47.66, "eval_loss": 2.2531578540802, "eval_runtime": 48.9438, "eval_samples_per_second": 817.263, "eval_steps_per_second": 51.079, "step": 2264000 }, { "epoch": 47.83, "learning_rate": 2.1866666666666667e-08, "loss": 2.3773, "step": 2272000 }, { "epoch": 47.83, "eval_loss": 2.2489843368530273, "eval_runtime": 49.0407, "eval_samples_per_second": 815.65, "eval_steps_per_second": 50.978, "step": 2272000 }, { "epoch": 48.0, "eval_loss": 2.2648425102233887, "eval_runtime": 49.5178, "eval_samples_per_second": 807.791, "eval_steps_per_second": 50.487, "step": 2280000 }, { "epoch": 48.17, "learning_rate": 1.9133333333333333e-08, "loss": 2.3828, "step": 2288000 }, { "epoch": 48.17, "eval_loss": 2.25002384185791, "eval_runtime": 49.6812, "eval_samples_per_second": 805.134, "eval_steps_per_second": 50.321, "step": 2288000 }, { "epoch": 48.34, "eval_loss": 2.253399610519409, "eval_runtime": 49.4626, "eval_samples_per_second": 808.692, "eval_steps_per_second": 50.543, "step": 2296000 }, { "epoch": 48.5, "learning_rate": 1.64e-08, "loss": 2.3816, "step": 2304000 }, { "epoch": 48.5, "eval_loss": 2.251549482345581, "eval_runtime": 49.4033, "eval_samples_per_second": 809.663, "eval_steps_per_second": 50.604, "step": 2304000 }, { "epoch": 48.67, "eval_loss": 2.2701914310455322, "eval_runtime": 49.577, "eval_samples_per_second": 806.826, "eval_steps_per_second": 50.427, "step": 2312000 }, { "epoch": 48.84, "learning_rate": 1.3666666666666667e-08, "loss": 2.3784, "step": 2320000 }, { "epoch": 48.84, "eval_loss": 2.2583844661712646, "eval_runtime": 49.2469, "eval_samples_per_second": 812.233, "eval_steps_per_second": 50.765, "step": 2320000 }, { "epoch": 49.01, "eval_loss": 2.23818039894104, "eval_runtime": 48.9834, "eval_samples_per_second": 816.603, "eval_steps_per_second": 51.038, "step": 2328000 }, { "epoch": 49.18, "learning_rate": 1.0933333333333334e-08, "loss": 2.3863, "step": 2336000 }, { "epoch": 49.18, "eval_loss": 2.260406732559204, "eval_runtime": 49.569, "eval_samples_per_second": 806.956, "eval_steps_per_second": 50.435, "step": 2336000 }, { "epoch": 49.35, "eval_loss": 2.2607226371765137, "eval_runtime": 49.5423, "eval_samples_per_second": 807.39, "eval_steps_per_second": 50.462, "step": 2344000 }, { "epoch": 49.51, "learning_rate": 8.2e-09, "loss": 2.3863, "step": 2352000 }, { "epoch": 49.51, "eval_loss": 2.26461124420166, "eval_runtime": 48.9942, "eval_samples_per_second": 816.423, "eval_steps_per_second": 51.026, "step": 2352000 }, { "epoch": 49.68, "eval_loss": 2.2533907890319824, "eval_runtime": 49.998, "eval_samples_per_second": 800.033, "eval_steps_per_second": 50.002, "step": 2360000 }, { "epoch": 49.85, "learning_rate": 5.466666666666667e-09, "loss": 2.3873, "step": 2368000 }, { "epoch": 49.85, "eval_loss": 2.2741761207580566, "eval_runtime": 49.1748, "eval_samples_per_second": 813.425, "eval_steps_per_second": 50.839, "step": 2368000 }, { "epoch": 50.02, "eval_loss": 2.2686805725097656, "eval_runtime": 49.0988, "eval_samples_per_second": 814.684, "eval_steps_per_second": 50.918, "step": 2376000 }, { "epoch": 50.19, "learning_rate": 2.7333333333333334e-09, "loss": 2.39, "step": 2384000 }, { "epoch": 50.19, "eval_loss": 2.2580976486206055, "eval_runtime": 49.688, "eval_samples_per_second": 805.024, "eval_steps_per_second": 50.314, "step": 2384000 }, { "epoch": 50.36, "eval_loss": 2.2459537982940674, "eval_runtime": 49.7136, "eval_samples_per_second": 804.608, "eval_steps_per_second": 50.288, "step": 2392000 }, { "epoch": 50.53, "learning_rate": 0.0, "loss": 2.3937, "step": 2400000 }, { "epoch": 50.53, "eval_loss": 2.264155387878418, "eval_runtime": 49.6839, "eval_samples_per_second": 805.089, "eval_steps_per_second": 50.318, "step": 2400000 }, { "epoch": 50.53, "step": 2400000, "total_flos": 8.417954735470524e+17, "train_loss": 2.392315192057292, "train_runtime": 173434.012, "train_samples_per_second": 221.41, "train_steps_per_second": 13.838 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 51, "save_steps": 32000, "total_flos": 8.417954735470524e+17, "trial_name": null, "trial_params": null }