|
{ |
|
"best_metric": 2.244624614715576, |
|
"best_model_checkpoint": "./model_tweets_2020_Q3_90/checkpoint-768000", |
|
"epoch": 50.52525209995579, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.5349206924438477, |
|
"eval_runtime": 47.928, |
|
"eval_samples_per_second": 834.585, |
|
"eval_steps_per_second": 52.162, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.7955, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.444758892059326, |
|
"eval_runtime": 48.5664, |
|
"eval_samples_per_second": 823.615, |
|
"eval_steps_per_second": 51.476, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.3948748111724854, |
|
"eval_runtime": 48.544, |
|
"eval_samples_per_second": 823.995, |
|
"eval_steps_per_second": 51.5, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.5335, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.3699398040771484, |
|
"eval_runtime": 48.2115, |
|
"eval_samples_per_second": 829.678, |
|
"eval_steps_per_second": 51.855, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.354438066482544, |
|
"eval_runtime": 48.5047, |
|
"eval_samples_per_second": 824.662, |
|
"eval_steps_per_second": 51.541, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.4757, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.347705364227295, |
|
"eval_runtime": 48.6892, |
|
"eval_samples_per_second": 821.537, |
|
"eval_steps_per_second": 51.346, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.3280608654022217, |
|
"eval_runtime": 48.0175, |
|
"eval_samples_per_second": 833.029, |
|
"eval_steps_per_second": 52.064, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.446, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.317082166671753, |
|
"eval_runtime": 48.561, |
|
"eval_samples_per_second": 823.706, |
|
"eval_steps_per_second": 51.482, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.308218240737915, |
|
"eval_runtime": 48.3682, |
|
"eval_samples_per_second": 826.989, |
|
"eval_steps_per_second": 51.687, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.4291, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.316983222961426, |
|
"eval_runtime": 48.1155, |
|
"eval_samples_per_second": 831.333, |
|
"eval_steps_per_second": 51.958, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 2.2962071895599365, |
|
"eval_runtime": 48.0395, |
|
"eval_samples_per_second": 832.648, |
|
"eval_steps_per_second": 52.041, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.4275, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.304356575012207, |
|
"eval_runtime": 48.8826, |
|
"eval_samples_per_second": 818.287, |
|
"eval_steps_per_second": 51.143, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.2738351821899414, |
|
"eval_runtime": 48.096, |
|
"eval_samples_per_second": 831.671, |
|
"eval_steps_per_second": 51.979, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.4148, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.2927024364471436, |
|
"eval_runtime": 48.7643, |
|
"eval_samples_per_second": 820.272, |
|
"eval_steps_per_second": 51.267, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.2684385776519775, |
|
"eval_runtime": 48.701, |
|
"eval_samples_per_second": 821.338, |
|
"eval_steps_per_second": 51.334, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.4062, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.289085626602173, |
|
"eval_runtime": 48.1247, |
|
"eval_samples_per_second": 831.174, |
|
"eval_steps_per_second": 51.948, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.278874397277832, |
|
"eval_runtime": 48.1786, |
|
"eval_samples_per_second": 830.245, |
|
"eval_steps_per_second": 51.89, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.4022, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.2659101486206055, |
|
"eval_runtime": 48.6348, |
|
"eval_samples_per_second": 822.457, |
|
"eval_steps_per_second": 51.404, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 2.282435894012451, |
|
"eval_runtime": 48.1725, |
|
"eval_samples_per_second": 830.349, |
|
"eval_steps_per_second": 51.897, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.3943, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.268383026123047, |
|
"eval_runtime": 48.1548, |
|
"eval_samples_per_second": 830.655, |
|
"eval_steps_per_second": 51.916, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 2.268256187438965, |
|
"eval_runtime": 48.6456, |
|
"eval_samples_per_second": 822.274, |
|
"eval_steps_per_second": 51.392, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.3957, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.2737274169921875, |
|
"eval_runtime": 48.1068, |
|
"eval_samples_per_second": 831.483, |
|
"eval_steps_per_second": 51.968, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 2.2779204845428467, |
|
"eval_runtime": 48.6234, |
|
"eval_samples_per_second": 822.65, |
|
"eval_steps_per_second": 51.416, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.3976, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.2710366249084473, |
|
"eval_runtime": 48.2531, |
|
"eval_samples_per_second": 828.961, |
|
"eval_steps_per_second": 51.81, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 2.249539852142334, |
|
"eval_runtime": 48.0877, |
|
"eval_samples_per_second": 831.813, |
|
"eval_steps_per_second": 51.988, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.3933, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.266042947769165, |
|
"eval_runtime": 48.7475, |
|
"eval_samples_per_second": 820.554, |
|
"eval_steps_per_second": 51.285, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 2.2686843872070312, |
|
"eval_runtime": 48.6373, |
|
"eval_samples_per_second": 822.414, |
|
"eval_steps_per_second": 51.401, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.4039, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.258059024810791, |
|
"eval_runtime": 48.8049, |
|
"eval_samples_per_second": 819.591, |
|
"eval_steps_per_second": 51.224, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 2.265613079071045, |
|
"eval_runtime": 48.1482, |
|
"eval_samples_per_second": 830.769, |
|
"eval_steps_per_second": 51.923, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.3966, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.254256248474121, |
|
"eval_runtime": 48.4211, |
|
"eval_samples_per_second": 826.087, |
|
"eval_steps_per_second": 51.63, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 2.2767629623413086, |
|
"eval_runtime": 48.0495, |
|
"eval_samples_per_second": 832.475, |
|
"eval_steps_per_second": 52.03, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.3902, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.255053997039795, |
|
"eval_runtime": 49.0256, |
|
"eval_samples_per_second": 815.901, |
|
"eval_steps_per_second": 50.994, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 2.278200626373291, |
|
"eval_runtime": 48.1284, |
|
"eval_samples_per_second": 831.11, |
|
"eval_steps_per_second": 51.944, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.3906, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.2639424800872803, |
|
"eval_runtime": 48.2031, |
|
"eval_samples_per_second": 829.822, |
|
"eval_steps_per_second": 51.864, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 2.2584660053253174, |
|
"eval_runtime": 48.1979, |
|
"eval_samples_per_second": 829.911, |
|
"eval_steps_per_second": 51.869, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.3849, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.254004955291748, |
|
"eval_runtime": 48.3611, |
|
"eval_samples_per_second": 827.11, |
|
"eval_steps_per_second": 51.694, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 2.274897813796997, |
|
"eval_runtime": 48.7282, |
|
"eval_samples_per_second": 820.88, |
|
"eval_steps_per_second": 51.305, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.3805, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.250277042388916, |
|
"eval_runtime": 48.3341, |
|
"eval_samples_per_second": 827.574, |
|
"eval_steps_per_second": 51.723, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 2.273930311203003, |
|
"eval_runtime": 47.7431, |
|
"eval_samples_per_second": 837.818, |
|
"eval_steps_per_second": 52.364, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.3873, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.254091739654541, |
|
"eval_runtime": 48.6038, |
|
"eval_samples_per_second": 822.98, |
|
"eval_steps_per_second": 51.436, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 2.2511613368988037, |
|
"eval_runtime": 48.0895, |
|
"eval_samples_per_second": 831.783, |
|
"eval_steps_per_second": 51.986, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.3942, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 2.2594754695892334, |
|
"eval_runtime": 48.3413, |
|
"eval_samples_per_second": 827.45, |
|
"eval_steps_per_second": 51.716, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 2.260305166244507, |
|
"eval_runtime": 48.7128, |
|
"eval_samples_per_second": 821.14, |
|
"eval_steps_per_second": 51.321, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.386, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 2.257478713989258, |
|
"eval_runtime": 48.858, |
|
"eval_samples_per_second": 818.698, |
|
"eval_steps_per_second": 51.169, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 2.2788634300231934, |
|
"eval_runtime": 48.3058, |
|
"eval_samples_per_second": 828.059, |
|
"eval_steps_per_second": 51.754, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.3806, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 2.265003204345703, |
|
"eval_runtime": 48.6794, |
|
"eval_samples_per_second": 821.703, |
|
"eval_steps_per_second": 51.356, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_loss": 2.2706291675567627, |
|
"eval_runtime": 48.2232, |
|
"eval_samples_per_second": 829.476, |
|
"eval_steps_per_second": 51.842, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.3883, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 2.2652194499969482, |
|
"eval_runtime": 48.2989, |
|
"eval_samples_per_second": 828.177, |
|
"eval_steps_per_second": 51.761, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 2.2540171146392822, |
|
"eval_runtime": 48.1211, |
|
"eval_samples_per_second": 831.236, |
|
"eval_steps_per_second": 51.952, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.3922, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.268253803253174, |
|
"eval_runtime": 48.5611, |
|
"eval_samples_per_second": 823.704, |
|
"eval_steps_per_second": 51.481, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 2.263794422149658, |
|
"eval_runtime": 48.7481, |
|
"eval_samples_per_second": 820.544, |
|
"eval_steps_per_second": 51.284, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.3887, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 2.253530502319336, |
|
"eval_runtime": 48.7981, |
|
"eval_samples_per_second": 819.704, |
|
"eval_steps_per_second": 51.232, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 2.2529492378234863, |
|
"eval_runtime": 48.6536, |
|
"eval_samples_per_second": 822.139, |
|
"eval_steps_per_second": 51.384, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.3818, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 2.248337507247925, |
|
"eval_runtime": 48.2543, |
|
"eval_samples_per_second": 828.942, |
|
"eval_steps_per_second": 51.809, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 2.2573952674865723, |
|
"eval_runtime": 48.2027, |
|
"eval_samples_per_second": 829.829, |
|
"eval_steps_per_second": 51.864, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.387, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 2.2624008655548096, |
|
"eval_runtime": 48.2217, |
|
"eval_samples_per_second": 829.502, |
|
"eval_steps_per_second": 51.844, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 2.26644229888916, |
|
"eval_runtime": 48.04, |
|
"eval_samples_per_second": 832.64, |
|
"eval_steps_per_second": 52.04, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.3839, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 2.257237195968628, |
|
"eval_runtime": 48.887, |
|
"eval_samples_per_second": 818.214, |
|
"eval_steps_per_second": 51.138, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_loss": 2.252383232116699, |
|
"eval_runtime": 48.1755, |
|
"eval_samples_per_second": 830.298, |
|
"eval_steps_per_second": 51.894, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.3901, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 2.2532765865325928, |
|
"eval_runtime": 48.1778, |
|
"eval_samples_per_second": 830.259, |
|
"eval_steps_per_second": 51.891, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"eval_loss": 2.250092029571533, |
|
"eval_runtime": 48.8687, |
|
"eval_samples_per_second": 818.52, |
|
"eval_steps_per_second": 51.157, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.382, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 2.266896963119507, |
|
"eval_runtime": 48.918, |
|
"eval_samples_per_second": 817.695, |
|
"eval_steps_per_second": 51.106, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"eval_loss": 2.259631395339966, |
|
"eval_runtime": 48.6846, |
|
"eval_samples_per_second": 821.614, |
|
"eval_steps_per_second": 51.351, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.3829, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"eval_loss": 2.270488977432251, |
|
"eval_runtime": 49.0149, |
|
"eval_samples_per_second": 816.079, |
|
"eval_steps_per_second": 51.005, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_loss": 2.2553160190582275, |
|
"eval_runtime": 48.2406, |
|
"eval_samples_per_second": 829.177, |
|
"eval_steps_per_second": 51.824, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.3963, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"eval_loss": 2.2741470336914062, |
|
"eval_runtime": 48.1623, |
|
"eval_samples_per_second": 830.524, |
|
"eval_steps_per_second": 51.908, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"eval_loss": 2.266427993774414, |
|
"eval_runtime": 48.1335, |
|
"eval_samples_per_second": 831.022, |
|
"eval_steps_per_second": 51.939, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.3843, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"eval_loss": 2.2532401084899902, |
|
"eval_runtime": 48.2869, |
|
"eval_samples_per_second": 828.382, |
|
"eval_steps_per_second": 51.774, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"eval_loss": 2.2719969749450684, |
|
"eval_runtime": 48.3879, |
|
"eval_samples_per_second": 826.653, |
|
"eval_steps_per_second": 51.666, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.3853, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 2.2532143592834473, |
|
"eval_runtime": 48.6521, |
|
"eval_samples_per_second": 822.164, |
|
"eval_steps_per_second": 51.385, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"eval_loss": 2.2699735164642334, |
|
"eval_runtime": 48.0759, |
|
"eval_samples_per_second": 832.017, |
|
"eval_steps_per_second": 52.001, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.3907, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 2.257100820541382, |
|
"eval_runtime": 48.264, |
|
"eval_samples_per_second": 828.776, |
|
"eval_steps_per_second": 51.798, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"eval_loss": 2.252300500869751, |
|
"eval_runtime": 48.085, |
|
"eval_samples_per_second": 831.86, |
|
"eval_steps_per_second": 51.991, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.3865, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 2.245786428451538, |
|
"eval_runtime": 48.9081, |
|
"eval_samples_per_second": 817.861, |
|
"eval_steps_per_second": 51.116, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"eval_loss": 2.264732599258423, |
|
"eval_runtime": 48.1262, |
|
"eval_samples_per_second": 831.149, |
|
"eval_steps_per_second": 51.947, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.3827, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 2.2490034103393555, |
|
"eval_runtime": 48.1786, |
|
"eval_samples_per_second": 830.244, |
|
"eval_steps_per_second": 51.89, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"eval_loss": 2.2623653411865234, |
|
"eval_runtime": 48.6642, |
|
"eval_samples_per_second": 821.959, |
|
"eval_steps_per_second": 51.372, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.3869, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"eval_loss": 2.253758430480957, |
|
"eval_runtime": 48.2352, |
|
"eval_samples_per_second": 829.269, |
|
"eval_steps_per_second": 51.829, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"eval_loss": 2.2357494831085205, |
|
"eval_runtime": 47.7426, |
|
"eval_samples_per_second": 837.826, |
|
"eval_steps_per_second": 52.364, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.3958, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"eval_loss": 2.2508862018585205, |
|
"eval_runtime": 48.884, |
|
"eval_samples_per_second": 818.263, |
|
"eval_steps_per_second": 51.141, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"eval_loss": 2.2690088748931885, |
|
"eval_runtime": 48.5211, |
|
"eval_samples_per_second": 824.384, |
|
"eval_steps_per_second": 51.524, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.3852, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_loss": 2.247575283050537, |
|
"eval_runtime": 48.4402, |
|
"eval_samples_per_second": 825.761, |
|
"eval_steps_per_second": 51.61, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_loss": 2.272088050842285, |
|
"eval_runtime": 48.8783, |
|
"eval_samples_per_second": 818.359, |
|
"eval_steps_per_second": 51.147, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.3889, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 2.253678560256958, |
|
"eval_runtime": 48.7246, |
|
"eval_samples_per_second": 820.94, |
|
"eval_steps_per_second": 51.309, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"eval_loss": 2.2723231315612793, |
|
"eval_runtime": 48.1926, |
|
"eval_samples_per_second": 830.003, |
|
"eval_steps_per_second": 51.875, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.3839, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 2.2664077281951904, |
|
"eval_runtime": 48.493, |
|
"eval_samples_per_second": 824.861, |
|
"eval_steps_per_second": 51.554, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"eval_loss": 2.2725985050201416, |
|
"eval_runtime": 49.24, |
|
"eval_samples_per_second": 812.348, |
|
"eval_steps_per_second": 50.772, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.3884, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"eval_loss": 2.265206813812256, |
|
"eval_runtime": 49.5431, |
|
"eval_samples_per_second": 807.377, |
|
"eval_steps_per_second": 50.461, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_loss": 2.2633461952209473, |
|
"eval_runtime": 48.6148, |
|
"eval_samples_per_second": 822.795, |
|
"eval_steps_per_second": 51.425, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.3827, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"eval_loss": 2.268095016479492, |
|
"eval_runtime": 49.156, |
|
"eval_samples_per_second": 813.737, |
|
"eval_steps_per_second": 50.859, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_loss": 2.2542901039123535, |
|
"eval_runtime": 48.9808, |
|
"eval_samples_per_second": 816.646, |
|
"eval_steps_per_second": 51.04, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.3861, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"eval_loss": 2.2634222507476807, |
|
"eval_runtime": 48.9041, |
|
"eval_samples_per_second": 817.927, |
|
"eval_steps_per_second": 51.12, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 2.2706964015960693, |
|
"eval_runtime": 49.2314, |
|
"eval_samples_per_second": 812.49, |
|
"eval_steps_per_second": 50.781, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.3812, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"eval_loss": 2.2575085163116455, |
|
"eval_runtime": 48.6081, |
|
"eval_samples_per_second": 822.908, |
|
"eval_steps_per_second": 51.432, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.2549245357513428, |
|
"eval_runtime": 48.9973, |
|
"eval_samples_per_second": 816.371, |
|
"eval_steps_per_second": 51.023, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.3862, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"eval_loss": 2.244624614715576, |
|
"eval_runtime": 49.0122, |
|
"eval_samples_per_second": 816.123, |
|
"eval_steps_per_second": 51.008, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"eval_loss": 2.261697769165039, |
|
"eval_runtime": 48.9467, |
|
"eval_samples_per_second": 817.216, |
|
"eval_steps_per_second": 51.076, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.3859, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"eval_loss": 2.250509023666382, |
|
"eval_runtime": 49.5476, |
|
"eval_samples_per_second": 807.304, |
|
"eval_steps_per_second": 50.457, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_loss": 2.271986722946167, |
|
"eval_runtime": 48.2956, |
|
"eval_samples_per_second": 828.232, |
|
"eval_steps_per_second": 51.765, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.3873, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"eval_loss": 2.252095937728882, |
|
"eval_runtime": 48.4511, |
|
"eval_samples_per_second": 825.575, |
|
"eval_steps_per_second": 51.598, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"eval_loss": 2.254298448562622, |
|
"eval_runtime": 49.0762, |
|
"eval_samples_per_second": 815.06, |
|
"eval_steps_per_second": 50.941, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.381, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"eval_loss": 2.267543077468872, |
|
"eval_runtime": 48.6635, |
|
"eval_samples_per_second": 821.971, |
|
"eval_steps_per_second": 51.373, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"eval_loss": 2.254502296447754, |
|
"eval_runtime": 49.6829, |
|
"eval_samples_per_second": 805.106, |
|
"eval_steps_per_second": 50.319, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.3851, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_loss": 2.2488667964935303, |
|
"eval_runtime": 48.7936, |
|
"eval_samples_per_second": 819.78, |
|
"eval_steps_per_second": 51.236, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_loss": 2.2605700492858887, |
|
"eval_runtime": 49.2213, |
|
"eval_samples_per_second": 812.656, |
|
"eval_steps_per_second": 50.791, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.3878, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_loss": 2.2579894065856934, |
|
"eval_runtime": 49.0096, |
|
"eval_samples_per_second": 816.167, |
|
"eval_steps_per_second": 51.01, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_loss": 2.2604382038116455, |
|
"eval_runtime": 48.2778, |
|
"eval_samples_per_second": 828.538, |
|
"eval_steps_per_second": 51.784, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.3812, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 2.2630739212036133, |
|
"eval_runtime": 48.321, |
|
"eval_samples_per_second": 827.798, |
|
"eval_steps_per_second": 51.737, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"eval_loss": 2.250539541244507, |
|
"eval_runtime": 48.9155, |
|
"eval_samples_per_second": 817.737, |
|
"eval_steps_per_second": 51.109, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.3849, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 2.2657594680786133, |
|
"eval_runtime": 48.513, |
|
"eval_samples_per_second": 824.521, |
|
"eval_steps_per_second": 51.533, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 2.2566869258880615, |
|
"eval_runtime": 49.2473, |
|
"eval_samples_per_second": 812.227, |
|
"eval_steps_per_second": 50.764, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.3833, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"eval_loss": 2.2533113956451416, |
|
"eval_runtime": 48.9908, |
|
"eval_samples_per_second": 816.479, |
|
"eval_steps_per_second": 51.03, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"eval_loss": 2.2455687522888184, |
|
"eval_runtime": 49.5558, |
|
"eval_samples_per_second": 807.17, |
|
"eval_steps_per_second": 50.448, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.3847, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": 2.253338098526001, |
|
"eval_runtime": 49.2167, |
|
"eval_samples_per_second": 812.733, |
|
"eval_steps_per_second": 50.796, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"eval_loss": 2.257462739944458, |
|
"eval_runtime": 48.4436, |
|
"eval_samples_per_second": 825.703, |
|
"eval_steps_per_second": 51.606, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.3869, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"eval_loss": 2.2667601108551025, |
|
"eval_runtime": 49.3388, |
|
"eval_samples_per_second": 810.721, |
|
"eval_steps_per_second": 50.67, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"eval_loss": 2.2598884105682373, |
|
"eval_runtime": 48.9927, |
|
"eval_samples_per_second": 816.449, |
|
"eval_steps_per_second": 51.028, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.3867, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"eval_loss": 2.2680182456970215, |
|
"eval_runtime": 48.4318, |
|
"eval_samples_per_second": 825.904, |
|
"eval_steps_per_second": 51.619, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 20.04, |
|
"eval_loss": 2.2669413089752197, |
|
"eval_runtime": 50.205, |
|
"eval_samples_per_second": 796.733, |
|
"eval_steps_per_second": 49.796, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.3942, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"eval_loss": 2.2482852935791016, |
|
"eval_runtime": 48.9393, |
|
"eval_samples_per_second": 817.34, |
|
"eval_steps_per_second": 51.084, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"eval_loss": 2.273371934890747, |
|
"eval_runtime": 48.5937, |
|
"eval_samples_per_second": 823.152, |
|
"eval_steps_per_second": 51.447, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.3863, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"eval_loss": 2.262270212173462, |
|
"eval_runtime": 48.5495, |
|
"eval_samples_per_second": 823.902, |
|
"eval_steps_per_second": 51.494, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"eval_loss": 2.264986038208008, |
|
"eval_runtime": 48.8999, |
|
"eval_samples_per_second": 817.998, |
|
"eval_steps_per_second": 51.125, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.3924, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"eval_loss": 2.2602696418762207, |
|
"eval_runtime": 49.2404, |
|
"eval_samples_per_second": 812.341, |
|
"eval_steps_per_second": 50.771, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"eval_loss": 2.2708377838134766, |
|
"eval_runtime": 49.0675, |
|
"eval_samples_per_second": 815.204, |
|
"eval_steps_per_second": 50.95, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.3871, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"eval_loss": 2.2512402534484863, |
|
"eval_runtime": 48.7719, |
|
"eval_samples_per_second": 820.144, |
|
"eval_steps_per_second": 51.259, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"eval_loss": 2.2567834854125977, |
|
"eval_runtime": 49.1119, |
|
"eval_samples_per_second": 814.466, |
|
"eval_steps_per_second": 50.904, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 21.56, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.3827, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.56, |
|
"eval_loss": 2.2676033973693848, |
|
"eval_runtime": 49.0505, |
|
"eval_samples_per_second": 815.486, |
|
"eval_steps_per_second": 50.968, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"eval_loss": 2.271024465560913, |
|
"eval_runtime": 49.4407, |
|
"eval_samples_per_second": 809.05, |
|
"eval_steps_per_second": 50.566, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.3799, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"eval_loss": 2.2804324626922607, |
|
"eval_runtime": 49.138, |
|
"eval_samples_per_second": 814.034, |
|
"eval_steps_per_second": 50.877, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"eval_loss": 2.2498600482940674, |
|
"eval_runtime": 48.6186, |
|
"eval_samples_per_second": 822.731, |
|
"eval_steps_per_second": 51.421, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 22.23, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.3863, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 22.23, |
|
"eval_loss": 2.2556710243225098, |
|
"eval_runtime": 49.7999, |
|
"eval_samples_per_second": 803.214, |
|
"eval_steps_per_second": 50.201, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_loss": 2.2603883743286133, |
|
"eval_runtime": 49.3365, |
|
"eval_samples_per_second": 810.759, |
|
"eval_steps_per_second": 50.672, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.3858, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"eval_loss": 2.2832398414611816, |
|
"eval_runtime": 48.6796, |
|
"eval_samples_per_second": 821.699, |
|
"eval_steps_per_second": 51.356, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.74, |
|
"eval_loss": 2.244276285171509, |
|
"eval_runtime": 48.7816, |
|
"eval_samples_per_second": 819.982, |
|
"eval_steps_per_second": 51.249, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.3859, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"eval_loss": 2.260357141494751, |
|
"eval_runtime": 49.4857, |
|
"eval_samples_per_second": 808.315, |
|
"eval_steps_per_second": 50.52, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 23.07, |
|
"eval_loss": 2.263144016265869, |
|
"eval_runtime": 48.7635, |
|
"eval_samples_per_second": 820.285, |
|
"eval_steps_per_second": 51.268, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.3846, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"eval_loss": 2.2689881324768066, |
|
"eval_runtime": 48.7943, |
|
"eval_samples_per_second": 819.768, |
|
"eval_steps_per_second": 51.235, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"eval_loss": 2.25949764251709, |
|
"eval_runtime": 48.4368, |
|
"eval_samples_per_second": 825.819, |
|
"eval_steps_per_second": 51.614, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.3887, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_loss": 2.2500855922698975, |
|
"eval_runtime": 48.8931, |
|
"eval_samples_per_second": 818.111, |
|
"eval_steps_per_second": 51.132, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"eval_loss": 2.2532594203948975, |
|
"eval_runtime": 48.868, |
|
"eval_samples_per_second": 818.532, |
|
"eval_steps_per_second": 51.158, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.3856, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"eval_loss": 2.252855062484741, |
|
"eval_runtime": 49.6155, |
|
"eval_samples_per_second": 806.199, |
|
"eval_steps_per_second": 50.387, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 24.08, |
|
"eval_loss": 2.2455570697784424, |
|
"eval_runtime": 48.8925, |
|
"eval_samples_per_second": 818.121, |
|
"eval_steps_per_second": 51.133, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.3856, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"eval_loss": 2.254368782043457, |
|
"eval_runtime": 49.1923, |
|
"eval_samples_per_second": 813.135, |
|
"eval_steps_per_second": 50.821, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"eval_loss": 2.25541090965271, |
|
"eval_runtime": 48.5868, |
|
"eval_samples_per_second": 823.268, |
|
"eval_steps_per_second": 51.454, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.3867, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"eval_loss": 2.2595579624176025, |
|
"eval_runtime": 49.0442, |
|
"eval_samples_per_second": 815.591, |
|
"eval_steps_per_second": 50.974, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"eval_loss": 2.252202033996582, |
|
"eval_runtime": 49.5166, |
|
"eval_samples_per_second": 807.81, |
|
"eval_steps_per_second": 50.488, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.3795, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"eval_loss": 2.249300241470337, |
|
"eval_runtime": 49.7952, |
|
"eval_samples_per_second": 803.29, |
|
"eval_steps_per_second": 50.206, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 25.09, |
|
"eval_loss": 2.2608890533447266, |
|
"eval_runtime": 48.681, |
|
"eval_samples_per_second": 821.675, |
|
"eval_steps_per_second": 51.355, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.3926, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"eval_loss": 2.2658445835113525, |
|
"eval_runtime": 48.9256, |
|
"eval_samples_per_second": 817.569, |
|
"eval_steps_per_second": 51.098, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 25.43, |
|
"eval_loss": 2.2592995166778564, |
|
"eval_runtime": 48.8774, |
|
"eval_samples_per_second": 818.374, |
|
"eval_steps_per_second": 51.148, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.3887, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_loss": 2.2703697681427, |
|
"eval_runtime": 48.9056, |
|
"eval_samples_per_second": 817.902, |
|
"eval_steps_per_second": 51.119, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"eval_loss": 2.263197183609009, |
|
"eval_runtime": 49.6098, |
|
"eval_samples_per_second": 806.292, |
|
"eval_steps_per_second": 50.393, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.3926, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_loss": 2.2628068923950195, |
|
"eval_runtime": 49.6731, |
|
"eval_samples_per_second": 805.265, |
|
"eval_steps_per_second": 50.329, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"eval_loss": 2.2656562328338623, |
|
"eval_runtime": 49.8004, |
|
"eval_samples_per_second": 803.207, |
|
"eval_steps_per_second": 50.2, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.3809, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"eval_loss": 2.2545762062072754, |
|
"eval_runtime": 49.3811, |
|
"eval_samples_per_second": 810.026, |
|
"eval_steps_per_second": 50.627, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"eval_loss": 2.259634017944336, |
|
"eval_runtime": 49.4215, |
|
"eval_samples_per_second": 809.365, |
|
"eval_steps_per_second": 50.585, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.3878, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"eval_loss": 2.254516124725342, |
|
"eval_runtime": 48.3, |
|
"eval_samples_per_second": 828.158, |
|
"eval_steps_per_second": 51.76, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"eval_loss": 2.2667646408081055, |
|
"eval_runtime": 49.742, |
|
"eval_samples_per_second": 804.149, |
|
"eval_steps_per_second": 50.259, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.3861, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"eval_loss": 2.2534382343292236, |
|
"eval_runtime": 48.5643, |
|
"eval_samples_per_second": 823.65, |
|
"eval_steps_per_second": 51.478, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"eval_loss": 2.261183738708496, |
|
"eval_runtime": 49.4288, |
|
"eval_samples_per_second": 809.246, |
|
"eval_steps_per_second": 50.578, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 27.28, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.3815, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 27.28, |
|
"eval_loss": 2.2441422939300537, |
|
"eval_runtime": 48.9537, |
|
"eval_samples_per_second": 817.099, |
|
"eval_steps_per_second": 51.069, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"eval_loss": 2.271397590637207, |
|
"eval_runtime": 49.5135, |
|
"eval_samples_per_second": 807.861, |
|
"eval_steps_per_second": 50.491, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.3861, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"eval_loss": 2.2604434490203857, |
|
"eval_runtime": 48.816, |
|
"eval_samples_per_second": 819.403, |
|
"eval_steps_per_second": 51.213, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"eval_loss": 2.2535157203674316, |
|
"eval_runtime": 49.1172, |
|
"eval_samples_per_second": 814.378, |
|
"eval_steps_per_second": 50.899, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 27.96, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.388, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.96, |
|
"eval_loss": 2.2466070652008057, |
|
"eval_runtime": 48.6499, |
|
"eval_samples_per_second": 822.201, |
|
"eval_steps_per_second": 51.388, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 28.13, |
|
"eval_loss": 2.258121967315674, |
|
"eval_runtime": 48.6067, |
|
"eval_samples_per_second": 822.932, |
|
"eval_steps_per_second": 51.433, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.3864, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"eval_loss": 2.257232904434204, |
|
"eval_runtime": 49.3463, |
|
"eval_samples_per_second": 810.598, |
|
"eval_steps_per_second": 50.662, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"eval_loss": 2.238109827041626, |
|
"eval_runtime": 48.9872, |
|
"eval_samples_per_second": 816.539, |
|
"eval_steps_per_second": 51.034, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.39, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"eval_loss": 2.23980712890625, |
|
"eval_runtime": 49.3711, |
|
"eval_samples_per_second": 810.191, |
|
"eval_steps_per_second": 50.637, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_loss": 2.269519805908203, |
|
"eval_runtime": 49.3307, |
|
"eval_samples_per_second": 810.853, |
|
"eval_steps_per_second": 50.678, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.39, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"eval_loss": 2.262801170349121, |
|
"eval_runtime": 49.0301, |
|
"eval_samples_per_second": 815.825, |
|
"eval_steps_per_second": 50.989, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"eval_loss": 2.2599363327026367, |
|
"eval_runtime": 49.1614, |
|
"eval_samples_per_second": 813.647, |
|
"eval_steps_per_second": 50.853, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 29.3, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.3804, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 29.3, |
|
"eval_loss": 2.262774705886841, |
|
"eval_runtime": 49.576, |
|
"eval_samples_per_second": 806.842, |
|
"eval_steps_per_second": 50.428, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"eval_loss": 2.2721939086914062, |
|
"eval_runtime": 48.6201, |
|
"eval_samples_per_second": 822.705, |
|
"eval_steps_per_second": 51.419, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 29.64, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.3858, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.64, |
|
"eval_loss": 2.24898099899292, |
|
"eval_runtime": 49.0807, |
|
"eval_samples_per_second": 814.983, |
|
"eval_steps_per_second": 50.936, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.81, |
|
"eval_loss": 2.262730360031128, |
|
"eval_runtime": 49.565, |
|
"eval_samples_per_second": 807.021, |
|
"eval_steps_per_second": 50.439, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.3804, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"eval_loss": 2.262303113937378, |
|
"eval_runtime": 48.6465, |
|
"eval_samples_per_second": 822.258, |
|
"eval_steps_per_second": 51.391, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"eval_loss": 2.252244472503662, |
|
"eval_runtime": 49.1968, |
|
"eval_samples_per_second": 813.061, |
|
"eval_steps_per_second": 50.816, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.3834, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"eval_loss": 2.2633419036865234, |
|
"eval_runtime": 48.625, |
|
"eval_samples_per_second": 822.622, |
|
"eval_steps_per_second": 51.414, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"eval_loss": 2.255260467529297, |
|
"eval_runtime": 48.5565, |
|
"eval_samples_per_second": 823.782, |
|
"eval_steps_per_second": 51.486, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.3853, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"eval_loss": 2.239067554473877, |
|
"eval_runtime": 49.1758, |
|
"eval_samples_per_second": 813.408, |
|
"eval_steps_per_second": 50.838, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"eval_loss": 2.2615532875061035, |
|
"eval_runtime": 48.6338, |
|
"eval_samples_per_second": 822.473, |
|
"eval_steps_per_second": 51.405, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.3946, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_loss": 2.2630956172943115, |
|
"eval_runtime": 48.6486, |
|
"eval_samples_per_second": 822.224, |
|
"eval_steps_per_second": 51.389, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 31.16, |
|
"eval_loss": 2.2638938426971436, |
|
"eval_runtime": 48.6689, |
|
"eval_samples_per_second": 821.88, |
|
"eval_steps_per_second": 51.367, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.385, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"eval_loss": 2.27362060546875, |
|
"eval_runtime": 49.1717, |
|
"eval_samples_per_second": 813.476, |
|
"eval_steps_per_second": 50.842, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 31.49, |
|
"eval_loss": 2.2715282440185547, |
|
"eval_runtime": 48.6068, |
|
"eval_samples_per_second": 822.931, |
|
"eval_steps_per_second": 51.433, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 31.66, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.387, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 31.66, |
|
"eval_loss": 2.255669116973877, |
|
"eval_runtime": 49.2692, |
|
"eval_samples_per_second": 811.866, |
|
"eval_steps_per_second": 50.742, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 31.83, |
|
"eval_loss": 2.258305311203003, |
|
"eval_runtime": 49.3922, |
|
"eval_samples_per_second": 809.845, |
|
"eval_steps_per_second": 50.615, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.3831, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 2.2543575763702393, |
|
"eval_runtime": 48.8562, |
|
"eval_samples_per_second": 818.73, |
|
"eval_steps_per_second": 51.171, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 32.17, |
|
"eval_loss": 2.2756261825561523, |
|
"eval_runtime": 48.8463, |
|
"eval_samples_per_second": 818.895, |
|
"eval_steps_per_second": 51.181, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.3835, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"eval_loss": 2.2793610095977783, |
|
"eval_runtime": 48.7275, |
|
"eval_samples_per_second": 820.891, |
|
"eval_steps_per_second": 51.306, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"eval_loss": 2.2648372650146484, |
|
"eval_runtime": 49.3109, |
|
"eval_samples_per_second": 811.179, |
|
"eval_steps_per_second": 50.699, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 32.67, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.3857, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 32.67, |
|
"eval_loss": 2.2563135623931885, |
|
"eval_runtime": 49.7733, |
|
"eval_samples_per_second": 803.643, |
|
"eval_steps_per_second": 50.228, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 32.84, |
|
"eval_loss": 2.2537479400634766, |
|
"eval_runtime": 49.2356, |
|
"eval_samples_per_second": 812.42, |
|
"eval_steps_per_second": 50.776, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.3856, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"eval_loss": 2.261024236679077, |
|
"eval_runtime": 49.2157, |
|
"eval_samples_per_second": 812.749, |
|
"eval_steps_per_second": 50.797, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 33.18, |
|
"eval_loss": 2.264604330062866, |
|
"eval_runtime": 49.1631, |
|
"eval_samples_per_second": 813.618, |
|
"eval_steps_per_second": 50.851, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 33.35, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.3902, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 33.35, |
|
"eval_loss": 2.2544610500335693, |
|
"eval_runtime": 48.8485, |
|
"eval_samples_per_second": 818.858, |
|
"eval_steps_per_second": 51.179, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 33.52, |
|
"eval_loss": 2.271030902862549, |
|
"eval_runtime": 48.3697, |
|
"eval_samples_per_second": 826.964, |
|
"eval_steps_per_second": 51.685, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 33.68, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.3897, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 33.68, |
|
"eval_loss": 2.2601163387298584, |
|
"eval_runtime": 48.8818, |
|
"eval_samples_per_second": 818.3, |
|
"eval_steps_per_second": 51.144, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_loss": 2.2542924880981445, |
|
"eval_runtime": 49.0945, |
|
"eval_samples_per_second": 814.754, |
|
"eval_steps_per_second": 50.922, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.3866, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"eval_loss": 2.2525877952575684, |
|
"eval_runtime": 49.2321, |
|
"eval_samples_per_second": 812.478, |
|
"eval_steps_per_second": 50.78, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 34.19, |
|
"eval_loss": 2.262938976287842, |
|
"eval_runtime": 49.3213, |
|
"eval_samples_per_second": 811.009, |
|
"eval_steps_per_second": 50.688, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 34.36, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.3823, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 34.36, |
|
"eval_loss": 2.2616801261901855, |
|
"eval_runtime": 48.9485, |
|
"eval_samples_per_second": 817.185, |
|
"eval_steps_per_second": 51.074, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 34.53, |
|
"eval_loss": 2.2519824504852295, |
|
"eval_runtime": 48.6687, |
|
"eval_samples_per_second": 821.883, |
|
"eval_steps_per_second": 51.368, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.3874, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"eval_loss": 2.261162042617798, |
|
"eval_runtime": 48.7567, |
|
"eval_samples_per_second": 820.4, |
|
"eval_steps_per_second": 51.275, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"eval_loss": 2.2568650245666504, |
|
"eval_runtime": 48.8018, |
|
"eval_samples_per_second": 819.641, |
|
"eval_steps_per_second": 51.228, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.3895, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"eval_loss": 2.2633254528045654, |
|
"eval_runtime": 48.7101, |
|
"eval_samples_per_second": 821.185, |
|
"eval_steps_per_second": 51.324, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"eval_loss": 2.259277820587158, |
|
"eval_runtime": 49.2378, |
|
"eval_samples_per_second": 812.384, |
|
"eval_steps_per_second": 50.774, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.3857, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"eval_loss": 2.2650630474090576, |
|
"eval_runtime": 48.8949, |
|
"eval_samples_per_second": 818.082, |
|
"eval_steps_per_second": 51.13, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 35.54, |
|
"eval_loss": 2.256744623184204, |
|
"eval_runtime": 48.7371, |
|
"eval_samples_per_second": 820.73, |
|
"eval_steps_per_second": 51.296, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 35.7, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.3811, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 35.7, |
|
"eval_loss": 2.253361701965332, |
|
"eval_runtime": 49.5532, |
|
"eval_samples_per_second": 807.214, |
|
"eval_steps_per_second": 50.451, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 35.87, |
|
"eval_loss": 2.263338088989258, |
|
"eval_runtime": 49.1414, |
|
"eval_samples_per_second": 813.978, |
|
"eval_steps_per_second": 50.874, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.3944, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"eval_loss": 2.2504327297210693, |
|
"eval_runtime": 49.4998, |
|
"eval_samples_per_second": 808.084, |
|
"eval_steps_per_second": 50.505, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"eval_loss": 2.2518932819366455, |
|
"eval_runtime": 49.3816, |
|
"eval_samples_per_second": 810.018, |
|
"eval_steps_per_second": 50.626, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.3883, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"eval_loss": 2.2571768760681152, |
|
"eval_runtime": 49.5997, |
|
"eval_samples_per_second": 806.456, |
|
"eval_steps_per_second": 50.404, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 2.2575507164001465, |
|
"eval_runtime": 49.3457, |
|
"eval_samples_per_second": 810.607, |
|
"eval_steps_per_second": 50.663, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 36.72, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.3859, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 36.72, |
|
"eval_loss": 2.2719168663024902, |
|
"eval_runtime": 48.91, |
|
"eval_samples_per_second": 817.828, |
|
"eval_steps_per_second": 51.114, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 36.88, |
|
"eval_loss": 2.2667555809020996, |
|
"eval_runtime": 48.8267, |
|
"eval_samples_per_second": 819.223, |
|
"eval_steps_per_second": 51.201, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.3914, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"eval_loss": 2.250850200653076, |
|
"eval_runtime": 48.8892, |
|
"eval_samples_per_second": 818.176, |
|
"eval_steps_per_second": 51.136, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 37.22, |
|
"eval_loss": 2.2601399421691895, |
|
"eval_runtime": 48.8589, |
|
"eval_samples_per_second": 818.684, |
|
"eval_steps_per_second": 51.168, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.3848, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"eval_loss": 2.2686824798583984, |
|
"eval_runtime": 48.9048, |
|
"eval_samples_per_second": 817.915, |
|
"eval_steps_per_second": 51.12, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 37.56, |
|
"eval_loss": 2.2513012886047363, |
|
"eval_runtime": 48.7112, |
|
"eval_samples_per_second": 821.166, |
|
"eval_steps_per_second": 51.323, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.3903, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"eval_loss": 2.2519407272338867, |
|
"eval_runtime": 48.938, |
|
"eval_samples_per_second": 817.361, |
|
"eval_steps_per_second": 51.085, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"eval_loss": 2.259387731552124, |
|
"eval_runtime": 49.4041, |
|
"eval_samples_per_second": 809.65, |
|
"eval_steps_per_second": 50.603, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.3822, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"eval_loss": 2.256521701812744, |
|
"eval_runtime": 48.8635, |
|
"eval_samples_per_second": 818.606, |
|
"eval_steps_per_second": 51.163, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 38.23, |
|
"eval_loss": 2.2812252044677734, |
|
"eval_runtime": 49.3349, |
|
"eval_samples_per_second": 810.785, |
|
"eval_steps_per_second": 50.674, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.383, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"eval_loss": 2.2589097023010254, |
|
"eval_runtime": 48.8806, |
|
"eval_samples_per_second": 818.32, |
|
"eval_steps_per_second": 51.145, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"eval_loss": 2.2560157775878906, |
|
"eval_runtime": 48.961, |
|
"eval_samples_per_second": 816.977, |
|
"eval_steps_per_second": 51.061, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 38.74, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.3868, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 38.74, |
|
"eval_loss": 2.264800548553467, |
|
"eval_runtime": 49.3795, |
|
"eval_samples_per_second": 810.053, |
|
"eval_steps_per_second": 50.628, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 38.9, |
|
"eval_loss": 2.2506866455078125, |
|
"eval_runtime": 48.8976, |
|
"eval_samples_per_second": 818.037, |
|
"eval_steps_per_second": 51.127, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.3775, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"eval_loss": 2.2569808959960938, |
|
"eval_runtime": 48.9247, |
|
"eval_samples_per_second": 817.584, |
|
"eval_steps_per_second": 51.099, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"eval_loss": 2.2549405097961426, |
|
"eval_runtime": 49.5363, |
|
"eval_samples_per_second": 807.488, |
|
"eval_steps_per_second": 50.468, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.3818, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"eval_loss": 2.2583167552948, |
|
"eval_runtime": 49.5006, |
|
"eval_samples_per_second": 808.071, |
|
"eval_steps_per_second": 50.504, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 39.58, |
|
"eval_loss": 2.261044502258301, |
|
"eval_runtime": 49.628, |
|
"eval_samples_per_second": 805.997, |
|
"eval_steps_per_second": 50.375, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 39.75, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.3887, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 39.75, |
|
"eval_loss": 2.262882947921753, |
|
"eval_runtime": 49.6245, |
|
"eval_samples_per_second": 806.053, |
|
"eval_steps_per_second": 50.378, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 39.91, |
|
"eval_loss": 2.273881435394287, |
|
"eval_runtime": 49.4491, |
|
"eval_samples_per_second": 808.913, |
|
"eval_steps_per_second": 50.557, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.3893, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"eval_loss": 2.2657415866851807, |
|
"eval_runtime": 49.1269, |
|
"eval_samples_per_second": 814.217, |
|
"eval_steps_per_second": 50.889, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 40.25, |
|
"eval_loss": 2.2507264614105225, |
|
"eval_runtime": 49.5404, |
|
"eval_samples_per_second": 807.422, |
|
"eval_steps_per_second": 50.464, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 40.42, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.3826, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 40.42, |
|
"eval_loss": 2.2505505084991455, |
|
"eval_runtime": 49.5643, |
|
"eval_samples_per_second": 807.033, |
|
"eval_steps_per_second": 50.44, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 40.59, |
|
"eval_loss": 2.2630043029785156, |
|
"eval_runtime": 48.8805, |
|
"eval_samples_per_second": 818.322, |
|
"eval_steps_per_second": 51.145, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 40.76, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.3842, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 40.76, |
|
"eval_loss": 2.27164363861084, |
|
"eval_runtime": 49.401, |
|
"eval_samples_per_second": 809.701, |
|
"eval_steps_per_second": 50.606, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 40.93, |
|
"eval_loss": 2.264181613922119, |
|
"eval_runtime": 49.4342, |
|
"eval_samples_per_second": 809.156, |
|
"eval_steps_per_second": 50.572, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 41.09, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.3866, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 41.09, |
|
"eval_loss": 2.245126485824585, |
|
"eval_runtime": 49.626, |
|
"eval_samples_per_second": 806.029, |
|
"eval_steps_per_second": 50.377, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 41.26, |
|
"eval_loss": 2.2520625591278076, |
|
"eval_runtime": 49.5425, |
|
"eval_samples_per_second": 807.388, |
|
"eval_steps_per_second": 50.462, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.3857, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"eval_loss": 2.2457118034362793, |
|
"eval_runtime": 48.9259, |
|
"eval_samples_per_second": 817.562, |
|
"eval_steps_per_second": 51.098, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"eval_loss": 2.2574808597564697, |
|
"eval_runtime": 49.7757, |
|
"eval_samples_per_second": 803.605, |
|
"eval_steps_per_second": 50.225, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 41.77, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.3943, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 41.77, |
|
"eval_loss": 2.265901565551758, |
|
"eval_runtime": 49.5814, |
|
"eval_samples_per_second": 806.755, |
|
"eval_steps_per_second": 50.422, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 41.94, |
|
"eval_loss": 2.260754346847534, |
|
"eval_runtime": 48.9543, |
|
"eval_samples_per_second": 817.089, |
|
"eval_steps_per_second": 51.068, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 42.1, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.387, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 42.1, |
|
"eval_loss": 2.2687227725982666, |
|
"eval_runtime": 49.4379, |
|
"eval_samples_per_second": 809.096, |
|
"eval_steps_per_second": 50.569, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 42.27, |
|
"eval_loss": 2.271784543991089, |
|
"eval_runtime": 49.2241, |
|
"eval_samples_per_second": 812.611, |
|
"eval_steps_per_second": 50.788, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 42.44, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.387, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 42.44, |
|
"eval_loss": 2.262915849685669, |
|
"eval_runtime": 49.9903, |
|
"eval_samples_per_second": 800.156, |
|
"eval_steps_per_second": 50.01, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"eval_loss": 2.2282731533050537, |
|
"eval_runtime": 50.2394, |
|
"eval_samples_per_second": 796.188, |
|
"eval_steps_per_second": 49.762, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.3804, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"eval_loss": 2.2422289848327637, |
|
"eval_runtime": 49.3037, |
|
"eval_samples_per_second": 811.298, |
|
"eval_steps_per_second": 50.706, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"eval_loss": 2.243112802505493, |
|
"eval_runtime": 50.264, |
|
"eval_samples_per_second": 795.798, |
|
"eval_steps_per_second": 49.737, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.3842, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"eval_loss": 2.268855094909668, |
|
"eval_runtime": 49.5735, |
|
"eval_samples_per_second": 806.883, |
|
"eval_steps_per_second": 50.43, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 43.28, |
|
"eval_loss": 2.2586092948913574, |
|
"eval_runtime": 49.7496, |
|
"eval_samples_per_second": 804.027, |
|
"eval_steps_per_second": 50.252, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.3856, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"eval_loss": 2.259028434753418, |
|
"eval_runtime": 49.8461, |
|
"eval_samples_per_second": 802.47, |
|
"eval_steps_per_second": 50.154, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 43.62, |
|
"eval_loss": 2.2602360248565674, |
|
"eval_runtime": 50.6446, |
|
"eval_samples_per_second": 789.817, |
|
"eval_steps_per_second": 49.364, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 43.79, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.3843, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 43.79, |
|
"eval_loss": 2.255709648132324, |
|
"eval_runtime": 49.1117, |
|
"eval_samples_per_second": 814.469, |
|
"eval_steps_per_second": 50.904, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 43.96, |
|
"eval_loss": 2.2776308059692383, |
|
"eval_runtime": 49.152, |
|
"eval_samples_per_second": 813.801, |
|
"eval_steps_per_second": 50.863, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 44.13, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.3891, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 44.13, |
|
"eval_loss": 2.255366325378418, |
|
"eval_runtime": 49.1689, |
|
"eval_samples_per_second": 813.522, |
|
"eval_steps_per_second": 50.845, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 44.29, |
|
"eval_loss": 2.2615368366241455, |
|
"eval_runtime": 49.9655, |
|
"eval_samples_per_second": 800.553, |
|
"eval_steps_per_second": 50.035, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 44.46, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.3811, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 44.46, |
|
"eval_loss": 2.259124517440796, |
|
"eval_runtime": 49.6149, |
|
"eval_samples_per_second": 806.21, |
|
"eval_steps_per_second": 50.388, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 44.63, |
|
"eval_loss": 2.259974718093872, |
|
"eval_runtime": 48.8534, |
|
"eval_samples_per_second": 818.777, |
|
"eval_steps_per_second": 51.174, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.3874, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_loss": 2.259488582611084, |
|
"eval_runtime": 49.484, |
|
"eval_samples_per_second": 808.342, |
|
"eval_steps_per_second": 50.521, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 44.97, |
|
"eval_loss": 2.2761764526367188, |
|
"eval_runtime": 49.5444, |
|
"eval_samples_per_second": 807.357, |
|
"eval_steps_per_second": 50.46, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 45.14, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.3822, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 45.14, |
|
"eval_loss": 2.2516047954559326, |
|
"eval_runtime": 49.0692, |
|
"eval_samples_per_second": 815.176, |
|
"eval_steps_per_second": 50.948, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 45.3, |
|
"eval_loss": 2.2529869079589844, |
|
"eval_runtime": 49.0866, |
|
"eval_samples_per_second": 814.886, |
|
"eval_steps_per_second": 50.93, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 45.47, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.3933, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 45.47, |
|
"eval_loss": 2.265183210372925, |
|
"eval_runtime": 48.9694, |
|
"eval_samples_per_second": 816.837, |
|
"eval_steps_per_second": 51.052, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 45.64, |
|
"eval_loss": 2.2480199337005615, |
|
"eval_runtime": 48.9374, |
|
"eval_samples_per_second": 817.371, |
|
"eval_steps_per_second": 51.086, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 45.81, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.3853, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 45.81, |
|
"eval_loss": 2.2716729640960693, |
|
"eval_runtime": 49.0277, |
|
"eval_samples_per_second": 815.865, |
|
"eval_steps_per_second": 50.992, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 45.98, |
|
"eval_loss": 2.2568676471710205, |
|
"eval_runtime": 49.4939, |
|
"eval_samples_per_second": 808.181, |
|
"eval_steps_per_second": 50.511, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.3917, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"eval_loss": 2.2564427852630615, |
|
"eval_runtime": 49.3075, |
|
"eval_samples_per_second": 811.235, |
|
"eval_steps_per_second": 50.702, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 46.31, |
|
"eval_loss": 2.2512264251708984, |
|
"eval_runtime": 49.6159, |
|
"eval_samples_per_second": 806.193, |
|
"eval_steps_per_second": 50.387, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.3859, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"eval_loss": 2.2611992359161377, |
|
"eval_runtime": 49.8507, |
|
"eval_samples_per_second": 802.396, |
|
"eval_steps_per_second": 50.15, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 46.65, |
|
"eval_loss": 2.2609057426452637, |
|
"eval_runtime": 49.439, |
|
"eval_samples_per_second": 809.077, |
|
"eval_steps_per_second": 50.567, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.3879, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"eval_loss": 2.255183219909668, |
|
"eval_runtime": 50.0243, |
|
"eval_samples_per_second": 799.611, |
|
"eval_steps_per_second": 49.976, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_loss": 2.2568256855010986, |
|
"eval_runtime": 48.9637, |
|
"eval_samples_per_second": 816.932, |
|
"eval_steps_per_second": 51.058, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 47.16, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.3823, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 47.16, |
|
"eval_loss": 2.250671148300171, |
|
"eval_runtime": 49.1163, |
|
"eval_samples_per_second": 814.393, |
|
"eval_steps_per_second": 50.9, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"eval_loss": 2.2761969566345215, |
|
"eval_runtime": 49.444, |
|
"eval_samples_per_second": 808.995, |
|
"eval_steps_per_second": 50.562, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 47.49, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.388, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 47.49, |
|
"eval_loss": 2.252157211303711, |
|
"eval_runtime": 48.9857, |
|
"eval_samples_per_second": 816.565, |
|
"eval_steps_per_second": 51.035, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 47.66, |
|
"eval_loss": 2.2531578540802, |
|
"eval_runtime": 48.9438, |
|
"eval_samples_per_second": 817.263, |
|
"eval_steps_per_second": 51.079, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.3773, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"eval_loss": 2.2489843368530273, |
|
"eval_runtime": 49.0407, |
|
"eval_samples_per_second": 815.65, |
|
"eval_steps_per_second": 50.978, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 2.2648425102233887, |
|
"eval_runtime": 49.5178, |
|
"eval_samples_per_second": 807.791, |
|
"eval_steps_per_second": 50.487, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 48.17, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.3828, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 48.17, |
|
"eval_loss": 2.25002384185791, |
|
"eval_runtime": 49.6812, |
|
"eval_samples_per_second": 805.134, |
|
"eval_steps_per_second": 50.321, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 48.34, |
|
"eval_loss": 2.253399610519409, |
|
"eval_runtime": 49.4626, |
|
"eval_samples_per_second": 808.692, |
|
"eval_steps_per_second": 50.543, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.3816, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"eval_loss": 2.251549482345581, |
|
"eval_runtime": 49.4033, |
|
"eval_samples_per_second": 809.663, |
|
"eval_steps_per_second": 50.604, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 48.67, |
|
"eval_loss": 2.2701914310455322, |
|
"eval_runtime": 49.577, |
|
"eval_samples_per_second": 806.826, |
|
"eval_steps_per_second": 50.427, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.3784, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"eval_loss": 2.2583844661712646, |
|
"eval_runtime": 49.2469, |
|
"eval_samples_per_second": 812.233, |
|
"eval_steps_per_second": 50.765, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_loss": 2.23818039894104, |
|
"eval_runtime": 48.9834, |
|
"eval_samples_per_second": 816.603, |
|
"eval_steps_per_second": 51.038, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.3863, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"eval_loss": 2.260406732559204, |
|
"eval_runtime": 49.569, |
|
"eval_samples_per_second": 806.956, |
|
"eval_steps_per_second": 50.435, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 49.35, |
|
"eval_loss": 2.2607226371765137, |
|
"eval_runtime": 49.5423, |
|
"eval_samples_per_second": 807.39, |
|
"eval_steps_per_second": 50.462, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 49.51, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.3863, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 49.51, |
|
"eval_loss": 2.26461124420166, |
|
"eval_runtime": 48.9942, |
|
"eval_samples_per_second": 816.423, |
|
"eval_steps_per_second": 51.026, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"eval_loss": 2.2533907890319824, |
|
"eval_runtime": 49.998, |
|
"eval_samples_per_second": 800.033, |
|
"eval_steps_per_second": 50.002, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.3873, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"eval_loss": 2.2741761207580566, |
|
"eval_runtime": 49.1748, |
|
"eval_samples_per_second": 813.425, |
|
"eval_steps_per_second": 50.839, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 50.02, |
|
"eval_loss": 2.2686805725097656, |
|
"eval_runtime": 49.0988, |
|
"eval_samples_per_second": 814.684, |
|
"eval_steps_per_second": 50.918, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 50.19, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.39, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 50.19, |
|
"eval_loss": 2.2580976486206055, |
|
"eval_runtime": 49.688, |
|
"eval_samples_per_second": 805.024, |
|
"eval_steps_per_second": 50.314, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 50.36, |
|
"eval_loss": 2.2459537982940674, |
|
"eval_runtime": 49.7136, |
|
"eval_samples_per_second": 804.608, |
|
"eval_steps_per_second": 50.288, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"learning_rate": 0.0, |
|
"loss": 2.3937, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"eval_loss": 2.264155387878418, |
|
"eval_runtime": 49.6839, |
|
"eval_samples_per_second": 805.089, |
|
"eval_steps_per_second": 50.318, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"step": 2400000, |
|
"total_flos": 8.417954735470524e+17, |
|
"train_loss": 2.392315192057292, |
|
"train_runtime": 173434.012, |
|
"train_samples_per_second": 221.41, |
|
"train_steps_per_second": 13.838 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 51, |
|
"save_steps": 32000, |
|
"total_flos": 8.417954735470524e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|