{ "best_metric": 2.671285629272461, "best_model_checkpoint": "./model_tweets_2020_Q2/checkpoint-32000", "epoch": 19.569471624266146, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "eval_loss": 2.635031223297119, "eval_runtime": 126.0563, "eval_samples_per_second": 819.277, "eval_steps_per_second": 51.207, "step": 8000 }, { "epoch": 0.13, "learning_rate": 9.939131159843243e-06, "loss": 2.7848, "step": 16000 }, { "epoch": 0.13, "eval_loss": 2.6555588245391846, "eval_runtime": 126.7417, "eval_samples_per_second": 814.846, "eval_steps_per_second": 50.93, "step": 16000 }, { "epoch": 0.2, "eval_loss": 2.6695027351379395, "eval_runtime": 125.9524, "eval_samples_per_second": 819.953, "eval_steps_per_second": 51.25, "step": 24000 }, { "epoch": 0.26, "learning_rate": 9.872425581589261e-06, "loss": 2.7545, "step": 32000 }, { "epoch": 0.26, "eval_loss": 2.671285629272461, "eval_runtime": 126.9886, "eval_samples_per_second": 813.262, "eval_steps_per_second": 50.831, "step": 32000 }, { "epoch": 0.33, "eval_loss": 2.708911895751953, "eval_runtime": 126.0433, "eval_samples_per_second": 819.361, "eval_steps_per_second": 51.213, "step": 40000 }, { "epoch": 0.39, "learning_rate": 9.80572000333528e-06, "loss": 2.7717, "step": 48000 }, { "epoch": 0.39, "eval_loss": 2.7143805027008057, "eval_runtime": 126.2378, "eval_samples_per_second": 818.099, "eval_steps_per_second": 51.134, "step": 48000 }, { "epoch": 0.46, "eval_loss": 2.7240307331085205, "eval_runtime": 125.5002, "eval_samples_per_second": 822.907, "eval_steps_per_second": 51.434, "step": 56000 }, { "epoch": 0.52, "learning_rate": 9.739014425081299e-06, "loss": 2.8043, "step": 64000 }, { "epoch": 0.52, "eval_loss": 2.749925374984741, "eval_runtime": 126.3275, "eval_samples_per_second": 817.518, "eval_steps_per_second": 51.097, "step": 64000 }, { "epoch": 0.59, "eval_loss": 2.770448684692383, "eval_runtime": 115.1543, "eval_samples_per_second": 896.84, "eval_steps_per_second": 56.055, "step": 72000 }, { "epoch": 0.65, "learning_rate": 9.672308846827316e-06, "loss": 2.8401, "step": 80000 }, { "epoch": 0.65, "eval_loss": 2.782008409500122, "eval_runtime": 116.1441, "eval_samples_per_second": 889.197, "eval_steps_per_second": 55.578, "step": 80000 }, { "epoch": 0.72, "eval_loss": 2.8068478107452393, "eval_runtime": 116.2984, "eval_samples_per_second": 888.017, "eval_steps_per_second": 55.504, "step": 88000 }, { "epoch": 0.78, "learning_rate": 9.605603268573334e-06, "loss": 2.8723, "step": 96000 }, { "epoch": 0.78, "eval_loss": 2.8150370121002197, "eval_runtime": 116.0456, "eval_samples_per_second": 889.952, "eval_steps_per_second": 55.625, "step": 96000 }, { "epoch": 0.85, "eval_loss": 2.8410351276397705, "eval_runtime": 114.7666, "eval_samples_per_second": 899.87, "eval_steps_per_second": 56.245, "step": 104000 }, { "epoch": 0.91, "learning_rate": 9.538897690319354e-06, "loss": 2.9004, "step": 112000 }, { "epoch": 0.91, "eval_loss": 2.865703582763672, "eval_runtime": 115.4628, "eval_samples_per_second": 894.444, "eval_steps_per_second": 55.905, "step": 112000 }, { "epoch": 0.98, "eval_loss": 2.882617950439453, "eval_runtime": 116.5627, "eval_samples_per_second": 886.004, "eval_steps_per_second": 55.378, "step": 120000 }, { "epoch": 1.04, "learning_rate": 9.472192112065373e-06, "loss": 2.9396, "step": 128000 }, { "epoch": 1.04, "eval_loss": 2.9071033000946045, "eval_runtime": 116.4638, "eval_samples_per_second": 886.756, "eval_steps_per_second": 55.425, "step": 128000 }, { "epoch": 1.11, "eval_loss": 2.949030876159668, "eval_runtime": 115.1354, "eval_samples_per_second": 896.987, "eval_steps_per_second": 56.064, "step": 136000 }, { "epoch": 1.17, "learning_rate": 9.405486533811392e-06, "loss": 2.9801, "step": 144000 }, { "epoch": 1.17, "eval_loss": 2.951450824737549, "eval_runtime": 114.8755, "eval_samples_per_second": 899.017, "eval_steps_per_second": 56.191, "step": 144000 }, { "epoch": 1.24, "eval_loss": 2.9862585067749023, "eval_runtime": 116.1529, "eval_samples_per_second": 889.129, "eval_steps_per_second": 55.573, "step": 152000 }, { "epoch": 1.3, "learning_rate": 9.338780955557409e-06, "loss": 3.0173, "step": 160000 }, { "epoch": 1.3, "eval_loss": 2.991586685180664, "eval_runtime": 116.0798, "eval_samples_per_second": 889.69, "eval_steps_per_second": 55.608, "step": 160000 }, { "epoch": 1.37, "eval_loss": 3.0230655670166016, "eval_runtime": 115.2701, "eval_samples_per_second": 895.939, "eval_steps_per_second": 55.999, "step": 168000 }, { "epoch": 1.44, "learning_rate": 9.272075377303427e-06, "loss": 3.0674, "step": 176000 }, { "epoch": 1.44, "eval_loss": 3.0447049140930176, "eval_runtime": 115.1489, "eval_samples_per_second": 896.882, "eval_steps_per_second": 56.058, "step": 176000 }, { "epoch": 1.5, "eval_loss": 3.0638155937194824, "eval_runtime": 116.1134, "eval_samples_per_second": 889.432, "eval_steps_per_second": 55.592, "step": 184000 }, { "epoch": 1.57, "learning_rate": 9.205369799049446e-06, "loss": 3.1059, "step": 192000 }, { "epoch": 1.57, "eval_loss": 3.094524383544922, "eval_runtime": 114.9725, "eval_samples_per_second": 898.258, "eval_steps_per_second": 56.144, "step": 192000 }, { "epoch": 1.63, "eval_loss": 3.1008002758026123, "eval_runtime": 116.6453, "eval_samples_per_second": 885.377, "eval_steps_per_second": 55.339, "step": 200000 }, { "epoch": 1.7, "learning_rate": 9.138664220795464e-06, "loss": 3.1283, "step": 208000 }, { "epoch": 1.7, "eval_loss": 3.1256680488586426, "eval_runtime": 115.0624, "eval_samples_per_second": 897.556, "eval_steps_per_second": 56.1, "step": 208000 }, { "epoch": 1.76, "eval_loss": 3.1262004375457764, "eval_runtime": 114.9392, "eval_samples_per_second": 898.518, "eval_steps_per_second": 56.16, "step": 216000 }, { "epoch": 1.83, "learning_rate": 9.071958642541483e-06, "loss": 3.1684, "step": 224000 }, { "epoch": 1.83, "eval_loss": 3.152285099029541, "eval_runtime": 115.5854, "eval_samples_per_second": 893.495, "eval_steps_per_second": 55.846, "step": 224000 }, { "epoch": 1.89, "eval_loss": 3.1842401027679443, "eval_runtime": 114.9021, "eval_samples_per_second": 898.809, "eval_steps_per_second": 56.178, "step": 232000 }, { "epoch": 1.96, "learning_rate": 9.005253064287502e-06, "loss": 3.1966, "step": 240000 }, { "epoch": 1.96, "eval_loss": 3.1820068359375, "eval_runtime": 117.5401, "eval_samples_per_second": 878.637, "eval_steps_per_second": 54.917, "step": 240000 }, { "epoch": 2.02, "eval_loss": 3.197575569152832, "eval_runtime": 119.2185, "eval_samples_per_second": 866.266, "eval_steps_per_second": 54.144, "step": 248000 }, { "epoch": 2.09, "learning_rate": 8.93854748603352e-06, "loss": 3.2055, "step": 256000 }, { "epoch": 2.09, "eval_loss": 3.2012782096862793, "eval_runtime": 116.0617, "eval_samples_per_second": 889.829, "eval_steps_per_second": 55.617, "step": 256000 }, { "epoch": 2.15, "eval_loss": 3.219731092453003, "eval_runtime": 115.3459, "eval_samples_per_second": 895.351, "eval_steps_per_second": 55.962, "step": 264000 }, { "epoch": 2.22, "learning_rate": 8.871841907779539e-06, "loss": 3.2186, "step": 272000 }, { "epoch": 2.22, "eval_loss": 3.2258596420288086, "eval_runtime": 117.0782, "eval_samples_per_second": 882.102, "eval_steps_per_second": 55.134, "step": 272000 }, { "epoch": 2.28, "eval_loss": 3.2410128116607666, "eval_runtime": 115.7081, "eval_samples_per_second": 892.547, "eval_steps_per_second": 55.787, "step": 280000 }, { "epoch": 2.35, "learning_rate": 8.805136329525557e-06, "loss": 3.2518, "step": 288000 }, { "epoch": 2.35, "eval_loss": 3.2448806762695312, "eval_runtime": 116.2706, "eval_samples_per_second": 888.23, "eval_steps_per_second": 55.517, "step": 288000 }, { "epoch": 2.41, "eval_loss": 3.2685933113098145, "eval_runtime": 117.0296, "eval_samples_per_second": 882.469, "eval_steps_per_second": 55.157, "step": 296000 }, { "epoch": 2.48, "learning_rate": 8.738430751271576e-06, "loss": 3.2705, "step": 304000 }, { "epoch": 2.48, "eval_loss": 3.270232915878296, "eval_runtime": 115.7748, "eval_samples_per_second": 892.034, "eval_steps_per_second": 55.755, "step": 304000 }, { "epoch": 2.54, "eval_loss": 3.271563768386841, "eval_runtime": 114.7956, "eval_samples_per_second": 899.643, "eval_steps_per_second": 56.23, "step": 312000 }, { "epoch": 2.61, "learning_rate": 8.671725173017595e-06, "loss": 3.2677, "step": 320000 }, { "epoch": 2.61, "eval_loss": 3.2934534549713135, "eval_runtime": 116.4472, "eval_samples_per_second": 886.883, "eval_steps_per_second": 55.433, "step": 320000 }, { "epoch": 2.67, "eval_loss": 3.2941575050354004, "eval_runtime": 115.658, "eval_samples_per_second": 892.934, "eval_steps_per_second": 55.811, "step": 328000 }, { "epoch": 2.74, "learning_rate": 8.605019594763613e-06, "loss": 3.2955, "step": 336000 }, { "epoch": 2.74, "eval_loss": 3.304429054260254, "eval_runtime": 115.4488, "eval_samples_per_second": 894.552, "eval_steps_per_second": 55.912, "step": 336000 }, { "epoch": 2.8, "eval_loss": 3.3109662532806396, "eval_runtime": 114.8039, "eval_samples_per_second": 899.577, "eval_steps_per_second": 56.226, "step": 344000 }, { "epoch": 2.87, "learning_rate": 8.538314016509632e-06, "loss": 3.2966, "step": 352000 }, { "epoch": 2.87, "eval_loss": 3.3053431510925293, "eval_runtime": 115.0477, "eval_samples_per_second": 897.671, "eval_steps_per_second": 56.107, "step": 352000 }, { "epoch": 2.94, "eval_loss": 3.3276007175445557, "eval_runtime": 115.8876, "eval_samples_per_second": 891.165, "eval_steps_per_second": 55.701, "step": 360000 }, { "epoch": 3.0, "learning_rate": 8.471608438255649e-06, "loss": 3.311, "step": 368000 }, { "epoch": 3.0, "eval_loss": 3.3256120681762695, "eval_runtime": 117.3196, "eval_samples_per_second": 880.288, "eval_steps_per_second": 55.021, "step": 368000 }, { "epoch": 3.07, "eval_loss": 3.3292236328125, "eval_runtime": 117.5646, "eval_samples_per_second": 878.453, "eval_steps_per_second": 54.906, "step": 376000 }, { "epoch": 3.13, "learning_rate": 8.404902860001667e-06, "loss": 3.3217, "step": 384000 }, { "epoch": 3.13, "eval_loss": 3.333477258682251, "eval_runtime": 116.7284, "eval_samples_per_second": 884.746, "eval_steps_per_second": 55.299, "step": 384000 }, { "epoch": 3.2, "eval_loss": 3.316025972366333, "eval_runtime": 118.1544, "eval_samples_per_second": 874.068, "eval_steps_per_second": 54.632, "step": 392000 }, { "epoch": 3.26, "learning_rate": 8.338197281747686e-06, "loss": 3.3145, "step": 400000 }, { "epoch": 3.26, "eval_loss": 3.337838649749756, "eval_runtime": 116.066, "eval_samples_per_second": 889.796, "eval_steps_per_second": 55.615, "step": 400000 }, { "epoch": 3.33, "eval_loss": 3.3306798934936523, "eval_runtime": 117.4533, "eval_samples_per_second": 879.285, "eval_steps_per_second": 54.958, "step": 408000 }, { "epoch": 3.39, "learning_rate": 8.271491703493705e-06, "loss": 3.3246, "step": 416000 }, { "epoch": 3.39, "eval_loss": 3.342693567276001, "eval_runtime": 115.6289, "eval_samples_per_second": 893.159, "eval_steps_per_second": 55.825, "step": 416000 }, { "epoch": 3.46, "eval_loss": 3.3543155193328857, "eval_runtime": 115.7056, "eval_samples_per_second": 892.567, "eval_steps_per_second": 55.788, "step": 424000 }, { "epoch": 3.52, "learning_rate": 8.204786125239725e-06, "loss": 3.3131, "step": 432000 }, { "epoch": 3.52, "eval_loss": 3.340524196624756, "eval_runtime": 116.2105, "eval_samples_per_second": 888.689, "eval_steps_per_second": 55.546, "step": 432000 }, { "epoch": 3.59, "eval_loss": 3.336106777191162, "eval_runtime": 114.9141, "eval_samples_per_second": 898.714, "eval_steps_per_second": 56.172, "step": 440000 }, { "epoch": 3.65, "learning_rate": 8.138080546985743e-06, "loss": 3.3266, "step": 448000 }, { "epoch": 3.65, "eval_loss": 3.370443344116211, "eval_runtime": 115.193, "eval_samples_per_second": 896.539, "eval_steps_per_second": 56.036, "step": 448000 }, { "epoch": 3.72, "eval_loss": 3.354923963546753, "eval_runtime": 115.5245, "eval_samples_per_second": 893.967, "eval_steps_per_second": 55.876, "step": 456000 }, { "epoch": 3.78, "learning_rate": 8.07137496873176e-06, "loss": 3.3358, "step": 464000 }, { "epoch": 3.78, "eval_loss": 3.360276937484741, "eval_runtime": 116.1443, "eval_samples_per_second": 889.196, "eval_steps_per_second": 55.577, "step": 464000 }, { "epoch": 3.85, "eval_loss": 3.3641881942749023, "eval_runtime": 115.4508, "eval_samples_per_second": 894.537, "eval_steps_per_second": 55.911, "step": 472000 }, { "epoch": 3.91, "learning_rate": 8.004669390477779e-06, "loss": 3.3385, "step": 480000 }, { "epoch": 3.91, "eval_loss": 3.3572633266448975, "eval_runtime": 114.9449, "eval_samples_per_second": 898.474, "eval_steps_per_second": 56.157, "step": 480000 }, { "epoch": 3.98, "eval_loss": 3.3658275604248047, "eval_runtime": 115.0066, "eval_samples_per_second": 897.992, "eval_steps_per_second": 56.127, "step": 488000 }, { "epoch": 4.04, "learning_rate": 7.937963812223798e-06, "loss": 3.3375, "step": 496000 }, { "epoch": 4.04, "eval_loss": 3.345881700515747, "eval_runtime": 115.316, "eval_samples_per_second": 895.583, "eval_steps_per_second": 55.977, "step": 496000 }, { "epoch": 4.11, "eval_loss": 3.3702762126922607, "eval_runtime": 114.9631, "eval_samples_per_second": 898.331, "eval_steps_per_second": 56.148, "step": 504000 }, { "epoch": 4.17, "learning_rate": 7.871258233969816e-06, "loss": 3.3237, "step": 512000 }, { "epoch": 4.17, "eval_loss": 3.3564202785491943, "eval_runtime": 116.3254, "eval_samples_per_second": 887.811, "eval_steps_per_second": 55.491, "step": 512000 }, { "epoch": 4.24, "eval_loss": 3.3553359508514404, "eval_runtime": 115.6968, "eval_samples_per_second": 892.635, "eval_steps_per_second": 55.792, "step": 520000 }, { "epoch": 4.31, "learning_rate": 7.804552655715835e-06, "loss": 3.34, "step": 528000 }, { "epoch": 4.31, "eval_loss": 3.35756778717041, "eval_runtime": 114.9307, "eval_samples_per_second": 898.585, "eval_steps_per_second": 56.164, "step": 528000 }, { "epoch": 4.37, "eval_loss": 3.3548436164855957, "eval_runtime": 116.9698, "eval_samples_per_second": 882.92, "eval_steps_per_second": 55.185, "step": 536000 }, { "epoch": 4.44, "learning_rate": 7.737847077461853e-06, "loss": 3.3247, "step": 544000 }, { "epoch": 4.44, "eval_loss": 3.3525540828704834, "eval_runtime": 114.951, "eval_samples_per_second": 898.427, "eval_steps_per_second": 56.154, "step": 544000 }, { "epoch": 4.5, "eval_loss": 3.367372512817383, "eval_runtime": 116.891, "eval_samples_per_second": 883.515, "eval_steps_per_second": 55.222, "step": 552000 }, { "epoch": 4.57, "learning_rate": 7.671141499207872e-06, "loss": 3.318, "step": 560000 }, { "epoch": 4.57, "eval_loss": 3.3607981204986572, "eval_runtime": 115.5047, "eval_samples_per_second": 894.12, "eval_steps_per_second": 55.885, "step": 560000 }, { "epoch": 4.63, "eval_loss": 3.3527328968048096, "eval_runtime": 116.278, "eval_samples_per_second": 888.173, "eval_steps_per_second": 55.514, "step": 568000 }, { "epoch": 4.7, "learning_rate": 7.604435920953891e-06, "loss": 3.3318, "step": 576000 }, { "epoch": 4.7, "eval_loss": 3.3600049018859863, "eval_runtime": 115.0864, "eval_samples_per_second": 897.369, "eval_steps_per_second": 56.088, "step": 576000 }, { "epoch": 4.76, "eval_loss": 3.366177797317505, "eval_runtime": 116.1802, "eval_samples_per_second": 888.921, "eval_steps_per_second": 55.56, "step": 584000 }, { "epoch": 4.83, "learning_rate": 7.537730342699909e-06, "loss": 3.3211, "step": 592000 }, { "epoch": 4.83, "eval_loss": 3.36027193069458, "eval_runtime": 115.5036, "eval_samples_per_second": 894.128, "eval_steps_per_second": 55.886, "step": 592000 }, { "epoch": 4.89, "eval_loss": 3.364029884338379, "eval_runtime": 114.9019, "eval_samples_per_second": 898.81, "eval_steps_per_second": 56.178, "step": 600000 }, { "epoch": 4.96, "learning_rate": 7.471024764445928e-06, "loss": 3.3344, "step": 608000 }, { "epoch": 4.96, "eval_loss": 3.376020669937134, "eval_runtime": 115.5882, "eval_samples_per_second": 893.473, "eval_steps_per_second": 55.845, "step": 608000 }, { "epoch": 5.02, "eval_loss": 3.3876428604125977, "eval_runtime": 115.0301, "eval_samples_per_second": 897.809, "eval_steps_per_second": 56.116, "step": 616000 }, { "epoch": 5.09, "learning_rate": 7.4043191861919465e-06, "loss": 3.331, "step": 624000 }, { "epoch": 5.09, "eval_loss": 3.351862668991089, "eval_runtime": 115.49, "eval_samples_per_second": 894.233, "eval_steps_per_second": 55.892, "step": 624000 }, { "epoch": 5.15, "eval_loss": 3.373405933380127, "eval_runtime": 115.9525, "eval_samples_per_second": 890.666, "eval_steps_per_second": 55.669, "step": 632000 }, { "epoch": 5.22, "learning_rate": 7.337613607937964e-06, "loss": 3.3293, "step": 640000 }, { "epoch": 5.22, "eval_loss": 3.373460531234741, "eval_runtime": 115.1854, "eval_samples_per_second": 896.598, "eval_steps_per_second": 56.04, "step": 640000 }, { "epoch": 5.28, "eval_loss": 3.3703157901763916, "eval_runtime": 115.0036, "eval_samples_per_second": 898.016, "eval_steps_per_second": 56.129, "step": 648000 }, { "epoch": 5.35, "learning_rate": 7.270908029683983e-06, "loss": 3.3317, "step": 656000 }, { "epoch": 5.35, "eval_loss": 3.382647752761841, "eval_runtime": 115.8086, "eval_samples_per_second": 891.773, "eval_steps_per_second": 55.739, "step": 656000 }, { "epoch": 5.41, "eval_loss": 3.3825886249542236, "eval_runtime": 115.3628, "eval_samples_per_second": 895.219, "eval_steps_per_second": 55.954, "step": 664000 }, { "epoch": 5.48, "learning_rate": 7.2042024514300015e-06, "loss": 3.3291, "step": 672000 }, { "epoch": 5.48, "eval_loss": 3.391868829727173, "eval_runtime": 115.4028, "eval_samples_per_second": 894.909, "eval_steps_per_second": 55.935, "step": 672000 }, { "epoch": 5.54, "eval_loss": 3.378626585006714, "eval_runtime": 115.4498, "eval_samples_per_second": 894.545, "eval_steps_per_second": 55.912, "step": 680000 }, { "epoch": 5.61, "learning_rate": 7.13749687317602e-06, "loss": 3.3423, "step": 688000 }, { "epoch": 5.61, "eval_loss": 3.377542734146118, "eval_runtime": 115.2629, "eval_samples_per_second": 895.995, "eval_steps_per_second": 56.002, "step": 688000 }, { "epoch": 5.68, "eval_loss": 3.373429298400879, "eval_runtime": 115.5205, "eval_samples_per_second": 893.997, "eval_steps_per_second": 55.878, "step": 696000 }, { "epoch": 5.74, "learning_rate": 7.070791294922038e-06, "loss": 3.3364, "step": 704000 }, { "epoch": 5.74, "eval_loss": 3.372532367706299, "eval_runtime": 115.5543, "eval_samples_per_second": 893.735, "eval_steps_per_second": 55.861, "step": 704000 }, { "epoch": 5.81, "eval_loss": 3.3855302333831787, "eval_runtime": 115.9379, "eval_samples_per_second": 890.778, "eval_steps_per_second": 55.676, "step": 712000 }, { "epoch": 5.87, "learning_rate": 7.0040857166680564e-06, "loss": 3.347, "step": 720000 }, { "epoch": 5.87, "eval_loss": 3.3774046897888184, "eval_runtime": 114.6511, "eval_samples_per_second": 900.776, "eval_steps_per_second": 56.301, "step": 720000 }, { "epoch": 5.94, "eval_loss": 3.3717195987701416, "eval_runtime": 115.9173, "eval_samples_per_second": 890.937, "eval_steps_per_second": 55.686, "step": 728000 }, { "epoch": 6.0, "learning_rate": 6.937380138414076e-06, "loss": 3.3311, "step": 736000 }, { "epoch": 6.0, "eval_loss": 3.392944097518921, "eval_runtime": 115.7013, "eval_samples_per_second": 892.6, "eval_steps_per_second": 55.79, "step": 736000 }, { "epoch": 6.07, "eval_loss": 3.389941930770874, "eval_runtime": 117.4363, "eval_samples_per_second": 879.413, "eval_steps_per_second": 54.966, "step": 744000 }, { "epoch": 6.13, "learning_rate": 6.8706745601600945e-06, "loss": 3.3445, "step": 752000 }, { "epoch": 6.13, "eval_loss": 3.3985016345977783, "eval_runtime": 115.5779, "eval_samples_per_second": 893.553, "eval_steps_per_second": 55.85, "step": 752000 }, { "epoch": 6.2, "eval_loss": 3.3865506649017334, "eval_runtime": 114.8487, "eval_samples_per_second": 899.227, "eval_steps_per_second": 56.204, "step": 760000 }, { "epoch": 6.26, "learning_rate": 6.803968981906113e-06, "loss": 3.345, "step": 768000 }, { "epoch": 6.26, "eval_loss": 3.3942770957946777, "eval_runtime": 115.533, "eval_samples_per_second": 893.901, "eval_steps_per_second": 55.871, "step": 768000 }, { "epoch": 6.33, "eval_loss": 3.373379945755005, "eval_runtime": 115.2598, "eval_samples_per_second": 896.019, "eval_steps_per_second": 56.004, "step": 776000 }, { "epoch": 6.39, "learning_rate": 6.737263403652131e-06, "loss": 3.3427, "step": 784000 }, { "epoch": 6.39, "eval_loss": 3.383202314376831, "eval_runtime": 114.9199, "eval_samples_per_second": 898.669, "eval_steps_per_second": 56.17, "step": 784000 }, { "epoch": 6.46, "eval_loss": 3.3966336250305176, "eval_runtime": 115.6206, "eval_samples_per_second": 893.223, "eval_steps_per_second": 55.829, "step": 792000 }, { "epoch": 6.52, "learning_rate": 6.6705578253981495e-06, "loss": 3.3406, "step": 800000 }, { "epoch": 6.52, "eval_loss": 3.3891854286193848, "eval_runtime": 115.5059, "eval_samples_per_second": 894.11, "eval_steps_per_second": 55.885, "step": 800000 }, { "epoch": 6.59, "eval_loss": 3.390401601791382, "eval_runtime": 116.1612, "eval_samples_per_second": 889.066, "eval_steps_per_second": 55.569, "step": 808000 }, { "epoch": 6.65, "learning_rate": 6.603852247144168e-06, "loss": 3.3406, "step": 816000 }, { "epoch": 6.65, "eval_loss": 3.386686086654663, "eval_runtime": 115.3671, "eval_samples_per_second": 895.186, "eval_steps_per_second": 55.952, "step": 816000 }, { "epoch": 6.72, "eval_loss": 3.390192747116089, "eval_runtime": 114.8586, "eval_samples_per_second": 899.149, "eval_steps_per_second": 56.2, "step": 824000 }, { "epoch": 6.78, "learning_rate": 6.537146668890187e-06, "loss": 3.3354, "step": 832000 }, { "epoch": 6.78, "eval_loss": 3.371840000152588, "eval_runtime": 115.0229, "eval_samples_per_second": 897.865, "eval_steps_per_second": 56.119, "step": 832000 }, { "epoch": 6.85, "eval_loss": 3.383141279220581, "eval_runtime": 115.453, "eval_samples_per_second": 894.52, "eval_steps_per_second": 55.91, "step": 840000 }, { "epoch": 6.91, "learning_rate": 6.4704410906362044e-06, "loss": 3.3521, "step": 848000 }, { "epoch": 6.91, "eval_loss": 3.3909192085266113, "eval_runtime": 115.5241, "eval_samples_per_second": 893.97, "eval_steps_per_second": 55.876, "step": 848000 }, { "epoch": 6.98, "eval_loss": 3.3798959255218506, "eval_runtime": 115.2184, "eval_samples_per_second": 896.342, "eval_steps_per_second": 56.024, "step": 856000 }, { "epoch": 7.05, "learning_rate": 6.403735512382223e-06, "loss": 3.3538, "step": 864000 }, { "epoch": 7.05, "eval_loss": 3.3828136920928955, "eval_runtime": 115.3784, "eval_samples_per_second": 895.098, "eval_steps_per_second": 55.946, "step": 864000 }, { "epoch": 7.11, "eval_loss": 3.378514051437378, "eval_runtime": 115.0377, "eval_samples_per_second": 897.749, "eval_steps_per_second": 56.112, "step": 872000 }, { "epoch": 7.18, "learning_rate": 6.337029934128242e-06, "loss": 3.3363, "step": 880000 }, { "epoch": 7.18, "eval_loss": 3.3993334770202637, "eval_runtime": 115.5145, "eval_samples_per_second": 894.043, "eval_steps_per_second": 55.88, "step": 880000 }, { "epoch": 7.24, "eval_loss": 3.3849687576293945, "eval_runtime": 114.7628, "eval_samples_per_second": 899.9, "eval_steps_per_second": 56.246, "step": 888000 }, { "epoch": 7.31, "learning_rate": 6.270324355874261e-06, "loss": 3.3341, "step": 896000 }, { "epoch": 7.31, "eval_loss": 3.3932485580444336, "eval_runtime": 115.0217, "eval_samples_per_second": 897.874, "eval_steps_per_second": 56.12, "step": 896000 }, { "epoch": 7.37, "eval_loss": 3.398083209991455, "eval_runtime": 115.1782, "eval_samples_per_second": 896.654, "eval_steps_per_second": 56.044, "step": 904000 }, { "epoch": 7.44, "learning_rate": 6.20361877762028e-06, "loss": 3.3458, "step": 912000 }, { "epoch": 7.44, "eval_loss": 3.393594741821289, "eval_runtime": 116.8302, "eval_samples_per_second": 883.975, "eval_steps_per_second": 55.251, "step": 912000 }, { "epoch": 7.5, "eval_loss": 3.4032301902770996, "eval_runtime": 115.4692, "eval_samples_per_second": 894.394, "eval_steps_per_second": 55.902, "step": 920000 }, { "epoch": 7.57, "learning_rate": 6.1369131993662975e-06, "loss": 3.3327, "step": 928000 }, { "epoch": 7.57, "eval_loss": 3.385192394256592, "eval_runtime": 115.7558, "eval_samples_per_second": 892.18, "eval_steps_per_second": 55.764, "step": 928000 }, { "epoch": 7.63, "eval_loss": 3.38653826713562, "eval_runtime": 116.1964, "eval_samples_per_second": 888.797, "eval_steps_per_second": 55.553, "step": 936000 }, { "epoch": 7.7, "learning_rate": 6.070207621112316e-06, "loss": 3.3507, "step": 944000 }, { "epoch": 7.7, "eval_loss": 3.390004873275757, "eval_runtime": 115.6497, "eval_samples_per_second": 892.999, "eval_steps_per_second": 55.815, "step": 944000 }, { "epoch": 7.76, "eval_loss": 3.3772072792053223, "eval_runtime": 115.4517, "eval_samples_per_second": 894.53, "eval_steps_per_second": 55.911, "step": 952000 }, { "epoch": 7.83, "learning_rate": 6.003502042858335e-06, "loss": 3.3493, "step": 960000 }, { "epoch": 7.83, "eval_loss": 3.388688802719116, "eval_runtime": 115.7986, "eval_samples_per_second": 891.85, "eval_steps_per_second": 55.743, "step": 960000 }, { "epoch": 7.89, "eval_loss": 3.395124912261963, "eval_runtime": 115.4739, "eval_samples_per_second": 894.358, "eval_steps_per_second": 55.9, "step": 968000 }, { "epoch": 7.96, "learning_rate": 5.936796464604353e-06, "loss": 3.3412, "step": 976000 }, { "epoch": 7.96, "eval_loss": 3.3833136558532715, "eval_runtime": 114.7504, "eval_samples_per_second": 899.997, "eval_steps_per_second": 56.253, "step": 976000 }, { "epoch": 8.02, "eval_loss": 3.381627321243286, "eval_runtime": 115.0253, "eval_samples_per_second": 897.846, "eval_steps_per_second": 56.118, "step": 984000 }, { "epoch": 8.09, "learning_rate": 5.870090886350371e-06, "loss": 3.3232, "step": 992000 }, { "epoch": 8.09, "eval_loss": 3.37522292137146, "eval_runtime": 114.2933, "eval_samples_per_second": 903.597, "eval_steps_per_second": 56.478, "step": 992000 }, { "epoch": 8.15, "eval_loss": 3.384525775909424, "eval_runtime": 115.119, "eval_samples_per_second": 897.115, "eval_steps_per_second": 56.072, "step": 1000000 }, { "epoch": 8.22, "learning_rate": 5.80338530809639e-06, "loss": 3.333, "step": 1008000 }, { "epoch": 8.22, "eval_loss": 3.3906686305999756, "eval_runtime": 115.1127, "eval_samples_per_second": 897.164, "eval_steps_per_second": 56.075, "step": 1008000 }, { "epoch": 8.28, "eval_loss": 3.3822684288024902, "eval_runtime": 114.8049, "eval_samples_per_second": 899.569, "eval_steps_per_second": 56.226, "step": 1016000 }, { "epoch": 8.35, "learning_rate": 5.736679729842408e-06, "loss": 3.3449, "step": 1024000 }, { "epoch": 8.35, "eval_loss": 3.3724589347839355, "eval_runtime": 114.8265, "eval_samples_per_second": 899.4, "eval_steps_per_second": 56.215, "step": 1024000 }, { "epoch": 8.41, "eval_loss": 3.37973952293396, "eval_runtime": 115.0872, "eval_samples_per_second": 897.363, "eval_steps_per_second": 56.088, "step": 1032000 }, { "epoch": 8.48, "learning_rate": 5.669974151588427e-06, "loss": 3.3336, "step": 1040000 }, { "epoch": 8.48, "eval_loss": 3.38781476020813, "eval_runtime": 116.3835, "eval_samples_per_second": 887.368, "eval_steps_per_second": 55.463, "step": 1040000 }, { "epoch": 8.55, "eval_loss": 3.384516716003418, "eval_runtime": 115.2938, "eval_samples_per_second": 895.755, "eval_steps_per_second": 55.987, "step": 1048000 }, { "epoch": 8.61, "learning_rate": 5.603268573334446e-06, "loss": 3.3307, "step": 1056000 }, { "epoch": 8.61, "eval_loss": 3.390652894973755, "eval_runtime": 116.7145, "eval_samples_per_second": 884.851, "eval_steps_per_second": 55.306, "step": 1056000 }, { "epoch": 8.68, "eval_loss": 3.3857922554016113, "eval_runtime": 115.6915, "eval_samples_per_second": 892.676, "eval_steps_per_second": 55.795, "step": 1064000 }, { "epoch": 8.74, "learning_rate": 5.536562995080464e-06, "loss": 3.3267, "step": 1072000 }, { "epoch": 8.74, "eval_loss": 3.3951947689056396, "eval_runtime": 115.1111, "eval_samples_per_second": 897.177, "eval_steps_per_second": 56.076, "step": 1072000 }, { "epoch": 8.81, "eval_loss": 3.391402006149292, "eval_runtime": 114.8898, "eval_samples_per_second": 898.905, "eval_steps_per_second": 56.184, "step": 1080000 }, { "epoch": 8.87, "learning_rate": 5.469857416826483e-06, "loss": 3.335, "step": 1088000 }, { "epoch": 8.87, "eval_loss": 3.3904380798339844, "eval_runtime": 116.7468, "eval_samples_per_second": 884.607, "eval_steps_per_second": 55.291, "step": 1088000 }, { "epoch": 8.94, "eval_loss": 3.3894879817962646, "eval_runtime": 115.0778, "eval_samples_per_second": 897.437, "eval_steps_per_second": 56.092, "step": 1096000 }, { "epoch": 9.0, "learning_rate": 5.403151838572501e-06, "loss": 3.3411, "step": 1104000 }, { "epoch": 9.0, "eval_loss": 3.395911455154419, "eval_runtime": 116.3802, "eval_samples_per_second": 887.393, "eval_steps_per_second": 55.465, "step": 1104000 }, { "epoch": 9.07, "eval_loss": 3.391462802886963, "eval_runtime": 115.5689, "eval_samples_per_second": 893.623, "eval_steps_per_second": 55.854, "step": 1112000 }, { "epoch": 9.13, "learning_rate": 5.33644626031852e-06, "loss": 3.3324, "step": 1120000 }, { "epoch": 9.13, "eval_loss": 3.4030401706695557, "eval_runtime": 115.7261, "eval_samples_per_second": 892.409, "eval_steps_per_second": 55.778, "step": 1120000 }, { "epoch": 9.2, "eval_loss": 3.4083750247955322, "eval_runtime": 118.5809, "eval_samples_per_second": 870.924, "eval_steps_per_second": 54.435, "step": 1128000 }, { "epoch": 9.26, "learning_rate": 5.269740682064538e-06, "loss": 3.3297, "step": 1136000 }, { "epoch": 9.26, "eval_loss": 3.402348518371582, "eval_runtime": 115.6049, "eval_samples_per_second": 893.344, "eval_steps_per_second": 55.837, "step": 1136000 }, { "epoch": 9.33, "eval_loss": 3.3967323303222656, "eval_runtime": 115.5344, "eval_samples_per_second": 893.889, "eval_steps_per_second": 55.871, "step": 1144000 }, { "epoch": 9.39, "learning_rate": 5.203035103810556e-06, "loss": 3.3492, "step": 1152000 }, { "epoch": 9.39, "eval_loss": 3.393101215362549, "eval_runtime": 115.5769, "eval_samples_per_second": 893.561, "eval_steps_per_second": 55.85, "step": 1152000 }, { "epoch": 9.46, "eval_loss": 3.4064693450927734, "eval_runtime": 114.7523, "eval_samples_per_second": 899.982, "eval_steps_per_second": 56.252, "step": 1160000 }, { "epoch": 9.52, "learning_rate": 5.136329525556575e-06, "loss": 3.3317, "step": 1168000 }, { "epoch": 9.52, "eval_loss": 3.3905270099639893, "eval_runtime": 115.5534, "eval_samples_per_second": 893.743, "eval_steps_per_second": 55.862, "step": 1168000 }, { "epoch": 9.59, "eval_loss": 3.402090072631836, "eval_runtime": 114.6435, "eval_samples_per_second": 900.836, "eval_steps_per_second": 56.305, "step": 1176000 }, { "epoch": 9.65, "learning_rate": 5.0696239473025935e-06, "loss": 3.3447, "step": 1184000 }, { "epoch": 9.65, "eval_loss": 3.400120735168457, "eval_runtime": 116.0858, "eval_samples_per_second": 889.643, "eval_steps_per_second": 55.605, "step": 1184000 }, { "epoch": 9.72, "eval_loss": 3.3942949771881104, "eval_runtime": 114.8922, "eval_samples_per_second": 898.886, "eval_steps_per_second": 56.183, "step": 1192000 }, { "epoch": 9.78, "learning_rate": 5.002918369048611e-06, "loss": 3.3377, "step": 1200000 }, { "epoch": 9.78, "eval_loss": 3.3970954418182373, "eval_runtime": 114.8942, "eval_samples_per_second": 898.871, "eval_steps_per_second": 56.182, "step": 1200000 }, { "epoch": 9.85, "eval_loss": 3.3946433067321777, "eval_runtime": 114.9828, "eval_samples_per_second": 898.178, "eval_steps_per_second": 56.139, "step": 1208000 }, { "epoch": 9.92, "learning_rate": 4.936212790794631e-06, "loss": 3.3486, "step": 1216000 }, { "epoch": 9.92, "eval_loss": 3.392373561859131, "eval_runtime": 115.6846, "eval_samples_per_second": 892.729, "eval_steps_per_second": 55.798, "step": 1216000 }, { "epoch": 9.98, "eval_loss": 3.398346424102783, "eval_runtime": 115.4236, "eval_samples_per_second": 894.747, "eval_steps_per_second": 55.924, "step": 1224000 }, { "epoch": 10.05, "learning_rate": 4.869507212540649e-06, "loss": 3.3471, "step": 1232000 }, { "epoch": 10.05, "eval_loss": 3.414100408554077, "eval_runtime": 115.0455, "eval_samples_per_second": 897.689, "eval_steps_per_second": 56.108, "step": 1232000 }, { "epoch": 10.11, "eval_loss": 3.4220006465911865, "eval_runtime": 115.4764, "eval_samples_per_second": 894.339, "eval_steps_per_second": 55.899, "step": 1240000 }, { "epoch": 10.18, "learning_rate": 4.802801634286667e-06, "loss": 3.3457, "step": 1248000 }, { "epoch": 10.18, "eval_loss": 3.4085357189178467, "eval_runtime": 115.0154, "eval_samples_per_second": 897.923, "eval_steps_per_second": 56.123, "step": 1248000 }, { "epoch": 10.24, "eval_loss": 3.424273729324341, "eval_runtime": 114.96, "eval_samples_per_second": 898.356, "eval_steps_per_second": 56.15, "step": 1256000 }, { "epoch": 10.31, "learning_rate": 4.7360960560326865e-06, "loss": 3.3278, "step": 1264000 }, { "epoch": 10.31, "eval_loss": 3.4058358669281006, "eval_runtime": 115.4303, "eval_samples_per_second": 894.696, "eval_steps_per_second": 55.921, "step": 1264000 }, { "epoch": 10.37, "eval_loss": 3.403254985809326, "eval_runtime": 114.783, "eval_samples_per_second": 899.741, "eval_steps_per_second": 56.237, "step": 1272000 }, { "epoch": 10.44, "learning_rate": 4.669390477778704e-06, "loss": 3.325, "step": 1280000 }, { "epoch": 10.44, "eval_loss": 3.3866589069366455, "eval_runtime": 115.6771, "eval_samples_per_second": 892.787, "eval_steps_per_second": 55.802, "step": 1280000 }, { "epoch": 10.5, "eval_loss": 3.3878674507141113, "eval_runtime": 114.7924, "eval_samples_per_second": 899.667, "eval_steps_per_second": 56.232, "step": 1288000 }, { "epoch": 10.57, "learning_rate": 4.602684899524723e-06, "loss": 3.3248, "step": 1296000 }, { "epoch": 10.57, "eval_loss": 3.380067825317383, "eval_runtime": 115.2061, "eval_samples_per_second": 896.437, "eval_steps_per_second": 56.03, "step": 1296000 }, { "epoch": 10.63, "eval_loss": 3.4026682376861572, "eval_runtime": 117.5473, "eval_samples_per_second": 878.583, "eval_steps_per_second": 54.914, "step": 1304000 }, { "epoch": 10.7, "learning_rate": 4.5359793212707415e-06, "loss": 3.3217, "step": 1312000 }, { "epoch": 10.7, "eval_loss": 3.3781392574310303, "eval_runtime": 116.9837, "eval_samples_per_second": 882.816, "eval_steps_per_second": 55.179, "step": 1312000 }, { "epoch": 10.76, "eval_loss": 3.38712477684021, "eval_runtime": 116.1554, "eval_samples_per_second": 889.111, "eval_steps_per_second": 55.572, "step": 1320000 }, { "epoch": 10.83, "learning_rate": 4.46927374301676e-06, "loss": 3.3227, "step": 1328000 }, { "epoch": 10.83, "eval_loss": 3.386099338531494, "eval_runtime": 116.8959, "eval_samples_per_second": 883.478, "eval_steps_per_second": 55.22, "step": 1328000 }, { "epoch": 10.89, "eval_loss": 3.378852605819702, "eval_runtime": 116.5746, "eval_samples_per_second": 885.913, "eval_steps_per_second": 55.372, "step": 1336000 }, { "epoch": 10.96, "learning_rate": 4.402568164762779e-06, "loss": 3.3259, "step": 1344000 }, { "epoch": 10.96, "eval_loss": 3.386458158493042, "eval_runtime": 116.5428, "eval_samples_per_second": 886.155, "eval_steps_per_second": 55.387, "step": 1344000 }, { "epoch": 11.02, "eval_loss": 3.386268377304077, "eval_runtime": 115.7105, "eval_samples_per_second": 892.529, "eval_steps_per_second": 55.786, "step": 1352000 }, { "epoch": 11.09, "learning_rate": 4.335862586508797e-06, "loss": 3.3094, "step": 1360000 }, { "epoch": 11.09, "eval_loss": 3.3826916217803955, "eval_runtime": 118.0068, "eval_samples_per_second": 875.161, "eval_steps_per_second": 54.7, "step": 1360000 }, { "epoch": 11.15, "eval_loss": 3.3880295753479004, "eval_runtime": 115.413, "eval_samples_per_second": 894.83, "eval_steps_per_second": 55.93, "step": 1368000 }, { "epoch": 11.22, "learning_rate": 4.269157008254816e-06, "loss": 3.3128, "step": 1376000 }, { "epoch": 11.22, "eval_loss": 3.365227460861206, "eval_runtime": 116.1062, "eval_samples_per_second": 889.487, "eval_steps_per_second": 55.596, "step": 1376000 }, { "epoch": 11.29, "eval_loss": 3.381347179412842, "eval_runtime": 119.0899, "eval_samples_per_second": 867.202, "eval_steps_per_second": 54.203, "step": 1384000 }, { "epoch": 11.35, "learning_rate": 4.202451430000834e-06, "loss": 3.3088, "step": 1392000 }, { "epoch": 11.35, "eval_loss": 3.385295867919922, "eval_runtime": 115.9391, "eval_samples_per_second": 890.769, "eval_steps_per_second": 55.676, "step": 1392000 }, { "epoch": 11.42, "eval_loss": 3.3708653450012207, "eval_runtime": 116.9766, "eval_samples_per_second": 882.869, "eval_steps_per_second": 55.182, "step": 1400000 }, { "epoch": 11.48, "learning_rate": 4.135745851746852e-06, "loss": 3.3067, "step": 1408000 }, { "epoch": 11.48, "eval_loss": 3.3830504417419434, "eval_runtime": 115.9272, "eval_samples_per_second": 890.861, "eval_steps_per_second": 55.682, "step": 1408000 }, { "epoch": 11.55, "eval_loss": 3.370314598083496, "eval_runtime": 117.2105, "eval_samples_per_second": 881.107, "eval_steps_per_second": 55.072, "step": 1416000 }, { "epoch": 11.61, "learning_rate": 4.069040273492872e-06, "loss": 3.311, "step": 1424000 }, { "epoch": 11.61, "eval_loss": 3.369617223739624, "eval_runtime": 116.4339, "eval_samples_per_second": 886.984, "eval_steps_per_second": 55.439, "step": 1424000 }, { "epoch": 11.68, "eval_loss": 3.3768646717071533, "eval_runtime": 118.1326, "eval_samples_per_second": 874.23, "eval_steps_per_second": 54.642, "step": 1432000 }, { "epoch": 11.74, "learning_rate": 4.0023346952388895e-06, "loss": 3.3048, "step": 1440000 }, { "epoch": 11.74, "eval_loss": 3.373983860015869, "eval_runtime": 118.2179, "eval_samples_per_second": 873.598, "eval_steps_per_second": 54.603, "step": 1440000 }, { "epoch": 11.81, "eval_loss": 3.3731493949890137, "eval_runtime": 116.9055, "eval_samples_per_second": 883.406, "eval_steps_per_second": 55.216, "step": 1448000 }, { "epoch": 11.87, "learning_rate": 3.935629116984908e-06, "loss": 3.3055, "step": 1456000 }, { "epoch": 11.87, "eval_loss": 3.365483283996582, "eval_runtime": 117.1876, "eval_samples_per_second": 881.279, "eval_steps_per_second": 55.083, "step": 1456000 }, { "epoch": 11.94, "eval_loss": 3.3697094917297363, "eval_runtime": 117.1788, "eval_samples_per_second": 881.346, "eval_steps_per_second": 55.087, "step": 1464000 }, { "epoch": 12.0, "learning_rate": 3.868923538730927e-06, "loss": 3.3105, "step": 1472000 }, { "epoch": 12.0, "eval_loss": 3.3741800785064697, "eval_runtime": 116.7081, "eval_samples_per_second": 884.9, "eval_steps_per_second": 55.309, "step": 1472000 }, { "epoch": 12.07, "eval_loss": 3.3614203929901123, "eval_runtime": 118.1522, "eval_samples_per_second": 874.084, "eval_steps_per_second": 54.633, "step": 1480000 }, { "epoch": 12.13, "learning_rate": 3.8022179604769453e-06, "loss": 3.2977, "step": 1488000 }, { "epoch": 12.13, "eval_loss": 3.370495319366455, "eval_runtime": 117.0737, "eval_samples_per_second": 882.137, "eval_steps_per_second": 55.136, "step": 1488000 }, { "epoch": 12.2, "eval_loss": 3.3746001720428467, "eval_runtime": 117.4262, "eval_samples_per_second": 879.489, "eval_steps_per_second": 54.971, "step": 1496000 }, { "epoch": 12.26, "learning_rate": 3.735512382222964e-06, "loss": 3.2999, "step": 1504000 }, { "epoch": 12.26, "eval_loss": 3.3690757751464844, "eval_runtime": 114.9601, "eval_samples_per_second": 898.355, "eval_steps_per_second": 56.15, "step": 1504000 }, { "epoch": 12.33, "eval_loss": 3.374530792236328, "eval_runtime": 115.3595, "eval_samples_per_second": 895.245, "eval_steps_per_second": 55.955, "step": 1512000 }, { "epoch": 12.39, "learning_rate": 3.668806803968982e-06, "loss": 3.2983, "step": 1520000 }, { "epoch": 12.39, "eval_loss": 3.3717198371887207, "eval_runtime": 114.9666, "eval_samples_per_second": 898.304, "eval_steps_per_second": 56.147, "step": 1520000 }, { "epoch": 12.46, "eval_loss": 3.368246555328369, "eval_runtime": 115.591, "eval_samples_per_second": 893.452, "eval_steps_per_second": 55.843, "step": 1528000 }, { "epoch": 12.52, "learning_rate": 3.6021012257150007e-06, "loss": 3.2957, "step": 1536000 }, { "epoch": 12.52, "eval_loss": 3.369278907775879, "eval_runtime": 116.1156, "eval_samples_per_second": 889.416, "eval_steps_per_second": 55.591, "step": 1536000 }, { "epoch": 12.59, "eval_loss": 3.376443386077881, "eval_runtime": 114.7209, "eval_samples_per_second": 900.228, "eval_steps_per_second": 56.267, "step": 1544000 }, { "epoch": 12.65, "learning_rate": 3.535395647461019e-06, "loss": 3.293, "step": 1552000 }, { "epoch": 12.65, "eval_loss": 3.3690662384033203, "eval_runtime": 114.9457, "eval_samples_per_second": 898.468, "eval_steps_per_second": 56.157, "step": 1552000 }, { "epoch": 12.72, "eval_loss": 3.380187511444092, "eval_runtime": 115.2975, "eval_samples_per_second": 895.726, "eval_steps_per_second": 55.986, "step": 1560000 }, { "epoch": 12.79, "learning_rate": 3.468690069207038e-06, "loss": 3.2919, "step": 1568000 }, { "epoch": 12.79, "eval_loss": 3.3626480102539062, "eval_runtime": 115.0018, "eval_samples_per_second": 898.03, "eval_steps_per_second": 56.13, "step": 1568000 }, { "epoch": 12.85, "eval_loss": 3.3604438304901123, "eval_runtime": 116.2394, "eval_samples_per_second": 888.468, "eval_steps_per_second": 55.532, "step": 1576000 }, { "epoch": 12.92, "learning_rate": 3.4019844909530565e-06, "loss": 3.3023, "step": 1584000 }, { "epoch": 12.92, "eval_loss": 3.374943971633911, "eval_runtime": 115.4828, "eval_samples_per_second": 894.289, "eval_steps_per_second": 55.896, "step": 1584000 }, { "epoch": 12.98, "eval_loss": 3.368828773498535, "eval_runtime": 114.8626, "eval_samples_per_second": 899.118, "eval_steps_per_second": 56.198, "step": 1592000 }, { "epoch": 13.05, "learning_rate": 3.3352789126990747e-06, "loss": 3.2988, "step": 1600000 }, { "epoch": 13.05, "eval_loss": 3.3666255474090576, "eval_runtime": 115.7226, "eval_samples_per_second": 892.436, "eval_steps_per_second": 55.78, "step": 1600000 }, { "epoch": 13.11, "eval_loss": 3.369481325149536, "eval_runtime": 116.2492, "eval_samples_per_second": 888.393, "eval_steps_per_second": 55.527, "step": 1608000 }, { "epoch": 13.18, "learning_rate": 3.2685733344450933e-06, "loss": 3.2924, "step": 1616000 }, { "epoch": 13.18, "eval_loss": 3.364980697631836, "eval_runtime": 114.892, "eval_samples_per_second": 898.887, "eval_steps_per_second": 56.183, "step": 1616000 }, { "epoch": 13.24, "eval_loss": 3.3651351928710938, "eval_runtime": 114.7414, "eval_samples_per_second": 900.068, "eval_steps_per_second": 56.257, "step": 1624000 }, { "epoch": 13.31, "learning_rate": 3.2018677561911115e-06, "loss": 3.2958, "step": 1632000 }, { "epoch": 13.31, "eval_loss": 3.369225263595581, "eval_runtime": 115.9526, "eval_samples_per_second": 890.666, "eval_steps_per_second": 55.669, "step": 1632000 }, { "epoch": 13.37, "eval_loss": 3.3855459690093994, "eval_runtime": 114.8307, "eval_samples_per_second": 899.367, "eval_steps_per_second": 56.213, "step": 1640000 }, { "epoch": 13.44, "learning_rate": 3.1351621779371306e-06, "loss": 3.2918, "step": 1648000 }, { "epoch": 13.44, "eval_loss": 3.3706300258636475, "eval_runtime": 115.344, "eval_samples_per_second": 895.365, "eval_steps_per_second": 55.963, "step": 1648000 }, { "epoch": 13.5, "eval_loss": 3.3680288791656494, "eval_runtime": 114.7321, "eval_samples_per_second": 900.14, "eval_steps_per_second": 56.261, "step": 1656000 }, { "epoch": 13.57, "learning_rate": 3.0684565996831487e-06, "loss": 3.2948, "step": 1664000 }, { "epoch": 13.57, "eval_loss": 3.353415012359619, "eval_runtime": 116.4266, "eval_samples_per_second": 887.039, "eval_steps_per_second": 55.443, "step": 1664000 }, { "epoch": 13.63, "eval_loss": 3.369929790496826, "eval_runtime": 114.8306, "eval_samples_per_second": 899.369, "eval_steps_per_second": 56.213, "step": 1672000 }, { "epoch": 13.7, "learning_rate": 3.0017510214291673e-06, "loss": 3.2996, "step": 1680000 }, { "epoch": 13.7, "eval_loss": 3.3732664585113525, "eval_runtime": 115.7005, "eval_samples_per_second": 892.607, "eval_steps_per_second": 55.791, "step": 1680000 }, { "epoch": 13.76, "eval_loss": 3.3764214515686035, "eval_runtime": 115.4981, "eval_samples_per_second": 894.171, "eval_steps_per_second": 55.888, "step": 1688000 }, { "epoch": 13.83, "learning_rate": 2.9350454431751855e-06, "loss": 3.2999, "step": 1696000 }, { "epoch": 13.83, "eval_loss": 3.3792943954467773, "eval_runtime": 116.0913, "eval_samples_per_second": 889.602, "eval_steps_per_second": 55.603, "step": 1696000 }, { "epoch": 13.89, "eval_loss": 3.368272304534912, "eval_runtime": 116.0753, "eval_samples_per_second": 889.724, "eval_steps_per_second": 55.61, "step": 1704000 }, { "epoch": 13.96, "learning_rate": 2.868339864921204e-06, "loss": 3.291, "step": 1712000 }, { "epoch": 13.96, "eval_loss": 3.3653597831726074, "eval_runtime": 115.5031, "eval_samples_per_second": 894.132, "eval_steps_per_second": 55.886, "step": 1712000 }, { "epoch": 14.02, "eval_loss": 3.372131109237671, "eval_runtime": 115.6199, "eval_samples_per_second": 893.228, "eval_steps_per_second": 55.829, "step": 1720000 }, { "epoch": 14.09, "learning_rate": 2.801634286667223e-06, "loss": 3.2952, "step": 1728000 }, { "epoch": 14.09, "eval_loss": 3.367438316345215, "eval_runtime": 115.0009, "eval_samples_per_second": 898.037, "eval_steps_per_second": 56.13, "step": 1728000 }, { "epoch": 14.16, "eval_loss": 3.3762009143829346, "eval_runtime": 115.4616, "eval_samples_per_second": 894.453, "eval_steps_per_second": 55.906, "step": 1736000 }, { "epoch": 14.22, "learning_rate": 2.7349287084132413e-06, "loss": 3.2866, "step": 1744000 }, { "epoch": 14.22, "eval_loss": 3.3699355125427246, "eval_runtime": 114.9346, "eval_samples_per_second": 898.554, "eval_steps_per_second": 56.162, "step": 1744000 }, { "epoch": 14.29, "eval_loss": 3.3690149784088135, "eval_runtime": 115.9293, "eval_samples_per_second": 890.845, "eval_steps_per_second": 55.681, "step": 1752000 }, { "epoch": 14.35, "learning_rate": 2.66822313015926e-06, "loss": 3.2825, "step": 1760000 }, { "epoch": 14.35, "eval_loss": 3.365321636199951, "eval_runtime": 114.9037, "eval_samples_per_second": 898.796, "eval_steps_per_second": 56.177, "step": 1760000 }, { "epoch": 14.42, "eval_loss": 3.368727207183838, "eval_runtime": 115.3436, "eval_samples_per_second": 895.369, "eval_steps_per_second": 55.963, "step": 1768000 }, { "epoch": 14.48, "learning_rate": 2.601517551905278e-06, "loss": 3.2825, "step": 1776000 }, { "epoch": 14.48, "eval_loss": 3.3617701530456543, "eval_runtime": 115.7714, "eval_samples_per_second": 892.06, "eval_steps_per_second": 55.756, "step": 1776000 }, { "epoch": 14.55, "eval_loss": 3.3609282970428467, "eval_runtime": 114.879, "eval_samples_per_second": 898.989, "eval_steps_per_second": 56.19, "step": 1784000 }, { "epoch": 14.61, "learning_rate": 2.5348119736512967e-06, "loss": 3.2744, "step": 1792000 }, { "epoch": 14.61, "eval_loss": 3.3552184104919434, "eval_runtime": 114.6789, "eval_samples_per_second": 900.558, "eval_steps_per_second": 56.288, "step": 1792000 }, { "epoch": 14.68, "eval_loss": 3.3549087047576904, "eval_runtime": 116.3921, "eval_samples_per_second": 887.303, "eval_steps_per_second": 55.459, "step": 1800000 }, { "epoch": 14.74, "learning_rate": 2.4681063953973154e-06, "loss": 3.2811, "step": 1808000 }, { "epoch": 14.74, "eval_loss": 3.3504152297973633, "eval_runtime": 115.0014, "eval_samples_per_second": 898.032, "eval_steps_per_second": 56.13, "step": 1808000 }, { "epoch": 14.81, "eval_loss": 3.3574647903442383, "eval_runtime": 115.1236, "eval_samples_per_second": 897.079, "eval_steps_per_second": 56.07, "step": 1816000 }, { "epoch": 14.87, "learning_rate": 2.4014008171433335e-06, "loss": 3.2672, "step": 1824000 }, { "epoch": 14.87, "eval_loss": 3.3587796688079834, "eval_runtime": 116.6416, "eval_samples_per_second": 885.404, "eval_steps_per_second": 55.34, "step": 1824000 }, { "epoch": 14.94, "eval_loss": 3.3559627532958984, "eval_runtime": 116.2457, "eval_samples_per_second": 888.42, "eval_steps_per_second": 55.529, "step": 1832000 }, { "epoch": 15.0, "learning_rate": 2.334695238889352e-06, "loss": 3.2919, "step": 1840000 }, { "epoch": 15.0, "eval_loss": 3.359805107116699, "eval_runtime": 115.5497, "eval_samples_per_second": 893.771, "eval_steps_per_second": 55.863, "step": 1840000 }, { "epoch": 15.07, "eval_loss": 3.344524383544922, "eval_runtime": 115.5133, "eval_samples_per_second": 894.053, "eval_steps_per_second": 55.881, "step": 1848000 }, { "epoch": 15.13, "learning_rate": 2.2679896606353707e-06, "loss": 3.2724, "step": 1856000 }, { "epoch": 15.13, "eval_loss": 3.3516576290130615, "eval_runtime": 115.2664, "eval_samples_per_second": 895.968, "eval_steps_per_second": 56.001, "step": 1856000 }, { "epoch": 15.2, "eval_loss": 3.359280824661255, "eval_runtime": 116.0103, "eval_samples_per_second": 890.223, "eval_steps_per_second": 55.642, "step": 1864000 }, { "epoch": 15.26, "learning_rate": 2.2012840823813894e-06, "loss": 3.277, "step": 1872000 }, { "epoch": 15.26, "eval_loss": 3.3597874641418457, "eval_runtime": 114.9804, "eval_samples_per_second": 898.197, "eval_steps_per_second": 56.14, "step": 1872000 }, { "epoch": 15.33, "eval_loss": 3.345801591873169, "eval_runtime": 116.1901, "eval_samples_per_second": 888.845, "eval_steps_per_second": 55.555, "step": 1880000 }, { "epoch": 15.39, "learning_rate": 2.134578504127408e-06, "loss": 3.2842, "step": 1888000 }, { "epoch": 15.39, "eval_loss": 3.3583106994628906, "eval_runtime": 114.8266, "eval_samples_per_second": 899.399, "eval_steps_per_second": 56.215, "step": 1888000 }, { "epoch": 15.46, "eval_loss": 3.3447749614715576, "eval_runtime": 114.9801, "eval_samples_per_second": 898.199, "eval_steps_per_second": 56.14, "step": 1896000 }, { "epoch": 15.53, "learning_rate": 2.067872925873426e-06, "loss": 3.2758, "step": 1904000 }, { "epoch": 15.53, "eval_loss": 3.3593051433563232, "eval_runtime": 114.9092, "eval_samples_per_second": 898.753, "eval_steps_per_second": 56.175, "step": 1904000 }, { "epoch": 15.59, "eval_loss": 3.3551743030548096, "eval_runtime": 115.5179, "eval_samples_per_second": 894.017, "eval_steps_per_second": 55.879, "step": 1912000 }, { "epoch": 15.66, "learning_rate": 2.0011673476194448e-06, "loss": 3.2684, "step": 1920000 }, { "epoch": 15.66, "eval_loss": 3.371454954147339, "eval_runtime": 114.8944, "eval_samples_per_second": 898.869, "eval_steps_per_second": 56.182, "step": 1920000 }, { "epoch": 15.72, "eval_loss": 3.3543806076049805, "eval_runtime": 115.4862, "eval_samples_per_second": 894.263, "eval_steps_per_second": 55.894, "step": 1928000 }, { "epoch": 15.79, "learning_rate": 1.9344617693654634e-06, "loss": 3.2924, "step": 1936000 }, { "epoch": 15.79, "eval_loss": 3.3514981269836426, "eval_runtime": 115.0356, "eval_samples_per_second": 897.766, "eval_steps_per_second": 56.113, "step": 1936000 }, { "epoch": 15.85, "eval_loss": 3.36460018157959, "eval_runtime": 115.4242, "eval_samples_per_second": 894.743, "eval_steps_per_second": 55.924, "step": 1944000 }, { "epoch": 15.92, "learning_rate": 1.867756191111482e-06, "loss": 3.2673, "step": 1952000 }, { "epoch": 15.92, "eval_loss": 3.353806495666504, "eval_runtime": 115.3905, "eval_samples_per_second": 895.004, "eval_steps_per_second": 55.94, "step": 1952000 }, { "epoch": 15.98, "eval_loss": 3.3436896800994873, "eval_runtime": 114.7945, "eval_samples_per_second": 899.651, "eval_steps_per_second": 56.231, "step": 1960000 }, { "epoch": 16.05, "learning_rate": 1.8010506128575004e-06, "loss": 3.2833, "step": 1968000 }, { "epoch": 16.05, "eval_loss": 3.3442821502685547, "eval_runtime": 116.1629, "eval_samples_per_second": 889.053, "eval_steps_per_second": 55.569, "step": 1968000 }, { "epoch": 16.11, "eval_loss": 3.361924886703491, "eval_runtime": 116.4426, "eval_samples_per_second": 886.917, "eval_steps_per_second": 55.435, "step": 1976000 }, { "epoch": 16.18, "learning_rate": 1.734345034603519e-06, "loss": 3.2636, "step": 1984000 }, { "epoch": 16.18, "eval_loss": 3.3510515689849854, "eval_runtime": 115.8529, "eval_samples_per_second": 891.432, "eval_steps_per_second": 55.717, "step": 1984000 }, { "epoch": 16.24, "eval_loss": 3.3447539806365967, "eval_runtime": 114.926, "eval_samples_per_second": 898.622, "eval_steps_per_second": 56.167, "step": 1992000 }, { "epoch": 16.31, "learning_rate": 1.6676394563495374e-06, "loss": 3.2753, "step": 2000000 }, { "epoch": 16.31, "eval_loss": 3.355980396270752, "eval_runtime": 115.4649, "eval_samples_per_second": 894.427, "eval_steps_per_second": 55.904, "step": 2000000 }, { "epoch": 16.37, "eval_loss": 3.3524882793426514, "eval_runtime": 118.2786, "eval_samples_per_second": 873.151, "eval_steps_per_second": 54.575, "step": 2008000 }, { "epoch": 16.44, "learning_rate": 1.6009338780955558e-06, "loss": 3.2701, "step": 2016000 }, { "epoch": 16.44, "eval_loss": 3.355792760848999, "eval_runtime": 115.0046, "eval_samples_per_second": 898.008, "eval_steps_per_second": 56.128, "step": 2016000 }, { "epoch": 16.5, "eval_loss": 3.3558590412139893, "eval_runtime": 115.5093, "eval_samples_per_second": 894.084, "eval_steps_per_second": 55.883, "step": 2024000 }, { "epoch": 16.57, "learning_rate": 1.5342282998415744e-06, "loss": 3.2761, "step": 2032000 }, { "epoch": 16.57, "eval_loss": 3.3439648151397705, "eval_runtime": 114.8803, "eval_samples_per_second": 898.979, "eval_steps_per_second": 56.189, "step": 2032000 }, { "epoch": 16.63, "eval_loss": 3.3505825996398926, "eval_runtime": 115.5177, "eval_samples_per_second": 894.019, "eval_steps_per_second": 55.879, "step": 2040000 }, { "epoch": 16.7, "learning_rate": 1.4675227215875928e-06, "loss": 3.2677, "step": 2048000 }, { "epoch": 16.7, "eval_loss": 3.3473587036132812, "eval_runtime": 115.2604, "eval_samples_per_second": 896.014, "eval_steps_per_second": 56.004, "step": 2048000 }, { "epoch": 16.76, "eval_loss": 3.3614845275878906, "eval_runtime": 114.7851, "eval_samples_per_second": 899.724, "eval_steps_per_second": 56.236, "step": 2056000 }, { "epoch": 16.83, "learning_rate": 1.4008171433336116e-06, "loss": 3.2614, "step": 2064000 }, { "epoch": 16.83, "eval_loss": 3.350660562515259, "eval_runtime": 116.1258, "eval_samples_per_second": 889.337, "eval_steps_per_second": 55.586, "step": 2064000 }, { "epoch": 16.89, "eval_loss": 3.34436297416687, "eval_runtime": 114.7641, "eval_samples_per_second": 899.89, "eval_steps_per_second": 56.246, "step": 2072000 }, { "epoch": 16.96, "learning_rate": 1.33411156507963e-06, "loss": 3.2608, "step": 2080000 }, { "epoch": 16.96, "eval_loss": 3.352665901184082, "eval_runtime": 114.9595, "eval_samples_per_second": 898.36, "eval_steps_per_second": 56.15, "step": 2080000 }, { "epoch": 17.03, "eval_loss": 3.3398256301879883, "eval_runtime": 114.8716, "eval_samples_per_second": 899.047, "eval_steps_per_second": 56.193, "step": 2088000 }, { "epoch": 17.09, "learning_rate": 1.2674059868256484e-06, "loss": 3.2643, "step": 2096000 }, { "epoch": 17.09, "eval_loss": 3.3497581481933594, "eval_runtime": 115.3741, "eval_samples_per_second": 895.132, "eval_steps_per_second": 55.948, "step": 2096000 }, { "epoch": 17.16, "eval_loss": 3.3348639011383057, "eval_runtime": 114.8223, "eval_samples_per_second": 899.434, "eval_steps_per_second": 56.217, "step": 2104000 }, { "epoch": 17.22, "learning_rate": 1.2007004085716668e-06, "loss": 3.2721, "step": 2112000 }, { "epoch": 17.22, "eval_loss": 3.356008291244507, "eval_runtime": 115.5116, "eval_samples_per_second": 894.066, "eval_steps_per_second": 55.882, "step": 2112000 }, { "epoch": 17.29, "eval_loss": 3.3421435356140137, "eval_runtime": 115.5912, "eval_samples_per_second": 893.45, "eval_steps_per_second": 55.843, "step": 2120000 }, { "epoch": 17.35, "learning_rate": 1.1339948303176854e-06, "loss": 3.266, "step": 2128000 }, { "epoch": 17.35, "eval_loss": 3.342872142791748, "eval_runtime": 115.0319, "eval_samples_per_second": 897.794, "eval_steps_per_second": 56.115, "step": 2128000 }, { "epoch": 17.42, "eval_loss": 3.337078809738159, "eval_runtime": 114.7057, "eval_samples_per_second": 900.347, "eval_steps_per_second": 56.274, "step": 2136000 }, { "epoch": 17.48, "learning_rate": 1.067289252063704e-06, "loss": 3.2551, "step": 2144000 }, { "epoch": 17.48, "eval_loss": 3.340388774871826, "eval_runtime": 115.5719, "eval_samples_per_second": 893.599, "eval_steps_per_second": 55.853, "step": 2144000 }, { "epoch": 17.55, "eval_loss": 3.349374771118164, "eval_runtime": 116.2218, "eval_samples_per_second": 888.603, "eval_steps_per_second": 55.54, "step": 2152000 }, { "epoch": 17.61, "learning_rate": 1.0005836738097224e-06, "loss": 3.26, "step": 2160000 }, { "epoch": 17.61, "eval_loss": 3.3389031887054443, "eval_runtime": 115.0165, "eval_samples_per_second": 897.915, "eval_steps_per_second": 56.122, "step": 2160000 }, { "epoch": 17.68, "eval_loss": 3.345613718032837, "eval_runtime": 114.2481, "eval_samples_per_second": 903.954, "eval_steps_per_second": 56.5, "step": 2168000 }, { "epoch": 17.74, "learning_rate": 9.33878095555741e-07, "loss": 3.2528, "step": 2176000 }, { "epoch": 17.74, "eval_loss": 3.3248987197875977, "eval_runtime": 115.0558, "eval_samples_per_second": 897.608, "eval_steps_per_second": 56.103, "step": 2176000 }, { "epoch": 17.81, "eval_loss": 3.3452157974243164, "eval_runtime": 116.2164, "eval_samples_per_second": 888.644, "eval_steps_per_second": 55.543, "step": 2184000 }, { "epoch": 17.87, "learning_rate": 8.671725173017595e-07, "loss": 3.2602, "step": 2192000 }, { "epoch": 17.87, "eval_loss": 3.33760929107666, "eval_runtime": 116.1157, "eval_samples_per_second": 889.414, "eval_steps_per_second": 55.591, "step": 2192000 }, { "epoch": 17.94, "eval_loss": 3.351128101348877, "eval_runtime": 114.6575, "eval_samples_per_second": 900.726, "eval_steps_per_second": 56.298, "step": 2200000 }, { "epoch": 18.0, "learning_rate": 8.004669390477779e-07, "loss": 3.2492, "step": 2208000 }, { "epoch": 18.0, "eval_loss": 3.347473621368408, "eval_runtime": 115.2092, "eval_samples_per_second": 896.413, "eval_steps_per_second": 56.029, "step": 2208000 }, { "epoch": 18.07, "eval_loss": 3.349674940109253, "eval_runtime": 115.6497, "eval_samples_per_second": 892.998, "eval_steps_per_second": 55.815, "step": 2216000 }, { "epoch": 18.13, "learning_rate": 7.337613607937964e-07, "loss": 3.2469, "step": 2224000 }, { "epoch": 18.13, "eval_loss": 3.3378491401672363, "eval_runtime": 114.9296, "eval_samples_per_second": 898.594, "eval_steps_per_second": 56.165, "step": 2224000 }, { "epoch": 18.2, "eval_loss": 3.332571029663086, "eval_runtime": 115.4244, "eval_samples_per_second": 894.742, "eval_steps_per_second": 55.924, "step": 2232000 }, { "epoch": 18.26, "learning_rate": 6.67055782539815e-07, "loss": 3.2589, "step": 2240000 }, { "epoch": 18.26, "eval_loss": 3.3277342319488525, "eval_runtime": 114.9762, "eval_samples_per_second": 898.229, "eval_steps_per_second": 56.142, "step": 2240000 }, { "epoch": 18.33, "eval_loss": 3.3456978797912598, "eval_runtime": 116.0675, "eval_samples_per_second": 889.784, "eval_steps_per_second": 55.614, "step": 2248000 }, { "epoch": 18.4, "learning_rate": 6.003502042858334e-07, "loss": 3.2548, "step": 2256000 }, { "epoch": 18.4, "eval_loss": 3.334270715713501, "eval_runtime": 115.7666, "eval_samples_per_second": 892.097, "eval_steps_per_second": 55.759, "step": 2256000 }, { "epoch": 18.46, "eval_loss": 3.3362197875976562, "eval_runtime": 115.5031, "eval_samples_per_second": 894.132, "eval_steps_per_second": 55.886, "step": 2264000 }, { "epoch": 18.53, "learning_rate": 5.33644626031852e-07, "loss": 3.2589, "step": 2272000 }, { "epoch": 18.53, "eval_loss": 3.343080997467041, "eval_runtime": 115.3187, "eval_samples_per_second": 895.561, "eval_steps_per_second": 55.975, "step": 2272000 }, { "epoch": 18.59, "eval_loss": 3.3428003787994385, "eval_runtime": 115.3186, "eval_samples_per_second": 895.563, "eval_steps_per_second": 55.975, "step": 2280000 }, { "epoch": 18.66, "learning_rate": 4.669390477778705e-07, "loss": 3.2674, "step": 2288000 }, { "epoch": 18.66, "eval_loss": 3.3400795459747314, "eval_runtime": 114.7905, "eval_samples_per_second": 899.682, "eval_steps_per_second": 56.233, "step": 2288000 }, { "epoch": 18.72, "eval_loss": 3.337498903274536, "eval_runtime": 114.9489, "eval_samples_per_second": 898.443, "eval_steps_per_second": 56.155, "step": 2296000 }, { "epoch": 18.79, "learning_rate": 4.0023346952388894e-07, "loss": 3.2561, "step": 2304000 }, { "epoch": 18.79, "eval_loss": 3.3333868980407715, "eval_runtime": 114.8393, "eval_samples_per_second": 899.3, "eval_steps_per_second": 56.209, "step": 2304000 }, { "epoch": 18.85, "eval_loss": 3.3320717811584473, "eval_runtime": 115.0159, "eval_samples_per_second": 897.919, "eval_steps_per_second": 56.123, "step": 2312000 }, { "epoch": 18.92, "learning_rate": 3.335278912699075e-07, "loss": 3.2452, "step": 2320000 }, { "epoch": 18.92, "eval_loss": 3.3445632457733154, "eval_runtime": 114.9617, "eval_samples_per_second": 898.342, "eval_steps_per_second": 56.149, "step": 2320000 }, { "epoch": 18.98, "eval_loss": 3.3525032997131348, "eval_runtime": 116.2145, "eval_samples_per_second": 888.659, "eval_steps_per_second": 55.544, "step": 2328000 }, { "epoch": 19.05, "learning_rate": 2.66822313015926e-07, "loss": 3.259, "step": 2336000 }, { "epoch": 19.05, "eval_loss": 3.331772804260254, "eval_runtime": 115.4929, "eval_samples_per_second": 894.211, "eval_steps_per_second": 55.891, "step": 2336000 }, { "epoch": 19.11, "eval_loss": 3.3451852798461914, "eval_runtime": 115.1546, "eval_samples_per_second": 896.838, "eval_steps_per_second": 56.055, "step": 2344000 }, { "epoch": 19.18, "learning_rate": 2.0011673476194447e-07, "loss": 3.2494, "step": 2352000 }, { "epoch": 19.18, "eval_loss": 3.335479497909546, "eval_runtime": 114.4583, "eval_samples_per_second": 902.293, "eval_steps_per_second": 56.396, "step": 2352000 }, { "epoch": 19.24, "eval_loss": 3.3322434425354004, "eval_runtime": 116.1476, "eval_samples_per_second": 889.17, "eval_steps_per_second": 55.576, "step": 2360000 }, { "epoch": 19.31, "learning_rate": 1.33411156507963e-07, "loss": 3.2558, "step": 2368000 }, { "epoch": 19.31, "eval_loss": 3.325453281402588, "eval_runtime": 114.8662, "eval_samples_per_second": 899.089, "eval_steps_per_second": 56.196, "step": 2368000 }, { "epoch": 19.37, "eval_loss": 3.3329989910125732, "eval_runtime": 117.9929, "eval_samples_per_second": 875.265, "eval_steps_per_second": 54.707, "step": 2376000 }, { "epoch": 19.44, "learning_rate": 6.67055782539815e-08, "loss": 3.2436, "step": 2384000 }, { "epoch": 19.44, "eval_loss": 3.3357789516448975, "eval_runtime": 117.7235, "eval_samples_per_second": 877.268, "eval_steps_per_second": 54.832, "step": 2384000 }, { "epoch": 19.5, "eval_loss": 3.3287487030029297, "eval_runtime": 115.6745, "eval_samples_per_second": 892.807, "eval_steps_per_second": 55.803, "step": 2392000 }, { "epoch": 19.57, "learning_rate": 0.0, "loss": 3.2545, "step": 2400000 }, { "epoch": 19.57, "eval_loss": 3.3321266174316406, "eval_runtime": 115.8716, "eval_samples_per_second": 891.289, "eval_steps_per_second": 55.708, "step": 2400000 }, { "epoch": 19.57, "step": 2400000, "total_flos": 6.9600759359113e+17, "train_loss": 3.268406458333333, "train_runtime": 194422.9949, "train_samples_per_second": 197.508, "train_steps_per_second": 12.344 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 20, "save_steps": 32000, "total_flos": 6.9600759359113e+17, "trial_name": null, "trial_params": null }