{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2085, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00014354066985645933, "loss": 4.6999, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.00028708133971291867, "loss": 4.0234, "step": 60 }, { "epoch": 0.13, "learning_rate": 0.00043062200956937803, "loss": 3.84, "step": 90 }, { "epoch": 0.17, "learning_rate": 0.0005741626794258373, "loss": 3.7458, "step": 120 }, { "epoch": 0.22, "learning_rate": 0.0007177033492822966, "loss": 3.6694, "step": 150 }, { "epoch": 0.26, "learning_rate": 0.0008612440191387561, "loss": 3.6173, "step": 180 }, { "epoch": 0.3, "learning_rate": 0.000999999298909658, "loss": 3.5916, "step": 210 }, { "epoch": 0.35, "learning_rate": 0.0009993264033223551, "loss": 3.5461, "step": 240 }, { "epoch": 0.39, "learning_rate": 0.0009973935099776767, "loss": 3.5042, "step": 270 }, { "epoch": 0.43, "learning_rate": 0.0009942054963290549, "loss": 3.4956, "step": 300 }, { "epoch": 0.47, "learning_rate": 0.0009897704069942402, "loss": 3.5649, "step": 330 }, { "epoch": 0.52, "learning_rate": 0.0009840994334555549, "loss": 3.5746, "step": 360 }, { "epoch": 0.56, "learning_rate": 0.0009772068858193608, "loss": 3.5424, "step": 390 }, { "epoch": 0.6, "learning_rate": 0.000969110156706009, "loss": 3.567, "step": 420 }, { "epoch": 0.65, "learning_rate": 0.0009598296773613879, "loss": 3.4753, "step": 450 }, { "epoch": 0.69, "learning_rate": 0.0009493888661008194, "loss": 3.4324, "step": 480 }, { "epoch": 0.73, "learning_rate": 0.0009378140692153992, "loss": 3.4661, "step": 510 }, { "epoch": 0.78, "learning_rate": 0.0009251344944898958, "loss": 3.4081, "step": 540 }, { "epoch": 0.82, "learning_rate": 0.0009113821374999736, "loss": 3.3848, "step": 570 }, { "epoch": 0.86, "learning_rate": 0.0008965917008747158, "loss": 3.3552, "step": 600 }, { "epoch": 0.91, "learning_rate": 0.000880800506728183, "loss": 3.3352, "step": 630 }, { "epoch": 0.95, "learning_rate": 0.0008640484024809779, "loss": 3.2935, "step": 660 }, { "epoch": 0.99, "learning_rate": 0.0008463776603094606, "loss": 3.2754, "step": 690 }, { "epoch": 1.04, "learning_rate": 0.0008278328704763516, "loss": 3.2657, "step": 720 }, { "epoch": 1.08, "learning_rate": 0.0008084608288118838, "loss": 3.256, "step": 750 }, { "epoch": 1.12, "learning_rate": 0.0007883104186294383, "loss": 3.2201, "step": 780 }, { "epoch": 1.17, "learning_rate": 0.0007674324873736347, "loss": 3.1941, "step": 810 }, { "epoch": 1.21, "learning_rate": 0.0007458797183121429, "loss": 3.1856, "step": 840 }, { "epoch": 1.25, "learning_rate": 0.0007237064975949886, "loss": 3.1649, "step": 870 }, { "epoch": 1.29, "learning_rate": 0.0007009687770168125, "loss": 3.155, "step": 900 }, { "epoch": 1.34, "learning_rate": 0.0006777239328283909, "loss": 3.1357, "step": 930 }, { "epoch": 1.38, "learning_rate": 0.0006540306209536906, "loss": 3.1309, "step": 960 }, { "epoch": 1.42, "learning_rate": 0.0006299486289778033, "loss": 3.1091, "step": 990 }, { "epoch": 1.47, "learning_rate": 0.0006055387252792498, "loss": 3.0958, "step": 1020 }, { "epoch": 1.51, "learning_rate": 0.0005808625056873581, "loss": 3.0813, "step": 1050 }, { "epoch": 1.55, "learning_rate": 0.0005559822380516539, "loss": 3.0854, "step": 1080 }, { "epoch": 1.6, "learning_rate": 0.0005309607051154799, "loss": 3.068, "step": 1110 }, { "epoch": 1.64, "learning_rate": 0.0005058610460903332, "loss": 3.0664, "step": 1140 }, { "epoch": 1.68, "learning_rate": 0.00048074659733069516, "loss": 3.0627, "step": 1170 }, { "epoch": 1.73, "learning_rate": 0.00045568073251138903, "loss": 3.0376, "step": 1200 }, { "epoch": 1.77, "learning_rate": 0.0004307267027107653, "loss": 3.0274, "step": 1230 }, { "epoch": 1.81, "learning_rate": 0.00040594747680324574, "loss": 3.0212, "step": 1260 }, { "epoch": 1.86, "learning_rate": 0.0003814055825639795, "loss": 3.0101, "step": 1290 }, { "epoch": 1.9, "learning_rate": 0.000357162948886567, "loss": 3.0076, "step": 1320 }, { "epoch": 1.94, "learning_rate": 0.00033328074951199845, "loss": 3.0129, "step": 1350 }, { "epoch": 1.99, "learning_rate": 0.0003098192486631408, "loss": 2.9902, "step": 1380 }, { "epoch": 2.03, "learning_rate": 0.00028683764897429804, "loss": 2.9819, "step": 1410 }, { "epoch": 2.07, "learning_rate": 0.0002643939420995788, "loss": 2.9891, "step": 1440 }, { "epoch": 2.12, "learning_rate": 0.00024254476237704588, "loss": 2.9702, "step": 1470 }, { "epoch": 2.16, "learning_rate": 0.00022134524391790916, "loss": 2.972, "step": 1500 }, { "epoch": 2.2, "learning_rate": 0.00020084888148138487, "loss": 2.9733, "step": 1530 }, { "epoch": 2.24, "learning_rate": 0.00018110739548628618, "loss": 2.9688, "step": 1560 }, { "epoch": 2.29, "learning_rate": 0.0001621706014999767, "loss": 2.9629, "step": 1590 }, { "epoch": 2.33, "learning_rate": 0.00014408628453401574, "loss": 2.9609, "step": 1620 }, { "epoch": 2.37, "learning_rate": 0.00012690007846369856, "loss": 2.9461, "step": 1650 }, { "epoch": 2.42, "learning_rate": 0.00011065535087576234, "loss": 2.9493, "step": 1680 }, { "epoch": 2.46, "learning_rate": 9.539309363483478e-05, "loss": 2.9417, "step": 1710 }, { "epoch": 2.5, "learning_rate": 8.115181944476685e-05, "loss": 2.9459, "step": 1740 }, { "epoch": 2.55, "learning_rate": 6.796746466586756e-05, "loss": 2.9273, "step": 1770 }, { "epoch": 2.59, "learning_rate": 5.58732986332719e-05, "loss": 2.9397, "step": 1800 }, { "epoch": 2.63, "learning_rate": 4.4899839705266174e-05, "loss": 2.943, "step": 1830 }, { "epoch": 2.68, "learning_rate": 3.507477825341493e-05, "loss": 2.9381, "step": 1860 }, { "epoch": 2.72, "learning_rate": 2.642290678881504e-05, "loss": 2.9509, "step": 1890 }, { "epoch": 2.76, "learning_rate": 1.8966057400797153e-05, "loss": 2.9437, "step": 1920 }, { "epoch": 2.81, "learning_rate": 1.272304666594032e-05, "loss": 2.9374, "step": 1950 }, { "epoch": 2.85, "learning_rate": 7.709628166416128e-06, "loss": 2.9508, "step": 1980 }, { "epoch": 2.89, "learning_rate": 3.93845273747806e-06, "loss": 2.9407, "step": 2010 }, { "epoch": 2.94, "learning_rate": 1.4190365444071153e-06, "loss": 2.949, "step": 2040 }, { "epoch": 2.98, "learning_rate": 1.5773706946742295e-07, "loss": 2.9544, "step": 2070 }, { "epoch": 3.0, "step": 2085, "total_flos": 1.731497869485015e+17, "train_loss": 3.211610127181458, "train_runtime": 4482.5265, "train_samples_per_second": 74.379, "train_steps_per_second": 0.465 } ], "logging_steps": 30, "max_steps": 2085, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "total_flos": 1.731497869485015e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }