{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9992900248491303, "eval_steps": 500, "global_step": 2816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.1764705882352942e-05, "loss": 2.8037, "step": 50 }, { "epoch": 0.07, "learning_rate": 1.999851133118102e-05, "loss": 2.4343, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.9972058436024328e-05, "loss": 2.233, "step": 150 }, { "epoch": 0.14, "learning_rate": 1.9912624721856506e-05, "loss": 2.1259, "step": 200 }, { "epoch": 0.18, "learning_rate": 1.9820406755172106e-05, "loss": 2.0545, "step": 250 }, { "epoch": 0.21, "learning_rate": 1.9695709530580642e-05, "loss": 1.9909, "step": 300 }, { "epoch": 0.25, "learning_rate": 1.9538945462090903e-05, "loss": 1.9676, "step": 350 }, { "epoch": 0.28, "learning_rate": 1.9350633019124565e-05, "loss": 1.9318, "step": 400 }, { "epoch": 0.32, "learning_rate": 1.913139501177033e-05, "loss": 1.897, "step": 450 }, { "epoch": 0.35, "learning_rate": 1.888195653094972e-05, "loss": 1.8821, "step": 500 }, { "epoch": 0.39, "learning_rate": 1.860314255030716e-05, "loss": 1.8568, "step": 550 }, { "epoch": 0.43, "learning_rate": 1.8295875197755593e-05, "loss": 1.8416, "step": 600 }, { "epoch": 0.46, "learning_rate": 1.796117070570155e-05, "loss": 1.8278, "step": 650 }, { "epoch": 0.5, "learning_rate": 1.7600136050036265e-05, "loss": 1.8107, "step": 700 }, { "epoch": 0.53, "learning_rate": 1.7213965289008755e-05, "loss": 1.799, "step": 750 }, { "epoch": 0.57, "learning_rate": 1.6803935614089426e-05, "loss": 1.7788, "step": 800 }, { "epoch": 0.6, "learning_rate": 1.637140312588523e-05, "loss": 1.7629, "step": 850 }, { "epoch": 0.64, "learning_rate": 1.5917798349076846e-05, "loss": 1.7656, "step": 900 }, { "epoch": 0.67, "learning_rate": 1.5444621501211386e-05, "loss": 1.7521, "step": 950 }, { "epoch": 0.71, "learning_rate": 1.4953437530998262e-05, "loss": 1.738, "step": 1000 }, { "epoch": 0.75, "learning_rate": 1.4445870942518226e-05, "loss": 1.729, "step": 1050 }, { "epoch": 0.78, "learning_rate": 1.3923600422463505e-05, "loss": 1.7321, "step": 1100 }, { "epoch": 0.82, "learning_rate": 1.3388353288178595e-05, "loss": 1.7203, "step": 1150 }, { "epoch": 0.85, "learning_rate": 1.284189977486371e-05, "loss": 1.7134, "step": 1200 }, { "epoch": 0.89, "learning_rate": 1.228604718083506e-05, "loss": 1.7147, "step": 1250 }, { "epoch": 0.92, "learning_rate": 1.1722633890205434e-05, "loss": 1.6945, "step": 1300 }, { "epoch": 0.96, "learning_rate": 1.1153523292753974e-05, "loss": 1.6996, "step": 1350 }, { "epoch": 0.99, "learning_rate": 1.0580597621094166e-05, "loss": 1.6959, "step": 1400 }, { "epoch": 1.03, "learning_rate": 1.0005751725522469e-05, "loss": 1.6188, "step": 1450 }, { "epoch": 1.06, "learning_rate": 9.430886807136206e-06, "loss": 1.6012, "step": 1500 }, { "epoch": 1.1, "learning_rate": 8.857904129947295e-06, "loss": 1.6043, "step": 1550 }, { "epoch": 1.14, "learning_rate": 8.288698732787898e-06, "loss": 1.602, "step": 1600 }, { "epoch": 1.17, "learning_rate": 7.725153161804767e-06, "loss": 1.604, "step": 1650 }, { "epoch": 1.21, "learning_rate": 7.169131244270906e-06, "loss": 1.6012, "step": 1700 }, { "epoch": 1.24, "learning_rate": 6.6224719243066386e-06, "loss": 1.5921, "step": 1750 }, { "epoch": 1.28, "learning_rate": 6.086983180897257e-06, "loss": 1.6, "step": 1800 }, { "epoch": 1.31, "learning_rate": 5.564436048322415e-06, "loss": 1.5887, "step": 1850 }, { "epoch": 1.35, "learning_rate": 5.056558758773566e-06, "loss": 1.59, "step": 1900 }, { "epoch": 1.38, "learning_rate": 4.56503102653177e-06, "loss": 1.5835, "step": 1950 }, { "epoch": 1.42, "learning_rate": 4.091478492609857e-06, "loss": 1.5847, "step": 2000 }, { "epoch": 1.46, "learning_rate": 3.637467348232414e-06, "loss": 1.5934, "step": 2050 }, { "epoch": 1.49, "learning_rate": 3.2044991549353832e-06, "loss": 1.588, "step": 2100 }, { "epoch": 1.53, "learning_rate": 2.7940058784169543e-06, "loss": 1.5882, "step": 2150 }, { "epoch": 1.56, "learning_rate": 2.4073451525643566e-06, "loss": 1.5791, "step": 2200 }, { "epoch": 1.6, "learning_rate": 2.045795789319962e-06, "loss": 1.5802, "step": 2250 }, { "epoch": 1.63, "learning_rate": 1.710553549237043e-06, "loss": 1.5806, "step": 2300 }, { "epoch": 1.67, "learning_rate": 1.402727186713303e-06, "loss": 1.5816, "step": 2350 }, { "epoch": 1.7, "learning_rate": 1.1233347829818563e-06, "loss": 1.5822, "step": 2400 }, { "epoch": 1.74, "learning_rate": 8.733003789876726e-07, "loss": 1.5751, "step": 2450 }, { "epoch": 1.77, "learning_rate": 6.534509192856453e-07, "loss": 1.5708, "step": 2500 }, { "epoch": 1.81, "learning_rate": 4.645135170677595e-07, "loss": 1.5777, "step": 2550 }, { "epoch": 1.85, "learning_rate": 3.0711304936486664e-07, "loss": 1.5673, "step": 2600 }, { "epoch": 1.88, "learning_rate": 1.8177009037645633e-07, "loss": 1.5766, "step": 2650 }, { "epoch": 1.92, "learning_rate": 8.889918976358602e-08, "loss": 1.5677, "step": 2700 }, { "epoch": 1.95, "learning_rate": 2.8807501599226185e-08, "loss": 1.5849, "step": 2750 }, { "epoch": 1.99, "learning_rate": 1.6937685105011813e-09, "loss": 1.5809, "step": 2800 }, { "epoch": 2.0, "step": 2816, "total_flos": 2.1906737398074573e+17, "train_loss": 1.738261958414858, "train_runtime": 14007.5678, "train_samples_per_second": 16.084, "train_steps_per_second": 0.201 } ], "logging_steps": 50, "max_steps": 2816, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 24000, "total_flos": 2.1906737398074573e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }