diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 70.0, - "global_step": 14129010, + "epoch": 80.0, + "global_step": 16147440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -170324,11 +170324,24337 @@ "eval_samples_per_second": 1736.678, "eval_steps_per_second": 72.366, "step": 14129010 + }, + { + "epoch": 70.0, + "learning_rate": 1.501133554297152e-05, + "loss": 1.9092, + "step": 14129500 + }, + { + "epoch": 70.0, + "learning_rate": 1.5010096956545435e-05, + "loss": 1.934, + "step": 14130000 + }, + { + "epoch": 70.01, + "learning_rate": 1.500885837011935e-05, + "loss": 1.9156, + "step": 14130500 + }, + { + "epoch": 70.01, + "learning_rate": 1.5007622260866119e-05, + "loss": 1.8916, + "step": 14131000 + }, + { + "epoch": 70.01, + "learning_rate": 1.5006383674440036e-05, + "loss": 1.9084, + "step": 14131500 + }, + { + "epoch": 70.01, + "learning_rate": 1.5005145088013953e-05, + "loss": 1.9324, + "step": 14132000 + }, + { + "epoch": 70.02, + "learning_rate": 1.500390650158787e-05, + "loss": 1.9229, + "step": 14132500 + }, + { + "epoch": 70.02, + "learning_rate": 1.5002667915161787e-05, + "loss": 1.9346, + "step": 14133000 + }, + { + "epoch": 70.02, + "learning_rate": 1.50014293287357e-05, + "loss": 1.9277, + "step": 14133500 + }, + { + "epoch": 70.02, + "learning_rate": 1.500019321948247e-05, + "loss": 1.9091, + "step": 14134000 + }, + { + "epoch": 70.03, + "learning_rate": 1.4998954633056386e-05, + "loss": 1.9003, + "step": 14134500 + }, + { + "epoch": 70.03, + "learning_rate": 1.4997716046630303e-05, + "loss": 1.9312, + "step": 14135000 + }, + { + "epoch": 70.03, + "learning_rate": 1.499647746020422e-05, + "loss": 1.9082, + "step": 14135500 + }, + { + "epoch": 70.03, + "learning_rate": 1.4995238873778137e-05, + "loss": 1.9046, + "step": 14136000 + }, + { + "epoch": 70.04, + "learning_rate": 1.499400028735205e-05, + "loss": 1.9149, + "step": 14136500 + }, + { + "epoch": 70.04, + "learning_rate": 1.4992761700925967e-05, + "loss": 1.9185, + "step": 14137000 + }, + { + "epoch": 70.04, + "learning_rate": 1.4991523114499884e-05, + "loss": 1.8966, + "step": 14137500 + }, + { + "epoch": 70.04, + "learning_rate": 1.49902845280738e-05, + "loss": 1.9305, + "step": 14138000 + }, + { + "epoch": 70.05, + "learning_rate": 1.498904841882057e-05, + "loss": 1.9024, + "step": 14138500 + }, + { + "epoch": 70.05, + "learning_rate": 1.4987809832394487e-05, + "loss": 1.9305, + "step": 14139000 + }, + { + "epoch": 70.05, + "learning_rate": 1.4986571245968404e-05, + "loss": 1.926, + "step": 14139500 + }, + { + "epoch": 70.05, + "learning_rate": 1.4985332659542317e-05, + "loss": 1.9179, + "step": 14140000 + }, + { + "epoch": 70.06, + "learning_rate": 1.4984094073116234e-05, + "loss": 1.9094, + "step": 14140500 + }, + { + "epoch": 70.06, + "learning_rate": 1.4982855486690151e-05, + "loss": 1.9107, + "step": 14141000 + }, + { + "epoch": 70.06, + "learning_rate": 1.4981616900264066e-05, + "loss": 1.9128, + "step": 14141500 + }, + { + "epoch": 70.06, + "learning_rate": 1.4980378313837983e-05, + "loss": 1.9454, + "step": 14142000 + }, + { + "epoch": 70.07, + "learning_rate": 1.49791397274119e-05, + "loss": 1.8861, + "step": 14142500 + }, + { + "epoch": 70.07, + "learning_rate": 1.4977906095331521e-05, + "loss": 1.9076, + "step": 14143000 + }, + { + "epoch": 70.07, + "learning_rate": 1.4976667508905436e-05, + "loss": 1.9198, + "step": 14143500 + }, + { + "epoch": 70.07, + "learning_rate": 1.4975428922479353e-05, + "loss": 1.9085, + "step": 14144000 + }, + { + "epoch": 70.08, + "learning_rate": 1.497419033605327e-05, + "loss": 1.9269, + "step": 14144500 + }, + { + "epoch": 70.08, + "learning_rate": 1.4972951749627187e-05, + "loss": 1.934, + "step": 14145000 + }, + { + "epoch": 70.08, + "learning_rate": 1.4971713163201104e-05, + "loss": 1.9166, + "step": 14145500 + }, + { + "epoch": 70.08, + "learning_rate": 1.4970474576775018e-05, + "loss": 1.8988, + "step": 14146000 + }, + { + "epoch": 70.09, + "learning_rate": 1.4969238467521788e-05, + "loss": 1.904, + "step": 14146500 + }, + { + "epoch": 70.09, + "learning_rate": 1.4968002358268557e-05, + "loss": 1.9172, + "step": 14147000 + }, + { + "epoch": 70.09, + "learning_rate": 1.4966763771842474e-05, + "loss": 1.9221, + "step": 14147500 + }, + { + "epoch": 70.09, + "learning_rate": 1.4965525185416387e-05, + "loss": 1.9235, + "step": 14148000 + }, + { + "epoch": 70.1, + "learning_rate": 1.4964286598990304e-05, + "loss": 1.899, + "step": 14148500 + }, + { + "epoch": 70.1, + "learning_rate": 1.4963050489737073e-05, + "loss": 1.9048, + "step": 14149000 + }, + { + "epoch": 70.1, + "learning_rate": 1.496181190331099e-05, + "loss": 1.923, + "step": 14149500 + }, + { + "epoch": 70.1, + "learning_rate": 1.4960573316884907e-05, + "loss": 1.9156, + "step": 14150000 + }, + { + "epoch": 70.11, + "learning_rate": 1.4959334730458824e-05, + "loss": 1.9076, + "step": 14150500 + }, + { + "epoch": 70.11, + "learning_rate": 1.4958096144032737e-05, + "loss": 1.9122, + "step": 14151000 + }, + { + "epoch": 70.11, + "learning_rate": 1.4956860034779508e-05, + "loss": 1.9331, + "step": 14151500 + }, + { + "epoch": 70.11, + "learning_rate": 1.4955621448353425e-05, + "loss": 1.9342, + "step": 14152000 + }, + { + "epoch": 70.12, + "learning_rate": 1.495438286192734e-05, + "loss": 1.9068, + "step": 14152500 + }, + { + "epoch": 70.12, + "learning_rate": 1.4953144275501257e-05, + "loss": 1.9199, + "step": 14153000 + }, + { + "epoch": 70.12, + "learning_rate": 1.4951905689075174e-05, + "loss": 1.9388, + "step": 14153500 + }, + { + "epoch": 70.12, + "learning_rate": 1.4950667102649087e-05, + "loss": 1.9124, + "step": 14154000 + }, + { + "epoch": 70.13, + "learning_rate": 1.4949428516223004e-05, + "loss": 1.9239, + "step": 14154500 + }, + { + "epoch": 70.13, + "learning_rate": 1.4948189929796921e-05, + "loss": 1.9145, + "step": 14155000 + }, + { + "epoch": 70.13, + "learning_rate": 1.4946953820543692e-05, + "loss": 1.9196, + "step": 14155500 + }, + { + "epoch": 70.13, + "learning_rate": 1.4945715234117607e-05, + "loss": 1.928, + "step": 14156000 + }, + { + "epoch": 70.14, + "learning_rate": 1.4944476647691524e-05, + "loss": 1.917, + "step": 14156500 + }, + { + "epoch": 70.14, + "learning_rate": 1.4943238061265441e-05, + "loss": 1.9214, + "step": 14157000 + }, + { + "epoch": 70.14, + "learning_rate": 1.4941999474839354e-05, + "loss": 1.9086, + "step": 14157500 + }, + { + "epoch": 70.14, + "learning_rate": 1.4940763365586125e-05, + "loss": 1.9202, + "step": 14158000 + }, + { + "epoch": 70.15, + "learning_rate": 1.4939524779160042e-05, + "loss": 1.9212, + "step": 14158500 + }, + { + "epoch": 70.15, + "learning_rate": 1.4938286192733957e-05, + "loss": 1.9183, + "step": 14159000 + }, + { + "epoch": 70.15, + "learning_rate": 1.4937047606307874e-05, + "loss": 1.8949, + "step": 14159500 + }, + { + "epoch": 70.15, + "learning_rate": 1.4935809019881791e-05, + "loss": 1.9385, + "step": 14160000 + }, + { + "epoch": 70.16, + "learning_rate": 1.4934570433455704e-05, + "loss": 1.917, + "step": 14160500 + }, + { + "epoch": 70.16, + "learning_rate": 1.4933331847029621e-05, + "loss": 1.903, + "step": 14161000 + }, + { + "epoch": 70.16, + "learning_rate": 1.4932093260603538e-05, + "loss": 1.8887, + "step": 14161500 + }, + { + "epoch": 70.16, + "learning_rate": 1.4930854674177455e-05, + "loss": 1.9168, + "step": 14162000 + }, + { + "epoch": 70.17, + "learning_rate": 1.4929618564924224e-05, + "loss": 1.9377, + "step": 14162500 + }, + { + "epoch": 70.17, + "learning_rate": 1.4928379978498141e-05, + "loss": 1.9189, + "step": 14163000 + }, + { + "epoch": 70.17, + "learning_rate": 1.4927141392072056e-05, + "loss": 1.8981, + "step": 14163500 + }, + { + "epoch": 70.17, + "learning_rate": 1.4925902805645971e-05, + "loss": 1.9313, + "step": 14164000 + }, + { + "epoch": 70.18, + "learning_rate": 1.4924666696392742e-05, + "loss": 1.9069, + "step": 14164500 + }, + { + "epoch": 70.18, + "learning_rate": 1.4923428109966659e-05, + "loss": 1.9237, + "step": 14165000 + }, + { + "epoch": 70.18, + "learning_rate": 1.4922189523540576e-05, + "loss": 1.9373, + "step": 14165500 + }, + { + "epoch": 70.18, + "learning_rate": 1.4920950937114491e-05, + "loss": 1.9271, + "step": 14166000 + }, + { + "epoch": 70.19, + "learning_rate": 1.4919712350688406e-05, + "loss": 1.9083, + "step": 14166500 + }, + { + "epoch": 70.19, + "learning_rate": 1.4918473764262322e-05, + "loss": 1.9339, + "step": 14167000 + }, + { + "epoch": 70.19, + "learning_rate": 1.4917235177836238e-05, + "loss": 1.9192, + "step": 14167500 + }, + { + "epoch": 70.19, + "learning_rate": 1.4915999068583009e-05, + "loss": 1.9119, + "step": 14168000 + }, + { + "epoch": 70.2, + "learning_rate": 1.4914762959329776e-05, + "loss": 1.9184, + "step": 14168500 + }, + { + "epoch": 70.2, + "learning_rate": 1.4913524372903693e-05, + "loss": 1.9378, + "step": 14169000 + }, + { + "epoch": 70.2, + "learning_rate": 1.4912285786477608e-05, + "loss": 1.9191, + "step": 14169500 + }, + { + "epoch": 70.2, + "learning_rate": 1.4911049677224379e-05, + "loss": 1.9268, + "step": 14170000 + }, + { + "epoch": 70.21, + "learning_rate": 1.4909811090798296e-05, + "loss": 1.9278, + "step": 14170500 + }, + { + "epoch": 70.21, + "learning_rate": 1.4908572504372213e-05, + "loss": 1.91, + "step": 14171000 + }, + { + "epoch": 70.21, + "learning_rate": 1.4907333917946128e-05, + "loss": 1.8977, + "step": 14171500 + }, + { + "epoch": 70.21, + "learning_rate": 1.4906095331520043e-05, + "loss": 1.9209, + "step": 14172000 + }, + { + "epoch": 70.22, + "learning_rate": 1.4904856745093958e-05, + "loss": 1.9281, + "step": 14172500 + }, + { + "epoch": 70.22, + "learning_rate": 1.4903618158667875e-05, + "loss": 1.922, + "step": 14173000 + }, + { + "epoch": 70.22, + "learning_rate": 1.4902379572241792e-05, + "loss": 1.9178, + "step": 14173500 + }, + { + "epoch": 70.22, + "learning_rate": 1.4901143462988563e-05, + "loss": 1.9257, + "step": 14174000 + }, + { + "epoch": 70.23, + "learning_rate": 1.489990487656248e-05, + "loss": 1.9379, + "step": 14174500 + }, + { + "epoch": 70.23, + "learning_rate": 1.4898666290136393e-05, + "loss": 1.9132, + "step": 14175000 + }, + { + "epoch": 70.23, + "learning_rate": 1.489742770371031e-05, + "loss": 1.933, + "step": 14175500 + }, + { + "epoch": 70.23, + "learning_rate": 1.4896189117284225e-05, + "loss": 1.9212, + "step": 14176000 + }, + { + "epoch": 70.24, + "learning_rate": 1.4894950530858142e-05, + "loss": 1.911, + "step": 14176500 + }, + { + "epoch": 70.24, + "learning_rate": 1.489371194443206e-05, + "loss": 1.9071, + "step": 14177000 + }, + { + "epoch": 70.24, + "learning_rate": 1.489247583517883e-05, + "loss": 1.9094, + "step": 14177500 + }, + { + "epoch": 70.24, + "learning_rate": 1.4891237248752743e-05, + "loss": 1.9401, + "step": 14178000 + }, + { + "epoch": 70.25, + "learning_rate": 1.488999866232666e-05, + "loss": 1.916, + "step": 14178500 + }, + { + "epoch": 70.25, + "learning_rate": 1.4888760075900577e-05, + "loss": 1.9249, + "step": 14179000 + }, + { + "epoch": 70.25, + "learning_rate": 1.4887521489474492e-05, + "loss": 1.9093, + "step": 14179500 + }, + { + "epoch": 70.25, + "learning_rate": 1.488628290304841e-05, + "loss": 1.9099, + "step": 14180000 + }, + { + "epoch": 70.26, + "learning_rate": 1.4885044316622326e-05, + "loss": 1.9229, + "step": 14180500 + }, + { + "epoch": 70.26, + "learning_rate": 1.4883808207369093e-05, + "loss": 1.8947, + "step": 14181000 + }, + { + "epoch": 70.26, + "learning_rate": 1.488256962094301e-05, + "loss": 1.9306, + "step": 14181500 + }, + { + "epoch": 70.26, + "learning_rate": 1.4881331034516927e-05, + "loss": 1.9326, + "step": 14182000 + }, + { + "epoch": 70.27, + "learning_rate": 1.4880092448090844e-05, + "loss": 1.9191, + "step": 14182500 + }, + { + "epoch": 70.27, + "learning_rate": 1.487885386166476e-05, + "loss": 1.9297, + "step": 14183000 + }, + { + "epoch": 70.27, + "learning_rate": 1.4877615275238676e-05, + "loss": 1.9023, + "step": 14183500 + }, + { + "epoch": 70.27, + "learning_rate": 1.4876376688812593e-05, + "loss": 1.9038, + "step": 14184000 + }, + { + "epoch": 70.27, + "learning_rate": 1.487513810238651e-05, + "loss": 1.9299, + "step": 14184500 + }, + { + "epoch": 70.28, + "learning_rate": 1.4873899515960427e-05, + "loss": 1.9251, + "step": 14185000 + }, + { + "epoch": 70.28, + "learning_rate": 1.487266092953434e-05, + "loss": 1.9126, + "step": 14185500 + }, + { + "epoch": 70.28, + "learning_rate": 1.4871422343108257e-05, + "loss": 1.9266, + "step": 14186000 + }, + { + "epoch": 70.28, + "learning_rate": 1.4870183756682174e-05, + "loss": 1.9089, + "step": 14186500 + }, + { + "epoch": 70.29, + "learning_rate": 1.4868945170256091e-05, + "loss": 1.9351, + "step": 14187000 + }, + { + "epoch": 70.29, + "learning_rate": 1.4867706583830007e-05, + "loss": 1.9229, + "step": 14187500 + }, + { + "epoch": 70.29, + "learning_rate": 1.4866467997403923e-05, + "loss": 1.9161, + "step": 14188000 + }, + { + "epoch": 70.29, + "learning_rate": 1.486522941097784e-05, + "loss": 1.9244, + "step": 14188500 + }, + { + "epoch": 70.3, + "learning_rate": 1.4863990824551757e-05, + "loss": 1.9311, + "step": 14189000 + }, + { + "epoch": 70.3, + "learning_rate": 1.4862752238125674e-05, + "loss": 1.91, + "step": 14189500 + }, + { + "epoch": 70.3, + "learning_rate": 1.4861516128872441e-05, + "loss": 1.923, + "step": 14190000 + }, + { + "epoch": 70.3, + "learning_rate": 1.486028001961921e-05, + "loss": 1.907, + "step": 14190500 + }, + { + "epoch": 70.31, + "learning_rate": 1.4859041433193127e-05, + "loss": 1.9204, + "step": 14191000 + }, + { + "epoch": 70.31, + "learning_rate": 1.485780284676704e-05, + "loss": 1.906, + "step": 14191500 + }, + { + "epoch": 70.31, + "learning_rate": 1.4856566737513811e-05, + "loss": 1.9068, + "step": 14192000 + }, + { + "epoch": 70.31, + "learning_rate": 1.4855328151087728e-05, + "loss": 1.916, + "step": 14192500 + }, + { + "epoch": 70.32, + "learning_rate": 1.4854092041834497e-05, + "loss": 1.9202, + "step": 14193000 + }, + { + "epoch": 70.32, + "learning_rate": 1.485285345540841e-05, + "loss": 1.9185, + "step": 14193500 + }, + { + "epoch": 70.32, + "learning_rate": 1.4851614868982327e-05, + "loss": 1.9264, + "step": 14194000 + }, + { + "epoch": 70.32, + "learning_rate": 1.4850376282556244e-05, + "loss": 1.9404, + "step": 14194500 + }, + { + "epoch": 70.33, + "learning_rate": 1.4849137696130161e-05, + "loss": 1.9447, + "step": 14195000 + }, + { + "epoch": 70.33, + "learning_rate": 1.4847899109704078e-05, + "loss": 1.913, + "step": 14195500 + }, + { + "epoch": 70.33, + "learning_rate": 1.4846660523277995e-05, + "loss": 1.9202, + "step": 14196000 + }, + { + "epoch": 70.33, + "learning_rate": 1.4845424414024764e-05, + "loss": 1.9518, + "step": 14196500 + }, + { + "epoch": 70.34, + "learning_rate": 1.4844188304771531e-05, + "loss": 1.938, + "step": 14197000 + }, + { + "epoch": 70.34, + "learning_rate": 1.4842949718345448e-05, + "loss": 1.9206, + "step": 14197500 + }, + { + "epoch": 70.34, + "learning_rate": 1.4841711131919365e-05, + "loss": 1.9104, + "step": 14198000 + }, + { + "epoch": 70.34, + "learning_rate": 1.484047254549328e-05, + "loss": 1.935, + "step": 14198500 + }, + { + "epoch": 70.35, + "learning_rate": 1.4839233959067197e-05, + "loss": 1.923, + "step": 14199000 + }, + { + "epoch": 70.35, + "learning_rate": 1.4837995372641114e-05, + "loss": 1.9119, + "step": 14199500 + }, + { + "epoch": 70.35, + "learning_rate": 1.4836759263387881e-05, + "loss": 1.9371, + "step": 14200000 + }, + { + "epoch": 70.35, + "learning_rate": 1.4835520676961798e-05, + "loss": 1.9068, + "step": 14200500 + }, + { + "epoch": 70.36, + "learning_rate": 1.4834282090535715e-05, + "loss": 1.8956, + "step": 14201000 + }, + { + "epoch": 70.36, + "learning_rate": 1.4833043504109632e-05, + "loss": 1.9174, + "step": 14201500 + }, + { + "epoch": 70.36, + "learning_rate": 1.4831804917683547e-05, + "loss": 1.9286, + "step": 14202000 + }, + { + "epoch": 70.36, + "learning_rate": 1.4830568808430314e-05, + "loss": 1.9341, + "step": 14202500 + }, + { + "epoch": 70.37, + "learning_rate": 1.4829330222004231e-05, + "loss": 1.9158, + "step": 14203000 + }, + { + "epoch": 70.37, + "learning_rate": 1.4828091635578148e-05, + "loss": 1.8926, + "step": 14203500 + }, + { + "epoch": 70.37, + "learning_rate": 1.4826853049152065e-05, + "loss": 1.9321, + "step": 14204000 + }, + { + "epoch": 70.37, + "learning_rate": 1.4825614462725982e-05, + "loss": 1.9348, + "step": 14204500 + }, + { + "epoch": 70.38, + "learning_rate": 1.4824375876299897e-05, + "loss": 1.9092, + "step": 14205000 + }, + { + "epoch": 70.38, + "learning_rate": 1.4823137289873814e-05, + "loss": 1.8946, + "step": 14205500 + }, + { + "epoch": 70.38, + "learning_rate": 1.482189870344773e-05, + "loss": 1.9395, + "step": 14206000 + }, + { + "epoch": 70.38, + "learning_rate": 1.4820660117021645e-05, + "loss": 1.9405, + "step": 14206500 + }, + { + "epoch": 70.39, + "learning_rate": 1.4819424007768415e-05, + "loss": 1.9536, + "step": 14207000 + }, + { + "epoch": 70.39, + "learning_rate": 1.4818187898515184e-05, + "loss": 1.9367, + "step": 14207500 + }, + { + "epoch": 70.39, + "learning_rate": 1.4816949312089099e-05, + "loss": 1.9135, + "step": 14208000 + }, + { + "epoch": 70.39, + "learning_rate": 1.4815710725663014e-05, + "loss": 1.9384, + "step": 14208500 + }, + { + "epoch": 70.4, + "learning_rate": 1.4814472139236931e-05, + "loss": 1.929, + "step": 14209000 + }, + { + "epoch": 70.4, + "learning_rate": 1.4813233552810848e-05, + "loss": 1.9423, + "step": 14209500 + }, + { + "epoch": 70.4, + "learning_rate": 1.4811994966384765e-05, + "loss": 1.928, + "step": 14210000 + }, + { + "epoch": 70.4, + "learning_rate": 1.4810756379958682e-05, + "loss": 1.9351, + "step": 14210500 + }, + { + "epoch": 70.41, + "learning_rate": 1.4809517793532599e-05, + "loss": 1.9189, + "step": 14211000 + }, + { + "epoch": 70.41, + "learning_rate": 1.4808281684279366e-05, + "loss": 1.8931, + "step": 14211500 + }, + { + "epoch": 70.41, + "learning_rate": 1.4807043097853281e-05, + "loss": 1.9437, + "step": 14212000 + }, + { + "epoch": 70.41, + "learning_rate": 1.4805806988600052e-05, + "loss": 1.9465, + "step": 14212500 + }, + { + "epoch": 70.42, + "learning_rate": 1.4804568402173969e-05, + "loss": 1.931, + "step": 14213000 + }, + { + "epoch": 70.42, + "learning_rate": 1.4803329815747886e-05, + "loss": 1.9089, + "step": 14213500 + }, + { + "epoch": 70.42, + "learning_rate": 1.4802091229321801e-05, + "loss": 1.9127, + "step": 14214000 + }, + { + "epoch": 70.42, + "learning_rate": 1.4800852642895716e-05, + "loss": 1.9445, + "step": 14214500 + }, + { + "epoch": 70.43, + "learning_rate": 1.4799614056469633e-05, + "loss": 1.924, + "step": 14215000 + }, + { + "epoch": 70.43, + "learning_rate": 1.4798375470043548e-05, + "loss": 1.9206, + "step": 14215500 + }, + { + "epoch": 70.43, + "learning_rate": 1.4797136883617465e-05, + "loss": 1.9038, + "step": 14216000 + }, + { + "epoch": 70.43, + "learning_rate": 1.4795898297191382e-05, + "loss": 1.9108, + "step": 14216500 + }, + { + "epoch": 70.44, + "learning_rate": 1.4794659710765299e-05, + "loss": 1.9193, + "step": 14217000 + }, + { + "epoch": 70.44, + "learning_rate": 1.4793421124339216e-05, + "loss": 1.9077, + "step": 14217500 + }, + { + "epoch": 70.44, + "learning_rate": 1.4792182537913133e-05, + "loss": 1.932, + "step": 14218000 + }, + { + "epoch": 70.44, + "learning_rate": 1.4790943951487046e-05, + "loss": 1.9143, + "step": 14218500 + }, + { + "epoch": 70.45, + "learning_rate": 1.4789705365060963e-05, + "loss": 1.9126, + "step": 14219000 + }, + { + "epoch": 70.45, + "learning_rate": 1.478846677863488e-05, + "loss": 1.8989, + "step": 14219500 + }, + { + "epoch": 70.45, + "learning_rate": 1.4787228192208796e-05, + "loss": 1.934, + "step": 14220000 + }, + { + "epoch": 70.45, + "learning_rate": 1.4785992082955566e-05, + "loss": 1.9429, + "step": 14220500 + }, + { + "epoch": 70.46, + "learning_rate": 1.4784753496529483e-05, + "loss": 1.9098, + "step": 14221000 + }, + { + "epoch": 70.46, + "learning_rate": 1.47835149101034e-05, + "loss": 1.9435, + "step": 14221500 + }, + { + "epoch": 70.46, + "learning_rate": 1.4782278800850165e-05, + "loss": 1.9157, + "step": 14222000 + }, + { + "epoch": 70.46, + "learning_rate": 1.4781040214424082e-05, + "loss": 1.9525, + "step": 14222500 + }, + { + "epoch": 70.47, + "learning_rate": 1.4779801627998e-05, + "loss": 1.9214, + "step": 14223000 + }, + { + "epoch": 70.47, + "learning_rate": 1.4778563041571916e-05, + "loss": 1.9241, + "step": 14223500 + }, + { + "epoch": 70.47, + "learning_rate": 1.4777324455145833e-05, + "loss": 1.9346, + "step": 14224000 + }, + { + "epoch": 70.47, + "learning_rate": 1.477608586871975e-05, + "loss": 1.9256, + "step": 14224500 + }, + { + "epoch": 70.48, + "learning_rate": 1.4774847282293664e-05, + "loss": 1.9195, + "step": 14225000 + }, + { + "epoch": 70.48, + "learning_rate": 1.477360869586758e-05, + "loss": 1.918, + "step": 14225500 + }, + { + "epoch": 70.48, + "learning_rate": 1.4772370109441497e-05, + "loss": 1.902, + "step": 14226000 + }, + { + "epoch": 70.48, + "learning_rate": 1.4771134000188266e-05, + "loss": 1.9384, + "step": 14226500 + }, + { + "epoch": 70.49, + "learning_rate": 1.4769895413762183e-05, + "loss": 1.923, + "step": 14227000 + }, + { + "epoch": 70.49, + "learning_rate": 1.47686568273361e-05, + "loss": 1.9227, + "step": 14227500 + }, + { + "epoch": 70.49, + "learning_rate": 1.4767418240910014e-05, + "loss": 1.9435, + "step": 14228000 + }, + { + "epoch": 70.49, + "learning_rate": 1.4766182131656784e-05, + "loss": 1.9177, + "step": 14228500 + }, + { + "epoch": 70.5, + "learning_rate": 1.4764946022403553e-05, + "loss": 1.9164, + "step": 14229000 + }, + { + "epoch": 70.5, + "learning_rate": 1.476370743597747e-05, + "loss": 1.9487, + "step": 14229500 + }, + { + "epoch": 70.5, + "learning_rate": 1.4762468849551383e-05, + "loss": 1.9319, + "step": 14230000 + }, + { + "epoch": 70.5, + "learning_rate": 1.47612302631253e-05, + "loss": 1.9454, + "step": 14230500 + }, + { + "epoch": 70.51, + "learning_rate": 1.4759991676699217e-05, + "loss": 1.9279, + "step": 14231000 + }, + { + "epoch": 70.51, + "learning_rate": 1.4758753090273134e-05, + "loss": 1.9226, + "step": 14231500 + }, + { + "epoch": 70.51, + "learning_rate": 1.4757516981019903e-05, + "loss": 1.9383, + "step": 14232000 + }, + { + "epoch": 70.51, + "learning_rate": 1.475627839459382e-05, + "loss": 1.9528, + "step": 14232500 + }, + { + "epoch": 70.52, + "learning_rate": 1.4755039808167733e-05, + "loss": 1.9045, + "step": 14233000 + }, + { + "epoch": 70.52, + "learning_rate": 1.475380122174165e-05, + "loss": 1.9261, + "step": 14233500 + }, + { + "epoch": 70.52, + "learning_rate": 1.4752562635315567e-05, + "loss": 1.9085, + "step": 14234000 + }, + { + "epoch": 70.52, + "learning_rate": 1.4751326526062336e-05, + "loss": 1.9051, + "step": 14234500 + }, + { + "epoch": 70.53, + "learning_rate": 1.4750087939636253e-05, + "loss": 1.917, + "step": 14235000 + }, + { + "epoch": 70.53, + "learning_rate": 1.474884935321017e-05, + "loss": 1.9384, + "step": 14235500 + }, + { + "epoch": 70.53, + "learning_rate": 1.4747610766784084e-05, + "loss": 1.9095, + "step": 14236000 + }, + { + "epoch": 70.53, + "learning_rate": 1.4746372180358e-05, + "loss": 1.9163, + "step": 14236500 + }, + { + "epoch": 70.54, + "learning_rate": 1.4745133593931917e-05, + "loss": 1.9148, + "step": 14237000 + }, + { + "epoch": 70.54, + "learning_rate": 1.4743895007505834e-05, + "loss": 1.9276, + "step": 14237500 + }, + { + "epoch": 70.54, + "learning_rate": 1.4742656421079751e-05, + "loss": 1.9347, + "step": 14238000 + }, + { + "epoch": 70.54, + "learning_rate": 1.4741417834653668e-05, + "loss": 1.9077, + "step": 14238500 + }, + { + "epoch": 70.54, + "learning_rate": 1.4740179248227583e-05, + "loss": 1.8887, + "step": 14239000 + }, + { + "epoch": 70.55, + "learning_rate": 1.47389406618015e-05, + "loss": 1.9124, + "step": 14239500 + }, + { + "epoch": 70.55, + "learning_rate": 1.4737702075375417e-05, + "loss": 1.9259, + "step": 14240000 + }, + { + "epoch": 70.55, + "learning_rate": 1.473646348894933e-05, + "loss": 1.9341, + "step": 14240500 + }, + { + "epoch": 70.55, + "learning_rate": 1.4735224902523248e-05, + "loss": 1.9211, + "step": 14241000 + }, + { + "epoch": 70.56, + "learning_rate": 1.4733986316097165e-05, + "loss": 1.9132, + "step": 14241500 + }, + { + "epoch": 70.56, + "learning_rate": 1.4732747729671082e-05, + "loss": 1.9139, + "step": 14242000 + }, + { + "epoch": 70.56, + "learning_rate": 1.47315140975907e-05, + "loss": 1.9293, + "step": 14242500 + }, + { + "epoch": 70.56, + "learning_rate": 1.4730275511164617e-05, + "loss": 1.9161, + "step": 14243000 + }, + { + "epoch": 70.57, + "learning_rate": 1.4729036924738534e-05, + "loss": 1.9249, + "step": 14243500 + }, + { + "epoch": 70.57, + "learning_rate": 1.4727798338312451e-05, + "loss": 1.8961, + "step": 14244000 + }, + { + "epoch": 70.57, + "learning_rate": 1.4726559751886368e-05, + "loss": 1.9241, + "step": 14244500 + }, + { + "epoch": 70.57, + "learning_rate": 1.4725323642633137e-05, + "loss": 1.9244, + "step": 14245000 + }, + { + "epoch": 70.58, + "learning_rate": 1.4724085056207052e-05, + "loss": 1.9339, + "step": 14245500 + }, + { + "epoch": 70.58, + "learning_rate": 1.4722848946953821e-05, + "loss": 1.9345, + "step": 14246000 + }, + { + "epoch": 70.58, + "learning_rate": 1.4721610360527738e-05, + "loss": 1.9336, + "step": 14246500 + }, + { + "epoch": 70.58, + "learning_rate": 1.4720371774101655e-05, + "loss": 1.9386, + "step": 14247000 + }, + { + "epoch": 70.59, + "learning_rate": 1.4719133187675572e-05, + "loss": 1.9128, + "step": 14247500 + }, + { + "epoch": 70.59, + "learning_rate": 1.4717894601249487e-05, + "loss": 1.9169, + "step": 14248000 + }, + { + "epoch": 70.59, + "learning_rate": 1.4716656014823402e-05, + "loss": 1.9249, + "step": 14248500 + }, + { + "epoch": 70.59, + "learning_rate": 1.4715417428397318e-05, + "loss": 1.9068, + "step": 14249000 + }, + { + "epoch": 70.6, + "learning_rate": 1.4714178841971235e-05, + "loss": 1.9286, + "step": 14249500 + }, + { + "epoch": 70.6, + "learning_rate": 1.4712942732718005e-05, + "loss": 1.9303, + "step": 14250000 + }, + { + "epoch": 70.6, + "learning_rate": 1.4711704146291922e-05, + "loss": 1.9452, + "step": 14250500 + }, + { + "epoch": 70.6, + "learning_rate": 1.4710465559865839e-05, + "loss": 1.9394, + "step": 14251000 + }, + { + "epoch": 70.61, + "learning_rate": 1.4709226973439754e-05, + "loss": 1.9249, + "step": 14251500 + }, + { + "epoch": 70.61, + "learning_rate": 1.470798838701367e-05, + "loss": 1.933, + "step": 14252000 + }, + { + "epoch": 70.61, + "learning_rate": 1.4706749800587585e-05, + "loss": 1.9338, + "step": 14252500 + }, + { + "epoch": 70.61, + "learning_rate": 1.4705513691334355e-05, + "loss": 1.9037, + "step": 14253000 + }, + { + "epoch": 70.62, + "learning_rate": 1.4704275104908272e-05, + "loss": 1.9234, + "step": 14253500 + }, + { + "epoch": 70.62, + "learning_rate": 1.4703036518482189e-05, + "loss": 1.9472, + "step": 14254000 + }, + { + "epoch": 70.62, + "learning_rate": 1.4701800409228954e-05, + "loss": 1.9317, + "step": 14254500 + }, + { + "epoch": 70.62, + "learning_rate": 1.4700561822802871e-05, + "loss": 1.9171, + "step": 14255000 + }, + { + "epoch": 70.63, + "learning_rate": 1.4699325713549642e-05, + "loss": 1.9191, + "step": 14255500 + }, + { + "epoch": 70.63, + "learning_rate": 1.4698087127123559e-05, + "loss": 1.9321, + "step": 14256000 + }, + { + "epoch": 70.63, + "learning_rate": 1.4696848540697476e-05, + "loss": 1.9099, + "step": 14256500 + }, + { + "epoch": 70.63, + "learning_rate": 1.469560995427139e-05, + "loss": 1.9314, + "step": 14257000 + }, + { + "epoch": 70.64, + "learning_rate": 1.4694371367845306e-05, + "loss": 1.9129, + "step": 14257500 + }, + { + "epoch": 70.64, + "learning_rate": 1.4693132781419221e-05, + "loss": 1.9268, + "step": 14258000 + }, + { + "epoch": 70.64, + "learning_rate": 1.4691894194993138e-05, + "loss": 1.9125, + "step": 14258500 + }, + { + "epoch": 70.64, + "learning_rate": 1.4690655608567055e-05, + "loss": 1.9094, + "step": 14259000 + }, + { + "epoch": 70.65, + "learning_rate": 1.4689419499313826e-05, + "loss": 1.9241, + "step": 14259500 + }, + { + "epoch": 70.65, + "learning_rate": 1.468818091288774e-05, + "loss": 1.9159, + "step": 14260000 + }, + { + "epoch": 70.65, + "learning_rate": 1.4686942326461656e-05, + "loss": 1.9386, + "step": 14260500 + }, + { + "epoch": 70.65, + "learning_rate": 1.4685703740035573e-05, + "loss": 1.9579, + "step": 14261000 + }, + { + "epoch": 70.66, + "learning_rate": 1.4684465153609488e-05, + "loss": 1.9415, + "step": 14261500 + }, + { + "epoch": 70.66, + "learning_rate": 1.4683226567183405e-05, + "loss": 1.9202, + "step": 14262000 + }, + { + "epoch": 70.66, + "learning_rate": 1.4681987980757322e-05, + "loss": 1.899, + "step": 14262500 + }, + { + "epoch": 70.66, + "learning_rate": 1.468075187150409e-05, + "loss": 1.8989, + "step": 14263000 + }, + { + "epoch": 70.67, + "learning_rate": 1.4679513285078006e-05, + "loss": 1.9463, + "step": 14263500 + }, + { + "epoch": 70.67, + "learning_rate": 1.4678274698651923e-05, + "loss": 1.9243, + "step": 14264000 + }, + { + "epoch": 70.67, + "learning_rate": 1.467703611222584e-05, + "loss": 1.9177, + "step": 14264500 + }, + { + "epoch": 70.67, + "learning_rate": 1.4675797525799755e-05, + "loss": 1.9098, + "step": 14265000 + }, + { + "epoch": 70.68, + "learning_rate": 1.4674558939373672e-05, + "loss": 1.9021, + "step": 14265500 + }, + { + "epoch": 70.68, + "learning_rate": 1.467332035294759e-05, + "loss": 1.9324, + "step": 14266000 + }, + { + "epoch": 70.68, + "learning_rate": 1.4672081766521506e-05, + "loss": 1.9254, + "step": 14266500 + }, + { + "epoch": 70.68, + "learning_rate": 1.4670843180095423e-05, + "loss": 1.9223, + "step": 14267000 + }, + { + "epoch": 70.69, + "learning_rate": 1.466960707084219e-05, + "loss": 1.9399, + "step": 14267500 + }, + { + "epoch": 70.69, + "learning_rate": 1.4668368484416105e-05, + "loss": 1.9333, + "step": 14268000 + }, + { + "epoch": 70.69, + "learning_rate": 1.4667129897990022e-05, + "loss": 1.9172, + "step": 14268500 + }, + { + "epoch": 70.69, + "learning_rate": 1.466589131156394e-05, + "loss": 1.9369, + "step": 14269000 + }, + { + "epoch": 70.7, + "learning_rate": 1.4664655202310706e-05, + "loss": 1.9337, + "step": 14269500 + }, + { + "epoch": 70.7, + "learning_rate": 1.4663416615884623e-05, + "loss": 1.9318, + "step": 14270000 + }, + { + "epoch": 70.7, + "learning_rate": 1.466217802945854e-05, + "loss": 1.9265, + "step": 14270500 + }, + { + "epoch": 70.7, + "learning_rate": 1.4660939443032457e-05, + "loss": 1.9355, + "step": 14271000 + }, + { + "epoch": 70.71, + "learning_rate": 1.4659700856606372e-05, + "loss": 1.9118, + "step": 14271500 + }, + { + "epoch": 70.71, + "learning_rate": 1.465846227018029e-05, + "loss": 1.9165, + "step": 14272000 + }, + { + "epoch": 70.71, + "learning_rate": 1.4657226160927056e-05, + "loss": 1.9235, + "step": 14272500 + }, + { + "epoch": 70.71, + "learning_rate": 1.4655990051673827e-05, + "loss": 1.9385, + "step": 14273000 + }, + { + "epoch": 70.72, + "learning_rate": 1.4654753942420596e-05, + "loss": 1.958, + "step": 14273500 + }, + { + "epoch": 70.72, + "learning_rate": 1.4653515355994513e-05, + "loss": 1.9265, + "step": 14274000 + }, + { + "epoch": 70.72, + "learning_rate": 1.465227924674128e-05, + "loss": 1.9252, + "step": 14274500 + }, + { + "epoch": 70.72, + "learning_rate": 1.4651040660315197e-05, + "loss": 1.9558, + "step": 14275000 + }, + { + "epoch": 70.73, + "learning_rate": 1.4649802073889114e-05, + "loss": 1.9199, + "step": 14275500 + }, + { + "epoch": 70.73, + "learning_rate": 1.4648563487463029e-05, + "loss": 1.9237, + "step": 14276000 + }, + { + "epoch": 70.73, + "learning_rate": 1.4647324901036946e-05, + "loss": 1.9358, + "step": 14276500 + }, + { + "epoch": 70.73, + "learning_rate": 1.4646086314610863e-05, + "loss": 1.9105, + "step": 14277000 + }, + { + "epoch": 70.74, + "learning_rate": 1.4644847728184776e-05, + "loss": 1.9269, + "step": 14277500 + }, + { + "epoch": 70.74, + "learning_rate": 1.4643609141758693e-05, + "loss": 1.94, + "step": 14278000 + }, + { + "epoch": 70.74, + "learning_rate": 1.4642373032505464e-05, + "loss": 1.9285, + "step": 14278500 + }, + { + "epoch": 70.74, + "learning_rate": 1.4641134446079379e-05, + "loss": 1.914, + "step": 14279000 + }, + { + "epoch": 70.75, + "learning_rate": 1.4639895859653296e-05, + "loss": 1.9194, + "step": 14279500 + }, + { + "epoch": 70.75, + "learning_rate": 1.4638657273227213e-05, + "loss": 1.9207, + "step": 14280000 + }, + { + "epoch": 70.75, + "learning_rate": 1.4637418686801126e-05, + "loss": 1.8997, + "step": 14280500 + }, + { + "epoch": 70.75, + "learning_rate": 1.4636180100375043e-05, + "loss": 1.9406, + "step": 14281000 + }, + { + "epoch": 70.76, + "learning_rate": 1.4634943991121814e-05, + "loss": 1.9452, + "step": 14281500 + }, + { + "epoch": 70.76, + "learning_rate": 1.463370540469573e-05, + "loss": 1.9441, + "step": 14282000 + }, + { + "epoch": 70.76, + "learning_rate": 1.4632466818269646e-05, + "loss": 1.9153, + "step": 14282500 + }, + { + "epoch": 70.76, + "learning_rate": 1.4631228231843563e-05, + "loss": 1.9497, + "step": 14283000 + }, + { + "epoch": 70.77, + "learning_rate": 1.4629989645417478e-05, + "loss": 1.9459, + "step": 14283500 + }, + { + "epoch": 70.77, + "learning_rate": 1.4628751058991393e-05, + "loss": 1.9378, + "step": 14284000 + }, + { + "epoch": 70.77, + "learning_rate": 1.462751247256531e-05, + "loss": 1.9328, + "step": 14284500 + }, + { + "epoch": 70.77, + "learning_rate": 1.4626273886139227e-05, + "loss": 1.9118, + "step": 14285000 + }, + { + "epoch": 70.78, + "learning_rate": 1.4625035299713144e-05, + "loss": 1.9366, + "step": 14285500 + }, + { + "epoch": 70.78, + "learning_rate": 1.4623799190459913e-05, + "loss": 1.9217, + "step": 14286000 + }, + { + "epoch": 70.78, + "learning_rate": 1.462256060403383e-05, + "loss": 1.93, + "step": 14286500 + }, + { + "epoch": 70.78, + "learning_rate": 1.4621322017607743e-05, + "loss": 1.9397, + "step": 14287000 + }, + { + "epoch": 70.79, + "learning_rate": 1.462008343118166e-05, + "loss": 1.914, + "step": 14287500 + }, + { + "epoch": 70.79, + "learning_rate": 1.4618844844755577e-05, + "loss": 1.91, + "step": 14288000 + }, + { + "epoch": 70.79, + "learning_rate": 1.4617606258329494e-05, + "loss": 1.918, + "step": 14288500 + }, + { + "epoch": 70.79, + "learning_rate": 1.4616367671903411e-05, + "loss": 1.9324, + "step": 14289000 + }, + { + "epoch": 70.8, + "learning_rate": 1.461513156265018e-05, + "loss": 1.9045, + "step": 14289500 + }, + { + "epoch": 70.8, + "learning_rate": 1.4613892976224095e-05, + "loss": 1.9325, + "step": 14290000 + }, + { + "epoch": 70.8, + "learning_rate": 1.461265438979801e-05, + "loss": 1.9403, + "step": 14290500 + }, + { + "epoch": 70.8, + "learning_rate": 1.4611415803371927e-05, + "loss": 1.9243, + "step": 14291000 + }, + { + "epoch": 70.81, + "learning_rate": 1.4610177216945844e-05, + "loss": 1.9056, + "step": 14291500 + }, + { + "epoch": 70.81, + "learning_rate": 1.4608938630519761e-05, + "loss": 1.9464, + "step": 14292000 + }, + { + "epoch": 70.81, + "learning_rate": 1.4607700044093678e-05, + "loss": 1.9111, + "step": 14292500 + }, + { + "epoch": 70.81, + "learning_rate": 1.4606461457667595e-05, + "loss": 1.9097, + "step": 14293000 + }, + { + "epoch": 70.81, + "learning_rate": 1.4605222871241512e-05, + "loss": 1.9512, + "step": 14293500 + }, + { + "epoch": 70.82, + "learning_rate": 1.4603984284815427e-05, + "loss": 1.944, + "step": 14294000 + }, + { + "epoch": 70.82, + "learning_rate": 1.4602748175562194e-05, + "loss": 1.9366, + "step": 14294500 + }, + { + "epoch": 70.82, + "learning_rate": 1.4601509589136111e-05, + "loss": 1.9291, + "step": 14295000 + }, + { + "epoch": 70.82, + "learning_rate": 1.4600271002710028e-05, + "loss": 1.9313, + "step": 14295500 + }, + { + "epoch": 70.83, + "learning_rate": 1.4599032416283945e-05, + "loss": 1.9181, + "step": 14296000 + }, + { + "epoch": 70.83, + "learning_rate": 1.4597793829857862e-05, + "loss": 1.911, + "step": 14296500 + }, + { + "epoch": 70.83, + "learning_rate": 1.4596555243431779e-05, + "loss": 1.8992, + "step": 14297000 + }, + { + "epoch": 70.83, + "learning_rate": 1.4595316657005692e-05, + "loss": 1.9263, + "step": 14297500 + }, + { + "epoch": 70.84, + "learning_rate": 1.459407807057961e-05, + "loss": 1.926, + "step": 14298000 + }, + { + "epoch": 70.84, + "learning_rate": 1.4592839484153525e-05, + "loss": 1.9321, + "step": 14298500 + }, + { + "epoch": 70.84, + "learning_rate": 1.4591603374900295e-05, + "loss": 1.9202, + "step": 14299000 + }, + { + "epoch": 70.84, + "learning_rate": 1.4590364788474212e-05, + "loss": 1.9277, + "step": 14299500 + }, + { + "epoch": 70.85, + "learning_rate": 1.4589126202048129e-05, + "loss": 1.9134, + "step": 14300000 + }, + { + "epoch": 70.85, + "learning_rate": 1.4587887615622043e-05, + "loss": 1.9386, + "step": 14300500 + }, + { + "epoch": 70.85, + "learning_rate": 1.458664902919596e-05, + "loss": 1.9249, + "step": 14301000 + }, + { + "epoch": 70.85, + "learning_rate": 1.4585410442769876e-05, + "loss": 1.9034, + "step": 14301500 + }, + { + "epoch": 70.86, + "learning_rate": 1.4584171856343792e-05, + "loss": 1.9197, + "step": 14302000 + }, + { + "epoch": 70.86, + "learning_rate": 1.4582933269917709e-05, + "loss": 1.9067, + "step": 14302500 + }, + { + "epoch": 70.86, + "learning_rate": 1.4581694683491626e-05, + "loss": 1.9432, + "step": 14303000 + }, + { + "epoch": 70.86, + "learning_rate": 1.4580456097065542e-05, + "loss": 1.9321, + "step": 14303500 + }, + { + "epoch": 70.87, + "learning_rate": 1.457921998781231e-05, + "loss": 1.926, + "step": 14304000 + }, + { + "epoch": 70.87, + "learning_rate": 1.4577983878559078e-05, + "loss": 1.9479, + "step": 14304500 + }, + { + "epoch": 70.87, + "learning_rate": 1.4576745292132995e-05, + "loss": 1.9376, + "step": 14305000 + }, + { + "epoch": 70.87, + "learning_rate": 1.4575506705706912e-05, + "loss": 1.9283, + "step": 14305500 + }, + { + "epoch": 70.88, + "learning_rate": 1.457427059645368e-05, + "loss": 1.9042, + "step": 14306000 + }, + { + "epoch": 70.88, + "learning_rate": 1.4573032010027596e-05, + "loss": 1.9283, + "step": 14306500 + }, + { + "epoch": 70.88, + "learning_rate": 1.4571795900774365e-05, + "loss": 1.9409, + "step": 14307000 + }, + { + "epoch": 70.88, + "learning_rate": 1.4570557314348282e-05, + "loss": 1.9202, + "step": 14307500 + }, + { + "epoch": 70.89, + "learning_rate": 1.4569318727922199e-05, + "loss": 1.8992, + "step": 14308000 + }, + { + "epoch": 70.89, + "learning_rate": 1.4568080141496112e-05, + "loss": 1.9492, + "step": 14308500 + }, + { + "epoch": 70.89, + "learning_rate": 1.456684155507003e-05, + "loss": 1.92, + "step": 14309000 + }, + { + "epoch": 70.89, + "learning_rate": 1.4565602968643946e-05, + "loss": 1.9348, + "step": 14309500 + }, + { + "epoch": 70.9, + "learning_rate": 1.4564364382217863e-05, + "loss": 1.9324, + "step": 14310000 + }, + { + "epoch": 70.9, + "learning_rate": 1.456312579579178e-05, + "loss": 1.899, + "step": 14310500 + }, + { + "epoch": 70.9, + "learning_rate": 1.4561889686538549e-05, + "loss": 1.9318, + "step": 14311000 + }, + { + "epoch": 70.9, + "learning_rate": 1.4560651100112466e-05, + "loss": 1.9448, + "step": 14311500 + }, + { + "epoch": 70.91, + "learning_rate": 1.455941251368638e-05, + "loss": 1.9233, + "step": 14312000 + }, + { + "epoch": 70.91, + "learning_rate": 1.4558173927260296e-05, + "loss": 1.9092, + "step": 14312500 + }, + { + "epoch": 70.91, + "learning_rate": 1.4556935340834213e-05, + "loss": 1.9381, + "step": 14313000 + }, + { + "epoch": 70.91, + "learning_rate": 1.455569675440813e-05, + "loss": 1.9046, + "step": 14313500 + }, + { + "epoch": 70.92, + "learning_rate": 1.4554458167982045e-05, + "loss": 1.9572, + "step": 14314000 + }, + { + "epoch": 70.92, + "learning_rate": 1.4553219581555962e-05, + "loss": 1.92, + "step": 14314500 + }, + { + "epoch": 70.92, + "learning_rate": 1.455198099512988e-05, + "loss": 1.9298, + "step": 14315000 + }, + { + "epoch": 70.92, + "learning_rate": 1.4550744885876646e-05, + "loss": 1.9263, + "step": 14315500 + }, + { + "epoch": 70.93, + "learning_rate": 1.4549506299450563e-05, + "loss": 1.9262, + "step": 14316000 + }, + { + "epoch": 70.93, + "learning_rate": 1.454826771302448e-05, + "loss": 1.9259, + "step": 14316500 + }, + { + "epoch": 70.93, + "learning_rate": 1.4547029126598397e-05, + "loss": 1.9261, + "step": 14317000 + }, + { + "epoch": 70.93, + "learning_rate": 1.4545790540172312e-05, + "loss": 1.9463, + "step": 14317500 + }, + { + "epoch": 70.94, + "learning_rate": 1.454455195374623e-05, + "loss": 1.9174, + "step": 14318000 + }, + { + "epoch": 70.94, + "learning_rate": 1.4543315844492997e-05, + "loss": 1.9263, + "step": 14318500 + }, + { + "epoch": 70.94, + "learning_rate": 1.4542077258066913e-05, + "loss": 1.9231, + "step": 14319000 + }, + { + "epoch": 70.94, + "learning_rate": 1.454083867164083e-05, + "loss": 1.9326, + "step": 14319500 + }, + { + "epoch": 70.95, + "learning_rate": 1.4539600085214747e-05, + "loss": 1.9383, + "step": 14320000 + }, + { + "epoch": 70.95, + "learning_rate": 1.4538361498788664e-05, + "loss": 1.9096, + "step": 14320500 + }, + { + "epoch": 70.95, + "learning_rate": 1.453712538953543e-05, + "loss": 1.9024, + "step": 14321000 + }, + { + "epoch": 70.95, + "learning_rate": 1.4535886803109347e-05, + "loss": 1.9123, + "step": 14321500 + }, + { + "epoch": 70.96, + "learning_rate": 1.4534648216683263e-05, + "loss": 1.9215, + "step": 14322000 + }, + { + "epoch": 70.96, + "learning_rate": 1.453340963025718e-05, + "loss": 1.9452, + "step": 14322500 + }, + { + "epoch": 70.96, + "learning_rate": 1.4532171043831097e-05, + "loss": 1.9415, + "step": 14323000 + }, + { + "epoch": 70.96, + "learning_rate": 1.4530932457405014e-05, + "loss": 1.942, + "step": 14323500 + }, + { + "epoch": 70.97, + "learning_rate": 1.4529693870978931e-05, + "loss": 1.9354, + "step": 14324000 + }, + { + "epoch": 70.97, + "learning_rate": 1.4528455284552846e-05, + "loss": 1.9238, + "step": 14324500 + }, + { + "epoch": 70.97, + "learning_rate": 1.4527219175299614e-05, + "loss": 1.9325, + "step": 14325000 + }, + { + "epoch": 70.97, + "learning_rate": 1.452598058887353e-05, + "loss": 1.9247, + "step": 14325500 + }, + { + "epoch": 70.98, + "learning_rate": 1.4524742002447447e-05, + "loss": 1.9337, + "step": 14326000 + }, + { + "epoch": 70.98, + "learning_rate": 1.4523503416021364e-05, + "loss": 1.9344, + "step": 14326500 + }, + { + "epoch": 70.98, + "learning_rate": 1.4522264829595281e-05, + "loss": 1.922, + "step": 14327000 + }, + { + "epoch": 70.98, + "learning_rate": 1.4521026243169198e-05, + "loss": 1.9239, + "step": 14327500 + }, + { + "epoch": 70.99, + "learning_rate": 1.4519787656743113e-05, + "loss": 1.9301, + "step": 14328000 + }, + { + "epoch": 70.99, + "learning_rate": 1.4518554024662734e-05, + "loss": 1.9113, + "step": 14328500 + }, + { + "epoch": 70.99, + "learning_rate": 1.4517317915409503e-05, + "loss": 1.9165, + "step": 14329000 + }, + { + "epoch": 70.99, + "learning_rate": 1.4516079328983418e-05, + "loss": 1.9347, + "step": 14329500 + }, + { + "epoch": 71.0, + "learning_rate": 1.4514840742557333e-05, + "loss": 1.9186, + "step": 14330000 + }, + { + "epoch": 71.0, + "learning_rate": 1.451360215613125e-05, + "loss": 1.9073, + "step": 14330500 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.6787904624217936, + "eval_accuracy_mlm": 0.6389462814215449, + "eval_accuracy_nsp": 0.8665981589196694, + "eval_loss": 2.2952094078063965, + "eval_runtime": 146.7223, + "eval_samples_per_second": 1737.698, + "eval_steps_per_second": 72.409, + "step": 14330853 + }, + { + "epoch": 71.0, + "learning_rate": 1.4512363569705167e-05, + "loss": 1.9239, + "step": 14331000 + }, + { + "epoch": 71.0, + "learning_rate": 1.4511124983279084e-05, + "loss": 1.9131, + "step": 14331500 + }, + { + "epoch": 71.01, + "learning_rate": 1.4509886396853001e-05, + "loss": 1.902, + "step": 14332000 + }, + { + "epoch": 71.01, + "learning_rate": 1.4508647810426918e-05, + "loss": 1.9067, + "step": 14332500 + }, + { + "epoch": 71.01, + "learning_rate": 1.4507409224000835e-05, + "loss": 1.9132, + "step": 14333000 + }, + { + "epoch": 71.01, + "learning_rate": 1.4506170637574749e-05, + "loss": 1.8937, + "step": 14333500 + }, + { + "epoch": 71.02, + "learning_rate": 1.4504934528321517e-05, + "loss": 1.9152, + "step": 14334000 + }, + { + "epoch": 71.02, + "learning_rate": 1.4503695941895434e-05, + "loss": 1.91, + "step": 14334500 + }, + { + "epoch": 71.02, + "learning_rate": 1.4502457355469351e-05, + "loss": 1.8947, + "step": 14335000 + }, + { + "epoch": 71.02, + "learning_rate": 1.4501218769043268e-05, + "loss": 1.9179, + "step": 14335500 + }, + { + "epoch": 71.03, + "learning_rate": 1.4499980182617185e-05, + "loss": 1.9152, + "step": 14336000 + }, + { + "epoch": 71.03, + "learning_rate": 1.44987415961911e-05, + "loss": 1.9177, + "step": 14336500 + }, + { + "epoch": 71.03, + "learning_rate": 1.4497503009765016e-05, + "loss": 1.9228, + "step": 14337000 + }, + { + "epoch": 71.03, + "learning_rate": 1.4496264423338932e-05, + "loss": 1.9195, + "step": 14337500 + }, + { + "epoch": 71.04, + "learning_rate": 1.4495025836912848e-05, + "loss": 1.9066, + "step": 14338000 + }, + { + "epoch": 71.04, + "learning_rate": 1.4493787250486765e-05, + "loss": 1.9143, + "step": 14338500 + }, + { + "epoch": 71.04, + "learning_rate": 1.4492548664060682e-05, + "loss": 1.9051, + "step": 14339000 + }, + { + "epoch": 71.04, + "learning_rate": 1.4491312554807452e-05, + "loss": 1.9077, + "step": 14339500 + }, + { + "epoch": 71.05, + "learning_rate": 1.4490073968381366e-05, + "loss": 1.9123, + "step": 14340000 + }, + { + "epoch": 71.05, + "learning_rate": 1.4488835381955282e-05, + "loss": 1.8829, + "step": 14340500 + }, + { + "epoch": 71.05, + "learning_rate": 1.44875967955292e-05, + "loss": 1.8987, + "step": 14341000 + }, + { + "epoch": 71.05, + "learning_rate": 1.4486358209103115e-05, + "loss": 1.8853, + "step": 14341500 + }, + { + "epoch": 71.06, + "learning_rate": 1.4485119622677032e-05, + "loss": 1.9083, + "step": 14342000 + }, + { + "epoch": 71.06, + "learning_rate": 1.4483881036250949e-05, + "loss": 1.8832, + "step": 14342500 + }, + { + "epoch": 71.06, + "learning_rate": 1.4482642449824865e-05, + "loss": 1.9052, + "step": 14343000 + }, + { + "epoch": 71.06, + "learning_rate": 1.4481403863398782e-05, + "loss": 1.9118, + "step": 14343500 + }, + { + "epoch": 71.07, + "learning_rate": 1.44801652769727e-05, + "loss": 1.902, + "step": 14344000 + }, + { + "epoch": 71.07, + "learning_rate": 1.4478926690546613e-05, + "loss": 1.9023, + "step": 14344500 + }, + { + "epoch": 71.07, + "learning_rate": 1.447768810412053e-05, + "loss": 1.9188, + "step": 14345000 + }, + { + "epoch": 71.07, + "learning_rate": 1.4476451994867299e-05, + "loss": 1.9079, + "step": 14345500 + }, + { + "epoch": 71.08, + "learning_rate": 1.4475213408441215e-05, + "loss": 1.8931, + "step": 14346000 + }, + { + "epoch": 71.08, + "learning_rate": 1.4473974822015132e-05, + "loss": 1.8911, + "step": 14346500 + }, + { + "epoch": 71.08, + "learning_rate": 1.44727387127619e-05, + "loss": 1.919, + "step": 14347000 + }, + { + "epoch": 71.08, + "learning_rate": 1.4471500126335816e-05, + "loss": 1.8946, + "step": 14347500 + }, + { + "epoch": 71.08, + "learning_rate": 1.4470261539909732e-05, + "loss": 1.9212, + "step": 14348000 + }, + { + "epoch": 71.09, + "learning_rate": 1.4469022953483649e-05, + "loss": 1.9147, + "step": 14348500 + }, + { + "epoch": 71.09, + "learning_rate": 1.4467784367057566e-05, + "loss": 1.904, + "step": 14349000 + }, + { + "epoch": 71.09, + "learning_rate": 1.4466548257804333e-05, + "loss": 1.9006, + "step": 14349500 + }, + { + "epoch": 71.09, + "learning_rate": 1.446530967137825e-05, + "loss": 1.8974, + "step": 14350000 + }, + { + "epoch": 71.1, + "learning_rate": 1.4464071084952167e-05, + "loss": 1.9013, + "step": 14350500 + }, + { + "epoch": 71.1, + "learning_rate": 1.4462832498526083e-05, + "loss": 1.9068, + "step": 14351000 + }, + { + "epoch": 71.1, + "learning_rate": 1.4461593912099999e-05, + "loss": 1.8936, + "step": 14351500 + }, + { + "epoch": 71.1, + "learning_rate": 1.4460355325673916e-05, + "loss": 1.9308, + "step": 14352000 + }, + { + "epoch": 71.11, + "learning_rate": 1.4459116739247833e-05, + "loss": 1.8968, + "step": 14352500 + }, + { + "epoch": 71.11, + "learning_rate": 1.445787815282175e-05, + "loss": 1.9258, + "step": 14353000 + }, + { + "epoch": 71.11, + "learning_rate": 1.4456639566395663e-05, + "loss": 1.9228, + "step": 14353500 + }, + { + "epoch": 71.11, + "learning_rate": 1.445540097996958e-05, + "loss": 1.866, + "step": 14354000 + }, + { + "epoch": 71.12, + "learning_rate": 1.4454162393543497e-05, + "loss": 1.92, + "step": 14354500 + }, + { + "epoch": 71.12, + "learning_rate": 1.4452926284290266e-05, + "loss": 1.902, + "step": 14355000 + }, + { + "epoch": 71.12, + "learning_rate": 1.4451687697864183e-05, + "loss": 1.9198, + "step": 14355500 + }, + { + "epoch": 71.12, + "learning_rate": 1.44504491114381e-05, + "loss": 1.9098, + "step": 14356000 + }, + { + "epoch": 71.13, + "learning_rate": 1.4449213002184867e-05, + "loss": 1.8939, + "step": 14356500 + }, + { + "epoch": 71.13, + "learning_rate": 1.4447974415758784e-05, + "loss": 1.8907, + "step": 14357000 + }, + { + "epoch": 71.13, + "learning_rate": 1.44467358293327e-05, + "loss": 1.9401, + "step": 14357500 + }, + { + "epoch": 71.13, + "learning_rate": 1.4445497242906616e-05, + "loss": 1.9188, + "step": 14358000 + }, + { + "epoch": 71.14, + "learning_rate": 1.4444261133653383e-05, + "loss": 1.8949, + "step": 14358500 + }, + { + "epoch": 71.14, + "learning_rate": 1.44430225472273e-05, + "loss": 1.9257, + "step": 14359000 + }, + { + "epoch": 71.14, + "learning_rate": 1.4441783960801217e-05, + "loss": 1.9107, + "step": 14359500 + }, + { + "epoch": 71.14, + "learning_rate": 1.4440545374375134e-05, + "loss": 1.9199, + "step": 14360000 + }, + { + "epoch": 71.15, + "learning_rate": 1.443930678794905e-05, + "loss": 1.9303, + "step": 14360500 + }, + { + "epoch": 71.15, + "learning_rate": 1.4438068201522968e-05, + "loss": 1.9285, + "step": 14361000 + }, + { + "epoch": 71.15, + "learning_rate": 1.4436829615096883e-05, + "loss": 1.9301, + "step": 14361500 + }, + { + "epoch": 71.15, + "learning_rate": 1.44355910286708e-05, + "loss": 1.9116, + "step": 14362000 + }, + { + "epoch": 71.16, + "learning_rate": 1.4434354919417567e-05, + "loss": 1.9051, + "step": 14362500 + }, + { + "epoch": 71.16, + "learning_rate": 1.4433116332991484e-05, + "loss": 1.9112, + "step": 14363000 + }, + { + "epoch": 71.16, + "learning_rate": 1.44318777465654e-05, + "loss": 1.9183, + "step": 14363500 + }, + { + "epoch": 71.16, + "learning_rate": 1.4430639160139318e-05, + "loss": 1.9186, + "step": 14364000 + }, + { + "epoch": 71.17, + "learning_rate": 1.4429403050886086e-05, + "loss": 1.903, + "step": 14364500 + }, + { + "epoch": 71.17, + "learning_rate": 1.442816446446e-05, + "loss": 1.9055, + "step": 14365000 + }, + { + "epoch": 71.17, + "learning_rate": 1.4426925878033917e-05, + "loss": 1.8996, + "step": 14365500 + }, + { + "epoch": 71.17, + "learning_rate": 1.4425687291607834e-05, + "loss": 1.9222, + "step": 14366000 + }, + { + "epoch": 71.18, + "learning_rate": 1.4424451182354604e-05, + "loss": 1.9053, + "step": 14366500 + }, + { + "epoch": 71.18, + "learning_rate": 1.442321259592852e-05, + "loss": 1.9254, + "step": 14367000 + }, + { + "epoch": 71.18, + "learning_rate": 1.4421974009502436e-05, + "loss": 1.9066, + "step": 14367500 + }, + { + "epoch": 71.18, + "learning_rate": 1.4420735423076352e-05, + "loss": 1.8978, + "step": 14368000 + }, + { + "epoch": 71.19, + "learning_rate": 1.441949931382312e-05, + "loss": 1.9202, + "step": 14368500 + }, + { + "epoch": 71.19, + "learning_rate": 1.4418260727397037e-05, + "loss": 1.908, + "step": 14369000 + }, + { + "epoch": 71.19, + "learning_rate": 1.4417022140970954e-05, + "loss": 1.9062, + "step": 14369500 + }, + { + "epoch": 71.19, + "learning_rate": 1.4415783554544871e-05, + "loss": 1.8834, + "step": 14370000 + }, + { + "epoch": 71.2, + "learning_rate": 1.4414544968118786e-05, + "loss": 1.9069, + "step": 14370500 + }, + { + "epoch": 71.2, + "learning_rate": 1.4413308858865554e-05, + "loss": 1.9065, + "step": 14371000 + }, + { + "epoch": 71.2, + "learning_rate": 1.441207027243947e-05, + "loss": 1.9124, + "step": 14371500 + }, + { + "epoch": 71.2, + "learning_rate": 1.4410831686013387e-05, + "loss": 1.9093, + "step": 14372000 + }, + { + "epoch": 71.21, + "learning_rate": 1.4409595576760156e-05, + "loss": 1.9036, + "step": 14372500 + }, + { + "epoch": 71.21, + "learning_rate": 1.4408356990334072e-05, + "loss": 1.9215, + "step": 14373000 + }, + { + "epoch": 71.21, + "learning_rate": 1.4407118403907988e-05, + "loss": 1.9203, + "step": 14373500 + }, + { + "epoch": 71.21, + "learning_rate": 1.4405879817481904e-05, + "loss": 1.9, + "step": 14374000 + }, + { + "epoch": 71.22, + "learning_rate": 1.440464123105582e-05, + "loss": 1.9073, + "step": 14374500 + }, + { + "epoch": 71.22, + "learning_rate": 1.4403402644629738e-05, + "loss": 1.9396, + "step": 14375000 + }, + { + "epoch": 71.22, + "learning_rate": 1.4402164058203654e-05, + "loss": 1.9193, + "step": 14375500 + }, + { + "epoch": 71.22, + "learning_rate": 1.4400925471777571e-05, + "loss": 1.9078, + "step": 14376000 + }, + { + "epoch": 71.23, + "learning_rate": 1.4399686885351488e-05, + "loss": 1.9303, + "step": 14376500 + }, + { + "epoch": 71.23, + "learning_rate": 1.4398450776098254e-05, + "loss": 1.9074, + "step": 14377000 + }, + { + "epoch": 71.23, + "learning_rate": 1.439721218967217e-05, + "loss": 1.892, + "step": 14377500 + }, + { + "epoch": 71.23, + "learning_rate": 1.4395973603246088e-05, + "loss": 1.922, + "step": 14378000 + }, + { + "epoch": 71.24, + "learning_rate": 1.4394735016820005e-05, + "loss": 1.9165, + "step": 14378500 + }, + { + "epoch": 71.24, + "learning_rate": 1.4393496430393921e-05, + "loss": 1.9175, + "step": 14379000 + }, + { + "epoch": 71.24, + "learning_rate": 1.4392257843967838e-05, + "loss": 1.8798, + "step": 14379500 + }, + { + "epoch": 71.24, + "learning_rate": 1.4391019257541755e-05, + "loss": 1.9181, + "step": 14380000 + }, + { + "epoch": 71.25, + "learning_rate": 1.4389780671115669e-05, + "loss": 1.9322, + "step": 14380500 + }, + { + "epoch": 71.25, + "learning_rate": 1.4388544561862438e-05, + "loss": 1.8988, + "step": 14381000 + }, + { + "epoch": 71.25, + "learning_rate": 1.4387308452609208e-05, + "loss": 1.9063, + "step": 14381500 + }, + { + "epoch": 71.25, + "learning_rate": 1.4386069866183125e-05, + "loss": 1.8873, + "step": 14382000 + }, + { + "epoch": 71.26, + "learning_rate": 1.4384831279757039e-05, + "loss": 1.8924, + "step": 14382500 + }, + { + "epoch": 71.26, + "learning_rate": 1.4383595170503807e-05, + "loss": 1.8987, + "step": 14383000 + }, + { + "epoch": 71.26, + "learning_rate": 1.4382356584077724e-05, + "loss": 1.9254, + "step": 14383500 + }, + { + "epoch": 71.26, + "learning_rate": 1.4381117997651641e-05, + "loss": 1.9318, + "step": 14384000 + }, + { + "epoch": 71.27, + "learning_rate": 1.4379881888398408e-05, + "loss": 1.9117, + "step": 14384500 + }, + { + "epoch": 71.27, + "learning_rate": 1.4378643301972325e-05, + "loss": 1.93, + "step": 14385000 + }, + { + "epoch": 71.27, + "learning_rate": 1.4377404715546242e-05, + "loss": 1.93, + "step": 14385500 + }, + { + "epoch": 71.27, + "learning_rate": 1.4376166129120158e-05, + "loss": 1.8973, + "step": 14386000 + }, + { + "epoch": 71.28, + "learning_rate": 1.4374927542694074e-05, + "loss": 1.9244, + "step": 14386500 + }, + { + "epoch": 71.28, + "learning_rate": 1.4373688956267991e-05, + "loss": 1.8851, + "step": 14387000 + }, + { + "epoch": 71.28, + "learning_rate": 1.4372450369841908e-05, + "loss": 1.906, + "step": 14387500 + }, + { + "epoch": 71.28, + "learning_rate": 1.4371211783415825e-05, + "loss": 1.9245, + "step": 14388000 + }, + { + "epoch": 71.29, + "learning_rate": 1.4369973196989739e-05, + "loss": 1.9055, + "step": 14388500 + }, + { + "epoch": 71.29, + "learning_rate": 1.4368734610563656e-05, + "loss": 1.9099, + "step": 14389000 + }, + { + "epoch": 71.29, + "learning_rate": 1.4367496024137573e-05, + "loss": 1.9186, + "step": 14389500 + }, + { + "epoch": 71.29, + "learning_rate": 1.436625743771149e-05, + "loss": 1.9092, + "step": 14390000 + }, + { + "epoch": 71.3, + "learning_rate": 1.4365018851285405e-05, + "loss": 1.9208, + "step": 14390500 + }, + { + "epoch": 71.3, + "learning_rate": 1.4363780264859322e-05, + "loss": 1.9142, + "step": 14391000 + }, + { + "epoch": 71.3, + "learning_rate": 1.4362541678433239e-05, + "loss": 1.9326, + "step": 14391500 + }, + { + "epoch": 71.3, + "learning_rate": 1.4361303092007156e-05, + "loss": 1.9233, + "step": 14392000 + }, + { + "epoch": 71.31, + "learning_rate": 1.4360066982753923e-05, + "loss": 1.9091, + "step": 14392500 + }, + { + "epoch": 71.31, + "learning_rate": 1.4358830873500691e-05, + "loss": 1.8912, + "step": 14393000 + }, + { + "epoch": 71.31, + "learning_rate": 1.4357592287074608e-05, + "loss": 1.9189, + "step": 14393500 + }, + { + "epoch": 71.31, + "learning_rate": 1.4356353700648525e-05, + "loss": 1.9, + "step": 14394000 + }, + { + "epoch": 71.32, + "learning_rate": 1.4355117591395292e-05, + "loss": 1.9025, + "step": 14394500 + }, + { + "epoch": 71.32, + "learning_rate": 1.435387900496921e-05, + "loss": 1.924, + "step": 14395000 + }, + { + "epoch": 71.32, + "learning_rate": 1.4352640418543126e-05, + "loss": 1.8807, + "step": 14395500 + }, + { + "epoch": 71.32, + "learning_rate": 1.4351401832117042e-05, + "loss": 1.9097, + "step": 14396000 + }, + { + "epoch": 71.33, + "learning_rate": 1.4350163245690958e-05, + "loss": 1.9103, + "step": 14396500 + }, + { + "epoch": 71.33, + "learning_rate": 1.4348924659264875e-05, + "loss": 1.9015, + "step": 14397000 + }, + { + "epoch": 71.33, + "learning_rate": 1.4347686072838792e-05, + "loss": 1.9141, + "step": 14397500 + }, + { + "epoch": 71.33, + "learning_rate": 1.4346447486412706e-05, + "loss": 1.9207, + "step": 14398000 + }, + { + "epoch": 71.34, + "learning_rate": 1.4345208899986623e-05, + "loss": 1.9227, + "step": 14398500 + }, + { + "epoch": 71.34, + "learning_rate": 1.4343972790733393e-05, + "loss": 1.9054, + "step": 14399000 + }, + { + "epoch": 71.34, + "learning_rate": 1.4342736681480162e-05, + "loss": 1.9061, + "step": 14399500 + }, + { + "epoch": 71.34, + "learning_rate": 1.4341498095054076e-05, + "loss": 1.9205, + "step": 14400000 + }, + { + "epoch": 71.35, + "learning_rate": 1.4340259508627993e-05, + "loss": 1.9142, + "step": 14400500 + }, + { + "epoch": 71.35, + "learning_rate": 1.433902092220191e-05, + "loss": 1.9093, + "step": 14401000 + }, + { + "epoch": 71.35, + "learning_rate": 1.4337782335775826e-05, + "loss": 1.8939, + "step": 14401500 + }, + { + "epoch": 71.35, + "learning_rate": 1.4336543749349743e-05, + "loss": 1.9151, + "step": 14402000 + }, + { + "epoch": 71.35, + "learning_rate": 1.433530516292366e-05, + "loss": 1.9364, + "step": 14402500 + }, + { + "epoch": 71.36, + "learning_rate": 1.4334066576497576e-05, + "loss": 1.9002, + "step": 14403000 + }, + { + "epoch": 71.36, + "learning_rate": 1.4332827990071492e-05, + "loss": 1.922, + "step": 14403500 + }, + { + "epoch": 71.36, + "learning_rate": 1.433159188081826e-05, + "loss": 1.9229, + "step": 14404000 + }, + { + "epoch": 71.36, + "learning_rate": 1.4330353294392176e-05, + "loss": 1.9073, + "step": 14404500 + }, + { + "epoch": 71.37, + "learning_rate": 1.4329114707966093e-05, + "loss": 1.9017, + "step": 14405000 + }, + { + "epoch": 71.37, + "learning_rate": 1.432787612154001e-05, + "loss": 1.9147, + "step": 14405500 + }, + { + "epoch": 71.37, + "learning_rate": 1.4326637535113927e-05, + "loss": 1.9371, + "step": 14406000 + }, + { + "epoch": 71.37, + "learning_rate": 1.4325401425860693e-05, + "loss": 1.9304, + "step": 14406500 + }, + { + "epoch": 71.38, + "learning_rate": 1.432416283943461e-05, + "loss": 1.9135, + "step": 14407000 + }, + { + "epoch": 71.38, + "learning_rate": 1.4322924253008527e-05, + "loss": 1.9095, + "step": 14407500 + }, + { + "epoch": 71.38, + "learning_rate": 1.4321685666582443e-05, + "loss": 1.9381, + "step": 14408000 + }, + { + "epoch": 71.38, + "learning_rate": 1.432044708015636e-05, + "loss": 1.9295, + "step": 14408500 + }, + { + "epoch": 71.39, + "learning_rate": 1.4319208493730277e-05, + "loss": 1.8903, + "step": 14409000 + }, + { + "epoch": 71.39, + "learning_rate": 1.4317969907304193e-05, + "loss": 1.9384, + "step": 14409500 + }, + { + "epoch": 71.39, + "learning_rate": 1.431673132087811e-05, + "loss": 1.9187, + "step": 14410000 + }, + { + "epoch": 71.39, + "learning_rate": 1.4315492734452025e-05, + "loss": 1.9115, + "step": 14410500 + }, + { + "epoch": 71.4, + "learning_rate": 1.431425414802594e-05, + "loss": 1.9318, + "step": 14411000 + }, + { + "epoch": 71.4, + "learning_rate": 1.4313015561599857e-05, + "loss": 1.8944, + "step": 14411500 + }, + { + "epoch": 71.4, + "learning_rate": 1.4311776975173774e-05, + "loss": 1.9215, + "step": 14412000 + }, + { + "epoch": 71.4, + "learning_rate": 1.431053838874769e-05, + "loss": 1.8892, + "step": 14412500 + }, + { + "epoch": 71.41, + "learning_rate": 1.430930227949446e-05, + "loss": 1.9174, + "step": 14413000 + }, + { + "epoch": 71.41, + "learning_rate": 1.4308066170241227e-05, + "loss": 1.9258, + "step": 14413500 + }, + { + "epoch": 71.41, + "learning_rate": 1.4306827583815144e-05, + "loss": 1.9108, + "step": 14414000 + }, + { + "epoch": 71.41, + "learning_rate": 1.430558899738906e-05, + "loss": 1.9194, + "step": 14414500 + }, + { + "epoch": 71.42, + "learning_rate": 1.4304350410962977e-05, + "loss": 1.9017, + "step": 14415000 + }, + { + "epoch": 71.42, + "learning_rate": 1.4303111824536894e-05, + "loss": 1.9243, + "step": 14415500 + }, + { + "epoch": 71.42, + "learning_rate": 1.4301875715283662e-05, + "loss": 1.9154, + "step": 14416000 + }, + { + "epoch": 71.42, + "learning_rate": 1.4300642083203284e-05, + "loss": 1.9088, + "step": 14416500 + }, + { + "epoch": 71.43, + "learning_rate": 1.42994034967772e-05, + "loss": 1.9103, + "step": 14417000 + }, + { + "epoch": 71.43, + "learning_rate": 1.4298164910351114e-05, + "loss": 1.8865, + "step": 14417500 + }, + { + "epoch": 71.43, + "learning_rate": 1.4296928801097883e-05, + "loss": 1.9353, + "step": 14418000 + }, + { + "epoch": 71.43, + "learning_rate": 1.42956902146718e-05, + "loss": 1.9175, + "step": 14418500 + }, + { + "epoch": 71.44, + "learning_rate": 1.4294451628245717e-05, + "loss": 1.9216, + "step": 14419000 + }, + { + "epoch": 71.44, + "learning_rate": 1.4293213041819634e-05, + "loss": 1.9196, + "step": 14419500 + }, + { + "epoch": 71.44, + "learning_rate": 1.4291974455393551e-05, + "loss": 1.9419, + "step": 14420000 + }, + { + "epoch": 71.44, + "learning_rate": 1.429074082331317e-05, + "loss": 1.9159, + "step": 14420500 + }, + { + "epoch": 71.45, + "learning_rate": 1.4289502236887087e-05, + "loss": 1.9316, + "step": 14421000 + }, + { + "epoch": 71.45, + "learning_rate": 1.4288263650461004e-05, + "loss": 1.9177, + "step": 14421500 + }, + { + "epoch": 71.45, + "learning_rate": 1.428702506403492e-05, + "loss": 1.9298, + "step": 14422000 + }, + { + "epoch": 71.45, + "learning_rate": 1.4285786477608834e-05, + "loss": 1.9174, + "step": 14422500 + }, + { + "epoch": 71.46, + "learning_rate": 1.4284547891182751e-05, + "loss": 1.9418, + "step": 14423000 + }, + { + "epoch": 71.46, + "learning_rate": 1.4283309304756668e-05, + "loss": 1.9218, + "step": 14423500 + }, + { + "epoch": 71.46, + "learning_rate": 1.4282070718330583e-05, + "loss": 1.896, + "step": 14424000 + }, + { + "epoch": 71.46, + "learning_rate": 1.42808321319045e-05, + "loss": 1.9012, + "step": 14424500 + }, + { + "epoch": 71.47, + "learning_rate": 1.4279593545478417e-05, + "loss": 1.9338, + "step": 14425000 + }, + { + "epoch": 71.47, + "learning_rate": 1.4278354959052334e-05, + "loss": 1.9184, + "step": 14425500 + }, + { + "epoch": 71.47, + "learning_rate": 1.4277116372626251e-05, + "loss": 1.9096, + "step": 14426000 + }, + { + "epoch": 71.47, + "learning_rate": 1.4275877786200168e-05, + "loss": 1.8983, + "step": 14426500 + }, + { + "epoch": 71.48, + "learning_rate": 1.4274639199774081e-05, + "loss": 1.9219, + "step": 14427000 + }, + { + "epoch": 71.48, + "learning_rate": 1.4273400613347998e-05, + "loss": 1.8926, + "step": 14427500 + }, + { + "epoch": 71.48, + "learning_rate": 1.4272162026921915e-05, + "loss": 1.9088, + "step": 14428000 + }, + { + "epoch": 71.48, + "learning_rate": 1.427092344049583e-05, + "loss": 1.9153, + "step": 14428500 + }, + { + "epoch": 71.49, + "learning_rate": 1.4269684854069747e-05, + "loss": 1.9114, + "step": 14429000 + }, + { + "epoch": 71.49, + "learning_rate": 1.4268448744816518e-05, + "loss": 1.9115, + "step": 14429500 + }, + { + "epoch": 71.49, + "learning_rate": 1.4267212635563285e-05, + "loss": 1.9371, + "step": 14430000 + }, + { + "epoch": 71.49, + "learning_rate": 1.4265974049137202e-05, + "loss": 1.9094, + "step": 14430500 + }, + { + "epoch": 71.5, + "learning_rate": 1.4264735462711117e-05, + "loss": 1.9131, + "step": 14431000 + }, + { + "epoch": 71.5, + "learning_rate": 1.4263496876285034e-05, + "loss": 1.9339, + "step": 14431500 + }, + { + "epoch": 71.5, + "learning_rate": 1.4262258289858951e-05, + "loss": 1.9089, + "step": 14432000 + }, + { + "epoch": 71.5, + "learning_rate": 1.4261019703432868e-05, + "loss": 1.881, + "step": 14432500 + }, + { + "epoch": 71.51, + "learning_rate": 1.4259781117006782e-05, + "loss": 1.9238, + "step": 14433000 + }, + { + "epoch": 71.51, + "learning_rate": 1.4258545007753552e-05, + "loss": 1.9117, + "step": 14433500 + }, + { + "epoch": 71.51, + "learning_rate": 1.4257306421327469e-05, + "loss": 1.9266, + "step": 14434000 + }, + { + "epoch": 71.51, + "learning_rate": 1.4256067834901384e-05, + "loss": 1.9117, + "step": 14434500 + }, + { + "epoch": 71.52, + "learning_rate": 1.4254829248475301e-05, + "loss": 1.9056, + "step": 14435000 + }, + { + "epoch": 71.52, + "learning_rate": 1.4253590662049218e-05, + "loss": 1.9074, + "step": 14435500 + }, + { + "epoch": 71.52, + "learning_rate": 1.4252352075623132e-05, + "loss": 1.9264, + "step": 14436000 + }, + { + "epoch": 71.52, + "learning_rate": 1.4251113489197049e-05, + "loss": 1.9008, + "step": 14436500 + }, + { + "epoch": 71.53, + "learning_rate": 1.4249877379943819e-05, + "loss": 1.9164, + "step": 14437000 + }, + { + "epoch": 71.53, + "learning_rate": 1.4248638793517734e-05, + "loss": 1.9362, + "step": 14437500 + }, + { + "epoch": 71.53, + "learning_rate": 1.4247400207091651e-05, + "loss": 1.9249, + "step": 14438000 + }, + { + "epoch": 71.53, + "learning_rate": 1.4246161620665568e-05, + "loss": 1.9272, + "step": 14438500 + }, + { + "epoch": 71.54, + "learning_rate": 1.4244923034239485e-05, + "loss": 1.9166, + "step": 14439000 + }, + { + "epoch": 71.54, + "learning_rate": 1.4243684447813399e-05, + "loss": 1.9276, + "step": 14439500 + }, + { + "epoch": 71.54, + "learning_rate": 1.4242445861387316e-05, + "loss": 1.9305, + "step": 14440000 + }, + { + "epoch": 71.54, + "learning_rate": 1.4241207274961233e-05, + "loss": 1.939, + "step": 14440500 + }, + { + "epoch": 71.55, + "learning_rate": 1.423996868853515e-05, + "loss": 1.8904, + "step": 14441000 + }, + { + "epoch": 71.55, + "learning_rate": 1.4238730102109066e-05, + "loss": 1.9188, + "step": 14441500 + }, + { + "epoch": 71.55, + "learning_rate": 1.4237491515682983e-05, + "loss": 1.9152, + "step": 14442000 + }, + { + "epoch": 71.55, + "learning_rate": 1.4236252929256899e-05, + "loss": 1.9438, + "step": 14442500 + }, + { + "epoch": 71.56, + "learning_rate": 1.4235014342830815e-05, + "loss": 1.886, + "step": 14443000 + }, + { + "epoch": 71.56, + "learning_rate": 1.4233775756404729e-05, + "loss": 1.9183, + "step": 14443500 + }, + { + "epoch": 71.56, + "learning_rate": 1.42325396471515e-05, + "loss": 1.9068, + "step": 14444000 + }, + { + "epoch": 71.56, + "learning_rate": 1.4231303537898268e-05, + "loss": 1.9167, + "step": 14444500 + }, + { + "epoch": 71.57, + "learning_rate": 1.4230064951472185e-05, + "loss": 1.9209, + "step": 14445000 + }, + { + "epoch": 71.57, + "learning_rate": 1.4228826365046099e-05, + "loss": 1.9134, + "step": 14445500 + }, + { + "epoch": 71.57, + "learning_rate": 1.4227587778620016e-05, + "loss": 1.8954, + "step": 14446000 + }, + { + "epoch": 71.57, + "learning_rate": 1.4226349192193933e-05, + "loss": 1.8934, + "step": 14446500 + }, + { + "epoch": 71.58, + "learning_rate": 1.422511060576785e-05, + "loss": 1.8903, + "step": 14447000 + }, + { + "epoch": 71.58, + "learning_rate": 1.4223872019341766e-05, + "loss": 1.928, + "step": 14447500 + }, + { + "epoch": 71.58, + "learning_rate": 1.4222633432915683e-05, + "loss": 1.9329, + "step": 14448000 + }, + { + "epoch": 71.58, + "learning_rate": 1.42213948464896e-05, + "loss": 1.9315, + "step": 14448500 + }, + { + "epoch": 71.59, + "learning_rate": 1.4220156260063516e-05, + "loss": 1.9082, + "step": 14449000 + }, + { + "epoch": 71.59, + "learning_rate": 1.4218920150810283e-05, + "loss": 1.9121, + "step": 14449500 + }, + { + "epoch": 71.59, + "learning_rate": 1.42176815643842e-05, + "loss": 1.9407, + "step": 14450000 + }, + { + "epoch": 71.59, + "learning_rate": 1.4216442977958117e-05, + "loss": 1.9215, + "step": 14450500 + }, + { + "epoch": 71.6, + "learning_rate": 1.4215204391532033e-05, + "loss": 1.901, + "step": 14451000 + }, + { + "epoch": 71.6, + "learning_rate": 1.421396580510595e-05, + "loss": 1.9414, + "step": 14451500 + }, + { + "epoch": 71.6, + "learning_rate": 1.4212727218679867e-05, + "loss": 1.9146, + "step": 14452000 + }, + { + "epoch": 71.6, + "learning_rate": 1.4211488632253783e-05, + "loss": 1.9222, + "step": 14452500 + }, + { + "epoch": 71.61, + "learning_rate": 1.421025252300055e-05, + "loss": 1.9226, + "step": 14453000 + }, + { + "epoch": 71.61, + "learning_rate": 1.4209013936574467e-05, + "loss": 1.9467, + "step": 14453500 + }, + { + "epoch": 71.61, + "learning_rate": 1.4207777827321237e-05, + "loss": 1.9151, + "step": 14454000 + }, + { + "epoch": 71.61, + "learning_rate": 1.4206539240895152e-05, + "loss": 1.936, + "step": 14454500 + }, + { + "epoch": 71.62, + "learning_rate": 1.4205300654469068e-05, + "loss": 1.9166, + "step": 14455000 + }, + { + "epoch": 71.62, + "learning_rate": 1.4204062068042985e-05, + "loss": 1.9357, + "step": 14455500 + }, + { + "epoch": 71.62, + "learning_rate": 1.4202825958789753e-05, + "loss": 1.9228, + "step": 14456000 + }, + { + "epoch": 71.62, + "learning_rate": 1.420158737236367e-05, + "loss": 1.9105, + "step": 14456500 + }, + { + "epoch": 71.62, + "learning_rate": 1.4200348785937587e-05, + "loss": 1.9047, + "step": 14457000 + }, + { + "epoch": 71.63, + "learning_rate": 1.4199110199511504e-05, + "loss": 1.9034, + "step": 14457500 + }, + { + "epoch": 71.63, + "learning_rate": 1.4197871613085418e-05, + "loss": 1.9209, + "step": 14458000 + }, + { + "epoch": 71.63, + "learning_rate": 1.4196635503832186e-05, + "loss": 1.898, + "step": 14458500 + }, + { + "epoch": 71.63, + "learning_rate": 1.4195396917406103e-05, + "loss": 1.9112, + "step": 14459000 + }, + { + "epoch": 71.64, + "learning_rate": 1.419415833098002e-05, + "loss": 1.9237, + "step": 14459500 + }, + { + "epoch": 71.64, + "learning_rate": 1.4192919744553937e-05, + "loss": 1.9124, + "step": 14460000 + }, + { + "epoch": 71.64, + "learning_rate": 1.4191683635300704e-05, + "loss": 1.921, + "step": 14460500 + }, + { + "epoch": 71.64, + "learning_rate": 1.4190445048874621e-05, + "loss": 1.9131, + "step": 14461000 + }, + { + "epoch": 71.65, + "learning_rate": 1.4189206462448537e-05, + "loss": 1.9205, + "step": 14461500 + }, + { + "epoch": 71.65, + "learning_rate": 1.4187967876022453e-05, + "loss": 1.9179, + "step": 14462000 + }, + { + "epoch": 71.65, + "learning_rate": 1.418672928959637e-05, + "loss": 1.9253, + "step": 14462500 + }, + { + "epoch": 71.65, + "learning_rate": 1.4185490703170287e-05, + "loss": 1.8922, + "step": 14463000 + }, + { + "epoch": 71.66, + "learning_rate": 1.4184252116744204e-05, + "loss": 1.9297, + "step": 14463500 + }, + { + "epoch": 71.66, + "learning_rate": 1.4183013530318121e-05, + "loss": 1.9213, + "step": 14464000 + }, + { + "epoch": 71.66, + "learning_rate": 1.4181774943892035e-05, + "loss": 1.9082, + "step": 14464500 + }, + { + "epoch": 71.66, + "learning_rate": 1.4180536357465952e-05, + "loss": 1.9184, + "step": 14465000 + }, + { + "epoch": 71.67, + "learning_rate": 1.4179297771039869e-05, + "loss": 1.911, + "step": 14465500 + }, + { + "epoch": 71.67, + "learning_rate": 1.4178061661786637e-05, + "loss": 1.948, + "step": 14466000 + }, + { + "epoch": 71.67, + "learning_rate": 1.4176823075360554e-05, + "loss": 1.9133, + "step": 14466500 + }, + { + "epoch": 71.67, + "learning_rate": 1.4175584488934471e-05, + "loss": 1.917, + "step": 14467000 + }, + { + "epoch": 71.68, + "learning_rate": 1.4174348379681238e-05, + "loss": 1.8879, + "step": 14467500 + }, + { + "epoch": 71.68, + "learning_rate": 1.4173109793255154e-05, + "loss": 1.9196, + "step": 14468000 + }, + { + "epoch": 71.68, + "learning_rate": 1.417187120682907e-05, + "loss": 1.9103, + "step": 14468500 + }, + { + "epoch": 71.68, + "learning_rate": 1.4170632620402987e-05, + "loss": 1.9064, + "step": 14469000 + }, + { + "epoch": 71.69, + "learning_rate": 1.4169394033976904e-05, + "loss": 1.9552, + "step": 14469500 + }, + { + "epoch": 71.69, + "learning_rate": 1.4168155447550821e-05, + "loss": 1.9268, + "step": 14470000 + }, + { + "epoch": 71.69, + "learning_rate": 1.4166916861124735e-05, + "loss": 1.931, + "step": 14470500 + }, + { + "epoch": 71.69, + "learning_rate": 1.4165678274698652e-05, + "loss": 1.9239, + "step": 14471000 + }, + { + "epoch": 71.7, + "learning_rate": 1.4164439688272569e-05, + "loss": 1.903, + "step": 14471500 + }, + { + "epoch": 71.7, + "learning_rate": 1.4163201101846486e-05, + "loss": 1.9277, + "step": 14472000 + }, + { + "epoch": 71.7, + "learning_rate": 1.41619625154204e-05, + "loss": 1.9334, + "step": 14472500 + }, + { + "epoch": 71.7, + "learning_rate": 1.4160723928994318e-05, + "loss": 1.9594, + "step": 14473000 + }, + { + "epoch": 71.71, + "learning_rate": 1.4159485342568235e-05, + "loss": 1.9056, + "step": 14473500 + }, + { + "epoch": 71.71, + "learning_rate": 1.4158246756142152e-05, + "loss": 1.9349, + "step": 14474000 + }, + { + "epoch": 71.71, + "learning_rate": 1.4157008169716069e-05, + "loss": 1.9109, + "step": 14474500 + }, + { + "epoch": 71.71, + "learning_rate": 1.4155769583289982e-05, + "loss": 1.9132, + "step": 14475000 + }, + { + "epoch": 71.72, + "learning_rate": 1.4154533474036753e-05, + "loss": 1.9224, + "step": 14475500 + }, + { + "epoch": 71.72, + "learning_rate": 1.4153297364783521e-05, + "loss": 1.9204, + "step": 14476000 + }, + { + "epoch": 71.72, + "learning_rate": 1.4152058778357438e-05, + "loss": 1.9196, + "step": 14476500 + }, + { + "epoch": 71.72, + "learning_rate": 1.4150822669104205e-05, + "loss": 1.9414, + "step": 14477000 + }, + { + "epoch": 71.73, + "learning_rate": 1.4149584082678122e-05, + "loss": 1.9119, + "step": 14477500 + }, + { + "epoch": 71.73, + "learning_rate": 1.4148345496252038e-05, + "loss": 1.9415, + "step": 14478000 + }, + { + "epoch": 71.73, + "learning_rate": 1.4147106909825955e-05, + "loss": 1.9162, + "step": 14478500 + }, + { + "epoch": 71.73, + "learning_rate": 1.4145868323399871e-05, + "loss": 1.9258, + "step": 14479000 + }, + { + "epoch": 71.74, + "learning_rate": 1.4144629736973788e-05, + "loss": 1.8992, + "step": 14479500 + }, + { + "epoch": 71.74, + "learning_rate": 1.4143391150547702e-05, + "loss": 1.9188, + "step": 14480000 + }, + { + "epoch": 71.74, + "learning_rate": 1.4142155041294472e-05, + "loss": 1.9176, + "step": 14480500 + }, + { + "epoch": 71.74, + "learning_rate": 1.414091645486839e-05, + "loss": 1.9126, + "step": 14481000 + }, + { + "epoch": 71.75, + "learning_rate": 1.4139677868442305e-05, + "loss": 1.9273, + "step": 14481500 + }, + { + "epoch": 71.75, + "learning_rate": 1.4138439282016222e-05, + "loss": 1.91, + "step": 14482000 + }, + { + "epoch": 71.75, + "learning_rate": 1.4137203172762989e-05, + "loss": 1.8977, + "step": 14482500 + }, + { + "epoch": 71.75, + "learning_rate": 1.4135964586336906e-05, + "loss": 1.9093, + "step": 14483000 + }, + { + "epoch": 71.76, + "learning_rate": 1.4134725999910823e-05, + "loss": 1.9205, + "step": 14483500 + }, + { + "epoch": 71.76, + "learning_rate": 1.413348741348474e-05, + "loss": 1.9218, + "step": 14484000 + }, + { + "epoch": 71.76, + "learning_rate": 1.4132248827058656e-05, + "loss": 1.9356, + "step": 14484500 + }, + { + "epoch": 71.76, + "learning_rate": 1.4131010240632572e-05, + "loss": 1.9243, + "step": 14485000 + }, + { + "epoch": 71.77, + "learning_rate": 1.4129771654206489e-05, + "loss": 1.9246, + "step": 14485500 + }, + { + "epoch": 71.77, + "learning_rate": 1.4128533067780405e-05, + "loss": 1.9069, + "step": 14486000 + }, + { + "epoch": 71.77, + "learning_rate": 1.4127294481354319e-05, + "loss": 1.9363, + "step": 14486500 + }, + { + "epoch": 71.77, + "learning_rate": 1.4126055894928236e-05, + "loss": 1.9308, + "step": 14487000 + }, + { + "epoch": 71.78, + "learning_rate": 1.4124817308502153e-05, + "loss": 1.9013, + "step": 14487500 + }, + { + "epoch": 71.78, + "learning_rate": 1.4123581199248923e-05, + "loss": 1.9203, + "step": 14488000 + }, + { + "epoch": 71.78, + "learning_rate": 1.4122342612822839e-05, + "loss": 1.9222, + "step": 14488500 + }, + { + "epoch": 71.78, + "learning_rate": 1.4121104026396756e-05, + "loss": 1.9131, + "step": 14489000 + }, + { + "epoch": 71.79, + "learning_rate": 1.4119865439970669e-05, + "loss": 1.9064, + "step": 14489500 + }, + { + "epoch": 71.79, + "learning_rate": 1.4118626853544586e-05, + "loss": 1.9179, + "step": 14490000 + }, + { + "epoch": 71.79, + "learning_rate": 1.4117388267118503e-05, + "loss": 1.9218, + "step": 14490500 + }, + { + "epoch": 71.79, + "learning_rate": 1.4116152157865273e-05, + "loss": 1.8937, + "step": 14491000 + }, + { + "epoch": 71.8, + "learning_rate": 1.4114913571439189e-05, + "loss": 1.9135, + "step": 14491500 + }, + { + "epoch": 71.8, + "learning_rate": 1.4113677462185956e-05, + "loss": 1.9099, + "step": 14492000 + }, + { + "epoch": 71.8, + "learning_rate": 1.4112438875759873e-05, + "loss": 1.9135, + "step": 14492500 + }, + { + "epoch": 71.8, + "learning_rate": 1.411120028933379e-05, + "loss": 1.9103, + "step": 14493000 + }, + { + "epoch": 71.81, + "learning_rate": 1.4109961702907707e-05, + "loss": 1.9024, + "step": 14493500 + }, + { + "epoch": 71.81, + "learning_rate": 1.4108723116481623e-05, + "loss": 1.9145, + "step": 14494000 + }, + { + "epoch": 71.81, + "learning_rate": 1.410748453005554e-05, + "loss": 1.8998, + "step": 14494500 + }, + { + "epoch": 71.81, + "learning_rate": 1.4106245943629456e-05, + "loss": 1.9236, + "step": 14495000 + }, + { + "epoch": 71.82, + "learning_rate": 1.4105007357203371e-05, + "loss": 1.9361, + "step": 14495500 + }, + { + "epoch": 71.82, + "learning_rate": 1.4103768770777288e-05, + "loss": 1.909, + "step": 14496000 + }, + { + "epoch": 71.82, + "learning_rate": 1.4102530184351203e-05, + "loss": 1.9123, + "step": 14496500 + }, + { + "epoch": 71.82, + "learning_rate": 1.410129159792512e-05, + "loss": 1.9128, + "step": 14497000 + }, + { + "epoch": 71.83, + "learning_rate": 1.4100053011499037e-05, + "loss": 1.9338, + "step": 14497500 + }, + { + "epoch": 71.83, + "learning_rate": 1.4098819379418658e-05, + "loss": 1.9244, + "step": 14498000 + }, + { + "epoch": 71.83, + "learning_rate": 1.4097580792992573e-05, + "loss": 1.9315, + "step": 14498500 + }, + { + "epoch": 71.83, + "learning_rate": 1.409634220656649e-05, + "loss": 1.9277, + "step": 14499000 + }, + { + "epoch": 71.84, + "learning_rate": 1.4095103620140407e-05, + "loss": 1.9291, + "step": 14499500 + }, + { + "epoch": 71.84, + "learning_rate": 1.4093865033714324e-05, + "loss": 1.9392, + "step": 14500000 + }, + { + "epoch": 71.84, + "learning_rate": 1.409262644728824e-05, + "loss": 1.9289, + "step": 14500500 + }, + { + "epoch": 71.84, + "learning_rate": 1.4091390338035008e-05, + "loss": 1.8996, + "step": 14501000 + }, + { + "epoch": 71.85, + "learning_rate": 1.4090151751608925e-05, + "loss": 1.915, + "step": 14501500 + }, + { + "epoch": 71.85, + "learning_rate": 1.408891316518284e-05, + "loss": 1.9289, + "step": 14502000 + }, + { + "epoch": 71.85, + "learning_rate": 1.4087674578756757e-05, + "loss": 1.9335, + "step": 14502500 + }, + { + "epoch": 71.85, + "learning_rate": 1.4086435992330674e-05, + "loss": 1.9211, + "step": 14503000 + }, + { + "epoch": 71.86, + "learning_rate": 1.408519740590459e-05, + "loss": 1.9279, + "step": 14503500 + }, + { + "epoch": 71.86, + "learning_rate": 1.4083958819478508e-05, + "loss": 1.9086, + "step": 14504000 + }, + { + "epoch": 71.86, + "learning_rate": 1.4082720233052424e-05, + "loss": 1.9402, + "step": 14504500 + }, + { + "epoch": 71.86, + "learning_rate": 1.4081481646626338e-05, + "loss": 1.9207, + "step": 14505000 + }, + { + "epoch": 71.87, + "learning_rate": 1.4080245537373107e-05, + "loss": 1.9151, + "step": 14505500 + }, + { + "epoch": 71.87, + "learning_rate": 1.4079009428119877e-05, + "loss": 1.9128, + "step": 14506000 + }, + { + "epoch": 71.87, + "learning_rate": 1.4077770841693794e-05, + "loss": 1.9258, + "step": 14506500 + }, + { + "epoch": 71.87, + "learning_rate": 1.4076532255267708e-05, + "loss": 1.9329, + "step": 14507000 + }, + { + "epoch": 71.88, + "learning_rate": 1.4075293668841625e-05, + "loss": 1.9355, + "step": 14507500 + }, + { + "epoch": 71.88, + "learning_rate": 1.4074055082415542e-05, + "loss": 1.9299, + "step": 14508000 + }, + { + "epoch": 71.88, + "learning_rate": 1.4072816495989457e-05, + "loss": 1.9434, + "step": 14508500 + }, + { + "epoch": 71.88, + "learning_rate": 1.4071577909563374e-05, + "loss": 1.9216, + "step": 14509000 + }, + { + "epoch": 71.89, + "learning_rate": 1.407033932313729e-05, + "loss": 1.9241, + "step": 14509500 + }, + { + "epoch": 71.89, + "learning_rate": 1.4069100736711208e-05, + "loss": 1.9202, + "step": 14510000 + }, + { + "epoch": 71.89, + "learning_rate": 1.4067864627457975e-05, + "loss": 1.9311, + "step": 14510500 + }, + { + "epoch": 71.89, + "learning_rate": 1.4066626041031892e-05, + "loss": 1.9227, + "step": 14511000 + }, + { + "epoch": 71.89, + "learning_rate": 1.4065387454605809e-05, + "loss": 1.9098, + "step": 14511500 + }, + { + "epoch": 71.9, + "learning_rate": 1.4064148868179724e-05, + "loss": 1.9022, + "step": 14512000 + }, + { + "epoch": 71.9, + "learning_rate": 1.4062912758926494e-05, + "loss": 1.9171, + "step": 14512500 + }, + { + "epoch": 71.9, + "learning_rate": 1.4061674172500408e-05, + "loss": 1.902, + "step": 14513000 + }, + { + "epoch": 71.9, + "learning_rate": 1.4060435586074325e-05, + "loss": 1.9384, + "step": 14513500 + }, + { + "epoch": 71.91, + "learning_rate": 1.4059199476821094e-05, + "loss": 1.9049, + "step": 14514000 + }, + { + "epoch": 71.91, + "learning_rate": 1.405796089039501e-05, + "loss": 1.9243, + "step": 14514500 + }, + { + "epoch": 71.91, + "learning_rate": 1.4056722303968927e-05, + "loss": 1.9424, + "step": 14515000 + }, + { + "epoch": 71.91, + "learning_rate": 1.4055486194715695e-05, + "loss": 1.9154, + "step": 14515500 + }, + { + "epoch": 71.92, + "learning_rate": 1.4054247608289612e-05, + "loss": 1.9131, + "step": 14516000 + }, + { + "epoch": 71.92, + "learning_rate": 1.4053009021863528e-05, + "loss": 1.9206, + "step": 14516500 + }, + { + "epoch": 71.92, + "learning_rate": 1.4051770435437445e-05, + "loss": 1.9285, + "step": 14517000 + }, + { + "epoch": 71.92, + "learning_rate": 1.4050534326184214e-05, + "loss": 1.9059, + "step": 14517500 + }, + { + "epoch": 71.93, + "learning_rate": 1.4049295739758128e-05, + "loss": 1.9206, + "step": 14518000 + }, + { + "epoch": 71.93, + "learning_rate": 1.4048057153332045e-05, + "loss": 1.9301, + "step": 14518500 + }, + { + "epoch": 71.93, + "learning_rate": 1.4046818566905962e-05, + "loss": 1.9191, + "step": 14519000 + }, + { + "epoch": 71.93, + "learning_rate": 1.4045579980479879e-05, + "loss": 1.9262, + "step": 14519500 + }, + { + "epoch": 71.94, + "learning_rate": 1.4044341394053795e-05, + "loss": 1.9292, + "step": 14520000 + }, + { + "epoch": 71.94, + "learning_rate": 1.4043102807627712e-05, + "loss": 1.9186, + "step": 14520500 + }, + { + "epoch": 71.94, + "learning_rate": 1.4041864221201628e-05, + "loss": 1.9255, + "step": 14521000 + }, + { + "epoch": 71.94, + "learning_rate": 1.4040625634775545e-05, + "loss": 1.9076, + "step": 14521500 + }, + { + "epoch": 71.95, + "learning_rate": 1.4039387048349461e-05, + "loss": 1.9214, + "step": 14522000 + }, + { + "epoch": 71.95, + "learning_rate": 1.4038148461923375e-05, + "loss": 1.9204, + "step": 14522500 + }, + { + "epoch": 71.95, + "learning_rate": 1.4036909875497292e-05, + "loss": 1.9108, + "step": 14523000 + }, + { + "epoch": 71.95, + "learning_rate": 1.4035671289071209e-05, + "loss": 1.9297, + "step": 14523500 + }, + { + "epoch": 71.96, + "learning_rate": 1.4034432702645126e-05, + "loss": 1.895, + "step": 14524000 + }, + { + "epoch": 71.96, + "learning_rate": 1.4033194116219043e-05, + "loss": 1.9119, + "step": 14524500 + }, + { + "epoch": 71.96, + "learning_rate": 1.403195552979296e-05, + "loss": 1.9135, + "step": 14525000 + }, + { + "epoch": 71.96, + "learning_rate": 1.4030719420539725e-05, + "loss": 1.8976, + "step": 14525500 + }, + { + "epoch": 71.97, + "learning_rate": 1.4029480834113642e-05, + "loss": 1.9153, + "step": 14526000 + }, + { + "epoch": 71.97, + "learning_rate": 1.4028242247687559e-05, + "loss": 1.9124, + "step": 14526500 + }, + { + "epoch": 71.97, + "learning_rate": 1.4027003661261476e-05, + "loss": 1.9256, + "step": 14527000 + }, + { + "epoch": 71.97, + "learning_rate": 1.4025765074835393e-05, + "loss": 1.93, + "step": 14527500 + }, + { + "epoch": 71.98, + "learning_rate": 1.402452648840931e-05, + "loss": 1.9018, + "step": 14528000 + }, + { + "epoch": 71.98, + "learning_rate": 1.4023287901983227e-05, + "loss": 1.8752, + "step": 14528500 + }, + { + "epoch": 71.98, + "learning_rate": 1.4022049315557142e-05, + "loss": 1.9295, + "step": 14529000 + }, + { + "epoch": 71.98, + "learning_rate": 1.4020810729131059e-05, + "loss": 1.9159, + "step": 14529500 + }, + { + "epoch": 71.99, + "learning_rate": 1.4019572142704972e-05, + "loss": 1.903, + "step": 14530000 + }, + { + "epoch": 71.99, + "learning_rate": 1.401833355627889e-05, + "loss": 1.9326, + "step": 14530500 + }, + { + "epoch": 71.99, + "learning_rate": 1.4017094969852806e-05, + "loss": 1.9232, + "step": 14531000 + }, + { + "epoch": 71.99, + "learning_rate": 1.4015858860599577e-05, + "loss": 1.9196, + "step": 14531500 + }, + { + "epoch": 72.0, + "learning_rate": 1.4014620274173492e-05, + "loss": 1.8953, + "step": 14532000 + }, + { + "epoch": 72.0, + "learning_rate": 1.4013381687747409e-05, + "loss": 1.9396, + "step": 14532500 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.6787599732536251, + "eval_accuracy_mlm": 0.638707084290668, + "eval_accuracy_nsp": 0.8675394867410055, + "eval_loss": 2.276818037033081, + "eval_runtime": 147.0953, + "eval_samples_per_second": 1733.291, + "eval_steps_per_second": 72.225, + "step": 14532696 + }, + { + "epoch": 72.0, + "learning_rate": 1.4012143101321324e-05, + "loss": 1.8878, + "step": 14533000 + }, + { + "epoch": 72.0, + "learning_rate": 1.401090451489524e-05, + "loss": 1.8965, + "step": 14533500 + }, + { + "epoch": 72.01, + "learning_rate": 1.4009665928469156e-05, + "loss": 1.8952, + "step": 14534000 + }, + { + "epoch": 72.01, + "learning_rate": 1.4008427342043073e-05, + "loss": 1.8977, + "step": 14534500 + }, + { + "epoch": 72.01, + "learning_rate": 1.400718875561699e-05, + "loss": 1.9088, + "step": 14535000 + }, + { + "epoch": 72.01, + "learning_rate": 1.4005950169190907e-05, + "loss": 1.9219, + "step": 14535500 + }, + { + "epoch": 72.02, + "learning_rate": 1.4004711582764824e-05, + "loss": 1.9042, + "step": 14536000 + }, + { + "epoch": 72.02, + "learning_rate": 1.4003475473511591e-05, + "loss": 1.9009, + "step": 14536500 + }, + { + "epoch": 72.02, + "learning_rate": 1.4002236887085506e-05, + "loss": 1.9024, + "step": 14537000 + }, + { + "epoch": 72.02, + "learning_rate": 1.4000998300659423e-05, + "loss": 1.9009, + "step": 14537500 + }, + { + "epoch": 72.03, + "learning_rate": 1.399975971423334e-05, + "loss": 1.8977, + "step": 14538000 + }, + { + "epoch": 72.03, + "learning_rate": 1.3998521127807257e-05, + "loss": 1.9094, + "step": 14538500 + }, + { + "epoch": 72.03, + "learning_rate": 1.3997285018554026e-05, + "loss": 1.8871, + "step": 14539000 + }, + { + "epoch": 72.03, + "learning_rate": 1.3996046432127941e-05, + "loss": 1.8903, + "step": 14539500 + }, + { + "epoch": 72.04, + "learning_rate": 1.3994812800047564e-05, + "loss": 1.9034, + "step": 14540000 + }, + { + "epoch": 72.04, + "learning_rate": 1.399357669079433e-05, + "loss": 1.9203, + "step": 14540500 + }, + { + "epoch": 72.04, + "learning_rate": 1.3992338104368246e-05, + "loss": 1.9071, + "step": 14541000 + }, + { + "epoch": 72.04, + "learning_rate": 1.3991099517942163e-05, + "loss": 1.9052, + "step": 14541500 + }, + { + "epoch": 72.05, + "learning_rate": 1.398986093151608e-05, + "loss": 1.8892, + "step": 14542000 + }, + { + "epoch": 72.05, + "learning_rate": 1.3988622345089997e-05, + "loss": 1.9116, + "step": 14542500 + }, + { + "epoch": 72.05, + "learning_rate": 1.3987383758663914e-05, + "loss": 1.8896, + "step": 14543000 + }, + { + "epoch": 72.05, + "learning_rate": 1.398614517223783e-05, + "loss": 1.9247, + "step": 14543500 + }, + { + "epoch": 72.06, + "learning_rate": 1.3984906585811747e-05, + "loss": 1.9073, + "step": 14544000 + }, + { + "epoch": 72.06, + "learning_rate": 1.3983667999385661e-05, + "loss": 1.8943, + "step": 14544500 + }, + { + "epoch": 72.06, + "learning_rate": 1.3982434367305283e-05, + "loss": 1.9137, + "step": 14545000 + }, + { + "epoch": 72.06, + "learning_rate": 1.39811957808792e-05, + "loss": 1.8864, + "step": 14545500 + }, + { + "epoch": 72.07, + "learning_rate": 1.3979957194453117e-05, + "loss": 1.8939, + "step": 14546000 + }, + { + "epoch": 72.07, + "learning_rate": 1.397871860802703e-05, + "loss": 1.8826, + "step": 14546500 + }, + { + "epoch": 72.07, + "learning_rate": 1.3977480021600948e-05, + "loss": 1.9263, + "step": 14547000 + }, + { + "epoch": 72.07, + "learning_rate": 1.3976241435174865e-05, + "loss": 1.9265, + "step": 14547500 + }, + { + "epoch": 72.08, + "learning_rate": 1.397500284874878e-05, + "loss": 1.9017, + "step": 14548000 + }, + { + "epoch": 72.08, + "learning_rate": 1.3973764262322697e-05, + "loss": 1.9122, + "step": 14548500 + }, + { + "epoch": 72.08, + "learning_rate": 1.3972528153069467e-05, + "loss": 1.8996, + "step": 14549000 + }, + { + "epoch": 72.08, + "learning_rate": 1.397128956664338e-05, + "loss": 1.8945, + "step": 14549500 + }, + { + "epoch": 72.09, + "learning_rate": 1.3970050980217298e-05, + "loss": 1.9222, + "step": 14550000 + }, + { + "epoch": 72.09, + "learning_rate": 1.3968812393791215e-05, + "loss": 1.9044, + "step": 14550500 + }, + { + "epoch": 72.09, + "learning_rate": 1.396757380736513e-05, + "loss": 1.8702, + "step": 14551000 + }, + { + "epoch": 72.09, + "learning_rate": 1.3966335220939047e-05, + "loss": 1.9037, + "step": 14551500 + }, + { + "epoch": 72.1, + "learning_rate": 1.3965096634512964e-05, + "loss": 1.8962, + "step": 14552000 + }, + { + "epoch": 72.1, + "learning_rate": 1.396385804808688e-05, + "loss": 1.9243, + "step": 14552500 + }, + { + "epoch": 72.1, + "learning_rate": 1.3962619461660798e-05, + "loss": 1.9134, + "step": 14553000 + }, + { + "epoch": 72.1, + "learning_rate": 1.3961380875234715e-05, + "loss": 1.9053, + "step": 14553500 + }, + { + "epoch": 72.11, + "learning_rate": 1.3960142288808628e-05, + "loss": 1.8931, + "step": 14554000 + }, + { + "epoch": 72.11, + "learning_rate": 1.3958903702382545e-05, + "loss": 1.9056, + "step": 14554500 + }, + { + "epoch": 72.11, + "learning_rate": 1.3957665115956462e-05, + "loss": 1.9017, + "step": 14555000 + }, + { + "epoch": 72.11, + "learning_rate": 1.3956426529530379e-05, + "loss": 1.8993, + "step": 14555500 + }, + { + "epoch": 72.12, + "learning_rate": 1.3955187943104294e-05, + "loss": 1.911, + "step": 14556000 + }, + { + "epoch": 72.12, + "learning_rate": 1.3953951833851065e-05, + "loss": 1.9035, + "step": 14556500 + }, + { + "epoch": 72.12, + "learning_rate": 1.3952713247424978e-05, + "loss": 1.8909, + "step": 14557000 + }, + { + "epoch": 72.12, + "learning_rate": 1.3951474660998895e-05, + "loss": 1.9198, + "step": 14557500 + }, + { + "epoch": 72.13, + "learning_rate": 1.3950238551745664e-05, + "loss": 1.9078, + "step": 14558000 + }, + { + "epoch": 72.13, + "learning_rate": 1.394899996531958e-05, + "loss": 1.9122, + "step": 14558500 + }, + { + "epoch": 72.13, + "learning_rate": 1.3947761378893498e-05, + "loss": 1.8879, + "step": 14559000 + }, + { + "epoch": 72.13, + "learning_rate": 1.3946522792467415e-05, + "loss": 1.8904, + "step": 14559500 + }, + { + "epoch": 72.14, + "learning_rate": 1.3945286683214182e-05, + "loss": 1.9129, + "step": 14560000 + }, + { + "epoch": 72.14, + "learning_rate": 1.3944048096788099e-05, + "loss": 1.9261, + "step": 14560500 + }, + { + "epoch": 72.14, + "learning_rate": 1.3942809510362016e-05, + "loss": 1.9079, + "step": 14561000 + }, + { + "epoch": 72.14, + "learning_rate": 1.3941570923935931e-05, + "loss": 1.9193, + "step": 14561500 + }, + { + "epoch": 72.15, + "learning_rate": 1.3940332337509848e-05, + "loss": 1.921, + "step": 14562000 + }, + { + "epoch": 72.15, + "learning_rate": 1.3939093751083765e-05, + "loss": 1.9036, + "step": 14562500 + }, + { + "epoch": 72.15, + "learning_rate": 1.3937855164657678e-05, + "loss": 1.911, + "step": 14563000 + }, + { + "epoch": 72.15, + "learning_rate": 1.3936616578231595e-05, + "loss": 1.9119, + "step": 14563500 + }, + { + "epoch": 72.16, + "learning_rate": 1.3935377991805512e-05, + "loss": 1.9017, + "step": 14564000 + }, + { + "epoch": 72.16, + "learning_rate": 1.3934139405379429e-05, + "loss": 1.9175, + "step": 14564500 + }, + { + "epoch": 72.16, + "learning_rate": 1.3932900818953346e-05, + "loss": 1.9202, + "step": 14565000 + }, + { + "epoch": 72.16, + "learning_rate": 1.3931662232527263e-05, + "loss": 1.9038, + "step": 14565500 + }, + { + "epoch": 72.16, + "learning_rate": 1.3930423646101178e-05, + "loss": 1.8853, + "step": 14566000 + }, + { + "epoch": 72.17, + "learning_rate": 1.3929187536847945e-05, + "loss": 1.9174, + "step": 14566500 + }, + { + "epoch": 72.17, + "learning_rate": 1.3927948950421862e-05, + "loss": 1.9115, + "step": 14567000 + }, + { + "epoch": 72.17, + "learning_rate": 1.3926712841168633e-05, + "loss": 1.9299, + "step": 14567500 + }, + { + "epoch": 72.17, + "learning_rate": 1.3925476731915398e-05, + "loss": 1.9262, + "step": 14568000 + }, + { + "epoch": 72.18, + "learning_rate": 1.3924238145489315e-05, + "loss": 1.896, + "step": 14568500 + }, + { + "epoch": 72.18, + "learning_rate": 1.3922999559063232e-05, + "loss": 1.9015, + "step": 14569000 + }, + { + "epoch": 72.18, + "learning_rate": 1.3921760972637149e-05, + "loss": 1.8945, + "step": 14569500 + }, + { + "epoch": 72.18, + "learning_rate": 1.3920522386211066e-05, + "loss": 1.9173, + "step": 14570000 + }, + { + "epoch": 72.19, + "learning_rate": 1.3919283799784983e-05, + "loss": 1.9235, + "step": 14570500 + }, + { + "epoch": 72.19, + "learning_rate": 1.3918047690531752e-05, + "loss": 1.8999, + "step": 14571000 + }, + { + "epoch": 72.19, + "learning_rate": 1.3916809104105665e-05, + "loss": 1.8869, + "step": 14571500 + }, + { + "epoch": 72.19, + "learning_rate": 1.3915570517679582e-05, + "loss": 1.8927, + "step": 14572000 + }, + { + "epoch": 72.2, + "learning_rate": 1.3914331931253499e-05, + "loss": 1.8996, + "step": 14572500 + }, + { + "epoch": 72.2, + "learning_rate": 1.3913093344827416e-05, + "loss": 1.8891, + "step": 14573000 + }, + { + "epoch": 72.2, + "learning_rate": 1.3911854758401333e-05, + "loss": 1.9016, + "step": 14573500 + }, + { + "epoch": 72.2, + "learning_rate": 1.391061617197525e-05, + "loss": 1.8983, + "step": 14574000 + }, + { + "epoch": 72.21, + "learning_rate": 1.3909380062722017e-05, + "loss": 1.9114, + "step": 14574500 + }, + { + "epoch": 72.21, + "learning_rate": 1.3908141476295932e-05, + "loss": 1.9533, + "step": 14575000 + }, + { + "epoch": 72.21, + "learning_rate": 1.3906902889869849e-05, + "loss": 1.9087, + "step": 14575500 + }, + { + "epoch": 72.21, + "learning_rate": 1.3905664303443766e-05, + "loss": 1.8896, + "step": 14576000 + }, + { + "epoch": 72.22, + "learning_rate": 1.3904425717017683e-05, + "loss": 1.9208, + "step": 14576500 + }, + { + "epoch": 72.22, + "learning_rate": 1.39031871305916e-05, + "loss": 1.9056, + "step": 14577000 + }, + { + "epoch": 72.22, + "learning_rate": 1.3901948544165517e-05, + "loss": 1.9219, + "step": 14577500 + }, + { + "epoch": 72.22, + "learning_rate": 1.3900709957739434e-05, + "loss": 1.932, + "step": 14578000 + }, + { + "epoch": 72.23, + "learning_rate": 1.3899471371313349e-05, + "loss": 1.9254, + "step": 14578500 + }, + { + "epoch": 72.23, + "learning_rate": 1.3898232784887264e-05, + "loss": 1.9002, + "step": 14579000 + }, + { + "epoch": 72.23, + "learning_rate": 1.389699419846118e-05, + "loss": 1.9271, + "step": 14579500 + }, + { + "epoch": 72.23, + "learning_rate": 1.389575808920795e-05, + "loss": 1.8763, + "step": 14580000 + }, + { + "epoch": 72.24, + "learning_rate": 1.3894519502781867e-05, + "loss": 1.9333, + "step": 14580500 + }, + { + "epoch": 72.24, + "learning_rate": 1.3893280916355784e-05, + "loss": 1.9218, + "step": 14581000 + }, + { + "epoch": 72.24, + "learning_rate": 1.3892042329929699e-05, + "loss": 1.9028, + "step": 14581500 + }, + { + "epoch": 72.24, + "learning_rate": 1.3890803743503614e-05, + "loss": 1.8932, + "step": 14582000 + }, + { + "epoch": 72.25, + "learning_rate": 1.3889565157077531e-05, + "loss": 1.9287, + "step": 14582500 + }, + { + "epoch": 72.25, + "learning_rate": 1.3888326570651446e-05, + "loss": 1.9176, + "step": 14583000 + }, + { + "epoch": 72.25, + "learning_rate": 1.3887087984225363e-05, + "loss": 1.8921, + "step": 14583500 + }, + { + "epoch": 72.25, + "learning_rate": 1.3885851874972134e-05, + "loss": 1.9326, + "step": 14584000 + }, + { + "epoch": 72.26, + "learning_rate": 1.388461328854605e-05, + "loss": 1.9078, + "step": 14584500 + }, + { + "epoch": 72.26, + "learning_rate": 1.3883374702119964e-05, + "loss": 1.8832, + "step": 14585000 + }, + { + "epoch": 72.26, + "learning_rate": 1.3882136115693881e-05, + "loss": 1.8793, + "step": 14585500 + }, + { + "epoch": 72.26, + "learning_rate": 1.3880897529267798e-05, + "loss": 1.9007, + "step": 14586000 + }, + { + "epoch": 72.27, + "learning_rate": 1.3879658942841713e-05, + "loss": 1.8987, + "step": 14586500 + }, + { + "epoch": 72.27, + "learning_rate": 1.387842035641563e-05, + "loss": 1.9077, + "step": 14587000 + }, + { + "epoch": 72.27, + "learning_rate": 1.3877181769989547e-05, + "loss": 1.9101, + "step": 14587500 + }, + { + "epoch": 72.27, + "learning_rate": 1.3875943183563464e-05, + "loss": 1.9164, + "step": 14588000 + }, + { + "epoch": 72.28, + "learning_rate": 1.3874707074310231e-05, + "loss": 1.9139, + "step": 14588500 + }, + { + "epoch": 72.28, + "learning_rate": 1.3873468487884148e-05, + "loss": 1.8772, + "step": 14589000 + }, + { + "epoch": 72.28, + "learning_rate": 1.3872229901458063e-05, + "loss": 1.9003, + "step": 14589500 + }, + { + "epoch": 72.28, + "learning_rate": 1.387099131503198e-05, + "loss": 1.9104, + "step": 14590000 + }, + { + "epoch": 72.29, + "learning_rate": 1.3869755205778751e-05, + "loss": 1.892, + "step": 14590500 + }, + { + "epoch": 72.29, + "learning_rate": 1.386852157369837e-05, + "loss": 1.9139, + "step": 14591000 + }, + { + "epoch": 72.29, + "learning_rate": 1.3867282987272287e-05, + "loss": 1.9117, + "step": 14591500 + }, + { + "epoch": 72.29, + "learning_rate": 1.3866046878019054e-05, + "loss": 1.9029, + "step": 14592000 + }, + { + "epoch": 72.3, + "learning_rate": 1.386480829159297e-05, + "loss": 1.8952, + "step": 14592500 + }, + { + "epoch": 72.3, + "learning_rate": 1.3863569705166888e-05, + "loss": 1.8949, + "step": 14593000 + }, + { + "epoch": 72.3, + "learning_rate": 1.3862331118740805e-05, + "loss": 1.9102, + "step": 14593500 + }, + { + "epoch": 72.3, + "learning_rate": 1.386109253231472e-05, + "loss": 1.9174, + "step": 14594000 + }, + { + "epoch": 72.31, + "learning_rate": 1.3859853945888637e-05, + "loss": 1.9207, + "step": 14594500 + }, + { + "epoch": 72.31, + "learning_rate": 1.3858615359462554e-05, + "loss": 1.91, + "step": 14595000 + }, + { + "epoch": 72.31, + "learning_rate": 1.385737677303647e-05, + "loss": 1.9243, + "step": 14595500 + }, + { + "epoch": 72.31, + "learning_rate": 1.3856138186610388e-05, + "loss": 1.9138, + "step": 14596000 + }, + { + "epoch": 72.32, + "learning_rate": 1.3854899600184301e-05, + "loss": 1.8809, + "step": 14596500 + }, + { + "epoch": 72.32, + "learning_rate": 1.3853661013758218e-05, + "loss": 1.8994, + "step": 14597000 + }, + { + "epoch": 72.32, + "learning_rate": 1.3852422427332135e-05, + "loss": 1.9143, + "step": 14597500 + }, + { + "epoch": 72.32, + "learning_rate": 1.3851183840906052e-05, + "loss": 1.9014, + "step": 14598000 + }, + { + "epoch": 72.33, + "learning_rate": 1.3849945254479967e-05, + "loss": 1.8996, + "step": 14598500 + }, + { + "epoch": 72.33, + "learning_rate": 1.3848706668053884e-05, + "loss": 1.9305, + "step": 14599000 + }, + { + "epoch": 72.33, + "learning_rate": 1.3847468081627801e-05, + "loss": 1.8889, + "step": 14599500 + }, + { + "epoch": 72.33, + "learning_rate": 1.3846229495201718e-05, + "loss": 1.8967, + "step": 14600000 + }, + { + "epoch": 72.34, + "learning_rate": 1.3844990908775632e-05, + "loss": 1.9069, + "step": 14600500 + }, + { + "epoch": 72.34, + "learning_rate": 1.3843754799522402e-05, + "loss": 1.9028, + "step": 14601000 + }, + { + "epoch": 72.34, + "learning_rate": 1.384251869026917e-05, + "loss": 1.9036, + "step": 14601500 + }, + { + "epoch": 72.34, + "learning_rate": 1.3841280103843088e-05, + "loss": 1.9079, + "step": 14602000 + }, + { + "epoch": 72.35, + "learning_rate": 1.3840041517417001e-05, + "loss": 1.9301, + "step": 14602500 + }, + { + "epoch": 72.35, + "learning_rate": 1.3838802930990918e-05, + "loss": 1.8785, + "step": 14603000 + }, + { + "epoch": 72.35, + "learning_rate": 1.3837564344564835e-05, + "loss": 1.9033, + "step": 14603500 + }, + { + "epoch": 72.35, + "learning_rate": 1.3836325758138752e-05, + "loss": 1.8973, + "step": 14604000 + }, + { + "epoch": 72.36, + "learning_rate": 1.3835089648885521e-05, + "loss": 1.9206, + "step": 14604500 + }, + { + "epoch": 72.36, + "learning_rate": 1.3833851062459438e-05, + "loss": 1.9092, + "step": 14605000 + }, + { + "epoch": 72.36, + "learning_rate": 1.3832612476033351e-05, + "loss": 1.897, + "step": 14605500 + }, + { + "epoch": 72.36, + "learning_rate": 1.3831373889607268e-05, + "loss": 1.9099, + "step": 14606000 + }, + { + "epoch": 72.37, + "learning_rate": 1.3830135303181185e-05, + "loss": 1.9163, + "step": 14606500 + }, + { + "epoch": 72.37, + "learning_rate": 1.3828896716755102e-05, + "loss": 1.9145, + "step": 14607000 + }, + { + "epoch": 72.37, + "learning_rate": 1.3827658130329019e-05, + "loss": 1.9067, + "step": 14607500 + }, + { + "epoch": 72.37, + "learning_rate": 1.3826422021075788e-05, + "loss": 1.8924, + "step": 14608000 + }, + { + "epoch": 72.38, + "learning_rate": 1.3825183434649705e-05, + "loss": 1.9063, + "step": 14608500 + }, + { + "epoch": 72.38, + "learning_rate": 1.3823944848223618e-05, + "loss": 1.9062, + "step": 14609000 + }, + { + "epoch": 72.38, + "learning_rate": 1.3822706261797535e-05, + "loss": 1.9011, + "step": 14609500 + }, + { + "epoch": 72.38, + "learning_rate": 1.3821467675371452e-05, + "loss": 1.9488, + "step": 14610000 + }, + { + "epoch": 72.39, + "learning_rate": 1.3820229088945369e-05, + "loss": 1.9281, + "step": 14610500 + }, + { + "epoch": 72.39, + "learning_rate": 1.3818990502519286e-05, + "loss": 1.9256, + "step": 14611000 + }, + { + "epoch": 72.39, + "learning_rate": 1.3817751916093203e-05, + "loss": 1.8995, + "step": 14611500 + }, + { + "epoch": 72.39, + "learning_rate": 1.3816515806839968e-05, + "loss": 1.9175, + "step": 14612000 + }, + { + "epoch": 72.4, + "learning_rate": 1.3815277220413885e-05, + "loss": 1.8954, + "step": 14612500 + }, + { + "epoch": 72.4, + "learning_rate": 1.3814038633987802e-05, + "loss": 1.9231, + "step": 14613000 + }, + { + "epoch": 72.4, + "learning_rate": 1.381280004756172e-05, + "loss": 1.9304, + "step": 14613500 + }, + { + "epoch": 72.4, + "learning_rate": 1.3811563938308488e-05, + "loss": 1.9059, + "step": 14614000 + }, + { + "epoch": 72.41, + "learning_rate": 1.3810325351882405e-05, + "loss": 1.9008, + "step": 14614500 + }, + { + "epoch": 72.41, + "learning_rate": 1.3809089242629172e-05, + "loss": 1.901, + "step": 14615000 + }, + { + "epoch": 72.41, + "learning_rate": 1.3807850656203089e-05, + "loss": 1.937, + "step": 14615500 + }, + { + "epoch": 72.41, + "learning_rate": 1.3806612069777006e-05, + "loss": 1.9071, + "step": 14616000 + }, + { + "epoch": 72.42, + "learning_rate": 1.3805373483350923e-05, + "loss": 1.9002, + "step": 14616500 + }, + { + "epoch": 72.42, + "learning_rate": 1.380413737409769e-05, + "loss": 1.9036, + "step": 14617000 + }, + { + "epoch": 72.42, + "learning_rate": 1.3802898787671605e-05, + "loss": 1.9176, + "step": 14617500 + }, + { + "epoch": 72.42, + "learning_rate": 1.3801660201245522e-05, + "loss": 1.8798, + "step": 14618000 + }, + { + "epoch": 72.43, + "learning_rate": 1.3800421614819439e-05, + "loss": 1.9008, + "step": 14618500 + }, + { + "epoch": 72.43, + "learning_rate": 1.3799183028393356e-05, + "loss": 1.915, + "step": 14619000 + }, + { + "epoch": 72.43, + "learning_rate": 1.3797944441967273e-05, + "loss": 1.8951, + "step": 14619500 + }, + { + "epoch": 72.43, + "learning_rate": 1.379670585554119e-05, + "loss": 1.9046, + "step": 14620000 + }, + { + "epoch": 72.44, + "learning_rate": 1.3795467269115107e-05, + "loss": 1.9239, + "step": 14620500 + }, + { + "epoch": 72.44, + "learning_rate": 1.3794228682689022e-05, + "loss": 1.9176, + "step": 14621000 + }, + { + "epoch": 72.44, + "learning_rate": 1.3792990096262937e-05, + "loss": 1.9119, + "step": 14621500 + }, + { + "epoch": 72.44, + "learning_rate": 1.3791751509836852e-05, + "loss": 1.9172, + "step": 14622000 + }, + { + "epoch": 72.44, + "learning_rate": 1.3790515400583623e-05, + "loss": 1.8729, + "step": 14622500 + }, + { + "epoch": 72.45, + "learning_rate": 1.378927929133039e-05, + "loss": 1.9105, + "step": 14623000 + }, + { + "epoch": 72.45, + "learning_rate": 1.3788040704904307e-05, + "loss": 1.8748, + "step": 14623500 + }, + { + "epoch": 72.45, + "learning_rate": 1.3786802118478224e-05, + "loss": 1.8799, + "step": 14624000 + }, + { + "epoch": 72.45, + "learning_rate": 1.378556353205214e-05, + "loss": 1.927, + "step": 14624500 + }, + { + "epoch": 72.46, + "learning_rate": 1.378432989997176e-05, + "loss": 1.9255, + "step": 14625000 + }, + { + "epoch": 72.46, + "learning_rate": 1.3783091313545677e-05, + "loss": 1.926, + "step": 14625500 + }, + { + "epoch": 72.46, + "learning_rate": 1.3781852727119594e-05, + "loss": 1.89, + "step": 14626000 + }, + { + "epoch": 72.46, + "learning_rate": 1.3780614140693509e-05, + "loss": 1.9214, + "step": 14626500 + }, + { + "epoch": 72.47, + "learning_rate": 1.3779375554267426e-05, + "loss": 1.9062, + "step": 14627000 + }, + { + "epoch": 72.47, + "learning_rate": 1.3778136967841343e-05, + "loss": 1.8986, + "step": 14627500 + }, + { + "epoch": 72.47, + "learning_rate": 1.377690085858811e-05, + "loss": 1.9081, + "step": 14628000 + }, + { + "epoch": 72.47, + "learning_rate": 1.3775662272162027e-05, + "loss": 1.9133, + "step": 14628500 + }, + { + "epoch": 72.48, + "learning_rate": 1.3774423685735944e-05, + "loss": 1.9243, + "step": 14629000 + }, + { + "epoch": 72.48, + "learning_rate": 1.377318509930986e-05, + "loss": 1.8985, + "step": 14629500 + }, + { + "epoch": 72.48, + "learning_rate": 1.3771946512883776e-05, + "loss": 1.8955, + "step": 14630000 + }, + { + "epoch": 72.48, + "learning_rate": 1.3770707926457693e-05, + "loss": 1.8956, + "step": 14630500 + }, + { + "epoch": 72.49, + "learning_rate": 1.376946934003161e-05, + "loss": 1.8922, + "step": 14631000 + }, + { + "epoch": 72.49, + "learning_rate": 1.3768233230778377e-05, + "loss": 1.8979, + "step": 14631500 + }, + { + "epoch": 72.49, + "learning_rate": 1.3766994644352294e-05, + "loss": 1.9369, + "step": 14632000 + }, + { + "epoch": 72.49, + "learning_rate": 1.376575605792621e-05, + "loss": 1.9426, + "step": 14632500 + }, + { + "epoch": 72.5, + "learning_rate": 1.3764517471500126e-05, + "loss": 1.8916, + "step": 14633000 + }, + { + "epoch": 72.5, + "learning_rate": 1.3763278885074043e-05, + "loss": 1.9038, + "step": 14633500 + }, + { + "epoch": 72.5, + "learning_rate": 1.376204029864796e-05, + "loss": 1.9253, + "step": 14634000 + }, + { + "epoch": 72.5, + "learning_rate": 1.3760804189394727e-05, + "loss": 1.9049, + "step": 14634500 + }, + { + "epoch": 72.51, + "learning_rate": 1.3759565602968644e-05, + "loss": 1.9275, + "step": 14635000 + }, + { + "epoch": 72.51, + "learning_rate": 1.375832701654256e-05, + "loss": 1.9286, + "step": 14635500 + }, + { + "epoch": 72.51, + "learning_rate": 1.3757088430116478e-05, + "loss": 1.9215, + "step": 14636000 + }, + { + "epoch": 72.51, + "learning_rate": 1.3755849843690393e-05, + "loss": 1.8922, + "step": 14636500 + }, + { + "epoch": 72.52, + "learning_rate": 1.3754613734437163e-05, + "loss": 1.9148, + "step": 14637000 + }, + { + "epoch": 72.52, + "learning_rate": 1.3753375148011077e-05, + "loss": 1.9249, + "step": 14637500 + }, + { + "epoch": 72.52, + "learning_rate": 1.3752136561584994e-05, + "loss": 1.9143, + "step": 14638000 + }, + { + "epoch": 72.52, + "learning_rate": 1.3750897975158911e-05, + "loss": 1.8981, + "step": 14638500 + }, + { + "epoch": 72.53, + "learning_rate": 1.3749659388732828e-05, + "loss": 1.8985, + "step": 14639000 + }, + { + "epoch": 72.53, + "learning_rate": 1.3748420802306745e-05, + "loss": 1.9039, + "step": 14639500 + }, + { + "epoch": 72.53, + "learning_rate": 1.374718221588066e-05, + "loss": 1.9041, + "step": 14640000 + }, + { + "epoch": 72.53, + "learning_rate": 1.3745943629454577e-05, + "loss": 1.908, + "step": 14640500 + }, + { + "epoch": 72.54, + "learning_rate": 1.3744705043028494e-05, + "loss": 1.9118, + "step": 14641000 + }, + { + "epoch": 72.54, + "learning_rate": 1.374346645660241e-05, + "loss": 1.904, + "step": 14641500 + }, + { + "epoch": 72.54, + "learning_rate": 1.3742227870176324e-05, + "loss": 1.8992, + "step": 14642000 + }, + { + "epoch": 72.54, + "learning_rate": 1.3740989283750241e-05, + "loss": 1.9171, + "step": 14642500 + }, + { + "epoch": 72.55, + "learning_rate": 1.3739750697324158e-05, + "loss": 1.9101, + "step": 14643000 + }, + { + "epoch": 72.55, + "learning_rate": 1.3738514588070927e-05, + "loss": 1.877, + "step": 14643500 + }, + { + "epoch": 72.55, + "learning_rate": 1.3737278478817694e-05, + "loss": 1.9, + "step": 14644000 + }, + { + "epoch": 72.55, + "learning_rate": 1.3736039892391611e-05, + "loss": 1.9283, + "step": 14644500 + }, + { + "epoch": 72.56, + "learning_rate": 1.3734801305965528e-05, + "loss": 1.9003, + "step": 14645000 + }, + { + "epoch": 72.56, + "learning_rate": 1.3733562719539445e-05, + "loss": 1.9279, + "step": 14645500 + }, + { + "epoch": 72.56, + "learning_rate": 1.3732326610286214e-05, + "loss": 1.8922, + "step": 14646000 + }, + { + "epoch": 72.56, + "learning_rate": 1.373108802386013e-05, + "loss": 1.9094, + "step": 14646500 + }, + { + "epoch": 72.57, + "learning_rate": 1.3729849437434044e-05, + "loss": 1.915, + "step": 14647000 + }, + { + "epoch": 72.57, + "learning_rate": 1.3728610851007961e-05, + "loss": 1.9094, + "step": 14647500 + }, + { + "epoch": 72.57, + "learning_rate": 1.3727374741754732e-05, + "loss": 1.9237, + "step": 14648000 + }, + { + "epoch": 72.57, + "learning_rate": 1.3726136155328649e-05, + "loss": 1.9047, + "step": 14648500 + }, + { + "epoch": 72.58, + "learning_rate": 1.3724897568902564e-05, + "loss": 1.9127, + "step": 14649000 + }, + { + "epoch": 72.58, + "learning_rate": 1.372365898247648e-05, + "loss": 1.889, + "step": 14649500 + }, + { + "epoch": 72.58, + "learning_rate": 1.3722420396050394e-05, + "loss": 1.9081, + "step": 14650000 + }, + { + "epoch": 72.58, + "learning_rate": 1.3721181809624311e-05, + "loss": 1.9189, + "step": 14650500 + }, + { + "epoch": 72.59, + "learning_rate": 1.3719943223198228e-05, + "loss": 1.8953, + "step": 14651000 + }, + { + "epoch": 72.59, + "learning_rate": 1.3718704636772145e-05, + "loss": 1.9173, + "step": 14651500 + }, + { + "epoch": 72.59, + "learning_rate": 1.3717466050346062e-05, + "loss": 1.9076, + "step": 14652000 + }, + { + "epoch": 72.59, + "learning_rate": 1.3716227463919979e-05, + "loss": 1.9028, + "step": 14652500 + }, + { + "epoch": 72.6, + "learning_rate": 1.3714991354666746e-05, + "loss": 1.9041, + "step": 14653000 + }, + { + "epoch": 72.6, + "learning_rate": 1.3713752768240661e-05, + "loss": 1.8871, + "step": 14653500 + }, + { + "epoch": 72.6, + "learning_rate": 1.3712514181814578e-05, + "loss": 1.9085, + "step": 14654000 + }, + { + "epoch": 72.6, + "learning_rate": 1.3711275595388495e-05, + "loss": 1.9086, + "step": 14654500 + }, + { + "epoch": 72.61, + "learning_rate": 1.3710037008962412e-05, + "loss": 1.9255, + "step": 14655000 + }, + { + "epoch": 72.61, + "learning_rate": 1.3708798422536329e-05, + "loss": 1.9105, + "step": 14655500 + }, + { + "epoch": 72.61, + "learning_rate": 1.3707559836110246e-05, + "loss": 1.9116, + "step": 14656000 + }, + { + "epoch": 72.61, + "learning_rate": 1.3706321249684163e-05, + "loss": 1.8864, + "step": 14656500 + }, + { + "epoch": 72.62, + "learning_rate": 1.3705085140430928e-05, + "loss": 1.898, + "step": 14657000 + }, + { + "epoch": 72.62, + "learning_rate": 1.3703846554004845e-05, + "loss": 1.917, + "step": 14657500 + }, + { + "epoch": 72.62, + "learning_rate": 1.3702607967578762e-05, + "loss": 1.8989, + "step": 14658000 + }, + { + "epoch": 72.62, + "learning_rate": 1.3701371858325533e-05, + "loss": 1.9246, + "step": 14658500 + }, + { + "epoch": 72.63, + "learning_rate": 1.3700133271899448e-05, + "loss": 1.9021, + "step": 14659000 + }, + { + "epoch": 72.63, + "learning_rate": 1.3698894685473363e-05, + "loss": 1.9181, + "step": 14659500 + }, + { + "epoch": 72.63, + "learning_rate": 1.3697656099047278e-05, + "loss": 1.9108, + "step": 14660000 + }, + { + "epoch": 72.63, + "learning_rate": 1.3696417512621195e-05, + "loss": 1.919, + "step": 14660500 + }, + { + "epoch": 72.64, + "learning_rate": 1.3695178926195112e-05, + "loss": 1.9105, + "step": 14661000 + }, + { + "epoch": 72.64, + "learning_rate": 1.3693940339769029e-05, + "loss": 1.9023, + "step": 14661500 + }, + { + "epoch": 72.64, + "learning_rate": 1.3692701753342946e-05, + "loss": 1.8873, + "step": 14662000 + }, + { + "epoch": 72.64, + "learning_rate": 1.3691463166916863e-05, + "loss": 1.9204, + "step": 14662500 + }, + { + "epoch": 72.65, + "learning_rate": 1.369022458049078e-05, + "loss": 1.8903, + "step": 14663000 + }, + { + "epoch": 72.65, + "learning_rate": 1.3688985994064695e-05, + "loss": 1.9193, + "step": 14663500 + }, + { + "epoch": 72.65, + "learning_rate": 1.368774740763861e-05, + "loss": 1.9086, + "step": 14664000 + }, + { + "epoch": 72.65, + "learning_rate": 1.3686511298385379e-05, + "loss": 1.9036, + "step": 14664500 + }, + { + "epoch": 72.66, + "learning_rate": 1.3685272711959296e-05, + "loss": 1.9134, + "step": 14665000 + }, + { + "epoch": 72.66, + "learning_rate": 1.3684034125533213e-05, + "loss": 1.9128, + "step": 14665500 + }, + { + "epoch": 72.66, + "learning_rate": 1.368279553910713e-05, + "loss": 1.9141, + "step": 14666000 + }, + { + "epoch": 72.66, + "learning_rate": 1.3681556952681047e-05, + "loss": 1.9041, + "step": 14666500 + }, + { + "epoch": 72.67, + "learning_rate": 1.3680323320600666e-05, + "loss": 1.8903, + "step": 14667000 + }, + { + "epoch": 72.67, + "learning_rate": 1.3679084734174583e-05, + "loss": 1.9044, + "step": 14667500 + }, + { + "epoch": 72.67, + "learning_rate": 1.36778461477485e-05, + "loss": 1.9256, + "step": 14668000 + }, + { + "epoch": 72.67, + "learning_rate": 1.3676607561322417e-05, + "loss": 1.8867, + "step": 14668500 + }, + { + "epoch": 72.68, + "learning_rate": 1.367536897489633e-05, + "loss": 1.903, + "step": 14669000 + }, + { + "epoch": 72.68, + "learning_rate": 1.3674130388470247e-05, + "loss": 1.9202, + "step": 14669500 + }, + { + "epoch": 72.68, + "learning_rate": 1.3672891802044164e-05, + "loss": 1.9303, + "step": 14670000 + }, + { + "epoch": 72.68, + "learning_rate": 1.367165321561808e-05, + "loss": 1.9341, + "step": 14670500 + }, + { + "epoch": 72.69, + "learning_rate": 1.3670414629191996e-05, + "loss": 1.9026, + "step": 14671000 + }, + { + "epoch": 72.69, + "learning_rate": 1.3669180997111617e-05, + "loss": 1.9073, + "step": 14671500 + }, + { + "epoch": 72.69, + "learning_rate": 1.3667942410685534e-05, + "loss": 1.9118, + "step": 14672000 + }, + { + "epoch": 72.69, + "learning_rate": 1.3666703824259449e-05, + "loss": 1.9081, + "step": 14672500 + }, + { + "epoch": 72.7, + "learning_rate": 1.3665465237833366e-05, + "loss": 1.9037, + "step": 14673000 + }, + { + "epoch": 72.7, + "learning_rate": 1.3664226651407283e-05, + "loss": 1.9099, + "step": 14673500 + }, + { + "epoch": 72.7, + "learning_rate": 1.36629880649812e-05, + "loss": 1.9243, + "step": 14674000 + }, + { + "epoch": 72.7, + "learning_rate": 1.3661749478555117e-05, + "loss": 1.8971, + "step": 14674500 + }, + { + "epoch": 72.71, + "learning_rate": 1.3660513369301884e-05, + "loss": 1.9049, + "step": 14675000 + }, + { + "epoch": 72.71, + "learning_rate": 1.3659277260048653e-05, + "loss": 1.9052, + "step": 14675500 + }, + { + "epoch": 72.71, + "learning_rate": 1.365803867362257e-05, + "loss": 1.9227, + "step": 14676000 + }, + { + "epoch": 72.71, + "learning_rate": 1.3656800087196486e-05, + "loss": 1.9198, + "step": 14676500 + }, + { + "epoch": 72.71, + "learning_rate": 1.36555615007704e-05, + "loss": 1.9101, + "step": 14677000 + }, + { + "epoch": 72.72, + "learning_rate": 1.3654322914344317e-05, + "loss": 1.9115, + "step": 14677500 + }, + { + "epoch": 72.72, + "learning_rate": 1.3653084327918234e-05, + "loss": 1.9106, + "step": 14678000 + }, + { + "epoch": 72.72, + "learning_rate": 1.365184574149215e-05, + "loss": 1.9154, + "step": 14678500 + }, + { + "epoch": 72.72, + "learning_rate": 1.365060963223892e-05, + "loss": 1.9113, + "step": 14679000 + }, + { + "epoch": 72.73, + "learning_rate": 1.3649371045812837e-05, + "loss": 1.9161, + "step": 14679500 + }, + { + "epoch": 72.73, + "learning_rate": 1.364813245938675e-05, + "loss": 1.9098, + "step": 14680000 + }, + { + "epoch": 72.73, + "learning_rate": 1.3646893872960667e-05, + "loss": 1.9007, + "step": 14680500 + }, + { + "epoch": 72.73, + "learning_rate": 1.3645655286534584e-05, + "loss": 1.9143, + "step": 14681000 + }, + { + "epoch": 72.74, + "learning_rate": 1.3644416700108501e-05, + "loss": 1.9126, + "step": 14681500 + }, + { + "epoch": 72.74, + "learning_rate": 1.3643178113682418e-05, + "loss": 1.9078, + "step": 14682000 + }, + { + "epoch": 72.74, + "learning_rate": 1.3641939527256333e-05, + "loss": 1.9277, + "step": 14682500 + }, + { + "epoch": 72.74, + "learning_rate": 1.364070094083025e-05, + "loss": 1.928, + "step": 14683000 + }, + { + "epoch": 72.75, + "learning_rate": 1.3639462354404167e-05, + "loss": 1.9243, + "step": 14683500 + }, + { + "epoch": 72.75, + "learning_rate": 1.3638223767978084e-05, + "loss": 1.8966, + "step": 14684000 + }, + { + "epoch": 72.75, + "learning_rate": 1.3636985181551997e-05, + "loss": 1.9211, + "step": 14684500 + }, + { + "epoch": 72.75, + "learning_rate": 1.3635749072298768e-05, + "loss": 1.9413, + "step": 14685000 + }, + { + "epoch": 72.76, + "learning_rate": 1.3634510485872685e-05, + "loss": 1.9242, + "step": 14685500 + }, + { + "epoch": 72.76, + "learning_rate": 1.36332718994466e-05, + "loss": 1.9219, + "step": 14686000 + }, + { + "epoch": 72.76, + "learning_rate": 1.3632033313020517e-05, + "loss": 1.9186, + "step": 14686500 + }, + { + "epoch": 72.76, + "learning_rate": 1.3630797203767284e-05, + "loss": 1.9166, + "step": 14687000 + }, + { + "epoch": 72.77, + "learning_rate": 1.3629558617341201e-05, + "loss": 1.9021, + "step": 14687500 + }, + { + "epoch": 72.77, + "learning_rate": 1.3628320030915118e-05, + "loss": 1.9047, + "step": 14688000 + }, + { + "epoch": 72.77, + "learning_rate": 1.3627081444489035e-05, + "loss": 1.9356, + "step": 14688500 + }, + { + "epoch": 72.77, + "learning_rate": 1.3625842858062952e-05, + "loss": 1.9152, + "step": 14689000 + }, + { + "epoch": 72.78, + "learning_rate": 1.3624606748809717e-05, + "loss": 1.9088, + "step": 14689500 + }, + { + "epoch": 72.78, + "learning_rate": 1.3623368162383634e-05, + "loss": 1.8953, + "step": 14690000 + }, + { + "epoch": 72.78, + "learning_rate": 1.3622129575957551e-05, + "loss": 1.9188, + "step": 14690500 + }, + { + "epoch": 72.78, + "learning_rate": 1.3620890989531468e-05, + "loss": 1.9272, + "step": 14691000 + }, + { + "epoch": 72.79, + "learning_rate": 1.3619652403105385e-05, + "loss": 1.8973, + "step": 14691500 + }, + { + "epoch": 72.79, + "learning_rate": 1.3618413816679302e-05, + "loss": 1.9338, + "step": 14692000 + }, + { + "epoch": 72.79, + "learning_rate": 1.3617175230253219e-05, + "loss": 1.9084, + "step": 14692500 + }, + { + "epoch": 72.79, + "learning_rate": 1.3615936643827134e-05, + "loss": 1.9129, + "step": 14693000 + }, + { + "epoch": 72.8, + "learning_rate": 1.3614698057401051e-05, + "loss": 1.9183, + "step": 14693500 + }, + { + "epoch": 72.8, + "learning_rate": 1.3613464425320672e-05, + "loss": 1.9168, + "step": 14694000 + }, + { + "epoch": 72.8, + "learning_rate": 1.3612225838894589e-05, + "loss": 1.9032, + "step": 14694500 + }, + { + "epoch": 72.8, + "learning_rate": 1.3610987252468504e-05, + "loss": 1.922, + "step": 14695000 + }, + { + "epoch": 72.81, + "learning_rate": 1.3609748666042419e-05, + "loss": 1.918, + "step": 14695500 + }, + { + "epoch": 72.81, + "learning_rate": 1.3608512556789188e-05, + "loss": 1.929, + "step": 14696000 + }, + { + "epoch": 72.81, + "learning_rate": 1.3607273970363105e-05, + "loss": 1.9341, + "step": 14696500 + }, + { + "epoch": 72.81, + "learning_rate": 1.3606035383937022e-05, + "loss": 1.8811, + "step": 14697000 + }, + { + "epoch": 72.82, + "learning_rate": 1.3604796797510939e-05, + "loss": 1.9218, + "step": 14697500 + }, + { + "epoch": 72.82, + "learning_rate": 1.3603558211084856e-05, + "loss": 1.9056, + "step": 14698000 + }, + { + "epoch": 72.82, + "learning_rate": 1.360231962465877e-05, + "loss": 1.9107, + "step": 14698500 + }, + { + "epoch": 72.82, + "learning_rate": 1.3601081038232686e-05, + "loss": 1.9415, + "step": 14699000 + }, + { + "epoch": 72.83, + "learning_rate": 1.3599842451806601e-05, + "loss": 1.9187, + "step": 14699500 + }, + { + "epoch": 72.83, + "learning_rate": 1.3598603865380518e-05, + "loss": 1.8943, + "step": 14700000 + }, + { + "epoch": 72.83, + "learning_rate": 1.3597365278954435e-05, + "loss": 1.9236, + "step": 14700500 + }, + { + "epoch": 72.83, + "learning_rate": 1.3596126692528352e-05, + "loss": 1.8923, + "step": 14701000 + }, + { + "epoch": 72.84, + "learning_rate": 1.3594888106102269e-05, + "loss": 1.9179, + "step": 14701500 + }, + { + "epoch": 72.84, + "learning_rate": 1.3593649519676186e-05, + "loss": 1.9086, + "step": 14702000 + }, + { + "epoch": 72.84, + "learning_rate": 1.3592410933250103e-05, + "loss": 1.9159, + "step": 14702500 + }, + { + "epoch": 72.84, + "learning_rate": 1.3591172346824016e-05, + "loss": 1.9072, + "step": 14703000 + }, + { + "epoch": 72.85, + "learning_rate": 1.3589933760397933e-05, + "loss": 1.9218, + "step": 14703500 + }, + { + "epoch": 72.85, + "learning_rate": 1.3588695173971849e-05, + "loss": 1.9437, + "step": 14704000 + }, + { + "epoch": 72.85, + "learning_rate": 1.3587456587545765e-05, + "loss": 1.9359, + "step": 14704500 + }, + { + "epoch": 72.85, + "learning_rate": 1.3586218001119682e-05, + "loss": 1.9191, + "step": 14705000 + }, + { + "epoch": 72.86, + "learning_rate": 1.3584981891866453e-05, + "loss": 1.9191, + "step": 14705500 + }, + { + "epoch": 72.86, + "learning_rate": 1.358374578261322e-05, + "loss": 1.9273, + "step": 14706000 + }, + { + "epoch": 72.86, + "learning_rate": 1.3582507196187135e-05, + "loss": 1.9123, + "step": 14706500 + }, + { + "epoch": 72.86, + "learning_rate": 1.3581271086933906e-05, + "loss": 1.9283, + "step": 14707000 + }, + { + "epoch": 72.87, + "learning_rate": 1.3580032500507823e-05, + "loss": 1.9033, + "step": 14707500 + }, + { + "epoch": 72.87, + "learning_rate": 1.3578793914081736e-05, + "loss": 1.9109, + "step": 14708000 + }, + { + "epoch": 72.87, + "learning_rate": 1.3577555327655653e-05, + "loss": 1.9062, + "step": 14708500 + }, + { + "epoch": 72.87, + "learning_rate": 1.357631674122957e-05, + "loss": 1.8915, + "step": 14709000 + }, + { + "epoch": 72.88, + "learning_rate": 1.3575078154803485e-05, + "loss": 1.9494, + "step": 14709500 + }, + { + "epoch": 72.88, + "learning_rate": 1.3573839568377402e-05, + "loss": 1.8908, + "step": 14710000 + }, + { + "epoch": 72.88, + "learning_rate": 1.3572600981951319e-05, + "loss": 1.8915, + "step": 14710500 + }, + { + "epoch": 72.88, + "learning_rate": 1.3571362395525236e-05, + "loss": 1.934, + "step": 14711000 + }, + { + "epoch": 72.89, + "learning_rate": 1.3570123809099153e-05, + "loss": 1.9205, + "step": 14711500 + }, + { + "epoch": 72.89, + "learning_rate": 1.356888769984592e-05, + "loss": 1.9035, + "step": 14712000 + }, + { + "epoch": 72.89, + "learning_rate": 1.3567649113419837e-05, + "loss": 1.9158, + "step": 14712500 + }, + { + "epoch": 72.89, + "learning_rate": 1.3566410526993752e-05, + "loss": 1.8937, + "step": 14713000 + }, + { + "epoch": 72.9, + "learning_rate": 1.356517194056767e-05, + "loss": 1.9086, + "step": 14713500 + }, + { + "epoch": 72.9, + "learning_rate": 1.3563933354141586e-05, + "loss": 1.9272, + "step": 14714000 + }, + { + "epoch": 72.9, + "learning_rate": 1.3562699722061207e-05, + "loss": 1.9376, + "step": 14714500 + }, + { + "epoch": 72.9, + "learning_rate": 1.3561461135635122e-05, + "loss": 1.9278, + "step": 14715000 + }, + { + "epoch": 72.91, + "learning_rate": 1.3560222549209039e-05, + "loss": 1.8708, + "step": 14715500 + }, + { + "epoch": 72.91, + "learning_rate": 1.3558983962782956e-05, + "loss": 1.8789, + "step": 14716000 + }, + { + "epoch": 72.91, + "learning_rate": 1.3557745376356873e-05, + "loss": 1.8972, + "step": 14716500 + }, + { + "epoch": 72.91, + "learning_rate": 1.355650678993079e-05, + "loss": 1.9165, + "step": 14717000 + }, + { + "epoch": 72.92, + "learning_rate": 1.3555268203504703e-05, + "loss": 1.8854, + "step": 14717500 + }, + { + "epoch": 72.92, + "learning_rate": 1.355402961707862e-05, + "loss": 1.8979, + "step": 14718000 + }, + { + "epoch": 72.92, + "learning_rate": 1.3552793507825389e-05, + "loss": 1.9238, + "step": 14718500 + }, + { + "epoch": 72.92, + "learning_rate": 1.355155739857216e-05, + "loss": 1.9057, + "step": 14719000 + }, + { + "epoch": 72.93, + "learning_rate": 1.3550318812146073e-05, + "loss": 1.9315, + "step": 14719500 + }, + { + "epoch": 72.93, + "learning_rate": 1.354908022571999e-05, + "loss": 1.9189, + "step": 14720000 + }, + { + "epoch": 72.93, + "learning_rate": 1.3547841639293907e-05, + "loss": 1.9015, + "step": 14720500 + }, + { + "epoch": 72.93, + "learning_rate": 1.3546603052867824e-05, + "loss": 1.9198, + "step": 14721000 + }, + { + "epoch": 72.94, + "learning_rate": 1.354536446644174e-05, + "loss": 1.8968, + "step": 14721500 + }, + { + "epoch": 72.94, + "learning_rate": 1.354412835718851e-05, + "loss": 1.9218, + "step": 14722000 + }, + { + "epoch": 72.94, + "learning_rate": 1.3542889770762423e-05, + "loss": 1.9061, + "step": 14722500 + }, + { + "epoch": 72.94, + "learning_rate": 1.354165118433634e-05, + "loss": 1.8988, + "step": 14723000 + }, + { + "epoch": 72.95, + "learning_rate": 1.354041507508311e-05, + "loss": 1.921, + "step": 14723500 + }, + { + "epoch": 72.95, + "learning_rate": 1.3539176488657026e-05, + "loss": 1.8783, + "step": 14724000 + }, + { + "epoch": 72.95, + "learning_rate": 1.3537937902230943e-05, + "loss": 1.9005, + "step": 14724500 + }, + { + "epoch": 72.95, + "learning_rate": 1.353669931580486e-05, + "loss": 1.9031, + "step": 14725000 + }, + { + "epoch": 72.96, + "learning_rate": 1.3535460729378777e-05, + "loss": 1.9091, + "step": 14725500 + }, + { + "epoch": 72.96, + "learning_rate": 1.353422214295269e-05, + "loss": 1.9028, + "step": 14726000 + }, + { + "epoch": 72.96, + "learning_rate": 1.3532983556526607e-05, + "loss": 1.9019, + "step": 14726500 + }, + { + "epoch": 72.96, + "learning_rate": 1.3531744970100524e-05, + "loss": 1.9505, + "step": 14727000 + }, + { + "epoch": 72.97, + "learning_rate": 1.3530506383674441e-05, + "loss": 1.9172, + "step": 14727500 + }, + { + "epoch": 72.97, + "learning_rate": 1.3529267797248358e-05, + "loss": 1.8973, + "step": 14728000 + }, + { + "epoch": 72.97, + "learning_rate": 1.3528029210822273e-05, + "loss": 1.9118, + "step": 14728500 + }, + { + "epoch": 72.97, + "learning_rate": 1.352679062439619e-05, + "loss": 1.9017, + "step": 14729000 + }, + { + "epoch": 72.98, + "learning_rate": 1.3525552037970107e-05, + "loss": 1.8918, + "step": 14729500 + }, + { + "epoch": 72.98, + "learning_rate": 1.352431345154402e-05, + "loss": 1.9158, + "step": 14730000 + }, + { + "epoch": 72.98, + "learning_rate": 1.3523074865117937e-05, + "loss": 1.9134, + "step": 14730500 + }, + { + "epoch": 72.98, + "learning_rate": 1.3521838755864708e-05, + "loss": 1.9097, + "step": 14731000 + }, + { + "epoch": 72.98, + "learning_rate": 1.3520600169438625e-05, + "loss": 1.9187, + "step": 14731500 + }, + { + "epoch": 72.99, + "learning_rate": 1.351936158301254e-05, + "loss": 1.8902, + "step": 14732000 + }, + { + "epoch": 72.99, + "learning_rate": 1.3518125473759307e-05, + "loss": 1.9143, + "step": 14732500 + }, + { + "epoch": 72.99, + "learning_rate": 1.3516886887333224e-05, + "loss": 1.9187, + "step": 14733000 + }, + { + "epoch": 72.99, + "learning_rate": 1.3515648300907141e-05, + "loss": 1.9247, + "step": 14733500 + }, + { + "epoch": 73.0, + "learning_rate": 1.3514409714481058e-05, + "loss": 1.9172, + "step": 14734000 + }, + { + "epoch": 73.0, + "learning_rate": 1.3513171128054975e-05, + "loss": 1.9199, + "step": 14734500 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.6786633732376618, + "eval_accuracy_mlm": 0.6390423833363579, + "eval_accuracy_nsp": 0.8653430551578881, + "eval_loss": 2.3007607460021973, + "eval_runtime": 146.763, + "eval_samples_per_second": 1737.215, + "eval_steps_per_second": 72.389, + "step": 14734539 + }, + { + "epoch": 73.0, + "learning_rate": 1.3511932541628892e-05, + "loss": 1.9016, + "step": 14735000 + }, + { + "epoch": 73.0, + "learning_rate": 1.3510693955202807e-05, + "loss": 1.8827, + "step": 14735500 + }, + { + "epoch": 73.01, + "learning_rate": 1.3509455368776724e-05, + "loss": 1.8933, + "step": 14736000 + }, + { + "epoch": 73.01, + "learning_rate": 1.3508216782350638e-05, + "loss": 1.9127, + "step": 14736500 + }, + { + "epoch": 73.01, + "learning_rate": 1.3506980673097408e-05, + "loss": 1.8965, + "step": 14737000 + }, + { + "epoch": 73.01, + "learning_rate": 1.3505742086671325e-05, + "loss": 1.8983, + "step": 14737500 + }, + { + "epoch": 73.02, + "learning_rate": 1.3504503500245242e-05, + "loss": 1.921, + "step": 14738000 + }, + { + "epoch": 73.02, + "learning_rate": 1.3503264913819159e-05, + "loss": 1.8979, + "step": 14738500 + }, + { + "epoch": 73.02, + "learning_rate": 1.3502026327393074e-05, + "loss": 1.8833, + "step": 14739000 + }, + { + "epoch": 73.02, + "learning_rate": 1.3500790218139841e-05, + "loss": 1.9025, + "step": 14739500 + }, + { + "epoch": 73.03, + "learning_rate": 1.3499554108886612e-05, + "loss": 1.8919, + "step": 14740000 + }, + { + "epoch": 73.03, + "learning_rate": 1.3498315522460529e-05, + "loss": 1.9102, + "step": 14740500 + }, + { + "epoch": 73.03, + "learning_rate": 1.3497076936034444e-05, + "loss": 1.8995, + "step": 14741000 + }, + { + "epoch": 73.03, + "learning_rate": 1.3495838349608359e-05, + "loss": 1.9217, + "step": 14741500 + }, + { + "epoch": 73.04, + "learning_rate": 1.3494599763182274e-05, + "loss": 1.8947, + "step": 14742000 + }, + { + "epoch": 73.04, + "learning_rate": 1.3493361176756191e-05, + "loss": 1.8732, + "step": 14742500 + }, + { + "epoch": 73.04, + "learning_rate": 1.3492122590330108e-05, + "loss": 1.8891, + "step": 14743000 + }, + { + "epoch": 73.04, + "learning_rate": 1.3490886481076879e-05, + "loss": 1.8883, + "step": 14743500 + }, + { + "epoch": 73.05, + "learning_rate": 1.3489647894650796e-05, + "loss": 1.891, + "step": 14744000 + }, + { + "epoch": 73.05, + "learning_rate": 1.3488409308224709e-05, + "loss": 1.8895, + "step": 14744500 + }, + { + "epoch": 73.05, + "learning_rate": 1.3487170721798626e-05, + "loss": 1.8845, + "step": 14745000 + }, + { + "epoch": 73.05, + "learning_rate": 1.3485932135372541e-05, + "loss": 1.9125, + "step": 14745500 + }, + { + "epoch": 73.06, + "learning_rate": 1.3484693548946458e-05, + "loss": 1.8991, + "step": 14746000 + }, + { + "epoch": 73.06, + "learning_rate": 1.3483454962520375e-05, + "loss": 1.9013, + "step": 14746500 + }, + { + "epoch": 73.06, + "learning_rate": 1.3482216376094292e-05, + "loss": 1.8967, + "step": 14747000 + }, + { + "epoch": 73.06, + "learning_rate": 1.3480977789668209e-05, + "loss": 1.9089, + "step": 14747500 + }, + { + "epoch": 73.07, + "learning_rate": 1.3479739203242126e-05, + "loss": 1.8929, + "step": 14748000 + }, + { + "epoch": 73.07, + "learning_rate": 1.3478500616816043e-05, + "loss": 1.8801, + "step": 14748500 + }, + { + "epoch": 73.07, + "learning_rate": 1.3477262030389956e-05, + "loss": 1.8824, + "step": 14749000 + }, + { + "epoch": 73.07, + "learning_rate": 1.3476023443963873e-05, + "loss": 1.8903, + "step": 14749500 + }, + { + "epoch": 73.08, + "learning_rate": 1.3474787334710642e-05, + "loss": 1.9032, + "step": 14750000 + }, + { + "epoch": 73.08, + "learning_rate": 1.3473548748284559e-05, + "loss": 1.9211, + "step": 14750500 + }, + { + "epoch": 73.08, + "learning_rate": 1.3472312639031326e-05, + "loss": 1.913, + "step": 14751000 + }, + { + "epoch": 73.08, + "learning_rate": 1.3471076529778095e-05, + "loss": 1.8918, + "step": 14751500 + }, + { + "epoch": 73.09, + "learning_rate": 1.3469840420524866e-05, + "loss": 1.9002, + "step": 14752000 + }, + { + "epoch": 73.09, + "learning_rate": 1.3468601834098779e-05, + "loss": 1.9027, + "step": 14752500 + }, + { + "epoch": 73.09, + "learning_rate": 1.3467363247672696e-05, + "loss": 1.8954, + "step": 14753000 + }, + { + "epoch": 73.09, + "learning_rate": 1.3466124661246613e-05, + "loss": 1.8731, + "step": 14753500 + }, + { + "epoch": 73.1, + "learning_rate": 1.346488607482053e-05, + "loss": 1.8677, + "step": 14754000 + }, + { + "epoch": 73.1, + "learning_rate": 1.3463647488394445e-05, + "loss": 1.9341, + "step": 14754500 + }, + { + "epoch": 73.1, + "learning_rate": 1.3462408901968362e-05, + "loss": 1.8904, + "step": 14755000 + }, + { + "epoch": 73.1, + "learning_rate": 1.3461170315542279e-05, + "loss": 1.9071, + "step": 14755500 + }, + { + "epoch": 73.11, + "learning_rate": 1.3459931729116196e-05, + "loss": 1.9075, + "step": 14756000 + }, + { + "epoch": 73.11, + "learning_rate": 1.3458693142690113e-05, + "loss": 1.9063, + "step": 14756500 + }, + { + "epoch": 73.11, + "learning_rate": 1.3457454556264026e-05, + "loss": 1.8718, + "step": 14757000 + }, + { + "epoch": 73.11, + "learning_rate": 1.3456215969837943e-05, + "loss": 1.8944, + "step": 14757500 + }, + { + "epoch": 73.12, + "learning_rate": 1.345497738341186e-05, + "loss": 1.9185, + "step": 14758000 + }, + { + "epoch": 73.12, + "learning_rate": 1.3453741274158629e-05, + "loss": 1.9017, + "step": 14758500 + }, + { + "epoch": 73.12, + "learning_rate": 1.3452502687732546e-05, + "loss": 1.882, + "step": 14759000 + }, + { + "epoch": 73.12, + "learning_rate": 1.3451264101306463e-05, + "loss": 1.8956, + "step": 14759500 + }, + { + "epoch": 73.13, + "learning_rate": 1.3450025514880376e-05, + "loss": 1.8974, + "step": 14760000 + }, + { + "epoch": 73.13, + "learning_rate": 1.3448786928454293e-05, + "loss": 1.8941, + "step": 14760500 + }, + { + "epoch": 73.13, + "learning_rate": 1.344754834202821e-05, + "loss": 1.8964, + "step": 14761000 + }, + { + "epoch": 73.13, + "learning_rate": 1.3446309755602127e-05, + "loss": 1.8935, + "step": 14761500 + }, + { + "epoch": 73.14, + "learning_rate": 1.3445073646348896e-05, + "loss": 1.9025, + "step": 14762000 + }, + { + "epoch": 73.14, + "learning_rate": 1.3443837537095663e-05, + "loss": 1.9172, + "step": 14762500 + }, + { + "epoch": 73.14, + "learning_rate": 1.344259895066958e-05, + "loss": 1.9038, + "step": 14763000 + }, + { + "epoch": 73.14, + "learning_rate": 1.3441360364243497e-05, + "loss": 1.9097, + "step": 14763500 + }, + { + "epoch": 73.15, + "learning_rate": 1.3440124254990266e-05, + "loss": 1.9116, + "step": 14764000 + }, + { + "epoch": 73.15, + "learning_rate": 1.3438885668564183e-05, + "loss": 1.9018, + "step": 14764500 + }, + { + "epoch": 73.15, + "learning_rate": 1.3437647082138096e-05, + "loss": 1.909, + "step": 14765000 + }, + { + "epoch": 73.15, + "learning_rate": 1.3436408495712013e-05, + "loss": 1.9111, + "step": 14765500 + }, + { + "epoch": 73.16, + "learning_rate": 1.343516990928593e-05, + "loss": 1.8798, + "step": 14766000 + }, + { + "epoch": 73.16, + "learning_rate": 1.3433931322859847e-05, + "loss": 1.9137, + "step": 14766500 + }, + { + "epoch": 73.16, + "learning_rate": 1.3432695213606616e-05, + "loss": 1.8859, + "step": 14767000 + }, + { + "epoch": 73.16, + "learning_rate": 1.3431456627180533e-05, + "loss": 1.8998, + "step": 14767500 + }, + { + "epoch": 73.17, + "learning_rate": 1.343021804075445e-05, + "loss": 1.9009, + "step": 14768000 + }, + { + "epoch": 73.17, + "learning_rate": 1.3428979454328363e-05, + "loss": 1.8833, + "step": 14768500 + }, + { + "epoch": 73.17, + "learning_rate": 1.342774086790228e-05, + "loss": 1.8965, + "step": 14769000 + }, + { + "epoch": 73.17, + "learning_rate": 1.3426502281476197e-05, + "loss": 1.8975, + "step": 14769500 + }, + { + "epoch": 73.18, + "learning_rate": 1.3425263695050114e-05, + "loss": 1.9083, + "step": 14770000 + }, + { + "epoch": 73.18, + "learning_rate": 1.3424027585796883e-05, + "loss": 1.8681, + "step": 14770500 + }, + { + "epoch": 73.18, + "learning_rate": 1.342279147654365e-05, + "loss": 1.8773, + "step": 14771000 + }, + { + "epoch": 73.18, + "learning_rate": 1.3421552890117567e-05, + "loss": 1.9283, + "step": 14771500 + }, + { + "epoch": 73.19, + "learning_rate": 1.3420314303691484e-05, + "loss": 1.8831, + "step": 14772000 + }, + { + "epoch": 73.19, + "learning_rate": 1.34190757172654e-05, + "loss": 1.9133, + "step": 14772500 + }, + { + "epoch": 73.19, + "learning_rate": 1.3417837130839318e-05, + "loss": 1.9094, + "step": 14773000 + }, + { + "epoch": 73.19, + "learning_rate": 1.3416598544413233e-05, + "loss": 1.8916, + "step": 14773500 + }, + { + "epoch": 73.2, + "learning_rate": 1.341536243516e-05, + "loss": 1.8821, + "step": 14774000 + }, + { + "epoch": 73.2, + "learning_rate": 1.3414123848733917e-05, + "loss": 1.9126, + "step": 14774500 + }, + { + "epoch": 73.2, + "learning_rate": 1.3412885262307834e-05, + "loss": 1.8855, + "step": 14775000 + }, + { + "epoch": 73.2, + "learning_rate": 1.341164667588175e-05, + "loss": 1.9146, + "step": 14775500 + }, + { + "epoch": 73.21, + "learning_rate": 1.3410408089455668e-05, + "loss": 1.919, + "step": 14776000 + }, + { + "epoch": 73.21, + "learning_rate": 1.3409169503029585e-05, + "loss": 1.9134, + "step": 14776500 + }, + { + "epoch": 73.21, + "learning_rate": 1.34079309166035e-05, + "loss": 1.879, + "step": 14777000 + }, + { + "epoch": 73.21, + "learning_rate": 1.3406692330177415e-05, + "loss": 1.911, + "step": 14777500 + }, + { + "epoch": 73.22, + "learning_rate": 1.340545374375133e-05, + "loss": 1.8912, + "step": 14778000 + }, + { + "epoch": 73.22, + "learning_rate": 1.3404215157325247e-05, + "loss": 1.9108, + "step": 14778500 + }, + { + "epoch": 73.22, + "learning_rate": 1.3402976570899164e-05, + "loss": 1.8929, + "step": 14779000 + }, + { + "epoch": 73.22, + "learning_rate": 1.3401740461645935e-05, + "loss": 1.9152, + "step": 14779500 + }, + { + "epoch": 73.23, + "learning_rate": 1.3400501875219852e-05, + "loss": 1.8739, + "step": 14780000 + }, + { + "epoch": 73.23, + "learning_rate": 1.3399263288793767e-05, + "loss": 1.8982, + "step": 14780500 + }, + { + "epoch": 73.23, + "learning_rate": 1.3398024702367682e-05, + "loss": 1.9081, + "step": 14781000 + }, + { + "epoch": 73.23, + "learning_rate": 1.3396786115941597e-05, + "loss": 1.9061, + "step": 14781500 + }, + { + "epoch": 73.24, + "learning_rate": 1.3395547529515514e-05, + "loss": 1.8929, + "step": 14782000 + }, + { + "epoch": 73.24, + "learning_rate": 1.3394308943089431e-05, + "loss": 1.9028, + "step": 14782500 + }, + { + "epoch": 73.24, + "learning_rate": 1.3393070356663348e-05, + "loss": 1.893, + "step": 14783000 + }, + { + "epoch": 73.24, + "learning_rate": 1.3391831770237265e-05, + "loss": 1.9335, + "step": 14783500 + }, + { + "epoch": 73.25, + "learning_rate": 1.3390593183811182e-05, + "loss": 1.9176, + "step": 14784000 + }, + { + "epoch": 73.25, + "learning_rate": 1.3389354597385099e-05, + "loss": 1.8932, + "step": 14784500 + }, + { + "epoch": 73.25, + "learning_rate": 1.3388118488131864e-05, + "loss": 1.9085, + "step": 14785000 + }, + { + "epoch": 73.25, + "learning_rate": 1.3386879901705781e-05, + "loss": 1.9023, + "step": 14785500 + }, + { + "epoch": 73.25, + "learning_rate": 1.3385641315279698e-05, + "loss": 1.9045, + "step": 14786000 + }, + { + "epoch": 73.26, + "learning_rate": 1.3384402728853615e-05, + "loss": 1.9121, + "step": 14786500 + }, + { + "epoch": 73.26, + "learning_rate": 1.3383164142427532e-05, + "loss": 1.8923, + "step": 14787000 + }, + { + "epoch": 73.26, + "learning_rate": 1.3381925556001449e-05, + "loss": 1.9061, + "step": 14787500 + }, + { + "epoch": 73.26, + "learning_rate": 1.3380686969575366e-05, + "loss": 1.8975, + "step": 14788000 + }, + { + "epoch": 73.27, + "learning_rate": 1.3379450860322131e-05, + "loss": 1.9116, + "step": 14788500 + }, + { + "epoch": 73.27, + "learning_rate": 1.3378212273896048e-05, + "loss": 1.8862, + "step": 14789000 + }, + { + "epoch": 73.27, + "learning_rate": 1.3376976164642819e-05, + "loss": 1.9062, + "step": 14789500 + }, + { + "epoch": 73.27, + "learning_rate": 1.3375737578216732e-05, + "loss": 1.9149, + "step": 14790000 + }, + { + "epoch": 73.28, + "learning_rate": 1.337449899179065e-05, + "loss": 1.8999, + "step": 14790500 + }, + { + "epoch": 73.28, + "learning_rate": 1.3373260405364566e-05, + "loss": 1.8802, + "step": 14791000 + }, + { + "epoch": 73.28, + "learning_rate": 1.3372021818938481e-05, + "loss": 1.8855, + "step": 14791500 + }, + { + "epoch": 73.28, + "learning_rate": 1.3370783232512398e-05, + "loss": 1.9, + "step": 14792000 + }, + { + "epoch": 73.29, + "learning_rate": 1.3369544646086315e-05, + "loss": 1.8854, + "step": 14792500 + }, + { + "epoch": 73.29, + "learning_rate": 1.3368306059660232e-05, + "loss": 1.908, + "step": 14793000 + }, + { + "epoch": 73.29, + "learning_rate": 1.3367067473234149e-05, + "loss": 1.8964, + "step": 14793500 + }, + { + "epoch": 73.29, + "learning_rate": 1.3365828886808066e-05, + "loss": 1.8901, + "step": 14794000 + }, + { + "epoch": 73.3, + "learning_rate": 1.3364592777554833e-05, + "loss": 1.8964, + "step": 14794500 + }, + { + "epoch": 73.3, + "learning_rate": 1.3363354191128748e-05, + "loss": 1.9066, + "step": 14795000 + }, + { + "epoch": 73.3, + "learning_rate": 1.3362115604702665e-05, + "loss": 1.9262, + "step": 14795500 + }, + { + "epoch": 73.3, + "learning_rate": 1.3360877018276582e-05, + "loss": 1.8893, + "step": 14796000 + }, + { + "epoch": 73.31, + "learning_rate": 1.335964090902335e-05, + "loss": 1.8957, + "step": 14796500 + }, + { + "epoch": 73.31, + "learning_rate": 1.3358402322597266e-05, + "loss": 1.897, + "step": 14797000 + }, + { + "epoch": 73.31, + "learning_rate": 1.3357163736171183e-05, + "loss": 1.895, + "step": 14797500 + }, + { + "epoch": 73.31, + "learning_rate": 1.33559251497451e-05, + "loss": 1.9072, + "step": 14798000 + }, + { + "epoch": 73.32, + "learning_rate": 1.3354686563319015e-05, + "loss": 1.9005, + "step": 14798500 + }, + { + "epoch": 73.32, + "learning_rate": 1.3353447976892932e-05, + "loss": 1.8824, + "step": 14799000 + }, + { + "epoch": 73.32, + "learning_rate": 1.335220939046685e-05, + "loss": 1.8929, + "step": 14799500 + }, + { + "epoch": 73.32, + "learning_rate": 1.3350970804040766e-05, + "loss": 1.91, + "step": 14800000 + }, + { + "epoch": 73.33, + "learning_rate": 1.3349732217614683e-05, + "loss": 1.8912, + "step": 14800500 + }, + { + "epoch": 73.33, + "learning_rate": 1.3348493631188597e-05, + "loss": 1.9261, + "step": 14801000 + }, + { + "epoch": 73.33, + "learning_rate": 1.3347255044762514e-05, + "loss": 1.8862, + "step": 14801500 + }, + { + "epoch": 73.33, + "learning_rate": 1.334601645833643e-05, + "loss": 1.9098, + "step": 14802000 + }, + { + "epoch": 73.34, + "learning_rate": 1.3344777871910347e-05, + "loss": 1.923, + "step": 14802500 + }, + { + "epoch": 73.34, + "learning_rate": 1.3343539285484263e-05, + "loss": 1.9164, + "step": 14803000 + }, + { + "epoch": 73.34, + "learning_rate": 1.334230069905818e-05, + "loss": 1.8899, + "step": 14803500 + }, + { + "epoch": 73.34, + "learning_rate": 1.3341062112632096e-05, + "loss": 1.9058, + "step": 14804000 + }, + { + "epoch": 73.35, + "learning_rate": 1.3339826003378864e-05, + "loss": 1.8901, + "step": 14804500 + }, + { + "epoch": 73.35, + "learning_rate": 1.333858741695278e-05, + "loss": 1.8942, + "step": 14805000 + }, + { + "epoch": 73.35, + "learning_rate": 1.3337348830526697e-05, + "loss": 1.8598, + "step": 14805500 + }, + { + "epoch": 73.35, + "learning_rate": 1.3336110244100614e-05, + "loss": 1.8739, + "step": 14806000 + }, + { + "epoch": 73.36, + "learning_rate": 1.3334874134847383e-05, + "loss": 1.9049, + "step": 14806500 + }, + { + "epoch": 73.36, + "learning_rate": 1.3333635548421297e-05, + "loss": 1.9057, + "step": 14807000 + }, + { + "epoch": 73.36, + "learning_rate": 1.3332396961995214e-05, + "loss": 1.9019, + "step": 14807500 + }, + { + "epoch": 73.36, + "learning_rate": 1.333115837556913e-05, + "loss": 1.902, + "step": 14808000 + }, + { + "epoch": 73.37, + "learning_rate": 1.3329919789143048e-05, + "loss": 1.9046, + "step": 14808500 + }, + { + "epoch": 73.37, + "learning_rate": 1.3328681202716964e-05, + "loss": 1.8886, + "step": 14809000 + }, + { + "epoch": 73.37, + "learning_rate": 1.3327442616290881e-05, + "loss": 1.9122, + "step": 14809500 + }, + { + "epoch": 73.37, + "learning_rate": 1.3326204029864797e-05, + "loss": 1.8967, + "step": 14810000 + }, + { + "epoch": 73.38, + "learning_rate": 1.3324967920611564e-05, + "loss": 1.8879, + "step": 14810500 + }, + { + "epoch": 73.38, + "learning_rate": 1.332372933418548e-05, + "loss": 1.9026, + "step": 14811000 + }, + { + "epoch": 73.38, + "learning_rate": 1.3322490747759398e-05, + "loss": 1.9167, + "step": 14811500 + }, + { + "epoch": 73.38, + "learning_rate": 1.3321252161333314e-05, + "loss": 1.9076, + "step": 14812000 + }, + { + "epoch": 73.39, + "learning_rate": 1.3320013574907231e-05, + "loss": 1.8855, + "step": 14812500 + }, + { + "epoch": 73.39, + "learning_rate": 1.3318774988481147e-05, + "loss": 1.9126, + "step": 14813000 + }, + { + "epoch": 73.39, + "learning_rate": 1.3317538879227914e-05, + "loss": 1.8897, + "step": 14813500 + }, + { + "epoch": 73.39, + "learning_rate": 1.3316302769974684e-05, + "loss": 1.9086, + "step": 14814000 + }, + { + "epoch": 73.4, + "learning_rate": 1.3315066660721453e-05, + "loss": 1.9206, + "step": 14814500 + }, + { + "epoch": 73.4, + "learning_rate": 1.3313828074295368e-05, + "loss": 1.9222, + "step": 14815000 + }, + { + "epoch": 73.4, + "learning_rate": 1.3312589487869284e-05, + "loss": 1.9143, + "step": 14815500 + }, + { + "epoch": 73.4, + "learning_rate": 1.33113509014432e-05, + "loss": 1.8893, + "step": 14816000 + }, + { + "epoch": 73.41, + "learning_rate": 1.3310112315017117e-05, + "loss": 1.9053, + "step": 14816500 + }, + { + "epoch": 73.41, + "learning_rate": 1.3308873728591034e-05, + "loss": 1.9253, + "step": 14817000 + }, + { + "epoch": 73.41, + "learning_rate": 1.3307635142164951e-05, + "loss": 1.909, + "step": 14817500 + }, + { + "epoch": 73.41, + "learning_rate": 1.3306396555738868e-05, + "loss": 1.9147, + "step": 14818000 + }, + { + "epoch": 73.42, + "learning_rate": 1.3305157969312783e-05, + "loss": 1.9248, + "step": 14818500 + }, + { + "epoch": 73.42, + "learning_rate": 1.33039193828867e-05, + "loss": 1.9053, + "step": 14819000 + }, + { + "epoch": 73.42, + "learning_rate": 1.3302680796460616e-05, + "loss": 1.8883, + "step": 14819500 + }, + { + "epoch": 73.42, + "learning_rate": 1.330144221003453e-05, + "loss": 1.8947, + "step": 14820000 + }, + { + "epoch": 73.43, + "learning_rate": 1.3300203623608448e-05, + "loss": 1.9072, + "step": 14820500 + }, + { + "epoch": 73.43, + "learning_rate": 1.3298967514355218e-05, + "loss": 1.9077, + "step": 14821000 + }, + { + "epoch": 73.43, + "learning_rate": 1.3297728927929135e-05, + "loss": 1.8932, + "step": 14821500 + }, + { + "epoch": 73.43, + "learning_rate": 1.329649034150305e-05, + "loss": 1.919, + "step": 14822000 + }, + { + "epoch": 73.44, + "learning_rate": 1.3295254232249818e-05, + "loss": 1.9154, + "step": 14822500 + }, + { + "epoch": 73.44, + "learning_rate": 1.3294015645823734e-05, + "loss": 1.8963, + "step": 14823000 + }, + { + "epoch": 73.44, + "learning_rate": 1.3292777059397651e-05, + "loss": 1.9062, + "step": 14823500 + }, + { + "epoch": 73.44, + "learning_rate": 1.3291538472971568e-05, + "loss": 1.9127, + "step": 14824000 + }, + { + "epoch": 73.45, + "learning_rate": 1.3290299886545485e-05, + "loss": 1.9234, + "step": 14824500 + }, + { + "epoch": 73.45, + "learning_rate": 1.3289061300119402e-05, + "loss": 1.9137, + "step": 14825000 + }, + { + "epoch": 73.45, + "learning_rate": 1.3287822713693317e-05, + "loss": 1.9036, + "step": 14825500 + }, + { + "epoch": 73.45, + "learning_rate": 1.3286584127267233e-05, + "loss": 1.9249, + "step": 14826000 + }, + { + "epoch": 73.46, + "learning_rate": 1.3285345540841148e-05, + "loss": 1.8954, + "step": 14826500 + }, + { + "epoch": 73.46, + "learning_rate": 1.3284106954415065e-05, + "loss": 1.8974, + "step": 14827000 + }, + { + "epoch": 73.46, + "learning_rate": 1.3282868367988982e-05, + "loss": 1.911, + "step": 14827500 + }, + { + "epoch": 73.46, + "learning_rate": 1.3281629781562899e-05, + "loss": 1.9289, + "step": 14828000 + }, + { + "epoch": 73.47, + "learning_rate": 1.328039367230967e-05, + "loss": 1.9012, + "step": 14828500 + }, + { + "epoch": 73.47, + "learning_rate": 1.3279155085883583e-05, + "loss": 1.9238, + "step": 14829000 + }, + { + "epoch": 73.47, + "learning_rate": 1.3277918976630352e-05, + "loss": 1.8966, + "step": 14829500 + }, + { + "epoch": 73.47, + "learning_rate": 1.3276680390204268e-05, + "loss": 1.9044, + "step": 14830000 + }, + { + "epoch": 73.48, + "learning_rate": 1.3275441803778185e-05, + "loss": 1.8882, + "step": 14830500 + }, + { + "epoch": 73.48, + "learning_rate": 1.3274203217352102e-05, + "loss": 1.9298, + "step": 14831000 + }, + { + "epoch": 73.48, + "learning_rate": 1.327296710809887e-05, + "loss": 1.9091, + "step": 14831500 + }, + { + "epoch": 73.48, + "learning_rate": 1.3271728521672785e-05, + "loss": 1.8959, + "step": 14832000 + }, + { + "epoch": 73.49, + "learning_rate": 1.3270489935246702e-05, + "loss": 1.9056, + "step": 14832500 + }, + { + "epoch": 73.49, + "learning_rate": 1.3269251348820619e-05, + "loss": 1.8934, + "step": 14833000 + }, + { + "epoch": 73.49, + "learning_rate": 1.3268015239567389e-05, + "loss": 1.91, + "step": 14833500 + }, + { + "epoch": 73.49, + "learning_rate": 1.3266776653141303e-05, + "loss": 1.8854, + "step": 14834000 + }, + { + "epoch": 73.5, + "learning_rate": 1.326553806671522e-05, + "loss": 1.9027, + "step": 14834500 + }, + { + "epoch": 73.5, + "learning_rate": 1.3264301957461988e-05, + "loss": 1.9102, + "step": 14835000 + }, + { + "epoch": 73.5, + "learning_rate": 1.3263063371035905e-05, + "loss": 1.9147, + "step": 14835500 + }, + { + "epoch": 73.5, + "learning_rate": 1.3261824784609822e-05, + "loss": 1.8909, + "step": 14836000 + }, + { + "epoch": 73.51, + "learning_rate": 1.3260586198183739e-05, + "loss": 1.9213, + "step": 14836500 + }, + { + "epoch": 73.51, + "learning_rate": 1.3259347611757653e-05, + "loss": 1.9138, + "step": 14837000 + }, + { + "epoch": 73.51, + "learning_rate": 1.325810902533157e-05, + "loss": 1.8989, + "step": 14837500 + }, + { + "epoch": 73.51, + "learning_rate": 1.3256870438905486e-05, + "loss": 1.9105, + "step": 14838000 + }, + { + "epoch": 73.52, + "learning_rate": 1.3255631852479403e-05, + "loss": 1.892, + "step": 14838500 + }, + { + "epoch": 73.52, + "learning_rate": 1.3254393266053319e-05, + "loss": 1.8926, + "step": 14839000 + }, + { + "epoch": 73.52, + "learning_rate": 1.3253154679627236e-05, + "loss": 1.9135, + "step": 14839500 + }, + { + "epoch": 73.52, + "learning_rate": 1.3251916093201152e-05, + "loss": 1.9006, + "step": 14840000 + }, + { + "epoch": 73.52, + "learning_rate": 1.325067750677507e-05, + "loss": 1.8969, + "step": 14840500 + }, + { + "epoch": 73.53, + "learning_rate": 1.3249438920348986e-05, + "loss": 1.9059, + "step": 14841000 + }, + { + "epoch": 73.53, + "learning_rate": 1.32482003339229e-05, + "loss": 1.9263, + "step": 14841500 + }, + { + "epoch": 73.53, + "learning_rate": 1.3246961747496817e-05, + "loss": 1.9041, + "step": 14842000 + }, + { + "epoch": 73.53, + "learning_rate": 1.3245723161070734e-05, + "loss": 1.8986, + "step": 14842500 + }, + { + "epoch": 73.54, + "learning_rate": 1.3244487051817503e-05, + "loss": 1.9127, + "step": 14843000 + }, + { + "epoch": 73.54, + "learning_rate": 1.324325094256427e-05, + "loss": 1.9008, + "step": 14843500 + }, + { + "epoch": 73.54, + "learning_rate": 1.3242012356138187e-05, + "loss": 1.9024, + "step": 14844000 + }, + { + "epoch": 73.54, + "learning_rate": 1.3240773769712104e-05, + "loss": 1.8962, + "step": 14844500 + }, + { + "epoch": 73.55, + "learning_rate": 1.323953518328602e-05, + "loss": 1.9004, + "step": 14845000 + }, + { + "epoch": 73.55, + "learning_rate": 1.323829907403279e-05, + "loss": 1.9171, + "step": 14845500 + }, + { + "epoch": 73.55, + "learning_rate": 1.3237060487606706e-05, + "loss": 1.9187, + "step": 14846000 + }, + { + "epoch": 73.55, + "learning_rate": 1.3235824378353473e-05, + "loss": 1.9055, + "step": 14846500 + }, + { + "epoch": 73.56, + "learning_rate": 1.323458579192739e-05, + "loss": 1.8982, + "step": 14847000 + }, + { + "epoch": 73.56, + "learning_rate": 1.3233347205501307e-05, + "loss": 1.8905, + "step": 14847500 + }, + { + "epoch": 73.56, + "learning_rate": 1.3232108619075222e-05, + "loss": 1.8901, + "step": 14848000 + }, + { + "epoch": 73.56, + "learning_rate": 1.323087003264914e-05, + "loss": 1.9077, + "step": 14848500 + }, + { + "epoch": 73.57, + "learning_rate": 1.3229631446223056e-05, + "loss": 1.8969, + "step": 14849000 + }, + { + "epoch": 73.57, + "learning_rate": 1.322839285979697e-05, + "loss": 1.9003, + "step": 14849500 + }, + { + "epoch": 73.57, + "learning_rate": 1.3227154273370887e-05, + "loss": 1.8903, + "step": 14850000 + }, + { + "epoch": 73.57, + "learning_rate": 1.3225915686944804e-05, + "loss": 1.9066, + "step": 14850500 + }, + { + "epoch": 73.58, + "learning_rate": 1.322467710051872e-05, + "loss": 1.9145, + "step": 14851000 + }, + { + "epoch": 73.58, + "learning_rate": 1.322344099126549e-05, + "loss": 1.9085, + "step": 14851500 + }, + { + "epoch": 73.58, + "learning_rate": 1.3222202404839406e-05, + "loss": 1.9179, + "step": 14852000 + }, + { + "epoch": 73.58, + "learning_rate": 1.322096381841332e-05, + "loss": 1.8957, + "step": 14852500 + }, + { + "epoch": 73.59, + "learning_rate": 1.3219725231987237e-05, + "loss": 1.8817, + "step": 14853000 + }, + { + "epoch": 73.59, + "learning_rate": 1.3218486645561154e-05, + "loss": 1.9203, + "step": 14853500 + }, + { + "epoch": 73.59, + "learning_rate": 1.321724805913507e-05, + "loss": 1.9124, + "step": 14854000 + }, + { + "epoch": 73.59, + "learning_rate": 1.3216009472708988e-05, + "loss": 1.8968, + "step": 14854500 + }, + { + "epoch": 73.6, + "learning_rate": 1.3214770886282904e-05, + "loss": 1.8992, + "step": 14855000 + }, + { + "epoch": 73.6, + "learning_rate": 1.3213532299856821e-05, + "loss": 1.9269, + "step": 14855500 + }, + { + "epoch": 73.6, + "learning_rate": 1.3212296190603587e-05, + "loss": 1.9111, + "step": 14856000 + }, + { + "epoch": 73.6, + "learning_rate": 1.3211057604177504e-05, + "loss": 1.8771, + "step": 14856500 + }, + { + "epoch": 73.61, + "learning_rate": 1.3209821494924274e-05, + "loss": 1.8956, + "step": 14857000 + }, + { + "epoch": 73.61, + "learning_rate": 1.3208582908498191e-05, + "loss": 1.8973, + "step": 14857500 + }, + { + "epoch": 73.61, + "learning_rate": 1.3207344322072106e-05, + "loss": 1.9026, + "step": 14858000 + }, + { + "epoch": 73.61, + "learning_rate": 1.3206105735646023e-05, + "loss": 1.9163, + "step": 14858500 + }, + { + "epoch": 73.62, + "learning_rate": 1.3204867149219937e-05, + "loss": 1.8891, + "step": 14859000 + }, + { + "epoch": 73.62, + "learning_rate": 1.3203628562793854e-05, + "loss": 1.9281, + "step": 14859500 + }, + { + "epoch": 73.62, + "learning_rate": 1.3202392453540624e-05, + "loss": 1.9303, + "step": 14860000 + }, + { + "epoch": 73.62, + "learning_rate": 1.3201153867114541e-05, + "loss": 1.9037, + "step": 14860500 + }, + { + "epoch": 73.63, + "learning_rate": 1.3199915280688458e-05, + "loss": 1.9116, + "step": 14861000 + }, + { + "epoch": 73.63, + "learning_rate": 1.3198676694262373e-05, + "loss": 1.9044, + "step": 14861500 + }, + { + "epoch": 73.63, + "learning_rate": 1.3197438107836289e-05, + "loss": 1.9111, + "step": 14862000 + }, + { + "epoch": 73.63, + "learning_rate": 1.3196199521410204e-05, + "loss": 1.9101, + "step": 14862500 + }, + { + "epoch": 73.64, + "learning_rate": 1.319496093498412e-05, + "loss": 1.8967, + "step": 14863000 + }, + { + "epoch": 73.64, + "learning_rate": 1.3193722348558038e-05, + "loss": 1.9044, + "step": 14863500 + }, + { + "epoch": 73.64, + "learning_rate": 1.3192483762131955e-05, + "loss": 1.8934, + "step": 14864000 + }, + { + "epoch": 73.64, + "learning_rate": 1.3191247652878723e-05, + "loss": 1.914, + "step": 14864500 + }, + { + "epoch": 73.65, + "learning_rate": 1.3190009066452639e-05, + "loss": 1.8815, + "step": 14865000 + }, + { + "epoch": 73.65, + "learning_rate": 1.3188770480026556e-05, + "loss": 1.8958, + "step": 14865500 + }, + { + "epoch": 73.65, + "learning_rate": 1.3187531893600471e-05, + "loss": 1.9167, + "step": 14866000 + }, + { + "epoch": 73.65, + "learning_rate": 1.3186293307174388e-05, + "loss": 1.9085, + "step": 14866500 + }, + { + "epoch": 73.66, + "learning_rate": 1.3185054720748305e-05, + "loss": 1.8841, + "step": 14867000 + }, + { + "epoch": 73.66, + "learning_rate": 1.3183816134322222e-05, + "loss": 1.8942, + "step": 14867500 + }, + { + "epoch": 73.66, + "learning_rate": 1.3182577547896139e-05, + "loss": 1.9201, + "step": 14868000 + }, + { + "epoch": 73.66, + "learning_rate": 1.3181338961470056e-05, + "loss": 1.9034, + "step": 14868500 + }, + { + "epoch": 73.67, + "learning_rate": 1.3180100375043972e-05, + "loss": 1.8958, + "step": 14869000 + }, + { + "epoch": 73.67, + "learning_rate": 1.3178864265790738e-05, + "loss": 1.9082, + "step": 14869500 + }, + { + "epoch": 73.67, + "learning_rate": 1.3177625679364655e-05, + "loss": 1.9132, + "step": 14870000 + }, + { + "epoch": 73.67, + "learning_rate": 1.3176387092938572e-05, + "loss": 1.8908, + "step": 14870500 + }, + { + "epoch": 73.68, + "learning_rate": 1.3175148506512489e-05, + "loss": 1.9058, + "step": 14871000 + }, + { + "epoch": 73.68, + "learning_rate": 1.3173909920086406e-05, + "loss": 1.9065, + "step": 14871500 + }, + { + "epoch": 73.68, + "learning_rate": 1.3172673810833173e-05, + "loss": 1.9223, + "step": 14872000 + }, + { + "epoch": 73.68, + "learning_rate": 1.3171435224407088e-05, + "loss": 1.9011, + "step": 14872500 + }, + { + "epoch": 73.69, + "learning_rate": 1.3170196637981005e-05, + "loss": 1.9296, + "step": 14873000 + }, + { + "epoch": 73.69, + "learning_rate": 1.3168958051554922e-05, + "loss": 1.9243, + "step": 14873500 + }, + { + "epoch": 73.69, + "learning_rate": 1.3167719465128839e-05, + "loss": 1.9192, + "step": 14874000 + }, + { + "epoch": 73.69, + "learning_rate": 1.3166480878702756e-05, + "loss": 1.9214, + "step": 14874500 + }, + { + "epoch": 73.7, + "learning_rate": 1.3165242292276673e-05, + "loss": 1.92, + "step": 14875000 + }, + { + "epoch": 73.7, + "learning_rate": 1.316400618302344e-05, + "loss": 1.9141, + "step": 14875500 + }, + { + "epoch": 73.7, + "learning_rate": 1.3162767596597355e-05, + "loss": 1.9212, + "step": 14876000 + }, + { + "epoch": 73.7, + "learning_rate": 1.3161529010171272e-05, + "loss": 1.9169, + "step": 14876500 + }, + { + "epoch": 73.71, + "learning_rate": 1.3160292900918042e-05, + "loss": 1.9093, + "step": 14877000 + }, + { + "epoch": 73.71, + "learning_rate": 1.3159054314491956e-05, + "loss": 1.8942, + "step": 14877500 + }, + { + "epoch": 73.71, + "learning_rate": 1.3157815728065873e-05, + "loss": 1.9231, + "step": 14878000 + }, + { + "epoch": 73.71, + "learning_rate": 1.315657714163979e-05, + "loss": 1.9054, + "step": 14878500 + }, + { + "epoch": 73.72, + "learning_rate": 1.3155338555213707e-05, + "loss": 1.8933, + "step": 14879000 + }, + { + "epoch": 73.72, + "learning_rate": 1.3154099968787622e-05, + "loss": 1.9204, + "step": 14879500 + }, + { + "epoch": 73.72, + "learning_rate": 1.3152863859534392e-05, + "loss": 1.9036, + "step": 14880000 + }, + { + "epoch": 73.72, + "learning_rate": 1.315162527310831e-05, + "loss": 1.9177, + "step": 14880500 + }, + { + "epoch": 73.73, + "learning_rate": 1.3150386686682223e-05, + "loss": 1.9114, + "step": 14881000 + }, + { + "epoch": 73.73, + "learning_rate": 1.314914810025614e-05, + "loss": 1.8947, + "step": 14881500 + }, + { + "epoch": 73.73, + "learning_rate": 1.3147909513830057e-05, + "loss": 1.9122, + "step": 14882000 + }, + { + "epoch": 73.73, + "learning_rate": 1.3146670927403974e-05, + "loss": 1.9289, + "step": 14882500 + }, + { + "epoch": 73.74, + "learning_rate": 1.3145432340977889e-05, + "loss": 1.9206, + "step": 14883000 + }, + { + "epoch": 73.74, + "learning_rate": 1.3144193754551806e-05, + "loss": 1.9003, + "step": 14883500 + }, + { + "epoch": 73.74, + "learning_rate": 1.3142955168125723e-05, + "loss": 1.8715, + "step": 14884000 + }, + { + "epoch": 73.74, + "learning_rate": 1.314171658169964e-05, + "loss": 1.8933, + "step": 14884500 + }, + { + "epoch": 73.75, + "learning_rate": 1.3140482949619259e-05, + "loss": 1.9229, + "step": 14885000 + }, + { + "epoch": 73.75, + "learning_rate": 1.3139244363193176e-05, + "loss": 1.8911, + "step": 14885500 + }, + { + "epoch": 73.75, + "learning_rate": 1.3138005776767093e-05, + "loss": 1.8953, + "step": 14886000 + }, + { + "epoch": 73.75, + "learning_rate": 1.313676719034101e-05, + "loss": 1.899, + "step": 14886500 + }, + { + "epoch": 73.76, + "learning_rate": 1.3135531081087777e-05, + "loss": 1.8974, + "step": 14887000 + }, + { + "epoch": 73.76, + "learning_rate": 1.3134292494661694e-05, + "loss": 1.9094, + "step": 14887500 + }, + { + "epoch": 73.76, + "learning_rate": 1.313305390823561e-05, + "loss": 1.922, + "step": 14888000 + }, + { + "epoch": 73.76, + "learning_rate": 1.3131815321809526e-05, + "loss": 1.8945, + "step": 14888500 + }, + { + "epoch": 73.77, + "learning_rate": 1.3130576735383443e-05, + "loss": 1.9176, + "step": 14889000 + }, + { + "epoch": 73.77, + "learning_rate": 1.312933814895736e-05, + "loss": 1.9276, + "step": 14889500 + }, + { + "epoch": 73.77, + "learning_rate": 1.3128099562531273e-05, + "loss": 1.9333, + "step": 14890000 + }, + { + "epoch": 73.77, + "learning_rate": 1.312686097610519e-05, + "loss": 1.9141, + "step": 14890500 + }, + { + "epoch": 73.78, + "learning_rate": 1.3125622389679107e-05, + "loss": 1.9036, + "step": 14891000 + }, + { + "epoch": 73.78, + "learning_rate": 1.3124383803253024e-05, + "loss": 1.8887, + "step": 14891500 + }, + { + "epoch": 73.78, + "learning_rate": 1.312314521682694e-05, + "loss": 1.8995, + "step": 14892000 + }, + { + "epoch": 73.78, + "learning_rate": 1.312190910757371e-05, + "loss": 1.9167, + "step": 14892500 + }, + { + "epoch": 73.79, + "learning_rate": 1.3120670521147627e-05, + "loss": 1.8809, + "step": 14893000 + }, + { + "epoch": 73.79, + "learning_rate": 1.311943193472154e-05, + "loss": 1.8938, + "step": 14893500 + }, + { + "epoch": 73.79, + "learning_rate": 1.311819582546831e-05, + "loss": 1.8908, + "step": 14894000 + }, + { + "epoch": 73.79, + "learning_rate": 1.3116957239042227e-05, + "loss": 1.9075, + "step": 14894500 + }, + { + "epoch": 73.79, + "learning_rate": 1.3115718652616143e-05, + "loss": 1.9172, + "step": 14895000 + }, + { + "epoch": 73.8, + "learning_rate": 1.311448006619006e-05, + "loss": 1.9124, + "step": 14895500 + }, + { + "epoch": 73.8, + "learning_rate": 1.3113241479763977e-05, + "loss": 1.9057, + "step": 14896000 + }, + { + "epoch": 73.8, + "learning_rate": 1.311200289333789e-05, + "loss": 1.8984, + "step": 14896500 + }, + { + "epoch": 73.8, + "learning_rate": 1.3110764306911807e-05, + "loss": 1.9009, + "step": 14897000 + }, + { + "epoch": 73.81, + "learning_rate": 1.3109525720485724e-05, + "loss": 1.9111, + "step": 14897500 + }, + { + "epoch": 73.81, + "learning_rate": 1.3108287134059641e-05, + "loss": 1.9035, + "step": 14898000 + }, + { + "epoch": 73.81, + "learning_rate": 1.3107048547633558e-05, + "loss": 1.8997, + "step": 14898500 + }, + { + "epoch": 73.81, + "learning_rate": 1.3105809961207475e-05, + "loss": 1.9224, + "step": 14899000 + }, + { + "epoch": 73.82, + "learning_rate": 1.3104571374781392e-05, + "loss": 1.9067, + "step": 14899500 + }, + { + "epoch": 73.82, + "learning_rate": 1.3103335265528157e-05, + "loss": 1.9141, + "step": 14900000 + }, + { + "epoch": 73.82, + "learning_rate": 1.3102096679102074e-05, + "loss": 1.9033, + "step": 14900500 + }, + { + "epoch": 73.82, + "learning_rate": 1.3100860569848845e-05, + "loss": 1.9102, + "step": 14901000 + }, + { + "epoch": 73.83, + "learning_rate": 1.3099621983422761e-05, + "loss": 1.9167, + "step": 14901500 + }, + { + "epoch": 73.83, + "learning_rate": 1.3098383396996677e-05, + "loss": 1.9218, + "step": 14902000 + }, + { + "epoch": 73.83, + "learning_rate": 1.3097144810570592e-05, + "loss": 1.9019, + "step": 14902500 + }, + { + "epoch": 73.83, + "learning_rate": 1.3095906224144507e-05, + "loss": 1.9129, + "step": 14903000 + }, + { + "epoch": 73.84, + "learning_rate": 1.3094667637718424e-05, + "loss": 1.9093, + "step": 14903500 + }, + { + "epoch": 73.84, + "learning_rate": 1.3093429051292341e-05, + "loss": 1.9109, + "step": 14904000 + }, + { + "epoch": 73.84, + "learning_rate": 1.3092192942039112e-05, + "loss": 1.9081, + "step": 14904500 + }, + { + "epoch": 73.84, + "learning_rate": 1.3090954355613028e-05, + "loss": 1.8939, + "step": 14905000 + }, + { + "epoch": 73.85, + "learning_rate": 1.3089715769186944e-05, + "loss": 1.8847, + "step": 14905500 + }, + { + "epoch": 73.85, + "learning_rate": 1.3088477182760859e-05, + "loss": 1.9128, + "step": 14906000 + }, + { + "epoch": 73.85, + "learning_rate": 1.3087238596334774e-05, + "loss": 1.9099, + "step": 14906500 + }, + { + "epoch": 73.85, + "learning_rate": 1.3086000009908691e-05, + "loss": 1.9298, + "step": 14907000 + }, + { + "epoch": 73.86, + "learning_rate": 1.3084761423482608e-05, + "loss": 1.9046, + "step": 14907500 + }, + { + "epoch": 73.86, + "learning_rate": 1.3083522837056525e-05, + "loss": 1.9081, + "step": 14908000 + }, + { + "epoch": 73.86, + "learning_rate": 1.3082284250630442e-05, + "loss": 1.9089, + "step": 14908500 + }, + { + "epoch": 73.86, + "learning_rate": 1.3081045664204359e-05, + "loss": 1.8965, + "step": 14909000 + }, + { + "epoch": 73.87, + "learning_rate": 1.3079807077778276e-05, + "loss": 1.9131, + "step": 14909500 + }, + { + "epoch": 73.87, + "learning_rate": 1.3078570968525041e-05, + "loss": 1.9042, + "step": 14910000 + }, + { + "epoch": 73.87, + "learning_rate": 1.3077334859271812e-05, + "loss": 1.9014, + "step": 14910500 + }, + { + "epoch": 73.87, + "learning_rate": 1.3076098750018579e-05, + "loss": 1.9366, + "step": 14911000 + }, + { + "epoch": 73.88, + "learning_rate": 1.3074860163592496e-05, + "loss": 1.9019, + "step": 14911500 + }, + { + "epoch": 73.88, + "learning_rate": 1.3073624054339265e-05, + "loss": 1.9353, + "step": 14912000 + }, + { + "epoch": 73.88, + "learning_rate": 1.3072385467913181e-05, + "loss": 1.8942, + "step": 14912500 + }, + { + "epoch": 73.88, + "learning_rate": 1.3071146881487098e-05, + "loss": 1.9014, + "step": 14913000 + }, + { + "epoch": 73.89, + "learning_rate": 1.3069908295061015e-05, + "loss": 1.9115, + "step": 14913500 + }, + { + "epoch": 73.89, + "learning_rate": 1.3068669708634929e-05, + "loss": 1.9222, + "step": 14914000 + }, + { + "epoch": 73.89, + "learning_rate": 1.3067431122208846e-05, + "loss": 1.9054, + "step": 14914500 + }, + { + "epoch": 73.89, + "learning_rate": 1.3066192535782763e-05, + "loss": 1.9253, + "step": 14915000 + }, + { + "epoch": 73.9, + "learning_rate": 1.3064953949356678e-05, + "loss": 1.9166, + "step": 14915500 + }, + { + "epoch": 73.9, + "learning_rate": 1.3063715362930595e-05, + "loss": 1.9145, + "step": 14916000 + }, + { + "epoch": 73.9, + "learning_rate": 1.3062476776504512e-05, + "loss": 1.8994, + "step": 14916500 + }, + { + "epoch": 73.9, + "learning_rate": 1.3061240667251279e-05, + "loss": 1.9116, + "step": 14917000 + }, + { + "epoch": 73.91, + "learning_rate": 1.3060004557998048e-05, + "loss": 1.8883, + "step": 14917500 + }, + { + "epoch": 73.91, + "learning_rate": 1.3058765971571965e-05, + "loss": 1.9001, + "step": 14918000 + }, + { + "epoch": 73.91, + "learning_rate": 1.3057527385145882e-05, + "loss": 1.9153, + "step": 14918500 + }, + { + "epoch": 73.91, + "learning_rate": 1.3056288798719798e-05, + "loss": 1.9255, + "step": 14919000 + }, + { + "epoch": 73.92, + "learning_rate": 1.3055050212293715e-05, + "loss": 1.9014, + "step": 14919500 + }, + { + "epoch": 73.92, + "learning_rate": 1.3053811625867629e-05, + "loss": 1.9161, + "step": 14920000 + }, + { + "epoch": 73.92, + "learning_rate": 1.3052573039441546e-05, + "loss": 1.8993, + "step": 14920500 + }, + { + "epoch": 73.92, + "learning_rate": 1.3051334453015463e-05, + "loss": 1.9019, + "step": 14921000 + }, + { + "epoch": 73.93, + "learning_rate": 1.305009586658938e-05, + "loss": 1.89, + "step": 14921500 + }, + { + "epoch": 73.93, + "learning_rate": 1.3048857280163295e-05, + "loss": 1.9104, + "step": 14922000 + }, + { + "epoch": 73.93, + "learning_rate": 1.3047618693737212e-05, + "loss": 1.8911, + "step": 14922500 + }, + { + "epoch": 73.93, + "learning_rate": 1.3046382584483982e-05, + "loss": 1.9058, + "step": 14923000 + }, + { + "epoch": 73.94, + "learning_rate": 1.3045143998057896e-05, + "loss": 1.886, + "step": 14923500 + }, + { + "epoch": 73.94, + "learning_rate": 1.3043905411631813e-05, + "loss": 1.9264, + "step": 14924000 + }, + { + "epoch": 73.94, + "learning_rate": 1.304266682520573e-05, + "loss": 1.9063, + "step": 14924500 + }, + { + "epoch": 73.94, + "learning_rate": 1.3041428238779647e-05, + "loss": 1.9045, + "step": 14925000 + }, + { + "epoch": 73.95, + "learning_rate": 1.3040189652353562e-05, + "loss": 1.8947, + "step": 14925500 + }, + { + "epoch": 73.95, + "learning_rate": 1.3038953543100332e-05, + "loss": 1.9233, + "step": 14926000 + }, + { + "epoch": 73.95, + "learning_rate": 1.3037714956674246e-05, + "loss": 1.9154, + "step": 14926500 + }, + { + "epoch": 73.95, + "learning_rate": 1.3036476370248163e-05, + "loss": 1.8947, + "step": 14927000 + }, + { + "epoch": 73.96, + "learning_rate": 1.303523778382208e-05, + "loss": 1.925, + "step": 14927500 + }, + { + "epoch": 73.96, + "learning_rate": 1.3033999197395997e-05, + "loss": 1.9243, + "step": 14928000 + }, + { + "epoch": 73.96, + "learning_rate": 1.3032760610969914e-05, + "loss": 1.8761, + "step": 14928500 + }, + { + "epoch": 73.96, + "learning_rate": 1.3031522024543829e-05, + "loss": 1.9112, + "step": 14929000 + }, + { + "epoch": 73.97, + "learning_rate": 1.3030283438117746e-05, + "loss": 1.9038, + "step": 14929500 + }, + { + "epoch": 73.97, + "learning_rate": 1.3029044851691663e-05, + "loss": 1.9119, + "step": 14930000 + }, + { + "epoch": 73.97, + "learning_rate": 1.302780626526558e-05, + "loss": 1.9032, + "step": 14930500 + }, + { + "epoch": 73.97, + "learning_rate": 1.3026567678839493e-05, + "loss": 1.8952, + "step": 14931000 + }, + { + "epoch": 73.98, + "learning_rate": 1.3025331569586264e-05, + "loss": 1.885, + "step": 14931500 + }, + { + "epoch": 73.98, + "learning_rate": 1.302409298316018e-05, + "loss": 1.9094, + "step": 14932000 + }, + { + "epoch": 73.98, + "learning_rate": 1.3022854396734096e-05, + "loss": 1.8916, + "step": 14932500 + }, + { + "epoch": 73.98, + "learning_rate": 1.3021615810308013e-05, + "loss": 1.9118, + "step": 14933000 + }, + { + "epoch": 73.99, + "learning_rate": 1.302037722388193e-05, + "loss": 1.8884, + "step": 14933500 + }, + { + "epoch": 73.99, + "learning_rate": 1.3019138637455843e-05, + "loss": 1.8974, + "step": 14934000 + }, + { + "epoch": 73.99, + "learning_rate": 1.3017902528202614e-05, + "loss": 1.8946, + "step": 14934500 + }, + { + "epoch": 73.99, + "learning_rate": 1.301666394177653e-05, + "loss": 1.89, + "step": 14935000 + }, + { + "epoch": 74.0, + "learning_rate": 1.3015425355350446e-05, + "loss": 1.9086, + "step": 14935500 + }, + { + "epoch": 74.0, + "learning_rate": 1.3014186768924363e-05, + "loss": 1.9069, + "step": 14936000 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.6792200986605649, + "eval_accuracy_mlm": 0.6396116608158647, + "eval_accuracy_nsp": 0.866103961813468, + "eval_loss": 2.2816011905670166, + "eval_runtime": 147.0037, + "eval_samples_per_second": 1734.372, + "eval_steps_per_second": 72.27, + "step": 14936382 + }, + { + "epoch": 74.0, + "learning_rate": 1.301294818249828e-05, + "loss": 1.8995, + "step": 14936500 + }, + { + "epoch": 74.0, + "learning_rate": 1.3011709596072193e-05, + "loss": 1.8853, + "step": 14937000 + }, + { + "epoch": 74.01, + "learning_rate": 1.301047100964611e-05, + "loss": 1.8996, + "step": 14937500 + }, + { + "epoch": 74.01, + "learning_rate": 1.300923490039288e-05, + "loss": 1.89, + "step": 14938000 + }, + { + "epoch": 74.01, + "learning_rate": 1.3007996313966798e-05, + "loss": 1.8797, + "step": 14938500 + }, + { + "epoch": 74.01, + "learning_rate": 1.3006757727540713e-05, + "loss": 1.8685, + "step": 14939000 + }, + { + "epoch": 74.02, + "learning_rate": 1.300551914111463e-05, + "loss": 1.8868, + "step": 14939500 + }, + { + "epoch": 74.02, + "learning_rate": 1.3004280554688545e-05, + "loss": 1.9132, + "step": 14940000 + }, + { + "epoch": 74.02, + "learning_rate": 1.300304196826246e-05, + "loss": 1.9075, + "step": 14940500 + }, + { + "epoch": 74.02, + "learning_rate": 1.3001803381836377e-05, + "loss": 1.8816, + "step": 14941000 + }, + { + "epoch": 74.03, + "learning_rate": 1.3000564795410294e-05, + "loss": 1.912, + "step": 14941500 + }, + { + "epoch": 74.03, + "learning_rate": 1.2999326208984211e-05, + "loss": 1.8787, + "step": 14942000 + }, + { + "epoch": 74.03, + "learning_rate": 1.299809009973098e-05, + "loss": 1.8959, + "step": 14942500 + }, + { + "epoch": 74.03, + "learning_rate": 1.2996853990477747e-05, + "loss": 1.915, + "step": 14943000 + }, + { + "epoch": 74.04, + "learning_rate": 1.2995615404051664e-05, + "loss": 1.8976, + "step": 14943500 + }, + { + "epoch": 74.04, + "learning_rate": 1.2994376817625581e-05, + "loss": 1.9016, + "step": 14944000 + }, + { + "epoch": 74.04, + "learning_rate": 1.2993138231199498e-05, + "loss": 1.881, + "step": 14944500 + }, + { + "epoch": 74.04, + "learning_rate": 1.2991902121946265e-05, + "loss": 1.8767, + "step": 14945000 + }, + { + "epoch": 74.05, + "learning_rate": 1.2990666012693034e-05, + "loss": 1.8737, + "step": 14945500 + }, + { + "epoch": 74.05, + "learning_rate": 1.298942742626695e-05, + "loss": 1.8955, + "step": 14946000 + }, + { + "epoch": 74.05, + "learning_rate": 1.2988188839840868e-05, + "loss": 1.8768, + "step": 14946500 + }, + { + "epoch": 74.05, + "learning_rate": 1.2986950253414785e-05, + "loss": 1.8812, + "step": 14947000 + }, + { + "epoch": 74.06, + "learning_rate": 1.2985711666988702e-05, + "loss": 1.8907, + "step": 14947500 + }, + { + "epoch": 74.06, + "learning_rate": 1.2984473080562617e-05, + "loss": 1.8812, + "step": 14948000 + }, + { + "epoch": 74.06, + "learning_rate": 1.2983234494136532e-05, + "loss": 1.8802, + "step": 14948500 + }, + { + "epoch": 74.06, + "learning_rate": 1.2981995907710447e-05, + "loss": 1.8746, + "step": 14949000 + }, + { + "epoch": 74.06, + "learning_rate": 1.2980759798457218e-05, + "loss": 1.899, + "step": 14949500 + }, + { + "epoch": 74.07, + "learning_rate": 1.2979521212031135e-05, + "loss": 1.9013, + "step": 14950000 + }, + { + "epoch": 74.07, + "learning_rate": 1.2978282625605052e-05, + "loss": 1.8867, + "step": 14950500 + }, + { + "epoch": 74.07, + "learning_rate": 1.2977044039178969e-05, + "loss": 1.8968, + "step": 14951000 + }, + { + "epoch": 74.07, + "learning_rate": 1.2975805452752882e-05, + "loss": 1.9089, + "step": 14951500 + }, + { + "epoch": 74.08, + "learning_rate": 1.2974566866326799e-05, + "loss": 1.8855, + "step": 14952000 + }, + { + "epoch": 74.08, + "learning_rate": 1.2973328279900714e-05, + "loss": 1.914, + "step": 14952500 + }, + { + "epoch": 74.08, + "learning_rate": 1.2972089693474631e-05, + "loss": 1.8733, + "step": 14953000 + }, + { + "epoch": 74.08, + "learning_rate": 1.2970853584221402e-05, + "loss": 1.9026, + "step": 14953500 + }, + { + "epoch": 74.09, + "learning_rate": 1.2969617474968169e-05, + "loss": 1.8846, + "step": 14954000 + }, + { + "epoch": 74.09, + "learning_rate": 1.2968378888542084e-05, + "loss": 1.8847, + "step": 14954500 + }, + { + "epoch": 74.09, + "learning_rate": 1.2967140302116001e-05, + "loss": 1.8787, + "step": 14955000 + }, + { + "epoch": 74.09, + "learning_rate": 1.2965901715689918e-05, + "loss": 1.8709, + "step": 14955500 + }, + { + "epoch": 74.1, + "learning_rate": 1.2964663129263835e-05, + "loss": 1.9002, + "step": 14956000 + }, + { + "epoch": 74.1, + "learning_rate": 1.2963424542837752e-05, + "loss": 1.9058, + "step": 14956500 + }, + { + "epoch": 74.1, + "learning_rate": 1.2962185956411669e-05, + "loss": 1.8937, + "step": 14957000 + }, + { + "epoch": 74.1, + "learning_rate": 1.2960949847158436e-05, + "loss": 1.8798, + "step": 14957500 + }, + { + "epoch": 74.11, + "learning_rate": 1.2959711260732351e-05, + "loss": 1.8908, + "step": 14958000 + }, + { + "epoch": 74.11, + "learning_rate": 1.2958472674306268e-05, + "loss": 1.8946, + "step": 14958500 + }, + { + "epoch": 74.11, + "learning_rate": 1.2957234087880185e-05, + "loss": 1.8972, + "step": 14959000 + }, + { + "epoch": 74.11, + "learning_rate": 1.2955995501454102e-05, + "loss": 1.8851, + "step": 14959500 + }, + { + "epoch": 74.12, + "learning_rate": 1.2954756915028019e-05, + "loss": 1.8942, + "step": 14960000 + }, + { + "epoch": 74.12, + "learning_rate": 1.2953518328601936e-05, + "loss": 1.88, + "step": 14960500 + }, + { + "epoch": 74.12, + "learning_rate": 1.295227974217585e-05, + "loss": 1.8894, + "step": 14961000 + }, + { + "epoch": 74.12, + "learning_rate": 1.2951041155749766e-05, + "loss": 1.9052, + "step": 14961500 + }, + { + "epoch": 74.13, + "learning_rate": 1.2949805046496535e-05, + "loss": 1.8784, + "step": 14962000 + }, + { + "epoch": 74.13, + "learning_rate": 1.2948566460070452e-05, + "loss": 1.9039, + "step": 14962500 + }, + { + "epoch": 74.13, + "learning_rate": 1.2947330350817219e-05, + "loss": 1.864, + "step": 14963000 + }, + { + "epoch": 74.13, + "learning_rate": 1.2946091764391136e-05, + "loss": 1.8984, + "step": 14963500 + }, + { + "epoch": 74.14, + "learning_rate": 1.2944853177965053e-05, + "loss": 1.8885, + "step": 14964000 + }, + { + "epoch": 74.14, + "learning_rate": 1.294361459153897e-05, + "loss": 1.8989, + "step": 14964500 + }, + { + "epoch": 74.14, + "learning_rate": 1.2942376005112885e-05, + "loss": 1.9028, + "step": 14965000 + }, + { + "epoch": 74.14, + "learning_rate": 1.2941137418686802e-05, + "loss": 1.8601, + "step": 14965500 + }, + { + "epoch": 74.15, + "learning_rate": 1.2939898832260719e-05, + "loss": 1.8804, + "step": 14966000 + }, + { + "epoch": 74.15, + "learning_rate": 1.2938660245834636e-05, + "loss": 1.9017, + "step": 14966500 + }, + { + "epoch": 74.15, + "learning_rate": 1.293742165940855e-05, + "loss": 1.8796, + "step": 14967000 + }, + { + "epoch": 74.15, + "learning_rate": 1.2936183072982466e-05, + "loss": 1.8905, + "step": 14967500 + }, + { + "epoch": 74.16, + "learning_rate": 1.2934944486556383e-05, + "loss": 1.9082, + "step": 14968000 + }, + { + "epoch": 74.16, + "learning_rate": 1.29337059001303e-05, + "loss": 1.8804, + "step": 14968500 + }, + { + "epoch": 74.16, + "learning_rate": 1.2932472268049919e-05, + "loss": 1.904, + "step": 14969000 + }, + { + "epoch": 74.16, + "learning_rate": 1.2931233681623836e-05, + "loss": 1.9027, + "step": 14969500 + }, + { + "epoch": 74.17, + "learning_rate": 1.2929995095197753e-05, + "loss": 1.9025, + "step": 14970000 + }, + { + "epoch": 74.17, + "learning_rate": 1.292875650877167e-05, + "loss": 1.8617, + "step": 14970500 + }, + { + "epoch": 74.17, + "learning_rate": 1.2927517922345587e-05, + "loss": 1.8989, + "step": 14971000 + }, + { + "epoch": 74.17, + "learning_rate": 1.2926279335919502e-05, + "loss": 1.8851, + "step": 14971500 + }, + { + "epoch": 74.18, + "learning_rate": 1.2925045703839123e-05, + "loss": 1.895, + "step": 14972000 + }, + { + "epoch": 74.18, + "learning_rate": 1.292380711741304e-05, + "loss": 1.8864, + "step": 14972500 + }, + { + "epoch": 74.18, + "learning_rate": 1.2922568530986957e-05, + "loss": 1.9005, + "step": 14973000 + }, + { + "epoch": 74.18, + "learning_rate": 1.2921329944560872e-05, + "loss": 1.9022, + "step": 14973500 + }, + { + "epoch": 74.19, + "learning_rate": 1.2920091358134789e-05, + "loss": 1.8886, + "step": 14974000 + }, + { + "epoch": 74.19, + "learning_rate": 1.2918852771708706e-05, + "loss": 1.8961, + "step": 14974500 + }, + { + "epoch": 74.19, + "learning_rate": 1.291761418528262e-05, + "loss": 1.8967, + "step": 14975000 + }, + { + "epoch": 74.19, + "learning_rate": 1.2916375598856536e-05, + "loss": 1.8822, + "step": 14975500 + }, + { + "epoch": 74.2, + "learning_rate": 1.2915139489603307e-05, + "loss": 1.8874, + "step": 14976000 + }, + { + "epoch": 74.2, + "learning_rate": 1.2913900903177224e-05, + "loss": 1.9252, + "step": 14976500 + }, + { + "epoch": 74.2, + "learning_rate": 1.2912662316751139e-05, + "loss": 1.9198, + "step": 14977000 + }, + { + "epoch": 74.2, + "learning_rate": 1.2911423730325056e-05, + "loss": 1.9019, + "step": 14977500 + }, + { + "epoch": 74.21, + "learning_rate": 1.2910185143898973e-05, + "loss": 1.8863, + "step": 14978000 + }, + { + "epoch": 74.21, + "learning_rate": 1.2908946557472886e-05, + "loss": 1.8957, + "step": 14978500 + }, + { + "epoch": 74.21, + "learning_rate": 1.2907710448219657e-05, + "loss": 1.9175, + "step": 14979000 + }, + { + "epoch": 74.21, + "learning_rate": 1.2906471861793574e-05, + "loss": 1.904, + "step": 14979500 + }, + { + "epoch": 74.22, + "learning_rate": 1.290523327536749e-05, + "loss": 1.9038, + "step": 14980000 + }, + { + "epoch": 74.22, + "learning_rate": 1.2903994688941406e-05, + "loss": 1.8909, + "step": 14980500 + }, + { + "epoch": 74.22, + "learning_rate": 1.2902756102515323e-05, + "loss": 1.9079, + "step": 14981000 + }, + { + "epoch": 74.22, + "learning_rate": 1.2901517516089236e-05, + "loss": 1.889, + "step": 14981500 + }, + { + "epoch": 74.23, + "learning_rate": 1.2900278929663153e-05, + "loss": 1.895, + "step": 14982000 + }, + { + "epoch": 74.23, + "learning_rate": 1.289904034323707e-05, + "loss": 1.8969, + "step": 14982500 + }, + { + "epoch": 74.23, + "learning_rate": 1.2897801756810987e-05, + "loss": 1.8986, + "step": 14983000 + }, + { + "epoch": 74.23, + "learning_rate": 1.2896563170384904e-05, + "loss": 1.8907, + "step": 14983500 + }, + { + "epoch": 74.24, + "learning_rate": 1.2895324583958821e-05, + "loss": 1.9046, + "step": 14984000 + }, + { + "epoch": 74.24, + "learning_rate": 1.2894085997532738e-05, + "loss": 1.8903, + "step": 14984500 + }, + { + "epoch": 74.24, + "learning_rate": 1.2892847411106653e-05, + "loss": 1.9024, + "step": 14985000 + }, + { + "epoch": 74.24, + "learning_rate": 1.289160882468057e-05, + "loss": 1.9132, + "step": 14985500 + }, + { + "epoch": 74.25, + "learning_rate": 1.2890370238254485e-05, + "loss": 1.9148, + "step": 14986000 + }, + { + "epoch": 74.25, + "learning_rate": 1.28891316518284e-05, + "loss": 1.9057, + "step": 14986500 + }, + { + "epoch": 74.25, + "learning_rate": 1.2887895542575171e-05, + "loss": 1.8749, + "step": 14987000 + }, + { + "epoch": 74.25, + "learning_rate": 1.288666191049479e-05, + "loss": 1.9026, + "step": 14987500 + }, + { + "epoch": 74.26, + "learning_rate": 1.2885423324068707e-05, + "loss": 1.8893, + "step": 14988000 + }, + { + "epoch": 74.26, + "learning_rate": 1.2884184737642624e-05, + "loss": 1.9397, + "step": 14988500 + }, + { + "epoch": 74.26, + "learning_rate": 1.288294615121654e-05, + "loss": 1.9068, + "step": 14989000 + }, + { + "epoch": 74.26, + "learning_rate": 1.2881707564790458e-05, + "loss": 1.8949, + "step": 14989500 + }, + { + "epoch": 74.27, + "learning_rate": 1.2880468978364375e-05, + "loss": 1.9133, + "step": 14990000 + }, + { + "epoch": 74.27, + "learning_rate": 1.287923039193829e-05, + "loss": 1.9071, + "step": 14990500 + }, + { + "epoch": 74.27, + "learning_rate": 1.2877991805512205e-05, + "loss": 1.8928, + "step": 14991000 + }, + { + "epoch": 74.27, + "learning_rate": 1.2876755696258974e-05, + "loss": 1.8895, + "step": 14991500 + }, + { + "epoch": 74.28, + "learning_rate": 1.287551710983289e-05, + "loss": 1.9008, + "step": 14992000 + }, + { + "epoch": 74.28, + "learning_rate": 1.2874278523406808e-05, + "loss": 1.905, + "step": 14992500 + }, + { + "epoch": 74.28, + "learning_rate": 1.2873039936980725e-05, + "loss": 1.9104, + "step": 14993000 + }, + { + "epoch": 74.28, + "learning_rate": 1.2871803827727492e-05, + "loss": 1.907, + "step": 14993500 + }, + { + "epoch": 74.29, + "learning_rate": 1.2870565241301407e-05, + "loss": 1.9074, + "step": 14994000 + }, + { + "epoch": 74.29, + "learning_rate": 1.2869326654875324e-05, + "loss": 1.9145, + "step": 14994500 + }, + { + "epoch": 74.29, + "learning_rate": 1.2868088068449241e-05, + "loss": 1.9309, + "step": 14995000 + }, + { + "epoch": 74.29, + "learning_rate": 1.2866849482023158e-05, + "loss": 1.9, + "step": 14995500 + }, + { + "epoch": 74.3, + "learning_rate": 1.2865610895597075e-05, + "loss": 1.8983, + "step": 14996000 + }, + { + "epoch": 74.3, + "learning_rate": 1.2864372309170992e-05, + "loss": 1.9106, + "step": 14996500 + }, + { + "epoch": 74.3, + "learning_rate": 1.2863136199917759e-05, + "loss": 1.9075, + "step": 14997000 + }, + { + "epoch": 74.3, + "learning_rate": 1.2861900090664528e-05, + "loss": 1.8897, + "step": 14997500 + }, + { + "epoch": 74.31, + "learning_rate": 1.2860661504238445e-05, + "loss": 1.8864, + "step": 14998000 + }, + { + "epoch": 74.31, + "learning_rate": 1.2859422917812361e-05, + "loss": 1.884, + "step": 14998500 + }, + { + "epoch": 74.31, + "learning_rate": 1.2858186808559129e-05, + "loss": 1.8938, + "step": 14999000 + }, + { + "epoch": 74.31, + "learning_rate": 1.2856948222133044e-05, + "loss": 1.8828, + "step": 14999500 + }, + { + "epoch": 74.32, + "learning_rate": 1.285570963570696e-05, + "loss": 1.9067, + "step": 15000000 + }, + { + "epoch": 74.32, + "learning_rate": 1.2854471049280878e-05, + "loss": 1.8773, + "step": 15000500 + }, + { + "epoch": 74.32, + "learning_rate": 1.2853232462854795e-05, + "loss": 1.8879, + "step": 15001000 + }, + { + "epoch": 74.32, + "learning_rate": 1.2851993876428711e-05, + "loss": 1.8878, + "step": 15001500 + }, + { + "epoch": 74.33, + "learning_rate": 1.2850755290002625e-05, + "loss": 1.8983, + "step": 15002000 + }, + { + "epoch": 74.33, + "learning_rate": 1.2849516703576542e-05, + "loss": 1.9164, + "step": 15002500 + }, + { + "epoch": 74.33, + "learning_rate": 1.2848278117150459e-05, + "loss": 1.8917, + "step": 15003000 + }, + { + "epoch": 74.33, + "learning_rate": 1.2847039530724376e-05, + "loss": 1.8998, + "step": 15003500 + }, + { + "epoch": 74.34, + "learning_rate": 1.2845800944298291e-05, + "loss": 1.8896, + "step": 15004000 + }, + { + "epoch": 74.34, + "learning_rate": 1.2844562357872208e-05, + "loss": 1.8896, + "step": 15004500 + }, + { + "epoch": 74.34, + "learning_rate": 1.2843323771446125e-05, + "loss": 1.8744, + "step": 15005000 + }, + { + "epoch": 74.34, + "learning_rate": 1.2842087662192892e-05, + "loss": 1.8789, + "step": 15005500 + }, + { + "epoch": 74.34, + "learning_rate": 1.2840849075766809e-05, + "loss": 1.8747, + "step": 15006000 + }, + { + "epoch": 74.35, + "learning_rate": 1.2839612966513578e-05, + "loss": 1.8831, + "step": 15006500 + }, + { + "epoch": 74.35, + "learning_rate": 1.2838374380087495e-05, + "loss": 1.8989, + "step": 15007000 + }, + { + "epoch": 74.35, + "learning_rate": 1.2837135793661412e-05, + "loss": 1.899, + "step": 15007500 + }, + { + "epoch": 74.35, + "learning_rate": 1.2835897207235329e-05, + "loss": 1.8811, + "step": 15008000 + }, + { + "epoch": 74.36, + "learning_rate": 1.2834658620809242e-05, + "loss": 1.8714, + "step": 15008500 + }, + { + "epoch": 74.36, + "learning_rate": 1.2833420034383159e-05, + "loss": 1.8666, + "step": 15009000 + }, + { + "epoch": 74.36, + "learning_rate": 1.2832181447957076e-05, + "loss": 1.8896, + "step": 15009500 + }, + { + "epoch": 74.36, + "learning_rate": 1.2830945338703845e-05, + "loss": 1.8802, + "step": 15010000 + }, + { + "epoch": 74.37, + "learning_rate": 1.2829706752277762e-05, + "loss": 1.9049, + "step": 15010500 + }, + { + "epoch": 74.37, + "learning_rate": 1.2828468165851679e-05, + "loss": 1.9037, + "step": 15011000 + }, + { + "epoch": 74.37, + "learning_rate": 1.2827229579425592e-05, + "loss": 1.9052, + "step": 15011500 + }, + { + "epoch": 74.37, + "learning_rate": 1.2825990992999509e-05, + "loss": 1.8916, + "step": 15012000 + }, + { + "epoch": 74.38, + "learning_rate": 1.2824752406573426e-05, + "loss": 1.9019, + "step": 15012500 + }, + { + "epoch": 74.38, + "learning_rate": 1.2823513820147343e-05, + "loss": 1.8899, + "step": 15013000 + }, + { + "epoch": 74.38, + "learning_rate": 1.282227523372126e-05, + "loss": 1.8828, + "step": 15013500 + }, + { + "epoch": 74.38, + "learning_rate": 1.2821036647295177e-05, + "loss": 1.8996, + "step": 15014000 + }, + { + "epoch": 74.39, + "learning_rate": 1.2819798060869092e-05, + "loss": 1.8898, + "step": 15014500 + }, + { + "epoch": 74.39, + "learning_rate": 1.2818559474443009e-05, + "loss": 1.8978, + "step": 15015000 + }, + { + "epoch": 74.39, + "learning_rate": 1.2817320888016926e-05, + "loss": 1.8964, + "step": 15015500 + }, + { + "epoch": 74.39, + "learning_rate": 1.281608230159084e-05, + "loss": 1.913, + "step": 15016000 + }, + { + "epoch": 74.4, + "learning_rate": 1.2814843715164756e-05, + "loss": 1.8876, + "step": 15016500 + }, + { + "epoch": 74.4, + "learning_rate": 1.2813605128738673e-05, + "loss": 1.8779, + "step": 15017000 + }, + { + "epoch": 74.4, + "learning_rate": 1.281236654231259e-05, + "loss": 1.9106, + "step": 15017500 + }, + { + "epoch": 74.4, + "learning_rate": 1.281113291023221e-05, + "loss": 1.8981, + "step": 15018000 + }, + { + "epoch": 74.41, + "learning_rate": 1.2809894323806126e-05, + "loss": 1.8965, + "step": 15018500 + }, + { + "epoch": 74.41, + "learning_rate": 1.2808655737380043e-05, + "loss": 1.8901, + "step": 15019000 + }, + { + "epoch": 74.41, + "learning_rate": 1.280741715095396e-05, + "loss": 1.8901, + "step": 15019500 + }, + { + "epoch": 74.41, + "learning_rate": 1.2806181041700729e-05, + "loss": 1.8864, + "step": 15020000 + }, + { + "epoch": 74.42, + "learning_rate": 1.2804944932447496e-05, + "loss": 1.9157, + "step": 15020500 + }, + { + "epoch": 74.42, + "learning_rate": 1.2803706346021413e-05, + "loss": 1.8845, + "step": 15021000 + }, + { + "epoch": 74.42, + "learning_rate": 1.280246775959533e-05, + "loss": 1.8825, + "step": 15021500 + }, + { + "epoch": 74.42, + "learning_rate": 1.2801229173169247e-05, + "loss": 1.8773, + "step": 15022000 + }, + { + "epoch": 74.43, + "learning_rate": 1.2799993063916014e-05, + "loss": 1.8916, + "step": 15022500 + }, + { + "epoch": 74.43, + "learning_rate": 1.2798754477489929e-05, + "loss": 1.8903, + "step": 15023000 + }, + { + "epoch": 74.43, + "learning_rate": 1.2797515891063846e-05, + "loss": 1.8943, + "step": 15023500 + }, + { + "epoch": 74.43, + "learning_rate": 1.2796277304637763e-05, + "loss": 1.8861, + "step": 15024000 + }, + { + "epoch": 74.44, + "learning_rate": 1.2795041195384533e-05, + "loss": 1.8925, + "step": 15024500 + }, + { + "epoch": 74.44, + "learning_rate": 1.279380260895845e-05, + "loss": 1.894, + "step": 15025000 + }, + { + "epoch": 74.44, + "learning_rate": 1.2792564022532366e-05, + "loss": 1.8974, + "step": 15025500 + }, + { + "epoch": 74.44, + "learning_rate": 1.279132543610628e-05, + "loss": 1.8926, + "step": 15026000 + }, + { + "epoch": 74.45, + "learning_rate": 1.2790086849680196e-05, + "loss": 1.8745, + "step": 15026500 + }, + { + "epoch": 74.45, + "learning_rate": 1.2788848263254113e-05, + "loss": 1.89, + "step": 15027000 + }, + { + "epoch": 74.45, + "learning_rate": 1.278760967682803e-05, + "loss": 1.907, + "step": 15027500 + }, + { + "epoch": 74.45, + "learning_rate": 1.2786371090401947e-05, + "loss": 1.9021, + "step": 15028000 + }, + { + "epoch": 74.46, + "learning_rate": 1.2785132503975864e-05, + "loss": 1.8887, + "step": 15028500 + }, + { + "epoch": 74.46, + "learning_rate": 1.278389391754978e-05, + "loss": 1.9137, + "step": 15029000 + }, + { + "epoch": 74.46, + "learning_rate": 1.2782655331123698e-05, + "loss": 1.9007, + "step": 15029500 + }, + { + "epoch": 74.46, + "learning_rate": 1.2781419221870463e-05, + "loss": 1.892, + "step": 15030000 + }, + { + "epoch": 74.47, + "learning_rate": 1.278018063544438e-05, + "loss": 1.8966, + "step": 15030500 + }, + { + "epoch": 74.47, + "learning_rate": 1.2778942049018297e-05, + "loss": 1.8976, + "step": 15031000 + }, + { + "epoch": 74.47, + "learning_rate": 1.2777703462592214e-05, + "loss": 1.8992, + "step": 15031500 + }, + { + "epoch": 74.47, + "learning_rate": 1.277646487616613e-05, + "loss": 1.9168, + "step": 15032000 + }, + { + "epoch": 74.48, + "learning_rate": 1.2775226289740048e-05, + "loss": 1.8993, + "step": 15032500 + }, + { + "epoch": 74.48, + "learning_rate": 1.2773987703313965e-05, + "loss": 1.8812, + "step": 15033000 + }, + { + "epoch": 74.48, + "learning_rate": 1.2772749116887878e-05, + "loss": 1.8841, + "step": 15033500 + }, + { + "epoch": 74.48, + "learning_rate": 1.2771513007634647e-05, + "loss": 1.8912, + "step": 15034000 + }, + { + "epoch": 74.49, + "learning_rate": 1.2770276898381417e-05, + "loss": 1.8874, + "step": 15034500 + }, + { + "epoch": 74.49, + "learning_rate": 1.2769038311955331e-05, + "loss": 1.8827, + "step": 15035000 + }, + { + "epoch": 74.49, + "learning_rate": 1.2767799725529248e-05, + "loss": 1.9173, + "step": 15035500 + }, + { + "epoch": 74.49, + "learning_rate": 1.2766563616276017e-05, + "loss": 1.9037, + "step": 15036000 + }, + { + "epoch": 74.5, + "learning_rate": 1.2765325029849934e-05, + "loss": 1.8961, + "step": 15036500 + }, + { + "epoch": 74.5, + "learning_rate": 1.276408644342385e-05, + "loss": 1.8934, + "step": 15037000 + }, + { + "epoch": 74.5, + "learning_rate": 1.2762847856997768e-05, + "loss": 1.8901, + "step": 15037500 + }, + { + "epoch": 74.5, + "learning_rate": 1.2761609270571684e-05, + "loss": 1.8946, + "step": 15038000 + }, + { + "epoch": 74.51, + "learning_rate": 1.2760370684145598e-05, + "loss": 1.8978, + "step": 15038500 + }, + { + "epoch": 74.51, + "learning_rate": 1.2759132097719515e-05, + "loss": 1.8814, + "step": 15039000 + }, + { + "epoch": 74.51, + "learning_rate": 1.2757893511293432e-05, + "loss": 1.895, + "step": 15039500 + }, + { + "epoch": 74.51, + "learning_rate": 1.2756654924867347e-05, + "loss": 1.8926, + "step": 15040000 + }, + { + "epoch": 74.52, + "learning_rate": 1.2755416338441264e-05, + "loss": 1.8938, + "step": 15040500 + }, + { + "epoch": 74.52, + "learning_rate": 1.2754180229188035e-05, + "loss": 1.8929, + "step": 15041000 + }, + { + "epoch": 74.52, + "learning_rate": 1.2752944119934802e-05, + "loss": 1.877, + "step": 15041500 + }, + { + "epoch": 74.52, + "learning_rate": 1.2751705533508717e-05, + "loss": 1.8843, + "step": 15042000 + }, + { + "epoch": 74.53, + "learning_rate": 1.2750466947082634e-05, + "loss": 1.895, + "step": 15042500 + }, + { + "epoch": 74.53, + "learning_rate": 1.274922836065655e-05, + "loss": 1.9203, + "step": 15043000 + }, + { + "epoch": 74.53, + "learning_rate": 1.2747989774230468e-05, + "loss": 1.8962, + "step": 15043500 + }, + { + "epoch": 74.53, + "learning_rate": 1.2746751187804385e-05, + "loss": 1.8897, + "step": 15044000 + }, + { + "epoch": 74.54, + "learning_rate": 1.2745512601378298e-05, + "loss": 1.9368, + "step": 15044500 + }, + { + "epoch": 74.54, + "learning_rate": 1.2744274014952215e-05, + "loss": 1.9064, + "step": 15045000 + }, + { + "epoch": 74.54, + "learning_rate": 1.2743037905698984e-05, + "loss": 1.8958, + "step": 15045500 + }, + { + "epoch": 74.54, + "learning_rate": 1.27417993192729e-05, + "loss": 1.8723, + "step": 15046000 + }, + { + "epoch": 74.55, + "learning_rate": 1.2740563210019668e-05, + "loss": 1.8881, + "step": 15046500 + }, + { + "epoch": 74.55, + "learning_rate": 1.2739324623593585e-05, + "loss": 1.9017, + "step": 15047000 + }, + { + "epoch": 74.55, + "learning_rate": 1.2738086037167502e-05, + "loss": 1.8873, + "step": 15047500 + }, + { + "epoch": 74.55, + "learning_rate": 1.2736847450741419e-05, + "loss": 1.8849, + "step": 15048000 + }, + { + "epoch": 74.56, + "learning_rate": 1.2735608864315336e-05, + "loss": 1.8906, + "step": 15048500 + }, + { + "epoch": 74.56, + "learning_rate": 1.273437027788925e-05, + "loss": 1.8709, + "step": 15049000 + }, + { + "epoch": 74.56, + "learning_rate": 1.2733131691463168e-05, + "loss": 1.8878, + "step": 15049500 + }, + { + "epoch": 74.56, + "learning_rate": 1.2731893105037085e-05, + "loss": 1.8995, + "step": 15050000 + }, + { + "epoch": 74.57, + "learning_rate": 1.2730654518611002e-05, + "loss": 1.9152, + "step": 15050500 + }, + { + "epoch": 74.57, + "learning_rate": 1.2729415932184915e-05, + "loss": 1.8893, + "step": 15051000 + }, + { + "epoch": 74.57, + "learning_rate": 1.2728179822931686e-05, + "loss": 1.9091, + "step": 15051500 + }, + { + "epoch": 74.57, + "learning_rate": 1.2726941236505603e-05, + "loss": 1.8999, + "step": 15052000 + }, + { + "epoch": 74.58, + "learning_rate": 1.2725702650079518e-05, + "loss": 1.9083, + "step": 15052500 + }, + { + "epoch": 74.58, + "learning_rate": 1.2724464063653435e-05, + "loss": 1.9152, + "step": 15053000 + }, + { + "epoch": 74.58, + "learning_rate": 1.2723225477227352e-05, + "loss": 1.893, + "step": 15053500 + }, + { + "epoch": 74.58, + "learning_rate": 1.2721986890801265e-05, + "loss": 1.8995, + "step": 15054000 + }, + { + "epoch": 74.59, + "learning_rate": 1.2720748304375182e-05, + "loss": 1.888, + "step": 15054500 + }, + { + "epoch": 74.59, + "learning_rate": 1.2719509717949099e-05, + "loss": 1.8886, + "step": 15055000 + }, + { + "epoch": 74.59, + "learning_rate": 1.2718271131523016e-05, + "loss": 1.8924, + "step": 15055500 + }, + { + "epoch": 74.59, + "learning_rate": 1.2717032545096933e-05, + "loss": 1.897, + "step": 15056000 + }, + { + "epoch": 74.6, + "learning_rate": 1.271579395867085e-05, + "loss": 1.9131, + "step": 15056500 + }, + { + "epoch": 74.6, + "learning_rate": 1.2714555372244765e-05, + "loss": 1.8865, + "step": 15057000 + }, + { + "epoch": 74.6, + "learning_rate": 1.2713316785818682e-05, + "loss": 1.8689, + "step": 15057500 + }, + { + "epoch": 74.6, + "learning_rate": 1.2712080676565449e-05, + "loss": 1.918, + "step": 15058000 + }, + { + "epoch": 74.61, + "learning_rate": 1.2710842090139366e-05, + "loss": 1.8995, + "step": 15058500 + }, + { + "epoch": 74.61, + "learning_rate": 1.2709605980886135e-05, + "loss": 1.8863, + "step": 15059000 + }, + { + "epoch": 74.61, + "learning_rate": 1.2708367394460052e-05, + "loss": 1.9044, + "step": 15059500 + }, + { + "epoch": 74.61, + "learning_rate": 1.2707128808033969e-05, + "loss": 1.9064, + "step": 15060000 + }, + { + "epoch": 74.61, + "learning_rate": 1.2705890221607882e-05, + "loss": 1.8883, + "step": 15060500 + }, + { + "epoch": 74.62, + "learning_rate": 1.2704654112354653e-05, + "loss": 1.9018, + "step": 15061000 + }, + { + "epoch": 74.62, + "learning_rate": 1.270341552592857e-05, + "loss": 1.9207, + "step": 15061500 + }, + { + "epoch": 74.62, + "learning_rate": 1.2702176939502487e-05, + "loss": 1.9001, + "step": 15062000 + }, + { + "epoch": 74.62, + "learning_rate": 1.2700938353076402e-05, + "loss": 1.9049, + "step": 15062500 + }, + { + "epoch": 74.63, + "learning_rate": 1.2699699766650319e-05, + "loss": 1.8938, + "step": 15063000 + }, + { + "epoch": 74.63, + "learning_rate": 1.2698461180224232e-05, + "loss": 1.8587, + "step": 15063500 + }, + { + "epoch": 74.63, + "learning_rate": 1.269722259379815e-05, + "loss": 1.8896, + "step": 15064000 + }, + { + "epoch": 74.63, + "learning_rate": 1.269598648454492e-05, + "loss": 1.8942, + "step": 15064500 + }, + { + "epoch": 74.64, + "learning_rate": 1.2694747898118837e-05, + "loss": 1.9, + "step": 15065000 + }, + { + "epoch": 74.64, + "learning_rate": 1.2693509311692754e-05, + "loss": 1.8766, + "step": 15065500 + }, + { + "epoch": 74.64, + "learning_rate": 1.2692273202439519e-05, + "loss": 1.8565, + "step": 15066000 + }, + { + "epoch": 74.64, + "learning_rate": 1.2691034616013436e-05, + "loss": 1.9105, + "step": 15066500 + }, + { + "epoch": 74.65, + "learning_rate": 1.2689796029587353e-05, + "loss": 1.9002, + "step": 15067000 + }, + { + "epoch": 74.65, + "learning_rate": 1.268855744316127e-05, + "loss": 1.8794, + "step": 15067500 + }, + { + "epoch": 74.65, + "learning_rate": 1.2687318856735187e-05, + "loss": 1.8768, + "step": 15068000 + }, + { + "epoch": 74.65, + "learning_rate": 1.2686080270309104e-05, + "loss": 1.8985, + "step": 15068500 + }, + { + "epoch": 74.66, + "learning_rate": 1.2684841683883019e-05, + "loss": 1.8995, + "step": 15069000 + }, + { + "epoch": 74.66, + "learning_rate": 1.2683603097456934e-05, + "loss": 1.8983, + "step": 15069500 + }, + { + "epoch": 74.66, + "learning_rate": 1.2682364511030851e-05, + "loss": 1.8891, + "step": 15070000 + }, + { + "epoch": 74.66, + "learning_rate": 1.2681125924604766e-05, + "loss": 1.9101, + "step": 15070500 + }, + { + "epoch": 74.67, + "learning_rate": 1.2679887338178683e-05, + "loss": 1.8788, + "step": 15071000 + }, + { + "epoch": 74.67, + "learning_rate": 1.26786487517526e-05, + "loss": 1.8921, + "step": 15071500 + }, + { + "epoch": 74.67, + "learning_rate": 1.2677410165326517e-05, + "loss": 1.9111, + "step": 15072000 + }, + { + "epoch": 74.67, + "learning_rate": 1.2676174056073286e-05, + "loss": 1.9204, + "step": 15072500 + }, + { + "epoch": 74.68, + "learning_rate": 1.2674937946820053e-05, + "loss": 1.9022, + "step": 15073000 + }, + { + "epoch": 74.68, + "learning_rate": 1.267369936039397e-05, + "loss": 1.9149, + "step": 15073500 + }, + { + "epoch": 74.68, + "learning_rate": 1.2672460773967887e-05, + "loss": 1.9018, + "step": 15074000 + }, + { + "epoch": 74.68, + "learning_rate": 1.2671222187541804e-05, + "loss": 1.8965, + "step": 15074500 + }, + { + "epoch": 74.69, + "learning_rate": 1.266998360111572e-05, + "loss": 1.8967, + "step": 15075000 + }, + { + "epoch": 74.69, + "learning_rate": 1.2668747491862488e-05, + "loss": 1.8973, + "step": 15075500 + }, + { + "epoch": 74.69, + "learning_rate": 1.2667508905436403e-05, + "loss": 1.881, + "step": 15076000 + }, + { + "epoch": 74.69, + "learning_rate": 1.266627031901032e-05, + "loss": 1.8876, + "step": 15076500 + }, + { + "epoch": 74.7, + "learning_rate": 1.2665031732584237e-05, + "loss": 1.8851, + "step": 15077000 + }, + { + "epoch": 74.7, + "learning_rate": 1.2663793146158154e-05, + "loss": 1.8969, + "step": 15077500 + }, + { + "epoch": 74.7, + "learning_rate": 1.266255455973207e-05, + "loss": 1.8804, + "step": 15078000 + }, + { + "epoch": 74.7, + "learning_rate": 1.2661318450478838e-05, + "loss": 1.8953, + "step": 15078500 + }, + { + "epoch": 74.71, + "learning_rate": 1.2660079864052755e-05, + "loss": 1.8974, + "step": 15079000 + }, + { + "epoch": 74.71, + "learning_rate": 1.265884127762667e-05, + "loss": 1.9102, + "step": 15079500 + }, + { + "epoch": 74.71, + "learning_rate": 1.2657602691200587e-05, + "loss": 1.8813, + "step": 15080000 + }, + { + "epoch": 74.71, + "learning_rate": 1.2656364104774504e-05, + "loss": 1.8912, + "step": 15080500 + }, + { + "epoch": 74.72, + "learning_rate": 1.2655125518348421e-05, + "loss": 1.9117, + "step": 15081000 + }, + { + "epoch": 74.72, + "learning_rate": 1.2653886931922338e-05, + "loss": 1.9164, + "step": 15081500 + }, + { + "epoch": 74.72, + "learning_rate": 1.2652648345496251e-05, + "loss": 1.8869, + "step": 15082000 + }, + { + "epoch": 74.72, + "learning_rate": 1.2651409759070168e-05, + "loss": 1.9112, + "step": 15082500 + }, + { + "epoch": 74.73, + "learning_rate": 1.2650171172644085e-05, + "loss": 1.8732, + "step": 15083000 + }, + { + "epoch": 74.73, + "learning_rate": 1.2648932586218002e-05, + "loss": 1.9017, + "step": 15083500 + }, + { + "epoch": 74.73, + "learning_rate": 1.2647696476964771e-05, + "loss": 1.907, + "step": 15084000 + }, + { + "epoch": 74.73, + "learning_rate": 1.2646457890538688e-05, + "loss": 1.8968, + "step": 15084500 + }, + { + "epoch": 74.74, + "learning_rate": 1.2645219304112605e-05, + "loss": 1.8787, + "step": 15085000 + }, + { + "epoch": 74.74, + "learning_rate": 1.2643980717686518e-05, + "loss": 1.9035, + "step": 15085500 + }, + { + "epoch": 74.74, + "learning_rate": 1.2642744608433287e-05, + "loss": 1.9089, + "step": 15086000 + }, + { + "epoch": 74.74, + "learning_rate": 1.2641506022007204e-05, + "loss": 1.8962, + "step": 15086500 + }, + { + "epoch": 74.75, + "learning_rate": 1.2640267435581121e-05, + "loss": 1.9269, + "step": 15087000 + }, + { + "epoch": 74.75, + "learning_rate": 1.2639031326327888e-05, + "loss": 1.8967, + "step": 15087500 + }, + { + "epoch": 74.75, + "learning_rate": 1.2637792739901805e-05, + "loss": 1.8836, + "step": 15088000 + }, + { + "epoch": 74.75, + "learning_rate": 1.2636554153475722e-05, + "loss": 1.8754, + "step": 15088500 + }, + { + "epoch": 74.76, + "learning_rate": 1.2635315567049639e-05, + "loss": 1.8628, + "step": 15089000 + }, + { + "epoch": 74.76, + "learning_rate": 1.2634076980623554e-05, + "loss": 1.8911, + "step": 15089500 + }, + { + "epoch": 74.76, + "learning_rate": 1.2632838394197471e-05, + "loss": 1.9043, + "step": 15090000 + }, + { + "epoch": 74.76, + "learning_rate": 1.2631599807771388e-05, + "loss": 1.8793, + "step": 15090500 + }, + { + "epoch": 74.77, + "learning_rate": 1.2630363698518155e-05, + "loss": 1.8877, + "step": 15091000 + }, + { + "epoch": 74.77, + "learning_rate": 1.2629125112092072e-05, + "loss": 1.923, + "step": 15091500 + }, + { + "epoch": 74.77, + "learning_rate": 1.2627886525665989e-05, + "loss": 1.8951, + "step": 15092000 + }, + { + "epoch": 74.77, + "learning_rate": 1.2626647939239906e-05, + "loss": 1.8738, + "step": 15092500 + }, + { + "epoch": 74.78, + "learning_rate": 1.2625409352813821e-05, + "loss": 1.8952, + "step": 15093000 + }, + { + "epoch": 74.78, + "learning_rate": 1.2624170766387738e-05, + "loss": 1.9143, + "step": 15093500 + }, + { + "epoch": 74.78, + "learning_rate": 1.2622932179961655e-05, + "loss": 1.9213, + "step": 15094000 + }, + { + "epoch": 74.78, + "learning_rate": 1.2621693593535569e-05, + "loss": 1.8975, + "step": 15094500 + }, + { + "epoch": 74.79, + "learning_rate": 1.2620455007109485e-05, + "loss": 1.8703, + "step": 15095000 + }, + { + "epoch": 74.79, + "learning_rate": 1.2619216420683402e-05, + "loss": 1.9011, + "step": 15095500 + }, + { + "epoch": 74.79, + "learning_rate": 1.261797783425732e-05, + "loss": 1.912, + "step": 15096000 + }, + { + "epoch": 74.79, + "learning_rate": 1.2616741725004088e-05, + "loss": 1.8952, + "step": 15096500 + }, + { + "epoch": 74.8, + "learning_rate": 1.2615503138578005e-05, + "loss": 1.8651, + "step": 15097000 + }, + { + "epoch": 74.8, + "learning_rate": 1.2614264552151922e-05, + "loss": 1.8755, + "step": 15097500 + }, + { + "epoch": 74.8, + "learning_rate": 1.2613025965725835e-05, + "loss": 1.8861, + "step": 15098000 + }, + { + "epoch": 74.8, + "learning_rate": 1.2611787379299752e-05, + "loss": 1.8997, + "step": 15098500 + }, + { + "epoch": 74.81, + "learning_rate": 1.261054879287367e-05, + "loss": 1.9087, + "step": 15099000 + }, + { + "epoch": 74.81, + "learning_rate": 1.2609312683620438e-05, + "loss": 1.9005, + "step": 15099500 + }, + { + "epoch": 74.81, + "learning_rate": 1.2608074097194355e-05, + "loss": 1.8906, + "step": 15100000 + }, + { + "epoch": 74.81, + "learning_rate": 1.2606835510768272e-05, + "loss": 1.875, + "step": 15100500 + }, + { + "epoch": 74.82, + "learning_rate": 1.2605596924342186e-05, + "loss": 1.8844, + "step": 15101000 + }, + { + "epoch": 74.82, + "learning_rate": 1.2604358337916102e-05, + "loss": 1.8874, + "step": 15101500 + }, + { + "epoch": 74.82, + "learning_rate": 1.2603122228662873e-05, + "loss": 1.8904, + "step": 15102000 + }, + { + "epoch": 74.82, + "learning_rate": 1.260188364223679e-05, + "loss": 1.8831, + "step": 15102500 + }, + { + "epoch": 74.83, + "learning_rate": 1.2600645055810705e-05, + "loss": 1.9203, + "step": 15103000 + }, + { + "epoch": 74.83, + "learning_rate": 1.2599406469384622e-05, + "loss": 1.903, + "step": 15103500 + }, + { + "epoch": 74.83, + "learning_rate": 1.2598167882958536e-05, + "loss": 1.8978, + "step": 15104000 + }, + { + "epoch": 74.83, + "learning_rate": 1.2596929296532453e-05, + "loss": 1.9039, + "step": 15104500 + }, + { + "epoch": 74.84, + "learning_rate": 1.259569071010637e-05, + "loss": 1.9223, + "step": 15105000 + }, + { + "epoch": 74.84, + "learning_rate": 1.2594452123680286e-05, + "loss": 1.8871, + "step": 15105500 + }, + { + "epoch": 74.84, + "learning_rate": 1.2593216014427057e-05, + "loss": 1.8998, + "step": 15106000 + }, + { + "epoch": 74.84, + "learning_rate": 1.2591977428000972e-05, + "loss": 1.9187, + "step": 15106500 + }, + { + "epoch": 74.85, + "learning_rate": 1.2590738841574887e-05, + "loss": 1.8917, + "step": 15107000 + }, + { + "epoch": 74.85, + "learning_rate": 1.2589500255148803e-05, + "loss": 1.9114, + "step": 15107500 + }, + { + "epoch": 74.85, + "learning_rate": 1.258826166872272e-05, + "loss": 1.8808, + "step": 15108000 + }, + { + "epoch": 74.85, + "learning_rate": 1.2587023082296636e-05, + "loss": 1.8811, + "step": 15108500 + }, + { + "epoch": 74.86, + "learning_rate": 1.2585784495870553e-05, + "loss": 1.8946, + "step": 15109000 + }, + { + "epoch": 74.86, + "learning_rate": 1.2584548386617322e-05, + "loss": 1.8907, + "step": 15109500 + }, + { + "epoch": 74.86, + "learning_rate": 1.258331227736409e-05, + "loss": 1.8863, + "step": 15110000 + }, + { + "epoch": 74.86, + "learning_rate": 1.2582073690938006e-05, + "loss": 1.8974, + "step": 15110500 + }, + { + "epoch": 74.87, + "learning_rate": 1.2580835104511923e-05, + "loss": 1.8909, + "step": 15111000 + }, + { + "epoch": 74.87, + "learning_rate": 1.257959651808584e-05, + "loss": 1.879, + "step": 15111500 + }, + { + "epoch": 74.87, + "learning_rate": 1.2578360408832607e-05, + "loss": 1.8996, + "step": 15112000 + }, + { + "epoch": 74.87, + "learning_rate": 1.2577121822406524e-05, + "loss": 1.9193, + "step": 15112500 + }, + { + "epoch": 74.88, + "learning_rate": 1.257588323598044e-05, + "loss": 1.8988, + "step": 15113000 + }, + { + "epoch": 74.88, + "learning_rate": 1.2574644649554356e-05, + "loss": 1.8891, + "step": 15113500 + }, + { + "epoch": 74.88, + "learning_rate": 1.2573406063128273e-05, + "loss": 1.8916, + "step": 15114000 + }, + { + "epoch": 74.88, + "learning_rate": 1.257216747670219e-05, + "loss": 1.873, + "step": 15114500 + }, + { + "epoch": 74.88, + "learning_rate": 1.2570928890276107e-05, + "loss": 1.912, + "step": 15115000 + }, + { + "epoch": 74.89, + "learning_rate": 1.2569690303850024e-05, + "loss": 1.9028, + "step": 15115500 + }, + { + "epoch": 74.89, + "learning_rate": 1.2568451717423941e-05, + "loss": 1.8922, + "step": 15116000 + }, + { + "epoch": 74.89, + "learning_rate": 1.2567213130997854e-05, + "loss": 1.8838, + "step": 15116500 + }, + { + "epoch": 74.89, + "learning_rate": 1.2565974544571771e-05, + "loss": 1.8795, + "step": 15117000 + }, + { + "epoch": 74.9, + "learning_rate": 1.2564735958145687e-05, + "loss": 1.8927, + "step": 15117500 + }, + { + "epoch": 74.9, + "learning_rate": 1.256350232606531e-05, + "loss": 1.8834, + "step": 15118000 + }, + { + "epoch": 74.9, + "learning_rate": 1.2562263739639224e-05, + "loss": 1.8828, + "step": 15118500 + }, + { + "epoch": 74.9, + "learning_rate": 1.2561025153213141e-05, + "loss": 1.9193, + "step": 15119000 + }, + { + "epoch": 74.91, + "learning_rate": 1.2559786566787058e-05, + "loss": 1.9087, + "step": 15119500 + }, + { + "epoch": 74.91, + "learning_rate": 1.2558550457533827e-05, + "loss": 1.8851, + "step": 15120000 + }, + { + "epoch": 74.91, + "learning_rate": 1.2557311871107744e-05, + "loss": 1.8871, + "step": 15120500 + }, + { + "epoch": 74.91, + "learning_rate": 1.255607328468166e-05, + "loss": 1.9038, + "step": 15121000 + }, + { + "epoch": 74.92, + "learning_rate": 1.2554834698255574e-05, + "loss": 1.9201, + "step": 15121500 + }, + { + "epoch": 74.92, + "learning_rate": 1.2553596111829491e-05, + "loss": 1.8995, + "step": 15122000 + }, + { + "epoch": 74.92, + "learning_rate": 1.255236000257626e-05, + "loss": 1.8981, + "step": 15122500 + }, + { + "epoch": 74.92, + "learning_rate": 1.2551121416150177e-05, + "loss": 1.8614, + "step": 15123000 + }, + { + "epoch": 74.93, + "learning_rate": 1.2549882829724094e-05, + "loss": 1.8844, + "step": 15123500 + }, + { + "epoch": 74.93, + "learning_rate": 1.2548644243298011e-05, + "loss": 1.908, + "step": 15124000 + }, + { + "epoch": 74.93, + "learning_rate": 1.2547405656871924e-05, + "loss": 1.8913, + "step": 15124500 + }, + { + "epoch": 74.93, + "learning_rate": 1.2546167070445841e-05, + "loss": 1.8878, + "step": 15125000 + }, + { + "epoch": 74.94, + "learning_rate": 1.2544928484019758e-05, + "loss": 1.899, + "step": 15125500 + }, + { + "epoch": 74.94, + "learning_rate": 1.2543689897593675e-05, + "loss": 1.918, + "step": 15126000 + }, + { + "epoch": 74.94, + "learning_rate": 1.2542456265513294e-05, + "loss": 1.9051, + "step": 15126500 + }, + { + "epoch": 74.94, + "learning_rate": 1.2541217679087211e-05, + "loss": 1.864, + "step": 15127000 + }, + { + "epoch": 74.95, + "learning_rate": 1.2539979092661128e-05, + "loss": 1.8912, + "step": 15127500 + }, + { + "epoch": 74.95, + "learning_rate": 1.2538740506235045e-05, + "loss": 1.8882, + "step": 15128000 + }, + { + "epoch": 74.95, + "learning_rate": 1.2537501919808962e-05, + "loss": 1.916, + "step": 15128500 + }, + { + "epoch": 74.95, + "learning_rate": 1.2536263333382877e-05, + "loss": 1.9184, + "step": 15129000 + }, + { + "epoch": 74.96, + "learning_rate": 1.2535024746956794e-05, + "loss": 1.9103, + "step": 15129500 + }, + { + "epoch": 74.96, + "learning_rate": 1.2533786160530711e-05, + "loss": 1.9013, + "step": 15130000 + }, + { + "epoch": 74.96, + "learning_rate": 1.2532547574104628e-05, + "loss": 1.8991, + "step": 15130500 + }, + { + "epoch": 74.96, + "learning_rate": 1.2531308987678541e-05, + "loss": 1.902, + "step": 15131000 + }, + { + "epoch": 74.97, + "learning_rate": 1.2530070401252458e-05, + "loss": 1.9016, + "step": 15131500 + }, + { + "epoch": 74.97, + "learning_rate": 1.2528834291999227e-05, + "loss": 1.9117, + "step": 15132000 + }, + { + "epoch": 74.97, + "learning_rate": 1.2527595705573144e-05, + "loss": 1.8774, + "step": 15132500 + }, + { + "epoch": 74.97, + "learning_rate": 1.2526357119147061e-05, + "loss": 1.9021, + "step": 15133000 + }, + { + "epoch": 74.98, + "learning_rate": 1.2525118532720978e-05, + "loss": 1.9006, + "step": 15133500 + }, + { + "epoch": 74.98, + "learning_rate": 1.2523879946294892e-05, + "loss": 1.9208, + "step": 15134000 + }, + { + "epoch": 74.98, + "learning_rate": 1.2522641359868808e-05, + "loss": 1.8938, + "step": 15134500 + }, + { + "epoch": 74.98, + "learning_rate": 1.2521402773442725e-05, + "loss": 1.8997, + "step": 15135000 + }, + { + "epoch": 74.99, + "learning_rate": 1.2520164187016642e-05, + "loss": 1.9033, + "step": 15135500 + }, + { + "epoch": 74.99, + "learning_rate": 1.2518930554936261e-05, + "loss": 1.8967, + "step": 15136000 + }, + { + "epoch": 74.99, + "learning_rate": 1.2517691968510178e-05, + "loss": 1.8856, + "step": 15136500 + }, + { + "epoch": 74.99, + "learning_rate": 1.2516453382084095e-05, + "loss": 1.9101, + "step": 15137000 + }, + { + "epoch": 75.0, + "learning_rate": 1.2515214795658012e-05, + "loss": 1.8854, + "step": 15137500 + }, + { + "epoch": 75.0, + "learning_rate": 1.2513976209231929e-05, + "loss": 1.9014, + "step": 15138000 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.6787383353736862, + "eval_accuracy_mlm": 0.6389567910002085, + "eval_accuracy_nsp": 0.8665903145211583, + "eval_loss": 2.280489444732666, + "eval_runtime": 146.8044, + "eval_samples_per_second": 1736.725, + "eval_steps_per_second": 72.368, + "step": 15138225 + }, + { + "epoch": 75.0, + "learning_rate": 1.2512740099978698e-05, + "loss": 1.8729, + "step": 15138500 + }, + { + "epoch": 75.0, + "learning_rate": 1.2511501513552611e-05, + "loss": 1.8938, + "step": 15139000 + }, + { + "epoch": 75.01, + "learning_rate": 1.2510262927126528e-05, + "loss": 1.9064, + "step": 15139500 + }, + { + "epoch": 75.01, + "learning_rate": 1.2509024340700445e-05, + "loss": 1.8803, + "step": 15140000 + }, + { + "epoch": 75.01, + "learning_rate": 1.2507785754274362e-05, + "loss": 1.9021, + "step": 15140500 + }, + { + "epoch": 75.01, + "learning_rate": 1.2506547167848279e-05, + "loss": 1.899, + "step": 15141000 + }, + { + "epoch": 75.02, + "learning_rate": 1.2505308581422196e-05, + "loss": 1.8652, + "step": 15141500 + }, + { + "epoch": 75.02, + "learning_rate": 1.2504069994996113e-05, + "loss": 1.8718, + "step": 15142000 + }, + { + "epoch": 75.02, + "learning_rate": 1.2502833885742878e-05, + "loss": 1.9073, + "step": 15142500 + }, + { + "epoch": 75.02, + "learning_rate": 1.2501597776489649e-05, + "loss": 1.881, + "step": 15143000 + }, + { + "epoch": 75.03, + "learning_rate": 1.2500359190063566e-05, + "loss": 1.8619, + "step": 15143500 + }, + { + "epoch": 75.03, + "learning_rate": 1.2499120603637481e-05, + "loss": 1.8742, + "step": 15144000 + }, + { + "epoch": 75.03, + "learning_rate": 1.2497882017211398e-05, + "loss": 1.8812, + "step": 15144500 + }, + { + "epoch": 75.03, + "learning_rate": 1.2496643430785313e-05, + "loss": 1.8619, + "step": 15145000 + }, + { + "epoch": 75.04, + "learning_rate": 1.2495407321532082e-05, + "loss": 1.882, + "step": 15145500 + }, + { + "epoch": 75.04, + "learning_rate": 1.2494168735105999e-05, + "loss": 1.8891, + "step": 15146000 + }, + { + "epoch": 75.04, + "learning_rate": 1.2492930148679916e-05, + "loss": 1.8919, + "step": 15146500 + }, + { + "epoch": 75.04, + "learning_rate": 1.2491694039426683e-05, + "loss": 1.8842, + "step": 15147000 + }, + { + "epoch": 75.05, + "learning_rate": 1.2490457930173452e-05, + "loss": 1.8924, + "step": 15147500 + }, + { + "epoch": 75.05, + "learning_rate": 1.2489219343747369e-05, + "loss": 1.8966, + "step": 15148000 + }, + { + "epoch": 75.05, + "learning_rate": 1.2487980757321286e-05, + "loss": 1.8882, + "step": 15148500 + }, + { + "epoch": 75.05, + "learning_rate": 1.2486742170895201e-05, + "loss": 1.8634, + "step": 15149000 + }, + { + "epoch": 75.06, + "learning_rate": 1.2485503584469118e-05, + "loss": 1.9051, + "step": 15149500 + }, + { + "epoch": 75.06, + "learning_rate": 1.2484264998043035e-05, + "loss": 1.8926, + "step": 15150000 + }, + { + "epoch": 75.06, + "learning_rate": 1.2483028888789802e-05, + "loss": 1.8806, + "step": 15150500 + }, + { + "epoch": 75.06, + "learning_rate": 1.2481790302363719e-05, + "loss": 1.8722, + "step": 15151000 + }, + { + "epoch": 75.07, + "learning_rate": 1.2480551715937636e-05, + "loss": 1.8816, + "step": 15151500 + }, + { + "epoch": 75.07, + "learning_rate": 1.2479313129511553e-05, + "loss": 1.8801, + "step": 15152000 + }, + { + "epoch": 75.07, + "learning_rate": 1.2478074543085468e-05, + "loss": 1.8945, + "step": 15152500 + }, + { + "epoch": 75.07, + "learning_rate": 1.2476835956659385e-05, + "loss": 1.8691, + "step": 15153000 + }, + { + "epoch": 75.08, + "learning_rate": 1.2475599847406152e-05, + "loss": 1.9033, + "step": 15153500 + }, + { + "epoch": 75.08, + "learning_rate": 1.2474361260980069e-05, + "loss": 1.8783, + "step": 15154000 + }, + { + "epoch": 75.08, + "learning_rate": 1.2473122674553986e-05, + "loss": 1.8804, + "step": 15154500 + }, + { + "epoch": 75.08, + "learning_rate": 1.2471884088127903e-05, + "loss": 1.8803, + "step": 15155000 + }, + { + "epoch": 75.09, + "learning_rate": 1.2470645501701818e-05, + "loss": 1.8872, + "step": 15155500 + }, + { + "epoch": 75.09, + "learning_rate": 1.2469406915275735e-05, + "loss": 1.8809, + "step": 15156000 + }, + { + "epoch": 75.09, + "learning_rate": 1.2468168328849652e-05, + "loss": 1.8738, + "step": 15156500 + }, + { + "epoch": 75.09, + "learning_rate": 1.2466929742423569e-05, + "loss": 1.8961, + "step": 15157000 + }, + { + "epoch": 75.1, + "learning_rate": 1.2465691155997484e-05, + "loss": 1.9079, + "step": 15157500 + }, + { + "epoch": 75.1, + "learning_rate": 1.24644525695714e-05, + "loss": 1.8755, + "step": 15158000 + }, + { + "epoch": 75.1, + "learning_rate": 1.2463213983145316e-05, + "loss": 1.887, + "step": 15158500 + }, + { + "epoch": 75.1, + "learning_rate": 1.2461975396719233e-05, + "loss": 1.8784, + "step": 15159000 + }, + { + "epoch": 75.11, + "learning_rate": 1.246073681029315e-05, + "loss": 1.9027, + "step": 15159500 + }, + { + "epoch": 75.11, + "learning_rate": 1.2459498223867065e-05, + "loss": 1.883, + "step": 15160000 + }, + { + "epoch": 75.11, + "learning_rate": 1.2458259637440982e-05, + "loss": 1.8628, + "step": 15160500 + }, + { + "epoch": 75.11, + "learning_rate": 1.2457021051014899e-05, + "loss": 1.8761, + "step": 15161000 + }, + { + "epoch": 75.12, + "learning_rate": 1.2455782464588816e-05, + "loss": 1.9079, + "step": 15161500 + }, + { + "epoch": 75.12, + "learning_rate": 1.2454543878162731e-05, + "loss": 1.8917, + "step": 15162000 + }, + { + "epoch": 75.12, + "learning_rate": 1.2453305291736646e-05, + "loss": 1.8758, + "step": 15162500 + }, + { + "epoch": 75.12, + "learning_rate": 1.2452066705310563e-05, + "loss": 1.8904, + "step": 15163000 + }, + { + "epoch": 75.13, + "learning_rate": 1.2450830596057332e-05, + "loss": 1.8767, + "step": 15163500 + }, + { + "epoch": 75.13, + "learning_rate": 1.2449594486804101e-05, + "loss": 1.9026, + "step": 15164000 + }, + { + "epoch": 75.13, + "learning_rate": 1.2448355900378016e-05, + "loss": 1.8942, + "step": 15164500 + }, + { + "epoch": 75.13, + "learning_rate": 1.2447117313951933e-05, + "loss": 1.901, + "step": 15165000 + }, + { + "epoch": 75.14, + "learning_rate": 1.244587872752585e-05, + "loss": 1.9102, + "step": 15165500 + }, + { + "epoch": 75.14, + "learning_rate": 1.2444640141099765e-05, + "loss": 1.8804, + "step": 15166000 + }, + { + "epoch": 75.14, + "learning_rate": 1.2443404031846536e-05, + "loss": 1.9057, + "step": 15166500 + }, + { + "epoch": 75.14, + "learning_rate": 1.2442165445420451e-05, + "loss": 1.8992, + "step": 15167000 + }, + { + "epoch": 75.15, + "learning_rate": 1.2440926858994368e-05, + "loss": 1.9119, + "step": 15167500 + }, + { + "epoch": 75.15, + "learning_rate": 1.2439688272568283e-05, + "loss": 1.9122, + "step": 15168000 + }, + { + "epoch": 75.15, + "learning_rate": 1.24384496861422e-05, + "loss": 1.8681, + "step": 15168500 + }, + { + "epoch": 75.15, + "learning_rate": 1.2437211099716115e-05, + "loss": 1.8665, + "step": 15169000 + }, + { + "epoch": 75.15, + "learning_rate": 1.2435974990462886e-05, + "loss": 1.9, + "step": 15169500 + }, + { + "epoch": 75.16, + "learning_rate": 1.2434738881209653e-05, + "loss": 1.8815, + "step": 15170000 + }, + { + "epoch": 75.16, + "learning_rate": 1.243350029478357e-05, + "loss": 1.8695, + "step": 15170500 + }, + { + "epoch": 75.16, + "learning_rate": 1.2432261708357485e-05, + "loss": 1.8855, + "step": 15171000 + }, + { + "epoch": 75.16, + "learning_rate": 1.2431023121931402e-05, + "loss": 1.8741, + "step": 15171500 + }, + { + "epoch": 75.17, + "learning_rate": 1.2429787012678171e-05, + "loss": 1.8813, + "step": 15172000 + }, + { + "epoch": 75.17, + "learning_rate": 1.2428548426252088e-05, + "loss": 1.8956, + "step": 15172500 + }, + { + "epoch": 75.17, + "learning_rate": 1.2427309839826005e-05, + "loss": 1.8811, + "step": 15173000 + }, + { + "epoch": 75.17, + "learning_rate": 1.242607125339992e-05, + "loss": 1.8922, + "step": 15173500 + }, + { + "epoch": 75.18, + "learning_rate": 1.2424832666973835e-05, + "loss": 1.901, + "step": 15174000 + }, + { + "epoch": 75.18, + "learning_rate": 1.2423594080547752e-05, + "loss": 1.8789, + "step": 15174500 + }, + { + "epoch": 75.18, + "learning_rate": 1.2422355494121669e-05, + "loss": 1.8856, + "step": 15175000 + }, + { + "epoch": 75.18, + "learning_rate": 1.2421116907695586e-05, + "loss": 1.8921, + "step": 15175500 + }, + { + "epoch": 75.19, + "learning_rate": 1.2419878321269503e-05, + "loss": 1.8897, + "step": 15176000 + }, + { + "epoch": 75.19, + "learning_rate": 1.2418642212016272e-05, + "loss": 1.8823, + "step": 15176500 + }, + { + "epoch": 75.19, + "learning_rate": 1.2417403625590187e-05, + "loss": 1.8921, + "step": 15177000 + }, + { + "epoch": 75.19, + "learning_rate": 1.2416165039164102e-05, + "loss": 1.8867, + "step": 15177500 + }, + { + "epoch": 75.2, + "learning_rate": 1.2414928929910873e-05, + "loss": 1.8846, + "step": 15178000 + }, + { + "epoch": 75.2, + "learning_rate": 1.2413690343484788e-05, + "loss": 1.9016, + "step": 15178500 + }, + { + "epoch": 75.2, + "learning_rate": 1.2412451757058705e-05, + "loss": 1.8915, + "step": 15179000 + }, + { + "epoch": 75.2, + "learning_rate": 1.2411213170632622e-05, + "loss": 1.9042, + "step": 15179500 + }, + { + "epoch": 75.21, + "learning_rate": 1.2409974584206539e-05, + "loss": 1.9012, + "step": 15180000 + }, + { + "epoch": 75.21, + "learning_rate": 1.2408735997780454e-05, + "loss": 1.8953, + "step": 15180500 + }, + { + "epoch": 75.21, + "learning_rate": 1.240749741135437e-05, + "loss": 1.898, + "step": 15181000 + }, + { + "epoch": 75.21, + "learning_rate": 1.2406258824928286e-05, + "loss": 1.8585, + "step": 15181500 + }, + { + "epoch": 75.22, + "learning_rate": 1.2405020238502203e-05, + "loss": 1.8627, + "step": 15182000 + }, + { + "epoch": 75.22, + "learning_rate": 1.2403784129248972e-05, + "loss": 1.8721, + "step": 15182500 + }, + { + "epoch": 75.22, + "learning_rate": 1.2402548019995739e-05, + "loss": 1.8883, + "step": 15183000 + }, + { + "epoch": 75.22, + "learning_rate": 1.2401309433569656e-05, + "loss": 1.9086, + "step": 15183500 + }, + { + "epoch": 75.23, + "learning_rate": 1.2400070847143573e-05, + "loss": 1.8838, + "step": 15184000 + }, + { + "epoch": 75.23, + "learning_rate": 1.2398832260717488e-05, + "loss": 1.8761, + "step": 15184500 + }, + { + "epoch": 75.23, + "learning_rate": 1.2397593674291405e-05, + "loss": 1.8548, + "step": 15185000 + }, + { + "epoch": 75.23, + "learning_rate": 1.2396357565038174e-05, + "loss": 1.9018, + "step": 15185500 + }, + { + "epoch": 75.24, + "learning_rate": 1.239511897861209e-05, + "loss": 1.9073, + "step": 15186000 + }, + { + "epoch": 75.24, + "learning_rate": 1.2393882869358858e-05, + "loss": 1.8747, + "step": 15186500 + }, + { + "epoch": 75.24, + "learning_rate": 1.2392644282932775e-05, + "loss": 1.8984, + "step": 15187000 + }, + { + "epoch": 75.24, + "learning_rate": 1.2391405696506692e-05, + "loss": 1.8921, + "step": 15187500 + }, + { + "epoch": 75.25, + "learning_rate": 1.2390167110080609e-05, + "loss": 1.8821, + "step": 15188000 + }, + { + "epoch": 75.25, + "learning_rate": 1.2388928523654524e-05, + "loss": 1.88, + "step": 15188500 + }, + { + "epoch": 75.25, + "learning_rate": 1.238768993722844e-05, + "loss": 1.888, + "step": 15189000 + }, + { + "epoch": 75.25, + "learning_rate": 1.2386451350802358e-05, + "loss": 1.905, + "step": 15189500 + }, + { + "epoch": 75.26, + "learning_rate": 1.2385212764376273e-05, + "loss": 1.859, + "step": 15190000 + }, + { + "epoch": 75.26, + "learning_rate": 1.238397417795019e-05, + "loss": 1.897, + "step": 15190500 + }, + { + "epoch": 75.26, + "learning_rate": 1.2382735591524105e-05, + "loss": 1.895, + "step": 15191000 + }, + { + "epoch": 75.26, + "learning_rate": 1.2381497005098022e-05, + "loss": 1.9096, + "step": 15191500 + }, + { + "epoch": 75.27, + "learning_rate": 1.2380258418671939e-05, + "loss": 1.9024, + "step": 15192000 + }, + { + "epoch": 75.27, + "learning_rate": 1.2379019832245856e-05, + "loss": 1.8729, + "step": 15192500 + }, + { + "epoch": 75.27, + "learning_rate": 1.2377783722992623e-05, + "loss": 1.8892, + "step": 15193000 + }, + { + "epoch": 75.27, + "learning_rate": 1.237654513656654e-05, + "loss": 1.8705, + "step": 15193500 + }, + { + "epoch": 75.28, + "learning_rate": 1.2375306550140455e-05, + "loss": 1.9112, + "step": 15194000 + }, + { + "epoch": 75.28, + "learning_rate": 1.2374067963714372e-05, + "loss": 1.8826, + "step": 15194500 + }, + { + "epoch": 75.28, + "learning_rate": 1.2372831854461141e-05, + "loss": 1.8834, + "step": 15195000 + }, + { + "epoch": 75.28, + "learning_rate": 1.2371593268035058e-05, + "loss": 1.8808, + "step": 15195500 + }, + { + "epoch": 75.29, + "learning_rate": 1.2370354681608975e-05, + "loss": 1.8758, + "step": 15196000 + }, + { + "epoch": 75.29, + "learning_rate": 1.236911609518289e-05, + "loss": 1.8569, + "step": 15196500 + }, + { + "epoch": 75.29, + "learning_rate": 1.2367877508756805e-05, + "loss": 1.9037, + "step": 15197000 + }, + { + "epoch": 75.29, + "learning_rate": 1.2366641399503576e-05, + "loss": 1.8866, + "step": 15197500 + }, + { + "epoch": 75.3, + "learning_rate": 1.2365407767423195e-05, + "loss": 1.9038, + "step": 15198000 + }, + { + "epoch": 75.3, + "learning_rate": 1.2364169180997112e-05, + "loss": 1.88, + "step": 15198500 + }, + { + "epoch": 75.3, + "learning_rate": 1.2362930594571029e-05, + "loss": 1.8747, + "step": 15199000 + }, + { + "epoch": 75.3, + "learning_rate": 1.2361694485317797e-05, + "loss": 1.9122, + "step": 15199500 + }, + { + "epoch": 75.31, + "learning_rate": 1.2360458376064565e-05, + "loss": 1.8969, + "step": 15200000 + }, + { + "epoch": 75.31, + "learning_rate": 1.2359219789638481e-05, + "loss": 1.8956, + "step": 15200500 + }, + { + "epoch": 75.31, + "learning_rate": 1.2357981203212398e-05, + "loss": 1.9032, + "step": 15201000 + }, + { + "epoch": 75.31, + "learning_rate": 1.2356742616786315e-05, + "loss": 1.9012, + "step": 15201500 + }, + { + "epoch": 75.32, + "learning_rate": 1.2355506507533084e-05, + "loss": 1.9021, + "step": 15202000 + }, + { + "epoch": 75.32, + "learning_rate": 1.2354267921107e-05, + "loss": 1.8858, + "step": 15202500 + }, + { + "epoch": 75.32, + "learning_rate": 1.2353029334680916e-05, + "loss": 1.9033, + "step": 15203000 + }, + { + "epoch": 75.32, + "learning_rate": 1.2351790748254832e-05, + "loss": 1.8716, + "step": 15203500 + }, + { + "epoch": 75.33, + "learning_rate": 1.2350552161828748e-05, + "loss": 1.903, + "step": 15204000 + }, + { + "epoch": 75.33, + "learning_rate": 1.2349313575402665e-05, + "loss": 1.8747, + "step": 15204500 + }, + { + "epoch": 75.33, + "learning_rate": 1.234807498897658e-05, + "loss": 1.8843, + "step": 15205000 + }, + { + "epoch": 75.33, + "learning_rate": 1.2346838879723351e-05, + "loss": 1.9003, + "step": 15205500 + }, + { + "epoch": 75.34, + "learning_rate": 1.2345600293297266e-05, + "loss": 1.8647, + "step": 15206000 + }, + { + "epoch": 75.34, + "learning_rate": 1.2344361706871183e-05, + "loss": 1.9028, + "step": 15206500 + }, + { + "epoch": 75.34, + "learning_rate": 1.2343123120445098e-05, + "loss": 1.8957, + "step": 15207000 + }, + { + "epoch": 75.34, + "learning_rate": 1.2341884534019015e-05, + "loss": 1.9114, + "step": 15207500 + }, + { + "epoch": 75.35, + "learning_rate": 1.234064594759293e-05, + "loss": 1.8761, + "step": 15208000 + }, + { + "epoch": 75.35, + "learning_rate": 1.2339407361166848e-05, + "loss": 1.8892, + "step": 15208500 + }, + { + "epoch": 75.35, + "learning_rate": 1.2338168774740765e-05, + "loss": 1.887, + "step": 15209000 + }, + { + "epoch": 75.35, + "learning_rate": 1.2336930188314681e-05, + "loss": 1.8954, + "step": 15209500 + }, + { + "epoch": 75.36, + "learning_rate": 1.2335691601888597e-05, + "loss": 1.8968, + "step": 15210000 + }, + { + "epoch": 75.36, + "learning_rate": 1.2334453015462514e-05, + "loss": 1.8785, + "step": 15210500 + }, + { + "epoch": 75.36, + "learning_rate": 1.233321442903643e-05, + "loss": 1.8776, + "step": 15211000 + }, + { + "epoch": 75.36, + "learning_rate": 1.2331975842610346e-05, + "loss": 1.8974, + "step": 15211500 + }, + { + "epoch": 75.37, + "learning_rate": 1.2330737256184263e-05, + "loss": 1.8917, + "step": 15212000 + }, + { + "epoch": 75.37, + "learning_rate": 1.2329498669758178e-05, + "loss": 1.8752, + "step": 15212500 + }, + { + "epoch": 75.37, + "learning_rate": 1.2328260083332095e-05, + "loss": 1.8689, + "step": 15213000 + }, + { + "epoch": 75.37, + "learning_rate": 1.2327021496906012e-05, + "loss": 1.8761, + "step": 15213500 + }, + { + "epoch": 75.38, + "learning_rate": 1.2325782910479929e-05, + "loss": 1.883, + "step": 15214000 + }, + { + "epoch": 75.38, + "learning_rate": 1.2324544324053844e-05, + "loss": 1.8828, + "step": 15214500 + }, + { + "epoch": 75.38, + "learning_rate": 1.2323305737627761e-05, + "loss": 1.9029, + "step": 15215000 + }, + { + "epoch": 75.38, + "learning_rate": 1.2322069628374528e-05, + "loss": 1.8781, + "step": 15215500 + }, + { + "epoch": 75.39, + "learning_rate": 1.2320831041948445e-05, + "loss": 1.8845, + "step": 15216000 + }, + { + "epoch": 75.39, + "learning_rate": 1.2319592455522362e-05, + "loss": 1.8722, + "step": 15216500 + }, + { + "epoch": 75.39, + "learning_rate": 1.2318353869096279e-05, + "loss": 1.9123, + "step": 15217000 + }, + { + "epoch": 75.39, + "learning_rate": 1.2317115282670194e-05, + "loss": 1.8936, + "step": 15217500 + }, + { + "epoch": 75.4, + "learning_rate": 1.2315876696244111e-05, + "loss": 1.8919, + "step": 15218000 + }, + { + "epoch": 75.4, + "learning_rate": 1.2314638109818028e-05, + "loss": 1.8709, + "step": 15218500 + }, + { + "epoch": 75.4, + "learning_rate": 1.2313399523391945e-05, + "loss": 1.8763, + "step": 15219000 + }, + { + "epoch": 75.4, + "learning_rate": 1.231216093696586e-05, + "loss": 1.8938, + "step": 15219500 + }, + { + "epoch": 75.41, + "learning_rate": 1.2310922350539775e-05, + "loss": 1.8683, + "step": 15220000 + }, + { + "epoch": 75.41, + "learning_rate": 1.2309683764113692e-05, + "loss": 1.8873, + "step": 15220500 + }, + { + "epoch": 75.41, + "learning_rate": 1.2308447654860461e-05, + "loss": 1.8963, + "step": 15221000 + }, + { + "epoch": 75.41, + "learning_rate": 1.2307209068434378e-05, + "loss": 1.876, + "step": 15221500 + }, + { + "epoch": 75.42, + "learning_rate": 1.2305970482008295e-05, + "loss": 1.911, + "step": 15222000 + }, + { + "epoch": 75.42, + "learning_rate": 1.2304734372755062e-05, + "loss": 1.866, + "step": 15222500 + }, + { + "epoch": 75.42, + "learning_rate": 1.2303495786328979e-05, + "loss": 1.9039, + "step": 15223000 + }, + { + "epoch": 75.42, + "learning_rate": 1.2302259677075748e-05, + "loss": 1.8906, + "step": 15223500 + }, + { + "epoch": 75.42, + "learning_rate": 1.2301021090649665e-05, + "loss": 1.8856, + "step": 15224000 + }, + { + "epoch": 75.43, + "learning_rate": 1.2299782504223582e-05, + "loss": 1.89, + "step": 15224500 + }, + { + "epoch": 75.43, + "learning_rate": 1.2298543917797497e-05, + "loss": 1.8773, + "step": 15225000 + }, + { + "epoch": 75.43, + "learning_rate": 1.2297305331371414e-05, + "loss": 1.9063, + "step": 15225500 + }, + { + "epoch": 75.43, + "learning_rate": 1.2296066744945329e-05, + "loss": 1.8803, + "step": 15226000 + }, + { + "epoch": 75.44, + "learning_rate": 1.2294828158519246e-05, + "loss": 1.8947, + "step": 15226500 + }, + { + "epoch": 75.44, + "learning_rate": 1.2293592049266015e-05, + "loss": 1.8747, + "step": 15227000 + }, + { + "epoch": 75.44, + "learning_rate": 1.2292353462839932e-05, + "loss": 1.9022, + "step": 15227500 + }, + { + "epoch": 75.44, + "learning_rate": 1.2291114876413847e-05, + "loss": 1.8702, + "step": 15228000 + }, + { + "epoch": 75.45, + "learning_rate": 1.2289878767160616e-05, + "loss": 1.8991, + "step": 15228500 + }, + { + "epoch": 75.45, + "learning_rate": 1.2288640180734531e-05, + "loss": 1.883, + "step": 15229000 + }, + { + "epoch": 75.45, + "learning_rate": 1.2287401594308448e-05, + "loss": 1.8856, + "step": 15229500 + }, + { + "epoch": 75.45, + "learning_rate": 1.2286163007882365e-05, + "loss": 1.8837, + "step": 15230000 + }, + { + "epoch": 75.46, + "learning_rate": 1.2284924421456282e-05, + "loss": 1.8838, + "step": 15230500 + }, + { + "epoch": 75.46, + "learning_rate": 1.2283685835030197e-05, + "loss": 1.9002, + "step": 15231000 + }, + { + "epoch": 75.46, + "learning_rate": 1.2282447248604114e-05, + "loss": 1.8896, + "step": 15231500 + }, + { + "epoch": 75.46, + "learning_rate": 1.228120866217803e-05, + "loss": 1.9029, + "step": 15232000 + }, + { + "epoch": 75.47, + "learning_rate": 1.2279970075751946e-05, + "loss": 1.8723, + "step": 15232500 + }, + { + "epoch": 75.47, + "learning_rate": 1.2278731489325863e-05, + "loss": 1.8899, + "step": 15233000 + }, + { + "epoch": 75.47, + "learning_rate": 1.2277492902899778e-05, + "loss": 1.8829, + "step": 15233500 + }, + { + "epoch": 75.47, + "learning_rate": 1.2276254316473695e-05, + "loss": 1.8922, + "step": 15234000 + }, + { + "epoch": 75.48, + "learning_rate": 1.2275015730047612e-05, + "loss": 1.887, + "step": 15234500 + }, + { + "epoch": 75.48, + "learning_rate": 1.2273777143621529e-05, + "loss": 1.8769, + "step": 15235000 + }, + { + "epoch": 75.48, + "learning_rate": 1.2272538557195444e-05, + "loss": 1.9016, + "step": 15235500 + }, + { + "epoch": 75.48, + "learning_rate": 1.2271299970769361e-05, + "loss": 1.8839, + "step": 15236000 + }, + { + "epoch": 75.49, + "learning_rate": 1.2270066338688982e-05, + "loss": 1.893, + "step": 15236500 + }, + { + "epoch": 75.49, + "learning_rate": 1.2268827752262899e-05, + "loss": 1.9014, + "step": 15237000 + }, + { + "epoch": 75.49, + "learning_rate": 1.2267589165836814e-05, + "loss": 1.9072, + "step": 15237500 + }, + { + "epoch": 75.49, + "learning_rate": 1.2266350579410731e-05, + "loss": 1.88, + "step": 15238000 + }, + { + "epoch": 75.5, + "learning_rate": 1.2265111992984648e-05, + "loss": 1.8955, + "step": 15238500 + }, + { + "epoch": 75.5, + "learning_rate": 1.2263873406558565e-05, + "loss": 1.8753, + "step": 15239000 + }, + { + "epoch": 75.5, + "learning_rate": 1.226263482013248e-05, + "loss": 1.8998, + "step": 15239500 + }, + { + "epoch": 75.5, + "learning_rate": 1.2261396233706395e-05, + "loss": 1.8691, + "step": 15240000 + }, + { + "epoch": 75.51, + "learning_rate": 1.2260160124453164e-05, + "loss": 1.9096, + "step": 15240500 + }, + { + "epoch": 75.51, + "learning_rate": 1.2258921538027081e-05, + "loss": 1.9023, + "step": 15241000 + }, + { + "epoch": 75.51, + "learning_rate": 1.2257682951600998e-05, + "loss": 1.8804, + "step": 15241500 + }, + { + "epoch": 75.51, + "learning_rate": 1.2256444365174915e-05, + "loss": 1.8878, + "step": 15242000 + }, + { + "epoch": 75.52, + "learning_rate": 1.2255208255921682e-05, + "loss": 1.9075, + "step": 15242500 + }, + { + "epoch": 75.52, + "learning_rate": 1.2253969669495599e-05, + "loss": 1.8794, + "step": 15243000 + }, + { + "epoch": 75.52, + "learning_rate": 1.2252731083069514e-05, + "loss": 1.884, + "step": 15243500 + }, + { + "epoch": 75.52, + "learning_rate": 1.2251492496643431e-05, + "loss": 1.8878, + "step": 15244000 + }, + { + "epoch": 75.53, + "learning_rate": 1.2250253910217348e-05, + "loss": 1.8702, + "step": 15244500 + }, + { + "epoch": 75.53, + "learning_rate": 1.2249017800964117e-05, + "loss": 1.9096, + "step": 15245000 + }, + { + "epoch": 75.53, + "learning_rate": 1.2247779214538032e-05, + "loss": 1.8812, + "step": 15245500 + }, + { + "epoch": 75.53, + "learning_rate": 1.2246540628111949e-05, + "loss": 1.9025, + "step": 15246000 + }, + { + "epoch": 75.54, + "learning_rate": 1.2245302041685866e-05, + "loss": 1.9022, + "step": 15246500 + }, + { + "epoch": 75.54, + "learning_rate": 1.2244063455259781e-05, + "loss": 1.8903, + "step": 15247000 + }, + { + "epoch": 75.54, + "learning_rate": 1.2242824868833698e-05, + "loss": 1.883, + "step": 15247500 + }, + { + "epoch": 75.54, + "learning_rate": 1.2241586282407615e-05, + "loss": 1.8644, + "step": 15248000 + }, + { + "epoch": 75.55, + "learning_rate": 1.2240347695981532e-05, + "loss": 1.8887, + "step": 15248500 + }, + { + "epoch": 75.55, + "learning_rate": 1.2239109109555447e-05, + "loss": 1.8882, + "step": 15249000 + }, + { + "epoch": 75.55, + "learning_rate": 1.2237870523129364e-05, + "loss": 1.9125, + "step": 15249500 + }, + { + "epoch": 75.55, + "learning_rate": 1.2236636891048985e-05, + "loss": 1.8907, + "step": 15250000 + }, + { + "epoch": 75.56, + "learning_rate": 1.2235398304622902e-05, + "loss": 1.899, + "step": 15250500 + }, + { + "epoch": 75.56, + "learning_rate": 1.2234159718196817e-05, + "loss": 1.8749, + "step": 15251000 + }, + { + "epoch": 75.56, + "learning_rate": 1.2232921131770734e-05, + "loss": 1.9026, + "step": 15251500 + }, + { + "epoch": 75.56, + "learning_rate": 1.2231685022517501e-05, + "loss": 1.9002, + "step": 15252000 + }, + { + "epoch": 75.57, + "learning_rate": 1.2230446436091418e-05, + "loss": 1.8937, + "step": 15252500 + }, + { + "epoch": 75.57, + "learning_rate": 1.2229207849665335e-05, + "loss": 1.8809, + "step": 15253000 + }, + { + "epoch": 75.57, + "learning_rate": 1.2227969263239252e-05, + "loss": 1.8784, + "step": 15253500 + }, + { + "epoch": 75.57, + "learning_rate": 1.2226730676813167e-05, + "loss": 1.9051, + "step": 15254000 + }, + { + "epoch": 75.58, + "learning_rate": 1.2225492090387084e-05, + "loss": 1.8822, + "step": 15254500 + }, + { + "epoch": 75.58, + "learning_rate": 1.2224253503961e-05, + "loss": 1.8945, + "step": 15255000 + }, + { + "epoch": 75.58, + "learning_rate": 1.2223017394707768e-05, + "loss": 1.8802, + "step": 15255500 + }, + { + "epoch": 75.58, + "learning_rate": 1.2221778808281685e-05, + "loss": 1.8987, + "step": 15256000 + }, + { + "epoch": 75.59, + "learning_rate": 1.2220540221855602e-05, + "loss": 1.8964, + "step": 15256500 + }, + { + "epoch": 75.59, + "learning_rate": 1.2219301635429517e-05, + "loss": 1.8794, + "step": 15257000 + }, + { + "epoch": 75.59, + "learning_rate": 1.2218063049003434e-05, + "loss": 1.8951, + "step": 15257500 + }, + { + "epoch": 75.59, + "learning_rate": 1.2216824462577351e-05, + "loss": 1.8966, + "step": 15258000 + }, + { + "epoch": 75.6, + "learning_rate": 1.2215585876151268e-05, + "loss": 1.9008, + "step": 15258500 + }, + { + "epoch": 75.6, + "learning_rate": 1.2214347289725183e-05, + "loss": 1.8893, + "step": 15259000 + }, + { + "epoch": 75.6, + "learning_rate": 1.2213108703299098e-05, + "loss": 1.9003, + "step": 15259500 + }, + { + "epoch": 75.6, + "learning_rate": 1.2211870116873015e-05, + "loss": 1.8819, + "step": 15260000 + }, + { + "epoch": 75.61, + "learning_rate": 1.2210636484792638e-05, + "loss": 1.8899, + "step": 15260500 + }, + { + "epoch": 75.61, + "learning_rate": 1.2209397898366553e-05, + "loss": 1.9013, + "step": 15261000 + }, + { + "epoch": 75.61, + "learning_rate": 1.2208159311940468e-05, + "loss": 1.8883, + "step": 15261500 + }, + { + "epoch": 75.61, + "learning_rate": 1.2206920725514385e-05, + "loss": 1.9007, + "step": 15262000 + }, + { + "epoch": 75.62, + "learning_rate": 1.2205682139088302e-05, + "loss": 1.9042, + "step": 15262500 + }, + { + "epoch": 75.62, + "learning_rate": 1.2204443552662219e-05, + "loss": 1.8675, + "step": 15263000 + }, + { + "epoch": 75.62, + "learning_rate": 1.2203204966236134e-05, + "loss": 1.8913, + "step": 15263500 + }, + { + "epoch": 75.62, + "learning_rate": 1.2201966379810051e-05, + "loss": 1.8735, + "step": 15264000 + }, + { + "epoch": 75.63, + "learning_rate": 1.2200727793383968e-05, + "loss": 1.9069, + "step": 15264500 + }, + { + "epoch": 75.63, + "learning_rate": 1.2199489206957885e-05, + "loss": 1.9057, + "step": 15265000 + }, + { + "epoch": 75.63, + "learning_rate": 1.21982506205318e-05, + "loss": 1.8724, + "step": 15265500 + }, + { + "epoch": 75.63, + "learning_rate": 1.2197014511278569e-05, + "loss": 1.8808, + "step": 15266000 + }, + { + "epoch": 75.64, + "learning_rate": 1.2195775924852484e-05, + "loss": 1.9049, + "step": 15266500 + }, + { + "epoch": 75.64, + "learning_rate": 1.2194537338426401e-05, + "loss": 1.8792, + "step": 15267000 + }, + { + "epoch": 75.64, + "learning_rate": 1.2193298752000318e-05, + "loss": 1.8795, + "step": 15267500 + }, + { + "epoch": 75.64, + "learning_rate": 1.2192062642747087e-05, + "loss": 1.8903, + "step": 15268000 + }, + { + "epoch": 75.65, + "learning_rate": 1.2190824056321002e-05, + "loss": 1.9035, + "step": 15268500 + }, + { + "epoch": 75.65, + "learning_rate": 1.218958794706777e-05, + "loss": 1.8962, + "step": 15269000 + }, + { + "epoch": 75.65, + "learning_rate": 1.2188349360641688e-05, + "loss": 1.8776, + "step": 15269500 + }, + { + "epoch": 75.65, + "learning_rate": 1.2187110774215605e-05, + "loss": 1.8737, + "step": 15270000 + }, + { + "epoch": 75.66, + "learning_rate": 1.218587218778952e-05, + "loss": 1.8824, + "step": 15270500 + }, + { + "epoch": 75.66, + "learning_rate": 1.2184633601363437e-05, + "loss": 1.8905, + "step": 15271000 + }, + { + "epoch": 75.66, + "learning_rate": 1.2183395014937354e-05, + "loss": 1.9001, + "step": 15271500 + }, + { + "epoch": 75.66, + "learning_rate": 1.2182156428511269e-05, + "loss": 1.901, + "step": 15272000 + }, + { + "epoch": 75.67, + "learning_rate": 1.2180920319258038e-05, + "loss": 1.9247, + "step": 15272500 + }, + { + "epoch": 75.67, + "learning_rate": 1.2179681732831955e-05, + "loss": 1.8782, + "step": 15273000 + }, + { + "epoch": 75.67, + "learning_rate": 1.217844314640587e-05, + "loss": 1.8718, + "step": 15273500 + }, + { + "epoch": 75.67, + "learning_rate": 1.2177204559979787e-05, + "loss": 1.8806, + "step": 15274000 + }, + { + "epoch": 75.68, + "learning_rate": 1.2175965973553704e-05, + "loss": 1.9052, + "step": 15274500 + }, + { + "epoch": 75.68, + "learning_rate": 1.2174727387127619e-05, + "loss": 1.9043, + "step": 15275000 + }, + { + "epoch": 75.68, + "learning_rate": 1.2173488800701536e-05, + "loss": 1.8847, + "step": 15275500 + }, + { + "epoch": 75.68, + "learning_rate": 1.2172250214275451e-05, + "loss": 1.8917, + "step": 15276000 + }, + { + "epoch": 75.69, + "learning_rate": 1.2171011627849368e-05, + "loss": 1.9003, + "step": 15276500 + }, + { + "epoch": 75.69, + "learning_rate": 1.2169773041423285e-05, + "loss": 1.8879, + "step": 15277000 + }, + { + "epoch": 75.69, + "learning_rate": 1.2168534454997202e-05, + "loss": 1.894, + "step": 15277500 + }, + { + "epoch": 75.69, + "learning_rate": 1.2167295868571117e-05, + "loss": 1.8867, + "step": 15278000 + }, + { + "epoch": 75.69, + "learning_rate": 1.2166059759317886e-05, + "loss": 1.8951, + "step": 15278500 + }, + { + "epoch": 75.7, + "learning_rate": 1.2164821172891801e-05, + "loss": 1.9017, + "step": 15279000 + }, + { + "epoch": 75.7, + "learning_rate": 1.2163582586465718e-05, + "loss": 1.8796, + "step": 15279500 + }, + { + "epoch": 75.7, + "learning_rate": 1.2162344000039635e-05, + "loss": 1.8936, + "step": 15280000 + }, + { + "epoch": 75.7, + "learning_rate": 1.2161105413613552e-05, + "loss": 1.8888, + "step": 15280500 + }, + { + "epoch": 75.71, + "learning_rate": 1.2159866827187467e-05, + "loss": 1.8728, + "step": 15281000 + }, + { + "epoch": 75.71, + "learning_rate": 1.2158628240761384e-05, + "loss": 1.8943, + "step": 15281500 + }, + { + "epoch": 75.71, + "learning_rate": 1.2157389654335301e-05, + "loss": 1.8752, + "step": 15282000 + }, + { + "epoch": 75.71, + "learning_rate": 1.2156151067909218e-05, + "loss": 1.8904, + "step": 15282500 + }, + { + "epoch": 75.72, + "learning_rate": 1.2154912481483133e-05, + "loss": 1.9005, + "step": 15283000 + }, + { + "epoch": 75.72, + "learning_rate": 1.215367389505705e-05, + "loss": 1.8808, + "step": 15283500 + }, + { + "epoch": 75.72, + "learning_rate": 1.2152435308630966e-05, + "loss": 1.9046, + "step": 15284000 + }, + { + "epoch": 75.72, + "learning_rate": 1.2151199199377734e-05, + "loss": 1.8995, + "step": 15284500 + }, + { + "epoch": 75.73, + "learning_rate": 1.2149960612951651e-05, + "loss": 1.8921, + "step": 15285000 + }, + { + "epoch": 75.73, + "learning_rate": 1.2148722026525568e-05, + "loss": 1.9039, + "step": 15285500 + }, + { + "epoch": 75.73, + "learning_rate": 1.2147485917272335e-05, + "loss": 1.8901, + "step": 15286000 + }, + { + "epoch": 75.73, + "learning_rate": 1.2146247330846252e-05, + "loss": 1.8807, + "step": 15286500 + }, + { + "epoch": 75.74, + "learning_rate": 1.2145008744420169e-05, + "loss": 1.8694, + "step": 15287000 + }, + { + "epoch": 75.74, + "learning_rate": 1.2143770157994084e-05, + "loss": 1.8916, + "step": 15287500 + }, + { + "epoch": 75.74, + "learning_rate": 1.2142534048740855e-05, + "loss": 1.8717, + "step": 15288000 + }, + { + "epoch": 75.74, + "learning_rate": 1.214129546231477e-05, + "loss": 1.911, + "step": 15288500 + }, + { + "epoch": 75.75, + "learning_rate": 1.2140056875888687e-05, + "loss": 1.9041, + "step": 15289000 + }, + { + "epoch": 75.75, + "learning_rate": 1.2138820766635454e-05, + "loss": 1.9315, + "step": 15289500 + }, + { + "epoch": 75.75, + "learning_rate": 1.2137584657382223e-05, + "loss": 1.9098, + "step": 15290000 + }, + { + "epoch": 75.75, + "learning_rate": 1.213634607095614e-05, + "loss": 1.8833, + "step": 15290500 + }, + { + "epoch": 75.76, + "learning_rate": 1.2135107484530057e-05, + "loss": 1.8775, + "step": 15291000 + }, + { + "epoch": 75.76, + "learning_rate": 1.2133868898103972e-05, + "loss": 1.8773, + "step": 15291500 + }, + { + "epoch": 75.76, + "learning_rate": 1.2132630311677889e-05, + "loss": 1.9074, + "step": 15292000 + }, + { + "epoch": 75.76, + "learning_rate": 1.2131394202424658e-05, + "loss": 1.8815, + "step": 15292500 + }, + { + "epoch": 75.77, + "learning_rate": 1.2130155615998575e-05, + "loss": 1.9043, + "step": 15293000 + }, + { + "epoch": 75.77, + "learning_rate": 1.212891702957249e-05, + "loss": 1.8638, + "step": 15293500 + }, + { + "epoch": 75.77, + "learning_rate": 1.2127678443146407e-05, + "loss": 1.9259, + "step": 15294000 + }, + { + "epoch": 75.77, + "learning_rate": 1.2126439856720324e-05, + "loss": 1.897, + "step": 15294500 + }, + { + "epoch": 75.78, + "learning_rate": 1.2125201270294239e-05, + "loss": 1.8631, + "step": 15295000 + }, + { + "epoch": 75.78, + "learning_rate": 1.2123965161041008e-05, + "loss": 1.8988, + "step": 15295500 + }, + { + "epoch": 75.78, + "learning_rate": 1.2122729051787777e-05, + "loss": 1.8994, + "step": 15296000 + }, + { + "epoch": 75.78, + "learning_rate": 1.2121490465361694e-05, + "loss": 1.8789, + "step": 15296500 + }, + { + "epoch": 75.79, + "learning_rate": 1.2120251878935609e-05, + "loss": 1.9435, + "step": 15297000 + }, + { + "epoch": 75.79, + "learning_rate": 1.2119013292509524e-05, + "loss": 1.9018, + "step": 15297500 + }, + { + "epoch": 75.79, + "learning_rate": 1.2117774706083441e-05, + "loss": 1.9045, + "step": 15298000 + }, + { + "epoch": 75.79, + "learning_rate": 1.2116536119657358e-05, + "loss": 1.8851, + "step": 15298500 + }, + { + "epoch": 75.8, + "learning_rate": 1.2115297533231275e-05, + "loss": 1.9079, + "step": 15299000 + }, + { + "epoch": 75.8, + "learning_rate": 1.211405894680519e-05, + "loss": 1.8977, + "step": 15299500 + }, + { + "epoch": 75.8, + "learning_rate": 1.2112820360379107e-05, + "loss": 1.8714, + "step": 15300000 + }, + { + "epoch": 75.8, + "learning_rate": 1.2111581773953024e-05, + "loss": 1.8939, + "step": 15300500 + }, + { + "epoch": 75.81, + "learning_rate": 1.2110343187526941e-05, + "loss": 1.8837, + "step": 15301000 + }, + { + "epoch": 75.81, + "learning_rate": 1.2109104601100856e-05, + "loss": 1.9135, + "step": 15301500 + }, + { + "epoch": 75.81, + "learning_rate": 1.2107866014674771e-05, + "loss": 1.884, + "step": 15302000 + }, + { + "epoch": 75.81, + "learning_rate": 1.2106627428248688e-05, + "loss": 1.8817, + "step": 15302500 + }, + { + "epoch": 75.82, + "learning_rate": 1.2105388841822605e-05, + "loss": 1.9053, + "step": 15303000 + }, + { + "epoch": 75.82, + "learning_rate": 1.2104150255396522e-05, + "loss": 1.9037, + "step": 15303500 + }, + { + "epoch": 75.82, + "learning_rate": 1.2102914146143291e-05, + "loss": 1.8923, + "step": 15304000 + }, + { + "epoch": 75.82, + "learning_rate": 1.2101675559717208e-05, + "loss": 1.8834, + "step": 15304500 + }, + { + "epoch": 75.83, + "learning_rate": 1.2100436973291123e-05, + "loss": 1.8935, + "step": 15305000 + }, + { + "epoch": 75.83, + "learning_rate": 1.2099198386865038e-05, + "loss": 1.8792, + "step": 15305500 + }, + { + "epoch": 75.83, + "learning_rate": 1.2097959800438955e-05, + "loss": 1.8827, + "step": 15306000 + }, + { + "epoch": 75.83, + "learning_rate": 1.2096721214012872e-05, + "loss": 1.8725, + "step": 15306500 + }, + { + "epoch": 75.84, + "learning_rate": 1.2095482627586787e-05, + "loss": 1.8947, + "step": 15307000 + }, + { + "epoch": 75.84, + "learning_rate": 1.2094244041160704e-05, + "loss": 1.886, + "step": 15307500 + }, + { + "epoch": 75.84, + "learning_rate": 1.2093005454734621e-05, + "loss": 1.8948, + "step": 15308000 + }, + { + "epoch": 75.84, + "learning_rate": 1.2091766868308538e-05, + "loss": 1.88, + "step": 15308500 + }, + { + "epoch": 75.85, + "learning_rate": 1.2090530759055305e-05, + "loss": 1.878, + "step": 15309000 + }, + { + "epoch": 75.85, + "learning_rate": 1.2089292172629222e-05, + "loss": 1.9066, + "step": 15309500 + }, + { + "epoch": 75.85, + "learning_rate": 1.2088053586203137e-05, + "loss": 1.909, + "step": 15310000 + }, + { + "epoch": 75.85, + "learning_rate": 1.2086814999777054e-05, + "loss": 1.8991, + "step": 15310500 + }, + { + "epoch": 75.86, + "learning_rate": 1.2085578890523823e-05, + "loss": 1.8745, + "step": 15311000 + }, + { + "epoch": 75.86, + "learning_rate": 1.208434030409774e-05, + "loss": 1.8999, + "step": 15311500 + }, + { + "epoch": 75.86, + "learning_rate": 1.2083101717671657e-05, + "loss": 1.9124, + "step": 15312000 + }, + { + "epoch": 75.86, + "learning_rate": 1.2081865608418424e-05, + "loss": 1.8633, + "step": 15312500 + }, + { + "epoch": 75.87, + "learning_rate": 1.2080627021992341e-05, + "loss": 1.8726, + "step": 15313000 + }, + { + "epoch": 75.87, + "learning_rate": 1.2079388435566258e-05, + "loss": 1.891, + "step": 15313500 + }, + { + "epoch": 75.87, + "learning_rate": 1.2078149849140175e-05, + "loss": 1.9122, + "step": 15314000 + }, + { + "epoch": 75.87, + "learning_rate": 1.207691126271409e-05, + "loss": 1.8884, + "step": 15314500 + }, + { + "epoch": 75.88, + "learning_rate": 1.2075675153460857e-05, + "loss": 1.8814, + "step": 15315000 + }, + { + "epoch": 75.88, + "learning_rate": 1.2074436567034774e-05, + "loss": 1.8984, + "step": 15315500 + }, + { + "epoch": 75.88, + "learning_rate": 1.2073197980608691e-05, + "loss": 1.9047, + "step": 15316000 + }, + { + "epoch": 75.88, + "learning_rate": 1.2071959394182608e-05, + "loss": 1.8943, + "step": 15316500 + }, + { + "epoch": 75.89, + "learning_rate": 1.2070720807756525e-05, + "loss": 1.9004, + "step": 15317000 + }, + { + "epoch": 75.89, + "learning_rate": 1.206948222133044e-05, + "loss": 1.8765, + "step": 15317500 + }, + { + "epoch": 75.89, + "learning_rate": 1.2068243634904357e-05, + "loss": 1.8754, + "step": 15318000 + }, + { + "epoch": 75.89, + "learning_rate": 1.2067007525651124e-05, + "loss": 1.8897, + "step": 15318500 + }, + { + "epoch": 75.9, + "learning_rate": 1.2065771416397895e-05, + "loss": 1.9208, + "step": 15319000 + }, + { + "epoch": 75.9, + "learning_rate": 1.206453282997181e-05, + "loss": 1.8825, + "step": 15319500 + }, + { + "epoch": 75.9, + "learning_rate": 1.2063294243545727e-05, + "loss": 1.8696, + "step": 15320000 + }, + { + "epoch": 75.9, + "learning_rate": 1.2062055657119644e-05, + "loss": 1.9103, + "step": 15320500 + }, + { + "epoch": 75.91, + "learning_rate": 1.2060817070693559e-05, + "loss": 1.9067, + "step": 15321000 + }, + { + "epoch": 75.91, + "learning_rate": 1.2059578484267476e-05, + "loss": 1.9077, + "step": 15321500 + }, + { + "epoch": 75.91, + "learning_rate": 1.2058339897841391e-05, + "loss": 1.8837, + "step": 15322000 + }, + { + "epoch": 75.91, + "learning_rate": 1.2057101311415308e-05, + "loss": 1.9158, + "step": 15322500 + }, + { + "epoch": 75.92, + "learning_rate": 1.2055862724989225e-05, + "loss": 1.9149, + "step": 15323000 + }, + { + "epoch": 75.92, + "learning_rate": 1.205462413856314e-05, + "loss": 1.9041, + "step": 15323500 + }, + { + "epoch": 75.92, + "learning_rate": 1.2053385552137057e-05, + "loss": 1.8824, + "step": 15324000 + }, + { + "epoch": 75.92, + "learning_rate": 1.2052149442883826e-05, + "loss": 1.8788, + "step": 15324500 + }, + { + "epoch": 75.93, + "learning_rate": 1.2050910856457741e-05, + "loss": 1.8809, + "step": 15325000 + }, + { + "epoch": 75.93, + "learning_rate": 1.2049672270031658e-05, + "loss": 1.9003, + "step": 15325500 + }, + { + "epoch": 75.93, + "learning_rate": 1.2048433683605575e-05, + "loss": 1.8942, + "step": 15326000 + }, + { + "epoch": 75.93, + "learning_rate": 1.2047197574352344e-05, + "loss": 1.8783, + "step": 15326500 + }, + { + "epoch": 75.94, + "learning_rate": 1.2045958987926261e-05, + "loss": 1.9196, + "step": 15327000 + }, + { + "epoch": 75.94, + "learning_rate": 1.2044720401500176e-05, + "loss": 1.8984, + "step": 15327500 + }, + { + "epoch": 75.94, + "learning_rate": 1.2043481815074093e-05, + "loss": 1.8962, + "step": 15328000 + }, + { + "epoch": 75.94, + "learning_rate": 1.2042243228648008e-05, + "loss": 1.9032, + "step": 15328500 + }, + { + "epoch": 75.95, + "learning_rate": 1.2041004642221925e-05, + "loss": 1.9013, + "step": 15329000 + }, + { + "epoch": 75.95, + "learning_rate": 1.2039766055795842e-05, + "loss": 1.9005, + "step": 15329500 + }, + { + "epoch": 75.95, + "learning_rate": 1.2038527469369757e-05, + "loss": 1.8993, + "step": 15330000 + }, + { + "epoch": 75.95, + "learning_rate": 1.2037288882943674e-05, + "loss": 1.912, + "step": 15330500 + }, + { + "epoch": 75.96, + "learning_rate": 1.2036052773690443e-05, + "loss": 1.9004, + "step": 15331000 + }, + { + "epoch": 75.96, + "learning_rate": 1.203481418726436e-05, + "loss": 1.909, + "step": 15331500 + }, + { + "epoch": 75.96, + "learning_rate": 1.2033575600838275e-05, + "loss": 1.8784, + "step": 15332000 + }, + { + "epoch": 75.96, + "learning_rate": 1.2032337014412192e-05, + "loss": 1.9069, + "step": 15332500 + }, + { + "epoch": 75.96, + "learning_rate": 1.2031098427986108e-05, + "loss": 1.8734, + "step": 15333000 + }, + { + "epoch": 75.97, + "learning_rate": 1.2029859841560024e-05, + "loss": 1.9101, + "step": 15333500 + }, + { + "epoch": 75.97, + "learning_rate": 1.2028621255133941e-05, + "loss": 1.9136, + "step": 15334000 + }, + { + "epoch": 75.97, + "learning_rate": 1.2027382668707858e-05, + "loss": 1.8864, + "step": 15334500 + }, + { + "epoch": 75.97, + "learning_rate": 1.2026149036627477e-05, + "loss": 1.9181, + "step": 15335000 + }, + { + "epoch": 75.98, + "learning_rate": 1.2024912927374248e-05, + "loss": 1.9033, + "step": 15335500 + }, + { + "epoch": 75.98, + "learning_rate": 1.2023674340948163e-05, + "loss": 1.8974, + "step": 15336000 + }, + { + "epoch": 75.98, + "learning_rate": 1.202243575452208e-05, + "loss": 1.8745, + "step": 15336500 + }, + { + "epoch": 75.98, + "learning_rate": 1.2021197168095997e-05, + "loss": 1.9128, + "step": 15337000 + }, + { + "epoch": 75.99, + "learning_rate": 1.2019961058842764e-05, + "loss": 1.9056, + "step": 15337500 + }, + { + "epoch": 75.99, + "learning_rate": 1.2018722472416681e-05, + "loss": 1.884, + "step": 15338000 + }, + { + "epoch": 75.99, + "learning_rate": 1.2017483885990598e-05, + "loss": 1.8867, + "step": 15338500 + }, + { + "epoch": 75.99, + "learning_rate": 1.2016247776737367e-05, + "loss": 1.8959, + "step": 15339000 + }, + { + "epoch": 76.0, + "learning_rate": 1.2015009190311282e-05, + "loss": 1.8837, + "step": 15339500 + }, + { + "epoch": 76.0, + "learning_rate": 1.2013770603885199e-05, + "loss": 1.8864, + "step": 15340000 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.679493156752265, + "eval_accuracy_mlm": 0.6396720061085497, + "eval_accuracy_nsp": 0.8674728093536608, + "eval_loss": 2.308666467666626, + "eval_runtime": 146.9106, + "eval_samples_per_second": 1735.47, + "eval_steps_per_second": 72.316, + "step": 15340068 + }, + { + "epoch": 76.0, + "learning_rate": 1.2012532017459114e-05, + "loss": 1.8648, + "step": 15340500 + }, + { + "epoch": 76.0, + "learning_rate": 1.2011293431033031e-05, + "loss": 1.8656, + "step": 15341000 + }, + { + "epoch": 76.01, + "learning_rate": 1.2010054844606948e-05, + "loss": 1.8683, + "step": 15341500 + }, + { + "epoch": 76.01, + "learning_rate": 1.2008816258180863e-05, + "loss": 1.8716, + "step": 15342000 + }, + { + "epoch": 76.01, + "learning_rate": 1.2007580148927634e-05, + "loss": 1.8754, + "step": 15342500 + }, + { + "epoch": 76.01, + "learning_rate": 1.2006341562501549e-05, + "loss": 1.8832, + "step": 15343000 + }, + { + "epoch": 76.02, + "learning_rate": 1.2005102976075464e-05, + "loss": 1.8649, + "step": 15343500 + }, + { + "epoch": 76.02, + "learning_rate": 1.2003864389649381e-05, + "loss": 1.8779, + "step": 15344000 + }, + { + "epoch": 76.02, + "learning_rate": 1.2002625803223298e-05, + "loss": 1.8951, + "step": 15344500 + }, + { + "epoch": 76.02, + "learning_rate": 1.2001387216797213e-05, + "loss": 1.8625, + "step": 15345000 + }, + { + "epoch": 76.03, + "learning_rate": 1.2000151107543984e-05, + "loss": 1.879, + "step": 15345500 + }, + { + "epoch": 76.03, + "learning_rate": 1.1998912521117899e-05, + "loss": 1.8718, + "step": 15346000 + }, + { + "epoch": 76.03, + "learning_rate": 1.1997673934691816e-05, + "loss": 1.8633, + "step": 15346500 + }, + { + "epoch": 76.03, + "learning_rate": 1.1996435348265731e-05, + "loss": 1.8749, + "step": 15347000 + }, + { + "epoch": 76.04, + "learning_rate": 1.1995196761839648e-05, + "loss": 1.8774, + "step": 15347500 + }, + { + "epoch": 76.04, + "learning_rate": 1.1993960652586417e-05, + "loss": 1.8653, + "step": 15348000 + }, + { + "epoch": 76.04, + "learning_rate": 1.1992722066160334e-05, + "loss": 1.8872, + "step": 15348500 + }, + { + "epoch": 76.04, + "learning_rate": 1.199148347973425e-05, + "loss": 1.8899, + "step": 15349000 + }, + { + "epoch": 76.05, + "learning_rate": 1.1990244893308166e-05, + "loss": 1.8905, + "step": 15349500 + }, + { + "epoch": 76.05, + "learning_rate": 1.1989006306882083e-05, + "loss": 1.8859, + "step": 15350000 + }, + { + "epoch": 76.05, + "learning_rate": 1.1987767720455998e-05, + "loss": 1.8888, + "step": 15350500 + }, + { + "epoch": 76.05, + "learning_rate": 1.1986529134029915e-05, + "loss": 1.8765, + "step": 15351000 + }, + { + "epoch": 76.06, + "learning_rate": 1.198529054760383e-05, + "loss": 1.8703, + "step": 15351500 + }, + { + "epoch": 76.06, + "learning_rate": 1.1984051961177747e-05, + "loss": 1.9111, + "step": 15352000 + }, + { + "epoch": 76.06, + "learning_rate": 1.1982813374751664e-05, + "loss": 1.8686, + "step": 15352500 + }, + { + "epoch": 76.06, + "learning_rate": 1.1981574788325581e-05, + "loss": 1.8917, + "step": 15353000 + }, + { + "epoch": 76.07, + "learning_rate": 1.1980336201899496e-05, + "loss": 1.8757, + "step": 15353500 + }, + { + "epoch": 76.07, + "learning_rate": 1.1979100092646265e-05, + "loss": 1.8821, + "step": 15354000 + }, + { + "epoch": 76.07, + "learning_rate": 1.197786150622018e-05, + "loss": 1.8752, + "step": 15354500 + }, + { + "epoch": 76.07, + "learning_rate": 1.1976622919794097e-05, + "loss": 1.8905, + "step": 15355000 + }, + { + "epoch": 76.08, + "learning_rate": 1.1975384333368014e-05, + "loss": 1.8784, + "step": 15355500 + }, + { + "epoch": 76.08, + "learning_rate": 1.1974148224114783e-05, + "loss": 1.8631, + "step": 15356000 + }, + { + "epoch": 76.08, + "learning_rate": 1.19729096376887e-05, + "loss": 1.8906, + "step": 15356500 + }, + { + "epoch": 76.08, + "learning_rate": 1.1971671051262615e-05, + "loss": 1.8968, + "step": 15357000 + }, + { + "epoch": 76.09, + "learning_rate": 1.1970432464836532e-05, + "loss": 1.8813, + "step": 15357500 + }, + { + "epoch": 76.09, + "learning_rate": 1.1969193878410447e-05, + "loss": 1.8795, + "step": 15358000 + }, + { + "epoch": 76.09, + "learning_rate": 1.1967955291984364e-05, + "loss": 1.8721, + "step": 15358500 + }, + { + "epoch": 76.09, + "learning_rate": 1.1966716705558281e-05, + "loss": 1.8726, + "step": 15359000 + }, + { + "epoch": 76.1, + "learning_rate": 1.1965478119132198e-05, + "loss": 1.8954, + "step": 15359500 + }, + { + "epoch": 76.1, + "learning_rate": 1.1964239532706113e-05, + "loss": 1.8844, + "step": 15360000 + }, + { + "epoch": 76.1, + "learning_rate": 1.196300094628003e-05, + "loss": 1.8878, + "step": 15360500 + }, + { + "epoch": 76.1, + "learning_rate": 1.1961764837026797e-05, + "loss": 1.8772, + "step": 15361000 + }, + { + "epoch": 76.11, + "learning_rate": 1.1960526250600714e-05, + "loss": 1.8606, + "step": 15361500 + }, + { + "epoch": 76.11, + "learning_rate": 1.1959287664174631e-05, + "loss": 1.8582, + "step": 15362000 + }, + { + "epoch": 76.11, + "learning_rate": 1.1958049077748548e-05, + "loss": 1.9032, + "step": 15362500 + }, + { + "epoch": 76.11, + "learning_rate": 1.1956812968495317e-05, + "loss": 1.8693, + "step": 15363000 + }, + { + "epoch": 76.12, + "learning_rate": 1.1955576859242084e-05, + "loss": 1.8733, + "step": 15363500 + }, + { + "epoch": 76.12, + "learning_rate": 1.1954338272816001e-05, + "loss": 1.8887, + "step": 15364000 + }, + { + "epoch": 76.12, + "learning_rate": 1.1953099686389918e-05, + "loss": 1.8763, + "step": 15364500 + }, + { + "epoch": 76.12, + "learning_rate": 1.1951861099963833e-05, + "loss": 1.8616, + "step": 15365000 + }, + { + "epoch": 76.13, + "learning_rate": 1.195062251353775e-05, + "loss": 1.902, + "step": 15365500 + }, + { + "epoch": 76.13, + "learning_rate": 1.1949386404284519e-05, + "loss": 1.8737, + "step": 15366000 + }, + { + "epoch": 76.13, + "learning_rate": 1.1948147817858434e-05, + "loss": 1.8818, + "step": 15366500 + }, + { + "epoch": 76.13, + "learning_rate": 1.1946911708605203e-05, + "loss": 1.8669, + "step": 15367000 + }, + { + "epoch": 76.14, + "learning_rate": 1.194567312217912e-05, + "loss": 1.9131, + "step": 15367500 + }, + { + "epoch": 76.14, + "learning_rate": 1.1944434535753037e-05, + "loss": 1.8822, + "step": 15368000 + }, + { + "epoch": 76.14, + "learning_rate": 1.1943195949326954e-05, + "loss": 1.8756, + "step": 15368500 + }, + { + "epoch": 76.14, + "learning_rate": 1.1941957362900869e-05, + "loss": 1.8778, + "step": 15369000 + }, + { + "epoch": 76.15, + "learning_rate": 1.1940718776474786e-05, + "loss": 1.9047, + "step": 15369500 + }, + { + "epoch": 76.15, + "learning_rate": 1.1939480190048701e-05, + "loss": 1.8774, + "step": 15370000 + }, + { + "epoch": 76.15, + "learning_rate": 1.1938241603622618e-05, + "loss": 1.8896, + "step": 15370500 + }, + { + "epoch": 76.15, + "learning_rate": 1.1937003017196533e-05, + "loss": 1.892, + "step": 15371000 + }, + { + "epoch": 76.16, + "learning_rate": 1.193576443077045e-05, + "loss": 1.8903, + "step": 15371500 + }, + { + "epoch": 76.16, + "learning_rate": 1.1934525844344367e-05, + "loss": 1.8996, + "step": 15372000 + }, + { + "epoch": 76.16, + "learning_rate": 1.1933289735091136e-05, + "loss": 1.8693, + "step": 15372500 + }, + { + "epoch": 76.16, + "learning_rate": 1.1932051148665053e-05, + "loss": 1.8654, + "step": 15373000 + }, + { + "epoch": 76.17, + "learning_rate": 1.1930812562238968e-05, + "loss": 1.8835, + "step": 15373500 + }, + { + "epoch": 76.17, + "learning_rate": 1.1929573975812885e-05, + "loss": 1.8739, + "step": 15374000 + }, + { + "epoch": 76.17, + "learning_rate": 1.19283353893868e-05, + "loss": 1.8731, + "step": 15374500 + }, + { + "epoch": 76.17, + "learning_rate": 1.1927096802960717e-05, + "loss": 1.8647, + "step": 15375000 + }, + { + "epoch": 76.18, + "learning_rate": 1.1925858216534634e-05, + "loss": 1.8783, + "step": 15375500 + }, + { + "epoch": 76.18, + "learning_rate": 1.1924619630108551e-05, + "loss": 1.883, + "step": 15376000 + }, + { + "epoch": 76.18, + "learning_rate": 1.1923381043682466e-05, + "loss": 1.8746, + "step": 15376500 + }, + { + "epoch": 76.18, + "learning_rate": 1.1922142457256383e-05, + "loss": 1.839, + "step": 15377000 + }, + { + "epoch": 76.19, + "learning_rate": 1.192090634800315e-05, + "loss": 1.875, + "step": 15377500 + }, + { + "epoch": 76.19, + "learning_rate": 1.1919667761577067e-05, + "loss": 1.8939, + "step": 15378000 + }, + { + "epoch": 76.19, + "learning_rate": 1.1918431652323836e-05, + "loss": 1.8808, + "step": 15378500 + }, + { + "epoch": 76.19, + "learning_rate": 1.1917193065897753e-05, + "loss": 1.8936, + "step": 15379000 + }, + { + "epoch": 76.2, + "learning_rate": 1.191595447947167e-05, + "loss": 1.8908, + "step": 15379500 + }, + { + "epoch": 76.2, + "learning_rate": 1.1914715893045585e-05, + "loss": 1.8783, + "step": 15380000 + }, + { + "epoch": 76.2, + "learning_rate": 1.1913477306619502e-05, + "loss": 1.8903, + "step": 15380500 + }, + { + "epoch": 76.2, + "learning_rate": 1.1912238720193417e-05, + "loss": 1.8802, + "step": 15381000 + }, + { + "epoch": 76.21, + "learning_rate": 1.1911000133767334e-05, + "loss": 1.8673, + "step": 15381500 + }, + { + "epoch": 76.21, + "learning_rate": 1.1909764024514103e-05, + "loss": 1.8913, + "step": 15382000 + }, + { + "epoch": 76.21, + "learning_rate": 1.190852543808802e-05, + "loss": 1.8848, + "step": 15382500 + }, + { + "epoch": 76.21, + "learning_rate": 1.1907286851661937e-05, + "loss": 1.8818, + "step": 15383000 + }, + { + "epoch": 76.22, + "learning_rate": 1.1906048265235852e-05, + "loss": 1.8895, + "step": 15383500 + }, + { + "epoch": 76.22, + "learning_rate": 1.1904809678809767e-05, + "loss": 1.8723, + "step": 15384000 + }, + { + "epoch": 76.22, + "learning_rate": 1.1903573569556536e-05, + "loss": 1.8585, + "step": 15384500 + }, + { + "epoch": 76.22, + "learning_rate": 1.1902334983130453e-05, + "loss": 1.8883, + "step": 15385000 + }, + { + "epoch": 76.23, + "learning_rate": 1.190109639670437e-05, + "loss": 1.8622, + "step": 15385500 + }, + { + "epoch": 76.23, + "learning_rate": 1.1899857810278287e-05, + "loss": 1.8835, + "step": 15386000 + }, + { + "epoch": 76.23, + "learning_rate": 1.1898619223852204e-05, + "loss": 1.8652, + "step": 15386500 + }, + { + "epoch": 76.23, + "learning_rate": 1.189738063742612e-05, + "loss": 1.9015, + "step": 15387000 + }, + { + "epoch": 76.23, + "learning_rate": 1.1896142051000034e-05, + "loss": 1.8757, + "step": 15387500 + }, + { + "epoch": 76.24, + "learning_rate": 1.1894903464573951e-05, + "loss": 1.8457, + "step": 15388000 + }, + { + "epoch": 76.24, + "learning_rate": 1.189366735532072e-05, + "loss": 1.8719, + "step": 15388500 + }, + { + "epoch": 76.24, + "learning_rate": 1.1892431246067489e-05, + "loss": 1.8598, + "step": 15389000 + }, + { + "epoch": 76.24, + "learning_rate": 1.1891192659641404e-05, + "loss": 1.8832, + "step": 15389500 + }, + { + "epoch": 76.25, + "learning_rate": 1.1889954073215321e-05, + "loss": 1.905, + "step": 15390000 + }, + { + "epoch": 76.25, + "learning_rate": 1.1888715486789238e-05, + "loss": 1.8653, + "step": 15390500 + }, + { + "epoch": 76.25, + "learning_rate": 1.1887479377536007e-05, + "loss": 1.8718, + "step": 15391000 + }, + { + "epoch": 76.25, + "learning_rate": 1.1886240791109924e-05, + "loss": 1.8939, + "step": 15391500 + }, + { + "epoch": 76.26, + "learning_rate": 1.1885002204683839e-05, + "loss": 1.8643, + "step": 15392000 + }, + { + "epoch": 76.26, + "learning_rate": 1.1883763618257756e-05, + "loss": 1.8631, + "step": 15392500 + }, + { + "epoch": 76.26, + "learning_rate": 1.1882525031831671e-05, + "loss": 1.8898, + "step": 15393000 + }, + { + "epoch": 76.26, + "learning_rate": 1.1881286445405588e-05, + "loss": 1.8735, + "step": 15393500 + }, + { + "epoch": 76.27, + "learning_rate": 1.1880047858979503e-05, + "loss": 1.8671, + "step": 15394000 + }, + { + "epoch": 76.27, + "learning_rate": 1.187880927255342e-05, + "loss": 1.8882, + "step": 15394500 + }, + { + "epoch": 76.27, + "learning_rate": 1.1877570686127337e-05, + "loss": 1.8842, + "step": 15395000 + }, + { + "epoch": 76.27, + "learning_rate": 1.1876332099701254e-05, + "loss": 1.8935, + "step": 15395500 + }, + { + "epoch": 76.28, + "learning_rate": 1.1875095990448023e-05, + "loss": 1.8878, + "step": 15396000 + }, + { + "epoch": 76.28, + "learning_rate": 1.1873857404021938e-05, + "loss": 1.8602, + "step": 15396500 + }, + { + "epoch": 76.28, + "learning_rate": 1.1872618817595853e-05, + "loss": 1.8748, + "step": 15397000 + }, + { + "epoch": 76.28, + "learning_rate": 1.1871382708342624e-05, + "loss": 1.882, + "step": 15397500 + }, + { + "epoch": 76.29, + "learning_rate": 1.1870144121916539e-05, + "loss": 1.8727, + "step": 15398000 + }, + { + "epoch": 76.29, + "learning_rate": 1.1868905535490456e-05, + "loss": 1.8849, + "step": 15398500 + }, + { + "epoch": 76.29, + "learning_rate": 1.1867666949064373e-05, + "loss": 1.8827, + "step": 15399000 + }, + { + "epoch": 76.29, + "learning_rate": 1.186643083981114e-05, + "loss": 1.8722, + "step": 15399500 + }, + { + "epoch": 76.3, + "learning_rate": 1.1865192253385057e-05, + "loss": 1.8869, + "step": 15400000 + }, + { + "epoch": 76.3, + "learning_rate": 1.1863953666958974e-05, + "loss": 1.8849, + "step": 15400500 + }, + { + "epoch": 76.3, + "learning_rate": 1.186271508053289e-05, + "loss": 1.8978, + "step": 15401000 + }, + { + "epoch": 76.3, + "learning_rate": 1.1861476494106806e-05, + "loss": 1.8763, + "step": 15401500 + }, + { + "epoch": 76.31, + "learning_rate": 1.1860237907680723e-05, + "loss": 1.8868, + "step": 15402000 + }, + { + "epoch": 76.31, + "learning_rate": 1.185899932125464e-05, + "loss": 1.8954, + "step": 15402500 + }, + { + "epoch": 76.31, + "learning_rate": 1.1857760734828555e-05, + "loss": 1.8772, + "step": 15403000 + }, + { + "epoch": 76.31, + "learning_rate": 1.1856522148402472e-05, + "loss": 1.8657, + "step": 15403500 + }, + { + "epoch": 76.32, + "learning_rate": 1.1855283561976387e-05, + "loss": 1.8838, + "step": 15404000 + }, + { + "epoch": 76.32, + "learning_rate": 1.1854044975550304e-05, + "loss": 1.9066, + "step": 15404500 + }, + { + "epoch": 76.32, + "learning_rate": 1.1852806389124221e-05, + "loss": 1.8991, + "step": 15405000 + }, + { + "epoch": 76.32, + "learning_rate": 1.1851567802698136e-05, + "loss": 1.9027, + "step": 15405500 + }, + { + "epoch": 76.33, + "learning_rate": 1.1850329216272053e-05, + "loss": 1.8942, + "step": 15406000 + }, + { + "epoch": 76.33, + "learning_rate": 1.184909062984597e-05, + "loss": 1.867, + "step": 15406500 + }, + { + "epoch": 76.33, + "learning_rate": 1.1847852043419887e-05, + "loss": 1.8707, + "step": 15407000 + }, + { + "epoch": 76.33, + "learning_rate": 1.1846613456993804e-05, + "loss": 1.881, + "step": 15407500 + }, + { + "epoch": 76.34, + "learning_rate": 1.184537487056772e-05, + "loss": 1.8906, + "step": 15408000 + }, + { + "epoch": 76.34, + "learning_rate": 1.1844138761314487e-05, + "loss": 1.8818, + "step": 15408500 + }, + { + "epoch": 76.34, + "learning_rate": 1.1842900174888403e-05, + "loss": 1.8929, + "step": 15409000 + }, + { + "epoch": 76.34, + "learning_rate": 1.184166158846232e-05, + "loss": 1.8852, + "step": 15409500 + }, + { + "epoch": 76.35, + "learning_rate": 1.184042547920909e-05, + "loss": 1.8738, + "step": 15410000 + }, + { + "epoch": 76.35, + "learning_rate": 1.1839186892783004e-05, + "loss": 1.8928, + "step": 15410500 + }, + { + "epoch": 76.35, + "learning_rate": 1.1837948306356921e-05, + "loss": 1.9285, + "step": 15411000 + }, + { + "epoch": 76.35, + "learning_rate": 1.1836709719930838e-05, + "loss": 1.8789, + "step": 15411500 + }, + { + "epoch": 76.36, + "learning_rate": 1.1835471133504754e-05, + "loss": 1.8798, + "step": 15412000 + }, + { + "epoch": 76.36, + "learning_rate": 1.1834235024251524e-05, + "loss": 1.8799, + "step": 15412500 + }, + { + "epoch": 76.36, + "learning_rate": 1.183299643782544e-05, + "loss": 1.8711, + "step": 15413000 + }, + { + "epoch": 76.36, + "learning_rate": 1.1831757851399356e-05, + "loss": 1.8664, + "step": 15413500 + }, + { + "epoch": 76.37, + "learning_rate": 1.1830519264973271e-05, + "loss": 1.8859, + "step": 15414000 + }, + { + "epoch": 76.37, + "learning_rate": 1.1829280678547188e-05, + "loss": 1.8777, + "step": 15414500 + }, + { + "epoch": 76.37, + "learning_rate": 1.1828042092121104e-05, + "loss": 1.8804, + "step": 15415000 + }, + { + "epoch": 76.37, + "learning_rate": 1.182680350569502e-05, + "loss": 1.9053, + "step": 15415500 + }, + { + "epoch": 76.38, + "learning_rate": 1.1825564919268937e-05, + "loss": 1.8834, + "step": 15416000 + }, + { + "epoch": 76.38, + "learning_rate": 1.1824326332842854e-05, + "loss": 1.8836, + "step": 15416500 + }, + { + "epoch": 76.38, + "learning_rate": 1.1823090223589623e-05, + "loss": 1.9008, + "step": 15417000 + }, + { + "epoch": 76.38, + "learning_rate": 1.1821851637163538e-05, + "loss": 1.8588, + "step": 15417500 + }, + { + "epoch": 76.39, + "learning_rate": 1.1820613050737454e-05, + "loss": 1.8777, + "step": 15418000 + }, + { + "epoch": 76.39, + "learning_rate": 1.181937446431137e-05, + "loss": 1.8936, + "step": 15418500 + }, + { + "epoch": 76.39, + "learning_rate": 1.181813835505814e-05, + "loss": 1.9231, + "step": 15419000 + }, + { + "epoch": 76.39, + "learning_rate": 1.1816899768632056e-05, + "loss": 1.8995, + "step": 15419500 + }, + { + "epoch": 76.4, + "learning_rate": 1.1815661182205973e-05, + "loss": 1.8579, + "step": 15420000 + }, + { + "epoch": 76.4, + "learning_rate": 1.1814427550125594e-05, + "loss": 1.8756, + "step": 15420500 + }, + { + "epoch": 76.4, + "learning_rate": 1.181318896369951e-05, + "loss": 1.8866, + "step": 15421000 + }, + { + "epoch": 76.4, + "learning_rate": 1.1811950377273426e-05, + "loss": 1.8856, + "step": 15421500 + }, + { + "epoch": 76.41, + "learning_rate": 1.1810711790847343e-05, + "loss": 1.8939, + "step": 15422000 + }, + { + "epoch": 76.41, + "learning_rate": 1.180947320442126e-05, + "loss": 1.8524, + "step": 15422500 + }, + { + "epoch": 76.41, + "learning_rate": 1.1808234617995175e-05, + "loss": 1.8893, + "step": 15423000 + }, + { + "epoch": 76.41, + "learning_rate": 1.180699603156909e-05, + "loss": 1.8837, + "step": 15423500 + }, + { + "epoch": 76.42, + "learning_rate": 1.1805757445143007e-05, + "loss": 1.8788, + "step": 15424000 + }, + { + "epoch": 76.42, + "learning_rate": 1.1804518858716924e-05, + "loss": 1.8971, + "step": 15424500 + }, + { + "epoch": 76.42, + "learning_rate": 1.1803280272290841e-05, + "loss": 1.8795, + "step": 15425000 + }, + { + "epoch": 76.42, + "learning_rate": 1.1802041685864756e-05, + "loss": 1.8792, + "step": 15425500 + }, + { + "epoch": 76.43, + "learning_rate": 1.1800803099438673e-05, + "loss": 1.8778, + "step": 15426000 + }, + { + "epoch": 76.43, + "learning_rate": 1.179956451301259e-05, + "loss": 1.8816, + "step": 15426500 + }, + { + "epoch": 76.43, + "learning_rate": 1.1798325926586507e-05, + "loss": 1.8778, + "step": 15427000 + }, + { + "epoch": 76.43, + "learning_rate": 1.1797087340160422e-05, + "loss": 1.8858, + "step": 15427500 + }, + { + "epoch": 76.44, + "learning_rate": 1.1795851230907191e-05, + "loss": 1.868, + "step": 15428000 + }, + { + "epoch": 76.44, + "learning_rate": 1.1794612644481106e-05, + "loss": 1.8769, + "step": 15428500 + }, + { + "epoch": 76.44, + "learning_rate": 1.1793374058055023e-05, + "loss": 1.8669, + "step": 15429000 + }, + { + "epoch": 76.44, + "learning_rate": 1.179213547162894e-05, + "loss": 1.8927, + "step": 15429500 + }, + { + "epoch": 76.45, + "learning_rate": 1.1790899362375707e-05, + "loss": 1.8761, + "step": 15430000 + }, + { + "epoch": 76.45, + "learning_rate": 1.1789663253122476e-05, + "loss": 1.8804, + "step": 15430500 + }, + { + "epoch": 76.45, + "learning_rate": 1.1788424666696393e-05, + "loss": 1.8976, + "step": 15431000 + }, + { + "epoch": 76.45, + "learning_rate": 1.178718608027031e-05, + "loss": 1.8814, + "step": 15431500 + }, + { + "epoch": 76.46, + "learning_rate": 1.1785947493844227e-05, + "loss": 1.8757, + "step": 15432000 + }, + { + "epoch": 76.46, + "learning_rate": 1.1784708907418142e-05, + "loss": 1.8777, + "step": 15432500 + }, + { + "epoch": 76.46, + "learning_rate": 1.178347032099206e-05, + "loss": 1.8783, + "step": 15433000 + }, + { + "epoch": 76.46, + "learning_rate": 1.1782231734565974e-05, + "loss": 1.8729, + "step": 15433500 + }, + { + "epoch": 76.47, + "learning_rate": 1.1780993148139891e-05, + "loss": 1.8649, + "step": 15434000 + }, + { + "epoch": 76.47, + "learning_rate": 1.1779754561713807e-05, + "loss": 1.8842, + "step": 15434500 + }, + { + "epoch": 76.47, + "learning_rate": 1.1778515975287724e-05, + "loss": 1.89, + "step": 15435000 + }, + { + "epoch": 76.47, + "learning_rate": 1.177727738886164e-05, + "loss": 1.902, + "step": 15435500 + }, + { + "epoch": 76.48, + "learning_rate": 1.1776038802435557e-05, + "loss": 1.8527, + "step": 15436000 + }, + { + "epoch": 76.48, + "learning_rate": 1.1774800216009474e-05, + "loss": 1.8618, + "step": 15436500 + }, + { + "epoch": 76.48, + "learning_rate": 1.177356162958339e-05, + "loss": 1.8992, + "step": 15437000 + }, + { + "epoch": 76.48, + "learning_rate": 1.1772323043157306e-05, + "loss": 1.8853, + "step": 15437500 + }, + { + "epoch": 76.49, + "learning_rate": 1.1771089411076927e-05, + "loss": 1.8735, + "step": 15438000 + }, + { + "epoch": 76.49, + "learning_rate": 1.1769853301823696e-05, + "loss": 1.8799, + "step": 15438500 + }, + { + "epoch": 76.49, + "learning_rate": 1.1768614715397611e-05, + "loss": 1.8823, + "step": 15439000 + }, + { + "epoch": 76.49, + "learning_rate": 1.1767376128971526e-05, + "loss": 1.8665, + "step": 15439500 + }, + { + "epoch": 76.5, + "learning_rate": 1.1766140019718297e-05, + "loss": 1.8704, + "step": 15440000 + }, + { + "epoch": 76.5, + "learning_rate": 1.1764901433292212e-05, + "loss": 1.8877, + "step": 15440500 + }, + { + "epoch": 76.5, + "learning_rate": 1.1763662846866129e-05, + "loss": 1.8959, + "step": 15441000 + }, + { + "epoch": 76.5, + "learning_rate": 1.1762424260440046e-05, + "loss": 1.8878, + "step": 15441500 + }, + { + "epoch": 76.51, + "learning_rate": 1.1761185674013963e-05, + "loss": 1.8859, + "step": 15442000 + }, + { + "epoch": 76.51, + "learning_rate": 1.1759947087587878e-05, + "loss": 1.8948, + "step": 15442500 + }, + { + "epoch": 76.51, + "learning_rate": 1.1758708501161793e-05, + "loss": 1.8577, + "step": 15443000 + }, + { + "epoch": 76.51, + "learning_rate": 1.175746991473571e-05, + "loss": 1.8993, + "step": 15443500 + }, + { + "epoch": 76.51, + "learning_rate": 1.1756231328309627e-05, + "loss": 1.8791, + "step": 15444000 + }, + { + "epoch": 76.52, + "learning_rate": 1.1754992741883544e-05, + "loss": 1.8922, + "step": 15444500 + }, + { + "epoch": 76.52, + "learning_rate": 1.175375415545746e-05, + "loss": 1.8983, + "step": 15445000 + }, + { + "epoch": 76.52, + "learning_rate": 1.175251804620423e-05, + "loss": 1.8624, + "step": 15445500 + }, + { + "epoch": 76.52, + "learning_rate": 1.1751279459778145e-05, + "loss": 1.877, + "step": 15446000 + }, + { + "epoch": 76.53, + "learning_rate": 1.175004087335206e-05, + "loss": 1.8828, + "step": 15446500 + }, + { + "epoch": 76.53, + "learning_rate": 1.1748802286925977e-05, + "loss": 1.8823, + "step": 15447000 + }, + { + "epoch": 76.53, + "learning_rate": 1.1747563700499894e-05, + "loss": 1.8877, + "step": 15447500 + }, + { + "epoch": 76.53, + "learning_rate": 1.174632511407381e-05, + "loss": 1.8947, + "step": 15448000 + }, + { + "epoch": 76.54, + "learning_rate": 1.1745086527647726e-05, + "loss": 1.8836, + "step": 15448500 + }, + { + "epoch": 76.54, + "learning_rate": 1.1743847941221643e-05, + "loss": 1.8687, + "step": 15449000 + }, + { + "epoch": 76.54, + "learning_rate": 1.1742611831968412e-05, + "loss": 1.8502, + "step": 15449500 + }, + { + "epoch": 76.54, + "learning_rate": 1.1741373245542327e-05, + "loss": 1.8873, + "step": 15450000 + }, + { + "epoch": 76.55, + "learning_rate": 1.1740134659116244e-05, + "loss": 1.8893, + "step": 15450500 + }, + { + "epoch": 76.55, + "learning_rate": 1.173889607269016e-05, + "loss": 1.8979, + "step": 15451000 + }, + { + "epoch": 76.55, + "learning_rate": 1.1737657486264077e-05, + "loss": 1.9003, + "step": 15451500 + }, + { + "epoch": 76.55, + "learning_rate": 1.1736418899837993e-05, + "loss": 1.8652, + "step": 15452000 + }, + { + "epoch": 76.56, + "learning_rate": 1.173518031341191e-05, + "loss": 1.8942, + "step": 15452500 + }, + { + "epoch": 76.56, + "learning_rate": 1.1733941726985827e-05, + "loss": 1.9027, + "step": 15453000 + }, + { + "epoch": 76.56, + "learning_rate": 1.1732705617732594e-05, + "loss": 1.887, + "step": 15453500 + }, + { + "epoch": 76.56, + "learning_rate": 1.1731467031306511e-05, + "loss": 1.88, + "step": 15454000 + }, + { + "epoch": 76.57, + "learning_rate": 1.173023092205328e-05, + "loss": 1.8734, + "step": 15454500 + }, + { + "epoch": 76.57, + "learning_rate": 1.1728992335627197e-05, + "loss": 1.8883, + "step": 15455000 + }, + { + "epoch": 76.57, + "learning_rate": 1.1727756226373964e-05, + "loss": 1.8773, + "step": 15455500 + }, + { + "epoch": 76.57, + "learning_rate": 1.172651763994788e-05, + "loss": 1.8958, + "step": 15456000 + }, + { + "epoch": 76.58, + "learning_rate": 1.1725279053521796e-05, + "loss": 1.8905, + "step": 15456500 + }, + { + "epoch": 76.58, + "learning_rate": 1.1724040467095713e-05, + "loss": 1.9019, + "step": 15457000 + }, + { + "epoch": 76.58, + "learning_rate": 1.172280188066963e-05, + "loss": 1.892, + "step": 15457500 + }, + { + "epoch": 76.58, + "learning_rate": 1.1721563294243547e-05, + "loss": 1.8746, + "step": 15458000 + }, + { + "epoch": 76.59, + "learning_rate": 1.1720324707817462e-05, + "loss": 1.8834, + "step": 15458500 + }, + { + "epoch": 76.59, + "learning_rate": 1.171908612139138e-05, + "loss": 1.8878, + "step": 15459000 + }, + { + "epoch": 76.59, + "learning_rate": 1.1717850012138146e-05, + "loss": 1.9017, + "step": 15459500 + }, + { + "epoch": 76.59, + "learning_rate": 1.1716611425712063e-05, + "loss": 1.8775, + "step": 15460000 + }, + { + "epoch": 76.6, + "learning_rate": 1.171537283928598e-05, + "loss": 1.8791, + "step": 15460500 + }, + { + "epoch": 76.6, + "learning_rate": 1.1714134252859897e-05, + "loss": 1.8836, + "step": 15461000 + }, + { + "epoch": 76.6, + "learning_rate": 1.1712895666433812e-05, + "loss": 1.8674, + "step": 15461500 + }, + { + "epoch": 76.6, + "learning_rate": 1.171165708000773e-05, + "loss": 1.8867, + "step": 15462000 + }, + { + "epoch": 76.61, + "learning_rate": 1.1710418493581646e-05, + "loss": 1.8816, + "step": 15462500 + }, + { + "epoch": 76.61, + "learning_rate": 1.1709179907155563e-05, + "loss": 1.8842, + "step": 15463000 + }, + { + "epoch": 76.61, + "learning_rate": 1.170794379790233e-05, + "loss": 1.8916, + "step": 15463500 + }, + { + "epoch": 76.61, + "learning_rate": 1.1706705211476247e-05, + "loss": 1.8847, + "step": 15464000 + }, + { + "epoch": 76.62, + "learning_rate": 1.1705466625050163e-05, + "loss": 1.8889, + "step": 15464500 + }, + { + "epoch": 76.62, + "learning_rate": 1.170422803862408e-05, + "loss": 1.8911, + "step": 15465000 + }, + { + "epoch": 76.62, + "learning_rate": 1.1702989452197996e-05, + "loss": 1.9003, + "step": 15465500 + }, + { + "epoch": 76.62, + "learning_rate": 1.1701750865771913e-05, + "loss": 1.873, + "step": 15466000 + }, + { + "epoch": 76.63, + "learning_rate": 1.170051227934583e-05, + "loss": 1.8776, + "step": 15466500 + }, + { + "epoch": 76.63, + "learning_rate": 1.1699273692919745e-05, + "loss": 1.8863, + "step": 15467000 + }, + { + "epoch": 76.63, + "learning_rate": 1.1698037583666514e-05, + "loss": 1.8841, + "step": 15467500 + }, + { + "epoch": 76.63, + "learning_rate": 1.169679899724043e-05, + "loss": 1.8707, + "step": 15468000 + }, + { + "epoch": 76.64, + "learning_rate": 1.1695560410814346e-05, + "loss": 1.878, + "step": 15468500 + }, + { + "epoch": 76.64, + "learning_rate": 1.1694321824388263e-05, + "loss": 1.8734, + "step": 15469000 + }, + { + "epoch": 76.64, + "learning_rate": 1.169308323796218e-05, + "loss": 1.885, + "step": 15469500 + }, + { + "epoch": 76.64, + "learning_rate": 1.1691844651536096e-05, + "loss": 1.8797, + "step": 15470000 + }, + { + "epoch": 76.65, + "learning_rate": 1.1690608542282864e-05, + "loss": 1.8727, + "step": 15470500 + }, + { + "epoch": 76.65, + "learning_rate": 1.168936995585678e-05, + "loss": 1.8715, + "step": 15471000 + }, + { + "epoch": 76.65, + "learning_rate": 1.1688131369430696e-05, + "loss": 1.8908, + "step": 15471500 + }, + { + "epoch": 76.65, + "learning_rate": 1.1686892783004613e-05, + "loss": 1.897, + "step": 15472000 + }, + { + "epoch": 76.66, + "learning_rate": 1.168565419657853e-05, + "loss": 1.8715, + "step": 15472500 + }, + { + "epoch": 76.66, + "learning_rate": 1.1684418087325297e-05, + "loss": 1.8653, + "step": 15473000 + }, + { + "epoch": 76.66, + "learning_rate": 1.1683179500899214e-05, + "loss": 1.884, + "step": 15473500 + }, + { + "epoch": 76.66, + "learning_rate": 1.168194091447313e-05, + "loss": 1.8727, + "step": 15474000 + }, + { + "epoch": 76.67, + "learning_rate": 1.1680702328047047e-05, + "loss": 1.8855, + "step": 15474500 + }, + { + "epoch": 76.67, + "learning_rate": 1.1679463741620963e-05, + "loss": 1.8817, + "step": 15475000 + }, + { + "epoch": 76.67, + "learning_rate": 1.167822515519488e-05, + "loss": 1.8449, + "step": 15475500 + }, + { + "epoch": 76.67, + "learning_rate": 1.1676986568768796e-05, + "loss": 1.8677, + "step": 15476000 + }, + { + "epoch": 76.68, + "learning_rate": 1.1675747982342713e-05, + "loss": 1.8751, + "step": 15476500 + }, + { + "epoch": 76.68, + "learning_rate": 1.167450939591663e-05, + "loss": 1.8729, + "step": 15477000 + }, + { + "epoch": 76.68, + "learning_rate": 1.1673270809490545e-05, + "loss": 1.9028, + "step": 15477500 + }, + { + "epoch": 76.68, + "learning_rate": 1.1672037177410165e-05, + "loss": 1.9048, + "step": 15478000 + }, + { + "epoch": 76.69, + "learning_rate": 1.1670798590984082e-05, + "loss": 1.9081, + "step": 15478500 + }, + { + "epoch": 76.69, + "learning_rate": 1.1669560004558e-05, + "loss": 1.8951, + "step": 15479000 + }, + { + "epoch": 76.69, + "learning_rate": 1.1668321418131915e-05, + "loss": 1.8774, + "step": 15479500 + }, + { + "epoch": 76.69, + "learning_rate": 1.1667082831705831e-05, + "loss": 1.873, + "step": 15480000 + }, + { + "epoch": 76.7, + "learning_rate": 1.1665844245279747e-05, + "loss": 1.8776, + "step": 15480500 + }, + { + "epoch": 76.7, + "learning_rate": 1.1664608136026515e-05, + "loss": 1.8881, + "step": 15481000 + }, + { + "epoch": 76.7, + "learning_rate": 1.1663369549600432e-05, + "loss": 1.8737, + "step": 15481500 + }, + { + "epoch": 76.7, + "learning_rate": 1.166213096317435e-05, + "loss": 1.8822, + "step": 15482000 + }, + { + "epoch": 76.71, + "learning_rate": 1.1660892376748266e-05, + "loss": 1.8848, + "step": 15482500 + }, + { + "epoch": 76.71, + "learning_rate": 1.1659653790322181e-05, + "loss": 1.8875, + "step": 15483000 + }, + { + "epoch": 76.71, + "learning_rate": 1.1658415203896097e-05, + "loss": 1.8679, + "step": 15483500 + }, + { + "epoch": 76.71, + "learning_rate": 1.1657176617470014e-05, + "loss": 1.8633, + "step": 15484000 + }, + { + "epoch": 76.72, + "learning_rate": 1.165593803104393e-05, + "loss": 1.8527, + "step": 15484500 + }, + { + "epoch": 76.72, + "learning_rate": 1.1654699444617848e-05, + "loss": 1.9099, + "step": 15485000 + }, + { + "epoch": 76.72, + "learning_rate": 1.1653460858191763e-05, + "loss": 1.8935, + "step": 15485500 + }, + { + "epoch": 76.72, + "learning_rate": 1.165222227176568e-05, + "loss": 1.8843, + "step": 15486000 + }, + { + "epoch": 76.73, + "learning_rate": 1.1650986162512448e-05, + "loss": 1.8744, + "step": 15486500 + }, + { + "epoch": 76.73, + "learning_rate": 1.1649747576086364e-05, + "loss": 1.9029, + "step": 15487000 + }, + { + "epoch": 76.73, + "learning_rate": 1.164850898966028e-05, + "loss": 1.8852, + "step": 15487500 + }, + { + "epoch": 76.73, + "learning_rate": 1.1647270403234198e-05, + "loss": 1.8878, + "step": 15488000 + }, + { + "epoch": 76.74, + "learning_rate": 1.1646034293980966e-05, + "loss": 1.8881, + "step": 15488500 + }, + { + "epoch": 76.74, + "learning_rate": 1.1644795707554883e-05, + "loss": 1.8993, + "step": 15489000 + }, + { + "epoch": 76.74, + "learning_rate": 1.164355959830165e-05, + "loss": 1.8896, + "step": 15489500 + }, + { + "epoch": 76.74, + "learning_rate": 1.1642321011875567e-05, + "loss": 1.9001, + "step": 15490000 + }, + { + "epoch": 76.75, + "learning_rate": 1.1641082425449483e-05, + "loss": 1.89, + "step": 15490500 + }, + { + "epoch": 76.75, + "learning_rate": 1.16398438390234e-05, + "loss": 1.8793, + "step": 15491000 + }, + { + "epoch": 76.75, + "learning_rate": 1.1638605252597316e-05, + "loss": 1.8748, + "step": 15491500 + }, + { + "epoch": 76.75, + "learning_rate": 1.1637366666171233e-05, + "loss": 1.88, + "step": 15492000 + }, + { + "epoch": 76.76, + "learning_rate": 1.163612807974515e-05, + "loss": 1.9166, + "step": 15492500 + }, + { + "epoch": 76.76, + "learning_rate": 1.1634889493319066e-05, + "loss": 1.8906, + "step": 15493000 + }, + { + "epoch": 76.76, + "learning_rate": 1.1633650906892982e-05, + "loss": 1.8909, + "step": 15493500 + }, + { + "epoch": 76.76, + "learning_rate": 1.1632412320466898e-05, + "loss": 1.8866, + "step": 15494000 + }, + { + "epoch": 76.77, + "learning_rate": 1.1631173734040815e-05, + "loss": 1.8698, + "step": 15494500 + }, + { + "epoch": 76.77, + "learning_rate": 1.162993514761473e-05, + "loss": 1.8731, + "step": 15495000 + }, + { + "epoch": 76.77, + "learning_rate": 1.16286990383615e-05, + "loss": 1.8688, + "step": 15495500 + }, + { + "epoch": 76.77, + "learning_rate": 1.1627460451935416e-05, + "loss": 1.8718, + "step": 15496000 + }, + { + "epoch": 76.78, + "learning_rate": 1.1626221865509333e-05, + "loss": 1.898, + "step": 15496500 + }, + { + "epoch": 76.78, + "learning_rate": 1.1624983279083248e-05, + "loss": 1.9139, + "step": 15497000 + }, + { + "epoch": 76.78, + "learning_rate": 1.1623747169830017e-05, + "loss": 1.8907, + "step": 15497500 + }, + { + "epoch": 76.78, + "learning_rate": 1.1622511060576785e-05, + "loss": 1.8831, + "step": 15498000 + }, + { + "epoch": 76.78, + "learning_rate": 1.1621272474150702e-05, + "loss": 1.8875, + "step": 15498500 + }, + { + "epoch": 76.79, + "learning_rate": 1.162003388772462e-05, + "loss": 1.8851, + "step": 15499000 + }, + { + "epoch": 76.79, + "learning_rate": 1.1618795301298534e-05, + "loss": 1.8919, + "step": 15499500 + }, + { + "epoch": 76.79, + "learning_rate": 1.161755671487245e-05, + "loss": 1.8706, + "step": 15500000 + }, + { + "epoch": 76.79, + "learning_rate": 1.1616318128446367e-05, + "loss": 1.8822, + "step": 15500500 + }, + { + "epoch": 76.8, + "learning_rate": 1.1615079542020284e-05, + "loss": 1.8846, + "step": 15501000 + }, + { + "epoch": 76.8, + "learning_rate": 1.16138409555942e-05, + "loss": 1.8826, + "step": 15501500 + }, + { + "epoch": 76.8, + "learning_rate": 1.161260484634097e-05, + "loss": 1.8904, + "step": 15502000 + }, + { + "epoch": 76.8, + "learning_rate": 1.1611366259914885e-05, + "loss": 1.9019, + "step": 15502500 + }, + { + "epoch": 76.81, + "learning_rate": 1.1610127673488801e-05, + "loss": 1.8706, + "step": 15503000 + }, + { + "epoch": 76.81, + "learning_rate": 1.1608889087062717e-05, + "loss": 1.8563, + "step": 15503500 + }, + { + "epoch": 76.81, + "learning_rate": 1.1607650500636634e-05, + "loss": 1.8832, + "step": 15504000 + }, + { + "epoch": 76.81, + "learning_rate": 1.160641191421055e-05, + "loss": 1.8794, + "step": 15504500 + }, + { + "epoch": 76.82, + "learning_rate": 1.1605173327784467e-05, + "loss": 1.8913, + "step": 15505000 + }, + { + "epoch": 76.82, + "learning_rate": 1.1603934741358383e-05, + "loss": 1.8822, + "step": 15505500 + }, + { + "epoch": 76.82, + "learning_rate": 1.16026961549323e-05, + "loss": 1.8741, + "step": 15506000 + }, + { + "epoch": 76.82, + "learning_rate": 1.1601457568506217e-05, + "loss": 1.8674, + "step": 15506500 + }, + { + "epoch": 76.83, + "learning_rate": 1.1600218982080134e-05, + "loss": 1.881, + "step": 15507000 + }, + { + "epoch": 76.83, + "learning_rate": 1.1598980395654049e-05, + "loss": 1.9012, + "step": 15507500 + }, + { + "epoch": 76.83, + "learning_rate": 1.1597744286400818e-05, + "loss": 1.8998, + "step": 15508000 + }, + { + "epoch": 76.83, + "learning_rate": 1.1596505699974733e-05, + "loss": 1.8669, + "step": 15508500 + }, + { + "epoch": 76.84, + "learning_rate": 1.159526711354865e-05, + "loss": 1.9002, + "step": 15509000 + }, + { + "epoch": 76.84, + "learning_rate": 1.1594028527122567e-05, + "loss": 1.8637, + "step": 15509500 + }, + { + "epoch": 76.84, + "learning_rate": 1.1592789940696484e-05, + "loss": 1.8885, + "step": 15510000 + }, + { + "epoch": 76.84, + "learning_rate": 1.159155383144325e-05, + "loss": 1.8668, + "step": 15510500 + }, + { + "epoch": 76.85, + "learning_rate": 1.159031772219002e-05, + "loss": 1.8882, + "step": 15511000 + }, + { + "epoch": 76.85, + "learning_rate": 1.1589079135763936e-05, + "loss": 1.8826, + "step": 15511500 + }, + { + "epoch": 76.85, + "learning_rate": 1.1587840549337853e-05, + "loss": 1.913, + "step": 15512000 + }, + { + "epoch": 76.85, + "learning_rate": 1.1586601962911769e-05, + "loss": 1.8613, + "step": 15512500 + }, + { + "epoch": 76.86, + "learning_rate": 1.1585363376485686e-05, + "loss": 1.8704, + "step": 15513000 + }, + { + "epoch": 76.86, + "learning_rate": 1.15841247900596e-05, + "loss": 1.8765, + "step": 15513500 + }, + { + "epoch": 76.86, + "learning_rate": 1.158288868080637e-05, + "loss": 1.8848, + "step": 15514000 + }, + { + "epoch": 76.86, + "learning_rate": 1.1581650094380286e-05, + "loss": 1.8924, + "step": 15514500 + }, + { + "epoch": 76.87, + "learning_rate": 1.1580411507954203e-05, + "loss": 1.89, + "step": 15515000 + }, + { + "epoch": 76.87, + "learning_rate": 1.1579172921528119e-05, + "loss": 1.8604, + "step": 15515500 + }, + { + "epoch": 76.87, + "learning_rate": 1.1577934335102036e-05, + "loss": 1.8786, + "step": 15516000 + }, + { + "epoch": 76.87, + "learning_rate": 1.1576695748675952e-05, + "loss": 1.8897, + "step": 15516500 + }, + { + "epoch": 76.88, + "learning_rate": 1.1575457162249868e-05, + "loss": 1.8885, + "step": 15517000 + }, + { + "epoch": 76.88, + "learning_rate": 1.1574218575823785e-05, + "loss": 1.8938, + "step": 15517500 + }, + { + "epoch": 76.88, + "learning_rate": 1.15729799893977e-05, + "loss": 1.8735, + "step": 15518000 + }, + { + "epoch": 76.88, + "learning_rate": 1.1571743880144469e-05, + "loss": 1.8912, + "step": 15518500 + }, + { + "epoch": 76.89, + "learning_rate": 1.1570505293718386e-05, + "loss": 1.8641, + "step": 15519000 + }, + { + "epoch": 76.89, + "learning_rate": 1.1569266707292303e-05, + "loss": 1.8891, + "step": 15519500 + }, + { + "epoch": 76.89, + "learning_rate": 1.1568028120866218e-05, + "loss": 1.865, + "step": 15520000 + }, + { + "epoch": 76.89, + "learning_rate": 1.1566789534440135e-05, + "loss": 1.8772, + "step": 15520500 + }, + { + "epoch": 76.9, + "learning_rate": 1.1565553425186904e-05, + "loss": 1.8945, + "step": 15521000 + }, + { + "epoch": 76.9, + "learning_rate": 1.156431483876082e-05, + "loss": 1.8745, + "step": 15521500 + }, + { + "epoch": 76.9, + "learning_rate": 1.1563076252334736e-05, + "loss": 1.8913, + "step": 15522000 + }, + { + "epoch": 76.9, + "learning_rate": 1.1561837665908653e-05, + "loss": 1.8833, + "step": 15522500 + }, + { + "epoch": 76.91, + "learning_rate": 1.156060155665542e-05, + "loss": 1.8838, + "step": 15523000 + }, + { + "epoch": 76.91, + "learning_rate": 1.1559362970229337e-05, + "loss": 1.8898, + "step": 15523500 + }, + { + "epoch": 76.91, + "learning_rate": 1.1558124383803254e-05, + "loss": 1.8863, + "step": 15524000 + }, + { + "epoch": 76.91, + "learning_rate": 1.155688579737717e-05, + "loss": 1.885, + "step": 15524500 + }, + { + "epoch": 76.92, + "learning_rate": 1.1555647210951086e-05, + "loss": 1.9047, + "step": 15525000 + }, + { + "epoch": 76.92, + "learning_rate": 1.1554411101697855e-05, + "loss": 1.891, + "step": 15525500 + }, + { + "epoch": 76.92, + "learning_rate": 1.1553174992444623e-05, + "loss": 1.8745, + "step": 15526000 + }, + { + "epoch": 76.92, + "learning_rate": 1.155193640601854e-05, + "loss": 1.8693, + "step": 15526500 + }, + { + "epoch": 76.93, + "learning_rate": 1.1550697819592456e-05, + "loss": 1.8949, + "step": 15527000 + }, + { + "epoch": 76.93, + "learning_rate": 1.1549459233166372e-05, + "loss": 1.8914, + "step": 15527500 + }, + { + "epoch": 76.93, + "learning_rate": 1.154822064674029e-05, + "loss": 1.8787, + "step": 15528000 + }, + { + "epoch": 76.93, + "learning_rate": 1.1546982060314206e-05, + "loss": 1.8953, + "step": 15528500 + }, + { + "epoch": 76.94, + "learning_rate": 1.1545743473888122e-05, + "loss": 1.8837, + "step": 15529000 + }, + { + "epoch": 76.94, + "learning_rate": 1.1544504887462037e-05, + "loss": 1.8712, + "step": 15529500 + }, + { + "epoch": 76.94, + "learning_rate": 1.1543266301035954e-05, + "loss": 1.8803, + "step": 15530000 + }, + { + "epoch": 76.94, + "learning_rate": 1.1542030191782723e-05, + "loss": 1.8936, + "step": 15530500 + }, + { + "epoch": 76.95, + "learning_rate": 1.154079160535664e-05, + "loss": 1.8822, + "step": 15531000 + }, + { + "epoch": 76.95, + "learning_rate": 1.1539553018930556e-05, + "loss": 1.9027, + "step": 15531500 + }, + { + "epoch": 76.95, + "learning_rate": 1.1538314432504472e-05, + "loss": 1.8602, + "step": 15532000 + }, + { + "epoch": 76.95, + "learning_rate": 1.1537075846078389e-05, + "loss": 1.8688, + "step": 15532500 + }, + { + "epoch": 76.96, + "learning_rate": 1.1535837259652304e-05, + "loss": 1.9058, + "step": 15533000 + }, + { + "epoch": 76.96, + "learning_rate": 1.1534603627571926e-05, + "loss": 1.8734, + "step": 15533500 + }, + { + "epoch": 76.96, + "learning_rate": 1.1533365041145841e-05, + "loss": 1.8758, + "step": 15534000 + }, + { + "epoch": 76.96, + "learning_rate": 1.1532126454719758e-05, + "loss": 1.8625, + "step": 15534500 + }, + { + "epoch": 76.97, + "learning_rate": 1.1530887868293674e-05, + "loss": 1.8683, + "step": 15535000 + }, + { + "epoch": 76.97, + "learning_rate": 1.152964928186759e-05, + "loss": 1.8953, + "step": 15535500 + }, + { + "epoch": 76.97, + "learning_rate": 1.152841317261436e-05, + "loss": 1.8991, + "step": 15536000 + }, + { + "epoch": 76.97, + "learning_rate": 1.1527174586188276e-05, + "loss": 1.8907, + "step": 15536500 + }, + { + "epoch": 76.98, + "learning_rate": 1.1525935999762191e-05, + "loss": 1.8906, + "step": 15537000 + }, + { + "epoch": 76.98, + "learning_rate": 1.1524697413336108e-05, + "loss": 1.8903, + "step": 15537500 + }, + { + "epoch": 76.98, + "learning_rate": 1.1523458826910025e-05, + "loss": 1.8719, + "step": 15538000 + }, + { + "epoch": 76.98, + "learning_rate": 1.152222024048394e-05, + "loss": 1.8836, + "step": 15538500 + }, + { + "epoch": 76.99, + "learning_rate": 1.1520981654057857e-05, + "loss": 1.8884, + "step": 15539000 + }, + { + "epoch": 76.99, + "learning_rate": 1.1519743067631773e-05, + "loss": 1.8813, + "step": 15539500 + }, + { + "epoch": 76.99, + "learning_rate": 1.151850448120569e-05, + "loss": 1.895, + "step": 15540000 + }, + { + "epoch": 76.99, + "learning_rate": 1.1517268371952458e-05, + "loss": 1.9044, + "step": 15540500 + }, + { + "epoch": 77.0, + "learning_rate": 1.1516032262699227e-05, + "loss": 1.8895, + "step": 15541000 + }, + { + "epoch": 77.0, + "learning_rate": 1.1514793676273142e-05, + "loss": 1.8904, + "step": 15541500 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.6799779348529961, + "eval_accuracy_mlm": 0.6405141004819173, + "eval_accuracy_nsp": 0.86639420455838, + "eval_loss": 2.305272340774536, + "eval_runtime": 146.6394, + "eval_samples_per_second": 1738.68, + "eval_steps_per_second": 72.45, + "step": 15541911 + }, + { + "epoch": 77.0, + "learning_rate": 1.151355508984706e-05, + "loss": 1.877, + "step": 15542000 + }, + { + "epoch": 77.0, + "learning_rate": 1.1512318980593828e-05, + "loss": 1.8668, + "step": 15542500 + }, + { + "epoch": 77.01, + "learning_rate": 1.1511080394167745e-05, + "loss": 1.907, + "step": 15543000 + }, + { + "epoch": 77.01, + "learning_rate": 1.1509841807741662e-05, + "loss": 1.8976, + "step": 15543500 + }, + { + "epoch": 77.01, + "learning_rate": 1.1508603221315577e-05, + "loss": 1.8849, + "step": 15544000 + }, + { + "epoch": 77.01, + "learning_rate": 1.1507364634889493e-05, + "loss": 1.8725, + "step": 15544500 + }, + { + "epoch": 77.02, + "learning_rate": 1.150612604846341e-05, + "loss": 1.8642, + "step": 15545000 + }, + { + "epoch": 77.02, + "learning_rate": 1.1504887462037326e-05, + "loss": 1.8798, + "step": 15545500 + }, + { + "epoch": 77.02, + "learning_rate": 1.1503651352784095e-05, + "loss": 1.863, + "step": 15546000 + }, + { + "epoch": 77.02, + "learning_rate": 1.1502412766358012e-05, + "loss": 1.8869, + "step": 15546500 + }, + { + "epoch": 77.03, + "learning_rate": 1.1501174179931929e-05, + "loss": 1.8606, + "step": 15547000 + }, + { + "epoch": 77.03, + "learning_rate": 1.1499935593505844e-05, + "loss": 1.8571, + "step": 15547500 + }, + { + "epoch": 77.03, + "learning_rate": 1.1498699484252613e-05, + "loss": 1.8798, + "step": 15548000 + }, + { + "epoch": 77.03, + "learning_rate": 1.1497460897826528e-05, + "loss": 1.8694, + "step": 15548500 + }, + { + "epoch": 77.04, + "learning_rate": 1.1496222311400445e-05, + "loss": 1.8833, + "step": 15549000 + }, + { + "epoch": 77.04, + "learning_rate": 1.1494983724974362e-05, + "loss": 1.8545, + "step": 15549500 + }, + { + "epoch": 77.04, + "learning_rate": 1.1493745138548279e-05, + "loss": 1.8923, + "step": 15550000 + }, + { + "epoch": 77.04, + "learning_rate": 1.1492506552122194e-05, + "loss": 1.8839, + "step": 15550500 + }, + { + "epoch": 77.05, + "learning_rate": 1.1491267965696111e-05, + "loss": 1.8764, + "step": 15551000 + }, + { + "epoch": 77.05, + "learning_rate": 1.1490029379270027e-05, + "loss": 1.8595, + "step": 15551500 + }, + { + "epoch": 77.05, + "learning_rate": 1.1488790792843943e-05, + "loss": 1.8789, + "step": 15552000 + }, + { + "epoch": 77.05, + "learning_rate": 1.148755220641786e-05, + "loss": 1.8708, + "step": 15552500 + }, + { + "epoch": 77.05, + "learning_rate": 1.1486313619991776e-05, + "loss": 1.9007, + "step": 15553000 + }, + { + "epoch": 77.06, + "learning_rate": 1.1485075033565693e-05, + "loss": 1.8675, + "step": 15553500 + }, + { + "epoch": 77.06, + "learning_rate": 1.148383644713961e-05, + "loss": 1.877, + "step": 15554000 + }, + { + "epoch": 77.06, + "learning_rate": 1.1482597860713526e-05, + "loss": 1.8817, + "step": 15554500 + }, + { + "epoch": 77.06, + "learning_rate": 1.1481359274287442e-05, + "loss": 1.8638, + "step": 15555000 + }, + { + "epoch": 77.07, + "learning_rate": 1.1480120687861359e-05, + "loss": 1.8815, + "step": 15555500 + }, + { + "epoch": 77.07, + "learning_rate": 1.1478884578608126e-05, + "loss": 1.8844, + "step": 15556000 + }, + { + "epoch": 77.07, + "learning_rate": 1.1477645992182043e-05, + "loss": 1.8657, + "step": 15556500 + }, + { + "epoch": 77.07, + "learning_rate": 1.147640740575596e-05, + "loss": 1.8818, + "step": 15557000 + }, + { + "epoch": 77.08, + "learning_rate": 1.1475168819329876e-05, + "loss": 1.8675, + "step": 15557500 + }, + { + "epoch": 77.08, + "learning_rate": 1.1473930232903792e-05, + "loss": 1.8776, + "step": 15558000 + }, + { + "epoch": 77.08, + "learning_rate": 1.1472691646477709e-05, + "loss": 1.8846, + "step": 15558500 + }, + { + "epoch": 77.08, + "learning_rate": 1.1471455537224476e-05, + "loss": 1.8817, + "step": 15559000 + }, + { + "epoch": 77.09, + "learning_rate": 1.1470216950798393e-05, + "loss": 1.8957, + "step": 15559500 + }, + { + "epoch": 77.09, + "learning_rate": 1.1468983318718015e-05, + "loss": 1.851, + "step": 15560000 + }, + { + "epoch": 77.09, + "learning_rate": 1.146774473229193e-05, + "loss": 1.8806, + "step": 15560500 + }, + { + "epoch": 77.09, + "learning_rate": 1.1466506145865846e-05, + "loss": 1.8673, + "step": 15561000 + }, + { + "epoch": 77.1, + "learning_rate": 1.1465267559439762e-05, + "loss": 1.873, + "step": 15561500 + }, + { + "epoch": 77.1, + "learning_rate": 1.146402897301368e-05, + "loss": 1.8651, + "step": 15562000 + }, + { + "epoch": 77.1, + "learning_rate": 1.1462792863760448e-05, + "loss": 1.853, + "step": 15562500 + }, + { + "epoch": 77.1, + "learning_rate": 1.1461554277334365e-05, + "loss": 1.8478, + "step": 15563000 + }, + { + "epoch": 77.11, + "learning_rate": 1.1460315690908282e-05, + "loss": 1.8704, + "step": 15563500 + }, + { + "epoch": 77.11, + "learning_rate": 1.1459077104482197e-05, + "loss": 1.8566, + "step": 15564000 + }, + { + "epoch": 77.11, + "learning_rate": 1.1457838518056113e-05, + "loss": 1.8692, + "step": 15564500 + }, + { + "epoch": 77.11, + "learning_rate": 1.145659993163003e-05, + "loss": 1.8518, + "step": 15565000 + }, + { + "epoch": 77.12, + "learning_rate": 1.1455363822376798e-05, + "loss": 1.8701, + "step": 15565500 + }, + { + "epoch": 77.12, + "learning_rate": 1.1454125235950715e-05, + "loss": 1.8479, + "step": 15566000 + }, + { + "epoch": 77.12, + "learning_rate": 1.1452886649524632e-05, + "loss": 1.8898, + "step": 15566500 + }, + { + "epoch": 77.12, + "learning_rate": 1.1451648063098547e-05, + "loss": 1.8733, + "step": 15567000 + }, + { + "epoch": 77.13, + "learning_rate": 1.1450409476672464e-05, + "loss": 1.884, + "step": 15567500 + }, + { + "epoch": 77.13, + "learning_rate": 1.144917089024638e-05, + "loss": 1.867, + "step": 15568000 + }, + { + "epoch": 77.13, + "learning_rate": 1.1447932303820296e-05, + "loss": 1.8661, + "step": 15568500 + }, + { + "epoch": 77.13, + "learning_rate": 1.1446693717394213e-05, + "loss": 1.8631, + "step": 15569000 + }, + { + "epoch": 77.14, + "learning_rate": 1.1445455130968129e-05, + "loss": 1.8884, + "step": 15569500 + }, + { + "epoch": 77.14, + "learning_rate": 1.1444216544542046e-05, + "loss": 1.8887, + "step": 15570000 + }, + { + "epoch": 77.14, + "learning_rate": 1.1442977958115962e-05, + "loss": 1.8783, + "step": 15570500 + }, + { + "epoch": 77.14, + "learning_rate": 1.144174184886273e-05, + "loss": 1.848, + "step": 15571000 + }, + { + "epoch": 77.15, + "learning_rate": 1.1440503262436647e-05, + "loss": 1.8659, + "step": 15571500 + }, + { + "epoch": 77.15, + "learning_rate": 1.1439264676010563e-05, + "loss": 1.8761, + "step": 15572000 + }, + { + "epoch": 77.15, + "learning_rate": 1.1438026089584479e-05, + "loss": 1.8729, + "step": 15572500 + }, + { + "epoch": 77.15, + "learning_rate": 1.1436787503158396e-05, + "loss": 1.8545, + "step": 15573000 + }, + { + "epoch": 77.16, + "learning_rate": 1.1435548916732313e-05, + "loss": 1.8796, + "step": 15573500 + }, + { + "epoch": 77.16, + "learning_rate": 1.143431033030623e-05, + "loss": 1.8939, + "step": 15574000 + }, + { + "epoch": 77.16, + "learning_rate": 1.1433071743880145e-05, + "loss": 1.8771, + "step": 15574500 + }, + { + "epoch": 77.16, + "learning_rate": 1.1431833157454062e-05, + "loss": 1.8957, + "step": 15575000 + }, + { + "epoch": 77.17, + "learning_rate": 1.1430597048200829e-05, + "loss": 1.8788, + "step": 15575500 + }, + { + "epoch": 77.17, + "learning_rate": 1.1429358461774746e-05, + "loss": 1.8872, + "step": 15576000 + }, + { + "epoch": 77.17, + "learning_rate": 1.1428119875348663e-05, + "loss": 1.8629, + "step": 15576500 + }, + { + "epoch": 77.17, + "learning_rate": 1.1426883766095431e-05, + "loss": 1.8808, + "step": 15577000 + }, + { + "epoch": 77.18, + "learning_rate": 1.1425645179669348e-05, + "loss": 1.8754, + "step": 15577500 + }, + { + "epoch": 77.18, + "learning_rate": 1.1424406593243264e-05, + "loss": 1.8692, + "step": 15578000 + }, + { + "epoch": 77.18, + "learning_rate": 1.142316800681718e-05, + "loss": 1.8659, + "step": 15578500 + }, + { + "epoch": 77.18, + "learning_rate": 1.1421929420391096e-05, + "loss": 1.8765, + "step": 15579000 + }, + { + "epoch": 77.19, + "learning_rate": 1.1420690833965013e-05, + "loss": 1.8884, + "step": 15579500 + }, + { + "epoch": 77.19, + "learning_rate": 1.141945224753893e-05, + "loss": 1.8741, + "step": 15580000 + }, + { + "epoch": 77.19, + "learning_rate": 1.1418213661112847e-05, + "loss": 1.8868, + "step": 15580500 + }, + { + "epoch": 77.19, + "learning_rate": 1.1416975074686762e-05, + "loss": 1.8713, + "step": 15581000 + }, + { + "epoch": 77.2, + "learning_rate": 1.1415736488260679e-05, + "loss": 1.8785, + "step": 15581500 + }, + { + "epoch": 77.2, + "learning_rate": 1.1414497901834596e-05, + "loss": 1.8741, + "step": 15582000 + }, + { + "epoch": 77.2, + "learning_rate": 1.1413261792581363e-05, + "loss": 1.8688, + "step": 15582500 + }, + { + "epoch": 77.2, + "learning_rate": 1.141202320615528e-05, + "loss": 1.8619, + "step": 15583000 + }, + { + "epoch": 77.21, + "learning_rate": 1.1410787096902048e-05, + "loss": 1.8632, + "step": 15583500 + }, + { + "epoch": 77.21, + "learning_rate": 1.1409550987648816e-05, + "loss": 1.8597, + "step": 15584000 + }, + { + "epoch": 77.21, + "learning_rate": 1.1408312401222732e-05, + "loss": 1.8637, + "step": 15584500 + }, + { + "epoch": 77.21, + "learning_rate": 1.140707381479665e-05, + "loss": 1.8889, + "step": 15585000 + }, + { + "epoch": 77.22, + "learning_rate": 1.1405835228370566e-05, + "loss": 1.907, + "step": 15585500 + }, + { + "epoch": 77.22, + "learning_rate": 1.1404596641944482e-05, + "loss": 1.8621, + "step": 15586000 + }, + { + "epoch": 77.22, + "learning_rate": 1.1403358055518399e-05, + "loss": 1.8738, + "step": 15586500 + }, + { + "epoch": 77.22, + "learning_rate": 1.1402119469092315e-05, + "loss": 1.872, + "step": 15587000 + }, + { + "epoch": 77.23, + "learning_rate": 1.1400880882666232e-05, + "loss": 1.8598, + "step": 15587500 + }, + { + "epoch": 77.23, + "learning_rate": 1.1399642296240148e-05, + "loss": 1.8779, + "step": 15588000 + }, + { + "epoch": 77.23, + "learning_rate": 1.1398403709814063e-05, + "loss": 1.8843, + "step": 15588500 + }, + { + "epoch": 77.23, + "learning_rate": 1.139716512338798e-05, + "loss": 1.8867, + "step": 15589000 + }, + { + "epoch": 77.24, + "learning_rate": 1.1395926536961897e-05, + "loss": 1.8607, + "step": 15589500 + }, + { + "epoch": 77.24, + "learning_rate": 1.1394687950535814e-05, + "loss": 1.8889, + "step": 15590000 + }, + { + "epoch": 77.24, + "learning_rate": 1.1393449364109729e-05, + "loss": 1.8689, + "step": 15590500 + }, + { + "epoch": 77.24, + "learning_rate": 1.13922132548565e-05, + "loss": 1.8666, + "step": 15591000 + }, + { + "epoch": 77.25, + "learning_rate": 1.1390974668430415e-05, + "loss": 1.8803, + "step": 15591500 + }, + { + "epoch": 77.25, + "learning_rate": 1.138973608200433e-05, + "loss": 1.8712, + "step": 15592000 + }, + { + "epoch": 77.25, + "learning_rate": 1.1388497495578247e-05, + "loss": 1.8735, + "step": 15592500 + }, + { + "epoch": 77.25, + "learning_rate": 1.1387258909152164e-05, + "loss": 1.888, + "step": 15593000 + }, + { + "epoch": 77.26, + "learning_rate": 1.1386020322726079e-05, + "loss": 1.8569, + "step": 15593500 + }, + { + "epoch": 77.26, + "learning_rate": 1.138478421347285e-05, + "loss": 1.8811, + "step": 15594000 + }, + { + "epoch": 77.26, + "learning_rate": 1.1383548104219617e-05, + "loss": 1.8613, + "step": 15594500 + }, + { + "epoch": 77.26, + "learning_rate": 1.1382309517793533e-05, + "loss": 1.8654, + "step": 15595000 + }, + { + "epoch": 77.27, + "learning_rate": 1.1381070931367449e-05, + "loss": 1.8902, + "step": 15595500 + }, + { + "epoch": 77.27, + "learning_rate": 1.1379832344941366e-05, + "loss": 1.8849, + "step": 15596000 + }, + { + "epoch": 77.27, + "learning_rate": 1.1378593758515283e-05, + "loss": 1.8665, + "step": 15596500 + }, + { + "epoch": 77.27, + "learning_rate": 1.13773551720892e-05, + "loss": 1.8813, + "step": 15597000 + }, + { + "epoch": 77.28, + "learning_rate": 1.1376116585663115e-05, + "loss": 1.8523, + "step": 15597500 + }, + { + "epoch": 77.28, + "learning_rate": 1.1374877999237032e-05, + "loss": 1.8817, + "step": 15598000 + }, + { + "epoch": 77.28, + "learning_rate": 1.1373639412810949e-05, + "loss": 1.8671, + "step": 15598500 + }, + { + "epoch": 77.28, + "learning_rate": 1.1372403303557716e-05, + "loss": 1.8675, + "step": 15599000 + }, + { + "epoch": 77.29, + "learning_rate": 1.1371164717131633e-05, + "loss": 1.8927, + "step": 15599500 + }, + { + "epoch": 77.29, + "learning_rate": 1.1369928607878401e-05, + "loss": 1.8665, + "step": 15600000 + }, + { + "epoch": 77.29, + "learning_rate": 1.1368690021452318e-05, + "loss": 1.8405, + "step": 15600500 + }, + { + "epoch": 77.29, + "learning_rate": 1.1367451435026234e-05, + "loss": 1.8831, + "step": 15601000 + }, + { + "epoch": 77.3, + "learning_rate": 1.1366215325773002e-05, + "loss": 1.8776, + "step": 15601500 + }, + { + "epoch": 77.3, + "learning_rate": 1.136497673934692e-05, + "loss": 1.8768, + "step": 15602000 + }, + { + "epoch": 77.3, + "learning_rate": 1.1363738152920835e-05, + "loss": 1.8569, + "step": 15602500 + }, + { + "epoch": 77.3, + "learning_rate": 1.1362499566494751e-05, + "loss": 1.8621, + "step": 15603000 + }, + { + "epoch": 77.31, + "learning_rate": 1.1361263457241519e-05, + "loss": 1.8561, + "step": 15603500 + }, + { + "epoch": 77.31, + "learning_rate": 1.1360024870815436e-05, + "loss": 1.9146, + "step": 15604000 + }, + { + "epoch": 77.31, + "learning_rate": 1.1358786284389352e-05, + "loss": 1.8746, + "step": 15604500 + }, + { + "epoch": 77.31, + "learning_rate": 1.135754769796327e-05, + "loss": 1.8827, + "step": 15605000 + }, + { + "epoch": 77.32, + "learning_rate": 1.1356309111537185e-05, + "loss": 1.8853, + "step": 15605500 + }, + { + "epoch": 77.32, + "learning_rate": 1.1355070525111102e-05, + "loss": 1.8473, + "step": 15606000 + }, + { + "epoch": 77.32, + "learning_rate": 1.1353831938685018e-05, + "loss": 1.8416, + "step": 15606500 + }, + { + "epoch": 77.32, + "learning_rate": 1.1352593352258935e-05, + "loss": 1.8838, + "step": 15607000 + }, + { + "epoch": 77.32, + "learning_rate": 1.135135476583285e-05, + "loss": 1.8611, + "step": 15607500 + }, + { + "epoch": 77.33, + "learning_rate": 1.1350116179406768e-05, + "loss": 1.8775, + "step": 15608000 + }, + { + "epoch": 77.33, + "learning_rate": 1.1348880070153536e-05, + "loss": 1.8814, + "step": 15608500 + }, + { + "epoch": 77.33, + "learning_rate": 1.1347641483727452e-05, + "loss": 1.8768, + "step": 15609000 + }, + { + "epoch": 77.33, + "learning_rate": 1.1346402897301369e-05, + "loss": 1.8687, + "step": 15609500 + }, + { + "epoch": 77.34, + "learning_rate": 1.1345164310875285e-05, + "loss": 1.8675, + "step": 15610000 + }, + { + "epoch": 77.34, + "learning_rate": 1.1343928201622053e-05, + "loss": 1.8628, + "step": 15610500 + }, + { + "epoch": 77.34, + "learning_rate": 1.1342692092368821e-05, + "loss": 1.8646, + "step": 15611000 + }, + { + "epoch": 77.34, + "learning_rate": 1.1341453505942738e-05, + "loss": 1.8653, + "step": 15611500 + }, + { + "epoch": 77.35, + "learning_rate": 1.1340214919516655e-05, + "loss": 1.8956, + "step": 15612000 + }, + { + "epoch": 77.35, + "learning_rate": 1.1338976333090572e-05, + "loss": 1.8833, + "step": 15612500 + }, + { + "epoch": 77.35, + "learning_rate": 1.1337737746664487e-05, + "loss": 1.8735, + "step": 15613000 + }, + { + "epoch": 77.35, + "learning_rate": 1.1336499160238404e-05, + "loss": 1.8607, + "step": 15613500 + }, + { + "epoch": 77.36, + "learning_rate": 1.1335263050985171e-05, + "loss": 1.873, + "step": 15614000 + }, + { + "epoch": 77.36, + "learning_rate": 1.1334024464559088e-05, + "loss": 1.8895, + "step": 15614500 + }, + { + "epoch": 77.36, + "learning_rate": 1.1332785878133005e-05, + "loss": 1.8796, + "step": 15615000 + }, + { + "epoch": 77.36, + "learning_rate": 1.1331547291706922e-05, + "loss": 1.8841, + "step": 15615500 + }, + { + "epoch": 77.37, + "learning_rate": 1.1330308705280837e-05, + "loss": 1.8564, + "step": 15616000 + }, + { + "epoch": 77.37, + "learning_rate": 1.1329072596027606e-05, + "loss": 1.8752, + "step": 15616500 + }, + { + "epoch": 77.37, + "learning_rate": 1.1327834009601522e-05, + "loss": 1.8749, + "step": 15617000 + }, + { + "epoch": 77.37, + "learning_rate": 1.1326595423175438e-05, + "loss": 1.8762, + "step": 15617500 + }, + { + "epoch": 77.38, + "learning_rate": 1.1325359313922207e-05, + "loss": 1.8491, + "step": 15618000 + }, + { + "epoch": 77.38, + "learning_rate": 1.1324120727496124e-05, + "loss": 1.8963, + "step": 15618500 + }, + { + "epoch": 77.38, + "learning_rate": 1.1322882141070041e-05, + "loss": 1.8832, + "step": 15619000 + }, + { + "epoch": 77.38, + "learning_rate": 1.1321643554643956e-05, + "loss": 1.8908, + "step": 15619500 + }, + { + "epoch": 77.39, + "learning_rate": 1.1320404968217872e-05, + "loss": 1.8779, + "step": 15620000 + }, + { + "epoch": 77.39, + "learning_rate": 1.1319166381791789e-05, + "loss": 1.8625, + "step": 15620500 + }, + { + "epoch": 77.39, + "learning_rate": 1.1317927795365705e-05, + "loss": 1.8804, + "step": 15621000 + }, + { + "epoch": 77.39, + "learning_rate": 1.1316689208939622e-05, + "loss": 1.8712, + "step": 15621500 + }, + { + "epoch": 77.4, + "learning_rate": 1.1315450622513538e-05, + "loss": 1.896, + "step": 15622000 + }, + { + "epoch": 77.4, + "learning_rate": 1.1314212036087455e-05, + "loss": 1.872, + "step": 15622500 + }, + { + "epoch": 77.4, + "learning_rate": 1.1312973449661371e-05, + "loss": 1.9099, + "step": 15623000 + }, + { + "epoch": 77.4, + "learning_rate": 1.1311734863235288e-05, + "loss": 1.8862, + "step": 15623500 + }, + { + "epoch": 77.41, + "learning_rate": 1.1310496276809204e-05, + "loss": 1.9084, + "step": 15624000 + }, + { + "epoch": 77.41, + "learning_rate": 1.1309257690383119e-05, + "loss": 1.8626, + "step": 15624500 + }, + { + "epoch": 77.41, + "learning_rate": 1.1308019103957036e-05, + "loss": 1.8528, + "step": 15625000 + }, + { + "epoch": 77.41, + "learning_rate": 1.1306780517530953e-05, + "loss": 1.8676, + "step": 15625500 + }, + { + "epoch": 77.42, + "learning_rate": 1.130554193110487e-05, + "loss": 1.8635, + "step": 15626000 + }, + { + "epoch": 77.42, + "learning_rate": 1.1304303344678785e-05, + "loss": 1.8737, + "step": 15626500 + }, + { + "epoch": 77.42, + "learning_rate": 1.1303067235425555e-05, + "loss": 1.8703, + "step": 15627000 + }, + { + "epoch": 77.42, + "learning_rate": 1.130182864899947e-05, + "loss": 1.867, + "step": 15627500 + }, + { + "epoch": 77.43, + "learning_rate": 1.1300590062573386e-05, + "loss": 1.8484, + "step": 15628000 + }, + { + "epoch": 77.43, + "learning_rate": 1.1299351476147303e-05, + "loss": 1.8754, + "step": 15628500 + }, + { + "epoch": 77.43, + "learning_rate": 1.129811288972122e-05, + "loss": 1.8784, + "step": 15629000 + }, + { + "epoch": 77.43, + "learning_rate": 1.129687925764084e-05, + "loss": 1.8815, + "step": 15629500 + }, + { + "epoch": 77.44, + "learning_rate": 1.1295640671214756e-05, + "loss": 1.8738, + "step": 15630000 + }, + { + "epoch": 77.44, + "learning_rate": 1.1294402084788673e-05, + "loss": 1.8704, + "step": 15630500 + }, + { + "epoch": 77.44, + "learning_rate": 1.129316349836259e-05, + "loss": 1.8805, + "step": 15631000 + }, + { + "epoch": 77.44, + "learning_rate": 1.1291927389109358e-05, + "loss": 1.8937, + "step": 15631500 + }, + { + "epoch": 77.45, + "learning_rate": 1.1290688802683275e-05, + "loss": 1.8969, + "step": 15632000 + }, + { + "epoch": 77.45, + "learning_rate": 1.1289452693430042e-05, + "loss": 1.8538, + "step": 15632500 + }, + { + "epoch": 77.45, + "learning_rate": 1.128821410700396e-05, + "loss": 1.8845, + "step": 15633000 + }, + { + "epoch": 77.45, + "learning_rate": 1.1286975520577874e-05, + "loss": 1.8547, + "step": 15633500 + }, + { + "epoch": 77.46, + "learning_rate": 1.1285736934151791e-05, + "loss": 1.8823, + "step": 15634000 + }, + { + "epoch": 77.46, + "learning_rate": 1.1284498347725708e-05, + "loss": 1.8519, + "step": 15634500 + }, + { + "epoch": 77.46, + "learning_rate": 1.1283259761299625e-05, + "loss": 1.8858, + "step": 15635000 + }, + { + "epoch": 77.46, + "learning_rate": 1.128202117487354e-05, + "loss": 1.8799, + "step": 15635500 + }, + { + "epoch": 77.47, + "learning_rate": 1.1280782588447457e-05, + "loss": 1.8577, + "step": 15636000 + }, + { + "epoch": 77.47, + "learning_rate": 1.1279544002021374e-05, + "loss": 1.8663, + "step": 15636500 + }, + { + "epoch": 77.47, + "learning_rate": 1.1278307892768141e-05, + "loss": 1.8612, + "step": 15637000 + }, + { + "epoch": 77.47, + "learning_rate": 1.1277069306342058e-05, + "loss": 1.8728, + "step": 15637500 + }, + { + "epoch": 77.48, + "learning_rate": 1.1275830719915975e-05, + "loss": 1.8631, + "step": 15638000 + }, + { + "epoch": 77.48, + "learning_rate": 1.1274592133489892e-05, + "loss": 1.8941, + "step": 15638500 + }, + { + "epoch": 77.48, + "learning_rate": 1.1273353547063807e-05, + "loss": 1.8772, + "step": 15639000 + }, + { + "epoch": 77.48, + "learning_rate": 1.1272114960637724e-05, + "loss": 1.8881, + "step": 15639500 + }, + { + "epoch": 77.49, + "learning_rate": 1.127087637421164e-05, + "loss": 1.8645, + "step": 15640000 + }, + { + "epoch": 77.49, + "learning_rate": 1.1269637787785557e-05, + "loss": 1.8579, + "step": 15640500 + }, + { + "epoch": 77.49, + "learning_rate": 1.1268401678532325e-05, + "loss": 1.8872, + "step": 15641000 + }, + { + "epoch": 77.49, + "learning_rate": 1.1267163092106242e-05, + "loss": 1.8632, + "step": 15641500 + }, + { + "epoch": 77.5, + "learning_rate": 1.1265924505680158e-05, + "loss": 1.8754, + "step": 15642000 + }, + { + "epoch": 77.5, + "learning_rate": 1.1264688396426926e-05, + "loss": 1.8747, + "step": 15642500 + }, + { + "epoch": 77.5, + "learning_rate": 1.1263449810000842e-05, + "loss": 1.8822, + "step": 15643000 + }, + { + "epoch": 77.5, + "learning_rate": 1.1262211223574759e-05, + "loss": 1.8813, + "step": 15643500 + }, + { + "epoch": 77.51, + "learning_rate": 1.1260972637148675e-05, + "loss": 1.8657, + "step": 15644000 + }, + { + "epoch": 77.51, + "learning_rate": 1.1259734050722592e-05, + "loss": 1.8769, + "step": 15644500 + }, + { + "epoch": 77.51, + "learning_rate": 1.1258497941469361e-05, + "loss": 1.8658, + "step": 15645000 + }, + { + "epoch": 77.51, + "learning_rate": 1.1257259355043276e-05, + "loss": 1.8803, + "step": 15645500 + }, + { + "epoch": 77.52, + "learning_rate": 1.1256020768617193e-05, + "loss": 1.8436, + "step": 15646000 + }, + { + "epoch": 77.52, + "learning_rate": 1.1254782182191109e-05, + "loss": 1.8598, + "step": 15646500 + }, + { + "epoch": 77.52, + "learning_rate": 1.1253546072937877e-05, + "loss": 1.866, + "step": 15647000 + }, + { + "epoch": 77.52, + "learning_rate": 1.1252307486511794e-05, + "loss": 1.8686, + "step": 15647500 + }, + { + "epoch": 77.53, + "learning_rate": 1.1251071377258563e-05, + "loss": 1.873, + "step": 15648000 + }, + { + "epoch": 77.53, + "learning_rate": 1.1249832790832478e-05, + "loss": 1.8612, + "step": 15648500 + }, + { + "epoch": 77.53, + "learning_rate": 1.1248594204406395e-05, + "loss": 1.8849, + "step": 15649000 + }, + { + "epoch": 77.53, + "learning_rate": 1.1247355617980312e-05, + "loss": 1.8521, + "step": 15649500 + }, + { + "epoch": 77.54, + "learning_rate": 1.1246117031554227e-05, + "loss": 1.8865, + "step": 15650000 + }, + { + "epoch": 77.54, + "learning_rate": 1.1244878445128144e-05, + "loss": 1.9013, + "step": 15650500 + }, + { + "epoch": 77.54, + "learning_rate": 1.1243639858702061e-05, + "loss": 1.8609, + "step": 15651000 + }, + { + "epoch": 77.54, + "learning_rate": 1.1242401272275978e-05, + "loss": 1.8757, + "step": 15651500 + }, + { + "epoch": 77.55, + "learning_rate": 1.1241162685849895e-05, + "loss": 1.8749, + "step": 15652000 + }, + { + "epoch": 77.55, + "learning_rate": 1.123992409942381e-05, + "loss": 1.8847, + "step": 15652500 + }, + { + "epoch": 77.55, + "learning_rate": 1.1238685512997726e-05, + "loss": 1.9056, + "step": 15653000 + }, + { + "epoch": 77.55, + "learning_rate": 1.1237446926571643e-05, + "loss": 1.863, + "step": 15653500 + }, + { + "epoch": 77.56, + "learning_rate": 1.123620834014556e-05, + "loss": 1.8674, + "step": 15654000 + }, + { + "epoch": 77.56, + "learning_rate": 1.1234969753719475e-05, + "loss": 1.8811, + "step": 15654500 + }, + { + "epoch": 77.56, + "learning_rate": 1.1233731167293392e-05, + "loss": 1.8661, + "step": 15655000 + }, + { + "epoch": 77.56, + "learning_rate": 1.123249505804016e-05, + "loss": 1.8805, + "step": 15655500 + }, + { + "epoch": 77.57, + "learning_rate": 1.1231256471614077e-05, + "loss": 1.8841, + "step": 15656000 + }, + { + "epoch": 77.57, + "learning_rate": 1.1230020362360845e-05, + "loss": 1.8438, + "step": 15656500 + }, + { + "epoch": 77.57, + "learning_rate": 1.1228781775934761e-05, + "loss": 1.8691, + "step": 15657000 + }, + { + "epoch": 77.57, + "learning_rate": 1.1227543189508678e-05, + "loss": 1.8676, + "step": 15657500 + }, + { + "epoch": 77.58, + "learning_rate": 1.1226304603082595e-05, + "loss": 1.8877, + "step": 15658000 + }, + { + "epoch": 77.58, + "learning_rate": 1.122506601665651e-05, + "loss": 1.8912, + "step": 15658500 + }, + { + "epoch": 77.58, + "learning_rate": 1.1223827430230427e-05, + "loss": 1.8743, + "step": 15659000 + }, + { + "epoch": 77.58, + "learning_rate": 1.1222588843804344e-05, + "loss": 1.8737, + "step": 15659500 + }, + { + "epoch": 77.59, + "learning_rate": 1.122135025737826e-05, + "loss": 1.8738, + "step": 15660000 + }, + { + "epoch": 77.59, + "learning_rate": 1.1220111670952175e-05, + "loss": 1.8704, + "step": 15660500 + }, + { + "epoch": 77.59, + "learning_rate": 1.1218873084526092e-05, + "loss": 1.8711, + "step": 15661000 + }, + { + "epoch": 77.59, + "learning_rate": 1.121763697527286e-05, + "loss": 1.8768, + "step": 15661500 + }, + { + "epoch": 77.59, + "learning_rate": 1.1216398388846778e-05, + "loss": 1.8914, + "step": 15662000 + }, + { + "epoch": 77.6, + "learning_rate": 1.1215162279593545e-05, + "loss": 1.9019, + "step": 15662500 + }, + { + "epoch": 77.6, + "learning_rate": 1.1213923693167462e-05, + "loss": 1.8763, + "step": 15663000 + }, + { + "epoch": 77.6, + "learning_rate": 1.1212685106741378e-05, + "loss": 1.879, + "step": 15663500 + }, + { + "epoch": 77.6, + "learning_rate": 1.1211446520315295e-05, + "loss": 1.8847, + "step": 15664000 + }, + { + "epoch": 77.61, + "learning_rate": 1.1210207933889212e-05, + "loss": 1.8728, + "step": 15664500 + }, + { + "epoch": 77.61, + "learning_rate": 1.1208969347463128e-05, + "loss": 1.8497, + "step": 15665000 + }, + { + "epoch": 77.61, + "learning_rate": 1.1207730761037045e-05, + "loss": 1.8573, + "step": 15665500 + }, + { + "epoch": 77.61, + "learning_rate": 1.1206492174610961e-05, + "loss": 1.8717, + "step": 15666000 + }, + { + "epoch": 77.62, + "learning_rate": 1.1205253588184877e-05, + "loss": 1.8816, + "step": 15666500 + }, + { + "epoch": 77.62, + "learning_rate": 1.1204015001758794e-05, + "loss": 1.8841, + "step": 15667000 + }, + { + "epoch": 77.62, + "learning_rate": 1.1202778892505562e-05, + "loss": 1.8609, + "step": 15667500 + }, + { + "epoch": 77.62, + "learning_rate": 1.1201540306079478e-05, + "loss": 1.8731, + "step": 15668000 + }, + { + "epoch": 77.63, + "learning_rate": 1.1200301719653395e-05, + "loss": 1.8701, + "step": 15668500 + }, + { + "epoch": 77.63, + "learning_rate": 1.1199063133227312e-05, + "loss": 1.8689, + "step": 15669000 + }, + { + "epoch": 77.63, + "learning_rate": 1.1197824546801228e-05, + "loss": 1.8675, + "step": 15669500 + }, + { + "epoch": 77.63, + "learning_rate": 1.1196585960375144e-05, + "loss": 1.8618, + "step": 15670000 + }, + { + "epoch": 77.64, + "learning_rate": 1.1195347373949059e-05, + "loss": 1.8859, + "step": 15670500 + }, + { + "epoch": 77.64, + "learning_rate": 1.1194108787522976e-05, + "loss": 1.8653, + "step": 15671000 + }, + { + "epoch": 77.64, + "learning_rate": 1.1192872678269745e-05, + "loss": 1.8843, + "step": 15671500 + }, + { + "epoch": 77.64, + "learning_rate": 1.1191636569016513e-05, + "loss": 1.8708, + "step": 15672000 + }, + { + "epoch": 77.65, + "learning_rate": 1.119039798259043e-05, + "loss": 1.898, + "step": 15672500 + }, + { + "epoch": 77.65, + "learning_rate": 1.1189159396164346e-05, + "loss": 1.8883, + "step": 15673000 + }, + { + "epoch": 77.65, + "learning_rate": 1.1187920809738263e-05, + "loss": 1.87, + "step": 15673500 + }, + { + "epoch": 77.65, + "learning_rate": 1.1186682223312178e-05, + "loss": 1.8679, + "step": 15674000 + }, + { + "epoch": 77.66, + "learning_rate": 1.1185443636886095e-05, + "loss": 1.8717, + "step": 15674500 + }, + { + "epoch": 77.66, + "learning_rate": 1.1184205050460012e-05, + "loss": 1.8555, + "step": 15675000 + }, + { + "epoch": 77.66, + "learning_rate": 1.1182966464033929e-05, + "loss": 1.8615, + "step": 15675500 + }, + { + "epoch": 77.66, + "learning_rate": 1.1181727877607845e-05, + "loss": 1.8679, + "step": 15676000 + }, + { + "epoch": 77.67, + "learning_rate": 1.118048929118176e-05, + "loss": 1.8746, + "step": 15676500 + }, + { + "epoch": 77.67, + "learning_rate": 1.1179250704755678e-05, + "loss": 1.9047, + "step": 15677000 + }, + { + "epoch": 77.67, + "learning_rate": 1.1178012118329593e-05, + "loss": 1.8694, + "step": 15677500 + }, + { + "epoch": 77.67, + "learning_rate": 1.117677353190351e-05, + "loss": 1.8666, + "step": 15678000 + }, + { + "epoch": 77.68, + "learning_rate": 1.1175534945477425e-05, + "loss": 1.8587, + "step": 15678500 + }, + { + "epoch": 77.68, + "learning_rate": 1.1174298836224196e-05, + "loss": 1.875, + "step": 15679000 + }, + { + "epoch": 77.68, + "learning_rate": 1.1173062726970963e-05, + "loss": 1.8774, + "step": 15679500 + }, + { + "epoch": 77.68, + "learning_rate": 1.117182414054488e-05, + "loss": 1.8691, + "step": 15680000 + }, + { + "epoch": 77.69, + "learning_rate": 1.1170585554118795e-05, + "loss": 1.8561, + "step": 15680500 + }, + { + "epoch": 77.69, + "learning_rate": 1.1169346967692712e-05, + "loss": 1.8547, + "step": 15681000 + }, + { + "epoch": 77.69, + "learning_rate": 1.1168108381266629e-05, + "loss": 1.8793, + "step": 15681500 + }, + { + "epoch": 77.69, + "learning_rate": 1.1166872272013397e-05, + "loss": 1.8786, + "step": 15682000 + }, + { + "epoch": 77.7, + "learning_rate": 1.1165633685587314e-05, + "loss": 1.889, + "step": 15682500 + }, + { + "epoch": 77.7, + "learning_rate": 1.116439509916123e-05, + "loss": 1.8716, + "step": 15683000 + }, + { + "epoch": 77.7, + "learning_rate": 1.1163156512735145e-05, + "loss": 1.8809, + "step": 15683500 + }, + { + "epoch": 77.7, + "learning_rate": 1.1161917926309062e-05, + "loss": 1.8712, + "step": 15684000 + }, + { + "epoch": 77.71, + "learning_rate": 1.1160684294228684e-05, + "loss": 1.8753, + "step": 15684500 + }, + { + "epoch": 77.71, + "learning_rate": 1.11594457078026e-05, + "loss": 1.8736, + "step": 15685000 + }, + { + "epoch": 77.71, + "learning_rate": 1.1158207121376515e-05, + "loss": 1.8798, + "step": 15685500 + }, + { + "epoch": 77.71, + "learning_rate": 1.1156968534950432e-05, + "loss": 1.8867, + "step": 15686000 + }, + { + "epoch": 77.72, + "learning_rate": 1.1155729948524349e-05, + "loss": 1.8961, + "step": 15686500 + }, + { + "epoch": 77.72, + "learning_rate": 1.1154491362098265e-05, + "loss": 1.8746, + "step": 15687000 + }, + { + "epoch": 77.72, + "learning_rate": 1.115325277567218e-05, + "loss": 1.8836, + "step": 15687500 + }, + { + "epoch": 77.72, + "learning_rate": 1.1152014189246098e-05, + "loss": 1.851, + "step": 15688000 + }, + { + "epoch": 77.73, + "learning_rate": 1.1150775602820015e-05, + "loss": 1.8773, + "step": 15688500 + }, + { + "epoch": 77.73, + "learning_rate": 1.1149537016393931e-05, + "loss": 1.887, + "step": 15689000 + }, + { + "epoch": 77.73, + "learning_rate": 1.1148298429967847e-05, + "loss": 1.8506, + "step": 15689500 + }, + { + "epoch": 77.73, + "learning_rate": 1.1147062320714616e-05, + "loss": 1.8842, + "step": 15690000 + }, + { + "epoch": 77.74, + "learning_rate": 1.114582373428853e-05, + "loss": 1.8771, + "step": 15690500 + }, + { + "epoch": 77.74, + "learning_rate": 1.1144585147862448e-05, + "loss": 1.878, + "step": 15691000 + }, + { + "epoch": 77.74, + "learning_rate": 1.1143349038609216e-05, + "loss": 1.8773, + "step": 15691500 + }, + { + "epoch": 77.74, + "learning_rate": 1.1142110452183133e-05, + "loss": 1.8791, + "step": 15692000 + }, + { + "epoch": 77.75, + "learning_rate": 1.1140871865757049e-05, + "loss": 1.861, + "step": 15692500 + }, + { + "epoch": 77.75, + "learning_rate": 1.1139633279330966e-05, + "loss": 1.8709, + "step": 15693000 + }, + { + "epoch": 77.75, + "learning_rate": 1.1138394692904883e-05, + "loss": 1.8734, + "step": 15693500 + }, + { + "epoch": 77.75, + "learning_rate": 1.1137158583651651e-05, + "loss": 1.8856, + "step": 15694000 + }, + { + "epoch": 77.76, + "learning_rate": 1.1135919997225568e-05, + "loss": 1.8857, + "step": 15694500 + }, + { + "epoch": 77.76, + "learning_rate": 1.1134681410799483e-05, + "loss": 1.8707, + "step": 15695000 + }, + { + "epoch": 77.76, + "learning_rate": 1.11334428243734e-05, + "loss": 1.8789, + "step": 15695500 + }, + { + "epoch": 77.76, + "learning_rate": 1.1132206715120168e-05, + "loss": 1.8615, + "step": 15696000 + }, + { + "epoch": 77.77, + "learning_rate": 1.1130968128694084e-05, + "loss": 1.8657, + "step": 15696500 + }, + { + "epoch": 77.77, + "learning_rate": 1.1129729542268001e-05, + "loss": 1.9037, + "step": 15697000 + }, + { + "epoch": 77.77, + "learning_rate": 1.1128490955841918e-05, + "loss": 1.8697, + "step": 15697500 + }, + { + "epoch": 77.77, + "learning_rate": 1.1127252369415834e-05, + "loss": 1.8616, + "step": 15698000 + }, + { + "epoch": 77.78, + "learning_rate": 1.112601378298975e-05, + "loss": 1.8858, + "step": 15698500 + }, + { + "epoch": 77.78, + "learning_rate": 1.1124777673736518e-05, + "loss": 1.8685, + "step": 15699000 + }, + { + "epoch": 77.78, + "learning_rate": 1.1123539087310435e-05, + "loss": 1.8925, + "step": 15699500 + }, + { + "epoch": 77.78, + "learning_rate": 1.1122300500884351e-05, + "loss": 1.8746, + "step": 15700000 + }, + { + "epoch": 77.79, + "learning_rate": 1.1121061914458268e-05, + "loss": 1.8526, + "step": 15700500 + }, + { + "epoch": 77.79, + "learning_rate": 1.1119823328032184e-05, + "loss": 1.8832, + "step": 15701000 + }, + { + "epoch": 77.79, + "learning_rate": 1.11185847416061e-05, + "loss": 1.876, + "step": 15701500 + }, + { + "epoch": 77.79, + "learning_rate": 1.1117348632352868e-05, + "loss": 1.8782, + "step": 15702000 + }, + { + "epoch": 77.8, + "learning_rate": 1.1116110045926785e-05, + "loss": 1.8874, + "step": 15702500 + }, + { + "epoch": 77.8, + "learning_rate": 1.1114871459500702e-05, + "loss": 1.8641, + "step": 15703000 + }, + { + "epoch": 77.8, + "learning_rate": 1.1113632873074618e-05, + "loss": 1.8784, + "step": 15703500 + }, + { + "epoch": 77.8, + "learning_rate": 1.1112396763821387e-05, + "loss": 1.8775, + "step": 15704000 + }, + { + "epoch": 77.81, + "learning_rate": 1.1111158177395302e-05, + "loss": 1.8497, + "step": 15704500 + }, + { + "epoch": 77.81, + "learning_rate": 1.110991959096922e-05, + "loss": 1.8626, + "step": 15705000 + }, + { + "epoch": 77.81, + "learning_rate": 1.1108681004543135e-05, + "loss": 1.8862, + "step": 15705500 + }, + { + "epoch": 77.81, + "learning_rate": 1.1107442418117052e-05, + "loss": 1.8604, + "step": 15706000 + }, + { + "epoch": 77.82, + "learning_rate": 1.1106203831690968e-05, + "loss": 1.8775, + "step": 15706500 + }, + { + "epoch": 77.82, + "learning_rate": 1.1104965245264885e-05, + "loss": 1.8732, + "step": 15707000 + }, + { + "epoch": 77.82, + "learning_rate": 1.11037266588388e-05, + "loss": 1.8673, + "step": 15707500 + }, + { + "epoch": 77.82, + "learning_rate": 1.1102488072412718e-05, + "loss": 1.8931, + "step": 15708000 + }, + { + "epoch": 77.83, + "learning_rate": 1.1101251963159485e-05, + "loss": 1.8391, + "step": 15708500 + }, + { + "epoch": 77.83, + "learning_rate": 1.1100013376733402e-05, + "loss": 1.8714, + "step": 15709000 + }, + { + "epoch": 77.83, + "learning_rate": 1.1098774790307319e-05, + "loss": 1.8563, + "step": 15709500 + }, + { + "epoch": 77.83, + "learning_rate": 1.1097536203881235e-05, + "loss": 1.8889, + "step": 15710000 + }, + { + "epoch": 77.84, + "learning_rate": 1.109629761745515e-05, + "loss": 1.8565, + "step": 15710500 + }, + { + "epoch": 77.84, + "learning_rate": 1.1095059031029068e-05, + "loss": 1.8917, + "step": 15711000 + }, + { + "epoch": 77.84, + "learning_rate": 1.1093820444602985e-05, + "loss": 1.8682, + "step": 15711500 + }, + { + "epoch": 77.84, + "learning_rate": 1.1092581858176902e-05, + "loss": 1.8824, + "step": 15712000 + }, + { + "epoch": 77.85, + "learning_rate": 1.1091343271750817e-05, + "loss": 1.8918, + "step": 15712500 + }, + { + "epoch": 77.85, + "learning_rate": 1.1090104685324734e-05, + "loss": 1.8594, + "step": 15713000 + }, + { + "epoch": 77.85, + "learning_rate": 1.1088866098898649e-05, + "loss": 1.8717, + "step": 15713500 + }, + { + "epoch": 77.85, + "learning_rate": 1.1087629989645418e-05, + "loss": 1.8716, + "step": 15714000 + }, + { + "epoch": 77.86, + "learning_rate": 1.1086391403219335e-05, + "loss": 1.8778, + "step": 15714500 + }, + { + "epoch": 77.86, + "learning_rate": 1.1085152816793252e-05, + "loss": 1.8789, + "step": 15715000 + }, + { + "epoch": 77.86, + "learning_rate": 1.1083914230367167e-05, + "loss": 1.8761, + "step": 15715500 + }, + { + "epoch": 77.86, + "learning_rate": 1.1082675643941084e-05, + "loss": 1.864, + "step": 15716000 + }, + { + "epoch": 77.86, + "learning_rate": 1.1081437057514999e-05, + "loss": 1.9065, + "step": 15716500 + }, + { + "epoch": 77.87, + "learning_rate": 1.1080200948261768e-05, + "loss": 1.9095, + "step": 15717000 + }, + { + "epoch": 77.87, + "learning_rate": 1.1078962361835685e-05, + "loss": 1.8912, + "step": 15717500 + }, + { + "epoch": 77.87, + "learning_rate": 1.1077723775409602e-05, + "loss": 1.8926, + "step": 15718000 + }, + { + "epoch": 77.87, + "learning_rate": 1.107648766615637e-05, + "loss": 1.8869, + "step": 15718500 + }, + { + "epoch": 77.88, + "learning_rate": 1.1075249079730286e-05, + "loss": 1.8983, + "step": 15719000 + }, + { + "epoch": 77.88, + "learning_rate": 1.1074010493304203e-05, + "loss": 1.8664, + "step": 15719500 + }, + { + "epoch": 77.88, + "learning_rate": 1.1072771906878118e-05, + "loss": 1.8946, + "step": 15720000 + }, + { + "epoch": 77.88, + "learning_rate": 1.1071533320452035e-05, + "loss": 1.901, + "step": 15720500 + }, + { + "epoch": 77.89, + "learning_rate": 1.1070294734025952e-05, + "loss": 1.8707, + "step": 15721000 + }, + { + "epoch": 77.89, + "learning_rate": 1.1069056147599869e-05, + "loss": 1.8869, + "step": 15721500 + }, + { + "epoch": 77.89, + "learning_rate": 1.1067817561173784e-05, + "loss": 1.8488, + "step": 15722000 + }, + { + "epoch": 77.89, + "learning_rate": 1.1066581451920553e-05, + "loss": 1.8687, + "step": 15722500 + }, + { + "epoch": 77.9, + "learning_rate": 1.1065342865494468e-05, + "loss": 1.8713, + "step": 15723000 + }, + { + "epoch": 77.9, + "learning_rate": 1.1064104279068385e-05, + "loss": 1.8776, + "step": 15723500 + }, + { + "epoch": 77.9, + "learning_rate": 1.1062865692642302e-05, + "loss": 1.8622, + "step": 15724000 + }, + { + "epoch": 77.9, + "learning_rate": 1.1061627106216219e-05, + "loss": 1.8722, + "step": 15724500 + }, + { + "epoch": 77.91, + "learning_rate": 1.1060388519790134e-05, + "loss": 1.8823, + "step": 15725000 + }, + { + "epoch": 77.91, + "learning_rate": 1.105914993336405e-05, + "loss": 1.89, + "step": 15725500 + }, + { + "epoch": 77.91, + "learning_rate": 1.1057911346937968e-05, + "loss": 1.8688, + "step": 15726000 + }, + { + "epoch": 77.91, + "learning_rate": 1.1056672760511885e-05, + "loss": 1.8817, + "step": 15726500 + }, + { + "epoch": 77.92, + "learning_rate": 1.10554341740858e-05, + "loss": 1.8652, + "step": 15727000 + }, + { + "epoch": 77.92, + "learning_rate": 1.1054195587659715e-05, + "loss": 1.8648, + "step": 15727500 + }, + { + "epoch": 77.92, + "learning_rate": 1.1052959478406484e-05, + "loss": 1.8813, + "step": 15728000 + }, + { + "epoch": 77.92, + "learning_rate": 1.1051720891980401e-05, + "loss": 1.8568, + "step": 15728500 + }, + { + "epoch": 77.93, + "learning_rate": 1.105048478272717e-05, + "loss": 1.8871, + "step": 15729000 + }, + { + "epoch": 77.93, + "learning_rate": 1.1049246196301085e-05, + "loss": 1.8738, + "step": 15729500 + }, + { + "epoch": 77.93, + "learning_rate": 1.1048007609875002e-05, + "loss": 1.8776, + "step": 15730000 + }, + { + "epoch": 77.93, + "learning_rate": 1.1046769023448919e-05, + "loss": 1.8897, + "step": 15730500 + }, + { + "epoch": 77.94, + "learning_rate": 1.1045530437022836e-05, + "loss": 1.8658, + "step": 15731000 + }, + { + "epoch": 77.94, + "learning_rate": 1.1044291850596751e-05, + "loss": 1.8766, + "step": 15731500 + }, + { + "epoch": 77.94, + "learning_rate": 1.1043053264170668e-05, + "loss": 1.894, + "step": 15732000 + }, + { + "epoch": 77.94, + "learning_rate": 1.1041814677744585e-05, + "loss": 1.8813, + "step": 15732500 + }, + { + "epoch": 77.95, + "learning_rate": 1.1040576091318502e-05, + "loss": 1.8552, + "step": 15733000 + }, + { + "epoch": 77.95, + "learning_rate": 1.1039337504892417e-05, + "loss": 1.895, + "step": 15733500 + }, + { + "epoch": 77.95, + "learning_rate": 1.1038098918466332e-05, + "loss": 1.9002, + "step": 15734000 + }, + { + "epoch": 77.95, + "learning_rate": 1.1036860332040249e-05, + "loss": 1.8651, + "step": 15734500 + }, + { + "epoch": 77.96, + "learning_rate": 1.1035621745614166e-05, + "loss": 1.8914, + "step": 15735000 + }, + { + "epoch": 77.96, + "learning_rate": 1.1034385636360935e-05, + "loss": 1.9016, + "step": 15735500 + }, + { + "epoch": 77.96, + "learning_rate": 1.1033147049934852e-05, + "loss": 1.8946, + "step": 15736000 + }, + { + "epoch": 77.96, + "learning_rate": 1.1031908463508767e-05, + "loss": 1.8813, + "step": 15736500 + }, + { + "epoch": 77.97, + "learning_rate": 1.1030669877082684e-05, + "loss": 1.8784, + "step": 15737000 + }, + { + "epoch": 77.97, + "learning_rate": 1.10294312906566e-05, + "loss": 1.9147, + "step": 15737500 + }, + { + "epoch": 77.97, + "learning_rate": 1.1028195181403368e-05, + "loss": 1.8839, + "step": 15738000 + }, + { + "epoch": 77.97, + "learning_rate": 1.1026959072150137e-05, + "loss": 1.8892, + "step": 15738500 + }, + { + "epoch": 77.98, + "learning_rate": 1.1025720485724054e-05, + "loss": 1.8793, + "step": 15739000 + }, + { + "epoch": 77.98, + "learning_rate": 1.1024481899297969e-05, + "loss": 1.8695, + "step": 15739500 + }, + { + "epoch": 77.98, + "learning_rate": 1.1023243312871886e-05, + "loss": 1.8919, + "step": 15740000 + }, + { + "epoch": 77.98, + "learning_rate": 1.1022004726445801e-05, + "loss": 1.8631, + "step": 15740500 + }, + { + "epoch": 77.99, + "learning_rate": 1.1020766140019718e-05, + "loss": 1.8748, + "step": 15741000 + }, + { + "epoch": 77.99, + "learning_rate": 1.1019527553593635e-05, + "loss": 1.8925, + "step": 15741500 + }, + { + "epoch": 77.99, + "learning_rate": 1.1018288967167552e-05, + "loss": 1.8819, + "step": 15742000 + }, + { + "epoch": 77.99, + "learning_rate": 1.101705285791432e-05, + "loss": 1.8878, + "step": 15742500 + }, + { + "epoch": 78.0, + "learning_rate": 1.1015816748661088e-05, + "loss": 1.8768, + "step": 15743000 + }, + { + "epoch": 78.0, + "learning_rate": 1.1014578162235005e-05, + "loss": 1.8954, + "step": 15743500 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.680015952696548, + "eval_accuracy_mlm": 0.6405002962105025, + "eval_accuracy_nsp": 0.8662883051784797, + "eval_loss": 2.299572467803955, + "eval_runtime": 146.8596, + "eval_samples_per_second": 1736.073, + "eval_steps_per_second": 72.341, + "step": 15743754 + }, + { + "epoch": 78.0, + "learning_rate": 1.1013339575808922e-05, + "loss": 1.8868, + "step": 15744000 + }, + { + "epoch": 78.0, + "learning_rate": 1.1012100989382839e-05, + "loss": 1.8654, + "step": 15744500 + }, + { + "epoch": 78.01, + "learning_rate": 1.1010862402956754e-05, + "loss": 1.8859, + "step": 15745000 + }, + { + "epoch": 78.01, + "learning_rate": 1.100962381653067e-05, + "loss": 1.8535, + "step": 15745500 + }, + { + "epoch": 78.01, + "learning_rate": 1.1008387707277438e-05, + "loss": 1.833, + "step": 15746000 + }, + { + "epoch": 78.01, + "learning_rate": 1.1007149120851355e-05, + "loss": 1.8756, + "step": 15746500 + }, + { + "epoch": 78.02, + "learning_rate": 1.1005910534425272e-05, + "loss": 1.8535, + "step": 15747000 + }, + { + "epoch": 78.02, + "learning_rate": 1.1004671947999189e-05, + "loss": 1.8642, + "step": 15747500 + }, + { + "epoch": 78.02, + "learning_rate": 1.1003433361573104e-05, + "loss": 1.8497, + "step": 15748000 + }, + { + "epoch": 78.02, + "learning_rate": 1.1002194775147021e-05, + "loss": 1.8574, + "step": 15748500 + }, + { + "epoch": 78.03, + "learning_rate": 1.1000956188720938e-05, + "loss": 1.871, + "step": 15749000 + }, + { + "epoch": 78.03, + "learning_rate": 1.0999720079467705e-05, + "loss": 1.8629, + "step": 15749500 + }, + { + "epoch": 78.03, + "learning_rate": 1.0998481493041622e-05, + "loss": 1.874, + "step": 15750000 + }, + { + "epoch": 78.03, + "learning_rate": 1.099724538378839e-05, + "loss": 1.851, + "step": 15750500 + }, + { + "epoch": 78.04, + "learning_rate": 1.0996006797362308e-05, + "loss": 1.8463, + "step": 15751000 + }, + { + "epoch": 78.04, + "learning_rate": 1.0994768210936225e-05, + "loss": 1.8624, + "step": 15751500 + }, + { + "epoch": 78.04, + "learning_rate": 1.099352962451014e-05, + "loss": 1.8571, + "step": 15752000 + }, + { + "epoch": 78.04, + "learning_rate": 1.0992291038084055e-05, + "loss": 1.8759, + "step": 15752500 + }, + { + "epoch": 78.05, + "learning_rate": 1.0991052451657972e-05, + "loss": 1.8849, + "step": 15753000 + }, + { + "epoch": 78.05, + "learning_rate": 1.098981634240474e-05, + "loss": 1.8516, + "step": 15753500 + }, + { + "epoch": 78.05, + "learning_rate": 1.0988577755978658e-05, + "loss": 1.8534, + "step": 15754000 + }, + { + "epoch": 78.05, + "learning_rate": 1.0987339169552575e-05, + "loss": 1.8716, + "step": 15754500 + }, + { + "epoch": 78.06, + "learning_rate": 1.098610058312649e-05, + "loss": 1.8509, + "step": 15755000 + }, + { + "epoch": 78.06, + "learning_rate": 1.0984861996700407e-05, + "loss": 1.8543, + "step": 15755500 + }, + { + "epoch": 78.06, + "learning_rate": 1.0983623410274322e-05, + "loss": 1.8715, + "step": 15756000 + }, + { + "epoch": 78.06, + "learning_rate": 1.098238730102109e-05, + "loss": 1.8806, + "step": 15756500 + }, + { + "epoch": 78.07, + "learning_rate": 1.0981148714595008e-05, + "loss": 1.8725, + "step": 15757000 + }, + { + "epoch": 78.07, + "learning_rate": 1.0979912605341777e-05, + "loss": 1.8665, + "step": 15757500 + }, + { + "epoch": 78.07, + "learning_rate": 1.0978674018915692e-05, + "loss": 1.8509, + "step": 15758000 + }, + { + "epoch": 78.07, + "learning_rate": 1.097743790966246e-05, + "loss": 1.86, + "step": 15758500 + }, + { + "epoch": 78.08, + "learning_rate": 1.0976199323236377e-05, + "loss": 1.8859, + "step": 15759000 + }, + { + "epoch": 78.08, + "learning_rate": 1.0974960736810294e-05, + "loss": 1.8738, + "step": 15759500 + }, + { + "epoch": 78.08, + "learning_rate": 1.097372215038421e-05, + "loss": 1.8671, + "step": 15760000 + }, + { + "epoch": 78.08, + "learning_rate": 1.0972483563958127e-05, + "loss": 1.8736, + "step": 15760500 + }, + { + "epoch": 78.09, + "learning_rate": 1.0971244977532044e-05, + "loss": 1.854, + "step": 15761000 + }, + { + "epoch": 78.09, + "learning_rate": 1.0970006391105959e-05, + "loss": 1.8616, + "step": 15761500 + }, + { + "epoch": 78.09, + "learning_rate": 1.0968767804679876e-05, + "loss": 1.8532, + "step": 15762000 + }, + { + "epoch": 78.09, + "learning_rate": 1.0967529218253791e-05, + "loss": 1.845, + "step": 15762500 + }, + { + "epoch": 78.1, + "learning_rate": 1.0966290631827708e-05, + "loss": 1.8945, + "step": 15763000 + }, + { + "epoch": 78.1, + "learning_rate": 1.0965052045401625e-05, + "loss": 1.8747, + "step": 15763500 + }, + { + "epoch": 78.1, + "learning_rate": 1.0963813458975542e-05, + "loss": 1.8622, + "step": 15764000 + }, + { + "epoch": 78.1, + "learning_rate": 1.0962574872549457e-05, + "loss": 1.8702, + "step": 15764500 + }, + { + "epoch": 78.11, + "learning_rate": 1.0961336286123374e-05, + "loss": 1.8746, + "step": 15765000 + }, + { + "epoch": 78.11, + "learning_rate": 1.0960100176870141e-05, + "loss": 1.851, + "step": 15765500 + }, + { + "epoch": 78.11, + "learning_rate": 1.0958861590444058e-05, + "loss": 1.8814, + "step": 15766000 + }, + { + "epoch": 78.11, + "learning_rate": 1.0957623004017975e-05, + "loss": 1.8603, + "step": 15766500 + }, + { + "epoch": 78.12, + "learning_rate": 1.0956384417591892e-05, + "loss": 1.8517, + "step": 15767000 + }, + { + "epoch": 78.12, + "learning_rate": 1.0955145831165807e-05, + "loss": 1.8795, + "step": 15767500 + }, + { + "epoch": 78.12, + "learning_rate": 1.0953909721912577e-05, + "loss": 1.8558, + "step": 15768000 + }, + { + "epoch": 78.12, + "learning_rate": 1.0952671135486493e-05, + "loss": 1.8635, + "step": 15768500 + }, + { + "epoch": 78.13, + "learning_rate": 1.0951432549060408e-05, + "loss": 1.8511, + "step": 15769000 + }, + { + "epoch": 78.13, + "learning_rate": 1.0950193962634325e-05, + "loss": 1.8675, + "step": 15769500 + }, + { + "epoch": 78.13, + "learning_rate": 1.0948955376208242e-05, + "loss": 1.856, + "step": 15770000 + }, + { + "epoch": 78.13, + "learning_rate": 1.0947716789782157e-05, + "loss": 1.8549, + "step": 15770500 + }, + { + "epoch": 78.13, + "learning_rate": 1.0946480680528928e-05, + "loss": 1.8655, + "step": 15771000 + }, + { + "epoch": 78.14, + "learning_rate": 1.0945242094102843e-05, + "loss": 1.8426, + "step": 15771500 + }, + { + "epoch": 78.14, + "learning_rate": 1.0944003507676758e-05, + "loss": 1.868, + "step": 15772000 + }, + { + "epoch": 78.14, + "learning_rate": 1.0942764921250675e-05, + "loss": 1.8734, + "step": 15772500 + }, + { + "epoch": 78.14, + "learning_rate": 1.0941526334824592e-05, + "loss": 1.8745, + "step": 15773000 + }, + { + "epoch": 78.15, + "learning_rate": 1.0940287748398509e-05, + "loss": 1.8753, + "step": 15773500 + }, + { + "epoch": 78.15, + "learning_rate": 1.0939049161972424e-05, + "loss": 1.8554, + "step": 15774000 + }, + { + "epoch": 78.15, + "learning_rate": 1.0937810575546341e-05, + "loss": 1.8567, + "step": 15774500 + }, + { + "epoch": 78.15, + "learning_rate": 1.0936571989120258e-05, + "loss": 1.8276, + "step": 15775000 + }, + { + "epoch": 78.16, + "learning_rate": 1.0935333402694175e-05, + "loss": 1.8678, + "step": 15775500 + }, + { + "epoch": 78.16, + "learning_rate": 1.093409481626809e-05, + "loss": 1.8593, + "step": 15776000 + }, + { + "epoch": 78.16, + "learning_rate": 1.0932858707014859e-05, + "loss": 1.8462, + "step": 15776500 + }, + { + "epoch": 78.16, + "learning_rate": 1.0931620120588774e-05, + "loss": 1.8509, + "step": 15777000 + }, + { + "epoch": 78.17, + "learning_rate": 1.0930381534162691e-05, + "loss": 1.8912, + "step": 15777500 + }, + { + "epoch": 78.17, + "learning_rate": 1.092914542490946e-05, + "loss": 1.8817, + "step": 15778000 + }, + { + "epoch": 78.17, + "learning_rate": 1.0927906838483377e-05, + "loss": 1.879, + "step": 15778500 + }, + { + "epoch": 78.17, + "learning_rate": 1.0926668252057292e-05, + "loss": 1.8546, + "step": 15779000 + }, + { + "epoch": 78.18, + "learning_rate": 1.0925429665631209e-05, + "loss": 1.8513, + "step": 15779500 + }, + { + "epoch": 78.18, + "learning_rate": 1.0924193556377978e-05, + "loss": 1.8516, + "step": 15780000 + }, + { + "epoch": 78.18, + "learning_rate": 1.0922954969951895e-05, + "loss": 1.8701, + "step": 15780500 + }, + { + "epoch": 78.18, + "learning_rate": 1.092171638352581e-05, + "loss": 1.8699, + "step": 15781000 + }, + { + "epoch": 78.19, + "learning_rate": 1.0920477797099727e-05, + "loss": 1.8588, + "step": 15781500 + }, + { + "epoch": 78.19, + "learning_rate": 1.0919239210673644e-05, + "loss": 1.856, + "step": 15782000 + }, + { + "epoch": 78.19, + "learning_rate": 1.0918000624247559e-05, + "loss": 1.8864, + "step": 15782500 + }, + { + "epoch": 78.19, + "learning_rate": 1.0916762037821474e-05, + "loss": 1.8613, + "step": 15783000 + }, + { + "epoch": 78.2, + "learning_rate": 1.0915523451395391e-05, + "loss": 1.8594, + "step": 15783500 + }, + { + "epoch": 78.2, + "learning_rate": 1.091428734214216e-05, + "loss": 1.8545, + "step": 15784000 + }, + { + "epoch": 78.2, + "learning_rate": 1.0913048755716077e-05, + "loss": 1.8666, + "step": 15784500 + }, + { + "epoch": 78.2, + "learning_rate": 1.0911810169289994e-05, + "loss": 1.8721, + "step": 15785000 + }, + { + "epoch": 78.21, + "learning_rate": 1.091057158286391e-05, + "loss": 1.8695, + "step": 15785500 + }, + { + "epoch": 78.21, + "learning_rate": 1.0909332996437826e-05, + "loss": 1.8816, + "step": 15786000 + }, + { + "epoch": 78.21, + "learning_rate": 1.0908094410011741e-05, + "loss": 1.8621, + "step": 15786500 + }, + { + "epoch": 78.21, + "learning_rate": 1.0906855823585658e-05, + "loss": 1.8529, + "step": 15787000 + }, + { + "epoch": 78.22, + "learning_rate": 1.0905617237159575e-05, + "loss": 1.8729, + "step": 15787500 + }, + { + "epoch": 78.22, + "learning_rate": 1.0904378650733492e-05, + "loss": 1.8399, + "step": 15788000 + }, + { + "epoch": 78.22, + "learning_rate": 1.090314254148026e-05, + "loss": 1.8661, + "step": 15788500 + }, + { + "epoch": 78.22, + "learning_rate": 1.0901903955054176e-05, + "loss": 1.8796, + "step": 15789000 + }, + { + "epoch": 78.23, + "learning_rate": 1.0900665368628093e-05, + "loss": 1.8487, + "step": 15789500 + }, + { + "epoch": 78.23, + "learning_rate": 1.0899426782202008e-05, + "loss": 1.8609, + "step": 15790000 + }, + { + "epoch": 78.23, + "learning_rate": 1.0898190672948777e-05, + "loss": 1.8499, + "step": 15790500 + }, + { + "epoch": 78.23, + "learning_rate": 1.0896952086522694e-05, + "loss": 1.8548, + "step": 15791000 + }, + { + "epoch": 78.24, + "learning_rate": 1.0895713500096611e-05, + "loss": 1.8996, + "step": 15791500 + }, + { + "epoch": 78.24, + "learning_rate": 1.0894474913670528e-05, + "loss": 1.852, + "step": 15792000 + }, + { + "epoch": 78.24, + "learning_rate": 1.0893236327244443e-05, + "loss": 1.8624, + "step": 15792500 + }, + { + "epoch": 78.24, + "learning_rate": 1.0891997740818358e-05, + "loss": 1.878, + "step": 15793000 + }, + { + "epoch": 78.25, + "learning_rate": 1.0890759154392275e-05, + "loss": 1.8669, + "step": 15793500 + }, + { + "epoch": 78.25, + "learning_rate": 1.0889520567966192e-05, + "loss": 1.8608, + "step": 15794000 + }, + { + "epoch": 78.25, + "learning_rate": 1.0888281981540109e-05, + "loss": 1.8828, + "step": 15794500 + }, + { + "epoch": 78.25, + "learning_rate": 1.0887043395114024e-05, + "loss": 1.879, + "step": 15795000 + }, + { + "epoch": 78.26, + "learning_rate": 1.0885804808687941e-05, + "loss": 1.8798, + "step": 15795500 + }, + { + "epoch": 78.26, + "learning_rate": 1.0884566222261858e-05, + "loss": 1.8702, + "step": 15796000 + }, + { + "epoch": 78.26, + "learning_rate": 1.0883330113008625e-05, + "loss": 1.8671, + "step": 15796500 + }, + { + "epoch": 78.26, + "learning_rate": 1.0882094003755394e-05, + "loss": 1.8688, + "step": 15797000 + }, + { + "epoch": 78.27, + "learning_rate": 1.0880857894502163e-05, + "loss": 1.8688, + "step": 15797500 + }, + { + "epoch": 78.27, + "learning_rate": 1.087961930807608e-05, + "loss": 1.8712, + "step": 15798000 + }, + { + "epoch": 78.27, + "learning_rate": 1.0878380721649995e-05, + "loss": 1.8609, + "step": 15798500 + }, + { + "epoch": 78.27, + "learning_rate": 1.0877142135223912e-05, + "loss": 1.8564, + "step": 15799000 + }, + { + "epoch": 78.28, + "learning_rate": 1.0875903548797829e-05, + "loss": 1.8844, + "step": 15799500 + }, + { + "epoch": 78.28, + "learning_rate": 1.0874667439544598e-05, + "loss": 1.8467, + "step": 15800000 + }, + { + "epoch": 78.28, + "learning_rate": 1.0873428853118515e-05, + "loss": 1.8655, + "step": 15800500 + }, + { + "epoch": 78.28, + "learning_rate": 1.087219026669243e-05, + "loss": 1.8817, + "step": 15801000 + }, + { + "epoch": 78.29, + "learning_rate": 1.0870954157439197e-05, + "loss": 1.8568, + "step": 15801500 + }, + { + "epoch": 78.29, + "learning_rate": 1.0869715571013114e-05, + "loss": 1.8451, + "step": 15802000 + }, + { + "epoch": 78.29, + "learning_rate": 1.086847698458703e-05, + "loss": 1.8604, + "step": 15802500 + }, + { + "epoch": 78.29, + "learning_rate": 1.0867238398160948e-05, + "loss": 1.8672, + "step": 15803000 + }, + { + "epoch": 78.3, + "learning_rate": 1.0865999811734865e-05, + "loss": 1.8936, + "step": 15803500 + }, + { + "epoch": 78.3, + "learning_rate": 1.086476122530878e-05, + "loss": 1.8595, + "step": 15804000 + }, + { + "epoch": 78.3, + "learning_rate": 1.0863522638882697e-05, + "loss": 1.8774, + "step": 15804500 + }, + { + "epoch": 78.3, + "learning_rate": 1.0862284052456614e-05, + "loss": 1.8647, + "step": 15805000 + }, + { + "epoch": 78.31, + "learning_rate": 1.0861047943203381e-05, + "loss": 1.8741, + "step": 15805500 + }, + { + "epoch": 78.31, + "learning_rate": 1.0859809356777298e-05, + "loss": 1.8707, + "step": 15806000 + }, + { + "epoch": 78.31, + "learning_rate": 1.0858570770351215e-05, + "loss": 1.8692, + "step": 15806500 + }, + { + "epoch": 78.31, + "learning_rate": 1.085733218392513e-05, + "loss": 1.8657, + "step": 15807000 + }, + { + "epoch": 78.32, + "learning_rate": 1.0856093597499047e-05, + "loss": 1.8547, + "step": 15807500 + }, + { + "epoch": 78.32, + "learning_rate": 1.0854855011072964e-05, + "loss": 1.8708, + "step": 15808000 + }, + { + "epoch": 78.32, + "learning_rate": 1.0853618901819731e-05, + "loss": 1.8464, + "step": 15808500 + }, + { + "epoch": 78.32, + "learning_rate": 1.08523827925665e-05, + "loss": 1.852, + "step": 15809000 + }, + { + "epoch": 78.33, + "learning_rate": 1.0851146683313269e-05, + "loss": 1.8875, + "step": 15809500 + }, + { + "epoch": 78.33, + "learning_rate": 1.0849908096887186e-05, + "loss": 1.8703, + "step": 15810000 + }, + { + "epoch": 78.33, + "learning_rate": 1.08486695104611e-05, + "loss": 1.8744, + "step": 15810500 + }, + { + "epoch": 78.33, + "learning_rate": 1.0847430924035018e-05, + "loss": 1.8586, + "step": 15811000 + }, + { + "epoch": 78.34, + "learning_rate": 1.0846192337608935e-05, + "loss": 1.8824, + "step": 15811500 + }, + { + "epoch": 78.34, + "learning_rate": 1.084495375118285e-05, + "loss": 1.8626, + "step": 15812000 + }, + { + "epoch": 78.34, + "learning_rate": 1.0843715164756767e-05, + "loss": 1.8739, + "step": 15812500 + }, + { + "epoch": 78.34, + "learning_rate": 1.0842476578330684e-05, + "loss": 1.8457, + "step": 15813000 + }, + { + "epoch": 78.35, + "learning_rate": 1.08412379919046e-05, + "loss": 1.887, + "step": 15813500 + }, + { + "epoch": 78.35, + "learning_rate": 1.0839999405478516e-05, + "loss": 1.867, + "step": 15814000 + }, + { + "epoch": 78.35, + "learning_rate": 1.0838760819052433e-05, + "loss": 1.8788, + "step": 15814500 + }, + { + "epoch": 78.35, + "learning_rate": 1.0837522232626348e-05, + "loss": 1.873, + "step": 15815000 + }, + { + "epoch": 78.36, + "learning_rate": 1.0836283646200265e-05, + "loss": 1.8562, + "step": 15815500 + }, + { + "epoch": 78.36, + "learning_rate": 1.0835047536947034e-05, + "loss": 1.8597, + "step": 15816000 + }, + { + "epoch": 78.36, + "learning_rate": 1.083380895052095e-05, + "loss": 1.8459, + "step": 15816500 + }, + { + "epoch": 78.36, + "learning_rate": 1.0832570364094868e-05, + "loss": 1.8808, + "step": 15817000 + }, + { + "epoch": 78.37, + "learning_rate": 1.0831331777668783e-05, + "loss": 1.8915, + "step": 15817500 + }, + { + "epoch": 78.37, + "learning_rate": 1.08300931912427e-05, + "loss": 1.8567, + "step": 15818000 + }, + { + "epoch": 78.37, + "learning_rate": 1.0828854604816615e-05, + "loss": 1.8562, + "step": 15818500 + }, + { + "epoch": 78.37, + "learning_rate": 1.0827616018390532e-05, + "loss": 1.8726, + "step": 15819000 + }, + { + "epoch": 78.38, + "learning_rate": 1.0826377431964447e-05, + "loss": 1.8653, + "step": 15819500 + }, + { + "epoch": 78.38, + "learning_rate": 1.0825141322711218e-05, + "loss": 1.8817, + "step": 15820000 + }, + { + "epoch": 78.38, + "learning_rate": 1.0823902736285133e-05, + "loss": 1.8592, + "step": 15820500 + }, + { + "epoch": 78.38, + "learning_rate": 1.082266414985905e-05, + "loss": 1.8659, + "step": 15821000 + }, + { + "epoch": 78.39, + "learning_rate": 1.0821428040605817e-05, + "loss": 1.874, + "step": 15821500 + }, + { + "epoch": 78.39, + "learning_rate": 1.0820189454179734e-05, + "loss": 1.8536, + "step": 15822000 + }, + { + "epoch": 78.39, + "learning_rate": 1.081895086775365e-05, + "loss": 1.8791, + "step": 15822500 + }, + { + "epoch": 78.39, + "learning_rate": 1.0817712281327568e-05, + "loss": 1.873, + "step": 15823000 + }, + { + "epoch": 78.4, + "learning_rate": 1.0816473694901483e-05, + "loss": 1.8685, + "step": 15823500 + }, + { + "epoch": 78.4, + "learning_rate": 1.08152351084754e-05, + "loss": 1.8713, + "step": 15824000 + }, + { + "epoch": 78.4, + "learning_rate": 1.0813996522049317e-05, + "loss": 1.8615, + "step": 15824500 + }, + { + "epoch": 78.4, + "learning_rate": 1.0812757935623232e-05, + "loss": 1.8606, + "step": 15825000 + }, + { + "epoch": 78.4, + "learning_rate": 1.0811521826370001e-05, + "loss": 1.8299, + "step": 15825500 + }, + { + "epoch": 78.41, + "learning_rate": 1.0810283239943918e-05, + "loss": 1.849, + "step": 15826000 + }, + { + "epoch": 78.41, + "learning_rate": 1.0809044653517833e-05, + "loss": 1.8789, + "step": 15826500 + }, + { + "epoch": 78.41, + "learning_rate": 1.080780606709175e-05, + "loss": 1.8719, + "step": 15827000 + }, + { + "epoch": 78.41, + "learning_rate": 1.0806567480665667e-05, + "loss": 1.8848, + "step": 15827500 + }, + { + "epoch": 78.42, + "learning_rate": 1.0805328894239584e-05, + "loss": 1.8595, + "step": 15828000 + }, + { + "epoch": 78.42, + "learning_rate": 1.0804090307813499e-05, + "loss": 1.8925, + "step": 15828500 + }, + { + "epoch": 78.42, + "learning_rate": 1.0802851721387414e-05, + "loss": 1.8892, + "step": 15829000 + }, + { + "epoch": 78.42, + "learning_rate": 1.0801613134961331e-05, + "loss": 1.8659, + "step": 15829500 + }, + { + "epoch": 78.43, + "learning_rate": 1.0800374548535248e-05, + "loss": 1.8633, + "step": 15830000 + }, + { + "epoch": 78.43, + "learning_rate": 1.0799135962109165e-05, + "loss": 1.8658, + "step": 15830500 + }, + { + "epoch": 78.43, + "learning_rate": 1.079789737568308e-05, + "loss": 1.8636, + "step": 15831000 + }, + { + "epoch": 78.43, + "learning_rate": 1.0796658789256997e-05, + "loss": 1.8638, + "step": 15831500 + }, + { + "epoch": 78.44, + "learning_rate": 1.0795422680003766e-05, + "loss": 1.875, + "step": 15832000 + }, + { + "epoch": 78.44, + "learning_rate": 1.0794184093577681e-05, + "loss": 1.8574, + "step": 15832500 + }, + { + "epoch": 78.44, + "learning_rate": 1.0792945507151598e-05, + "loss": 1.847, + "step": 15833000 + }, + { + "epoch": 78.44, + "learning_rate": 1.0791706920725515e-05, + "loss": 1.8477, + "step": 15833500 + }, + { + "epoch": 78.45, + "learning_rate": 1.0790470811472284e-05, + "loss": 1.8651, + "step": 15834000 + }, + { + "epoch": 78.45, + "learning_rate": 1.0789232225046201e-05, + "loss": 1.8477, + "step": 15834500 + }, + { + "epoch": 78.45, + "learning_rate": 1.0787993638620116e-05, + "loss": 1.8638, + "step": 15835000 + }, + { + "epoch": 78.45, + "learning_rate": 1.0786757529366885e-05, + "loss": 1.8481, + "step": 15835500 + }, + { + "epoch": 78.46, + "learning_rate": 1.07855189429408e-05, + "loss": 1.8719, + "step": 15836000 + }, + { + "epoch": 78.46, + "learning_rate": 1.0784280356514717e-05, + "loss": 1.8747, + "step": 15836500 + }, + { + "epoch": 78.46, + "learning_rate": 1.0783041770088634e-05, + "loss": 1.886, + "step": 15837000 + }, + { + "epoch": 78.46, + "learning_rate": 1.0781803183662551e-05, + "loss": 1.877, + "step": 15837500 + }, + { + "epoch": 78.47, + "learning_rate": 1.0780564597236468e-05, + "loss": 1.8709, + "step": 15838000 + }, + { + "epoch": 78.47, + "learning_rate": 1.0779326010810383e-05, + "loss": 1.8547, + "step": 15838500 + }, + { + "epoch": 78.47, + "learning_rate": 1.077808990155715e-05, + "loss": 1.8865, + "step": 15839000 + }, + { + "epoch": 78.47, + "learning_rate": 1.0776851315131067e-05, + "loss": 1.8442, + "step": 15839500 + }, + { + "epoch": 78.48, + "learning_rate": 1.0775612728704984e-05, + "loss": 1.8636, + "step": 15840000 + }, + { + "epoch": 78.48, + "learning_rate": 1.0774374142278901e-05, + "loss": 1.8775, + "step": 15840500 + }, + { + "epoch": 78.48, + "learning_rate": 1.0773135555852818e-05, + "loss": 1.8749, + "step": 15841000 + }, + { + "epoch": 78.48, + "learning_rate": 1.0771896969426733e-05, + "loss": 1.8572, + "step": 15841500 + }, + { + "epoch": 78.49, + "learning_rate": 1.077065838300065e-05, + "loss": 1.8703, + "step": 15842000 + }, + { + "epoch": 78.49, + "learning_rate": 1.0769419796574565e-05, + "loss": 1.8516, + "step": 15842500 + }, + { + "epoch": 78.49, + "learning_rate": 1.0768181210148482e-05, + "loss": 1.8673, + "step": 15843000 + }, + { + "epoch": 78.49, + "learning_rate": 1.0766942623722397e-05, + "loss": 1.8639, + "step": 15843500 + }, + { + "epoch": 78.5, + "learning_rate": 1.0765704037296314e-05, + "loss": 1.8765, + "step": 15844000 + }, + { + "epoch": 78.5, + "learning_rate": 1.0764465450870231e-05, + "loss": 1.8655, + "step": 15844500 + }, + { + "epoch": 78.5, + "learning_rate": 1.0763226864444148e-05, + "loss": 1.8523, + "step": 15845000 + }, + { + "epoch": 78.5, + "learning_rate": 1.0761988278018065e-05, + "loss": 1.8666, + "step": 15845500 + }, + { + "epoch": 78.51, + "learning_rate": 1.0760752168764832e-05, + "loss": 1.8548, + "step": 15846000 + }, + { + "epoch": 78.51, + "learning_rate": 1.0759516059511601e-05, + "loss": 1.854, + "step": 15846500 + }, + { + "epoch": 78.51, + "learning_rate": 1.0758277473085518e-05, + "loss": 1.8719, + "step": 15847000 + }, + { + "epoch": 78.51, + "learning_rate": 1.0757038886659433e-05, + "loss": 1.8623, + "step": 15847500 + }, + { + "epoch": 78.52, + "learning_rate": 1.075580030023335e-05, + "loss": 1.8707, + "step": 15848000 + }, + { + "epoch": 78.52, + "learning_rate": 1.0754561713807267e-05, + "loss": 1.8689, + "step": 15848500 + }, + { + "epoch": 78.52, + "learning_rate": 1.0753323127381184e-05, + "loss": 1.8596, + "step": 15849000 + }, + { + "epoch": 78.52, + "learning_rate": 1.07520845409551e-05, + "loss": 1.8815, + "step": 15849500 + }, + { + "epoch": 78.53, + "learning_rate": 1.0750845954529015e-05, + "loss": 1.8648, + "step": 15850000 + }, + { + "epoch": 78.53, + "learning_rate": 1.0749609845275785e-05, + "loss": 1.864, + "step": 15850500 + }, + { + "epoch": 78.53, + "learning_rate": 1.07483712588497e-05, + "loss": 1.8653, + "step": 15851000 + }, + { + "epoch": 78.53, + "learning_rate": 1.0747132672423617e-05, + "loss": 1.8411, + "step": 15851500 + }, + { + "epoch": 78.54, + "learning_rate": 1.0745894085997534e-05, + "loss": 1.8575, + "step": 15852000 + }, + { + "epoch": 78.54, + "learning_rate": 1.0744657976744301e-05, + "loss": 1.8548, + "step": 15852500 + }, + { + "epoch": 78.54, + "learning_rate": 1.0743419390318218e-05, + "loss": 1.8838, + "step": 15853000 + }, + { + "epoch": 78.54, + "learning_rate": 1.0742180803892135e-05, + "loss": 1.8606, + "step": 15853500 + }, + { + "epoch": 78.55, + "learning_rate": 1.0740944694638904e-05, + "loss": 1.8797, + "step": 15854000 + }, + { + "epoch": 78.55, + "learning_rate": 1.073970610821282e-05, + "loss": 1.86, + "step": 15854500 + }, + { + "epoch": 78.55, + "learning_rate": 1.0738467521786736e-05, + "loss": 1.8518, + "step": 15855000 + }, + { + "epoch": 78.55, + "learning_rate": 1.0737228935360651e-05, + "loss": 1.8939, + "step": 15855500 + }, + { + "epoch": 78.56, + "learning_rate": 1.073599282610742e-05, + "loss": 1.8682, + "step": 15856000 + }, + { + "epoch": 78.56, + "learning_rate": 1.0734754239681337e-05, + "loss": 1.8726, + "step": 15856500 + }, + { + "epoch": 78.56, + "learning_rate": 1.0733515653255254e-05, + "loss": 1.8772, + "step": 15857000 + }, + { + "epoch": 78.56, + "learning_rate": 1.0732277066829171e-05, + "loss": 1.8627, + "step": 15857500 + }, + { + "epoch": 78.57, + "learning_rate": 1.0731038480403086e-05, + "loss": 1.8784, + "step": 15858000 + }, + { + "epoch": 78.57, + "learning_rate": 1.0729799893977003e-05, + "loss": 1.8457, + "step": 15858500 + }, + { + "epoch": 78.57, + "learning_rate": 1.0728561307550918e-05, + "loss": 1.8901, + "step": 15859000 + }, + { + "epoch": 78.57, + "learning_rate": 1.0727322721124835e-05, + "loss": 1.8698, + "step": 15859500 + }, + { + "epoch": 78.58, + "learning_rate": 1.0726089089044456e-05, + "loss": 1.8532, + "step": 15860000 + }, + { + "epoch": 78.58, + "learning_rate": 1.0724850502618373e-05, + "loss": 1.8739, + "step": 15860500 + }, + { + "epoch": 78.58, + "learning_rate": 1.0723611916192288e-05, + "loss": 1.8981, + "step": 15861000 + }, + { + "epoch": 78.58, + "learning_rate": 1.0722373329766205e-05, + "loss": 1.8636, + "step": 15861500 + }, + { + "epoch": 78.59, + "learning_rate": 1.072113474334012e-05, + "loss": 1.8903, + "step": 15862000 + }, + { + "epoch": 78.59, + "learning_rate": 1.0719896156914037e-05, + "loss": 1.8719, + "step": 15862500 + }, + { + "epoch": 78.59, + "learning_rate": 1.0718657570487954e-05, + "loss": 1.8733, + "step": 15863000 + }, + { + "epoch": 78.59, + "learning_rate": 1.0717421461234723e-05, + "loss": 1.8627, + "step": 15863500 + }, + { + "epoch": 78.6, + "learning_rate": 1.071618287480864e-05, + "loss": 1.8794, + "step": 15864000 + }, + { + "epoch": 78.6, + "learning_rate": 1.0714944288382555e-05, + "loss": 1.8738, + "step": 15864500 + }, + { + "epoch": 78.6, + "learning_rate": 1.071370570195647e-05, + "loss": 1.8501, + "step": 15865000 + }, + { + "epoch": 78.6, + "learning_rate": 1.0712467115530387e-05, + "loss": 1.8881, + "step": 15865500 + }, + { + "epoch": 78.61, + "learning_rate": 1.0711228529104304e-05, + "loss": 1.8812, + "step": 15866000 + }, + { + "epoch": 78.61, + "learning_rate": 1.0709989942678221e-05, + "loss": 1.8538, + "step": 15866500 + }, + { + "epoch": 78.61, + "learning_rate": 1.0708751356252138e-05, + "loss": 1.8752, + "step": 15867000 + }, + { + "epoch": 78.61, + "learning_rate": 1.0707515246998905e-05, + "loss": 1.87, + "step": 15867500 + }, + { + "epoch": 78.62, + "learning_rate": 1.0706279137745674e-05, + "loss": 1.8711, + "step": 15868000 + }, + { + "epoch": 78.62, + "learning_rate": 1.0705040551319591e-05, + "loss": 1.8533, + "step": 15868500 + }, + { + "epoch": 78.62, + "learning_rate": 1.0703801964893506e-05, + "loss": 1.8769, + "step": 15869000 + }, + { + "epoch": 78.62, + "learning_rate": 1.0702563378467423e-05, + "loss": 1.8665, + "step": 15869500 + }, + { + "epoch": 78.63, + "learning_rate": 1.070132479204134e-05, + "loss": 1.8744, + "step": 15870000 + }, + { + "epoch": 78.63, + "learning_rate": 1.0700086205615257e-05, + "loss": 1.8731, + "step": 15870500 + }, + { + "epoch": 78.63, + "learning_rate": 1.0698847619189172e-05, + "loss": 1.8772, + "step": 15871000 + }, + { + "epoch": 78.63, + "learning_rate": 1.0697611509935941e-05, + "loss": 1.8763, + "step": 15871500 + }, + { + "epoch": 78.64, + "learning_rate": 1.0696372923509858e-05, + "loss": 1.8782, + "step": 15872000 + }, + { + "epoch": 78.64, + "learning_rate": 1.0695134337083773e-05, + "loss": 1.8816, + "step": 15872500 + }, + { + "epoch": 78.64, + "learning_rate": 1.069389575065769e-05, + "loss": 1.8848, + "step": 15873000 + }, + { + "epoch": 78.64, + "learning_rate": 1.0692657164231607e-05, + "loss": 1.8607, + "step": 15873500 + }, + { + "epoch": 78.65, + "learning_rate": 1.0691418577805524e-05, + "loss": 1.8528, + "step": 15874000 + }, + { + "epoch": 78.65, + "learning_rate": 1.0690179991379439e-05, + "loss": 1.8535, + "step": 15874500 + }, + { + "epoch": 78.65, + "learning_rate": 1.0688941404953354e-05, + "loss": 1.8554, + "step": 15875000 + }, + { + "epoch": 78.65, + "learning_rate": 1.0687702818527271e-05, + "loss": 1.8729, + "step": 15875500 + }, + { + "epoch": 78.66, + "learning_rate": 1.0686464232101188e-05, + "loss": 1.8663, + "step": 15876000 + }, + { + "epoch": 78.66, + "learning_rate": 1.0685225645675103e-05, + "loss": 1.852, + "step": 15876500 + }, + { + "epoch": 78.66, + "learning_rate": 1.068398705924902e-05, + "loss": 1.8357, + "step": 15877000 + }, + { + "epoch": 78.66, + "learning_rate": 1.0682748472822937e-05, + "loss": 1.8834, + "step": 15877500 + }, + { + "epoch": 78.67, + "learning_rate": 1.0681509886396854e-05, + "loss": 1.878, + "step": 15878000 + }, + { + "epoch": 78.67, + "learning_rate": 1.0680271299970771e-05, + "loss": 1.85, + "step": 15878500 + }, + { + "epoch": 78.67, + "learning_rate": 1.0679032713544686e-05, + "loss": 1.8688, + "step": 15879000 + }, + { + "epoch": 78.67, + "learning_rate": 1.0677794127118602e-05, + "loss": 1.8708, + "step": 15879500 + }, + { + "epoch": 78.68, + "learning_rate": 1.067655801786537e-05, + "loss": 1.8781, + "step": 15880000 + }, + { + "epoch": 78.68, + "learning_rate": 1.0675319431439287e-05, + "loss": 1.8674, + "step": 15880500 + }, + { + "epoch": 78.68, + "learning_rate": 1.0674080845013204e-05, + "loss": 1.8747, + "step": 15881000 + }, + { + "epoch": 78.68, + "learning_rate": 1.0672844735759973e-05, + "loss": 1.8743, + "step": 15881500 + }, + { + "epoch": 78.68, + "learning_rate": 1.0671606149333888e-05, + "loss": 1.8575, + "step": 15882000 + }, + { + "epoch": 78.69, + "learning_rate": 1.0670367562907805e-05, + "loss": 1.8898, + "step": 15882500 + }, + { + "epoch": 78.69, + "learning_rate": 1.066912897648172e-05, + "loss": 1.8806, + "step": 15883000 + }, + { + "epoch": 78.69, + "learning_rate": 1.0667890390055637e-05, + "loss": 1.8754, + "step": 15883500 + }, + { + "epoch": 78.69, + "learning_rate": 1.0666651803629554e-05, + "loss": 1.8733, + "step": 15884000 + }, + { + "epoch": 78.7, + "learning_rate": 1.0665413217203471e-05, + "loss": 1.8535, + "step": 15884500 + }, + { + "epoch": 78.7, + "learning_rate": 1.0664174630777386e-05, + "loss": 1.8838, + "step": 15885000 + }, + { + "epoch": 78.7, + "learning_rate": 1.0662936044351303e-05, + "loss": 1.873, + "step": 15885500 + }, + { + "epoch": 78.7, + "learning_rate": 1.066169745792522e-05, + "loss": 1.868, + "step": 15886000 + }, + { + "epoch": 78.71, + "learning_rate": 1.0660458871499136e-05, + "loss": 1.889, + "step": 15886500 + }, + { + "epoch": 78.71, + "learning_rate": 1.0659220285073053e-05, + "loss": 1.8863, + "step": 15887000 + }, + { + "epoch": 78.71, + "learning_rate": 1.0657981698646968e-05, + "loss": 1.8613, + "step": 15887500 + }, + { + "epoch": 78.71, + "learning_rate": 1.065674806656659e-05, + "loss": 1.8717, + "step": 15888000 + }, + { + "epoch": 78.72, + "learning_rate": 1.0655509480140505e-05, + "loss": 1.8537, + "step": 15888500 + }, + { + "epoch": 78.72, + "learning_rate": 1.065427089371442e-05, + "loss": 1.8642, + "step": 15889000 + }, + { + "epoch": 78.72, + "learning_rate": 1.0653032307288338e-05, + "loss": 1.8765, + "step": 15889500 + }, + { + "epoch": 78.72, + "learning_rate": 1.0651793720862254e-05, + "loss": 1.893, + "step": 15890000 + }, + { + "epoch": 78.73, + "learning_rate": 1.0650555134436171e-05, + "loss": 1.9037, + "step": 15890500 + }, + { + "epoch": 78.73, + "learning_rate": 1.0649316548010088e-05, + "loss": 1.8857, + "step": 15891000 + }, + { + "epoch": 78.73, + "learning_rate": 1.0648077961584004e-05, + "loss": 1.8587, + "step": 15891500 + }, + { + "epoch": 78.73, + "learning_rate": 1.064683937515792e-05, + "loss": 1.8831, + "step": 15892000 + }, + { + "epoch": 78.74, + "learning_rate": 1.0645603265904688e-05, + "loss": 1.874, + "step": 15892500 + }, + { + "epoch": 78.74, + "learning_rate": 1.0644364679478605e-05, + "loss": 1.8601, + "step": 15893000 + }, + { + "epoch": 78.74, + "learning_rate": 1.0643128570225373e-05, + "loss": 1.8541, + "step": 15893500 + }, + { + "epoch": 78.74, + "learning_rate": 1.064188998379929e-05, + "loss": 1.8395, + "step": 15894000 + }, + { + "epoch": 78.75, + "learning_rate": 1.0640651397373207e-05, + "loss": 1.8522, + "step": 15894500 + }, + { + "epoch": 78.75, + "learning_rate": 1.0639415288119974e-05, + "loss": 1.8594, + "step": 15895000 + }, + { + "epoch": 78.75, + "learning_rate": 1.0638176701693891e-05, + "loss": 1.8842, + "step": 15895500 + }, + { + "epoch": 78.75, + "learning_rate": 1.0636938115267808e-05, + "loss": 1.8862, + "step": 15896000 + }, + { + "epoch": 78.76, + "learning_rate": 1.0635702006014577e-05, + "loss": 1.8705, + "step": 15896500 + }, + { + "epoch": 78.76, + "learning_rate": 1.0634463419588494e-05, + "loss": 1.8732, + "step": 15897000 + }, + { + "epoch": 78.76, + "learning_rate": 1.0633224833162409e-05, + "loss": 1.8723, + "step": 15897500 + }, + { + "epoch": 78.76, + "learning_rate": 1.0631986246736324e-05, + "loss": 1.8575, + "step": 15898000 + }, + { + "epoch": 78.77, + "learning_rate": 1.0630750137483093e-05, + "loss": 1.8958, + "step": 15898500 + }, + { + "epoch": 78.77, + "learning_rate": 1.062951155105701e-05, + "loss": 1.8704, + "step": 15899000 + }, + { + "epoch": 78.77, + "learning_rate": 1.0628275441803779e-05, + "loss": 1.855, + "step": 15899500 + }, + { + "epoch": 78.77, + "learning_rate": 1.0627036855377696e-05, + "loss": 1.8816, + "step": 15900000 + }, + { + "epoch": 78.78, + "learning_rate": 1.0625798268951611e-05, + "loss": 1.8539, + "step": 15900500 + }, + { + "epoch": 78.78, + "learning_rate": 1.0624559682525528e-05, + "loss": 1.8536, + "step": 15901000 + }, + { + "epoch": 78.78, + "learning_rate": 1.0623321096099443e-05, + "loss": 1.8645, + "step": 15901500 + }, + { + "epoch": 78.78, + "learning_rate": 1.062208250967336e-05, + "loss": 1.8618, + "step": 15902000 + }, + { + "epoch": 78.79, + "learning_rate": 1.0620843923247277e-05, + "loss": 1.8944, + "step": 15902500 + }, + { + "epoch": 78.79, + "learning_rate": 1.0619605336821194e-05, + "loss": 1.8606, + "step": 15903000 + }, + { + "epoch": 78.79, + "learning_rate": 1.061836675039511e-05, + "loss": 1.8647, + "step": 15903500 + }, + { + "epoch": 78.79, + "learning_rate": 1.0617128163969026e-05, + "loss": 1.8849, + "step": 15904000 + }, + { + "epoch": 78.8, + "learning_rate": 1.0615889577542943e-05, + "loss": 1.8636, + "step": 15904500 + }, + { + "epoch": 78.8, + "learning_rate": 1.0614650991116858e-05, + "loss": 1.8526, + "step": 15905000 + }, + { + "epoch": 78.8, + "learning_rate": 1.0613412404690775e-05, + "loss": 1.8565, + "step": 15905500 + }, + { + "epoch": 78.8, + "learning_rate": 1.0612176295437544e-05, + "loss": 1.8606, + "step": 15906000 + }, + { + "epoch": 78.81, + "learning_rate": 1.061093770901146e-05, + "loss": 1.854, + "step": 15906500 + }, + { + "epoch": 78.81, + "learning_rate": 1.0609699122585376e-05, + "loss": 1.8606, + "step": 15907000 + }, + { + "epoch": 78.81, + "learning_rate": 1.0608460536159293e-05, + "loss": 1.8893, + "step": 15907500 + }, + { + "epoch": 78.81, + "learning_rate": 1.060722194973321e-05, + "loss": 1.8755, + "step": 15908000 + }, + { + "epoch": 78.82, + "learning_rate": 1.0605983363307125e-05, + "loss": 1.8786, + "step": 15908500 + }, + { + "epoch": 78.82, + "learning_rate": 1.0604747254053894e-05, + "loss": 1.8905, + "step": 15909000 + }, + { + "epoch": 78.82, + "learning_rate": 1.0603508667627811e-05, + "loss": 1.8666, + "step": 15909500 + }, + { + "epoch": 78.82, + "learning_rate": 1.0602270081201726e-05, + "loss": 1.8919, + "step": 15910000 + }, + { + "epoch": 78.83, + "learning_rate": 1.0601031494775643e-05, + "loss": 1.8895, + "step": 15910500 + }, + { + "epoch": 78.83, + "learning_rate": 1.059979290834956e-05, + "loss": 1.8895, + "step": 15911000 + }, + { + "epoch": 78.83, + "learning_rate": 1.0598554321923475e-05, + "loss": 1.8578, + "step": 15911500 + }, + { + "epoch": 78.83, + "learning_rate": 1.0597318212670244e-05, + "loss": 1.8986, + "step": 15912000 + }, + { + "epoch": 78.84, + "learning_rate": 1.0596079626244161e-05, + "loss": 1.8914, + "step": 15912500 + }, + { + "epoch": 78.84, + "learning_rate": 1.059484351699093e-05, + "loss": 1.8854, + "step": 15913000 + }, + { + "epoch": 78.84, + "learning_rate": 1.0593604930564847e-05, + "loss": 1.8709, + "step": 15913500 + }, + { + "epoch": 78.84, + "learning_rate": 1.0592366344138762e-05, + "loss": 1.8516, + "step": 15914000 + }, + { + "epoch": 78.85, + "learning_rate": 1.0591127757712677e-05, + "loss": 1.8629, + "step": 15914500 + }, + { + "epoch": 78.85, + "learning_rate": 1.0589889171286594e-05, + "loss": 1.8991, + "step": 15915000 + }, + { + "epoch": 78.85, + "learning_rate": 1.0588650584860511e-05, + "loss": 1.871, + "step": 15915500 + }, + { + "epoch": 78.85, + "learning_rate": 1.0587411998434426e-05, + "loss": 1.8615, + "step": 15916000 + }, + { + "epoch": 78.86, + "learning_rate": 1.0586173412008343e-05, + "loss": 1.8992, + "step": 15916500 + }, + { + "epoch": 78.86, + "learning_rate": 1.058493482558226e-05, + "loss": 1.8795, + "step": 15917000 + }, + { + "epoch": 78.86, + "learning_rate": 1.0583698716329029e-05, + "loss": 1.8903, + "step": 15917500 + }, + { + "epoch": 78.86, + "learning_rate": 1.0582460129902944e-05, + "loss": 1.8577, + "step": 15918000 + }, + { + "epoch": 78.87, + "learning_rate": 1.0581221543476861e-05, + "loss": 1.868, + "step": 15918500 + }, + { + "epoch": 78.87, + "learning_rate": 1.0579982957050776e-05, + "loss": 1.866, + "step": 15919000 + }, + { + "epoch": 78.87, + "learning_rate": 1.0578744370624693e-05, + "loss": 1.8793, + "step": 15919500 + }, + { + "epoch": 78.87, + "learning_rate": 1.057750578419861e-05, + "loss": 1.8744, + "step": 15920000 + }, + { + "epoch": 78.88, + "learning_rate": 1.057626967494538e-05, + "loss": 1.8826, + "step": 15920500 + }, + { + "epoch": 78.88, + "learning_rate": 1.0575031088519294e-05, + "loss": 1.8529, + "step": 15921000 + }, + { + "epoch": 78.88, + "learning_rate": 1.0573792502093211e-05, + "loss": 1.8723, + "step": 15921500 + }, + { + "epoch": 78.88, + "learning_rate": 1.057255639283998e-05, + "loss": 1.8771, + "step": 15922000 + }, + { + "epoch": 78.89, + "learning_rate": 1.0571317806413897e-05, + "loss": 1.8734, + "step": 15922500 + }, + { + "epoch": 78.89, + "learning_rate": 1.0570079219987814e-05, + "loss": 1.894, + "step": 15923000 + }, + { + "epoch": 78.89, + "learning_rate": 1.056884063356173e-05, + "loss": 1.8524, + "step": 15923500 + }, + { + "epoch": 78.89, + "learning_rate": 1.0567602047135646e-05, + "loss": 1.8543, + "step": 15924000 + }, + { + "epoch": 78.9, + "learning_rate": 1.0566363460709561e-05, + "loss": 1.9014, + "step": 15924500 + }, + { + "epoch": 78.9, + "learning_rate": 1.0565124874283478e-05, + "loss": 1.8411, + "step": 15925000 + }, + { + "epoch": 78.9, + "learning_rate": 1.0563886287857394e-05, + "loss": 1.8893, + "step": 15925500 + }, + { + "epoch": 78.9, + "learning_rate": 1.056264770143131e-05, + "loss": 1.8738, + "step": 15926000 + }, + { + "epoch": 78.91, + "learning_rate": 1.0561409115005227e-05, + "loss": 1.8507, + "step": 15926500 + }, + { + "epoch": 78.91, + "learning_rate": 1.0560170528579144e-05, + "loss": 1.8498, + "step": 15927000 + }, + { + "epoch": 78.91, + "learning_rate": 1.055893194215306e-05, + "loss": 1.8927, + "step": 15927500 + }, + { + "epoch": 78.91, + "learning_rate": 1.0557693355726976e-05, + "loss": 1.8635, + "step": 15928000 + }, + { + "epoch": 78.92, + "learning_rate": 1.0556457246473744e-05, + "loss": 1.8736, + "step": 15928500 + }, + { + "epoch": 78.92, + "learning_rate": 1.0555221137220514e-05, + "loss": 1.864, + "step": 15929000 + }, + { + "epoch": 78.92, + "learning_rate": 1.055398255079443e-05, + "loss": 1.8458, + "step": 15929500 + }, + { + "epoch": 78.92, + "learning_rate": 1.0552743964368346e-05, + "loss": 1.8823, + "step": 15930000 + }, + { + "epoch": 78.93, + "learning_rate": 1.0551505377942263e-05, + "loss": 1.8878, + "step": 15930500 + }, + { + "epoch": 78.93, + "learning_rate": 1.055026679151618e-05, + "loss": 1.8645, + "step": 15931000 + }, + { + "epoch": 78.93, + "learning_rate": 1.0549028205090095e-05, + "loss": 1.8855, + "step": 15931500 + }, + { + "epoch": 78.93, + "learning_rate": 1.054778961866401e-05, + "loss": 1.8641, + "step": 15932000 + }, + { + "epoch": 78.94, + "learning_rate": 1.0546551032237928e-05, + "loss": 1.8749, + "step": 15932500 + }, + { + "epoch": 78.94, + "learning_rate": 1.0545312445811844e-05, + "loss": 1.8696, + "step": 15933000 + }, + { + "epoch": 78.94, + "learning_rate": 1.0544073859385761e-05, + "loss": 1.8787, + "step": 15933500 + }, + { + "epoch": 78.94, + "learning_rate": 1.054283775013253e-05, + "loss": 1.8391, + "step": 15934000 + }, + { + "epoch": 78.95, + "learning_rate": 1.0541599163706445e-05, + "loss": 1.8827, + "step": 15934500 + }, + { + "epoch": 78.95, + "learning_rate": 1.0540360577280362e-05, + "loss": 1.8824, + "step": 15935000 + }, + { + "epoch": 78.95, + "learning_rate": 1.0539121990854278e-05, + "loss": 1.8837, + "step": 15935500 + }, + { + "epoch": 78.95, + "learning_rate": 1.0537885881601046e-05, + "loss": 1.8854, + "step": 15936000 + }, + { + "epoch": 78.95, + "learning_rate": 1.0536647295174963e-05, + "loss": 1.8908, + "step": 15936500 + }, + { + "epoch": 78.96, + "learning_rate": 1.0535411185921732e-05, + "loss": 1.8484, + "step": 15937000 + }, + { + "epoch": 78.96, + "learning_rate": 1.05341750766685e-05, + "loss": 1.876, + "step": 15937500 + }, + { + "epoch": 78.96, + "learning_rate": 1.0532936490242416e-05, + "loss": 1.8643, + "step": 15938000 + }, + { + "epoch": 78.96, + "learning_rate": 1.0531697903816333e-05, + "loss": 1.8598, + "step": 15938500 + }, + { + "epoch": 78.97, + "learning_rate": 1.053045931739025e-05, + "loss": 1.8741, + "step": 15939000 + }, + { + "epoch": 78.97, + "learning_rate": 1.0529220730964167e-05, + "loss": 1.8401, + "step": 15939500 + }, + { + "epoch": 78.97, + "learning_rate": 1.0527982144538082e-05, + "loss": 1.87, + "step": 15940000 + }, + { + "epoch": 78.97, + "learning_rate": 1.0526743558111999e-05, + "loss": 1.8572, + "step": 15940500 + }, + { + "epoch": 78.98, + "learning_rate": 1.0525504971685914e-05, + "loss": 1.8269, + "step": 15941000 + }, + { + "epoch": 78.98, + "learning_rate": 1.0524266385259831e-05, + "loss": 1.9005, + "step": 15941500 + }, + { + "epoch": 78.98, + "learning_rate": 1.0523027798833747e-05, + "loss": 1.8644, + "step": 15942000 + }, + { + "epoch": 78.98, + "learning_rate": 1.0521789212407663e-05, + "loss": 1.8758, + "step": 15942500 + }, + { + "epoch": 78.99, + "learning_rate": 1.0520553103154432e-05, + "loss": 1.8897, + "step": 15943000 + }, + { + "epoch": 78.99, + "learning_rate": 1.0519316993901201e-05, + "loss": 1.8529, + "step": 15943500 + }, + { + "epoch": 78.99, + "learning_rate": 1.0518078407475116e-05, + "loss": 1.8731, + "step": 15944000 + }, + { + "epoch": 78.99, + "learning_rate": 1.0516839821049033e-05, + "loss": 1.8879, + "step": 15944500 + }, + { + "epoch": 79.0, + "learning_rate": 1.051560123462295e-05, + "loss": 1.8848, + "step": 15945000 + }, + { + "epoch": 79.0, + "learning_rate": 1.0514362648196867e-05, + "loss": 1.877, + "step": 15945500 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.6818432483474977, + "eval_accuracy_mlm": 0.642718824875163, + "eval_accuracy_nsp": 0.8661667170015571, + "eval_loss": 2.3034467697143555, + "eval_runtime": 146.8171, + "eval_samples_per_second": 1736.576, + "eval_steps_per_second": 72.362, + "step": 15945597 + }, + { + "epoch": 79.0, + "learning_rate": 1.0513124061770782e-05, + "loss": 1.8515, + "step": 15946000 + }, + { + "epoch": 79.0, + "learning_rate": 1.05118854753447e-05, + "loss": 1.847, + "step": 15946500 + }, + { + "epoch": 79.01, + "learning_rate": 1.0510646888918616e-05, + "loss": 1.8334, + "step": 15947000 + }, + { + "epoch": 79.01, + "learning_rate": 1.0509408302492531e-05, + "loss": 1.8664, + "step": 15947500 + }, + { + "epoch": 79.01, + "learning_rate": 1.0508169716066448e-05, + "loss": 1.8383, + "step": 15948000 + }, + { + "epoch": 79.01, + "learning_rate": 1.0506931129640364e-05, + "loss": 1.863, + "step": 15948500 + }, + { + "epoch": 79.02, + "learning_rate": 1.050569254321428e-05, + "loss": 1.8608, + "step": 15949000 + }, + { + "epoch": 79.02, + "learning_rate": 1.050445643396105e-05, + "loss": 1.828, + "step": 15949500 + }, + { + "epoch": 79.02, + "learning_rate": 1.0503217847534966e-05, + "loss": 1.8564, + "step": 15950000 + }, + { + "epoch": 79.02, + "learning_rate": 1.0501979261108883e-05, + "loss": 1.8518, + "step": 15950500 + }, + { + "epoch": 79.03, + "learning_rate": 1.0500740674682798e-05, + "loss": 1.8593, + "step": 15951000 + }, + { + "epoch": 79.03, + "learning_rate": 1.0499502088256714e-05, + "loss": 1.8801, + "step": 15951500 + }, + { + "epoch": 79.03, + "learning_rate": 1.049826350183063e-05, + "loss": 1.8473, + "step": 15952000 + }, + { + "epoch": 79.03, + "learning_rate": 1.0497024915404547e-05, + "loss": 1.8317, + "step": 15952500 + }, + { + "epoch": 79.04, + "learning_rate": 1.0495786328978464e-05, + "loss": 1.8475, + "step": 15953000 + }, + { + "epoch": 79.04, + "learning_rate": 1.049454774255238e-05, + "loss": 1.8859, + "step": 15953500 + }, + { + "epoch": 79.04, + "learning_rate": 1.0493309156126297e-05, + "loss": 1.8709, + "step": 15954000 + }, + { + "epoch": 79.04, + "learning_rate": 1.0492070569700214e-05, + "loss": 1.8766, + "step": 15954500 + }, + { + "epoch": 79.05, + "learning_rate": 1.049083446044698e-05, + "loss": 1.8772, + "step": 15955000 + }, + { + "epoch": 79.05, + "learning_rate": 1.0489595874020898e-05, + "loss": 1.8796, + "step": 15955500 + }, + { + "epoch": 79.05, + "learning_rate": 1.0488357287594814e-05, + "loss": 1.8831, + "step": 15956000 + }, + { + "epoch": 79.05, + "learning_rate": 1.048711870116873e-05, + "loss": 1.8514, + "step": 15956500 + }, + { + "epoch": 79.06, + "learning_rate": 1.0485880114742647e-05, + "loss": 1.8517, + "step": 15957000 + }, + { + "epoch": 79.06, + "learning_rate": 1.0484644005489415e-05, + "loss": 1.8726, + "step": 15957500 + }, + { + "epoch": 79.06, + "learning_rate": 1.0483405419063332e-05, + "loss": 1.8689, + "step": 15958000 + }, + { + "epoch": 79.06, + "learning_rate": 1.0482166832637248e-05, + "loss": 1.843, + "step": 15958500 + }, + { + "epoch": 79.07, + "learning_rate": 1.0480928246211165e-05, + "loss": 1.8567, + "step": 15959000 + }, + { + "epoch": 79.07, + "learning_rate": 1.0479689659785081e-05, + "loss": 1.8238, + "step": 15959500 + }, + { + "epoch": 79.07, + "learning_rate": 1.0478451073358997e-05, + "loss": 1.8507, + "step": 15960000 + }, + { + "epoch": 79.07, + "learning_rate": 1.0477212486932914e-05, + "loss": 1.8677, + "step": 15960500 + }, + { + "epoch": 79.08, + "learning_rate": 1.047597390050683e-05, + "loss": 1.8477, + "step": 15961000 + }, + { + "epoch": 79.08, + "learning_rate": 1.0474737791253598e-05, + "loss": 1.8388, + "step": 15961500 + }, + { + "epoch": 79.08, + "learning_rate": 1.0473499204827515e-05, + "loss": 1.8311, + "step": 15962000 + }, + { + "epoch": 79.08, + "learning_rate": 1.0472260618401432e-05, + "loss": 1.8795, + "step": 15962500 + }, + { + "epoch": 79.09, + "learning_rate": 1.0471022031975347e-05, + "loss": 1.8643, + "step": 15963000 + }, + { + "epoch": 79.09, + "learning_rate": 1.0469783445549264e-05, + "loss": 1.8399, + "step": 15963500 + }, + { + "epoch": 79.09, + "learning_rate": 1.046854485912318e-05, + "loss": 1.8417, + "step": 15964000 + }, + { + "epoch": 79.09, + "learning_rate": 1.0467306272697098e-05, + "loss": 1.873, + "step": 15964500 + }, + { + "epoch": 79.1, + "learning_rate": 1.0466067686271013e-05, + "loss": 1.8721, + "step": 15965000 + }, + { + "epoch": 79.1, + "learning_rate": 1.046482909984493e-05, + "loss": 1.8472, + "step": 15965500 + }, + { + "epoch": 79.1, + "learning_rate": 1.0463590513418847e-05, + "loss": 1.8496, + "step": 15966000 + }, + { + "epoch": 79.1, + "learning_rate": 1.0462354404165614e-05, + "loss": 1.8492, + "step": 15966500 + }, + { + "epoch": 79.11, + "learning_rate": 1.046111581773953e-05, + "loss": 1.858, + "step": 15967000 + }, + { + "epoch": 79.11, + "learning_rate": 1.0459877231313448e-05, + "loss": 1.8495, + "step": 15967500 + }, + { + "epoch": 79.11, + "learning_rate": 1.0458638644887365e-05, + "loss": 1.843, + "step": 15968000 + }, + { + "epoch": 79.11, + "learning_rate": 1.0457402535634132e-05, + "loss": 1.8639, + "step": 15968500 + }, + { + "epoch": 79.12, + "learning_rate": 1.0456163949208047e-05, + "loss": 1.846, + "step": 15969000 + }, + { + "epoch": 79.12, + "learning_rate": 1.0454925362781964e-05, + "loss": 1.8591, + "step": 15969500 + }, + { + "epoch": 79.12, + "learning_rate": 1.045368677635588e-05, + "loss": 1.8709, + "step": 15970000 + }, + { + "epoch": 79.12, + "learning_rate": 1.0452448189929798e-05, + "loss": 1.8668, + "step": 15970500 + }, + { + "epoch": 79.13, + "learning_rate": 1.0451209603503715e-05, + "loss": 1.8566, + "step": 15971000 + }, + { + "epoch": 79.13, + "learning_rate": 1.0449975971423334e-05, + "loss": 1.8486, + "step": 15971500 + }, + { + "epoch": 79.13, + "learning_rate": 1.044873738499725e-05, + "loss": 1.8569, + "step": 15972000 + }, + { + "epoch": 79.13, + "learning_rate": 1.0447498798571167e-05, + "loss": 1.8504, + "step": 15972500 + }, + { + "epoch": 79.14, + "learning_rate": 1.0446260212145084e-05, + "loss": 1.8639, + "step": 15973000 + }, + { + "epoch": 79.14, + "learning_rate": 1.0445021625719e-05, + "loss": 1.8771, + "step": 15973500 + }, + { + "epoch": 79.14, + "learning_rate": 1.0443783039292917e-05, + "loss": 1.8712, + "step": 15974000 + }, + { + "epoch": 79.14, + "learning_rate": 1.0442544452866833e-05, + "loss": 1.8616, + "step": 15974500 + }, + { + "epoch": 79.15, + "learning_rate": 1.0441305866440749e-05, + "loss": 1.8484, + "step": 15975000 + }, + { + "epoch": 79.15, + "learning_rate": 1.0440067280014666e-05, + "loss": 1.8581, + "step": 15975500 + }, + { + "epoch": 79.15, + "learning_rate": 1.0438831170761434e-05, + "loss": 1.8374, + "step": 15976000 + }, + { + "epoch": 79.15, + "learning_rate": 1.043759258433535e-05, + "loss": 1.8526, + "step": 15976500 + }, + { + "epoch": 79.16, + "learning_rate": 1.0436353997909267e-05, + "loss": 1.8516, + "step": 15977000 + }, + { + "epoch": 79.16, + "learning_rate": 1.0435115411483184e-05, + "loss": 1.8779, + "step": 15977500 + }, + { + "epoch": 79.16, + "learning_rate": 1.04338768250571e-05, + "loss": 1.8507, + "step": 15978000 + }, + { + "epoch": 79.16, + "learning_rate": 1.0432638238631016e-05, + "loss": 1.8609, + "step": 15978500 + }, + { + "epoch": 79.17, + "learning_rate": 1.0431399652204931e-05, + "loss": 1.8687, + "step": 15979000 + }, + { + "epoch": 79.17, + "learning_rate": 1.0430161065778848e-05, + "loss": 1.8318, + "step": 15979500 + }, + { + "epoch": 79.17, + "learning_rate": 1.0428922479352765e-05, + "loss": 1.8624, + "step": 15980000 + }, + { + "epoch": 79.17, + "learning_rate": 1.0427686370099534e-05, + "loss": 1.8497, + "step": 15980500 + }, + { + "epoch": 79.18, + "learning_rate": 1.042644778367345e-05, + "loss": 1.845, + "step": 15981000 + }, + { + "epoch": 79.18, + "learning_rate": 1.0425209197247366e-05, + "loss": 1.8477, + "step": 15981500 + }, + { + "epoch": 79.18, + "learning_rate": 1.0423970610821283e-05, + "loss": 1.8368, + "step": 15982000 + }, + { + "epoch": 79.18, + "learning_rate": 1.0422732024395198e-05, + "loss": 1.8612, + "step": 15982500 + }, + { + "epoch": 79.19, + "learning_rate": 1.0421495915141967e-05, + "loss": 1.8376, + "step": 15983000 + }, + { + "epoch": 79.19, + "learning_rate": 1.0420257328715884e-05, + "loss": 1.8617, + "step": 15983500 + }, + { + "epoch": 79.19, + "learning_rate": 1.04190187422898e-05, + "loss": 1.8692, + "step": 15984000 + }, + { + "epoch": 79.19, + "learning_rate": 1.0417780155863718e-05, + "loss": 1.8587, + "step": 15984500 + }, + { + "epoch": 79.2, + "learning_rate": 1.0416546523783337e-05, + "loss": 1.877, + "step": 15985000 + }, + { + "epoch": 79.2, + "learning_rate": 1.0415307937357253e-05, + "loss": 1.8435, + "step": 15985500 + }, + { + "epoch": 79.2, + "learning_rate": 1.041406935093117e-05, + "loss": 1.8822, + "step": 15986000 + }, + { + "epoch": 79.2, + "learning_rate": 1.0412830764505087e-05, + "loss": 1.8418, + "step": 15986500 + }, + { + "epoch": 79.21, + "learning_rate": 1.0411594655251854e-05, + "loss": 1.8649, + "step": 15987000 + }, + { + "epoch": 79.21, + "learning_rate": 1.041035606882577e-05, + "loss": 1.8447, + "step": 15987500 + }, + { + "epoch": 79.21, + "learning_rate": 1.0409117482399687e-05, + "loss": 1.8733, + "step": 15988000 + }, + { + "epoch": 79.21, + "learning_rate": 1.0407878895973604e-05, + "loss": 1.8504, + "step": 15988500 + }, + { + "epoch": 79.22, + "learning_rate": 1.040664030954752e-05, + "loss": 1.834, + "step": 15989000 + }, + { + "epoch": 79.22, + "learning_rate": 1.0405401723121437e-05, + "loss": 1.8491, + "step": 15989500 + }, + { + "epoch": 79.22, + "learning_rate": 1.0404163136695353e-05, + "loss": 1.8547, + "step": 15990000 + }, + { + "epoch": 79.22, + "learning_rate": 1.0402927027442121e-05, + "loss": 1.8487, + "step": 15990500 + }, + { + "epoch": 79.22, + "learning_rate": 1.040169091818889e-05, + "loss": 1.8663, + "step": 15991000 + }, + { + "epoch": 79.23, + "learning_rate": 1.0400452331762807e-05, + "loss": 1.8565, + "step": 15991500 + }, + { + "epoch": 79.23, + "learning_rate": 1.0399213745336722e-05, + "loss": 1.84, + "step": 15992000 + }, + { + "epoch": 79.23, + "learning_rate": 1.039797515891064e-05, + "loss": 1.8735, + "step": 15992500 + }, + { + "epoch": 79.23, + "learning_rate": 1.0396736572484556e-05, + "loss": 1.8575, + "step": 15993000 + }, + { + "epoch": 79.24, + "learning_rate": 1.0395497986058471e-05, + "loss": 1.8685, + "step": 15993500 + }, + { + "epoch": 79.24, + "learning_rate": 1.0394259399632387e-05, + "loss": 1.861, + "step": 15994000 + }, + { + "epoch": 79.24, + "learning_rate": 1.0393020813206304e-05, + "loss": 1.8495, + "step": 15994500 + }, + { + "epoch": 79.24, + "learning_rate": 1.039178222678022e-05, + "loss": 1.8496, + "step": 15995000 + }, + { + "epoch": 79.25, + "learning_rate": 1.0390543640354137e-05, + "loss": 1.8695, + "step": 15995500 + }, + { + "epoch": 79.25, + "learning_rate": 1.0389305053928053e-05, + "loss": 1.8396, + "step": 15996000 + }, + { + "epoch": 79.25, + "learning_rate": 1.038806646750197e-05, + "loss": 1.8547, + "step": 15996500 + }, + { + "epoch": 79.25, + "learning_rate": 1.0386827881075887e-05, + "loss": 1.8754, + "step": 15997000 + }, + { + "epoch": 79.26, + "learning_rate": 1.0385589294649804e-05, + "loss": 1.87, + "step": 15997500 + }, + { + "epoch": 79.26, + "learning_rate": 1.038435318539657e-05, + "loss": 1.8415, + "step": 15998000 + }, + { + "epoch": 79.26, + "learning_rate": 1.0383114598970488e-05, + "loss": 1.8719, + "step": 15998500 + }, + { + "epoch": 79.26, + "learning_rate": 1.0381876012544404e-05, + "loss": 1.8684, + "step": 15999000 + }, + { + "epoch": 79.27, + "learning_rate": 1.038063742611832e-05, + "loss": 1.8597, + "step": 15999500 + }, + { + "epoch": 79.27, + "learning_rate": 1.0379398839692237e-05, + "loss": 1.8615, + "step": 16000000 + }, + { + "epoch": 79.27, + "learning_rate": 1.0378160253266154e-05, + "loss": 1.8435, + "step": 16000500 + }, + { + "epoch": 79.27, + "learning_rate": 1.037692166684007e-05, + "loss": 1.8671, + "step": 16001000 + }, + { + "epoch": 79.28, + "learning_rate": 1.0375683080413986e-05, + "loss": 1.8885, + "step": 16001500 + }, + { + "epoch": 79.28, + "learning_rate": 1.0374446971160755e-05, + "loss": 1.8657, + "step": 16002000 + }, + { + "epoch": 79.28, + "learning_rate": 1.037320838473467e-05, + "loss": 1.8673, + "step": 16002500 + }, + { + "epoch": 79.28, + "learning_rate": 1.0371969798308587e-05, + "loss": 1.8512, + "step": 16003000 + }, + { + "epoch": 79.29, + "learning_rate": 1.0370731211882504e-05, + "loss": 1.8596, + "step": 16003500 + }, + { + "epoch": 79.29, + "learning_rate": 1.036949262545642e-05, + "loss": 1.8527, + "step": 16004000 + }, + { + "epoch": 79.29, + "learning_rate": 1.0368256516203188e-05, + "loss": 1.8827, + "step": 16004500 + }, + { + "epoch": 79.29, + "learning_rate": 1.0367017929777105e-05, + "loss": 1.8822, + "step": 16005000 + }, + { + "epoch": 79.3, + "learning_rate": 1.0365781820523873e-05, + "loss": 1.8471, + "step": 16005500 + }, + { + "epoch": 79.3, + "learning_rate": 1.036454323409779e-05, + "loss": 1.8617, + "step": 16006000 + }, + { + "epoch": 79.3, + "learning_rate": 1.0363304647671706e-05, + "loss": 1.8538, + "step": 16006500 + }, + { + "epoch": 79.3, + "learning_rate": 1.0362066061245622e-05, + "loss": 1.844, + "step": 16007000 + }, + { + "epoch": 79.31, + "learning_rate": 1.036082747481954e-05, + "loss": 1.8938, + "step": 16007500 + }, + { + "epoch": 79.31, + "learning_rate": 1.0359591365566307e-05, + "loss": 1.8654, + "step": 16008000 + }, + { + "epoch": 79.31, + "learning_rate": 1.0358352779140223e-05, + "loss": 1.8568, + "step": 16008500 + }, + { + "epoch": 79.31, + "learning_rate": 1.035711419271414e-05, + "loss": 1.8766, + "step": 16009000 + }, + { + "epoch": 79.32, + "learning_rate": 1.0355875606288056e-05, + "loss": 1.8675, + "step": 16009500 + }, + { + "epoch": 79.32, + "learning_rate": 1.0354637019861973e-05, + "loss": 1.8724, + "step": 16010000 + }, + { + "epoch": 79.32, + "learning_rate": 1.035339843343589e-05, + "loss": 1.8556, + "step": 16010500 + }, + { + "epoch": 79.32, + "learning_rate": 1.0352159847009805e-05, + "loss": 1.8637, + "step": 16011000 + }, + { + "epoch": 79.33, + "learning_rate": 1.0350923737756574e-05, + "loss": 1.8833, + "step": 16011500 + }, + { + "epoch": 79.33, + "learning_rate": 1.034968515133049e-05, + "loss": 1.8464, + "step": 16012000 + }, + { + "epoch": 79.33, + "learning_rate": 1.0348446564904406e-05, + "loss": 1.8869, + "step": 16012500 + }, + { + "epoch": 79.33, + "learning_rate": 1.0347210455651176e-05, + "loss": 1.8608, + "step": 16013000 + }, + { + "epoch": 79.34, + "learning_rate": 1.0345971869225091e-05, + "loss": 1.845, + "step": 16013500 + }, + { + "epoch": 79.34, + "learning_rate": 1.0344733282799007e-05, + "loss": 1.8536, + "step": 16014000 + }, + { + "epoch": 79.34, + "learning_rate": 1.0343494696372924e-05, + "loss": 1.8702, + "step": 16014500 + }, + { + "epoch": 79.34, + "learning_rate": 1.034225610994684e-05, + "loss": 1.8582, + "step": 16015000 + }, + { + "epoch": 79.35, + "learning_rate": 1.0341017523520757e-05, + "loss": 1.8464, + "step": 16015500 + }, + { + "epoch": 79.35, + "learning_rate": 1.0339778937094673e-05, + "loss": 1.855, + "step": 16016000 + }, + { + "epoch": 79.35, + "learning_rate": 1.033854035066859e-05, + "loss": 1.8455, + "step": 16016500 + }, + { + "epoch": 79.35, + "learning_rate": 1.0337301764242507e-05, + "loss": 1.8682, + "step": 16017000 + }, + { + "epoch": 79.36, + "learning_rate": 1.0336063177816423e-05, + "loss": 1.8413, + "step": 16017500 + }, + { + "epoch": 79.36, + "learning_rate": 1.0334824591390339e-05, + "loss": 1.889, + "step": 16018000 + }, + { + "epoch": 79.36, + "learning_rate": 1.0333586004964254e-05, + "loss": 1.8456, + "step": 16018500 + }, + { + "epoch": 79.36, + "learning_rate": 1.0332349895711023e-05, + "loss": 1.8658, + "step": 16019000 + }, + { + "epoch": 79.37, + "learning_rate": 1.033111130928494e-05, + "loss": 1.8469, + "step": 16019500 + }, + { + "epoch": 79.37, + "learning_rate": 1.0329872722858857e-05, + "loss": 1.8737, + "step": 16020000 + }, + { + "epoch": 79.37, + "learning_rate": 1.0328634136432774e-05, + "loss": 1.871, + "step": 16020500 + }, + { + "epoch": 79.37, + "learning_rate": 1.0327395550006689e-05, + "loss": 1.8355, + "step": 16021000 + }, + { + "epoch": 79.38, + "learning_rate": 1.0326156963580606e-05, + "loss": 1.8532, + "step": 16021500 + }, + { + "epoch": 79.38, + "learning_rate": 1.0324918377154521e-05, + "loss": 1.8641, + "step": 16022000 + }, + { + "epoch": 79.38, + "learning_rate": 1.032368226790129e-05, + "loss": 1.8492, + "step": 16022500 + }, + { + "epoch": 79.38, + "learning_rate": 1.0322443681475207e-05, + "loss": 1.8769, + "step": 16023000 + }, + { + "epoch": 79.39, + "learning_rate": 1.0321205095049124e-05, + "loss": 1.8604, + "step": 16023500 + }, + { + "epoch": 79.39, + "learning_rate": 1.031996898579589e-05, + "loss": 1.8863, + "step": 16024000 + }, + { + "epoch": 79.39, + "learning_rate": 1.0318730399369808e-05, + "loss": 1.8712, + "step": 16024500 + }, + { + "epoch": 79.39, + "learning_rate": 1.0317491812943723e-05, + "loss": 1.8525, + "step": 16025000 + }, + { + "epoch": 79.4, + "learning_rate": 1.0316255703690493e-05, + "loss": 1.8668, + "step": 16025500 + }, + { + "epoch": 79.4, + "learning_rate": 1.0315017117264409e-05, + "loss": 1.8551, + "step": 16026000 + }, + { + "epoch": 79.4, + "learning_rate": 1.0313778530838326e-05, + "loss": 1.8408, + "step": 16026500 + }, + { + "epoch": 79.4, + "learning_rate": 1.0312539944412242e-05, + "loss": 1.883, + "step": 16027000 + }, + { + "epoch": 79.41, + "learning_rate": 1.031130383515901e-05, + "loss": 1.8592, + "step": 16027500 + }, + { + "epoch": 79.41, + "learning_rate": 1.0310067725905778e-05, + "loss": 1.8812, + "step": 16028000 + }, + { + "epoch": 79.41, + "learning_rate": 1.0308829139479695e-05, + "loss": 1.8476, + "step": 16028500 + }, + { + "epoch": 79.41, + "learning_rate": 1.0307590553053612e-05, + "loss": 1.848, + "step": 16029000 + }, + { + "epoch": 79.42, + "learning_rate": 1.0306351966627527e-05, + "loss": 1.8479, + "step": 16029500 + }, + { + "epoch": 79.42, + "learning_rate": 1.0305113380201443e-05, + "loss": 1.8702, + "step": 16030000 + }, + { + "epoch": 79.42, + "learning_rate": 1.030387479377536e-05, + "loss": 1.8716, + "step": 16030500 + }, + { + "epoch": 79.42, + "learning_rate": 1.0302636207349277e-05, + "loss": 1.8797, + "step": 16031000 + }, + { + "epoch": 79.43, + "learning_rate": 1.0301397620923193e-05, + "loss": 1.8595, + "step": 16031500 + }, + { + "epoch": 79.43, + "learning_rate": 1.030015903449711e-05, + "loss": 1.8743, + "step": 16032000 + }, + { + "epoch": 79.43, + "learning_rate": 1.0298920448071026e-05, + "loss": 1.877, + "step": 16032500 + }, + { + "epoch": 79.43, + "learning_rate": 1.0297681861644943e-05, + "loss": 1.8212, + "step": 16033000 + }, + { + "epoch": 79.44, + "learning_rate": 1.029644327521886e-05, + "loss": 1.8485, + "step": 16033500 + }, + { + "epoch": 79.44, + "learning_rate": 1.0295204688792775e-05, + "loss": 1.8211, + "step": 16034000 + }, + { + "epoch": 79.44, + "learning_rate": 1.0293966102366692e-05, + "loss": 1.8717, + "step": 16034500 + }, + { + "epoch": 79.44, + "learning_rate": 1.0292727515940607e-05, + "loss": 1.8599, + "step": 16035000 + }, + { + "epoch": 79.45, + "learning_rate": 1.0291488929514524e-05, + "loss": 1.836, + "step": 16035500 + }, + { + "epoch": 79.45, + "learning_rate": 1.029025034308844e-05, + "loss": 1.8549, + "step": 16036000 + }, + { + "epoch": 79.45, + "learning_rate": 1.028901423383521e-05, + "loss": 1.8451, + "step": 16036500 + }, + { + "epoch": 79.45, + "learning_rate": 1.0287775647409127e-05, + "loss": 1.8701, + "step": 16037000 + }, + { + "epoch": 79.46, + "learning_rate": 1.0286537060983042e-05, + "loss": 1.8705, + "step": 16037500 + }, + { + "epoch": 79.46, + "learning_rate": 1.0285298474556957e-05, + "loss": 1.8545, + "step": 16038000 + }, + { + "epoch": 79.46, + "learning_rate": 1.0284059888130874e-05, + "loss": 1.8517, + "step": 16038500 + }, + { + "epoch": 79.46, + "learning_rate": 1.028282130170479e-05, + "loss": 1.8393, + "step": 16039000 + }, + { + "epoch": 79.47, + "learning_rate": 1.0281582715278708e-05, + "loss": 1.8347, + "step": 16039500 + }, + { + "epoch": 79.47, + "learning_rate": 1.0280344128852623e-05, + "loss": 1.8718, + "step": 16040000 + }, + { + "epoch": 79.47, + "learning_rate": 1.0279108019599393e-05, + "loss": 1.8758, + "step": 16040500 + }, + { + "epoch": 79.47, + "learning_rate": 1.0277869433173309e-05, + "loss": 1.8632, + "step": 16041000 + }, + { + "epoch": 79.48, + "learning_rate": 1.0276630846747224e-05, + "loss": 1.8624, + "step": 16041500 + }, + { + "epoch": 79.48, + "learning_rate": 1.0275392260321141e-05, + "loss": 1.8691, + "step": 16042000 + }, + { + "epoch": 79.48, + "learning_rate": 1.0274153673895058e-05, + "loss": 1.8643, + "step": 16042500 + }, + { + "epoch": 79.48, + "learning_rate": 1.0272915087468973e-05, + "loss": 1.8641, + "step": 16043000 + }, + { + "epoch": 79.49, + "learning_rate": 1.027167650104289e-05, + "loss": 1.8565, + "step": 16043500 + }, + { + "epoch": 79.49, + "learning_rate": 1.0270437914616807e-05, + "loss": 1.8531, + "step": 16044000 + }, + { + "epoch": 79.49, + "learning_rate": 1.0269199328190724e-05, + "loss": 1.8685, + "step": 16044500 + }, + { + "epoch": 79.49, + "learning_rate": 1.0267960741764639e-05, + "loss": 1.8362, + "step": 16045000 + }, + { + "epoch": 79.49, + "learning_rate": 1.0266724632511408e-05, + "loss": 1.88, + "step": 16045500 + }, + { + "epoch": 79.5, + "learning_rate": 1.0265486046085323e-05, + "loss": 1.8612, + "step": 16046000 + }, + { + "epoch": 79.5, + "learning_rate": 1.026424745965924e-05, + "loss": 1.8648, + "step": 16046500 + }, + { + "epoch": 79.5, + "learning_rate": 1.0263008873233157e-05, + "loss": 1.8472, + "step": 16047000 + }, + { + "epoch": 79.5, + "learning_rate": 1.0261772763979926e-05, + "loss": 1.8545, + "step": 16047500 + }, + { + "epoch": 79.51, + "learning_rate": 1.0260534177553843e-05, + "loss": 1.868, + "step": 16048000 + }, + { + "epoch": 79.51, + "learning_rate": 1.0259295591127758e-05, + "loss": 1.8616, + "step": 16048500 + }, + { + "epoch": 79.51, + "learning_rate": 1.0258057004701675e-05, + "loss": 1.8677, + "step": 16049000 + }, + { + "epoch": 79.51, + "learning_rate": 1.025681841827559e-05, + "loss": 1.8652, + "step": 16049500 + }, + { + "epoch": 79.52, + "learning_rate": 1.0255579831849507e-05, + "loss": 1.8528, + "step": 16050000 + }, + { + "epoch": 79.52, + "learning_rate": 1.0254341245423424e-05, + "loss": 1.8368, + "step": 16050500 + }, + { + "epoch": 79.52, + "learning_rate": 1.0253105136170193e-05, + "loss": 1.8672, + "step": 16051000 + }, + { + "epoch": 79.52, + "learning_rate": 1.0251866549744108e-05, + "loss": 1.8656, + "step": 16051500 + }, + { + "epoch": 79.53, + "learning_rate": 1.0250627963318025e-05, + "loss": 1.859, + "step": 16052000 + }, + { + "epoch": 79.53, + "learning_rate": 1.024938937689194e-05, + "loss": 1.8645, + "step": 16052500 + }, + { + "epoch": 79.53, + "learning_rate": 1.0248150790465857e-05, + "loss": 1.8786, + "step": 16053000 + }, + { + "epoch": 79.53, + "learning_rate": 1.0246912204039774e-05, + "loss": 1.8525, + "step": 16053500 + }, + { + "epoch": 79.54, + "learning_rate": 1.0245673617613691e-05, + "loss": 1.8549, + "step": 16054000 + }, + { + "epoch": 79.54, + "learning_rate": 1.024443750836046e-05, + "loss": 1.854, + "step": 16054500 + }, + { + "epoch": 79.54, + "learning_rate": 1.0243198921934375e-05, + "loss": 1.8776, + "step": 16055000 + }, + { + "epoch": 79.54, + "learning_rate": 1.024196033550829e-05, + "loss": 1.8692, + "step": 16055500 + }, + { + "epoch": 79.55, + "learning_rate": 1.0240721749082207e-05, + "loss": 1.8556, + "step": 16056000 + }, + { + "epoch": 79.55, + "learning_rate": 1.0239485639828976e-05, + "loss": 1.8623, + "step": 16056500 + }, + { + "epoch": 79.55, + "learning_rate": 1.0238247053402893e-05, + "loss": 1.8497, + "step": 16057000 + }, + { + "epoch": 79.55, + "learning_rate": 1.023700846697681e-05, + "loss": 1.8609, + "step": 16057500 + }, + { + "epoch": 79.56, + "learning_rate": 1.0235769880550727e-05, + "loss": 1.8462, + "step": 16058000 + }, + { + "epoch": 79.56, + "learning_rate": 1.0234531294124642e-05, + "loss": 1.8519, + "step": 16058500 + }, + { + "epoch": 79.56, + "learning_rate": 1.0233292707698557e-05, + "loss": 1.8731, + "step": 16059000 + }, + { + "epoch": 79.56, + "learning_rate": 1.0232054121272474e-05, + "loss": 1.8492, + "step": 16059500 + }, + { + "epoch": 79.57, + "learning_rate": 1.0230815534846391e-05, + "loss": 1.8614, + "step": 16060000 + }, + { + "epoch": 79.57, + "learning_rate": 1.0229576948420308e-05, + "loss": 1.8849, + "step": 16060500 + }, + { + "epoch": 79.57, + "learning_rate": 1.0228338361994223e-05, + "loss": 1.8858, + "step": 16061000 + }, + { + "epoch": 79.57, + "learning_rate": 1.0227102252740994e-05, + "loss": 1.8415, + "step": 16061500 + }, + { + "epoch": 79.58, + "learning_rate": 1.0225866143487761e-05, + "loss": 1.8697, + "step": 16062000 + }, + { + "epoch": 79.58, + "learning_rate": 1.0224627557061676e-05, + "loss": 1.8521, + "step": 16062500 + }, + { + "epoch": 79.58, + "learning_rate": 1.0223388970635593e-05, + "loss": 1.8341, + "step": 16063000 + }, + { + "epoch": 79.58, + "learning_rate": 1.022215038420951e-05, + "loss": 1.8665, + "step": 16063500 + }, + { + "epoch": 79.59, + "learning_rate": 1.0220911797783427e-05, + "loss": 1.8883, + "step": 16064000 + }, + { + "epoch": 79.59, + "learning_rate": 1.0219673211357344e-05, + "loss": 1.8556, + "step": 16064500 + }, + { + "epoch": 79.59, + "learning_rate": 1.0218439579276963e-05, + "loss": 1.8493, + "step": 16065000 + }, + { + "epoch": 79.59, + "learning_rate": 1.021720099285088e-05, + "loss": 1.8715, + "step": 16065500 + }, + { + "epoch": 79.6, + "learning_rate": 1.0215962406424797e-05, + "loss": 1.8664, + "step": 16066000 + }, + { + "epoch": 79.6, + "learning_rate": 1.0214723819998714e-05, + "loss": 1.8445, + "step": 16066500 + }, + { + "epoch": 79.6, + "learning_rate": 1.0213485233572629e-05, + "loss": 1.864, + "step": 16067000 + }, + { + "epoch": 79.6, + "learning_rate": 1.0212246647146546e-05, + "loss": 1.8524, + "step": 16067500 + }, + { + "epoch": 79.61, + "learning_rate": 1.0211008060720461e-05, + "loss": 1.8628, + "step": 16068000 + }, + { + "epoch": 79.61, + "learning_rate": 1.0209769474294378e-05, + "loss": 1.8587, + "step": 16068500 + }, + { + "epoch": 79.61, + "learning_rate": 1.0208530887868293e-05, + "loss": 1.8635, + "step": 16069000 + }, + { + "epoch": 79.61, + "learning_rate": 1.020729230144221e-05, + "loss": 1.8772, + "step": 16069500 + }, + { + "epoch": 79.62, + "learning_rate": 1.0206053715016127e-05, + "loss": 1.8544, + "step": 16070000 + }, + { + "epoch": 79.62, + "learning_rate": 1.0204815128590044e-05, + "loss": 1.8598, + "step": 16070500 + }, + { + "epoch": 79.62, + "learning_rate": 1.0203579019336813e-05, + "loss": 1.8486, + "step": 16071000 + }, + { + "epoch": 79.62, + "learning_rate": 1.0202340432910728e-05, + "loss": 1.8807, + "step": 16071500 + }, + { + "epoch": 79.63, + "learning_rate": 1.0201101846484643e-05, + "loss": 1.8737, + "step": 16072000 + }, + { + "epoch": 79.63, + "learning_rate": 1.0199865737231414e-05, + "loss": 1.8409, + "step": 16072500 + }, + { + "epoch": 79.63, + "learning_rate": 1.0198627150805329e-05, + "loss": 1.8576, + "step": 16073000 + }, + { + "epoch": 79.63, + "learning_rate": 1.0197388564379246e-05, + "loss": 1.8703, + "step": 16073500 + }, + { + "epoch": 79.64, + "learning_rate": 1.0196149977953163e-05, + "loss": 1.8677, + "step": 16074000 + }, + { + "epoch": 79.64, + "learning_rate": 1.0194911391527078e-05, + "loss": 1.8561, + "step": 16074500 + }, + { + "epoch": 79.64, + "learning_rate": 1.0193672805100995e-05, + "loss": 1.852, + "step": 16075000 + }, + { + "epoch": 79.64, + "learning_rate": 1.019243421867491e-05, + "loss": 1.8582, + "step": 16075500 + }, + { + "epoch": 79.65, + "learning_rate": 1.0191195632248827e-05, + "loss": 1.83, + "step": 16076000 + }, + { + "epoch": 79.65, + "learning_rate": 1.0189957045822744e-05, + "loss": 1.8476, + "step": 16076500 + }, + { + "epoch": 79.65, + "learning_rate": 1.0188718459396661e-05, + "loss": 1.8835, + "step": 16077000 + }, + { + "epoch": 79.65, + "learning_rate": 1.0187479872970576e-05, + "loss": 1.8725, + "step": 16077500 + }, + { + "epoch": 79.66, + "learning_rate": 1.0186241286544493e-05, + "loss": 1.8702, + "step": 16078000 + }, + { + "epoch": 79.66, + "learning_rate": 1.018500270011841e-05, + "loss": 1.848, + "step": 16078500 + }, + { + "epoch": 79.66, + "learning_rate": 1.0183766590865177e-05, + "loss": 1.8633, + "step": 16079000 + }, + { + "epoch": 79.66, + "learning_rate": 1.0182528004439094e-05, + "loss": 1.8583, + "step": 16079500 + }, + { + "epoch": 79.67, + "learning_rate": 1.0181289418013011e-05, + "loss": 1.8396, + "step": 16080000 + }, + { + "epoch": 79.67, + "learning_rate": 1.0180050831586926e-05, + "loss": 1.85, + "step": 16080500 + }, + { + "epoch": 79.67, + "learning_rate": 1.0178817199506547e-05, + "loss": 1.8545, + "step": 16081000 + }, + { + "epoch": 79.67, + "learning_rate": 1.0177578613080464e-05, + "loss": 1.8756, + "step": 16081500 + }, + { + "epoch": 79.68, + "learning_rate": 1.017634002665438e-05, + "loss": 1.8449, + "step": 16082000 + }, + { + "epoch": 79.68, + "learning_rate": 1.0175101440228296e-05, + "loss": 1.8793, + "step": 16082500 + }, + { + "epoch": 79.68, + "learning_rate": 1.0173862853802213e-05, + "loss": 1.865, + "step": 16083000 + }, + { + "epoch": 79.68, + "learning_rate": 1.017262426737613e-05, + "loss": 1.8687, + "step": 16083500 + }, + { + "epoch": 79.69, + "learning_rate": 1.0171385680950047e-05, + "loss": 1.8615, + "step": 16084000 + }, + { + "epoch": 79.69, + "learning_rate": 1.0170147094523962e-05, + "loss": 1.8538, + "step": 16084500 + }, + { + "epoch": 79.69, + "learning_rate": 1.0168910985270731e-05, + "loss": 1.8661, + "step": 16085000 + }, + { + "epoch": 79.69, + "learning_rate": 1.01676748760175e-05, + "loss": 1.8592, + "step": 16085500 + }, + { + "epoch": 79.7, + "learning_rate": 1.0166436289591417e-05, + "loss": 1.889, + "step": 16086000 + }, + { + "epoch": 79.7, + "learning_rate": 1.0165197703165332e-05, + "loss": 1.8778, + "step": 16086500 + }, + { + "epoch": 79.7, + "learning_rate": 1.0163959116739249e-05, + "loss": 1.8794, + "step": 16087000 + }, + { + "epoch": 79.7, + "learning_rate": 1.0162720530313164e-05, + "loss": 1.8637, + "step": 16087500 + }, + { + "epoch": 79.71, + "learning_rate": 1.0161481943887081e-05, + "loss": 1.8525, + "step": 16088000 + }, + { + "epoch": 79.71, + "learning_rate": 1.0160243357460996e-05, + "loss": 1.8635, + "step": 16088500 + }, + { + "epoch": 79.71, + "learning_rate": 1.0159007248207767e-05, + "loss": 1.8586, + "step": 16089000 + }, + { + "epoch": 79.71, + "learning_rate": 1.0157768661781682e-05, + "loss": 1.8643, + "step": 16089500 + }, + { + "epoch": 79.72, + "learning_rate": 1.0156530075355599e-05, + "loss": 1.8673, + "step": 16090000 + }, + { + "epoch": 79.72, + "learning_rate": 1.0155291488929516e-05, + "loss": 1.8637, + "step": 16090500 + }, + { + "epoch": 79.72, + "learning_rate": 1.0154052902503431e-05, + "loss": 1.8566, + "step": 16091000 + }, + { + "epoch": 79.72, + "learning_rate": 1.0152814316077348e-05, + "loss": 1.8539, + "step": 16091500 + }, + { + "epoch": 79.73, + "learning_rate": 1.0151575729651263e-05, + "loss": 1.8667, + "step": 16092000 + }, + { + "epoch": 79.73, + "learning_rate": 1.015033714322518e-05, + "loss": 1.8579, + "step": 16092500 + }, + { + "epoch": 79.73, + "learning_rate": 1.0149098556799097e-05, + "loss": 1.892, + "step": 16093000 + }, + { + "epoch": 79.73, + "learning_rate": 1.0147859970373014e-05, + "loss": 1.8477, + "step": 16093500 + }, + { + "epoch": 79.74, + "learning_rate": 1.014662138394693e-05, + "loss": 1.8798, + "step": 16094000 + }, + { + "epoch": 79.74, + "learning_rate": 1.0145382797520846e-05, + "loss": 1.8607, + "step": 16094500 + }, + { + "epoch": 79.74, + "learning_rate": 1.0144144211094763e-05, + "loss": 1.844, + "step": 16095000 + }, + { + "epoch": 79.74, + "learning_rate": 1.0142905624668678e-05, + "loss": 1.8601, + "step": 16095500 + }, + { + "epoch": 79.75, + "learning_rate": 1.0141669515415447e-05, + "loss": 1.8576, + "step": 16096000 + }, + { + "epoch": 79.75, + "learning_rate": 1.0140430928989364e-05, + "loss": 1.8711, + "step": 16096500 + }, + { + "epoch": 79.75, + "learning_rate": 1.013919234256328e-05, + "loss": 1.8684, + "step": 16097000 + }, + { + "epoch": 79.75, + "learning_rate": 1.0137953756137196e-05, + "loss": 1.868, + "step": 16097500 + }, + { + "epoch": 79.76, + "learning_rate": 1.0136715169711113e-05, + "loss": 1.8844, + "step": 16098000 + }, + { + "epoch": 79.76, + "learning_rate": 1.013547658328503e-05, + "loss": 1.87, + "step": 16098500 + }, + { + "epoch": 79.76, + "learning_rate": 1.0134237996858945e-05, + "loss": 1.8639, + "step": 16099000 + }, + { + "epoch": 79.76, + "learning_rate": 1.013299941043286e-05, + "loss": 1.8543, + "step": 16099500 + }, + { + "epoch": 79.76, + "learning_rate": 1.013176330117963e-05, + "loss": 1.8437, + "step": 16100000 + }, + { + "epoch": 79.77, + "learning_rate": 1.01305271919264e-05, + "loss": 1.8716, + "step": 16100500 + }, + { + "epoch": 79.77, + "learning_rate": 1.0129288605500315e-05, + "loss": 1.881, + "step": 16101000 + }, + { + "epoch": 79.77, + "learning_rate": 1.012805001907423e-05, + "loss": 1.8835, + "step": 16101500 + }, + { + "epoch": 79.77, + "learning_rate": 1.0126813909820999e-05, + "loss": 1.8645, + "step": 16102000 + }, + { + "epoch": 79.78, + "learning_rate": 1.0125575323394916e-05, + "loss": 1.8641, + "step": 16102500 + }, + { + "epoch": 79.78, + "learning_rate": 1.0124336736968833e-05, + "loss": 1.8449, + "step": 16103000 + }, + { + "epoch": 79.78, + "learning_rate": 1.012309815054275e-05, + "loss": 1.8601, + "step": 16103500 + }, + { + "epoch": 79.78, + "learning_rate": 1.0121859564116667e-05, + "loss": 1.8495, + "step": 16104000 + }, + { + "epoch": 79.79, + "learning_rate": 1.0120620977690582e-05, + "loss": 1.8782, + "step": 16104500 + }, + { + "epoch": 79.79, + "learning_rate": 1.0119382391264497e-05, + "loss": 1.858, + "step": 16105000 + }, + { + "epoch": 79.79, + "learning_rate": 1.0118143804838414e-05, + "loss": 1.8513, + "step": 16105500 + }, + { + "epoch": 79.79, + "learning_rate": 1.0116910172758035e-05, + "loss": 1.8793, + "step": 16106000 + }, + { + "epoch": 79.8, + "learning_rate": 1.0115671586331952e-05, + "loss": 1.8555, + "step": 16106500 + }, + { + "epoch": 79.8, + "learning_rate": 1.0114432999905867e-05, + "loss": 1.8736, + "step": 16107000 + }, + { + "epoch": 79.8, + "learning_rate": 1.0113194413479784e-05, + "loss": 1.8563, + "step": 16107500 + }, + { + "epoch": 79.8, + "learning_rate": 1.0111955827053701e-05, + "loss": 1.8588, + "step": 16108000 + }, + { + "epoch": 79.81, + "learning_rate": 1.011071971780047e-05, + "loss": 1.8792, + "step": 16108500 + }, + { + "epoch": 79.81, + "learning_rate": 1.0109481131374387e-05, + "loss": 1.8559, + "step": 16109000 + }, + { + "epoch": 79.81, + "learning_rate": 1.0108242544948302e-05, + "loss": 1.86, + "step": 16109500 + }, + { + "epoch": 79.81, + "learning_rate": 1.0107003958522219e-05, + "loss": 1.8568, + "step": 16110000 + }, + { + "epoch": 79.82, + "learning_rate": 1.0105765372096134e-05, + "loss": 1.8676, + "step": 16110500 + }, + { + "epoch": 79.82, + "learning_rate": 1.0104526785670051e-05, + "loss": 1.8679, + "step": 16111000 + }, + { + "epoch": 79.82, + "learning_rate": 1.0103288199243966e-05, + "loss": 1.8485, + "step": 16111500 + }, + { + "epoch": 79.82, + "learning_rate": 1.0102052089990737e-05, + "loss": 1.8525, + "step": 16112000 + }, + { + "epoch": 79.83, + "learning_rate": 1.0100813503564652e-05, + "loss": 1.8616, + "step": 16112500 + }, + { + "epoch": 79.83, + "learning_rate": 1.009957739431142e-05, + "loss": 1.8532, + "step": 16113000 + }, + { + "epoch": 79.83, + "learning_rate": 1.0098338807885336e-05, + "loss": 1.8797, + "step": 16113500 + }, + { + "epoch": 79.83, + "learning_rate": 1.0097102698632106e-05, + "loss": 1.8569, + "step": 16114000 + }, + { + "epoch": 79.84, + "learning_rate": 1.0095864112206022e-05, + "loss": 1.8273, + "step": 16114500 + }, + { + "epoch": 79.84, + "learning_rate": 1.0094625525779939e-05, + "loss": 1.8762, + "step": 16115000 + }, + { + "epoch": 79.84, + "learning_rate": 1.0093386939353856e-05, + "loss": 1.8631, + "step": 16115500 + }, + { + "epoch": 79.84, + "learning_rate": 1.009214835292777e-05, + "loss": 1.8638, + "step": 16116000 + }, + { + "epoch": 79.85, + "learning_rate": 1.0090909766501686e-05, + "loss": 1.8532, + "step": 16116500 + }, + { + "epoch": 79.85, + "learning_rate": 1.0089671180075603e-05, + "loss": 1.8843, + "step": 16117000 + }, + { + "epoch": 79.85, + "learning_rate": 1.008843259364952e-05, + "loss": 1.8567, + "step": 16117500 + }, + { + "epoch": 79.85, + "learning_rate": 1.0087194007223437e-05, + "loss": 1.8786, + "step": 16118000 + }, + { + "epoch": 79.86, + "learning_rate": 1.0085955420797352e-05, + "loss": 1.8804, + "step": 16118500 + }, + { + "epoch": 79.86, + "learning_rate": 1.0084716834371269e-05, + "loss": 1.8547, + "step": 16119000 + }, + { + "epoch": 79.86, + "learning_rate": 1.0083478247945186e-05, + "loss": 1.8656, + "step": 16119500 + }, + { + "epoch": 79.86, + "learning_rate": 1.0082239661519103e-05, + "loss": 1.8744, + "step": 16120000 + }, + { + "epoch": 79.87, + "learning_rate": 1.008100107509302e-05, + "loss": 1.8794, + "step": 16120500 + }, + { + "epoch": 79.87, + "learning_rate": 1.0079762488666935e-05, + "loss": 1.8488, + "step": 16121000 + }, + { + "epoch": 79.87, + "learning_rate": 1.007852390224085e-05, + "loss": 1.8554, + "step": 16121500 + }, + { + "epoch": 79.87, + "learning_rate": 1.0077287792987619e-05, + "loss": 1.8496, + "step": 16122000 + }, + { + "epoch": 79.88, + "learning_rate": 1.007605168373439e-05, + "loss": 1.8644, + "step": 16122500 + }, + { + "epoch": 79.88, + "learning_rate": 1.0074813097308305e-05, + "loss": 1.8581, + "step": 16123000 + }, + { + "epoch": 79.88, + "learning_rate": 1.0073579465227925e-05, + "loss": 1.8588, + "step": 16123500 + }, + { + "epoch": 79.88, + "learning_rate": 1.0072340878801842e-05, + "loss": 1.8758, + "step": 16124000 + }, + { + "epoch": 79.89, + "learning_rate": 1.0071102292375758e-05, + "loss": 1.8591, + "step": 16124500 + }, + { + "epoch": 79.89, + "learning_rate": 1.0069863705949675e-05, + "loss": 1.8645, + "step": 16125000 + }, + { + "epoch": 79.89, + "learning_rate": 1.006862511952359e-05, + "loss": 1.876, + "step": 16125500 + }, + { + "epoch": 79.89, + "learning_rate": 1.0067386533097507e-05, + "loss": 1.8647, + "step": 16126000 + }, + { + "epoch": 79.9, + "learning_rate": 1.0066147946671424e-05, + "loss": 1.8751, + "step": 16126500 + }, + { + "epoch": 79.9, + "learning_rate": 1.0064909360245339e-05, + "loss": 1.8622, + "step": 16127000 + }, + { + "epoch": 79.9, + "learning_rate": 1.0063670773819256e-05, + "loss": 1.8777, + "step": 16127500 + }, + { + "epoch": 79.9, + "learning_rate": 1.0062434664566025e-05, + "loss": 1.8919, + "step": 16128000 + }, + { + "epoch": 79.91, + "learning_rate": 1.0061196078139942e-05, + "loss": 1.8732, + "step": 16128500 + }, + { + "epoch": 79.91, + "learning_rate": 1.0059957491713857e-05, + "loss": 1.8527, + "step": 16129000 + }, + { + "epoch": 79.91, + "learning_rate": 1.0058718905287774e-05, + "loss": 1.891, + "step": 16129500 + }, + { + "epoch": 79.91, + "learning_rate": 1.0057480318861689e-05, + "loss": 1.8831, + "step": 16130000 + }, + { + "epoch": 79.92, + "learning_rate": 1.0056241732435606e-05, + "loss": 1.8559, + "step": 16130500 + }, + { + "epoch": 79.92, + "learning_rate": 1.0055005623182375e-05, + "loss": 1.8765, + "step": 16131000 + }, + { + "epoch": 79.92, + "learning_rate": 1.0053767036756292e-05, + "loss": 1.858, + "step": 16131500 + }, + { + "epoch": 79.92, + "learning_rate": 1.0052528450330209e-05, + "loss": 1.8731, + "step": 16132000 + }, + { + "epoch": 79.93, + "learning_rate": 1.0051289863904124e-05, + "loss": 1.8598, + "step": 16132500 + }, + { + "epoch": 79.93, + "learning_rate": 1.0050051277478039e-05, + "loss": 1.8488, + "step": 16133000 + }, + { + "epoch": 79.93, + "learning_rate": 1.0048812691051956e-05, + "loss": 1.8712, + "step": 16133500 + }, + { + "epoch": 79.93, + "learning_rate": 1.0047574104625873e-05, + "loss": 1.8511, + "step": 16134000 + }, + { + "epoch": 79.94, + "learning_rate": 1.004633551819979e-05, + "loss": 1.8708, + "step": 16134500 + }, + { + "epoch": 79.94, + "learning_rate": 1.0045096931773707e-05, + "loss": 1.866, + "step": 16135000 + }, + { + "epoch": 79.94, + "learning_rate": 1.0043858345347622e-05, + "loss": 1.8857, + "step": 16135500 + }, + { + "epoch": 79.94, + "learning_rate": 1.004262223609439e-05, + "loss": 1.8209, + "step": 16136000 + }, + { + "epoch": 79.95, + "learning_rate": 1.0041383649668306e-05, + "loss": 1.8713, + "step": 16136500 + }, + { + "epoch": 79.95, + "learning_rate": 1.0040145063242223e-05, + "loss": 1.8665, + "step": 16137000 + }, + { + "epoch": 79.95, + "learning_rate": 1.003890647681614e-05, + "loss": 1.8726, + "step": 16137500 + }, + { + "epoch": 79.95, + "learning_rate": 1.0037667890390057e-05, + "loss": 1.8528, + "step": 16138000 + }, + { + "epoch": 79.96, + "learning_rate": 1.0036429303963972e-05, + "loss": 1.8639, + "step": 16138500 + }, + { + "epoch": 79.96, + "learning_rate": 1.0035193194710741e-05, + "loss": 1.8804, + "step": 16139000 + }, + { + "epoch": 79.96, + "learning_rate": 1.003395708545751e-05, + "loss": 1.8557, + "step": 16139500 + }, + { + "epoch": 79.96, + "learning_rate": 1.0032718499031427e-05, + "loss": 1.8376, + "step": 16140000 + }, + { + "epoch": 79.97, + "learning_rate": 1.0031479912605342e-05, + "loss": 1.8537, + "step": 16140500 + }, + { + "epoch": 79.97, + "learning_rate": 1.0030241326179259e-05, + "loss": 1.8594, + "step": 16141000 + }, + { + "epoch": 79.97, + "learning_rate": 1.0029002739753176e-05, + "loss": 1.8476, + "step": 16141500 + }, + { + "epoch": 79.97, + "learning_rate": 1.0027764153327093e-05, + "loss": 1.8473, + "step": 16142000 + }, + { + "epoch": 79.98, + "learning_rate": 1.0026525566901008e-05, + "loss": 1.8677, + "step": 16142500 + }, + { + "epoch": 79.98, + "learning_rate": 1.0025286980474923e-05, + "loss": 1.8476, + "step": 16143000 + }, + { + "epoch": 79.98, + "learning_rate": 1.002404839404884e-05, + "loss": 1.8737, + "step": 16143500 + }, + { + "epoch": 79.98, + "learning_rate": 1.0022812284795609e-05, + "loss": 1.8549, + "step": 16144000 + }, + { + "epoch": 79.99, + "learning_rate": 1.0021576175542378e-05, + "loss": 1.866, + "step": 16144500 + }, + { + "epoch": 79.99, + "learning_rate": 1.0020337589116295e-05, + "loss": 1.8653, + "step": 16145000 + }, + { + "epoch": 79.99, + "learning_rate": 1.001909900269021e-05, + "loss": 1.8503, + "step": 16145500 + }, + { + "epoch": 79.99, + "learning_rate": 1.0017860416264127e-05, + "loss": 1.8262, + "step": 16146000 + }, + { + "epoch": 80.0, + "learning_rate": 1.0016621829838042e-05, + "loss": 1.8549, + "step": 16146500 + }, + { + "epoch": 80.0, + "learning_rate": 1.0015383243411959e-05, + "loss": 1.8961, + "step": 16147000 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.6824833334363511, + "eval_accuracy_mlm": 0.6431221380401299, + "eval_accuracy_nsp": 0.8679120956702843, + "eval_loss": 2.289163112640381, + "eval_runtime": 146.873, + "eval_samples_per_second": 1735.915, + "eval_steps_per_second": 72.335, + "step": 16147440 } ], "max_steps": 20184300, "num_train_epochs": 100, - "total_flos": 1.8292334964832614e+19, + "total_flos": 2.0905538460921295e+19, "trial_name": null, "trial_params": null }