diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 34.0, - "global_step": 6862662, + "epoch": 40.0, + "global_step": 8073720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -82730,11 +82730,14609 @@ "eval_samples_per_second": 1750.384, "eval_steps_per_second": 72.938, "step": 6862662 + }, + { + "epoch": 34.0, + "learning_rate": 3.300526399231086e-05, + "loss": 2.234, + "step": 6863000 + }, + { + "epoch": 34.0, + "learning_rate": 3.300402540588478e-05, + "loss": 2.1825, + "step": 6863500 + }, + { + "epoch": 34.01, + "learning_rate": 3.3002786819458695e-05, + "loss": 2.1727, + "step": 6864000 + }, + { + "epoch": 34.01, + "learning_rate": 3.300154823303261e-05, + "loss": 2.1697, + "step": 6864500 + }, + { + "epoch": 34.01, + "learning_rate": 3.300030964660652e-05, + "loss": 2.1944, + "step": 6865000 + }, + { + "epoch": 34.01, + "learning_rate": 3.299907106018044e-05, + "loss": 2.1777, + "step": 6865500 + }, + { + "epoch": 34.02, + "learning_rate": 3.2997832473754355e-05, + "loss": 2.1701, + "step": 6866000 + }, + { + "epoch": 34.02, + "learning_rate": 3.299659388732827e-05, + "loss": 2.2099, + "step": 6866500 + }, + { + "epoch": 34.02, + "learning_rate": 3.299536025524789e-05, + "loss": 2.1635, + "step": 6867000 + }, + { + "epoch": 34.02, + "learning_rate": 3.299412166882181e-05, + "loss": 2.173, + "step": 6867500 + }, + { + "epoch": 34.03, + "learning_rate": 3.299288308239573e-05, + "loss": 2.1623, + "step": 6868000 + }, + { + "epoch": 34.03, + "learning_rate": 3.2991644495969644e-05, + "loss": 2.1848, + "step": 6868500 + }, + { + "epoch": 34.03, + "learning_rate": 3.299040590954356e-05, + "loss": 2.1915, + "step": 6869000 + }, + { + "epoch": 34.03, + "learning_rate": 3.298916732311748e-05, + "loss": 2.193, + "step": 6869500 + }, + { + "epoch": 34.04, + "learning_rate": 3.2987928736691395e-05, + "loss": 2.2049, + "step": 6870000 + }, + { + "epoch": 34.04, + "learning_rate": 3.298669015026531e-05, + "loss": 2.1921, + "step": 6870500 + }, + { + "epoch": 34.04, + "learning_rate": 3.298545156383923e-05, + "loss": 2.1759, + "step": 6871000 + }, + { + "epoch": 34.04, + "learning_rate": 3.298421297741314e-05, + "loss": 2.1883, + "step": 6871500 + }, + { + "epoch": 34.05, + "learning_rate": 3.298297686815991e-05, + "loss": 2.1694, + "step": 6872000 + }, + { + "epoch": 34.05, + "learning_rate": 3.2981738281733824e-05, + "loss": 2.1732, + "step": 6872500 + }, + { + "epoch": 34.05, + "learning_rate": 3.298049969530774e-05, + "loss": 2.1889, + "step": 6873000 + }, + { + "epoch": 34.05, + "learning_rate": 3.297926110888166e-05, + "loss": 2.1705, + "step": 6873500 + }, + { + "epoch": 34.06, + "learning_rate": 3.2978022522455575e-05, + "loss": 2.1701, + "step": 6874000 + }, + { + "epoch": 34.06, + "learning_rate": 3.2976786413202344e-05, + "loss": 2.1949, + "step": 6874500 + }, + { + "epoch": 34.06, + "learning_rate": 3.297554782677626e-05, + "loss": 2.1956, + "step": 6875000 + }, + { + "epoch": 34.06, + "learning_rate": 3.297430924035018e-05, + "loss": 2.1968, + "step": 6875500 + }, + { + "epoch": 34.07, + "learning_rate": 3.2973070653924095e-05, + "loss": 2.1826, + "step": 6876000 + }, + { + "epoch": 34.07, + "learning_rate": 3.297183206749801e-05, + "loss": 2.169, + "step": 6876500 + }, + { + "epoch": 34.07, + "learning_rate": 3.2970595958244774e-05, + "loss": 2.1826, + "step": 6877000 + }, + { + "epoch": 34.07, + "learning_rate": 3.296935737181869e-05, + "loss": 2.1644, + "step": 6877500 + }, + { + "epoch": 34.08, + "learning_rate": 3.296811878539261e-05, + "loss": 2.1973, + "step": 6878000 + }, + { + "epoch": 34.08, + "learning_rate": 3.2966880198966525e-05, + "loss": 2.1765, + "step": 6878500 + }, + { + "epoch": 34.08, + "learning_rate": 3.296564161254044e-05, + "loss": 2.1869, + "step": 6879000 + }, + { + "epoch": 34.08, + "learning_rate": 3.296440302611436e-05, + "loss": 2.1799, + "step": 6879500 + }, + { + "epoch": 34.09, + "learning_rate": 3.2963164439688275e-05, + "loss": 2.1789, + "step": 6880000 + }, + { + "epoch": 34.09, + "learning_rate": 3.2961925853262185e-05, + "loss": 2.1891, + "step": 6880500 + }, + { + "epoch": 34.09, + "learning_rate": 3.29606872668361e-05, + "loss": 2.2122, + "step": 6881000 + }, + { + "epoch": 34.09, + "learning_rate": 3.295944868041002e-05, + "loss": 2.163, + "step": 6881500 + }, + { + "epoch": 34.1, + "learning_rate": 3.2958212571156795e-05, + "loss": 2.1841, + "step": 6882000 + }, + { + "epoch": 34.1, + "learning_rate": 3.2956976461903564e-05, + "loss": 2.188, + "step": 6882500 + }, + { + "epoch": 34.1, + "learning_rate": 3.2955740352650326e-05, + "loss": 2.1696, + "step": 6883000 + }, + { + "epoch": 34.1, + "learning_rate": 3.295450176622424e-05, + "loss": 2.1982, + "step": 6883500 + }, + { + "epoch": 34.11, + "learning_rate": 3.295326317979816e-05, + "loss": 2.165, + "step": 6884000 + }, + { + "epoch": 34.11, + "learning_rate": 3.2952024593372077e-05, + "loss": 2.1517, + "step": 6884500 + }, + { + "epoch": 34.11, + "learning_rate": 3.2950786006945993e-05, + "loss": 2.1447, + "step": 6885000 + }, + { + "epoch": 34.11, + "learning_rate": 3.294954742051991e-05, + "loss": 2.1803, + "step": 6885500 + }, + { + "epoch": 34.12, + "learning_rate": 3.294830883409383e-05, + "loss": 2.1727, + "step": 6886000 + }, + { + "epoch": 34.12, + "learning_rate": 3.2947070247667744e-05, + "loss": 2.1894, + "step": 6886500 + }, + { + "epoch": 34.12, + "learning_rate": 3.2945836615587365e-05, + "loss": 2.1625, + "step": 6887000 + }, + { + "epoch": 34.12, + "learning_rate": 3.2944598029161275e-05, + "loss": 2.1874, + "step": 6887500 + }, + { + "epoch": 34.13, + "learning_rate": 3.294335944273519e-05, + "loss": 2.1846, + "step": 6888000 + }, + { + "epoch": 34.13, + "learning_rate": 3.294212333348197e-05, + "loss": 2.1903, + "step": 6888500 + }, + { + "epoch": 34.13, + "learning_rate": 3.2940884747055885e-05, + "loss": 2.1673, + "step": 6889000 + }, + { + "epoch": 34.13, + "learning_rate": 3.29396461606298e-05, + "loss": 2.1791, + "step": 6889500 + }, + { + "epoch": 34.14, + "learning_rate": 3.293840757420372e-05, + "loss": 2.1863, + "step": 6890000 + }, + { + "epoch": 34.14, + "learning_rate": 3.2937168987777635e-05, + "loss": 2.1819, + "step": 6890500 + }, + { + "epoch": 34.14, + "learning_rate": 3.2935930401351545e-05, + "loss": 2.2075, + "step": 6891000 + }, + { + "epoch": 34.14, + "learning_rate": 3.293469181492546e-05, + "loss": 2.2059, + "step": 6891500 + }, + { + "epoch": 34.15, + "learning_rate": 3.293345322849938e-05, + "loss": 2.1733, + "step": 6892000 + }, + { + "epoch": 34.15, + "learning_rate": 3.2932214642073296e-05, + "loss": 2.1764, + "step": 6892500 + }, + { + "epoch": 34.15, + "learning_rate": 3.293097605564721e-05, + "loss": 2.2026, + "step": 6893000 + }, + { + "epoch": 34.15, + "learning_rate": 3.292973746922113e-05, + "loss": 2.1988, + "step": 6893500 + }, + { + "epoch": 34.16, + "learning_rate": 3.292849888279505e-05, + "loss": 2.1872, + "step": 6894000 + }, + { + "epoch": 34.16, + "learning_rate": 3.2927260296368964e-05, + "loss": 2.1832, + "step": 6894500 + }, + { + "epoch": 34.16, + "learning_rate": 3.292602170994288e-05, + "loss": 2.1525, + "step": 6895000 + }, + { + "epoch": 34.16, + "learning_rate": 3.292478560068964e-05, + "loss": 2.1841, + "step": 6895500 + }, + { + "epoch": 34.17, + "learning_rate": 3.292354701426356e-05, + "loss": 2.1954, + "step": 6896000 + }, + { + "epoch": 34.17, + "learning_rate": 3.292230842783748e-05, + "loss": 2.2155, + "step": 6896500 + }, + { + "epoch": 34.17, + "learning_rate": 3.2921069841411394e-05, + "loss": 2.1929, + "step": 6897000 + }, + { + "epoch": 34.17, + "learning_rate": 3.291983125498531e-05, + "loss": 2.1928, + "step": 6897500 + }, + { + "epoch": 34.18, + "learning_rate": 3.291859266855923e-05, + "loss": 2.2045, + "step": 6898000 + }, + { + "epoch": 34.18, + "learning_rate": 3.2917356559305996e-05, + "loss": 2.1672, + "step": 6898500 + }, + { + "epoch": 34.18, + "learning_rate": 3.291611797287991e-05, + "loss": 2.1694, + "step": 6899000 + }, + { + "epoch": 34.18, + "learning_rate": 3.291487938645383e-05, + "loss": 2.19, + "step": 6899500 + }, + { + "epoch": 34.18, + "learning_rate": 3.291364080002775e-05, + "loss": 2.1734, + "step": 6900000 + }, + { + "epoch": 34.19, + "learning_rate": 3.291240469077451e-05, + "loss": 2.1893, + "step": 6900500 + }, + { + "epoch": 34.19, + "learning_rate": 3.2911166104348426e-05, + "loss": 2.16, + "step": 6901000 + }, + { + "epoch": 34.19, + "learning_rate": 3.290992751792234e-05, + "loss": 2.1769, + "step": 6901500 + }, + { + "epoch": 34.19, + "learning_rate": 3.290868893149626e-05, + "loss": 2.1874, + "step": 6902000 + }, + { + "epoch": 34.2, + "learning_rate": 3.290745034507018e-05, + "loss": 2.1739, + "step": 6902500 + }, + { + "epoch": 34.2, + "learning_rate": 3.2906211758644094e-05, + "loss": 2.1859, + "step": 6903000 + }, + { + "epoch": 34.2, + "learning_rate": 3.290497317221801e-05, + "loss": 2.2051, + "step": 6903500 + }, + { + "epoch": 34.2, + "learning_rate": 3.290373458579193e-05, + "loss": 2.1717, + "step": 6904000 + }, + { + "epoch": 34.21, + "learning_rate": 3.2902495999365845e-05, + "loss": 2.1797, + "step": 6904500 + }, + { + "epoch": 34.21, + "learning_rate": 3.2901262367285465e-05, + "loss": 2.1786, + "step": 6905000 + }, + { + "epoch": 34.21, + "learning_rate": 3.290002378085938e-05, + "loss": 2.1727, + "step": 6905500 + }, + { + "epoch": 34.21, + "learning_rate": 3.28987851944333e-05, + "loss": 2.1891, + "step": 6906000 + }, + { + "epoch": 34.22, + "learning_rate": 3.289754660800721e-05, + "loss": 2.1985, + "step": 6906500 + }, + { + "epoch": 34.22, + "learning_rate": 3.2896308021581126e-05, + "loss": 2.1938, + "step": 6907000 + }, + { + "epoch": 34.22, + "learning_rate": 3.28950719123279e-05, + "loss": 2.1897, + "step": 6907500 + }, + { + "epoch": 34.22, + "learning_rate": 3.289383332590182e-05, + "loss": 2.166, + "step": 6908000 + }, + { + "epoch": 34.23, + "learning_rate": 3.2892594739475736e-05, + "loss": 2.1857, + "step": 6908500 + }, + { + "epoch": 34.23, + "learning_rate": 3.2891358630222504e-05, + "loss": 2.1884, + "step": 6909000 + }, + { + "epoch": 34.23, + "learning_rate": 3.289012004379642e-05, + "loss": 2.1808, + "step": 6909500 + }, + { + "epoch": 34.23, + "learning_rate": 3.288888145737034e-05, + "loss": 2.1817, + "step": 6910000 + }, + { + "epoch": 34.24, + "learning_rate": 3.2887642870944255e-05, + "loss": 2.209, + "step": 6910500 + }, + { + "epoch": 34.24, + "learning_rate": 3.2886404284518165e-05, + "loss": 2.1976, + "step": 6911000 + }, + { + "epoch": 34.24, + "learning_rate": 3.288516569809208e-05, + "loss": 2.2096, + "step": 6911500 + }, + { + "epoch": 34.24, + "learning_rate": 3.2883927111666e-05, + "loss": 2.1899, + "step": 6912000 + }, + { + "epoch": 34.25, + "learning_rate": 3.2882688525239916e-05, + "loss": 2.1939, + "step": 6912500 + }, + { + "epoch": 34.25, + "learning_rate": 3.2881449938813826e-05, + "loss": 2.1881, + "step": 6913000 + }, + { + "epoch": 34.25, + "learning_rate": 3.288021135238774e-05, + "loss": 2.2121, + "step": 6913500 + }, + { + "epoch": 34.25, + "learning_rate": 3.287897276596166e-05, + "loss": 2.1759, + "step": 6914000 + }, + { + "epoch": 34.26, + "learning_rate": 3.287773417953558e-05, + "loss": 2.1824, + "step": 6914500 + }, + { + "epoch": 34.26, + "learning_rate": 3.287649807028235e-05, + "loss": 2.1901, + "step": 6915000 + }, + { + "epoch": 34.26, + "learning_rate": 3.287525948385627e-05, + "loss": 2.1768, + "step": 6915500 + }, + { + "epoch": 34.26, + "learning_rate": 3.287402089743018e-05, + "loss": 2.1994, + "step": 6916000 + }, + { + "epoch": 34.27, + "learning_rate": 3.28727823110041e-05, + "loss": 2.1775, + "step": 6916500 + }, + { + "epoch": 34.27, + "learning_rate": 3.287154867892372e-05, + "loss": 2.2061, + "step": 6917000 + }, + { + "epoch": 34.27, + "learning_rate": 3.2870310092497634e-05, + "loss": 2.1988, + "step": 6917500 + }, + { + "epoch": 34.27, + "learning_rate": 3.286907150607155e-05, + "loss": 2.174, + "step": 6918000 + }, + { + "epoch": 34.28, + "learning_rate": 3.286783291964547e-05, + "loss": 2.1943, + "step": 6918500 + }, + { + "epoch": 34.28, + "learning_rate": 3.2866594333219385e-05, + "loss": 2.2035, + "step": 6919000 + }, + { + "epoch": 34.28, + "learning_rate": 3.2865358223966154e-05, + "loss": 2.1593, + "step": 6919500 + }, + { + "epoch": 34.28, + "learning_rate": 3.286411963754007e-05, + "loss": 2.1835, + "step": 6920000 + }, + { + "epoch": 34.29, + "learning_rate": 3.286288105111399e-05, + "loss": 2.2108, + "step": 6920500 + }, + { + "epoch": 34.29, + "learning_rate": 3.2861642464687905e-05, + "loss": 2.2148, + "step": 6921000 + }, + { + "epoch": 34.29, + "learning_rate": 3.286040635543467e-05, + "loss": 2.193, + "step": 6921500 + }, + { + "epoch": 34.29, + "learning_rate": 3.2859167769008584e-05, + "loss": 2.2132, + "step": 6922000 + }, + { + "epoch": 34.3, + "learning_rate": 3.28579291825825e-05, + "loss": 2.1787, + "step": 6922500 + }, + { + "epoch": 34.3, + "learning_rate": 3.285669555050213e-05, + "loss": 2.1977, + "step": 6923000 + }, + { + "epoch": 34.3, + "learning_rate": 3.2855456964076045e-05, + "loss": 2.1799, + "step": 6923500 + }, + { + "epoch": 34.3, + "learning_rate": 3.285421837764996e-05, + "loss": 2.186, + "step": 6924000 + }, + { + "epoch": 34.31, + "learning_rate": 3.285297979122388e-05, + "loss": 2.1851, + "step": 6924500 + }, + { + "epoch": 34.31, + "learning_rate": 3.285174120479779e-05, + "loss": 2.1996, + "step": 6925000 + }, + { + "epoch": 34.31, + "learning_rate": 3.285050509554456e-05, + "loss": 2.1971, + "step": 6925500 + }, + { + "epoch": 34.31, + "learning_rate": 3.2849266509118475e-05, + "loss": 2.1959, + "step": 6926000 + }, + { + "epoch": 34.32, + "learning_rate": 3.284802792269239e-05, + "loss": 2.1869, + "step": 6926500 + }, + { + "epoch": 34.32, + "learning_rate": 3.284678933626631e-05, + "loss": 2.196, + "step": 6927000 + }, + { + "epoch": 34.32, + "learning_rate": 3.2845550749840226e-05, + "loss": 2.1712, + "step": 6927500 + }, + { + "epoch": 34.32, + "learning_rate": 3.284431216341414e-05, + "loss": 2.1767, + "step": 6928000 + }, + { + "epoch": 34.33, + "learning_rate": 3.284307357698806e-05, + "loss": 2.1987, + "step": 6928500 + }, + { + "epoch": 34.33, + "learning_rate": 3.284183746773483e-05, + "loss": 2.2139, + "step": 6929000 + }, + { + "epoch": 34.33, + "learning_rate": 3.2840598881308745e-05, + "loss": 2.1773, + "step": 6929500 + }, + { + "epoch": 34.33, + "learning_rate": 3.283936029488266e-05, + "loss": 2.1861, + "step": 6930000 + }, + { + "epoch": 34.34, + "learning_rate": 3.283812170845658e-05, + "loss": 2.1925, + "step": 6930500 + }, + { + "epoch": 34.34, + "learning_rate": 3.2836883122030496e-05, + "loss": 2.1843, + "step": 6931000 + }, + { + "epoch": 34.34, + "learning_rate": 3.283564453560441e-05, + "loss": 2.1896, + "step": 6931500 + }, + { + "epoch": 34.34, + "learning_rate": 3.283440594917832e-05, + "loss": 2.2057, + "step": 6932000 + }, + { + "epoch": 34.35, + "learning_rate": 3.283316983992509e-05, + "loss": 2.1789, + "step": 6932500 + }, + { + "epoch": 34.35, + "learning_rate": 3.283193125349901e-05, + "loss": 2.1865, + "step": 6933000 + }, + { + "epoch": 34.35, + "learning_rate": 3.2830692667072926e-05, + "loss": 2.18, + "step": 6933500 + }, + { + "epoch": 34.35, + "learning_rate": 3.282945408064684e-05, + "loss": 2.195, + "step": 6934000 + }, + { + "epoch": 34.36, + "learning_rate": 3.282821549422076e-05, + "loss": 2.2251, + "step": 6934500 + }, + { + "epoch": 34.36, + "learning_rate": 3.282697690779467e-05, + "loss": 2.176, + "step": 6935000 + }, + { + "epoch": 34.36, + "learning_rate": 3.2825738321368587e-05, + "loss": 2.1886, + "step": 6935500 + }, + { + "epoch": 34.36, + "learning_rate": 3.2824499734942504e-05, + "loss": 2.1885, + "step": 6936000 + }, + { + "epoch": 34.37, + "learning_rate": 3.282326114851642e-05, + "loss": 2.2061, + "step": 6936500 + }, + { + "epoch": 34.37, + "learning_rate": 3.282202256209034e-05, + "loss": 2.2056, + "step": 6937000 + }, + { + "epoch": 34.37, + "learning_rate": 3.282078645283711e-05, + "loss": 2.1923, + "step": 6937500 + }, + { + "epoch": 34.37, + "learning_rate": 3.2819550343583875e-05, + "loss": 2.1916, + "step": 6938000 + }, + { + "epoch": 34.38, + "learning_rate": 3.281831175715779e-05, + "loss": 2.1716, + "step": 6938500 + }, + { + "epoch": 34.38, + "learning_rate": 3.281707317073171e-05, + "loss": 2.1798, + "step": 6939000 + }, + { + "epoch": 34.38, + "learning_rate": 3.2815834584305626e-05, + "loss": 2.1916, + "step": 6939500 + }, + { + "epoch": 34.38, + "learning_rate": 3.281459599787954e-05, + "loss": 2.1689, + "step": 6940000 + }, + { + "epoch": 34.39, + "learning_rate": 3.281335741145346e-05, + "loss": 2.227, + "step": 6940500 + }, + { + "epoch": 34.39, + "learning_rate": 3.281212377937308e-05, + "loss": 2.1959, + "step": 6941000 + }, + { + "epoch": 34.39, + "learning_rate": 3.2810885192947e-05, + "loss": 2.2158, + "step": 6941500 + }, + { + "epoch": 34.39, + "learning_rate": 3.280964660652091e-05, + "loss": 2.2076, + "step": 6942000 + }, + { + "epoch": 34.4, + "learning_rate": 3.2808408020094824e-05, + "loss": 2.2063, + "step": 6942500 + }, + { + "epoch": 34.4, + "learning_rate": 3.280716943366874e-05, + "loss": 2.2234, + "step": 6943000 + }, + { + "epoch": 34.4, + "learning_rate": 3.280593084724266e-05, + "loss": 2.2024, + "step": 6943500 + }, + { + "epoch": 34.4, + "learning_rate": 3.2804692260816575e-05, + "loss": 2.1942, + "step": 6944000 + }, + { + "epoch": 34.41, + "learning_rate": 3.280345367439049e-05, + "loss": 2.1817, + "step": 6944500 + }, + { + "epoch": 34.41, + "learning_rate": 3.280221508796441e-05, + "loss": 2.2017, + "step": 6945000 + }, + { + "epoch": 34.41, + "learning_rate": 3.2800976501538326e-05, + "loss": 2.198, + "step": 6945500 + }, + { + "epoch": 34.41, + "learning_rate": 3.279973791511224e-05, + "loss": 2.1933, + "step": 6946000 + }, + { + "epoch": 34.42, + "learning_rate": 3.279849932868616e-05, + "loss": 2.2008, + "step": 6946500 + }, + { + "epoch": 34.42, + "learning_rate": 3.279726074226008e-05, + "loss": 2.2, + "step": 6947000 + }, + { + "epoch": 34.42, + "learning_rate": 3.279602215583399e-05, + "loss": 2.174, + "step": 6947500 + }, + { + "epoch": 34.42, + "learning_rate": 3.2794783569407904e-05, + "loss": 2.1931, + "step": 6948000 + }, + { + "epoch": 34.43, + "learning_rate": 3.279354746015468e-05, + "loss": 2.1865, + "step": 6948500 + }, + { + "epoch": 34.43, + "learning_rate": 3.2792308873728596e-05, + "loss": 2.2144, + "step": 6949000 + }, + { + "epoch": 34.43, + "learning_rate": 3.279107028730251e-05, + "loss": 2.2073, + "step": 6949500 + }, + { + "epoch": 34.43, + "learning_rate": 3.278983170087643e-05, + "loss": 2.2077, + "step": 6950000 + }, + { + "epoch": 34.44, + "learning_rate": 3.278859311445035e-05, + "loss": 2.201, + "step": 6950500 + }, + { + "epoch": 34.44, + "learning_rate": 3.278735700519711e-05, + "loss": 2.1901, + "step": 6951000 + }, + { + "epoch": 34.44, + "learning_rate": 3.2786118418771026e-05, + "loss": 2.202, + "step": 6951500 + }, + { + "epoch": 34.44, + "learning_rate": 3.278487983234494e-05, + "loss": 2.184, + "step": 6952000 + }, + { + "epoch": 34.45, + "learning_rate": 3.278364124591886e-05, + "loss": 2.191, + "step": 6952500 + }, + { + "epoch": 34.45, + "learning_rate": 3.278240513666563e-05, + "loss": 2.2036, + "step": 6953000 + }, + { + "epoch": 34.45, + "learning_rate": 3.2781166550239546e-05, + "loss": 2.1638, + "step": 6953500 + }, + { + "epoch": 34.45, + "learning_rate": 3.277992796381346e-05, + "loss": 2.1686, + "step": 6954000 + }, + { + "epoch": 34.45, + "learning_rate": 3.277868937738738e-05, + "loss": 2.2152, + "step": 6954500 + }, + { + "epoch": 34.46, + "learning_rate": 3.2777450790961296e-05, + "loss": 2.196, + "step": 6955000 + }, + { + "epoch": 34.46, + "learning_rate": 3.277621220453521e-05, + "loss": 2.2021, + "step": 6955500 + }, + { + "epoch": 34.46, + "learning_rate": 3.277497361810913e-05, + "loss": 2.1876, + "step": 6956000 + }, + { + "epoch": 34.46, + "learning_rate": 3.277373503168305e-05, + "loss": 2.2034, + "step": 6956500 + }, + { + "epoch": 34.47, + "learning_rate": 3.277249892242981e-05, + "loss": 2.1923, + "step": 6957000 + }, + { + "epoch": 34.47, + "learning_rate": 3.2771260336003726e-05, + "loss": 2.2079, + "step": 6957500 + }, + { + "epoch": 34.47, + "learning_rate": 3.277002174957764e-05, + "loss": 2.1761, + "step": 6958000 + }, + { + "epoch": 34.47, + "learning_rate": 3.276878316315156e-05, + "loss": 2.1964, + "step": 6958500 + }, + { + "epoch": 34.48, + "learning_rate": 3.276754457672548e-05, + "loss": 2.1802, + "step": 6959000 + }, + { + "epoch": 34.48, + "learning_rate": 3.2766305990299394e-05, + "loss": 2.196, + "step": 6959500 + }, + { + "epoch": 34.48, + "learning_rate": 3.2765067403873304e-05, + "loss": 2.2058, + "step": 6960000 + }, + { + "epoch": 34.48, + "learning_rate": 3.276383129462008e-05, + "loss": 2.1698, + "step": 6960500 + }, + { + "epoch": 34.49, + "learning_rate": 3.276259518536685e-05, + "loss": 2.1963, + "step": 6961000 + }, + { + "epoch": 34.49, + "learning_rate": 3.2761356598940765e-05, + "loss": 2.1675, + "step": 6961500 + }, + { + "epoch": 34.49, + "learning_rate": 3.276011801251468e-05, + "loss": 2.1949, + "step": 6962000 + }, + { + "epoch": 34.49, + "learning_rate": 3.275887942608859e-05, + "loss": 2.2113, + "step": 6962500 + }, + { + "epoch": 34.5, + "learning_rate": 3.275764083966251e-05, + "loss": 2.2099, + "step": 6963000 + }, + { + "epoch": 34.5, + "learning_rate": 3.2756402253236426e-05, + "loss": 2.2009, + "step": 6963500 + }, + { + "epoch": 34.5, + "learning_rate": 3.275516366681034e-05, + "loss": 2.1868, + "step": 6964000 + }, + { + "epoch": 34.5, + "learning_rate": 3.275392508038426e-05, + "loss": 2.2024, + "step": 6964500 + }, + { + "epoch": 34.51, + "learning_rate": 3.275268649395818e-05, + "loss": 2.1997, + "step": 6965000 + }, + { + "epoch": 34.51, + "learning_rate": 3.2751447907532094e-05, + "loss": 2.1985, + "step": 6965500 + }, + { + "epoch": 34.51, + "learning_rate": 3.275021179827886e-05, + "loss": 2.1987, + "step": 6966000 + }, + { + "epoch": 34.51, + "learning_rate": 3.274897321185278e-05, + "loss": 2.2142, + "step": 6966500 + }, + { + "epoch": 34.52, + "learning_rate": 3.27477346254267e-05, + "loss": 2.2064, + "step": 6967000 + }, + { + "epoch": 34.52, + "learning_rate": 3.2746496039000614e-05, + "loss": 2.1865, + "step": 6967500 + }, + { + "epoch": 34.52, + "learning_rate": 3.274525745257453e-05, + "loss": 2.1711, + "step": 6968000 + }, + { + "epoch": 34.52, + "learning_rate": 3.274401886614845e-05, + "loss": 2.2059, + "step": 6968500 + }, + { + "epoch": 34.53, + "learning_rate": 3.2742780279722364e-05, + "loss": 2.199, + "step": 6969000 + }, + { + "epoch": 34.53, + "learning_rate": 3.2741544170469126e-05, + "loss": 2.2009, + "step": 6969500 + }, + { + "epoch": 34.53, + "learning_rate": 3.274030558404304e-05, + "loss": 2.1918, + "step": 6970000 + }, + { + "epoch": 34.53, + "learning_rate": 3.273906699761696e-05, + "loss": 2.1988, + "step": 6970500 + }, + { + "epoch": 34.54, + "learning_rate": 3.273782841119088e-05, + "loss": 2.2398, + "step": 6971000 + }, + { + "epoch": 34.54, + "learning_rate": 3.2736592301937646e-05, + "loss": 2.2103, + "step": 6971500 + }, + { + "epoch": 34.54, + "learning_rate": 3.273535371551156e-05, + "loss": 2.1703, + "step": 6972000 + }, + { + "epoch": 34.54, + "learning_rate": 3.273411512908548e-05, + "loss": 2.189, + "step": 6972500 + }, + { + "epoch": 34.55, + "learning_rate": 3.27328765426594e-05, + "loss": 2.21, + "step": 6973000 + }, + { + "epoch": 34.55, + "learning_rate": 3.2731637956233314e-05, + "loss": 2.201, + "step": 6973500 + }, + { + "epoch": 34.55, + "learning_rate": 3.273040184698008e-05, + "loss": 2.2171, + "step": 6974000 + }, + { + "epoch": 34.55, + "learning_rate": 3.2729163260554e-05, + "loss": 2.2048, + "step": 6974500 + }, + { + "epoch": 34.56, + "learning_rate": 3.2727924674127916e-05, + "loss": 2.2046, + "step": 6975000 + }, + { + "epoch": 34.56, + "learning_rate": 3.272668608770183e-05, + "loss": 2.1846, + "step": 6975500 + }, + { + "epoch": 34.56, + "learning_rate": 3.2725447501275743e-05, + "loss": 2.1931, + "step": 6976000 + }, + { + "epoch": 34.56, + "learning_rate": 3.2724213869195364e-05, + "loss": 2.1719, + "step": 6976500 + }, + { + "epoch": 34.57, + "learning_rate": 3.272297528276928e-05, + "loss": 2.181, + "step": 6977000 + }, + { + "epoch": 34.57, + "learning_rate": 3.27217366963432e-05, + "loss": 2.18, + "step": 6977500 + }, + { + "epoch": 34.57, + "learning_rate": 3.2720498109917115e-05, + "loss": 2.2069, + "step": 6978000 + }, + { + "epoch": 34.57, + "learning_rate": 3.271925952349103e-05, + "loss": 2.2135, + "step": 6978500 + }, + { + "epoch": 34.58, + "learning_rate": 3.271802093706495e-05, + "loss": 2.2009, + "step": 6979000 + }, + { + "epoch": 34.58, + "learning_rate": 3.2716782350638866e-05, + "loss": 2.1917, + "step": 6979500 + }, + { + "epoch": 34.58, + "learning_rate": 3.271554376421278e-05, + "loss": 2.2079, + "step": 6980000 + }, + { + "epoch": 34.58, + "learning_rate": 3.27143051777867e-05, + "loss": 2.182, + "step": 6980500 + }, + { + "epoch": 34.59, + "learning_rate": 3.2713066591360617e-05, + "loss": 2.2022, + "step": 6981000 + }, + { + "epoch": 34.59, + "learning_rate": 3.271183048210738e-05, + "loss": 2.199, + "step": 6981500 + }, + { + "epoch": 34.59, + "learning_rate": 3.2710591895681295e-05, + "loss": 2.1813, + "step": 6982000 + }, + { + "epoch": 34.59, + "learning_rate": 3.270935330925521e-05, + "loss": 2.1995, + "step": 6982500 + }, + { + "epoch": 34.6, + "learning_rate": 3.270811472282913e-05, + "loss": 2.2181, + "step": 6983000 + }, + { + "epoch": 34.6, + "learning_rate": 3.2706876136403046e-05, + "loss": 2.2099, + "step": 6983500 + }, + { + "epoch": 34.6, + "learning_rate": 3.2705640027149815e-05, + "loss": 2.1953, + "step": 6984000 + }, + { + "epoch": 34.6, + "learning_rate": 3.270440144072373e-05, + "loss": 2.2185, + "step": 6984500 + }, + { + "epoch": 34.61, + "learning_rate": 3.270316285429765e-05, + "loss": 2.1995, + "step": 6985000 + }, + { + "epoch": 34.61, + "learning_rate": 3.2701924267871566e-05, + "loss": 2.1897, + "step": 6985500 + }, + { + "epoch": 34.61, + "learning_rate": 3.270068568144548e-05, + "loss": 2.2206, + "step": 6986000 + }, + { + "epoch": 34.61, + "learning_rate": 3.2699449572192245e-05, + "loss": 2.1854, + "step": 6986500 + }, + { + "epoch": 34.62, + "learning_rate": 3.269821098576616e-05, + "loss": 2.2143, + "step": 6987000 + }, + { + "epoch": 34.62, + "learning_rate": 3.269697239934008e-05, + "loss": 2.196, + "step": 6987500 + }, + { + "epoch": 34.62, + "learning_rate": 3.2695733812913996e-05, + "loss": 2.2031, + "step": 6988000 + }, + { + "epoch": 34.62, + "learning_rate": 3.269449522648791e-05, + "loss": 2.1955, + "step": 6988500 + }, + { + "epoch": 34.63, + "learning_rate": 3.269325911723468e-05, + "loss": 2.1923, + "step": 6989000 + }, + { + "epoch": 34.63, + "learning_rate": 3.26920205308086e-05, + "loss": 2.2108, + "step": 6989500 + }, + { + "epoch": 34.63, + "learning_rate": 3.2690781944382515e-05, + "loss": 2.2116, + "step": 6990000 + }, + { + "epoch": 34.63, + "learning_rate": 3.268954335795643e-05, + "loss": 2.2157, + "step": 6990500 + }, + { + "epoch": 34.64, + "learning_rate": 3.268830477153035e-05, + "loss": 2.1915, + "step": 6991000 + }, + { + "epoch": 34.64, + "learning_rate": 3.2687066185104266e-05, + "loss": 2.2114, + "step": 6991500 + }, + { + "epoch": 34.64, + "learning_rate": 3.268582759867818e-05, + "loss": 2.1786, + "step": 6992000 + }, + { + "epoch": 34.64, + "learning_rate": 3.26845890122521e-05, + "loss": 2.1929, + "step": 6992500 + }, + { + "epoch": 34.65, + "learning_rate": 3.268335042582602e-05, + "loss": 2.1735, + "step": 6993000 + }, + { + "epoch": 34.65, + "learning_rate": 3.2682111839399934e-05, + "loss": 2.205, + "step": 6993500 + }, + { + "epoch": 34.65, + "learning_rate": 3.268087325297385e-05, + "loss": 2.1987, + "step": 6994000 + }, + { + "epoch": 34.65, + "learning_rate": 3.267963466654777e-05, + "loss": 2.1879, + "step": 6994500 + }, + { + "epoch": 34.66, + "learning_rate": 3.267839855729453e-05, + "loss": 2.2025, + "step": 6995000 + }, + { + "epoch": 34.66, + "learning_rate": 3.2677159970868447e-05, + "loss": 2.1804, + "step": 6995500 + }, + { + "epoch": 34.66, + "learning_rate": 3.2675926338788074e-05, + "loss": 2.2173, + "step": 6996000 + }, + { + "epoch": 34.66, + "learning_rate": 3.267468775236199e-05, + "loss": 2.1693, + "step": 6996500 + }, + { + "epoch": 34.67, + "learning_rate": 3.26734491659359e-05, + "loss": 2.1852, + "step": 6997000 + }, + { + "epoch": 34.67, + "learning_rate": 3.267221057950982e-05, + "loss": 2.2025, + "step": 6997500 + }, + { + "epoch": 34.67, + "learning_rate": 3.2670971993083735e-05, + "loss": 2.1977, + "step": 6998000 + }, + { + "epoch": 34.67, + "learning_rate": 3.266973340665765e-05, + "loss": 2.1816, + "step": 6998500 + }, + { + "epoch": 34.68, + "learning_rate": 3.266849482023157e-05, + "loss": 2.2068, + "step": 6999000 + }, + { + "epoch": 34.68, + "learning_rate": 3.266725623380548e-05, + "loss": 2.1962, + "step": 6999500 + }, + { + "epoch": 34.68, + "learning_rate": 3.2666017647379396e-05, + "loss": 2.2068, + "step": 7000000 + }, + { + "epoch": 34.68, + "learning_rate": 3.266478153812617e-05, + "loss": 2.2036, + "step": 7000500 + }, + { + "epoch": 34.69, + "learning_rate": 3.266354295170009e-05, + "loss": 2.2308, + "step": 7001000 + }, + { + "epoch": 34.69, + "learning_rate": 3.2662304365274e-05, + "loss": 2.1966, + "step": 7001500 + }, + { + "epoch": 34.69, + "learning_rate": 3.2661065778847915e-05, + "loss": 2.207, + "step": 7002000 + }, + { + "epoch": 34.69, + "learning_rate": 3.265982719242183e-05, + "loss": 2.1952, + "step": 7002500 + }, + { + "epoch": 34.7, + "learning_rate": 3.265859108316861e-05, + "loss": 2.1811, + "step": 7003000 + }, + { + "epoch": 34.7, + "learning_rate": 3.265735497391537e-05, + "loss": 2.1887, + "step": 7003500 + }, + { + "epoch": 34.7, + "learning_rate": 3.265611638748929e-05, + "loss": 2.2001, + "step": 7004000 + }, + { + "epoch": 34.7, + "learning_rate": 3.2654877801063204e-05, + "loss": 2.1961, + "step": 7004500 + }, + { + "epoch": 34.71, + "learning_rate": 3.265363921463712e-05, + "loss": 2.21, + "step": 7005000 + }, + { + "epoch": 34.71, + "learning_rate": 3.265240062821104e-05, + "loss": 2.1943, + "step": 7005500 + }, + { + "epoch": 34.71, + "learning_rate": 3.2651162041784955e-05, + "loss": 2.2111, + "step": 7006000 + }, + { + "epoch": 34.71, + "learning_rate": 3.264992345535887e-05, + "loss": 2.1912, + "step": 7006500 + }, + { + "epoch": 34.72, + "learning_rate": 3.264868486893279e-05, + "loss": 2.1894, + "step": 7007000 + }, + { + "epoch": 34.72, + "learning_rate": 3.26474462825067e-05, + "loss": 2.1854, + "step": 7007500 + }, + { + "epoch": 34.72, + "learning_rate": 3.2646207696080616e-05, + "loss": 2.2093, + "step": 7008000 + }, + { + "epoch": 34.72, + "learning_rate": 3.264497158682739e-05, + "loss": 2.2025, + "step": 7008500 + }, + { + "epoch": 34.73, + "learning_rate": 3.264373300040131e-05, + "loss": 2.1801, + "step": 7009000 + }, + { + "epoch": 34.73, + "learning_rate": 3.2642494413975225e-05, + "loss": 2.1916, + "step": 7009500 + }, + { + "epoch": 34.73, + "learning_rate": 3.264125582754914e-05, + "loss": 2.2205, + "step": 7010000 + }, + { + "epoch": 34.73, + "learning_rate": 3.264001724112305e-05, + "loss": 2.1982, + "step": 7010500 + }, + { + "epoch": 34.73, + "learning_rate": 3.263877865469697e-05, + "loss": 2.1991, + "step": 7011000 + }, + { + "epoch": 34.74, + "learning_rate": 3.2637540068270886e-05, + "loss": 2.2063, + "step": 7011500 + }, + { + "epoch": 34.74, + "learning_rate": 3.26363014818448e-05, + "loss": 2.1884, + "step": 7012000 + }, + { + "epoch": 34.74, + "learning_rate": 3.263506289541872e-05, + "loss": 2.1918, + "step": 7012500 + }, + { + "epoch": 34.74, + "learning_rate": 3.263382430899263e-05, + "loss": 2.2083, + "step": 7013000 + }, + { + "epoch": 34.75, + "learning_rate": 3.2632588199739406e-05, + "loss": 2.1801, + "step": 7013500 + }, + { + "epoch": 34.75, + "learning_rate": 3.2631349613313316e-05, + "loss": 2.1982, + "step": 7014000 + }, + { + "epoch": 34.75, + "learning_rate": 3.263011102688723e-05, + "loss": 2.1761, + "step": 7014500 + }, + { + "epoch": 34.75, + "learning_rate": 3.262887491763401e-05, + "loss": 2.2228, + "step": 7015000 + }, + { + "epoch": 34.76, + "learning_rate": 3.2627636331207925e-05, + "loss": 2.1739, + "step": 7015500 + }, + { + "epoch": 34.76, + "learning_rate": 3.262639774478184e-05, + "loss": 2.2176, + "step": 7016000 + }, + { + "epoch": 34.76, + "learning_rate": 3.2625161635528604e-05, + "loss": 2.1896, + "step": 7016500 + }, + { + "epoch": 34.76, + "learning_rate": 3.262392304910252e-05, + "loss": 2.1842, + "step": 7017000 + }, + { + "epoch": 34.77, + "learning_rate": 3.262268446267644e-05, + "loss": 2.2216, + "step": 7017500 + }, + { + "epoch": 34.77, + "learning_rate": 3.2621445876250355e-05, + "loss": 2.1878, + "step": 7018000 + }, + { + "epoch": 34.77, + "learning_rate": 3.262020728982427e-05, + "loss": 2.2043, + "step": 7018500 + }, + { + "epoch": 34.77, + "learning_rate": 3.261896870339819e-05, + "loss": 2.192, + "step": 7019000 + }, + { + "epoch": 34.78, + "learning_rate": 3.2617730116972106e-05, + "loss": 2.2035, + "step": 7019500 + }, + { + "epoch": 34.78, + "learning_rate": 3.2616491530546016e-05, + "loss": 2.2266, + "step": 7020000 + }, + { + "epoch": 34.78, + "learning_rate": 3.261525294411993e-05, + "loss": 2.1957, + "step": 7020500 + }, + { + "epoch": 34.78, + "learning_rate": 3.261401435769385e-05, + "loss": 2.2057, + "step": 7021000 + }, + { + "epoch": 34.79, + "learning_rate": 3.2612778248440625e-05, + "loss": 2.1826, + "step": 7021500 + }, + { + "epoch": 34.79, + "learning_rate": 3.261153966201454e-05, + "loss": 2.1922, + "step": 7022000 + }, + { + "epoch": 34.79, + "learning_rate": 3.261030107558846e-05, + "loss": 2.1976, + "step": 7022500 + }, + { + "epoch": 34.79, + "learning_rate": 3.2609062489162376e-05, + "loss": 2.2111, + "step": 7023000 + }, + { + "epoch": 34.8, + "learning_rate": 3.2607823902736286e-05, + "loss": 2.1923, + "step": 7023500 + }, + { + "epoch": 34.8, + "learning_rate": 3.2606587793483055e-05, + "loss": 2.2014, + "step": 7024000 + }, + { + "epoch": 34.8, + "learning_rate": 3.260534920705697e-05, + "loss": 2.1941, + "step": 7024500 + }, + { + "epoch": 34.8, + "learning_rate": 3.260411062063089e-05, + "loss": 2.1953, + "step": 7025000 + }, + { + "epoch": 34.81, + "learning_rate": 3.2602872034204806e-05, + "loss": 2.213, + "step": 7025500 + }, + { + "epoch": 34.81, + "learning_rate": 3.2601635924951575e-05, + "loss": 2.1574, + "step": 7026000 + }, + { + "epoch": 34.81, + "learning_rate": 3.260039733852549e-05, + "loss": 2.2092, + "step": 7026500 + }, + { + "epoch": 34.81, + "learning_rate": 3.259915875209941e-05, + "loss": 2.2085, + "step": 7027000 + }, + { + "epoch": 34.82, + "learning_rate": 3.259792264284617e-05, + "loss": 2.1949, + "step": 7027500 + }, + { + "epoch": 34.82, + "learning_rate": 3.259668405642009e-05, + "loss": 2.2119, + "step": 7028000 + }, + { + "epoch": 34.82, + "learning_rate": 3.2595445469994004e-05, + "loss": 2.1811, + "step": 7028500 + }, + { + "epoch": 34.82, + "learning_rate": 3.259420688356792e-05, + "loss": 2.1964, + "step": 7029000 + }, + { + "epoch": 34.83, + "learning_rate": 3.259297077431469e-05, + "loss": 2.2017, + "step": 7029500 + }, + { + "epoch": 34.83, + "learning_rate": 3.259173218788861e-05, + "loss": 2.1988, + "step": 7030000 + }, + { + "epoch": 34.83, + "learning_rate": 3.2590493601462524e-05, + "loss": 2.2063, + "step": 7030500 + }, + { + "epoch": 34.83, + "learning_rate": 3.258925501503644e-05, + "loss": 2.1986, + "step": 7031000 + }, + { + "epoch": 34.84, + "learning_rate": 3.258801642861036e-05, + "loss": 2.1811, + "step": 7031500 + }, + { + "epoch": 34.84, + "learning_rate": 3.2586777842184275e-05, + "loss": 2.1918, + "step": 7032000 + }, + { + "epoch": 34.84, + "learning_rate": 3.258553925575819e-05, + "loss": 2.2125, + "step": 7032500 + }, + { + "epoch": 34.84, + "learning_rate": 3.258430066933211e-05, + "loss": 2.209, + "step": 7033000 + }, + { + "epoch": 34.85, + "learning_rate": 3.2583062082906026e-05, + "loss": 2.2314, + "step": 7033500 + }, + { + "epoch": 34.85, + "learning_rate": 3.258182349647994e-05, + "loss": 2.1891, + "step": 7034000 + }, + { + "epoch": 34.85, + "learning_rate": 3.258058491005386e-05, + "loss": 2.2034, + "step": 7034500 + }, + { + "epoch": 34.85, + "learning_rate": 3.2579346323627776e-05, + "loss": 2.1971, + "step": 7035000 + }, + { + "epoch": 34.86, + "learning_rate": 3.257810773720169e-05, + "loss": 2.1942, + "step": 7035500 + }, + { + "epoch": 34.86, + "learning_rate": 3.25768691507756e-05, + "loss": 2.1755, + "step": 7036000 + }, + { + "epoch": 34.86, + "learning_rate": 3.257563304152237e-05, + "loss": 2.1964, + "step": 7036500 + }, + { + "epoch": 34.86, + "learning_rate": 3.257439940944199e-05, + "loss": 2.1967, + "step": 7037000 + }, + { + "epoch": 34.87, + "learning_rate": 3.257316082301591e-05, + "loss": 2.2141, + "step": 7037500 + }, + { + "epoch": 34.87, + "learning_rate": 3.257192223658983e-05, + "loss": 2.2195, + "step": 7038000 + }, + { + "epoch": 34.87, + "learning_rate": 3.2570683650163744e-05, + "loss": 2.2072, + "step": 7038500 + }, + { + "epoch": 34.87, + "learning_rate": 3.256944506373766e-05, + "loss": 2.2148, + "step": 7039000 + }, + { + "epoch": 34.88, + "learning_rate": 3.256820647731158e-05, + "loss": 2.213, + "step": 7039500 + }, + { + "epoch": 34.88, + "learning_rate": 3.2566967890885494e-05, + "loss": 2.1913, + "step": 7040000 + }, + { + "epoch": 34.88, + "learning_rate": 3.2565731781632256e-05, + "loss": 2.2049, + "step": 7040500 + }, + { + "epoch": 34.88, + "learning_rate": 3.2564493195206173e-05, + "loss": 2.2205, + "step": 7041000 + }, + { + "epoch": 34.89, + "learning_rate": 3.256325460878009e-05, + "loss": 2.1822, + "step": 7041500 + }, + { + "epoch": 34.89, + "learning_rate": 3.256201602235401e-05, + "loss": 2.1775, + "step": 7042000 + }, + { + "epoch": 34.89, + "learning_rate": 3.2560777435927924e-05, + "loss": 2.2201, + "step": 7042500 + }, + { + "epoch": 34.89, + "learning_rate": 3.255953884950184e-05, + "loss": 2.1917, + "step": 7043000 + }, + { + "epoch": 34.9, + "learning_rate": 3.255830026307576e-05, + "loss": 2.2215, + "step": 7043500 + }, + { + "epoch": 34.9, + "learning_rate": 3.2557061676649675e-05, + "loss": 2.212, + "step": 7044000 + }, + { + "epoch": 34.9, + "learning_rate": 3.2555825567396444e-05, + "loss": 2.1995, + "step": 7044500 + }, + { + "epoch": 34.9, + "learning_rate": 3.255458698097036e-05, + "loss": 2.2085, + "step": 7045000 + }, + { + "epoch": 34.91, + "learning_rate": 3.255334839454428e-05, + "loss": 2.1986, + "step": 7045500 + }, + { + "epoch": 34.91, + "learning_rate": 3.2552109808118195e-05, + "loss": 2.1863, + "step": 7046000 + }, + { + "epoch": 34.91, + "learning_rate": 3.255087122169211e-05, + "loss": 2.2211, + "step": 7046500 + }, + { + "epoch": 34.91, + "learning_rate": 3.2549635112438874e-05, + "loss": 2.1993, + "step": 7047000 + }, + { + "epoch": 34.92, + "learning_rate": 3.254839652601279e-05, + "loss": 2.1953, + "step": 7047500 + }, + { + "epoch": 34.92, + "learning_rate": 3.254715793958671e-05, + "loss": 2.2076, + "step": 7048000 + }, + { + "epoch": 34.92, + "learning_rate": 3.2545919353160624e-05, + "loss": 2.1892, + "step": 7048500 + }, + { + "epoch": 34.92, + "learning_rate": 3.254468324390739e-05, + "loss": 2.2138, + "step": 7049000 + }, + { + "epoch": 34.93, + "learning_rate": 3.254344465748131e-05, + "loss": 2.1946, + "step": 7049500 + }, + { + "epoch": 34.93, + "learning_rate": 3.254220607105523e-05, + "loss": 2.204, + "step": 7050000 + }, + { + "epoch": 34.93, + "learning_rate": 3.2540967484629144e-05, + "loss": 2.212, + "step": 7050500 + }, + { + "epoch": 34.93, + "learning_rate": 3.253972889820306e-05, + "loss": 2.1801, + "step": 7051000 + }, + { + "epoch": 34.94, + "learning_rate": 3.253849031177698e-05, + "loss": 2.1985, + "step": 7051500 + }, + { + "epoch": 34.94, + "learning_rate": 3.2537251725350895e-05, + "loss": 2.1711, + "step": 7052000 + }, + { + "epoch": 34.94, + "learning_rate": 3.253601313892481e-05, + "loss": 2.2165, + "step": 7052500 + }, + { + "epoch": 34.94, + "learning_rate": 3.2534777029671574e-05, + "loss": 2.2252, + "step": 7053000 + }, + { + "epoch": 34.95, + "learning_rate": 3.253353844324549e-05, + "loss": 2.1821, + "step": 7053500 + }, + { + "epoch": 34.95, + "learning_rate": 3.2532302333992266e-05, + "loss": 2.1778, + "step": 7054000 + }, + { + "epoch": 34.95, + "learning_rate": 3.253106374756618e-05, + "loss": 2.2102, + "step": 7054500 + }, + { + "epoch": 34.95, + "learning_rate": 3.252982516114009e-05, + "loss": 2.2164, + "step": 7055000 + }, + { + "epoch": 34.96, + "learning_rate": 3.252858657471401e-05, + "loss": 2.2235, + "step": 7055500 + }, + { + "epoch": 34.96, + "learning_rate": 3.252734798828793e-05, + "loss": 2.2477, + "step": 7056000 + }, + { + "epoch": 34.96, + "learning_rate": 3.2526111879034696e-05, + "loss": 2.1983, + "step": 7056500 + }, + { + "epoch": 34.96, + "learning_rate": 3.252487329260861e-05, + "loss": 2.2055, + "step": 7057000 + }, + { + "epoch": 34.97, + "learning_rate": 3.252363718335538e-05, + "loss": 2.2165, + "step": 7057500 + }, + { + "epoch": 34.97, + "learning_rate": 3.25223985969293e-05, + "loss": 2.2136, + "step": 7058000 + }, + { + "epoch": 34.97, + "learning_rate": 3.2521160010503216e-05, + "loss": 2.2037, + "step": 7058500 + }, + { + "epoch": 34.97, + "learning_rate": 3.251992142407713e-05, + "loss": 2.1987, + "step": 7059000 + }, + { + "epoch": 34.98, + "learning_rate": 3.251868283765105e-05, + "loss": 2.2126, + "step": 7059500 + }, + { + "epoch": 34.98, + "learning_rate": 3.2517444251224966e-05, + "loss": 2.2202, + "step": 7060000 + }, + { + "epoch": 34.98, + "learning_rate": 3.251620566479888e-05, + "loss": 2.2023, + "step": 7060500 + }, + { + "epoch": 34.98, + "learning_rate": 3.25149670783728e-05, + "loss": 2.2168, + "step": 7061000 + }, + { + "epoch": 34.99, + "learning_rate": 3.251372849194671e-05, + "loss": 2.1998, + "step": 7061500 + }, + { + "epoch": 34.99, + "learning_rate": 3.251249238269348e-05, + "loss": 2.1941, + "step": 7062000 + }, + { + "epoch": 34.99, + "learning_rate": 3.2511253796267396e-05, + "loss": 2.1872, + "step": 7062500 + }, + { + "epoch": 34.99, + "learning_rate": 3.2510017687014165e-05, + "loss": 2.2097, + "step": 7063000 + }, + { + "epoch": 35.0, + "learning_rate": 3.250877910058808e-05, + "loss": 2.2149, + "step": 7063500 + }, + { + "epoch": 35.0, + "learning_rate": 3.2507540514162e-05, + "loss": 2.2237, + "step": 7064000 + }, + { + "epoch": 35.0, + "learning_rate": 3.2506301927735916e-05, + "loss": 2.201, + "step": 7064500 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.662072274008178, + "eval_accuracy_mlm": 0.6186920132733609, + "eval_accuracy_nsp": 0.8666060033181806, + "eval_loss": 2.2997031211853027, + "eval_runtime": 145.6764, + "eval_samples_per_second": 1750.173, + "eval_steps_per_second": 72.929, + "step": 7064505 + }, + { + "epoch": 35.0, + "learning_rate": 3.250506334130983e-05, + "loss": 2.1687, + "step": 7065000 + }, + { + "epoch": 35.0, + "learning_rate": 3.25038272320566e-05, + "loss": 2.1827, + "step": 7065500 + }, + { + "epoch": 35.01, + "learning_rate": 3.250258864563052e-05, + "loss": 2.1597, + "step": 7066000 + }, + { + "epoch": 35.01, + "learning_rate": 3.2501350059204435e-05, + "loss": 2.1611, + "step": 7066500 + }, + { + "epoch": 35.01, + "learning_rate": 3.250011147277835e-05, + "loss": 2.1661, + "step": 7067000 + }, + { + "epoch": 35.01, + "learning_rate": 3.249887288635227e-05, + "loss": 2.158, + "step": 7067500 + }, + { + "epoch": 35.02, + "learning_rate": 3.249763677709903e-05, + "loss": 2.1792, + "step": 7068000 + }, + { + "epoch": 35.02, + "learning_rate": 3.249639819067295e-05, + "loss": 2.1561, + "step": 7068500 + }, + { + "epoch": 35.02, + "learning_rate": 3.2495159604246865e-05, + "loss": 2.1664, + "step": 7069000 + }, + { + "epoch": 35.02, + "learning_rate": 3.2493923494993634e-05, + "loss": 2.1907, + "step": 7069500 + }, + { + "epoch": 35.03, + "learning_rate": 3.249268490856755e-05, + "loss": 2.1791, + "step": 7070000 + }, + { + "epoch": 35.03, + "learning_rate": 3.249144632214147e-05, + "loss": 2.1948, + "step": 7070500 + }, + { + "epoch": 35.03, + "learning_rate": 3.2490207735715385e-05, + "loss": 2.1503, + "step": 7071000 + }, + { + "epoch": 35.03, + "learning_rate": 3.24889691492893e-05, + "loss": 2.1801, + "step": 7071500 + }, + { + "epoch": 35.04, + "learning_rate": 3.248773056286322e-05, + "loss": 2.1924, + "step": 7072000 + }, + { + "epoch": 35.04, + "learning_rate": 3.2486491976437135e-05, + "loss": 2.1651, + "step": 7072500 + }, + { + "epoch": 35.04, + "learning_rate": 3.248525339001105e-05, + "loss": 2.1392, + "step": 7073000 + }, + { + "epoch": 35.04, + "learning_rate": 3.248401480358497e-05, + "loss": 2.1702, + "step": 7073500 + }, + { + "epoch": 35.05, + "learning_rate": 3.2482776217158886e-05, + "loss": 2.1746, + "step": 7074000 + }, + { + "epoch": 35.05, + "learning_rate": 3.24815376307328e-05, + "loss": 2.1503, + "step": 7074500 + }, + { + "epoch": 35.05, + "learning_rate": 3.248029904430672e-05, + "loss": 2.1645, + "step": 7075000 + }, + { + "epoch": 35.05, + "learning_rate": 3.247906045788063e-05, + "loss": 2.1635, + "step": 7075500 + }, + { + "epoch": 35.06, + "learning_rate": 3.247782187145455e-05, + "loss": 2.1599, + "step": 7076000 + }, + { + "epoch": 35.06, + "learning_rate": 3.2476583285028464e-05, + "loss": 2.164, + "step": 7076500 + }, + { + "epoch": 35.06, + "learning_rate": 3.247534469860238e-05, + "loss": 2.1623, + "step": 7077000 + }, + { + "epoch": 35.06, + "learning_rate": 3.24741061121763e-05, + "loss": 2.1668, + "step": 7077500 + }, + { + "epoch": 35.07, + "learning_rate": 3.247287000292307e-05, + "loss": 2.1809, + "step": 7078000 + }, + { + "epoch": 35.07, + "learning_rate": 3.2471633893669835e-05, + "loss": 2.1799, + "step": 7078500 + }, + { + "epoch": 35.07, + "learning_rate": 3.247039530724375e-05, + "loss": 2.1867, + "step": 7079000 + }, + { + "epoch": 35.07, + "learning_rate": 3.246915672081767e-05, + "loss": 2.1799, + "step": 7079500 + }, + { + "epoch": 35.08, + "learning_rate": 3.2467918134391586e-05, + "loss": 2.1792, + "step": 7080000 + }, + { + "epoch": 35.08, + "learning_rate": 3.24666795479655e-05, + "loss": 2.1811, + "step": 7080500 + }, + { + "epoch": 35.08, + "learning_rate": 3.246544096153942e-05, + "loss": 2.1582, + "step": 7081000 + }, + { + "epoch": 35.08, + "learning_rate": 3.246420237511334e-05, + "loss": 2.1756, + "step": 7081500 + }, + { + "epoch": 35.09, + "learning_rate": 3.2462963788687254e-05, + "loss": 2.2023, + "step": 7082000 + }, + { + "epoch": 35.09, + "learning_rate": 3.2461725202261164e-05, + "loss": 2.1908, + "step": 7082500 + }, + { + "epoch": 35.09, + "learning_rate": 3.246048909300793e-05, + "loss": 2.1654, + "step": 7083000 + }, + { + "epoch": 35.09, + "learning_rate": 3.245925050658185e-05, + "loss": 2.1965, + "step": 7083500 + }, + { + "epoch": 35.1, + "learning_rate": 3.245801192015577e-05, + "loss": 2.1755, + "step": 7084000 + }, + { + "epoch": 35.1, + "learning_rate": 3.2456773333729684e-05, + "loss": 2.1879, + "step": 7084500 + }, + { + "epoch": 35.1, + "learning_rate": 3.24555347473036e-05, + "loss": 2.1588, + "step": 7085000 + }, + { + "epoch": 35.1, + "learning_rate": 3.245429863805037e-05, + "loss": 2.1715, + "step": 7085500 + }, + { + "epoch": 35.11, + "learning_rate": 3.2453060051624286e-05, + "loss": 2.1767, + "step": 7086000 + }, + { + "epoch": 35.11, + "learning_rate": 3.24518214651982e-05, + "loss": 2.1845, + "step": 7086500 + }, + { + "epoch": 35.11, + "learning_rate": 3.245058287877212e-05, + "loss": 2.1522, + "step": 7087000 + }, + { + "epoch": 35.11, + "learning_rate": 3.244934429234604e-05, + "loss": 2.1601, + "step": 7087500 + }, + { + "epoch": 35.12, + "learning_rate": 3.244811066026565e-05, + "loss": 2.1818, + "step": 7088000 + }, + { + "epoch": 35.12, + "learning_rate": 3.244687455101243e-05, + "loss": 2.198, + "step": 7088500 + }, + { + "epoch": 35.12, + "learning_rate": 3.2445635964586344e-05, + "loss": 2.1568, + "step": 7089000 + }, + { + "epoch": 35.12, + "learning_rate": 3.244439737816026e-05, + "loss": 2.1736, + "step": 7089500 + }, + { + "epoch": 35.13, + "learning_rate": 3.244315879173417e-05, + "loss": 2.1725, + "step": 7090000 + }, + { + "epoch": 35.13, + "learning_rate": 3.244192020530809e-05, + "loss": 2.1716, + "step": 7090500 + }, + { + "epoch": 35.13, + "learning_rate": 3.2440681618882005e-05, + "loss": 2.1593, + "step": 7091000 + }, + { + "epoch": 35.13, + "learning_rate": 3.243944303245592e-05, + "loss": 2.1417, + "step": 7091500 + }, + { + "epoch": 35.14, + "learning_rate": 3.243820444602984e-05, + "loss": 2.1594, + "step": 7092000 + }, + { + "epoch": 35.14, + "learning_rate": 3.243696585960375e-05, + "loss": 2.1771, + "step": 7092500 + }, + { + "epoch": 35.14, + "learning_rate": 3.2435727273177665e-05, + "loss": 2.1621, + "step": 7093000 + }, + { + "epoch": 35.14, + "learning_rate": 3.243448868675158e-05, + "loss": 2.1602, + "step": 7093500 + }, + { + "epoch": 35.15, + "learning_rate": 3.24332501003255e-05, + "loss": 2.1644, + "step": 7094000 + }, + { + "epoch": 35.15, + "learning_rate": 3.2432011513899416e-05, + "loss": 2.1716, + "step": 7094500 + }, + { + "epoch": 35.15, + "learning_rate": 3.243077292747333e-05, + "loss": 2.1893, + "step": 7095000 + }, + { + "epoch": 35.15, + "learning_rate": 3.242953434104725e-05, + "loss": 2.1837, + "step": 7095500 + }, + { + "epoch": 35.16, + "learning_rate": 3.242829575462117e-05, + "loss": 2.1803, + "step": 7096000 + }, + { + "epoch": 35.16, + "learning_rate": 3.2427059645367936e-05, + "loss": 2.1727, + "step": 7096500 + }, + { + "epoch": 35.16, + "learning_rate": 3.242582105894185e-05, + "loss": 2.1676, + "step": 7097000 + }, + { + "epoch": 35.16, + "learning_rate": 3.242458247251577e-05, + "loss": 2.1982, + "step": 7097500 + }, + { + "epoch": 35.17, + "learning_rate": 3.242334388608969e-05, + "loss": 2.203, + "step": 7098000 + }, + { + "epoch": 35.17, + "learning_rate": 3.2422105299663604e-05, + "loss": 2.1756, + "step": 7098500 + }, + { + "epoch": 35.17, + "learning_rate": 3.2420869190410366e-05, + "loss": 2.1352, + "step": 7099000 + }, + { + "epoch": 35.17, + "learning_rate": 3.241963060398428e-05, + "loss": 2.1828, + "step": 7099500 + }, + { + "epoch": 35.18, + "learning_rate": 3.24183920175582e-05, + "loss": 2.1801, + "step": 7100000 + }, + { + "epoch": 35.18, + "learning_rate": 3.2417153431132116e-05, + "loss": 2.1608, + "step": 7100500 + }, + { + "epoch": 35.18, + "learning_rate": 3.241591484470603e-05, + "loss": 2.1727, + "step": 7101000 + }, + { + "epoch": 35.18, + "learning_rate": 3.241467625827995e-05, + "loss": 2.1829, + "step": 7101500 + }, + { + "epoch": 35.19, + "learning_rate": 3.241343767185387e-05, + "loss": 2.1832, + "step": 7102000 + }, + { + "epoch": 35.19, + "learning_rate": 3.2412199085427784e-05, + "loss": 2.1647, + "step": 7102500 + }, + { + "epoch": 35.19, + "learning_rate": 3.241096297617455e-05, + "loss": 2.153, + "step": 7103000 + }, + { + "epoch": 35.19, + "learning_rate": 3.240972438974847e-05, + "loss": 2.164, + "step": 7103500 + }, + { + "epoch": 35.2, + "learning_rate": 3.240848580332239e-05, + "loss": 2.1728, + "step": 7104000 + }, + { + "epoch": 35.2, + "learning_rate": 3.2407247216896304e-05, + "loss": 2.1788, + "step": 7104500 + }, + { + "epoch": 35.2, + "learning_rate": 3.240600863047022e-05, + "loss": 2.1901, + "step": 7105000 + }, + { + "epoch": 35.2, + "learning_rate": 3.240477004404414e-05, + "loss": 2.1619, + "step": 7105500 + }, + { + "epoch": 35.21, + "learning_rate": 3.2403531457618054e-05, + "loss": 2.1746, + "step": 7106000 + }, + { + "epoch": 35.21, + "learning_rate": 3.240229287119197e-05, + "loss": 2.19, + "step": 7106500 + }, + { + "epoch": 35.21, + "learning_rate": 3.240105428476589e-05, + "loss": 2.1823, + "step": 7107000 + }, + { + "epoch": 35.21, + "learning_rate": 3.239981817551265e-05, + "loss": 2.1837, + "step": 7107500 + }, + { + "epoch": 35.22, + "learning_rate": 3.239857958908657e-05, + "loss": 2.1808, + "step": 7108000 + }, + { + "epoch": 35.22, + "learning_rate": 3.2397341002660484e-05, + "loss": 2.1844, + "step": 7108500 + }, + { + "epoch": 35.22, + "learning_rate": 3.23961024162344e-05, + "loss": 2.1701, + "step": 7109000 + }, + { + "epoch": 35.22, + "learning_rate": 3.239486382980832e-05, + "loss": 2.1671, + "step": 7109500 + }, + { + "epoch": 35.23, + "learning_rate": 3.2393625243382235e-05, + "loss": 2.2084, + "step": 7110000 + }, + { + "epoch": 35.23, + "learning_rate": 3.2392389134129004e-05, + "loss": 2.1831, + "step": 7110500 + }, + { + "epoch": 35.23, + "learning_rate": 3.239115054770292e-05, + "loss": 2.1894, + "step": 7111000 + }, + { + "epoch": 35.23, + "learning_rate": 3.238991196127684e-05, + "loss": 2.1976, + "step": 7111500 + }, + { + "epoch": 35.24, + "learning_rate": 3.2388673374850755e-05, + "loss": 2.1777, + "step": 7112000 + }, + { + "epoch": 35.24, + "learning_rate": 3.238743726559752e-05, + "loss": 2.1676, + "step": 7112500 + }, + { + "epoch": 35.24, + "learning_rate": 3.2386198679171434e-05, + "loss": 2.1834, + "step": 7113000 + }, + { + "epoch": 35.24, + "learning_rate": 3.238496504709106e-05, + "loss": 2.1865, + "step": 7113500 + }, + { + "epoch": 35.25, + "learning_rate": 3.238372646066498e-05, + "loss": 2.1847, + "step": 7114000 + }, + { + "epoch": 35.25, + "learning_rate": 3.238249035141174e-05, + "loss": 2.1596, + "step": 7114500 + }, + { + "epoch": 35.25, + "learning_rate": 3.238125176498566e-05, + "loss": 2.1785, + "step": 7115000 + }, + { + "epoch": 35.25, + "learning_rate": 3.2380013178559574e-05, + "loss": 2.1876, + "step": 7115500 + }, + { + "epoch": 35.26, + "learning_rate": 3.237877459213349e-05, + "loss": 2.1801, + "step": 7116000 + }, + { + "epoch": 35.26, + "learning_rate": 3.237753600570741e-05, + "loss": 2.1576, + "step": 7116500 + }, + { + "epoch": 35.26, + "learning_rate": 3.2376297419281325e-05, + "loss": 2.1708, + "step": 7117000 + }, + { + "epoch": 35.26, + "learning_rate": 3.237505883285524e-05, + "loss": 2.188, + "step": 7117500 + }, + { + "epoch": 35.27, + "learning_rate": 3.237382024642916e-05, + "loss": 2.1749, + "step": 7118000 + }, + { + "epoch": 35.27, + "learning_rate": 3.237258166000307e-05, + "loss": 2.1658, + "step": 7118500 + }, + { + "epoch": 35.27, + "learning_rate": 3.2371345550749844e-05, + "loss": 2.1957, + "step": 7119000 + }, + { + "epoch": 35.27, + "learning_rate": 3.237010696432376e-05, + "loss": 2.1925, + "step": 7119500 + }, + { + "epoch": 35.27, + "learning_rate": 3.236886837789768e-05, + "loss": 2.1954, + "step": 7120000 + }, + { + "epoch": 35.28, + "learning_rate": 3.2367629791471595e-05, + "loss": 2.171, + "step": 7120500 + }, + { + "epoch": 35.28, + "learning_rate": 3.236639120504551e-05, + "loss": 2.1574, + "step": 7121000 + }, + { + "epoch": 35.28, + "learning_rate": 3.236515261861942e-05, + "loss": 2.1797, + "step": 7121500 + }, + { + "epoch": 35.28, + "learning_rate": 3.236391403219334e-05, + "loss": 2.1708, + "step": 7122000 + }, + { + "epoch": 35.29, + "learning_rate": 3.2362675445767256e-05, + "loss": 2.1776, + "step": 7122500 + }, + { + "epoch": 35.29, + "learning_rate": 3.236143685934117e-05, + "loss": 2.1824, + "step": 7123000 + }, + { + "epoch": 35.29, + "learning_rate": 3.236019827291509e-05, + "loss": 2.1585, + "step": 7123500 + }, + { + "epoch": 35.29, + "learning_rate": 3.235895968648901e-05, + "loss": 2.1665, + "step": 7124000 + }, + { + "epoch": 35.3, + "learning_rate": 3.2357721100062924e-05, + "loss": 2.162, + "step": 7124500 + }, + { + "epoch": 35.3, + "learning_rate": 3.235648251363684e-05, + "loss": 2.1863, + "step": 7125000 + }, + { + "epoch": 35.3, + "learning_rate": 3.235524392721076e-05, + "loss": 2.1742, + "step": 7125500 + }, + { + "epoch": 35.3, + "learning_rate": 3.2354005340784674e-05, + "loss": 2.1806, + "step": 7126000 + }, + { + "epoch": 35.31, + "learning_rate": 3.2352766754358585e-05, + "loss": 2.1616, + "step": 7126500 + }, + { + "epoch": 35.31, + "learning_rate": 3.23515281679325e-05, + "loss": 2.1634, + "step": 7127000 + }, + { + "epoch": 35.31, + "learning_rate": 3.235029453585213e-05, + "loss": 2.1948, + "step": 7127500 + }, + { + "epoch": 35.31, + "learning_rate": 3.234905594942604e-05, + "loss": 2.1992, + "step": 7128000 + }, + { + "epoch": 35.32, + "learning_rate": 3.2347817362999956e-05, + "loss": 2.1518, + "step": 7128500 + }, + { + "epoch": 35.32, + "learning_rate": 3.234657877657387e-05, + "loss": 2.1799, + "step": 7129000 + }, + { + "epoch": 35.32, + "learning_rate": 3.234534266732064e-05, + "loss": 2.1858, + "step": 7129500 + }, + { + "epoch": 35.32, + "learning_rate": 3.234410408089456e-05, + "loss": 2.1683, + "step": 7130000 + }, + { + "epoch": 35.33, + "learning_rate": 3.2342865494468476e-05, + "loss": 2.1536, + "step": 7130500 + }, + { + "epoch": 35.33, + "learning_rate": 3.2341626908042386e-05, + "loss": 2.1983, + "step": 7131000 + }, + { + "epoch": 35.33, + "learning_rate": 3.23403883216163e-05, + "loss": 2.1816, + "step": 7131500 + }, + { + "epoch": 35.33, + "learning_rate": 3.233914973519022e-05, + "loss": 2.1769, + "step": 7132000 + }, + { + "epoch": 35.34, + "learning_rate": 3.2337911148764137e-05, + "loss": 2.1931, + "step": 7132500 + }, + { + "epoch": 35.34, + "learning_rate": 3.2336672562338054e-05, + "loss": 2.1675, + "step": 7133000 + }, + { + "epoch": 35.34, + "learning_rate": 3.233543397591197e-05, + "loss": 2.1926, + "step": 7133500 + }, + { + "epoch": 35.34, + "learning_rate": 3.233419538948589e-05, + "loss": 2.1825, + "step": 7134000 + }, + { + "epoch": 35.35, + "learning_rate": 3.2332959280232656e-05, + "loss": 2.196, + "step": 7134500 + }, + { + "epoch": 35.35, + "learning_rate": 3.2331723170979425e-05, + "loss": 2.173, + "step": 7135000 + }, + { + "epoch": 35.35, + "learning_rate": 3.2330487061726194e-05, + "loss": 2.1661, + "step": 7135500 + }, + { + "epoch": 35.35, + "learning_rate": 3.232924847530011e-05, + "loss": 2.1848, + "step": 7136000 + }, + { + "epoch": 35.36, + "learning_rate": 3.232800988887403e-05, + "loss": 2.1861, + "step": 7136500 + }, + { + "epoch": 35.36, + "learning_rate": 3.2326771302447945e-05, + "loss": 2.1877, + "step": 7137000 + }, + { + "epoch": 35.36, + "learning_rate": 3.232553271602186e-05, + "loss": 2.2125, + "step": 7137500 + }, + { + "epoch": 35.36, + "learning_rate": 3.232429412959578e-05, + "loss": 2.1783, + "step": 7138000 + }, + { + "epoch": 35.37, + "learning_rate": 3.232305802034255e-05, + "loss": 2.1706, + "step": 7138500 + }, + { + "epoch": 35.37, + "learning_rate": 3.2321819433916464e-05, + "loss": 2.1612, + "step": 7139000 + }, + { + "epoch": 35.37, + "learning_rate": 3.232058084749038e-05, + "loss": 2.1701, + "step": 7139500 + }, + { + "epoch": 35.37, + "learning_rate": 3.23193422610643e-05, + "loss": 2.1693, + "step": 7140000 + }, + { + "epoch": 35.38, + "learning_rate": 3.231810615181106e-05, + "loss": 2.1877, + "step": 7140500 + }, + { + "epoch": 35.38, + "learning_rate": 3.231686756538498e-05, + "loss": 2.1918, + "step": 7141000 + }, + { + "epoch": 35.38, + "learning_rate": 3.2315628978958894e-05, + "loss": 2.1736, + "step": 7141500 + }, + { + "epoch": 35.38, + "learning_rate": 3.231439039253281e-05, + "loss": 2.1831, + "step": 7142000 + }, + { + "epoch": 35.39, + "learning_rate": 3.231315180610673e-05, + "loss": 2.1866, + "step": 7142500 + }, + { + "epoch": 35.39, + "learning_rate": 3.2311913219680645e-05, + "loss": 2.2025, + "step": 7143000 + }, + { + "epoch": 35.39, + "learning_rate": 3.231067463325456e-05, + "loss": 2.1617, + "step": 7143500 + }, + { + "epoch": 35.39, + "learning_rate": 3.230943604682848e-05, + "loss": 2.2045, + "step": 7144000 + }, + { + "epoch": 35.4, + "learning_rate": 3.2308197460402395e-05, + "loss": 2.1891, + "step": 7144500 + }, + { + "epoch": 35.4, + "learning_rate": 3.230695887397631e-05, + "loss": 2.1938, + "step": 7145000 + }, + { + "epoch": 35.4, + "learning_rate": 3.230572028755023e-05, + "loss": 2.1791, + "step": 7145500 + }, + { + "epoch": 35.4, + "learning_rate": 3.2304484178297e-05, + "loss": 2.1913, + "step": 7146000 + }, + { + "epoch": 35.41, + "learning_rate": 3.2303245591870915e-05, + "loss": 2.1784, + "step": 7146500 + }, + { + "epoch": 35.41, + "learning_rate": 3.230200700544483e-05, + "loss": 2.1682, + "step": 7147000 + }, + { + "epoch": 35.41, + "learning_rate": 3.230076841901874e-05, + "loss": 2.1989, + "step": 7147500 + }, + { + "epoch": 35.41, + "learning_rate": 3.229952983259266e-05, + "loss": 2.173, + "step": 7148000 + }, + { + "epoch": 35.42, + "learning_rate": 3.229829372333943e-05, + "loss": 2.1915, + "step": 7148500 + }, + { + "epoch": 35.42, + "learning_rate": 3.2297055136913345e-05, + "loss": 2.1816, + "step": 7149000 + }, + { + "epoch": 35.42, + "learning_rate": 3.229581655048726e-05, + "loss": 2.1887, + "step": 7149500 + }, + { + "epoch": 35.42, + "learning_rate": 3.229458044123403e-05, + "loss": 2.1812, + "step": 7150000 + }, + { + "epoch": 35.43, + "learning_rate": 3.229334185480795e-05, + "loss": 2.1866, + "step": 7150500 + }, + { + "epoch": 35.43, + "learning_rate": 3.2292103268381864e-05, + "loss": 2.1714, + "step": 7151000 + }, + { + "epoch": 35.43, + "learning_rate": 3.229086468195578e-05, + "loss": 2.1648, + "step": 7151500 + }, + { + "epoch": 35.43, + "learning_rate": 3.22896260955297e-05, + "loss": 2.1689, + "step": 7152000 + }, + { + "epoch": 35.44, + "learning_rate": 3.2288387509103615e-05, + "loss": 2.1751, + "step": 7152500 + }, + { + "epoch": 35.44, + "learning_rate": 3.228715139985038e-05, + "loss": 2.1721, + "step": 7153000 + }, + { + "epoch": 35.44, + "learning_rate": 3.2285912813424294e-05, + "loss": 2.1756, + "step": 7153500 + }, + { + "epoch": 35.44, + "learning_rate": 3.228467422699821e-05, + "loss": 2.1678, + "step": 7154000 + }, + { + "epoch": 35.45, + "learning_rate": 3.228343564057213e-05, + "loss": 2.2027, + "step": 7154500 + }, + { + "epoch": 35.45, + "learning_rate": 3.22821995313189e-05, + "loss": 2.2017, + "step": 7155000 + }, + { + "epoch": 35.45, + "learning_rate": 3.2280960944892814e-05, + "loss": 2.1792, + "step": 7155500 + }, + { + "epoch": 35.45, + "learning_rate": 3.227972483563958e-05, + "loss": 2.203, + "step": 7156000 + }, + { + "epoch": 35.46, + "learning_rate": 3.22784862492135e-05, + "loss": 2.174, + "step": 7156500 + }, + { + "epoch": 35.46, + "learning_rate": 3.2277247662787416e-05, + "loss": 2.1771, + "step": 7157000 + }, + { + "epoch": 35.46, + "learning_rate": 3.2276009076361327e-05, + "loss": 2.2021, + "step": 7157500 + }, + { + "epoch": 35.46, + "learning_rate": 3.2274770489935244e-05, + "loss": 2.1874, + "step": 7158000 + }, + { + "epoch": 35.47, + "learning_rate": 3.227353190350916e-05, + "loss": 2.1839, + "step": 7158500 + }, + { + "epoch": 35.47, + "learning_rate": 3.227229331708308e-05, + "loss": 2.1788, + "step": 7159000 + }, + { + "epoch": 35.47, + "learning_rate": 3.2271054730656994e-05, + "loss": 2.1972, + "step": 7159500 + }, + { + "epoch": 35.47, + "learning_rate": 3.226981614423091e-05, + "loss": 2.1936, + "step": 7160000 + }, + { + "epoch": 35.48, + "learning_rate": 3.226857755780483e-05, + "loss": 2.1646, + "step": 7160500 + }, + { + "epoch": 35.48, + "learning_rate": 3.2267338971378745e-05, + "loss": 2.1965, + "step": 7161000 + }, + { + "epoch": 35.48, + "learning_rate": 3.226610038495266e-05, + "loss": 2.1918, + "step": 7161500 + }, + { + "epoch": 35.48, + "learning_rate": 3.226486179852658e-05, + "loss": 2.2044, + "step": 7162000 + }, + { + "epoch": 35.49, + "learning_rate": 3.2263623212100496e-05, + "loss": 2.1982, + "step": 7162500 + }, + { + "epoch": 35.49, + "learning_rate": 3.2262387102847265e-05, + "loss": 2.1941, + "step": 7163000 + }, + { + "epoch": 35.49, + "learning_rate": 3.226114851642118e-05, + "loss": 2.1976, + "step": 7163500 + }, + { + "epoch": 35.49, + "learning_rate": 3.22599099299951e-05, + "loss": 2.1736, + "step": 7164000 + }, + { + "epoch": 35.5, + "learning_rate": 3.2258671343569015e-05, + "loss": 2.1829, + "step": 7164500 + }, + { + "epoch": 35.5, + "learning_rate": 3.225743275714293e-05, + "loss": 2.1834, + "step": 7165000 + }, + { + "epoch": 35.5, + "learning_rate": 3.225619417071685e-05, + "loss": 2.1704, + "step": 7165500 + }, + { + "epoch": 35.5, + "learning_rate": 3.2254955584290766e-05, + "loss": 2.1748, + "step": 7166000 + }, + { + "epoch": 35.51, + "learning_rate": 3.225371699786468e-05, + "loss": 2.1674, + "step": 7166500 + }, + { + "epoch": 35.51, + "learning_rate": 3.22524784114386e-05, + "loss": 2.2049, + "step": 7167000 + }, + { + "epoch": 35.51, + "learning_rate": 3.225123982501251e-05, + "loss": 2.1529, + "step": 7167500 + }, + { + "epoch": 35.51, + "learning_rate": 3.225000619293213e-05, + "loss": 2.1924, + "step": 7168000 + }, + { + "epoch": 35.52, + "learning_rate": 3.224876760650605e-05, + "loss": 2.1786, + "step": 7168500 + }, + { + "epoch": 35.52, + "learning_rate": 3.2247529020079965e-05, + "loss": 2.1862, + "step": 7169000 + }, + { + "epoch": 35.52, + "learning_rate": 3.224629043365388e-05, + "loss": 2.1665, + "step": 7169500 + }, + { + "epoch": 35.52, + "learning_rate": 3.22450518472278e-05, + "loss": 2.1811, + "step": 7170000 + }, + { + "epoch": 35.53, + "learning_rate": 3.224381573797457e-05, + "loss": 2.1747, + "step": 7170500 + }, + { + "epoch": 35.53, + "learning_rate": 3.224257715154848e-05, + "loss": 2.2019, + "step": 7171000 + }, + { + "epoch": 35.53, + "learning_rate": 3.2241338565122395e-05, + "loss": 2.2123, + "step": 7171500 + }, + { + "epoch": 35.53, + "learning_rate": 3.224009997869631e-05, + "loss": 2.1879, + "step": 7172000 + }, + { + "epoch": 35.54, + "learning_rate": 3.223886386944308e-05, + "loss": 2.2036, + "step": 7172500 + }, + { + "epoch": 35.54, + "learning_rate": 3.2237625283017e-05, + "loss": 2.1967, + "step": 7173000 + }, + { + "epoch": 35.54, + "learning_rate": 3.2236386696590914e-05, + "loss": 2.191, + "step": 7173500 + }, + { + "epoch": 35.54, + "learning_rate": 3.223514811016483e-05, + "loss": 2.1963, + "step": 7174000 + }, + { + "epoch": 35.54, + "learning_rate": 3.223390952373875e-05, + "loss": 2.1806, + "step": 7174500 + }, + { + "epoch": 35.55, + "learning_rate": 3.2232670937312665e-05, + "loss": 2.202, + "step": 7175000 + }, + { + "epoch": 35.55, + "learning_rate": 3.2231434828059434e-05, + "loss": 2.1755, + "step": 7175500 + }, + { + "epoch": 35.55, + "learning_rate": 3.223019624163335e-05, + "loss": 2.1722, + "step": 7176000 + }, + { + "epoch": 35.55, + "learning_rate": 3.222895765520727e-05, + "loss": 2.1985, + "step": 7176500 + }, + { + "epoch": 35.56, + "learning_rate": 3.2227719068781185e-05, + "loss": 2.1716, + "step": 7177000 + }, + { + "epoch": 35.56, + "learning_rate": 3.22264804823551e-05, + "loss": 2.1865, + "step": 7177500 + }, + { + "epoch": 35.56, + "learning_rate": 3.222524189592901e-05, + "loss": 2.1747, + "step": 7178000 + }, + { + "epoch": 35.56, + "learning_rate": 3.222400578667578e-05, + "loss": 2.1841, + "step": 7178500 + }, + { + "epoch": 35.57, + "learning_rate": 3.22227672002497e-05, + "loss": 2.2028, + "step": 7179000 + }, + { + "epoch": 35.57, + "learning_rate": 3.2221528613823614e-05, + "loss": 2.1779, + "step": 7179500 + }, + { + "epoch": 35.57, + "learning_rate": 3.222029002739753e-05, + "loss": 2.2021, + "step": 7180000 + }, + { + "epoch": 35.57, + "learning_rate": 3.221905391814431e-05, + "loss": 2.1877, + "step": 7180500 + }, + { + "epoch": 35.58, + "learning_rate": 3.2217815331718224e-05, + "loss": 2.192, + "step": 7181000 + }, + { + "epoch": 35.58, + "learning_rate": 3.2216576745292134e-05, + "loss": 2.1805, + "step": 7181500 + }, + { + "epoch": 35.58, + "learning_rate": 3.221533815886605e-05, + "loss": 2.2088, + "step": 7182000 + }, + { + "epoch": 35.58, + "learning_rate": 3.221409957243997e-05, + "loss": 2.1966, + "step": 7182500 + }, + { + "epoch": 35.59, + "learning_rate": 3.2212860986013885e-05, + "loss": 2.1925, + "step": 7183000 + }, + { + "epoch": 35.59, + "learning_rate": 3.22116223995878e-05, + "loss": 2.1957, + "step": 7183500 + }, + { + "epoch": 35.59, + "learning_rate": 3.221038629033457e-05, + "loss": 2.1982, + "step": 7184000 + }, + { + "epoch": 35.59, + "learning_rate": 3.220914770390848e-05, + "loss": 2.2168, + "step": 7184500 + }, + { + "epoch": 35.6, + "learning_rate": 3.22079091174824e-05, + "loss": 2.1911, + "step": 7185000 + }, + { + "epoch": 35.6, + "learning_rate": 3.2206670531056314e-05, + "loss": 2.1777, + "step": 7185500 + }, + { + "epoch": 35.6, + "learning_rate": 3.220543194463023e-05, + "loss": 2.1778, + "step": 7186000 + }, + { + "epoch": 35.6, + "learning_rate": 3.220419335820415e-05, + "loss": 2.1783, + "step": 7186500 + }, + { + "epoch": 35.61, + "learning_rate": 3.2202954771778065e-05, + "loss": 2.2068, + "step": 7187000 + }, + { + "epoch": 35.61, + "learning_rate": 3.220171618535198e-05, + "loss": 2.2157, + "step": 7187500 + }, + { + "epoch": 35.61, + "learning_rate": 3.22004775989259e-05, + "loss": 2.1735, + "step": 7188000 + }, + { + "epoch": 35.61, + "learning_rate": 3.219924148967267e-05, + "loss": 2.166, + "step": 7188500 + }, + { + "epoch": 35.62, + "learning_rate": 3.2198002903246585e-05, + "loss": 2.195, + "step": 7189000 + }, + { + "epoch": 35.62, + "learning_rate": 3.2196766793993354e-05, + "loss": 2.2019, + "step": 7189500 + }, + { + "epoch": 35.62, + "learning_rate": 3.219552820756727e-05, + "loss": 2.1698, + "step": 7190000 + }, + { + "epoch": 35.62, + "learning_rate": 3.219428962114119e-05, + "loss": 2.1708, + "step": 7190500 + }, + { + "epoch": 35.63, + "learning_rate": 3.21930510347151e-05, + "loss": 2.198, + "step": 7191000 + }, + { + "epoch": 35.63, + "learning_rate": 3.2191812448289015e-05, + "loss": 2.1899, + "step": 7191500 + }, + { + "epoch": 35.63, + "learning_rate": 3.219057633903579e-05, + "loss": 2.188, + "step": 7192000 + }, + { + "epoch": 35.63, + "learning_rate": 3.218934022978255e-05, + "loss": 2.2009, + "step": 7192500 + }, + { + "epoch": 35.64, + "learning_rate": 3.218810164335647e-05, + "loss": 2.189, + "step": 7193000 + }, + { + "epoch": 35.64, + "learning_rate": 3.2186863056930386e-05, + "loss": 2.1848, + "step": 7193500 + }, + { + "epoch": 35.64, + "learning_rate": 3.21856244705043e-05, + "loss": 2.2159, + "step": 7194000 + }, + { + "epoch": 35.64, + "learning_rate": 3.218438588407822e-05, + "loss": 2.1881, + "step": 7194500 + }, + { + "epoch": 35.65, + "learning_rate": 3.218314729765214e-05, + "loss": 2.1793, + "step": 7195000 + }, + { + "epoch": 35.65, + "learning_rate": 3.2181908711226054e-05, + "loss": 2.1856, + "step": 7195500 + }, + { + "epoch": 35.65, + "learning_rate": 3.218067012479997e-05, + "loss": 2.1974, + "step": 7196000 + }, + { + "epoch": 35.65, + "learning_rate": 3.217943153837389e-05, + "loss": 2.1806, + "step": 7196500 + }, + { + "epoch": 35.66, + "learning_rate": 3.21781929519478e-05, + "loss": 2.1729, + "step": 7197000 + }, + { + "epoch": 35.66, + "learning_rate": 3.2176954365521715e-05, + "loss": 2.1895, + "step": 7197500 + }, + { + "epoch": 35.66, + "learning_rate": 3.217571577909563e-05, + "loss": 2.2215, + "step": 7198000 + }, + { + "epoch": 35.66, + "learning_rate": 3.217447719266955e-05, + "loss": 2.1851, + "step": 7198500 + }, + { + "epoch": 35.67, + "learning_rate": 3.2173238606243465e-05, + "loss": 2.1652, + "step": 7199000 + }, + { + "epoch": 35.67, + "learning_rate": 3.217200001981738e-05, + "loss": 2.192, + "step": 7199500 + }, + { + "epoch": 35.67, + "learning_rate": 3.21707614333913e-05, + "loss": 2.2075, + "step": 7200000 + }, + { + "epoch": 35.67, + "learning_rate": 3.2169522846965216e-05, + "loss": 2.1748, + "step": 7200500 + }, + { + "epoch": 35.68, + "learning_rate": 3.2168286737711985e-05, + "loss": 2.1801, + "step": 7201000 + }, + { + "epoch": 35.68, + "learning_rate": 3.21670481512859e-05, + "loss": 2.1767, + "step": 7201500 + }, + { + "epoch": 35.68, + "learning_rate": 3.216580956485982e-05, + "loss": 2.1897, + "step": 7202000 + }, + { + "epoch": 35.68, + "learning_rate": 3.2164570978433736e-05, + "loss": 2.2161, + "step": 7202500 + }, + { + "epoch": 35.69, + "learning_rate": 3.216333239200765e-05, + "loss": 2.2114, + "step": 7203000 + }, + { + "epoch": 35.69, + "learning_rate": 3.2162096282754415e-05, + "loss": 2.2018, + "step": 7203500 + }, + { + "epoch": 35.69, + "learning_rate": 3.216086017350119e-05, + "loss": 2.1898, + "step": 7204000 + }, + { + "epoch": 35.69, + "learning_rate": 3.215962158707511e-05, + "loss": 2.2153, + "step": 7204500 + }, + { + "epoch": 35.7, + "learning_rate": 3.2158385477821876e-05, + "loss": 2.1962, + "step": 7205000 + }, + { + "epoch": 35.7, + "learning_rate": 3.2157146891395786e-05, + "loss": 2.2066, + "step": 7205500 + }, + { + "epoch": 35.7, + "learning_rate": 3.21559083049697e-05, + "loss": 2.1808, + "step": 7206000 + }, + { + "epoch": 35.7, + "learning_rate": 3.215466971854362e-05, + "loss": 2.1784, + "step": 7206500 + }, + { + "epoch": 35.71, + "learning_rate": 3.215343113211754e-05, + "loss": 2.1879, + "step": 7207000 + }, + { + "epoch": 35.71, + "learning_rate": 3.2152192545691454e-05, + "loss": 2.2027, + "step": 7207500 + }, + { + "epoch": 35.71, + "learning_rate": 3.215095395926537e-05, + "loss": 2.149, + "step": 7208000 + }, + { + "epoch": 35.71, + "learning_rate": 3.214971537283929e-05, + "loss": 2.1831, + "step": 7208500 + }, + { + "epoch": 35.72, + "learning_rate": 3.2148476786413205e-05, + "loss": 2.216, + "step": 7209000 + }, + { + "epoch": 35.72, + "learning_rate": 3.2147238199987115e-05, + "loss": 2.216, + "step": 7209500 + }, + { + "epoch": 35.72, + "learning_rate": 3.214600209073389e-05, + "loss": 2.1782, + "step": 7210000 + }, + { + "epoch": 35.72, + "learning_rate": 3.214476350430781e-05, + "loss": 2.1767, + "step": 7210500 + }, + { + "epoch": 35.73, + "learning_rate": 3.2143524917881724e-05, + "loss": 2.1726, + "step": 7211000 + }, + { + "epoch": 35.73, + "learning_rate": 3.214228633145564e-05, + "loss": 2.1896, + "step": 7211500 + }, + { + "epoch": 35.73, + "learning_rate": 3.214104774502956e-05, + "loss": 2.1825, + "step": 7212000 + }, + { + "epoch": 35.73, + "learning_rate": 3.2139809158603475e-05, + "loss": 2.1887, + "step": 7212500 + }, + { + "epoch": 35.74, + "learning_rate": 3.2138570572177385e-05, + "loss": 2.1715, + "step": 7213000 + }, + { + "epoch": 35.74, + "learning_rate": 3.21373319857513e-05, + "loss": 2.187, + "step": 7213500 + }, + { + "epoch": 35.74, + "learning_rate": 3.213609339932522e-05, + "loss": 2.1838, + "step": 7214000 + }, + { + "epoch": 35.74, + "learning_rate": 3.213485729007199e-05, + "loss": 2.194, + "step": 7214500 + }, + { + "epoch": 35.75, + "learning_rate": 3.2133618703645905e-05, + "loss": 2.1984, + "step": 7215000 + }, + { + "epoch": 35.75, + "learning_rate": 3.213238011721982e-05, + "loss": 2.2033, + "step": 7215500 + }, + { + "epoch": 35.75, + "learning_rate": 3.213114153079373e-05, + "loss": 2.1934, + "step": 7216000 + }, + { + "epoch": 35.75, + "learning_rate": 3.212990294436765e-05, + "loss": 2.2022, + "step": 7216500 + }, + { + "epoch": 35.76, + "learning_rate": 3.2128664357941566e-05, + "loss": 2.185, + "step": 7217000 + }, + { + "epoch": 35.76, + "learning_rate": 3.212742577151548e-05, + "loss": 2.1806, + "step": 7217500 + }, + { + "epoch": 35.76, + "learning_rate": 3.212618966226226e-05, + "loss": 2.1936, + "step": 7218000 + }, + { + "epoch": 35.76, + "learning_rate": 3.2124951075836175e-05, + "loss": 2.163, + "step": 7218500 + }, + { + "epoch": 35.77, + "learning_rate": 3.2123712489410085e-05, + "loss": 2.1739, + "step": 7219000 + }, + { + "epoch": 35.77, + "learning_rate": 3.2122473902984e-05, + "loss": 2.1609, + "step": 7219500 + }, + { + "epoch": 35.77, + "learning_rate": 3.212123531655792e-05, + "loss": 2.1824, + "step": 7220000 + }, + { + "epoch": 35.77, + "learning_rate": 3.2119996730131836e-05, + "loss": 2.1913, + "step": 7220500 + }, + { + "epoch": 35.78, + "learning_rate": 3.211875814370575e-05, + "loss": 2.2127, + "step": 7221000 + }, + { + "epoch": 35.78, + "learning_rate": 3.211751955727967e-05, + "loss": 2.1843, + "step": 7221500 + }, + { + "epoch": 35.78, + "learning_rate": 3.211628097085359e-05, + "loss": 2.1812, + "step": 7222000 + }, + { + "epoch": 35.78, + "learning_rate": 3.211504486160035e-05, + "loss": 2.1801, + "step": 7222500 + }, + { + "epoch": 35.79, + "learning_rate": 3.2113806275174266e-05, + "loss": 2.1778, + "step": 7223000 + }, + { + "epoch": 35.79, + "learning_rate": 3.211256768874818e-05, + "loss": 2.1982, + "step": 7223500 + }, + { + "epoch": 35.79, + "learning_rate": 3.21113291023221e-05, + "loss": 2.2053, + "step": 7224000 + }, + { + "epoch": 35.79, + "learning_rate": 3.211009051589602e-05, + "loss": 2.1966, + "step": 7224500 + }, + { + "epoch": 35.8, + "learning_rate": 3.2108851929469934e-05, + "loss": 2.1682, + "step": 7225000 + }, + { + "epoch": 35.8, + "learning_rate": 3.210761334304385e-05, + "loss": 2.2173, + "step": 7225500 + }, + { + "epoch": 35.8, + "learning_rate": 3.210637475661777e-05, + "loss": 2.1867, + "step": 7226000 + }, + { + "epoch": 35.8, + "learning_rate": 3.2105136170191684e-05, + "loss": 2.1976, + "step": 7226500 + }, + { + "epoch": 35.81, + "learning_rate": 3.210390006093845e-05, + "loss": 2.185, + "step": 7227000 + }, + { + "epoch": 35.81, + "learning_rate": 3.210266147451237e-05, + "loss": 2.1922, + "step": 7227500 + }, + { + "epoch": 35.81, + "learning_rate": 3.210142536525914e-05, + "loss": 2.1783, + "step": 7228000 + }, + { + "epoch": 35.81, + "learning_rate": 3.210018677883305e-05, + "loss": 2.1788, + "step": 7228500 + }, + { + "epoch": 35.81, + "learning_rate": 3.2098950669579825e-05, + "loss": 2.2044, + "step": 7229000 + }, + { + "epoch": 35.82, + "learning_rate": 3.209771208315374e-05, + "loss": 2.1966, + "step": 7229500 + }, + { + "epoch": 35.82, + "learning_rate": 3.209647349672766e-05, + "loss": 2.2111, + "step": 7230000 + }, + { + "epoch": 35.82, + "learning_rate": 3.2095234910301575e-05, + "loss": 2.1748, + "step": 7230500 + }, + { + "epoch": 35.82, + "learning_rate": 3.209400127822119e-05, + "loss": 2.1646, + "step": 7231000 + }, + { + "epoch": 35.83, + "learning_rate": 3.2092765168967965e-05, + "loss": 2.1985, + "step": 7231500 + }, + { + "epoch": 35.83, + "learning_rate": 3.2091526582541875e-05, + "loss": 2.1675, + "step": 7232000 + }, + { + "epoch": 35.83, + "learning_rate": 3.209028799611579e-05, + "loss": 2.1727, + "step": 7232500 + }, + { + "epoch": 35.83, + "learning_rate": 3.208905188686257e-05, + "loss": 2.1669, + "step": 7233000 + }, + { + "epoch": 35.84, + "learning_rate": 3.208781330043648e-05, + "loss": 2.19, + "step": 7233500 + }, + { + "epoch": 35.84, + "learning_rate": 3.2086574714010395e-05, + "loss": 2.1846, + "step": 7234000 + }, + { + "epoch": 35.84, + "learning_rate": 3.208533612758431e-05, + "loss": 2.2084, + "step": 7234500 + }, + { + "epoch": 35.84, + "learning_rate": 3.208409754115823e-05, + "loss": 2.1889, + "step": 7235000 + }, + { + "epoch": 35.85, + "learning_rate": 3.2082858954732146e-05, + "loss": 2.1709, + "step": 7235500 + }, + { + "epoch": 35.85, + "learning_rate": 3.2081620368306056e-05, + "loss": 2.192, + "step": 7236000 + }, + { + "epoch": 35.85, + "learning_rate": 3.208038425905283e-05, + "loss": 2.1989, + "step": 7236500 + }, + { + "epoch": 35.85, + "learning_rate": 3.207914567262675e-05, + "loss": 2.1778, + "step": 7237000 + }, + { + "epoch": 35.86, + "learning_rate": 3.2077907086200665e-05, + "loss": 2.2039, + "step": 7237500 + }, + { + "epoch": 35.86, + "learning_rate": 3.207666849977458e-05, + "loss": 2.1986, + "step": 7238000 + }, + { + "epoch": 35.86, + "learning_rate": 3.207542991334849e-05, + "loss": 2.1762, + "step": 7238500 + }, + { + "epoch": 35.86, + "learning_rate": 3.207419132692241e-05, + "loss": 2.177, + "step": 7239000 + }, + { + "epoch": 35.87, + "learning_rate": 3.2072952740496326e-05, + "loss": 2.1804, + "step": 7239500 + }, + { + "epoch": 35.87, + "learning_rate": 3.207171415407024e-05, + "loss": 2.194, + "step": 7240000 + }, + { + "epoch": 35.87, + "learning_rate": 3.207047556764416e-05, + "loss": 2.1995, + "step": 7240500 + }, + { + "epoch": 35.87, + "learning_rate": 3.206923698121808e-05, + "loss": 2.227, + "step": 7241000 + }, + { + "epoch": 35.88, + "learning_rate": 3.2067998394791994e-05, + "loss": 2.1887, + "step": 7241500 + }, + { + "epoch": 35.88, + "learning_rate": 3.206675980836591e-05, + "loss": 2.1928, + "step": 7242000 + }, + { + "epoch": 35.88, + "learning_rate": 3.206552122193983e-05, + "loss": 2.1956, + "step": 7242500 + }, + { + "epoch": 35.88, + "learning_rate": 3.206428511268659e-05, + "loss": 2.217, + "step": 7243000 + }, + { + "epoch": 35.89, + "learning_rate": 3.2063046526260507e-05, + "loss": 2.212, + "step": 7243500 + }, + { + "epoch": 35.89, + "learning_rate": 3.206181041700728e-05, + "loss": 2.1715, + "step": 7244000 + }, + { + "epoch": 35.89, + "learning_rate": 3.206057183058119e-05, + "loss": 2.1658, + "step": 7244500 + }, + { + "epoch": 35.89, + "learning_rate": 3.205933324415511e-05, + "loss": 2.1871, + "step": 7245000 + }, + { + "epoch": 35.9, + "learning_rate": 3.2058097134901885e-05, + "loss": 2.183, + "step": 7245500 + }, + { + "epoch": 35.9, + "learning_rate": 3.20568585484758e-05, + "loss": 2.1936, + "step": 7246000 + }, + { + "epoch": 35.9, + "learning_rate": 3.205561996204972e-05, + "loss": 2.1959, + "step": 7246500 + }, + { + "epoch": 35.9, + "learning_rate": 3.205438137562363e-05, + "loss": 2.2226, + "step": 7247000 + }, + { + "epoch": 35.91, + "learning_rate": 3.2053142789197546e-05, + "loss": 2.1854, + "step": 7247500 + }, + { + "epoch": 35.91, + "learning_rate": 3.205190420277146e-05, + "loss": 2.2058, + "step": 7248000 + }, + { + "epoch": 35.91, + "learning_rate": 3.205066561634538e-05, + "loss": 2.1896, + "step": 7248500 + }, + { + "epoch": 35.91, + "learning_rate": 3.2049427029919297e-05, + "loss": 2.1738, + "step": 7249000 + }, + { + "epoch": 35.92, + "learning_rate": 3.204818844349321e-05, + "loss": 2.1808, + "step": 7249500 + }, + { + "epoch": 35.92, + "learning_rate": 3.2046949857067124e-05, + "loss": 2.2039, + "step": 7250000 + }, + { + "epoch": 35.92, + "learning_rate": 3.204571127064104e-05, + "loss": 2.1845, + "step": 7250500 + }, + { + "epoch": 35.92, + "learning_rate": 3.204447268421496e-05, + "loss": 2.203, + "step": 7251000 + }, + { + "epoch": 35.93, + "learning_rate": 3.2043234097788874e-05, + "loss": 2.1995, + "step": 7251500 + }, + { + "epoch": 35.93, + "learning_rate": 3.204199551136279e-05, + "loss": 2.2299, + "step": 7252000 + }, + { + "epoch": 35.93, + "learning_rate": 3.204075692493671e-05, + "loss": 2.2146, + "step": 7252500 + }, + { + "epoch": 35.93, + "learning_rate": 3.2039518338510625e-05, + "loss": 2.2082, + "step": 7253000 + }, + { + "epoch": 35.94, + "learning_rate": 3.2038282229257394e-05, + "loss": 2.1813, + "step": 7253500 + }, + { + "epoch": 35.94, + "learning_rate": 3.203704364283131e-05, + "loss": 2.1665, + "step": 7254000 + }, + { + "epoch": 35.94, + "learning_rate": 3.203580505640523e-05, + "loss": 2.1758, + "step": 7254500 + }, + { + "epoch": 35.94, + "learning_rate": 3.2034566469979145e-05, + "loss": 2.1813, + "step": 7255000 + }, + { + "epoch": 35.95, + "learning_rate": 3.2033330360725914e-05, + "loss": 2.1977, + "step": 7255500 + }, + { + "epoch": 35.95, + "learning_rate": 3.203209177429983e-05, + "loss": 2.1928, + "step": 7256000 + }, + { + "epoch": 35.95, + "learning_rate": 3.203085318787374e-05, + "loss": 2.2045, + "step": 7256500 + }, + { + "epoch": 35.95, + "learning_rate": 3.202961460144766e-05, + "loss": 2.1974, + "step": 7257000 + }, + { + "epoch": 35.96, + "learning_rate": 3.2028378492194426e-05, + "loss": 2.1832, + "step": 7257500 + }, + { + "epoch": 35.96, + "learning_rate": 3.202713990576834e-05, + "loss": 2.1904, + "step": 7258000 + }, + { + "epoch": 35.96, + "learning_rate": 3.202590131934226e-05, + "loss": 2.1817, + "step": 7258500 + }, + { + "epoch": 35.96, + "learning_rate": 3.202466273291618e-05, + "loss": 2.2003, + "step": 7259000 + }, + { + "epoch": 35.97, + "learning_rate": 3.2023424146490094e-05, + "loss": 2.1702, + "step": 7259500 + }, + { + "epoch": 35.97, + "learning_rate": 3.202218556006401e-05, + "loss": 2.1826, + "step": 7260000 + }, + { + "epoch": 35.97, + "learning_rate": 3.202094697363793e-05, + "loss": 2.1929, + "step": 7260500 + }, + { + "epoch": 35.97, + "learning_rate": 3.20197108643847e-05, + "loss": 2.1713, + "step": 7261000 + }, + { + "epoch": 35.98, + "learning_rate": 3.2018472277958614e-05, + "loss": 2.2067, + "step": 7261500 + }, + { + "epoch": 35.98, + "learning_rate": 3.201723369153253e-05, + "loss": 2.1851, + "step": 7262000 + }, + { + "epoch": 35.98, + "learning_rate": 3.201599510510645e-05, + "loss": 2.2131, + "step": 7262500 + }, + { + "epoch": 35.98, + "learning_rate": 3.2014758995853216e-05, + "loss": 2.1984, + "step": 7263000 + }, + { + "epoch": 35.99, + "learning_rate": 3.2013520409427127e-05, + "loss": 2.1667, + "step": 7263500 + }, + { + "epoch": 35.99, + "learning_rate": 3.2012281823001043e-05, + "loss": 2.1926, + "step": 7264000 + }, + { + "epoch": 35.99, + "learning_rate": 3.201104323657496e-05, + "loss": 2.2088, + "step": 7264500 + }, + { + "epoch": 35.99, + "learning_rate": 3.200980465014888e-05, + "loss": 2.1889, + "step": 7265000 + }, + { + "epoch": 36.0, + "learning_rate": 3.2008566063722794e-05, + "loss": 2.1909, + "step": 7265500 + }, + { + "epoch": 36.0, + "learning_rate": 3.200732747729671e-05, + "loss": 2.2, + "step": 7266000 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.6622320326905691, + "eval_accuracy_mlm": 0.6186748779181579, + "eval_accuracy_nsp": 0.8673472989774826, + "eval_loss": 2.315115213394165, + "eval_runtime": 146.032, + "eval_samples_per_second": 1745.911, + "eval_steps_per_second": 72.751, + "step": 7266348 + }, + { + "epoch": 36.0, + "learning_rate": 3.200608889087063e-05, + "loss": 2.1755, + "step": 7266500 + }, + { + "epoch": 36.0, + "learning_rate": 3.20048527816174e-05, + "loss": 2.1472, + "step": 7267000 + }, + { + "epoch": 36.01, + "learning_rate": 3.2003614195191314e-05, + "loss": 2.1568, + "step": 7267500 + }, + { + "epoch": 36.01, + "learning_rate": 3.200237808593808e-05, + "loss": 2.1619, + "step": 7268000 + }, + { + "epoch": 36.01, + "learning_rate": 3.2001139499512e-05, + "loss": 2.1834, + "step": 7268500 + }, + { + "epoch": 36.01, + "learning_rate": 3.1999900913085917e-05, + "loss": 2.1446, + "step": 7269000 + }, + { + "epoch": 36.02, + "learning_rate": 3.199866232665983e-05, + "loss": 2.1727, + "step": 7269500 + }, + { + "epoch": 36.02, + "learning_rate": 3.1997423740233744e-05, + "loss": 2.1695, + "step": 7270000 + }, + { + "epoch": 36.02, + "learning_rate": 3.199618515380766e-05, + "loss": 2.1422, + "step": 7270500 + }, + { + "epoch": 36.02, + "learning_rate": 3.199494656738158e-05, + "loss": 2.1672, + "step": 7271000 + }, + { + "epoch": 36.03, + "learning_rate": 3.1993707980955494e-05, + "loss": 2.1533, + "step": 7271500 + }, + { + "epoch": 36.03, + "learning_rate": 3.199247187170227e-05, + "loss": 2.1539, + "step": 7272000 + }, + { + "epoch": 36.03, + "learning_rate": 3.199123576244903e-05, + "loss": 2.1517, + "step": 7272500 + }, + { + "epoch": 36.03, + "learning_rate": 3.198999717602295e-05, + "loss": 2.1898, + "step": 7273000 + }, + { + "epoch": 36.04, + "learning_rate": 3.1988758589596866e-05, + "loss": 2.1593, + "step": 7273500 + }, + { + "epoch": 36.04, + "learning_rate": 3.198752000317078e-05, + "loss": 2.1246, + "step": 7274000 + }, + { + "epoch": 36.04, + "learning_rate": 3.19862814167447e-05, + "loss": 2.1623, + "step": 7274500 + }, + { + "epoch": 36.04, + "learning_rate": 3.198504283031862e-05, + "loss": 2.1748, + "step": 7275000 + }, + { + "epoch": 36.05, + "learning_rate": 3.1983804243892534e-05, + "loss": 2.1472, + "step": 7275500 + }, + { + "epoch": 36.05, + "learning_rate": 3.1982565657466444e-05, + "loss": 2.1667, + "step": 7276000 + }, + { + "epoch": 36.05, + "learning_rate": 3.198132954821322e-05, + "loss": 2.185, + "step": 7276500 + }, + { + "epoch": 36.05, + "learning_rate": 3.1980090961787136e-05, + "loss": 2.1632, + "step": 7277000 + }, + { + "epoch": 36.06, + "learning_rate": 3.197885237536105e-05, + "loss": 2.1667, + "step": 7277500 + }, + { + "epoch": 36.06, + "learning_rate": 3.1977616266107815e-05, + "loss": 2.1795, + "step": 7278000 + }, + { + "epoch": 36.06, + "learning_rate": 3.197637767968173e-05, + "loss": 2.1736, + "step": 7278500 + }, + { + "epoch": 36.06, + "learning_rate": 3.197513909325565e-05, + "loss": 2.1804, + "step": 7279000 + }, + { + "epoch": 36.07, + "learning_rate": 3.1973900506829566e-05, + "loss": 2.1707, + "step": 7279500 + }, + { + "epoch": 36.07, + "learning_rate": 3.197266192040348e-05, + "loss": 2.16, + "step": 7280000 + }, + { + "epoch": 36.07, + "learning_rate": 3.19714233339774e-05, + "loss": 2.1459, + "step": 7280500 + }, + { + "epoch": 36.07, + "learning_rate": 3.197018474755132e-05, + "loss": 2.176, + "step": 7281000 + }, + { + "epoch": 36.08, + "learning_rate": 3.1968946161125234e-05, + "loss": 2.188, + "step": 7281500 + }, + { + "epoch": 36.08, + "learning_rate": 3.1967707574699144e-05, + "loss": 2.1487, + "step": 7282000 + }, + { + "epoch": 36.08, + "learning_rate": 3.196646898827306e-05, + "loss": 2.1741, + "step": 7282500 + }, + { + "epoch": 36.08, + "learning_rate": 3.196523040184698e-05, + "loss": 2.1627, + "step": 7283000 + }, + { + "epoch": 36.08, + "learning_rate": 3.1963991815420895e-05, + "loss": 2.1524, + "step": 7283500 + }, + { + "epoch": 36.09, + "learning_rate": 3.196275322899481e-05, + "loss": 2.1659, + "step": 7284000 + }, + { + "epoch": 36.09, + "learning_rate": 3.196151464256873e-05, + "loss": 2.1753, + "step": 7284500 + }, + { + "epoch": 36.09, + "learning_rate": 3.1960276056142645e-05, + "loss": 2.1853, + "step": 7285000 + }, + { + "epoch": 36.09, + "learning_rate": 3.195903746971656e-05, + "loss": 2.1606, + "step": 7285500 + }, + { + "epoch": 36.1, + "learning_rate": 3.195779888329048e-05, + "loss": 2.1595, + "step": 7286000 + }, + { + "epoch": 36.1, + "learning_rate": 3.195656277403725e-05, + "loss": 2.1824, + "step": 7286500 + }, + { + "epoch": 36.1, + "learning_rate": 3.1955324187611165e-05, + "loss": 2.1565, + "step": 7287000 + }, + { + "epoch": 36.1, + "learning_rate": 3.195408560118508e-05, + "loss": 2.1437, + "step": 7287500 + }, + { + "epoch": 36.11, + "learning_rate": 3.195284949193185e-05, + "loss": 2.1699, + "step": 7288000 + }, + { + "epoch": 36.11, + "learning_rate": 3.195161090550576e-05, + "loss": 2.1642, + "step": 7288500 + }, + { + "epoch": 36.11, + "learning_rate": 3.195037231907968e-05, + "loss": 2.1632, + "step": 7289000 + }, + { + "epoch": 36.11, + "learning_rate": 3.1949133732653595e-05, + "loss": 2.1737, + "step": 7289500 + }, + { + "epoch": 36.12, + "learning_rate": 3.194789514622751e-05, + "loss": 2.1658, + "step": 7290000 + }, + { + "epoch": 36.12, + "learning_rate": 3.194665655980143e-05, + "loss": 2.1809, + "step": 7290500 + }, + { + "epoch": 36.12, + "learning_rate": 3.1945417973375346e-05, + "loss": 2.1634, + "step": 7291000 + }, + { + "epoch": 36.12, + "learning_rate": 3.194417938694926e-05, + "loss": 2.151, + "step": 7291500 + }, + { + "epoch": 36.13, + "learning_rate": 3.194294080052318e-05, + "loss": 2.1942, + "step": 7292000 + }, + { + "epoch": 36.13, + "learning_rate": 3.194170469126995e-05, + "loss": 2.1557, + "step": 7292500 + }, + { + "epoch": 36.13, + "learning_rate": 3.1940466104843865e-05, + "loss": 2.1469, + "step": 7293000 + }, + { + "epoch": 36.13, + "learning_rate": 3.193922751841778e-05, + "loss": 2.1525, + "step": 7293500 + }, + { + "epoch": 36.14, + "learning_rate": 3.19379889319917e-05, + "loss": 2.1464, + "step": 7294000 + }, + { + "epoch": 36.14, + "learning_rate": 3.1936750345565616e-05, + "loss": 2.1576, + "step": 7294500 + }, + { + "epoch": 36.14, + "learning_rate": 3.193551175913953e-05, + "loss": 2.1805, + "step": 7295000 + }, + { + "epoch": 36.14, + "learning_rate": 3.1934278127059154e-05, + "loss": 2.1826, + "step": 7295500 + }, + { + "epoch": 36.15, + "learning_rate": 3.193303954063307e-05, + "loss": 2.1589, + "step": 7296000 + }, + { + "epoch": 36.15, + "learning_rate": 3.193180095420699e-05, + "loss": 2.1538, + "step": 7296500 + }, + { + "epoch": 36.15, + "learning_rate": 3.1930562367780904e-05, + "loss": 2.1704, + "step": 7297000 + }, + { + "epoch": 36.15, + "learning_rate": 3.192932378135482e-05, + "loss": 2.1759, + "step": 7297500 + }, + { + "epoch": 36.16, + "learning_rate": 3.192808519492873e-05, + "loss": 2.1512, + "step": 7298000 + }, + { + "epoch": 36.16, + "learning_rate": 3.192684660850265e-05, + "loss": 2.1743, + "step": 7298500 + }, + { + "epoch": 36.16, + "learning_rate": 3.1925608022076565e-05, + "loss": 2.1707, + "step": 7299000 + }, + { + "epoch": 36.16, + "learning_rate": 3.192436943565048e-05, + "loss": 2.1599, + "step": 7299500 + }, + { + "epoch": 36.17, + "learning_rate": 3.19231308492244e-05, + "loss": 2.1745, + "step": 7300000 + }, + { + "epoch": 36.17, + "learning_rate": 3.192189473997117e-05, + "loss": 2.1474, + "step": 7300500 + }, + { + "epoch": 36.17, + "learning_rate": 3.192065615354508e-05, + "loss": 2.1428, + "step": 7301000 + }, + { + "epoch": 36.17, + "learning_rate": 3.1919420044291854e-05, + "loss": 2.1729, + "step": 7301500 + }, + { + "epoch": 36.18, + "learning_rate": 3.191818145786577e-05, + "loss": 2.1742, + "step": 7302000 + }, + { + "epoch": 36.18, + "learning_rate": 3.191694287143969e-05, + "loss": 2.1903, + "step": 7302500 + }, + { + "epoch": 36.18, + "learning_rate": 3.1915704285013604e-05, + "loss": 2.1836, + "step": 7303000 + }, + { + "epoch": 36.18, + "learning_rate": 3.191446817576037e-05, + "loss": 2.1727, + "step": 7303500 + }, + { + "epoch": 36.19, + "learning_rate": 3.191322958933429e-05, + "loss": 2.1738, + "step": 7304000 + }, + { + "epoch": 36.19, + "learning_rate": 3.19119910029082e-05, + "loss": 2.165, + "step": 7304500 + }, + { + "epoch": 36.19, + "learning_rate": 3.191075241648212e-05, + "loss": 2.171, + "step": 7305000 + }, + { + "epoch": 36.19, + "learning_rate": 3.1909513830056034e-05, + "loss": 2.1837, + "step": 7305500 + }, + { + "epoch": 36.2, + "learning_rate": 3.190827524362995e-05, + "loss": 2.1613, + "step": 7306000 + }, + { + "epoch": 36.2, + "learning_rate": 3.190703665720387e-05, + "loss": 2.1567, + "step": 7306500 + }, + { + "epoch": 36.2, + "learning_rate": 3.190579807077778e-05, + "loss": 2.1435, + "step": 7307000 + }, + { + "epoch": 36.2, + "learning_rate": 3.1904559484351695e-05, + "loss": 2.1601, + "step": 7307500 + }, + { + "epoch": 36.21, + "learning_rate": 3.190332089792561e-05, + "loss": 2.1849, + "step": 7308000 + }, + { + "epoch": 36.21, + "learning_rate": 3.190208231149953e-05, + "loss": 2.1669, + "step": 7308500 + }, + { + "epoch": 36.21, + "learning_rate": 3.1900843725073446e-05, + "loss": 2.1879, + "step": 7309000 + }, + { + "epoch": 36.21, + "learning_rate": 3.189960513864736e-05, + "loss": 2.1559, + "step": 7309500 + }, + { + "epoch": 36.22, + "learning_rate": 3.189836902939414e-05, + "loss": 2.1691, + "step": 7310000 + }, + { + "epoch": 36.22, + "learning_rate": 3.189713292014091e-05, + "loss": 2.163, + "step": 7310500 + }, + { + "epoch": 36.22, + "learning_rate": 3.1895894333714824e-05, + "loss": 2.1601, + "step": 7311000 + }, + { + "epoch": 36.22, + "learning_rate": 3.1894655747288734e-05, + "loss": 2.1817, + "step": 7311500 + }, + { + "epoch": 36.23, + "learning_rate": 3.189341716086265e-05, + "loss": 2.197, + "step": 7312000 + }, + { + "epoch": 36.23, + "learning_rate": 3.189217857443657e-05, + "loss": 2.1816, + "step": 7312500 + }, + { + "epoch": 36.23, + "learning_rate": 3.1890939988010485e-05, + "loss": 2.152, + "step": 7313000 + }, + { + "epoch": 36.23, + "learning_rate": 3.18897014015844e-05, + "loss": 2.1727, + "step": 7313500 + }, + { + "epoch": 36.24, + "learning_rate": 3.188846281515831e-05, + "loss": 2.1761, + "step": 7314000 + }, + { + "epoch": 36.24, + "learning_rate": 3.188722670590509e-05, + "loss": 2.1642, + "step": 7314500 + }, + { + "epoch": 36.24, + "learning_rate": 3.1885988119479005e-05, + "loss": 2.1726, + "step": 7315000 + }, + { + "epoch": 36.24, + "learning_rate": 3.1884752010225773e-05, + "loss": 2.1537, + "step": 7315500 + }, + { + "epoch": 36.25, + "learning_rate": 3.188351342379969e-05, + "loss": 2.1697, + "step": 7316000 + }, + { + "epoch": 36.25, + "learning_rate": 3.188227483737361e-05, + "loss": 2.179, + "step": 7316500 + }, + { + "epoch": 36.25, + "learning_rate": 3.1881036250947524e-05, + "loss": 2.1536, + "step": 7317000 + }, + { + "epoch": 36.25, + "learning_rate": 3.187979766452144e-05, + "loss": 2.1655, + "step": 7317500 + }, + { + "epoch": 36.26, + "learning_rate": 3.18785615552682e-05, + "loss": 2.1739, + "step": 7318000 + }, + { + "epoch": 36.26, + "learning_rate": 3.187732296884212e-05, + "loss": 2.1631, + "step": 7318500 + }, + { + "epoch": 36.26, + "learning_rate": 3.187608685958889e-05, + "loss": 2.164, + "step": 7319000 + }, + { + "epoch": 36.26, + "learning_rate": 3.1874848273162806e-05, + "loss": 2.1755, + "step": 7319500 + }, + { + "epoch": 36.27, + "learning_rate": 3.187360968673672e-05, + "loss": 2.1961, + "step": 7320000 + }, + { + "epoch": 36.27, + "learning_rate": 3.187237357748349e-05, + "loss": 2.1747, + "step": 7320500 + }, + { + "epoch": 36.27, + "learning_rate": 3.187113499105741e-05, + "loss": 2.1745, + "step": 7321000 + }, + { + "epoch": 36.27, + "learning_rate": 3.186989640463132e-05, + "loss": 2.146, + "step": 7321500 + }, + { + "epoch": 36.28, + "learning_rate": 3.1868657818205236e-05, + "loss": 2.1678, + "step": 7322000 + }, + { + "epoch": 36.28, + "learning_rate": 3.186741923177915e-05, + "loss": 2.1745, + "step": 7322500 + }, + { + "epoch": 36.28, + "learning_rate": 3.186618312252593e-05, + "loss": 2.1547, + "step": 7323000 + }, + { + "epoch": 36.28, + "learning_rate": 3.186494453609984e-05, + "loss": 2.1704, + "step": 7323500 + }, + { + "epoch": 36.29, + "learning_rate": 3.1863705949673755e-05, + "loss": 2.1843, + "step": 7324000 + }, + { + "epoch": 36.29, + "learning_rate": 3.186246736324767e-05, + "loss": 2.1645, + "step": 7324500 + }, + { + "epoch": 36.29, + "learning_rate": 3.186122877682159e-05, + "loss": 2.1699, + "step": 7325000 + }, + { + "epoch": 36.29, + "learning_rate": 3.1859990190395506e-05, + "loss": 2.188, + "step": 7325500 + }, + { + "epoch": 36.3, + "learning_rate": 3.1858754081142275e-05, + "loss": 2.172, + "step": 7326000 + }, + { + "epoch": 36.3, + "learning_rate": 3.185751549471619e-05, + "loss": 2.1751, + "step": 7326500 + }, + { + "epoch": 36.3, + "learning_rate": 3.185627690829011e-05, + "loss": 2.1842, + "step": 7327000 + }, + { + "epoch": 36.3, + "learning_rate": 3.185504079903688e-05, + "loss": 2.1814, + "step": 7327500 + }, + { + "epoch": 36.31, + "learning_rate": 3.1853802212610794e-05, + "loss": 2.1958, + "step": 7328000 + }, + { + "epoch": 36.31, + "learning_rate": 3.185256362618471e-05, + "loss": 2.1448, + "step": 7328500 + }, + { + "epoch": 36.31, + "learning_rate": 3.185132503975863e-05, + "loss": 2.1794, + "step": 7329000 + }, + { + "epoch": 36.31, + "learning_rate": 3.1850086453332545e-05, + "loss": 2.1839, + "step": 7329500 + }, + { + "epoch": 36.32, + "learning_rate": 3.1848847866906455e-05, + "loss": 2.2051, + "step": 7330000 + }, + { + "epoch": 36.32, + "learning_rate": 3.184760928048037e-05, + "loss": 2.1578, + "step": 7330500 + }, + { + "epoch": 36.32, + "learning_rate": 3.184637069405429e-05, + "loss": 2.1948, + "step": 7331000 + }, + { + "epoch": 36.32, + "learning_rate": 3.1845132107628206e-05, + "loss": 2.1852, + "step": 7331500 + }, + { + "epoch": 36.33, + "learning_rate": 3.184389599837498e-05, + "loss": 2.1675, + "step": 7332000 + }, + { + "epoch": 36.33, + "learning_rate": 3.184265741194889e-05, + "loss": 2.1613, + "step": 7332500 + }, + { + "epoch": 36.33, + "learning_rate": 3.184141882552281e-05, + "loss": 2.1787, + "step": 7333000 + }, + { + "epoch": 36.33, + "learning_rate": 3.1840180239096726e-05, + "loss": 2.1896, + "step": 7333500 + }, + { + "epoch": 36.34, + "learning_rate": 3.183894165267064e-05, + "loss": 2.1535, + "step": 7334000 + }, + { + "epoch": 36.34, + "learning_rate": 3.183770306624456e-05, + "loss": 2.1596, + "step": 7334500 + }, + { + "epoch": 36.34, + "learning_rate": 3.183646447981847e-05, + "loss": 2.1774, + "step": 7335000 + }, + { + "epoch": 36.34, + "learning_rate": 3.183522589339239e-05, + "loss": 2.1598, + "step": 7335500 + }, + { + "epoch": 36.35, + "learning_rate": 3.1833987306966304e-05, + "loss": 2.1618, + "step": 7336000 + }, + { + "epoch": 36.35, + "learning_rate": 3.183274872054022e-05, + "loss": 2.1732, + "step": 7336500 + }, + { + "epoch": 36.35, + "learning_rate": 3.183151013411414e-05, + "loss": 2.2031, + "step": 7337000 + }, + { + "epoch": 36.35, + "learning_rate": 3.1830274024860906e-05, + "loss": 2.1618, + "step": 7337500 + }, + { + "epoch": 36.35, + "learning_rate": 3.182903543843482e-05, + "loss": 2.1865, + "step": 7338000 + }, + { + "epoch": 36.36, + "learning_rate": 3.182779685200874e-05, + "loss": 2.1547, + "step": 7338500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182655826558266e-05, + "loss": 2.1551, + "step": 7339000 + }, + { + "epoch": 36.36, + "learning_rate": 3.1825319679156574e-05, + "loss": 2.169, + "step": 7339500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182408356990334e-05, + "loss": 2.1587, + "step": 7340000 + }, + { + "epoch": 36.37, + "learning_rate": 3.182284498347726e-05, + "loss": 2.1691, + "step": 7340500 + }, + { + "epoch": 36.37, + "learning_rate": 3.182160639705118e-05, + "loss": 2.161, + "step": 7341000 + }, + { + "epoch": 36.37, + "learning_rate": 3.1820367810625094e-05, + "loss": 2.1649, + "step": 7341500 + }, + { + "epoch": 36.37, + "learning_rate": 3.181913170137186e-05, + "loss": 2.1787, + "step": 7342000 + }, + { + "epoch": 36.38, + "learning_rate": 3.181789311494577e-05, + "loss": 2.1742, + "step": 7342500 + }, + { + "epoch": 36.38, + "learning_rate": 3.181665452851969e-05, + "loss": 2.1531, + "step": 7343000 + }, + { + "epoch": 36.38, + "learning_rate": 3.1815415942093606e-05, + "loss": 2.1657, + "step": 7343500 + }, + { + "epoch": 36.38, + "learning_rate": 3.181417735566752e-05, + "loss": 2.1664, + "step": 7344000 + }, + { + "epoch": 36.39, + "learning_rate": 3.181293876924144e-05, + "loss": 2.1728, + "step": 7344500 + }, + { + "epoch": 36.39, + "learning_rate": 3.181170018281536e-05, + "loss": 2.1725, + "step": 7345000 + }, + { + "epoch": 36.39, + "learning_rate": 3.1810461596389274e-05, + "loss": 2.1948, + "step": 7345500 + }, + { + "epoch": 36.39, + "learning_rate": 3.180922300996319e-05, + "loss": 2.1758, + "step": 7346000 + }, + { + "epoch": 36.4, + "learning_rate": 3.180798442353711e-05, + "loss": 2.1829, + "step": 7346500 + }, + { + "epoch": 36.4, + "learning_rate": 3.1806745837111025e-05, + "loss": 2.2015, + "step": 7347000 + }, + { + "epoch": 36.4, + "learning_rate": 3.180550725068494e-05, + "loss": 2.1804, + "step": 7347500 + }, + { + "epoch": 36.4, + "learning_rate": 3.180426866425886e-05, + "loss": 2.179, + "step": 7348000 + }, + { + "epoch": 36.41, + "learning_rate": 3.1803030077832776e-05, + "loss": 2.1528, + "step": 7348500 + }, + { + "epoch": 36.41, + "learning_rate": 3.180179396857954e-05, + "loss": 2.193, + "step": 7349000 + }, + { + "epoch": 36.41, + "learning_rate": 3.1800555382153455e-05, + "loss": 2.154, + "step": 7349500 + }, + { + "epoch": 36.41, + "learning_rate": 3.179931679572737e-05, + "loss": 2.1824, + "step": 7350000 + }, + { + "epoch": 36.42, + "learning_rate": 3.179808068647414e-05, + "loss": 2.1668, + "step": 7350500 + }, + { + "epoch": 36.42, + "learning_rate": 3.179684210004806e-05, + "loss": 2.1718, + "step": 7351000 + }, + { + "epoch": 36.42, + "learning_rate": 3.1795603513621974e-05, + "loss": 2.1842, + "step": 7351500 + }, + { + "epoch": 36.42, + "learning_rate": 3.179436492719589e-05, + "loss": 2.1563, + "step": 7352000 + }, + { + "epoch": 36.43, + "learning_rate": 3.179312634076981e-05, + "loss": 2.1707, + "step": 7352500 + }, + { + "epoch": 36.43, + "learning_rate": 3.1791887754343725e-05, + "loss": 2.1689, + "step": 7353000 + }, + { + "epoch": 36.43, + "learning_rate": 3.179064916791764e-05, + "loss": 2.1638, + "step": 7353500 + }, + { + "epoch": 36.43, + "learning_rate": 3.178941058149156e-05, + "loss": 2.1388, + "step": 7354000 + }, + { + "epoch": 36.44, + "learning_rate": 3.1788171995065476e-05, + "loss": 2.1856, + "step": 7354500 + }, + { + "epoch": 36.44, + "learning_rate": 3.178693340863939e-05, + "loss": 2.1731, + "step": 7355000 + }, + { + "epoch": 36.44, + "learning_rate": 3.1785697299386155e-05, + "loss": 2.1694, + "step": 7355500 + }, + { + "epoch": 36.44, + "learning_rate": 3.1784461190132924e-05, + "loss": 2.1709, + "step": 7356000 + }, + { + "epoch": 36.45, + "learning_rate": 3.178322260370684e-05, + "loss": 2.1522, + "step": 7356500 + }, + { + "epoch": 36.45, + "learning_rate": 3.178198401728076e-05, + "loss": 2.1841, + "step": 7357000 + }, + { + "epoch": 36.45, + "learning_rate": 3.1780745430854674e-05, + "loss": 2.1854, + "step": 7357500 + }, + { + "epoch": 36.45, + "learning_rate": 3.177950932160144e-05, + "loss": 2.2015, + "step": 7358000 + }, + { + "epoch": 36.46, + "learning_rate": 3.177827073517536e-05, + "loss": 2.1727, + "step": 7358500 + }, + { + "epoch": 36.46, + "learning_rate": 3.177703214874928e-05, + "loss": 2.1739, + "step": 7359000 + }, + { + "epoch": 36.46, + "learning_rate": 3.17757985166689e-05, + "loss": 2.1939, + "step": 7359500 + }, + { + "epoch": 36.46, + "learning_rate": 3.1774559930242815e-05, + "loss": 2.1778, + "step": 7360000 + }, + { + "epoch": 36.47, + "learning_rate": 3.177332134381673e-05, + "loss": 2.1905, + "step": 7360500 + }, + { + "epoch": 36.47, + "learning_rate": 3.177208275739065e-05, + "loss": 2.1869, + "step": 7361000 + }, + { + "epoch": 36.47, + "learning_rate": 3.1770844170964565e-05, + "loss": 2.1856, + "step": 7361500 + }, + { + "epoch": 36.47, + "learning_rate": 3.176960558453848e-05, + "loss": 2.1606, + "step": 7362000 + }, + { + "epoch": 36.48, + "learning_rate": 3.17683669981124e-05, + "loss": 2.1803, + "step": 7362500 + }, + { + "epoch": 36.48, + "learning_rate": 3.1767128411686316e-05, + "loss": 2.1836, + "step": 7363000 + }, + { + "epoch": 36.48, + "learning_rate": 3.176588982526023e-05, + "loss": 2.1588, + "step": 7363500 + }, + { + "epoch": 36.48, + "learning_rate": 3.176465123883414e-05, + "loss": 2.1725, + "step": 7364000 + }, + { + "epoch": 36.49, + "learning_rate": 3.176341512958091e-05, + "loss": 2.1961, + "step": 7364500 + }, + { + "epoch": 36.49, + "learning_rate": 3.176217654315483e-05, + "loss": 2.1519, + "step": 7365000 + }, + { + "epoch": 36.49, + "learning_rate": 3.1760937956728746e-05, + "loss": 2.1619, + "step": 7365500 + }, + { + "epoch": 36.49, + "learning_rate": 3.175969937030266e-05, + "loss": 2.1723, + "step": 7366000 + }, + { + "epoch": 36.5, + "learning_rate": 3.175846078387658e-05, + "loss": 2.1882, + "step": 7366500 + }, + { + "epoch": 36.5, + "learning_rate": 3.17572221974505e-05, + "loss": 2.1582, + "step": 7367000 + }, + { + "epoch": 36.5, + "learning_rate": 3.175598361102441e-05, + "loss": 2.171, + "step": 7367500 + }, + { + "epoch": 36.5, + "learning_rate": 3.1754745024598324e-05, + "loss": 2.174, + "step": 7368000 + }, + { + "epoch": 36.51, + "learning_rate": 3.175350643817224e-05, + "loss": 2.1664, + "step": 7368500 + }, + { + "epoch": 36.51, + "learning_rate": 3.175226785174616e-05, + "loss": 2.1808, + "step": 7369000 + }, + { + "epoch": 36.51, + "learning_rate": 3.1751029265320075e-05, + "loss": 2.212, + "step": 7369500 + }, + { + "epoch": 36.51, + "learning_rate": 3.174979315606685e-05, + "loss": 2.1871, + "step": 7370000 + }, + { + "epoch": 36.52, + "learning_rate": 3.174855456964076e-05, + "loss": 2.1889, + "step": 7370500 + }, + { + "epoch": 36.52, + "learning_rate": 3.174731598321468e-05, + "loss": 2.1857, + "step": 7371000 + }, + { + "epoch": 36.52, + "learning_rate": 3.1746077396788594e-05, + "loss": 2.1746, + "step": 7371500 + }, + { + "epoch": 36.52, + "learning_rate": 3.174483881036251e-05, + "loss": 2.1546, + "step": 7372000 + }, + { + "epoch": 36.53, + "learning_rate": 3.174360022393643e-05, + "loss": 2.1701, + "step": 7372500 + }, + { + "epoch": 36.53, + "learning_rate": 3.1742361637510345e-05, + "loss": 2.1688, + "step": 7373000 + }, + { + "epoch": 36.53, + "learning_rate": 3.174112552825711e-05, + "loss": 2.1697, + "step": 7373500 + }, + { + "epoch": 36.53, + "learning_rate": 3.1739886941831024e-05, + "loss": 2.1417, + "step": 7374000 + }, + { + "epoch": 36.54, + "learning_rate": 3.173864835540494e-05, + "loss": 2.1972, + "step": 7374500 + }, + { + "epoch": 36.54, + "learning_rate": 3.1737412246151716e-05, + "loss": 2.1895, + "step": 7375000 + }, + { + "epoch": 36.54, + "learning_rate": 3.1736173659725633e-05, + "loss": 2.1635, + "step": 7375500 + }, + { + "epoch": 36.54, + "learning_rate": 3.17349375504724e-05, + "loss": 2.1726, + "step": 7376000 + }, + { + "epoch": 36.55, + "learning_rate": 3.173369896404631e-05, + "loss": 2.191, + "step": 7376500 + }, + { + "epoch": 36.55, + "learning_rate": 3.173246037762023e-05, + "loss": 2.1821, + "step": 7377000 + }, + { + "epoch": 36.55, + "learning_rate": 3.1731221791194146e-05, + "loss": 2.1846, + "step": 7377500 + }, + { + "epoch": 36.55, + "learning_rate": 3.172998320476806e-05, + "loss": 2.1596, + "step": 7378000 + }, + { + "epoch": 36.56, + "learning_rate": 3.172874461834198e-05, + "loss": 2.1764, + "step": 7378500 + }, + { + "epoch": 36.56, + "learning_rate": 3.17275060319159e-05, + "loss": 2.1806, + "step": 7379000 + }, + { + "epoch": 36.56, + "learning_rate": 3.1726267445489814e-05, + "loss": 2.1604, + "step": 7379500 + }, + { + "epoch": 36.56, + "learning_rate": 3.1725028859063724e-05, + "loss": 2.2086, + "step": 7380000 + }, + { + "epoch": 36.57, + "learning_rate": 3.172379027263764e-05, + "loss": 2.1964, + "step": 7380500 + }, + { + "epoch": 36.57, + "learning_rate": 3.172255168621156e-05, + "loss": 2.1792, + "step": 7381000 + }, + { + "epoch": 36.57, + "learning_rate": 3.1721313099785475e-05, + "loss": 2.17, + "step": 7381500 + }, + { + "epoch": 36.57, + "learning_rate": 3.172007451335939e-05, + "loss": 2.1855, + "step": 7382000 + }, + { + "epoch": 36.58, + "learning_rate": 3.171883592693331e-05, + "loss": 2.2097, + "step": 7382500 + }, + { + "epoch": 36.58, + "learning_rate": 3.1717597340507226e-05, + "loss": 2.1862, + "step": 7383000 + }, + { + "epoch": 36.58, + "learning_rate": 3.171635875408114e-05, + "loss": 2.184, + "step": 7383500 + }, + { + "epoch": 36.58, + "learning_rate": 3.171512016765506e-05, + "loss": 2.1804, + "step": 7384000 + }, + { + "epoch": 36.59, + "learning_rate": 3.171388405840183e-05, + "loss": 2.1422, + "step": 7384500 + }, + { + "epoch": 36.59, + "learning_rate": 3.17126479491486e-05, + "loss": 2.1633, + "step": 7385000 + }, + { + "epoch": 36.59, + "learning_rate": 3.1711411839895366e-05, + "loss": 2.1765, + "step": 7385500 + }, + { + "epoch": 36.59, + "learning_rate": 3.171017325346928e-05, + "loss": 2.1716, + "step": 7386000 + }, + { + "epoch": 36.6, + "learning_rate": 3.17089346670432e-05, + "loss": 2.1797, + "step": 7386500 + }, + { + "epoch": 36.6, + "learning_rate": 3.170769608061712e-05, + "loss": 2.2091, + "step": 7387000 + }, + { + "epoch": 36.6, + "learning_rate": 3.1706457494191034e-05, + "loss": 2.1754, + "step": 7387500 + }, + { + "epoch": 36.6, + "learning_rate": 3.170521890776495e-05, + "loss": 2.1538, + "step": 7388000 + }, + { + "epoch": 36.61, + "learning_rate": 3.170398032133887e-05, + "loss": 2.1559, + "step": 7388500 + }, + { + "epoch": 36.61, + "learning_rate": 3.170274173491278e-05, + "loss": 2.1987, + "step": 7389000 + }, + { + "epoch": 36.61, + "learning_rate": 3.170150562565955e-05, + "loss": 2.194, + "step": 7389500 + }, + { + "epoch": 36.61, + "learning_rate": 3.1700267039233463e-05, + "loss": 2.1804, + "step": 7390000 + }, + { + "epoch": 36.62, + "learning_rate": 3.169902845280738e-05, + "loss": 2.2139, + "step": 7390500 + }, + { + "epoch": 36.62, + "learning_rate": 3.16977898663813e-05, + "loss": 2.1842, + "step": 7391000 + }, + { + "epoch": 36.62, + "learning_rate": 3.1696553757128066e-05, + "loss": 2.1761, + "step": 7391500 + }, + { + "epoch": 36.62, + "learning_rate": 3.169531517070198e-05, + "loss": 2.1856, + "step": 7392000 + }, + { + "epoch": 36.63, + "learning_rate": 3.16940765842759e-05, + "loss": 2.1945, + "step": 7392500 + }, + { + "epoch": 36.63, + "learning_rate": 3.169283799784982e-05, + "loss": 2.1783, + "step": 7393000 + }, + { + "epoch": 36.63, + "learning_rate": 3.1691599411423734e-05, + "loss": 2.182, + "step": 7393500 + }, + { + "epoch": 36.63, + "learning_rate": 3.169036082499765e-05, + "loss": 2.1868, + "step": 7394000 + }, + { + "epoch": 36.63, + "learning_rate": 3.168912223857157e-05, + "loss": 2.1836, + "step": 7394500 + }, + { + "epoch": 36.64, + "learning_rate": 3.1687886129318336e-05, + "loss": 2.1808, + "step": 7395000 + }, + { + "epoch": 36.64, + "learning_rate": 3.168664754289225e-05, + "loss": 2.1812, + "step": 7395500 + }, + { + "epoch": 36.64, + "learning_rate": 3.168540895646617e-05, + "loss": 2.1927, + "step": 7396000 + }, + { + "epoch": 36.64, + "learning_rate": 3.168417037004008e-05, + "loss": 2.2173, + "step": 7396500 + }, + { + "epoch": 36.65, + "learning_rate": 3.1682931783614e-05, + "loss": 2.168, + "step": 7397000 + }, + { + "epoch": 36.65, + "learning_rate": 3.1681693197187914e-05, + "loss": 2.1856, + "step": 7397500 + }, + { + "epoch": 36.65, + "learning_rate": 3.168045461076183e-05, + "loss": 2.1885, + "step": 7398000 + }, + { + "epoch": 36.65, + "learning_rate": 3.167921602433575e-05, + "loss": 2.1744, + "step": 7398500 + }, + { + "epoch": 36.66, + "learning_rate": 3.167797743790966e-05, + "loss": 2.1875, + "step": 7399000 + }, + { + "epoch": 36.66, + "learning_rate": 3.1676738851483575e-05, + "loss": 2.1897, + "step": 7399500 + }, + { + "epoch": 36.66, + "learning_rate": 3.167550026505749e-05, + "loss": 2.1933, + "step": 7400000 + }, + { + "epoch": 36.66, + "learning_rate": 3.167426415580427e-05, + "loss": 2.1792, + "step": 7400500 + }, + { + "epoch": 36.67, + "learning_rate": 3.1673025569378185e-05, + "loss": 2.1766, + "step": 7401000 + }, + { + "epoch": 36.67, + "learning_rate": 3.1671786982952095e-05, + "loss": 2.1659, + "step": 7401500 + }, + { + "epoch": 36.67, + "learning_rate": 3.167054839652601e-05, + "loss": 2.169, + "step": 7402000 + }, + { + "epoch": 36.67, + "learning_rate": 3.166930981009993e-05, + "loss": 2.1943, + "step": 7402500 + }, + { + "epoch": 36.68, + "learning_rate": 3.1668071223673846e-05, + "loss": 2.1795, + "step": 7403000 + }, + { + "epoch": 36.68, + "learning_rate": 3.166683263724776e-05, + "loss": 2.1787, + "step": 7403500 + }, + { + "epoch": 36.68, + "learning_rate": 3.166559405082168e-05, + "loss": 2.1887, + "step": 7404000 + }, + { + "epoch": 36.68, + "learning_rate": 3.1664355464395596e-05, + "loss": 2.1796, + "step": 7404500 + }, + { + "epoch": 36.69, + "learning_rate": 3.1663119355142365e-05, + "loss": 2.1882, + "step": 7405000 + }, + { + "epoch": 36.69, + "learning_rate": 3.166188076871628e-05, + "loss": 2.1677, + "step": 7405500 + }, + { + "epoch": 36.69, + "learning_rate": 3.166064465946305e-05, + "loss": 2.183, + "step": 7406000 + }, + { + "epoch": 36.69, + "learning_rate": 3.165940607303697e-05, + "loss": 2.1747, + "step": 7406500 + }, + { + "epoch": 36.7, + "learning_rate": 3.1658167486610885e-05, + "loss": 2.1711, + "step": 7407000 + }, + { + "epoch": 36.7, + "learning_rate": 3.16569289001848e-05, + "loss": 2.1921, + "step": 7407500 + }, + { + "epoch": 36.7, + "learning_rate": 3.165569031375871e-05, + "loss": 2.2008, + "step": 7408000 + }, + { + "epoch": 36.7, + "learning_rate": 3.165445172733263e-05, + "loss": 2.1975, + "step": 7408500 + }, + { + "epoch": 36.71, + "learning_rate": 3.1653213140906546e-05, + "loss": 2.1755, + "step": 7409000 + }, + { + "epoch": 36.71, + "learning_rate": 3.1651979508826166e-05, + "loss": 2.1676, + "step": 7409500 + }, + { + "epoch": 36.71, + "learning_rate": 3.165074092240008e-05, + "loss": 2.1594, + "step": 7410000 + }, + { + "epoch": 36.71, + "learning_rate": 3.1649502335974e-05, + "loss": 2.1739, + "step": 7410500 + }, + { + "epoch": 36.72, + "learning_rate": 3.164826374954792e-05, + "loss": 2.1926, + "step": 7411000 + }, + { + "epoch": 36.72, + "learning_rate": 3.1647025163121834e-05, + "loss": 2.1763, + "step": 7411500 + }, + { + "epoch": 36.72, + "learning_rate": 3.164578657669575e-05, + "loss": 2.192, + "step": 7412000 + }, + { + "epoch": 36.72, + "learning_rate": 3.164455046744252e-05, + "loss": 2.1917, + "step": 7412500 + }, + { + "epoch": 36.73, + "learning_rate": 3.164331188101644e-05, + "loss": 2.1724, + "step": 7413000 + }, + { + "epoch": 36.73, + "learning_rate": 3.16420757717632e-05, + "loss": 2.2043, + "step": 7413500 + }, + { + "epoch": 36.73, + "learning_rate": 3.1640837185337116e-05, + "loss": 2.1866, + "step": 7414000 + }, + { + "epoch": 36.73, + "learning_rate": 3.163959859891103e-05, + "loss": 2.1808, + "step": 7414500 + }, + { + "epoch": 36.74, + "learning_rate": 3.163836001248495e-05, + "loss": 2.1876, + "step": 7415000 + }, + { + "epoch": 36.74, + "learning_rate": 3.1637121426058867e-05, + "loss": 2.1957, + "step": 7415500 + }, + { + "epoch": 36.74, + "learning_rate": 3.1635882839632783e-05, + "loss": 2.1583, + "step": 7416000 + }, + { + "epoch": 36.74, + "learning_rate": 3.163464673037955e-05, + "loss": 2.1689, + "step": 7416500 + }, + { + "epoch": 36.75, + "learning_rate": 3.163340814395347e-05, + "loss": 2.1737, + "step": 7417000 + }, + { + "epoch": 36.75, + "learning_rate": 3.1632169557527386e-05, + "loss": 2.2015, + "step": 7417500 + }, + { + "epoch": 36.75, + "learning_rate": 3.16309309711013e-05, + "loss": 2.1799, + "step": 7418000 + }, + { + "epoch": 36.75, + "learning_rate": 3.162969238467522e-05, + "loss": 2.1741, + "step": 7418500 + }, + { + "epoch": 36.76, + "learning_rate": 3.162845379824914e-05, + "loss": 2.1589, + "step": 7419000 + }, + { + "epoch": 36.76, + "learning_rate": 3.1627215211823054e-05, + "loss": 2.1929, + "step": 7419500 + }, + { + "epoch": 36.76, + "learning_rate": 3.162597662539697e-05, + "loss": 2.1637, + "step": 7420000 + }, + { + "epoch": 36.76, + "learning_rate": 3.162473803897089e-05, + "loss": 2.1771, + "step": 7420500 + }, + { + "epoch": 36.77, + "learning_rate": 3.1623499452544805e-05, + "loss": 2.193, + "step": 7421000 + }, + { + "epoch": 36.77, + "learning_rate": 3.162226086611872e-05, + "loss": 2.197, + "step": 7421500 + }, + { + "epoch": 36.77, + "learning_rate": 3.162102227969264e-05, + "loss": 2.179, + "step": 7422000 + }, + { + "epoch": 36.77, + "learning_rate": 3.16197861704394e-05, + "loss": 2.1923, + "step": 7422500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161854758401332e-05, + "loss": 2.2144, + "step": 7423000 + }, + { + "epoch": 36.78, + "learning_rate": 3.1617308997587234e-05, + "loss": 2.1794, + "step": 7423500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161607041116115e-05, + "loss": 2.1906, + "step": 7424000 + }, + { + "epoch": 36.78, + "learning_rate": 3.161483182473507e-05, + "loss": 2.177, + "step": 7424500 + }, + { + "epoch": 36.79, + "learning_rate": 3.1613593238308985e-05, + "loss": 2.1639, + "step": 7425000 + }, + { + "epoch": 36.79, + "learning_rate": 3.16123546518829e-05, + "loss": 2.1855, + "step": 7425500 + }, + { + "epoch": 36.79, + "learning_rate": 3.161111606545682e-05, + "loss": 2.1537, + "step": 7426000 + }, + { + "epoch": 36.79, + "learning_rate": 3.160987747903073e-05, + "loss": 2.1746, + "step": 7426500 + }, + { + "epoch": 36.8, + "learning_rate": 3.1608638892604646e-05, + "loss": 2.1806, + "step": 7427000 + }, + { + "epoch": 36.8, + "learning_rate": 3.160740030617856e-05, + "loss": 2.1547, + "step": 7427500 + }, + { + "epoch": 36.8, + "learning_rate": 3.160616171975248e-05, + "loss": 2.1845, + "step": 7428000 + }, + { + "epoch": 36.8, + "learning_rate": 3.1604925610499256e-05, + "loss": 2.1727, + "step": 7428500 + }, + { + "epoch": 36.81, + "learning_rate": 3.160368702407317e-05, + "loss": 2.1838, + "step": 7429000 + }, + { + "epoch": 36.81, + "learning_rate": 3.160244843764709e-05, + "loss": 2.1918, + "step": 7429500 + }, + { + "epoch": 36.81, + "learning_rate": 3.1601209851221e-05, + "loss": 2.2049, + "step": 7430000 + }, + { + "epoch": 36.81, + "learning_rate": 3.1599971264794916e-05, + "loss": 2.1743, + "step": 7430500 + }, + { + "epoch": 36.82, + "learning_rate": 3.1598735155541685e-05, + "loss": 2.1785, + "step": 7431000 + }, + { + "epoch": 36.82, + "learning_rate": 3.15974965691156e-05, + "loss": 2.1674, + "step": 7431500 + }, + { + "epoch": 36.82, + "learning_rate": 3.159625798268952e-05, + "loss": 2.1937, + "step": 7432000 + }, + { + "epoch": 36.82, + "learning_rate": 3.1595019396263436e-05, + "loss": 2.1767, + "step": 7432500 + }, + { + "epoch": 36.83, + "learning_rate": 3.1593780809837346e-05, + "loss": 2.1666, + "step": 7433000 + }, + { + "epoch": 36.83, + "learning_rate": 3.159254470058412e-05, + "loss": 2.1748, + "step": 7433500 + }, + { + "epoch": 36.83, + "learning_rate": 3.159130611415804e-05, + "loss": 2.2004, + "step": 7434000 + }, + { + "epoch": 36.83, + "learning_rate": 3.1590067527731956e-05, + "loss": 2.1619, + "step": 7434500 + }, + { + "epoch": 36.84, + "learning_rate": 3.158883141847872e-05, + "loss": 2.1845, + "step": 7435000 + }, + { + "epoch": 36.84, + "learning_rate": 3.1587592832052635e-05, + "loss": 2.1957, + "step": 7435500 + }, + { + "epoch": 36.84, + "learning_rate": 3.1586356722799403e-05, + "loss": 2.1968, + "step": 7436000 + }, + { + "epoch": 36.84, + "learning_rate": 3.158511813637332e-05, + "loss": 2.1949, + "step": 7436500 + }, + { + "epoch": 36.85, + "learning_rate": 3.158387954994724e-05, + "loss": 2.183, + "step": 7437000 + }, + { + "epoch": 36.85, + "learning_rate": 3.1582640963521154e-05, + "loss": 2.1959, + "step": 7437500 + }, + { + "epoch": 36.85, + "learning_rate": 3.158140485426792e-05, + "loss": 2.1838, + "step": 7438000 + }, + { + "epoch": 36.85, + "learning_rate": 3.158016874501469e-05, + "loss": 2.1668, + "step": 7438500 + }, + { + "epoch": 36.86, + "learning_rate": 3.157893015858861e-05, + "loss": 2.1742, + "step": 7439000 + }, + { + "epoch": 36.86, + "learning_rate": 3.1577691572162526e-05, + "loss": 2.2019, + "step": 7439500 + }, + { + "epoch": 36.86, + "learning_rate": 3.1576452985736436e-05, + "loss": 2.1639, + "step": 7440000 + }, + { + "epoch": 36.86, + "learning_rate": 3.157521687648321e-05, + "loss": 2.1704, + "step": 7440500 + }, + { + "epoch": 36.87, + "learning_rate": 3.157397829005713e-05, + "loss": 2.1903, + "step": 7441000 + }, + { + "epoch": 36.87, + "learning_rate": 3.1572739703631045e-05, + "loss": 2.2107, + "step": 7441500 + }, + { + "epoch": 36.87, + "learning_rate": 3.157150111720496e-05, + "loss": 2.1606, + "step": 7442000 + }, + { + "epoch": 36.87, + "learning_rate": 3.157026253077888e-05, + "loss": 2.1743, + "step": 7442500 + }, + { + "epoch": 36.88, + "learning_rate": 3.156902394435279e-05, + "loss": 2.1895, + "step": 7443000 + }, + { + "epoch": 36.88, + "learning_rate": 3.1567785357926706e-05, + "loss": 2.2057, + "step": 7443500 + }, + { + "epoch": 36.88, + "learning_rate": 3.156654677150062e-05, + "loss": 2.1841, + "step": 7444000 + }, + { + "epoch": 36.88, + "learning_rate": 3.156530818507454e-05, + "loss": 2.1785, + "step": 7444500 + }, + { + "epoch": 36.89, + "learning_rate": 3.156406959864846e-05, + "loss": 2.1737, + "step": 7445000 + }, + { + "epoch": 36.89, + "learning_rate": 3.1562831012222374e-05, + "loss": 2.1713, + "step": 7445500 + }, + { + "epoch": 36.89, + "learning_rate": 3.156159242579629e-05, + "loss": 2.17, + "step": 7446000 + }, + { + "epoch": 36.89, + "learning_rate": 3.156035383937021e-05, + "loss": 2.1862, + "step": 7446500 + }, + { + "epoch": 36.9, + "learning_rate": 3.1559115252944125e-05, + "loss": 2.1914, + "step": 7447000 + }, + { + "epoch": 36.9, + "learning_rate": 3.1557876666518035e-05, + "loss": 2.1914, + "step": 7447500 + }, + { + "epoch": 36.9, + "learning_rate": 3.155663808009195e-05, + "loss": 2.1828, + "step": 7448000 + }, + { + "epoch": 36.9, + "learning_rate": 3.155539949366587e-05, + "loss": 2.1949, + "step": 7448500 + }, + { + "epoch": 36.9, + "learning_rate": 3.1554160907239786e-05, + "loss": 2.1743, + "step": 7449000 + }, + { + "epoch": 36.91, + "learning_rate": 3.1552927275159406e-05, + "loss": 2.1951, + "step": 7449500 + }, + { + "epoch": 36.91, + "learning_rate": 3.1551691165906175e-05, + "loss": 2.2043, + "step": 7450000 + }, + { + "epoch": 36.91, + "learning_rate": 3.155045257948009e-05, + "loss": 2.172, + "step": 7450500 + }, + { + "epoch": 36.91, + "learning_rate": 3.154921399305401e-05, + "loss": 2.1704, + "step": 7451000 + }, + { + "epoch": 36.92, + "learning_rate": 3.1547975406627926e-05, + "loss": 2.1903, + "step": 7451500 + }, + { + "epoch": 36.92, + "learning_rate": 3.154673682020184e-05, + "loss": 2.1893, + "step": 7452000 + }, + { + "epoch": 36.92, + "learning_rate": 3.154549823377575e-05, + "loss": 2.1799, + "step": 7452500 + }, + { + "epoch": 36.92, + "learning_rate": 3.154425964734967e-05, + "loss": 2.1961, + "step": 7453000 + }, + { + "epoch": 36.93, + "learning_rate": 3.154302106092359e-05, + "loss": 2.1879, + "step": 7453500 + }, + { + "epoch": 36.93, + "learning_rate": 3.1541782474497504e-05, + "loss": 2.2118, + "step": 7454000 + }, + { + "epoch": 36.93, + "learning_rate": 3.154054388807142e-05, + "loss": 2.1753, + "step": 7454500 + }, + { + "epoch": 36.93, + "learning_rate": 3.153930530164534e-05, + "loss": 2.1622, + "step": 7455000 + }, + { + "epoch": 36.94, + "learning_rate": 3.1538066715219255e-05, + "loss": 2.1666, + "step": 7455500 + }, + { + "epoch": 36.94, + "learning_rate": 3.153682812879317e-05, + "loss": 2.18, + "step": 7456000 + }, + { + "epoch": 36.94, + "learning_rate": 3.153558954236709e-05, + "loss": 2.2071, + "step": 7456500 + }, + { + "epoch": 36.94, + "learning_rate": 3.153435343311386e-05, + "loss": 2.1606, + "step": 7457000 + }, + { + "epoch": 36.95, + "learning_rate": 3.1533114846687774e-05, + "loss": 2.183, + "step": 7457500 + }, + { + "epoch": 36.95, + "learning_rate": 3.153187626026169e-05, + "loss": 2.176, + "step": 7458000 + }, + { + "epoch": 36.95, + "learning_rate": 3.153063767383561e-05, + "loss": 2.1672, + "step": 7458500 + }, + { + "epoch": 36.95, + "learning_rate": 3.1529399087409525e-05, + "loss": 2.1663, + "step": 7459000 + }, + { + "epoch": 36.96, + "learning_rate": 3.152816050098344e-05, + "loss": 2.2065, + "step": 7459500 + }, + { + "epoch": 36.96, + "learning_rate": 3.152692191455736e-05, + "loss": 2.1735, + "step": 7460000 + }, + { + "epoch": 36.96, + "learning_rate": 3.1525683328131276e-05, + "loss": 2.1947, + "step": 7460500 + }, + { + "epoch": 36.96, + "learning_rate": 3.152444721887804e-05, + "loss": 2.206, + "step": 7461000 + }, + { + "epoch": 36.97, + "learning_rate": 3.1523211109624807e-05, + "loss": 2.1445, + "step": 7461500 + }, + { + "epoch": 36.97, + "learning_rate": 3.1521975000371575e-05, + "loss": 2.1961, + "step": 7462000 + }, + { + "epoch": 36.97, + "learning_rate": 3.152073641394549e-05, + "loss": 2.1584, + "step": 7462500 + }, + { + "epoch": 36.97, + "learning_rate": 3.151949782751941e-05, + "loss": 2.161, + "step": 7463000 + }, + { + "epoch": 36.98, + "learning_rate": 3.151826171826618e-05, + "loss": 2.1464, + "step": 7463500 + }, + { + "epoch": 36.98, + "learning_rate": 3.1517023131840095e-05, + "loss": 2.1763, + "step": 7464000 + }, + { + "epoch": 36.98, + "learning_rate": 3.151578454541401e-05, + "loss": 2.1944, + "step": 7464500 + }, + { + "epoch": 36.98, + "learning_rate": 3.151454843616078e-05, + "loss": 2.179, + "step": 7465000 + }, + { + "epoch": 36.99, + "learning_rate": 3.15133098497347e-05, + "loss": 2.204, + "step": 7465500 + }, + { + "epoch": 36.99, + "learning_rate": 3.1512071263308615e-05, + "loss": 2.179, + "step": 7466000 + }, + { + "epoch": 36.99, + "learning_rate": 3.151083515405538e-05, + "loss": 2.1788, + "step": 7466500 + }, + { + "epoch": 36.99, + "learning_rate": 3.1509596567629294e-05, + "loss": 2.2008, + "step": 7467000 + }, + { + "epoch": 37.0, + "learning_rate": 3.150835798120321e-05, + "loss": 2.168, + "step": 7467500 + }, + { + "epoch": 37.0, + "learning_rate": 3.150711939477713e-05, + "loss": 2.1852, + "step": 7468000 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.662816636573663, + "eval_accuracy_mlm": 0.6197658435400042, + "eval_accuracy_nsp": 0.8659980624335677, + "eval_loss": 2.305152416229248, + "eval_runtime": 145.847, + "eval_samples_per_second": 1748.126, + "eval_steps_per_second": 72.843, + "step": 7468191 + }, + { + "epoch": 37.0, + "learning_rate": 3.1505880808351044e-05, + "loss": 2.1681, + "step": 7468500 + }, + { + "epoch": 37.0, + "learning_rate": 3.150464222192496e-05, + "loss": 2.1523, + "step": 7469000 + }, + { + "epoch": 37.01, + "learning_rate": 3.150340363549888e-05, + "loss": 2.1706, + "step": 7469500 + }, + { + "epoch": 37.01, + "learning_rate": 3.1502165049072795e-05, + "loss": 2.1522, + "step": 7470000 + }, + { + "epoch": 37.01, + "learning_rate": 3.150092646264671e-05, + "loss": 2.1346, + "step": 7470500 + }, + { + "epoch": 37.01, + "learning_rate": 3.149968787622063e-05, + "loss": 2.1607, + "step": 7471000 + }, + { + "epoch": 37.02, + "learning_rate": 3.1498449289794546e-05, + "loss": 2.1727, + "step": 7471500 + }, + { + "epoch": 37.02, + "learning_rate": 3.149721070336846e-05, + "loss": 2.1578, + "step": 7472000 + }, + { + "epoch": 37.02, + "learning_rate": 3.149597459411523e-05, + "loss": 2.1535, + "step": 7472500 + }, + { + "epoch": 37.02, + "learning_rate": 3.149473600768915e-05, + "loss": 2.1529, + "step": 7473000 + }, + { + "epoch": 37.03, + "learning_rate": 3.1493497421263066e-05, + "loss": 2.1526, + "step": 7473500 + }, + { + "epoch": 37.03, + "learning_rate": 3.149225883483698e-05, + "loss": 2.1306, + "step": 7474000 + }, + { + "epoch": 37.03, + "learning_rate": 3.1491022725583744e-05, + "loss": 2.1446, + "step": 7474500 + }, + { + "epoch": 37.03, + "learning_rate": 3.148978413915766e-05, + "loss": 2.1611, + "step": 7475000 + }, + { + "epoch": 37.04, + "learning_rate": 3.148854555273158e-05, + "loss": 2.1508, + "step": 7475500 + }, + { + "epoch": 37.04, + "learning_rate": 3.1487306966305495e-05, + "loss": 2.1516, + "step": 7476000 + }, + { + "epoch": 37.04, + "learning_rate": 3.148606837987941e-05, + "loss": 2.1576, + "step": 7476500 + }, + { + "epoch": 37.04, + "learning_rate": 3.148482979345333e-05, + "loss": 2.1554, + "step": 7477000 + }, + { + "epoch": 37.05, + "learning_rate": 3.1483591207027246e-05, + "loss": 2.1549, + "step": 7477500 + }, + { + "epoch": 37.05, + "learning_rate": 3.148235262060116e-05, + "loss": 2.137, + "step": 7478000 + }, + { + "epoch": 37.05, + "learning_rate": 3.148111403417508e-05, + "loss": 2.1573, + "step": 7478500 + }, + { + "epoch": 37.05, + "learning_rate": 3.1479875447749e-05, + "loss": 2.1439, + "step": 7479000 + }, + { + "epoch": 37.06, + "learning_rate": 3.1478636861322914e-05, + "loss": 2.1432, + "step": 7479500 + }, + { + "epoch": 37.06, + "learning_rate": 3.147739827489683e-05, + "loss": 2.1633, + "step": 7480000 + }, + { + "epoch": 37.06, + "learning_rate": 3.14761621656436e-05, + "loss": 2.1335, + "step": 7480500 + }, + { + "epoch": 37.06, + "learning_rate": 3.1474923579217516e-05, + "loss": 2.1549, + "step": 7481000 + }, + { + "epoch": 37.07, + "learning_rate": 3.147368499279143e-05, + "loss": 2.1481, + "step": 7481500 + }, + { + "epoch": 37.07, + "learning_rate": 3.1472446406365344e-05, + "loss": 2.144, + "step": 7482000 + }, + { + "epoch": 37.07, + "learning_rate": 3.147121029711211e-05, + "loss": 2.1578, + "step": 7482500 + }, + { + "epoch": 37.07, + "learning_rate": 3.146997418785888e-05, + "loss": 2.1529, + "step": 7483000 + }, + { + "epoch": 37.08, + "learning_rate": 3.14687356014328e-05, + "loss": 2.1599, + "step": 7483500 + }, + { + "epoch": 37.08, + "learning_rate": 3.1467497015006715e-05, + "loss": 2.1403, + "step": 7484000 + }, + { + "epoch": 37.08, + "learning_rate": 3.146625842858063e-05, + "loss": 2.1674, + "step": 7484500 + }, + { + "epoch": 37.08, + "learning_rate": 3.146501984215455e-05, + "loss": 2.1574, + "step": 7485000 + }, + { + "epoch": 37.09, + "learning_rate": 3.1463781255728466e-05, + "loss": 2.1732, + "step": 7485500 + }, + { + "epoch": 37.09, + "learning_rate": 3.146254266930238e-05, + "loss": 2.1428, + "step": 7486000 + }, + { + "epoch": 37.09, + "learning_rate": 3.14613040828763e-05, + "loss": 2.1599, + "step": 7486500 + }, + { + "epoch": 37.09, + "learning_rate": 3.1460065496450217e-05, + "loss": 2.138, + "step": 7487000 + }, + { + "epoch": 37.1, + "learning_rate": 3.1458826910024133e-05, + "loss": 2.1664, + "step": 7487500 + }, + { + "epoch": 37.1, + "learning_rate": 3.145758832359805e-05, + "loss": 2.1581, + "step": 7488000 + }, + { + "epoch": 37.1, + "learning_rate": 3.145635221434481e-05, + "loss": 2.176, + "step": 7488500 + }, + { + "epoch": 37.1, + "learning_rate": 3.145511362791873e-05, + "loss": 2.1708, + "step": 7489000 + }, + { + "epoch": 37.11, + "learning_rate": 3.1453875041492646e-05, + "loss": 2.1769, + "step": 7489500 + }, + { + "epoch": 37.11, + "learning_rate": 3.145263645506656e-05, + "loss": 2.1438, + "step": 7490000 + }, + { + "epoch": 37.11, + "learning_rate": 3.145139786864048e-05, + "loss": 2.1697, + "step": 7490500 + }, + { + "epoch": 37.11, + "learning_rate": 3.14501592822144e-05, + "loss": 2.1765, + "step": 7491000 + }, + { + "epoch": 37.12, + "learning_rate": 3.1448920695788314e-05, + "loss": 2.1288, + "step": 7491500 + }, + { + "epoch": 37.12, + "learning_rate": 3.144768210936223e-05, + "loss": 2.143, + "step": 7492000 + }, + { + "epoch": 37.12, + "learning_rate": 3.1446446000109e-05, + "loss": 2.1439, + "step": 7492500 + }, + { + "epoch": 37.12, + "learning_rate": 3.144520741368292e-05, + "loss": 2.1486, + "step": 7493000 + }, + { + "epoch": 37.13, + "learning_rate": 3.1443968827256834e-05, + "loss": 2.1771, + "step": 7493500 + }, + { + "epoch": 37.13, + "learning_rate": 3.144273024083075e-05, + "loss": 2.1703, + "step": 7494000 + }, + { + "epoch": 37.13, + "learning_rate": 3.144149165440467e-05, + "loss": 2.1512, + "step": 7494500 + }, + { + "epoch": 37.13, + "learning_rate": 3.1440253067978584e-05, + "loss": 2.1605, + "step": 7495000 + }, + { + "epoch": 37.14, + "learning_rate": 3.1439014481552495e-05, + "loss": 2.1528, + "step": 7495500 + }, + { + "epoch": 37.14, + "learning_rate": 3.143777589512641e-05, + "loss": 2.1338, + "step": 7496000 + }, + { + "epoch": 37.14, + "learning_rate": 3.143653978587318e-05, + "loss": 2.1437, + "step": 7496500 + }, + { + "epoch": 37.14, + "learning_rate": 3.14353011994471e-05, + "loss": 2.1349, + "step": 7497000 + }, + { + "epoch": 37.15, + "learning_rate": 3.1434062613021014e-05, + "loss": 2.1594, + "step": 7497500 + }, + { + "epoch": 37.15, + "learning_rate": 3.143282650376778e-05, + "loss": 2.1769, + "step": 7498000 + }, + { + "epoch": 37.15, + "learning_rate": 3.14315879173417e-05, + "loss": 2.1253, + "step": 7498500 + }, + { + "epoch": 37.15, + "learning_rate": 3.143034933091562e-05, + "loss": 2.1595, + "step": 7499000 + }, + { + "epoch": 37.16, + "learning_rate": 3.1429110744489534e-05, + "loss": 2.1514, + "step": 7499500 + }, + { + "epoch": 37.16, + "learning_rate": 3.142787215806345e-05, + "loss": 2.1622, + "step": 7500000 + }, + { + "epoch": 37.16, + "learning_rate": 3.142663357163737e-05, + "loss": 2.1447, + "step": 7500500 + }, + { + "epoch": 37.16, + "learning_rate": 3.1425394985211285e-05, + "loss": 2.1597, + "step": 7501000 + }, + { + "epoch": 37.17, + "learning_rate": 3.14241563987852e-05, + "loss": 2.1814, + "step": 7501500 + }, + { + "epoch": 37.17, + "learning_rate": 3.142291781235912e-05, + "loss": 2.1547, + "step": 7502000 + }, + { + "epoch": 37.17, + "learning_rate": 3.142168170310588e-05, + "loss": 2.1666, + "step": 7502500 + }, + { + "epoch": 37.17, + "learning_rate": 3.14204431166798e-05, + "loss": 2.1768, + "step": 7503000 + }, + { + "epoch": 37.17, + "learning_rate": 3.1419204530253714e-05, + "loss": 2.1721, + "step": 7503500 + }, + { + "epoch": 37.18, + "learning_rate": 3.141796842100048e-05, + "loss": 2.1622, + "step": 7504000 + }, + { + "epoch": 37.18, + "learning_rate": 3.14167298345744e-05, + "loss": 2.1507, + "step": 7504500 + }, + { + "epoch": 37.18, + "learning_rate": 3.141549372532117e-05, + "loss": 2.152, + "step": 7505000 + }, + { + "epoch": 37.18, + "learning_rate": 3.141425513889508e-05, + "loss": 2.1878, + "step": 7505500 + }, + { + "epoch": 37.19, + "learning_rate": 3.1413016552468996e-05, + "loss": 2.1722, + "step": 7506000 + }, + { + "epoch": 37.19, + "learning_rate": 3.141177796604291e-05, + "loss": 2.1489, + "step": 7506500 + }, + { + "epoch": 37.19, + "learning_rate": 3.141053937961683e-05, + "loss": 2.1713, + "step": 7507000 + }, + { + "epoch": 37.19, + "learning_rate": 3.140930079319075e-05, + "loss": 2.1761, + "step": 7507500 + }, + { + "epoch": 37.2, + "learning_rate": 3.1408062206764664e-05, + "loss": 2.1787, + "step": 7508000 + }, + { + "epoch": 37.2, + "learning_rate": 3.140682362033858e-05, + "loss": 2.1452, + "step": 7508500 + }, + { + "epoch": 37.2, + "learning_rate": 3.140558751108535e-05, + "loss": 2.1495, + "step": 7509000 + }, + { + "epoch": 37.2, + "learning_rate": 3.140435140183212e-05, + "loss": 2.165, + "step": 7509500 + }, + { + "epoch": 37.21, + "learning_rate": 3.1403112815406035e-05, + "loss": 2.1504, + "step": 7510000 + }, + { + "epoch": 37.21, + "learning_rate": 3.140187422897995e-05, + "loss": 2.1651, + "step": 7510500 + }, + { + "epoch": 37.21, + "learning_rate": 3.140063564255387e-05, + "loss": 2.1685, + "step": 7511000 + }, + { + "epoch": 37.21, + "learning_rate": 3.1399397056127786e-05, + "loss": 2.199, + "step": 7511500 + }, + { + "epoch": 37.22, + "learning_rate": 3.13981584697017e-05, + "loss": 2.1633, + "step": 7512000 + }, + { + "epoch": 37.22, + "learning_rate": 3.139691988327561e-05, + "loss": 2.1725, + "step": 7512500 + }, + { + "epoch": 37.22, + "learning_rate": 3.139568129684953e-05, + "loss": 2.1644, + "step": 7513000 + }, + { + "epoch": 37.22, + "learning_rate": 3.139444271042345e-05, + "loss": 2.1411, + "step": 7513500 + }, + { + "epoch": 37.23, + "learning_rate": 3.1393204123997364e-05, + "loss": 2.1723, + "step": 7514000 + }, + { + "epoch": 37.23, + "learning_rate": 3.139196553757128e-05, + "loss": 2.1631, + "step": 7514500 + }, + { + "epoch": 37.23, + "learning_rate": 3.139072942831805e-05, + "loss": 2.1702, + "step": 7515000 + }, + { + "epoch": 37.23, + "learning_rate": 3.1389490841891966e-05, + "loss": 2.1552, + "step": 7515500 + }, + { + "epoch": 37.24, + "learning_rate": 3.138825225546588e-05, + "loss": 2.1477, + "step": 7516000 + }, + { + "epoch": 37.24, + "learning_rate": 3.13870136690398e-05, + "loss": 2.142, + "step": 7516500 + }, + { + "epoch": 37.24, + "learning_rate": 3.138577508261372e-05, + "loss": 2.1627, + "step": 7517000 + }, + { + "epoch": 37.24, + "learning_rate": 3.1384536496187634e-05, + "loss": 2.1808, + "step": 7517500 + }, + { + "epoch": 37.25, + "learning_rate": 3.138329790976155e-05, + "loss": 2.1587, + "step": 7518000 + }, + { + "epoch": 37.25, + "learning_rate": 3.138205932333547e-05, + "loss": 2.163, + "step": 7518500 + }, + { + "epoch": 37.25, + "learning_rate": 3.1380820736909385e-05, + "loss": 2.1469, + "step": 7519000 + }, + { + "epoch": 37.25, + "learning_rate": 3.13795821504833e-05, + "loss": 2.1555, + "step": 7519500 + }, + { + "epoch": 37.26, + "learning_rate": 3.137834356405722e-05, + "loss": 2.1745, + "step": 7520000 + }, + { + "epoch": 37.26, + "learning_rate": 3.1377104977631136e-05, + "loss": 2.1535, + "step": 7520500 + }, + { + "epoch": 37.26, + "learning_rate": 3.1375866391205046e-05, + "loss": 2.177, + "step": 7521000 + }, + { + "epoch": 37.26, + "learning_rate": 3.137462780477896e-05, + "loss": 2.1823, + "step": 7521500 + }, + { + "epoch": 37.27, + "learning_rate": 3.137338921835288e-05, + "loss": 2.1531, + "step": 7522000 + }, + { + "epoch": 37.27, + "learning_rate": 3.1372150631926797e-05, + "loss": 2.154, + "step": 7522500 + }, + { + "epoch": 37.27, + "learning_rate": 3.1370912045500714e-05, + "loss": 2.1678, + "step": 7523000 + }, + { + "epoch": 37.27, + "learning_rate": 3.136967593624748e-05, + "loss": 2.1881, + "step": 7523500 + }, + { + "epoch": 37.28, + "learning_rate": 3.136843734982139e-05, + "loss": 2.1854, + "step": 7524000 + }, + { + "epoch": 37.28, + "learning_rate": 3.136719876339531e-05, + "loss": 2.1702, + "step": 7524500 + }, + { + "epoch": 37.28, + "learning_rate": 3.1365960176969226e-05, + "loss": 2.1338, + "step": 7525000 + }, + { + "epoch": 37.28, + "learning_rate": 3.1364724067716e-05, + "loss": 2.178, + "step": 7525500 + }, + { + "epoch": 37.29, + "learning_rate": 3.1363487958462764e-05, + "loss": 2.1832, + "step": 7526000 + }, + { + "epoch": 37.29, + "learning_rate": 3.136224937203668e-05, + "loss": 2.1513, + "step": 7526500 + }, + { + "epoch": 37.29, + "learning_rate": 3.13610107856106e-05, + "loss": 2.1825, + "step": 7527000 + }, + { + "epoch": 37.29, + "learning_rate": 3.1359772199184515e-05, + "loss": 2.1724, + "step": 7527500 + }, + { + "epoch": 37.3, + "learning_rate": 3.135853361275843e-05, + "loss": 2.1605, + "step": 7528000 + }, + { + "epoch": 37.3, + "learning_rate": 3.135729502633235e-05, + "loss": 2.1593, + "step": 7528500 + }, + { + "epoch": 37.3, + "learning_rate": 3.1356056439906266e-05, + "loss": 2.1526, + "step": 7529000 + }, + { + "epoch": 37.3, + "learning_rate": 3.135481785348018e-05, + "loss": 2.1773, + "step": 7529500 + }, + { + "epoch": 37.31, + "learning_rate": 3.13535792670541e-05, + "loss": 2.1671, + "step": 7530000 + }, + { + "epoch": 37.31, + "learning_rate": 3.135234563497372e-05, + "loss": 2.1669, + "step": 7530500 + }, + { + "epoch": 37.31, + "learning_rate": 3.135110704854764e-05, + "loss": 2.1934, + "step": 7531000 + }, + { + "epoch": 37.31, + "learning_rate": 3.1349868462121554e-05, + "loss": 2.1723, + "step": 7531500 + }, + { + "epoch": 37.32, + "learning_rate": 3.134862987569547e-05, + "loss": 2.1526, + "step": 7532000 + }, + { + "epoch": 37.32, + "learning_rate": 3.134739376644223e-05, + "loss": 2.1498, + "step": 7532500 + }, + { + "epoch": 37.32, + "learning_rate": 3.134615518001615e-05, + "loss": 2.1688, + "step": 7533000 + }, + { + "epoch": 37.32, + "learning_rate": 3.134491659359007e-05, + "loss": 2.1782, + "step": 7533500 + }, + { + "epoch": 37.33, + "learning_rate": 3.1343678007163984e-05, + "loss": 2.1479, + "step": 7534000 + }, + { + "epoch": 37.33, + "learning_rate": 3.13424394207379e-05, + "loss": 2.1636, + "step": 7534500 + }, + { + "epoch": 37.33, + "learning_rate": 3.134120083431182e-05, + "loss": 2.1481, + "step": 7535000 + }, + { + "epoch": 37.33, + "learning_rate": 3.1339962247885734e-05, + "loss": 2.1379, + "step": 7535500 + }, + { + "epoch": 37.34, + "learning_rate": 3.133872366145965e-05, + "loss": 2.1667, + "step": 7536000 + }, + { + "epoch": 37.34, + "learning_rate": 3.133748755220642e-05, + "loss": 2.1851, + "step": 7536500 + }, + { + "epoch": 37.34, + "learning_rate": 3.133624896578034e-05, + "loss": 2.1634, + "step": 7537000 + }, + { + "epoch": 37.34, + "learning_rate": 3.1335010379354254e-05, + "loss": 2.1672, + "step": 7537500 + }, + { + "epoch": 37.35, + "learning_rate": 3.1333774270101016e-05, + "loss": 2.1684, + "step": 7538000 + }, + { + "epoch": 37.35, + "learning_rate": 3.133253568367493e-05, + "loss": 2.1728, + "step": 7538500 + }, + { + "epoch": 37.35, + "learning_rate": 3.133129709724885e-05, + "loss": 2.1735, + "step": 7539000 + }, + { + "epoch": 37.35, + "learning_rate": 3.133005851082277e-05, + "loss": 2.1688, + "step": 7539500 + }, + { + "epoch": 37.36, + "learning_rate": 3.1328819924396684e-05, + "loss": 2.1657, + "step": 7540000 + }, + { + "epoch": 37.36, + "learning_rate": 3.13275813379706e-05, + "loss": 2.1685, + "step": 7540500 + }, + { + "epoch": 37.36, + "learning_rate": 3.132634275154452e-05, + "loss": 2.168, + "step": 7541000 + }, + { + "epoch": 37.36, + "learning_rate": 3.1325104165118435e-05, + "loss": 2.1783, + "step": 7541500 + }, + { + "epoch": 37.37, + "learning_rate": 3.132386557869235e-05, + "loss": 2.1726, + "step": 7542000 + }, + { + "epoch": 37.37, + "learning_rate": 3.132262946943912e-05, + "loss": 2.1833, + "step": 7542500 + }, + { + "epoch": 37.37, + "learning_rate": 3.132139088301304e-05, + "loss": 2.1462, + "step": 7543000 + }, + { + "epoch": 37.37, + "learning_rate": 3.13201547737598e-05, + "loss": 2.1548, + "step": 7543500 + }, + { + "epoch": 37.38, + "learning_rate": 3.1318916187333716e-05, + "loss": 2.1871, + "step": 7544000 + }, + { + "epoch": 37.38, + "learning_rate": 3.131767760090763e-05, + "loss": 2.151, + "step": 7544500 + }, + { + "epoch": 37.38, + "learning_rate": 3.131643901448155e-05, + "loss": 2.1504, + "step": 7545000 + }, + { + "epoch": 37.38, + "learning_rate": 3.131520042805547e-05, + "loss": 2.1335, + "step": 7545500 + }, + { + "epoch": 37.39, + "learning_rate": 3.1313961841629384e-05, + "loss": 2.1584, + "step": 7546000 + }, + { + "epoch": 37.39, + "learning_rate": 3.13127232552033e-05, + "loss": 2.1635, + "step": 7546500 + }, + { + "epoch": 37.39, + "learning_rate": 3.131148714595007e-05, + "loss": 2.1373, + "step": 7547000 + }, + { + "epoch": 37.39, + "learning_rate": 3.1310248559523987e-05, + "loss": 2.1691, + "step": 7547500 + }, + { + "epoch": 37.4, + "learning_rate": 3.1309009973097904e-05, + "loss": 2.1757, + "step": 7548000 + }, + { + "epoch": 37.4, + "learning_rate": 3.130777386384467e-05, + "loss": 2.1534, + "step": 7548500 + }, + { + "epoch": 37.4, + "learning_rate": 3.130653527741859e-05, + "loss": 2.1707, + "step": 7549000 + }, + { + "epoch": 37.4, + "learning_rate": 3.1305296690992506e-05, + "loss": 2.1685, + "step": 7549500 + }, + { + "epoch": 37.41, + "learning_rate": 3.1304058104566416e-05, + "loss": 2.1556, + "step": 7550000 + }, + { + "epoch": 37.41, + "learning_rate": 3.130281951814033e-05, + "loss": 2.1833, + "step": 7550500 + }, + { + "epoch": 37.41, + "learning_rate": 3.130158093171425e-05, + "loss": 2.1858, + "step": 7551000 + }, + { + "epoch": 37.41, + "learning_rate": 3.130034234528817e-05, + "loss": 2.1862, + "step": 7551500 + }, + { + "epoch": 37.42, + "learning_rate": 3.129910623603494e-05, + "loss": 2.1598, + "step": 7552000 + }, + { + "epoch": 37.42, + "learning_rate": 3.129786764960886e-05, + "loss": 2.1699, + "step": 7552500 + }, + { + "epoch": 37.42, + "learning_rate": 3.129663154035563e-05, + "loss": 2.1736, + "step": 7553000 + }, + { + "epoch": 37.42, + "learning_rate": 3.1295392953929545e-05, + "loss": 2.162, + "step": 7553500 + }, + { + "epoch": 37.43, + "learning_rate": 3.1294154367503456e-05, + "loss": 2.1583, + "step": 7554000 + }, + { + "epoch": 37.43, + "learning_rate": 3.129291578107737e-05, + "loss": 2.1947, + "step": 7554500 + }, + { + "epoch": 37.43, + "learning_rate": 3.129167719465129e-05, + "loss": 2.1973, + "step": 7555000 + }, + { + "epoch": 37.43, + "learning_rate": 3.129044108539806e-05, + "loss": 2.1579, + "step": 7555500 + }, + { + "epoch": 37.44, + "learning_rate": 3.1289202498971975e-05, + "loss": 2.1586, + "step": 7556000 + }, + { + "epoch": 37.44, + "learning_rate": 3.128796391254589e-05, + "loss": 2.1745, + "step": 7556500 + }, + { + "epoch": 37.44, + "learning_rate": 3.128672532611981e-05, + "loss": 2.1567, + "step": 7557000 + }, + { + "epoch": 37.44, + "learning_rate": 3.1285486739693726e-05, + "loss": 2.1785, + "step": 7557500 + }, + { + "epoch": 37.44, + "learning_rate": 3.1284250630440495e-05, + "loss": 2.1605, + "step": 7558000 + }, + { + "epoch": 37.45, + "learning_rate": 3.128301204401441e-05, + "loss": 2.17, + "step": 7558500 + }, + { + "epoch": 37.45, + "learning_rate": 3.128177345758833e-05, + "loss": 2.1761, + "step": 7559000 + }, + { + "epoch": 37.45, + "learning_rate": 3.1280534871162246e-05, + "loss": 2.1689, + "step": 7559500 + }, + { + "epoch": 37.45, + "learning_rate": 3.127929628473616e-05, + "loss": 2.1641, + "step": 7560000 + }, + { + "epoch": 37.46, + "learning_rate": 3.1278060175482924e-05, + "loss": 2.1337, + "step": 7560500 + }, + { + "epoch": 37.46, + "learning_rate": 3.127682158905684e-05, + "loss": 2.1564, + "step": 7561000 + }, + { + "epoch": 37.46, + "learning_rate": 3.127558300263076e-05, + "loss": 2.1722, + "step": 7561500 + }, + { + "epoch": 37.46, + "learning_rate": 3.1274344416204675e-05, + "loss": 2.1591, + "step": 7562000 + }, + { + "epoch": 37.47, + "learning_rate": 3.127310582977859e-05, + "loss": 2.1634, + "step": 7562500 + }, + { + "epoch": 37.47, + "learning_rate": 3.127186724335251e-05, + "loss": 2.1877, + "step": 7563000 + }, + { + "epoch": 37.47, + "learning_rate": 3.1270628656926426e-05, + "loss": 2.159, + "step": 7563500 + }, + { + "epoch": 37.47, + "learning_rate": 3.126939007050034e-05, + "loss": 2.1643, + "step": 7564000 + }, + { + "epoch": 37.48, + "learning_rate": 3.126815148407426e-05, + "loss": 2.1624, + "step": 7564500 + }, + { + "epoch": 37.48, + "learning_rate": 3.126691289764818e-05, + "loss": 2.1848, + "step": 7565000 + }, + { + "epoch": 37.48, + "learning_rate": 3.126567431122209e-05, + "loss": 2.1678, + "step": 7565500 + }, + { + "epoch": 37.48, + "learning_rate": 3.1264435724796004e-05, + "loss": 2.1883, + "step": 7566000 + }, + { + "epoch": 37.49, + "learning_rate": 3.126319713836992e-05, + "loss": 2.174, + "step": 7566500 + }, + { + "epoch": 37.49, + "learning_rate": 3.126195855194384e-05, + "loss": 2.1423, + "step": 7567000 + }, + { + "epoch": 37.49, + "learning_rate": 3.1260719965517755e-05, + "loss": 2.1437, + "step": 7567500 + }, + { + "epoch": 37.49, + "learning_rate": 3.1259483856264523e-05, + "loss": 2.1632, + "step": 7568000 + }, + { + "epoch": 37.5, + "learning_rate": 3.125824526983844e-05, + "loss": 2.1706, + "step": 7568500 + }, + { + "epoch": 37.5, + "learning_rate": 3.125700668341236e-05, + "loss": 2.139, + "step": 7569000 + }, + { + "epoch": 37.5, + "learning_rate": 3.1255770574159126e-05, + "loss": 2.1831, + "step": 7569500 + }, + { + "epoch": 37.5, + "learning_rate": 3.125453198773304e-05, + "loss": 2.1732, + "step": 7570000 + }, + { + "epoch": 37.51, + "learning_rate": 3.125329340130696e-05, + "loss": 2.1728, + "step": 7570500 + }, + { + "epoch": 37.51, + "learning_rate": 3.125205481488088e-05, + "loss": 2.1557, + "step": 7571000 + }, + { + "epoch": 37.51, + "learning_rate": 3.125081622845479e-05, + "loss": 2.1746, + "step": 7571500 + }, + { + "epoch": 37.51, + "learning_rate": 3.1249577642028704e-05, + "loss": 2.1592, + "step": 7572000 + }, + { + "epoch": 37.52, + "learning_rate": 3.124833905560262e-05, + "loss": 2.1405, + "step": 7572500 + }, + { + "epoch": 37.52, + "learning_rate": 3.124710542352224e-05, + "loss": 2.161, + "step": 7573000 + }, + { + "epoch": 37.52, + "learning_rate": 3.124586683709616e-05, + "loss": 2.1546, + "step": 7573500 + }, + { + "epoch": 37.52, + "learning_rate": 3.1244628250670076e-05, + "loss": 2.161, + "step": 7574000 + }, + { + "epoch": 37.53, + "learning_rate": 3.124338966424399e-05, + "loss": 2.1624, + "step": 7574500 + }, + { + "epoch": 37.53, + "learning_rate": 3.124215603216361e-05, + "loss": 2.1927, + "step": 7575000 + }, + { + "epoch": 37.53, + "learning_rate": 3.124091744573753e-05, + "loss": 2.1766, + "step": 7575500 + }, + { + "epoch": 37.53, + "learning_rate": 3.123967885931145e-05, + "loss": 2.1885, + "step": 7576000 + }, + { + "epoch": 37.54, + "learning_rate": 3.1238440272885364e-05, + "loss": 2.1728, + "step": 7576500 + }, + { + "epoch": 37.54, + "learning_rate": 3.123720168645928e-05, + "loss": 2.1817, + "step": 7577000 + }, + { + "epoch": 37.54, + "learning_rate": 3.123596310003319e-05, + "loss": 2.2075, + "step": 7577500 + }, + { + "epoch": 37.54, + "learning_rate": 3.123472451360711e-05, + "loss": 2.1638, + "step": 7578000 + }, + { + "epoch": 37.55, + "learning_rate": 3.1233485927181025e-05, + "loss": 2.1478, + "step": 7578500 + }, + { + "epoch": 37.55, + "learning_rate": 3.123224734075494e-05, + "loss": 2.1594, + "step": 7579000 + }, + { + "epoch": 37.55, + "learning_rate": 3.123100875432886e-05, + "loss": 2.1663, + "step": 7579500 + }, + { + "epoch": 37.55, + "learning_rate": 3.1229770167902776e-05, + "loss": 2.1883, + "step": 7580000 + }, + { + "epoch": 37.56, + "learning_rate": 3.122853158147669e-05, + "loss": 2.1795, + "step": 7580500 + }, + { + "epoch": 37.56, + "learning_rate": 3.122729299505061e-05, + "loss": 2.1715, + "step": 7581000 + }, + { + "epoch": 37.56, + "learning_rate": 3.1226054408624526e-05, + "loss": 2.1624, + "step": 7581500 + }, + { + "epoch": 37.56, + "learning_rate": 3.122481582219844e-05, + "loss": 2.1937, + "step": 7582000 + }, + { + "epoch": 37.57, + "learning_rate": 3.122357723577236e-05, + "loss": 2.1976, + "step": 7582500 + }, + { + "epoch": 37.57, + "learning_rate": 3.122233864934628e-05, + "loss": 2.1896, + "step": 7583000 + }, + { + "epoch": 37.57, + "learning_rate": 3.1221100062920194e-05, + "loss": 2.1723, + "step": 7583500 + }, + { + "epoch": 37.57, + "learning_rate": 3.1219861476494104e-05, + "loss": 2.171, + "step": 7584000 + }, + { + "epoch": 37.58, + "learning_rate": 3.121862289006802e-05, + "loss": 2.1689, + "step": 7584500 + }, + { + "epoch": 37.58, + "learning_rate": 3.121738430364194e-05, + "loss": 2.1642, + "step": 7585000 + }, + { + "epoch": 37.58, + "learning_rate": 3.1216148194388714e-05, + "loss": 2.1587, + "step": 7585500 + }, + { + "epoch": 37.58, + "learning_rate": 3.121490960796263e-05, + "loss": 2.1621, + "step": 7586000 + }, + { + "epoch": 37.59, + "learning_rate": 3.121367102153655e-05, + "loss": 2.2021, + "step": 7586500 + }, + { + "epoch": 37.59, + "learning_rate": 3.1212432435110465e-05, + "loss": 2.1851, + "step": 7587000 + }, + { + "epoch": 37.59, + "learning_rate": 3.1211193848684375e-05, + "loss": 2.1986, + "step": 7587500 + }, + { + "epoch": 37.59, + "learning_rate": 3.120995526225829e-05, + "loss": 2.1908, + "step": 7588000 + }, + { + "epoch": 37.6, + "learning_rate": 3.120871667583221e-05, + "loss": 2.1592, + "step": 7588500 + }, + { + "epoch": 37.6, + "learning_rate": 3.1207478089406125e-05, + "loss": 2.1781, + "step": 7589000 + }, + { + "epoch": 37.6, + "learning_rate": 3.1206241980152894e-05, + "loss": 2.1659, + "step": 7589500 + }, + { + "epoch": 37.6, + "learning_rate": 3.120500339372681e-05, + "loss": 2.1833, + "step": 7590000 + }, + { + "epoch": 37.61, + "learning_rate": 3.120376480730072e-05, + "loss": 2.1932, + "step": 7590500 + }, + { + "epoch": 37.61, + "learning_rate": 3.12025286980475e-05, + "loss": 2.1681, + "step": 7591000 + }, + { + "epoch": 37.61, + "learning_rate": 3.1201290111621414e-05, + "loss": 2.1914, + "step": 7591500 + }, + { + "epoch": 37.61, + "learning_rate": 3.120005152519533e-05, + "loss": 2.1588, + "step": 7592000 + }, + { + "epoch": 37.62, + "learning_rate": 3.119881541594209e-05, + "loss": 2.1643, + "step": 7592500 + }, + { + "epoch": 37.62, + "learning_rate": 3.119757682951601e-05, + "loss": 2.1474, + "step": 7593000 + }, + { + "epoch": 37.62, + "learning_rate": 3.119633824308993e-05, + "loss": 2.1326, + "step": 7593500 + }, + { + "epoch": 37.62, + "learning_rate": 3.1195099656663844e-05, + "loss": 2.1581, + "step": 7594000 + }, + { + "epoch": 37.63, + "learning_rate": 3.119386107023776e-05, + "loss": 2.1565, + "step": 7594500 + }, + { + "epoch": 37.63, + "learning_rate": 3.119262496098453e-05, + "loss": 2.158, + "step": 7595000 + }, + { + "epoch": 37.63, + "learning_rate": 3.1191386374558446e-05, + "loss": 2.1375, + "step": 7595500 + }, + { + "epoch": 37.63, + "learning_rate": 3.119014778813236e-05, + "loss": 2.1917, + "step": 7596000 + }, + { + "epoch": 37.64, + "learning_rate": 3.118890920170628e-05, + "loss": 2.1699, + "step": 7596500 + }, + { + "epoch": 37.64, + "learning_rate": 3.11876706152802e-05, + "loss": 2.178, + "step": 7597000 + }, + { + "epoch": 37.64, + "learning_rate": 3.1186434506026966e-05, + "loss": 2.1686, + "step": 7597500 + }, + { + "epoch": 37.64, + "learning_rate": 3.1185195919600876e-05, + "loss": 2.1555, + "step": 7598000 + }, + { + "epoch": 37.65, + "learning_rate": 3.118395733317479e-05, + "loss": 2.1557, + "step": 7598500 + }, + { + "epoch": 37.65, + "learning_rate": 3.118271874674871e-05, + "loss": 2.175, + "step": 7599000 + }, + { + "epoch": 37.65, + "learning_rate": 3.118148016032263e-05, + "loss": 2.1777, + "step": 7599500 + }, + { + "epoch": 37.65, + "learning_rate": 3.1180244051069396e-05, + "loss": 2.1642, + "step": 7600000 + }, + { + "epoch": 37.66, + "learning_rate": 3.117900546464331e-05, + "loss": 2.1832, + "step": 7600500 + }, + { + "epoch": 37.66, + "learning_rate": 3.117776687821723e-05, + "loss": 2.1524, + "step": 7601000 + }, + { + "epoch": 37.66, + "learning_rate": 3.1176528291791146e-05, + "loss": 2.175, + "step": 7601500 + }, + { + "epoch": 37.66, + "learning_rate": 3.117528970536506e-05, + "loss": 2.158, + "step": 7602000 + }, + { + "epoch": 37.67, + "learning_rate": 3.117405111893898e-05, + "loss": 2.1643, + "step": 7602500 + }, + { + "epoch": 37.67, + "learning_rate": 3.117281500968575e-05, + "loss": 2.198, + "step": 7603000 + }, + { + "epoch": 37.67, + "learning_rate": 3.1171576423259666e-05, + "loss": 2.1969, + "step": 7603500 + }, + { + "epoch": 37.67, + "learning_rate": 3.117033783683358e-05, + "loss": 2.1576, + "step": 7604000 + }, + { + "epoch": 37.68, + "learning_rate": 3.116909925040749e-05, + "loss": 2.1697, + "step": 7604500 + }, + { + "epoch": 37.68, + "learning_rate": 3.116786066398141e-05, + "loss": 2.1615, + "step": 7605000 + }, + { + "epoch": 37.68, + "learning_rate": 3.116662207755533e-05, + "loss": 2.1891, + "step": 7605500 + }, + { + "epoch": 37.68, + "learning_rate": 3.1165383491129244e-05, + "loss": 2.1839, + "step": 7606000 + }, + { + "epoch": 37.69, + "learning_rate": 3.116414738187601e-05, + "loss": 2.1447, + "step": 7606500 + }, + { + "epoch": 37.69, + "learning_rate": 3.116290879544993e-05, + "loss": 2.178, + "step": 7607000 + }, + { + "epoch": 37.69, + "learning_rate": 3.1161670209023847e-05, + "loss": 2.1625, + "step": 7607500 + }, + { + "epoch": 37.69, + "learning_rate": 3.1160431622597763e-05, + "loss": 2.1604, + "step": 7608000 + }, + { + "epoch": 37.7, + "learning_rate": 3.115919303617168e-05, + "loss": 2.1825, + "step": 7608500 + }, + { + "epoch": 37.7, + "learning_rate": 3.11579544497456e-05, + "loss": 2.1545, + "step": 7609000 + }, + { + "epoch": 37.7, + "learning_rate": 3.1156718340492366e-05, + "loss": 2.1914, + "step": 7609500 + }, + { + "epoch": 37.7, + "learning_rate": 3.115548223123913e-05, + "loss": 2.1585, + "step": 7610000 + }, + { + "epoch": 37.71, + "learning_rate": 3.1154243644813045e-05, + "loss": 2.1652, + "step": 7610500 + }, + { + "epoch": 37.71, + "learning_rate": 3.115300505838696e-05, + "loss": 2.1715, + "step": 7611000 + }, + { + "epoch": 37.71, + "learning_rate": 3.115176647196088e-05, + "loss": 2.1806, + "step": 7611500 + }, + { + "epoch": 37.71, + "learning_rate": 3.1150527885534796e-05, + "loss": 2.1622, + "step": 7612000 + }, + { + "epoch": 37.71, + "learning_rate": 3.114928929910871e-05, + "loss": 2.1724, + "step": 7612500 + }, + { + "epoch": 37.72, + "learning_rate": 3.114805071268263e-05, + "loss": 2.1598, + "step": 7613000 + }, + { + "epoch": 37.72, + "learning_rate": 3.114681212625655e-05, + "loss": 2.1558, + "step": 7613500 + }, + { + "epoch": 37.72, + "learning_rate": 3.1145573539830464e-05, + "loss": 2.1799, + "step": 7614000 + }, + { + "epoch": 37.72, + "learning_rate": 3.114433495340438e-05, + "loss": 2.1604, + "step": 7614500 + }, + { + "epoch": 37.73, + "learning_rate": 3.11430963669783e-05, + "loss": 2.1678, + "step": 7615000 + }, + { + "epoch": 37.73, + "learning_rate": 3.1141857780552214e-05, + "loss": 2.1516, + "step": 7615500 + }, + { + "epoch": 37.73, + "learning_rate": 3.114062167129898e-05, + "loss": 2.1665, + "step": 7616000 + }, + { + "epoch": 37.73, + "learning_rate": 3.11393830848729e-05, + "loss": 2.1556, + "step": 7616500 + }, + { + "epoch": 37.74, + "learning_rate": 3.113814449844682e-05, + "loss": 2.1651, + "step": 7617000 + }, + { + "epoch": 37.74, + "learning_rate": 3.1136905912020734e-05, + "loss": 2.1926, + "step": 7617500 + }, + { + "epoch": 37.74, + "learning_rate": 3.1135669802767496e-05, + "loss": 2.1944, + "step": 7618000 + }, + { + "epoch": 37.74, + "learning_rate": 3.113443121634141e-05, + "loss": 2.1642, + "step": 7618500 + }, + { + "epoch": 37.75, + "learning_rate": 3.113319262991533e-05, + "loss": 2.17, + "step": 7619000 + }, + { + "epoch": 37.75, + "learning_rate": 3.113195404348925e-05, + "loss": 2.168, + "step": 7619500 + }, + { + "epoch": 37.75, + "learning_rate": 3.1130715457063164e-05, + "loss": 2.1791, + "step": 7620000 + }, + { + "epoch": 37.75, + "learning_rate": 3.112947687063708e-05, + "loss": 2.1555, + "step": 7620500 + }, + { + "epoch": 37.76, + "learning_rate": 3.1128238284211e-05, + "loss": 2.1896, + "step": 7621000 + }, + { + "epoch": 37.76, + "learning_rate": 3.1126999697784914e-05, + "loss": 2.184, + "step": 7621500 + }, + { + "epoch": 37.76, + "learning_rate": 3.112576111135883e-05, + "loss": 2.1875, + "step": 7622000 + }, + { + "epoch": 37.76, + "learning_rate": 3.112452252493275e-05, + "loss": 2.1642, + "step": 7622500 + }, + { + "epoch": 37.77, + "learning_rate": 3.1123283938506665e-05, + "loss": 2.1688, + "step": 7623000 + }, + { + "epoch": 37.77, + "learning_rate": 3.1122047829253434e-05, + "loss": 2.1603, + "step": 7623500 + }, + { + "epoch": 37.77, + "learning_rate": 3.112080924282735e-05, + "loss": 2.1725, + "step": 7624000 + }, + { + "epoch": 37.77, + "learning_rate": 3.111957065640127e-05, + "loss": 2.1852, + "step": 7624500 + }, + { + "epoch": 37.78, + "learning_rate": 3.111833206997518e-05, + "loss": 2.1877, + "step": 7625000 + }, + { + "epoch": 37.78, + "learning_rate": 3.111709596072195e-05, + "loss": 2.1658, + "step": 7625500 + }, + { + "epoch": 37.78, + "learning_rate": 3.1115857374295864e-05, + "loss": 2.191, + "step": 7626000 + }, + { + "epoch": 37.78, + "learning_rate": 3.111461878786978e-05, + "loss": 2.1775, + "step": 7626500 + }, + { + "epoch": 37.79, + "learning_rate": 3.11133802014437e-05, + "loss": 2.1898, + "step": 7627000 + }, + { + "epoch": 37.79, + "learning_rate": 3.1112141615017615e-05, + "loss": 2.1753, + "step": 7627500 + }, + { + "epoch": 37.79, + "learning_rate": 3.111090302859153e-05, + "loss": 2.1766, + "step": 7628000 + }, + { + "epoch": 37.79, + "learning_rate": 3.110966444216545e-05, + "loss": 2.153, + "step": 7628500 + }, + { + "epoch": 37.8, + "learning_rate": 3.110842833291222e-05, + "loss": 2.1655, + "step": 7629000 + }, + { + "epoch": 37.8, + "learning_rate": 3.110719222365898e-05, + "loss": 2.1837, + "step": 7629500 + }, + { + "epoch": 37.8, + "learning_rate": 3.1105956114405755e-05, + "loss": 2.1567, + "step": 7630000 + }, + { + "epoch": 37.8, + "learning_rate": 3.110471752797967e-05, + "loss": 2.1809, + "step": 7630500 + }, + { + "epoch": 37.81, + "learning_rate": 3.110347894155359e-05, + "loss": 2.1559, + "step": 7631000 + }, + { + "epoch": 37.81, + "learning_rate": 3.1102240355127506e-05, + "loss": 2.1647, + "step": 7631500 + }, + { + "epoch": 37.81, + "learning_rate": 3.1101001768701416e-05, + "loss": 2.1759, + "step": 7632000 + }, + { + "epoch": 37.81, + "learning_rate": 3.109976318227533e-05, + "loss": 2.1911, + "step": 7632500 + }, + { + "epoch": 37.82, + "learning_rate": 3.109852459584925e-05, + "loss": 2.1902, + "step": 7633000 + }, + { + "epoch": 37.82, + "learning_rate": 3.1097286009423167e-05, + "loss": 2.1445, + "step": 7633500 + }, + { + "epoch": 37.82, + "learning_rate": 3.1096047422997084e-05, + "loss": 2.1595, + "step": 7634000 + }, + { + "epoch": 37.82, + "learning_rate": 3.1094808836571e-05, + "loss": 2.1681, + "step": 7634500 + }, + { + "epoch": 37.83, + "learning_rate": 3.109357520449062e-05, + "loss": 2.1935, + "step": 7635000 + }, + { + "epoch": 37.83, + "learning_rate": 3.109233661806454e-05, + "loss": 2.1732, + "step": 7635500 + }, + { + "epoch": 37.83, + "learning_rate": 3.1091098031638455e-05, + "loss": 2.1912, + "step": 7636000 + }, + { + "epoch": 37.83, + "learning_rate": 3.108985944521237e-05, + "loss": 2.165, + "step": 7636500 + }, + { + "epoch": 37.84, + "learning_rate": 3.108862085878629e-05, + "loss": 2.1409, + "step": 7637000 + }, + { + "epoch": 37.84, + "learning_rate": 3.1087382272360206e-05, + "loss": 2.1897, + "step": 7637500 + }, + { + "epoch": 37.84, + "learning_rate": 3.1086143685934116e-05, + "loss": 2.1517, + "step": 7638000 + }, + { + "epoch": 37.84, + "learning_rate": 3.108490509950803e-05, + "loss": 2.1736, + "step": 7638500 + }, + { + "epoch": 37.85, + "learning_rate": 3.108366651308195e-05, + "loss": 2.1654, + "step": 7639000 + }, + { + "epoch": 37.85, + "learning_rate": 3.108242792665587e-05, + "loss": 2.1516, + "step": 7639500 + }, + { + "epoch": 37.85, + "learning_rate": 3.1081189340229784e-05, + "loss": 2.1426, + "step": 7640000 + }, + { + "epoch": 37.85, + "learning_rate": 3.107995323097655e-05, + "loss": 2.2071, + "step": 7640500 + }, + { + "epoch": 37.86, + "learning_rate": 3.107871464455047e-05, + "loss": 2.1758, + "step": 7641000 + }, + { + "epoch": 37.86, + "learning_rate": 3.107747605812438e-05, + "loss": 2.1858, + "step": 7641500 + }, + { + "epoch": 37.86, + "learning_rate": 3.1076237471698296e-05, + "loss": 2.1854, + "step": 7642000 + }, + { + "epoch": 37.86, + "learning_rate": 3.1074998885272213e-05, + "loss": 2.1974, + "step": 7642500 + }, + { + "epoch": 37.87, + "learning_rate": 3.107376029884613e-05, + "loss": 2.1805, + "step": 7643000 + }, + { + "epoch": 37.87, + "learning_rate": 3.107252171242005e-05, + "loss": 2.1728, + "step": 7643500 + }, + { + "epoch": 37.87, + "learning_rate": 3.1071283125993964e-05, + "loss": 2.1834, + "step": 7644000 + }, + { + "epoch": 37.87, + "learning_rate": 3.107004453956788e-05, + "loss": 2.1629, + "step": 7644500 + }, + { + "epoch": 37.88, + "learning_rate": 3.106880843031465e-05, + "loss": 2.179, + "step": 7645000 + }, + { + "epoch": 37.88, + "learning_rate": 3.106756984388857e-05, + "loss": 2.1791, + "step": 7645500 + }, + { + "epoch": 37.88, + "learning_rate": 3.1066331257462484e-05, + "loss": 2.1528, + "step": 7646000 + }, + { + "epoch": 37.88, + "learning_rate": 3.10650926710364e-05, + "loss": 2.1654, + "step": 7646500 + }, + { + "epoch": 37.89, + "learning_rate": 3.106385408461032e-05, + "loss": 2.1706, + "step": 7647000 + }, + { + "epoch": 37.89, + "learning_rate": 3.1062615498184235e-05, + "loss": 2.1603, + "step": 7647500 + }, + { + "epoch": 37.89, + "learning_rate": 3.106137691175815e-05, + "loss": 2.1772, + "step": 7648000 + }, + { + "epoch": 37.89, + "learning_rate": 3.106013832533207e-05, + "loss": 2.1699, + "step": 7648500 + }, + { + "epoch": 37.9, + "learning_rate": 3.1058899738905985e-05, + "loss": 2.187, + "step": 7649000 + }, + { + "epoch": 37.9, + "learning_rate": 3.105766362965275e-05, + "loss": 2.1842, + "step": 7649500 + }, + { + "epoch": 37.9, + "learning_rate": 3.105642752039952e-05, + "loss": 2.186, + "step": 7650000 + }, + { + "epoch": 37.9, + "learning_rate": 3.105518893397343e-05, + "loss": 2.2046, + "step": 7650500 + }, + { + "epoch": 37.91, + "learning_rate": 3.105395034754735e-05, + "loss": 2.1892, + "step": 7651000 + }, + { + "epoch": 37.91, + "learning_rate": 3.105271176112127e-05, + "loss": 2.1576, + "step": 7651500 + }, + { + "epoch": 37.91, + "learning_rate": 3.1051473174695184e-05, + "loss": 2.1767, + "step": 7652000 + }, + { + "epoch": 37.91, + "learning_rate": 3.10502345882691e-05, + "loss": 2.1738, + "step": 7652500 + }, + { + "epoch": 37.92, + "learning_rate": 3.104899600184302e-05, + "loss": 2.1886, + "step": 7653000 + }, + { + "epoch": 37.92, + "learning_rate": 3.1047757415416935e-05, + "loss": 2.1829, + "step": 7653500 + }, + { + "epoch": 37.92, + "learning_rate": 3.104651882899085e-05, + "loss": 2.145, + "step": 7654000 + }, + { + "epoch": 37.92, + "learning_rate": 3.104528024256477e-05, + "loss": 2.1616, + "step": 7654500 + }, + { + "epoch": 37.93, + "learning_rate": 3.1044041656138685e-05, + "loss": 2.2104, + "step": 7655000 + }, + { + "epoch": 37.93, + "learning_rate": 3.10428030697126e-05, + "loss": 2.167, + "step": 7655500 + }, + { + "epoch": 37.93, + "learning_rate": 3.1041566960459364e-05, + "loss": 2.1776, + "step": 7656000 + }, + { + "epoch": 37.93, + "learning_rate": 3.104032837403328e-05, + "loss": 2.1884, + "step": 7656500 + }, + { + "epoch": 37.94, + "learning_rate": 3.103909226478005e-05, + "loss": 2.1785, + "step": 7657000 + }, + { + "epoch": 37.94, + "learning_rate": 3.103785367835397e-05, + "loss": 2.1838, + "step": 7657500 + }, + { + "epoch": 37.94, + "learning_rate": 3.1036615091927884e-05, + "loss": 2.1869, + "step": 7658000 + }, + { + "epoch": 37.94, + "learning_rate": 3.10353765055018e-05, + "loss": 2.159, + "step": 7658500 + }, + { + "epoch": 37.95, + "learning_rate": 3.103413791907572e-05, + "loss": 2.1483, + "step": 7659000 + }, + { + "epoch": 37.95, + "learning_rate": 3.1032899332649635e-05, + "loss": 2.1951, + "step": 7659500 + }, + { + "epoch": 37.95, + "learning_rate": 3.103166074622355e-05, + "loss": 2.1653, + "step": 7660000 + }, + { + "epoch": 37.95, + "learning_rate": 3.103042215979747e-05, + "loss": 2.163, + "step": 7660500 + }, + { + "epoch": 37.96, + "learning_rate": 3.1029183573371386e-05, + "loss": 2.1676, + "step": 7661000 + }, + { + "epoch": 37.96, + "learning_rate": 3.10279449869453e-05, + "loss": 2.1827, + "step": 7661500 + }, + { + "epoch": 37.96, + "learning_rate": 3.1026708877692065e-05, + "loss": 2.1703, + "step": 7662000 + }, + { + "epoch": 37.96, + "learning_rate": 3.102547276843884e-05, + "loss": 2.2001, + "step": 7662500 + }, + { + "epoch": 37.97, + "learning_rate": 3.102423418201275e-05, + "loss": 2.1585, + "step": 7663000 + }, + { + "epoch": 37.97, + "learning_rate": 3.102299559558667e-05, + "loss": 2.1608, + "step": 7663500 + }, + { + "epoch": 37.97, + "learning_rate": 3.1021757009160584e-05, + "loss": 2.182, + "step": 7664000 + }, + { + "epoch": 37.97, + "learning_rate": 3.10205184227345e-05, + "loss": 2.1963, + "step": 7664500 + }, + { + "epoch": 37.98, + "learning_rate": 3.101927983630842e-05, + "loss": 2.1576, + "step": 7665000 + }, + { + "epoch": 37.98, + "learning_rate": 3.1018041249882335e-05, + "loss": 2.1685, + "step": 7665500 + }, + { + "epoch": 37.98, + "learning_rate": 3.101680266345625e-05, + "loss": 2.1808, + "step": 7666000 + }, + { + "epoch": 37.98, + "learning_rate": 3.101556407703017e-05, + "loss": 2.1778, + "step": 7666500 + }, + { + "epoch": 37.98, + "learning_rate": 3.101432796777694e-05, + "loss": 2.1627, + "step": 7667000 + }, + { + "epoch": 37.99, + "learning_rate": 3.1013089381350855e-05, + "loss": 2.1722, + "step": 7667500 + }, + { + "epoch": 37.99, + "learning_rate": 3.101185079492477e-05, + "loss": 2.1858, + "step": 7668000 + }, + { + "epoch": 37.99, + "learning_rate": 3.101061220849868e-05, + "loss": 2.2135, + "step": 7668500 + }, + { + "epoch": 37.99, + "learning_rate": 3.100937609924546e-05, + "loss": 2.1998, + "step": 7669000 + }, + { + "epoch": 38.0, + "learning_rate": 3.100813751281937e-05, + "loss": 2.1548, + "step": 7669500 + }, + { + "epoch": 38.0, + "learning_rate": 3.1006898926393284e-05, + "loss": 2.1506, + "step": 7670000 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.6640129221845454, + "eval_accuracy_mlm": 0.6211758980997927, + "eval_accuracy_nsp": 0.8663432159680576, + "eval_loss": 2.2940046787261963, + "eval_runtime": 146.024, + "eval_samples_per_second": 1746.007, + "eval_steps_per_second": 72.755, + "step": 7670034 + }, + { + "epoch": 38.0, + "learning_rate": 3.10056603399672e-05, + "loss": 2.1341, + "step": 7670500 + }, + { + "epoch": 38.0, + "learning_rate": 3.100442175354112e-05, + "loss": 2.1179, + "step": 7671000 + }, + { + "epoch": 38.01, + "learning_rate": 3.1003183167115035e-05, + "loss": 2.1475, + "step": 7671500 + }, + { + "epoch": 38.01, + "learning_rate": 3.100194458068895e-05, + "loss": 2.1367, + "step": 7672000 + }, + { + "epoch": 38.01, + "learning_rate": 3.100070599426287e-05, + "loss": 2.152, + "step": 7672500 + }, + { + "epoch": 38.01, + "learning_rate": 3.0999467407836786e-05, + "loss": 2.1322, + "step": 7673000 + }, + { + "epoch": 38.02, + "learning_rate": 3.09982288214107e-05, + "loss": 2.1336, + "step": 7673500 + }, + { + "epoch": 38.02, + "learning_rate": 3.099699023498462e-05, + "loss": 2.1332, + "step": 7674000 + }, + { + "epoch": 38.02, + "learning_rate": 3.099575164855854e-05, + "loss": 2.1682, + "step": 7674500 + }, + { + "epoch": 38.02, + "learning_rate": 3.0994515539305305e-05, + "loss": 2.1566, + "step": 7675000 + }, + { + "epoch": 38.03, + "learning_rate": 3.099327943005207e-05, + "loss": 2.1353, + "step": 7675500 + }, + { + "epoch": 38.03, + "learning_rate": 3.099204332079884e-05, + "loss": 2.1698, + "step": 7676000 + }, + { + "epoch": 38.03, + "learning_rate": 3.099080473437276e-05, + "loss": 2.1583, + "step": 7676500 + }, + { + "epoch": 38.03, + "learning_rate": 3.098956614794668e-05, + "loss": 2.1478, + "step": 7677000 + }, + { + "epoch": 38.04, + "learning_rate": 3.0988327561520594e-05, + "loss": 2.1403, + "step": 7677500 + }, + { + "epoch": 38.04, + "learning_rate": 3.098708897509451e-05, + "loss": 2.1576, + "step": 7678000 + }, + { + "epoch": 38.04, + "learning_rate": 3.098585038866842e-05, + "loss": 2.1442, + "step": 7678500 + }, + { + "epoch": 38.04, + "learning_rate": 3.098461180224234e-05, + "loss": 2.1502, + "step": 7679000 + }, + { + "epoch": 38.05, + "learning_rate": 3.0983373215816255e-05, + "loss": 2.1695, + "step": 7679500 + }, + { + "epoch": 38.05, + "learning_rate": 3.098213462939017e-05, + "loss": 2.1417, + "step": 7680000 + }, + { + "epoch": 38.05, + "learning_rate": 3.098089604296409e-05, + "loss": 2.1439, + "step": 7680500 + }, + { + "epoch": 38.05, + "learning_rate": 3.0979657456538006e-05, + "loss": 2.1506, + "step": 7681000 + }, + { + "epoch": 38.06, + "learning_rate": 3.097841887011192e-05, + "loss": 2.107, + "step": 7681500 + }, + { + "epoch": 38.06, + "learning_rate": 3.0977182760858685e-05, + "loss": 2.1619, + "step": 7682000 + }, + { + "epoch": 38.06, + "learning_rate": 3.097594665160546e-05, + "loss": 2.1356, + "step": 7682500 + }, + { + "epoch": 38.06, + "learning_rate": 3.097470806517938e-05, + "loss": 2.1428, + "step": 7683000 + }, + { + "epoch": 38.07, + "learning_rate": 3.0973469478753294e-05, + "loss": 2.1517, + "step": 7683500 + }, + { + "epoch": 38.07, + "learning_rate": 3.097223089232721e-05, + "loss": 2.1489, + "step": 7684000 + }, + { + "epoch": 38.07, + "learning_rate": 3.097099478307397e-05, + "loss": 2.1292, + "step": 7684500 + }, + { + "epoch": 38.07, + "learning_rate": 3.096975619664789e-05, + "loss": 2.1504, + "step": 7685000 + }, + { + "epoch": 38.08, + "learning_rate": 3.096851761022181e-05, + "loss": 2.1655, + "step": 7685500 + }, + { + "epoch": 38.08, + "learning_rate": 3.0967281500968576e-05, + "loss": 2.1519, + "step": 7686000 + }, + { + "epoch": 38.08, + "learning_rate": 3.096604291454249e-05, + "loss": 2.1419, + "step": 7686500 + }, + { + "epoch": 38.08, + "learning_rate": 3.096480432811641e-05, + "loss": 2.1692, + "step": 7687000 + }, + { + "epoch": 38.09, + "learning_rate": 3.0963565741690326e-05, + "loss": 2.1659, + "step": 7687500 + }, + { + "epoch": 38.09, + "learning_rate": 3.096232715526424e-05, + "loss": 2.1339, + "step": 7688000 + }, + { + "epoch": 38.09, + "learning_rate": 3.096108856883816e-05, + "loss": 2.1517, + "step": 7688500 + }, + { + "epoch": 38.09, + "learning_rate": 3.095985245958493e-05, + "loss": 2.1371, + "step": 7689000 + }, + { + "epoch": 38.1, + "learning_rate": 3.095861635033169e-05, + "loss": 2.1456, + "step": 7689500 + }, + { + "epoch": 38.1, + "learning_rate": 3.095737776390561e-05, + "loss": 2.1537, + "step": 7690000 + }, + { + "epoch": 38.1, + "learning_rate": 3.0956139177479525e-05, + "loss": 2.167, + "step": 7690500 + }, + { + "epoch": 38.1, + "learning_rate": 3.095490059105344e-05, + "loss": 2.1423, + "step": 7691000 + }, + { + "epoch": 38.11, + "learning_rate": 3.095366200462736e-05, + "loss": 2.1428, + "step": 7691500 + }, + { + "epoch": 38.11, + "learning_rate": 3.095242589537413e-05, + "loss": 2.1569, + "step": 7692000 + }, + { + "epoch": 38.11, + "learning_rate": 3.0951187308948045e-05, + "loss": 2.1359, + "step": 7692500 + }, + { + "epoch": 38.11, + "learning_rate": 3.094994872252196e-05, + "loss": 2.1448, + "step": 7693000 + }, + { + "epoch": 38.12, + "learning_rate": 3.094871013609588e-05, + "loss": 2.1413, + "step": 7693500 + }, + { + "epoch": 38.12, + "learning_rate": 3.0947471549669795e-05, + "loss": 2.1571, + "step": 7694000 + }, + { + "epoch": 38.12, + "learning_rate": 3.094623296324371e-05, + "loss": 2.1341, + "step": 7694500 + }, + { + "epoch": 38.12, + "learning_rate": 3.094499437681763e-05, + "loss": 2.1578, + "step": 7695000 + }, + { + "epoch": 38.13, + "learning_rate": 3.0943755790391546e-05, + "loss": 2.1347, + "step": 7695500 + }, + { + "epoch": 38.13, + "learning_rate": 3.094251720396546e-05, + "loss": 2.1585, + "step": 7696000 + }, + { + "epoch": 38.13, + "learning_rate": 3.094127861753937e-05, + "loss": 2.132, + "step": 7696500 + }, + { + "epoch": 38.13, + "learning_rate": 3.094004003111329e-05, + "loss": 2.1421, + "step": 7697000 + }, + { + "epoch": 38.14, + "learning_rate": 3.093880144468721e-05, + "loss": 2.1641, + "step": 7697500 + }, + { + "epoch": 38.14, + "learning_rate": 3.0937562858261124e-05, + "loss": 2.1336, + "step": 7698000 + }, + { + "epoch": 38.14, + "learning_rate": 3.093632427183504e-05, + "loss": 2.1314, + "step": 7698500 + }, + { + "epoch": 38.14, + "learning_rate": 3.093508816258181e-05, + "loss": 2.1504, + "step": 7699000 + }, + { + "epoch": 38.15, + "learning_rate": 3.093385205332858e-05, + "loss": 2.1328, + "step": 7699500 + }, + { + "epoch": 38.15, + "learning_rate": 3.0932613466902495e-05, + "loss": 2.1288, + "step": 7700000 + }, + { + "epoch": 38.15, + "learning_rate": 3.093137488047641e-05, + "loss": 2.1542, + "step": 7700500 + }, + { + "epoch": 38.15, + "learning_rate": 3.093013629405033e-05, + "loss": 2.1708, + "step": 7701000 + }, + { + "epoch": 38.16, + "learning_rate": 3.0928897707624246e-05, + "loss": 2.1498, + "step": 7701500 + }, + { + "epoch": 38.16, + "learning_rate": 3.092765912119816e-05, + "loss": 2.1544, + "step": 7702000 + }, + { + "epoch": 38.16, + "learning_rate": 3.0926423011944925e-05, + "loss": 2.1696, + "step": 7702500 + }, + { + "epoch": 38.16, + "learning_rate": 3.09251869026917e-05, + "loss": 2.1817, + "step": 7703000 + }, + { + "epoch": 38.17, + "learning_rate": 3.092394831626562e-05, + "loss": 2.1451, + "step": 7703500 + }, + { + "epoch": 38.17, + "learning_rate": 3.0922709729839535e-05, + "loss": 2.1684, + "step": 7704000 + }, + { + "epoch": 38.17, + "learning_rate": 3.0921471143413445e-05, + "loss": 2.1609, + "step": 7704500 + }, + { + "epoch": 38.17, + "learning_rate": 3.092023255698736e-05, + "loss": 2.17, + "step": 7705000 + }, + { + "epoch": 38.18, + "learning_rate": 3.091899397056128e-05, + "loss": 2.1697, + "step": 7705500 + }, + { + "epoch": 38.18, + "learning_rate": 3.09177603384809e-05, + "loss": 2.1148, + "step": 7706000 + }, + { + "epoch": 38.18, + "learning_rate": 3.0916521752054816e-05, + "loss": 2.1489, + "step": 7706500 + }, + { + "epoch": 38.18, + "learning_rate": 3.091528316562873e-05, + "loss": 2.1828, + "step": 7707000 + }, + { + "epoch": 38.19, + "learning_rate": 3.091404457920265e-05, + "loss": 2.1395, + "step": 7707500 + }, + { + "epoch": 38.19, + "learning_rate": 3.091280599277657e-05, + "loss": 2.1432, + "step": 7708000 + }, + { + "epoch": 38.19, + "learning_rate": 3.0911567406350484e-05, + "loss": 2.1308, + "step": 7708500 + }, + { + "epoch": 38.19, + "learning_rate": 3.09103288199244e-05, + "loss": 2.1493, + "step": 7709000 + }, + { + "epoch": 38.2, + "learning_rate": 3.090909271067117e-05, + "loss": 2.1679, + "step": 7709500 + }, + { + "epoch": 38.2, + "learning_rate": 3.090785412424509e-05, + "loss": 2.1403, + "step": 7710000 + }, + { + "epoch": 38.2, + "learning_rate": 3.0906615537819004e-05, + "loss": 2.1312, + "step": 7710500 + }, + { + "epoch": 38.2, + "learning_rate": 3.0905376951392914e-05, + "loss": 2.1532, + "step": 7711000 + }, + { + "epoch": 38.21, + "learning_rate": 3.090413836496683e-05, + "loss": 2.155, + "step": 7711500 + }, + { + "epoch": 38.21, + "learning_rate": 3.090289977854075e-05, + "loss": 2.1598, + "step": 7712000 + }, + { + "epoch": 38.21, + "learning_rate": 3.0901661192114664e-05, + "loss": 2.1614, + "step": 7712500 + }, + { + "epoch": 38.21, + "learning_rate": 3.090042260568858e-05, + "loss": 2.1175, + "step": 7713000 + }, + { + "epoch": 38.22, + "learning_rate": 3.089918401926249e-05, + "loss": 2.1656, + "step": 7713500 + }, + { + "epoch": 38.22, + "learning_rate": 3.089794543283641e-05, + "loss": 2.1467, + "step": 7714000 + }, + { + "epoch": 38.22, + "learning_rate": 3.0896706846410325e-05, + "loss": 2.1399, + "step": 7714500 + }, + { + "epoch": 38.22, + "learning_rate": 3.089546825998424e-05, + "loss": 2.1499, + "step": 7715000 + }, + { + "epoch": 38.23, + "learning_rate": 3.089422967355816e-05, + "loss": 2.1806, + "step": 7715500 + }, + { + "epoch": 38.23, + "learning_rate": 3.0892991087132076e-05, + "loss": 2.1437, + "step": 7716000 + }, + { + "epoch": 38.23, + "learning_rate": 3.089175250070599e-05, + "loss": 2.1507, + "step": 7716500 + }, + { + "epoch": 38.23, + "learning_rate": 3.089051639145276e-05, + "loss": 2.1541, + "step": 7717000 + }, + { + "epoch": 38.24, + "learning_rate": 3.088928028219954e-05, + "loss": 2.1685, + "step": 7717500 + }, + { + "epoch": 38.24, + "learning_rate": 3.088804169577345e-05, + "loss": 2.1489, + "step": 7718000 + }, + { + "epoch": 38.24, + "learning_rate": 3.0886803109347365e-05, + "loss": 2.1454, + "step": 7718500 + }, + { + "epoch": 38.24, + "learning_rate": 3.088556452292128e-05, + "loss": 2.158, + "step": 7719000 + }, + { + "epoch": 38.25, + "learning_rate": 3.08843259364952e-05, + "loss": 2.1734, + "step": 7719500 + }, + { + "epoch": 38.25, + "learning_rate": 3.0883087350069115e-05, + "loss": 2.1772, + "step": 7720000 + }, + { + "epoch": 38.25, + "learning_rate": 3.0881848763643026e-05, + "loss": 2.1496, + "step": 7720500 + }, + { + "epoch": 38.25, + "learning_rate": 3.088061017721694e-05, + "loss": 2.1361, + "step": 7721000 + }, + { + "epoch": 38.25, + "learning_rate": 3.087937159079086e-05, + "loss": 2.1307, + "step": 7721500 + }, + { + "epoch": 38.26, + "learning_rate": 3.087813795871049e-05, + "loss": 2.178, + "step": 7722000 + }, + { + "epoch": 38.26, + "learning_rate": 3.087690184945725e-05, + "loss": 2.1533, + "step": 7722500 + }, + { + "epoch": 38.26, + "learning_rate": 3.0875663263031166e-05, + "loss": 2.1338, + "step": 7723000 + }, + { + "epoch": 38.26, + "learning_rate": 3.087442467660508e-05, + "loss": 2.149, + "step": 7723500 + }, + { + "epoch": 38.27, + "learning_rate": 3.0873186090179e-05, + "loss": 2.1582, + "step": 7724000 + }, + { + "epoch": 38.27, + "learning_rate": 3.0871947503752917e-05, + "loss": 2.1487, + "step": 7724500 + }, + { + "epoch": 38.27, + "learning_rate": 3.0870708917326834e-05, + "loss": 2.1591, + "step": 7725000 + }, + { + "epoch": 38.27, + "learning_rate": 3.086947033090075e-05, + "loss": 2.1655, + "step": 7725500 + }, + { + "epoch": 38.28, + "learning_rate": 3.086823174447467e-05, + "loss": 2.1918, + "step": 7726000 + }, + { + "epoch": 38.28, + "learning_rate": 3.0866993158048584e-05, + "loss": 2.1604, + "step": 7726500 + }, + { + "epoch": 38.28, + "learning_rate": 3.08657545716225e-05, + "loss": 2.1778, + "step": 7727000 + }, + { + "epoch": 38.28, + "learning_rate": 3.086451598519642e-05, + "loss": 2.1669, + "step": 7727500 + }, + { + "epoch": 38.29, + "learning_rate": 3.086327987594319e-05, + "loss": 2.1821, + "step": 7728000 + }, + { + "epoch": 38.29, + "learning_rate": 3.0862041289517104e-05, + "loss": 2.1247, + "step": 7728500 + }, + { + "epoch": 38.29, + "learning_rate": 3.086080270309102e-05, + "loss": 2.1724, + "step": 7729000 + }, + { + "epoch": 38.29, + "learning_rate": 3.085956411666494e-05, + "loss": 2.1415, + "step": 7729500 + }, + { + "epoch": 38.3, + "learning_rate": 3.0858325530238855e-05, + "loss": 2.1568, + "step": 7730000 + }, + { + "epoch": 38.3, + "learning_rate": 3.085708694381277e-05, + "loss": 2.1605, + "step": 7730500 + }, + { + "epoch": 38.3, + "learning_rate": 3.085584835738669e-05, + "loss": 2.1404, + "step": 7731000 + }, + { + "epoch": 38.3, + "learning_rate": 3.08546097709606e-05, + "loss": 2.166, + "step": 7731500 + }, + { + "epoch": 38.31, + "learning_rate": 3.085337366170737e-05, + "loss": 2.1407, + "step": 7732000 + }, + { + "epoch": 38.31, + "learning_rate": 3.0852135075281284e-05, + "loss": 2.151, + "step": 7732500 + }, + { + "epoch": 38.31, + "learning_rate": 3.08508964888552e-05, + "loss": 2.1633, + "step": 7733000 + }, + { + "epoch": 38.31, + "learning_rate": 3.084965790242912e-05, + "loss": 2.1448, + "step": 7733500 + }, + { + "epoch": 38.32, + "learning_rate": 3.0848419316003035e-05, + "loss": 2.1802, + "step": 7734000 + }, + { + "epoch": 38.32, + "learning_rate": 3.084718072957695e-05, + "loss": 2.1672, + "step": 7734500 + }, + { + "epoch": 38.32, + "learning_rate": 3.084594214315087e-05, + "loss": 2.1376, + "step": 7735000 + }, + { + "epoch": 38.32, + "learning_rate": 3.084470355672478e-05, + "loss": 2.1488, + "step": 7735500 + }, + { + "epoch": 38.33, + "learning_rate": 3.0843464970298696e-05, + "loss": 2.1596, + "step": 7736000 + }, + { + "epoch": 38.33, + "learning_rate": 3.084222638387261e-05, + "loss": 2.1616, + "step": 7736500 + }, + { + "epoch": 38.33, + "learning_rate": 3.084099027461939e-05, + "loss": 2.1577, + "step": 7737000 + }, + { + "epoch": 38.33, + "learning_rate": 3.0839751688193306e-05, + "loss": 2.1608, + "step": 7737500 + }, + { + "epoch": 38.34, + "learning_rate": 3.083851557894007e-05, + "loss": 2.1492, + "step": 7738000 + }, + { + "epoch": 38.34, + "learning_rate": 3.0837276992513985e-05, + "loss": 2.1875, + "step": 7738500 + }, + { + "epoch": 38.34, + "learning_rate": 3.08360384060879e-05, + "loss": 2.1617, + "step": 7739000 + }, + { + "epoch": 38.34, + "learning_rate": 3.083479981966182e-05, + "loss": 2.1713, + "step": 7739500 + }, + { + "epoch": 38.35, + "learning_rate": 3.0833561233235735e-05, + "loss": 2.1445, + "step": 7740000 + }, + { + "epoch": 38.35, + "learning_rate": 3.083232264680965e-05, + "loss": 2.1644, + "step": 7740500 + }, + { + "epoch": 38.35, + "learning_rate": 3.083108406038357e-05, + "loss": 2.1407, + "step": 7741000 + }, + { + "epoch": 38.35, + "learning_rate": 3.082984795113034e-05, + "loss": 2.1581, + "step": 7741500 + }, + { + "epoch": 38.36, + "learning_rate": 3.0828609364704255e-05, + "loss": 2.1386, + "step": 7742000 + }, + { + "epoch": 38.36, + "learning_rate": 3.082737077827817e-05, + "loss": 2.1513, + "step": 7742500 + }, + { + "epoch": 38.36, + "learning_rate": 3.082613219185209e-05, + "loss": 2.1501, + "step": 7743000 + }, + { + "epoch": 38.36, + "learning_rate": 3.082489608259885e-05, + "loss": 2.1552, + "step": 7743500 + }, + { + "epoch": 38.37, + "learning_rate": 3.082365749617277e-05, + "loss": 2.1685, + "step": 7744000 + }, + { + "epoch": 38.37, + "learning_rate": 3.0822418909746685e-05, + "loss": 2.1543, + "step": 7744500 + }, + { + "epoch": 38.37, + "learning_rate": 3.08211803233206e-05, + "loss": 2.1393, + "step": 7745000 + }, + { + "epoch": 38.37, + "learning_rate": 3.081994173689452e-05, + "loss": 2.1423, + "step": 7745500 + }, + { + "epoch": 38.38, + "learning_rate": 3.0818703150468435e-05, + "loss": 2.1453, + "step": 7746000 + }, + { + "epoch": 38.38, + "learning_rate": 3.081746456404235e-05, + "loss": 2.1586, + "step": 7746500 + }, + { + "epoch": 38.38, + "learning_rate": 3.081622597761627e-05, + "loss": 2.1634, + "step": 7747000 + }, + { + "epoch": 38.38, + "learning_rate": 3.0814987391190186e-05, + "loss": 2.1584, + "step": 7747500 + }, + { + "epoch": 38.39, + "learning_rate": 3.0813748804764096e-05, + "loss": 2.1648, + "step": 7748000 + }, + { + "epoch": 38.39, + "learning_rate": 3.081251021833801e-05, + "loss": 2.1442, + "step": 7748500 + }, + { + "epoch": 38.39, + "learning_rate": 3.081127163191193e-05, + "loss": 2.1382, + "step": 7749000 + }, + { + "epoch": 38.39, + "learning_rate": 3.081003304548585e-05, + "loss": 2.139, + "step": 7749500 + }, + { + "epoch": 38.4, + "learning_rate": 3.0808794459059764e-05, + "loss": 2.1524, + "step": 7750000 + }, + { + "epoch": 38.4, + "learning_rate": 3.080755587263368e-05, + "loss": 2.1801, + "step": 7750500 + }, + { + "epoch": 38.4, + "learning_rate": 3.080631976338045e-05, + "loss": 2.1802, + "step": 7751000 + }, + { + "epoch": 38.4, + "learning_rate": 3.080508365412722e-05, + "loss": 2.1441, + "step": 7751500 + }, + { + "epoch": 38.41, + "learning_rate": 3.0803845067701136e-05, + "loss": 2.1448, + "step": 7752000 + }, + { + "epoch": 38.41, + "learning_rate": 3.0802608958447904e-05, + "loss": 2.1827, + "step": 7752500 + }, + { + "epoch": 38.41, + "learning_rate": 3.080137037202182e-05, + "loss": 2.1571, + "step": 7753000 + }, + { + "epoch": 38.41, + "learning_rate": 3.080013178559574e-05, + "loss": 2.1433, + "step": 7753500 + }, + { + "epoch": 38.42, + "learning_rate": 3.0798893199169655e-05, + "loss": 2.1795, + "step": 7754000 + }, + { + "epoch": 38.42, + "learning_rate": 3.079765461274357e-05, + "loss": 2.1581, + "step": 7754500 + }, + { + "epoch": 38.42, + "learning_rate": 3.079641602631749e-05, + "loss": 2.1609, + "step": 7755000 + }, + { + "epoch": 38.42, + "learning_rate": 3.0795177439891406e-05, + "loss": 2.1601, + "step": 7755500 + }, + { + "epoch": 38.43, + "learning_rate": 3.079394133063817e-05, + "loss": 2.1734, + "step": 7756000 + }, + { + "epoch": 38.43, + "learning_rate": 3.0792702744212085e-05, + "loss": 2.1364, + "step": 7756500 + }, + { + "epoch": 38.43, + "learning_rate": 3.0791464157786e-05, + "loss": 2.1786, + "step": 7757000 + }, + { + "epoch": 38.43, + "learning_rate": 3.079022557135992e-05, + "loss": 2.174, + "step": 7757500 + }, + { + "epoch": 38.44, + "learning_rate": 3.0788986984933836e-05, + "loss": 2.138, + "step": 7758000 + }, + { + "epoch": 38.44, + "learning_rate": 3.078774839850775e-05, + "loss": 2.1685, + "step": 7758500 + }, + { + "epoch": 38.44, + "learning_rate": 3.078650981208167e-05, + "loss": 2.1682, + "step": 7759000 + }, + { + "epoch": 38.44, + "learning_rate": 3.0785271225655587e-05, + "loss": 2.1384, + "step": 7759500 + }, + { + "epoch": 38.45, + "learning_rate": 3.0784032639229503e-05, + "loss": 2.1512, + "step": 7760000 + }, + { + "epoch": 38.45, + "learning_rate": 3.0782794052803414e-05, + "loss": 2.1479, + "step": 7760500 + }, + { + "epoch": 38.45, + "learning_rate": 3.078155546637733e-05, + "loss": 2.1676, + "step": 7761000 + }, + { + "epoch": 38.45, + "learning_rate": 3.078031687995125e-05, + "loss": 2.1731, + "step": 7761500 + }, + { + "epoch": 38.46, + "learning_rate": 3.0779078293525164e-05, + "loss": 2.1725, + "step": 7762000 + }, + { + "epoch": 38.46, + "learning_rate": 3.077784218427194e-05, + "loss": 2.1648, + "step": 7762500 + }, + { + "epoch": 38.46, + "learning_rate": 3.07766060750187e-05, + "loss": 2.1484, + "step": 7763000 + }, + { + "epoch": 38.46, + "learning_rate": 3.077536748859262e-05, + "loss": 2.1783, + "step": 7763500 + }, + { + "epoch": 38.47, + "learning_rate": 3.0774128902166536e-05, + "loss": 2.1605, + "step": 7764000 + }, + { + "epoch": 38.47, + "learning_rate": 3.077289031574045e-05, + "loss": 2.1725, + "step": 7764500 + }, + { + "epoch": 38.47, + "learning_rate": 3.077165172931437e-05, + "loss": 2.1651, + "step": 7765000 + }, + { + "epoch": 38.47, + "learning_rate": 3.077041562006114e-05, + "loss": 2.1763, + "step": 7765500 + }, + { + "epoch": 38.48, + "learning_rate": 3.0769177033635055e-05, + "loss": 2.146, + "step": 7766000 + }, + { + "epoch": 38.48, + "learning_rate": 3.076793844720897e-05, + "loss": 2.168, + "step": 7766500 + }, + { + "epoch": 38.48, + "learning_rate": 3.076669986078289e-05, + "loss": 2.145, + "step": 7767000 + }, + { + "epoch": 38.48, + "learning_rate": 3.0765461274356806e-05, + "loss": 2.1671, + "step": 7767500 + }, + { + "epoch": 38.49, + "learning_rate": 3.076422268793072e-05, + "loss": 2.1792, + "step": 7768000 + }, + { + "epoch": 38.49, + "learning_rate": 3.076298410150464e-05, + "loss": 2.1563, + "step": 7768500 + }, + { + "epoch": 38.49, + "learning_rate": 3.076174551507856e-05, + "loss": 2.1745, + "step": 7769000 + }, + { + "epoch": 38.49, + "learning_rate": 3.0760506928652474e-05, + "loss": 2.1663, + "step": 7769500 + }, + { + "epoch": 38.5, + "learning_rate": 3.0759270819399236e-05, + "loss": 2.1363, + "step": 7770000 + }, + { + "epoch": 38.5, + "learning_rate": 3.075803223297315e-05, + "loss": 2.1503, + "step": 7770500 + }, + { + "epoch": 38.5, + "learning_rate": 3.075679364654707e-05, + "loss": 2.1608, + "step": 7771000 + }, + { + "epoch": 38.5, + "learning_rate": 3.075555506012099e-05, + "loss": 2.1483, + "step": 7771500 + }, + { + "epoch": 38.51, + "learning_rate": 3.0754316473694904e-05, + "loss": 2.1652, + "step": 7772000 + }, + { + "epoch": 38.51, + "learning_rate": 3.075307788726882e-05, + "loss": 2.148, + "step": 7772500 + }, + { + "epoch": 38.51, + "learning_rate": 3.075183930084273e-05, + "loss": 2.1688, + "step": 7773000 + }, + { + "epoch": 38.51, + "learning_rate": 3.0750603191589506e-05, + "loss": 2.1357, + "step": 7773500 + }, + { + "epoch": 38.52, + "learning_rate": 3.074936460516342e-05, + "loss": 2.1664, + "step": 7774000 + }, + { + "epoch": 38.52, + "learning_rate": 3.074812849591019e-05, + "loss": 2.1626, + "step": 7774500 + }, + { + "epoch": 38.52, + "learning_rate": 3.074688990948411e-05, + "loss": 2.15, + "step": 7775000 + }, + { + "epoch": 38.52, + "learning_rate": 3.074565132305802e-05, + "loss": 2.1828, + "step": 7775500 + }, + { + "epoch": 38.52, + "learning_rate": 3.0744412736631936e-05, + "loss": 2.1476, + "step": 7776000 + }, + { + "epoch": 38.53, + "learning_rate": 3.0743176627378705e-05, + "loss": 2.1792, + "step": 7776500 + }, + { + "epoch": 38.53, + "learning_rate": 3.074193804095262e-05, + "loss": 2.179, + "step": 7777000 + }, + { + "epoch": 38.53, + "learning_rate": 3.074069945452654e-05, + "loss": 2.1365, + "step": 7777500 + }, + { + "epoch": 38.53, + "learning_rate": 3.0739460868100456e-05, + "loss": 2.1565, + "step": 7778000 + }, + { + "epoch": 38.54, + "learning_rate": 3.073822228167437e-05, + "loss": 2.1655, + "step": 7778500 + }, + { + "epoch": 38.54, + "learning_rate": 3.073698369524829e-05, + "loss": 2.1732, + "step": 7779000 + }, + { + "epoch": 38.54, + "learning_rate": 3.0735745108822206e-05, + "loss": 2.1556, + "step": 7779500 + }, + { + "epoch": 38.54, + "learning_rate": 3.0734506522396123e-05, + "loss": 2.1604, + "step": 7780000 + }, + { + "epoch": 38.55, + "learning_rate": 3.073326793597004e-05, + "loss": 2.1461, + "step": 7780500 + }, + { + "epoch": 38.55, + "learning_rate": 3.073203182671681e-05, + "loss": 2.1542, + "step": 7781000 + }, + { + "epoch": 38.55, + "learning_rate": 3.0730793240290726e-05, + "loss": 2.164, + "step": 7781500 + }, + { + "epoch": 38.55, + "learning_rate": 3.0729554653864636e-05, + "loss": 2.1792, + "step": 7782000 + }, + { + "epoch": 38.56, + "learning_rate": 3.0728321021784264e-05, + "loss": 2.1498, + "step": 7782500 + }, + { + "epoch": 38.56, + "learning_rate": 3.0727082435358174e-05, + "loss": 2.1584, + "step": 7783000 + }, + { + "epoch": 38.56, + "learning_rate": 3.072584384893209e-05, + "loss": 2.1797, + "step": 7783500 + }, + { + "epoch": 38.56, + "learning_rate": 3.072460526250601e-05, + "loss": 2.1565, + "step": 7784000 + }, + { + "epoch": 38.57, + "learning_rate": 3.0723366676079925e-05, + "loss": 2.1586, + "step": 7784500 + }, + { + "epoch": 38.57, + "learning_rate": 3.072212808965384e-05, + "loss": 2.1527, + "step": 7785000 + }, + { + "epoch": 38.57, + "learning_rate": 3.072088950322776e-05, + "loss": 2.1582, + "step": 7785500 + }, + { + "epoch": 38.57, + "learning_rate": 3.0719650916801675e-05, + "loss": 2.1819, + "step": 7786000 + }, + { + "epoch": 38.58, + "learning_rate": 3.071841233037559e-05, + "loss": 2.1677, + "step": 7786500 + }, + { + "epoch": 38.58, + "learning_rate": 3.071717374394951e-05, + "loss": 2.1631, + "step": 7787000 + }, + { + "epoch": 38.58, + "learning_rate": 3.0715935157523426e-05, + "loss": 2.153, + "step": 7787500 + }, + { + "epoch": 38.58, + "learning_rate": 3.071469657109734e-05, + "loss": 2.1566, + "step": 7788000 + }, + { + "epoch": 38.59, + "learning_rate": 3.071345798467126e-05, + "loss": 2.1773, + "step": 7788500 + }, + { + "epoch": 38.59, + "learning_rate": 3.071221939824517e-05, + "loss": 2.1527, + "step": 7789000 + }, + { + "epoch": 38.59, + "learning_rate": 3.071098081181909e-05, + "loss": 2.1608, + "step": 7789500 + }, + { + "epoch": 38.59, + "learning_rate": 3.0709744702565856e-05, + "loss": 2.1594, + "step": 7790000 + }, + { + "epoch": 38.6, + "learning_rate": 3.070850611613977e-05, + "loss": 2.1544, + "step": 7790500 + }, + { + "epoch": 38.6, + "learning_rate": 3.070726752971369e-05, + "loss": 2.1647, + "step": 7791000 + }, + { + "epoch": 38.6, + "learning_rate": 3.070603142046046e-05, + "loss": 2.146, + "step": 7791500 + }, + { + "epoch": 38.6, + "learning_rate": 3.0704792834034376e-05, + "loss": 2.1485, + "step": 7792000 + }, + { + "epoch": 38.61, + "learning_rate": 3.070355424760829e-05, + "loss": 2.1703, + "step": 7792500 + }, + { + "epoch": 38.61, + "learning_rate": 3.070231566118221e-05, + "loss": 2.1722, + "step": 7793000 + }, + { + "epoch": 38.61, + "learning_rate": 3.0701077074756126e-05, + "loss": 2.1854, + "step": 7793500 + }, + { + "epoch": 38.61, + "learning_rate": 3.069983848833004e-05, + "loss": 2.1533, + "step": 7794000 + }, + { + "epoch": 38.62, + "learning_rate": 3.069859990190396e-05, + "loss": 2.141, + "step": 7794500 + }, + { + "epoch": 38.62, + "learning_rate": 3.069736131547788e-05, + "loss": 2.1703, + "step": 7795000 + }, + { + "epoch": 38.62, + "learning_rate": 3.069612520622464e-05, + "loss": 2.1582, + "step": 7795500 + }, + { + "epoch": 38.62, + "learning_rate": 3.0694886619798556e-05, + "loss": 2.1393, + "step": 7796000 + }, + { + "epoch": 38.63, + "learning_rate": 3.0693650510545325e-05, + "loss": 2.1568, + "step": 7796500 + }, + { + "epoch": 38.63, + "learning_rate": 3.069241192411924e-05, + "loss": 2.1373, + "step": 7797000 + }, + { + "epoch": 38.63, + "learning_rate": 3.069117333769316e-05, + "loss": 2.1609, + "step": 7797500 + }, + { + "epoch": 38.63, + "learning_rate": 3.0689934751267076e-05, + "loss": 2.1931, + "step": 7798000 + }, + { + "epoch": 38.64, + "learning_rate": 3.068869616484099e-05, + "loss": 2.1741, + "step": 7798500 + }, + { + "epoch": 38.64, + "learning_rate": 3.068745757841491e-05, + "loss": 2.1588, + "step": 7799000 + }, + { + "epoch": 38.64, + "learning_rate": 3.068622146916167e-05, + "loss": 2.1735, + "step": 7799500 + }, + { + "epoch": 38.64, + "learning_rate": 3.068498288273559e-05, + "loss": 2.1634, + "step": 7800000 + }, + { + "epoch": 38.65, + "learning_rate": 3.0683744296309505e-05, + "loss": 2.1464, + "step": 7800500 + }, + { + "epoch": 38.65, + "learning_rate": 3.068250570988342e-05, + "loss": 2.1824, + "step": 7801000 + }, + { + "epoch": 38.65, + "learning_rate": 3.068126712345734e-05, + "loss": 2.1648, + "step": 7801500 + }, + { + "epoch": 38.65, + "learning_rate": 3.0680028537031256e-05, + "loss": 2.1464, + "step": 7802000 + }, + { + "epoch": 38.66, + "learning_rate": 3.067878995060517e-05, + "loss": 2.1653, + "step": 7802500 + }, + { + "epoch": 38.66, + "learning_rate": 3.067755136417909e-05, + "loss": 2.1694, + "step": 7803000 + }, + { + "epoch": 38.66, + "learning_rate": 3.067631277775301e-05, + "loss": 2.1317, + "step": 7803500 + }, + { + "epoch": 38.66, + "learning_rate": 3.0675076668499776e-05, + "loss": 2.166, + "step": 7804000 + }, + { + "epoch": 38.67, + "learning_rate": 3.067383808207369e-05, + "loss": 2.1336, + "step": 7804500 + }, + { + "epoch": 38.67, + "learning_rate": 3.067259949564761e-05, + "loss": 2.1756, + "step": 7805000 + }, + { + "epoch": 38.67, + "learning_rate": 3.0671360909221527e-05, + "loss": 2.1769, + "step": 7805500 + }, + { + "epoch": 38.67, + "learning_rate": 3.0670122322795443e-05, + "loss": 2.168, + "step": 7806000 + }, + { + "epoch": 38.68, + "learning_rate": 3.066888373636936e-05, + "loss": 2.1716, + "step": 7806500 + }, + { + "epoch": 38.68, + "learning_rate": 3.066764762711612e-05, + "loss": 2.1611, + "step": 7807000 + }, + { + "epoch": 38.68, + "learning_rate": 3.066640904069004e-05, + "loss": 2.1616, + "step": 7807500 + }, + { + "epoch": 38.68, + "learning_rate": 3.0665170454263956e-05, + "loss": 2.1478, + "step": 7808000 + }, + { + "epoch": 38.69, + "learning_rate": 3.066393186783787e-05, + "loss": 2.1736, + "step": 7808500 + }, + { + "epoch": 38.69, + "learning_rate": 3.066269328141179e-05, + "loss": 2.1648, + "step": 7809000 + }, + { + "epoch": 38.69, + "learning_rate": 3.066145469498571e-05, + "loss": 2.1752, + "step": 7809500 + }, + { + "epoch": 38.69, + "learning_rate": 3.0660216108559624e-05, + "loss": 2.1323, + "step": 7810000 + }, + { + "epoch": 38.7, + "learning_rate": 3.065897752213354e-05, + "loss": 2.1641, + "step": 7810500 + }, + { + "epoch": 38.7, + "learning_rate": 3.065773893570746e-05, + "loss": 2.2023, + "step": 7811000 + }, + { + "epoch": 38.7, + "learning_rate": 3.0656500349281375e-05, + "loss": 2.1507, + "step": 7811500 + }, + { + "epoch": 38.7, + "learning_rate": 3.065526176285529e-05, + "loss": 2.1431, + "step": 7812000 + }, + { + "epoch": 38.71, + "learning_rate": 3.065402317642921e-05, + "loss": 2.1635, + "step": 7812500 + }, + { + "epoch": 38.71, + "learning_rate": 3.0652784590003126e-05, + "loss": 2.1486, + "step": 7813000 + }, + { + "epoch": 38.71, + "learning_rate": 3.0651548480749894e-05, + "loss": 2.1406, + "step": 7813500 + }, + { + "epoch": 38.71, + "learning_rate": 3.065030989432381e-05, + "loss": 2.1673, + "step": 7814000 + }, + { + "epoch": 38.72, + "learning_rate": 3.064907130789773e-05, + "loss": 2.1746, + "step": 7814500 + }, + { + "epoch": 38.72, + "learning_rate": 3.0647832721471645e-05, + "loss": 2.163, + "step": 7815000 + }, + { + "epoch": 38.72, + "learning_rate": 3.064659661221841e-05, + "loss": 2.1701, + "step": 7815500 + }, + { + "epoch": 38.72, + "learning_rate": 3.0645358025792324e-05, + "loss": 2.1663, + "step": 7816000 + }, + { + "epoch": 38.73, + "learning_rate": 3.064411943936624e-05, + "loss": 2.1682, + "step": 7816500 + }, + { + "epoch": 38.73, + "learning_rate": 3.064288085294016e-05, + "loss": 2.1865, + "step": 7817000 + }, + { + "epoch": 38.73, + "learning_rate": 3.064164474368693e-05, + "loss": 2.1702, + "step": 7817500 + }, + { + "epoch": 38.73, + "learning_rate": 3.0640406157260844e-05, + "loss": 2.1665, + "step": 7818000 + }, + { + "epoch": 38.74, + "learning_rate": 3.063916757083476e-05, + "loss": 2.1762, + "step": 7818500 + }, + { + "epoch": 38.74, + "learning_rate": 3.063792898440868e-05, + "loss": 2.146, + "step": 7819000 + }, + { + "epoch": 38.74, + "learning_rate": 3.0636690397982595e-05, + "loss": 2.1585, + "step": 7819500 + }, + { + "epoch": 38.74, + "learning_rate": 3.063545181155651e-05, + "loss": 2.1604, + "step": 7820000 + }, + { + "epoch": 38.75, + "learning_rate": 3.063421322513043e-05, + "loss": 2.1483, + "step": 7820500 + }, + { + "epoch": 38.75, + "learning_rate": 3.0632974638704345e-05, + "loss": 2.1786, + "step": 7821000 + }, + { + "epoch": 38.75, + "learning_rate": 3.063173605227826e-05, + "loss": 2.1559, + "step": 7821500 + }, + { + "epoch": 38.75, + "learning_rate": 3.063049746585218e-05, + "loss": 2.1845, + "step": 7822000 + }, + { + "epoch": 38.76, + "learning_rate": 3.062925887942609e-05, + "loss": 2.1505, + "step": 7822500 + }, + { + "epoch": 38.76, + "learning_rate": 3.0628020293000006e-05, + "loss": 2.1552, + "step": 7823000 + }, + { + "epoch": 38.76, + "learning_rate": 3.0626784183746775e-05, + "loss": 2.1628, + "step": 7823500 + }, + { + "epoch": 38.76, + "learning_rate": 3.062554559732069e-05, + "loss": 2.154, + "step": 7824000 + }, + { + "epoch": 38.77, + "learning_rate": 3.062430701089461e-05, + "loss": 2.1525, + "step": 7824500 + }, + { + "epoch": 38.77, + "learning_rate": 3.0623068424468526e-05, + "loss": 2.1739, + "step": 7825000 + }, + { + "epoch": 38.77, + "learning_rate": 3.062182983804244e-05, + "loss": 2.1721, + "step": 7825500 + }, + { + "epoch": 38.77, + "learning_rate": 3.062059125161635e-05, + "loss": 2.1654, + "step": 7826000 + }, + { + "epoch": 38.78, + "learning_rate": 3.061935266519027e-05, + "loss": 2.1693, + "step": 7826500 + }, + { + "epoch": 38.78, + "learning_rate": 3.061811407876419e-05, + "loss": 2.1554, + "step": 7827000 + }, + { + "epoch": 38.78, + "learning_rate": 3.061687796951096e-05, + "loss": 2.1482, + "step": 7827500 + }, + { + "epoch": 38.78, + "learning_rate": 3.061563938308488e-05, + "loss": 2.1546, + "step": 7828000 + }, + { + "epoch": 38.79, + "learning_rate": 3.0614400796658796e-05, + "loss": 2.1799, + "step": 7828500 + }, + { + "epoch": 38.79, + "learning_rate": 3.061316221023271e-05, + "loss": 2.1825, + "step": 7829000 + }, + { + "epoch": 38.79, + "learning_rate": 3.061192857815233e-05, + "loss": 2.1486, + "step": 7829500 + }, + { + "epoch": 38.79, + "learning_rate": 3.0610692468899096e-05, + "loss": 2.1515, + "step": 7830000 + }, + { + "epoch": 38.8, + "learning_rate": 3.060945388247301e-05, + "loss": 2.1725, + "step": 7830500 + }, + { + "epoch": 38.8, + "learning_rate": 3.060821529604693e-05, + "loss": 2.1588, + "step": 7831000 + }, + { + "epoch": 38.8, + "learning_rate": 3.060697670962085e-05, + "loss": 2.1636, + "step": 7831500 + }, + { + "epoch": 38.8, + "learning_rate": 3.0605738123194764e-05, + "loss": 2.1585, + "step": 7832000 + }, + { + "epoch": 38.8, + "learning_rate": 3.0604499536768674e-05, + "loss": 2.1466, + "step": 7832500 + }, + { + "epoch": 38.81, + "learning_rate": 3.060326095034259e-05, + "loss": 2.1806, + "step": 7833000 + }, + { + "epoch": 38.81, + "learning_rate": 3.060202236391651e-05, + "loss": 2.1412, + "step": 7833500 + }, + { + "epoch": 38.81, + "learning_rate": 3.0600783777490425e-05, + "loss": 2.1794, + "step": 7834000 + }, + { + "epoch": 38.81, + "learning_rate": 3.059954519106434e-05, + "loss": 2.1508, + "step": 7834500 + }, + { + "epoch": 38.82, + "learning_rate": 3.059830660463826e-05, + "loss": 2.1664, + "step": 7835000 + }, + { + "epoch": 38.82, + "learning_rate": 3.0597068018212175e-05, + "loss": 2.1578, + "step": 7835500 + }, + { + "epoch": 38.82, + "learning_rate": 3.059582943178609e-05, + "loss": 2.1817, + "step": 7836000 + }, + { + "epoch": 38.82, + "learning_rate": 3.059459084536001e-05, + "loss": 2.1615, + "step": 7836500 + }, + { + "epoch": 38.83, + "learning_rate": 3.059335473610678e-05, + "loss": 2.1464, + "step": 7837000 + }, + { + "epoch": 38.83, + "learning_rate": 3.0592116149680695e-05, + "loss": 2.1565, + "step": 7837500 + }, + { + "epoch": 38.83, + "learning_rate": 3.059087756325461e-05, + "loss": 2.1671, + "step": 7838000 + }, + { + "epoch": 38.83, + "learning_rate": 3.058964145400138e-05, + "loss": 2.1436, + "step": 7838500 + }, + { + "epoch": 38.84, + "learning_rate": 3.05884028675753e-05, + "loss": 2.1457, + "step": 7839000 + }, + { + "epoch": 38.84, + "learning_rate": 3.058716428114921e-05, + "loss": 2.1704, + "step": 7839500 + }, + { + "epoch": 38.84, + "learning_rate": 3.0585925694723125e-05, + "loss": 2.1605, + "step": 7840000 + }, + { + "epoch": 38.84, + "learning_rate": 3.058469206264275e-05, + "loss": 2.1886, + "step": 7840500 + }, + { + "epoch": 38.85, + "learning_rate": 3.058345347621667e-05, + "loss": 2.1732, + "step": 7841000 + }, + { + "epoch": 38.85, + "learning_rate": 3.0582214889790586e-05, + "loss": 2.1919, + "step": 7841500 + }, + { + "epoch": 38.85, + "learning_rate": 3.05809763033645e-05, + "loss": 2.1547, + "step": 7842000 + }, + { + "epoch": 38.85, + "learning_rate": 3.057973771693841e-05, + "loss": 2.182, + "step": 7842500 + }, + { + "epoch": 38.86, + "learning_rate": 3.057849913051233e-05, + "loss": 2.1522, + "step": 7843000 + }, + { + "epoch": 38.86, + "learning_rate": 3.057726054408625e-05, + "loss": 2.1648, + "step": 7843500 + }, + { + "epoch": 38.86, + "learning_rate": 3.0576021957660164e-05, + "loss": 2.1581, + "step": 7844000 + }, + { + "epoch": 38.86, + "learning_rate": 3.057478337123408e-05, + "loss": 2.1647, + "step": 7844500 + }, + { + "epoch": 38.87, + "learning_rate": 3.0573544784808e-05, + "loss": 2.1649, + "step": 7845000 + }, + { + "epoch": 38.87, + "learning_rate": 3.0572306198381915e-05, + "loss": 2.149, + "step": 7845500 + }, + { + "epoch": 38.87, + "learning_rate": 3.0571067611955825e-05, + "loss": 2.1535, + "step": 7846000 + }, + { + "epoch": 38.87, + "learning_rate": 3.056982902552974e-05, + "loss": 2.1511, + "step": 7846500 + }, + { + "epoch": 38.88, + "learning_rate": 3.056859043910366e-05, + "loss": 2.1786, + "step": 7847000 + }, + { + "epoch": 38.88, + "learning_rate": 3.0567351852677576e-05, + "loss": 2.1551, + "step": 7847500 + }, + { + "epoch": 38.88, + "learning_rate": 3.056611326625149e-05, + "loss": 2.1863, + "step": 7848000 + }, + { + "epoch": 38.88, + "learning_rate": 3.056487715699826e-05, + "loss": 2.1915, + "step": 7848500 + }, + { + "epoch": 38.89, + "learning_rate": 3.056364104774503e-05, + "loss": 2.163, + "step": 7849000 + }, + { + "epoch": 38.89, + "learning_rate": 3.056240246131895e-05, + "loss": 2.1685, + "step": 7849500 + }, + { + "epoch": 38.89, + "learning_rate": 3.0561166352065716e-05, + "loss": 2.1734, + "step": 7850000 + }, + { + "epoch": 38.89, + "learning_rate": 3.055992776563963e-05, + "loss": 2.1701, + "step": 7850500 + }, + { + "epoch": 38.9, + "learning_rate": 3.055868917921355e-05, + "loss": 2.1444, + "step": 7851000 + }, + { + "epoch": 38.9, + "learning_rate": 3.055745059278747e-05, + "loss": 2.1707, + "step": 7851500 + }, + { + "epoch": 38.9, + "learning_rate": 3.055621200636138e-05, + "loss": 2.1581, + "step": 7852000 + }, + { + "epoch": 38.9, + "learning_rate": 3.0554973419935294e-05, + "loss": 2.1871, + "step": 7852500 + }, + { + "epoch": 38.91, + "learning_rate": 3.055373483350921e-05, + "loss": 2.19, + "step": 7853000 + }, + { + "epoch": 38.91, + "learning_rate": 3.055249624708313e-05, + "loss": 2.1667, + "step": 7853500 + }, + { + "epoch": 38.91, + "learning_rate": 3.0551257660657044e-05, + "loss": 2.1786, + "step": 7854000 + }, + { + "epoch": 38.91, + "learning_rate": 3.055002155140382e-05, + "loss": 2.1762, + "step": 7854500 + }, + { + "epoch": 38.92, + "learning_rate": 3.054878296497773e-05, + "loss": 2.1552, + "step": 7855000 + }, + { + "epoch": 38.92, + "learning_rate": 3.054754437855165e-05, + "loss": 2.1586, + "step": 7855500 + }, + { + "epoch": 38.92, + "learning_rate": 3.0546305792125564e-05, + "loss": 2.1457, + "step": 7856000 + }, + { + "epoch": 38.92, + "learning_rate": 3.054506968287233e-05, + "loss": 2.1658, + "step": 7856500 + }, + { + "epoch": 38.93, + "learning_rate": 3.054383109644625e-05, + "loss": 2.1832, + "step": 7857000 + }, + { + "epoch": 38.93, + "learning_rate": 3.054259251002017e-05, + "loss": 2.1612, + "step": 7857500 + }, + { + "epoch": 38.93, + "learning_rate": 3.054135392359408e-05, + "loss": 2.1709, + "step": 7858000 + }, + { + "epoch": 38.93, + "learning_rate": 3.0540115337167994e-05, + "loss": 2.1588, + "step": 7858500 + }, + { + "epoch": 38.94, + "learning_rate": 3.053887922791477e-05, + "loss": 2.1646, + "step": 7859000 + }, + { + "epoch": 38.94, + "learning_rate": 3.0537640641488686e-05, + "loss": 2.1794, + "step": 7859500 + }, + { + "epoch": 38.94, + "learning_rate": 3.0536404532235455e-05, + "loss": 2.178, + "step": 7860000 + }, + { + "epoch": 38.94, + "learning_rate": 3.0535165945809365e-05, + "loss": 2.1825, + "step": 7860500 + }, + { + "epoch": 38.95, + "learning_rate": 3.053392735938328e-05, + "loss": 2.1545, + "step": 7861000 + }, + { + "epoch": 38.95, + "learning_rate": 3.05326887729572e-05, + "loss": 2.1719, + "step": 7861500 + }, + { + "epoch": 38.95, + "learning_rate": 3.053145266370397e-05, + "loss": 2.1506, + "step": 7862000 + }, + { + "epoch": 38.95, + "learning_rate": 3.0530214077277885e-05, + "loss": 2.1656, + "step": 7862500 + }, + { + "epoch": 38.96, + "learning_rate": 3.05289754908518e-05, + "loss": 2.1701, + "step": 7863000 + }, + { + "epoch": 38.96, + "learning_rate": 3.052773690442572e-05, + "loss": 2.1722, + "step": 7863500 + }, + { + "epoch": 38.96, + "learning_rate": 3.052650079517249e-05, + "loss": 2.1621, + "step": 7864000 + }, + { + "epoch": 38.96, + "learning_rate": 3.0525262208746404e-05, + "loss": 2.1586, + "step": 7864500 + }, + { + "epoch": 38.97, + "learning_rate": 3.052402362232032e-05, + "loss": 2.1747, + "step": 7865000 + }, + { + "epoch": 38.97, + "learning_rate": 3.052278503589424e-05, + "loss": 2.1669, + "step": 7865500 + }, + { + "epoch": 38.97, + "learning_rate": 3.0521546449468155e-05, + "loss": 2.17, + "step": 7866000 + }, + { + "epoch": 38.97, + "learning_rate": 3.052030786304207e-05, + "loss": 2.1544, + "step": 7866500 + }, + { + "epoch": 38.98, + "learning_rate": 3.051906927661599e-05, + "loss": 2.161, + "step": 7867000 + }, + { + "epoch": 38.98, + "learning_rate": 3.05178306901899e-05, + "loss": 2.1717, + "step": 7867500 + }, + { + "epoch": 38.98, + "learning_rate": 3.0516594580936668e-05, + "loss": 2.1653, + "step": 7868000 + }, + { + "epoch": 38.98, + "learning_rate": 3.0515355994510585e-05, + "loss": 2.1722, + "step": 7868500 + }, + { + "epoch": 38.99, + "learning_rate": 3.0514117408084502e-05, + "loss": 2.1728, + "step": 7869000 + }, + { + "epoch": 38.99, + "learning_rate": 3.051287882165842e-05, + "loss": 2.1704, + "step": 7869500 + }, + { + "epoch": 38.99, + "learning_rate": 3.0511640235232336e-05, + "loss": 2.1638, + "step": 7870000 + }, + { + "epoch": 38.99, + "learning_rate": 3.0510401648806253e-05, + "loss": 2.1512, + "step": 7870500 + }, + { + "epoch": 39.0, + "learning_rate": 3.0509165539553018e-05, + "loss": 2.1884, + "step": 7871000 + }, + { + "epoch": 39.0, + "learning_rate": 3.0507926953126935e-05, + "loss": 2.1752, + "step": 7871500 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.6649884989419301, + "eval_accuracy_mlm": 0.6219056881216726, + "eval_accuracy_nsp": 0.867931706666562, + "eval_loss": 2.296628952026367, + "eval_runtime": 146.0071, + "eval_samples_per_second": 1746.209, + "eval_steps_per_second": 72.764, + "step": 7871877 + }, + { + "epoch": 39.0, + "learning_rate": 3.0506688366700852e-05, + "loss": 2.1657, + "step": 7872000 + }, + { + "epoch": 39.0, + "learning_rate": 3.050544978027477e-05, + "loss": 2.1253, + "step": 7872500 + }, + { + "epoch": 39.01, + "learning_rate": 3.0504211193848686e-05, + "loss": 2.1353, + "step": 7873000 + }, + { + "epoch": 39.01, + "learning_rate": 3.0502972607422603e-05, + "loss": 2.1536, + "step": 7873500 + }, + { + "epoch": 39.01, + "learning_rate": 3.0501736498169368e-05, + "loss": 2.1249, + "step": 7874000 + }, + { + "epoch": 39.01, + "learning_rate": 3.0500497911743285e-05, + "loss": 2.1628, + "step": 7874500 + }, + { + "epoch": 39.02, + "learning_rate": 3.0499259325317202e-05, + "loss": 2.1184, + "step": 7875000 + }, + { + "epoch": 39.02, + "learning_rate": 3.049802073889112e-05, + "loss": 2.1196, + "step": 7875500 + }, + { + "epoch": 39.02, + "learning_rate": 3.0496782152465036e-05, + "loss": 2.1508, + "step": 7876000 + }, + { + "epoch": 39.02, + "learning_rate": 3.0495546043211805e-05, + "loss": 2.1501, + "step": 7876500 + }, + { + "epoch": 39.03, + "learning_rate": 3.049430745678572e-05, + "loss": 2.1245, + "step": 7877000 + }, + { + "epoch": 39.03, + "learning_rate": 3.0493068870359635e-05, + "loss": 2.1496, + "step": 7877500 + }, + { + "epoch": 39.03, + "learning_rate": 3.0491830283933552e-05, + "loss": 2.1452, + "step": 7878000 + }, + { + "epoch": 39.03, + "learning_rate": 3.049059169750747e-05, + "loss": 2.1498, + "step": 7878500 + }, + { + "epoch": 39.04, + "learning_rate": 3.0489353111081386e-05, + "loss": 2.1396, + "step": 7879000 + }, + { + "epoch": 39.04, + "learning_rate": 3.0488114524655303e-05, + "loss": 2.1236, + "step": 7879500 + }, + { + "epoch": 39.04, + "learning_rate": 3.048687593822922e-05, + "loss": 2.1515, + "step": 7880000 + }, + { + "epoch": 39.04, + "learning_rate": 3.048563982897599e-05, + "loss": 2.1484, + "step": 7880500 + }, + { + "epoch": 39.05, + "learning_rate": 3.0484401242549902e-05, + "loss": 2.133, + "step": 7881000 + }, + { + "epoch": 39.05, + "learning_rate": 3.048316265612382e-05, + "loss": 2.1314, + "step": 7881500 + }, + { + "epoch": 39.05, + "learning_rate": 3.0481926546870588e-05, + "loss": 2.1429, + "step": 7882000 + }, + { + "epoch": 39.05, + "learning_rate": 3.0480687960444505e-05, + "loss": 2.1548, + "step": 7882500 + }, + { + "epoch": 39.06, + "learning_rate": 3.0479451851191277e-05, + "loss": 2.1394, + "step": 7883000 + }, + { + "epoch": 39.06, + "learning_rate": 3.0478213264765194e-05, + "loss": 2.1301, + "step": 7883500 + }, + { + "epoch": 39.06, + "learning_rate": 3.0476974678339104e-05, + "loss": 2.1422, + "step": 7884000 + }, + { + "epoch": 39.06, + "learning_rate": 3.047573609191302e-05, + "loss": 2.137, + "step": 7884500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0474497505486938e-05, + "loss": 2.1319, + "step": 7885000 + }, + { + "epoch": 39.07, + "learning_rate": 3.0473258919060855e-05, + "loss": 2.1218, + "step": 7885500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0472020332634772e-05, + "loss": 2.1418, + "step": 7886000 + }, + { + "epoch": 39.07, + "learning_rate": 3.047078174620869e-05, + "loss": 2.1179, + "step": 7886500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0469543159782606e-05, + "loss": 2.133, + "step": 7887000 + }, + { + "epoch": 39.08, + "learning_rate": 3.046830457335652e-05, + "loss": 2.1354, + "step": 7887500 + }, + { + "epoch": 39.08, + "learning_rate": 3.0467065986930436e-05, + "loss": 2.1265, + "step": 7888000 + }, + { + "epoch": 39.08, + "learning_rate": 3.0465827400504353e-05, + "loss": 2.137, + "step": 7888500 + }, + { + "epoch": 39.08, + "learning_rate": 3.046458881407827e-05, + "loss": 2.1538, + "step": 7889000 + }, + { + "epoch": 39.09, + "learning_rate": 3.0463350227652187e-05, + "loss": 2.1145, + "step": 7889500 + }, + { + "epoch": 39.09, + "learning_rate": 3.0462111641226104e-05, + "loss": 2.1411, + "step": 7890000 + }, + { + "epoch": 39.09, + "learning_rate": 3.046087305480002e-05, + "loss": 2.1373, + "step": 7890500 + }, + { + "epoch": 39.09, + "learning_rate": 3.0459634468373938e-05, + "loss": 2.1356, + "step": 7891000 + }, + { + "epoch": 39.1, + "learning_rate": 3.0458398359120703e-05, + "loss": 2.1337, + "step": 7891500 + }, + { + "epoch": 39.1, + "learning_rate": 3.045715977269462e-05, + "loss": 2.1247, + "step": 7892000 + }, + { + "epoch": 39.1, + "learning_rate": 3.0455921186268537e-05, + "loss": 2.1473, + "step": 7892500 + }, + { + "epoch": 39.1, + "learning_rate": 3.0454682599842454e-05, + "loss": 2.1322, + "step": 7893000 + }, + { + "epoch": 39.11, + "learning_rate": 3.045344401341637e-05, + "loss": 2.1385, + "step": 7893500 + }, + { + "epoch": 39.11, + "learning_rate": 3.0452205426990288e-05, + "loss": 2.1519, + "step": 7894000 + }, + { + "epoch": 39.11, + "learning_rate": 3.0450966840564205e-05, + "loss": 2.1409, + "step": 7894500 + }, + { + "epoch": 39.11, + "learning_rate": 3.044973073131097e-05, + "loss": 2.1508, + "step": 7895000 + }, + { + "epoch": 39.12, + "learning_rate": 3.0448492144884887e-05, + "loss": 2.1595, + "step": 7895500 + }, + { + "epoch": 39.12, + "learning_rate": 3.0447253558458804e-05, + "loss": 2.1137, + "step": 7896000 + }, + { + "epoch": 39.12, + "learning_rate": 3.044601497203272e-05, + "loss": 2.1519, + "step": 7896500 + }, + { + "epoch": 39.12, + "learning_rate": 3.044477886277949e-05, + "loss": 2.1222, + "step": 7897000 + }, + { + "epoch": 39.13, + "learning_rate": 3.0443540276353407e-05, + "loss": 2.1545, + "step": 7897500 + }, + { + "epoch": 39.13, + "learning_rate": 3.044230168992732e-05, + "loss": 2.132, + "step": 7898000 + }, + { + "epoch": 39.13, + "learning_rate": 3.0441063103501237e-05, + "loss": 2.1354, + "step": 7898500 + }, + { + "epoch": 39.13, + "learning_rate": 3.0439826994248006e-05, + "loss": 2.1435, + "step": 7899000 + }, + { + "epoch": 39.14, + "learning_rate": 3.0438588407821923e-05, + "loss": 2.1475, + "step": 7899500 + }, + { + "epoch": 39.14, + "learning_rate": 3.043734982139584e-05, + "loss": 2.1216, + "step": 7900000 + }, + { + "epoch": 39.14, + "learning_rate": 3.0436113712142605e-05, + "loss": 2.1333, + "step": 7900500 + }, + { + "epoch": 39.14, + "learning_rate": 3.0434875125716522e-05, + "loss": 2.1707, + "step": 7901000 + }, + { + "epoch": 39.15, + "learning_rate": 3.043363653929044e-05, + "loss": 2.1407, + "step": 7901500 + }, + { + "epoch": 39.15, + "learning_rate": 3.0432397952864356e-05, + "loss": 2.1273, + "step": 7902000 + }, + { + "epoch": 39.15, + "learning_rate": 3.0431159366438273e-05, + "loss": 2.1465, + "step": 7902500 + }, + { + "epoch": 39.15, + "learning_rate": 3.042992325718504e-05, + "loss": 2.1403, + "step": 7903000 + }, + { + "epoch": 39.16, + "learning_rate": 3.0428684670758955e-05, + "loss": 2.1304, + "step": 7903500 + }, + { + "epoch": 39.16, + "learning_rate": 3.0427446084332872e-05, + "loss": 2.1517, + "step": 7904000 + }, + { + "epoch": 39.16, + "learning_rate": 3.042620749790679e-05, + "loss": 2.1458, + "step": 7904500 + }, + { + "epoch": 39.16, + "learning_rate": 3.0424968911480706e-05, + "loss": 2.1455, + "step": 7905000 + }, + { + "epoch": 39.17, + "learning_rate": 3.0423730325054623e-05, + "loss": 2.1511, + "step": 7905500 + }, + { + "epoch": 39.17, + "learning_rate": 3.042249173862854e-05, + "loss": 2.1395, + "step": 7906000 + }, + { + "epoch": 39.17, + "learning_rate": 3.0421253152202457e-05, + "loss": 2.1483, + "step": 7906500 + }, + { + "epoch": 39.17, + "learning_rate": 3.0420014565776374e-05, + "loss": 2.1353, + "step": 7907000 + }, + { + "epoch": 39.18, + "learning_rate": 3.041877597935029e-05, + "loss": 2.1086, + "step": 7907500 + }, + { + "epoch": 39.18, + "learning_rate": 3.0417537392924204e-05, + "loss": 2.1319, + "step": 7908000 + }, + { + "epoch": 39.18, + "learning_rate": 3.041629880649812e-05, + "loss": 2.1362, + "step": 7908500 + }, + { + "epoch": 39.18, + "learning_rate": 3.041506269724489e-05, + "loss": 2.1574, + "step": 7909000 + }, + { + "epoch": 39.19, + "learning_rate": 3.0413826587991655e-05, + "loss": 2.149, + "step": 7909500 + }, + { + "epoch": 39.19, + "learning_rate": 3.0412588001565572e-05, + "loss": 2.1484, + "step": 7910000 + }, + { + "epoch": 39.19, + "learning_rate": 3.041134941513949e-05, + "loss": 2.1333, + "step": 7910500 + }, + { + "epoch": 39.19, + "learning_rate": 3.0410110828713406e-05, + "loss": 2.1348, + "step": 7911000 + }, + { + "epoch": 39.2, + "learning_rate": 3.0408872242287323e-05, + "loss": 2.1438, + "step": 7911500 + }, + { + "epoch": 39.2, + "learning_rate": 3.040763365586124e-05, + "loss": 2.1444, + "step": 7912000 + }, + { + "epoch": 39.2, + "learning_rate": 3.0406395069435157e-05, + "loss": 2.1385, + "step": 7912500 + }, + { + "epoch": 39.2, + "learning_rate": 3.0405156483009074e-05, + "loss": 2.1308, + "step": 7913000 + }, + { + "epoch": 39.21, + "learning_rate": 3.040391789658299e-05, + "loss": 2.1406, + "step": 7913500 + }, + { + "epoch": 39.21, + "learning_rate": 3.0402679310156908e-05, + "loss": 2.1446, + "step": 7914000 + }, + { + "epoch": 39.21, + "learning_rate": 3.040144072373082e-05, + "loss": 2.1441, + "step": 7914500 + }, + { + "epoch": 39.21, + "learning_rate": 3.040020461447759e-05, + "loss": 2.1577, + "step": 7915000 + }, + { + "epoch": 39.22, + "learning_rate": 3.0398966028051507e-05, + "loss": 2.1304, + "step": 7915500 + }, + { + "epoch": 39.22, + "learning_rate": 3.0397727441625424e-05, + "loss": 2.1456, + "step": 7916000 + }, + { + "epoch": 39.22, + "learning_rate": 3.039648885519934e-05, + "loss": 2.1595, + "step": 7916500 + }, + { + "epoch": 39.22, + "learning_rate": 3.0395252745946106e-05, + "loss": 2.1297, + "step": 7917000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0394014159520023e-05, + "loss": 2.1346, + "step": 7917500 + }, + { + "epoch": 39.23, + "learning_rate": 3.039277557309394e-05, + "loss": 2.1546, + "step": 7918000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0391536986667857e-05, + "loss": 2.1356, + "step": 7918500 + }, + { + "epoch": 39.23, + "learning_rate": 3.0390298400241774e-05, + "loss": 2.1417, + "step": 7919000 + }, + { + "epoch": 39.24, + "learning_rate": 3.038905981381569e-05, + "loss": 2.1567, + "step": 7919500 + }, + { + "epoch": 39.24, + "learning_rate": 3.0387821227389608e-05, + "loss": 2.1488, + "step": 7920000 + }, + { + "epoch": 39.24, + "learning_rate": 3.0386582640963525e-05, + "loss": 2.1396, + "step": 7920500 + }, + { + "epoch": 39.24, + "learning_rate": 3.0385344054537442e-05, + "loss": 2.1383, + "step": 7921000 + }, + { + "epoch": 39.25, + "learning_rate": 3.0384105468111355e-05, + "loss": 2.099, + "step": 7921500 + }, + { + "epoch": 39.25, + "learning_rate": 3.0382866881685272e-05, + "loss": 2.141, + "step": 7922000 + }, + { + "epoch": 39.25, + "learning_rate": 3.038162829525919e-05, + "loss": 2.1426, + "step": 7922500 + }, + { + "epoch": 39.25, + "learning_rate": 3.0380392186005958e-05, + "loss": 2.1428, + "step": 7923000 + }, + { + "epoch": 39.26, + "learning_rate": 3.0379153599579875e-05, + "loss": 2.1092, + "step": 7923500 + }, + { + "epoch": 39.26, + "learning_rate": 3.037791749032664e-05, + "loss": 2.1557, + "step": 7924000 + }, + { + "epoch": 39.26, + "learning_rate": 3.0376678903900557e-05, + "loss": 2.1542, + "step": 7924500 + }, + { + "epoch": 39.26, + "learning_rate": 3.0375440317474474e-05, + "loss": 2.1493, + "step": 7925000 + }, + { + "epoch": 39.27, + "learning_rate": 3.037420173104839e-05, + "loss": 2.1659, + "step": 7925500 + }, + { + "epoch": 39.27, + "learning_rate": 3.0372965621795157e-05, + "loss": 2.1458, + "step": 7926000 + }, + { + "epoch": 39.27, + "learning_rate": 3.037172951254193e-05, + "loss": 2.1533, + "step": 7926500 + }, + { + "epoch": 39.27, + "learning_rate": 3.0370490926115846e-05, + "loss": 2.1417, + "step": 7927000 + }, + { + "epoch": 39.28, + "learning_rate": 3.036925233968976e-05, + "loss": 2.1492, + "step": 7927500 + }, + { + "epoch": 39.28, + "learning_rate": 3.0368013753263673e-05, + "loss": 2.1322, + "step": 7928000 + }, + { + "epoch": 39.28, + "learning_rate": 3.036677516683759e-05, + "loss": 2.1395, + "step": 7928500 + }, + { + "epoch": 39.28, + "learning_rate": 3.0365536580411507e-05, + "loss": 2.1302, + "step": 7929000 + }, + { + "epoch": 39.29, + "learning_rate": 3.0364297993985424e-05, + "loss": 2.1257, + "step": 7929500 + }, + { + "epoch": 39.29, + "learning_rate": 3.036305940755934e-05, + "loss": 2.1455, + "step": 7930000 + }, + { + "epoch": 39.29, + "learning_rate": 3.0361820821133257e-05, + "loss": 2.1264, + "step": 7930500 + }, + { + "epoch": 39.29, + "learning_rate": 3.0360582234707174e-05, + "loss": 2.1652, + "step": 7931000 + }, + { + "epoch": 39.3, + "learning_rate": 3.035934364828109e-05, + "loss": 2.1703, + "step": 7931500 + }, + { + "epoch": 39.3, + "learning_rate": 3.0358105061855008e-05, + "loss": 2.1462, + "step": 7932000 + }, + { + "epoch": 39.3, + "learning_rate": 3.0356866475428925e-05, + "loss": 2.1323, + "step": 7932500 + }, + { + "epoch": 39.3, + "learning_rate": 3.035563036617569e-05, + "loss": 2.1556, + "step": 7933000 + }, + { + "epoch": 39.31, + "learning_rate": 3.0354391779749607e-05, + "loss": 2.1391, + "step": 7933500 + }, + { + "epoch": 39.31, + "learning_rate": 3.0353155670496376e-05, + "loss": 2.1458, + "step": 7934000 + }, + { + "epoch": 39.31, + "learning_rate": 3.0351917084070293e-05, + "loss": 2.1295, + "step": 7934500 + }, + { + "epoch": 39.31, + "learning_rate": 3.0350678497644207e-05, + "loss": 2.1523, + "step": 7935000 + }, + { + "epoch": 39.32, + "learning_rate": 3.0349439911218124e-05, + "loss": 2.1252, + "step": 7935500 + }, + { + "epoch": 39.32, + "learning_rate": 3.0348203801964896e-05, + "loss": 2.1702, + "step": 7936000 + }, + { + "epoch": 39.32, + "learning_rate": 3.0346965215538813e-05, + "loss": 2.1224, + "step": 7936500 + }, + { + "epoch": 39.32, + "learning_rate": 3.034572910628558e-05, + "loss": 2.1537, + "step": 7937000 + }, + { + "epoch": 39.33, + "learning_rate": 3.03444905198595e-05, + "loss": 2.1324, + "step": 7937500 + }, + { + "epoch": 39.33, + "learning_rate": 3.0343251933433415e-05, + "loss": 2.1567, + "step": 7938000 + }, + { + "epoch": 39.33, + "learning_rate": 3.0342013347007332e-05, + "loss": 2.1463, + "step": 7938500 + }, + { + "epoch": 39.33, + "learning_rate": 3.0340774760581246e-05, + "loss": 2.1471, + "step": 7939000 + }, + { + "epoch": 39.34, + "learning_rate": 3.0339538651328015e-05, + "loss": 2.1427, + "step": 7939500 + }, + { + "epoch": 39.34, + "learning_rate": 3.033830006490193e-05, + "loss": 2.1476, + "step": 7940000 + }, + { + "epoch": 39.34, + "learning_rate": 3.033706147847585e-05, + "loss": 2.1357, + "step": 7940500 + }, + { + "epoch": 39.34, + "learning_rate": 3.0335822892049765e-05, + "loss": 2.1364, + "step": 7941000 + }, + { + "epoch": 39.34, + "learning_rate": 3.0334584305623682e-05, + "loss": 2.1433, + "step": 7941500 + }, + { + "epoch": 39.35, + "learning_rate": 3.0333348196370448e-05, + "loss": 2.1451, + "step": 7942000 + }, + { + "epoch": 39.35, + "learning_rate": 3.0332109609944365e-05, + "loss": 2.1652, + "step": 7942500 + }, + { + "epoch": 39.35, + "learning_rate": 3.0330871023518282e-05, + "loss": 2.16, + "step": 7943000 + }, + { + "epoch": 39.35, + "learning_rate": 3.03296324370922e-05, + "loss": 2.1619, + "step": 7943500 + }, + { + "epoch": 39.36, + "learning_rate": 3.0328393850666116e-05, + "loss": 2.1616, + "step": 7944000 + }, + { + "epoch": 39.36, + "learning_rate": 3.032715774141288e-05, + "loss": 2.1725, + "step": 7944500 + }, + { + "epoch": 39.36, + "learning_rate": 3.0325919154986798e-05, + "loss": 2.1325, + "step": 7945000 + }, + { + "epoch": 39.36, + "learning_rate": 3.0324680568560715e-05, + "loss": 2.1277, + "step": 7945500 + }, + { + "epoch": 39.37, + "learning_rate": 3.0323441982134632e-05, + "loss": 2.1332, + "step": 7946000 + }, + { + "epoch": 39.37, + "learning_rate": 3.032220339570855e-05, + "loss": 2.1359, + "step": 7946500 + }, + { + "epoch": 39.37, + "learning_rate": 3.0320967286455314e-05, + "loss": 2.1517, + "step": 7947000 + }, + { + "epoch": 39.37, + "learning_rate": 3.031972870002923e-05, + "loss": 2.152, + "step": 7947500 + }, + { + "epoch": 39.38, + "learning_rate": 3.0318490113603148e-05, + "loss": 2.1653, + "step": 7948000 + }, + { + "epoch": 39.38, + "learning_rate": 3.0317251527177065e-05, + "loss": 2.1808, + "step": 7948500 + }, + { + "epoch": 39.38, + "learning_rate": 3.0316012940750982e-05, + "loss": 2.1778, + "step": 7949000 + }, + { + "epoch": 39.38, + "learning_rate": 3.03147743543249e-05, + "loss": 2.1583, + "step": 7949500 + }, + { + "epoch": 39.39, + "learning_rate": 3.0313535767898816e-05, + "loss": 2.1499, + "step": 7950000 + }, + { + "epoch": 39.39, + "learning_rate": 3.0312297181472733e-05, + "loss": 2.1613, + "step": 7950500 + }, + { + "epoch": 39.39, + "learning_rate": 3.031105859504665e-05, + "loss": 2.1439, + "step": 7951000 + }, + { + "epoch": 39.39, + "learning_rate": 3.0309820008620566e-05, + "loss": 2.1565, + "step": 7951500 + }, + { + "epoch": 39.4, + "learning_rate": 3.0308581422194483e-05, + "loss": 2.1586, + "step": 7952000 + }, + { + "epoch": 39.4, + "learning_rate": 3.0307342835768394e-05, + "loss": 2.1902, + "step": 7952500 + }, + { + "epoch": 39.4, + "learning_rate": 3.030610424934231e-05, + "loss": 2.1583, + "step": 7953000 + }, + { + "epoch": 39.4, + "learning_rate": 3.0304865662916227e-05, + "loss": 2.1433, + "step": 7953500 + }, + { + "epoch": 39.41, + "learning_rate": 3.0303627076490144e-05, + "loss": 2.1525, + "step": 7954000 + }, + { + "epoch": 39.41, + "learning_rate": 3.030238849006406e-05, + "loss": 2.1442, + "step": 7954500 + }, + { + "epoch": 39.41, + "learning_rate": 3.0301149903637975e-05, + "loss": 2.1515, + "step": 7955000 + }, + { + "epoch": 39.41, + "learning_rate": 3.0299911317211892e-05, + "loss": 2.146, + "step": 7955500 + }, + { + "epoch": 39.42, + "learning_rate": 3.029867520795866e-05, + "loss": 2.1483, + "step": 7956000 + }, + { + "epoch": 39.42, + "learning_rate": 3.0297436621532577e-05, + "loss": 2.1599, + "step": 7956500 + }, + { + "epoch": 39.42, + "learning_rate": 3.0296198035106494e-05, + "loss": 2.1642, + "step": 7957000 + }, + { + "epoch": 39.42, + "learning_rate": 3.0294961925853267e-05, + "loss": 2.1511, + "step": 7957500 + }, + { + "epoch": 39.43, + "learning_rate": 3.0293723339427184e-05, + "loss": 2.1465, + "step": 7958000 + }, + { + "epoch": 39.43, + "learning_rate": 3.02924847530011e-05, + "loss": 2.157, + "step": 7958500 + }, + { + "epoch": 39.43, + "learning_rate": 3.0291248643747866e-05, + "loss": 2.1562, + "step": 7959000 + }, + { + "epoch": 39.43, + "learning_rate": 3.0290010057321783e-05, + "loss": 2.1536, + "step": 7959500 + }, + { + "epoch": 39.44, + "learning_rate": 3.02887714708957e-05, + "loss": 2.1747, + "step": 7960000 + }, + { + "epoch": 39.44, + "learning_rate": 3.0287532884469617e-05, + "loss": 2.1465, + "step": 7960500 + }, + { + "epoch": 39.44, + "learning_rate": 3.0286296775216382e-05, + "loss": 2.1475, + "step": 7961000 + }, + { + "epoch": 39.44, + "learning_rate": 3.02850581887903e-05, + "loss": 2.1554, + "step": 7961500 + }, + { + "epoch": 39.45, + "learning_rate": 3.0283819602364216e-05, + "loss": 2.1414, + "step": 7962000 + }, + { + "epoch": 39.45, + "learning_rate": 3.0282581015938133e-05, + "loss": 2.1614, + "step": 7962500 + }, + { + "epoch": 39.45, + "learning_rate": 3.028134242951205e-05, + "loss": 2.1447, + "step": 7963000 + }, + { + "epoch": 39.45, + "learning_rate": 3.0280103843085967e-05, + "loss": 2.1288, + "step": 7963500 + }, + { + "epoch": 39.46, + "learning_rate": 3.0278865256659884e-05, + "loss": 2.1451, + "step": 7964000 + }, + { + "epoch": 39.46, + "learning_rate": 3.02776266702338e-05, + "loss": 2.1575, + "step": 7964500 + }, + { + "epoch": 39.46, + "learning_rate": 3.027638808380771e-05, + "loss": 2.1413, + "step": 7965000 + }, + { + "epoch": 39.46, + "learning_rate": 3.0275149497381628e-05, + "loss": 2.1849, + "step": 7965500 + }, + { + "epoch": 39.47, + "learning_rate": 3.02739133881284e-05, + "loss": 2.1532, + "step": 7966000 + }, + { + "epoch": 39.47, + "learning_rate": 3.0272674801702317e-05, + "loss": 2.1684, + "step": 7966500 + }, + { + "epoch": 39.47, + "learning_rate": 3.0271436215276234e-05, + "loss": 2.1787, + "step": 7967000 + }, + { + "epoch": 39.47, + "learning_rate": 3.027019762885015e-05, + "loss": 2.1244, + "step": 7967500 + }, + { + "epoch": 39.48, + "learning_rate": 3.0268961519596916e-05, + "loss": 2.1448, + "step": 7968000 + }, + { + "epoch": 39.48, + "learning_rate": 3.0267725410343685e-05, + "loss": 2.1404, + "step": 7968500 + }, + { + "epoch": 39.48, + "learning_rate": 3.0266486823917602e-05, + "loss": 2.1448, + "step": 7969000 + }, + { + "epoch": 39.48, + "learning_rate": 3.0265250714664367e-05, + "loss": 2.1466, + "step": 7969500 + }, + { + "epoch": 39.49, + "learning_rate": 3.0264012128238284e-05, + "loss": 2.1603, + "step": 7970000 + }, + { + "epoch": 39.49, + "learning_rate": 3.02627735418122e-05, + "loss": 2.1393, + "step": 7970500 + }, + { + "epoch": 39.49, + "learning_rate": 3.0261534955386118e-05, + "loss": 2.1433, + "step": 7971000 + }, + { + "epoch": 39.49, + "learning_rate": 3.0260296368960035e-05, + "loss": 2.1232, + "step": 7971500 + }, + { + "epoch": 39.5, + "learning_rate": 3.0259057782533952e-05, + "loss": 2.1082, + "step": 7972000 + }, + { + "epoch": 39.5, + "learning_rate": 3.025781919610787e-05, + "loss": 2.1548, + "step": 7972500 + }, + { + "epoch": 39.5, + "learning_rate": 3.0256580609681782e-05, + "loss": 2.1618, + "step": 7973000 + }, + { + "epoch": 39.5, + "learning_rate": 3.02553420232557e-05, + "loss": 2.1554, + "step": 7973500 + }, + { + "epoch": 39.51, + "learning_rate": 3.0254103436829616e-05, + "loss": 2.1582, + "step": 7974000 + }, + { + "epoch": 39.51, + "learning_rate": 3.0252864850403533e-05, + "loss": 2.1767, + "step": 7974500 + }, + { + "epoch": 39.51, + "learning_rate": 3.0251628741150302e-05, + "loss": 2.1367, + "step": 7975000 + }, + { + "epoch": 39.51, + "learning_rate": 3.025039015472422e-05, + "loss": 2.151, + "step": 7975500 + }, + { + "epoch": 39.52, + "learning_rate": 3.0249154045470984e-05, + "loss": 2.1421, + "step": 7976000 + }, + { + "epoch": 39.52, + "learning_rate": 3.02479154590449e-05, + "loss": 2.1534, + "step": 7976500 + }, + { + "epoch": 39.52, + "learning_rate": 3.0246676872618818e-05, + "loss": 2.1454, + "step": 7977000 + }, + { + "epoch": 39.52, + "learning_rate": 3.0245438286192735e-05, + "loss": 2.1565, + "step": 7977500 + }, + { + "epoch": 39.53, + "learning_rate": 3.0244199699766652e-05, + "loss": 2.134, + "step": 7978000 + }, + { + "epoch": 39.53, + "learning_rate": 3.024296111334057e-05, + "loss": 2.1578, + "step": 7978500 + }, + { + "epoch": 39.53, + "learning_rate": 3.0241722526914486e-05, + "loss": 2.1315, + "step": 7979000 + }, + { + "epoch": 39.53, + "learning_rate": 3.02404839404884e-05, + "loss": 2.1359, + "step": 7979500 + }, + { + "epoch": 39.54, + "learning_rate": 3.0239245354062316e-05, + "loss": 2.1432, + "step": 7980000 + }, + { + "epoch": 39.54, + "learning_rate": 3.0238006767636233e-05, + "loss": 2.1618, + "step": 7980500 + }, + { + "epoch": 39.54, + "learning_rate": 3.0236770658383002e-05, + "loss": 2.1471, + "step": 7981000 + }, + { + "epoch": 39.54, + "learning_rate": 3.023553207195692e-05, + "loss": 2.1546, + "step": 7981500 + }, + { + "epoch": 39.55, + "learning_rate": 3.0234293485530836e-05, + "loss": 2.1428, + "step": 7982000 + }, + { + "epoch": 39.55, + "learning_rate": 3.0233054899104753e-05, + "loss": 2.1344, + "step": 7982500 + }, + { + "epoch": 39.55, + "learning_rate": 3.0231816312678666e-05, + "loss": 2.1659, + "step": 7983000 + }, + { + "epoch": 39.55, + "learning_rate": 3.0230577726252583e-05, + "loss": 2.156, + "step": 7983500 + }, + { + "epoch": 39.56, + "learning_rate": 3.02293391398265e-05, + "loss": 2.1345, + "step": 7984000 + }, + { + "epoch": 39.56, + "learning_rate": 3.0228100553400417e-05, + "loss": 2.1534, + "step": 7984500 + }, + { + "epoch": 39.56, + "learning_rate": 3.0226861966974334e-05, + "loss": 2.1419, + "step": 7985000 + }, + { + "epoch": 39.56, + "learning_rate": 3.022562338054825e-05, + "loss": 2.1344, + "step": 7985500 + }, + { + "epoch": 39.57, + "learning_rate": 3.022438727129502e-05, + "loss": 2.1341, + "step": 7986000 + }, + { + "epoch": 39.57, + "learning_rate": 3.0223148684868933e-05, + "loss": 2.132, + "step": 7986500 + }, + { + "epoch": 39.57, + "learning_rate": 3.022191009844285e-05, + "loss": 2.1422, + "step": 7987000 + }, + { + "epoch": 39.57, + "learning_rate": 3.022067398918962e-05, + "loss": 2.1957, + "step": 7987500 + }, + { + "epoch": 39.58, + "learning_rate": 3.0219435402763536e-05, + "loss": 2.1574, + "step": 7988000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0218196816337453e-05, + "loss": 2.1652, + "step": 7988500 + }, + { + "epoch": 39.58, + "learning_rate": 3.021695822991137e-05, + "loss": 2.1417, + "step": 7989000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0215719643485287e-05, + "loss": 2.1492, + "step": 7989500 + }, + { + "epoch": 39.59, + "learning_rate": 3.02144810570592e-05, + "loss": 2.1364, + "step": 7990000 + }, + { + "epoch": 39.59, + "learning_rate": 3.0213242470633117e-05, + "loss": 2.1672, + "step": 7990500 + }, + { + "epoch": 39.59, + "learning_rate": 3.0212003884207034e-05, + "loss": 2.1569, + "step": 7991000 + }, + { + "epoch": 39.59, + "learning_rate": 3.021076529778095e-05, + "loss": 2.1703, + "step": 7991500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0209526711354868e-05, + "loss": 2.1459, + "step": 7992000 + }, + { + "epoch": 39.6, + "learning_rate": 3.0208288124928785e-05, + "loss": 2.164, + "step": 7992500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0207052015675554e-05, + "loss": 2.1593, + "step": 7993000 + }, + { + "epoch": 39.6, + "learning_rate": 3.0205813429249467e-05, + "loss": 2.1342, + "step": 7993500 + }, + { + "epoch": 39.61, + "learning_rate": 3.0204574842823384e-05, + "loss": 2.1347, + "step": 7994000 + }, + { + "epoch": 39.61, + "learning_rate": 3.02033362563973e-05, + "loss": 2.1442, + "step": 7994500 + }, + { + "epoch": 39.61, + "learning_rate": 3.0202097669971218e-05, + "loss": 2.1486, + "step": 7995000 + }, + { + "epoch": 39.61, + "learning_rate": 3.0200859083545135e-05, + "loss": 2.1455, + "step": 7995500 + }, + { + "epoch": 39.61, + "learning_rate": 3.0199620497119052e-05, + "loss": 2.166, + "step": 7996000 + }, + { + "epoch": 39.62, + "learning_rate": 3.0198381910692962e-05, + "loss": 2.1218, + "step": 7996500 + }, + { + "epoch": 39.62, + "learning_rate": 3.0197145801439734e-05, + "loss": 2.1713, + "step": 7997000 + }, + { + "epoch": 39.62, + "learning_rate": 3.019590721501365e-05, + "loss": 2.1567, + "step": 7997500 + }, + { + "epoch": 39.62, + "learning_rate": 3.0194668628587568e-05, + "loss": 2.132, + "step": 7998000 + }, + { + "epoch": 39.63, + "learning_rate": 3.0193430042161485e-05, + "loss": 2.1449, + "step": 7998500 + }, + { + "epoch": 39.63, + "learning_rate": 3.0192191455735402e-05, + "loss": 2.1452, + "step": 7999000 + }, + { + "epoch": 39.63, + "learning_rate": 3.0190952869309312e-05, + "loss": 2.1455, + "step": 7999500 + }, + { + "epoch": 39.63, + "learning_rate": 3.018971428288323e-05, + "loss": 2.1394, + "step": 8000000 + }, + { + "epoch": 39.64, + "learning_rate": 3.0188475696457146e-05, + "loss": 2.1418, + "step": 8000500 + }, + { + "epoch": 39.64, + "learning_rate": 3.0187237110031063e-05, + "loss": 2.1492, + "step": 8001000 + }, + { + "epoch": 39.64, + "learning_rate": 3.0186001000777835e-05, + "loss": 2.1505, + "step": 8001500 + }, + { + "epoch": 39.64, + "learning_rate": 3.0184762414351752e-05, + "loss": 2.1721, + "step": 8002000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0183523827925662e-05, + "loss": 2.1447, + "step": 8002500 + }, + { + "epoch": 39.65, + "learning_rate": 3.0182290195845286e-05, + "loss": 2.1685, + "step": 8003000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0181051609419203e-05, + "loss": 2.1416, + "step": 8003500 + }, + { + "epoch": 39.65, + "learning_rate": 3.017981302299312e-05, + "loss": 2.1407, + "step": 8004000 + }, + { + "epoch": 39.66, + "learning_rate": 3.0178574436567037e-05, + "loss": 2.1782, + "step": 8004500 + }, + { + "epoch": 39.66, + "learning_rate": 3.0177335850140954e-05, + "loss": 2.143, + "step": 8005000 + }, + { + "epoch": 39.66, + "learning_rate": 3.017609726371487e-05, + "loss": 2.1823, + "step": 8005500 + }, + { + "epoch": 39.66, + "learning_rate": 3.0174861154461636e-05, + "loss": 2.1475, + "step": 8006000 + }, + { + "epoch": 39.67, + "learning_rate": 3.0173622568035553e-05, + "loss": 2.1581, + "step": 8006500 + }, + { + "epoch": 39.67, + "learning_rate": 3.017238645878232e-05, + "loss": 2.1574, + "step": 8007000 + }, + { + "epoch": 39.67, + "learning_rate": 3.0171147872356236e-05, + "loss": 2.1332, + "step": 8007500 + }, + { + "epoch": 39.67, + "learning_rate": 3.0169909285930153e-05, + "loss": 2.1649, + "step": 8008000 + }, + { + "epoch": 39.68, + "learning_rate": 3.016867069950407e-05, + "loss": 2.1523, + "step": 8008500 + }, + { + "epoch": 39.68, + "learning_rate": 3.0167432113077986e-05, + "loss": 2.1674, + "step": 8009000 + }, + { + "epoch": 39.68, + "learning_rate": 3.0166193526651903e-05, + "loss": 2.1503, + "step": 8009500 + }, + { + "epoch": 39.68, + "learning_rate": 3.016495494022582e-05, + "loss": 2.1785, + "step": 8010000 + }, + { + "epoch": 39.69, + "learning_rate": 3.0163716353799737e-05, + "loss": 2.1555, + "step": 8010500 + }, + { + "epoch": 39.69, + "learning_rate": 3.0162477767373654e-05, + "loss": 2.1361, + "step": 8011000 + }, + { + "epoch": 39.69, + "learning_rate": 3.016123918094757e-05, + "loss": 2.1458, + "step": 8011500 + }, + { + "epoch": 39.69, + "learning_rate": 3.0160000594521488e-05, + "loss": 2.1399, + "step": 8012000 + }, + { + "epoch": 39.7, + "learning_rate": 3.0158762008095405e-05, + "loss": 2.1611, + "step": 8012500 + }, + { + "epoch": 39.7, + "learning_rate": 3.0157523421669322e-05, + "loss": 2.1584, + "step": 8013000 + }, + { + "epoch": 39.7, + "learning_rate": 3.0156287312416087e-05, + "loss": 2.1321, + "step": 8013500 + }, + { + "epoch": 39.7, + "learning_rate": 3.0155048725990004e-05, + "loss": 2.1513, + "step": 8014000 + }, + { + "epoch": 39.71, + "learning_rate": 3.015381013956392e-05, + "loss": 2.1694, + "step": 8014500 + }, + { + "epoch": 39.71, + "learning_rate": 3.0152571553137838e-05, + "loss": 2.1494, + "step": 8015000 + }, + { + "epoch": 39.71, + "learning_rate": 3.0151332966711755e-05, + "loss": 2.1445, + "step": 8015500 + }, + { + "epoch": 39.71, + "learning_rate": 3.0150094380285672e-05, + "loss": 2.1644, + "step": 8016000 + }, + { + "epoch": 39.72, + "learning_rate": 3.0148858271032437e-05, + "loss": 2.1543, + "step": 8016500 + }, + { + "epoch": 39.72, + "learning_rate": 3.0147619684606354e-05, + "loss": 2.1643, + "step": 8017000 + }, + { + "epoch": 39.72, + "learning_rate": 3.014638109818027e-05, + "loss": 2.1538, + "step": 8017500 + }, + { + "epoch": 39.72, + "learning_rate": 3.0145142511754188e-05, + "loss": 2.1518, + "step": 8018000 + }, + { + "epoch": 39.73, + "learning_rate": 3.0143903925328105e-05, + "loss": 2.1669, + "step": 8018500 + }, + { + "epoch": 39.73, + "learning_rate": 3.0142665338902022e-05, + "loss": 2.1774, + "step": 8019000 + }, + { + "epoch": 39.73, + "learning_rate": 3.014142675247594e-05, + "loss": 2.1471, + "step": 8019500 + }, + { + "epoch": 39.73, + "learning_rate": 3.0140188166049856e-05, + "loss": 2.1726, + "step": 8020000 + }, + { + "epoch": 39.74, + "learning_rate": 3.013895205679662e-05, + "loss": 2.1384, + "step": 8020500 + }, + { + "epoch": 39.74, + "learning_rate": 3.0137713470370538e-05, + "loss": 2.1756, + "step": 8021000 + }, + { + "epoch": 39.74, + "learning_rate": 3.0136474883944455e-05, + "loss": 2.1773, + "step": 8021500 + }, + { + "epoch": 39.74, + "learning_rate": 3.0135241251864072e-05, + "loss": 2.1633, + "step": 8022000 + }, + { + "epoch": 39.75, + "learning_rate": 3.013400266543799e-05, + "loss": 2.1297, + "step": 8022500 + }, + { + "epoch": 39.75, + "learning_rate": 3.013276655618476e-05, + "loss": 2.1697, + "step": 8023000 + }, + { + "epoch": 39.75, + "learning_rate": 3.013152796975868e-05, + "loss": 2.1987, + "step": 8023500 + }, + { + "epoch": 39.75, + "learning_rate": 3.0130289383332595e-05, + "loss": 2.1599, + "step": 8024000 + }, + { + "epoch": 39.76, + "learning_rate": 3.012905327407936e-05, + "loss": 2.1246, + "step": 8024500 + }, + { + "epoch": 39.76, + "learning_rate": 3.0127814687653278e-05, + "loss": 2.1334, + "step": 8025000 + }, + { + "epoch": 39.76, + "learning_rate": 3.0126576101227195e-05, + "loss": 2.1671, + "step": 8025500 + }, + { + "epoch": 39.76, + "learning_rate": 3.012533751480111e-05, + "loss": 2.1581, + "step": 8026000 + }, + { + "epoch": 39.77, + "learning_rate": 3.012409892837503e-05, + "loss": 2.1355, + "step": 8026500 + }, + { + "epoch": 39.77, + "learning_rate": 3.0122860341948945e-05, + "loss": 2.153, + "step": 8027000 + }, + { + "epoch": 39.77, + "learning_rate": 3.0121621755522862e-05, + "loss": 2.1714, + "step": 8027500 + }, + { + "epoch": 39.77, + "learning_rate": 3.0120383169096773e-05, + "loss": 2.1358, + "step": 8028000 + }, + { + "epoch": 39.78, + "learning_rate": 3.011914458267069e-05, + "loss": 2.1435, + "step": 8028500 + }, + { + "epoch": 39.78, + "learning_rate": 3.0117905996244606e-05, + "loss": 2.1649, + "step": 8029000 + }, + { + "epoch": 39.78, + "learning_rate": 3.0116667409818523e-05, + "loss": 2.1489, + "step": 8029500 + }, + { + "epoch": 39.78, + "learning_rate": 3.011542882339244e-05, + "loss": 2.1574, + "step": 8030000 + }, + { + "epoch": 39.79, + "learning_rate": 3.0114190236966354e-05, + "loss": 2.1635, + "step": 8030500 + }, + { + "epoch": 39.79, + "learning_rate": 3.011295412771313e-05, + "loss": 2.1656, + "step": 8031000 + }, + { + "epoch": 39.79, + "learning_rate": 3.011171554128704e-05, + "loss": 2.1903, + "step": 8031500 + }, + { + "epoch": 39.79, + "learning_rate": 3.0110476954860956e-05, + "loss": 2.1556, + "step": 8032000 + }, + { + "epoch": 39.8, + "learning_rate": 3.0109238368434873e-05, + "loss": 2.145, + "step": 8032500 + }, + { + "epoch": 39.8, + "learning_rate": 3.0108002259181646e-05, + "loss": 2.1641, + "step": 8033000 + }, + { + "epoch": 39.8, + "learning_rate": 3.0106763672755563e-05, + "loss": 2.1497, + "step": 8033500 + }, + { + "epoch": 39.8, + "learning_rate": 3.010552508632948e-05, + "loss": 2.1683, + "step": 8034000 + }, + { + "epoch": 39.81, + "learning_rate": 3.010428649990339e-05, + "loss": 2.1542, + "step": 8034500 + }, + { + "epoch": 39.81, + "learning_rate": 3.0103047913477307e-05, + "loss": 2.1433, + "step": 8035000 + }, + { + "epoch": 39.81, + "learning_rate": 3.0101809327051223e-05, + "loss": 2.1602, + "step": 8035500 + }, + { + "epoch": 39.81, + "learning_rate": 3.010057074062514e-05, + "loss": 2.1568, + "step": 8036000 + }, + { + "epoch": 39.82, + "learning_rate": 3.0099332154199057e-05, + "loss": 2.1549, + "step": 8036500 + }, + { + "epoch": 39.82, + "learning_rate": 3.009809356777297e-05, + "loss": 2.1468, + "step": 8037000 + }, + { + "epoch": 39.82, + "learning_rate": 3.0096859935692595e-05, + "loss": 2.1577, + "step": 8037500 + }, + { + "epoch": 39.82, + "learning_rate": 3.009562382643936e-05, + "loss": 2.1663, + "step": 8038000 + }, + { + "epoch": 39.83, + "learning_rate": 3.0094385240013277e-05, + "loss": 2.1725, + "step": 8038500 + }, + { + "epoch": 39.83, + "learning_rate": 3.0093146653587194e-05, + "loss": 2.1502, + "step": 8039000 + }, + { + "epoch": 39.83, + "learning_rate": 3.009190806716111e-05, + "loss": 2.1691, + "step": 8039500 + }, + { + "epoch": 39.83, + "learning_rate": 3.0090669480735028e-05, + "loss": 2.1382, + "step": 8040000 + }, + { + "epoch": 39.84, + "learning_rate": 3.0089430894308945e-05, + "loss": 2.1767, + "step": 8040500 + }, + { + "epoch": 39.84, + "learning_rate": 3.0088192307882862e-05, + "loss": 2.1753, + "step": 8041000 + }, + { + "epoch": 39.84, + "learning_rate": 3.008695372145678e-05, + "loss": 2.161, + "step": 8041500 + }, + { + "epoch": 39.84, + "learning_rate": 3.0085715135030696e-05, + "loss": 2.1657, + "step": 8042000 + }, + { + "epoch": 39.85, + "learning_rate": 3.0084476548604613e-05, + "loss": 2.1477, + "step": 8042500 + }, + { + "epoch": 39.85, + "learning_rate": 3.008323796217853e-05, + "loss": 2.1735, + "step": 8043000 + }, + { + "epoch": 39.85, + "learning_rate": 3.0081999375752447e-05, + "loss": 2.1239, + "step": 8043500 + }, + { + "epoch": 39.85, + "learning_rate": 3.0080760789326357e-05, + "loss": 2.1411, + "step": 8044000 + }, + { + "epoch": 39.86, + "learning_rate": 3.0079522202900274e-05, + "loss": 2.1439, + "step": 8044500 + }, + { + "epoch": 39.86, + "learning_rate": 3.0078288570819894e-05, + "loss": 2.1604, + "step": 8045000 + }, + { + "epoch": 39.86, + "learning_rate": 3.007704998439381e-05, + "loss": 2.1593, + "step": 8045500 + }, + { + "epoch": 39.86, + "learning_rate": 3.0075811397967728e-05, + "loss": 2.1567, + "step": 8046000 + }, + { + "epoch": 39.87, + "learning_rate": 3.0074572811541645e-05, + "loss": 2.1343, + "step": 8046500 + }, + { + "epoch": 39.87, + "learning_rate": 3.0073336702288414e-05, + "loss": 2.1458, + "step": 8047000 + }, + { + "epoch": 39.87, + "learning_rate": 3.007209811586233e-05, + "loss": 2.1592, + "step": 8047500 + }, + { + "epoch": 39.87, + "learning_rate": 3.0070859529436244e-05, + "loss": 2.1504, + "step": 8048000 + }, + { + "epoch": 39.88, + "learning_rate": 3.006962094301016e-05, + "loss": 2.1612, + "step": 8048500 + }, + { + "epoch": 39.88, + "learning_rate": 3.0068382356584078e-05, + "loss": 2.1423, + "step": 8049000 + }, + { + "epoch": 39.88, + "learning_rate": 3.0067143770157995e-05, + "loss": 2.1497, + "step": 8049500 + }, + { + "epoch": 39.88, + "learning_rate": 3.0065907660904764e-05, + "loss": 2.1624, + "step": 8050000 + }, + { + "epoch": 39.88, + "learning_rate": 3.006466907447868e-05, + "loss": 2.1358, + "step": 8050500 + }, + { + "epoch": 39.89, + "learning_rate": 3.0063430488052598e-05, + "loss": 2.161, + "step": 8051000 + }, + { + "epoch": 39.89, + "learning_rate": 3.006219190162651e-05, + "loss": 2.1392, + "step": 8051500 + }, + { + "epoch": 39.89, + "learning_rate": 3.006095331520043e-05, + "loss": 2.1358, + "step": 8052000 + }, + { + "epoch": 39.89, + "learning_rate": 3.0059714728774345e-05, + "loss": 2.1436, + "step": 8052500 + }, + { + "epoch": 39.9, + "learning_rate": 3.0058476142348262e-05, + "loss": 2.1518, + "step": 8053000 + }, + { + "epoch": 39.9, + "learning_rate": 3.005723755592218e-05, + "loss": 2.1306, + "step": 8053500 + }, + { + "epoch": 39.9, + "learning_rate": 3.0055998969496096e-05, + "loss": 2.1684, + "step": 8054000 + }, + { + "epoch": 39.9, + "learning_rate": 3.0054762860242865e-05, + "loss": 2.1548, + "step": 8054500 + }, + { + "epoch": 39.91, + "learning_rate": 3.005352427381678e-05, + "loss": 2.1863, + "step": 8055000 + }, + { + "epoch": 39.91, + "learning_rate": 3.0052285687390695e-05, + "loss": 2.1506, + "step": 8055500 + }, + { + "epoch": 39.91, + "learning_rate": 3.0051047100964612e-05, + "loss": 2.1482, + "step": 8056000 + }, + { + "epoch": 39.91, + "learning_rate": 3.004980851453853e-05, + "loss": 2.1735, + "step": 8056500 + }, + { + "epoch": 39.92, + "learning_rate": 3.0048569928112446e-05, + "loss": 2.1738, + "step": 8057000 + }, + { + "epoch": 39.92, + "learning_rate": 3.0047331341686363e-05, + "loss": 2.1549, + "step": 8057500 + }, + { + "epoch": 39.92, + "learning_rate": 3.004609275526028e-05, + "loss": 2.1322, + "step": 8058000 + }, + { + "epoch": 39.92, + "learning_rate": 3.0044854168834197e-05, + "loss": 2.1524, + "step": 8058500 + }, + { + "epoch": 39.93, + "learning_rate": 3.0043615582408114e-05, + "loss": 2.141, + "step": 8059000 + }, + { + "epoch": 39.93, + "learning_rate": 3.0042376995982024e-05, + "loss": 2.1674, + "step": 8059500 + }, + { + "epoch": 39.93, + "learning_rate": 3.004113840955594e-05, + "loss": 2.1547, + "step": 8060000 + }, + { + "epoch": 39.93, + "learning_rate": 3.0039899823129858e-05, + "loss": 2.1545, + "step": 8060500 + }, + { + "epoch": 39.94, + "learning_rate": 3.003866371387663e-05, + "loss": 2.166, + "step": 8061000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0037425127450547e-05, + "loss": 2.1595, + "step": 8061500 + }, + { + "epoch": 39.94, + "learning_rate": 3.0036186541024464e-05, + "loss": 2.1371, + "step": 8062000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0034947954598374e-05, + "loss": 2.1685, + "step": 8062500 + }, + { + "epoch": 39.95, + "learning_rate": 3.0033711845345146e-05, + "loss": 2.1529, + "step": 8063000 + }, + { + "epoch": 39.95, + "learning_rate": 3.0032475736091915e-05, + "loss": 2.1464, + "step": 8063500 + }, + { + "epoch": 39.95, + "learning_rate": 3.0031237149665832e-05, + "loss": 2.1639, + "step": 8064000 + }, + { + "epoch": 39.95, + "learning_rate": 3.0030001040412597e-05, + "loss": 2.1509, + "step": 8064500 + }, + { + "epoch": 39.96, + "learning_rate": 3.0028762453986514e-05, + "loss": 2.1675, + "step": 8065000 + }, + { + "epoch": 39.96, + "learning_rate": 3.002752386756043e-05, + "loss": 2.1495, + "step": 8065500 + }, + { + "epoch": 39.96, + "learning_rate": 3.0026285281134348e-05, + "loss": 2.1633, + "step": 8066000 + }, + { + "epoch": 39.96, + "learning_rate": 3.0025046694708265e-05, + "loss": 2.1319, + "step": 8066500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0023808108282182e-05, + "loss": 2.1646, + "step": 8067000 + }, + { + "epoch": 39.97, + "learning_rate": 3.00225695218561e-05, + "loss": 2.1526, + "step": 8067500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0021330935430016e-05, + "loss": 2.1123, + "step": 8068000 + }, + { + "epoch": 39.97, + "learning_rate": 3.002009482617678e-05, + "loss": 2.1465, + "step": 8068500 + }, + { + "epoch": 39.98, + "learning_rate": 3.0018858716923553e-05, + "loss": 2.1841, + "step": 8069000 + }, + { + "epoch": 39.98, + "learning_rate": 3.0017620130497464e-05, + "loss": 2.1716, + "step": 8069500 + }, + { + "epoch": 39.98, + "learning_rate": 3.001638154407138e-05, + "loss": 2.17, + "step": 8070000 + }, + { + "epoch": 39.98, + "learning_rate": 3.0015142957645298e-05, + "loss": 2.1723, + "step": 8070500 + }, + { + "epoch": 39.99, + "learning_rate": 3.0013904371219214e-05, + "loss": 2.1409, + "step": 8071000 + }, + { + "epoch": 39.99, + "learning_rate": 3.001266578479313e-05, + "loss": 2.1666, + "step": 8071500 + }, + { + "epoch": 39.99, + "learning_rate": 3.0011427198367048e-05, + "loss": 2.1527, + "step": 8072000 + }, + { + "epoch": 39.99, + "learning_rate": 3.0010188611940965e-05, + "loss": 2.1557, + "step": 8072500 + }, + { + "epoch": 40.0, + "learning_rate": 3.0008950025514882e-05, + "loss": 2.1458, + "step": 8073000 + }, + { + "epoch": 40.0, + "learning_rate": 3.00077114390888e-05, + "loss": 2.1418, + "step": 8073500 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.6648354250857901, + "eval_accuracy_mlm": 0.6219327105976565, + "eval_accuracy_nsp": 0.8670649006310819, + "eval_loss": 2.2923583984375, + "eval_runtime": 146.917, + "eval_samples_per_second": 1735.395, + "eval_steps_per_second": 72.313, + "step": 8073720 } ], "max_steps": 20184300, "num_train_epochs": 100, - "total_flos": 8.884340001928284e+18, + "total_flos": 1.0452273947820423e+19, "trial_name": null, "trial_params": null }