{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.597108245408362, "global_step": 11000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.208333333333333e-08, "loss": 3.5267, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0416666666666667e-07, "loss": 4.1364, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.5625e-07, "loss": 3.942, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.0833333333333333e-07, "loss": 3.8524, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.604166666666667e-07, "loss": 3.6195, "step": 5 }, { "epoch": 0.0, "learning_rate": 3.125e-07, "loss": 3.4781, "step": 6 }, { "epoch": 0.01, "learning_rate": 3.6458333333333337e-07, "loss": 3.956, "step": 7 }, { "epoch": 0.01, "learning_rate": 4.1666666666666667e-07, "loss": 3.7294, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.6875000000000006e-07, "loss": 3.6739, "step": 9 }, { "epoch": 0.01, "learning_rate": 5.208333333333334e-07, "loss": 3.3912, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.729166666666667e-07, "loss": 3.7342, "step": 11 }, { "epoch": 0.01, "learning_rate": 6.25e-07, "loss": 3.5167, "step": 12 }, { "epoch": 0.01, "learning_rate": 6.770833333333333e-07, "loss": 3.6668, "step": 13 }, { "epoch": 0.01, "learning_rate": 7.291666666666667e-07, "loss": 3.7244, "step": 14 }, { "epoch": 0.01, "learning_rate": 7.8125e-07, "loss": 3.9285, "step": 15 }, { "epoch": 0.01, "learning_rate": 8.333333333333333e-07, "loss": 3.7596, "step": 16 }, { "epoch": 0.01, "learning_rate": 8.854166666666668e-07, "loss": 3.5427, "step": 17 }, { "epoch": 0.01, "learning_rate": 9.375000000000001e-07, "loss": 3.5945, "step": 18 }, { "epoch": 0.01, "learning_rate": 9.895833333333333e-07, "loss": 3.4566, "step": 19 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-06, "loss": 3.4137, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.0937500000000001e-06, "loss": 3.507, "step": 21 }, { "epoch": 0.02, "learning_rate": 1.1458333333333333e-06, "loss": 3.5058, "step": 22 }, { "epoch": 0.02, "learning_rate": 1.197916666666667e-06, "loss": 3.4375, "step": 23 }, { "epoch": 0.02, "learning_rate": 1.25e-06, "loss": 3.515, "step": 24 }, { "epoch": 0.02, "learning_rate": 1.3020833333333335e-06, "loss": 3.6441, "step": 25 }, { "epoch": 0.02, "learning_rate": 1.3541666666666667e-06, "loss": 3.5722, "step": 26 }, { "epoch": 0.02, "learning_rate": 1.40625e-06, "loss": 3.5685, "step": 27 }, { "epoch": 0.02, "learning_rate": 1.4583333333333335e-06, "loss": 3.1269, "step": 28 }, { "epoch": 0.02, "learning_rate": 1.5104166666666667e-06, "loss": 3.2355, "step": 29 }, { "epoch": 0.02, "learning_rate": 1.5625e-06, "loss": 3.1759, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.6145833333333335e-06, "loss": 3.0113, "step": 31 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-06, "loss": 3.1397, "step": 32 }, { "epoch": 0.03, "learning_rate": 1.71875e-06, "loss": 2.9405, "step": 33 }, { "epoch": 0.03, "learning_rate": 1.7708333333333337e-06, "loss": 2.948, "step": 34 }, { "epoch": 0.03, "learning_rate": 1.8229166666666666e-06, "loss": 2.9599, "step": 35 }, { "epoch": 0.03, "learning_rate": 1.8750000000000003e-06, "loss": 2.9237, "step": 36 }, { "epoch": 0.03, "learning_rate": 1.9270833333333334e-06, "loss": 2.9392, "step": 37 }, { "epoch": 0.03, "learning_rate": 1.9791666666666666e-06, "loss": 2.9355, "step": 38 }, { "epoch": 0.03, "learning_rate": 2.0312500000000002e-06, "loss": 3.0742, "step": 39 }, { "epoch": 0.03, "learning_rate": 2.0833333333333334e-06, "loss": 2.7303, "step": 40 }, { "epoch": 0.03, "learning_rate": 2.1354166666666666e-06, "loss": 2.8735, "step": 41 }, { "epoch": 0.03, "learning_rate": 2.1875000000000002e-06, "loss": 2.7781, "step": 42 }, { "epoch": 0.03, "learning_rate": 2.2395833333333334e-06, "loss": 2.9273, "step": 43 }, { "epoch": 0.03, "learning_rate": 2.2916666666666666e-06, "loss": 2.7942, "step": 44 }, { "epoch": 0.04, "learning_rate": 2.3437500000000002e-06, "loss": 2.8997, "step": 45 }, { "epoch": 0.04, "learning_rate": 2.395833333333334e-06, "loss": 3.0492, "step": 46 }, { "epoch": 0.04, "learning_rate": 2.4479166666666666e-06, "loss": 2.7576, "step": 47 }, { "epoch": 0.04, "learning_rate": 2.5e-06, "loss": 2.5814, "step": 48 }, { "epoch": 0.04, "learning_rate": 2.5520833333333334e-06, "loss": 2.9769, "step": 49 }, { "epoch": 0.04, "learning_rate": 2.604166666666667e-06, "loss": 2.8412, "step": 50 }, { "epoch": 0.04, "learning_rate": 2.65625e-06, "loss": 2.7884, "step": 51 }, { "epoch": 0.04, "learning_rate": 2.7083333333333334e-06, "loss": 2.551, "step": 52 }, { "epoch": 0.04, "learning_rate": 2.760416666666667e-06, "loss": 2.8506, "step": 53 }, { "epoch": 0.04, "learning_rate": 2.8125e-06, "loss": 2.7259, "step": 54 }, { "epoch": 0.04, "learning_rate": 2.8645833333333334e-06, "loss": 2.6894, "step": 55 }, { "epoch": 0.04, "learning_rate": 2.916666666666667e-06, "loss": 2.7998, "step": 56 }, { "epoch": 0.04, "learning_rate": 2.96875e-06, "loss": 2.7422, "step": 57 }, { "epoch": 0.05, "learning_rate": 3.0208333333333334e-06, "loss": 2.524, "step": 58 }, { "epoch": 0.05, "learning_rate": 3.072916666666667e-06, "loss": 2.8281, "step": 59 }, { "epoch": 0.05, "learning_rate": 3.125e-06, "loss": 2.5097, "step": 60 }, { "epoch": 0.05, "learning_rate": 3.1770833333333333e-06, "loss": 2.5921, "step": 61 }, { "epoch": 0.05, "learning_rate": 3.229166666666667e-06, "loss": 2.2834, "step": 62 }, { "epoch": 0.05, "learning_rate": 3.28125e-06, "loss": 2.4968, "step": 63 }, { "epoch": 0.05, "learning_rate": 3.3333333333333333e-06, "loss": 2.7739, "step": 64 }, { "epoch": 0.05, "learning_rate": 3.385416666666667e-06, "loss": 2.7555, "step": 65 }, { "epoch": 0.05, "learning_rate": 3.4375e-06, "loss": 2.68, "step": 66 }, { "epoch": 0.05, "learning_rate": 3.4895833333333333e-06, "loss": 2.6824, "step": 67 }, { "epoch": 0.05, "learning_rate": 3.5416666666666673e-06, "loss": 2.8905, "step": 68 }, { "epoch": 0.05, "learning_rate": 3.59375e-06, "loss": 2.8249, "step": 69 }, { "epoch": 0.05, "learning_rate": 3.6458333333333333e-06, "loss": 2.564, "step": 70 }, { "epoch": 0.06, "learning_rate": 3.6979166666666673e-06, "loss": 2.5839, "step": 71 }, { "epoch": 0.06, "learning_rate": 3.7500000000000005e-06, "loss": 2.7628, "step": 72 }, { "epoch": 0.06, "learning_rate": 3.8020833333333333e-06, "loss": 2.5765, "step": 73 }, { "epoch": 0.06, "learning_rate": 3.854166666666667e-06, "loss": 2.3685, "step": 74 }, { "epoch": 0.06, "learning_rate": 3.90625e-06, "loss": 2.3594, "step": 75 }, { "epoch": 0.06, "learning_rate": 3.958333333333333e-06, "loss": 2.796, "step": 76 }, { "epoch": 0.06, "learning_rate": 4.010416666666667e-06, "loss": 2.8343, "step": 77 }, { "epoch": 0.06, "learning_rate": 4.0625000000000005e-06, "loss": 2.5586, "step": 78 }, { "epoch": 0.06, "learning_rate": 4.114583333333334e-06, "loss": 2.2866, "step": 79 }, { "epoch": 0.06, "learning_rate": 4.166666666666667e-06, "loss": 2.6071, "step": 80 }, { "epoch": 0.06, "learning_rate": 4.21875e-06, "loss": 2.5232, "step": 81 }, { "epoch": 0.06, "learning_rate": 4.270833333333333e-06, "loss": 2.9306, "step": 82 }, { "epoch": 0.06, "learning_rate": 4.322916666666667e-06, "loss": 2.398, "step": 83 }, { "epoch": 0.07, "learning_rate": 4.3750000000000005e-06, "loss": 2.8232, "step": 84 }, { "epoch": 0.07, "learning_rate": 4.427083333333334e-06, "loss": 2.6078, "step": 85 }, { "epoch": 0.07, "learning_rate": 4.479166666666667e-06, "loss": 2.3238, "step": 86 }, { "epoch": 0.07, "learning_rate": 4.53125e-06, "loss": 2.8657, "step": 87 }, { "epoch": 0.07, "learning_rate": 4.583333333333333e-06, "loss": 2.4534, "step": 88 }, { "epoch": 0.07, "learning_rate": 4.635416666666667e-06, "loss": 2.7152, "step": 89 }, { "epoch": 0.07, "learning_rate": 4.6875000000000004e-06, "loss": 2.7031, "step": 90 }, { "epoch": 0.07, "learning_rate": 4.739583333333334e-06, "loss": 2.3554, "step": 91 }, { "epoch": 0.07, "learning_rate": 4.791666666666668e-06, "loss": 2.3549, "step": 92 }, { "epoch": 0.07, "learning_rate": 4.84375e-06, "loss": 2.6694, "step": 93 }, { "epoch": 0.07, "learning_rate": 4.895833333333333e-06, "loss": 2.4775, "step": 94 }, { "epoch": 0.07, "learning_rate": 4.947916666666667e-06, "loss": 2.3013, "step": 95 }, { "epoch": 0.08, "learning_rate": 5e-06, "loss": 2.7759, "step": 96 }, { "epoch": 0.08, "learning_rate": 5.0520833333333344e-06, "loss": 2.3246, "step": 97 }, { "epoch": 0.08, "learning_rate": 5.104166666666667e-06, "loss": 2.517, "step": 98 }, { "epoch": 0.08, "learning_rate": 5.156250000000001e-06, "loss": 2.3907, "step": 99 }, { "epoch": 0.08, "learning_rate": 5.208333333333334e-06, "loss": 2.3388, "step": 100 }, { "epoch": 0.08, "learning_rate": 5.260416666666666e-06, "loss": 2.8517, "step": 101 }, { "epoch": 0.08, "learning_rate": 5.3125e-06, "loss": 2.5464, "step": 102 }, { "epoch": 0.08, "learning_rate": 5.364583333333334e-06, "loss": 2.8232, "step": 103 }, { "epoch": 0.08, "learning_rate": 5.416666666666667e-06, "loss": 2.1819, "step": 104 }, { "epoch": 0.08, "learning_rate": 5.468750000000001e-06, "loss": 2.6514, "step": 105 }, { "epoch": 0.08, "learning_rate": 5.520833333333334e-06, "loss": 2.573, "step": 106 }, { "epoch": 0.08, "learning_rate": 5.572916666666667e-06, "loss": 2.2725, "step": 107 }, { "epoch": 0.08, "learning_rate": 5.625e-06, "loss": 2.3159, "step": 108 }, { "epoch": 0.09, "learning_rate": 5.677083333333334e-06, "loss": 2.5444, "step": 109 }, { "epoch": 0.09, "learning_rate": 5.729166666666667e-06, "loss": 2.4378, "step": 110 }, { "epoch": 0.09, "learning_rate": 5.781250000000001e-06, "loss": 2.7143, "step": 111 }, { "epoch": 0.09, "learning_rate": 5.833333333333334e-06, "loss": 2.3757, "step": 112 }, { "epoch": 0.09, "learning_rate": 5.885416666666667e-06, "loss": 2.2596, "step": 113 }, { "epoch": 0.09, "learning_rate": 5.9375e-06, "loss": 2.5989, "step": 114 }, { "epoch": 0.09, "learning_rate": 5.989583333333334e-06, "loss": 2.4151, "step": 115 }, { "epoch": 0.09, "learning_rate": 6.041666666666667e-06, "loss": 2.5002, "step": 116 }, { "epoch": 0.09, "learning_rate": 6.093750000000001e-06, "loss": 2.261, "step": 117 }, { "epoch": 0.09, "learning_rate": 6.145833333333334e-06, "loss": 2.4592, "step": 118 }, { "epoch": 0.09, "learning_rate": 6.197916666666667e-06, "loss": 2.2709, "step": 119 }, { "epoch": 0.09, "learning_rate": 6.25e-06, "loss": 2.1657, "step": 120 }, { "epoch": 0.09, "learning_rate": 6.302083333333334e-06, "loss": 2.6225, "step": 121 }, { "epoch": 0.1, "learning_rate": 6.354166666666667e-06, "loss": 2.4417, "step": 122 }, { "epoch": 0.1, "learning_rate": 6.406250000000001e-06, "loss": 2.4204, "step": 123 }, { "epoch": 0.1, "learning_rate": 6.458333333333334e-06, "loss": 2.0899, "step": 124 }, { "epoch": 0.1, "learning_rate": 6.510416666666667e-06, "loss": 2.4573, "step": 125 }, { "epoch": 0.1, "learning_rate": 6.5625e-06, "loss": 2.6266, "step": 126 }, { "epoch": 0.1, "learning_rate": 6.614583333333334e-06, "loss": 2.1137, "step": 127 }, { "epoch": 0.1, "learning_rate": 6.666666666666667e-06, "loss": 2.4646, "step": 128 }, { "epoch": 0.1, "learning_rate": 6.718750000000001e-06, "loss": 2.6435, "step": 129 }, { "epoch": 0.1, "learning_rate": 6.770833333333334e-06, "loss": 2.6149, "step": 130 }, { "epoch": 0.1, "learning_rate": 6.822916666666667e-06, "loss": 2.1359, "step": 131 }, { "epoch": 0.1, "learning_rate": 6.875e-06, "loss": 2.4055, "step": 132 }, { "epoch": 0.1, "learning_rate": 6.927083333333334e-06, "loss": 2.3537, "step": 133 }, { "epoch": 0.1, "learning_rate": 6.979166666666667e-06, "loss": 2.5536, "step": 134 }, { "epoch": 0.11, "learning_rate": 7.031250000000001e-06, "loss": 2.3925, "step": 135 }, { "epoch": 0.11, "learning_rate": 7.083333333333335e-06, "loss": 2.2271, "step": 136 }, { "epoch": 0.11, "learning_rate": 7.135416666666667e-06, "loss": 2.6941, "step": 137 }, { "epoch": 0.11, "learning_rate": 7.1875e-06, "loss": 2.2381, "step": 138 }, { "epoch": 0.11, "learning_rate": 7.239583333333334e-06, "loss": 2.488, "step": 139 }, { "epoch": 0.11, "learning_rate": 7.291666666666667e-06, "loss": 2.1613, "step": 140 }, { "epoch": 0.11, "learning_rate": 7.343750000000001e-06, "loss": 2.3625, "step": 141 }, { "epoch": 0.11, "learning_rate": 7.395833333333335e-06, "loss": 2.2673, "step": 142 }, { "epoch": 0.11, "learning_rate": 7.447916666666667e-06, "loss": 2.3286, "step": 143 }, { "epoch": 0.11, "learning_rate": 7.500000000000001e-06, "loss": 2.3592, "step": 144 }, { "epoch": 0.11, "learning_rate": 7.552083333333334e-06, "loss": 2.2435, "step": 145 }, { "epoch": 0.11, "learning_rate": 7.6041666666666666e-06, "loss": 2.2696, "step": 146 }, { "epoch": 0.11, "learning_rate": 7.656250000000001e-06, "loss": 2.3512, "step": 147 }, { "epoch": 0.12, "learning_rate": 7.708333333333334e-06, "loss": 2.4559, "step": 148 }, { "epoch": 0.12, "learning_rate": 7.760416666666666e-06, "loss": 2.7156, "step": 149 }, { "epoch": 0.12, "learning_rate": 7.8125e-06, "loss": 2.2873, "step": 150 }, { "epoch": 0.12, "learning_rate": 7.864583333333334e-06, "loss": 2.5873, "step": 151 }, { "epoch": 0.12, "learning_rate": 7.916666666666667e-06, "loss": 2.7533, "step": 152 }, { "epoch": 0.12, "learning_rate": 7.96875e-06, "loss": 2.7216, "step": 153 }, { "epoch": 0.12, "learning_rate": 8.020833333333335e-06, "loss": 2.2136, "step": 154 }, { "epoch": 0.12, "learning_rate": 8.072916666666667e-06, "loss": 2.1669, "step": 155 }, { "epoch": 0.12, "learning_rate": 8.125000000000001e-06, "loss": 2.3479, "step": 156 }, { "epoch": 0.12, "learning_rate": 8.177083333333335e-06, "loss": 2.3243, "step": 157 }, { "epoch": 0.12, "learning_rate": 8.229166666666667e-06, "loss": 2.2527, "step": 158 }, { "epoch": 0.12, "learning_rate": 8.281250000000001e-06, "loss": 2.5445, "step": 159 }, { "epoch": 0.13, "learning_rate": 8.333333333333334e-06, "loss": 2.4719, "step": 160 }, { "epoch": 0.13, "learning_rate": 8.385416666666668e-06, "loss": 2.6352, "step": 161 }, { "epoch": 0.13, "learning_rate": 8.4375e-06, "loss": 2.5129, "step": 162 }, { "epoch": 0.13, "learning_rate": 8.489583333333334e-06, "loss": 2.5947, "step": 163 }, { "epoch": 0.13, "learning_rate": 8.541666666666666e-06, "loss": 2.2788, "step": 164 }, { "epoch": 0.13, "learning_rate": 8.59375e-06, "loss": 2.4675, "step": 165 }, { "epoch": 0.13, "learning_rate": 8.645833333333335e-06, "loss": 2.5702, "step": 166 }, { "epoch": 0.13, "learning_rate": 8.697916666666667e-06, "loss": 2.4434, "step": 167 }, { "epoch": 0.13, "learning_rate": 8.750000000000001e-06, "loss": 2.3014, "step": 168 }, { "epoch": 0.13, "learning_rate": 8.802083333333335e-06, "loss": 2.2866, "step": 169 }, { "epoch": 0.13, "learning_rate": 8.854166666666667e-06, "loss": 2.5144, "step": 170 }, { "epoch": 0.13, "learning_rate": 8.906250000000001e-06, "loss": 2.3496, "step": 171 }, { "epoch": 0.13, "learning_rate": 8.958333333333334e-06, "loss": 2.1876, "step": 172 }, { "epoch": 0.14, "learning_rate": 9.010416666666668e-06, "loss": 2.3842, "step": 173 }, { "epoch": 0.14, "learning_rate": 9.0625e-06, "loss": 2.3003, "step": 174 }, { "epoch": 0.14, "learning_rate": 9.114583333333334e-06, "loss": 2.5197, "step": 175 }, { "epoch": 0.14, "learning_rate": 9.166666666666666e-06, "loss": 2.5111, "step": 176 }, { "epoch": 0.14, "learning_rate": 9.21875e-06, "loss": 2.3456, "step": 177 }, { "epoch": 0.14, "learning_rate": 9.270833333333334e-06, "loss": 2.3132, "step": 178 }, { "epoch": 0.14, "learning_rate": 9.322916666666667e-06, "loss": 2.6047, "step": 179 }, { "epoch": 0.14, "learning_rate": 9.375000000000001e-06, "loss": 2.3989, "step": 180 }, { "epoch": 0.14, "learning_rate": 9.427083333333335e-06, "loss": 2.3129, "step": 181 }, { "epoch": 0.14, "learning_rate": 9.479166666666667e-06, "loss": 2.472, "step": 182 }, { "epoch": 0.14, "learning_rate": 9.531250000000001e-06, "loss": 2.5617, "step": 183 }, { "epoch": 0.14, "learning_rate": 9.583333333333335e-06, "loss": 2.264, "step": 184 }, { "epoch": 0.14, "learning_rate": 9.635416666666668e-06, "loss": 2.441, "step": 185 }, { "epoch": 0.15, "learning_rate": 9.6875e-06, "loss": 2.3, "step": 186 }, { "epoch": 0.15, "learning_rate": 9.739583333333334e-06, "loss": 2.4012, "step": 187 }, { "epoch": 0.15, "learning_rate": 9.791666666666666e-06, "loss": 2.7917, "step": 188 }, { "epoch": 0.15, "learning_rate": 9.84375e-06, "loss": 2.3238, "step": 189 }, { "epoch": 0.15, "learning_rate": 9.895833333333334e-06, "loss": 2.4105, "step": 190 }, { "epoch": 0.15, "learning_rate": 9.947916666666667e-06, "loss": 2.4543, "step": 191 }, { "epoch": 0.15, "learning_rate": 1e-05, "loss": 2.1863, "step": 192 }, { "epoch": 0.15, "learning_rate": 1.0052083333333333e-05, "loss": 2.2082, "step": 193 }, { "epoch": 0.15, "learning_rate": 1.0104166666666669e-05, "loss": 2.2279, "step": 194 }, { "epoch": 0.15, "learning_rate": 1.0156250000000001e-05, "loss": 2.5996, "step": 195 }, { "epoch": 0.15, "learning_rate": 1.0208333333333334e-05, "loss": 2.1899, "step": 196 }, { "epoch": 0.15, "learning_rate": 1.0260416666666668e-05, "loss": 2.223, "step": 197 }, { "epoch": 0.15, "learning_rate": 1.0312500000000002e-05, "loss": 2.4603, "step": 198 }, { "epoch": 0.16, "learning_rate": 1.0364583333333334e-05, "loss": 2.1318, "step": 199 }, { "epoch": 0.16, "learning_rate": 1.0416666666666668e-05, "loss": 2.4841, "step": 200 }, { "epoch": 0.16, "learning_rate": 1.046875e-05, "loss": 2.233, "step": 201 }, { "epoch": 0.16, "learning_rate": 1.0520833333333333e-05, "loss": 2.4003, "step": 202 }, { "epoch": 0.16, "learning_rate": 1.0572916666666668e-05, "loss": 2.4746, "step": 203 }, { "epoch": 0.16, "learning_rate": 1.0625e-05, "loss": 2.2622, "step": 204 }, { "epoch": 0.16, "learning_rate": 1.0677083333333333e-05, "loss": 2.6961, "step": 205 }, { "epoch": 0.16, "learning_rate": 1.0729166666666669e-05, "loss": 2.2356, "step": 206 }, { "epoch": 0.16, "learning_rate": 1.0781250000000001e-05, "loss": 2.3199, "step": 207 }, { "epoch": 0.16, "learning_rate": 1.0833333333333334e-05, "loss": 2.519, "step": 208 }, { "epoch": 0.16, "learning_rate": 1.0885416666666668e-05, "loss": 2.4034, "step": 209 }, { "epoch": 0.16, "learning_rate": 1.0937500000000002e-05, "loss": 2.3274, "step": 210 }, { "epoch": 0.16, "learning_rate": 1.0989583333333334e-05, "loss": 1.9308, "step": 211 }, { "epoch": 0.17, "learning_rate": 1.1041666666666668e-05, "loss": 2.576, "step": 212 }, { "epoch": 0.17, "learning_rate": 1.109375e-05, "loss": 2.3625, "step": 213 }, { "epoch": 0.17, "learning_rate": 1.1145833333333334e-05, "loss": 2.5368, "step": 214 }, { "epoch": 0.17, "learning_rate": 1.1197916666666668e-05, "loss": 2.544, "step": 215 }, { "epoch": 0.17, "learning_rate": 1.125e-05, "loss": 2.3512, "step": 216 }, { "epoch": 0.17, "learning_rate": 1.1302083333333333e-05, "loss": 2.3454, "step": 217 }, { "epoch": 0.17, "learning_rate": 1.1354166666666669e-05, "loss": 2.3801, "step": 218 }, { "epoch": 0.17, "learning_rate": 1.1406250000000001e-05, "loss": 2.4619, "step": 219 }, { "epoch": 0.17, "learning_rate": 1.1458333333333333e-05, "loss": 2.2728, "step": 220 }, { "epoch": 0.17, "learning_rate": 1.151041666666667e-05, "loss": 2.4146, "step": 221 }, { "epoch": 0.17, "learning_rate": 1.1562500000000002e-05, "loss": 2.6888, "step": 222 }, { "epoch": 0.17, "learning_rate": 1.1614583333333334e-05, "loss": 2.3561, "step": 223 }, { "epoch": 0.18, "learning_rate": 1.1666666666666668e-05, "loss": 2.4934, "step": 224 }, { "epoch": 0.18, "learning_rate": 1.171875e-05, "loss": 2.7103, "step": 225 }, { "epoch": 0.18, "learning_rate": 1.1770833333333334e-05, "loss": 2.4573, "step": 226 }, { "epoch": 0.18, "learning_rate": 1.1822916666666668e-05, "loss": 2.308, "step": 227 }, { "epoch": 0.18, "learning_rate": 1.1875e-05, "loss": 2.4738, "step": 228 }, { "epoch": 0.18, "learning_rate": 1.1927083333333333e-05, "loss": 1.8763, "step": 229 }, { "epoch": 0.18, "learning_rate": 1.1979166666666669e-05, "loss": 2.4195, "step": 230 }, { "epoch": 0.18, "learning_rate": 1.2031250000000001e-05, "loss": 2.357, "step": 231 }, { "epoch": 0.18, "learning_rate": 1.2083333333333333e-05, "loss": 2.3469, "step": 232 }, { "epoch": 0.18, "learning_rate": 1.2135416666666669e-05, "loss": 2.3364, "step": 233 }, { "epoch": 0.18, "learning_rate": 1.2187500000000001e-05, "loss": 1.9565, "step": 234 }, { "epoch": 0.18, "learning_rate": 1.2239583333333334e-05, "loss": 2.3699, "step": 235 }, { "epoch": 0.18, "learning_rate": 1.2291666666666668e-05, "loss": 2.3514, "step": 236 }, { "epoch": 0.19, "learning_rate": 1.234375e-05, "loss": 2.1052, "step": 237 }, { "epoch": 0.19, "learning_rate": 1.2395833333333334e-05, "loss": 2.3897, "step": 238 }, { "epoch": 0.19, "learning_rate": 1.2447916666666668e-05, "loss": 2.5978, "step": 239 }, { "epoch": 0.19, "learning_rate": 1.25e-05, "loss": 2.0936, "step": 240 }, { "epoch": 0.19, "learning_rate": 1.2552083333333333e-05, "loss": 2.167, "step": 241 }, { "epoch": 0.19, "learning_rate": 1.2604166666666669e-05, "loss": 2.5945, "step": 242 }, { "epoch": 0.19, "learning_rate": 1.2656250000000001e-05, "loss": 2.5607, "step": 243 }, { "epoch": 0.19, "learning_rate": 1.2708333333333333e-05, "loss": 2.3868, "step": 244 }, { "epoch": 0.19, "learning_rate": 1.2760416666666669e-05, "loss": 2.5594, "step": 245 }, { "epoch": 0.19, "learning_rate": 1.2812500000000001e-05, "loss": 2.4599, "step": 246 }, { "epoch": 0.19, "learning_rate": 1.2864583333333334e-05, "loss": 1.9767, "step": 247 }, { "epoch": 0.19, "learning_rate": 1.2916666666666668e-05, "loss": 2.4512, "step": 248 }, { "epoch": 0.19, "learning_rate": 1.2968750000000002e-05, "loss": 2.2954, "step": 249 }, { "epoch": 0.2, "learning_rate": 1.3020833333333334e-05, "loss": 1.9699, "step": 250 }, { "epoch": 0.2, "learning_rate": 1.3072916666666668e-05, "loss": 2.566, "step": 251 }, { "epoch": 0.2, "learning_rate": 1.3125e-05, "loss": 2.2245, "step": 252 }, { "epoch": 0.2, "learning_rate": 1.3177083333333333e-05, "loss": 2.2449, "step": 253 }, { "epoch": 0.2, "learning_rate": 1.3229166666666669e-05, "loss": 2.1532, "step": 254 }, { "epoch": 0.2, "learning_rate": 1.3281250000000001e-05, "loss": 2.5615, "step": 255 }, { "epoch": 0.2, "learning_rate": 1.3333333333333333e-05, "loss": 2.3508, "step": 256 }, { "epoch": 0.2, "learning_rate": 1.3385416666666669e-05, "loss": 2.1298, "step": 257 }, { "epoch": 0.2, "learning_rate": 1.3437500000000001e-05, "loss": 2.3829, "step": 258 }, { "epoch": 0.2, "learning_rate": 1.3489583333333334e-05, "loss": 2.3752, "step": 259 }, { "epoch": 0.2, "learning_rate": 1.3541666666666668e-05, "loss": 2.1924, "step": 260 }, { "epoch": 0.2, "learning_rate": 1.3593750000000002e-05, "loss": 2.0965, "step": 261 }, { "epoch": 0.2, "learning_rate": 1.3645833333333334e-05, "loss": 2.6209, "step": 262 }, { "epoch": 0.21, "learning_rate": 1.3697916666666668e-05, "loss": 2.3404, "step": 263 }, { "epoch": 0.21, "learning_rate": 1.375e-05, "loss": 2.2769, "step": 264 }, { "epoch": 0.21, "learning_rate": 1.3802083333333335e-05, "loss": 2.4245, "step": 265 }, { "epoch": 0.21, "learning_rate": 1.3854166666666669e-05, "loss": 2.5843, "step": 266 }, { "epoch": 0.21, "learning_rate": 1.3906250000000001e-05, "loss": 2.6937, "step": 267 }, { "epoch": 0.21, "learning_rate": 1.3958333333333333e-05, "loss": 2.3221, "step": 268 }, { "epoch": 0.21, "learning_rate": 1.4010416666666669e-05, "loss": 2.6337, "step": 269 }, { "epoch": 0.21, "learning_rate": 1.4062500000000001e-05, "loss": 2.3712, "step": 270 }, { "epoch": 0.21, "learning_rate": 1.4114583333333334e-05, "loss": 2.0884, "step": 271 }, { "epoch": 0.21, "learning_rate": 1.416666666666667e-05, "loss": 2.4553, "step": 272 }, { "epoch": 0.21, "learning_rate": 1.4218750000000002e-05, "loss": 2.66, "step": 273 }, { "epoch": 0.21, "learning_rate": 1.4270833333333334e-05, "loss": 2.439, "step": 274 }, { "epoch": 0.21, "learning_rate": 1.4322916666666668e-05, "loss": 2.3423, "step": 275 }, { "epoch": 0.22, "learning_rate": 1.4375e-05, "loss": 2.0647, "step": 276 }, { "epoch": 0.22, "learning_rate": 1.4427083333333334e-05, "loss": 2.3899, "step": 277 }, { "epoch": 0.22, "learning_rate": 1.4479166666666669e-05, "loss": 2.3144, "step": 278 }, { "epoch": 0.22, "learning_rate": 1.453125e-05, "loss": 2.3799, "step": 279 }, { "epoch": 0.22, "learning_rate": 1.4583333333333333e-05, "loss": 2.1866, "step": 280 }, { "epoch": 0.22, "learning_rate": 1.4635416666666669e-05, "loss": 2.4274, "step": 281 }, { "epoch": 0.22, "learning_rate": 1.4687500000000001e-05, "loss": 2.401, "step": 282 }, { "epoch": 0.22, "learning_rate": 1.4739583333333334e-05, "loss": 2.2564, "step": 283 }, { "epoch": 0.22, "learning_rate": 1.479166666666667e-05, "loss": 2.3731, "step": 284 }, { "epoch": 0.22, "learning_rate": 1.4843750000000002e-05, "loss": 2.5199, "step": 285 }, { "epoch": 0.22, "learning_rate": 1.4895833333333334e-05, "loss": 2.3629, "step": 286 }, { "epoch": 0.22, "learning_rate": 1.4947916666666668e-05, "loss": 2.3843, "step": 287 }, { "epoch": 0.23, "learning_rate": 1.5000000000000002e-05, "loss": 2.1433, "step": 288 }, { "epoch": 0.23, "learning_rate": 1.5052083333333334e-05, "loss": 2.2603, "step": 289 }, { "epoch": 0.23, "learning_rate": 1.5104166666666668e-05, "loss": 2.0834, "step": 290 }, { "epoch": 0.23, "learning_rate": 1.515625e-05, "loss": 2.5686, "step": 291 }, { "epoch": 0.23, "learning_rate": 1.5208333333333333e-05, "loss": 2.4388, "step": 292 }, { "epoch": 0.23, "learning_rate": 1.5260416666666667e-05, "loss": 2.3681, "step": 293 }, { "epoch": 0.23, "learning_rate": 1.5312500000000003e-05, "loss": 2.5342, "step": 294 }, { "epoch": 0.23, "learning_rate": 1.5364583333333335e-05, "loss": 2.4578, "step": 295 }, { "epoch": 0.23, "learning_rate": 1.5416666666666668e-05, "loss": 1.9724, "step": 296 }, { "epoch": 0.23, "learning_rate": 1.546875e-05, "loss": 2.4773, "step": 297 }, { "epoch": 0.23, "learning_rate": 1.5520833333333332e-05, "loss": 2.0724, "step": 298 }, { "epoch": 0.23, "learning_rate": 1.5572916666666668e-05, "loss": 2.2087, "step": 299 }, { "epoch": 0.23, "learning_rate": 1.5625e-05, "loss": 2.4552, "step": 300 }, { "epoch": 0.24, "learning_rate": 1.5677083333333333e-05, "loss": 2.2699, "step": 301 }, { "epoch": 0.24, "learning_rate": 1.572916666666667e-05, "loss": 1.809, "step": 302 }, { "epoch": 0.24, "learning_rate": 1.578125e-05, "loss": 1.97, "step": 303 }, { "epoch": 0.24, "learning_rate": 1.5833333333333333e-05, "loss": 2.6242, "step": 304 }, { "epoch": 0.24, "learning_rate": 1.588541666666667e-05, "loss": 2.4933, "step": 305 }, { "epoch": 0.24, "learning_rate": 1.59375e-05, "loss": 2.14, "step": 306 }, { "epoch": 0.24, "learning_rate": 1.5989583333333333e-05, "loss": 2.3977, "step": 307 }, { "epoch": 0.24, "learning_rate": 1.604166666666667e-05, "loss": 2.6526, "step": 308 }, { "epoch": 0.24, "learning_rate": 1.609375e-05, "loss": 2.1979, "step": 309 }, { "epoch": 0.24, "learning_rate": 1.6145833333333334e-05, "loss": 2.0825, "step": 310 }, { "epoch": 0.24, "learning_rate": 1.619791666666667e-05, "loss": 2.3477, "step": 311 }, { "epoch": 0.24, "learning_rate": 1.6250000000000002e-05, "loss": 2.4764, "step": 312 }, { "epoch": 0.24, "learning_rate": 1.6302083333333334e-05, "loss": 2.2278, "step": 313 }, { "epoch": 0.25, "learning_rate": 1.635416666666667e-05, "loss": 2.2502, "step": 314 }, { "epoch": 0.25, "learning_rate": 1.6406250000000002e-05, "loss": 2.3826, "step": 315 }, { "epoch": 0.25, "learning_rate": 1.6458333333333335e-05, "loss": 2.3363, "step": 316 }, { "epoch": 0.25, "learning_rate": 1.651041666666667e-05, "loss": 2.3755, "step": 317 }, { "epoch": 0.25, "learning_rate": 1.6562500000000003e-05, "loss": 2.1108, "step": 318 }, { "epoch": 0.25, "learning_rate": 1.6614583333333335e-05, "loss": 2.4599, "step": 319 }, { "epoch": 0.25, "learning_rate": 1.6666666666666667e-05, "loss": 2.5756, "step": 320 }, { "epoch": 0.25, "learning_rate": 1.671875e-05, "loss": 2.3992, "step": 321 }, { "epoch": 0.25, "learning_rate": 1.6770833333333336e-05, "loss": 2.0538, "step": 322 }, { "epoch": 0.25, "learning_rate": 1.6822916666666668e-05, "loss": 2.131, "step": 323 }, { "epoch": 0.25, "learning_rate": 1.6875e-05, "loss": 2.3612, "step": 324 }, { "epoch": 0.25, "learning_rate": 1.6927083333333333e-05, "loss": 2.471, "step": 325 }, { "epoch": 0.25, "learning_rate": 1.6979166666666668e-05, "loss": 2.2179, "step": 326 }, { "epoch": 0.26, "learning_rate": 1.703125e-05, "loss": 2.2624, "step": 327 }, { "epoch": 0.26, "learning_rate": 1.7083333333333333e-05, "loss": 2.4052, "step": 328 }, { "epoch": 0.26, "learning_rate": 1.713541666666667e-05, "loss": 2.5379, "step": 329 }, { "epoch": 0.26, "learning_rate": 1.71875e-05, "loss": 2.2707, "step": 330 }, { "epoch": 0.26, "learning_rate": 1.7239583333333333e-05, "loss": 2.2658, "step": 331 }, { "epoch": 0.26, "learning_rate": 1.729166666666667e-05, "loss": 2.4921, "step": 332 }, { "epoch": 0.26, "learning_rate": 1.734375e-05, "loss": 2.0215, "step": 333 }, { "epoch": 0.26, "learning_rate": 1.7395833333333334e-05, "loss": 2.3348, "step": 334 }, { "epoch": 0.26, "learning_rate": 1.744791666666667e-05, "loss": 2.2997, "step": 335 }, { "epoch": 0.26, "learning_rate": 1.7500000000000002e-05, "loss": 2.2208, "step": 336 }, { "epoch": 0.26, "learning_rate": 1.7552083333333334e-05, "loss": 2.1194, "step": 337 }, { "epoch": 0.26, "learning_rate": 1.760416666666667e-05, "loss": 2.2412, "step": 338 }, { "epoch": 0.26, "learning_rate": 1.7656250000000002e-05, "loss": 2.1475, "step": 339 }, { "epoch": 0.27, "learning_rate": 1.7708333333333335e-05, "loss": 2.4105, "step": 340 }, { "epoch": 0.27, "learning_rate": 1.776041666666667e-05, "loss": 2.0935, "step": 341 }, { "epoch": 0.27, "learning_rate": 1.7812500000000003e-05, "loss": 2.3793, "step": 342 }, { "epoch": 0.27, "learning_rate": 1.7864583333333335e-05, "loss": 2.2909, "step": 343 }, { "epoch": 0.27, "learning_rate": 1.7916666666666667e-05, "loss": 2.42, "step": 344 }, { "epoch": 0.27, "learning_rate": 1.7968750000000003e-05, "loss": 2.3437, "step": 345 }, { "epoch": 0.27, "learning_rate": 1.8020833333333335e-05, "loss": 2.4626, "step": 346 }, { "epoch": 0.27, "learning_rate": 1.8072916666666668e-05, "loss": 2.2944, "step": 347 }, { "epoch": 0.27, "learning_rate": 1.8125e-05, "loss": 2.0547, "step": 348 }, { "epoch": 0.27, "learning_rate": 1.8177083333333332e-05, "loss": 2.1701, "step": 349 }, { "epoch": 0.27, "learning_rate": 1.8229166666666668e-05, "loss": 2.6822, "step": 350 }, { "epoch": 0.27, "learning_rate": 1.828125e-05, "loss": 2.2993, "step": 351 }, { "epoch": 0.28, "learning_rate": 1.8333333333333333e-05, "loss": 2.3871, "step": 352 }, { "epoch": 0.28, "learning_rate": 1.838541666666667e-05, "loss": 2.0841, "step": 353 }, { "epoch": 0.28, "learning_rate": 1.84375e-05, "loss": 2.2944, "step": 354 }, { "epoch": 0.28, "learning_rate": 1.8489583333333333e-05, "loss": 2.5925, "step": 355 }, { "epoch": 0.28, "learning_rate": 1.854166666666667e-05, "loss": 2.4058, "step": 356 }, { "epoch": 0.28, "learning_rate": 1.859375e-05, "loss": 2.533, "step": 357 }, { "epoch": 0.28, "learning_rate": 1.8645833333333334e-05, "loss": 2.2981, "step": 358 }, { "epoch": 0.28, "learning_rate": 1.869791666666667e-05, "loss": 2.1106, "step": 359 }, { "epoch": 0.28, "learning_rate": 1.8750000000000002e-05, "loss": 2.5157, "step": 360 }, { "epoch": 0.28, "learning_rate": 1.8802083333333334e-05, "loss": 2.3536, "step": 361 }, { "epoch": 0.28, "learning_rate": 1.885416666666667e-05, "loss": 2.1043, "step": 362 }, { "epoch": 0.28, "learning_rate": 1.8906250000000002e-05, "loss": 2.4986, "step": 363 }, { "epoch": 0.28, "learning_rate": 1.8958333333333334e-05, "loss": 1.9831, "step": 364 }, { "epoch": 0.29, "learning_rate": 1.901041666666667e-05, "loss": 2.5, "step": 365 }, { "epoch": 0.29, "learning_rate": 1.9062500000000003e-05, "loss": 2.47, "step": 366 }, { "epoch": 0.29, "learning_rate": 1.9114583333333335e-05, "loss": 2.0442, "step": 367 }, { "epoch": 0.29, "learning_rate": 1.916666666666667e-05, "loss": 2.6178, "step": 368 }, { "epoch": 0.29, "learning_rate": 1.9218750000000003e-05, "loss": 2.2842, "step": 369 }, { "epoch": 0.29, "learning_rate": 1.9270833333333335e-05, "loss": 2.1735, "step": 370 }, { "epoch": 0.29, "learning_rate": 1.9322916666666668e-05, "loss": 2.4521, "step": 371 }, { "epoch": 0.29, "learning_rate": 1.9375e-05, "loss": 2.4288, "step": 372 }, { "epoch": 0.29, "learning_rate": 1.9427083333333336e-05, "loss": 2.249, "step": 373 }, { "epoch": 0.29, "learning_rate": 1.9479166666666668e-05, "loss": 2.3725, "step": 374 }, { "epoch": 0.29, "learning_rate": 1.953125e-05, "loss": 2.4418, "step": 375 }, { "epoch": 0.29, "learning_rate": 1.9583333333333333e-05, "loss": 2.5865, "step": 376 }, { "epoch": 0.29, "learning_rate": 1.963541666666667e-05, "loss": 2.0262, "step": 377 }, { "epoch": 0.3, "learning_rate": 1.96875e-05, "loss": 2.3921, "step": 378 }, { "epoch": 0.3, "learning_rate": 1.9739583333333333e-05, "loss": 2.1366, "step": 379 }, { "epoch": 0.3, "learning_rate": 1.979166666666667e-05, "loss": 2.4038, "step": 380 }, { "epoch": 0.3, "learning_rate": 1.984375e-05, "loss": 2.2986, "step": 381 }, { "epoch": 0.3, "learning_rate": 1.9895833333333334e-05, "loss": 2.7233, "step": 382 }, { "epoch": 0.3, "learning_rate": 1.994791666666667e-05, "loss": 2.1788, "step": 383 }, { "epoch": 0.3, "learning_rate": 2e-05, "loss": 2.4779, "step": 384 }, { "epoch": 0.3, "learning_rate": 1.9999999679368497e-05, "loss": 2.1991, "step": 385 }, { "epoch": 0.3, "learning_rate": 1.9999998717474e-05, "loss": 2.4695, "step": 386 }, { "epoch": 0.3, "learning_rate": 1.999999711431658e-05, "loss": 2.2836, "step": 387 }, { "epoch": 0.3, "learning_rate": 1.999999486989633e-05, "loss": 2.1097, "step": 388 }, { "epoch": 0.3, "learning_rate": 1.9999991984213403e-05, "loss": 2.3797, "step": 389 }, { "epoch": 0.3, "learning_rate": 1.9999988457267974e-05, "loss": 1.9845, "step": 390 }, { "epoch": 0.31, "learning_rate": 1.9999984289060277e-05, "loss": 2.2455, "step": 391 }, { "epoch": 0.31, "learning_rate": 1.999997947959058e-05, "loss": 2.0307, "step": 392 }, { "epoch": 0.31, "learning_rate": 1.999997402885919e-05, "loss": 2.2315, "step": 393 }, { "epoch": 0.31, "learning_rate": 1.9999967936866447e-05, "loss": 2.6387, "step": 394 }, { "epoch": 0.31, "learning_rate": 1.9999961203612756e-05, "loss": 2.3712, "step": 395 }, { "epoch": 0.31, "learning_rate": 1.9999953829098546e-05, "loss": 2.5086, "step": 396 }, { "epoch": 0.31, "learning_rate": 1.999994581332428e-05, "loss": 2.0677, "step": 397 }, { "epoch": 0.31, "learning_rate": 1.999993715629048e-05, "loss": 2.2307, "step": 398 }, { "epoch": 0.31, "learning_rate": 1.99999278579977e-05, "loss": 2.4192, "step": 399 }, { "epoch": 0.31, "learning_rate": 1.9999917918446537e-05, "loss": 2.3636, "step": 400 }, { "epoch": 0.31, "learning_rate": 1.9999907337637626e-05, "loss": 2.6402, "step": 401 }, { "epoch": 0.31, "learning_rate": 1.9999896115571646e-05, "loss": 2.6225, "step": 402 }, { "epoch": 0.31, "learning_rate": 1.999988425224932e-05, "loss": 2.1134, "step": 403 }, { "epoch": 0.32, "learning_rate": 1.9999871747671404e-05, "loss": 2.0401, "step": 404 }, { "epoch": 0.32, "learning_rate": 1.99998586018387e-05, "loss": 2.2704, "step": 405 }, { "epoch": 0.32, "learning_rate": 1.999984481475206e-05, "loss": 2.5254, "step": 406 }, { "epoch": 0.32, "learning_rate": 1.9999830386412355e-05, "loss": 2.3625, "step": 407 }, { "epoch": 0.32, "learning_rate": 1.999981531682052e-05, "loss": 2.0392, "step": 408 }, { "epoch": 0.32, "learning_rate": 1.9999799605977513e-05, "loss": 2.2239, "step": 409 }, { "epoch": 0.32, "learning_rate": 1.999978325388435e-05, "loss": 2.3386, "step": 410 }, { "epoch": 0.32, "learning_rate": 1.9999766260542074e-05, "loss": 2.484, "step": 411 }, { "epoch": 0.32, "learning_rate": 1.999974862595178e-05, "loss": 2.3704, "step": 412 }, { "epoch": 0.32, "learning_rate": 1.9999730350114594e-05, "loss": 2.3156, "step": 413 }, { "epoch": 0.32, "learning_rate": 1.999971143303169e-05, "loss": 2.0583, "step": 414 }, { "epoch": 0.32, "learning_rate": 1.999969187470428e-05, "loss": 2.3624, "step": 415 }, { "epoch": 0.33, "learning_rate": 1.9999671675133613e-05, "loss": 2.512, "step": 416 }, { "epoch": 0.33, "learning_rate": 1.9999650834320996e-05, "loss": 2.356, "step": 417 }, { "epoch": 0.33, "learning_rate": 1.999962935226776e-05, "loss": 2.1592, "step": 418 }, { "epoch": 0.33, "learning_rate": 1.999960722897528e-05, "loss": 2.282, "step": 419 }, { "epoch": 0.33, "learning_rate": 1.9999584464444974e-05, "loss": 2.289, "step": 420 }, { "epoch": 0.33, "learning_rate": 1.9999561058678306e-05, "loss": 2.311, "step": 421 }, { "epoch": 0.33, "learning_rate": 1.9999537011676778e-05, "loss": 2.0113, "step": 422 }, { "epoch": 0.33, "learning_rate": 1.999951232344193e-05, "loss": 2.2306, "step": 423 }, { "epoch": 0.33, "learning_rate": 1.999948699397534e-05, "loss": 2.4403, "step": 424 }, { "epoch": 0.33, "learning_rate": 1.999946102327864e-05, "loss": 1.9165, "step": 425 }, { "epoch": 0.33, "learning_rate": 1.9999434411353494e-05, "loss": 2.299, "step": 426 }, { "epoch": 0.33, "learning_rate": 1.9999407158201603e-05, "loss": 2.3037, "step": 427 }, { "epoch": 0.33, "learning_rate": 1.999937926382472e-05, "loss": 2.2133, "step": 428 }, { "epoch": 0.34, "learning_rate": 1.9999350728224633e-05, "loss": 2.2913, "step": 429 }, { "epoch": 0.34, "learning_rate": 1.999932155140317e-05, "loss": 2.1417, "step": 430 }, { "epoch": 0.34, "learning_rate": 1.9999291733362205e-05, "loss": 2.5714, "step": 431 }, { "epoch": 0.34, "learning_rate": 1.9999261274103646e-05, "loss": 2.1199, "step": 432 }, { "epoch": 0.34, "learning_rate": 1.999923017362945e-05, "loss": 2.301, "step": 433 }, { "epoch": 0.34, "learning_rate": 1.9999198431941607e-05, "loss": 2.314, "step": 434 }, { "epoch": 0.34, "learning_rate": 1.9999166049042158e-05, "loss": 2.465, "step": 435 }, { "epoch": 0.34, "learning_rate": 1.9999133024933174e-05, "loss": 2.3272, "step": 436 }, { "epoch": 0.34, "learning_rate": 1.999909935961678e-05, "loss": 2.1315, "step": 437 }, { "epoch": 0.34, "learning_rate": 1.999906505309513e-05, "loss": 2.3707, "step": 438 }, { "epoch": 0.34, "learning_rate": 1.9999030105370422e-05, "loss": 2.3191, "step": 439 }, { "epoch": 0.34, "learning_rate": 1.9998994516444902e-05, "loss": 2.3913, "step": 440 }, { "epoch": 0.34, "learning_rate": 1.9998958286320844e-05, "loss": 2.3293, "step": 441 }, { "epoch": 0.35, "learning_rate": 1.999892141500058e-05, "loss": 2.4155, "step": 442 }, { "epoch": 0.35, "learning_rate": 1.9998883902486476e-05, "loss": 1.9014, "step": 443 }, { "epoch": 0.35, "learning_rate": 1.9998845748780926e-05, "loss": 2.321, "step": 444 }, { "epoch": 0.35, "learning_rate": 1.999880695388639e-05, "loss": 2.1142, "step": 445 }, { "epoch": 0.35, "learning_rate": 1.9998767517805344e-05, "loss": 2.2421, "step": 446 }, { "epoch": 0.35, "learning_rate": 1.9998727440540324e-05, "loss": 2.0887, "step": 447 }, { "epoch": 0.35, "learning_rate": 1.9998686722093897e-05, "loss": 2.2832, "step": 448 }, { "epoch": 0.35, "learning_rate": 1.999864536246868e-05, "loss": 2.2686, "step": 449 }, { "epoch": 0.35, "learning_rate": 1.9998603361667314e-05, "loss": 2.5963, "step": 450 }, { "epoch": 0.35, "learning_rate": 1.9998560719692507e-05, "loss": 2.3741, "step": 451 }, { "epoch": 0.35, "learning_rate": 1.999851743654698e-05, "loss": 2.4301, "step": 452 }, { "epoch": 0.35, "learning_rate": 1.9998473512233513e-05, "loss": 2.2453, "step": 453 }, { "epoch": 0.35, "learning_rate": 1.9998428946754927e-05, "loss": 2.3581, "step": 454 }, { "epoch": 0.36, "learning_rate": 1.9998383740114074e-05, "loss": 2.1218, "step": 455 }, { "epoch": 0.36, "learning_rate": 1.9998337892313854e-05, "loss": 2.2259, "step": 456 }, { "epoch": 0.36, "learning_rate": 1.9998291403357215e-05, "loss": 2.1788, "step": 457 }, { "epoch": 0.36, "learning_rate": 1.9998244273247125e-05, "loss": 2.4433, "step": 458 }, { "epoch": 0.36, "learning_rate": 1.999819650198662e-05, "loss": 2.278, "step": 459 }, { "epoch": 0.36, "learning_rate": 1.9998148089578747e-05, "loss": 2.2172, "step": 460 }, { "epoch": 0.36, "learning_rate": 1.9998099036026627e-05, "loss": 2.3617, "step": 461 }, { "epoch": 0.36, "learning_rate": 1.9998049341333398e-05, "loss": 2.4089, "step": 462 }, { "epoch": 0.36, "learning_rate": 1.9997999005502246e-05, "loss": 2.5551, "step": 463 }, { "epoch": 0.36, "learning_rate": 1.99979480285364e-05, "loss": 2.3987, "step": 464 }, { "epoch": 0.36, "learning_rate": 1.9997896410439125e-05, "loss": 2.288, "step": 465 }, { "epoch": 0.36, "learning_rate": 1.9997844151213742e-05, "loss": 2.7645, "step": 466 }, { "epoch": 0.36, "learning_rate": 1.9997791250863592e-05, "loss": 2.4103, "step": 467 }, { "epoch": 0.37, "learning_rate": 1.999773770939207e-05, "loss": 2.6735, "step": 468 }, { "epoch": 0.37, "learning_rate": 1.999768352680261e-05, "loss": 2.0989, "step": 469 }, { "epoch": 0.37, "learning_rate": 1.9997628703098688e-05, "loss": 2.4348, "step": 470 }, { "epoch": 0.37, "learning_rate": 1.9997573238283818e-05, "loss": 2.5572, "step": 471 }, { "epoch": 0.37, "learning_rate": 1.999751713236156e-05, "loss": 2.405, "step": 472 }, { "epoch": 0.37, "learning_rate": 1.9997460385335505e-05, "loss": 2.2248, "step": 473 }, { "epoch": 0.37, "learning_rate": 1.9997402997209295e-05, "loss": 2.2505, "step": 474 }, { "epoch": 0.37, "learning_rate": 1.9997344967986612e-05, "loss": 2.0819, "step": 475 }, { "epoch": 0.37, "learning_rate": 1.9997286297671177e-05, "loss": 2.246, "step": 476 }, { "epoch": 0.37, "learning_rate": 1.9997226986266753e-05, "loss": 2.3622, "step": 477 }, { "epoch": 0.37, "learning_rate": 1.9997167033777138e-05, "loss": 2.4131, "step": 478 }, { "epoch": 0.37, "learning_rate": 1.9997106440206182e-05, "loss": 2.2199, "step": 479 }, { "epoch": 0.38, "learning_rate": 1.9997045205557767e-05, "loss": 2.183, "step": 480 }, { "epoch": 0.38, "learning_rate": 1.9996983329835828e-05, "loss": 2.3495, "step": 481 }, { "epoch": 0.38, "learning_rate": 1.9996920813044317e-05, "loss": 2.3732, "step": 482 }, { "epoch": 0.38, "learning_rate": 1.999685765518726e-05, "loss": 2.2171, "step": 483 }, { "epoch": 0.38, "learning_rate": 1.99967938562687e-05, "loss": 2.2809, "step": 484 }, { "epoch": 0.38, "learning_rate": 1.9996729416292723e-05, "loss": 2.2222, "step": 485 }, { "epoch": 0.38, "learning_rate": 1.999666433526347e-05, "loss": 2.1866, "step": 486 }, { "epoch": 0.38, "learning_rate": 1.9996598613185106e-05, "loss": 2.3512, "step": 487 }, { "epoch": 0.38, "learning_rate": 1.9996532250061854e-05, "loss": 2.3183, "step": 488 }, { "epoch": 0.38, "learning_rate": 1.9996465245897966e-05, "loss": 2.4176, "step": 489 }, { "epoch": 0.38, "learning_rate": 1.9996397600697737e-05, "loss": 2.262, "step": 490 }, { "epoch": 0.38, "learning_rate": 1.9996329314465508e-05, "loss": 2.1797, "step": 491 }, { "epoch": 0.38, "learning_rate": 1.9996260387205653e-05, "loss": 1.7512, "step": 492 }, { "epoch": 0.39, "learning_rate": 1.99961908189226e-05, "loss": 2.1756, "step": 493 }, { "epoch": 0.39, "learning_rate": 1.99961206096208e-05, "loss": 2.2085, "step": 494 }, { "epoch": 0.39, "learning_rate": 1.9996049759304763e-05, "loss": 2.1381, "step": 495 }, { "epoch": 0.39, "learning_rate": 1.999597826797903e-05, "loss": 2.396, "step": 496 }, { "epoch": 0.39, "learning_rate": 1.9995906135648185e-05, "loss": 2.3679, "step": 497 }, { "epoch": 0.39, "learning_rate": 1.9995833362316855e-05, "loss": 2.4717, "step": 498 }, { "epoch": 0.39, "learning_rate": 1.9995759947989707e-05, "loss": 2.57, "step": 499 }, { "epoch": 0.39, "learning_rate": 1.9995685892671446e-05, "loss": 1.9048, "step": 500 }, { "epoch": 0.39, "learning_rate": 1.999561119636682e-05, "loss": 2.4586, "step": 501 }, { "epoch": 0.39, "learning_rate": 1.9995535859080623e-05, "loss": 2.6198, "step": 502 }, { "epoch": 0.39, "learning_rate": 1.9995459880817686e-05, "loss": 2.1813, "step": 503 }, { "epoch": 0.39, "learning_rate": 1.9995383261582878e-05, "loss": 2.3963, "step": 504 }, { "epoch": 0.39, "learning_rate": 1.999530600138112e-05, "loss": 2.6341, "step": 505 }, { "epoch": 0.4, "learning_rate": 1.9995228100217355e-05, "loss": 2.2205, "step": 506 }, { "epoch": 0.4, "learning_rate": 1.9995149558096584e-05, "loss": 2.4529, "step": 507 }, { "epoch": 0.4, "learning_rate": 1.9995070375023843e-05, "loss": 2.4466, "step": 508 }, { "epoch": 0.4, "learning_rate": 1.9994990551004212e-05, "loss": 2.618, "step": 509 }, { "epoch": 0.4, "learning_rate": 1.9994910086042807e-05, "loss": 2.2783, "step": 510 }, { "epoch": 0.4, "learning_rate": 1.9994828980144792e-05, "loss": 2.1956, "step": 511 }, { "epoch": 0.4, "learning_rate": 1.9994747233315363e-05, "loss": 2.4172, "step": 512 }, { "epoch": 0.4, "learning_rate": 1.9994664845559762e-05, "loss": 2.2935, "step": 513 }, { "epoch": 0.4, "learning_rate": 1.999458181688328e-05, "loss": 2.5627, "step": 514 }, { "epoch": 0.4, "learning_rate": 1.999449814729123e-05, "loss": 2.4974, "step": 515 }, { "epoch": 0.4, "learning_rate": 1.9994413836788987e-05, "loss": 2.5319, "step": 516 }, { "epoch": 0.4, "learning_rate": 1.999432888538195e-05, "loss": 2.3052, "step": 517 }, { "epoch": 0.4, "learning_rate": 1.999424329307557e-05, "loss": 2.5444, "step": 518 }, { "epoch": 0.41, "learning_rate": 1.9994157059875344e-05, "loss": 2.1189, "step": 519 }, { "epoch": 0.41, "learning_rate": 1.9994070185786787e-05, "loss": 2.1635, "step": 520 }, { "epoch": 0.41, "learning_rate": 1.999398267081548e-05, "loss": 2.0982, "step": 521 }, { "epoch": 0.41, "learning_rate": 1.9993894514967028e-05, "loss": 2.172, "step": 522 }, { "epoch": 0.41, "learning_rate": 1.999380571824709e-05, "loss": 2.3539, "step": 523 }, { "epoch": 0.41, "learning_rate": 1.9993716280661357e-05, "loss": 2.3373, "step": 524 }, { "epoch": 0.41, "learning_rate": 1.9993626202215566e-05, "loss": 2.2618, "step": 525 }, { "epoch": 0.41, "learning_rate": 1.9993535482915493e-05, "loss": 2.1669, "step": 526 }, { "epoch": 0.41, "learning_rate": 1.9993444122766957e-05, "loss": 2.2305, "step": 527 }, { "epoch": 0.41, "learning_rate": 1.9993352121775812e-05, "loss": 2.274, "step": 528 }, { "epoch": 0.41, "learning_rate": 1.9993259479947965e-05, "loss": 2.1695, "step": 529 }, { "epoch": 0.41, "learning_rate": 1.9993166197289346e-05, "loss": 2.2096, "step": 530 }, { "epoch": 0.42, "learning_rate": 1.9993072273805948e-05, "loss": 2.1645, "step": 531 }, { "epoch": 0.42, "learning_rate": 1.9992977709503786e-05, "loss": 1.9009, "step": 532 }, { "epoch": 0.42, "learning_rate": 1.999288250438893e-05, "loss": 2.3046, "step": 533 }, { "epoch": 0.42, "learning_rate": 1.999278665846748e-05, "loss": 2.354, "step": 534 }, { "epoch": 0.42, "learning_rate": 1.9992690171745588e-05, "loss": 2.1494, "step": 535 }, { "epoch": 0.42, "learning_rate": 1.9992593044229435e-05, "loss": 2.2196, "step": 536 }, { "epoch": 0.42, "learning_rate": 1.9992495275925253e-05, "loss": 2.4258, "step": 537 }, { "epoch": 0.42, "learning_rate": 1.999239686683931e-05, "loss": 2.2447, "step": 538 }, { "epoch": 0.42, "learning_rate": 1.9992297816977918e-05, "loss": 2.4059, "step": 539 }, { "epoch": 0.42, "learning_rate": 1.999219812634743e-05, "loss": 2.4016, "step": 540 }, { "epoch": 0.42, "learning_rate": 1.9992097794954234e-05, "loss": 2.4129, "step": 541 }, { "epoch": 0.42, "learning_rate": 1.9991996822804773e-05, "loss": 2.4378, "step": 542 }, { "epoch": 0.42, "learning_rate": 1.999189520990551e-05, "loss": 2.4231, "step": 543 }, { "epoch": 0.43, "learning_rate": 1.9991792956262967e-05, "loss": 2.3047, "step": 544 }, { "epoch": 0.43, "learning_rate": 1.99916900618837e-05, "loss": 2.3792, "step": 545 }, { "epoch": 0.43, "learning_rate": 1.9991586526774315e-05, "loss": 2.31, "step": 546 }, { "epoch": 0.43, "learning_rate": 1.9991482350941438e-05, "loss": 2.293, "step": 547 }, { "epoch": 0.43, "learning_rate": 1.999137753439176e-05, "loss": 2.2521, "step": 548 }, { "epoch": 0.43, "learning_rate": 1.9991272077131995e-05, "loss": 2.5713, "step": 549 }, { "epoch": 0.43, "learning_rate": 1.999116597916891e-05, "loss": 2.4993, "step": 550 }, { "epoch": 0.43, "learning_rate": 1.999105924050931e-05, "loss": 2.0947, "step": 551 }, { "epoch": 0.43, "learning_rate": 1.999095186116004e-05, "loss": 2.6922, "step": 552 }, { "epoch": 0.43, "learning_rate": 1.9990843841127975e-05, "loss": 2.1788, "step": 553 }, { "epoch": 0.43, "learning_rate": 1.9990735180420058e-05, "loss": 1.9466, "step": 554 }, { "epoch": 0.43, "learning_rate": 1.9990625879043247e-05, "loss": 2.304, "step": 555 }, { "epoch": 0.43, "learning_rate": 1.9990515937004552e-05, "loss": 2.1267, "step": 556 }, { "epoch": 0.44, "learning_rate": 1.9990405354311027e-05, "loss": 2.3507, "step": 557 }, { "epoch": 0.44, "learning_rate": 1.9990294130969757e-05, "loss": 2.0987, "step": 558 }, { "epoch": 0.44, "learning_rate": 1.999018226698788e-05, "loss": 2.1446, "step": 559 }, { "epoch": 0.44, "learning_rate": 1.999006976237257e-05, "loss": 2.3555, "step": 560 }, { "epoch": 0.44, "learning_rate": 1.9989956617131037e-05, "loss": 2.0377, "step": 561 }, { "epoch": 0.44, "learning_rate": 1.998984283127054e-05, "loss": 2.2258, "step": 562 }, { "epoch": 0.44, "learning_rate": 1.9989728404798373e-05, "loss": 2.25, "step": 563 }, { "epoch": 0.44, "learning_rate": 1.9989613337721873e-05, "loss": 2.5476, "step": 564 }, { "epoch": 0.44, "learning_rate": 1.9989497630048423e-05, "loss": 2.1508, "step": 565 }, { "epoch": 0.44, "learning_rate": 1.9989381281785442e-05, "loss": 2.3378, "step": 566 }, { "epoch": 0.44, "learning_rate": 1.9989264292940392e-05, "loss": 1.9111, "step": 567 }, { "epoch": 0.44, "learning_rate": 1.998914666352077e-05, "loss": 2.2417, "step": 568 }, { "epoch": 0.44, "learning_rate": 1.998902839353412e-05, "loss": 2.0839, "step": 569 }, { "epoch": 0.45, "learning_rate": 1.998890948298803e-05, "loss": 2.2123, "step": 570 }, { "epoch": 0.45, "learning_rate": 1.9988789931890125e-05, "loss": 1.8544, "step": 571 }, { "epoch": 0.45, "learning_rate": 1.998866974024807e-05, "loss": 2.2254, "step": 572 }, { "epoch": 0.45, "learning_rate": 1.998854890806957e-05, "loss": 2.2803, "step": 573 }, { "epoch": 0.45, "learning_rate": 1.9988427435362376e-05, "loss": 2.2137, "step": 574 }, { "epoch": 0.45, "learning_rate": 1.9988305322134277e-05, "loss": 2.2837, "step": 575 }, { "epoch": 0.45, "learning_rate": 1.9988182568393108e-05, "loss": 2.341, "step": 576 }, { "epoch": 0.45, "learning_rate": 1.9988059174146732e-05, "loss": 2.4734, "step": 577 }, { "epoch": 0.45, "learning_rate": 1.998793513940307e-05, "loss": 2.2847, "step": 578 }, { "epoch": 0.45, "learning_rate": 1.998781046417007e-05, "loss": 2.2098, "step": 579 }, { "epoch": 0.45, "learning_rate": 1.998768514845573e-05, "loss": 2.191, "step": 580 }, { "epoch": 0.45, "learning_rate": 1.9987559192268093e-05, "loss": 2.5626, "step": 581 }, { "epoch": 0.45, "learning_rate": 1.998743259561522e-05, "loss": 2.3024, "step": 582 }, { "epoch": 0.46, "learning_rate": 1.998730535850524e-05, "loss": 2.3169, "step": 583 }, { "epoch": 0.46, "learning_rate": 1.9987177480946308e-05, "loss": 2.0788, "step": 584 }, { "epoch": 0.46, "learning_rate": 1.9987048962946633e-05, "loss": 2.289, "step": 585 }, { "epoch": 0.46, "learning_rate": 1.9986919804514443e-05, "loss": 2.1532, "step": 586 }, { "epoch": 0.46, "learning_rate": 1.9986790005658033e-05, "loss": 2.1349, "step": 587 }, { "epoch": 0.46, "learning_rate": 1.9986659566385715e-05, "loss": 2.2534, "step": 588 }, { "epoch": 0.46, "learning_rate": 1.9986528486705862e-05, "loss": 2.185, "step": 589 }, { "epoch": 0.46, "learning_rate": 1.9986396766626876e-05, "loss": 2.0862, "step": 590 }, { "epoch": 0.46, "learning_rate": 1.9986264406157208e-05, "loss": 2.2153, "step": 591 }, { "epoch": 0.46, "learning_rate": 1.998613140530534e-05, "loss": 2.5565, "step": 592 }, { "epoch": 0.46, "learning_rate": 1.9985997764079802e-05, "loss": 2.4275, "step": 593 }, { "epoch": 0.46, "learning_rate": 1.998586348248917e-05, "loss": 2.1836, "step": 594 }, { "epoch": 0.47, "learning_rate": 1.9985728560542048e-05, "loss": 2.1768, "step": 595 }, { "epoch": 0.47, "learning_rate": 1.998559299824709e-05, "loss": 2.5067, "step": 596 }, { "epoch": 0.47, "learning_rate": 1.998545679561299e-05, "loss": 2.4359, "step": 597 }, { "epoch": 0.47, "learning_rate": 1.9985319952648478e-05, "loss": 2.4914, "step": 598 }, { "epoch": 0.47, "learning_rate": 1.9985182469362336e-05, "loss": 2.1807, "step": 599 }, { "epoch": 0.47, "learning_rate": 1.998504434576338e-05, "loss": 2.2084, "step": 600 }, { "epoch": 0.47, "learning_rate": 1.998490558186046e-05, "loss": 2.3576, "step": 601 }, { "epoch": 0.47, "learning_rate": 1.9984766177662476e-05, "loss": 2.1817, "step": 602 }, { "epoch": 0.47, "learning_rate": 1.9984626133178377e-05, "loss": 2.2652, "step": 603 }, { "epoch": 0.47, "learning_rate": 1.9984485448417135e-05, "loss": 2.171, "step": 604 }, { "epoch": 0.47, "learning_rate": 1.9984344123387772e-05, "loss": 2.1527, "step": 605 }, { "epoch": 0.47, "learning_rate": 1.9984202158099357e-05, "loss": 2.4746, "step": 606 }, { "epoch": 0.47, "learning_rate": 1.9984059552560983e-05, "loss": 2.2313, "step": 607 }, { "epoch": 0.48, "learning_rate": 1.9983916306781802e-05, "loss": 2.1011, "step": 608 }, { "epoch": 0.48, "learning_rate": 1.9983772420771e-05, "loss": 2.399, "step": 609 }, { "epoch": 0.48, "learning_rate": 1.99836278945378e-05, "loss": 2.4247, "step": 610 }, { "epoch": 0.48, "learning_rate": 1.9983482728091473e-05, "loss": 2.3102, "step": 611 }, { "epoch": 0.48, "learning_rate": 1.998333692144133e-05, "loss": 2.175, "step": 612 }, { "epoch": 0.48, "learning_rate": 1.998319047459672e-05, "loss": 1.9555, "step": 613 }, { "epoch": 0.48, "learning_rate": 1.9983043387567026e-05, "loss": 1.9695, "step": 614 }, { "epoch": 0.48, "learning_rate": 1.998289566036169e-05, "loss": 1.9142, "step": 615 }, { "epoch": 0.48, "learning_rate": 1.998274729299018e-05, "loss": 2.649, "step": 616 }, { "epoch": 0.48, "learning_rate": 1.9982598285462014e-05, "loss": 2.1869, "step": 617 }, { "epoch": 0.48, "learning_rate": 1.9982448637786744e-05, "loss": 2.3347, "step": 618 }, { "epoch": 0.48, "learning_rate": 1.9982298349973968e-05, "loss": 2.3234, "step": 619 }, { "epoch": 0.48, "learning_rate": 1.9982147422033324e-05, "loss": 2.4891, "step": 620 }, { "epoch": 0.49, "learning_rate": 1.9981995853974486e-05, "loss": 2.1795, "step": 621 }, { "epoch": 0.49, "learning_rate": 1.9981843645807182e-05, "loss": 2.0465, "step": 622 }, { "epoch": 0.49, "learning_rate": 1.9981690797541166e-05, "loss": 2.1857, "step": 623 }, { "epoch": 0.49, "learning_rate": 1.998153730918624e-05, "loss": 2.3888, "step": 624 }, { "epoch": 0.49, "learning_rate": 1.998138318075225e-05, "loss": 1.9896, "step": 625 }, { "epoch": 0.49, "learning_rate": 1.9981228412249073e-05, "loss": 2.1974, "step": 626 }, { "epoch": 0.49, "learning_rate": 1.998107300368664e-05, "loss": 2.3151, "step": 627 }, { "epoch": 0.49, "learning_rate": 1.9980916955074918e-05, "loss": 2.2865, "step": 628 }, { "epoch": 0.49, "learning_rate": 1.9980760266423908e-05, "loss": 2.6372, "step": 629 }, { "epoch": 0.49, "learning_rate": 1.998060293774366e-05, "loss": 2.1494, "step": 630 }, { "epoch": 0.49, "learning_rate": 1.9980444969044265e-05, "loss": 2.6098, "step": 631 }, { "epoch": 0.49, "learning_rate": 1.998028636033585e-05, "loss": 2.3379, "step": 632 }, { "epoch": 0.49, "learning_rate": 1.9980127111628588e-05, "loss": 2.2315, "step": 633 }, { "epoch": 0.5, "learning_rate": 1.9979967222932693e-05, "loss": 2.1053, "step": 634 }, { "epoch": 0.5, "learning_rate": 1.9979806694258413e-05, "loss": 2.1319, "step": 635 }, { "epoch": 0.5, "learning_rate": 1.9979645525616047e-05, "loss": 2.5155, "step": 636 }, { "epoch": 0.5, "learning_rate": 1.9979483717015925e-05, "loss": 2.4507, "step": 637 }, { "epoch": 0.5, "learning_rate": 1.997932126846843e-05, "loss": 2.1297, "step": 638 }, { "epoch": 0.5, "learning_rate": 1.997915817998397e-05, "loss": 2.3041, "step": 639 }, { "epoch": 0.5, "learning_rate": 1.9978994451573013e-05, "loss": 2.3503, "step": 640 }, { "epoch": 0.5, "learning_rate": 1.997883008324605e-05, "loss": 2.1602, "step": 641 }, { "epoch": 0.5, "learning_rate": 1.9978665075013627e-05, "loss": 2.2396, "step": 642 }, { "epoch": 0.5, "learning_rate": 1.9978499426886326e-05, "loss": 2.1812, "step": 643 }, { "epoch": 0.5, "learning_rate": 1.9978333138874763e-05, "loss": 2.4176, "step": 644 }, { "epoch": 0.5, "learning_rate": 1.9978166210989607e-05, "loss": 2.3388, "step": 645 }, { "epoch": 0.5, "learning_rate": 1.9977998643241562e-05, "loss": 2.3597, "step": 646 }, { "epoch": 0.51, "learning_rate": 1.997783043564137e-05, "loss": 2.756, "step": 647 }, { "epoch": 0.51, "learning_rate": 1.997766158819982e-05, "loss": 2.2307, "step": 648 }, { "epoch": 0.51, "learning_rate": 1.997749210092774e-05, "loss": 2.1551, "step": 649 }, { "epoch": 0.51, "learning_rate": 1.9977321973836002e-05, "loss": 2.143, "step": 650 }, { "epoch": 0.51, "learning_rate": 1.9977151206935507e-05, "loss": 2.0657, "step": 651 }, { "epoch": 0.51, "learning_rate": 1.9976979800237214e-05, "loss": 2.5261, "step": 652 }, { "epoch": 0.51, "learning_rate": 1.9976807753752106e-05, "loss": 2.2017, "step": 653 }, { "epoch": 0.51, "learning_rate": 1.9976635067491226e-05, "loss": 2.1913, "step": 654 }, { "epoch": 0.51, "learning_rate": 1.997646174146564e-05, "loss": 2.2392, "step": 655 }, { "epoch": 0.51, "learning_rate": 1.9976287775686462e-05, "loss": 2.2593, "step": 656 }, { "epoch": 0.51, "learning_rate": 1.9976113170164856e-05, "loss": 1.8943, "step": 657 }, { "epoch": 0.51, "learning_rate": 1.997593792491201e-05, "loss": 2.1796, "step": 658 }, { "epoch": 0.52, "learning_rate": 1.9975762039939168e-05, "loss": 2.4189, "step": 659 }, { "epoch": 0.52, "learning_rate": 1.9975585515257604e-05, "loss": 1.8477, "step": 660 }, { "epoch": 0.52, "learning_rate": 1.9975408350878642e-05, "loss": 2.3177, "step": 661 }, { "epoch": 0.52, "learning_rate": 1.9975230546813642e-05, "loss": 2.2026, "step": 662 }, { "epoch": 0.52, "learning_rate": 1.9975052103074003e-05, "loss": 2.197, "step": 663 }, { "epoch": 0.52, "learning_rate": 1.997487301967117e-05, "loss": 2.428, "step": 664 }, { "epoch": 0.52, "learning_rate": 1.997469329661663e-05, "loss": 2.2945, "step": 665 }, { "epoch": 0.52, "learning_rate": 1.9974512933921905e-05, "loss": 2.1691, "step": 666 }, { "epoch": 0.52, "learning_rate": 1.997433193159856e-05, "loss": 2.3339, "step": 667 }, { "epoch": 0.52, "learning_rate": 1.99741502896582e-05, "loss": 2.0145, "step": 668 }, { "epoch": 0.52, "learning_rate": 1.997396800811248e-05, "loss": 2.4275, "step": 669 }, { "epoch": 0.52, "learning_rate": 1.997378508697308e-05, "loss": 2.2852, "step": 670 }, { "epoch": 0.52, "learning_rate": 1.997360152625174e-05, "loss": 2.5794, "step": 671 }, { "epoch": 0.53, "learning_rate": 1.9973417325960224e-05, "loss": 2.1852, "step": 672 }, { "epoch": 0.53, "learning_rate": 1.9973232486110346e-05, "loss": 2.3787, "step": 673 }, { "epoch": 0.53, "learning_rate": 1.9973047006713958e-05, "loss": 2.2446, "step": 674 }, { "epoch": 0.53, "learning_rate": 1.9972860887782957e-05, "loss": 2.1854, "step": 675 }, { "epoch": 0.53, "learning_rate": 1.9972674129329278e-05, "loss": 2.0591, "step": 676 }, { "epoch": 0.53, "learning_rate": 1.9972486731364897e-05, "loss": 2.5141, "step": 677 }, { "epoch": 0.53, "learning_rate": 1.9972298693901828e-05, "loss": 1.9181, "step": 678 }, { "epoch": 0.53, "learning_rate": 1.997211001695213e-05, "loss": 2.3726, "step": 679 }, { "epoch": 0.53, "learning_rate": 1.9971920700527905e-05, "loss": 2.0122, "step": 680 }, { "epoch": 0.53, "learning_rate": 1.997173074464129e-05, "loss": 2.4205, "step": 681 }, { "epoch": 0.53, "learning_rate": 1.9971540149304468e-05, "loss": 2.4441, "step": 682 }, { "epoch": 0.53, "learning_rate": 1.997134891452966e-05, "loss": 2.2192, "step": 683 }, { "epoch": 0.53, "learning_rate": 1.997115704032913e-05, "loss": 2.1567, "step": 684 }, { "epoch": 0.54, "learning_rate": 1.997096452671518e-05, "loss": 2.3536, "step": 685 }, { "epoch": 0.54, "learning_rate": 1.9970771373700165e-05, "loss": 2.2238, "step": 686 }, { "epoch": 0.54, "learning_rate": 1.9970577581296454e-05, "loss": 2.3256, "step": 687 }, { "epoch": 0.54, "learning_rate": 1.997038314951649e-05, "loss": 2.1369, "step": 688 }, { "epoch": 0.54, "learning_rate": 1.997018807837273e-05, "loss": 2.4964, "step": 689 }, { "epoch": 0.54, "learning_rate": 1.9969992367877693e-05, "loss": 2.3993, "step": 690 }, { "epoch": 0.54, "learning_rate": 1.9969796018043918e-05, "loss": 2.1579, "step": 691 }, { "epoch": 0.54, "learning_rate": 1.9969599028884006e-05, "loss": 2.5404, "step": 692 }, { "epoch": 0.54, "learning_rate": 1.996940140041059e-05, "loss": 2.2589, "step": 693 }, { "epoch": 0.54, "learning_rate": 1.9969203132636334e-05, "loss": 1.9341, "step": 694 }, { "epoch": 0.54, "learning_rate": 1.9969004225573956e-05, "loss": 2.0629, "step": 695 }, { "epoch": 0.54, "learning_rate": 1.996880467923621e-05, "loss": 2.1223, "step": 696 }, { "epoch": 0.54, "learning_rate": 1.99686044936359e-05, "loss": 2.3012, "step": 697 }, { "epoch": 0.55, "learning_rate": 1.9968403668785853e-05, "loss": 2.5328, "step": 698 }, { "epoch": 0.55, "learning_rate": 1.9968202204698954e-05, "loss": 1.8695, "step": 699 }, { "epoch": 0.55, "learning_rate": 1.996800010138812e-05, "loss": 2.2448, "step": 700 }, { "epoch": 0.55, "learning_rate": 1.996779735886631e-05, "loss": 2.3347, "step": 701 }, { "epoch": 0.55, "learning_rate": 1.9967593977146525e-05, "loss": 2.2875, "step": 702 }, { "epoch": 0.55, "learning_rate": 1.9967389956241808e-05, "loss": 2.0915, "step": 703 }, { "epoch": 0.55, "learning_rate": 1.9967185296165243e-05, "loss": 2.4839, "step": 704 }, { "epoch": 0.55, "learning_rate": 1.9966979996929955e-05, "loss": 2.2472, "step": 705 }, { "epoch": 0.55, "learning_rate": 1.9966774058549105e-05, "loss": 2.1372, "step": 706 }, { "epoch": 0.55, "learning_rate": 1.9966567481035902e-05, "loss": 2.3343, "step": 707 }, { "epoch": 0.55, "learning_rate": 1.996636026440359e-05, "loss": 2.3212, "step": 708 }, { "epoch": 0.55, "learning_rate": 1.9966152408665463e-05, "loss": 2.1604, "step": 709 }, { "epoch": 0.55, "learning_rate": 1.9965943913834847e-05, "loss": 1.9049, "step": 710 }, { "epoch": 0.56, "learning_rate": 1.996573477992511e-05, "loss": 2.3439, "step": 711 }, { "epoch": 0.56, "learning_rate": 1.9965525006949664e-05, "loss": 2.226, "step": 712 }, { "epoch": 0.56, "learning_rate": 1.996531459492196e-05, "loss": 2.1897, "step": 713 }, { "epoch": 0.56, "learning_rate": 1.9965103543855496e-05, "loss": 2.0519, "step": 714 }, { "epoch": 0.56, "learning_rate": 1.99648918537638e-05, "loss": 2.2962, "step": 715 }, { "epoch": 0.56, "learning_rate": 1.9964679524660452e-05, "loss": 2.2406, "step": 716 }, { "epoch": 0.56, "learning_rate": 1.9964466556559063e-05, "loss": 2.3312, "step": 717 }, { "epoch": 0.56, "learning_rate": 1.996425294947329e-05, "loss": 1.9239, "step": 718 }, { "epoch": 0.56, "learning_rate": 1.996403870341684e-05, "loss": 2.1068, "step": 719 }, { "epoch": 0.56, "learning_rate": 1.9963823818403437e-05, "loss": 2.2264, "step": 720 }, { "epoch": 0.56, "learning_rate": 1.9963608294446872e-05, "loss": 2.3295, "step": 721 }, { "epoch": 0.56, "learning_rate": 1.9963392131560965e-05, "loss": 2.0822, "step": 722 }, { "epoch": 0.57, "learning_rate": 1.9963175329759566e-05, "loss": 2.0229, "step": 723 }, { "epoch": 0.57, "learning_rate": 1.9962957889056597e-05, "loss": 2.0658, "step": 724 }, { "epoch": 0.57, "learning_rate": 1.9962739809465984e-05, "loss": 2.1476, "step": 725 }, { "epoch": 0.57, "learning_rate": 1.996252109100172e-05, "loss": 2.1134, "step": 726 }, { "epoch": 0.57, "learning_rate": 1.996230173367783e-05, "loss": 2.3644, "step": 727 }, { "epoch": 0.57, "learning_rate": 1.996208173750838e-05, "loss": 2.0404, "step": 728 }, { "epoch": 0.57, "learning_rate": 1.996186110250748e-05, "loss": 2.4157, "step": 729 }, { "epoch": 0.57, "learning_rate": 1.9961639828689275e-05, "loss": 2.5132, "step": 730 }, { "epoch": 0.57, "learning_rate": 1.9961417916067957e-05, "loss": 2.0197, "step": 731 }, { "epoch": 0.57, "learning_rate": 1.9961195364657754e-05, "loss": 2.093, "step": 732 }, { "epoch": 0.57, "learning_rate": 1.9960972174472935e-05, "loss": 2.3855, "step": 733 }, { "epoch": 0.57, "learning_rate": 1.9960748345527817e-05, "loss": 2.3066, "step": 734 }, { "epoch": 0.57, "learning_rate": 1.9960523877836757e-05, "loss": 2.3122, "step": 735 }, { "epoch": 0.58, "learning_rate": 1.996029877141414e-05, "loss": 2.3918, "step": 736 }, { "epoch": 0.58, "learning_rate": 1.9960073026274408e-05, "loss": 2.2312, "step": 737 }, { "epoch": 0.58, "learning_rate": 1.995984664243203e-05, "loss": 1.8684, "step": 738 }, { "epoch": 0.58, "learning_rate": 1.9959619619901533e-05, "loss": 2.3764, "step": 739 }, { "epoch": 0.58, "learning_rate": 1.9959391958697466e-05, "loss": 2.3927, "step": 740 }, { "epoch": 0.58, "learning_rate": 1.9959163658834436e-05, "loss": 2.4532, "step": 741 }, { "epoch": 0.58, "learning_rate": 1.995893472032708e-05, "loss": 2.2256, "step": 742 }, { "epoch": 0.58, "learning_rate": 1.9958705143190076e-05, "loss": 2.1432, "step": 743 }, { "epoch": 0.58, "learning_rate": 1.9958474927438147e-05, "loss": 2.4291, "step": 744 }, { "epoch": 0.58, "learning_rate": 1.995824407308606e-05, "loss": 2.3855, "step": 745 }, { "epoch": 0.58, "learning_rate": 1.9958012580148614e-05, "loss": 2.2152, "step": 746 }, { "epoch": 0.58, "learning_rate": 1.9957780448640656e-05, "loss": 2.0984, "step": 747 }, { "epoch": 0.58, "learning_rate": 1.995754767857707e-05, "loss": 2.4587, "step": 748 }, { "epoch": 0.59, "learning_rate": 1.9957314269972786e-05, "loss": 2.0792, "step": 749 }, { "epoch": 0.59, "learning_rate": 1.995708022284277e-05, "loss": 2.2771, "step": 750 }, { "epoch": 0.59, "learning_rate": 1.9956845537202025e-05, "loss": 2.2302, "step": 751 }, { "epoch": 0.59, "learning_rate": 1.9956610213065614e-05, "loss": 2.3344, "step": 752 }, { "epoch": 0.59, "learning_rate": 1.9956374250448615e-05, "loss": 2.0954, "step": 753 }, { "epoch": 0.59, "learning_rate": 1.9956137649366162e-05, "loss": 2.0292, "step": 754 }, { "epoch": 0.59, "learning_rate": 1.9955900409833432e-05, "loss": 2.2194, "step": 755 }, { "epoch": 0.59, "learning_rate": 1.995566253186563e-05, "loss": 1.9503, "step": 756 }, { "epoch": 0.59, "learning_rate": 1.995542401547802e-05, "loss": 2.2729, "step": 757 }, { "epoch": 0.59, "learning_rate": 1.9955184860685896e-05, "loss": 2.5776, "step": 758 }, { "epoch": 0.59, "learning_rate": 1.9954945067504586e-05, "loss": 1.9763, "step": 759 }, { "epoch": 0.59, "learning_rate": 1.9954704635949472e-05, "loss": 2.508, "step": 760 }, { "epoch": 0.59, "learning_rate": 1.9954463566035973e-05, "loss": 2.2206, "step": 761 }, { "epoch": 0.6, "learning_rate": 1.995422185777955e-05, "loss": 2.042, "step": 762 }, { "epoch": 0.6, "learning_rate": 1.9953979511195695e-05, "loss": 2.051, "step": 763 }, { "epoch": 0.6, "learning_rate": 1.9953736526299954e-05, "loss": 2.1495, "step": 764 }, { "epoch": 0.6, "learning_rate": 1.995349290310791e-05, "loss": 2.0739, "step": 765 }, { "epoch": 0.6, "learning_rate": 1.9953248641635184e-05, "loss": 1.9608, "step": 766 }, { "epoch": 0.6, "learning_rate": 1.9953003741897437e-05, "loss": 1.916, "step": 767 }, { "epoch": 0.6, "learning_rate": 1.9952758203910377e-05, "loss": 2.2247, "step": 768 }, { "epoch": 0.6, "learning_rate": 1.9952512027689746e-05, "loss": 2.1971, "step": 769 }, { "epoch": 0.6, "learning_rate": 1.9952265213251335e-05, "loss": 2.3169, "step": 770 }, { "epoch": 0.6, "learning_rate": 1.995201776061097e-05, "loss": 2.1936, "step": 771 }, { "epoch": 0.6, "learning_rate": 1.995176966978452e-05, "loss": 1.9067, "step": 772 }, { "epoch": 0.6, "learning_rate": 1.995152094078789e-05, "loss": 2.5387, "step": 773 }, { "epoch": 0.6, "learning_rate": 1.995127157363703e-05, "loss": 2.4966, "step": 774 }, { "epoch": 0.61, "learning_rate": 1.9951021568347934e-05, "loss": 2.3727, "step": 775 }, { "epoch": 0.61, "learning_rate": 1.9950770924936638e-05, "loss": 2.1129, "step": 776 }, { "epoch": 0.61, "learning_rate": 1.9950519643419207e-05, "loss": 2.225, "step": 777 }, { "epoch": 0.61, "learning_rate": 1.995026772381176e-05, "loss": 2.4232, "step": 778 }, { "epoch": 0.61, "learning_rate": 1.9950015166130444e-05, "loss": 2.2837, "step": 779 }, { "epoch": 0.61, "learning_rate": 1.9949761970391465e-05, "loss": 2.2276, "step": 780 }, { "epoch": 0.61, "learning_rate": 1.9949508136611054e-05, "loss": 2.0495, "step": 781 }, { "epoch": 0.61, "learning_rate": 1.994925366480549e-05, "loss": 2.2546, "step": 782 }, { "epoch": 0.61, "learning_rate": 1.9948998554991087e-05, "loss": 1.9976, "step": 783 }, { "epoch": 0.61, "learning_rate": 1.994874280718421e-05, "loss": 2.2241, "step": 784 }, { "epoch": 0.61, "learning_rate": 1.994848642140126e-05, "loss": 2.262, "step": 785 }, { "epoch": 0.61, "learning_rate": 1.994822939765867e-05, "loss": 2.073, "step": 786 }, { "epoch": 0.62, "learning_rate": 1.9947971735972928e-05, "loss": 1.7872, "step": 787 }, { "epoch": 0.62, "learning_rate": 1.9947713436360557e-05, "loss": 2.2653, "step": 788 }, { "epoch": 0.62, "learning_rate": 1.9947454498838122e-05, "loss": 2.3048, "step": 789 }, { "epoch": 0.62, "learning_rate": 1.9947194923422222e-05, "loss": 2.0194, "step": 790 }, { "epoch": 0.62, "learning_rate": 1.9946934710129506e-05, "loss": 2.2517, "step": 791 }, { "epoch": 0.62, "learning_rate": 1.994667385897666e-05, "loss": 2.4085, "step": 792 }, { "epoch": 0.62, "learning_rate": 1.9946412369980416e-05, "loss": 2.225, "step": 793 }, { "epoch": 0.62, "learning_rate": 1.9946150243157533e-05, "loss": 2.373, "step": 794 }, { "epoch": 0.62, "learning_rate": 1.994588747852483e-05, "loss": 2.3178, "step": 795 }, { "epoch": 0.62, "learning_rate": 1.9945624076099148e-05, "loss": 1.6415, "step": 796 }, { "epoch": 0.62, "learning_rate": 1.9945360035897386e-05, "loss": 2.159, "step": 797 }, { "epoch": 0.62, "learning_rate": 1.9945095357936472e-05, "loss": 2.0249, "step": 798 }, { "epoch": 0.62, "learning_rate": 1.9944830042233376e-05, "loss": 2.1458, "step": 799 }, { "epoch": 0.63, "learning_rate": 1.994456408880512e-05, "loss": 1.9618, "step": 800 }, { "epoch": 0.63, "learning_rate": 1.994429749766875e-05, "loss": 2.408, "step": 801 }, { "epoch": 0.63, "learning_rate": 1.9944030268841366e-05, "loss": 2.241, "step": 802 }, { "epoch": 0.63, "learning_rate": 1.9943762402340104e-05, "loss": 2.2953, "step": 803 }, { "epoch": 0.63, "learning_rate": 1.994349389818214e-05, "loss": 2.1593, "step": 804 }, { "epoch": 0.63, "learning_rate": 1.9943224756384696e-05, "loss": 2.5646, "step": 805 }, { "epoch": 0.63, "learning_rate": 1.9942954976965026e-05, "loss": 1.8763, "step": 806 }, { "epoch": 0.63, "learning_rate": 1.994268455994043e-05, "loss": 2.3078, "step": 807 }, { "epoch": 0.63, "learning_rate": 1.994241350532825e-05, "loss": 2.3609, "step": 808 }, { "epoch": 0.63, "learning_rate": 1.9942141813145875e-05, "loss": 2.2132, "step": 809 }, { "epoch": 0.63, "learning_rate": 1.9941869483410717e-05, "loss": 2.285, "step": 810 }, { "epoch": 0.63, "learning_rate": 1.9941596516140246e-05, "loss": 2.0822, "step": 811 }, { "epoch": 0.63, "learning_rate": 1.9941322911351962e-05, "loss": 2.297, "step": 812 }, { "epoch": 0.64, "learning_rate": 1.9941048669063413e-05, "loss": 2.1986, "step": 813 }, { "epoch": 0.64, "learning_rate": 1.9940773789292182e-05, "loss": 2.0364, "step": 814 }, { "epoch": 0.64, "learning_rate": 1.9940498272055898e-05, "loss": 2.4835, "step": 815 }, { "epoch": 0.64, "learning_rate": 1.9940222117372237e-05, "loss": 2.195, "step": 816 }, { "epoch": 0.64, "learning_rate": 1.9939945325258895e-05, "loss": 1.9348, "step": 817 }, { "epoch": 0.64, "learning_rate": 1.9939667895733626e-05, "loss": 1.9883, "step": 818 }, { "epoch": 0.64, "learning_rate": 1.993938982881422e-05, "loss": 2.3108, "step": 819 }, { "epoch": 0.64, "learning_rate": 1.9939111124518512e-05, "loss": 1.987, "step": 820 }, { "epoch": 0.64, "learning_rate": 1.9938831782864375e-05, "loss": 2.0653, "step": 821 }, { "epoch": 0.64, "learning_rate": 1.9938551803869715e-05, "loss": 2.3378, "step": 822 }, { "epoch": 0.64, "learning_rate": 1.9938271187552494e-05, "loss": 2.1647, "step": 823 }, { "epoch": 0.64, "learning_rate": 1.9937989933930702e-05, "loss": 2.1345, "step": 824 }, { "epoch": 0.64, "learning_rate": 1.9937708043022375e-05, "loss": 2.3627, "step": 825 }, { "epoch": 0.65, "learning_rate": 1.993742551484559e-05, "loss": 2.1746, "step": 826 }, { "epoch": 0.65, "learning_rate": 1.9937142349418463e-05, "loss": 2.2415, "step": 827 }, { "epoch": 0.65, "learning_rate": 1.9936858546759158e-05, "loss": 2.244, "step": 828 }, { "epoch": 0.65, "learning_rate": 1.993657410688587e-05, "loss": 2.5379, "step": 829 }, { "epoch": 0.65, "learning_rate": 1.993628902981684e-05, "loss": 2.0117, "step": 830 }, { "epoch": 0.65, "learning_rate": 1.9936003315570346e-05, "loss": 2.1345, "step": 831 }, { "epoch": 0.65, "learning_rate": 1.9935716964164715e-05, "loss": 2.2257, "step": 832 }, { "epoch": 0.65, "learning_rate": 1.9935429975618303e-05, "loss": 2.318, "step": 833 }, { "epoch": 0.65, "learning_rate": 1.9935142349949524e-05, "loss": 2.1528, "step": 834 }, { "epoch": 0.65, "learning_rate": 1.993485408717681e-05, "loss": 2.3288, "step": 835 }, { "epoch": 0.65, "learning_rate": 1.9934565187318654e-05, "loss": 2.4638, "step": 836 }, { "epoch": 0.65, "learning_rate": 1.9934275650393583e-05, "loss": 2.1541, "step": 837 }, { "epoch": 0.65, "learning_rate": 1.9933985476420157e-05, "loss": 2.0948, "step": 838 }, { "epoch": 0.66, "learning_rate": 1.993369466541699e-05, "loss": 2.2916, "step": 839 }, { "epoch": 0.66, "learning_rate": 1.993340321740273e-05, "loss": 1.8598, "step": 840 }, { "epoch": 0.66, "learning_rate": 1.993311113239606e-05, "loss": 2.4374, "step": 841 }, { "epoch": 0.66, "learning_rate": 1.9932818410415723e-05, "loss": 2.4325, "step": 842 }, { "epoch": 0.66, "learning_rate": 1.9932525051480475e-05, "loss": 2.2617, "step": 843 }, { "epoch": 0.66, "learning_rate": 1.993223105560914e-05, "loss": 2.5841, "step": 844 }, { "epoch": 0.66, "learning_rate": 1.9931936422820565e-05, "loss": 2.0495, "step": 845 }, { "epoch": 0.66, "learning_rate": 1.9931641153133646e-05, "loss": 2.1197, "step": 846 }, { "epoch": 0.66, "learning_rate": 1.9931345246567317e-05, "loss": 2.4644, "step": 847 }, { "epoch": 0.66, "learning_rate": 1.993104870314055e-05, "loss": 2.324, "step": 848 }, { "epoch": 0.66, "learning_rate": 1.9930751522872366e-05, "loss": 2.2961, "step": 849 }, { "epoch": 0.66, "learning_rate": 1.993045370578182e-05, "loss": 2.3482, "step": 850 }, { "epoch": 0.67, "learning_rate": 1.9930155251888013e-05, "loss": 2.1003, "step": 851 }, { "epoch": 0.67, "learning_rate": 1.992985616121008e-05, "loss": 2.193, "step": 852 }, { "epoch": 0.67, "learning_rate": 1.9929556433767198e-05, "loss": 2.2113, "step": 853 }, { "epoch": 0.67, "learning_rate": 1.99292560695786e-05, "loss": 2.1066, "step": 854 }, { "epoch": 0.67, "learning_rate": 1.992895506866353e-05, "loss": 2.1459, "step": 855 }, { "epoch": 0.67, "learning_rate": 1.9928653431041297e-05, "loss": 2.147, "step": 856 }, { "epoch": 0.67, "learning_rate": 1.992835115673125e-05, "loss": 2.1804, "step": 857 }, { "epoch": 0.67, "learning_rate": 1.9928048245752767e-05, "loss": 2.5116, "step": 858 }, { "epoch": 0.67, "learning_rate": 1.9927744698125276e-05, "loss": 2.1408, "step": 859 }, { "epoch": 0.67, "learning_rate": 1.9927440513868236e-05, "loss": 2.2736, "step": 860 }, { "epoch": 0.67, "learning_rate": 1.9927135693001158e-05, "loss": 2.1236, "step": 861 }, { "epoch": 0.67, "learning_rate": 1.992683023554359e-05, "loss": 2.1253, "step": 862 }, { "epoch": 0.67, "learning_rate": 1.9926524141515115e-05, "loss": 2.3234, "step": 863 }, { "epoch": 0.68, "learning_rate": 1.9926217410935367e-05, "loss": 2.4049, "step": 864 }, { "epoch": 0.68, "learning_rate": 1.9925910043824013e-05, "loss": 2.2948, "step": 865 }, { "epoch": 0.68, "learning_rate": 1.9925602040200763e-05, "loss": 2.0833, "step": 866 }, { "epoch": 0.68, "learning_rate": 1.992529340008537e-05, "loss": 1.8875, "step": 867 }, { "epoch": 0.68, "learning_rate": 1.9924984123497623e-05, "loss": 1.9127, "step": 868 }, { "epoch": 0.68, "learning_rate": 1.9924674210457358e-05, "loss": 2.3402, "step": 869 }, { "epoch": 0.68, "learning_rate": 1.9924363660984445e-05, "loss": 2.5747, "step": 870 }, { "epoch": 0.68, "learning_rate": 1.99240524750988e-05, "loss": 2.0475, "step": 871 }, { "epoch": 0.68, "learning_rate": 1.992374065282038e-05, "loss": 2.1814, "step": 872 }, { "epoch": 0.68, "learning_rate": 1.9923428194169177e-05, "loss": 2.0653, "step": 873 }, { "epoch": 0.68, "learning_rate": 1.9923115099165237e-05, "loss": 2.3321, "step": 874 }, { "epoch": 0.68, "learning_rate": 1.9922801367828626e-05, "loss": 2.3856, "step": 875 }, { "epoch": 0.68, "learning_rate": 1.9922487000179468e-05, "loss": 2.1339, "step": 876 }, { "epoch": 0.69, "learning_rate": 1.992217199623792e-05, "loss": 2.5401, "step": 877 }, { "epoch": 0.69, "learning_rate": 1.9921856356024186e-05, "loss": 2.2325, "step": 878 }, { "epoch": 0.69, "learning_rate": 1.9921540079558506e-05, "loss": 2.1329, "step": 879 }, { "epoch": 0.69, "learning_rate": 1.992122316686116e-05, "loss": 2.314, "step": 880 }, { "epoch": 0.69, "learning_rate": 1.9920905617952468e-05, "loss": 2.1541, "step": 881 }, { "epoch": 0.69, "learning_rate": 1.9920587432852797e-05, "loss": 2.5358, "step": 882 }, { "epoch": 0.69, "learning_rate": 1.9920268611582547e-05, "loss": 2.3626, "step": 883 }, { "epoch": 0.69, "learning_rate": 1.991994915416217e-05, "loss": 1.8627, "step": 884 }, { "epoch": 0.69, "learning_rate": 1.9919629060612147e-05, "loss": 2.3381, "step": 885 }, { "epoch": 0.69, "learning_rate": 1.9919308330953e-05, "loss": 2.1335, "step": 886 }, { "epoch": 0.69, "learning_rate": 1.9918986965205307e-05, "loss": 2.496, "step": 887 }, { "epoch": 0.69, "learning_rate": 1.9918664963389668e-05, "loss": 2.3987, "step": 888 }, { "epoch": 0.69, "learning_rate": 1.9918342325526732e-05, "loss": 2.3256, "step": 889 }, { "epoch": 0.7, "learning_rate": 1.9918019051637195e-05, "loss": 2.1085, "step": 890 }, { "epoch": 0.7, "learning_rate": 1.991769514174178e-05, "loss": 2.0683, "step": 891 }, { "epoch": 0.7, "learning_rate": 1.9917370595861256e-05, "loss": 2.0761, "step": 892 }, { "epoch": 0.7, "learning_rate": 1.9917045414016446e-05, "loss": 2.2118, "step": 893 }, { "epoch": 0.7, "learning_rate": 1.9916719596228193e-05, "loss": 1.8679, "step": 894 }, { "epoch": 0.7, "learning_rate": 1.9916393142517395e-05, "loss": 1.9727, "step": 895 }, { "epoch": 0.7, "learning_rate": 1.9916066052904988e-05, "loss": 2.4852, "step": 896 }, { "epoch": 0.7, "learning_rate": 1.991573832741194e-05, "loss": 2.0704, "step": 897 }, { "epoch": 0.7, "learning_rate": 1.9915409966059274e-05, "loss": 2.1677, "step": 898 }, { "epoch": 0.7, "learning_rate": 1.9915080968868042e-05, "loss": 2.3372, "step": 899 }, { "epoch": 0.7, "learning_rate": 1.9914751335859345e-05, "loss": 2.0255, "step": 900 }, { "epoch": 0.7, "learning_rate": 1.9914421067054314e-05, "loss": 2.2026, "step": 901 }, { "epoch": 0.7, "learning_rate": 1.991409016247414e-05, "loss": 2.119, "step": 902 }, { "epoch": 0.71, "learning_rate": 1.9913758622140032e-05, "loss": 2.351, "step": 903 }, { "epoch": 0.71, "learning_rate": 1.9913426446073255e-05, "loss": 2.2703, "step": 904 }, { "epoch": 0.71, "learning_rate": 1.991309363429511e-05, "loss": 2.2845, "step": 905 }, { "epoch": 0.71, "learning_rate": 1.9912760186826937e-05, "loss": 2.1622, "step": 906 }, { "epoch": 0.71, "learning_rate": 1.991242610369012e-05, "loss": 2.1044, "step": 907 }, { "epoch": 0.71, "learning_rate": 1.9912091384906083e-05, "loss": 2.1762, "step": 908 }, { "epoch": 0.71, "learning_rate": 1.991175603049629e-05, "loss": 2.2674, "step": 909 }, { "epoch": 0.71, "learning_rate": 1.991142004048225e-05, "loss": 2.2125, "step": 910 }, { "epoch": 0.71, "learning_rate": 1.9911083414885503e-05, "loss": 1.9902, "step": 911 }, { "epoch": 0.71, "learning_rate": 1.9910746153727635e-05, "loss": 2.3696, "step": 912 }, { "epoch": 0.71, "learning_rate": 1.9910408257030278e-05, "loss": 2.3781, "step": 913 }, { "epoch": 0.71, "learning_rate": 1.9910069724815098e-05, "loss": 2.3875, "step": 914 }, { "epoch": 0.72, "learning_rate": 1.99097305571038e-05, "loss": 2.1529, "step": 915 }, { "epoch": 0.72, "learning_rate": 1.990939075391814e-05, "loss": 2.0707, "step": 916 }, { "epoch": 0.72, "learning_rate": 1.990905031527991e-05, "loss": 2.0914, "step": 917 }, { "epoch": 0.72, "learning_rate": 1.9908709241210927e-05, "loss": 2.1718, "step": 918 }, { "epoch": 0.72, "learning_rate": 1.9908367531733078e-05, "loss": 2.3746, "step": 919 }, { "epoch": 0.72, "learning_rate": 1.990802518686827e-05, "loss": 2.2502, "step": 920 }, { "epoch": 0.72, "learning_rate": 1.9907682206638455e-05, "loss": 2.2319, "step": 921 }, { "epoch": 0.72, "learning_rate": 1.9907338591065627e-05, "loss": 2.1461, "step": 922 }, { "epoch": 0.72, "learning_rate": 1.9906994340171824e-05, "loss": 2.225, "step": 923 }, { "epoch": 0.72, "learning_rate": 1.990664945397912e-05, "loss": 2.2963, "step": 924 }, { "epoch": 0.72, "learning_rate": 1.9906303932509628e-05, "loss": 2.1029, "step": 925 }, { "epoch": 0.72, "learning_rate": 1.9905957775785514e-05, "loss": 2.2656, "step": 926 }, { "epoch": 0.72, "learning_rate": 1.9905610983828963e-05, "loss": 2.171, "step": 927 }, { "epoch": 0.73, "learning_rate": 1.9905263556662226e-05, "loss": 2.1953, "step": 928 }, { "epoch": 0.73, "learning_rate": 1.990491549430757e-05, "loss": 2.2835, "step": 929 }, { "epoch": 0.73, "learning_rate": 1.990456679678733e-05, "loss": 2.0745, "step": 930 }, { "epoch": 0.73, "learning_rate": 1.9904217464123848e-05, "loss": 2.4348, "step": 931 }, { "epoch": 0.73, "learning_rate": 1.9903867496339542e-05, "loss": 2.3273, "step": 932 }, { "epoch": 0.73, "learning_rate": 1.990351689345685e-05, "loss": 1.9718, "step": 933 }, { "epoch": 0.73, "learning_rate": 1.9903165655498246e-05, "loss": 2.1938, "step": 934 }, { "epoch": 0.73, "learning_rate": 1.9902813782486263e-05, "loss": 2.4377, "step": 935 }, { "epoch": 0.73, "learning_rate": 1.990246127444346e-05, "loss": 2.226, "step": 936 }, { "epoch": 0.73, "learning_rate": 1.9902108131392448e-05, "loss": 2.2859, "step": 937 }, { "epoch": 0.73, "learning_rate": 1.9901754353355868e-05, "loss": 2.1242, "step": 938 }, { "epoch": 0.73, "learning_rate": 1.9901399940356404e-05, "loss": 2.3821, "step": 939 }, { "epoch": 0.73, "learning_rate": 1.990104489241679e-05, "loss": 2.5321, "step": 940 }, { "epoch": 0.74, "learning_rate": 1.990068920955979e-05, "loss": 2.0489, "step": 941 }, { "epoch": 0.74, "learning_rate": 1.9900332891808218e-05, "loss": 2.1406, "step": 942 }, { "epoch": 0.74, "learning_rate": 1.989997593918491e-05, "loss": 2.2272, "step": 943 }, { "epoch": 0.74, "learning_rate": 1.989961835171277e-05, "loss": 2.2278, "step": 944 }, { "epoch": 0.74, "learning_rate": 1.9899260129414723e-05, "loss": 2.2767, "step": 945 }, { "epoch": 0.74, "learning_rate": 1.989890127231374e-05, "loss": 2.1505, "step": 946 }, { "epoch": 0.74, "learning_rate": 1.9898541780432835e-05, "loss": 2.1689, "step": 947 }, { "epoch": 0.74, "learning_rate": 1.9898181653795062e-05, "loss": 2.1369, "step": 948 }, { "epoch": 0.74, "learning_rate": 1.989782089242351e-05, "loss": 2.3296, "step": 949 }, { "epoch": 0.74, "learning_rate": 1.9897459496341315e-05, "loss": 2.3909, "step": 950 }, { "epoch": 0.74, "learning_rate": 1.9897097465571654e-05, "loss": 2.0588, "step": 951 }, { "epoch": 0.74, "learning_rate": 1.9896734800137746e-05, "loss": 2.1316, "step": 952 }, { "epoch": 0.74, "learning_rate": 1.9896371500062836e-05, "loss": 1.9885, "step": 953 }, { "epoch": 0.75, "learning_rate": 1.9896007565370233e-05, "loss": 2.0972, "step": 954 }, { "epoch": 0.75, "learning_rate": 1.9895642996083266e-05, "loss": 2.3295, "step": 955 }, { "epoch": 0.75, "learning_rate": 1.989527779222532e-05, "loss": 2.3199, "step": 956 }, { "epoch": 0.75, "learning_rate": 1.9894911953819812e-05, "loss": 2.4424, "step": 957 }, { "epoch": 0.75, "learning_rate": 1.98945454808902e-05, "loss": 2.1351, "step": 958 }, { "epoch": 0.75, "learning_rate": 1.9894178373459984e-05, "loss": 2.166, "step": 959 }, { "epoch": 0.75, "learning_rate": 1.9893810631552712e-05, "loss": 2.1247, "step": 960 }, { "epoch": 0.75, "learning_rate": 1.989344225519196e-05, "loss": 2.216, "step": 961 }, { "epoch": 0.75, "learning_rate": 1.9893073244401345e-05, "loss": 1.989, "step": 962 }, { "epoch": 0.75, "learning_rate": 1.9892703599204543e-05, "loss": 2.113, "step": 963 }, { "epoch": 0.75, "learning_rate": 1.9892333319625253e-05, "loss": 2.3235, "step": 964 }, { "epoch": 0.75, "learning_rate": 1.9891962405687218e-05, "loss": 2.127, "step": 965 }, { "epoch": 0.75, "learning_rate": 1.9891590857414224e-05, "loss": 2.2074, "step": 966 }, { "epoch": 0.76, "learning_rate": 1.9891218674830095e-05, "loss": 1.9968, "step": 967 }, { "epoch": 0.76, "learning_rate": 1.98908458579587e-05, "loss": 2.1831, "step": 968 }, { "epoch": 0.76, "learning_rate": 1.9890472406823947e-05, "loss": 2.1405, "step": 969 }, { "epoch": 0.76, "learning_rate": 1.9890098321449782e-05, "loss": 1.9987, "step": 970 }, { "epoch": 0.76, "learning_rate": 1.98897236018602e-05, "loss": 2.1399, "step": 971 }, { "epoch": 0.76, "learning_rate": 1.9889348248079217e-05, "loss": 2.1834, "step": 972 }, { "epoch": 0.76, "learning_rate": 1.9888972260130915e-05, "loss": 2.1606, "step": 973 }, { "epoch": 0.76, "learning_rate": 1.9888595638039403e-05, "loss": 2.2745, "step": 974 }, { "epoch": 0.76, "learning_rate": 1.988821838182883e-05, "loss": 2.073, "step": 975 }, { "epoch": 0.76, "learning_rate": 1.9887840491523386e-05, "loss": 2.4775, "step": 976 }, { "epoch": 0.76, "learning_rate": 1.9887461967147307e-05, "loss": 2.1065, "step": 977 }, { "epoch": 0.76, "learning_rate": 1.988708280872486e-05, "loss": 2.132, "step": 978 }, { "epoch": 0.77, "learning_rate": 1.9886703016280372e-05, "loss": 2.0679, "step": 979 }, { "epoch": 0.77, "learning_rate": 1.988632258983819e-05, "loss": 2.2178, "step": 980 }, { "epoch": 0.77, "learning_rate": 1.9885941529422708e-05, "loss": 2.1846, "step": 981 }, { "epoch": 0.77, "learning_rate": 1.9885559835058364e-05, "loss": 2.0736, "step": 982 }, { "epoch": 0.77, "learning_rate": 1.9885177506769632e-05, "loss": 1.8834, "step": 983 }, { "epoch": 0.77, "learning_rate": 1.9884794544581033e-05, "loss": 2.3119, "step": 984 }, { "epoch": 0.77, "learning_rate": 1.9884410948517123e-05, "loss": 2.2172, "step": 985 }, { "epoch": 0.77, "learning_rate": 1.9884026718602504e-05, "loss": 2.3346, "step": 986 }, { "epoch": 0.77, "learning_rate": 1.9883641854861808e-05, "loss": 2.0549, "step": 987 }, { "epoch": 0.77, "learning_rate": 1.9883256357319723e-05, "loss": 2.0899, "step": 988 }, { "epoch": 0.77, "learning_rate": 1.9882870226000964e-05, "loss": 2.1432, "step": 989 }, { "epoch": 0.77, "learning_rate": 1.988248346093029e-05, "loss": 2.284, "step": 990 }, { "epoch": 0.77, "learning_rate": 1.9882096062132512e-05, "loss": 2.0757, "step": 991 }, { "epoch": 0.78, "learning_rate": 1.9881708029632463e-05, "loss": 2.1669, "step": 992 }, { "epoch": 0.78, "learning_rate": 1.9881319363455035e-05, "loss": 2.4016, "step": 993 }, { "epoch": 0.78, "learning_rate": 1.9880930063625144e-05, "loss": 2.2498, "step": 994 }, { "epoch": 0.78, "learning_rate": 1.9880540130167758e-05, "loss": 2.2733, "step": 995 }, { "epoch": 0.78, "learning_rate": 1.988014956310788e-05, "loss": 2.2048, "step": 996 }, { "epoch": 0.78, "learning_rate": 1.987975836247056e-05, "loss": 2.335, "step": 997 }, { "epoch": 0.78, "learning_rate": 1.9879366528280878e-05, "loss": 2.4296, "step": 998 }, { "epoch": 0.78, "learning_rate": 1.9878974060563963e-05, "loss": 1.9157, "step": 999 }, { "epoch": 0.78, "learning_rate": 1.9878580959344988e-05, "loss": 2.4826, "step": 1000 }, { "epoch": 0.78, "learning_rate": 1.9878187224649153e-05, "loss": 2.0749, "step": 1001 }, { "epoch": 0.78, "learning_rate": 1.987779285650171e-05, "loss": 2.3302, "step": 1002 }, { "epoch": 0.78, "learning_rate": 1.9877397854927954e-05, "loss": 2.3158, "step": 1003 }, { "epoch": 0.78, "learning_rate": 1.9877002219953204e-05, "loss": 2.1727, "step": 1004 }, { "epoch": 0.79, "learning_rate": 1.987660595160284e-05, "loss": 2.12, "step": 1005 }, { "epoch": 0.79, "learning_rate": 1.9876209049902268e-05, "loss": 2.1214, "step": 1006 }, { "epoch": 0.79, "learning_rate": 1.9875811514876943e-05, "loss": 2.3084, "step": 1007 }, { "epoch": 0.79, "learning_rate": 1.987541334655235e-05, "loss": 1.9726, "step": 1008 }, { "epoch": 0.79, "learning_rate": 1.9875014544954036e-05, "loss": 1.8425, "step": 1009 }, { "epoch": 0.79, "learning_rate": 1.9874615110107563e-05, "loss": 2.1551, "step": 1010 }, { "epoch": 0.79, "learning_rate": 1.9874215042038548e-05, "loss": 2.231, "step": 1011 }, { "epoch": 0.79, "learning_rate": 1.9873814340772645e-05, "loss": 2.013, "step": 1012 }, { "epoch": 0.79, "learning_rate": 1.9873413006335556e-05, "loss": 1.9156, "step": 1013 }, { "epoch": 0.79, "learning_rate": 1.987301103875301e-05, "loss": 2.2221, "step": 1014 }, { "epoch": 0.79, "learning_rate": 1.987260843805079e-05, "loss": 1.8426, "step": 1015 }, { "epoch": 0.79, "learning_rate": 1.9872205204254704e-05, "loss": 2.4652, "step": 1016 }, { "epoch": 0.79, "learning_rate": 1.987180133739062e-05, "loss": 2.2046, "step": 1017 }, { "epoch": 0.8, "learning_rate": 1.9871396837484426e-05, "loss": 1.8809, "step": 1018 }, { "epoch": 0.8, "learning_rate": 1.987099170456207e-05, "loss": 2.2338, "step": 1019 }, { "epoch": 0.8, "learning_rate": 1.9870585938649526e-05, "loss": 1.943, "step": 1020 }, { "epoch": 0.8, "learning_rate": 1.987017953977282e-05, "loss": 2.5326, "step": 1021 }, { "epoch": 0.8, "learning_rate": 1.986977250795801e-05, "loss": 2.073, "step": 1022 }, { "epoch": 0.8, "learning_rate": 1.9869364843231194e-05, "loss": 2.0934, "step": 1023 }, { "epoch": 0.8, "learning_rate": 1.9868956545618518e-05, "loss": 2.2788, "step": 1024 }, { "epoch": 0.8, "learning_rate": 1.9868547615146165e-05, "loss": 1.9804, "step": 1025 }, { "epoch": 0.8, "learning_rate": 1.986813805184036e-05, "loss": 2.2159, "step": 1026 }, { "epoch": 0.8, "learning_rate": 1.9867727855727358e-05, "loss": 2.1107, "step": 1027 }, { "epoch": 0.8, "learning_rate": 1.986731702683347e-05, "loss": 2.4038, "step": 1028 }, { "epoch": 0.8, "learning_rate": 1.9866905565185043e-05, "loss": 2.2087, "step": 1029 }, { "epoch": 0.81, "learning_rate": 1.9866493470808455e-05, "loss": 2.0507, "step": 1030 }, { "epoch": 0.81, "learning_rate": 1.9866080743730142e-05, "loss": 2.202, "step": 1031 }, { "epoch": 0.81, "learning_rate": 1.986566738397656e-05, "loss": 2.3273, "step": 1032 }, { "epoch": 0.81, "learning_rate": 1.9865253391574223e-05, "loss": 2.0993, "step": 1033 }, { "epoch": 0.81, "learning_rate": 1.986483876654968e-05, "loss": 2.3178, "step": 1034 }, { "epoch": 0.81, "learning_rate": 1.9864423508929516e-05, "loss": 2.1713, "step": 1035 }, { "epoch": 0.81, "learning_rate": 1.9864007618740357e-05, "loss": 1.8883, "step": 1036 }, { "epoch": 0.81, "learning_rate": 1.986359109600888e-05, "loss": 2.0965, "step": 1037 }, { "epoch": 0.81, "learning_rate": 1.9863173940761787e-05, "loss": 2.1014, "step": 1038 }, { "epoch": 0.81, "learning_rate": 1.9862756153025838e-05, "loss": 1.9958, "step": 1039 }, { "epoch": 0.81, "learning_rate": 1.9862337732827816e-05, "loss": 2.2281, "step": 1040 }, { "epoch": 0.81, "learning_rate": 1.9861918680194558e-05, "loss": 2.0299, "step": 1041 }, { "epoch": 0.81, "learning_rate": 1.986149899515293e-05, "loss": 2.1983, "step": 1042 }, { "epoch": 0.82, "learning_rate": 1.986107867772985e-05, "loss": 1.8911, "step": 1043 }, { "epoch": 0.82, "learning_rate": 1.986065772795227e-05, "loss": 2.1353, "step": 1044 }, { "epoch": 0.82, "learning_rate": 1.9860236145847186e-05, "loss": 2.3219, "step": 1045 }, { "epoch": 0.82, "learning_rate": 1.9859813931441632e-05, "loss": 1.9627, "step": 1046 }, { "epoch": 0.82, "learning_rate": 1.985939108476268e-05, "loss": 2.2711, "step": 1047 }, { "epoch": 0.82, "learning_rate": 1.9858967605837446e-05, "loss": 2.3868, "step": 1048 }, { "epoch": 0.82, "learning_rate": 1.985854349469309e-05, "loss": 2.2283, "step": 1049 }, { "epoch": 0.82, "learning_rate": 1.9858118751356806e-05, "loss": 1.8893, "step": 1050 }, { "epoch": 0.82, "learning_rate": 1.9857693375855826e-05, "loss": 2.2682, "step": 1051 }, { "epoch": 0.82, "learning_rate": 1.985726736821744e-05, "loss": 2.1647, "step": 1052 }, { "epoch": 0.82, "learning_rate": 1.9856840728468957e-05, "loss": 2.0663, "step": 1053 }, { "epoch": 0.82, "learning_rate": 1.9856413456637736e-05, "loss": 2.0646, "step": 1054 }, { "epoch": 0.82, "learning_rate": 1.985598555275118e-05, "loss": 2.3376, "step": 1055 }, { "epoch": 0.83, "learning_rate": 1.9855557016836727e-05, "loss": 2.4127, "step": 1056 }, { "epoch": 0.83, "learning_rate": 1.985512784892186e-05, "loss": 2.1709, "step": 1057 }, { "epoch": 0.83, "learning_rate": 1.98546980490341e-05, "loss": 2.3152, "step": 1058 }, { "epoch": 0.83, "learning_rate": 1.9854267617201003e-05, "loss": 2.1169, "step": 1059 }, { "epoch": 0.83, "learning_rate": 1.985383655345017e-05, "loss": 2.1672, "step": 1060 }, { "epoch": 0.83, "learning_rate": 1.985340485780926e-05, "loss": 2.4022, "step": 1061 }, { "epoch": 0.83, "learning_rate": 1.985297253030593e-05, "loss": 2.2116, "step": 1062 }, { "epoch": 0.83, "learning_rate": 1.9852539570967925e-05, "loss": 2.0325, "step": 1063 }, { "epoch": 0.83, "learning_rate": 1.9852105979823003e-05, "loss": 2.1512, "step": 1064 }, { "epoch": 0.83, "learning_rate": 1.9851671756898964e-05, "loss": 2.0764, "step": 1065 }, { "epoch": 0.83, "learning_rate": 1.9851236902223655e-05, "loss": 1.976, "step": 1066 }, { "epoch": 0.83, "learning_rate": 1.9850801415824966e-05, "loss": 2.2177, "step": 1067 }, { "epoch": 0.83, "learning_rate": 1.985036529773082e-05, "loss": 2.3228, "step": 1068 }, { "epoch": 0.84, "learning_rate": 1.9849928547969183e-05, "loss": 2.2635, "step": 1069 }, { "epoch": 0.84, "learning_rate": 1.984949116656806e-05, "loss": 2.2157, "step": 1070 }, { "epoch": 0.84, "learning_rate": 1.9849053153555507e-05, "loss": 2.285, "step": 1071 }, { "epoch": 0.84, "learning_rate": 1.9848614508959604e-05, "loss": 2.1799, "step": 1072 }, { "epoch": 0.84, "learning_rate": 1.984817523280848e-05, "loss": 2.2051, "step": 1073 }, { "epoch": 0.84, "learning_rate": 1.984773532513031e-05, "loss": 2.2609, "step": 1074 }, { "epoch": 0.84, "learning_rate": 1.9847294785953295e-05, "loss": 2.0824, "step": 1075 }, { "epoch": 0.84, "learning_rate": 1.9846853615305695e-05, "loss": 2.2377, "step": 1076 }, { "epoch": 0.84, "learning_rate": 1.9846411813215794e-05, "loss": 2.476, "step": 1077 }, { "epoch": 0.84, "learning_rate": 1.9845969379711925e-05, "loss": 2.278, "step": 1078 }, { "epoch": 0.84, "learning_rate": 1.984552631482246e-05, "loss": 2.1997, "step": 1079 }, { "epoch": 0.84, "learning_rate": 1.984508261857581e-05, "loss": 2.1575, "step": 1080 }, { "epoch": 0.84, "learning_rate": 1.984463829100043e-05, "loss": 2.0172, "step": 1081 }, { "epoch": 0.85, "learning_rate": 1.984419333212481e-05, "loss": 2.4061, "step": 1082 }, { "epoch": 0.85, "learning_rate": 1.9843747741977487e-05, "loss": 1.9373, "step": 1083 }, { "epoch": 0.85, "learning_rate": 1.9843301520587032e-05, "loss": 2.3833, "step": 1084 }, { "epoch": 0.85, "learning_rate": 1.9842854667982058e-05, "loss": 1.9982, "step": 1085 }, { "epoch": 0.85, "learning_rate": 1.9842407184191225e-05, "loss": 2.1621, "step": 1086 }, { "epoch": 0.85, "learning_rate": 1.9841959069243228e-05, "loss": 1.9836, "step": 1087 }, { "epoch": 0.85, "learning_rate": 1.9841510323166797e-05, "loss": 2.0203, "step": 1088 }, { "epoch": 0.85, "learning_rate": 1.9841060945990715e-05, "loss": 2.0744, "step": 1089 }, { "epoch": 0.85, "learning_rate": 1.9840610937743794e-05, "loss": 2.3578, "step": 1090 }, { "epoch": 0.85, "learning_rate": 1.9840160298454896e-05, "loss": 2.2098, "step": 1091 }, { "epoch": 0.85, "learning_rate": 1.9839709028152916e-05, "loss": 2.1596, "step": 1092 }, { "epoch": 0.85, "learning_rate": 1.9839257126866793e-05, "loss": 1.8895, "step": 1093 }, { "epoch": 0.86, "learning_rate": 1.9838804594625506e-05, "loss": 2.134, "step": 1094 }, { "epoch": 0.86, "learning_rate": 1.983835143145807e-05, "loss": 2.162, "step": 1095 }, { "epoch": 0.86, "learning_rate": 1.9837897637393554e-05, "loss": 2.0049, "step": 1096 }, { "epoch": 0.86, "learning_rate": 1.983744321246105e-05, "loss": 2.1285, "step": 1097 }, { "epoch": 0.86, "learning_rate": 1.98369881566897e-05, "loss": 2.2895, "step": 1098 }, { "epoch": 0.86, "learning_rate": 1.9836532470108686e-05, "loss": 2.0215, "step": 1099 }, { "epoch": 0.86, "learning_rate": 1.9836076152747236e-05, "loss": 2.3844, "step": 1100 }, { "epoch": 0.86, "learning_rate": 1.9835619204634597e-05, "loss": 1.9506, "step": 1101 }, { "epoch": 0.86, "learning_rate": 1.9835161625800087e-05, "loss": 1.8855, "step": 1102 }, { "epoch": 0.86, "learning_rate": 1.9834703416273037e-05, "loss": 2.1974, "step": 1103 }, { "epoch": 0.86, "learning_rate": 1.9834244576082836e-05, "loss": 2.4702, "step": 1104 }, { "epoch": 0.86, "learning_rate": 1.983378510525891e-05, "loss": 2.0958, "step": 1105 }, { "epoch": 0.86, "learning_rate": 1.983332500383072e-05, "loss": 2.4014, "step": 1106 }, { "epoch": 0.87, "learning_rate": 1.9832864271827766e-05, "loss": 2.0278, "step": 1107 }, { "epoch": 0.87, "learning_rate": 1.98324029092796e-05, "loss": 2.0787, "step": 1108 }, { "epoch": 0.87, "learning_rate": 1.983194091621581e-05, "loss": 2.0861, "step": 1109 }, { "epoch": 0.87, "learning_rate": 1.9831478292666012e-05, "loss": 2.3735, "step": 1110 }, { "epoch": 0.87, "learning_rate": 1.9831015038659876e-05, "loss": 1.9715, "step": 1111 }, { "epoch": 0.87, "learning_rate": 1.9830551154227114e-05, "loss": 2.1389, "step": 1112 }, { "epoch": 0.87, "learning_rate": 1.9830086639397467e-05, "loss": 2.3889, "step": 1113 }, { "epoch": 0.87, "learning_rate": 1.9829621494200727e-05, "loss": 2.1383, "step": 1114 }, { "epoch": 0.87, "learning_rate": 1.9829155718666717e-05, "loss": 1.9084, "step": 1115 }, { "epoch": 0.87, "learning_rate": 1.9828689312825312e-05, "loss": 1.961, "step": 1116 }, { "epoch": 0.87, "learning_rate": 1.9828222276706417e-05, "loss": 1.955, "step": 1117 }, { "epoch": 0.87, "learning_rate": 1.982775461033998e-05, "loss": 2.0952, "step": 1118 }, { "epoch": 0.87, "learning_rate": 1.9827286313755994e-05, "loss": 1.9538, "step": 1119 }, { "epoch": 0.88, "learning_rate": 1.9826817386984486e-05, "loss": 2.0538, "step": 1120 }, { "epoch": 0.88, "learning_rate": 1.982634783005553e-05, "loss": 2.3493, "step": 1121 }, { "epoch": 0.88, "learning_rate": 1.982587764299923e-05, "loss": 2.0681, "step": 1122 }, { "epoch": 0.88, "learning_rate": 1.9825406825845748e-05, "loss": 2.0162, "step": 1123 }, { "epoch": 0.88, "learning_rate": 1.982493537862527e-05, "loss": 2.1943, "step": 1124 }, { "epoch": 0.88, "learning_rate": 1.9824463301368026e-05, "loss": 2.1823, "step": 1125 }, { "epoch": 0.88, "learning_rate": 1.982399059410429e-05, "loss": 2.124, "step": 1126 }, { "epoch": 0.88, "learning_rate": 1.982351725686438e-05, "loss": 2.2697, "step": 1127 }, { "epoch": 0.88, "learning_rate": 1.9823043289678638e-05, "loss": 2.0657, "step": 1128 }, { "epoch": 0.88, "learning_rate": 1.9822568692577473e-05, "loss": 1.9337, "step": 1129 }, { "epoch": 0.88, "learning_rate": 1.9822093465591306e-05, "loss": 2.1367, "step": 1130 }, { "epoch": 0.88, "learning_rate": 1.982161760875062e-05, "loss": 2.3516, "step": 1131 }, { "epoch": 0.88, "learning_rate": 1.9821141122085926e-05, "loss": 2.0907, "step": 1132 }, { "epoch": 0.89, "learning_rate": 1.9820664005627778e-05, "loss": 2.3727, "step": 1133 }, { "epoch": 0.89, "learning_rate": 1.9820186259406773e-05, "loss": 2.2992, "step": 1134 }, { "epoch": 0.89, "learning_rate": 1.981970788345355e-05, "loss": 2.0174, "step": 1135 }, { "epoch": 0.89, "learning_rate": 1.981922887779878e-05, "loss": 2.2263, "step": 1136 }, { "epoch": 0.89, "learning_rate": 1.9818749242473188e-05, "loss": 2.3202, "step": 1137 }, { "epoch": 0.89, "learning_rate": 1.9818268977507523e-05, "loss": 2.2296, "step": 1138 }, { "epoch": 0.89, "learning_rate": 1.981778808293259e-05, "loss": 2.2253, "step": 1139 }, { "epoch": 0.89, "learning_rate": 1.981730655877922e-05, "loss": 2.5772, "step": 1140 }, { "epoch": 0.89, "learning_rate": 1.9816824405078293e-05, "loss": 2.2264, "step": 1141 }, { "epoch": 0.89, "learning_rate": 1.981634162186073e-05, "loss": 1.9245, "step": 1142 }, { "epoch": 0.89, "learning_rate": 1.981585820915749e-05, "loss": 2.1988, "step": 1143 }, { "epoch": 0.89, "learning_rate": 1.981537416699957e-05, "loss": 2.2392, "step": 1144 }, { "epoch": 0.89, "learning_rate": 1.9814889495418016e-05, "loss": 1.9625, "step": 1145 }, { "epoch": 0.9, "learning_rate": 1.9814404194443896e-05, "loss": 2.1421, "step": 1146 }, { "epoch": 0.9, "learning_rate": 1.9813918264108345e-05, "loss": 2.0642, "step": 1147 }, { "epoch": 0.9, "learning_rate": 1.9813431704442517e-05, "loss": 2.4646, "step": 1148 }, { "epoch": 0.9, "learning_rate": 1.981294451547761e-05, "loss": 2.3262, "step": 1149 }, { "epoch": 0.9, "learning_rate": 1.9812456697244867e-05, "loss": 2.089, "step": 1150 }, { "epoch": 0.9, "learning_rate": 1.9811968249775578e-05, "loss": 2.1603, "step": 1151 }, { "epoch": 0.9, "learning_rate": 1.9811479173101057e-05, "loss": 2.2219, "step": 1152 }, { "epoch": 0.9, "learning_rate": 1.9810989467252665e-05, "loss": 2.1096, "step": 1153 }, { "epoch": 0.9, "learning_rate": 1.9810499132261816e-05, "loss": 1.9287, "step": 1154 }, { "epoch": 0.9, "learning_rate": 1.981000816815994e-05, "loss": 2.0922, "step": 1155 }, { "epoch": 0.9, "learning_rate": 1.9809516574978532e-05, "loss": 2.0217, "step": 1156 }, { "epoch": 0.9, "learning_rate": 1.980902435274911e-05, "loss": 1.862, "step": 1157 }, { "epoch": 0.91, "learning_rate": 1.9808531501503238e-05, "loss": 2.0775, "step": 1158 }, { "epoch": 0.91, "learning_rate": 1.9808038021272522e-05, "loss": 1.8649, "step": 1159 }, { "epoch": 0.91, "learning_rate": 1.980754391208861e-05, "loss": 2.086, "step": 1160 }, { "epoch": 0.91, "learning_rate": 1.9807049173983182e-05, "loss": 2.1806, "step": 1161 }, { "epoch": 0.91, "learning_rate": 1.9806553806987966e-05, "loss": 2.2844, "step": 1162 }, { "epoch": 0.91, "learning_rate": 1.9806057811134728e-05, "loss": 2.1591, "step": 1163 }, { "epoch": 0.91, "learning_rate": 1.980556118645528e-05, "loss": 2.4286, "step": 1164 }, { "epoch": 0.91, "learning_rate": 1.9805063932981458e-05, "loss": 2.1383, "step": 1165 }, { "epoch": 0.91, "learning_rate": 1.9804566050745158e-05, "loss": 2.1059, "step": 1166 }, { "epoch": 0.91, "learning_rate": 1.9804067539778304e-05, "loss": 1.9007, "step": 1167 }, { "epoch": 0.91, "learning_rate": 1.9803568400112866e-05, "loss": 1.9984, "step": 1168 }, { "epoch": 0.91, "learning_rate": 1.9803068631780845e-05, "loss": 2.3127, "step": 1169 }, { "epoch": 0.91, "learning_rate": 1.9802568234814292e-05, "loss": 2.1446, "step": 1170 }, { "epoch": 0.92, "learning_rate": 1.9802067209245305e-05, "loss": 1.9021, "step": 1171 }, { "epoch": 0.92, "learning_rate": 1.9801565555105998e-05, "loss": 2.1355, "step": 1172 }, { "epoch": 0.92, "learning_rate": 1.9801063272428552e-05, "loss": 1.9654, "step": 1173 }, { "epoch": 0.92, "learning_rate": 1.980056036124517e-05, "loss": 2.1527, "step": 1174 }, { "epoch": 0.92, "learning_rate": 1.9800056821588108e-05, "loss": 2.2694, "step": 1175 }, { "epoch": 0.92, "learning_rate": 1.9799552653489647e-05, "loss": 2.2883, "step": 1176 }, { "epoch": 0.92, "learning_rate": 1.9799047856982125e-05, "loss": 2.0559, "step": 1177 }, { "epoch": 0.92, "learning_rate": 1.9798542432097912e-05, "loss": 2.1624, "step": 1178 }, { "epoch": 0.92, "learning_rate": 1.9798036378869415e-05, "loss": 2.5119, "step": 1179 }, { "epoch": 0.92, "learning_rate": 1.9797529697329087e-05, "loss": 2.0807, "step": 1180 }, { "epoch": 0.92, "learning_rate": 1.9797022387509422e-05, "loss": 2.4843, "step": 1181 }, { "epoch": 0.92, "learning_rate": 1.979651444944295e-05, "loss": 1.7787, "step": 1182 }, { "epoch": 0.92, "learning_rate": 1.9796005883162245e-05, "loss": 1.8245, "step": 1183 }, { "epoch": 0.93, "learning_rate": 1.9795496688699914e-05, "loss": 1.8777, "step": 1184 }, { "epoch": 0.93, "learning_rate": 1.9794986866088615e-05, "loss": 2.097, "step": 1185 }, { "epoch": 0.93, "learning_rate": 1.979447641536104e-05, "loss": 2.217, "step": 1186 }, { "epoch": 0.93, "learning_rate": 1.9793965336549922e-05, "loss": 1.9813, "step": 1187 }, { "epoch": 0.93, "learning_rate": 1.9793453629688034e-05, "loss": 1.8438, "step": 1188 }, { "epoch": 0.93, "learning_rate": 1.979294129480819e-05, "loss": 2.0851, "step": 1189 }, { "epoch": 0.93, "learning_rate": 1.9792428331943247e-05, "loss": 2.4829, "step": 1190 }, { "epoch": 0.93, "learning_rate": 1.9791914741126095e-05, "loss": 2.4167, "step": 1191 }, { "epoch": 0.93, "learning_rate": 1.9791400522389672e-05, "loss": 2.2482, "step": 1192 }, { "epoch": 0.93, "learning_rate": 1.979088567576695e-05, "loss": 2.0206, "step": 1193 }, { "epoch": 0.93, "learning_rate": 1.979037020129095e-05, "loss": 2.5517, "step": 1194 }, { "epoch": 0.93, "learning_rate": 1.978985409899472e-05, "loss": 2.153, "step": 1195 }, { "epoch": 0.93, "learning_rate": 1.9789337368911358e-05, "loss": 2.1128, "step": 1196 }, { "epoch": 0.94, "learning_rate": 1.9788820011074e-05, "loss": 2.0211, "step": 1197 }, { "epoch": 0.94, "learning_rate": 1.9788302025515828e-05, "loss": 2.2267, "step": 1198 }, { "epoch": 0.94, "learning_rate": 1.9787783412270052e-05, "loss": 2.0547, "step": 1199 }, { "epoch": 0.94, "learning_rate": 1.9787264171369928e-05, "loss": 2.0524, "step": 1200 }, { "epoch": 0.94, "learning_rate": 1.9786744302848758e-05, "loss": 2.3429, "step": 1201 }, { "epoch": 0.94, "learning_rate": 1.9786223806739875e-05, "loss": 2.2532, "step": 1202 }, { "epoch": 0.94, "learning_rate": 1.978570268307666e-05, "loss": 2.1976, "step": 1203 }, { "epoch": 0.94, "learning_rate": 1.9785180931892527e-05, "loss": 2.3845, "step": 1204 }, { "epoch": 0.94, "learning_rate": 1.9784658553220937e-05, "loss": 2.1754, "step": 1205 }, { "epoch": 0.94, "learning_rate": 1.9784135547095387e-05, "loss": 2.3721, "step": 1206 }, { "epoch": 0.94, "learning_rate": 1.9783611913549413e-05, "loss": 2.3179, "step": 1207 }, { "epoch": 0.94, "learning_rate": 1.97830876526166e-05, "loss": 2.0153, "step": 1208 }, { "epoch": 0.94, "learning_rate": 1.9782562764330563e-05, "loss": 2.4955, "step": 1209 }, { "epoch": 0.95, "learning_rate": 1.9782037248724963e-05, "loss": 2.4286, "step": 1210 }, { "epoch": 0.95, "learning_rate": 1.9781511105833494e-05, "loss": 2.0267, "step": 1211 }, { "epoch": 0.95, "learning_rate": 1.97809843356899e-05, "loss": 2.2903, "step": 1212 }, { "epoch": 0.95, "learning_rate": 1.9780456938327962e-05, "loss": 2.3458, "step": 1213 }, { "epoch": 0.95, "learning_rate": 1.9779928913781495e-05, "loss": 2.2337, "step": 1214 }, { "epoch": 0.95, "learning_rate": 1.9779400262084366e-05, "loss": 2.2242, "step": 1215 }, { "epoch": 0.95, "learning_rate": 1.977887098327047e-05, "loss": 1.948, "step": 1216 }, { "epoch": 0.95, "learning_rate": 1.9778341077373748e-05, "loss": 1.9582, "step": 1217 }, { "epoch": 0.95, "learning_rate": 1.9777810544428187e-05, "loss": 2.0764, "step": 1218 }, { "epoch": 0.95, "learning_rate": 1.9777279384467798e-05, "loss": 2.1165, "step": 1219 }, { "epoch": 0.95, "learning_rate": 1.9776747597526652e-05, "loss": 1.9611, "step": 1220 }, { "epoch": 0.95, "learning_rate": 1.9776215183638843e-05, "loss": 1.9581, "step": 1221 }, { "epoch": 0.96, "learning_rate": 1.9775682142838522e-05, "loss": 2.1279, "step": 1222 }, { "epoch": 0.96, "learning_rate": 1.9775148475159863e-05, "loss": 2.2186, "step": 1223 }, { "epoch": 0.96, "learning_rate": 1.9774614180637085e-05, "loss": 2.2273, "step": 1224 }, { "epoch": 0.96, "learning_rate": 1.9774079259304463e-05, "loss": 2.0796, "step": 1225 }, { "epoch": 0.96, "learning_rate": 1.9773543711196287e-05, "loss": 1.8121, "step": 1226 }, { "epoch": 0.96, "learning_rate": 1.9773007536346908e-05, "loss": 2.1117, "step": 1227 }, { "epoch": 0.96, "learning_rate": 1.9772470734790703e-05, "loss": 2.2885, "step": 1228 }, { "epoch": 0.96, "learning_rate": 1.9771933306562098e-05, "loss": 2.2653, "step": 1229 }, { "epoch": 0.96, "learning_rate": 1.9771395251695557e-05, "loss": 2.4899, "step": 1230 }, { "epoch": 0.96, "learning_rate": 1.9770856570225586e-05, "loss": 2.0629, "step": 1231 }, { "epoch": 0.96, "learning_rate": 1.9770317262186722e-05, "loss": 2.4612, "step": 1232 }, { "epoch": 0.96, "learning_rate": 1.9769777327613552e-05, "loss": 2.1214, "step": 1233 }, { "epoch": 0.96, "learning_rate": 1.97692367665407e-05, "loss": 2.3508, "step": 1234 }, { "epoch": 0.97, "learning_rate": 1.976869557900283e-05, "loss": 1.9869, "step": 1235 }, { "epoch": 0.97, "learning_rate": 1.976815376503465e-05, "loss": 2.1649, "step": 1236 }, { "epoch": 0.97, "learning_rate": 1.9767611324670897e-05, "loss": 2.1986, "step": 1237 }, { "epoch": 0.97, "learning_rate": 1.976706825794636e-05, "loss": 2.1749, "step": 1238 }, { "epoch": 0.97, "learning_rate": 1.9766524564895868e-05, "loss": 2.4078, "step": 1239 }, { "epoch": 0.97, "learning_rate": 1.9765980245554278e-05, "loss": 2.2716, "step": 1240 }, { "epoch": 0.97, "learning_rate": 1.97654352999565e-05, "loss": 1.8812, "step": 1241 }, { "epoch": 0.97, "learning_rate": 1.9764889728137478e-05, "loss": 2.2164, "step": 1242 }, { "epoch": 0.97, "learning_rate": 1.9764343530132196e-05, "loss": 2.0325, "step": 1243 }, { "epoch": 0.97, "learning_rate": 1.9763796705975684e-05, "loss": 2.3162, "step": 1244 }, { "epoch": 0.97, "learning_rate": 1.9763249255703006e-05, "loss": 2.1837, "step": 1245 }, { "epoch": 0.97, "learning_rate": 1.9762701179349265e-05, "loss": 1.9839, "step": 1246 }, { "epoch": 0.97, "learning_rate": 1.976215247694961e-05, "loss": 2.1779, "step": 1247 }, { "epoch": 0.98, "learning_rate": 1.9761603148539227e-05, "loss": 1.9715, "step": 1248 }, { "epoch": 0.98, "learning_rate": 1.9761053194153342e-05, "loss": 1.8218, "step": 1249 }, { "epoch": 0.98, "learning_rate": 1.976050261382722e-05, "loss": 2.1356, "step": 1250 }, { "epoch": 0.98, "learning_rate": 1.975995140759617e-05, "loss": 2.0262, "step": 1251 }, { "epoch": 0.98, "learning_rate": 1.975939957549554e-05, "loss": 2.4145, "step": 1252 }, { "epoch": 0.98, "learning_rate": 1.9758847117560712e-05, "loss": 2.1878, "step": 1253 }, { "epoch": 0.98, "learning_rate": 1.975829403382712e-05, "loss": 2.0749, "step": 1254 }, { "epoch": 0.98, "learning_rate": 1.975774032433022e-05, "loss": 2.1976, "step": 1255 }, { "epoch": 0.98, "learning_rate": 1.9757185989105533e-05, "loss": 2.1375, "step": 1256 }, { "epoch": 0.98, "learning_rate": 1.9756631028188597e-05, "loss": 2.0815, "step": 1257 }, { "epoch": 0.98, "learning_rate": 1.9756075441615007e-05, "loss": 2.0705, "step": 1258 }, { "epoch": 0.98, "learning_rate": 1.9755519229420384e-05, "loss": 2.1645, "step": 1259 }, { "epoch": 0.98, "learning_rate": 1.9754962391640397e-05, "loss": 2.4649, "step": 1260 }, { "epoch": 0.99, "learning_rate": 1.9754404928310755e-05, "loss": 2.0229, "step": 1261 }, { "epoch": 0.99, "learning_rate": 1.9753846839467208e-05, "loss": 2.1163, "step": 1262 }, { "epoch": 0.99, "learning_rate": 1.975328812514554e-05, "loss": 2.1727, "step": 1263 }, { "epoch": 0.99, "learning_rate": 1.9752728785381584e-05, "loss": 1.9359, "step": 1264 }, { "epoch": 0.99, "learning_rate": 1.9752168820211205e-05, "loss": 2.0653, "step": 1265 }, { "epoch": 0.99, "learning_rate": 1.975160822967031e-05, "loss": 2.1015, "step": 1266 }, { "epoch": 0.99, "learning_rate": 1.9751047013794853e-05, "loss": 2.2216, "step": 1267 }, { "epoch": 0.99, "learning_rate": 1.9750485172620818e-05, "loss": 2.1235, "step": 1268 }, { "epoch": 0.99, "learning_rate": 1.974992270618424e-05, "loss": 2.4453, "step": 1269 }, { "epoch": 0.99, "learning_rate": 1.974935961452118e-05, "loss": 2.2263, "step": 1270 }, { "epoch": 0.99, "learning_rate": 1.9748795897667753e-05, "loss": 2.0817, "step": 1271 }, { "epoch": 0.99, "learning_rate": 1.97482315556601e-05, "loss": 2.269, "step": 1272 }, { "epoch": 0.99, "learning_rate": 1.9747666588534424e-05, "loss": 2.3248, "step": 1273 }, { "epoch": 1.0, "learning_rate": 1.974710099632694e-05, "loss": 1.9498, "step": 1274 }, { "epoch": 1.0, "learning_rate": 1.9746534779073926e-05, "loss": 2.5543, "step": 1275 }, { "epoch": 1.0, "learning_rate": 1.9745967936811684e-05, "loss": 2.1391, "step": 1276 }, { "epoch": 1.0, "learning_rate": 1.9745400469576576e-05, "loss": 2.1191, "step": 1277 }, { "epoch": 1.0, "learning_rate": 1.9744832377404978e-05, "loss": 2.3779, "step": 1278 }, { "epoch": 1.0, "learning_rate": 1.974426366033333e-05, "loss": 2.3616, "step": 1279 }, { "epoch": 1.0, "learning_rate": 1.974369431839809e-05, "loss": 2.2086, "step": 1280 }, { "epoch": 1.0, "learning_rate": 1.974312435163578e-05, "loss": 1.6231, "step": 1281 }, { "epoch": 1.0, "learning_rate": 1.9742553760082946e-05, "loss": 1.9083, "step": 1282 }, { "epoch": 1.0, "learning_rate": 1.9741982543776174e-05, "loss": 1.9895, "step": 1283 }, { "epoch": 1.0, "learning_rate": 1.9741410702752097e-05, "loss": 1.8935, "step": 1284 }, { "epoch": 1.0, "learning_rate": 1.9740838237047388e-05, "loss": 1.8117, "step": 1285 }, { "epoch": 1.01, "learning_rate": 1.9740265146698753e-05, "loss": 1.6714, "step": 1286 }, { "epoch": 1.01, "learning_rate": 1.973969143174294e-05, "loss": 1.796, "step": 1287 }, { "epoch": 1.01, "learning_rate": 1.9739117092216743e-05, "loss": 1.589, "step": 1288 }, { "epoch": 1.01, "learning_rate": 1.9738542128156995e-05, "loss": 1.7285, "step": 1289 }, { "epoch": 1.01, "learning_rate": 1.973796653960056e-05, "loss": 1.8519, "step": 1290 }, { "epoch": 1.01, "learning_rate": 1.9737390326584352e-05, "loss": 1.9706, "step": 1291 }, { "epoch": 1.01, "learning_rate": 1.973681348914532e-05, "loss": 1.8692, "step": 1292 }, { "epoch": 1.01, "learning_rate": 1.9736236027320456e-05, "loss": 1.7224, "step": 1293 }, { "epoch": 1.01, "learning_rate": 1.973565794114679e-05, "loss": 1.7994, "step": 1294 }, { "epoch": 1.01, "learning_rate": 1.973507923066139e-05, "loss": 2.084, "step": 1295 }, { "epoch": 1.01, "learning_rate": 1.973449989590137e-05, "loss": 1.771, "step": 1296 }, { "epoch": 1.01, "learning_rate": 1.973391993690388e-05, "loss": 1.7895, "step": 1297 }, { "epoch": 1.01, "learning_rate": 1.973333935370611e-05, "loss": 1.8177, "step": 1298 }, { "epoch": 1.02, "learning_rate": 1.973275814634529e-05, "loss": 1.9238, "step": 1299 }, { "epoch": 1.02, "learning_rate": 1.9732176314858692e-05, "loss": 1.8848, "step": 1300 }, { "epoch": 1.02, "learning_rate": 1.9731593859283626e-05, "loss": 1.8196, "step": 1301 }, { "epoch": 1.02, "learning_rate": 1.973101077965744e-05, "loss": 2.0784, "step": 1302 }, { "epoch": 1.02, "learning_rate": 1.9730427076017528e-05, "loss": 2.0611, "step": 1303 }, { "epoch": 1.02, "learning_rate": 1.9729842748401324e-05, "loss": 1.81, "step": 1304 }, { "epoch": 1.02, "learning_rate": 1.972925779684629e-05, "loss": 1.9628, "step": 1305 }, { "epoch": 1.02, "learning_rate": 1.9728672221389946e-05, "loss": 1.9393, "step": 1306 }, { "epoch": 1.02, "learning_rate": 1.9728086022069837e-05, "loss": 2.0149, "step": 1307 }, { "epoch": 1.02, "learning_rate": 1.9727499198923558e-05, "loss": 1.9186, "step": 1308 }, { "epoch": 1.02, "learning_rate": 1.972691175198873e-05, "loss": 1.7131, "step": 1309 }, { "epoch": 1.02, "learning_rate": 1.9726323681303037e-05, "loss": 1.9197, "step": 1310 }, { "epoch": 1.02, "learning_rate": 1.9725734986904182e-05, "loss": 2.2318, "step": 1311 }, { "epoch": 1.03, "learning_rate": 1.9725145668829916e-05, "loss": 1.7981, "step": 1312 }, { "epoch": 1.03, "learning_rate": 1.9724555727118033e-05, "loss": 1.8879, "step": 1313 }, { "epoch": 1.03, "learning_rate": 1.9723965161806363e-05, "loss": 1.9347, "step": 1314 }, { "epoch": 1.03, "learning_rate": 1.9723373972932775e-05, "loss": 1.9075, "step": 1315 }, { "epoch": 1.03, "learning_rate": 1.9722782160535177e-05, "loss": 1.8799, "step": 1316 }, { "epoch": 1.03, "learning_rate": 1.972218972465153e-05, "loss": 1.9466, "step": 1317 }, { "epoch": 1.03, "learning_rate": 1.9721596665319813e-05, "loss": 1.8576, "step": 1318 }, { "epoch": 1.03, "learning_rate": 1.9721002982578065e-05, "loss": 1.7088, "step": 1319 }, { "epoch": 1.03, "learning_rate": 1.972040867646435e-05, "loss": 1.7258, "step": 1320 }, { "epoch": 1.03, "learning_rate": 1.9719813747016784e-05, "loss": 1.6966, "step": 1321 }, { "epoch": 1.03, "learning_rate": 1.9719218194273513e-05, "loss": 1.9489, "step": 1322 }, { "epoch": 1.03, "learning_rate": 1.9718622018272735e-05, "loss": 1.8528, "step": 1323 }, { "epoch": 1.03, "learning_rate": 1.9718025219052673e-05, "loss": 1.7522, "step": 1324 }, { "epoch": 1.04, "learning_rate": 1.97174277966516e-05, "loss": 1.999, "step": 1325 }, { "epoch": 1.04, "learning_rate": 1.9716829751107828e-05, "loss": 1.8902, "step": 1326 }, { "epoch": 1.04, "learning_rate": 1.971623108245971e-05, "loss": 1.6749, "step": 1327 }, { "epoch": 1.04, "learning_rate": 1.9715631790745625e-05, "loss": 2.2144, "step": 1328 }, { "epoch": 1.04, "learning_rate": 1.9715031876004017e-05, "loss": 1.6048, "step": 1329 }, { "epoch": 1.04, "learning_rate": 1.971443133827335e-05, "loss": 1.8422, "step": 1330 }, { "epoch": 1.04, "learning_rate": 1.9713830177592132e-05, "loss": 1.5185, "step": 1331 }, { "epoch": 1.04, "learning_rate": 1.9713228393998917e-05, "loss": 1.8999, "step": 1332 }, { "epoch": 1.04, "learning_rate": 1.971262598753229e-05, "loss": 2.0644, "step": 1333 }, { "epoch": 1.04, "learning_rate": 1.971202295823089e-05, "loss": 1.6165, "step": 1334 }, { "epoch": 1.04, "learning_rate": 1.9711419306133385e-05, "loss": 1.8674, "step": 1335 }, { "epoch": 1.04, "learning_rate": 1.971081503127848e-05, "loss": 1.6207, "step": 1336 }, { "epoch": 1.04, "learning_rate": 1.9710210133704924e-05, "loss": 2.0939, "step": 1337 }, { "epoch": 1.05, "learning_rate": 1.9709604613451512e-05, "loss": 1.786, "step": 1338 }, { "epoch": 1.05, "learning_rate": 1.9708998470557074e-05, "loss": 1.8945, "step": 1339 }, { "epoch": 1.05, "learning_rate": 1.970839170506047e-05, "loss": 1.7745, "step": 1340 }, { "epoch": 1.05, "learning_rate": 1.970778431700062e-05, "loss": 1.953, "step": 1341 }, { "epoch": 1.05, "learning_rate": 1.9707176306416477e-05, "loss": 1.6682, "step": 1342 }, { "epoch": 1.05, "learning_rate": 1.9706567673347017e-05, "loss": 1.9575, "step": 1343 }, { "epoch": 1.05, "learning_rate": 1.9705958417831282e-05, "loss": 1.6184, "step": 1344 }, { "epoch": 1.05, "learning_rate": 1.9705348539908332e-05, "loss": 1.5804, "step": 1345 }, { "epoch": 1.05, "learning_rate": 1.9704738039617278e-05, "loss": 1.8934, "step": 1346 }, { "epoch": 1.05, "learning_rate": 1.9704126916997273e-05, "loss": 2.166, "step": 1347 }, { "epoch": 1.05, "learning_rate": 1.9703515172087506e-05, "loss": 1.6707, "step": 1348 }, { "epoch": 1.05, "learning_rate": 1.9702902804927203e-05, "loss": 2.0192, "step": 1349 }, { "epoch": 1.06, "learning_rate": 1.9702289815555634e-05, "loss": 1.7461, "step": 1350 }, { "epoch": 1.06, "learning_rate": 1.9701676204012106e-05, "loss": 2.0644, "step": 1351 }, { "epoch": 1.06, "learning_rate": 1.970106197033597e-05, "loss": 1.9728, "step": 1352 }, { "epoch": 1.06, "learning_rate": 1.9700447114566614e-05, "loss": 1.8763, "step": 1353 }, { "epoch": 1.06, "learning_rate": 1.9699831636743468e-05, "loss": 1.7052, "step": 1354 }, { "epoch": 1.06, "learning_rate": 1.9699215536905996e-05, "loss": 1.5837, "step": 1355 }, { "epoch": 1.06, "learning_rate": 1.969859881509371e-05, "loss": 2.2079, "step": 1356 }, { "epoch": 1.06, "learning_rate": 1.9697981471346156e-05, "loss": 1.7549, "step": 1357 }, { "epoch": 1.06, "learning_rate": 1.9697363505702923e-05, "loss": 2.1183, "step": 1358 }, { "epoch": 1.06, "learning_rate": 1.9696744918203638e-05, "loss": 1.9572, "step": 1359 }, { "epoch": 1.06, "learning_rate": 1.969612570888797e-05, "loss": 1.8463, "step": 1360 }, { "epoch": 1.06, "learning_rate": 1.9695505877795627e-05, "loss": 2.1258, "step": 1361 }, { "epoch": 1.06, "learning_rate": 1.9694885424966356e-05, "loss": 1.6697, "step": 1362 }, { "epoch": 1.07, "learning_rate": 1.9694264350439943e-05, "loss": 2.0954, "step": 1363 }, { "epoch": 1.07, "learning_rate": 1.9693642654256213e-05, "loss": 1.9643, "step": 1364 }, { "epoch": 1.07, "learning_rate": 1.9693020336455037e-05, "loss": 1.6474, "step": 1365 }, { "epoch": 1.07, "learning_rate": 1.9692397397076326e-05, "loss": 1.8906, "step": 1366 }, { "epoch": 1.07, "learning_rate": 1.969177383616002e-05, "loss": 1.9317, "step": 1367 }, { "epoch": 1.07, "learning_rate": 1.9691149653746103e-05, "loss": 1.6161, "step": 1368 }, { "epoch": 1.07, "learning_rate": 1.969052484987461e-05, "loss": 1.7639, "step": 1369 }, { "epoch": 1.07, "learning_rate": 1.9689899424585602e-05, "loss": 1.6403, "step": 1370 }, { "epoch": 1.07, "learning_rate": 1.9689273377919183e-05, "loss": 2.1175, "step": 1371 }, { "epoch": 1.07, "learning_rate": 1.968864670991551e-05, "loss": 1.9107, "step": 1372 }, { "epoch": 1.07, "learning_rate": 1.9688019420614756e-05, "loss": 1.7518, "step": 1373 }, { "epoch": 1.07, "learning_rate": 1.9687391510057155e-05, "loss": 2.0012, "step": 1374 }, { "epoch": 1.07, "learning_rate": 1.968676297828297e-05, "loss": 1.7882, "step": 1375 }, { "epoch": 1.08, "learning_rate": 1.9686133825332507e-05, "loss": 1.6715, "step": 1376 }, { "epoch": 1.08, "learning_rate": 1.9685504051246107e-05, "loss": 1.8833, "step": 1377 }, { "epoch": 1.08, "learning_rate": 1.968487365606416e-05, "loss": 2.2088, "step": 1378 }, { "epoch": 1.08, "learning_rate": 1.9684242639827088e-05, "loss": 1.7218, "step": 1379 }, { "epoch": 1.08, "learning_rate": 1.968361100257536e-05, "loss": 1.9268, "step": 1380 }, { "epoch": 1.08, "learning_rate": 1.968297874434948e-05, "loss": 2.0367, "step": 1381 }, { "epoch": 1.08, "learning_rate": 1.9682345865189986e-05, "loss": 1.823, "step": 1382 }, { "epoch": 1.08, "learning_rate": 1.9681712365137466e-05, "loss": 1.9817, "step": 1383 }, { "epoch": 1.08, "learning_rate": 1.9681078244232545e-05, "loss": 2.0348, "step": 1384 }, { "epoch": 1.08, "learning_rate": 1.968044350251589e-05, "loss": 1.7702, "step": 1385 }, { "epoch": 1.08, "learning_rate": 1.96798081400282e-05, "loss": 1.7417, "step": 1386 }, { "epoch": 1.08, "learning_rate": 1.9679172156810216e-05, "loss": 1.9397, "step": 1387 }, { "epoch": 1.08, "learning_rate": 1.9678535552902725e-05, "loss": 1.7551, "step": 1388 }, { "epoch": 1.09, "learning_rate": 1.967789832834655e-05, "loss": 2.1155, "step": 1389 }, { "epoch": 1.09, "learning_rate": 1.967726048318255e-05, "loss": 1.8881, "step": 1390 }, { "epoch": 1.09, "learning_rate": 1.9676622017451637e-05, "loss": 2.175, "step": 1391 }, { "epoch": 1.09, "learning_rate": 1.9675982931194747e-05, "loss": 1.6815, "step": 1392 }, { "epoch": 1.09, "learning_rate": 1.9675343224452864e-05, "loss": 1.8743, "step": 1393 }, { "epoch": 1.09, "learning_rate": 1.9674702897267004e-05, "loss": 1.9093, "step": 1394 }, { "epoch": 1.09, "learning_rate": 1.9674061949678235e-05, "loss": 1.8845, "step": 1395 }, { "epoch": 1.09, "learning_rate": 1.9673420381727658e-05, "loss": 1.7027, "step": 1396 }, { "epoch": 1.09, "learning_rate": 1.9672778193456415e-05, "loss": 1.7649, "step": 1397 }, { "epoch": 1.09, "learning_rate": 1.9672135384905683e-05, "loss": 1.8492, "step": 1398 }, { "epoch": 1.09, "learning_rate": 1.9671491956116688e-05, "loss": 1.7418, "step": 1399 }, { "epoch": 1.09, "learning_rate": 1.9670847907130687e-05, "loss": 1.6384, "step": 1400 }, { "epoch": 1.09, "learning_rate": 1.9670203237988982e-05, "loss": 1.888, "step": 1401 }, { "epoch": 1.1, "learning_rate": 1.9669557948732915e-05, "loss": 2.1357, "step": 1402 }, { "epoch": 1.1, "learning_rate": 1.9668912039403864e-05, "loss": 2.1574, "step": 1403 }, { "epoch": 1.1, "learning_rate": 1.9668265510043248e-05, "loss": 2.1109, "step": 1404 }, { "epoch": 1.1, "learning_rate": 1.9667618360692525e-05, "loss": 2.1853, "step": 1405 }, { "epoch": 1.1, "learning_rate": 1.96669705913932e-05, "loss": 1.6043, "step": 1406 }, { "epoch": 1.1, "learning_rate": 1.966632220218681e-05, "loss": 1.6725, "step": 1407 }, { "epoch": 1.1, "learning_rate": 1.966567319311493e-05, "loss": 1.6934, "step": 1408 }, { "epoch": 1.1, "learning_rate": 1.9665023564219182e-05, "loss": 1.8164, "step": 1409 }, { "epoch": 1.1, "learning_rate": 1.966437331554122e-05, "loss": 1.9348, "step": 1410 }, { "epoch": 1.1, "learning_rate": 1.966372244712275e-05, "loss": 1.866, "step": 1411 }, { "epoch": 1.1, "learning_rate": 1.9663070959005505e-05, "loss": 1.7386, "step": 1412 }, { "epoch": 1.1, "learning_rate": 1.9662418851231262e-05, "loss": 1.788, "step": 1413 }, { "epoch": 1.11, "learning_rate": 1.966176612384184e-05, "loss": 1.8367, "step": 1414 }, { "epoch": 1.11, "learning_rate": 1.9661112776879093e-05, "loss": 1.7723, "step": 1415 }, { "epoch": 1.11, "learning_rate": 1.966045881038492e-05, "loss": 2.0257, "step": 1416 }, { "epoch": 1.11, "learning_rate": 1.965980422440126e-05, "loss": 1.912, "step": 1417 }, { "epoch": 1.11, "learning_rate": 1.9659149018970082e-05, "loss": 2.0657, "step": 1418 }, { "epoch": 1.11, "learning_rate": 1.9658493194133408e-05, "loss": 2.1179, "step": 1419 }, { "epoch": 1.11, "learning_rate": 1.9657836749933293e-05, "loss": 1.8701, "step": 1420 }, { "epoch": 1.11, "learning_rate": 1.9657179686411832e-05, "loss": 2.0639, "step": 1421 }, { "epoch": 1.11, "learning_rate": 1.965652200361116e-05, "loss": 2.1919, "step": 1422 }, { "epoch": 1.11, "learning_rate": 1.9655863701573448e-05, "loss": 1.8233, "step": 1423 }, { "epoch": 1.11, "learning_rate": 1.9655204780340916e-05, "loss": 1.7413, "step": 1424 }, { "epoch": 1.11, "learning_rate": 1.9654545239955814e-05, "loss": 1.6275, "step": 1425 }, { "epoch": 1.11, "learning_rate": 1.965388508046044e-05, "loss": 1.9894, "step": 1426 }, { "epoch": 1.12, "learning_rate": 1.965322430189712e-05, "loss": 1.8101, "step": 1427 }, { "epoch": 1.12, "learning_rate": 1.9652562904308236e-05, "loss": 2.0838, "step": 1428 }, { "epoch": 1.12, "learning_rate": 1.9651900887736198e-05, "loss": 1.899, "step": 1429 }, { "epoch": 1.12, "learning_rate": 1.965123825222346e-05, "loss": 1.9706, "step": 1430 }, { "epoch": 1.12, "learning_rate": 1.965057499781251e-05, "loss": 1.8526, "step": 1431 }, { "epoch": 1.12, "learning_rate": 1.9649911124545885e-05, "loss": 1.945, "step": 1432 }, { "epoch": 1.12, "learning_rate": 1.964924663246615e-05, "loss": 2.1006, "step": 1433 }, { "epoch": 1.12, "learning_rate": 1.9648581521615925e-05, "loss": 2.1477, "step": 1434 }, { "epoch": 1.12, "learning_rate": 1.9647915792037852e-05, "loss": 1.5158, "step": 1435 }, { "epoch": 1.12, "learning_rate": 1.964724944377463e-05, "loss": 1.6656, "step": 1436 }, { "epoch": 1.12, "learning_rate": 1.964658247686899e-05, "loss": 1.9768, "step": 1437 }, { "epoch": 1.12, "learning_rate": 1.964591489136369e-05, "loss": 1.9272, "step": 1438 }, { "epoch": 1.12, "learning_rate": 1.964524668730155e-05, "loss": 1.859, "step": 1439 }, { "epoch": 1.13, "learning_rate": 1.964457786472542e-05, "loss": 1.8735, "step": 1440 }, { "epoch": 1.13, "learning_rate": 1.9643908423678188e-05, "loss": 2.1497, "step": 1441 }, { "epoch": 1.13, "learning_rate": 1.9643238364202777e-05, "loss": 1.7182, "step": 1442 }, { "epoch": 1.13, "learning_rate": 1.964256768634216e-05, "loss": 1.6211, "step": 1443 }, { "epoch": 1.13, "learning_rate": 1.964189639013935e-05, "loss": 1.899, "step": 1444 }, { "epoch": 1.13, "learning_rate": 1.9641224475637382e-05, "loss": 1.828, "step": 1445 }, { "epoch": 1.13, "learning_rate": 1.964055194287936e-05, "loss": 1.9046, "step": 1446 }, { "epoch": 1.13, "learning_rate": 1.9639878791908397e-05, "loss": 1.9015, "step": 1447 }, { "epoch": 1.13, "learning_rate": 1.9639205022767663e-05, "loss": 1.7647, "step": 1448 }, { "epoch": 1.13, "learning_rate": 1.9638530635500367e-05, "loss": 2.0908, "step": 1449 }, { "epoch": 1.13, "learning_rate": 1.9637855630149755e-05, "loss": 2.0623, "step": 1450 }, { "epoch": 1.13, "learning_rate": 1.9637180006759114e-05, "loss": 1.8386, "step": 1451 }, { "epoch": 1.13, "learning_rate": 1.963650376537176e-05, "loss": 2.0162, "step": 1452 }, { "epoch": 1.14, "learning_rate": 1.9635826906031073e-05, "loss": 1.9595, "step": 1453 }, { "epoch": 1.14, "learning_rate": 1.9635149428780443e-05, "loss": 1.777, "step": 1454 }, { "epoch": 1.14, "learning_rate": 1.9634471333663324e-05, "loss": 2.0194, "step": 1455 }, { "epoch": 1.14, "learning_rate": 1.9633792620723192e-05, "loss": 1.9487, "step": 1456 }, { "epoch": 1.14, "learning_rate": 1.963311329000358e-05, "loss": 1.9298, "step": 1457 }, { "epoch": 1.14, "learning_rate": 1.9632433341548044e-05, "loss": 1.7967, "step": 1458 }, { "epoch": 1.14, "learning_rate": 1.9631752775400183e-05, "loss": 1.8766, "step": 1459 }, { "epoch": 1.14, "learning_rate": 1.9631071591603646e-05, "loss": 1.8641, "step": 1460 }, { "epoch": 1.14, "learning_rate": 1.9630389790202116e-05, "loss": 1.6978, "step": 1461 }, { "epoch": 1.14, "learning_rate": 1.962970737123931e-05, "loss": 2.1171, "step": 1462 }, { "epoch": 1.14, "learning_rate": 1.962902433475899e-05, "loss": 1.8339, "step": 1463 }, { "epoch": 1.14, "learning_rate": 1.9628340680804957e-05, "loss": 1.8675, "step": 1464 }, { "epoch": 1.14, "learning_rate": 1.962765640942105e-05, "loss": 1.8521, "step": 1465 }, { "epoch": 1.15, "learning_rate": 1.9626971520651147e-05, "loss": 1.796, "step": 1466 }, { "epoch": 1.15, "learning_rate": 1.9626286014539175e-05, "loss": 1.7547, "step": 1467 }, { "epoch": 1.15, "learning_rate": 1.9625599891129087e-05, "loss": 1.7221, "step": 1468 }, { "epoch": 1.15, "learning_rate": 1.9624913150464884e-05, "loss": 2.2033, "step": 1469 }, { "epoch": 1.15, "learning_rate": 1.9624225792590602e-05, "loss": 1.8346, "step": 1470 }, { "epoch": 1.15, "learning_rate": 1.9623537817550316e-05, "loss": 1.9721, "step": 1471 }, { "epoch": 1.15, "learning_rate": 1.962284922538815e-05, "loss": 1.8823, "step": 1472 }, { "epoch": 1.15, "learning_rate": 1.962216001614826e-05, "loss": 1.8539, "step": 1473 }, { "epoch": 1.15, "learning_rate": 1.9621470189874842e-05, "loss": 2.0299, "step": 1474 }, { "epoch": 1.15, "learning_rate": 1.9620779746612126e-05, "loss": 1.7413, "step": 1475 }, { "epoch": 1.15, "learning_rate": 1.9620088686404393e-05, "loss": 1.7746, "step": 1476 }, { "epoch": 1.15, "learning_rate": 1.9619397009295955e-05, "loss": 1.888, "step": 1477 }, { "epoch": 1.16, "learning_rate": 1.961870471533117e-05, "loss": 2.3043, "step": 1478 }, { "epoch": 1.16, "learning_rate": 1.9618011804554432e-05, "loss": 1.872, "step": 1479 }, { "epoch": 1.16, "learning_rate": 1.961731827701017e-05, "loss": 1.8845, "step": 1480 }, { "epoch": 1.16, "learning_rate": 1.9616624132742867e-05, "loss": 1.8889, "step": 1481 }, { "epoch": 1.16, "learning_rate": 1.9615929371797024e-05, "loss": 1.8987, "step": 1482 }, { "epoch": 1.16, "learning_rate": 1.9615233994217206e-05, "loss": 1.9486, "step": 1483 }, { "epoch": 1.16, "learning_rate": 1.961453800004799e-05, "loss": 2.067, "step": 1484 }, { "epoch": 1.16, "learning_rate": 1.9613841389334023e-05, "loss": 1.7092, "step": 1485 }, { "epoch": 1.16, "learning_rate": 1.961314416211997e-05, "loss": 1.9598, "step": 1486 }, { "epoch": 1.16, "learning_rate": 1.9612446318450535e-05, "loss": 2.1319, "step": 1487 }, { "epoch": 1.16, "learning_rate": 1.9611747858370472e-05, "loss": 1.7168, "step": 1488 }, { "epoch": 1.16, "learning_rate": 1.961104878192458e-05, "loss": 1.7272, "step": 1489 }, { "epoch": 1.16, "learning_rate": 1.9610349089157677e-05, "loss": 1.6378, "step": 1490 }, { "epoch": 1.17, "learning_rate": 1.9609648780114635e-05, "loss": 1.8268, "step": 1491 }, { "epoch": 1.17, "learning_rate": 1.9608947854840365e-05, "loss": 1.9125, "step": 1492 }, { "epoch": 1.17, "learning_rate": 1.9608246313379807e-05, "loss": 1.7584, "step": 1493 }, { "epoch": 1.17, "learning_rate": 1.9607544155777957e-05, "loss": 1.8613, "step": 1494 }, { "epoch": 1.17, "learning_rate": 1.960684138207984e-05, "loss": 1.6183, "step": 1495 }, { "epoch": 1.17, "learning_rate": 1.960613799233052e-05, "loss": 1.7501, "step": 1496 }, { "epoch": 1.17, "learning_rate": 1.96054339865751e-05, "loss": 1.7525, "step": 1497 }, { "epoch": 1.17, "learning_rate": 1.9604729364858732e-05, "loss": 1.8407, "step": 1498 }, { "epoch": 1.17, "learning_rate": 1.9604024127226595e-05, "loss": 1.8434, "step": 1499 }, { "epoch": 1.17, "learning_rate": 1.9603318273723916e-05, "loss": 2.0594, "step": 1500 }, { "epoch": 1.17, "learning_rate": 1.960261180439596e-05, "loss": 1.6977, "step": 1501 }, { "epoch": 1.17, "learning_rate": 1.9601904719288026e-05, "loss": 1.847, "step": 1502 }, { "epoch": 1.17, "learning_rate": 1.9601197018445464e-05, "loss": 1.8203, "step": 1503 }, { "epoch": 1.18, "learning_rate": 1.960048870191365e-05, "loss": 2.0008, "step": 1504 }, { "epoch": 1.18, "learning_rate": 1.9599779769738007e-05, "loss": 1.9332, "step": 1505 }, { "epoch": 1.18, "learning_rate": 1.9599070221963996e-05, "loss": 1.7868, "step": 1506 }, { "epoch": 1.18, "learning_rate": 1.959836005863712e-05, "loss": 1.8214, "step": 1507 }, { "epoch": 1.18, "learning_rate": 1.9597649279802915e-05, "loss": 1.815, "step": 1508 }, { "epoch": 1.18, "learning_rate": 1.9596937885506966e-05, "loss": 1.9709, "step": 1509 }, { "epoch": 1.18, "learning_rate": 1.959622587579489e-05, "loss": 1.9556, "step": 1510 }, { "epoch": 1.18, "learning_rate": 1.9595513250712338e-05, "loss": 1.9526, "step": 1511 }, { "epoch": 1.18, "learning_rate": 1.959480001030502e-05, "loss": 1.772, "step": 1512 }, { "epoch": 1.18, "learning_rate": 1.959408615461867e-05, "loss": 1.8616, "step": 1513 }, { "epoch": 1.18, "learning_rate": 1.9593371683699057e-05, "loss": 1.9482, "step": 1514 }, { "epoch": 1.18, "learning_rate": 1.959265659759201e-05, "loss": 1.8872, "step": 1515 }, { "epoch": 1.18, "learning_rate": 1.9591940896343374e-05, "loss": 2.1215, "step": 1516 }, { "epoch": 1.19, "learning_rate": 1.959122457999905e-05, "loss": 1.7652, "step": 1517 }, { "epoch": 1.19, "learning_rate": 1.9590507648604973e-05, "loss": 1.8398, "step": 1518 }, { "epoch": 1.19, "learning_rate": 1.9589790102207115e-05, "loss": 1.8501, "step": 1519 }, { "epoch": 1.19, "learning_rate": 1.9589071940851488e-05, "loss": 1.884, "step": 1520 }, { "epoch": 1.19, "learning_rate": 1.9588353164584148e-05, "loss": 1.9816, "step": 1521 }, { "epoch": 1.19, "learning_rate": 1.9587633773451187e-05, "loss": 1.7524, "step": 1522 }, { "epoch": 1.19, "learning_rate": 1.9586913767498735e-05, "loss": 2.1455, "step": 1523 }, { "epoch": 1.19, "learning_rate": 1.9586193146772968e-05, "loss": 2.0141, "step": 1524 }, { "epoch": 1.19, "learning_rate": 1.958547191132009e-05, "loss": 1.9223, "step": 1525 }, { "epoch": 1.19, "learning_rate": 1.9584750061186356e-05, "loss": 2.1814, "step": 1526 }, { "epoch": 1.19, "learning_rate": 1.9584027596418055e-05, "loss": 1.8686, "step": 1527 }, { "epoch": 1.19, "learning_rate": 1.9583304517061515e-05, "loss": 1.824, "step": 1528 }, { "epoch": 1.19, "learning_rate": 1.9582580823163108e-05, "loss": 2.0222, "step": 1529 }, { "epoch": 1.2, "learning_rate": 1.9581856514769233e-05, "loss": 2.1212, "step": 1530 }, { "epoch": 1.2, "learning_rate": 1.9581131591926342e-05, "loss": 1.7682, "step": 1531 }, { "epoch": 1.2, "learning_rate": 1.9580406054680926e-05, "loss": 2.0474, "step": 1532 }, { "epoch": 1.2, "learning_rate": 1.957967990307951e-05, "loss": 1.9774, "step": 1533 }, { "epoch": 1.2, "learning_rate": 1.9578953137168653e-05, "loss": 1.8586, "step": 1534 }, { "epoch": 1.2, "learning_rate": 1.9578225756994965e-05, "loss": 1.7404, "step": 1535 }, { "epoch": 1.2, "learning_rate": 1.9577497762605086e-05, "loss": 1.7015, "step": 1536 }, { "epoch": 1.2, "learning_rate": 1.9576769154045702e-05, "loss": 1.6866, "step": 1537 }, { "epoch": 1.2, "learning_rate": 1.957603993136354e-05, "loss": 1.9614, "step": 1538 }, { "epoch": 1.2, "learning_rate": 1.9575310094605357e-05, "loss": 1.9086, "step": 1539 }, { "epoch": 1.2, "learning_rate": 1.9574579643817956e-05, "loss": 1.7176, "step": 1540 }, { "epoch": 1.2, "learning_rate": 1.957384857904818e-05, "loss": 2.2151, "step": 1541 }, { "epoch": 1.21, "learning_rate": 1.9573116900342905e-05, "loss": 1.7889, "step": 1542 }, { "epoch": 1.21, "learning_rate": 1.957238460774906e-05, "loss": 1.7372, "step": 1543 }, { "epoch": 1.21, "learning_rate": 1.957165170131359e-05, "loss": 1.8295, "step": 1544 }, { "epoch": 1.21, "learning_rate": 1.9570918181083506e-05, "loss": 1.9872, "step": 1545 }, { "epoch": 1.21, "learning_rate": 1.9570184047105844e-05, "loss": 1.6677, "step": 1546 }, { "epoch": 1.21, "learning_rate": 1.9569449299427675e-05, "loss": 1.6809, "step": 1547 }, { "epoch": 1.21, "learning_rate": 1.956871393809612e-05, "loss": 1.9869, "step": 1548 }, { "epoch": 1.21, "learning_rate": 1.9567977963158335e-05, "loss": 2.1947, "step": 1549 }, { "epoch": 1.21, "learning_rate": 1.9567241374661517e-05, "loss": 1.8127, "step": 1550 }, { "epoch": 1.21, "learning_rate": 1.9566504172652895e-05, "loss": 2.0319, "step": 1551 }, { "epoch": 1.21, "learning_rate": 1.956576635717975e-05, "loss": 1.6384, "step": 1552 }, { "epoch": 1.21, "learning_rate": 1.956502792828939e-05, "loss": 1.8212, "step": 1553 }, { "epoch": 1.21, "learning_rate": 1.956428888602917e-05, "loss": 2.0201, "step": 1554 }, { "epoch": 1.22, "learning_rate": 1.956354923044648e-05, "loss": 1.6744, "step": 1555 }, { "epoch": 1.22, "learning_rate": 1.9562808961588756e-05, "loss": 1.8805, "step": 1556 }, { "epoch": 1.22, "learning_rate": 1.9562068079503467e-05, "loss": 1.5736, "step": 1557 }, { "epoch": 1.22, "learning_rate": 1.956132658423812e-05, "loss": 2.1578, "step": 1558 }, { "epoch": 1.22, "learning_rate": 1.9560584475840264e-05, "loss": 2.0118, "step": 1559 }, { "epoch": 1.22, "learning_rate": 1.955984175435749e-05, "loss": 1.856, "step": 1560 }, { "epoch": 1.22, "learning_rate": 1.955909841983743e-05, "loss": 2.0356, "step": 1561 }, { "epoch": 1.22, "learning_rate": 1.9558354472327747e-05, "loss": 1.7901, "step": 1562 }, { "epoch": 1.22, "learning_rate": 1.9557609911876147e-05, "loss": 1.6668, "step": 1563 }, { "epoch": 1.22, "learning_rate": 1.9556864738530372e-05, "loss": 1.8405, "step": 1564 }, { "epoch": 1.22, "learning_rate": 1.9556118952338218e-05, "loss": 1.7162, "step": 1565 }, { "epoch": 1.22, "learning_rate": 1.9555372553347498e-05, "loss": 1.9823, "step": 1566 }, { "epoch": 1.22, "learning_rate": 1.9554625541606086e-05, "loss": 2.0583, "step": 1567 }, { "epoch": 1.23, "learning_rate": 1.9553877917161878e-05, "loss": 2.1034, "step": 1568 }, { "epoch": 1.23, "learning_rate": 1.9553129680062818e-05, "loss": 1.7806, "step": 1569 }, { "epoch": 1.23, "learning_rate": 1.9552380830356886e-05, "loss": 1.908, "step": 1570 }, { "epoch": 1.23, "learning_rate": 1.955163136809211e-05, "loss": 1.7815, "step": 1571 }, { "epoch": 1.23, "learning_rate": 1.9550881293316543e-05, "loss": 1.687, "step": 1572 }, { "epoch": 1.23, "learning_rate": 1.9550130606078288e-05, "loss": 1.7135, "step": 1573 }, { "epoch": 1.23, "learning_rate": 1.9549379306425484e-05, "loss": 1.5349, "step": 1574 }, { "epoch": 1.23, "learning_rate": 1.9548627394406308e-05, "loss": 1.4578, "step": 1575 }, { "epoch": 1.23, "learning_rate": 1.9547874870068976e-05, "loss": 1.9888, "step": 1576 }, { "epoch": 1.23, "learning_rate": 1.954712173346175e-05, "loss": 1.8251, "step": 1577 }, { "epoch": 1.23, "learning_rate": 1.9546367984632914e-05, "loss": 2.0252, "step": 1578 }, { "epoch": 1.23, "learning_rate": 1.9545613623630813e-05, "loss": 1.8499, "step": 1579 }, { "epoch": 1.23, "learning_rate": 1.9544858650503822e-05, "loss": 1.9951, "step": 1580 }, { "epoch": 1.24, "learning_rate": 1.954410306530035e-05, "loss": 1.7349, "step": 1581 }, { "epoch": 1.24, "learning_rate": 1.9543346868068857e-05, "loss": 1.7413, "step": 1582 }, { "epoch": 1.24, "learning_rate": 1.9542590058857823e-05, "loss": 1.6278, "step": 1583 }, { "epoch": 1.24, "learning_rate": 1.954183263771579e-05, "loss": 1.7297, "step": 1584 }, { "epoch": 1.24, "learning_rate": 1.9541074604691323e-05, "loss": 1.6699, "step": 1585 }, { "epoch": 1.24, "learning_rate": 1.9540315959833037e-05, "loss": 1.8712, "step": 1586 }, { "epoch": 1.24, "learning_rate": 1.9539556703189576e-05, "loss": 1.8734, "step": 1587 }, { "epoch": 1.24, "learning_rate": 1.9538796834809627e-05, "loss": 1.9499, "step": 1588 }, { "epoch": 1.24, "learning_rate": 1.953803635474192e-05, "loss": 1.6081, "step": 1589 }, { "epoch": 1.24, "learning_rate": 1.9537275263035228e-05, "loss": 1.9422, "step": 1590 }, { "epoch": 1.24, "learning_rate": 1.9536513559738352e-05, "loss": 1.9472, "step": 1591 }, { "epoch": 1.24, "learning_rate": 1.9535751244900135e-05, "loss": 1.657, "step": 1592 }, { "epoch": 1.25, "learning_rate": 1.953498831856946e-05, "loss": 1.7799, "step": 1593 }, { "epoch": 1.25, "learning_rate": 1.9534224780795258e-05, "loss": 2.0024, "step": 1594 }, { "epoch": 1.25, "learning_rate": 1.9533460631626484e-05, "loss": 1.9382, "step": 1595 }, { "epoch": 1.25, "learning_rate": 1.9532695871112146e-05, "loss": 1.9072, "step": 1596 }, { "epoch": 1.25, "learning_rate": 1.9531930499301284e-05, "loss": 1.956, "step": 1597 }, { "epoch": 1.25, "learning_rate": 1.9531164516242975e-05, "loss": 1.7414, "step": 1598 }, { "epoch": 1.25, "learning_rate": 1.953039792198634e-05, "loss": 1.9064, "step": 1599 }, { "epoch": 1.25, "learning_rate": 1.952963071658054e-05, "loss": 1.7636, "step": 1600 }, { "epoch": 1.25, "learning_rate": 1.9528862900074775e-05, "loss": 2.0089, "step": 1601 }, { "epoch": 1.25, "learning_rate": 1.9528094472518275e-05, "loss": 2.1257, "step": 1602 }, { "epoch": 1.25, "learning_rate": 1.9527325433960326e-05, "loss": 1.9753, "step": 1603 }, { "epoch": 1.25, "learning_rate": 1.952655578445023e-05, "loss": 1.8471, "step": 1604 }, { "epoch": 1.25, "learning_rate": 1.9525785524037357e-05, "loss": 1.7999, "step": 1605 }, { "epoch": 1.26, "learning_rate": 1.9525014652771093e-05, "loss": 1.9743, "step": 1606 }, { "epoch": 1.26, "learning_rate": 1.952424317070087e-05, "loss": 1.8981, "step": 1607 }, { "epoch": 1.26, "learning_rate": 1.9523471077876163e-05, "loss": 1.6462, "step": 1608 }, { "epoch": 1.26, "learning_rate": 1.9522698374346482e-05, "loss": 1.551, "step": 1609 }, { "epoch": 1.26, "learning_rate": 1.952192506016138e-05, "loss": 2.0007, "step": 1610 }, { "epoch": 1.26, "learning_rate": 1.9521151135370445e-05, "loss": 2.0148, "step": 1611 }, { "epoch": 1.26, "learning_rate": 1.9520376600023305e-05, "loss": 1.8378, "step": 1612 }, { "epoch": 1.26, "learning_rate": 1.951960145416963e-05, "loss": 1.5909, "step": 1613 }, { "epoch": 1.26, "learning_rate": 1.9518825697859127e-05, "loss": 1.627, "step": 1614 }, { "epoch": 1.26, "learning_rate": 1.9518049331141543e-05, "loss": 2.0713, "step": 1615 }, { "epoch": 1.26, "learning_rate": 1.9517272354066657e-05, "loss": 1.9815, "step": 1616 }, { "epoch": 1.26, "learning_rate": 1.9516494766684305e-05, "loss": 2.0305, "step": 1617 }, { "epoch": 1.26, "learning_rate": 1.951571656904434e-05, "loss": 1.9555, "step": 1618 }, { "epoch": 1.27, "learning_rate": 1.9514937761196674e-05, "loss": 1.9335, "step": 1619 }, { "epoch": 1.27, "learning_rate": 1.9514158343191244e-05, "loss": 1.8947, "step": 1620 }, { "epoch": 1.27, "learning_rate": 1.951337831507803e-05, "loss": 1.8397, "step": 1621 }, { "epoch": 1.27, "learning_rate": 1.9512597676907056e-05, "loss": 1.9058, "step": 1622 }, { "epoch": 1.27, "learning_rate": 1.9511816428728378e-05, "loss": 1.7507, "step": 1623 }, { "epoch": 1.27, "learning_rate": 1.95110345705921e-05, "loss": 1.8954, "step": 1624 }, { "epoch": 1.27, "learning_rate": 1.9510252102548355e-05, "loss": 1.6292, "step": 1625 }, { "epoch": 1.27, "learning_rate": 1.950946902464732e-05, "loss": 1.5643, "step": 1626 }, { "epoch": 1.27, "learning_rate": 1.950868533693921e-05, "loss": 1.6244, "step": 1627 }, { "epoch": 1.27, "learning_rate": 1.9507901039474283e-05, "loss": 2.1911, "step": 1628 }, { "epoch": 1.27, "learning_rate": 1.950711613230283e-05, "loss": 1.386, "step": 1629 }, { "epoch": 1.27, "learning_rate": 1.950633061547519e-05, "loss": 1.6533, "step": 1630 }, { "epoch": 1.27, "learning_rate": 1.9505544489041728e-05, "loss": 2.1616, "step": 1631 }, { "epoch": 1.28, "learning_rate": 1.950475775305286e-05, "loss": 2.2516, "step": 1632 }, { "epoch": 1.28, "learning_rate": 1.9503970407559033e-05, "loss": 2.1513, "step": 1633 }, { "epoch": 1.28, "learning_rate": 1.950318245261074e-05, "loss": 2.0152, "step": 1634 }, { "epoch": 1.28, "learning_rate": 1.950239388825851e-05, "loss": 2.1471, "step": 1635 }, { "epoch": 1.28, "learning_rate": 1.950160471455291e-05, "loss": 1.6925, "step": 1636 }, { "epoch": 1.28, "learning_rate": 1.950081493154454e-05, "loss": 1.8008, "step": 1637 }, { "epoch": 1.28, "learning_rate": 1.9500024539284057e-05, "loss": 1.9512, "step": 1638 }, { "epoch": 1.28, "learning_rate": 1.9499233537822138e-05, "loss": 1.5702, "step": 1639 }, { "epoch": 1.28, "learning_rate": 1.9498441927209513e-05, "loss": 1.8075, "step": 1640 }, { "epoch": 1.28, "learning_rate": 1.9497649707496936e-05, "loss": 2.1124, "step": 1641 }, { "epoch": 1.28, "learning_rate": 1.949685687873522e-05, "loss": 2.07, "step": 1642 }, { "epoch": 1.28, "learning_rate": 1.94960634409752e-05, "loss": 1.9689, "step": 1643 }, { "epoch": 1.28, "learning_rate": 1.9495269394267757e-05, "loss": 2.1056, "step": 1644 }, { "epoch": 1.29, "learning_rate": 1.949447473866381e-05, "loss": 2.1189, "step": 1645 }, { "epoch": 1.29, "learning_rate": 1.9493679474214318e-05, "loss": 1.8793, "step": 1646 }, { "epoch": 1.29, "learning_rate": 1.9492883600970277e-05, "loss": 1.7454, "step": 1647 }, { "epoch": 1.29, "learning_rate": 1.9492087118982725e-05, "loss": 1.9041, "step": 1648 }, { "epoch": 1.29, "learning_rate": 1.9491290028302735e-05, "loss": 1.9509, "step": 1649 }, { "epoch": 1.29, "learning_rate": 1.9490492328981428e-05, "loss": 1.8068, "step": 1650 }, { "epoch": 1.29, "learning_rate": 1.948969402106995e-05, "loss": 1.9039, "step": 1651 }, { "epoch": 1.29, "learning_rate": 1.9488895104619498e-05, "loss": 2.0181, "step": 1652 }, { "epoch": 1.29, "learning_rate": 1.9488095579681302e-05, "loss": 2.0721, "step": 1653 }, { "epoch": 1.29, "learning_rate": 1.9487295446306632e-05, "loss": 1.9861, "step": 1654 }, { "epoch": 1.29, "learning_rate": 1.9486494704546797e-05, "loss": 1.9752, "step": 1655 }, { "epoch": 1.29, "learning_rate": 1.9485693354453147e-05, "loss": 1.9457, "step": 1656 }, { "epoch": 1.3, "learning_rate": 1.948489139607707e-05, "loss": 1.6081, "step": 1657 }, { "epoch": 1.3, "learning_rate": 1.9484088829469993e-05, "loss": 1.9123, "step": 1658 }, { "epoch": 1.3, "learning_rate": 1.9483285654683382e-05, "loss": 1.8749, "step": 1659 }, { "epoch": 1.3, "learning_rate": 1.948248187176874e-05, "loss": 2.2143, "step": 1660 }, { "epoch": 1.3, "learning_rate": 1.948167748077761e-05, "loss": 2.0023, "step": 1661 }, { "epoch": 1.3, "learning_rate": 1.948087248176157e-05, "loss": 1.8716, "step": 1662 }, { "epoch": 1.3, "learning_rate": 1.9480066874772257e-05, "loss": 2.1031, "step": 1663 }, { "epoch": 1.3, "learning_rate": 1.9479260659861318e-05, "loss": 1.4728, "step": 1664 }, { "epoch": 1.3, "learning_rate": 1.9478453837080454e-05, "loss": 1.9042, "step": 1665 }, { "epoch": 1.3, "learning_rate": 1.9477646406481408e-05, "loss": 1.7855, "step": 1666 }, { "epoch": 1.3, "learning_rate": 1.9476838368115955e-05, "loss": 1.8524, "step": 1667 }, { "epoch": 1.3, "learning_rate": 1.9476029722035913e-05, "loss": 2.103, "step": 1668 }, { "epoch": 1.3, "learning_rate": 1.9475220468293137e-05, "loss": 2.1177, "step": 1669 }, { "epoch": 1.31, "learning_rate": 1.947441060693952e-05, "loss": 1.8165, "step": 1670 }, { "epoch": 1.31, "learning_rate": 1.9473600138026993e-05, "loss": 1.9402, "step": 1671 }, { "epoch": 1.31, "learning_rate": 1.9472789061607536e-05, "loss": 1.5327, "step": 1672 }, { "epoch": 1.31, "learning_rate": 1.9471977377733157e-05, "loss": 1.8208, "step": 1673 }, { "epoch": 1.31, "learning_rate": 1.9471165086455903e-05, "loss": 1.8279, "step": 1674 }, { "epoch": 1.31, "learning_rate": 1.9470352187827863e-05, "loss": 1.8463, "step": 1675 }, { "epoch": 1.31, "learning_rate": 1.946953868190117e-05, "loss": 1.8237, "step": 1676 }, { "epoch": 1.31, "learning_rate": 1.946872456872799e-05, "loss": 1.6831, "step": 1677 }, { "epoch": 1.31, "learning_rate": 1.9467909848360527e-05, "loss": 1.5867, "step": 1678 }, { "epoch": 1.31, "learning_rate": 1.9467094520851028e-05, "loss": 1.8918, "step": 1679 }, { "epoch": 1.31, "learning_rate": 1.9466278586251777e-05, "loss": 2.1224, "step": 1680 }, { "epoch": 1.31, "learning_rate": 1.9465462044615093e-05, "loss": 2.2041, "step": 1681 }, { "epoch": 1.31, "learning_rate": 1.9464644895993343e-05, "loss": 1.9405, "step": 1682 }, { "epoch": 1.32, "learning_rate": 1.9463827140438922e-05, "loss": 1.6728, "step": 1683 }, { "epoch": 1.32, "learning_rate": 1.9463008778004277e-05, "loss": 2.0462, "step": 1684 }, { "epoch": 1.32, "learning_rate": 1.946218980874188e-05, "loss": 1.9709, "step": 1685 }, { "epoch": 1.32, "learning_rate": 1.9461370232704248e-05, "loss": 2.0364, "step": 1686 }, { "epoch": 1.32, "learning_rate": 1.9460550049943944e-05, "loss": 1.8697, "step": 1687 }, { "epoch": 1.32, "learning_rate": 1.945972926051356e-05, "loss": 1.815, "step": 1688 }, { "epoch": 1.32, "learning_rate": 1.945890786446573e-05, "loss": 1.375, "step": 1689 }, { "epoch": 1.32, "learning_rate": 1.9458085861853125e-05, "loss": 1.9193, "step": 1690 }, { "epoch": 1.32, "learning_rate": 1.9457263252728458e-05, "loss": 2.0534, "step": 1691 }, { "epoch": 1.32, "learning_rate": 1.945644003714448e-05, "loss": 1.598, "step": 1692 }, { "epoch": 1.32, "learning_rate": 1.9455616215153986e-05, "loss": 1.6227, "step": 1693 }, { "epoch": 1.32, "learning_rate": 1.9454791786809797e-05, "loss": 1.8384, "step": 1694 }, { "epoch": 1.32, "learning_rate": 1.9453966752164787e-05, "loss": 2.0069, "step": 1695 }, { "epoch": 1.33, "learning_rate": 1.9453141111271856e-05, "loss": 2.172, "step": 1696 }, { "epoch": 1.33, "learning_rate": 1.9452314864183953e-05, "loss": 1.8307, "step": 1697 }, { "epoch": 1.33, "learning_rate": 1.945148801095406e-05, "loss": 2.0686, "step": 1698 }, { "epoch": 1.33, "learning_rate": 1.94506605516352e-05, "loss": 2.1566, "step": 1699 }, { "epoch": 1.33, "learning_rate": 1.9449832486280445e-05, "loss": 2.1808, "step": 1700 }, { "epoch": 1.33, "learning_rate": 1.944900381494288e-05, "loss": 1.5204, "step": 1701 }, { "epoch": 1.33, "learning_rate": 1.9448174537675654e-05, "loss": 1.7027, "step": 1702 }, { "epoch": 1.33, "learning_rate": 1.9447344654531947e-05, "loss": 1.7841, "step": 1703 }, { "epoch": 1.33, "learning_rate": 1.944651416556497e-05, "loss": 1.8862, "step": 1704 }, { "epoch": 1.33, "learning_rate": 1.944568307082798e-05, "loss": 1.6134, "step": 1705 }, { "epoch": 1.33, "learning_rate": 1.9444851370374277e-05, "loss": 1.8269, "step": 1706 }, { "epoch": 1.33, "learning_rate": 1.9444019064257187e-05, "loss": 2.0175, "step": 1707 }, { "epoch": 1.33, "learning_rate": 1.944318615253009e-05, "loss": 2.1528, "step": 1708 }, { "epoch": 1.34, "learning_rate": 1.9442352635246397e-05, "loss": 1.9145, "step": 1709 }, { "epoch": 1.34, "learning_rate": 1.9441518512459554e-05, "loss": 1.6152, "step": 1710 }, { "epoch": 1.34, "learning_rate": 1.9440683784223054e-05, "loss": 2.0847, "step": 1711 }, { "epoch": 1.34, "learning_rate": 1.9439848450590423e-05, "loss": 1.9864, "step": 1712 }, { "epoch": 1.34, "learning_rate": 1.9439012511615233e-05, "loss": 1.7644, "step": 1713 }, { "epoch": 1.34, "learning_rate": 1.9438175967351082e-05, "loss": 1.9297, "step": 1714 }, { "epoch": 1.34, "learning_rate": 1.9437338817851614e-05, "loss": 2.0998, "step": 1715 }, { "epoch": 1.34, "learning_rate": 1.943650106317052e-05, "loss": 1.6421, "step": 1716 }, { "epoch": 1.34, "learning_rate": 1.943566270336152e-05, "loss": 2.0889, "step": 1717 }, { "epoch": 1.34, "learning_rate": 1.943482373847837e-05, "loss": 2.0124, "step": 1718 }, { "epoch": 1.34, "learning_rate": 1.9433984168574874e-05, "loss": 2.0608, "step": 1719 }, { "epoch": 1.34, "learning_rate": 1.9433143993704867e-05, "loss": 1.7803, "step": 1720 }, { "epoch": 1.35, "learning_rate": 1.943230321392223e-05, "loss": 1.886, "step": 1721 }, { "epoch": 1.35, "learning_rate": 1.943146182928088e-05, "loss": 1.9654, "step": 1722 }, { "epoch": 1.35, "learning_rate": 1.943061983983477e-05, "loss": 1.9865, "step": 1723 }, { "epoch": 1.35, "learning_rate": 1.9429777245637886e-05, "loss": 1.9272, "step": 1724 }, { "epoch": 1.35, "learning_rate": 1.9428934046744275e-05, "loss": 1.7678, "step": 1725 }, { "epoch": 1.35, "learning_rate": 1.9428090243207998e-05, "loss": 1.7812, "step": 1726 }, { "epoch": 1.35, "learning_rate": 1.9427245835083168e-05, "loss": 1.9658, "step": 1727 }, { "epoch": 1.35, "learning_rate": 1.9426400822423934e-05, "loss": 1.9121, "step": 1728 }, { "epoch": 1.35, "learning_rate": 1.9425555205284485e-05, "loss": 1.7821, "step": 1729 }, { "epoch": 1.35, "learning_rate": 1.9424708983719043e-05, "loss": 1.8365, "step": 1730 }, { "epoch": 1.35, "learning_rate": 1.942386215778188e-05, "loss": 1.7945, "step": 1731 }, { "epoch": 1.35, "learning_rate": 1.942301472752729e-05, "loss": 1.9026, "step": 1732 }, { "epoch": 1.35, "learning_rate": 1.942216669300962e-05, "loss": 1.7043, "step": 1733 }, { "epoch": 1.36, "learning_rate": 1.9421318054283257e-05, "loss": 1.9846, "step": 1734 }, { "epoch": 1.36, "learning_rate": 1.9420468811402615e-05, "loss": 1.9915, "step": 1735 }, { "epoch": 1.36, "learning_rate": 1.9419618964422155e-05, "loss": 1.5367, "step": 1736 }, { "epoch": 1.36, "learning_rate": 1.941876851339637e-05, "loss": 1.833, "step": 1737 }, { "epoch": 1.36, "learning_rate": 1.9417917458379803e-05, "loss": 1.774, "step": 1738 }, { "epoch": 1.36, "learning_rate": 1.9417065799427028e-05, "loss": 2.137, "step": 1739 }, { "epoch": 1.36, "learning_rate": 1.9416213536592653e-05, "loss": 1.6313, "step": 1740 }, { "epoch": 1.36, "learning_rate": 1.9415360669931336e-05, "loss": 2.0718, "step": 1741 }, { "epoch": 1.36, "learning_rate": 1.9414507199497765e-05, "loss": 2.0136, "step": 1742 }, { "epoch": 1.36, "learning_rate": 1.941365312534667e-05, "loss": 1.8859, "step": 1743 }, { "epoch": 1.36, "learning_rate": 1.9412798447532825e-05, "loss": 1.6253, "step": 1744 }, { "epoch": 1.36, "learning_rate": 1.9411943166111032e-05, "loss": 1.8843, "step": 1745 }, { "epoch": 1.36, "learning_rate": 1.941108728113614e-05, "loss": 1.645, "step": 1746 }, { "epoch": 1.37, "learning_rate": 1.9410230792663025e-05, "loss": 2.0713, "step": 1747 }, { "epoch": 1.37, "learning_rate": 1.9409373700746624e-05, "loss": 1.7764, "step": 1748 }, { "epoch": 1.37, "learning_rate": 1.940851600544189e-05, "loss": 1.8021, "step": 1749 }, { "epoch": 1.37, "learning_rate": 1.9407657706803827e-05, "loss": 1.8378, "step": 1750 }, { "epoch": 1.37, "learning_rate": 1.9406798804887474e-05, "loss": 1.8907, "step": 1751 }, { "epoch": 1.37, "learning_rate": 1.9405939299747908e-05, "loss": 2.0529, "step": 1752 }, { "epoch": 1.37, "learning_rate": 1.9405079191440248e-05, "loss": 1.8626, "step": 1753 }, { "epoch": 1.37, "learning_rate": 1.940421848001965e-05, "loss": 1.7724, "step": 1754 }, { "epoch": 1.37, "learning_rate": 1.9403357165541304e-05, "loss": 1.6749, "step": 1755 }, { "epoch": 1.37, "learning_rate": 1.940249524806045e-05, "loss": 1.9546, "step": 1756 }, { "epoch": 1.37, "learning_rate": 1.940163272763235e-05, "loss": 1.9094, "step": 1757 }, { "epoch": 1.37, "learning_rate": 1.9400769604312323e-05, "loss": 1.3618, "step": 1758 }, { "epoch": 1.37, "learning_rate": 1.9399905878155713e-05, "loss": 1.8248, "step": 1759 }, { "epoch": 1.38, "learning_rate": 1.939904154921791e-05, "loss": 1.9961, "step": 1760 }, { "epoch": 1.38, "learning_rate": 1.939817661755434e-05, "loss": 1.7617, "step": 1761 }, { "epoch": 1.38, "learning_rate": 1.9397311083220463e-05, "loss": 1.6334, "step": 1762 }, { "epoch": 1.38, "learning_rate": 1.9396444946271788e-05, "loss": 1.8196, "step": 1763 }, { "epoch": 1.38, "learning_rate": 1.9395578206763857e-05, "loss": 1.727, "step": 1764 }, { "epoch": 1.38, "learning_rate": 1.939471086475225e-05, "loss": 1.8721, "step": 1765 }, { "epoch": 1.38, "learning_rate": 1.939384292029259e-05, "loss": 2.0683, "step": 1766 }, { "epoch": 1.38, "learning_rate": 1.9392974373440524e-05, "loss": 1.789, "step": 1767 }, { "epoch": 1.38, "learning_rate": 1.939210522425176e-05, "loss": 1.7586, "step": 1768 }, { "epoch": 1.38, "learning_rate": 1.9391235472782025e-05, "loss": 1.8736, "step": 1769 }, { "epoch": 1.38, "learning_rate": 1.93903651190871e-05, "loss": 2.1456, "step": 1770 }, { "epoch": 1.38, "learning_rate": 1.9389494163222796e-05, "loss": 1.9058, "step": 1771 }, { "epoch": 1.38, "learning_rate": 1.938862260524496e-05, "loss": 2.022, "step": 1772 }, { "epoch": 1.39, "learning_rate": 1.9387750445209486e-05, "loss": 1.9155, "step": 1773 }, { "epoch": 1.39, "learning_rate": 1.9386877683172302e-05, "loss": 1.7369, "step": 1774 }, { "epoch": 1.39, "learning_rate": 1.938600431918937e-05, "loss": 1.8978, "step": 1775 }, { "epoch": 1.39, "learning_rate": 1.9385130353316704e-05, "loss": 1.9938, "step": 1776 }, { "epoch": 1.39, "learning_rate": 1.938425578561034e-05, "loss": 1.8755, "step": 1777 }, { "epoch": 1.39, "learning_rate": 1.938338061612637e-05, "loss": 1.5847, "step": 1778 }, { "epoch": 1.39, "learning_rate": 1.9382504844920905e-05, "loss": 2.0414, "step": 1779 }, { "epoch": 1.39, "learning_rate": 1.938162847205011e-05, "loss": 1.7661, "step": 1780 }, { "epoch": 1.39, "learning_rate": 1.9380751497570184e-05, "loss": 2.0627, "step": 1781 }, { "epoch": 1.39, "learning_rate": 1.9379873921537366e-05, "loss": 1.9417, "step": 1782 }, { "epoch": 1.39, "learning_rate": 1.937899574400793e-05, "loss": 1.9249, "step": 1783 }, { "epoch": 1.39, "learning_rate": 1.9378116965038183e-05, "loss": 1.8138, "step": 1784 }, { "epoch": 1.4, "learning_rate": 1.9377237584684488e-05, "loss": 1.9001, "step": 1785 }, { "epoch": 1.4, "learning_rate": 1.9376357603003235e-05, "loss": 1.7798, "step": 1786 }, { "epoch": 1.4, "learning_rate": 1.937547702005085e-05, "loss": 2.1176, "step": 1787 }, { "epoch": 1.4, "learning_rate": 1.9374595835883804e-05, "loss": 1.7423, "step": 1788 }, { "epoch": 1.4, "learning_rate": 1.9373714050558603e-05, "loss": 1.9312, "step": 1789 }, { "epoch": 1.4, "learning_rate": 1.937283166413179e-05, "loss": 2.1529, "step": 1790 }, { "epoch": 1.4, "learning_rate": 1.937194867665996e-05, "loss": 1.6508, "step": 1791 }, { "epoch": 1.4, "learning_rate": 1.937106508819972e-05, "loss": 1.8103, "step": 1792 }, { "epoch": 1.4, "learning_rate": 1.9370180898807743e-05, "loss": 1.9751, "step": 1793 }, { "epoch": 1.4, "learning_rate": 1.9369296108540726e-05, "loss": 2.0863, "step": 1794 }, { "epoch": 1.4, "learning_rate": 1.9368410717455405e-05, "loss": 1.771, "step": 1795 }, { "epoch": 1.4, "learning_rate": 1.9367524725608558e-05, "loss": 1.883, "step": 1796 }, { "epoch": 1.4, "learning_rate": 1.9366638133057e-05, "loss": 1.904, "step": 1797 }, { "epoch": 1.41, "learning_rate": 1.9365750939857584e-05, "loss": 1.8406, "step": 1798 }, { "epoch": 1.41, "learning_rate": 1.9364863146067207e-05, "loss": 1.7582, "step": 1799 }, { "epoch": 1.41, "learning_rate": 1.9363974751742796e-05, "loss": 2.06, "step": 1800 }, { "epoch": 1.41, "learning_rate": 1.936308575694132e-05, "loss": 2.1454, "step": 1801 }, { "epoch": 1.41, "learning_rate": 1.9362196161719787e-05, "loss": 1.8664, "step": 1802 }, { "epoch": 1.41, "learning_rate": 1.936130596613525e-05, "loss": 1.9049, "step": 1803 }, { "epoch": 1.41, "learning_rate": 1.9360415170244786e-05, "loss": 1.9051, "step": 1804 }, { "epoch": 1.41, "learning_rate": 1.9359523774105517e-05, "loss": 1.811, "step": 1805 }, { "epoch": 1.41, "learning_rate": 1.9358631777774613e-05, "loss": 1.8861, "step": 1806 }, { "epoch": 1.41, "learning_rate": 1.935773918130927e-05, "loss": 2.0627, "step": 1807 }, { "epoch": 1.41, "learning_rate": 1.9356845984766726e-05, "loss": 2.1597, "step": 1808 }, { "epoch": 1.41, "learning_rate": 1.935595218820426e-05, "loss": 1.511, "step": 1809 }, { "epoch": 1.41, "learning_rate": 1.9355057791679188e-05, "loss": 1.9142, "step": 1810 }, { "epoch": 1.42, "learning_rate": 1.9354162795248864e-05, "loss": 1.974, "step": 1811 }, { "epoch": 1.42, "learning_rate": 1.935326719897068e-05, "loss": 1.5499, "step": 1812 }, { "epoch": 1.42, "learning_rate": 1.935237100290207e-05, "loss": 2.1057, "step": 1813 }, { "epoch": 1.42, "learning_rate": 1.9351474207100502e-05, "loss": 1.8544, "step": 1814 }, { "epoch": 1.42, "learning_rate": 1.9350576811623482e-05, "loss": 1.784, "step": 1815 }, { "epoch": 1.42, "learning_rate": 1.934967881652856e-05, "loss": 1.9067, "step": 1816 }, { "epoch": 1.42, "learning_rate": 1.9348780221873317e-05, "loss": 1.8106, "step": 1817 }, { "epoch": 1.42, "learning_rate": 1.9347881027715382e-05, "loss": 1.7627, "step": 1818 }, { "epoch": 1.42, "learning_rate": 1.9346981234112413e-05, "loss": 1.9439, "step": 1819 }, { "epoch": 1.42, "learning_rate": 1.934608084112211e-05, "loss": 2.2328, "step": 1820 }, { "epoch": 1.42, "learning_rate": 1.9345179848802214e-05, "loss": 1.947, "step": 1821 }, { "epoch": 1.42, "learning_rate": 1.9344278257210506e-05, "loss": 2.061, "step": 1822 }, { "epoch": 1.42, "learning_rate": 1.9343376066404794e-05, "loss": 1.7176, "step": 1823 }, { "epoch": 1.43, "learning_rate": 1.9342473276442937e-05, "loss": 1.9577, "step": 1824 }, { "epoch": 1.43, "learning_rate": 1.934156988738283e-05, "loss": 2.1837, "step": 1825 }, { "epoch": 1.43, "learning_rate": 1.9340665899282392e-05, "loss": 2.0106, "step": 1826 }, { "epoch": 1.43, "learning_rate": 1.9339761312199604e-05, "loss": 1.8859, "step": 1827 }, { "epoch": 1.43, "learning_rate": 1.933885612619247e-05, "loss": 1.9454, "step": 1828 }, { "epoch": 1.43, "learning_rate": 1.933795034131904e-05, "loss": 1.8984, "step": 1829 }, { "epoch": 1.43, "learning_rate": 1.9337043957637394e-05, "loss": 1.8685, "step": 1830 }, { "epoch": 1.43, "learning_rate": 1.9336136975205656e-05, "loss": 1.9095, "step": 1831 }, { "epoch": 1.43, "learning_rate": 1.933522939408199e-05, "loss": 1.8976, "step": 1832 }, { "epoch": 1.43, "learning_rate": 1.933432121432459e-05, "loss": 1.8718, "step": 1833 }, { "epoch": 1.43, "learning_rate": 1.93334124359917e-05, "loss": 1.6776, "step": 1834 }, { "epoch": 1.43, "learning_rate": 1.9332503059141595e-05, "loss": 1.9147, "step": 1835 }, { "epoch": 1.43, "learning_rate": 1.9331593083832586e-05, "loss": 1.7516, "step": 1836 }, { "epoch": 1.44, "learning_rate": 1.9330682510123036e-05, "loss": 2.2202, "step": 1837 }, { "epoch": 1.44, "learning_rate": 1.9329771338071324e-05, "loss": 1.9488, "step": 1838 }, { "epoch": 1.44, "learning_rate": 1.9328859567735893e-05, "loss": 1.6703, "step": 1839 }, { "epoch": 1.44, "learning_rate": 1.93279471991752e-05, "loss": 2.089, "step": 1840 }, { "epoch": 1.44, "learning_rate": 1.9327034232447758e-05, "loss": 1.7581, "step": 1841 }, { "epoch": 1.44, "learning_rate": 1.932612066761211e-05, "loss": 1.589, "step": 1842 }, { "epoch": 1.44, "learning_rate": 1.9325206504726844e-05, "loss": 1.645, "step": 1843 }, { "epoch": 1.44, "learning_rate": 1.9324291743850578e-05, "loss": 1.8863, "step": 1844 }, { "epoch": 1.44, "learning_rate": 1.9323376385041974e-05, "loss": 1.8482, "step": 1845 }, { "epoch": 1.44, "learning_rate": 1.9322460428359724e-05, "loss": 1.9558, "step": 1846 }, { "epoch": 1.44, "learning_rate": 1.9321543873862572e-05, "loss": 1.8714, "step": 1847 }, { "epoch": 1.44, "learning_rate": 1.9320626721609296e-05, "loss": 1.7915, "step": 1848 }, { "epoch": 1.45, "learning_rate": 1.93197089716587e-05, "loss": 1.7408, "step": 1849 }, { "epoch": 1.45, "learning_rate": 1.931879062406964e-05, "loss": 1.6836, "step": 1850 }, { "epoch": 1.45, "learning_rate": 1.931787167890101e-05, "loss": 1.7353, "step": 1851 }, { "epoch": 1.45, "learning_rate": 1.9316952136211738e-05, "loss": 1.8869, "step": 1852 }, { "epoch": 1.45, "learning_rate": 1.9316031996060785e-05, "loss": 2.1879, "step": 1853 }, { "epoch": 1.45, "learning_rate": 1.9315111258507162e-05, "loss": 1.6747, "step": 1854 }, { "epoch": 1.45, "learning_rate": 1.931418992360991e-05, "loss": 1.7336, "step": 1855 }, { "epoch": 1.45, "learning_rate": 1.931326799142811e-05, "loss": 1.9043, "step": 1856 }, { "epoch": 1.45, "learning_rate": 1.9312345462020885e-05, "loss": 2.1225, "step": 1857 }, { "epoch": 1.45, "learning_rate": 1.931142233544739e-05, "loss": 1.9612, "step": 1858 }, { "epoch": 1.45, "learning_rate": 1.9310498611766824e-05, "loss": 1.7676, "step": 1859 }, { "epoch": 1.45, "learning_rate": 1.930957429103842e-05, "loss": 1.9637, "step": 1860 }, { "epoch": 1.45, "learning_rate": 1.9308649373321454e-05, "loss": 1.9295, "step": 1861 }, { "epoch": 1.46, "learning_rate": 1.9307723858675237e-05, "loss": 1.8815, "step": 1862 }, { "epoch": 1.46, "learning_rate": 1.9306797747159118e-05, "loss": 2.0058, "step": 1863 }, { "epoch": 1.46, "learning_rate": 1.930587103883248e-05, "loss": 1.9935, "step": 1864 }, { "epoch": 1.46, "learning_rate": 1.930494373375476e-05, "loss": 1.7604, "step": 1865 }, { "epoch": 1.46, "learning_rate": 1.9304015831985415e-05, "loss": 1.5713, "step": 1866 }, { "epoch": 1.46, "learning_rate": 1.9303087333583953e-05, "loss": 2.1317, "step": 1867 }, { "epoch": 1.46, "learning_rate": 1.9302158238609908e-05, "loss": 1.674, "step": 1868 }, { "epoch": 1.46, "learning_rate": 1.9301228547122864e-05, "loss": 1.9663, "step": 1869 }, { "epoch": 1.46, "learning_rate": 1.930029825918244e-05, "loss": 1.6258, "step": 1870 }, { "epoch": 1.46, "learning_rate": 1.929936737484829e-05, "loss": 1.6514, "step": 1871 }, { "epoch": 1.46, "learning_rate": 1.9298435894180106e-05, "loss": 1.781, "step": 1872 }, { "epoch": 1.46, "learning_rate": 1.9297503817237622e-05, "loss": 1.8242, "step": 1873 }, { "epoch": 1.46, "learning_rate": 1.9296571144080614e-05, "loss": 1.7421, "step": 1874 }, { "epoch": 1.47, "learning_rate": 1.9295637874768885e-05, "loss": 1.9115, "step": 1875 }, { "epoch": 1.47, "learning_rate": 1.9294704009362283e-05, "loss": 1.917, "step": 1876 }, { "epoch": 1.47, "learning_rate": 1.9293769547920694e-05, "loss": 1.8257, "step": 1877 }, { "epoch": 1.47, "learning_rate": 1.929283449050404e-05, "loss": 1.8192, "step": 1878 }, { "epoch": 1.47, "learning_rate": 1.9291898837172286e-05, "loss": 1.8526, "step": 1879 }, { "epoch": 1.47, "learning_rate": 1.929096258798543e-05, "loss": 1.827, "step": 1880 }, { "epoch": 1.47, "learning_rate": 1.9290025743003507e-05, "loss": 1.8652, "step": 1881 }, { "epoch": 1.47, "learning_rate": 1.9289088302286596e-05, "loss": 1.855, "step": 1882 }, { "epoch": 1.47, "learning_rate": 1.9288150265894815e-05, "loss": 1.6122, "step": 1883 }, { "epoch": 1.47, "learning_rate": 1.9287211633888315e-05, "loss": 1.6041, "step": 1884 }, { "epoch": 1.47, "learning_rate": 1.9286272406327284e-05, "loss": 2.0845, "step": 1885 }, { "epoch": 1.47, "learning_rate": 1.9285332583271953e-05, "loss": 2.0604, "step": 1886 }, { "epoch": 1.47, "learning_rate": 1.9284392164782592e-05, "loss": 2.0855, "step": 1887 }, { "epoch": 1.48, "learning_rate": 1.9283451150919504e-05, "loss": 1.9167, "step": 1888 }, { "epoch": 1.48, "learning_rate": 1.928250954174303e-05, "loss": 1.8266, "step": 1889 }, { "epoch": 1.48, "learning_rate": 1.928156733731356e-05, "loss": 1.7367, "step": 1890 }, { "epoch": 1.48, "learning_rate": 1.9280624537691503e-05, "loss": 1.9127, "step": 1891 }, { "epoch": 1.48, "learning_rate": 1.9279681142937324e-05, "loss": 1.7847, "step": 1892 }, { "epoch": 1.48, "learning_rate": 1.9278737153111522e-05, "loss": 2.0474, "step": 1893 }, { "epoch": 1.48, "learning_rate": 1.9277792568274626e-05, "loss": 2.0909, "step": 1894 }, { "epoch": 1.48, "learning_rate": 1.9276847388487213e-05, "loss": 1.5633, "step": 1895 }, { "epoch": 1.48, "learning_rate": 1.9275901613809888e-05, "loss": 1.8267, "step": 1896 }, { "epoch": 1.48, "learning_rate": 1.9274955244303307e-05, "loss": 1.4624, "step": 1897 }, { "epoch": 1.48, "learning_rate": 1.9274008280028155e-05, "loss": 2.116, "step": 1898 }, { "epoch": 1.48, "learning_rate": 1.9273060721045152e-05, "loss": 1.9456, "step": 1899 }, { "epoch": 1.48, "learning_rate": 1.9272112567415067e-05, "loss": 1.7325, "step": 1900 }, { "epoch": 1.49, "learning_rate": 1.92711638191987e-05, "loss": 2.2353, "step": 1901 }, { "epoch": 1.49, "learning_rate": 1.9270214476456896e-05, "loss": 2.1149, "step": 1902 }, { "epoch": 1.49, "learning_rate": 1.9269264539250522e-05, "loss": 1.8552, "step": 1903 }, { "epoch": 1.49, "learning_rate": 1.92683140076405e-05, "loss": 2.1899, "step": 1904 }, { "epoch": 1.49, "learning_rate": 1.9267362881687786e-05, "loss": 1.8043, "step": 1905 }, { "epoch": 1.49, "learning_rate": 1.9266411161453373e-05, "loss": 1.955, "step": 1906 }, { "epoch": 1.49, "learning_rate": 1.9265458846998286e-05, "loss": 1.7335, "step": 1907 }, { "epoch": 1.49, "learning_rate": 1.9264505938383593e-05, "loss": 1.9438, "step": 1908 }, { "epoch": 1.49, "learning_rate": 1.9263552435670405e-05, "loss": 1.7794, "step": 1909 }, { "epoch": 1.49, "learning_rate": 1.9262598338919866e-05, "loss": 2.0572, "step": 1910 }, { "epoch": 1.49, "learning_rate": 1.926164364819316e-05, "loss": 2.2177, "step": 1911 }, { "epoch": 1.49, "learning_rate": 1.92606883635515e-05, "loss": 2.1521, "step": 1912 }, { "epoch": 1.5, "learning_rate": 1.9259732485056154e-05, "loss": 1.9684, "step": 1913 }, { "epoch": 1.5, "learning_rate": 1.9258776012768412e-05, "loss": 2.1832, "step": 1914 }, { "epoch": 1.5, "learning_rate": 1.9257818946749613e-05, "loss": 2.2354, "step": 1915 }, { "epoch": 1.5, "learning_rate": 1.925686128706113e-05, "loss": 1.8707, "step": 1916 }, { "epoch": 1.5, "learning_rate": 1.9255903033764374e-05, "loss": 1.9986, "step": 1917 }, { "epoch": 1.5, "learning_rate": 1.9254944186920793e-05, "loss": 1.8532, "step": 1918 }, { "epoch": 1.5, "learning_rate": 1.9253984746591877e-05, "loss": 2.2444, "step": 1919 }, { "epoch": 1.5, "learning_rate": 1.925302471283915e-05, "loss": 1.9657, "step": 1920 }, { "epoch": 1.5, "learning_rate": 1.9252064085724172e-05, "loss": 1.4804, "step": 1921 }, { "epoch": 1.5, "learning_rate": 1.925110286530855e-05, "loss": 2.0082, "step": 1922 }, { "epoch": 1.5, "learning_rate": 1.9250141051653915e-05, "loss": 1.7956, "step": 1923 }, { "epoch": 1.5, "learning_rate": 1.9249178644821956e-05, "loss": 1.7732, "step": 1924 }, { "epoch": 1.5, "learning_rate": 1.9248215644874383e-05, "loss": 1.8773, "step": 1925 }, { "epoch": 1.51, "learning_rate": 1.9247252051872948e-05, "loss": 1.6442, "step": 1926 }, { "epoch": 1.51, "learning_rate": 1.9246287865879444e-05, "loss": 1.8335, "step": 1927 }, { "epoch": 1.51, "learning_rate": 1.9245323086955704e-05, "loss": 2.0507, "step": 1928 }, { "epoch": 1.51, "learning_rate": 1.924435771516359e-05, "loss": 1.6086, "step": 1929 }, { "epoch": 1.51, "learning_rate": 1.9243391750565007e-05, "loss": 1.9057, "step": 1930 }, { "epoch": 1.51, "learning_rate": 1.9242425193221906e-05, "loss": 2.06, "step": 1931 }, { "epoch": 1.51, "learning_rate": 1.9241458043196265e-05, "loss": 1.7164, "step": 1932 }, { "epoch": 1.51, "learning_rate": 1.92404903005501e-05, "loss": 2.1252, "step": 1933 }, { "epoch": 1.51, "learning_rate": 1.9239521965345474e-05, "loss": 1.903, "step": 1934 }, { "epoch": 1.51, "learning_rate": 1.9238553037644486e-05, "loss": 1.886, "step": 1935 }, { "epoch": 1.51, "learning_rate": 1.9237583517509257e-05, "loss": 1.7169, "step": 1936 }, { "epoch": 1.51, "learning_rate": 1.923661340500197e-05, "loss": 1.737, "step": 1937 }, { "epoch": 1.51, "learning_rate": 1.9235642700184835e-05, "loss": 1.9662, "step": 1938 }, { "epoch": 1.52, "learning_rate": 1.9234671403120094e-05, "loss": 1.8015, "step": 1939 }, { "epoch": 1.52, "learning_rate": 1.9233699513870033e-05, "loss": 2.0298, "step": 1940 }, { "epoch": 1.52, "learning_rate": 1.9232727032496975e-05, "loss": 1.9138, "step": 1941 }, { "epoch": 1.52, "learning_rate": 1.923175395906329e-05, "loss": 2.0776, "step": 1942 }, { "epoch": 1.52, "learning_rate": 1.923078029363137e-05, "loss": 2.0561, "step": 1943 }, { "epoch": 1.52, "learning_rate": 1.922980603626365e-05, "loss": 1.6553, "step": 1944 }, { "epoch": 1.52, "learning_rate": 1.922883118702261e-05, "loss": 1.9887, "step": 1945 }, { "epoch": 1.52, "learning_rate": 1.9227855745970766e-05, "loss": 1.8458, "step": 1946 }, { "epoch": 1.52, "learning_rate": 1.922687971317067e-05, "loss": 1.912, "step": 1947 }, { "epoch": 1.52, "learning_rate": 1.9225903088684902e-05, "loss": 1.6693, "step": 1948 }, { "epoch": 1.52, "learning_rate": 1.9224925872576094e-05, "loss": 2.002, "step": 1949 }, { "epoch": 1.52, "learning_rate": 1.922394806490692e-05, "loss": 1.9665, "step": 1950 }, { "epoch": 1.52, "learning_rate": 1.922296966574007e-05, "loss": 1.7161, "step": 1951 }, { "epoch": 1.53, "learning_rate": 1.9221990675138293e-05, "loss": 1.8317, "step": 1952 }, { "epoch": 1.53, "learning_rate": 1.9221011093164366e-05, "loss": 1.8211, "step": 1953 }, { "epoch": 1.53, "learning_rate": 1.9220030919881102e-05, "loss": 1.7845, "step": 1954 }, { "epoch": 1.53, "learning_rate": 1.9219050155351365e-05, "loss": 2.0799, "step": 1955 }, { "epoch": 1.53, "learning_rate": 1.921806879963804e-05, "loss": 1.995, "step": 1956 }, { "epoch": 1.53, "learning_rate": 1.921708685280406e-05, "loss": 1.8172, "step": 1957 }, { "epoch": 1.53, "learning_rate": 1.9216104314912394e-05, "loss": 2.004, "step": 1958 }, { "epoch": 1.53, "learning_rate": 1.921512118602605e-05, "loss": 2.228, "step": 1959 }, { "epoch": 1.53, "learning_rate": 1.921413746620807e-05, "loss": 1.9058, "step": 1960 }, { "epoch": 1.53, "learning_rate": 1.9213153155521537e-05, "loss": 1.798, "step": 1961 }, { "epoch": 1.53, "learning_rate": 1.921216825402957e-05, "loss": 1.8723, "step": 1962 }, { "epoch": 1.53, "learning_rate": 1.9211182761795328e-05, "loss": 1.7683, "step": 1963 }, { "epoch": 1.53, "learning_rate": 1.9210196678882013e-05, "loss": 2.0396, "step": 1964 }, { "epoch": 1.54, "learning_rate": 1.9209210005352845e-05, "loss": 2.0383, "step": 1965 }, { "epoch": 1.54, "learning_rate": 1.9208222741271106e-05, "loss": 2.2877, "step": 1966 }, { "epoch": 1.54, "learning_rate": 1.920723488670011e-05, "loss": 1.7767, "step": 1967 }, { "epoch": 1.54, "learning_rate": 1.920624644170319e-05, "loss": 1.8415, "step": 1968 }, { "epoch": 1.54, "learning_rate": 1.9205257406343744e-05, "loss": 1.89, "step": 1969 }, { "epoch": 1.54, "learning_rate": 1.920426778068519e-05, "loss": 1.9637, "step": 1970 }, { "epoch": 1.54, "learning_rate": 1.9203277564790988e-05, "loss": 1.8572, "step": 1971 }, { "epoch": 1.54, "learning_rate": 1.9202286758724642e-05, "loss": 1.7, "step": 1972 }, { "epoch": 1.54, "learning_rate": 1.920129536254968e-05, "loss": 1.5546, "step": 1973 }, { "epoch": 1.54, "learning_rate": 1.9200303376329686e-05, "loss": 1.7424, "step": 1974 }, { "epoch": 1.54, "learning_rate": 1.9199310800128266e-05, "loss": 1.8641, "step": 1975 }, { "epoch": 1.54, "learning_rate": 1.9198317634009072e-05, "loss": 1.8316, "step": 1976 }, { "epoch": 1.55, "learning_rate": 1.9197323878035793e-05, "loss": 1.9334, "step": 1977 }, { "epoch": 1.55, "learning_rate": 1.9196329532272157e-05, "loss": 1.8902, "step": 1978 }, { "epoch": 1.55, "learning_rate": 1.919533459678192e-05, "loss": 1.7317, "step": 1979 }, { "epoch": 1.55, "learning_rate": 1.9194339071628894e-05, "loss": 1.7113, "step": 1980 }, { "epoch": 1.55, "learning_rate": 1.919334295687691e-05, "loss": 2.039, "step": 1981 }, { "epoch": 1.55, "learning_rate": 1.9192346252589845e-05, "loss": 2.1407, "step": 1982 }, { "epoch": 1.55, "learning_rate": 1.919134895883162e-05, "loss": 2.1883, "step": 1983 }, { "epoch": 1.55, "learning_rate": 1.919035107566619e-05, "loss": 1.8368, "step": 1984 }, { "epoch": 1.55, "learning_rate": 1.9189352603157533e-05, "loss": 2.0365, "step": 1985 }, { "epoch": 1.55, "learning_rate": 1.918835354136969e-05, "loss": 1.8059, "step": 1986 }, { "epoch": 1.55, "learning_rate": 1.918735389036672e-05, "loss": 2.0261, "step": 1987 }, { "epoch": 1.55, "learning_rate": 1.9186353650212727e-05, "loss": 1.7127, "step": 1988 }, { "epoch": 1.55, "learning_rate": 1.918535282097185e-05, "loss": 2.111, "step": 1989 }, { "epoch": 1.56, "learning_rate": 1.918435140270828e-05, "loss": 1.9732, "step": 1990 }, { "epoch": 1.56, "learning_rate": 1.9183349395486226e-05, "loss": 1.7533, "step": 1991 }, { "epoch": 1.56, "learning_rate": 1.9182346799369943e-05, "loss": 1.6754, "step": 1992 }, { "epoch": 1.56, "learning_rate": 1.9181343614423727e-05, "loss": 1.7629, "step": 1993 }, { "epoch": 1.56, "learning_rate": 1.9180339840711903e-05, "loss": 1.7099, "step": 1994 }, { "epoch": 1.56, "learning_rate": 1.9179335478298845e-05, "loss": 2.1337, "step": 1995 }, { "epoch": 1.56, "learning_rate": 1.9178330527248958e-05, "loss": 2.1531, "step": 1996 }, { "epoch": 1.56, "learning_rate": 1.9177324987626684e-05, "loss": 1.8488, "step": 1997 }, { "epoch": 1.56, "learning_rate": 1.9176318859496502e-05, "loss": 2.1331, "step": 1998 }, { "epoch": 1.56, "learning_rate": 1.917531214292294e-05, "loss": 1.7474, "step": 1999 }, { "epoch": 1.56, "learning_rate": 1.9174304837970546e-05, "loss": 1.9442, "step": 2000 }, { "epoch": 1.56, "learning_rate": 1.917329694470392e-05, "loss": 1.8569, "step": 2001 }, { "epoch": 1.56, "learning_rate": 1.9172288463187692e-05, "loss": 1.8731, "step": 2002 }, { "epoch": 1.57, "learning_rate": 1.917127939348653e-05, "loss": 1.6534, "step": 2003 }, { "epoch": 1.57, "learning_rate": 1.917026973566515e-05, "loss": 1.8074, "step": 2004 }, { "epoch": 1.57, "learning_rate": 1.9169259489788288e-05, "loss": 1.8529, "step": 2005 }, { "epoch": 1.57, "learning_rate": 1.9168248655920734e-05, "loss": 2.0474, "step": 2006 }, { "epoch": 1.57, "learning_rate": 1.9167237234127306e-05, "loss": 1.7536, "step": 2007 }, { "epoch": 1.57, "learning_rate": 1.9166225224472863e-05, "loss": 2.0597, "step": 2008 }, { "epoch": 1.57, "learning_rate": 1.9165212627022302e-05, "loss": 1.696, "step": 2009 }, { "epoch": 1.57, "learning_rate": 1.916419944184056e-05, "loss": 2.007, "step": 2010 }, { "epoch": 1.57, "learning_rate": 1.9163185668992604e-05, "loss": 1.6449, "step": 2011 }, { "epoch": 1.57, "learning_rate": 1.9162171308543445e-05, "loss": 1.6064, "step": 2012 }, { "epoch": 1.57, "learning_rate": 1.916115636055813e-05, "loss": 1.9318, "step": 2013 }, { "epoch": 1.57, "learning_rate": 1.9160140825101746e-05, "loss": 2.0189, "step": 2014 }, { "epoch": 1.57, "learning_rate": 1.9159124702239414e-05, "loss": 1.9979, "step": 2015 }, { "epoch": 1.58, "learning_rate": 1.9158107992036292e-05, "loss": 1.862, "step": 2016 }, { "epoch": 1.58, "learning_rate": 1.915709069455758e-05, "loss": 2.0011, "step": 2017 }, { "epoch": 1.58, "learning_rate": 1.9156072809868514e-05, "loss": 1.6563, "step": 2018 }, { "epoch": 1.58, "learning_rate": 1.915505433803437e-05, "loss": 1.8598, "step": 2019 }, { "epoch": 1.58, "learning_rate": 1.9154035279120452e-05, "loss": 1.7227, "step": 2020 }, { "epoch": 1.58, "learning_rate": 1.9153015633192113e-05, "loss": 1.9236, "step": 2021 }, { "epoch": 1.58, "learning_rate": 1.915199540031474e-05, "loss": 1.6292, "step": 2022 }, { "epoch": 1.58, "learning_rate": 1.9150974580553755e-05, "loss": 1.7529, "step": 2023 }, { "epoch": 1.58, "learning_rate": 1.9149953173974617e-05, "loss": 1.7298, "step": 2024 }, { "epoch": 1.58, "learning_rate": 1.9148931180642827e-05, "loss": 2.0086, "step": 2025 }, { "epoch": 1.58, "learning_rate": 1.9147908600623923e-05, "loss": 1.8843, "step": 2026 }, { "epoch": 1.58, "learning_rate": 1.9146885433983477e-05, "loss": 1.6557, "step": 2027 }, { "epoch": 1.58, "learning_rate": 1.9145861680787104e-05, "loss": 2.218, "step": 2028 }, { "epoch": 1.59, "learning_rate": 1.914483734110045e-05, "loss": 1.98, "step": 2029 }, { "epoch": 1.59, "learning_rate": 1.9143812414989207e-05, "loss": 1.773, "step": 2030 }, { "epoch": 1.59, "learning_rate": 1.9142786902519095e-05, "loss": 2.0343, "step": 2031 }, { "epoch": 1.59, "learning_rate": 1.9141760803755877e-05, "loss": 1.8509, "step": 2032 }, { "epoch": 1.59, "learning_rate": 1.9140734118765353e-05, "loss": 1.9815, "step": 2033 }, { "epoch": 1.59, "learning_rate": 1.9139706847613364e-05, "loss": 1.8326, "step": 2034 }, { "epoch": 1.59, "learning_rate": 1.913867899036578e-05, "loss": 2.0384, "step": 2035 }, { "epoch": 1.59, "learning_rate": 1.913765054708852e-05, "loss": 1.8195, "step": 2036 }, { "epoch": 1.59, "learning_rate": 1.9136621517847524e-05, "loss": 1.7841, "step": 2037 }, { "epoch": 1.59, "learning_rate": 1.913559190270879e-05, "loss": 2.1227, "step": 2038 }, { "epoch": 1.59, "learning_rate": 1.9134561701738338e-05, "loss": 2.0032, "step": 2039 }, { "epoch": 1.59, "learning_rate": 1.9133530915002233e-05, "loss": 1.9888, "step": 2040 }, { "epoch": 1.6, "learning_rate": 1.913249954256658e-05, "loss": 2.0869, "step": 2041 }, { "epoch": 1.6, "learning_rate": 1.9131467584497505e-05, "loss": 1.6549, "step": 2042 }, { "epoch": 1.6, "learning_rate": 1.9130435040861196e-05, "loss": 1.7687, "step": 2043 }, { "epoch": 1.6, "learning_rate": 1.9129401911723858e-05, "loss": 1.7587, "step": 2044 }, { "epoch": 1.6, "learning_rate": 1.9128368197151747e-05, "loss": 1.7214, "step": 2045 }, { "epoch": 1.6, "learning_rate": 1.9127333897211144e-05, "loss": 1.71, "step": 2046 }, { "epoch": 1.6, "learning_rate": 1.9126299011968387e-05, "loss": 1.9209, "step": 2047 }, { "epoch": 1.6, "learning_rate": 1.9125263541489827e-05, "loss": 1.9184, "step": 2048 }, { "epoch": 1.6, "learning_rate": 1.9124227485841872e-05, "loss": 2.1314, "step": 2049 }, { "epoch": 1.6, "learning_rate": 1.912319084509096e-05, "loss": 1.7472, "step": 2050 }, { "epoch": 1.6, "learning_rate": 1.912215361930356e-05, "loss": 2.0097, "step": 2051 }, { "epoch": 1.6, "learning_rate": 1.9121115808546196e-05, "loss": 1.9146, "step": 2052 }, { "epoch": 1.6, "learning_rate": 1.9120077412885414e-05, "loss": 2.1109, "step": 2053 }, { "epoch": 1.61, "learning_rate": 1.91190384323878e-05, "loss": 1.8966, "step": 2054 }, { "epoch": 1.61, "learning_rate": 1.9117998867119985e-05, "loss": 1.9444, "step": 2055 }, { "epoch": 1.61, "learning_rate": 1.911695871714863e-05, "loss": 1.9908, "step": 2056 }, { "epoch": 1.61, "learning_rate": 1.9115917982540435e-05, "loss": 1.5799, "step": 2057 }, { "epoch": 1.61, "learning_rate": 1.9114876663362137e-05, "loss": 1.8889, "step": 2058 }, { "epoch": 1.61, "learning_rate": 1.9113834759680516e-05, "loss": 1.8554, "step": 2059 }, { "epoch": 1.61, "learning_rate": 1.9112792271562384e-05, "loss": 1.8669, "step": 2060 }, { "epoch": 1.61, "learning_rate": 1.9111749199074592e-05, "loss": 1.9993, "step": 2061 }, { "epoch": 1.61, "learning_rate": 1.911070554228403e-05, "loss": 1.7288, "step": 2062 }, { "epoch": 1.61, "learning_rate": 1.9109661301257616e-05, "loss": 1.7935, "step": 2063 }, { "epoch": 1.61, "learning_rate": 1.9108616476062324e-05, "loss": 1.7126, "step": 2064 }, { "epoch": 1.61, "learning_rate": 1.9107571066765147e-05, "loss": 2.0576, "step": 2065 }, { "epoch": 1.61, "learning_rate": 1.9106525073433127e-05, "loss": 1.5709, "step": 2066 }, { "epoch": 1.62, "learning_rate": 1.910547849613334e-05, "loss": 2.1289, "step": 2067 }, { "epoch": 1.62, "learning_rate": 1.9104431334932895e-05, "loss": 1.7333, "step": 2068 }, { "epoch": 1.62, "learning_rate": 1.9103383589898946e-05, "loss": 1.8295, "step": 2069 }, { "epoch": 1.62, "learning_rate": 1.9102335261098686e-05, "loss": 1.8995, "step": 2070 }, { "epoch": 1.62, "learning_rate": 1.910128634859933e-05, "loss": 1.9137, "step": 2071 }, { "epoch": 1.62, "learning_rate": 1.9100236852468147e-05, "loss": 2.2927, "step": 2072 }, { "epoch": 1.62, "learning_rate": 1.9099186772772437e-05, "loss": 1.9936, "step": 2073 }, { "epoch": 1.62, "learning_rate": 1.9098136109579535e-05, "loss": 1.8397, "step": 2074 }, { "epoch": 1.62, "learning_rate": 1.9097084862956818e-05, "loss": 1.981, "step": 2075 }, { "epoch": 1.62, "learning_rate": 1.9096033032971698e-05, "loss": 1.8029, "step": 2076 }, { "epoch": 1.62, "learning_rate": 1.909498061969163e-05, "loss": 1.9516, "step": 2077 }, { "epoch": 1.62, "learning_rate": 1.9093927623184095e-05, "loss": 1.8571, "step": 2078 }, { "epoch": 1.62, "learning_rate": 1.9092874043516616e-05, "loss": 1.7117, "step": 2079 }, { "epoch": 1.63, "learning_rate": 1.9091819880756762e-05, "loss": 1.7104, "step": 2080 }, { "epoch": 1.63, "learning_rate": 1.909076513497213e-05, "loss": 2.0757, "step": 2081 }, { "epoch": 1.63, "learning_rate": 1.9089709806230353e-05, "loss": 1.8115, "step": 2082 }, { "epoch": 1.63, "learning_rate": 1.9088653894599113e-05, "loss": 1.7328, "step": 2083 }, { "epoch": 1.63, "learning_rate": 1.9087597400146115e-05, "loss": 1.6901, "step": 2084 }, { "epoch": 1.63, "learning_rate": 1.908654032293911e-05, "loss": 1.9398, "step": 2085 }, { "epoch": 1.63, "learning_rate": 1.9085482663045888e-05, "loss": 2.0057, "step": 2086 }, { "epoch": 1.63, "learning_rate": 1.9084424420534266e-05, "loss": 1.9422, "step": 2087 }, { "epoch": 1.63, "learning_rate": 1.9083365595472112e-05, "loss": 1.7056, "step": 2088 }, { "epoch": 1.63, "learning_rate": 1.908230618792732e-05, "loss": 1.9884, "step": 2089 }, { "epoch": 1.63, "learning_rate": 1.9081246197967828e-05, "loss": 1.9397, "step": 2090 }, { "epoch": 1.63, "learning_rate": 1.9080185625661608e-05, "loss": 1.8954, "step": 2091 }, { "epoch": 1.64, "learning_rate": 1.907912447107667e-05, "loss": 1.8701, "step": 2092 }, { "epoch": 1.64, "learning_rate": 1.9078062734281063e-05, "loss": 1.6297, "step": 2093 }, { "epoch": 1.64, "learning_rate": 1.9077000415342874e-05, "loss": 1.8835, "step": 2094 }, { "epoch": 1.64, "learning_rate": 1.9075937514330226e-05, "loss": 1.8219, "step": 2095 }, { "epoch": 1.64, "learning_rate": 1.9074874031311272e-05, "loss": 1.7882, "step": 2096 }, { "epoch": 1.64, "learning_rate": 1.907380996635422e-05, "loss": 1.877, "step": 2097 }, { "epoch": 1.64, "learning_rate": 1.9072745319527293e-05, "loss": 1.8381, "step": 2098 }, { "epoch": 1.64, "learning_rate": 1.907168009089877e-05, "loss": 1.7895, "step": 2099 }, { "epoch": 1.64, "learning_rate": 1.9070614280536958e-05, "loss": 1.5628, "step": 2100 }, { "epoch": 1.64, "learning_rate": 1.9069547888510205e-05, "loss": 1.9391, "step": 2101 }, { "epoch": 1.64, "learning_rate": 1.9068480914886897e-05, "loss": 1.9706, "step": 2102 }, { "epoch": 1.64, "learning_rate": 1.906741335973545e-05, "loss": 1.8668, "step": 2103 }, { "epoch": 1.64, "learning_rate": 1.9066345223124322e-05, "loss": 1.9369, "step": 2104 }, { "epoch": 1.65, "learning_rate": 1.9065276505122013e-05, "loss": 2.0802, "step": 2105 }, { "epoch": 1.65, "learning_rate": 1.906420720579705e-05, "loss": 1.8465, "step": 2106 }, { "epoch": 1.65, "learning_rate": 1.9063137325218014e-05, "loss": 2.0116, "step": 2107 }, { "epoch": 1.65, "learning_rate": 1.90620668634535e-05, "loss": 1.7785, "step": 2108 }, { "epoch": 1.65, "learning_rate": 1.9060995820572162e-05, "loss": 1.8256, "step": 2109 }, { "epoch": 1.65, "learning_rate": 1.905992419664268e-05, "loss": 1.7347, "step": 2110 }, { "epoch": 1.65, "learning_rate": 1.905885199173377e-05, "loss": 1.903, "step": 2111 }, { "epoch": 1.65, "learning_rate": 1.9057779205914185e-05, "loss": 1.7748, "step": 2112 }, { "epoch": 1.65, "learning_rate": 1.905670583925273e-05, "loss": 2.0639, "step": 2113 }, { "epoch": 1.65, "learning_rate": 1.9055631891818228e-05, "loss": 1.7836, "step": 2114 }, { "epoch": 1.65, "learning_rate": 1.9054557363679547e-05, "loss": 1.8369, "step": 2115 }, { "epoch": 1.65, "learning_rate": 1.9053482254905598e-05, "loss": 2.3826, "step": 2116 }, { "epoch": 1.65, "learning_rate": 1.905240656556532e-05, "loss": 1.9609, "step": 2117 }, { "epoch": 1.66, "learning_rate": 1.905133029572769e-05, "loss": 1.568, "step": 2118 }, { "epoch": 1.66, "learning_rate": 1.9050253445461733e-05, "loss": 1.739, "step": 2119 }, { "epoch": 1.66, "learning_rate": 1.90491760148365e-05, "loss": 1.7941, "step": 2120 }, { "epoch": 1.66, "learning_rate": 1.9048098003921077e-05, "loss": 2.2397, "step": 2121 }, { "epoch": 1.66, "learning_rate": 1.90470194127846e-05, "loss": 1.88, "step": 2122 }, { "epoch": 1.66, "learning_rate": 1.9045940241496235e-05, "loss": 1.7193, "step": 2123 }, { "epoch": 1.66, "learning_rate": 1.904486049012518e-05, "loss": 1.6694, "step": 2124 }, { "epoch": 1.66, "learning_rate": 1.904378015874068e-05, "loss": 1.7901, "step": 2125 }, { "epoch": 1.66, "learning_rate": 1.9042699247412012e-05, "loss": 1.8401, "step": 2126 }, { "epoch": 1.66, "learning_rate": 1.9041617756208488e-05, "loss": 1.6848, "step": 2127 }, { "epoch": 1.66, "learning_rate": 1.904053568519946e-05, "loss": 2.0323, "step": 2128 }, { "epoch": 1.66, "learning_rate": 1.9039453034454327e-05, "loss": 1.7043, "step": 2129 }, { "epoch": 1.66, "learning_rate": 1.90383698040425e-05, "loss": 1.6425, "step": 2130 }, { "epoch": 1.67, "learning_rate": 1.9037285994033452e-05, "loss": 1.9832, "step": 2131 }, { "epoch": 1.67, "learning_rate": 1.9036201604496683e-05, "loss": 1.5546, "step": 2132 }, { "epoch": 1.67, "learning_rate": 1.903511663550173e-05, "loss": 1.9522, "step": 2133 }, { "epoch": 1.67, "learning_rate": 1.9034031087118164e-05, "loss": 1.8946, "step": 2134 }, { "epoch": 1.67, "learning_rate": 1.9032944959415607e-05, "loss": 2.1897, "step": 2135 }, { "epoch": 1.67, "learning_rate": 1.9031858252463698e-05, "loss": 2.0285, "step": 2136 }, { "epoch": 1.67, "learning_rate": 1.903077096633213e-05, "loss": 2.0332, "step": 2137 }, { "epoch": 1.67, "learning_rate": 1.9029683101090623e-05, "loss": 1.9736, "step": 2138 }, { "epoch": 1.67, "learning_rate": 1.9028594656808937e-05, "loss": 1.6575, "step": 2139 }, { "epoch": 1.67, "learning_rate": 1.9027505633556878e-05, "loss": 1.8216, "step": 2140 }, { "epoch": 1.67, "learning_rate": 1.902641603140427e-05, "loss": 1.8321, "step": 2141 }, { "epoch": 1.67, "learning_rate": 1.9025325850420995e-05, "loss": 2.0622, "step": 2142 }, { "epoch": 1.67, "learning_rate": 1.9024235090676956e-05, "loss": 1.7159, "step": 2143 }, { "epoch": 1.68, "learning_rate": 1.90231437522421e-05, "loss": 2.1846, "step": 2144 }, { "epoch": 1.68, "learning_rate": 1.9022051835186414e-05, "loss": 2.1047, "step": 2145 }, { "epoch": 1.68, "learning_rate": 1.902095933957991e-05, "loss": 1.6983, "step": 2146 }, { "epoch": 1.68, "learning_rate": 1.9019866265492656e-05, "loss": 1.8377, "step": 2147 }, { "epoch": 1.68, "learning_rate": 1.9018772612994744e-05, "loss": 1.7651, "step": 2148 }, { "epoch": 1.68, "learning_rate": 1.9017678382156304e-05, "loss": 1.7963, "step": 2149 }, { "epoch": 1.68, "learning_rate": 1.9016583573047503e-05, "loss": 2.0192, "step": 2150 }, { "epoch": 1.68, "learning_rate": 1.9015488185738552e-05, "loss": 2.0329, "step": 2151 }, { "epoch": 1.68, "learning_rate": 1.901439222029969e-05, "loss": 2.0569, "step": 2152 }, { "epoch": 1.68, "learning_rate": 1.90132956768012e-05, "loss": 1.9108, "step": 2153 }, { "epoch": 1.68, "learning_rate": 1.9012198555313396e-05, "loss": 2.1913, "step": 2154 }, { "epoch": 1.68, "learning_rate": 1.9011100855906635e-05, "loss": 1.8384, "step": 2155 }, { "epoch": 1.69, "learning_rate": 1.901000257865131e-05, "loss": 1.7251, "step": 2156 }, { "epoch": 1.69, "learning_rate": 1.9008903723617846e-05, "loss": 2.0346, "step": 2157 }, { "epoch": 1.69, "learning_rate": 1.900780429087671e-05, "loss": 1.4568, "step": 2158 }, { "epoch": 1.69, "learning_rate": 1.9006704280498406e-05, "loss": 1.7919, "step": 2159 }, { "epoch": 1.69, "learning_rate": 1.9005603692553468e-05, "loss": 2.0702, "step": 2160 }, { "epoch": 1.69, "learning_rate": 1.9004502527112482e-05, "loss": 1.7683, "step": 2161 }, { "epoch": 1.69, "learning_rate": 1.9003400784246056e-05, "loss": 1.988, "step": 2162 }, { "epoch": 1.69, "learning_rate": 1.900229846402484e-05, "loss": 1.7476, "step": 2163 }, { "epoch": 1.69, "learning_rate": 1.9001195566519518e-05, "loss": 1.905, "step": 2164 }, { "epoch": 1.69, "learning_rate": 1.9000092091800824e-05, "loss": 1.8073, "step": 2165 }, { "epoch": 1.69, "learning_rate": 1.8998988039939514e-05, "loss": 1.8857, "step": 2166 }, { "epoch": 1.69, "learning_rate": 1.8997883411006387e-05, "loss": 1.894, "step": 2167 }, { "epoch": 1.69, "learning_rate": 1.899677820507228e-05, "loss": 1.9012, "step": 2168 }, { "epoch": 1.7, "learning_rate": 1.899567242220807e-05, "loss": 1.7933, "step": 2169 }, { "epoch": 1.7, "learning_rate": 1.8994566062484655e-05, "loss": 1.9435, "step": 2170 }, { "epoch": 1.7, "learning_rate": 1.8993459125972995e-05, "loss": 1.9556, "step": 2171 }, { "epoch": 1.7, "learning_rate": 1.8992351612744065e-05, "loss": 1.9435, "step": 2172 }, { "epoch": 1.7, "learning_rate": 1.899124352286889e-05, "loss": 1.8878, "step": 2173 }, { "epoch": 1.7, "learning_rate": 1.8990134856418526e-05, "loss": 1.5181, "step": 2174 }, { "epoch": 1.7, "learning_rate": 1.898902561346407e-05, "loss": 1.9799, "step": 2175 }, { "epoch": 1.7, "learning_rate": 1.8987915794076647e-05, "loss": 2.1143, "step": 2176 }, { "epoch": 1.7, "learning_rate": 1.898680539832743e-05, "loss": 1.9512, "step": 2177 }, { "epoch": 1.7, "learning_rate": 1.8985694426287627e-05, "loss": 1.7381, "step": 2178 }, { "epoch": 1.7, "learning_rate": 1.898458287802848e-05, "loss": 1.9032, "step": 2179 }, { "epoch": 1.7, "learning_rate": 1.8983470753621263e-05, "loss": 1.9393, "step": 2180 }, { "epoch": 1.7, "learning_rate": 1.89823580531373e-05, "loss": 1.856, "step": 2181 }, { "epoch": 1.71, "learning_rate": 1.898124477664794e-05, "loss": 1.5565, "step": 2182 }, { "epoch": 1.71, "learning_rate": 1.8980130924224568e-05, "loss": 1.8587, "step": 2183 }, { "epoch": 1.71, "learning_rate": 1.8979016495938624e-05, "loss": 1.6336, "step": 2184 }, { "epoch": 1.71, "learning_rate": 1.8977901491861563e-05, "loss": 1.9887, "step": 2185 }, { "epoch": 1.71, "learning_rate": 1.8976785912064888e-05, "loss": 1.7994, "step": 2186 }, { "epoch": 1.71, "learning_rate": 1.8975669756620136e-05, "loss": 1.9335, "step": 2187 }, { "epoch": 1.71, "learning_rate": 1.897455302559888e-05, "loss": 1.9481, "step": 2188 }, { "epoch": 1.71, "learning_rate": 1.8973435719072743e-05, "loss": 2.1909, "step": 2189 }, { "epoch": 1.71, "learning_rate": 1.8972317837113362e-05, "loss": 1.8893, "step": 2190 }, { "epoch": 1.71, "learning_rate": 1.897119937979243e-05, "loss": 1.8536, "step": 2191 }, { "epoch": 1.71, "learning_rate": 1.8970080347181663e-05, "loss": 1.83, "step": 2192 }, { "epoch": 1.71, "learning_rate": 1.896896073935282e-05, "loss": 1.7804, "step": 2193 }, { "epoch": 1.71, "learning_rate": 1.8967840556377706e-05, "loss": 2.2462, "step": 2194 }, { "epoch": 1.72, "learning_rate": 1.8966719798328147e-05, "loss": 1.8716, "step": 2195 }, { "epoch": 1.72, "learning_rate": 1.8965598465276012e-05, "loss": 1.8887, "step": 2196 }, { "epoch": 1.72, "learning_rate": 1.8964476557293217e-05, "loss": 1.515, "step": 2197 }, { "epoch": 1.72, "learning_rate": 1.8963354074451695e-05, "loss": 1.8515, "step": 2198 }, { "epoch": 1.72, "learning_rate": 1.8962231016823434e-05, "loss": 1.8413, "step": 2199 }, { "epoch": 1.72, "learning_rate": 1.8961107384480448e-05, "loss": 1.8129, "step": 2200 }, { "epoch": 1.72, "learning_rate": 1.8959983177494793e-05, "loss": 2.1596, "step": 2201 }, { "epoch": 1.72, "learning_rate": 1.895885839593856e-05, "loss": 1.8493, "step": 2202 }, { "epoch": 1.72, "learning_rate": 1.895773303988387e-05, "loss": 2.0217, "step": 2203 }, { "epoch": 1.72, "learning_rate": 1.8956607109402904e-05, "loss": 1.8191, "step": 2204 }, { "epoch": 1.72, "learning_rate": 1.895548060456785e-05, "loss": 1.7845, "step": 2205 }, { "epoch": 1.72, "learning_rate": 1.895435352545095e-05, "loss": 1.675, "step": 2206 }, { "epoch": 1.72, "learning_rate": 1.895322587212448e-05, "loss": 1.8894, "step": 2207 }, { "epoch": 1.73, "learning_rate": 1.8952097644660753e-05, "loss": 2.18, "step": 2208 }, { "epoch": 1.73, "learning_rate": 1.895096884313212e-05, "loss": 2.1255, "step": 2209 }, { "epoch": 1.73, "learning_rate": 1.8949839467610962e-05, "loss": 1.7257, "step": 2210 }, { "epoch": 1.73, "learning_rate": 1.8948709518169707e-05, "loss": 2.0582, "step": 2211 }, { "epoch": 1.73, "learning_rate": 1.8947578994880808e-05, "loss": 2.2998, "step": 2212 }, { "epoch": 1.73, "learning_rate": 1.8946447897816768e-05, "loss": 1.6759, "step": 2213 }, { "epoch": 1.73, "learning_rate": 1.8945316227050116e-05, "loss": 1.8205, "step": 2214 }, { "epoch": 1.73, "learning_rate": 1.8944183982653423e-05, "loss": 2.0013, "step": 2215 }, { "epoch": 1.73, "learning_rate": 1.8943051164699296e-05, "loss": 1.7227, "step": 2216 }, { "epoch": 1.73, "learning_rate": 1.8941917773260374e-05, "loss": 1.6701, "step": 2217 }, { "epoch": 1.73, "learning_rate": 1.8940783808409343e-05, "loss": 1.9593, "step": 2218 }, { "epoch": 1.73, "learning_rate": 1.893964927021892e-05, "loss": 1.4617, "step": 2219 }, { "epoch": 1.74, "learning_rate": 1.8938514158761855e-05, "loss": 2.1706, "step": 2220 }, { "epoch": 1.74, "learning_rate": 1.893737847411094e-05, "loss": 2.2142, "step": 2221 }, { "epoch": 1.74, "learning_rate": 1.8936242216339006e-05, "loss": 2.1096, "step": 2222 }, { "epoch": 1.74, "learning_rate": 1.8935105385518912e-05, "loss": 1.685, "step": 2223 }, { "epoch": 1.74, "learning_rate": 1.893396798172356e-05, "loss": 1.9913, "step": 2224 }, { "epoch": 1.74, "learning_rate": 1.8932830005025885e-05, "loss": 2.1838, "step": 2225 }, { "epoch": 1.74, "learning_rate": 1.8931691455498868e-05, "loss": 1.9486, "step": 2226 }, { "epoch": 1.74, "learning_rate": 1.8930552333215514e-05, "loss": 1.8487, "step": 2227 }, { "epoch": 1.74, "learning_rate": 1.8929412638248875e-05, "loss": 1.7556, "step": 2228 }, { "epoch": 1.74, "learning_rate": 1.892827237067203e-05, "loss": 1.6757, "step": 2229 }, { "epoch": 1.74, "learning_rate": 1.8927131530558106e-05, "loss": 1.7058, "step": 2230 }, { "epoch": 1.74, "learning_rate": 1.8925990117980255e-05, "loss": 2.1049, "step": 2231 }, { "epoch": 1.74, "learning_rate": 1.8924848133011674e-05, "loss": 1.78, "step": 2232 }, { "epoch": 1.75, "learning_rate": 1.89237055757256e-05, "loss": 1.8822, "step": 2233 }, { "epoch": 1.75, "learning_rate": 1.8922562446195293e-05, "loss": 1.6918, "step": 2234 }, { "epoch": 1.75, "learning_rate": 1.892141874449406e-05, "loss": 1.87, "step": 2235 }, { "epoch": 1.75, "learning_rate": 1.8920274470695245e-05, "loss": 1.8843, "step": 2236 }, { "epoch": 1.75, "learning_rate": 1.8919129624872223e-05, "loss": 1.7439, "step": 2237 }, { "epoch": 1.75, "learning_rate": 1.891798420709841e-05, "loss": 1.9126, "step": 2238 }, { "epoch": 1.75, "learning_rate": 1.8916838217447258e-05, "loss": 1.8176, "step": 2239 }, { "epoch": 1.75, "learning_rate": 1.8915691655992253e-05, "loss": 1.8635, "step": 2240 }, { "epoch": 1.75, "learning_rate": 1.8914544522806923e-05, "loss": 1.8404, "step": 2241 }, { "epoch": 1.75, "learning_rate": 1.8913396817964823e-05, "loss": 2.1315, "step": 2242 }, { "epoch": 1.75, "learning_rate": 1.891224854153956e-05, "loss": 1.7465, "step": 2243 }, { "epoch": 1.75, "learning_rate": 1.891109969360476e-05, "loss": 1.7723, "step": 2244 }, { "epoch": 1.75, "learning_rate": 1.89099502742341e-05, "loss": 1.5269, "step": 2245 }, { "epoch": 1.76, "learning_rate": 1.8908800283501288e-05, "loss": 1.9037, "step": 2246 }, { "epoch": 1.76, "learning_rate": 1.8907649721480065e-05, "loss": 1.4824, "step": 2247 }, { "epoch": 1.76, "learning_rate": 1.8906498588244216e-05, "loss": 1.7716, "step": 2248 }, { "epoch": 1.76, "learning_rate": 1.890534688386756e-05, "loss": 1.9132, "step": 2249 }, { "epoch": 1.76, "learning_rate": 1.8904194608423947e-05, "loss": 2.2756, "step": 2250 }, { "epoch": 1.76, "learning_rate": 1.890304176198727e-05, "loss": 1.8273, "step": 2251 }, { "epoch": 1.76, "learning_rate": 1.8901888344631455e-05, "loss": 1.8222, "step": 2252 }, { "epoch": 1.76, "learning_rate": 1.8900734356430468e-05, "loss": 1.9121, "step": 2253 }, { "epoch": 1.76, "learning_rate": 1.889957979745831e-05, "loss": 1.8506, "step": 2254 }, { "epoch": 1.76, "learning_rate": 1.8898424667789023e-05, "loss": 1.9313, "step": 2255 }, { "epoch": 1.76, "learning_rate": 1.8897268967496677e-05, "loss": 2.0061, "step": 2256 }, { "epoch": 1.76, "learning_rate": 1.889611269665538e-05, "loss": 1.7875, "step": 2257 }, { "epoch": 1.76, "learning_rate": 1.8894955855339282e-05, "loss": 2.1437, "step": 2258 }, { "epoch": 1.77, "learning_rate": 1.8893798443622572e-05, "loss": 2.3534, "step": 2259 }, { "epoch": 1.77, "learning_rate": 1.889264046157946e-05, "loss": 1.6395, "step": 2260 }, { "epoch": 1.77, "learning_rate": 1.889148190928421e-05, "loss": 1.6372, "step": 2261 }, { "epoch": 1.77, "learning_rate": 1.8890322786811115e-05, "loss": 1.6454, "step": 2262 }, { "epoch": 1.77, "learning_rate": 1.8889163094234507e-05, "loss": 1.9276, "step": 2263 }, { "epoch": 1.77, "learning_rate": 1.8888002831628747e-05, "loss": 1.8999, "step": 2264 }, { "epoch": 1.77, "learning_rate": 1.8886841999068242e-05, "loss": 1.683, "step": 2265 }, { "epoch": 1.77, "learning_rate": 1.8885680596627433e-05, "loss": 1.7057, "step": 2266 }, { "epoch": 1.77, "learning_rate": 1.8884518624380797e-05, "loss": 1.821, "step": 2267 }, { "epoch": 1.77, "learning_rate": 1.888335608240284e-05, "loss": 2.0857, "step": 2268 }, { "epoch": 1.77, "learning_rate": 1.888219297076812e-05, "loss": 1.7132, "step": 2269 }, { "epoch": 1.77, "learning_rate": 1.8881029289551224e-05, "loss": 1.7874, "step": 2270 }, { "epoch": 1.77, "learning_rate": 1.8879865038826767e-05, "loss": 1.9414, "step": 2271 }, { "epoch": 1.78, "learning_rate": 1.887870021866941e-05, "loss": 2.0577, "step": 2272 }, { "epoch": 1.78, "learning_rate": 1.8877534829153853e-05, "loss": 1.7371, "step": 2273 }, { "epoch": 1.78, "learning_rate": 1.887636887035482e-05, "loss": 1.7303, "step": 2274 }, { "epoch": 1.78, "learning_rate": 1.887520234234709e-05, "loss": 1.7982, "step": 2275 }, { "epoch": 1.78, "learning_rate": 1.887403524520546e-05, "loss": 1.7572, "step": 2276 }, { "epoch": 1.78, "learning_rate": 1.8872867579004773e-05, "loss": 1.8231, "step": 2277 }, { "epoch": 1.78, "learning_rate": 1.887169934381991e-05, "loss": 1.8197, "step": 2278 }, { "epoch": 1.78, "learning_rate": 1.8870530539725786e-05, "loss": 1.7655, "step": 2279 }, { "epoch": 1.78, "learning_rate": 1.8869361166797348e-05, "loss": 1.7062, "step": 2280 }, { "epoch": 1.78, "learning_rate": 1.886819122510959e-05, "loss": 1.9887, "step": 2281 }, { "epoch": 1.78, "learning_rate": 1.886702071473753e-05, "loss": 1.6784, "step": 2282 }, { "epoch": 1.78, "learning_rate": 1.8865849635756227e-05, "loss": 1.7593, "step": 2283 }, { "epoch": 1.79, "learning_rate": 1.8864677988240786e-05, "loss": 1.944, "step": 2284 }, { "epoch": 1.79, "learning_rate": 1.8863505772266332e-05, "loss": 1.7613, "step": 2285 }, { "epoch": 1.79, "learning_rate": 1.886233298790804e-05, "loss": 1.708, "step": 2286 }, { "epoch": 1.79, "learning_rate": 1.8861159635241117e-05, "loss": 1.7468, "step": 2287 }, { "epoch": 1.79, "learning_rate": 1.88599857143408e-05, "loss": 2.4422, "step": 2288 }, { "epoch": 1.79, "learning_rate": 1.8858811225282375e-05, "loss": 2.264, "step": 2289 }, { "epoch": 1.79, "learning_rate": 1.8857636168141155e-05, "loss": 1.5954, "step": 2290 }, { "epoch": 1.79, "learning_rate": 1.885646054299249e-05, "loss": 1.8587, "step": 2291 }, { "epoch": 1.79, "learning_rate": 1.8855284349911772e-05, "loss": 1.6867, "step": 2292 }, { "epoch": 1.79, "learning_rate": 1.885410758897442e-05, "loss": 1.669, "step": 2293 }, { "epoch": 1.79, "learning_rate": 1.8852930260255904e-05, "loss": 2.0667, "step": 2294 }, { "epoch": 1.79, "learning_rate": 1.8851752363831713e-05, "loss": 1.869, "step": 2295 }, { "epoch": 1.79, "learning_rate": 1.885057389977739e-05, "loss": 2.0958, "step": 2296 }, { "epoch": 1.8, "learning_rate": 1.88493948681685e-05, "loss": 1.7864, "step": 2297 }, { "epoch": 1.8, "learning_rate": 1.8848215269080644e-05, "loss": 1.693, "step": 2298 }, { "epoch": 1.8, "learning_rate": 1.8847035102589483e-05, "loss": 1.7569, "step": 2299 }, { "epoch": 1.8, "learning_rate": 1.8845854368770677e-05, "loss": 2.0551, "step": 2300 }, { "epoch": 1.8, "learning_rate": 1.884467306769995e-05, "loss": 1.8955, "step": 2301 }, { "epoch": 1.8, "learning_rate": 1.8843491199453063e-05, "loss": 1.6256, "step": 2302 }, { "epoch": 1.8, "learning_rate": 1.8842308764105795e-05, "loss": 2.0998, "step": 2303 }, { "epoch": 1.8, "learning_rate": 1.884112576173397e-05, "loss": 1.7851, "step": 2304 }, { "epoch": 1.8, "learning_rate": 1.8839942192413455e-05, "loss": 1.851, "step": 2305 }, { "epoch": 1.8, "learning_rate": 1.8838758056220147e-05, "loss": 2.0442, "step": 2306 }, { "epoch": 1.8, "learning_rate": 1.8837573353229978e-05, "loss": 1.9565, "step": 2307 }, { "epoch": 1.8, "learning_rate": 1.8836388083518918e-05, "loss": 1.6712, "step": 2308 }, { "epoch": 1.8, "learning_rate": 1.883520224716298e-05, "loss": 2.0106, "step": 2309 }, { "epoch": 1.81, "learning_rate": 1.8834015844238197e-05, "loss": 1.9254, "step": 2310 }, { "epoch": 1.81, "learning_rate": 1.883282887482066e-05, "loss": 1.983, "step": 2311 }, { "epoch": 1.81, "learning_rate": 1.883164133898648e-05, "loss": 1.8607, "step": 2312 }, { "epoch": 1.81, "learning_rate": 1.8830453236811805e-05, "loss": 1.7683, "step": 2313 }, { "epoch": 1.81, "learning_rate": 1.8829264568372827e-05, "loss": 2.0766, "step": 2314 }, { "epoch": 1.81, "learning_rate": 1.8828075333745776e-05, "loss": 1.8955, "step": 2315 }, { "epoch": 1.81, "learning_rate": 1.8826885533006907e-05, "loss": 2.1392, "step": 2316 }, { "epoch": 1.81, "learning_rate": 1.8825695166232514e-05, "loss": 2.1108, "step": 2317 }, { "epoch": 1.81, "learning_rate": 1.8824504233498943e-05, "loss": 1.7879, "step": 2318 }, { "epoch": 1.81, "learning_rate": 1.8823312734882555e-05, "loss": 1.7623, "step": 2319 }, { "epoch": 1.81, "learning_rate": 1.882212067045976e-05, "loss": 1.8401, "step": 2320 }, { "epoch": 1.81, "learning_rate": 1.8820928040306996e-05, "loss": 1.7774, "step": 2321 }, { "epoch": 1.81, "learning_rate": 1.8819734844500744e-05, "loss": 1.9443, "step": 2322 }, { "epoch": 1.82, "learning_rate": 1.8818541083117522e-05, "loss": 1.635, "step": 2323 }, { "epoch": 1.82, "learning_rate": 1.8817346756233884e-05, "loss": 1.7353, "step": 2324 }, { "epoch": 1.82, "learning_rate": 1.881615186392641e-05, "loss": 1.9891, "step": 2325 }, { "epoch": 1.82, "learning_rate": 1.8814956406271725e-05, "loss": 1.6829, "step": 2326 }, { "epoch": 1.82, "learning_rate": 1.8813760383346495e-05, "loss": 2.0035, "step": 2327 }, { "epoch": 1.82, "learning_rate": 1.8812563795227415e-05, "loss": 2.02, "step": 2328 }, { "epoch": 1.82, "learning_rate": 1.8811366641991212e-05, "loss": 1.9016, "step": 2329 }, { "epoch": 1.82, "learning_rate": 1.8810168923714665e-05, "loss": 2.4147, "step": 2330 }, { "epoch": 1.82, "learning_rate": 1.8808970640474568e-05, "loss": 1.6307, "step": 2331 }, { "epoch": 1.82, "learning_rate": 1.8807771792347772e-05, "loss": 1.8316, "step": 2332 }, { "epoch": 1.82, "learning_rate": 1.8806572379411147e-05, "loss": 1.8165, "step": 2333 }, { "epoch": 1.82, "learning_rate": 1.8805372401741613e-05, "loss": 1.4594, "step": 2334 }, { "epoch": 1.82, "learning_rate": 1.8804171859416118e-05, "loss": 1.7128, "step": 2335 }, { "epoch": 1.83, "learning_rate": 1.8802970752511647e-05, "loss": 2.1287, "step": 2336 }, { "epoch": 1.83, "learning_rate": 1.8801769081105223e-05, "loss": 2.033, "step": 2337 }, { "epoch": 1.83, "learning_rate": 1.8800566845273905e-05, "loss": 2.0083, "step": 2338 }, { "epoch": 1.83, "learning_rate": 1.879936404509479e-05, "loss": 1.7905, "step": 2339 }, { "epoch": 1.83, "learning_rate": 1.8798160680645007e-05, "loss": 1.6448, "step": 2340 }, { "epoch": 1.83, "learning_rate": 1.879695675200172e-05, "loss": 1.8249, "step": 2341 }, { "epoch": 1.83, "learning_rate": 1.879575225924214e-05, "loss": 1.9051, "step": 2342 }, { "epoch": 1.83, "learning_rate": 1.8794547202443507e-05, "loss": 2.0383, "step": 2343 }, { "epoch": 1.83, "learning_rate": 1.8793341581683086e-05, "loss": 1.6582, "step": 2344 }, { "epoch": 1.83, "learning_rate": 1.8792135397038195e-05, "loss": 2.1212, "step": 2345 }, { "epoch": 1.83, "learning_rate": 1.8790928648586185e-05, "loss": 1.9886, "step": 2346 }, { "epoch": 1.83, "learning_rate": 1.8789721336404442e-05, "loss": 1.817, "step": 2347 }, { "epoch": 1.84, "learning_rate": 1.8788513460570377e-05, "loss": 2.1077, "step": 2348 }, { "epoch": 1.84, "learning_rate": 1.8787305021161457e-05, "loss": 1.7482, "step": 2349 }, { "epoch": 1.84, "learning_rate": 1.8786096018255168e-05, "loss": 1.8817, "step": 2350 }, { "epoch": 1.84, "learning_rate": 1.8784886451929037e-05, "loss": 1.9196, "step": 2351 }, { "epoch": 1.84, "learning_rate": 1.878367632226064e-05, "loss": 1.7898, "step": 2352 }, { "epoch": 1.84, "learning_rate": 1.8782465629327563e-05, "loss": 1.747, "step": 2353 }, { "epoch": 1.84, "learning_rate": 1.8781254373207457e-05, "loss": 1.68, "step": 2354 }, { "epoch": 1.84, "learning_rate": 1.8780042553977987e-05, "loss": 1.6634, "step": 2355 }, { "epoch": 1.84, "learning_rate": 1.8778830171716868e-05, "loss": 2.0513, "step": 2356 }, { "epoch": 1.84, "learning_rate": 1.8777617226501842e-05, "loss": 1.9663, "step": 2357 }, { "epoch": 1.84, "learning_rate": 1.877640371841069e-05, "loss": 1.9739, "step": 2358 }, { "epoch": 1.84, "learning_rate": 1.877518964752123e-05, "loss": 1.8647, "step": 2359 }, { "epoch": 1.84, "learning_rate": 1.8773975013911322e-05, "loss": 1.961, "step": 2360 }, { "epoch": 1.85, "learning_rate": 1.8772759817658846e-05, "loss": 2.0372, "step": 2361 }, { "epoch": 1.85, "learning_rate": 1.8771544058841734e-05, "loss": 1.6643, "step": 2362 }, { "epoch": 1.85, "learning_rate": 1.877032773753795e-05, "loss": 2.2108, "step": 2363 }, { "epoch": 1.85, "learning_rate": 1.8769110853825488e-05, "loss": 2.0283, "step": 2364 }, { "epoch": 1.85, "learning_rate": 1.8767893407782384e-05, "loss": 1.9922, "step": 2365 }, { "epoch": 1.85, "learning_rate": 1.8766675399486704e-05, "loss": 1.8216, "step": 2366 }, { "epoch": 1.85, "learning_rate": 1.8765456829016565e-05, "loss": 2.1175, "step": 2367 }, { "epoch": 1.85, "learning_rate": 1.8764237696450097e-05, "loss": 1.5373, "step": 2368 }, { "epoch": 1.85, "learning_rate": 1.8763018001865484e-05, "loss": 2.0411, "step": 2369 }, { "epoch": 1.85, "learning_rate": 1.8761797745340945e-05, "loss": 1.8975, "step": 2370 }, { "epoch": 1.85, "learning_rate": 1.8760576926954723e-05, "loss": 1.8559, "step": 2371 }, { "epoch": 1.85, "learning_rate": 1.875935554678511e-05, "loss": 1.7577, "step": 2372 }, { "epoch": 1.85, "learning_rate": 1.8758133604910425e-05, "loss": 1.9766, "step": 2373 }, { "epoch": 1.86, "learning_rate": 1.875691110140903e-05, "loss": 1.7644, "step": 2374 }, { "epoch": 1.86, "learning_rate": 1.8755688036359314e-05, "loss": 1.7772, "step": 2375 }, { "epoch": 1.86, "learning_rate": 1.8754464409839713e-05, "loss": 1.9634, "step": 2376 }, { "epoch": 1.86, "learning_rate": 1.875324022192869e-05, "loss": 1.8831, "step": 2377 }, { "epoch": 1.86, "learning_rate": 1.8752015472704755e-05, "loss": 1.9478, "step": 2378 }, { "epoch": 1.86, "learning_rate": 1.8750790162246436e-05, "loss": 2.0573, "step": 2379 }, { "epoch": 1.86, "learning_rate": 1.8749564290632318e-05, "loss": 1.9028, "step": 2380 }, { "epoch": 1.86, "learning_rate": 1.8748337857941003e-05, "loss": 2.003, "step": 2381 }, { "epoch": 1.86, "learning_rate": 1.8747110864251138e-05, "loss": 1.7214, "step": 2382 }, { "epoch": 1.86, "learning_rate": 1.874588330964141e-05, "loss": 1.7798, "step": 2383 }, { "epoch": 1.86, "learning_rate": 1.874465519419054e-05, "loss": 1.9454, "step": 2384 }, { "epoch": 1.86, "learning_rate": 1.8743426517977278e-05, "loss": 1.7946, "step": 2385 }, { "epoch": 1.86, "learning_rate": 1.8742197281080413e-05, "loss": 2.1107, "step": 2386 }, { "epoch": 1.87, "learning_rate": 1.8740967483578772e-05, "loss": 1.9517, "step": 2387 }, { "epoch": 1.87, "learning_rate": 1.8739737125551226e-05, "loss": 1.8514, "step": 2388 }, { "epoch": 1.87, "learning_rate": 1.873850620707666e-05, "loss": 1.9152, "step": 2389 }, { "epoch": 1.87, "learning_rate": 1.8737274728234015e-05, "loss": 1.6937, "step": 2390 }, { "epoch": 1.87, "learning_rate": 1.873604268910226e-05, "loss": 1.9582, "step": 2391 }, { "epoch": 1.87, "learning_rate": 1.8734810089760403e-05, "loss": 1.8585, "step": 2392 }, { "epoch": 1.87, "learning_rate": 1.8733576930287484e-05, "loss": 1.7938, "step": 2393 }, { "epoch": 1.87, "learning_rate": 1.873234321076258e-05, "loss": 1.6897, "step": 2394 }, { "epoch": 1.87, "learning_rate": 1.8731108931264813e-05, "loss": 1.8322, "step": 2395 }, { "epoch": 1.87, "learning_rate": 1.872987409187332e-05, "loss": 1.8919, "step": 2396 }, { "epoch": 1.87, "learning_rate": 1.8728638692667298e-05, "loss": 1.6724, "step": 2397 }, { "epoch": 1.87, "learning_rate": 1.8727402733725958e-05, "loss": 1.9151, "step": 2398 }, { "epoch": 1.87, "learning_rate": 1.8726166215128568e-05, "loss": 1.978, "step": 2399 }, { "epoch": 1.88, "learning_rate": 1.8724929136954413e-05, "loss": 1.8512, "step": 2400 }, { "epoch": 1.88, "learning_rate": 1.872369149928283e-05, "loss": 2.0064, "step": 2401 }, { "epoch": 1.88, "learning_rate": 1.8722453302193174e-05, "loss": 1.8605, "step": 2402 }, { "epoch": 1.88, "learning_rate": 1.8721214545764854e-05, "loss": 1.6097, "step": 2403 }, { "epoch": 1.88, "learning_rate": 1.871997523007731e-05, "loss": 2.1286, "step": 2404 }, { "epoch": 1.88, "learning_rate": 1.8718735355210002e-05, "loss": 1.9801, "step": 2405 }, { "epoch": 1.88, "learning_rate": 1.871749492124245e-05, "loss": 1.5991, "step": 2406 }, { "epoch": 1.88, "learning_rate": 1.871625392825419e-05, "loss": 1.892, "step": 2407 }, { "epoch": 1.88, "learning_rate": 1.871501237632481e-05, "loss": 1.9874, "step": 2408 }, { "epoch": 1.88, "learning_rate": 1.8713770265533928e-05, "loss": 1.8164, "step": 2409 }, { "epoch": 1.88, "learning_rate": 1.8712527595961182e-05, "loss": 1.8465, "step": 2410 }, { "epoch": 1.88, "learning_rate": 1.8711284367686273e-05, "loss": 1.8112, "step": 2411 }, { "epoch": 1.89, "learning_rate": 1.871004058078892e-05, "loss": 1.8683, "step": 2412 }, { "epoch": 1.89, "learning_rate": 1.8708796235348882e-05, "loss": 1.5511, "step": 2413 }, { "epoch": 1.89, "learning_rate": 1.870755133144596e-05, "loss": 2.1897, "step": 2414 }, { "epoch": 1.89, "learning_rate": 1.8706305869159977e-05, "loss": 1.9901, "step": 2415 }, { "epoch": 1.89, "learning_rate": 1.8705059848570796e-05, "loss": 1.9271, "step": 2416 }, { "epoch": 1.89, "learning_rate": 1.8703813269758334e-05, "loss": 1.9429, "step": 2417 }, { "epoch": 1.89, "learning_rate": 1.870256613280252e-05, "loss": 1.7205, "step": 2418 }, { "epoch": 1.89, "learning_rate": 1.8701318437783333e-05, "loss": 1.9383, "step": 2419 }, { "epoch": 1.89, "learning_rate": 1.8700070184780775e-05, "loss": 1.866, "step": 2420 }, { "epoch": 1.89, "learning_rate": 1.8698821373874904e-05, "loss": 2.0276, "step": 2421 }, { "epoch": 1.89, "learning_rate": 1.869757200514579e-05, "loss": 1.6449, "step": 2422 }, { "epoch": 1.89, "learning_rate": 1.8696322078673558e-05, "loss": 1.6828, "step": 2423 }, { "epoch": 1.89, "learning_rate": 1.869507159453836e-05, "loss": 1.906, "step": 2424 }, { "epoch": 1.9, "learning_rate": 1.869382055282038e-05, "loss": 1.8803, "step": 2425 }, { "epoch": 1.9, "learning_rate": 1.8692568953599847e-05, "loss": 1.7399, "step": 2426 }, { "epoch": 1.9, "learning_rate": 1.869131679695702e-05, "loss": 1.8283, "step": 2427 }, { "epoch": 1.9, "learning_rate": 1.86900640829722e-05, "loss": 1.632, "step": 2428 }, { "epoch": 1.9, "learning_rate": 1.8688810811725707e-05, "loss": 2.0673, "step": 2429 }, { "epoch": 1.9, "learning_rate": 1.868755698329792e-05, "loss": 1.7393, "step": 2430 }, { "epoch": 1.9, "learning_rate": 1.868630259776924e-05, "loss": 2.0201, "step": 2431 }, { "epoch": 1.9, "learning_rate": 1.8685047655220107e-05, "loss": 1.8299, "step": 2432 }, { "epoch": 1.9, "learning_rate": 1.8683792155730992e-05, "loss": 2.2117, "step": 2433 }, { "epoch": 1.9, "learning_rate": 1.86825360993824e-05, "loss": 2.0498, "step": 2434 }, { "epoch": 1.9, "learning_rate": 1.8681279486254892e-05, "loss": 1.7976, "step": 2435 }, { "epoch": 1.9, "learning_rate": 1.8680022316429043e-05, "loss": 1.8501, "step": 2436 }, { "epoch": 1.9, "learning_rate": 1.8678764589985468e-05, "loss": 1.9201, "step": 2437 }, { "epoch": 1.91, "learning_rate": 1.8677506307004823e-05, "loss": 1.863, "step": 2438 }, { "epoch": 1.91, "learning_rate": 1.8676247467567795e-05, "loss": 1.7947, "step": 2439 }, { "epoch": 1.91, "learning_rate": 1.867498807175511e-05, "loss": 1.9406, "step": 2440 }, { "epoch": 1.91, "learning_rate": 1.867372811964753e-05, "loss": 1.7839, "step": 2441 }, { "epoch": 1.91, "learning_rate": 1.8672467611325848e-05, "loss": 1.7921, "step": 2442 }, { "epoch": 1.91, "learning_rate": 1.8671206546870896e-05, "loss": 1.9711, "step": 2443 }, { "epoch": 1.91, "learning_rate": 1.8669944926363545e-05, "loss": 1.7277, "step": 2444 }, { "epoch": 1.91, "learning_rate": 1.8668682749884696e-05, "loss": 1.7092, "step": 2445 }, { "epoch": 1.91, "learning_rate": 1.8667420017515284e-05, "loss": 2.1389, "step": 2446 }, { "epoch": 1.91, "learning_rate": 1.866615672933629e-05, "loss": 1.9438, "step": 2447 }, { "epoch": 1.91, "learning_rate": 1.866489288542872e-05, "loss": 2.0866, "step": 2448 }, { "epoch": 1.91, "learning_rate": 1.866362848587362e-05, "loss": 1.8789, "step": 2449 }, { "epoch": 1.91, "learning_rate": 1.8662363530752076e-05, "loss": 2.1075, "step": 2450 }, { "epoch": 1.92, "learning_rate": 1.8661098020145196e-05, "loss": 1.9244, "step": 2451 }, { "epoch": 1.92, "learning_rate": 1.8659831954134136e-05, "loss": 1.9767, "step": 2452 }, { "epoch": 1.92, "learning_rate": 1.8658565332800087e-05, "loss": 1.698, "step": 2453 }, { "epoch": 1.92, "learning_rate": 1.8657298156224273e-05, "loss": 1.6985, "step": 2454 }, { "epoch": 1.92, "learning_rate": 1.865603042448795e-05, "loss": 1.8861, "step": 2455 }, { "epoch": 1.92, "learning_rate": 1.8654762137672414e-05, "loss": 1.6054, "step": 2456 }, { "epoch": 1.92, "learning_rate": 1.8653493295858997e-05, "loss": 1.687, "step": 2457 }, { "epoch": 1.92, "learning_rate": 1.8652223899129062e-05, "loss": 2.011, "step": 2458 }, { "epoch": 1.92, "learning_rate": 1.865095394756401e-05, "loss": 1.7811, "step": 2459 }, { "epoch": 1.92, "learning_rate": 1.864968344124529e-05, "loss": 1.8783, "step": 2460 }, { "epoch": 1.92, "learning_rate": 1.8648412380254356e-05, "loss": 1.754, "step": 2461 }, { "epoch": 1.92, "learning_rate": 1.8647140764672735e-05, "loss": 1.892, "step": 2462 }, { "epoch": 1.92, "learning_rate": 1.8645868594581956e-05, "loss": 1.6746, "step": 2463 }, { "epoch": 1.93, "learning_rate": 1.8644595870063608e-05, "loss": 1.702, "step": 2464 }, { "epoch": 1.93, "learning_rate": 1.86433225911993e-05, "loss": 1.748, "step": 2465 }, { "epoch": 1.93, "learning_rate": 1.8642048758070686e-05, "loss": 1.9034, "step": 2466 }, { "epoch": 1.93, "learning_rate": 1.8640774370759455e-05, "loss": 1.8064, "step": 2467 }, { "epoch": 1.93, "learning_rate": 1.8639499429347324e-05, "loss": 2.2017, "step": 2468 }, { "epoch": 1.93, "learning_rate": 1.863822393391605e-05, "loss": 1.9733, "step": 2469 }, { "epoch": 1.93, "learning_rate": 1.8636947884547433e-05, "loss": 1.8568, "step": 2470 }, { "epoch": 1.93, "learning_rate": 1.863567128132329e-05, "loss": 1.7542, "step": 2471 }, { "epoch": 1.93, "learning_rate": 1.8634394124325493e-05, "loss": 1.9379, "step": 2472 }, { "epoch": 1.93, "learning_rate": 1.863311641363594e-05, "loss": 2.0424, "step": 2473 }, { "epoch": 1.93, "learning_rate": 1.863183814933656e-05, "loss": 1.5208, "step": 2474 }, { "epoch": 1.93, "learning_rate": 1.8630559331509338e-05, "loss": 1.67, "step": 2475 }, { "epoch": 1.94, "learning_rate": 1.8629279960236262e-05, "loss": 1.9389, "step": 2476 }, { "epoch": 1.94, "learning_rate": 1.8628000035599384e-05, "loss": 2.1601, "step": 2477 }, { "epoch": 1.94, "learning_rate": 1.8626719557680778e-05, "loss": 1.968, "step": 2478 }, { "epoch": 1.94, "learning_rate": 1.8625438526562562e-05, "loss": 1.7852, "step": 2479 }, { "epoch": 1.94, "learning_rate": 1.862415694232687e-05, "loss": 1.9172, "step": 2480 }, { "epoch": 1.94, "learning_rate": 1.8622874805055898e-05, "loss": 1.8631, "step": 2481 }, { "epoch": 1.94, "learning_rate": 1.8621592114831863e-05, "loss": 1.9185, "step": 2482 }, { "epoch": 1.94, "learning_rate": 1.8620308871737013e-05, "loss": 1.8326, "step": 2483 }, { "epoch": 1.94, "learning_rate": 1.8619025075853644e-05, "loss": 1.8182, "step": 2484 }, { "epoch": 1.94, "learning_rate": 1.8617740727264077e-05, "loss": 1.7615, "step": 2485 }, { "epoch": 1.94, "learning_rate": 1.861645582605067e-05, "loss": 1.8571, "step": 2486 }, { "epoch": 1.94, "learning_rate": 1.861517037229583e-05, "loss": 2.1205, "step": 2487 }, { "epoch": 1.94, "learning_rate": 1.861388436608198e-05, "loss": 1.6981, "step": 2488 }, { "epoch": 1.95, "learning_rate": 1.8612597807491583e-05, "loss": 1.8424, "step": 2489 }, { "epoch": 1.95, "learning_rate": 1.861131069660715e-05, "loss": 1.8968, "step": 2490 }, { "epoch": 1.95, "learning_rate": 1.8610023033511215e-05, "loss": 1.8642, "step": 2491 }, { "epoch": 1.95, "learning_rate": 1.860873481828635e-05, "loss": 1.8868, "step": 2492 }, { "epoch": 1.95, "learning_rate": 1.8607446051015165e-05, "loss": 1.9326, "step": 2493 }, { "epoch": 1.95, "learning_rate": 1.8606156731780306e-05, "loss": 2.0344, "step": 2494 }, { "epoch": 1.95, "learning_rate": 1.8604866860664444e-05, "loss": 1.9389, "step": 2495 }, { "epoch": 1.95, "learning_rate": 1.8603576437750307e-05, "loss": 2.0033, "step": 2496 }, { "epoch": 1.95, "learning_rate": 1.860228546312063e-05, "loss": 1.8122, "step": 2497 }, { "epoch": 1.95, "learning_rate": 1.860099393685821e-05, "loss": 2.0111, "step": 2498 }, { "epoch": 1.95, "learning_rate": 1.8599701859045864e-05, "loss": 1.8628, "step": 2499 }, { "epoch": 1.95, "learning_rate": 1.8598409229766445e-05, "loss": 2.0069, "step": 2500 }, { "epoch": 1.95, "learning_rate": 1.859711604910285e-05, "loss": 1.9018, "step": 2501 }, { "epoch": 1.96, "learning_rate": 1.8595822317138005e-05, "loss": 1.8907, "step": 2502 }, { "epoch": 1.96, "learning_rate": 1.8594528033954864e-05, "loss": 1.8255, "step": 2503 }, { "epoch": 1.96, "learning_rate": 1.8593233199636437e-05, "loss": 1.8371, "step": 2504 }, { "epoch": 1.96, "learning_rate": 1.8591937814265747e-05, "loss": 1.67, "step": 2505 }, { "epoch": 1.96, "learning_rate": 1.859064187792587e-05, "loss": 1.8528, "step": 2506 }, { "epoch": 1.96, "learning_rate": 1.85893453906999e-05, "loss": 2.0871, "step": 2507 }, { "epoch": 1.96, "learning_rate": 1.8588048352670987e-05, "loss": 1.8968, "step": 2508 }, { "epoch": 1.96, "learning_rate": 1.85867507639223e-05, "loss": 2.0879, "step": 2509 }, { "epoch": 1.96, "learning_rate": 1.858545262453705e-05, "loss": 1.8681, "step": 2510 }, { "epoch": 1.96, "learning_rate": 1.8584153934598477e-05, "loss": 1.6556, "step": 2511 }, { "epoch": 1.96, "learning_rate": 1.8582854694189866e-05, "loss": 1.8945, "step": 2512 }, { "epoch": 1.96, "learning_rate": 1.858155490339453e-05, "loss": 2.1159, "step": 2513 }, { "epoch": 1.96, "learning_rate": 1.8580254562295822e-05, "loss": 1.898, "step": 2514 }, { "epoch": 1.97, "learning_rate": 1.8578953670977126e-05, "loss": 1.7436, "step": 2515 }, { "epoch": 1.97, "learning_rate": 1.8577652229521868e-05, "loss": 2.0593, "step": 2516 }, { "epoch": 1.97, "learning_rate": 1.8576350238013497e-05, "loss": 1.61, "step": 2517 }, { "epoch": 1.97, "learning_rate": 1.857504769653551e-05, "loss": 2.0102, "step": 2518 }, { "epoch": 1.97, "learning_rate": 1.8573744605171432e-05, "loss": 1.7788, "step": 2519 }, { "epoch": 1.97, "learning_rate": 1.857244096400483e-05, "loss": 1.8007, "step": 2520 }, { "epoch": 1.97, "learning_rate": 1.8571136773119295e-05, "loss": 2.0077, "step": 2521 }, { "epoch": 1.97, "learning_rate": 1.8569832032598463e-05, "loss": 1.8692, "step": 2522 }, { "epoch": 1.97, "learning_rate": 1.8568526742526007e-05, "loss": 1.694, "step": 2523 }, { "epoch": 1.97, "learning_rate": 1.856722090298562e-05, "loss": 1.8519, "step": 2524 }, { "epoch": 1.97, "learning_rate": 1.856591451406105e-05, "loss": 1.7955, "step": 2525 }, { "epoch": 1.97, "learning_rate": 1.8564607575836065e-05, "loss": 1.8706, "step": 2526 }, { "epoch": 1.97, "learning_rate": 1.8563300088394477e-05, "loss": 1.8368, "step": 2527 }, { "epoch": 1.98, "learning_rate": 1.856199205182013e-05, "loss": 2.2377, "step": 2528 }, { "epoch": 1.98, "learning_rate": 1.8560683466196905e-05, "loss": 1.5564, "step": 2529 }, { "epoch": 1.98, "learning_rate": 1.8559374331608712e-05, "loss": 2.1143, "step": 2530 }, { "epoch": 1.98, "learning_rate": 1.8558064648139504e-05, "loss": 1.8894, "step": 2531 }, { "epoch": 1.98, "learning_rate": 1.855675441587327e-05, "loss": 1.8805, "step": 2532 }, { "epoch": 1.98, "learning_rate": 1.8555443634894023e-05, "loss": 1.7826, "step": 2533 }, { "epoch": 1.98, "learning_rate": 1.8554132305285823e-05, "loss": 1.7141, "step": 2534 }, { "epoch": 1.98, "learning_rate": 1.855282042713276e-05, "loss": 1.7963, "step": 2535 }, { "epoch": 1.98, "learning_rate": 1.8551508000518962e-05, "loss": 1.9325, "step": 2536 }, { "epoch": 1.98, "learning_rate": 1.855019502552858e-05, "loss": 1.9631, "step": 2537 }, { "epoch": 1.98, "learning_rate": 1.8548881502245827e-05, "loss": 1.9936, "step": 2538 }, { "epoch": 1.98, "learning_rate": 1.854756743075492e-05, "loss": 1.9707, "step": 2539 }, { "epoch": 1.99, "learning_rate": 1.854625281114013e-05, "loss": 1.8151, "step": 2540 }, { "epoch": 1.99, "learning_rate": 1.8544937643485764e-05, "loss": 2.0239, "step": 2541 }, { "epoch": 1.99, "learning_rate": 1.8543621927876154e-05, "loss": 1.9636, "step": 2542 }, { "epoch": 1.99, "learning_rate": 1.854230566439567e-05, "loss": 2.0047, "step": 2543 }, { "epoch": 1.99, "learning_rate": 1.854098885312872e-05, "loss": 1.7737, "step": 2544 }, { "epoch": 1.99, "learning_rate": 1.8539671494159752e-05, "loss": 2.021, "step": 2545 }, { "epoch": 1.99, "learning_rate": 1.8538353587573237e-05, "loss": 1.7273, "step": 2546 }, { "epoch": 1.99, "learning_rate": 1.8537035133453688e-05, "loss": 1.7098, "step": 2547 }, { "epoch": 1.99, "learning_rate": 1.8535716131885656e-05, "loss": 1.7696, "step": 2548 }, { "epoch": 1.99, "learning_rate": 1.853439658295372e-05, "loss": 1.8881, "step": 2549 }, { "epoch": 1.99, "learning_rate": 1.8533076486742504e-05, "loss": 1.9583, "step": 2550 }, { "epoch": 1.99, "learning_rate": 1.8531755843336653e-05, "loss": 1.9402, "step": 2551 }, { "epoch": 1.99, "learning_rate": 1.853043465282086e-05, "loss": 1.809, "step": 2552 }, { "epoch": 2.0, "learning_rate": 1.8529112915279843e-05, "loss": 1.671, "step": 2553 }, { "epoch": 2.0, "learning_rate": 1.8527790630798367e-05, "loss": 1.8365, "step": 2554 }, { "epoch": 2.0, "learning_rate": 1.852646779946122e-05, "loss": 1.684, "step": 2555 }, { "epoch": 2.0, "learning_rate": 1.8525144421353233e-05, "loss": 2.1648, "step": 2556 }, { "epoch": 2.0, "learning_rate": 1.852382049655927e-05, "loss": 2.0089, "step": 2557 }, { "epoch": 2.0, "learning_rate": 1.8522496025164227e-05, "loss": 1.6549, "step": 2558 }, { "epoch": 2.0, "learning_rate": 1.8521171007253037e-05, "loss": 1.6786, "step": 2559 }, { "epoch": 2.0, "learning_rate": 1.8519845442910673e-05, "loss": 1.4183, "step": 2560 }, { "epoch": 2.0, "learning_rate": 1.8518519332222132e-05, "loss": 1.4676, "step": 2561 }, { "epoch": 2.0, "learning_rate": 1.851719267527246e-05, "loss": 1.5418, "step": 2562 }, { "epoch": 2.0, "learning_rate": 1.851586547214672e-05, "loss": 1.6907, "step": 2563 }, { "epoch": 2.0, "learning_rate": 1.8514537722930032e-05, "loss": 1.3141, "step": 2564 }, { "epoch": 2.0, "learning_rate": 1.8513209427707534e-05, "loss": 1.5468, "step": 2565 }, { "epoch": 2.01, "learning_rate": 1.8511880586564407e-05, "loss": 1.5701, "step": 2566 }, { "epoch": 2.01, "learning_rate": 1.851055119958586e-05, "loss": 1.5511, "step": 2567 }, { "epoch": 2.01, "learning_rate": 1.8509221266857147e-05, "loss": 1.2779, "step": 2568 }, { "epoch": 2.01, "learning_rate": 1.8507890788463552e-05, "loss": 1.4152, "step": 2569 }, { "epoch": 2.01, "learning_rate": 1.8506559764490387e-05, "loss": 1.5257, "step": 2570 }, { "epoch": 2.01, "learning_rate": 1.850522819502301e-05, "loss": 1.6004, "step": 2571 }, { "epoch": 2.01, "learning_rate": 1.8503896080146813e-05, "loss": 1.446, "step": 2572 }, { "epoch": 2.01, "learning_rate": 1.850256341994721e-05, "loss": 1.6629, "step": 2573 }, { "epoch": 2.01, "learning_rate": 1.850123021450967e-05, "loss": 1.3325, "step": 2574 }, { "epoch": 2.01, "learning_rate": 1.8499896463919682e-05, "loss": 1.4879, "step": 2575 }, { "epoch": 2.01, "learning_rate": 1.8498562168262775e-05, "loss": 1.2501, "step": 2576 }, { "epoch": 2.01, "learning_rate": 1.849722732762451e-05, "loss": 1.3837, "step": 2577 }, { "epoch": 2.01, "learning_rate": 1.849589194209049e-05, "loss": 1.1103, "step": 2578 }, { "epoch": 2.02, "learning_rate": 1.8494556011746345e-05, "loss": 1.6067, "step": 2579 }, { "epoch": 2.02, "learning_rate": 1.849321953667774e-05, "loss": 1.5063, "step": 2580 }, { "epoch": 2.02, "learning_rate": 1.8491882516970387e-05, "loss": 1.6023, "step": 2581 }, { "epoch": 2.02, "learning_rate": 1.849054495271002e-05, "loss": 1.4011, "step": 2582 }, { "epoch": 2.02, "learning_rate": 1.848920684398241e-05, "loss": 1.1725, "step": 2583 }, { "epoch": 2.02, "learning_rate": 1.8487868190873364e-05, "loss": 1.3998, "step": 2584 }, { "epoch": 2.02, "learning_rate": 1.848652899346873e-05, "loss": 1.4578, "step": 2585 }, { "epoch": 2.02, "learning_rate": 1.848518925185438e-05, "loss": 1.4955, "step": 2586 }, { "epoch": 2.02, "learning_rate": 1.8483848966116234e-05, "loss": 1.1744, "step": 2587 }, { "epoch": 2.02, "learning_rate": 1.8482508136340236e-05, "loss": 1.4395, "step": 2588 }, { "epoch": 2.02, "learning_rate": 1.8481166762612364e-05, "loss": 1.438, "step": 2589 }, { "epoch": 2.02, "learning_rate": 1.847982484501864e-05, "loss": 1.1219, "step": 2590 }, { "epoch": 2.03, "learning_rate": 1.8478482383645115e-05, "loss": 1.1971, "step": 2591 }, { "epoch": 2.03, "learning_rate": 1.847713937857788e-05, "loss": 1.3304, "step": 2592 }, { "epoch": 2.03, "learning_rate": 1.847579582990305e-05, "loss": 1.2524, "step": 2593 }, { "epoch": 2.03, "learning_rate": 1.8474451737706784e-05, "loss": 1.8908, "step": 2594 }, { "epoch": 2.03, "learning_rate": 1.847310710207528e-05, "loss": 1.3297, "step": 2595 }, { "epoch": 2.03, "learning_rate": 1.8471761923094752e-05, "loss": 1.24, "step": 2596 }, { "epoch": 2.03, "learning_rate": 1.8470416200851474e-05, "loss": 1.298, "step": 2597 }, { "epoch": 2.03, "learning_rate": 1.8469069935431732e-05, "loss": 1.1938, "step": 2598 }, { "epoch": 2.03, "learning_rate": 1.8467723126921865e-05, "loss": 1.4839, "step": 2599 }, { "epoch": 2.03, "learning_rate": 1.846637577540824e-05, "loss": 1.4332, "step": 2600 }, { "epoch": 2.03, "learning_rate": 1.8465027880977248e-05, "loss": 1.3535, "step": 2601 }, { "epoch": 2.03, "learning_rate": 1.846367944371533e-05, "loss": 1.5541, "step": 2602 }, { "epoch": 2.03, "learning_rate": 1.8462330463708956e-05, "loss": 1.6077, "step": 2603 }, { "epoch": 2.04, "learning_rate": 1.8460980941044636e-05, "loss": 1.606, "step": 2604 }, { "epoch": 2.04, "learning_rate": 1.84596308758089e-05, "loss": 1.319, "step": 2605 }, { "epoch": 2.04, "learning_rate": 1.845828026808833e-05, "loss": 1.4476, "step": 2606 }, { "epoch": 2.04, "learning_rate": 1.8456929117969533e-05, "loss": 1.2948, "step": 2607 }, { "epoch": 2.04, "learning_rate": 1.845557742553915e-05, "loss": 1.4613, "step": 2608 }, { "epoch": 2.04, "learning_rate": 1.845422519088387e-05, "loss": 1.4956, "step": 2609 }, { "epoch": 2.04, "learning_rate": 1.84528724140904e-05, "loss": 1.4191, "step": 2610 }, { "epoch": 2.04, "learning_rate": 1.8451519095245483e-05, "loss": 1.4716, "step": 2611 }, { "epoch": 2.04, "learning_rate": 1.8450165234435914e-05, "loss": 1.3944, "step": 2612 }, { "epoch": 2.04, "learning_rate": 1.84488108317485e-05, "loss": 1.4317, "step": 2613 }, { "epoch": 2.04, "learning_rate": 1.8447455887270105e-05, "loss": 1.0277, "step": 2614 }, { "epoch": 2.04, "learning_rate": 1.844610040108761e-05, "loss": 1.6055, "step": 2615 }, { "epoch": 2.04, "learning_rate": 1.844474437328794e-05, "loss": 1.4721, "step": 2616 }, { "epoch": 2.05, "learning_rate": 1.8443387803958044e-05, "loss": 1.3714, "step": 2617 }, { "epoch": 2.05, "learning_rate": 1.8442030693184923e-05, "loss": 1.4621, "step": 2618 }, { "epoch": 2.05, "learning_rate": 1.84406730410556e-05, "loss": 1.3219, "step": 2619 }, { "epoch": 2.05, "learning_rate": 1.843931484765714e-05, "loss": 1.2571, "step": 2620 }, { "epoch": 2.05, "learning_rate": 1.843795611307663e-05, "loss": 1.3297, "step": 2621 }, { "epoch": 2.05, "learning_rate": 1.8436596837401208e-05, "loss": 1.2873, "step": 2622 }, { "epoch": 2.05, "learning_rate": 1.843523702071804e-05, "loss": 1.4428, "step": 2623 }, { "epoch": 2.05, "learning_rate": 1.8433876663114323e-05, "loss": 1.6376, "step": 2624 }, { "epoch": 2.05, "learning_rate": 1.843251576467729e-05, "loss": 1.4401, "step": 2625 }, { "epoch": 2.05, "learning_rate": 1.8431154325494214e-05, "loss": 1.4332, "step": 2626 }, { "epoch": 2.05, "learning_rate": 1.8429792345652396e-05, "loss": 1.4464, "step": 2627 }, { "epoch": 2.05, "learning_rate": 1.842842982523918e-05, "loss": 1.4484, "step": 2628 }, { "epoch": 2.05, "learning_rate": 1.8427066764341934e-05, "loss": 1.4263, "step": 2629 }, { "epoch": 2.06, "learning_rate": 1.8425703163048066e-05, "loss": 1.5681, "step": 2630 }, { "epoch": 2.06, "learning_rate": 1.8424339021445023e-05, "loss": 1.4315, "step": 2631 }, { "epoch": 2.06, "learning_rate": 1.842297433962028e-05, "loss": 1.0051, "step": 2632 }, { "epoch": 2.06, "learning_rate": 1.842160911766135e-05, "loss": 1.5092, "step": 2633 }, { "epoch": 2.06, "learning_rate": 1.8420243355655776e-05, "loss": 1.4591, "step": 2634 }, { "epoch": 2.06, "learning_rate": 1.8418877053691143e-05, "loss": 1.1865, "step": 2635 }, { "epoch": 2.06, "learning_rate": 1.8417510211855065e-05, "loss": 1.4215, "step": 2636 }, { "epoch": 2.06, "learning_rate": 1.8416142830235193e-05, "loss": 1.4984, "step": 2637 }, { "epoch": 2.06, "learning_rate": 1.841477490891921e-05, "loss": 1.2663, "step": 2638 }, { "epoch": 2.06, "learning_rate": 1.8413406447994844e-05, "loss": 1.3654, "step": 2639 }, { "epoch": 2.06, "learning_rate": 1.841203744754984e-05, "loss": 1.1319, "step": 2640 }, { "epoch": 2.06, "learning_rate": 1.841066790767199e-05, "loss": 1.3136, "step": 2641 }, { "epoch": 2.06, "learning_rate": 1.840929782844912e-05, "loss": 1.2142, "step": 2642 }, { "epoch": 2.07, "learning_rate": 1.8407927209969084e-05, "loss": 1.7206, "step": 2643 }, { "epoch": 2.07, "learning_rate": 1.840655605231978e-05, "loss": 1.3218, "step": 2644 }, { "epoch": 2.07, "learning_rate": 1.8405184355589128e-05, "loss": 1.6143, "step": 2645 }, { "epoch": 2.07, "learning_rate": 1.8403812119865097e-05, "loss": 1.0901, "step": 2646 }, { "epoch": 2.07, "learning_rate": 1.8402439345235676e-05, "loss": 1.5446, "step": 2647 }, { "epoch": 2.07, "learning_rate": 1.8401066031788902e-05, "loss": 1.3592, "step": 2648 }, { "epoch": 2.07, "learning_rate": 1.8399692179612843e-05, "loss": 1.2932, "step": 2649 }, { "epoch": 2.07, "learning_rate": 1.839831778879559e-05, "loss": 1.4749, "step": 2650 }, { "epoch": 2.07, "learning_rate": 1.839694285942528e-05, "loss": 1.3146, "step": 2651 }, { "epoch": 2.07, "learning_rate": 1.839556739159009e-05, "loss": 1.4282, "step": 2652 }, { "epoch": 2.07, "learning_rate": 1.839419138537822e-05, "loss": 1.3887, "step": 2653 }, { "epoch": 2.07, "learning_rate": 1.8392814840877897e-05, "loss": 1.3286, "step": 2654 }, { "epoch": 2.08, "learning_rate": 1.8391437758177408e-05, "loss": 1.3287, "step": 2655 }, { "epoch": 2.08, "learning_rate": 1.8390060137365057e-05, "loss": 1.4555, "step": 2656 }, { "epoch": 2.08, "learning_rate": 1.8388681978529185e-05, "loss": 1.2621, "step": 2657 }, { "epoch": 2.08, "learning_rate": 1.8387303281758165e-05, "loss": 1.5357, "step": 2658 }, { "epoch": 2.08, "learning_rate": 1.8385924047140408e-05, "loss": 1.5832, "step": 2659 }, { "epoch": 2.08, "learning_rate": 1.8384544274764363e-05, "loss": 1.3068, "step": 2660 }, { "epoch": 2.08, "learning_rate": 1.8383163964718508e-05, "loss": 1.6974, "step": 2661 }, { "epoch": 2.08, "learning_rate": 1.8381783117091355e-05, "loss": 1.4434, "step": 2662 }, { "epoch": 2.08, "learning_rate": 1.8380401731971457e-05, "loss": 1.4474, "step": 2663 }, { "epoch": 2.08, "learning_rate": 1.8379019809447395e-05, "loss": 1.3544, "step": 2664 }, { "epoch": 2.08, "learning_rate": 1.8377637349607783e-05, "loss": 1.4366, "step": 2665 }, { "epoch": 2.08, "learning_rate": 1.8376254352541282e-05, "loss": 1.585, "step": 2666 }, { "epoch": 2.08, "learning_rate": 1.837487081833657e-05, "loss": 1.1993, "step": 2667 }, { "epoch": 2.09, "learning_rate": 1.837348674708237e-05, "loss": 1.4452, "step": 2668 }, { "epoch": 2.09, "learning_rate": 1.837210213886744e-05, "loss": 1.5022, "step": 2669 }, { "epoch": 2.09, "learning_rate": 1.8370716993780566e-05, "loss": 1.3203, "step": 2670 }, { "epoch": 2.09, "learning_rate": 1.8369331311910574e-05, "loss": 1.3726, "step": 2671 }, { "epoch": 2.09, "learning_rate": 1.8367945093346328e-05, "loss": 1.4676, "step": 2672 }, { "epoch": 2.09, "learning_rate": 1.836655833817671e-05, "loss": 1.3559, "step": 2673 }, { "epoch": 2.09, "learning_rate": 1.8365171046490656e-05, "loss": 1.4062, "step": 2674 }, { "epoch": 2.09, "learning_rate": 1.8363783218377124e-05, "loss": 1.4456, "step": 2675 }, { "epoch": 2.09, "learning_rate": 1.836239485392511e-05, "loss": 1.3887, "step": 2676 }, { "epoch": 2.09, "learning_rate": 1.8361005953223648e-05, "loss": 1.4562, "step": 2677 }, { "epoch": 2.09, "learning_rate": 1.8359616516361805e-05, "loss": 1.6503, "step": 2678 }, { "epoch": 2.09, "learning_rate": 1.8358226543428676e-05, "loss": 1.6349, "step": 2679 }, { "epoch": 2.09, "learning_rate": 1.8356836034513393e-05, "loss": 1.5115, "step": 2680 }, { "epoch": 2.1, "learning_rate": 1.8355444989705128e-05, "loss": 1.581, "step": 2681 }, { "epoch": 2.1, "learning_rate": 1.8354053409093077e-05, "loss": 1.0008, "step": 2682 }, { "epoch": 2.1, "learning_rate": 1.835266129276649e-05, "loss": 1.3171, "step": 2683 }, { "epoch": 2.1, "learning_rate": 1.8351268640814625e-05, "loss": 1.5838, "step": 2684 }, { "epoch": 2.1, "learning_rate": 1.8349875453326796e-05, "loss": 1.5026, "step": 2685 }, { "epoch": 2.1, "learning_rate": 1.834848173039234e-05, "loss": 1.2818, "step": 2686 }, { "epoch": 2.1, "learning_rate": 1.834708747210063e-05, "loss": 1.4264, "step": 2687 }, { "epoch": 2.1, "learning_rate": 1.834569267854108e-05, "loss": 1.4531, "step": 2688 }, { "epoch": 2.1, "learning_rate": 1.8344297349803125e-05, "loss": 1.5474, "step": 2689 }, { "epoch": 2.1, "learning_rate": 1.834290148597625e-05, "loss": 1.392, "step": 2690 }, { "epoch": 2.1, "learning_rate": 1.8341505087149957e-05, "loss": 1.4824, "step": 2691 }, { "epoch": 2.1, "learning_rate": 1.83401081534138e-05, "loss": 1.4734, "step": 2692 }, { "epoch": 2.1, "learning_rate": 1.8338710684857358e-05, "loss": 1.4403, "step": 2693 }, { "epoch": 2.11, "learning_rate": 1.8337312681570246e-05, "loss": 1.3987, "step": 2694 }, { "epoch": 2.11, "learning_rate": 1.8335914143642107e-05, "loss": 1.6732, "step": 2695 }, { "epoch": 2.11, "learning_rate": 1.8334515071162633e-05, "loss": 1.5234, "step": 2696 }, { "epoch": 2.11, "learning_rate": 1.8333115464221536e-05, "loss": 1.3349, "step": 2697 }, { "epoch": 2.11, "learning_rate": 1.8331715322908567e-05, "loss": 1.4833, "step": 2698 }, { "epoch": 2.11, "learning_rate": 1.833031464731351e-05, "loss": 1.1528, "step": 2699 }, { "epoch": 2.11, "learning_rate": 1.832891343752619e-05, "loss": 1.4588, "step": 2700 }, { "epoch": 2.11, "learning_rate": 1.8327511693636463e-05, "loss": 1.3007, "step": 2701 }, { "epoch": 2.11, "learning_rate": 1.832610941573421e-05, "loss": 1.5978, "step": 2702 }, { "epoch": 2.11, "learning_rate": 1.8324706603909362e-05, "loss": 1.4482, "step": 2703 }, { "epoch": 2.11, "learning_rate": 1.832330325825187e-05, "loss": 1.5633, "step": 2704 }, { "epoch": 2.11, "learning_rate": 1.8321899378851727e-05, "loss": 1.2733, "step": 2705 }, { "epoch": 2.11, "learning_rate": 1.832049496579896e-05, "loss": 1.2877, "step": 2706 }, { "epoch": 2.12, "learning_rate": 1.831909001918363e-05, "loss": 1.3059, "step": 2707 }, { "epoch": 2.12, "learning_rate": 1.831768453909583e-05, "loss": 1.3773, "step": 2708 }, { "epoch": 2.12, "learning_rate": 1.8316278525625685e-05, "loss": 1.3231, "step": 2709 }, { "epoch": 2.12, "learning_rate": 1.831487197886336e-05, "loss": 1.4683, "step": 2710 }, { "epoch": 2.12, "learning_rate": 1.8313464898899053e-05, "loss": 1.3007, "step": 2711 }, { "epoch": 2.12, "learning_rate": 1.8312057285822987e-05, "loss": 1.5557, "step": 2712 }, { "epoch": 2.12, "learning_rate": 1.831064913972544e-05, "loss": 1.3395, "step": 2713 }, { "epoch": 2.12, "learning_rate": 1.8309240460696704e-05, "loss": 1.2155, "step": 2714 }, { "epoch": 2.12, "learning_rate": 1.8307831248827112e-05, "loss": 1.4091, "step": 2715 }, { "epoch": 2.12, "learning_rate": 1.8306421504207033e-05, "loss": 1.4625, "step": 2716 }, { "epoch": 2.12, "learning_rate": 1.8305011226926864e-05, "loss": 1.2122, "step": 2717 }, { "epoch": 2.12, "learning_rate": 1.830360041707705e-05, "loss": 1.6414, "step": 2718 }, { "epoch": 2.13, "learning_rate": 1.8302189074748054e-05, "loss": 1.4293, "step": 2719 }, { "epoch": 2.13, "learning_rate": 1.830077720003038e-05, "loss": 1.5253, "step": 2720 }, { "epoch": 2.13, "learning_rate": 1.8299364793014574e-05, "loss": 1.6291, "step": 2721 }, { "epoch": 2.13, "learning_rate": 1.82979518537912e-05, "loss": 1.3083, "step": 2722 }, { "epoch": 2.13, "learning_rate": 1.8296538382450867e-05, "loss": 1.1785, "step": 2723 }, { "epoch": 2.13, "learning_rate": 1.8295124379084215e-05, "loss": 1.4325, "step": 2724 }, { "epoch": 2.13, "learning_rate": 1.829370984378192e-05, "loss": 1.5289, "step": 2725 }, { "epoch": 2.13, "learning_rate": 1.8292294776634695e-05, "loss": 1.2311, "step": 2726 }, { "epoch": 2.13, "learning_rate": 1.8290879177733276e-05, "loss": 1.1389, "step": 2727 }, { "epoch": 2.13, "learning_rate": 1.8289463047168443e-05, "loss": 1.4706, "step": 2728 }, { "epoch": 2.13, "learning_rate": 1.8288046385031007e-05, "loss": 1.5159, "step": 2729 }, { "epoch": 2.13, "learning_rate": 1.8286629191411816e-05, "loss": 1.5632, "step": 2730 }, { "epoch": 2.13, "learning_rate": 1.8285211466401744e-05, "loss": 1.5537, "step": 2731 }, { "epoch": 2.14, "learning_rate": 1.828379321009171e-05, "loss": 1.3318, "step": 2732 }, { "epoch": 2.14, "learning_rate": 1.8282374422572657e-05, "loss": 1.2775, "step": 2733 }, { "epoch": 2.14, "learning_rate": 1.8280955103935568e-05, "loss": 1.1994, "step": 2734 }, { "epoch": 2.14, "learning_rate": 1.827953525427146e-05, "loss": 1.3734, "step": 2735 }, { "epoch": 2.14, "learning_rate": 1.8278114873671383e-05, "loss": 1.6122, "step": 2736 }, { "epoch": 2.14, "learning_rate": 1.827669396222642e-05, "loss": 1.327, "step": 2737 }, { "epoch": 2.14, "learning_rate": 1.8275272520027683e-05, "loss": 1.2403, "step": 2738 }, { "epoch": 2.14, "learning_rate": 1.8273850547166338e-05, "loss": 1.6276, "step": 2739 }, { "epoch": 2.14, "learning_rate": 1.8272428043733556e-05, "loss": 1.6082, "step": 2740 }, { "epoch": 2.14, "learning_rate": 1.8271005009820566e-05, "loss": 1.4323, "step": 2741 }, { "epoch": 2.14, "learning_rate": 1.8269581445518615e-05, "loss": 1.1516, "step": 2742 }, { "epoch": 2.14, "learning_rate": 1.8268157350919e-05, "loss": 1.6294, "step": 2743 }, { "epoch": 2.14, "learning_rate": 1.8266732726113034e-05, "loss": 1.2728, "step": 2744 }, { "epoch": 2.15, "learning_rate": 1.826530757119208e-05, "loss": 1.3525, "step": 2745 }, { "epoch": 2.15, "learning_rate": 1.826388188624752e-05, "loss": 1.3767, "step": 2746 }, { "epoch": 2.15, "learning_rate": 1.8262455671370786e-05, "loss": 1.2054, "step": 2747 }, { "epoch": 2.15, "learning_rate": 1.8261028926653333e-05, "loss": 1.6819, "step": 2748 }, { "epoch": 2.15, "learning_rate": 1.8259601652186648e-05, "loss": 1.4222, "step": 2749 }, { "epoch": 2.15, "learning_rate": 1.8258173848062265e-05, "loss": 1.2761, "step": 2750 }, { "epoch": 2.15, "learning_rate": 1.825674551437174e-05, "loss": 1.4512, "step": 2751 }, { "epoch": 2.15, "learning_rate": 1.8255316651206663e-05, "loss": 1.2567, "step": 2752 }, { "epoch": 2.15, "learning_rate": 1.825388725865867e-05, "loss": 1.5916, "step": 2753 }, { "epoch": 2.15, "learning_rate": 1.8252457336819413e-05, "loss": 1.2678, "step": 2754 }, { "epoch": 2.15, "learning_rate": 1.8251026885780598e-05, "loss": 1.4201, "step": 2755 }, { "epoch": 2.15, "learning_rate": 1.8249595905633944e-05, "loss": 1.5046, "step": 2756 }, { "epoch": 2.15, "learning_rate": 1.824816439647122e-05, "loss": 1.4746, "step": 2757 }, { "epoch": 2.16, "learning_rate": 1.8246732358384228e-05, "loss": 1.4851, "step": 2758 }, { "epoch": 2.16, "learning_rate": 1.824529979146479e-05, "loss": 1.4973, "step": 2759 }, { "epoch": 2.16, "learning_rate": 1.8243866695804776e-05, "loss": 1.6251, "step": 2760 }, { "epoch": 2.16, "learning_rate": 1.8242433071496086e-05, "loss": 1.6821, "step": 2761 }, { "epoch": 2.16, "learning_rate": 1.824099891863065e-05, "loss": 1.4686, "step": 2762 }, { "epoch": 2.16, "learning_rate": 1.823956423730044e-05, "loss": 1.6493, "step": 2763 }, { "epoch": 2.16, "learning_rate": 1.823812902759745e-05, "loss": 1.4907, "step": 2764 }, { "epoch": 2.16, "learning_rate": 1.823669328961372e-05, "loss": 1.772, "step": 2765 }, { "epoch": 2.16, "learning_rate": 1.8235257023441315e-05, "loss": 1.4109, "step": 2766 }, { "epoch": 2.16, "learning_rate": 1.8233820229172343e-05, "loss": 1.4885, "step": 2767 }, { "epoch": 2.16, "learning_rate": 1.823238290689893e-05, "loss": 1.316, "step": 2768 }, { "epoch": 2.16, "learning_rate": 1.8230945056713255e-05, "loss": 1.4532, "step": 2769 }, { "epoch": 2.16, "learning_rate": 1.822950667870752e-05, "loss": 1.4535, "step": 2770 }, { "epoch": 2.17, "learning_rate": 1.8228067772973962e-05, "loss": 1.2447, "step": 2771 }, { "epoch": 2.17, "learning_rate": 1.8226628339604856e-05, "loss": 1.189, "step": 2772 }, { "epoch": 2.17, "learning_rate": 1.82251883786925e-05, "loss": 1.3512, "step": 2773 }, { "epoch": 2.17, "learning_rate": 1.822374789032924e-05, "loss": 1.3944, "step": 2774 }, { "epoch": 2.17, "learning_rate": 1.8222306874607447e-05, "loss": 1.6215, "step": 2775 }, { "epoch": 2.17, "learning_rate": 1.822086533161953e-05, "loss": 1.5237, "step": 2776 }, { "epoch": 2.17, "learning_rate": 1.8219423261457925e-05, "loss": 1.6523, "step": 2777 }, { "epoch": 2.17, "learning_rate": 1.8217980664215115e-05, "loss": 1.1687, "step": 2778 }, { "epoch": 2.17, "learning_rate": 1.8216537539983597e-05, "loss": 1.3712, "step": 2779 }, { "epoch": 2.17, "learning_rate": 1.8215093888855923e-05, "loss": 1.4663, "step": 2780 }, { "epoch": 2.17, "learning_rate": 1.821364971092466e-05, "loss": 1.6116, "step": 2781 }, { "epoch": 2.17, "learning_rate": 1.821220500628243e-05, "loss": 1.4797, "step": 2782 }, { "epoch": 2.18, "learning_rate": 1.8210759775021865e-05, "loss": 1.4409, "step": 2783 }, { "epoch": 2.18, "learning_rate": 1.820931401723565e-05, "loss": 1.4082, "step": 2784 }, { "epoch": 2.18, "learning_rate": 1.820786773301649e-05, "loss": 1.5761, "step": 2785 }, { "epoch": 2.18, "learning_rate": 1.820642092245714e-05, "loss": 1.3728, "step": 2786 }, { "epoch": 2.18, "learning_rate": 1.8204973585650362e-05, "loss": 1.677, "step": 2787 }, { "epoch": 2.18, "learning_rate": 1.8203525722688983e-05, "loss": 1.1703, "step": 2788 }, { "epoch": 2.18, "learning_rate": 1.8202077333665845e-05, "loss": 1.5596, "step": 2789 }, { "epoch": 2.18, "learning_rate": 1.8200628418673823e-05, "loss": 1.575, "step": 2790 }, { "epoch": 2.18, "learning_rate": 1.8199178977805835e-05, "loss": 1.487, "step": 2791 }, { "epoch": 2.18, "learning_rate": 1.819772901115483e-05, "loss": 1.3116, "step": 2792 }, { "epoch": 2.18, "learning_rate": 1.8196278518813786e-05, "loss": 1.5238, "step": 2793 }, { "epoch": 2.18, "learning_rate": 1.819482750087572e-05, "loss": 1.6402, "step": 2794 }, { "epoch": 2.18, "learning_rate": 1.8193375957433676e-05, "loss": 1.4703, "step": 2795 }, { "epoch": 2.19, "learning_rate": 1.819192388858074e-05, "loss": 1.0729, "step": 2796 }, { "epoch": 2.19, "learning_rate": 1.8190471294410027e-05, "loss": 1.3849, "step": 2797 }, { "epoch": 2.19, "learning_rate": 1.8189018175014687e-05, "loss": 1.3614, "step": 2798 }, { "epoch": 2.19, "learning_rate": 1.81875645304879e-05, "loss": 1.5942, "step": 2799 }, { "epoch": 2.19, "learning_rate": 1.8186110360922886e-05, "loss": 1.6879, "step": 2800 }, { "epoch": 2.19, "learning_rate": 1.8184655666412894e-05, "loss": 1.5151, "step": 2801 }, { "epoch": 2.19, "learning_rate": 1.8183200447051212e-05, "loss": 1.3837, "step": 2802 }, { "epoch": 2.19, "learning_rate": 1.818174470293115e-05, "loss": 1.5967, "step": 2803 }, { "epoch": 2.19, "learning_rate": 1.8180288434146066e-05, "loss": 1.5199, "step": 2804 }, { "epoch": 2.19, "learning_rate": 1.8178831640789347e-05, "loss": 1.7378, "step": 2805 }, { "epoch": 2.19, "learning_rate": 1.8177374322954407e-05, "loss": 1.3923, "step": 2806 }, { "epoch": 2.19, "learning_rate": 1.8175916480734698e-05, "loss": 1.6912, "step": 2807 }, { "epoch": 2.19, "learning_rate": 1.8174458114223706e-05, "loss": 1.3705, "step": 2808 }, { "epoch": 2.2, "learning_rate": 1.8172999223514957e-05, "loss": 1.3807, "step": 2809 }, { "epoch": 2.2, "learning_rate": 1.8171539808701993e-05, "loss": 1.3405, "step": 2810 }, { "epoch": 2.2, "learning_rate": 1.8170079869878412e-05, "loss": 1.4507, "step": 2811 }, { "epoch": 2.2, "learning_rate": 1.8168619407137827e-05, "loss": 1.289, "step": 2812 }, { "epoch": 2.2, "learning_rate": 1.8167158420573898e-05, "loss": 1.4919, "step": 2813 }, { "epoch": 2.2, "learning_rate": 1.8165696910280308e-05, "loss": 1.4717, "step": 2814 }, { "epoch": 2.2, "learning_rate": 1.816423487635078e-05, "loss": 1.4507, "step": 2815 }, { "epoch": 2.2, "learning_rate": 1.8162772318879067e-05, "loss": 1.3091, "step": 2816 }, { "epoch": 2.2, "learning_rate": 1.816130923795896e-05, "loss": 1.4057, "step": 2817 }, { "epoch": 2.2, "learning_rate": 1.8159845633684282e-05, "loss": 1.129, "step": 2818 }, { "epoch": 2.2, "learning_rate": 1.8158381506148885e-05, "loss": 1.5087, "step": 2819 }, { "epoch": 2.2, "learning_rate": 1.815691685544666e-05, "loss": 1.1725, "step": 2820 }, { "epoch": 2.2, "learning_rate": 1.8155451681671528e-05, "loss": 1.3353, "step": 2821 }, { "epoch": 2.21, "learning_rate": 1.8153985984917448e-05, "loss": 1.3832, "step": 2822 }, { "epoch": 2.21, "learning_rate": 1.815251976527841e-05, "loss": 1.4373, "step": 2823 }, { "epoch": 2.21, "learning_rate": 1.815105302284843e-05, "loss": 1.4096, "step": 2824 }, { "epoch": 2.21, "learning_rate": 1.8149585757721575e-05, "loss": 1.4825, "step": 2825 }, { "epoch": 2.21, "learning_rate": 1.8148117969991926e-05, "loss": 1.735, "step": 2826 }, { "epoch": 2.21, "learning_rate": 1.8146649659753618e-05, "loss": 1.6426, "step": 2827 }, { "epoch": 2.21, "learning_rate": 1.8145180827100794e-05, "loss": 1.5027, "step": 2828 }, { "epoch": 2.21, "learning_rate": 1.8143711472127658e-05, "loss": 1.4073, "step": 2829 }, { "epoch": 2.21, "learning_rate": 1.8142241594928423e-05, "loss": 1.2347, "step": 2830 }, { "epoch": 2.21, "learning_rate": 1.8140771195597358e-05, "loss": 1.2703, "step": 2831 }, { "epoch": 2.21, "learning_rate": 1.8139300274228747e-05, "loss": 1.4091, "step": 2832 }, { "epoch": 2.21, "learning_rate": 1.8137828830916913e-05, "loss": 1.6073, "step": 2833 }, { "epoch": 2.21, "learning_rate": 1.8136356865756225e-05, "loss": 1.3723, "step": 2834 }, { "epoch": 2.22, "learning_rate": 1.813488437884106e-05, "loss": 1.6023, "step": 2835 }, { "epoch": 2.22, "learning_rate": 1.8133411370265855e-05, "loss": 1.2442, "step": 2836 }, { "epoch": 2.22, "learning_rate": 1.813193784012506e-05, "loss": 1.4896, "step": 2837 }, { "epoch": 2.22, "learning_rate": 1.8130463788513176e-05, "loss": 1.2456, "step": 2838 }, { "epoch": 2.22, "learning_rate": 1.812898921552472e-05, "loss": 1.3249, "step": 2839 }, { "epoch": 2.22, "learning_rate": 1.8127514121254254e-05, "loss": 1.3455, "step": 2840 }, { "epoch": 2.22, "learning_rate": 1.8126038505796376e-05, "loss": 1.602, "step": 2841 }, { "epoch": 2.22, "learning_rate": 1.8124562369245702e-05, "loss": 1.3916, "step": 2842 }, { "epoch": 2.22, "learning_rate": 1.8123085711696898e-05, "loss": 1.5594, "step": 2843 }, { "epoch": 2.22, "learning_rate": 1.8121608533244653e-05, "loss": 1.2986, "step": 2844 }, { "epoch": 2.22, "learning_rate": 1.8120130833983695e-05, "loss": 1.3359, "step": 2845 }, { "epoch": 2.22, "learning_rate": 1.811865261400878e-05, "loss": 1.4834, "step": 2846 }, { "epoch": 2.23, "learning_rate": 1.811717387341471e-05, "loss": 1.1435, "step": 2847 }, { "epoch": 2.23, "learning_rate": 1.8115694612296296e-05, "loss": 1.3066, "step": 2848 }, { "epoch": 2.23, "learning_rate": 1.8114214830748412e-05, "loss": 1.3621, "step": 2849 }, { "epoch": 2.23, "learning_rate": 1.8112734528865946e-05, "loss": 1.268, "step": 2850 }, { "epoch": 2.23, "learning_rate": 1.8111253706743822e-05, "loss": 1.1931, "step": 2851 }, { "epoch": 2.23, "learning_rate": 1.8109772364477e-05, "loss": 1.3414, "step": 2852 }, { "epoch": 2.23, "learning_rate": 1.8108290502160474e-05, "loss": 1.3682, "step": 2853 }, { "epoch": 2.23, "learning_rate": 1.810680811988927e-05, "loss": 1.4612, "step": 2854 }, { "epoch": 2.23, "learning_rate": 1.810532521775845e-05, "loss": 1.4007, "step": 2855 }, { "epoch": 2.23, "learning_rate": 1.8103841795863104e-05, "loss": 1.3276, "step": 2856 }, { "epoch": 2.23, "learning_rate": 1.810235785429836e-05, "loss": 1.1123, "step": 2857 }, { "epoch": 2.23, "learning_rate": 1.8100873393159376e-05, "loss": 1.6053, "step": 2858 }, { "epoch": 2.23, "learning_rate": 1.8099388412541348e-05, "loss": 1.3571, "step": 2859 }, { "epoch": 2.24, "learning_rate": 1.80979029125395e-05, "loss": 1.264, "step": 2860 }, { "epoch": 2.24, "learning_rate": 1.8096416893249093e-05, "loss": 1.416, "step": 2861 }, { "epoch": 2.24, "learning_rate": 1.8094930354765415e-05, "loss": 1.5048, "step": 2862 }, { "epoch": 2.24, "learning_rate": 1.8093443297183803e-05, "loss": 1.2886, "step": 2863 }, { "epoch": 2.24, "learning_rate": 1.8091955720599605e-05, "loss": 1.5721, "step": 2864 }, { "epoch": 2.24, "learning_rate": 1.809046762510822e-05, "loss": 1.2658, "step": 2865 }, { "epoch": 2.24, "learning_rate": 1.8088979010805072e-05, "loss": 1.3894, "step": 2866 }, { "epoch": 2.24, "learning_rate": 1.808748987778562e-05, "loss": 1.2219, "step": 2867 }, { "epoch": 2.24, "learning_rate": 1.808600022614536e-05, "loss": 1.3894, "step": 2868 }, { "epoch": 2.24, "learning_rate": 1.808451005597981e-05, "loss": 1.2263, "step": 2869 }, { "epoch": 2.24, "learning_rate": 1.808301936738454e-05, "loss": 1.2639, "step": 2870 }, { "epoch": 2.24, "learning_rate": 1.8081528160455138e-05, "loss": 1.3883, "step": 2871 }, { "epoch": 2.24, "learning_rate": 1.808003643528722e-05, "loss": 1.3809, "step": 2872 }, { "epoch": 2.25, "learning_rate": 1.8078544191976464e-05, "loss": 1.4749, "step": 2873 }, { "epoch": 2.25, "learning_rate": 1.8077051430618543e-05, "loss": 1.3585, "step": 2874 }, { "epoch": 2.25, "learning_rate": 1.8075558151309197e-05, "loss": 1.6329, "step": 2875 }, { "epoch": 2.25, "learning_rate": 1.8074064354144176e-05, "loss": 1.3895, "step": 2876 }, { "epoch": 2.25, "learning_rate": 1.8072570039219267e-05, "loss": 1.3588, "step": 2877 }, { "epoch": 2.25, "learning_rate": 1.8071075206630308e-05, "loss": 1.5711, "step": 2878 }, { "epoch": 2.25, "learning_rate": 1.8069579856473153e-05, "loss": 1.1718, "step": 2879 }, { "epoch": 2.25, "learning_rate": 1.8068083988843685e-05, "loss": 1.4357, "step": 2880 }, { "epoch": 2.25, "learning_rate": 1.8066587603837837e-05, "loss": 1.3853, "step": 2881 }, { "epoch": 2.25, "learning_rate": 1.8065090701551563e-05, "loss": 1.3272, "step": 2882 }, { "epoch": 2.25, "learning_rate": 1.806359328208086e-05, "loss": 1.1802, "step": 2883 }, { "epoch": 2.25, "learning_rate": 1.8062095345521738e-05, "loss": 1.3315, "step": 2884 }, { "epoch": 2.25, "learning_rate": 1.806059689197027e-05, "loss": 1.47, "step": 2885 }, { "epoch": 2.26, "learning_rate": 1.8059097921522535e-05, "loss": 1.3029, "step": 2886 }, { "epoch": 2.26, "learning_rate": 1.8057598434274662e-05, "loss": 1.2693, "step": 2887 }, { "epoch": 2.26, "learning_rate": 1.805609843032281e-05, "loss": 1.3735, "step": 2888 }, { "epoch": 2.26, "learning_rate": 1.805459790976316e-05, "loss": 1.3663, "step": 2889 }, { "epoch": 2.26, "learning_rate": 1.805309687269194e-05, "loss": 1.4957, "step": 2890 }, { "epoch": 2.26, "learning_rate": 1.8051595319205408e-05, "loss": 1.5661, "step": 2891 }, { "epoch": 2.26, "learning_rate": 1.8050093249399852e-05, "loss": 1.2348, "step": 2892 }, { "epoch": 2.26, "learning_rate": 1.804859066337159e-05, "loss": 1.2546, "step": 2893 }, { "epoch": 2.26, "learning_rate": 1.804708756121698e-05, "loss": 1.3455, "step": 2894 }, { "epoch": 2.26, "learning_rate": 1.8045583943032415e-05, "loss": 1.345, "step": 2895 }, { "epoch": 2.26, "learning_rate": 1.8044079808914308e-05, "loss": 1.5684, "step": 2896 }, { "epoch": 2.26, "learning_rate": 1.8042575158959116e-05, "loss": 1.3615, "step": 2897 }, { "epoch": 2.26, "learning_rate": 1.8041069993263334e-05, "loss": 1.4197, "step": 2898 }, { "epoch": 2.27, "learning_rate": 1.803956431192347e-05, "loss": 1.4022, "step": 2899 }, { "epoch": 2.27, "learning_rate": 1.803805811503609e-05, "loss": 1.4917, "step": 2900 }, { "epoch": 2.27, "learning_rate": 1.8036551402697778e-05, "loss": 1.3511, "step": 2901 }, { "epoch": 2.27, "learning_rate": 1.8035044175005145e-05, "loss": 1.268, "step": 2902 }, { "epoch": 2.27, "learning_rate": 1.8033536432054855e-05, "loss": 1.3232, "step": 2903 }, { "epoch": 2.27, "learning_rate": 1.803202817394359e-05, "loss": 1.4663, "step": 2904 }, { "epoch": 2.27, "learning_rate": 1.8030519400768064e-05, "loss": 1.6894, "step": 2905 }, { "epoch": 2.27, "learning_rate": 1.8029010112625035e-05, "loss": 1.6128, "step": 2906 }, { "epoch": 2.27, "learning_rate": 1.8027500309611288e-05, "loss": 1.3237, "step": 2907 }, { "epoch": 2.27, "learning_rate": 1.8025989991823638e-05, "loss": 1.3146, "step": 2908 }, { "epoch": 2.27, "learning_rate": 1.8024479159358938e-05, "loss": 1.4177, "step": 2909 }, { "epoch": 2.27, "learning_rate": 1.8022967812314073e-05, "loss": 1.1749, "step": 2910 }, { "epoch": 2.28, "learning_rate": 1.802145595078596e-05, "loss": 1.5686, "step": 2911 }, { "epoch": 2.28, "learning_rate": 1.8019943574871545e-05, "loss": 1.2896, "step": 2912 }, { "epoch": 2.28, "learning_rate": 1.8018430684667816e-05, "loss": 1.3938, "step": 2913 }, { "epoch": 2.28, "learning_rate": 1.8016917280271788e-05, "loss": 1.4827, "step": 2914 }, { "epoch": 2.28, "learning_rate": 1.8015403361780506e-05, "loss": 1.5687, "step": 2915 }, { "epoch": 2.28, "learning_rate": 1.8013888929291055e-05, "loss": 1.4708, "step": 2916 }, { "epoch": 2.28, "learning_rate": 1.8012373982900556e-05, "loss": 1.3971, "step": 2917 }, { "epoch": 2.28, "learning_rate": 1.801085852270615e-05, "loss": 1.3974, "step": 2918 }, { "epoch": 2.28, "learning_rate": 1.8009342548805012e-05, "loss": 1.4579, "step": 2919 }, { "epoch": 2.28, "learning_rate": 1.8007826061294365e-05, "loss": 1.2431, "step": 2920 }, { "epoch": 2.28, "learning_rate": 1.8006309060271455e-05, "loss": 1.5068, "step": 2921 }, { "epoch": 2.28, "learning_rate": 1.8004791545833564e-05, "loss": 1.5652, "step": 2922 }, { "epoch": 2.28, "learning_rate": 1.8003273518077996e-05, "loss": 1.3419, "step": 2923 }, { "epoch": 2.29, "learning_rate": 1.80017549771021e-05, "loss": 1.4905, "step": 2924 }, { "epoch": 2.29, "learning_rate": 1.8000235923003258e-05, "loss": 1.3197, "step": 2925 }, { "epoch": 2.29, "learning_rate": 1.7998716355878878e-05, "loss": 1.4633, "step": 2926 }, { "epoch": 2.29, "learning_rate": 1.7997196275826406e-05, "loss": 1.5713, "step": 2927 }, { "epoch": 2.29, "learning_rate": 1.799567568294332e-05, "loss": 1.4369, "step": 2928 }, { "epoch": 2.29, "learning_rate": 1.7994154577327122e-05, "loss": 1.3451, "step": 2929 }, { "epoch": 2.29, "learning_rate": 1.7992632959075367e-05, "loss": 1.4582, "step": 2930 }, { "epoch": 2.29, "learning_rate": 1.7991110828285623e-05, "loss": 1.4136, "step": 2931 }, { "epoch": 2.29, "learning_rate": 1.79895881850555e-05, "loss": 1.2259, "step": 2932 }, { "epoch": 2.29, "learning_rate": 1.798806502948264e-05, "loss": 1.1903, "step": 2933 }, { "epoch": 2.29, "learning_rate": 1.798654136166472e-05, "loss": 1.4412, "step": 2934 }, { "epoch": 2.29, "learning_rate": 1.798501718169944e-05, "loss": 1.5704, "step": 2935 }, { "epoch": 2.29, "learning_rate": 1.7983492489684546e-05, "loss": 1.2795, "step": 2936 }, { "epoch": 2.3, "learning_rate": 1.798196728571781e-05, "loss": 1.5051, "step": 2937 }, { "epoch": 2.3, "learning_rate": 1.7980441569897035e-05, "loss": 1.2958, "step": 2938 }, { "epoch": 2.3, "learning_rate": 1.7978915342320067e-05, "loss": 1.4746, "step": 2939 }, { "epoch": 2.3, "learning_rate": 1.797738860308477e-05, "loss": 1.3462, "step": 2940 }, { "epoch": 2.3, "learning_rate": 1.7975861352289045e-05, "loss": 1.379, "step": 2941 }, { "epoch": 2.3, "learning_rate": 1.7974333590030837e-05, "loss": 1.5468, "step": 2942 }, { "epoch": 2.3, "learning_rate": 1.7972805316408116e-05, "loss": 1.3997, "step": 2943 }, { "epoch": 2.3, "learning_rate": 1.7971276531518876e-05, "loss": 1.5659, "step": 2944 }, { "epoch": 2.3, "learning_rate": 1.796974723546116e-05, "loss": 1.6335, "step": 2945 }, { "epoch": 2.3, "learning_rate": 1.7968217428333033e-05, "loss": 1.427, "step": 2946 }, { "epoch": 2.3, "learning_rate": 1.79666871102326e-05, "loss": 1.4449, "step": 2947 }, { "epoch": 2.3, "learning_rate": 1.7965156281257986e-05, "loss": 1.4012, "step": 2948 }, { "epoch": 2.3, "learning_rate": 1.7963624941507364e-05, "loss": 1.4481, "step": 2949 }, { "epoch": 2.31, "learning_rate": 1.7962093091078935e-05, "loss": 1.5104, "step": 2950 }, { "epoch": 2.31, "learning_rate": 1.7960560730070924e-05, "loss": 1.1941, "step": 2951 }, { "epoch": 2.31, "learning_rate": 1.79590278585816e-05, "loss": 1.5242, "step": 2952 }, { "epoch": 2.31, "learning_rate": 1.795749447670926e-05, "loss": 1.3504, "step": 2953 }, { "epoch": 2.31, "learning_rate": 1.7955960584552232e-05, "loss": 1.3293, "step": 2954 }, { "epoch": 2.31, "learning_rate": 1.7954426182208882e-05, "loss": 1.3799, "step": 2955 }, { "epoch": 2.31, "learning_rate": 1.7952891269777606e-05, "loss": 1.6683, "step": 2956 }, { "epoch": 2.31, "learning_rate": 1.7951355847356828e-05, "loss": 1.4961, "step": 2957 }, { "epoch": 2.31, "learning_rate": 1.794981991504501e-05, "loss": 1.4237, "step": 2958 }, { "epoch": 2.31, "learning_rate": 1.7948283472940652e-05, "loss": 1.415, "step": 2959 }, { "epoch": 2.31, "learning_rate": 1.7946746521142274e-05, "loss": 1.4409, "step": 2960 }, { "epoch": 2.31, "learning_rate": 1.7945209059748434e-05, "loss": 1.4069, "step": 2961 }, { "epoch": 2.31, "learning_rate": 1.7943671088857727e-05, "loss": 1.6168, "step": 2962 }, { "epoch": 2.32, "learning_rate": 1.7942132608568778e-05, "loss": 1.3832, "step": 2963 }, { "epoch": 2.32, "learning_rate": 1.794059361898024e-05, "loss": 1.5264, "step": 2964 }, { "epoch": 2.32, "learning_rate": 1.7939054120190807e-05, "loss": 1.5285, "step": 2965 }, { "epoch": 2.32, "learning_rate": 1.79375141122992e-05, "loss": 1.4986, "step": 2966 }, { "epoch": 2.32, "learning_rate": 1.793597359540417e-05, "loss": 1.5112, "step": 2967 }, { "epoch": 2.32, "learning_rate": 1.7934432569604514e-05, "loss": 1.6089, "step": 2968 }, { "epoch": 2.32, "learning_rate": 1.793289103499904e-05, "loss": 1.5142, "step": 2969 }, { "epoch": 2.32, "learning_rate": 1.7931348991686612e-05, "loss": 1.3628, "step": 2970 }, { "epoch": 2.32, "learning_rate": 1.7929806439766108e-05, "loss": 1.4759, "step": 2971 }, { "epoch": 2.32, "learning_rate": 1.7928263379336453e-05, "loss": 1.3618, "step": 2972 }, { "epoch": 2.32, "learning_rate": 1.7926719810496592e-05, "loss": 1.3924, "step": 2973 }, { "epoch": 2.32, "learning_rate": 1.7925175733345507e-05, "loss": 1.3635, "step": 2974 }, { "epoch": 2.33, "learning_rate": 1.792363114798222e-05, "loss": 1.2291, "step": 2975 }, { "epoch": 2.33, "learning_rate": 1.7922086054505778e-05, "loss": 1.4512, "step": 2976 }, { "epoch": 2.33, "learning_rate": 1.7920540453015262e-05, "loss": 1.2717, "step": 2977 }, { "epoch": 2.33, "learning_rate": 1.791899434360978e-05, "loss": 1.588, "step": 2978 }, { "epoch": 2.33, "learning_rate": 1.7917447726388487e-05, "loss": 1.2683, "step": 2979 }, { "epoch": 2.33, "learning_rate": 1.7915900601450556e-05, "loss": 1.3938, "step": 2980 }, { "epoch": 2.33, "learning_rate": 1.7914352968895202e-05, "loss": 1.4644, "step": 2981 }, { "epoch": 2.33, "learning_rate": 1.7912804828821664e-05, "loss": 1.4459, "step": 2982 }, { "epoch": 2.33, "learning_rate": 1.7911256181329224e-05, "loss": 1.4463, "step": 2983 }, { "epoch": 2.33, "learning_rate": 1.7909707026517184e-05, "loss": 1.3622, "step": 2984 }, { "epoch": 2.33, "learning_rate": 1.7908157364484896e-05, "loss": 1.5117, "step": 2985 }, { "epoch": 2.33, "learning_rate": 1.790660719533173e-05, "loss": 1.4195, "step": 2986 }, { "epoch": 2.33, "learning_rate": 1.7905056519157083e-05, "loss": 1.5715, "step": 2987 }, { "epoch": 2.34, "learning_rate": 1.7903505336060406e-05, "loss": 1.5931, "step": 2988 }, { "epoch": 2.34, "learning_rate": 1.7901953646141164e-05, "loss": 1.463, "step": 2989 }, { "epoch": 2.34, "learning_rate": 1.790040144949887e-05, "loss": 1.4047, "step": 2990 }, { "epoch": 2.34, "learning_rate": 1.789884874623305e-05, "loss": 1.2108, "step": 2991 }, { "epoch": 2.34, "learning_rate": 1.7897295536443277e-05, "loss": 1.2997, "step": 2992 }, { "epoch": 2.34, "learning_rate": 1.7895741820229156e-05, "loss": 1.5474, "step": 2993 }, { "epoch": 2.34, "learning_rate": 1.7894187597690314e-05, "loss": 1.4981, "step": 2994 }, { "epoch": 2.34, "learning_rate": 1.7892632868926424e-05, "loss": 1.3536, "step": 2995 }, { "epoch": 2.34, "learning_rate": 1.789107763403718e-05, "loss": 1.1792, "step": 2996 }, { "epoch": 2.34, "learning_rate": 1.7889521893122324e-05, "loss": 1.5045, "step": 2997 }, { "epoch": 2.34, "learning_rate": 1.7887965646281605e-05, "loss": 1.5787, "step": 2998 }, { "epoch": 2.34, "learning_rate": 1.7886408893614827e-05, "loss": 1.4252, "step": 2999 }, { "epoch": 2.34, "learning_rate": 1.7884851635221823e-05, "loss": 1.2817, "step": 3000 }, { "epoch": 2.35, "learning_rate": 1.7883293871202446e-05, "loss": 1.3184, "step": 3001 }, { "epoch": 2.35, "learning_rate": 1.7881735601656594e-05, "loss": 1.557, "step": 3002 }, { "epoch": 2.35, "learning_rate": 1.7880176826684193e-05, "loss": 1.6474, "step": 3003 }, { "epoch": 2.35, "learning_rate": 1.7878617546385198e-05, "loss": 1.1452, "step": 3004 }, { "epoch": 2.35, "learning_rate": 1.7877057760859604e-05, "loss": 1.4197, "step": 3005 }, { "epoch": 2.35, "learning_rate": 1.787549747020743e-05, "loss": 1.5311, "step": 3006 }, { "epoch": 2.35, "learning_rate": 1.7873936674528738e-05, "loss": 1.6961, "step": 3007 }, { "epoch": 2.35, "learning_rate": 1.7872375373923613e-05, "loss": 1.3509, "step": 3008 }, { "epoch": 2.35, "learning_rate": 1.787081356849217e-05, "loss": 1.5186, "step": 3009 }, { "epoch": 2.35, "learning_rate": 1.7869251258334574e-05, "loss": 1.3903, "step": 3010 }, { "epoch": 2.35, "learning_rate": 1.7867688443550997e-05, "loss": 1.4216, "step": 3011 }, { "epoch": 2.35, "learning_rate": 1.7866125124241668e-05, "loss": 1.5057, "step": 3012 }, { "epoch": 2.35, "learning_rate": 1.7864561300506827e-05, "loss": 1.5613, "step": 3013 }, { "epoch": 2.36, "learning_rate": 1.786299697244676e-05, "loss": 1.3453, "step": 3014 }, { "epoch": 2.36, "learning_rate": 1.7861432140161783e-05, "loss": 1.3782, "step": 3015 }, { "epoch": 2.36, "learning_rate": 1.7859866803752246e-05, "loss": 1.2549, "step": 3016 }, { "epoch": 2.36, "learning_rate": 1.785830096331852e-05, "loss": 1.3348, "step": 3017 }, { "epoch": 2.36, "learning_rate": 1.785673461896102e-05, "loss": 1.6264, "step": 3018 }, { "epoch": 2.36, "learning_rate": 1.7855167770780196e-05, "loss": 1.3807, "step": 3019 }, { "epoch": 2.36, "learning_rate": 1.7853600418876517e-05, "loss": 1.5375, "step": 3020 }, { "epoch": 2.36, "learning_rate": 1.785203256335049e-05, "loss": 1.6203, "step": 3021 }, { "epoch": 2.36, "learning_rate": 1.7850464204302662e-05, "loss": 1.4338, "step": 3022 }, { "epoch": 2.36, "learning_rate": 1.78488953418336e-05, "loss": 1.4712, "step": 3023 }, { "epoch": 2.36, "learning_rate": 1.7847325976043918e-05, "loss": 1.7212, "step": 3024 }, { "epoch": 2.36, "learning_rate": 1.7845756107034242e-05, "loss": 1.327, "step": 3025 }, { "epoch": 2.36, "learning_rate": 1.7844185734905253e-05, "loss": 1.5821, "step": 3026 }, { "epoch": 2.37, "learning_rate": 1.7842614859757645e-05, "loss": 1.603, "step": 3027 }, { "epoch": 2.37, "learning_rate": 1.784104348169216e-05, "loss": 1.3359, "step": 3028 }, { "epoch": 2.37, "learning_rate": 1.7839471600809558e-05, "loss": 1.3781, "step": 3029 }, { "epoch": 2.37, "learning_rate": 1.7837899217210637e-05, "loss": 1.227, "step": 3030 }, { "epoch": 2.37, "learning_rate": 1.7836326330996235e-05, "loss": 1.5585, "step": 3031 }, { "epoch": 2.37, "learning_rate": 1.783475294226721e-05, "loss": 1.5946, "step": 3032 }, { "epoch": 2.37, "learning_rate": 1.783317905112446e-05, "loss": 1.4386, "step": 3033 }, { "epoch": 2.37, "learning_rate": 1.783160465766891e-05, "loss": 1.4052, "step": 3034 }, { "epoch": 2.37, "learning_rate": 1.7830029762001525e-05, "loss": 1.3906, "step": 3035 }, { "epoch": 2.37, "learning_rate": 1.7828454364223295e-05, "loss": 1.5728, "step": 3036 }, { "epoch": 2.37, "learning_rate": 1.7826878464435243e-05, "loss": 1.4637, "step": 3037 }, { "epoch": 2.37, "learning_rate": 1.7825302062738425e-05, "loss": 1.3593, "step": 3038 }, { "epoch": 2.38, "learning_rate": 1.7823725159233936e-05, "loss": 1.5355, "step": 3039 }, { "epoch": 2.38, "learning_rate": 1.7822147754022887e-05, "loss": 1.2671, "step": 3040 }, { "epoch": 2.38, "learning_rate": 1.782056984720644e-05, "loss": 1.3881, "step": 3041 }, { "epoch": 2.38, "learning_rate": 1.7818991438885773e-05, "loss": 1.284, "step": 3042 }, { "epoch": 2.38, "learning_rate": 1.7817412529162112e-05, "loss": 1.4965, "step": 3043 }, { "epoch": 2.38, "learning_rate": 1.7815833118136695e-05, "loss": 1.5992, "step": 3044 }, { "epoch": 2.38, "learning_rate": 1.7814253205910817e-05, "loss": 1.6135, "step": 3045 }, { "epoch": 2.38, "learning_rate": 1.7812672792585784e-05, "loss": 1.2837, "step": 3046 }, { "epoch": 2.38, "learning_rate": 1.7811091878262942e-05, "loss": 1.2708, "step": 3047 }, { "epoch": 2.38, "learning_rate": 1.780951046304367e-05, "loss": 1.2462, "step": 3048 }, { "epoch": 2.38, "learning_rate": 1.7807928547029384e-05, "loss": 1.26, "step": 3049 }, { "epoch": 2.38, "learning_rate": 1.7806346130321516e-05, "loss": 1.2786, "step": 3050 }, { "epoch": 2.38, "learning_rate": 1.780476321302155e-05, "loss": 1.4673, "step": 3051 }, { "epoch": 2.39, "learning_rate": 1.7803179795230986e-05, "loss": 1.3807, "step": 3052 }, { "epoch": 2.39, "learning_rate": 1.780159587705137e-05, "loss": 1.2445, "step": 3053 }, { "epoch": 2.39, "learning_rate": 1.7800011458584264e-05, "loss": 1.3972, "step": 3054 }, { "epoch": 2.39, "learning_rate": 1.7798426539931276e-05, "loss": 1.4371, "step": 3055 }, { "epoch": 2.39, "learning_rate": 1.779684112119404e-05, "loss": 1.5874, "step": 3056 }, { "epoch": 2.39, "learning_rate": 1.7795255202474223e-05, "loss": 1.6607, "step": 3057 }, { "epoch": 2.39, "learning_rate": 1.7793668783873527e-05, "loss": 1.3449, "step": 3058 }, { "epoch": 2.39, "learning_rate": 1.7792081865493678e-05, "loss": 1.4473, "step": 3059 }, { "epoch": 2.39, "learning_rate": 1.7790494447436442e-05, "loss": 1.4627, "step": 3060 }, { "epoch": 2.39, "learning_rate": 1.7788906529803613e-05, "loss": 1.528, "step": 3061 }, { "epoch": 2.39, "learning_rate": 1.778731811269702e-05, "loss": 1.4242, "step": 3062 }, { "epoch": 2.39, "learning_rate": 1.778572919621852e-05, "loss": 1.4874, "step": 3063 }, { "epoch": 2.39, "learning_rate": 1.778413978047001e-05, "loss": 1.502, "step": 3064 }, { "epoch": 2.4, "learning_rate": 1.7782549865553406e-05, "loss": 1.5016, "step": 3065 }, { "epoch": 2.4, "learning_rate": 1.7780959451570668e-05, "loss": 1.3446, "step": 3066 }, { "epoch": 2.4, "learning_rate": 1.7779368538623783e-05, "loss": 1.3627, "step": 3067 }, { "epoch": 2.4, "learning_rate": 1.7777777126814766e-05, "loss": 1.4821, "step": 3068 }, { "epoch": 2.4, "learning_rate": 1.777618521624567e-05, "loss": 1.339, "step": 3069 }, { "epoch": 2.4, "learning_rate": 1.7774592807018588e-05, "loss": 1.4351, "step": 3070 }, { "epoch": 2.4, "learning_rate": 1.777299989923562e-05, "loss": 1.5458, "step": 3071 }, { "epoch": 2.4, "learning_rate": 1.7771406492998925e-05, "loss": 1.4403, "step": 3072 }, { "epoch": 2.4, "learning_rate": 1.7769812588410675e-05, "loss": 1.5238, "step": 3073 }, { "epoch": 2.4, "learning_rate": 1.7768218185573084e-05, "loss": 1.3647, "step": 3074 }, { "epoch": 2.4, "learning_rate": 1.7766623284588393e-05, "loss": 1.5082, "step": 3075 }, { "epoch": 2.4, "learning_rate": 1.7765027885558886e-05, "loss": 1.4464, "step": 3076 }, { "epoch": 2.4, "learning_rate": 1.776343198858686e-05, "loss": 1.607, "step": 3077 }, { "epoch": 2.41, "learning_rate": 1.7761835593774655e-05, "loss": 1.4306, "step": 3078 }, { "epoch": 2.41, "learning_rate": 1.7760238701224645e-05, "loss": 1.6271, "step": 3079 }, { "epoch": 2.41, "learning_rate": 1.7758641311039235e-05, "loss": 1.1114, "step": 3080 }, { "epoch": 2.41, "learning_rate": 1.7757043423320854e-05, "loss": 1.5807, "step": 3081 }, { "epoch": 2.41, "learning_rate": 1.775544503817197e-05, "loss": 1.4645, "step": 3082 }, { "epoch": 2.41, "learning_rate": 1.7753846155695086e-05, "loss": 1.2616, "step": 3083 }, { "epoch": 2.41, "learning_rate": 1.7752246775992726e-05, "loss": 1.5803, "step": 3084 }, { "epoch": 2.41, "learning_rate": 1.775064689916746e-05, "loss": 1.3675, "step": 3085 }, { "epoch": 2.41, "learning_rate": 1.7749046525321873e-05, "loss": 1.4128, "step": 3086 }, { "epoch": 2.41, "learning_rate": 1.77474456545586e-05, "loss": 1.4751, "step": 3087 }, { "epoch": 2.41, "learning_rate": 1.7745844286980294e-05, "loss": 1.5989, "step": 3088 }, { "epoch": 2.41, "learning_rate": 1.7744242422689644e-05, "loss": 1.4074, "step": 3089 }, { "epoch": 2.42, "learning_rate": 1.7742640061789374e-05, "loss": 1.5435, "step": 3090 }, { "epoch": 2.42, "learning_rate": 1.7741037204382236e-05, "loss": 1.5015, "step": 3091 }, { "epoch": 2.42, "learning_rate": 1.7739433850571016e-05, "loss": 1.6155, "step": 3092 }, { "epoch": 2.42, "learning_rate": 1.773783000045853e-05, "loss": 1.4371, "step": 3093 }, { "epoch": 2.42, "learning_rate": 1.773622565414763e-05, "loss": 1.4907, "step": 3094 }, { "epoch": 2.42, "learning_rate": 1.7734620811741197e-05, "loss": 1.6364, "step": 3095 }, { "epoch": 2.42, "learning_rate": 1.7733015473342135e-05, "loss": 1.2152, "step": 3096 }, { "epoch": 2.42, "learning_rate": 1.77314096390534e-05, "loss": 1.4019, "step": 3097 }, { "epoch": 2.42, "learning_rate": 1.7729803308977964e-05, "loss": 1.6248, "step": 3098 }, { "epoch": 2.42, "learning_rate": 1.7728196483218832e-05, "loss": 1.6384, "step": 3099 }, { "epoch": 2.42, "learning_rate": 1.7726589161879045e-05, "loss": 1.2605, "step": 3100 }, { "epoch": 2.42, "learning_rate": 1.772498134506168e-05, "loss": 1.8122, "step": 3101 }, { "epoch": 2.42, "learning_rate": 1.7723373032869832e-05, "loss": 1.3932, "step": 3102 }, { "epoch": 2.43, "learning_rate": 1.772176422540664e-05, "loss": 1.4383, "step": 3103 }, { "epoch": 2.43, "learning_rate": 1.772015492277527e-05, "loss": 1.1257, "step": 3104 }, { "epoch": 2.43, "learning_rate": 1.7718545125078922e-05, "loss": 1.3744, "step": 3105 }, { "epoch": 2.43, "learning_rate": 1.7716934832420825e-05, "loss": 1.3198, "step": 3106 }, { "epoch": 2.43, "learning_rate": 1.7715324044904244e-05, "loss": 1.3703, "step": 3107 }, { "epoch": 2.43, "learning_rate": 1.771371276263247e-05, "loss": 1.4859, "step": 3108 }, { "epoch": 2.43, "learning_rate": 1.7712100985708825e-05, "loss": 1.4334, "step": 3109 }, { "epoch": 2.43, "learning_rate": 1.7710488714236677e-05, "loss": 1.4669, "step": 3110 }, { "epoch": 2.43, "learning_rate": 1.770887594831941e-05, "loss": 1.2367, "step": 3111 }, { "epoch": 2.43, "learning_rate": 1.7707262688060437e-05, "loss": 1.3878, "step": 3112 }, { "epoch": 2.43, "learning_rate": 1.770564893356322e-05, "loss": 1.3158, "step": 3113 }, { "epoch": 2.43, "learning_rate": 1.770403468493124e-05, "loss": 1.4723, "step": 3114 }, { "epoch": 2.43, "learning_rate": 1.7702419942268013e-05, "loss": 1.5684, "step": 3115 }, { "epoch": 2.44, "learning_rate": 1.7700804705677083e-05, "loss": 1.5033, "step": 3116 }, { "epoch": 2.44, "learning_rate": 1.7699188975262035e-05, "loss": 1.5042, "step": 3117 }, { "epoch": 2.44, "learning_rate": 1.7697572751126478e-05, "loss": 1.6109, "step": 3118 }, { "epoch": 2.44, "learning_rate": 1.769595603337405e-05, "loss": 1.4067, "step": 3119 }, { "epoch": 2.44, "learning_rate": 1.7694338822108432e-05, "loss": 1.4726, "step": 3120 }, { "epoch": 2.44, "learning_rate": 1.7692721117433324e-05, "loss": 1.5648, "step": 3121 }, { "epoch": 2.44, "learning_rate": 1.7691102919452468e-05, "loss": 1.5282, "step": 3122 }, { "epoch": 2.44, "learning_rate": 1.768948422826963e-05, "loss": 1.4465, "step": 3123 }, { "epoch": 2.44, "learning_rate": 1.7687865043988615e-05, "loss": 1.5455, "step": 3124 }, { "epoch": 2.44, "learning_rate": 1.7686245366713248e-05, "loss": 1.7333, "step": 3125 }, { "epoch": 2.44, "learning_rate": 1.7684625196547397e-05, "loss": 1.3964, "step": 3126 }, { "epoch": 2.44, "learning_rate": 1.768300453359496e-05, "loss": 1.6165, "step": 3127 }, { "epoch": 2.44, "learning_rate": 1.7681383377959857e-05, "loss": 1.2362, "step": 3128 }, { "epoch": 2.45, "learning_rate": 1.7679761729746053e-05, "loss": 1.3652, "step": 3129 }, { "epoch": 2.45, "learning_rate": 1.7678139589057536e-05, "loss": 1.3107, "step": 3130 }, { "epoch": 2.45, "learning_rate": 1.767651695599833e-05, "loss": 1.5158, "step": 3131 }, { "epoch": 2.45, "learning_rate": 1.7674893830672486e-05, "loss": 1.4914, "step": 3132 }, { "epoch": 2.45, "learning_rate": 1.7673270213184092e-05, "loss": 1.3567, "step": 3133 }, { "epoch": 2.45, "learning_rate": 1.7671646103637257e-05, "loss": 1.3566, "step": 3134 }, { "epoch": 2.45, "learning_rate": 1.767002150213614e-05, "loss": 1.3821, "step": 3135 }, { "epoch": 2.45, "learning_rate": 1.7668396408784915e-05, "loss": 1.2736, "step": 3136 }, { "epoch": 2.45, "learning_rate": 1.766677082368779e-05, "loss": 1.1608, "step": 3137 }, { "epoch": 2.45, "learning_rate": 1.7665144746949013e-05, "loss": 1.3544, "step": 3138 }, { "epoch": 2.45, "learning_rate": 1.766351817867286e-05, "loss": 1.5156, "step": 3139 }, { "epoch": 2.45, "learning_rate": 1.766189111896363e-05, "loss": 1.6151, "step": 3140 }, { "epoch": 2.45, "learning_rate": 1.766026356792566e-05, "loss": 1.3502, "step": 3141 }, { "epoch": 2.46, "learning_rate": 1.7658635525663327e-05, "loss": 1.6302, "step": 3142 }, { "epoch": 2.46, "learning_rate": 1.7657006992281026e-05, "loss": 1.4459, "step": 3143 }, { "epoch": 2.46, "learning_rate": 1.765537796788319e-05, "loss": 1.1806, "step": 3144 }, { "epoch": 2.46, "learning_rate": 1.7653748452574282e-05, "loss": 1.3837, "step": 3145 }, { "epoch": 2.46, "learning_rate": 1.7652118446458794e-05, "loss": 1.3367, "step": 3146 }, { "epoch": 2.46, "learning_rate": 1.765048794964126e-05, "loss": 1.5452, "step": 3147 }, { "epoch": 2.46, "learning_rate": 1.764885696222623e-05, "loss": 1.4418, "step": 3148 }, { "epoch": 2.46, "learning_rate": 1.7647225484318292e-05, "loss": 1.3976, "step": 3149 }, { "epoch": 2.46, "learning_rate": 1.7645593516022077e-05, "loss": 1.6811, "step": 3150 }, { "epoch": 2.46, "learning_rate": 1.7643961057442226e-05, "loss": 1.6282, "step": 3151 }, { "epoch": 2.46, "learning_rate": 1.764232810868343e-05, "loss": 1.4337, "step": 3152 }, { "epoch": 2.46, "learning_rate": 1.76406946698504e-05, "loss": 1.4298, "step": 3153 }, { "epoch": 2.47, "learning_rate": 1.7639060741047888e-05, "loss": 1.5025, "step": 3154 }, { "epoch": 2.47, "learning_rate": 1.763742632238066e-05, "loss": 1.3831, "step": 3155 }, { "epoch": 2.47, "learning_rate": 1.7635791413953536e-05, "loss": 1.6223, "step": 3156 }, { "epoch": 2.47, "learning_rate": 1.7634156015871352e-05, "loss": 1.5687, "step": 3157 }, { "epoch": 2.47, "learning_rate": 1.763252012823898e-05, "loss": 1.2285, "step": 3158 }, { "epoch": 2.47, "learning_rate": 1.763088375116133e-05, "loss": 1.5901, "step": 3159 }, { "epoch": 2.47, "learning_rate": 1.7629246884743328e-05, "loss": 1.4967, "step": 3160 }, { "epoch": 2.47, "learning_rate": 1.7627609529089944e-05, "loss": 1.6069, "step": 3161 }, { "epoch": 2.47, "learning_rate": 1.7625971684306176e-05, "loss": 1.3656, "step": 3162 }, { "epoch": 2.47, "learning_rate": 1.762433335049705e-05, "loss": 1.8891, "step": 3163 }, { "epoch": 2.47, "learning_rate": 1.762269452776763e-05, "loss": 1.4889, "step": 3164 }, { "epoch": 2.47, "learning_rate": 1.7621055216223005e-05, "loss": 1.3354, "step": 3165 }, { "epoch": 2.47, "learning_rate": 1.76194154159683e-05, "loss": 1.3039, "step": 3166 }, { "epoch": 2.48, "learning_rate": 1.761777512710867e-05, "loss": 1.3673, "step": 3167 }, { "epoch": 2.48, "learning_rate": 1.7616134349749298e-05, "loss": 1.3577, "step": 3168 }, { "epoch": 2.48, "learning_rate": 1.7614493083995402e-05, "loss": 1.2908, "step": 3169 }, { "epoch": 2.48, "learning_rate": 1.7612851329952226e-05, "loss": 1.3505, "step": 3170 }, { "epoch": 2.48, "learning_rate": 1.7611209087725057e-05, "loss": 1.4789, "step": 3171 }, { "epoch": 2.48, "learning_rate": 1.7609566357419205e-05, "loss": 1.5051, "step": 3172 }, { "epoch": 2.48, "learning_rate": 1.7607923139140006e-05, "loss": 1.4786, "step": 3173 }, { "epoch": 2.48, "learning_rate": 1.760627943299284e-05, "loss": 1.2671, "step": 3174 }, { "epoch": 2.48, "learning_rate": 1.7604635239083113e-05, "loss": 1.5519, "step": 3175 }, { "epoch": 2.48, "learning_rate": 1.7602990557516252e-05, "loss": 1.6183, "step": 3176 }, { "epoch": 2.48, "learning_rate": 1.7601345388397732e-05, "loss": 1.3414, "step": 3177 }, { "epoch": 2.48, "learning_rate": 1.759969973183305e-05, "loss": 1.394, "step": 3178 }, { "epoch": 2.48, "learning_rate": 1.7598053587927735e-05, "loss": 1.3157, "step": 3179 }, { "epoch": 2.49, "learning_rate": 1.7596406956787343e-05, "loss": 1.5436, "step": 3180 }, { "epoch": 2.49, "learning_rate": 1.759475983851748e-05, "loss": 1.4778, "step": 3181 }, { "epoch": 2.49, "learning_rate": 1.7593112233223758e-05, "loss": 1.5032, "step": 3182 }, { "epoch": 2.49, "learning_rate": 1.7591464141011833e-05, "loss": 1.2706, "step": 3183 }, { "epoch": 2.49, "learning_rate": 1.7589815561987397e-05, "loss": 1.3988, "step": 3184 }, { "epoch": 2.49, "learning_rate": 1.758816649625616e-05, "loss": 1.3689, "step": 3185 }, { "epoch": 2.49, "learning_rate": 1.7586516943923876e-05, "loss": 1.3555, "step": 3186 }, { "epoch": 2.49, "learning_rate": 1.7584866905096323e-05, "loss": 1.6827, "step": 3187 }, { "epoch": 2.49, "learning_rate": 1.758321637987931e-05, "loss": 1.3853, "step": 3188 }, { "epoch": 2.49, "learning_rate": 1.758156536837868e-05, "loss": 1.2079, "step": 3189 }, { "epoch": 2.49, "learning_rate": 1.7579913870700308e-05, "loss": 1.5199, "step": 3190 }, { "epoch": 2.49, "learning_rate": 1.75782618869501e-05, "loss": 1.5029, "step": 3191 }, { "epoch": 2.49, "learning_rate": 1.7576609417233988e-05, "loss": 1.4529, "step": 3192 }, { "epoch": 2.5, "learning_rate": 1.7574956461657937e-05, "loss": 1.5604, "step": 3193 }, { "epoch": 2.5, "learning_rate": 1.757330302032795e-05, "loss": 1.4237, "step": 3194 }, { "epoch": 2.5, "learning_rate": 1.757164909335005e-05, "loss": 1.3981, "step": 3195 }, { "epoch": 2.5, "learning_rate": 1.7569994680830307e-05, "loss": 1.5134, "step": 3196 }, { "epoch": 2.5, "learning_rate": 1.7568339782874805e-05, "loss": 1.437, "step": 3197 }, { "epoch": 2.5, "learning_rate": 1.7566684399589665e-05, "loss": 1.5925, "step": 3198 }, { "epoch": 2.5, "learning_rate": 1.7565028531081044e-05, "loss": 1.4992, "step": 3199 }, { "epoch": 2.5, "learning_rate": 1.7563372177455126e-05, "loss": 1.4233, "step": 3200 }, { "epoch": 2.5, "learning_rate": 1.7561715338818128e-05, "loss": 1.2639, "step": 3201 }, { "epoch": 2.5, "learning_rate": 1.7560058015276293e-05, "loss": 1.3196, "step": 3202 }, { "epoch": 2.5, "learning_rate": 1.7558400206935907e-05, "loss": 1.1377, "step": 3203 }, { "epoch": 2.5, "learning_rate": 1.755674191390327e-05, "loss": 1.2857, "step": 3204 }, { "epoch": 2.5, "learning_rate": 1.755508313628473e-05, "loss": 1.1643, "step": 3205 }, { "epoch": 2.51, "learning_rate": 1.755342387418665e-05, "loss": 1.4321, "step": 3206 }, { "epoch": 2.51, "learning_rate": 1.755176412771544e-05, "loss": 1.5749, "step": 3207 }, { "epoch": 2.51, "learning_rate": 1.7550103896977528e-05, "loss": 1.5755, "step": 3208 }, { "epoch": 2.51, "learning_rate": 1.7548443182079382e-05, "loss": 1.3514, "step": 3209 }, { "epoch": 2.51, "learning_rate": 1.7546781983127497e-05, "loss": 1.5846, "step": 3210 }, { "epoch": 2.51, "learning_rate": 1.7545120300228396e-05, "loss": 1.5051, "step": 3211 }, { "epoch": 2.51, "learning_rate": 1.7543458133488643e-05, "loss": 1.3775, "step": 3212 }, { "epoch": 2.51, "learning_rate": 1.7541795483014817e-05, "loss": 1.5201, "step": 3213 }, { "epoch": 2.51, "learning_rate": 1.754013234891355e-05, "loss": 1.2364, "step": 3214 }, { "epoch": 2.51, "learning_rate": 1.7538468731291486e-05, "loss": 1.4332, "step": 3215 }, { "epoch": 2.51, "learning_rate": 1.75368046302553e-05, "loss": 1.2935, "step": 3216 }, { "epoch": 2.51, "learning_rate": 1.7535140045911717e-05, "loss": 1.4986, "step": 3217 }, { "epoch": 2.52, "learning_rate": 1.753347497836748e-05, "loss": 1.7259, "step": 3218 }, { "epoch": 2.52, "learning_rate": 1.753180942772935e-05, "loss": 1.4291, "step": 3219 }, { "epoch": 2.52, "learning_rate": 1.7530143394104143e-05, "loss": 1.3513, "step": 3220 }, { "epoch": 2.52, "learning_rate": 1.7528476877598698e-05, "loss": 1.4822, "step": 3221 }, { "epoch": 2.52, "learning_rate": 1.7526809878319872e-05, "loss": 1.4036, "step": 3222 }, { "epoch": 2.52, "learning_rate": 1.7525142396374574e-05, "loss": 1.4661, "step": 3223 }, { "epoch": 2.52, "learning_rate": 1.7523474431869728e-05, "loss": 1.3459, "step": 3224 }, { "epoch": 2.52, "learning_rate": 1.75218059849123e-05, "loss": 1.3962, "step": 3225 }, { "epoch": 2.52, "learning_rate": 1.7520137055609275e-05, "loss": 1.4991, "step": 3226 }, { "epoch": 2.52, "learning_rate": 1.7518467644067673e-05, "loss": 1.3752, "step": 3227 }, { "epoch": 2.52, "learning_rate": 1.7516797750394555e-05, "loss": 1.5378, "step": 3228 }, { "epoch": 2.52, "learning_rate": 1.7515127374697006e-05, "loss": 1.6128, "step": 3229 }, { "epoch": 2.52, "learning_rate": 1.751345651708213e-05, "loss": 1.4072, "step": 3230 }, { "epoch": 2.53, "learning_rate": 1.7511785177657083e-05, "loss": 1.1841, "step": 3231 }, { "epoch": 2.53, "learning_rate": 1.7510113356529035e-05, "loss": 1.3906, "step": 3232 }, { "epoch": 2.53, "learning_rate": 1.7508441053805202e-05, "loss": 1.4687, "step": 3233 }, { "epoch": 2.53, "learning_rate": 1.7506768269592812e-05, "loss": 1.4954, "step": 3234 }, { "epoch": 2.53, "learning_rate": 1.7505095003999147e-05, "loss": 1.4145, "step": 3235 }, { "epoch": 2.53, "learning_rate": 1.7503421257131498e-05, "loss": 1.3541, "step": 3236 }, { "epoch": 2.53, "learning_rate": 1.7501747029097195e-05, "loss": 1.5557, "step": 3237 }, { "epoch": 2.53, "learning_rate": 1.7500072320003605e-05, "loss": 1.5549, "step": 3238 }, { "epoch": 2.53, "learning_rate": 1.7498397129958124e-05, "loss": 1.4054, "step": 3239 }, { "epoch": 2.53, "learning_rate": 1.7496721459068166e-05, "loss": 1.5833, "step": 3240 }, { "epoch": 2.53, "learning_rate": 1.7495045307441194e-05, "loss": 1.4194, "step": 3241 }, { "epoch": 2.53, "learning_rate": 1.749336867518469e-05, "loss": 1.2216, "step": 3242 }, { "epoch": 2.53, "learning_rate": 1.7491691562406173e-05, "loss": 1.4215, "step": 3243 }, { "epoch": 2.54, "learning_rate": 1.7490013969213183e-05, "loss": 1.2858, "step": 3244 }, { "epoch": 2.54, "learning_rate": 1.7488335895713305e-05, "loss": 1.6179, "step": 3245 }, { "epoch": 2.54, "learning_rate": 1.7486657342014146e-05, "loss": 1.5231, "step": 3246 }, { "epoch": 2.54, "learning_rate": 1.7484978308223342e-05, "loss": 1.4397, "step": 3247 }, { "epoch": 2.54, "learning_rate": 1.7483298794448566e-05, "loss": 1.4209, "step": 3248 }, { "epoch": 2.54, "learning_rate": 1.7481618800797523e-05, "loss": 1.3117, "step": 3249 }, { "epoch": 2.54, "learning_rate": 1.7479938327377935e-05, "loss": 1.5725, "step": 3250 }, { "epoch": 2.54, "learning_rate": 1.7478257374297575e-05, "loss": 1.2829, "step": 3251 }, { "epoch": 2.54, "learning_rate": 1.747657594166423e-05, "loss": 1.5706, "step": 3252 }, { "epoch": 2.54, "learning_rate": 1.7474894029585724e-05, "loss": 1.4531, "step": 3253 }, { "epoch": 2.54, "learning_rate": 1.7473211638169914e-05, "loss": 1.3396, "step": 3254 }, { "epoch": 2.54, "learning_rate": 1.7471528767524682e-05, "loss": 1.5817, "step": 3255 }, { "epoch": 2.54, "learning_rate": 1.7469845417757954e-05, "loss": 1.1275, "step": 3256 }, { "epoch": 2.55, "learning_rate": 1.7468161588977665e-05, "loss": 1.6897, "step": 3257 }, { "epoch": 2.55, "learning_rate": 1.74664772812918e-05, "loss": 1.5706, "step": 3258 }, { "epoch": 2.55, "learning_rate": 1.746479249480836e-05, "loss": 1.514, "step": 3259 }, { "epoch": 2.55, "learning_rate": 1.7463107229635397e-05, "loss": 1.4126, "step": 3260 }, { "epoch": 2.55, "learning_rate": 1.7461421485880968e-05, "loss": 1.3405, "step": 3261 }, { "epoch": 2.55, "learning_rate": 1.745973526365318e-05, "loss": 1.6746, "step": 3262 }, { "epoch": 2.55, "learning_rate": 1.7458048563060167e-05, "loss": 1.4623, "step": 3263 }, { "epoch": 2.55, "learning_rate": 1.745636138421008e-05, "loss": 1.7845, "step": 3264 }, { "epoch": 2.55, "learning_rate": 1.745467372721112e-05, "loss": 1.624, "step": 3265 }, { "epoch": 2.55, "learning_rate": 1.745298559217151e-05, "loss": 1.2212, "step": 3266 }, { "epoch": 2.55, "learning_rate": 1.7451296979199502e-05, "loss": 1.5203, "step": 3267 }, { "epoch": 2.55, "learning_rate": 1.7449607888403384e-05, "loss": 1.3063, "step": 3268 }, { "epoch": 2.55, "learning_rate": 1.7447918319891464e-05, "loss": 1.487, "step": 3269 }, { "epoch": 2.56, "learning_rate": 1.7446228273772093e-05, "loss": 1.3971, "step": 3270 }, { "epoch": 2.56, "learning_rate": 1.7444537750153645e-05, "loss": 1.5161, "step": 3271 }, { "epoch": 2.56, "learning_rate": 1.744284674914453e-05, "loss": 1.6229, "step": 3272 }, { "epoch": 2.56, "learning_rate": 1.7441155270853183e-05, "loss": 1.4861, "step": 3273 }, { "epoch": 2.56, "learning_rate": 1.7439463315388073e-05, "loss": 1.2532, "step": 3274 }, { "epoch": 2.56, "learning_rate": 1.74377708828577e-05, "loss": 1.4042, "step": 3275 }, { "epoch": 2.56, "learning_rate": 1.7436077973370592e-05, "loss": 1.4295, "step": 3276 }, { "epoch": 2.56, "learning_rate": 1.7434384587035306e-05, "loss": 1.5248, "step": 3277 }, { "epoch": 2.56, "learning_rate": 1.743269072396044e-05, "loss": 1.351, "step": 3278 }, { "epoch": 2.56, "learning_rate": 1.743099638425461e-05, "loss": 1.6901, "step": 3279 }, { "epoch": 2.56, "learning_rate": 1.742930156802647e-05, "loss": 1.7789, "step": 3280 }, { "epoch": 2.56, "learning_rate": 1.7427606275384698e-05, "loss": 1.4313, "step": 3281 }, { "epoch": 2.57, "learning_rate": 1.7425910506438014e-05, "loss": 1.501, "step": 3282 }, { "epoch": 2.57, "learning_rate": 1.7424214261295156e-05, "loss": 1.3138, "step": 3283 }, { "epoch": 2.57, "learning_rate": 1.7422517540064898e-05, "loss": 1.3379, "step": 3284 }, { "epoch": 2.57, "learning_rate": 1.7420820342856046e-05, "loss": 1.6325, "step": 3285 }, { "epoch": 2.57, "learning_rate": 1.7419122669777436e-05, "loss": 1.3415, "step": 3286 }, { "epoch": 2.57, "learning_rate": 1.7417424520937933e-05, "loss": 1.4791, "step": 3287 }, { "epoch": 2.57, "learning_rate": 1.7415725896446432e-05, "loss": 1.4875, "step": 3288 }, { "epoch": 2.57, "learning_rate": 1.741402679641186e-05, "loss": 1.5753, "step": 3289 }, { "epoch": 2.57, "learning_rate": 1.7412327220943174e-05, "loss": 1.3403, "step": 3290 }, { "epoch": 2.57, "learning_rate": 1.741062717014936e-05, "loss": 1.6553, "step": 3291 }, { "epoch": 2.57, "learning_rate": 1.7408926644139435e-05, "loss": 1.5716, "step": 3292 }, { "epoch": 2.57, "learning_rate": 1.7407225643022453e-05, "loss": 1.693, "step": 3293 }, { "epoch": 2.57, "learning_rate": 1.7405524166907487e-05, "loss": 1.1355, "step": 3294 }, { "epoch": 2.58, "learning_rate": 1.740382221590365e-05, "loss": 1.3666, "step": 3295 }, { "epoch": 2.58, "learning_rate": 1.7402119790120085e-05, "loss": 1.672, "step": 3296 }, { "epoch": 2.58, "learning_rate": 1.7400416889665956e-05, "loss": 1.292, "step": 3297 }, { "epoch": 2.58, "learning_rate": 1.739871351465046e-05, "loss": 1.2603, "step": 3298 }, { "epoch": 2.58, "learning_rate": 1.739700966518284e-05, "loss": 1.4234, "step": 3299 }, { "epoch": 2.58, "learning_rate": 1.7395305341372353e-05, "loss": 1.6262, "step": 3300 }, { "epoch": 2.58, "learning_rate": 1.7393600543328287e-05, "loss": 1.47, "step": 3301 }, { "epoch": 2.58, "learning_rate": 1.7391895271159964e-05, "loss": 1.2698, "step": 3302 }, { "epoch": 2.58, "learning_rate": 1.7390189524976745e-05, "loss": 1.3927, "step": 3303 }, { "epoch": 2.58, "learning_rate": 1.7388483304888003e-05, "loss": 1.5132, "step": 3304 }, { "epoch": 2.58, "learning_rate": 1.7386776611003156e-05, "loss": 1.3565, "step": 3305 }, { "epoch": 2.58, "learning_rate": 1.7385069443431654e-05, "loss": 1.137, "step": 3306 }, { "epoch": 2.58, "learning_rate": 1.738336180228296e-05, "loss": 1.378, "step": 3307 }, { "epoch": 2.59, "learning_rate": 1.738165368766659e-05, "loss": 1.7959, "step": 3308 }, { "epoch": 2.59, "learning_rate": 1.7379945099692067e-05, "loss": 1.5665, "step": 3309 }, { "epoch": 2.59, "learning_rate": 1.7378236038468968e-05, "loss": 1.4187, "step": 3310 }, { "epoch": 2.59, "learning_rate": 1.7376526504106885e-05, "loss": 1.479, "step": 3311 }, { "epoch": 2.59, "learning_rate": 1.7374816496715437e-05, "loss": 1.2997, "step": 3312 }, { "epoch": 2.59, "learning_rate": 1.7373106016404292e-05, "loss": 1.2448, "step": 3313 }, { "epoch": 2.59, "learning_rate": 1.7371395063283126e-05, "loss": 1.0736, "step": 3314 }, { "epoch": 2.59, "learning_rate": 1.7369683637461662e-05, "loss": 1.2241, "step": 3315 }, { "epoch": 2.59, "learning_rate": 1.736797173904965e-05, "loss": 1.3779, "step": 3316 }, { "epoch": 2.59, "learning_rate": 1.736625936815686e-05, "loss": 1.6185, "step": 3317 }, { "epoch": 2.59, "learning_rate": 1.736454652489311e-05, "loss": 1.4353, "step": 3318 }, { "epoch": 2.59, "learning_rate": 1.7362833209368228e-05, "loss": 1.1678, "step": 3319 }, { "epoch": 2.59, "learning_rate": 1.736111942169209e-05, "loss": 1.6103, "step": 3320 }, { "epoch": 2.6, "learning_rate": 1.735940516197459e-05, "loss": 1.3639, "step": 3321 }, { "epoch": 2.6, "learning_rate": 1.735769043032566e-05, "loss": 1.5427, "step": 3322 }, { "epoch": 2.6, "learning_rate": 1.735597522685526e-05, "loss": 1.5461, "step": 3323 }, { "epoch": 2.6, "learning_rate": 1.7354259551673377e-05, "loss": 1.4742, "step": 3324 }, { "epoch": 2.6, "learning_rate": 1.735254340489003e-05, "loss": 1.255, "step": 3325 }, { "epoch": 2.6, "learning_rate": 1.7350826786615274e-05, "loss": 1.2485, "step": 3326 }, { "epoch": 2.6, "learning_rate": 1.7349109696959184e-05, "loss": 1.5115, "step": 3327 }, { "epoch": 2.6, "learning_rate": 1.7347392136031876e-05, "loss": 1.3993, "step": 3328 }, { "epoch": 2.6, "learning_rate": 1.7345674103943487e-05, "loss": 1.3294, "step": 3329 }, { "epoch": 2.6, "learning_rate": 1.734395560080419e-05, "loss": 1.3746, "step": 3330 }, { "epoch": 2.6, "learning_rate": 1.7342236626724184e-05, "loss": 1.5678, "step": 3331 }, { "epoch": 2.6, "learning_rate": 1.7340517181813702e-05, "loss": 1.2961, "step": 3332 }, { "epoch": 2.6, "learning_rate": 1.7338797266183006e-05, "loss": 1.3772, "step": 3333 }, { "epoch": 2.61, "learning_rate": 1.7337076879942388e-05, "loss": 1.4576, "step": 3334 }, { "epoch": 2.61, "learning_rate": 1.7335356023202167e-05, "loss": 1.4459, "step": 3335 }, { "epoch": 2.61, "learning_rate": 1.73336346960727e-05, "loss": 1.1657, "step": 3336 }, { "epoch": 2.61, "learning_rate": 1.7331912898664364e-05, "loss": 1.5496, "step": 3337 }, { "epoch": 2.61, "learning_rate": 1.7330190631087577e-05, "loss": 1.6854, "step": 3338 }, { "epoch": 2.61, "learning_rate": 1.732846789345278e-05, "loss": 1.2741, "step": 3339 }, { "epoch": 2.61, "learning_rate": 1.7326744685870443e-05, "loss": 1.4995, "step": 3340 }, { "epoch": 2.61, "learning_rate": 1.732502100845107e-05, "loss": 1.5655, "step": 3341 }, { "epoch": 2.61, "learning_rate": 1.7323296861305198e-05, "loss": 1.7134, "step": 3342 }, { "epoch": 2.61, "learning_rate": 1.7321572244543383e-05, "loss": 1.4799, "step": 3343 }, { "epoch": 2.61, "learning_rate": 1.7319847158276222e-05, "loss": 1.3686, "step": 3344 }, { "epoch": 2.61, "learning_rate": 1.7318121602614343e-05, "loss": 1.4096, "step": 3345 }, { "epoch": 2.62, "learning_rate": 1.731639557766839e-05, "loss": 1.206, "step": 3346 }, { "epoch": 2.62, "learning_rate": 1.7314669083549057e-05, "loss": 1.6128, "step": 3347 }, { "epoch": 2.62, "learning_rate": 1.7312942120367048e-05, "loss": 1.4665, "step": 3348 }, { "epoch": 2.62, "learning_rate": 1.7311214688233114e-05, "loss": 1.334, "step": 3349 }, { "epoch": 2.62, "learning_rate": 1.7309486787258025e-05, "loss": 1.4494, "step": 3350 }, { "epoch": 2.62, "learning_rate": 1.7307758417552585e-05, "loss": 1.6093, "step": 3351 }, { "epoch": 2.62, "learning_rate": 1.730602957922763e-05, "loss": 1.4625, "step": 3352 }, { "epoch": 2.62, "learning_rate": 1.7304300272394022e-05, "loss": 1.6066, "step": 3353 }, { "epoch": 2.62, "learning_rate": 1.730257049716266e-05, "loss": 1.5426, "step": 3354 }, { "epoch": 2.62, "learning_rate": 1.7300840253644458e-05, "loss": 1.4575, "step": 3355 }, { "epoch": 2.62, "learning_rate": 1.729910954195038e-05, "loss": 1.2664, "step": 3356 }, { "epoch": 2.62, "learning_rate": 1.7297378362191405e-05, "loss": 1.6655, "step": 3357 }, { "epoch": 2.62, "learning_rate": 1.7295646714478546e-05, "loss": 1.7592, "step": 3358 }, { "epoch": 2.63, "learning_rate": 1.7293914598922854e-05, "loss": 1.5932, "step": 3359 }, { "epoch": 2.63, "learning_rate": 1.7292182015635396e-05, "loss": 1.4905, "step": 3360 }, { "epoch": 2.63, "learning_rate": 1.729044896472728e-05, "loss": 1.5558, "step": 3361 }, { "epoch": 2.63, "learning_rate": 1.728871544630964e-05, "loss": 1.4769, "step": 3362 }, { "epoch": 2.63, "learning_rate": 1.7286981460493636e-05, "loss": 1.4556, "step": 3363 }, { "epoch": 2.63, "learning_rate": 1.7285247007390468e-05, "loss": 1.5973, "step": 3364 }, { "epoch": 2.63, "learning_rate": 1.7283512087111353e-05, "loss": 1.4782, "step": 3365 }, { "epoch": 2.63, "learning_rate": 1.7281776699767554e-05, "loss": 1.8018, "step": 3366 }, { "epoch": 2.63, "learning_rate": 1.728004084547035e-05, "loss": 1.6084, "step": 3367 }, { "epoch": 2.63, "learning_rate": 1.7278304524331054e-05, "loss": 1.2677, "step": 3368 }, { "epoch": 2.63, "learning_rate": 1.7276567736461012e-05, "loss": 1.6211, "step": 3369 }, { "epoch": 2.63, "learning_rate": 1.7274830481971598e-05, "loss": 1.7145, "step": 3370 }, { "epoch": 2.63, "learning_rate": 1.727309276097421e-05, "loss": 1.1919, "step": 3371 }, { "epoch": 2.64, "learning_rate": 1.727135457358029e-05, "loss": 1.46, "step": 3372 }, { "epoch": 2.64, "learning_rate": 1.7269615919901297e-05, "loss": 1.5163, "step": 3373 }, { "epoch": 2.64, "learning_rate": 1.7267876800048723e-05, "loss": 1.3514, "step": 3374 }, { "epoch": 2.64, "learning_rate": 1.7266137214134095e-05, "loss": 1.5253, "step": 3375 }, { "epoch": 2.64, "learning_rate": 1.7264397162268967e-05, "loss": 1.1247, "step": 3376 }, { "epoch": 2.64, "learning_rate": 1.726265664456492e-05, "loss": 1.4783, "step": 3377 }, { "epoch": 2.64, "learning_rate": 1.7260915661133563e-05, "loss": 1.4304, "step": 3378 }, { "epoch": 2.64, "learning_rate": 1.725917421208654e-05, "loss": 1.3979, "step": 3379 }, { "epoch": 2.64, "learning_rate": 1.7257432297535535e-05, "loss": 1.1768, "step": 3380 }, { "epoch": 2.64, "learning_rate": 1.7255689917592237e-05, "loss": 1.1172, "step": 3381 }, { "epoch": 2.64, "learning_rate": 1.7253947072368384e-05, "loss": 1.7351, "step": 3382 }, { "epoch": 2.64, "learning_rate": 1.7252203761975737e-05, "loss": 1.3719, "step": 3383 }, { "epoch": 2.64, "learning_rate": 1.725045998652609e-05, "loss": 1.4855, "step": 3384 }, { "epoch": 2.65, "learning_rate": 1.724871574613126e-05, "loss": 1.4913, "step": 3385 }, { "epoch": 2.65, "learning_rate": 1.7246971040903102e-05, "loss": 1.5326, "step": 3386 }, { "epoch": 2.65, "learning_rate": 1.72452258709535e-05, "loss": 1.3116, "step": 3387 }, { "epoch": 2.65, "learning_rate": 1.724348023639436e-05, "loss": 1.2164, "step": 3388 }, { "epoch": 2.65, "learning_rate": 1.7241734137337628e-05, "loss": 1.4257, "step": 3389 }, { "epoch": 2.65, "learning_rate": 1.7239987573895273e-05, "loss": 1.5898, "step": 3390 }, { "epoch": 2.65, "learning_rate": 1.7238240546179296e-05, "loss": 1.6166, "step": 3391 }, { "epoch": 2.65, "learning_rate": 1.7236493054301725e-05, "loss": 1.7497, "step": 3392 }, { "epoch": 2.65, "learning_rate": 1.7234745098374623e-05, "loss": 1.475, "step": 3393 }, { "epoch": 2.65, "learning_rate": 1.723299667851008e-05, "loss": 1.2807, "step": 3394 }, { "epoch": 2.65, "learning_rate": 1.723124779482021e-05, "loss": 1.6131, "step": 3395 }, { "epoch": 2.65, "learning_rate": 1.722949844741717e-05, "loss": 1.1255, "step": 3396 }, { "epoch": 2.65, "learning_rate": 1.722774863641314e-05, "loss": 1.1473, "step": 3397 }, { "epoch": 2.66, "learning_rate": 1.722599836192032e-05, "loss": 1.5967, "step": 3398 }, { "epoch": 2.66, "learning_rate": 1.7224247624050957e-05, "loss": 1.4015, "step": 3399 }, { "epoch": 2.66, "learning_rate": 1.7222496422917315e-05, "loss": 1.4652, "step": 3400 }, { "epoch": 2.66, "learning_rate": 1.7220744758631693e-05, "loss": 1.5788, "step": 3401 }, { "epoch": 2.66, "learning_rate": 1.7218992631306416e-05, "loss": 1.5524, "step": 3402 }, { "epoch": 2.66, "learning_rate": 1.721724004105385e-05, "loss": 1.5263, "step": 3403 }, { "epoch": 2.66, "learning_rate": 1.721548698798637e-05, "loss": 1.4729, "step": 3404 }, { "epoch": 2.66, "learning_rate": 1.7213733472216407e-05, "loss": 1.3726, "step": 3405 }, { "epoch": 2.66, "learning_rate": 1.7211979493856397e-05, "loss": 1.554, "step": 3406 }, { "epoch": 2.66, "learning_rate": 1.7210225053018816e-05, "loss": 1.4534, "step": 3407 }, { "epoch": 2.66, "learning_rate": 1.7208470149816175e-05, "loss": 1.4727, "step": 3408 }, { "epoch": 2.66, "learning_rate": 1.7206714784361007e-05, "loss": 1.2167, "step": 3409 }, { "epoch": 2.67, "learning_rate": 1.7204958956765876e-05, "loss": 1.6852, "step": 3410 }, { "epoch": 2.67, "learning_rate": 1.720320266714338e-05, "loss": 1.5698, "step": 3411 }, { "epoch": 2.67, "learning_rate": 1.720144591560614e-05, "loss": 1.6098, "step": 3412 }, { "epoch": 2.67, "learning_rate": 1.7199688702266812e-05, "loss": 1.6366, "step": 3413 }, { "epoch": 2.67, "learning_rate": 1.719793102723808e-05, "loss": 1.3341, "step": 3414 }, { "epoch": 2.67, "learning_rate": 1.7196172890632653e-05, "loss": 1.3402, "step": 3415 }, { "epoch": 2.67, "learning_rate": 1.719441429256328e-05, "loss": 1.2618, "step": 3416 }, { "epoch": 2.67, "learning_rate": 1.719265523314273e-05, "loss": 1.5916, "step": 3417 }, { "epoch": 2.67, "learning_rate": 1.7190895712483805e-05, "loss": 1.3103, "step": 3418 }, { "epoch": 2.67, "learning_rate": 1.7189135730699335e-05, "loss": 1.3193, "step": 3419 }, { "epoch": 2.67, "learning_rate": 1.7187375287902188e-05, "loss": 1.4728, "step": 3420 }, { "epoch": 2.67, "learning_rate": 1.7185614384205246e-05, "loss": 1.2807, "step": 3421 }, { "epoch": 2.67, "learning_rate": 1.7183853019721432e-05, "loss": 1.7281, "step": 3422 }, { "epoch": 2.68, "learning_rate": 1.7182091194563696e-05, "loss": 1.7362, "step": 3423 }, { "epoch": 2.68, "learning_rate": 1.7180328908845023e-05, "loss": 1.5669, "step": 3424 }, { "epoch": 2.68, "learning_rate": 1.7178566162678412e-05, "loss": 1.6485, "step": 3425 }, { "epoch": 2.68, "learning_rate": 1.717680295617691e-05, "loss": 1.7366, "step": 3426 }, { "epoch": 2.68, "learning_rate": 1.7175039289453578e-05, "loss": 1.4756, "step": 3427 }, { "epoch": 2.68, "learning_rate": 1.717327516262152e-05, "loss": 1.3168, "step": 3428 }, { "epoch": 2.68, "learning_rate": 1.717151057579386e-05, "loss": 1.2657, "step": 3429 }, { "epoch": 2.68, "learning_rate": 1.7169745529083747e-05, "loss": 1.7623, "step": 3430 }, { "epoch": 2.68, "learning_rate": 1.716798002260438e-05, "loss": 1.4101, "step": 3431 }, { "epoch": 2.68, "learning_rate": 1.7166214056468966e-05, "loss": 1.4325, "step": 3432 }, { "epoch": 2.68, "learning_rate": 1.716444763079075e-05, "loss": 1.4368, "step": 3433 }, { "epoch": 2.68, "learning_rate": 1.7162680745683015e-05, "loss": 1.4462, "step": 3434 }, { "epoch": 2.68, "learning_rate": 1.7160913401259052e-05, "loss": 1.1304, "step": 3435 }, { "epoch": 2.69, "learning_rate": 1.7159145597632207e-05, "loss": 1.338, "step": 3436 }, { "epoch": 2.69, "learning_rate": 1.715737733491583e-05, "loss": 1.3342, "step": 3437 }, { "epoch": 2.69, "learning_rate": 1.7155608613223323e-05, "loss": 1.7451, "step": 3438 }, { "epoch": 2.69, "learning_rate": 1.7153839432668104e-05, "loss": 1.6444, "step": 3439 }, { "epoch": 2.69, "learning_rate": 1.7152069793363625e-05, "loss": 1.3809, "step": 3440 }, { "epoch": 2.69, "learning_rate": 1.715029969542336e-05, "loss": 1.6298, "step": 3441 }, { "epoch": 2.69, "learning_rate": 1.714852913896083e-05, "loss": 1.5931, "step": 3442 }, { "epoch": 2.69, "learning_rate": 1.7146758124089566e-05, "loss": 1.2974, "step": 3443 }, { "epoch": 2.69, "learning_rate": 1.714498665092314e-05, "loss": 1.4312, "step": 3444 }, { "epoch": 2.69, "learning_rate": 1.714321471957515e-05, "loss": 1.4215, "step": 3445 }, { "epoch": 2.69, "learning_rate": 1.714144233015922e-05, "loss": 1.7044, "step": 3446 }, { "epoch": 2.69, "learning_rate": 1.7139669482789013e-05, "loss": 1.497, "step": 3447 }, { "epoch": 2.69, "learning_rate": 1.7137896177578207e-05, "loss": 1.676, "step": 3448 }, { "epoch": 2.7, "learning_rate": 1.7136122414640524e-05, "loss": 1.4357, "step": 3449 }, { "epoch": 2.7, "learning_rate": 1.713434819408971e-05, "loss": 1.3492, "step": 3450 }, { "epoch": 2.7, "learning_rate": 1.713257351603953e-05, "loss": 1.5866, "step": 3451 }, { "epoch": 2.7, "learning_rate": 1.71307983806038e-05, "loss": 1.5243, "step": 3452 }, { "epoch": 2.7, "learning_rate": 1.712902278789634e-05, "loss": 1.5079, "step": 3453 }, { "epoch": 2.7, "learning_rate": 1.7127246738031025e-05, "loss": 1.376, "step": 3454 }, { "epoch": 2.7, "learning_rate": 1.7125470231121734e-05, "loss": 1.53, "step": 3455 }, { "epoch": 2.7, "learning_rate": 1.71236932672824e-05, "loss": 1.4603, "step": 3456 }, { "epoch": 2.7, "learning_rate": 1.7121915846626965e-05, "loss": 1.5007, "step": 3457 }, { "epoch": 2.7, "learning_rate": 1.7120137969269407e-05, "loss": 1.5832, "step": 3458 }, { "epoch": 2.7, "learning_rate": 1.7118359635323743e-05, "loss": 1.568, "step": 3459 }, { "epoch": 2.7, "learning_rate": 1.7116580844904e-05, "loss": 1.3325, "step": 3460 }, { "epoch": 2.7, "learning_rate": 1.711480159812426e-05, "loss": 1.4044, "step": 3461 }, { "epoch": 2.71, "learning_rate": 1.7113021895098602e-05, "loss": 1.562, "step": 3462 }, { "epoch": 2.71, "learning_rate": 1.711124173594117e-05, "loss": 1.5266, "step": 3463 }, { "epoch": 2.71, "learning_rate": 1.71094611207661e-05, "loss": 1.4762, "step": 3464 }, { "epoch": 2.71, "learning_rate": 1.7107680049687594e-05, "loss": 1.3843, "step": 3465 }, { "epoch": 2.71, "learning_rate": 1.710589852281985e-05, "loss": 1.4499, "step": 3466 }, { "epoch": 2.71, "learning_rate": 1.7104116540277128e-05, "loss": 1.5938, "step": 3467 }, { "epoch": 2.71, "learning_rate": 1.7102334102173685e-05, "loss": 1.2316, "step": 3468 }, { "epoch": 2.71, "learning_rate": 1.7100551208623825e-05, "loss": 1.8076, "step": 3469 }, { "epoch": 2.71, "learning_rate": 1.7098767859741887e-05, "loss": 1.5312, "step": 3470 }, { "epoch": 2.71, "learning_rate": 1.709698405564222e-05, "loss": 1.3313, "step": 3471 }, { "epoch": 2.71, "learning_rate": 1.7095199796439214e-05, "loss": 1.3163, "step": 3472 }, { "epoch": 2.71, "learning_rate": 1.7093415082247295e-05, "loss": 1.2791, "step": 3473 }, { "epoch": 2.72, "learning_rate": 1.70916299131809e-05, "loss": 1.5185, "step": 3474 }, { "epoch": 2.72, "learning_rate": 1.708984428935452e-05, "loss": 1.4312, "step": 3475 }, { "epoch": 2.72, "learning_rate": 1.708805821088264e-05, "loss": 1.2854, "step": 3476 }, { "epoch": 2.72, "learning_rate": 1.708627167787981e-05, "loss": 1.6759, "step": 3477 }, { "epoch": 2.72, "learning_rate": 1.7084484690460593e-05, "loss": 1.2274, "step": 3478 }, { "epoch": 2.72, "learning_rate": 1.708269724873957e-05, "loss": 1.3886, "step": 3479 }, { "epoch": 2.72, "learning_rate": 1.7080909352831377e-05, "loss": 1.418, "step": 3480 }, { "epoch": 2.72, "learning_rate": 1.707912100285066e-05, "loss": 1.4309, "step": 3481 }, { "epoch": 2.72, "learning_rate": 1.7077332198912095e-05, "loss": 1.35, "step": 3482 }, { "epoch": 2.72, "learning_rate": 1.7075542941130397e-05, "loss": 1.3529, "step": 3483 }, { "epoch": 2.72, "learning_rate": 1.70737532296203e-05, "loss": 1.3699, "step": 3484 }, { "epoch": 2.72, "learning_rate": 1.7071963064496576e-05, "loss": 1.322, "step": 3485 }, { "epoch": 2.72, "learning_rate": 1.707017244587402e-05, "loss": 1.4304, "step": 3486 }, { "epoch": 2.73, "learning_rate": 1.7068381373867457e-05, "loss": 1.3422, "step": 3487 }, { "epoch": 2.73, "learning_rate": 1.7066589848591742e-05, "loss": 1.3654, "step": 3488 }, { "epoch": 2.73, "learning_rate": 1.706479787016176e-05, "loss": 1.3336, "step": 3489 }, { "epoch": 2.73, "learning_rate": 1.7063005438692424e-05, "loss": 1.3177, "step": 3490 }, { "epoch": 2.73, "learning_rate": 1.706121255429867e-05, "loss": 1.5935, "step": 3491 }, { "epoch": 2.73, "learning_rate": 1.705941921709548e-05, "loss": 1.4127, "step": 3492 }, { "epoch": 2.73, "learning_rate": 1.7057625427197844e-05, "loss": 1.3487, "step": 3493 }, { "epoch": 2.73, "learning_rate": 1.7055831184720798e-05, "loss": 1.5739, "step": 3494 }, { "epoch": 2.73, "learning_rate": 1.7054036489779395e-05, "loss": 1.3941, "step": 3495 }, { "epoch": 2.73, "learning_rate": 1.7052241342488727e-05, "loss": 1.5081, "step": 3496 }, { "epoch": 2.73, "learning_rate": 1.7050445742963906e-05, "loss": 1.1632, "step": 3497 }, { "epoch": 2.73, "learning_rate": 1.7048649691320077e-05, "loss": 1.6702, "step": 3498 }, { "epoch": 2.73, "learning_rate": 1.704685318767242e-05, "loss": 1.5934, "step": 3499 }, { "epoch": 2.74, "learning_rate": 1.7045056232136135e-05, "loss": 1.394, "step": 3500 }, { "epoch": 2.74, "learning_rate": 1.7043258824826452e-05, "loss": 1.3793, "step": 3501 }, { "epoch": 2.74, "learning_rate": 1.7041460965858632e-05, "loss": 1.4775, "step": 3502 }, { "epoch": 2.74, "learning_rate": 1.7039662655347966e-05, "loss": 1.5268, "step": 3503 }, { "epoch": 2.74, "learning_rate": 1.7037863893409777e-05, "loss": 1.248, "step": 3504 }, { "epoch": 2.74, "learning_rate": 1.7036064680159406e-05, "loss": 1.5593, "step": 3505 }, { "epoch": 2.74, "learning_rate": 1.7034265015712232e-05, "loss": 1.3388, "step": 3506 }, { "epoch": 2.74, "learning_rate": 1.7032464900183667e-05, "loss": 1.4933, "step": 3507 }, { "epoch": 2.74, "learning_rate": 1.7030664333689135e-05, "loss": 1.537, "step": 3508 }, { "epoch": 2.74, "learning_rate": 1.702886331634411e-05, "loss": 1.5054, "step": 3509 }, { "epoch": 2.74, "learning_rate": 1.7027061848264077e-05, "loss": 1.3022, "step": 3510 }, { "epoch": 2.74, "learning_rate": 1.7025259929564564e-05, "loss": 1.3355, "step": 3511 }, { "epoch": 2.74, "learning_rate": 1.7023457560361114e-05, "loss": 1.2759, "step": 3512 }, { "epoch": 2.75, "learning_rate": 1.7021654740769314e-05, "loss": 1.2674, "step": 3513 }, { "epoch": 2.75, "learning_rate": 1.7019851470904764e-05, "loss": 1.2587, "step": 3514 }, { "epoch": 2.75, "learning_rate": 1.7018047750883107e-05, "loss": 1.3492, "step": 3515 }, { "epoch": 2.75, "learning_rate": 1.701624358082001e-05, "loss": 1.431, "step": 3516 }, { "epoch": 2.75, "learning_rate": 1.7014438960831165e-05, "loss": 1.5443, "step": 3517 }, { "epoch": 2.75, "learning_rate": 1.7012633891032295e-05, "loss": 1.4756, "step": 3518 }, { "epoch": 2.75, "learning_rate": 1.701082837153915e-05, "loss": 1.4969, "step": 3519 }, { "epoch": 2.75, "learning_rate": 1.7009022402467518e-05, "loss": 1.3438, "step": 3520 }, { "epoch": 2.75, "learning_rate": 1.70072159839332e-05, "loss": 1.5845, "step": 3521 }, { "epoch": 2.75, "learning_rate": 1.7005409116052048e-05, "loss": 1.5862, "step": 3522 }, { "epoch": 2.75, "learning_rate": 1.7003601798939918e-05, "loss": 1.6186, "step": 3523 }, { "epoch": 2.75, "learning_rate": 1.700179403271271e-05, "loss": 1.4968, "step": 3524 }, { "epoch": 2.75, "learning_rate": 1.6999985817486353e-05, "loss": 1.6283, "step": 3525 }, { "epoch": 2.76, "learning_rate": 1.6998177153376796e-05, "loss": 1.5131, "step": 3526 }, { "epoch": 2.76, "learning_rate": 1.6996368040500028e-05, "loss": 1.4925, "step": 3527 }, { "epoch": 2.76, "learning_rate": 1.6994558478972053e-05, "loss": 1.3975, "step": 3528 }, { "epoch": 2.76, "learning_rate": 1.6992748468908914e-05, "loss": 1.4364, "step": 3529 }, { "epoch": 2.76, "learning_rate": 1.6990938010426685e-05, "loss": 1.3879, "step": 3530 }, { "epoch": 2.76, "learning_rate": 1.6989127103641457e-05, "loss": 1.6057, "step": 3531 }, { "epoch": 2.76, "learning_rate": 1.6987315748669363e-05, "loss": 1.5327, "step": 3532 }, { "epoch": 2.76, "learning_rate": 1.698550394562656e-05, "loss": 1.5521, "step": 3533 }, { "epoch": 2.76, "learning_rate": 1.6983691694629223e-05, "loss": 1.3843, "step": 3534 }, { "epoch": 2.76, "learning_rate": 1.698187899579357e-05, "loss": 1.4148, "step": 3535 }, { "epoch": 2.76, "learning_rate": 1.6980065849235844e-05, "loss": 1.4189, "step": 3536 }, { "epoch": 2.76, "learning_rate": 1.6978252255072315e-05, "loss": 1.3566, "step": 3537 }, { "epoch": 2.77, "learning_rate": 1.6976438213419278e-05, "loss": 1.3984, "step": 3538 }, { "epoch": 2.77, "learning_rate": 1.6974623724393067e-05, "loss": 1.3762, "step": 3539 }, { "epoch": 2.77, "learning_rate": 1.6972808788110035e-05, "loss": 1.348, "step": 3540 }, { "epoch": 2.77, "learning_rate": 1.697099340468657e-05, "loss": 1.3077, "step": 3541 }, { "epoch": 2.77, "learning_rate": 1.696917757423908e-05, "loss": 1.4838, "step": 3542 }, { "epoch": 2.77, "learning_rate": 1.696736129688401e-05, "loss": 1.6523, "step": 3543 }, { "epoch": 2.77, "learning_rate": 1.6965544572737837e-05, "loss": 1.3612, "step": 3544 }, { "epoch": 2.77, "learning_rate": 1.696372740191705e-05, "loss": 1.5672, "step": 3545 }, { "epoch": 2.77, "learning_rate": 1.6961909784538188e-05, "loss": 1.3039, "step": 3546 }, { "epoch": 2.77, "learning_rate": 1.6960091720717803e-05, "loss": 1.2639, "step": 3547 }, { "epoch": 2.77, "learning_rate": 1.695827321057248e-05, "loss": 1.4978, "step": 3548 }, { "epoch": 2.77, "learning_rate": 1.6956454254218833e-05, "loss": 1.1157, "step": 3549 }, { "epoch": 2.77, "learning_rate": 1.6954634851773508e-05, "loss": 1.4342, "step": 3550 }, { "epoch": 2.78, "learning_rate": 1.6952815003353172e-05, "loss": 1.1797, "step": 3551 }, { "epoch": 2.78, "learning_rate": 1.6950994709074534e-05, "loss": 1.5053, "step": 3552 }, { "epoch": 2.78, "learning_rate": 1.6949173969054314e-05, "loss": 1.2906, "step": 3553 }, { "epoch": 2.78, "learning_rate": 1.694735278340927e-05, "loss": 1.5892, "step": 3554 }, { "epoch": 2.78, "learning_rate": 1.6945531152256192e-05, "loss": 1.4732, "step": 3555 }, { "epoch": 2.78, "learning_rate": 1.694370907571189e-05, "loss": 1.7514, "step": 3556 }, { "epoch": 2.78, "learning_rate": 1.6941886553893214e-05, "loss": 1.4462, "step": 3557 }, { "epoch": 2.78, "learning_rate": 1.694006358691703e-05, "loss": 1.6616, "step": 3558 }, { "epoch": 2.78, "learning_rate": 1.6938240174900238e-05, "loss": 1.4301, "step": 3559 }, { "epoch": 2.78, "learning_rate": 1.6936416317959766e-05, "loss": 1.6563, "step": 3560 }, { "epoch": 2.78, "learning_rate": 1.693459201621258e-05, "loss": 1.3751, "step": 3561 }, { "epoch": 2.78, "learning_rate": 1.6932767269775648e-05, "loss": 1.3948, "step": 3562 }, { "epoch": 2.78, "learning_rate": 1.6930942078766003e-05, "loss": 1.3281, "step": 3563 }, { "epoch": 2.79, "learning_rate": 1.692911644330068e-05, "loss": 1.5178, "step": 3564 }, { "epoch": 2.79, "learning_rate": 1.6927290363496748e-05, "loss": 1.3245, "step": 3565 }, { "epoch": 2.79, "learning_rate": 1.692546383947131e-05, "loss": 1.5785, "step": 3566 }, { "epoch": 2.79, "learning_rate": 1.6923636871341492e-05, "loss": 1.2594, "step": 3567 }, { "epoch": 2.79, "learning_rate": 1.692180945922445e-05, "loss": 1.5873, "step": 3568 }, { "epoch": 2.79, "learning_rate": 1.6919981603237375e-05, "loss": 1.3888, "step": 3569 }, { "epoch": 2.79, "learning_rate": 1.6918153303497473e-05, "loss": 1.3402, "step": 3570 }, { "epoch": 2.79, "learning_rate": 1.691632456012199e-05, "loss": 1.3903, "step": 3571 }, { "epoch": 2.79, "learning_rate": 1.6914495373228197e-05, "loss": 1.5648, "step": 3572 }, { "epoch": 2.79, "learning_rate": 1.6912665742933393e-05, "loss": 1.7759, "step": 3573 }, { "epoch": 2.79, "learning_rate": 1.6910835669354905e-05, "loss": 1.3518, "step": 3574 }, { "epoch": 2.79, "learning_rate": 1.6909005152610086e-05, "loss": 1.4516, "step": 3575 }, { "epoch": 2.79, "learning_rate": 1.6907174192816326e-05, "loss": 1.4513, "step": 3576 }, { "epoch": 2.8, "learning_rate": 1.6905342790091036e-05, "loss": 1.223, "step": 3577 }, { "epoch": 2.8, "learning_rate": 1.690351094455165e-05, "loss": 1.4452, "step": 3578 }, { "epoch": 2.8, "learning_rate": 1.6901678656315647e-05, "loss": 1.5459, "step": 3579 }, { "epoch": 2.8, "learning_rate": 1.6899845925500518e-05, "loss": 1.7484, "step": 3580 }, { "epoch": 2.8, "learning_rate": 1.6898012752223793e-05, "loss": 1.6993, "step": 3581 }, { "epoch": 2.8, "learning_rate": 1.6896179136603028e-05, "loss": 1.5524, "step": 3582 }, { "epoch": 2.8, "learning_rate": 1.68943450787558e-05, "loss": 1.785, "step": 3583 }, { "epoch": 2.8, "learning_rate": 1.6892510578799726e-05, "loss": 1.3681, "step": 3584 }, { "epoch": 2.8, "learning_rate": 1.6890675636852444e-05, "loss": 1.6188, "step": 3585 }, { "epoch": 2.8, "learning_rate": 1.688884025303162e-05, "loss": 1.3104, "step": 3586 }, { "epoch": 2.8, "learning_rate": 1.6887004427454957e-05, "loss": 1.3552, "step": 3587 }, { "epoch": 2.8, "learning_rate": 1.688516816024017e-05, "loss": 1.6204, "step": 3588 }, { "epoch": 2.81, "learning_rate": 1.688333145150502e-05, "loss": 1.4591, "step": 3589 }, { "epoch": 2.81, "learning_rate": 1.6881494301367283e-05, "loss": 1.5776, "step": 3590 }, { "epoch": 2.81, "learning_rate": 1.687965670994477e-05, "loss": 1.4709, "step": 3591 }, { "epoch": 2.81, "learning_rate": 1.6877818677355323e-05, "loss": 1.4619, "step": 3592 }, { "epoch": 2.81, "learning_rate": 1.6875980203716805e-05, "loss": 1.3182, "step": 3593 }, { "epoch": 2.81, "learning_rate": 1.687414128914711e-05, "loss": 1.5098, "step": 3594 }, { "epoch": 2.81, "learning_rate": 1.6872301933764158e-05, "loss": 1.632, "step": 3595 }, { "epoch": 2.81, "learning_rate": 1.687046213768591e-05, "loss": 1.5835, "step": 3596 }, { "epoch": 2.81, "learning_rate": 1.6868621901030333e-05, "loss": 1.2178, "step": 3597 }, { "epoch": 2.81, "learning_rate": 1.6866781223915444e-05, "loss": 1.4252, "step": 3598 }, { "epoch": 2.81, "learning_rate": 1.6864940106459273e-05, "loss": 1.7984, "step": 3599 }, { "epoch": 2.81, "learning_rate": 1.6863098548779886e-05, "loss": 1.3795, "step": 3600 }, { "epoch": 2.81, "learning_rate": 1.6861256550995376e-05, "loss": 1.2813, "step": 3601 }, { "epoch": 2.82, "learning_rate": 1.685941411322386e-05, "loss": 1.4043, "step": 3602 }, { "epoch": 2.82, "learning_rate": 1.6857571235583495e-05, "loss": 1.6143, "step": 3603 }, { "epoch": 2.82, "learning_rate": 1.6855727918192452e-05, "loss": 1.3838, "step": 3604 }, { "epoch": 2.82, "learning_rate": 1.6853884161168934e-05, "loss": 1.4485, "step": 3605 }, { "epoch": 2.82, "learning_rate": 1.685203996463118e-05, "loss": 1.2825, "step": 3606 }, { "epoch": 2.82, "learning_rate": 1.6850195328697444e-05, "loss": 1.551, "step": 3607 }, { "epoch": 2.82, "learning_rate": 1.6848350253486027e-05, "loss": 1.5051, "step": 3608 }, { "epoch": 2.82, "learning_rate": 1.6846504739115234e-05, "loss": 1.4725, "step": 3609 }, { "epoch": 2.82, "learning_rate": 1.6844658785703418e-05, "loss": 1.4641, "step": 3610 }, { "epoch": 2.82, "learning_rate": 1.6842812393368955e-05, "loss": 1.6316, "step": 3611 }, { "epoch": 2.82, "learning_rate": 1.684096556223024e-05, "loss": 1.5973, "step": 3612 }, { "epoch": 2.82, "learning_rate": 1.6839118292405713e-05, "loss": 1.4399, "step": 3613 }, { "epoch": 2.82, "learning_rate": 1.6837270584013825e-05, "loss": 1.4612, "step": 3614 }, { "epoch": 2.83, "learning_rate": 1.6835422437173066e-05, "loss": 1.2383, "step": 3615 }, { "epoch": 2.83, "learning_rate": 1.683357385200195e-05, "loss": 1.5678, "step": 3616 }, { "epoch": 2.83, "learning_rate": 1.683172482861902e-05, "loss": 1.4648, "step": 3617 }, { "epoch": 2.83, "learning_rate": 1.682987536714285e-05, "loss": 1.2756, "step": 3618 }, { "epoch": 2.83, "learning_rate": 1.682802546769203e-05, "loss": 1.6442, "step": 3619 }, { "epoch": 2.83, "learning_rate": 1.6826175130385198e-05, "loss": 1.4202, "step": 3620 }, { "epoch": 2.83, "learning_rate": 1.6824324355341004e-05, "loss": 1.3641, "step": 3621 }, { "epoch": 2.83, "learning_rate": 1.682247314267813e-05, "loss": 1.4595, "step": 3622 }, { "epoch": 2.83, "learning_rate": 1.6820621492515293e-05, "loss": 1.4607, "step": 3623 }, { "epoch": 2.83, "learning_rate": 1.681876940497123e-05, "loss": 1.3467, "step": 3624 }, { "epoch": 2.83, "learning_rate": 1.68169168801647e-05, "loss": 1.4764, "step": 3625 }, { "epoch": 2.83, "learning_rate": 1.6815063918214513e-05, "loss": 1.4301, "step": 3626 }, { "epoch": 2.83, "learning_rate": 1.6813210519239485e-05, "loss": 1.5154, "step": 3627 }, { "epoch": 2.84, "learning_rate": 1.6811356683358463e-05, "loss": 1.4725, "step": 3628 }, { "epoch": 2.84, "learning_rate": 1.6809502410690336e-05, "loss": 1.5591, "step": 3629 }, { "epoch": 2.84, "learning_rate": 1.6807647701354007e-05, "loss": 1.621, "step": 3630 }, { "epoch": 2.84, "learning_rate": 1.6805792555468414e-05, "loss": 1.8371, "step": 3631 }, { "epoch": 2.84, "learning_rate": 1.6803936973152516e-05, "loss": 1.5196, "step": 3632 }, { "epoch": 2.84, "learning_rate": 1.680208095452531e-05, "loss": 1.4668, "step": 3633 }, { "epoch": 2.84, "learning_rate": 1.680022449970581e-05, "loss": 1.5323, "step": 3634 }, { "epoch": 2.84, "learning_rate": 1.6798367608813067e-05, "loss": 1.2998, "step": 3635 }, { "epoch": 2.84, "learning_rate": 1.679651028196616e-05, "loss": 1.5815, "step": 3636 }, { "epoch": 2.84, "learning_rate": 1.6794652519284186e-05, "loss": 1.3115, "step": 3637 }, { "epoch": 2.84, "learning_rate": 1.679279432088628e-05, "loss": 1.4522, "step": 3638 }, { "epoch": 2.84, "learning_rate": 1.67909356868916e-05, "loss": 1.5995, "step": 3639 }, { "epoch": 2.84, "learning_rate": 1.6789076617419333e-05, "loss": 1.615, "step": 3640 }, { "epoch": 2.85, "learning_rate": 1.6787217112588697e-05, "loss": 1.5364, "step": 3641 }, { "epoch": 2.85, "learning_rate": 1.6785357172518933e-05, "loss": 1.3343, "step": 3642 }, { "epoch": 2.85, "learning_rate": 1.678349679732931e-05, "loss": 1.2247, "step": 3643 }, { "epoch": 2.85, "learning_rate": 1.678163598713913e-05, "loss": 1.2539, "step": 3644 }, { "epoch": 2.85, "learning_rate": 1.677977474206772e-05, "loss": 1.3197, "step": 3645 }, { "epoch": 2.85, "learning_rate": 1.6777913062234434e-05, "loss": 1.5157, "step": 3646 }, { "epoch": 2.85, "learning_rate": 1.6776050947758656e-05, "loss": 1.5741, "step": 3647 }, { "epoch": 2.85, "learning_rate": 1.6774188398759794e-05, "loss": 1.591, "step": 3648 }, { "epoch": 2.85, "learning_rate": 1.6772325415357287e-05, "loss": 1.3614, "step": 3649 }, { "epoch": 2.85, "learning_rate": 1.6770461997670603e-05, "loss": 1.3677, "step": 3650 }, { "epoch": 2.85, "learning_rate": 1.676859814581923e-05, "loss": 1.6991, "step": 3651 }, { "epoch": 2.85, "learning_rate": 1.6766733859922705e-05, "loss": 1.5199, "step": 3652 }, { "epoch": 2.86, "learning_rate": 1.676486914010056e-05, "loss": 1.5212, "step": 3653 }, { "epoch": 2.86, "learning_rate": 1.676300398647238e-05, "loss": 1.2599, "step": 3654 }, { "epoch": 2.86, "learning_rate": 1.6761138399157773e-05, "loss": 1.4145, "step": 3655 }, { "epoch": 2.86, "learning_rate": 1.6759272378276367e-05, "loss": 1.4719, "step": 3656 }, { "epoch": 2.86, "learning_rate": 1.675740592394783e-05, "loss": 1.4618, "step": 3657 }, { "epoch": 2.86, "learning_rate": 1.6755539036291844e-05, "loss": 1.6723, "step": 3658 }, { "epoch": 2.86, "learning_rate": 1.6753671715428126e-05, "loss": 1.4776, "step": 3659 }, { "epoch": 2.86, "learning_rate": 1.6751803961476425e-05, "loss": 1.5359, "step": 3660 }, { "epoch": 2.86, "learning_rate": 1.674993577455651e-05, "loss": 1.6311, "step": 3661 }, { "epoch": 2.86, "learning_rate": 1.6748067154788183e-05, "loss": 1.5273, "step": 3662 }, { "epoch": 2.86, "learning_rate": 1.674619810229127e-05, "loss": 1.2082, "step": 3663 }, { "epoch": 2.86, "learning_rate": 1.6744328617185622e-05, "loss": 1.6365, "step": 3664 }, { "epoch": 2.86, "learning_rate": 1.6742458699591135e-05, "loss": 1.7206, "step": 3665 }, { "epoch": 2.87, "learning_rate": 1.6740588349627703e-05, "loss": 1.3337, "step": 3666 }, { "epoch": 2.87, "learning_rate": 1.673871756741528e-05, "loss": 1.4514, "step": 3667 }, { "epoch": 2.87, "learning_rate": 1.673684635307382e-05, "loss": 1.4188, "step": 3668 }, { "epoch": 2.87, "learning_rate": 1.673497470672333e-05, "loss": 1.4893, "step": 3669 }, { "epoch": 2.87, "learning_rate": 1.6733102628483823e-05, "loss": 1.5154, "step": 3670 }, { "epoch": 2.87, "learning_rate": 1.6731230118475344e-05, "loss": 1.6108, "step": 3671 }, { "epoch": 2.87, "learning_rate": 1.672935717681798e-05, "loss": 1.7629, "step": 3672 }, { "epoch": 2.87, "learning_rate": 1.672748380363183e-05, "loss": 1.3262, "step": 3673 }, { "epoch": 2.87, "learning_rate": 1.6725609999037034e-05, "loss": 1.7014, "step": 3674 }, { "epoch": 2.87, "learning_rate": 1.6723735763153744e-05, "loss": 1.3754, "step": 3675 }, { "epoch": 2.87, "learning_rate": 1.672186109610215e-05, "loss": 1.4895, "step": 3676 }, { "epoch": 2.87, "learning_rate": 1.6719985998002463e-05, "loss": 1.5354, "step": 3677 }, { "epoch": 2.87, "learning_rate": 1.671811046897494e-05, "loss": 1.5158, "step": 3678 }, { "epoch": 2.88, "learning_rate": 1.6716234509139833e-05, "loss": 1.7096, "step": 3679 }, { "epoch": 2.88, "learning_rate": 1.6714358118617456e-05, "loss": 1.5978, "step": 3680 }, { "epoch": 2.88, "learning_rate": 1.671248129752813e-05, "loss": 1.3614, "step": 3681 }, { "epoch": 2.88, "learning_rate": 1.6710604045992205e-05, "loss": 1.5716, "step": 3682 }, { "epoch": 2.88, "learning_rate": 1.6708726364130065e-05, "loss": 1.2805, "step": 3683 }, { "epoch": 2.88, "learning_rate": 1.670684825206212e-05, "loss": 1.4903, "step": 3684 }, { "epoch": 2.88, "learning_rate": 1.6704969709908805e-05, "loss": 1.4302, "step": 3685 }, { "epoch": 2.88, "learning_rate": 1.670309073779058e-05, "loss": 1.4936, "step": 3686 }, { "epoch": 2.88, "learning_rate": 1.6701211335827945e-05, "loss": 1.4293, "step": 3687 }, { "epoch": 2.88, "learning_rate": 1.6699331504141416e-05, "loss": 1.432, "step": 3688 }, { "epoch": 2.88, "learning_rate": 1.6697451242851536e-05, "loss": 1.6781, "step": 3689 }, { "epoch": 2.88, "learning_rate": 1.6695570552078878e-05, "loss": 1.6174, "step": 3690 }, { "epoch": 2.88, "learning_rate": 1.669368943194405e-05, "loss": 1.329, "step": 3691 }, { "epoch": 2.89, "learning_rate": 1.669180788256768e-05, "loss": 1.5787, "step": 3692 }, { "epoch": 2.89, "learning_rate": 1.6689925904070424e-05, "loss": 1.3083, "step": 3693 }, { "epoch": 2.89, "learning_rate": 1.6688043496572965e-05, "loss": 1.4773, "step": 3694 }, { "epoch": 2.89, "learning_rate": 1.6686160660196012e-05, "loss": 1.4486, "step": 3695 }, { "epoch": 2.89, "learning_rate": 1.668427739506031e-05, "loss": 1.5006, "step": 3696 }, { "epoch": 2.89, "learning_rate": 1.6682393701286623e-05, "loss": 1.3043, "step": 3697 }, { "epoch": 2.89, "learning_rate": 1.668050957899575e-05, "loss": 1.3493, "step": 3698 }, { "epoch": 2.89, "learning_rate": 1.6678625028308503e-05, "loss": 1.3186, "step": 3699 }, { "epoch": 2.89, "learning_rate": 1.667674004934574e-05, "loss": 1.4512, "step": 3700 }, { "epoch": 2.89, "learning_rate": 1.6674854642228332e-05, "loss": 1.5398, "step": 3701 }, { "epoch": 2.89, "learning_rate": 1.6672968807077187e-05, "loss": 1.3719, "step": 3702 }, { "epoch": 2.89, "learning_rate": 1.6671082544013237e-05, "loss": 1.4143, "step": 3703 }, { "epoch": 2.89, "learning_rate": 1.6669195853157442e-05, "loss": 1.5639, "step": 3704 }, { "epoch": 2.9, "learning_rate": 1.666730873463078e-05, "loss": 1.2763, "step": 3705 }, { "epoch": 2.9, "learning_rate": 1.6665421188554276e-05, "loss": 1.5908, "step": 3706 }, { "epoch": 2.9, "learning_rate": 1.6663533215048964e-05, "loss": 1.5672, "step": 3707 }, { "epoch": 2.9, "learning_rate": 1.6661644814235916e-05, "loss": 1.6447, "step": 3708 }, { "epoch": 2.9, "learning_rate": 1.6659755986236225e-05, "loss": 1.5865, "step": 3709 }, { "epoch": 2.9, "learning_rate": 1.665786673117102e-05, "loss": 1.6754, "step": 3710 }, { "epoch": 2.9, "learning_rate": 1.6655977049161448e-05, "loss": 1.5407, "step": 3711 }, { "epoch": 2.9, "learning_rate": 1.665408694032869e-05, "loss": 1.4107, "step": 3712 }, { "epoch": 2.9, "learning_rate": 1.6652196404793944e-05, "loss": 1.4676, "step": 3713 }, { "epoch": 2.9, "learning_rate": 1.6650305442678455e-05, "loss": 1.1675, "step": 3714 }, { "epoch": 2.9, "learning_rate": 1.6648414054103474e-05, "loss": 1.8205, "step": 3715 }, { "epoch": 2.9, "learning_rate": 1.664652223919029e-05, "loss": 1.4016, "step": 3716 }, { "epoch": 2.91, "learning_rate": 1.6644629998060225e-05, "loss": 1.6066, "step": 3717 }, { "epoch": 2.91, "learning_rate": 1.6642737330834613e-05, "loss": 1.2332, "step": 3718 }, { "epoch": 2.91, "learning_rate": 1.6640844237634828e-05, "loss": 1.3886, "step": 3719 }, { "epoch": 2.91, "learning_rate": 1.6638950718582265e-05, "loss": 1.6278, "step": 3720 }, { "epoch": 2.91, "learning_rate": 1.6637056773798353e-05, "loss": 1.7136, "step": 3721 }, { "epoch": 2.91, "learning_rate": 1.663516240340454e-05, "loss": 1.3926, "step": 3722 }, { "epoch": 2.91, "learning_rate": 1.6633267607522305e-05, "loss": 1.2542, "step": 3723 }, { "epoch": 2.91, "learning_rate": 1.663137238627315e-05, "loss": 1.3135, "step": 3724 }, { "epoch": 2.91, "learning_rate": 1.6629476739778618e-05, "loss": 1.6001, "step": 3725 }, { "epoch": 2.91, "learning_rate": 1.6627580668160265e-05, "loss": 1.442, "step": 3726 }, { "epoch": 2.91, "learning_rate": 1.6625684171539678e-05, "loss": 1.367, "step": 3727 }, { "epoch": 2.91, "learning_rate": 1.662378725003847e-05, "loss": 1.7792, "step": 3728 }, { "epoch": 2.91, "learning_rate": 1.662188990377829e-05, "loss": 1.6615, "step": 3729 }, { "epoch": 2.92, "learning_rate": 1.6619992132880806e-05, "loss": 1.3946, "step": 3730 }, { "epoch": 2.92, "learning_rate": 1.661809393746771e-05, "loss": 1.4479, "step": 3731 }, { "epoch": 2.92, "learning_rate": 1.6616195317660735e-05, "loss": 1.5984, "step": 3732 }, { "epoch": 2.92, "learning_rate": 1.6614296273581623e-05, "loss": 1.7245, "step": 3733 }, { "epoch": 2.92, "learning_rate": 1.661239680535216e-05, "loss": 1.9131, "step": 3734 }, { "epoch": 2.92, "learning_rate": 1.661049691309415e-05, "loss": 1.6331, "step": 3735 }, { "epoch": 2.92, "learning_rate": 1.6608596596929422e-05, "loss": 1.2407, "step": 3736 }, { "epoch": 2.92, "learning_rate": 1.660669585697984e-05, "loss": 1.4735, "step": 3737 }, { "epoch": 2.92, "learning_rate": 1.6604794693367287e-05, "loss": 1.4209, "step": 3738 }, { "epoch": 2.92, "learning_rate": 1.6602893106213687e-05, "loss": 1.2337, "step": 3739 }, { "epoch": 2.92, "learning_rate": 1.660099109564097e-05, "loss": 1.6271, "step": 3740 }, { "epoch": 2.92, "learning_rate": 1.6599088661771116e-05, "loss": 1.292, "step": 3741 }, { "epoch": 2.92, "learning_rate": 1.6597185804726118e-05, "loss": 1.6065, "step": 3742 }, { "epoch": 2.93, "learning_rate": 1.659528252462799e-05, "loss": 1.3751, "step": 3743 }, { "epoch": 2.93, "learning_rate": 1.6593378821598793e-05, "loss": 1.7108, "step": 3744 }, { "epoch": 2.93, "learning_rate": 1.6591474695760602e-05, "loss": 1.5232, "step": 3745 }, { "epoch": 2.93, "learning_rate": 1.658957014723552e-05, "loss": 1.2491, "step": 3746 }, { "epoch": 2.93, "learning_rate": 1.658766517614568e-05, "loss": 1.5617, "step": 3747 }, { "epoch": 2.93, "learning_rate": 1.6585759782613236e-05, "loss": 1.7224, "step": 3748 }, { "epoch": 2.93, "learning_rate": 1.6583853966760384e-05, "loss": 1.3991, "step": 3749 }, { "epoch": 2.93, "learning_rate": 1.6581947728709326e-05, "loss": 1.5281, "step": 3750 }, { "epoch": 2.93, "learning_rate": 1.6580041068582305e-05, "loss": 1.5116, "step": 3751 }, { "epoch": 2.93, "learning_rate": 1.657813398650159e-05, "loss": 1.3917, "step": 3752 }, { "epoch": 2.93, "learning_rate": 1.657622648258948e-05, "loss": 1.3022, "step": 3753 }, { "epoch": 2.93, "learning_rate": 1.6574318556968284e-05, "loss": 1.3706, "step": 3754 }, { "epoch": 2.93, "learning_rate": 1.6572410209760365e-05, "loss": 1.4728, "step": 3755 }, { "epoch": 2.94, "learning_rate": 1.6570501441088083e-05, "loss": 1.4223, "step": 3756 }, { "epoch": 2.94, "learning_rate": 1.656859225107385e-05, "loss": 1.642, "step": 3757 }, { "epoch": 2.94, "learning_rate": 1.65666826398401e-05, "loss": 1.3436, "step": 3758 }, { "epoch": 2.94, "learning_rate": 1.6564772607509273e-05, "loss": 1.4403, "step": 3759 }, { "epoch": 2.94, "learning_rate": 1.6562862154203866e-05, "loss": 1.7174, "step": 3760 }, { "epoch": 2.94, "learning_rate": 1.656095128004638e-05, "loss": 1.3663, "step": 3761 }, { "epoch": 2.94, "learning_rate": 1.6559039985159364e-05, "loss": 1.2988, "step": 3762 }, { "epoch": 2.94, "learning_rate": 1.655712826966537e-05, "loss": 1.4483, "step": 3763 }, { "epoch": 2.94, "learning_rate": 1.6555216133686997e-05, "loss": 1.2897, "step": 3764 }, { "epoch": 2.94, "learning_rate": 1.6553303577346864e-05, "loss": 1.2984, "step": 3765 }, { "epoch": 2.94, "learning_rate": 1.655139060076761e-05, "loss": 1.3238, "step": 3766 }, { "epoch": 2.94, "learning_rate": 1.654947720407191e-05, "loss": 1.5273, "step": 3767 }, { "epoch": 2.94, "learning_rate": 1.654756338738246e-05, "loss": 1.6639, "step": 3768 }, { "epoch": 2.95, "learning_rate": 1.6545649150821996e-05, "loss": 1.3548, "step": 3769 }, { "epoch": 2.95, "learning_rate": 1.654373449451326e-05, "loss": 1.4817, "step": 3770 }, { "epoch": 2.95, "learning_rate": 1.6541819418579034e-05, "loss": 1.6357, "step": 3771 }, { "epoch": 2.95, "learning_rate": 1.653990392314213e-05, "loss": 1.5998, "step": 3772 }, { "epoch": 2.95, "learning_rate": 1.653798800832538e-05, "loss": 1.504, "step": 3773 }, { "epoch": 2.95, "learning_rate": 1.653607167425164e-05, "loss": 1.387, "step": 3774 }, { "epoch": 2.95, "learning_rate": 1.65341549210438e-05, "loss": 1.2939, "step": 3775 }, { "epoch": 2.95, "learning_rate": 1.6532237748824775e-05, "loss": 1.4088, "step": 3776 }, { "epoch": 2.95, "learning_rate": 1.6530320157717506e-05, "loss": 1.6651, "step": 3777 }, { "epoch": 2.95, "learning_rate": 1.652840214784496e-05, "loss": 1.4626, "step": 3778 }, { "epoch": 2.95, "learning_rate": 1.6526483719330135e-05, "loss": 1.2919, "step": 3779 }, { "epoch": 2.95, "learning_rate": 1.6524564872296046e-05, "loss": 1.4062, "step": 3780 }, { "epoch": 2.96, "learning_rate": 1.6522645606865754e-05, "loss": 1.4699, "step": 3781 }, { "epoch": 2.96, "learning_rate": 1.6520725923162318e-05, "loss": 1.3085, "step": 3782 }, { "epoch": 2.96, "learning_rate": 1.6518805821308854e-05, "loss": 1.7047, "step": 3783 }, { "epoch": 2.96, "learning_rate": 1.6516885301428482e-05, "loss": 1.3862, "step": 3784 }, { "epoch": 2.96, "learning_rate": 1.651496436364436e-05, "loss": 1.4116, "step": 3785 }, { "epoch": 2.96, "learning_rate": 1.6513043008079678e-05, "loss": 1.4761, "step": 3786 }, { "epoch": 2.96, "learning_rate": 1.6511121234857634e-05, "loss": 1.4052, "step": 3787 }, { "epoch": 2.96, "learning_rate": 1.650919904410147e-05, "loss": 1.5951, "step": 3788 }, { "epoch": 2.96, "learning_rate": 1.6507276435934452e-05, "loss": 1.4256, "step": 3789 }, { "epoch": 2.96, "learning_rate": 1.6505353410479865e-05, "loss": 1.3741, "step": 3790 }, { "epoch": 2.96, "learning_rate": 1.6503429967861023e-05, "loss": 1.2435, "step": 3791 }, { "epoch": 2.96, "learning_rate": 1.650150610820128e-05, "loss": 1.8545, "step": 3792 }, { "epoch": 2.96, "learning_rate": 1.6499581831623993e-05, "loss": 1.5128, "step": 3793 }, { "epoch": 2.97, "learning_rate": 1.649765713825257e-05, "loss": 1.561, "step": 3794 }, { "epoch": 2.97, "learning_rate": 1.649573202821043e-05, "loss": 1.3134, "step": 3795 }, { "epoch": 2.97, "learning_rate": 1.6493806501621017e-05, "loss": 1.401, "step": 3796 }, { "epoch": 2.97, "learning_rate": 1.6491880558607816e-05, "loss": 1.5002, "step": 3797 }, { "epoch": 2.97, "learning_rate": 1.648995419929433e-05, "loss": 1.1567, "step": 3798 }, { "epoch": 2.97, "learning_rate": 1.648802742380408e-05, "loss": 1.6117, "step": 3799 }, { "epoch": 2.97, "learning_rate": 1.648610023226064e-05, "loss": 1.5088, "step": 3800 }, { "epoch": 2.97, "learning_rate": 1.648417262478758e-05, "loss": 1.1956, "step": 3801 }, { "epoch": 2.97, "learning_rate": 1.648224460150851e-05, "loss": 1.3419, "step": 3802 }, { "epoch": 2.97, "learning_rate": 1.6480316162547082e-05, "loss": 1.538, "step": 3803 }, { "epoch": 2.97, "learning_rate": 1.6478387308026942e-05, "loss": 1.4172, "step": 3804 }, { "epoch": 2.97, "learning_rate": 1.6476458038071785e-05, "loss": 1.3799, "step": 3805 }, { "epoch": 2.97, "learning_rate": 1.6474528352805336e-05, "loss": 1.4339, "step": 3806 }, { "epoch": 2.98, "learning_rate": 1.647259825235133e-05, "loss": 1.6958, "step": 3807 }, { "epoch": 2.98, "learning_rate": 1.6470667736833535e-05, "loss": 1.2636, "step": 3808 }, { "epoch": 2.98, "learning_rate": 1.646873680637576e-05, "loss": 1.6943, "step": 3809 }, { "epoch": 2.98, "learning_rate": 1.646680546110182e-05, "loss": 1.3433, "step": 3810 }, { "epoch": 2.98, "learning_rate": 1.6464873701135566e-05, "loss": 1.4523, "step": 3811 }, { "epoch": 2.98, "learning_rate": 1.6462941526600875e-05, "loss": 1.4426, "step": 3812 }, { "epoch": 2.98, "learning_rate": 1.646100893762165e-05, "loss": 1.3608, "step": 3813 }, { "epoch": 2.98, "learning_rate": 1.6459075934321818e-05, "loss": 1.6581, "step": 3814 }, { "epoch": 2.98, "learning_rate": 1.6457142516825342e-05, "loss": 1.1644, "step": 3815 }, { "epoch": 2.98, "learning_rate": 1.64552086852562e-05, "loss": 1.0978, "step": 3816 }, { "epoch": 2.98, "learning_rate": 1.6453274439738404e-05, "loss": 1.3615, "step": 3817 }, { "epoch": 2.98, "learning_rate": 1.6451339780395984e-05, "loss": 1.2898, "step": 3818 }, { "epoch": 2.98, "learning_rate": 1.6449404707353013e-05, "loss": 1.2387, "step": 3819 }, { "epoch": 2.99, "learning_rate": 1.644746922073357e-05, "loss": 1.5728, "step": 3820 }, { "epoch": 2.99, "learning_rate": 1.6445533320661775e-05, "loss": 1.4944, "step": 3821 }, { "epoch": 2.99, "learning_rate": 1.6443597007261774e-05, "loss": 1.6298, "step": 3822 }, { "epoch": 2.99, "learning_rate": 1.644166028065773e-05, "loss": 1.4934, "step": 3823 }, { "epoch": 2.99, "learning_rate": 1.6439723140973835e-05, "loss": 1.2744, "step": 3824 }, { "epoch": 2.99, "learning_rate": 1.643778558833432e-05, "loss": 1.7425, "step": 3825 }, { "epoch": 2.99, "learning_rate": 1.643584762286343e-05, "loss": 1.52, "step": 3826 }, { "epoch": 2.99, "learning_rate": 1.6433909244685433e-05, "loss": 1.2623, "step": 3827 }, { "epoch": 2.99, "learning_rate": 1.643197045392464e-05, "loss": 1.3338, "step": 3828 }, { "epoch": 2.99, "learning_rate": 1.643003125070537e-05, "loss": 1.4879, "step": 3829 }, { "epoch": 2.99, "learning_rate": 1.642809163515198e-05, "loss": 1.4236, "step": 3830 }, { "epoch": 2.99, "learning_rate": 1.6426151607388852e-05, "loss": 1.3515, "step": 3831 }, { "epoch": 2.99, "learning_rate": 1.6424211167540394e-05, "loss": 1.372, "step": 3832 }, { "epoch": 3.0, "learning_rate": 1.6422270315731035e-05, "loss": 1.587, "step": 3833 }, { "epoch": 3.0, "learning_rate": 1.6420329052085234e-05, "loss": 1.4556, "step": 3834 }, { "epoch": 3.0, "learning_rate": 1.641838737672748e-05, "loss": 1.5448, "step": 3835 }, { "epoch": 3.0, "learning_rate": 1.6416445289782288e-05, "loss": 1.3423, "step": 3836 }, { "epoch": 3.0, "learning_rate": 1.6414502791374194e-05, "loss": 1.4041, "step": 3837 }, { "epoch": 3.0, "learning_rate": 1.641255988162776e-05, "loss": 1.562, "step": 3838 }, { "epoch": 3.0, "learning_rate": 1.641061656066758e-05, "loss": 1.28, "step": 3839 }, { "epoch": 3.0, "learning_rate": 1.6408672828618276e-05, "loss": 0.9716, "step": 3840 }, { "epoch": 3.0, "learning_rate": 1.6406728685604486e-05, "loss": 1.1627, "step": 3841 }, { "epoch": 3.0, "learning_rate": 1.6404784131750885e-05, "loss": 1.0469, "step": 3842 }, { "epoch": 3.0, "learning_rate": 1.640283916718217e-05, "loss": 0.9087, "step": 3843 }, { "epoch": 3.0, "learning_rate": 1.640089379202306e-05, "loss": 0.8967, "step": 3844 }, { "epoch": 3.01, "learning_rate": 1.6398948006398312e-05, "loss": 1.0129, "step": 3845 }, { "epoch": 3.01, "learning_rate": 1.63970018104327e-05, "loss": 0.9614, "step": 3846 }, { "epoch": 3.01, "learning_rate": 1.6395055204251016e-05, "loss": 1.0611, "step": 3847 }, { "epoch": 3.01, "learning_rate": 1.63931081879781e-05, "loss": 0.9458, "step": 3848 }, { "epoch": 3.01, "learning_rate": 1.6391160761738807e-05, "loss": 1.1083, "step": 3849 }, { "epoch": 3.01, "learning_rate": 1.638921292565801e-05, "loss": 1.0009, "step": 3850 }, { "epoch": 3.01, "learning_rate": 1.6387264679860627e-05, "loss": 0.8108, "step": 3851 }, { "epoch": 3.01, "learning_rate": 1.6385316024471586e-05, "loss": 0.9339, "step": 3852 }, { "epoch": 3.01, "learning_rate": 1.6383366959615844e-05, "loss": 0.8481, "step": 3853 }, { "epoch": 3.01, "learning_rate": 1.6381417485418392e-05, "loss": 0.9169, "step": 3854 }, { "epoch": 3.01, "learning_rate": 1.637946760200424e-05, "loss": 0.6927, "step": 3855 }, { "epoch": 3.01, "learning_rate": 1.6377517309498426e-05, "loss": 0.9313, "step": 3856 }, { "epoch": 3.01, "learning_rate": 1.6375566608026026e-05, "loss": 0.9741, "step": 3857 }, { "epoch": 3.02, "learning_rate": 1.6373615497712118e-05, "loss": 0.8575, "step": 3858 }, { "epoch": 3.02, "learning_rate": 1.637166397868182e-05, "loss": 0.9582, "step": 3859 }, { "epoch": 3.02, "learning_rate": 1.6369712051060284e-05, "loss": 1.017, "step": 3860 }, { "epoch": 3.02, "learning_rate": 1.6367759714972675e-05, "loss": 0.8978, "step": 3861 }, { "epoch": 3.02, "learning_rate": 1.6365806970544186e-05, "loss": 0.9288, "step": 3862 }, { "epoch": 3.02, "learning_rate": 1.6363853817900048e-05, "loss": 0.6921, "step": 3863 }, { "epoch": 3.02, "learning_rate": 1.63619002571655e-05, "loss": 0.7523, "step": 3864 }, { "epoch": 3.02, "learning_rate": 1.6359946288465823e-05, "loss": 0.8585, "step": 3865 }, { "epoch": 3.02, "learning_rate": 1.6357991911926317e-05, "loss": 0.8927, "step": 3866 }, { "epoch": 3.02, "learning_rate": 1.6356037127672306e-05, "loss": 0.7974, "step": 3867 }, { "epoch": 3.02, "learning_rate": 1.6354081935829147e-05, "loss": 0.8369, "step": 3868 }, { "epoch": 3.02, "learning_rate": 1.6352126336522216e-05, "loss": 0.8651, "step": 3869 }, { "epoch": 3.02, "learning_rate": 1.6350170329876916e-05, "loss": 0.9471, "step": 3870 }, { "epoch": 3.03, "learning_rate": 1.6348213916018684e-05, "loss": 0.7694, "step": 3871 }, { "epoch": 3.03, "learning_rate": 1.634625709507298e-05, "loss": 0.9348, "step": 3872 }, { "epoch": 3.03, "learning_rate": 1.6344299867165276e-05, "loss": 0.9377, "step": 3873 }, { "epoch": 3.03, "learning_rate": 1.6342342232421097e-05, "loss": 0.9262, "step": 3874 }, { "epoch": 3.03, "learning_rate": 1.6340384190965963e-05, "loss": 0.7905, "step": 3875 }, { "epoch": 3.03, "learning_rate": 1.6338425742925447e-05, "loss": 1.0352, "step": 3876 }, { "epoch": 3.03, "learning_rate": 1.6336466888425136e-05, "loss": 0.7068, "step": 3877 }, { "epoch": 3.03, "learning_rate": 1.6334507627590638e-05, "loss": 0.8238, "step": 3878 }, { "epoch": 3.03, "learning_rate": 1.6332547960547595e-05, "loss": 0.8139, "step": 3879 }, { "epoch": 3.03, "learning_rate": 1.633058788742168e-05, "loss": 0.6546, "step": 3880 }, { "epoch": 3.03, "learning_rate": 1.632862740833858e-05, "loss": 0.9094, "step": 3881 }, { "epoch": 3.03, "learning_rate": 1.632666652342401e-05, "loss": 0.7025, "step": 3882 }, { "epoch": 3.03, "learning_rate": 1.6324705232803723e-05, "loss": 0.7597, "step": 3883 }, { "epoch": 3.04, "learning_rate": 1.632274353660348e-05, "loss": 0.8969, "step": 3884 }, { "epoch": 3.04, "learning_rate": 1.6320781434949083e-05, "loss": 0.7873, "step": 3885 }, { "epoch": 3.04, "learning_rate": 1.631881892796636e-05, "loss": 0.8846, "step": 3886 }, { "epoch": 3.04, "learning_rate": 1.631685601578114e-05, "loss": 1.1423, "step": 3887 }, { "epoch": 3.04, "learning_rate": 1.6314892698519317e-05, "loss": 0.8951, "step": 3888 }, { "epoch": 3.04, "learning_rate": 1.631292897630678e-05, "loss": 0.8325, "step": 3889 }, { "epoch": 3.04, "learning_rate": 1.6310964849269465e-05, "loss": 0.7928, "step": 3890 }, { "epoch": 3.04, "learning_rate": 1.6309000317533315e-05, "loss": 0.9712, "step": 3891 }, { "epoch": 3.04, "learning_rate": 1.630703538122431e-05, "loss": 1.0076, "step": 3892 }, { "epoch": 3.04, "learning_rate": 1.6305070040468455e-05, "loss": 0.6521, "step": 3893 }, { "epoch": 3.04, "learning_rate": 1.6303104295391784e-05, "loss": 0.7172, "step": 3894 }, { "epoch": 3.04, "learning_rate": 1.630113814612035e-05, "loss": 0.8448, "step": 3895 }, { "epoch": 3.04, "learning_rate": 1.629917159278023e-05, "loss": 0.9345, "step": 3896 }, { "epoch": 3.05, "learning_rate": 1.629720463549754e-05, "loss": 1.0166, "step": 3897 }, { "epoch": 3.05, "learning_rate": 1.6295237274398408e-05, "loss": 1.0075, "step": 3898 }, { "epoch": 3.05, "learning_rate": 1.6293269509608994e-05, "loss": 0.9531, "step": 3899 }, { "epoch": 3.05, "learning_rate": 1.6291301341255487e-05, "loss": 0.8835, "step": 3900 }, { "epoch": 3.05, "learning_rate": 1.6289332769464093e-05, "loss": 0.5681, "step": 3901 }, { "epoch": 3.05, "learning_rate": 1.628736379436106e-05, "loss": 0.8125, "step": 3902 }, { "epoch": 3.05, "learning_rate": 1.6285394416072638e-05, "loss": 0.9617, "step": 3903 }, { "epoch": 3.05, "learning_rate": 1.628342463472512e-05, "loss": 0.8808, "step": 3904 }, { "epoch": 3.05, "learning_rate": 1.6281454450444827e-05, "loss": 0.9318, "step": 3905 }, { "epoch": 3.05, "learning_rate": 1.6279483863358094e-05, "loss": 0.9088, "step": 3906 }, { "epoch": 3.05, "learning_rate": 1.6277512873591287e-05, "loss": 0.7651, "step": 3907 }, { "epoch": 3.05, "learning_rate": 1.62755414812708e-05, "loss": 0.8622, "step": 3908 }, { "epoch": 3.06, "learning_rate": 1.6273569686523055e-05, "loss": 0.8899, "step": 3909 }, { "epoch": 3.06, "learning_rate": 1.6271597489474486e-05, "loss": 0.9187, "step": 3910 }, { "epoch": 3.06, "learning_rate": 1.6269624890251573e-05, "loss": 0.9426, "step": 3911 }, { "epoch": 3.06, "learning_rate": 1.6267651888980803e-05, "loss": 0.9667, "step": 3912 }, { "epoch": 3.06, "learning_rate": 1.6265678485788702e-05, "loss": 0.7491, "step": 3913 }, { "epoch": 3.06, "learning_rate": 1.6263704680801816e-05, "loss": 0.937, "step": 3914 }, { "epoch": 3.06, "learning_rate": 1.626173047414672e-05, "loss": 0.9358, "step": 3915 }, { "epoch": 3.06, "learning_rate": 1.6259755865950012e-05, "loss": 0.8746, "step": 3916 }, { "epoch": 3.06, "learning_rate": 1.6257780856338313e-05, "loss": 0.9465, "step": 3917 }, { "epoch": 3.06, "learning_rate": 1.6255805445438274e-05, "loss": 0.882, "step": 3918 }, { "epoch": 3.06, "learning_rate": 1.625382963337657e-05, "loss": 0.9065, "step": 3919 }, { "epoch": 3.06, "learning_rate": 1.6251853420279907e-05, "loss": 0.8844, "step": 3920 }, { "epoch": 3.06, "learning_rate": 1.6249876806275012e-05, "loss": 0.8657, "step": 3921 }, { "epoch": 3.07, "learning_rate": 1.624789979148863e-05, "loss": 0.8068, "step": 3922 }, { "epoch": 3.07, "learning_rate": 1.6245922376047548e-05, "loss": 0.871, "step": 3923 }, { "epoch": 3.07, "learning_rate": 1.624394456007857e-05, "loss": 0.667, "step": 3924 }, { "epoch": 3.07, "learning_rate": 1.624196634370852e-05, "loss": 0.981, "step": 3925 }, { "epoch": 3.07, "learning_rate": 1.623998772706426e-05, "loss": 0.7812, "step": 3926 }, { "epoch": 3.07, "learning_rate": 1.6238008710272666e-05, "loss": 0.9538, "step": 3927 }, { "epoch": 3.07, "learning_rate": 1.623602929346065e-05, "loss": 0.7955, "step": 3928 }, { "epoch": 3.07, "learning_rate": 1.6234049476755142e-05, "loss": 1.0086, "step": 3929 }, { "epoch": 3.07, "learning_rate": 1.62320692602831e-05, "loss": 0.8451, "step": 3930 }, { "epoch": 3.07, "learning_rate": 1.6230088644171507e-05, "loss": 0.7404, "step": 3931 }, { "epoch": 3.07, "learning_rate": 1.6228107628547375e-05, "loss": 1.169, "step": 3932 }, { "epoch": 3.07, "learning_rate": 1.6226126213537736e-05, "loss": 0.9005, "step": 3933 }, { "epoch": 3.07, "learning_rate": 1.622414439926966e-05, "loss": 0.9288, "step": 3934 }, { "epoch": 3.08, "learning_rate": 1.622216218587022e-05, "loss": 0.7955, "step": 3935 }, { "epoch": 3.08, "learning_rate": 1.6220179573466537e-05, "loss": 0.729, "step": 3936 }, { "epoch": 3.08, "learning_rate": 1.6218196562185748e-05, "loss": 0.9627, "step": 3937 }, { "epoch": 3.08, "learning_rate": 1.6216213152155013e-05, "loss": 0.9149, "step": 3938 }, { "epoch": 3.08, "learning_rate": 1.621422934350152e-05, "loss": 0.7667, "step": 3939 }, { "epoch": 3.08, "learning_rate": 1.6212245136352487e-05, "loss": 0.8746, "step": 3940 }, { "epoch": 3.08, "learning_rate": 1.6210260530835152e-05, "loss": 1.0292, "step": 3941 }, { "epoch": 3.08, "learning_rate": 1.6208275527076782e-05, "loss": 0.7671, "step": 3942 }, { "epoch": 3.08, "learning_rate": 1.6206290125204667e-05, "loss": 0.8038, "step": 3943 }, { "epoch": 3.08, "learning_rate": 1.6204304325346122e-05, "loss": 0.9476, "step": 3944 }, { "epoch": 3.08, "learning_rate": 1.620231812762849e-05, "loss": 1.0736, "step": 3945 }, { "epoch": 3.08, "learning_rate": 1.620033153217914e-05, "loss": 0.8835, "step": 3946 }, { "epoch": 3.08, "learning_rate": 1.6198344539125464e-05, "loss": 0.936, "step": 3947 }, { "epoch": 3.09, "learning_rate": 1.619635714859488e-05, "loss": 0.8031, "step": 3948 }, { "epoch": 3.09, "learning_rate": 1.619436936071483e-05, "loss": 0.9305, "step": 3949 }, { "epoch": 3.09, "learning_rate": 1.6192381175612785e-05, "loss": 1.2067, "step": 3950 }, { "epoch": 3.09, "learning_rate": 1.6190392593416244e-05, "loss": 0.8082, "step": 3951 }, { "epoch": 3.09, "learning_rate": 1.6188403614252722e-05, "loss": 1.0178, "step": 3952 }, { "epoch": 3.09, "learning_rate": 1.6186414238249768e-05, "loss": 0.9057, "step": 3953 }, { "epoch": 3.09, "learning_rate": 1.6184424465534952e-05, "loss": 0.9567, "step": 3954 }, { "epoch": 3.09, "learning_rate": 1.618243429623587e-05, "loss": 0.8197, "step": 3955 }, { "epoch": 3.09, "learning_rate": 1.6180443730480146e-05, "loss": 0.8973, "step": 3956 }, { "epoch": 3.09, "learning_rate": 1.6178452768395428e-05, "loss": 0.8459, "step": 3957 }, { "epoch": 3.09, "learning_rate": 1.617646141010939e-05, "loss": 0.9659, "step": 3958 }, { "epoch": 3.09, "learning_rate": 1.617446965574972e-05, "loss": 0.7732, "step": 3959 }, { "epoch": 3.09, "learning_rate": 1.617247750544416e-05, "loss": 0.8343, "step": 3960 }, { "epoch": 3.1, "learning_rate": 1.617048495932044e-05, "loss": 0.8011, "step": 3961 }, { "epoch": 3.1, "learning_rate": 1.616849201750635e-05, "loss": 0.8634, "step": 3962 }, { "epoch": 3.1, "learning_rate": 1.6166498680129682e-05, "loss": 0.8849, "step": 3963 }, { "epoch": 3.1, "learning_rate": 1.6164504947318265e-05, "loss": 0.97, "step": 3964 }, { "epoch": 3.1, "learning_rate": 1.6162510819199947e-05, "loss": 1.055, "step": 3965 }, { "epoch": 3.1, "learning_rate": 1.6160516295902604e-05, "loss": 0.774, "step": 3966 }, { "epoch": 3.1, "learning_rate": 1.615852137755414e-05, "loss": 0.8718, "step": 3967 }, { "epoch": 3.1, "learning_rate": 1.615652606428248e-05, "loss": 1.1806, "step": 3968 }, { "epoch": 3.1, "learning_rate": 1.6154530356215577e-05, "loss": 0.9091, "step": 3969 }, { "epoch": 3.1, "learning_rate": 1.6152534253481404e-05, "loss": 0.9864, "step": 3970 }, { "epoch": 3.1, "learning_rate": 1.6150537756207974e-05, "loss": 0.8407, "step": 3971 }, { "epoch": 3.1, "learning_rate": 1.6148540864523303e-05, "loss": 0.85, "step": 3972 }, { "epoch": 3.11, "learning_rate": 1.6146543578555453e-05, "loss": 1.1581, "step": 3973 }, { "epoch": 3.11, "learning_rate": 1.6144545898432496e-05, "loss": 0.9212, "step": 3974 }, { "epoch": 3.11, "learning_rate": 1.6142547824282542e-05, "loss": 0.9749, "step": 3975 }, { "epoch": 3.11, "learning_rate": 1.6140549356233712e-05, "loss": 0.7377, "step": 3976 }, { "epoch": 3.11, "learning_rate": 1.6138550494414173e-05, "loss": 0.8329, "step": 3977 }, { "epoch": 3.11, "learning_rate": 1.613655123895209e-05, "loss": 0.7945, "step": 3978 }, { "epoch": 3.11, "learning_rate": 1.613455158997568e-05, "loss": 0.9443, "step": 3979 }, { "epoch": 3.11, "learning_rate": 1.6132551547613168e-05, "loss": 0.807, "step": 3980 }, { "epoch": 3.11, "learning_rate": 1.6130551111992806e-05, "loss": 0.6552, "step": 3981 }, { "epoch": 3.11, "learning_rate": 1.6128550283242878e-05, "loss": 1.1789, "step": 3982 }, { "epoch": 3.11, "learning_rate": 1.612654906149169e-05, "loss": 0.883, "step": 3983 }, { "epoch": 3.11, "learning_rate": 1.612454744686758e-05, "loss": 1.0694, "step": 3984 }, { "epoch": 3.11, "learning_rate": 1.6122545439498888e-05, "loss": 0.8788, "step": 3985 }, { "epoch": 3.12, "learning_rate": 1.6120543039514006e-05, "loss": 1.0314, "step": 3986 }, { "epoch": 3.12, "learning_rate": 1.611854024704134e-05, "loss": 0.9727, "step": 3987 }, { "epoch": 3.12, "learning_rate": 1.611653706220932e-05, "loss": 0.9163, "step": 3988 }, { "epoch": 3.12, "learning_rate": 1.6114533485146404e-05, "loss": 0.9255, "step": 3989 }, { "epoch": 3.12, "learning_rate": 1.611252951598107e-05, "loss": 0.8951, "step": 3990 }, { "epoch": 3.12, "learning_rate": 1.611052515484183e-05, "loss": 0.7844, "step": 3991 }, { "epoch": 3.12, "learning_rate": 1.6108520401857216e-05, "loss": 1.0682, "step": 3992 }, { "epoch": 3.12, "learning_rate": 1.610651525715578e-05, "loss": 0.9163, "step": 3993 }, { "epoch": 3.12, "learning_rate": 1.6104509720866112e-05, "loss": 0.9433, "step": 3994 }, { "epoch": 3.12, "learning_rate": 1.6102503793116815e-05, "loss": 0.9989, "step": 3995 }, { "epoch": 3.12, "learning_rate": 1.6100497474036522e-05, "loss": 0.9292, "step": 3996 }, { "epoch": 3.12, "learning_rate": 1.6098490763753892e-05, "loss": 0.9602, "step": 3997 }, { "epoch": 3.12, "learning_rate": 1.6096483662397608e-05, "loss": 0.7799, "step": 3998 }, { "epoch": 3.13, "learning_rate": 1.6094476170096377e-05, "loss": 0.8718, "step": 3999 }, { "epoch": 3.13, "learning_rate": 1.609246828697893e-05, "loss": 0.7544, "step": 4000 }, { "epoch": 3.13, "learning_rate": 1.609046001317403e-05, "loss": 0.9794, "step": 4001 }, { "epoch": 3.13, "learning_rate": 1.608845134881046e-05, "loss": 0.8501, "step": 4002 }, { "epoch": 3.13, "learning_rate": 1.6086442294017022e-05, "loss": 0.8342, "step": 4003 }, { "epoch": 3.13, "learning_rate": 1.6084432848922555e-05, "loss": 1.052, "step": 4004 }, { "epoch": 3.13, "learning_rate": 1.6082423013655916e-05, "loss": 0.8347, "step": 4005 }, { "epoch": 3.13, "learning_rate": 1.6080412788345987e-05, "loss": 0.8935, "step": 4006 }, { "epoch": 3.13, "learning_rate": 1.607840217312168e-05, "loss": 0.9106, "step": 4007 }, { "epoch": 3.13, "learning_rate": 1.6076391168111923e-05, "loss": 0.811, "step": 4008 }, { "epoch": 3.13, "learning_rate": 1.6074379773445675e-05, "loss": 0.9125, "step": 4009 }, { "epoch": 3.13, "learning_rate": 1.6072367989251922e-05, "loss": 0.6974, "step": 4010 }, { "epoch": 3.13, "learning_rate": 1.607035581565967e-05, "loss": 0.7466, "step": 4011 }, { "epoch": 3.14, "learning_rate": 1.606834325279796e-05, "loss": 0.8721, "step": 4012 }, { "epoch": 3.14, "learning_rate": 1.6066330300795837e-05, "loss": 0.7982, "step": 4013 }, { "epoch": 3.14, "learning_rate": 1.60643169597824e-05, "loss": 0.9728, "step": 4014 }, { "epoch": 3.14, "learning_rate": 1.6062303229886736e-05, "loss": 0.8318, "step": 4015 }, { "epoch": 3.14, "learning_rate": 1.6060289111238e-05, "loss": 0.8849, "step": 4016 }, { "epoch": 3.14, "learning_rate": 1.6058274603965334e-05, "loss": 0.8697, "step": 4017 }, { "epoch": 3.14, "learning_rate": 1.605625970819793e-05, "loss": 0.9521, "step": 4018 }, { "epoch": 3.14, "learning_rate": 1.6054244424064992e-05, "loss": 0.7827, "step": 4019 }, { "epoch": 3.14, "learning_rate": 1.6052228751695752e-05, "loss": 0.807, "step": 4020 }, { "epoch": 3.14, "learning_rate": 1.6050212691219472e-05, "loss": 0.9509, "step": 4021 }, { "epoch": 3.14, "learning_rate": 1.6048196242765432e-05, "loss": 0.9054, "step": 4022 }, { "epoch": 3.14, "learning_rate": 1.6046179406462937e-05, "loss": 1.0212, "step": 4023 }, { "epoch": 3.14, "learning_rate": 1.604416218244132e-05, "loss": 0.8468, "step": 4024 }, { "epoch": 3.15, "learning_rate": 1.604214457082994e-05, "loss": 0.8568, "step": 4025 }, { "epoch": 3.15, "learning_rate": 1.604012657175818e-05, "loss": 1.026, "step": 4026 }, { "epoch": 3.15, "learning_rate": 1.6038108185355444e-05, "loss": 1.0579, "step": 4027 }, { "epoch": 3.15, "learning_rate": 1.603608941175117e-05, "loss": 0.8983, "step": 4028 }, { "epoch": 3.15, "learning_rate": 1.6034070251074803e-05, "loss": 1.1265, "step": 4029 }, { "epoch": 3.15, "learning_rate": 1.6032050703455833e-05, "loss": 1.0078, "step": 4030 }, { "epoch": 3.15, "learning_rate": 1.6030030769023763e-05, "loss": 0.7754, "step": 4031 }, { "epoch": 3.15, "learning_rate": 1.6028010447908126e-05, "loss": 0.9135, "step": 4032 }, { "epoch": 3.15, "learning_rate": 1.6025989740238476e-05, "loss": 1.0163, "step": 4033 }, { "epoch": 3.15, "learning_rate": 1.6023968646144393e-05, "loss": 1.0241, "step": 4034 }, { "epoch": 3.15, "learning_rate": 1.6021947165755483e-05, "loss": 1.0441, "step": 4035 }, { "epoch": 3.15, "learning_rate": 1.6019925299201375e-05, "loss": 0.9891, "step": 4036 }, { "epoch": 3.16, "learning_rate": 1.6017903046611728e-05, "loss": 0.8453, "step": 4037 }, { "epoch": 3.16, "learning_rate": 1.6015880408116217e-05, "loss": 1.0318, "step": 4038 }, { "epoch": 3.16, "learning_rate": 1.6013857383844552e-05, "loss": 1.0444, "step": 4039 }, { "epoch": 3.16, "learning_rate": 1.6011833973926453e-05, "loss": 0.8745, "step": 4040 }, { "epoch": 3.16, "learning_rate": 1.600981017849168e-05, "loss": 0.8161, "step": 4041 }, { "epoch": 3.16, "learning_rate": 1.600778599767001e-05, "loss": 1.0091, "step": 4042 }, { "epoch": 3.16, "learning_rate": 1.600576143159125e-05, "loss": 0.8288, "step": 4043 }, { "epoch": 3.16, "learning_rate": 1.6003736480385218e-05, "loss": 0.9477, "step": 4044 }, { "epoch": 3.16, "learning_rate": 1.600171114418178e-05, "loss": 0.8124, "step": 4045 }, { "epoch": 3.16, "learning_rate": 1.5999685423110803e-05, "loss": 0.9021, "step": 4046 }, { "epoch": 3.16, "learning_rate": 1.5997659317302193e-05, "loss": 0.9189, "step": 4047 }, { "epoch": 3.16, "learning_rate": 1.5995632826885876e-05, "loss": 0.9369, "step": 4048 }, { "epoch": 3.16, "learning_rate": 1.5993605951991804e-05, "loss": 0.9967, "step": 4049 }, { "epoch": 3.17, "learning_rate": 1.5991578692749953e-05, "loss": 0.8989, "step": 4050 }, { "epoch": 3.17, "learning_rate": 1.5989551049290324e-05, "loss": 0.7697, "step": 4051 }, { "epoch": 3.17, "learning_rate": 1.5987523021742938e-05, "loss": 1.0925, "step": 4052 }, { "epoch": 3.17, "learning_rate": 1.5985494610237848e-05, "loss": 1.1104, "step": 4053 }, { "epoch": 3.17, "learning_rate": 1.598346581490513e-05, "loss": 0.7732, "step": 4054 }, { "epoch": 3.17, "learning_rate": 1.5981436635874885e-05, "loss": 0.821, "step": 4055 }, { "epoch": 3.17, "learning_rate": 1.597940707327723e-05, "loss": 0.9197, "step": 4056 }, { "epoch": 3.17, "learning_rate": 1.597737712724232e-05, "loss": 0.924, "step": 4057 }, { "epoch": 3.17, "learning_rate": 1.5975346797900325e-05, "loss": 0.7744, "step": 4058 }, { "epoch": 3.17, "learning_rate": 1.597331608538144e-05, "loss": 0.8043, "step": 4059 }, { "epoch": 3.17, "learning_rate": 1.5971284989815892e-05, "loss": 0.9617, "step": 4060 }, { "epoch": 3.17, "learning_rate": 1.5969253511333923e-05, "loss": 0.8649, "step": 4061 }, { "epoch": 3.17, "learning_rate": 1.5967221650065806e-05, "loss": 1.1768, "step": 4062 }, { "epoch": 3.18, "learning_rate": 1.596518940614184e-05, "loss": 0.8546, "step": 4063 }, { "epoch": 3.18, "learning_rate": 1.596315677969234e-05, "loss": 1.084, "step": 4064 }, { "epoch": 3.18, "learning_rate": 1.5961123770847655e-05, "loss": 0.9002, "step": 4065 }, { "epoch": 3.18, "learning_rate": 1.5959090379738152e-05, "loss": 0.9775, "step": 4066 }, { "epoch": 3.18, "learning_rate": 1.5957056606494225e-05, "loss": 1.0442, "step": 4067 }, { "epoch": 3.18, "learning_rate": 1.5955022451246295e-05, "loss": 0.9737, "step": 4068 }, { "epoch": 3.18, "learning_rate": 1.5952987914124796e-05, "loss": 0.852, "step": 4069 }, { "epoch": 3.18, "learning_rate": 1.595095299526021e-05, "loss": 0.858, "step": 4070 }, { "epoch": 3.18, "learning_rate": 1.5948917694783016e-05, "loss": 1.0718, "step": 4071 }, { "epoch": 3.18, "learning_rate": 1.5946882012823735e-05, "loss": 0.9058, "step": 4072 }, { "epoch": 3.18, "learning_rate": 1.594484594951291e-05, "loss": 0.9099, "step": 4073 }, { "epoch": 3.18, "learning_rate": 1.5942809504981105e-05, "loss": 0.9581, "step": 4074 }, { "epoch": 3.18, "learning_rate": 1.5940772679358908e-05, "loss": 0.7842, "step": 4075 }, { "epoch": 3.19, "learning_rate": 1.5938735472776935e-05, "loss": 0.7724, "step": 4076 }, { "epoch": 3.19, "learning_rate": 1.5936697885365818e-05, "loss": 0.9694, "step": 4077 }, { "epoch": 3.19, "learning_rate": 1.5934659917256227e-05, "loss": 0.7926, "step": 4078 }, { "epoch": 3.19, "learning_rate": 1.5932621568578853e-05, "loss": 0.8211, "step": 4079 }, { "epoch": 3.19, "learning_rate": 1.5930582839464397e-05, "loss": 0.761, "step": 4080 }, { "epoch": 3.19, "learning_rate": 1.5928543730043602e-05, "loss": 0.6931, "step": 4081 }, { "epoch": 3.19, "learning_rate": 1.592650424044723e-05, "loss": 1.0179, "step": 4082 }, { "epoch": 3.19, "learning_rate": 1.592446437080606e-05, "loss": 0.976, "step": 4083 }, { "epoch": 3.19, "learning_rate": 1.5922424121250904e-05, "loss": 1.1402, "step": 4084 }, { "epoch": 3.19, "learning_rate": 1.5920383491912596e-05, "loss": 0.9878, "step": 4085 }, { "epoch": 3.19, "learning_rate": 1.5918342482921992e-05, "loss": 0.8689, "step": 4086 }, { "epoch": 3.19, "learning_rate": 1.5916301094409982e-05, "loss": 0.9402, "step": 4087 }, { "epoch": 3.19, "learning_rate": 1.5914259326507463e-05, "loss": 0.9102, "step": 4088 }, { "epoch": 3.2, "learning_rate": 1.5912217179345372e-05, "loss": 0.9307, "step": 4089 }, { "epoch": 3.2, "learning_rate": 1.591017465305466e-05, "loss": 0.9121, "step": 4090 }, { "epoch": 3.2, "learning_rate": 1.5908131747766313e-05, "loss": 0.9954, "step": 4091 }, { "epoch": 3.2, "learning_rate": 1.5906088463611328e-05, "loss": 0.8516, "step": 4092 }, { "epoch": 3.2, "learning_rate": 1.5904044800720736e-05, "loss": 1.0348, "step": 4093 }, { "epoch": 3.2, "learning_rate": 1.590200075922559e-05, "loss": 0.8823, "step": 4094 }, { "epoch": 3.2, "learning_rate": 1.589995633925697e-05, "loss": 0.8422, "step": 4095 }, { "epoch": 3.2, "learning_rate": 1.589791154094597e-05, "loss": 1.2244, "step": 4096 }, { "epoch": 3.2, "learning_rate": 1.589586636442372e-05, "loss": 1.0979, "step": 4097 }, { "epoch": 3.2, "learning_rate": 1.589382080982137e-05, "loss": 0.8231, "step": 4098 }, { "epoch": 3.2, "learning_rate": 1.589177487727009e-05, "loss": 0.9434, "step": 4099 }, { "epoch": 3.2, "learning_rate": 1.588972856690108e-05, "loss": 0.8067, "step": 4100 }, { "epoch": 3.21, "learning_rate": 1.5887681878845565e-05, "loss": 0.7708, "step": 4101 }, { "epoch": 3.21, "learning_rate": 1.588563481323479e-05, "loss": 0.8432, "step": 4102 }, { "epoch": 3.21, "learning_rate": 1.5883587370200028e-05, "loss": 1.0002, "step": 4103 }, { "epoch": 3.21, "learning_rate": 1.5881539549872566e-05, "loss": 0.8919, "step": 4104 }, { "epoch": 3.21, "learning_rate": 1.5879491352383732e-05, "loss": 1.0695, "step": 4105 }, { "epoch": 3.21, "learning_rate": 1.5877442777864863e-05, "loss": 0.9568, "step": 4106 }, { "epoch": 3.21, "learning_rate": 1.587539382644733e-05, "loss": 0.9896, "step": 4107 }, { "epoch": 3.21, "learning_rate": 1.5873344498262523e-05, "loss": 0.9292, "step": 4108 }, { "epoch": 3.21, "learning_rate": 1.587129479344186e-05, "loss": 0.9368, "step": 4109 }, { "epoch": 3.21, "learning_rate": 1.5869244712116775e-05, "loss": 0.8403, "step": 4110 }, { "epoch": 3.21, "learning_rate": 1.5867194254418743e-05, "loss": 0.8355, "step": 4111 }, { "epoch": 3.21, "learning_rate": 1.5865143420479246e-05, "loss": 0.9875, "step": 4112 }, { "epoch": 3.21, "learning_rate": 1.5863092210429796e-05, "loss": 1.0309, "step": 4113 }, { "epoch": 3.22, "learning_rate": 1.5861040624401927e-05, "loss": 1.1882, "step": 4114 }, { "epoch": 3.22, "learning_rate": 1.58589886625272e-05, "loss": 0.8498, "step": 4115 }, { "epoch": 3.22, "learning_rate": 1.585693632493721e-05, "loss": 1.0827, "step": 4116 }, { "epoch": 3.22, "learning_rate": 1.5854883611763556e-05, "loss": 0.7408, "step": 4117 }, { "epoch": 3.22, "learning_rate": 1.585283052313787e-05, "loss": 0.9332, "step": 4118 }, { "epoch": 3.22, "learning_rate": 1.5850777059191812e-05, "loss": 0.8457, "step": 4119 }, { "epoch": 3.22, "learning_rate": 1.5848723220057065e-05, "loss": 0.4853, "step": 4120 }, { "epoch": 3.22, "learning_rate": 1.584666900586533e-05, "loss": 0.8524, "step": 4121 }, { "epoch": 3.22, "learning_rate": 1.584461441674834e-05, "loss": 0.7686, "step": 4122 }, { "epoch": 3.22, "learning_rate": 1.5842559452837848e-05, "loss": 0.9361, "step": 4123 }, { "epoch": 3.22, "learning_rate": 1.584050411426563e-05, "loss": 1.0146, "step": 4124 }, { "epoch": 3.22, "learning_rate": 1.5838448401163483e-05, "loss": 1.0849, "step": 4125 }, { "epoch": 3.22, "learning_rate": 1.5836392313663237e-05, "loss": 1.0467, "step": 4126 }, { "epoch": 3.23, "learning_rate": 1.583433585189674e-05, "loss": 0.9549, "step": 4127 }, { "epoch": 3.23, "learning_rate": 1.583227901599587e-05, "loss": 0.7676, "step": 4128 }, { "epoch": 3.23, "learning_rate": 1.5830221806092518e-05, "loss": 0.9269, "step": 4129 }, { "epoch": 3.23, "learning_rate": 1.582816422231861e-05, "loss": 0.8014, "step": 4130 }, { "epoch": 3.23, "learning_rate": 1.582610626480608e-05, "loss": 0.9119, "step": 4131 }, { "epoch": 3.23, "learning_rate": 1.5824047933686913e-05, "loss": 1.048, "step": 4132 }, { "epoch": 3.23, "learning_rate": 1.5821989229093097e-05, "loss": 0.9055, "step": 4133 }, { "epoch": 3.23, "learning_rate": 1.5819930151156644e-05, "loss": 0.7488, "step": 4134 }, { "epoch": 3.23, "learning_rate": 1.5817870700009598e-05, "loss": 0.6857, "step": 4135 }, { "epoch": 3.23, "learning_rate": 1.5815810875784023e-05, "loss": 0.9662, "step": 4136 }, { "epoch": 3.23, "learning_rate": 1.5813750678612014e-05, "loss": 0.92, "step": 4137 }, { "epoch": 3.23, "learning_rate": 1.581169010862567e-05, "loss": 0.9507, "step": 4138 }, { "epoch": 3.23, "learning_rate": 1.5809629165957144e-05, "loss": 0.9435, "step": 4139 }, { "epoch": 3.24, "learning_rate": 1.5807567850738585e-05, "loss": 0.8605, "step": 4140 }, { "epoch": 3.24, "learning_rate": 1.5805506163102187e-05, "loss": 0.8882, "step": 4141 }, { "epoch": 3.24, "learning_rate": 1.580344410318015e-05, "loss": 0.8134, "step": 4142 }, { "epoch": 3.24, "learning_rate": 1.5801381671104708e-05, "loss": 1.0931, "step": 4143 }, { "epoch": 3.24, "learning_rate": 1.5799318867008118e-05, "loss": 0.9001, "step": 4144 }, { "epoch": 3.24, "learning_rate": 1.5797255691022664e-05, "loss": 0.9878, "step": 4145 }, { "epoch": 3.24, "learning_rate": 1.5795192143280643e-05, "loss": 0.9042, "step": 4146 }, { "epoch": 3.24, "learning_rate": 1.579312822391439e-05, "loss": 0.9311, "step": 4147 }, { "epoch": 3.24, "learning_rate": 1.579106393305625e-05, "loss": 1.0362, "step": 4148 }, { "epoch": 3.24, "learning_rate": 1.5788999270838603e-05, "loss": 1.0696, "step": 4149 }, { "epoch": 3.24, "learning_rate": 1.5786934237393844e-05, "loss": 0.9258, "step": 4150 }, { "epoch": 3.24, "learning_rate": 1.57848688328544e-05, "loss": 1.0629, "step": 4151 }, { "epoch": 3.25, "learning_rate": 1.5782803057352715e-05, "loss": 0.8689, "step": 4152 }, { "epoch": 3.25, "learning_rate": 1.578073691102126e-05, "loss": 0.9835, "step": 4153 }, { "epoch": 3.25, "learning_rate": 1.5778670393992527e-05, "loss": 1.0631, "step": 4154 }, { "epoch": 3.25, "learning_rate": 1.577660350639904e-05, "loss": 1.1396, "step": 4155 }, { "epoch": 3.25, "learning_rate": 1.5774536248373338e-05, "loss": 0.7642, "step": 4156 }, { "epoch": 3.25, "learning_rate": 1.5772468620047986e-05, "loss": 0.9147, "step": 4157 }, { "epoch": 3.25, "learning_rate": 1.577040062155557e-05, "loss": 1.1287, "step": 4158 }, { "epoch": 3.25, "learning_rate": 1.576833225302871e-05, "loss": 0.8367, "step": 4159 }, { "epoch": 3.25, "learning_rate": 1.576626351460004e-05, "loss": 1.1333, "step": 4160 }, { "epoch": 3.25, "learning_rate": 1.5764194406402218e-05, "loss": 0.8015, "step": 4161 }, { "epoch": 3.25, "learning_rate": 1.5762124928567934e-05, "loss": 0.6897, "step": 4162 }, { "epoch": 3.25, "learning_rate": 1.576005508122989e-05, "loss": 1.1121, "step": 4163 }, { "epoch": 3.25, "learning_rate": 1.5757984864520817e-05, "loss": 0.9756, "step": 4164 }, { "epoch": 3.26, "learning_rate": 1.5755914278573477e-05, "loss": 0.8513, "step": 4165 }, { "epoch": 3.26, "learning_rate": 1.5753843323520642e-05, "loss": 1.0903, "step": 4166 }, { "epoch": 3.26, "learning_rate": 1.575177199949512e-05, "loss": 0.7359, "step": 4167 }, { "epoch": 3.26, "learning_rate": 1.5749700306629734e-05, "loss": 0.7733, "step": 4168 }, { "epoch": 3.26, "learning_rate": 1.574762824505734e-05, "loss": 1.0222, "step": 4169 }, { "epoch": 3.26, "learning_rate": 1.5745555814910806e-05, "loss": 0.9662, "step": 4170 }, { "epoch": 3.26, "learning_rate": 1.5743483016323026e-05, "loss": 0.8883, "step": 4171 }, { "epoch": 3.26, "learning_rate": 1.5741409849426926e-05, "loss": 0.7909, "step": 4172 }, { "epoch": 3.26, "learning_rate": 1.573933631435545e-05, "loss": 0.6316, "step": 4173 }, { "epoch": 3.26, "learning_rate": 1.5737262411241566e-05, "loss": 1.0365, "step": 4174 }, { "epoch": 3.26, "learning_rate": 1.5735188140218266e-05, "loss": 0.7581, "step": 4175 }, { "epoch": 3.26, "learning_rate": 1.5733113501418564e-05, "loss": 0.9941, "step": 4176 }, { "epoch": 3.26, "learning_rate": 1.5731038494975503e-05, "loss": 0.7797, "step": 4177 }, { "epoch": 3.27, "learning_rate": 1.572896312102214e-05, "loss": 0.8264, "step": 4178 }, { "epoch": 3.27, "learning_rate": 1.5726887379691563e-05, "loss": 1.003, "step": 4179 }, { "epoch": 3.27, "learning_rate": 1.5724811271116882e-05, "loss": 0.9567, "step": 4180 }, { "epoch": 3.27, "learning_rate": 1.572273479543123e-05, "loss": 0.929, "step": 4181 }, { "epoch": 3.27, "learning_rate": 1.5720657952767764e-05, "loss": 0.707, "step": 4182 }, { "epoch": 3.27, "learning_rate": 1.5718580743259665e-05, "loss": 1.1553, "step": 4183 }, { "epoch": 3.27, "learning_rate": 1.5716503167040134e-05, "loss": 0.8791, "step": 4184 }, { "epoch": 3.27, "learning_rate": 1.5714425224242403e-05, "loss": 0.9531, "step": 4185 }, { "epoch": 3.27, "learning_rate": 1.5712346914999716e-05, "loss": 0.9598, "step": 4186 }, { "epoch": 3.27, "learning_rate": 1.5710268239445352e-05, "loss": 0.7061, "step": 4187 }, { "epoch": 3.27, "learning_rate": 1.570818919771261e-05, "loss": 1.0587, "step": 4188 }, { "epoch": 3.27, "learning_rate": 1.570610978993481e-05, "loss": 0.8447, "step": 4189 }, { "epoch": 3.27, "learning_rate": 1.5704030016245292e-05, "loss": 0.8743, "step": 4190 }, { "epoch": 3.28, "learning_rate": 1.5701949876777428e-05, "loss": 1.0403, "step": 4191 }, { "epoch": 3.28, "learning_rate": 1.5699869371664614e-05, "loss": 0.925, "step": 4192 }, { "epoch": 3.28, "learning_rate": 1.5697788501040257e-05, "loss": 0.8129, "step": 4193 }, { "epoch": 3.28, "learning_rate": 1.56957072650378e-05, "loss": 0.6656, "step": 4194 }, { "epoch": 3.28, "learning_rate": 1.5693625663790708e-05, "loss": 0.7995, "step": 4195 }, { "epoch": 3.28, "learning_rate": 1.569154369743246e-05, "loss": 0.9122, "step": 4196 }, { "epoch": 3.28, "learning_rate": 1.5689461366096567e-05, "loss": 1.1943, "step": 4197 }, { "epoch": 3.28, "learning_rate": 1.568737866991656e-05, "loss": 0.9537, "step": 4198 }, { "epoch": 3.28, "learning_rate": 1.5685295609026e-05, "loss": 1.26, "step": 4199 }, { "epoch": 3.28, "learning_rate": 1.568321218355846e-05, "loss": 1.0569, "step": 4200 }, { "epoch": 3.28, "learning_rate": 1.5681128393647545e-05, "loss": 0.8869, "step": 4201 }, { "epoch": 3.28, "learning_rate": 1.5679044239426887e-05, "loss": 0.7056, "step": 4202 }, { "epoch": 3.28, "learning_rate": 1.5676959721030122e-05, "loss": 0.8422, "step": 4203 }, { "epoch": 3.29, "learning_rate": 1.567487483859093e-05, "loss": 0.719, "step": 4204 }, { "epoch": 3.29, "learning_rate": 1.5672789592243004e-05, "loss": 0.9079, "step": 4205 }, { "epoch": 3.29, "learning_rate": 1.5670703982120066e-05, "loss": 0.9252, "step": 4206 }, { "epoch": 3.29, "learning_rate": 1.566861800835586e-05, "loss": 0.6925, "step": 4207 }, { "epoch": 3.29, "learning_rate": 1.5666531671084144e-05, "loss": 0.9694, "step": 4208 }, { "epoch": 3.29, "learning_rate": 1.5664444970438718e-05, "loss": 0.8313, "step": 4209 }, { "epoch": 3.29, "learning_rate": 1.5662357906553383e-05, "loss": 0.9277, "step": 4210 }, { "epoch": 3.29, "learning_rate": 1.5660270479561985e-05, "loss": 0.7628, "step": 4211 }, { "epoch": 3.29, "learning_rate": 1.5658182689598375e-05, "loss": 0.8473, "step": 4212 }, { "epoch": 3.29, "learning_rate": 1.5656094536796442e-05, "loss": 0.8709, "step": 4213 }, { "epoch": 3.29, "learning_rate": 1.5654006021290088e-05, "loss": 0.8226, "step": 4214 }, { "epoch": 3.29, "learning_rate": 1.565191714321324e-05, "loss": 1.0019, "step": 4215 }, { "epoch": 3.3, "learning_rate": 1.5649827902699852e-05, "loss": 1.0015, "step": 4216 }, { "epoch": 3.3, "learning_rate": 1.56477382998839e-05, "loss": 0.9276, "step": 4217 }, { "epoch": 3.3, "learning_rate": 1.5645648334899378e-05, "loss": 0.8595, "step": 4218 }, { "epoch": 3.3, "learning_rate": 1.5643558007880316e-05, "loss": 1.1613, "step": 4219 }, { "epoch": 3.3, "learning_rate": 1.564146731896075e-05, "loss": 1.0166, "step": 4220 }, { "epoch": 3.3, "learning_rate": 1.5639376268274754e-05, "loss": 0.9646, "step": 4221 }, { "epoch": 3.3, "learning_rate": 1.5637284855956422e-05, "loss": 0.9227, "step": 4222 }, { "epoch": 3.3, "learning_rate": 1.5635193082139858e-05, "loss": 0.8563, "step": 4223 }, { "epoch": 3.3, "learning_rate": 1.563310094695921e-05, "loss": 1.0196, "step": 4224 }, { "epoch": 3.3, "learning_rate": 1.5631008450548634e-05, "loss": 0.7964, "step": 4225 }, { "epoch": 3.3, "learning_rate": 1.5628915593042315e-05, "loss": 0.684, "step": 4226 }, { "epoch": 3.3, "learning_rate": 1.5626822374574464e-05, "loss": 0.9213, "step": 4227 }, { "epoch": 3.3, "learning_rate": 1.56247287952793e-05, "loss": 1.0616, "step": 4228 }, { "epoch": 3.31, "learning_rate": 1.562263485529109e-05, "loss": 0.7831, "step": 4229 }, { "epoch": 3.31, "learning_rate": 1.5620540554744103e-05, "loss": 0.9252, "step": 4230 }, { "epoch": 3.31, "learning_rate": 1.561844589377264e-05, "loss": 0.5835, "step": 4231 }, { "epoch": 3.31, "learning_rate": 1.5616350872511022e-05, "loss": 0.8359, "step": 4232 }, { "epoch": 3.31, "learning_rate": 1.5614255491093603e-05, "loss": 0.9914, "step": 4233 }, { "epoch": 3.31, "learning_rate": 1.5612159749654744e-05, "loss": 0.9584, "step": 4234 }, { "epoch": 3.31, "learning_rate": 1.5610063648328837e-05, "loss": 1.0681, "step": 4235 }, { "epoch": 3.31, "learning_rate": 1.56079671872503e-05, "loss": 1.0282, "step": 4236 }, { "epoch": 3.31, "learning_rate": 1.5605870366553576e-05, "loss": 0.8602, "step": 4237 }, { "epoch": 3.31, "learning_rate": 1.5603773186373114e-05, "loss": 0.9995, "step": 4238 }, { "epoch": 3.31, "learning_rate": 1.560167564684341e-05, "loss": 0.783, "step": 4239 }, { "epoch": 3.31, "learning_rate": 1.5599577748098966e-05, "loss": 0.9595, "step": 4240 }, { "epoch": 3.31, "learning_rate": 1.5597479490274315e-05, "loss": 0.9568, "step": 4241 }, { "epoch": 3.32, "learning_rate": 1.5595380873504013e-05, "loss": 0.8221, "step": 4242 }, { "epoch": 3.32, "learning_rate": 1.5593281897922622e-05, "loss": 0.9711, "step": 4243 }, { "epoch": 3.32, "learning_rate": 1.559118256366476e-05, "loss": 0.94, "step": 4244 }, { "epoch": 3.32, "learning_rate": 1.5589082870865043e-05, "loss": 0.8304, "step": 4245 }, { "epoch": 3.32, "learning_rate": 1.558698281965811e-05, "loss": 1.2592, "step": 4246 }, { "epoch": 3.32, "learning_rate": 1.558488241017864e-05, "loss": 0.8098, "step": 4247 }, { "epoch": 3.32, "learning_rate": 1.5582781642561315e-05, "loss": 0.9245, "step": 4248 }, { "epoch": 3.32, "learning_rate": 1.5580680516940857e-05, "loss": 0.9633, "step": 4249 }, { "epoch": 3.32, "learning_rate": 1.5578579033452e-05, "loss": 0.7849, "step": 4250 }, { "epoch": 3.32, "learning_rate": 1.55764771922295e-05, "loss": 0.8664, "step": 4251 }, { "epoch": 3.32, "learning_rate": 1.557437499340815e-05, "loss": 1.1113, "step": 4252 }, { "epoch": 3.32, "learning_rate": 1.557227243712275e-05, "loss": 1.0616, "step": 4253 }, { "epoch": 3.32, "learning_rate": 1.557016952350813e-05, "loss": 1.1235, "step": 4254 }, { "epoch": 3.33, "learning_rate": 1.5568066252699142e-05, "loss": 0.8928, "step": 4255 }, { "epoch": 3.33, "learning_rate": 1.556596262483066e-05, "loss": 1.0346, "step": 4256 }, { "epoch": 3.33, "learning_rate": 1.5563858640037587e-05, "loss": 1.0171, "step": 4257 }, { "epoch": 3.33, "learning_rate": 1.5561754298454835e-05, "loss": 1.1102, "step": 4258 }, { "epoch": 3.33, "learning_rate": 1.5559649600217355e-05, "loss": 0.977, "step": 4259 }, { "epoch": 3.33, "learning_rate": 1.555754454546011e-05, "loss": 1.0085, "step": 4260 }, { "epoch": 3.33, "learning_rate": 1.5555439134318094e-05, "loss": 0.9362, "step": 4261 }, { "epoch": 3.33, "learning_rate": 1.5553333366926313e-05, "loss": 1.1695, "step": 4262 }, { "epoch": 3.33, "learning_rate": 1.5551227243419805e-05, "loss": 1.154, "step": 4263 }, { "epoch": 3.33, "learning_rate": 1.554912076393363e-05, "loss": 1.0883, "step": 4264 }, { "epoch": 3.33, "learning_rate": 1.5547013928602865e-05, "loss": 1.0087, "step": 4265 }, { "epoch": 3.33, "learning_rate": 1.5544906737562613e-05, "loss": 0.9122, "step": 4266 }, { "epoch": 3.33, "learning_rate": 1.5542799190948003e-05, "loss": 0.824, "step": 4267 }, { "epoch": 3.34, "learning_rate": 1.5540691288894184e-05, "loss": 1.1439, "step": 4268 }, { "epoch": 3.34, "learning_rate": 1.5538583031536328e-05, "loss": 0.8937, "step": 4269 }, { "epoch": 3.34, "learning_rate": 1.553647441900963e-05, "loss": 1.0595, "step": 4270 }, { "epoch": 3.34, "learning_rate": 1.553436545144931e-05, "loss": 0.8371, "step": 4271 }, { "epoch": 3.34, "learning_rate": 1.55322561289906e-05, "loss": 1.0175, "step": 4272 }, { "epoch": 3.34, "learning_rate": 1.5530146451768768e-05, "loss": 1.0306, "step": 4273 }, { "epoch": 3.34, "learning_rate": 1.55280364199191e-05, "loss": 1.1515, "step": 4274 }, { "epoch": 3.34, "learning_rate": 1.5525926033576902e-05, "loss": 0.9355, "step": 4275 }, { "epoch": 3.34, "learning_rate": 1.5523815292877515e-05, "loss": 0.9609, "step": 4276 }, { "epoch": 3.34, "learning_rate": 1.552170419795628e-05, "loss": 0.9468, "step": 4277 }, { "epoch": 3.34, "learning_rate": 1.5519592748948583e-05, "loss": 0.8953, "step": 4278 }, { "epoch": 3.34, "learning_rate": 1.5517480945989815e-05, "loss": 1.1589, "step": 4279 }, { "epoch": 3.35, "learning_rate": 1.5515368789215406e-05, "loss": 0.8843, "step": 4280 }, { "epoch": 3.35, "learning_rate": 1.55132562787608e-05, "loss": 0.9328, "step": 4281 }, { "epoch": 3.35, "learning_rate": 1.551114341476146e-05, "loss": 0.9239, "step": 4282 }, { "epoch": 3.35, "learning_rate": 1.550903019735288e-05, "loss": 0.6302, "step": 4283 }, { "epoch": 3.35, "learning_rate": 1.550691662667057e-05, "loss": 0.947, "step": 4284 }, { "epoch": 3.35, "learning_rate": 1.5504802702850064e-05, "loss": 1.0106, "step": 4285 }, { "epoch": 3.35, "learning_rate": 1.5502688426026924e-05, "loss": 1.2342, "step": 4286 }, { "epoch": 3.35, "learning_rate": 1.5500573796336733e-05, "loss": 1.2781, "step": 4287 }, { "epoch": 3.35, "learning_rate": 1.5498458813915086e-05, "loss": 0.9988, "step": 4288 }, { "epoch": 3.35, "learning_rate": 1.549634347889762e-05, "loss": 0.8959, "step": 4289 }, { "epoch": 3.35, "learning_rate": 1.549422779141997e-05, "loss": 0.7759, "step": 4290 }, { "epoch": 3.35, "learning_rate": 1.5492111751617817e-05, "loss": 0.7681, "step": 4291 }, { "epoch": 3.35, "learning_rate": 1.5489995359626854e-05, "loss": 0.7234, "step": 4292 }, { "epoch": 3.36, "learning_rate": 1.5487878615582798e-05, "loss": 1.1627, "step": 4293 }, { "epoch": 3.36, "learning_rate": 1.5485761519621384e-05, "loss": 0.876, "step": 4294 }, { "epoch": 3.36, "learning_rate": 1.5483644071878375e-05, "loss": 0.842, "step": 4295 }, { "epoch": 3.36, "learning_rate": 1.5481526272489556e-05, "loss": 1.2447, "step": 4296 }, { "epoch": 3.36, "learning_rate": 1.5479408121590734e-05, "loss": 0.9127, "step": 4297 }, { "epoch": 3.36, "learning_rate": 1.5477289619317736e-05, "loss": 0.6632, "step": 4298 }, { "epoch": 3.36, "learning_rate": 1.5475170765806416e-05, "loss": 1.0634, "step": 4299 }, { "epoch": 3.36, "learning_rate": 1.5473051561192645e-05, "loss": 0.9721, "step": 4300 }, { "epoch": 3.36, "learning_rate": 1.5470932005612324e-05, "loss": 0.8247, "step": 4301 }, { "epoch": 3.36, "learning_rate": 1.5468812099201368e-05, "loss": 0.8541, "step": 4302 }, { "epoch": 3.36, "learning_rate": 1.5466691842095722e-05, "loss": 0.6766, "step": 4303 }, { "epoch": 3.36, "learning_rate": 1.5464571234431352e-05, "loss": 0.9513, "step": 4304 }, { "epoch": 3.36, "learning_rate": 1.546245027634424e-05, "loss": 1.0514, "step": 4305 }, { "epoch": 3.37, "learning_rate": 1.54603289679704e-05, "loss": 0.7817, "step": 4306 }, { "epoch": 3.37, "learning_rate": 1.545820730944586e-05, "loss": 0.9532, "step": 4307 }, { "epoch": 3.37, "learning_rate": 1.545608530090667e-05, "loss": 0.9529, "step": 4308 }, { "epoch": 3.37, "learning_rate": 1.5453962942488913e-05, "loss": 0.9618, "step": 4309 }, { "epoch": 3.37, "learning_rate": 1.5451840234328693e-05, "loss": 1.1347, "step": 4310 }, { "epoch": 3.37, "learning_rate": 1.544971717656212e-05, "loss": 0.6412, "step": 4311 }, { "epoch": 3.37, "learning_rate": 1.5447593769325344e-05, "loss": 0.9338, "step": 4312 }, { "epoch": 3.37, "learning_rate": 1.5445470012754528e-05, "loss": 1.2485, "step": 4313 }, { "epoch": 3.37, "learning_rate": 1.5443345906985863e-05, "loss": 0.9826, "step": 4314 }, { "epoch": 3.37, "learning_rate": 1.5441221452155563e-05, "loss": 0.8922, "step": 4315 }, { "epoch": 3.37, "learning_rate": 1.5439096648399857e-05, "loss": 0.7949, "step": 4316 }, { "epoch": 3.37, "learning_rate": 1.5436971495855e-05, "loss": 0.9545, "step": 4317 }, { "epoch": 3.37, "learning_rate": 1.543484599465727e-05, "loss": 1.0119, "step": 4318 }, { "epoch": 3.38, "learning_rate": 1.5432720144942974e-05, "loss": 1.0614, "step": 4319 }, { "epoch": 3.38, "learning_rate": 1.543059394684843e-05, "loss": 1.1581, "step": 4320 }, { "epoch": 3.38, "learning_rate": 1.5428467400509984e-05, "loss": 0.8822, "step": 4321 }, { "epoch": 3.38, "learning_rate": 1.5426340506064003e-05, "loss": 0.6682, "step": 4322 }, { "epoch": 3.38, "learning_rate": 1.5424213263646875e-05, "loss": 0.9155, "step": 4323 }, { "epoch": 3.38, "learning_rate": 1.5422085673395016e-05, "loss": 1.0001, "step": 4324 }, { "epoch": 3.38, "learning_rate": 1.5419957735444858e-05, "loss": 0.734, "step": 4325 }, { "epoch": 3.38, "learning_rate": 1.5417829449932862e-05, "loss": 1.3136, "step": 4326 }, { "epoch": 3.38, "learning_rate": 1.54157008169955e-05, "loss": 1.0552, "step": 4327 }, { "epoch": 3.38, "learning_rate": 1.541357183676928e-05, "loss": 1.113, "step": 4328 }, { "epoch": 3.38, "learning_rate": 1.541144250939072e-05, "loss": 1.1769, "step": 4329 }, { "epoch": 3.38, "learning_rate": 1.5409312834996373e-05, "loss": 0.9706, "step": 4330 }, { "epoch": 3.38, "learning_rate": 1.54071828137228e-05, "loss": 0.9494, "step": 4331 }, { "epoch": 3.39, "learning_rate": 1.5405052445706593e-05, "loss": 0.987, "step": 4332 }, { "epoch": 3.39, "learning_rate": 1.5402921731084368e-05, "loss": 1.0576, "step": 4333 }, { "epoch": 3.39, "learning_rate": 1.5400790669992757e-05, "loss": 0.9918, "step": 4334 }, { "epoch": 3.39, "learning_rate": 1.5398659262568416e-05, "loss": 0.9039, "step": 4335 }, { "epoch": 3.39, "learning_rate": 1.5396527508948028e-05, "loss": 0.8824, "step": 4336 }, { "epoch": 3.39, "learning_rate": 1.539439540926829e-05, "loss": 0.8215, "step": 4337 }, { "epoch": 3.39, "learning_rate": 1.5392262963665933e-05, "loss": 0.8664, "step": 4338 }, { "epoch": 3.39, "learning_rate": 1.5390130172277695e-05, "loss": 1.0624, "step": 4339 }, { "epoch": 3.39, "learning_rate": 1.5387997035240342e-05, "loss": 1.0065, "step": 4340 }, { "epoch": 3.39, "learning_rate": 1.5385863552690675e-05, "loss": 0.8436, "step": 4341 }, { "epoch": 3.39, "learning_rate": 1.5383729724765502e-05, "loss": 0.9634, "step": 4342 }, { "epoch": 3.39, "learning_rate": 1.538159555160165e-05, "loss": 1.1487, "step": 4343 }, { "epoch": 3.4, "learning_rate": 1.537946103333599e-05, "loss": 1.0637, "step": 4344 }, { "epoch": 3.4, "learning_rate": 1.5377326170105385e-05, "loss": 0.8417, "step": 4345 }, { "epoch": 3.4, "learning_rate": 1.5375190962046748e-05, "loss": 1.0321, "step": 4346 }, { "epoch": 3.4, "learning_rate": 1.5373055409296993e-05, "loss": 0.9975, "step": 4347 }, { "epoch": 3.4, "learning_rate": 1.537091951199307e-05, "loss": 0.8327, "step": 4348 }, { "epoch": 3.4, "learning_rate": 1.5368783270271948e-05, "loss": 1.0139, "step": 4349 }, { "epoch": 3.4, "learning_rate": 1.5366646684270615e-05, "loss": 0.7894, "step": 4350 }, { "epoch": 3.4, "learning_rate": 1.5364509754126077e-05, "loss": 0.886, "step": 4351 }, { "epoch": 3.4, "learning_rate": 1.5362372479975377e-05, "loss": 1.02, "step": 4352 }, { "epoch": 3.4, "learning_rate": 1.536023486195556e-05, "loss": 0.955, "step": 4353 }, { "epoch": 3.4, "learning_rate": 1.5358096900203713e-05, "loss": 0.9096, "step": 4354 }, { "epoch": 3.4, "learning_rate": 1.5355958594856926e-05, "loss": 1.0519, "step": 4355 }, { "epoch": 3.4, "learning_rate": 1.535381994605233e-05, "loss": 0.8492, "step": 4356 }, { "epoch": 3.41, "learning_rate": 1.5351680953927058e-05, "loss": 0.8659, "step": 4357 }, { "epoch": 3.41, "learning_rate": 1.534954161861829e-05, "loss": 1.1517, "step": 4358 }, { "epoch": 3.41, "learning_rate": 1.53474019402632e-05, "loss": 1.0528, "step": 4359 }, { "epoch": 3.41, "learning_rate": 1.534526191899901e-05, "loss": 0.8693, "step": 4360 }, { "epoch": 3.41, "learning_rate": 1.5343121554962942e-05, "loss": 0.7926, "step": 4361 }, { "epoch": 3.41, "learning_rate": 1.534098084829225e-05, "loss": 0.7821, "step": 4362 }, { "epoch": 3.41, "learning_rate": 1.5338839799124214e-05, "loss": 0.9282, "step": 4363 }, { "epoch": 3.41, "learning_rate": 1.5336698407596133e-05, "loss": 1.0969, "step": 4364 }, { "epoch": 3.41, "learning_rate": 1.533455667384532e-05, "loss": 0.8886, "step": 4365 }, { "epoch": 3.41, "learning_rate": 1.533241459800912e-05, "loss": 0.8326, "step": 4366 }, { "epoch": 3.41, "learning_rate": 1.53302721802249e-05, "loss": 1.1704, "step": 4367 }, { "epoch": 3.41, "learning_rate": 1.5328129420630035e-05, "loss": 0.9234, "step": 4368 }, { "epoch": 3.41, "learning_rate": 1.5325986319361942e-05, "loss": 0.9919, "step": 4369 }, { "epoch": 3.42, "learning_rate": 1.532384287655805e-05, "loss": 1.1076, "step": 4370 }, { "epoch": 3.42, "learning_rate": 1.5321699092355804e-05, "loss": 1.0836, "step": 4371 }, { "epoch": 3.42, "learning_rate": 1.5319554966892682e-05, "loss": 0.9754, "step": 4372 }, { "epoch": 3.42, "learning_rate": 1.5317410500306174e-05, "loss": 0.925, "step": 4373 }, { "epoch": 3.42, "learning_rate": 1.53152656927338e-05, "loss": 0.9169, "step": 4374 }, { "epoch": 3.42, "learning_rate": 1.53131205443131e-05, "loss": 0.8636, "step": 4375 }, { "epoch": 3.42, "learning_rate": 1.5310975055181633e-05, "loss": 0.6827, "step": 4376 }, { "epoch": 3.42, "learning_rate": 1.530882922547698e-05, "loss": 1.0673, "step": 4377 }, { "epoch": 3.42, "learning_rate": 1.5306683055336748e-05, "loss": 1.0558, "step": 4378 }, { "epoch": 3.42, "learning_rate": 1.530453654489856e-05, "loss": 1.0324, "step": 4379 }, { "epoch": 3.42, "learning_rate": 1.530238969430006e-05, "loss": 0.858, "step": 4380 }, { "epoch": 3.42, "learning_rate": 1.5300242503678928e-05, "loss": 0.8662, "step": 4381 }, { "epoch": 3.42, "learning_rate": 1.5298094973172845e-05, "loss": 0.9648, "step": 4382 }, { "epoch": 3.43, "learning_rate": 1.529594710291953e-05, "loss": 1.1044, "step": 4383 }, { "epoch": 3.43, "learning_rate": 1.529379889305672e-05, "loss": 0.8813, "step": 4384 }, { "epoch": 3.43, "learning_rate": 1.5291650343722166e-05, "loss": 1.049, "step": 4385 }, { "epoch": 3.43, "learning_rate": 1.5289501455053647e-05, "loss": 0.9363, "step": 4386 }, { "epoch": 3.43, "learning_rate": 1.528735222718897e-05, "loss": 0.967, "step": 4387 }, { "epoch": 3.43, "learning_rate": 1.528520266026595e-05, "loss": 0.9085, "step": 4388 }, { "epoch": 3.43, "learning_rate": 1.5283052754422435e-05, "loss": 0.8429, "step": 4389 }, { "epoch": 3.43, "learning_rate": 1.5280902509796287e-05, "loss": 0.8811, "step": 4390 }, { "epoch": 3.43, "learning_rate": 1.5278751926525393e-05, "loss": 0.9506, "step": 4391 }, { "epoch": 3.43, "learning_rate": 1.527660100474767e-05, "loss": 0.8765, "step": 4392 }, { "epoch": 3.43, "learning_rate": 1.5274449744601034e-05, "loss": 0.9989, "step": 4393 }, { "epoch": 3.43, "learning_rate": 1.5272298146223454e-05, "loss": 1.0099, "step": 4394 }, { "epoch": 3.43, "learning_rate": 1.527014620975289e-05, "loss": 0.826, "step": 4395 }, { "epoch": 3.44, "learning_rate": 1.526799393532735e-05, "loss": 0.8904, "step": 4396 }, { "epoch": 3.44, "learning_rate": 1.526584132308484e-05, "loss": 0.8492, "step": 4397 }, { "epoch": 3.44, "learning_rate": 1.5263688373163406e-05, "loss": 0.8003, "step": 4398 }, { "epoch": 3.44, "learning_rate": 1.5261535085701105e-05, "loss": 0.7964, "step": 4399 }, { "epoch": 3.44, "learning_rate": 1.5259381460836023e-05, "loss": 0.811, "step": 4400 }, { "epoch": 3.44, "learning_rate": 1.5257227498706263e-05, "loss": 0.7503, "step": 4401 }, { "epoch": 3.44, "learning_rate": 1.5255073199449949e-05, "loss": 0.813, "step": 4402 }, { "epoch": 3.44, "learning_rate": 1.5252918563205227e-05, "loss": 1.0458, "step": 4403 }, { "epoch": 3.44, "learning_rate": 1.5250763590110272e-05, "loss": 0.7641, "step": 4404 }, { "epoch": 3.44, "learning_rate": 1.5248608280303265e-05, "loss": 0.9733, "step": 4405 }, { "epoch": 3.44, "learning_rate": 1.5246452633922428e-05, "loss": 0.8379, "step": 4406 }, { "epoch": 3.44, "learning_rate": 1.5244296651105988e-05, "loss": 0.9547, "step": 4407 }, { "epoch": 3.45, "learning_rate": 1.5242140331992206e-05, "loss": 0.9344, "step": 4408 }, { "epoch": 3.45, "learning_rate": 1.523998367671935e-05, "loss": 0.9354, "step": 4409 }, { "epoch": 3.45, "learning_rate": 1.5237826685425723e-05, "loss": 0.927, "step": 4410 }, { "epoch": 3.45, "learning_rate": 1.523566935824965e-05, "loss": 1.1631, "step": 4411 }, { "epoch": 3.45, "learning_rate": 1.5233511695329464e-05, "loss": 0.8917, "step": 4412 }, { "epoch": 3.45, "learning_rate": 1.5231353696803535e-05, "loss": 0.8144, "step": 4413 }, { "epoch": 3.45, "learning_rate": 1.5229195362810238e-05, "loss": 0.903, "step": 4414 }, { "epoch": 3.45, "learning_rate": 1.522703669348799e-05, "loss": 0.7022, "step": 4415 }, { "epoch": 3.45, "learning_rate": 1.5224877688975213e-05, "loss": 0.9645, "step": 4416 }, { "epoch": 3.45, "learning_rate": 1.5222718349410354e-05, "loss": 1.1399, "step": 4417 }, { "epoch": 3.45, "learning_rate": 1.522055867493189e-05, "loss": 0.9262, "step": 4418 }, { "epoch": 3.45, "learning_rate": 1.5218398665678307e-05, "loss": 0.7735, "step": 4419 }, { "epoch": 3.45, "learning_rate": 1.5216238321788117e-05, "loss": 1.0342, "step": 4420 }, { "epoch": 3.46, "learning_rate": 1.5214077643399865e-05, "loss": 1.1013, "step": 4421 }, { "epoch": 3.46, "learning_rate": 1.5211916630652095e-05, "loss": 0.8822, "step": 4422 }, { "epoch": 3.46, "learning_rate": 1.5209755283683392e-05, "loss": 0.7874, "step": 4423 }, { "epoch": 3.46, "learning_rate": 1.5207593602632353e-05, "loss": 0.864, "step": 4424 }, { "epoch": 3.46, "learning_rate": 1.52054315876376e-05, "loss": 1.1555, "step": 4425 }, { "epoch": 3.46, "learning_rate": 1.5203269238837773e-05, "loss": 1.0856, "step": 4426 }, { "epoch": 3.46, "learning_rate": 1.5201106556371537e-05, "loss": 0.887, "step": 4427 }, { "epoch": 3.46, "learning_rate": 1.5198943540377573e-05, "loss": 0.9559, "step": 4428 }, { "epoch": 3.46, "learning_rate": 1.5196780190994598e-05, "loss": 0.8235, "step": 4429 }, { "epoch": 3.46, "learning_rate": 1.5194616508361323e-05, "loss": 0.8204, "step": 4430 }, { "epoch": 3.46, "learning_rate": 1.5192452492616511e-05, "loss": 1.1419, "step": 4431 }, { "epoch": 3.46, "learning_rate": 1.5190288143898927e-05, "loss": 1.1988, "step": 4432 }, { "epoch": 3.46, "learning_rate": 1.5188123462347367e-05, "loss": 0.8286, "step": 4433 }, { "epoch": 3.47, "learning_rate": 1.5185958448100633e-05, "loss": 0.9951, "step": 4434 }, { "epoch": 3.47, "learning_rate": 1.518379310129757e-05, "loss": 1.1141, "step": 4435 }, { "epoch": 3.47, "learning_rate": 1.5181627422077027e-05, "loss": 0.964, "step": 4436 }, { "epoch": 3.47, "learning_rate": 1.5179461410577889e-05, "loss": 0.9464, "step": 4437 }, { "epoch": 3.47, "learning_rate": 1.5177295066939048e-05, "loss": 1.0099, "step": 4438 }, { "epoch": 3.47, "learning_rate": 1.5175128391299421e-05, "loss": 0.9761, "step": 4439 }, { "epoch": 3.47, "learning_rate": 1.5172961383797957e-05, "loss": 0.8425, "step": 4440 }, { "epoch": 3.47, "learning_rate": 1.5170794044573614e-05, "loss": 0.9237, "step": 4441 }, { "epoch": 3.47, "learning_rate": 1.5168626373765375e-05, "loss": 1.1835, "step": 4442 }, { "epoch": 3.47, "learning_rate": 1.5166458371512245e-05, "loss": 0.984, "step": 4443 }, { "epoch": 3.47, "learning_rate": 1.5164290037953252e-05, "loss": 0.8642, "step": 4444 }, { "epoch": 3.47, "learning_rate": 1.5162121373227437e-05, "loss": 0.8446, "step": 4445 }, { "epoch": 3.47, "learning_rate": 1.5159952377473876e-05, "loss": 0.8937, "step": 4446 }, { "epoch": 3.48, "learning_rate": 1.5157783050831656e-05, "loss": 1.0677, "step": 4447 }, { "epoch": 3.48, "learning_rate": 1.5155613393439885e-05, "loss": 0.9596, "step": 4448 }, { "epoch": 3.48, "learning_rate": 1.51534434054377e-05, "loss": 0.9275, "step": 4449 }, { "epoch": 3.48, "learning_rate": 1.515127308696425e-05, "loss": 0.8955, "step": 4450 }, { "epoch": 3.48, "learning_rate": 1.5149102438158711e-05, "loss": 0.8523, "step": 4451 }, { "epoch": 3.48, "learning_rate": 1.5146931459160281e-05, "loss": 0.817, "step": 4452 }, { "epoch": 3.48, "learning_rate": 1.5144760150108174e-05, "loss": 1.0344, "step": 4453 }, { "epoch": 3.48, "learning_rate": 1.5142588511141629e-05, "loss": 1.0391, "step": 4454 }, { "epoch": 3.48, "learning_rate": 1.5140416542399905e-05, "loss": 0.9455, "step": 4455 }, { "epoch": 3.48, "learning_rate": 1.5138244244022281e-05, "loss": 0.9227, "step": 4456 }, { "epoch": 3.48, "learning_rate": 1.5136071616148062e-05, "loss": 0.9839, "step": 4457 }, { "epoch": 3.48, "learning_rate": 1.5133898658916568e-05, "loss": 0.7237, "step": 4458 }, { "epoch": 3.48, "learning_rate": 1.5131725372467143e-05, "loss": 0.6956, "step": 4459 }, { "epoch": 3.49, "learning_rate": 1.5129551756939154e-05, "loss": 0.956, "step": 4460 }, { "epoch": 3.49, "learning_rate": 1.5127377812471981e-05, "loss": 0.6097, "step": 4461 }, { "epoch": 3.49, "learning_rate": 1.5125203539205039e-05, "loss": 1.0075, "step": 4462 }, { "epoch": 3.49, "learning_rate": 1.5123028937277749e-05, "loss": 0.9328, "step": 4463 }, { "epoch": 3.49, "learning_rate": 1.5120854006829561e-05, "loss": 0.8819, "step": 4464 }, { "epoch": 3.49, "learning_rate": 1.511867874799995e-05, "loss": 1.2748, "step": 4465 }, { "epoch": 3.49, "learning_rate": 1.5116503160928406e-05, "loss": 0.9932, "step": 4466 }, { "epoch": 3.49, "learning_rate": 1.5114327245754439e-05, "loss": 0.7579, "step": 4467 }, { "epoch": 3.49, "learning_rate": 1.5112151002617581e-05, "loss": 0.6845, "step": 4468 }, { "epoch": 3.49, "learning_rate": 1.5109974431657395e-05, "loss": 0.8926, "step": 4469 }, { "epoch": 3.49, "learning_rate": 1.5107797533013444e-05, "loss": 0.8018, "step": 4470 }, { "epoch": 3.49, "learning_rate": 1.5105620306825335e-05, "loss": 0.7621, "step": 4471 }, { "epoch": 3.5, "learning_rate": 1.510344275323268e-05, "loss": 1.123, "step": 4472 }, { "epoch": 3.5, "learning_rate": 1.5101264872375118e-05, "loss": 1.0159, "step": 4473 }, { "epoch": 3.5, "learning_rate": 1.5099086664392309e-05, "loss": 0.9554, "step": 4474 }, { "epoch": 3.5, "learning_rate": 1.5096908129423937e-05, "loss": 0.9333, "step": 4475 }, { "epoch": 3.5, "learning_rate": 1.5094729267609696e-05, "loss": 0.9574, "step": 4476 }, { "epoch": 3.5, "learning_rate": 1.5092550079089315e-05, "loss": 0.9195, "step": 4477 }, { "epoch": 3.5, "learning_rate": 1.5090370564002533e-05, "loss": 0.9212, "step": 4478 }, { "epoch": 3.5, "learning_rate": 1.5088190722489116e-05, "loss": 0.7898, "step": 4479 }, { "epoch": 3.5, "learning_rate": 1.5086010554688848e-05, "loss": 1.1498, "step": 4480 }, { "epoch": 3.5, "learning_rate": 1.508383006074154e-05, "loss": 0.7723, "step": 4481 }, { "epoch": 3.5, "learning_rate": 1.508164924078701e-05, "loss": 1.1196, "step": 4482 }, { "epoch": 3.5, "learning_rate": 1.5079468094965113e-05, "loss": 0.8538, "step": 4483 }, { "epoch": 3.5, "learning_rate": 1.5077286623415716e-05, "loss": 0.8543, "step": 4484 }, { "epoch": 3.51, "learning_rate": 1.507510482627871e-05, "loss": 0.9029, "step": 4485 }, { "epoch": 3.51, "learning_rate": 1.5072922703694005e-05, "loss": 0.9847, "step": 4486 }, { "epoch": 3.51, "learning_rate": 1.5070740255801527e-05, "loss": 0.7931, "step": 4487 }, { "epoch": 3.51, "learning_rate": 1.5068557482741235e-05, "loss": 1.0897, "step": 4488 }, { "epoch": 3.51, "learning_rate": 1.5066374384653102e-05, "loss": 0.9209, "step": 4489 }, { "epoch": 3.51, "learning_rate": 1.5064190961677118e-05, "loss": 0.9969, "step": 4490 }, { "epoch": 3.51, "learning_rate": 1.50620072139533e-05, "loss": 1.0781, "step": 4491 }, { "epoch": 3.51, "learning_rate": 1.5059823141621688e-05, "loss": 1.1066, "step": 4492 }, { "epoch": 3.51, "learning_rate": 1.5057638744822327e-05, "loss": 0.8408, "step": 4493 }, { "epoch": 3.51, "learning_rate": 1.5055454023695304e-05, "loss": 0.9067, "step": 4494 }, { "epoch": 3.51, "learning_rate": 1.5053268978380718e-05, "loss": 1.1761, "step": 4495 }, { "epoch": 3.51, "learning_rate": 1.505108360901868e-05, "loss": 1.1207, "step": 4496 }, { "epoch": 3.51, "learning_rate": 1.5048897915749335e-05, "loss": 0.816, "step": 4497 }, { "epoch": 3.52, "learning_rate": 1.5046711898712844e-05, "loss": 1.0319, "step": 4498 }, { "epoch": 3.52, "learning_rate": 1.5044525558049383e-05, "loss": 0.9611, "step": 4499 }, { "epoch": 3.52, "learning_rate": 1.5042338893899162e-05, "loss": 1.0596, "step": 4500 }, { "epoch": 3.52, "learning_rate": 1.5040151906402396e-05, "loss": 0.781, "step": 4501 }, { "epoch": 3.52, "learning_rate": 1.5037964595699333e-05, "loss": 0.7473, "step": 4502 }, { "epoch": 3.52, "learning_rate": 1.5035776961930238e-05, "loss": 1.0636, "step": 4503 }, { "epoch": 3.52, "learning_rate": 1.503358900523539e-05, "loss": 0.8818, "step": 4504 }, { "epoch": 3.52, "learning_rate": 1.5031400725755098e-05, "loss": 1.1442, "step": 4505 }, { "epoch": 3.52, "learning_rate": 1.5029212123629693e-05, "loss": 0.9556, "step": 4506 }, { "epoch": 3.52, "learning_rate": 1.5027023198999513e-05, "loss": 0.6987, "step": 4507 }, { "epoch": 3.52, "learning_rate": 1.5024833952004931e-05, "loss": 0.9799, "step": 4508 }, { "epoch": 3.52, "learning_rate": 1.5022644382786333e-05, "loss": 0.9016, "step": 4509 }, { "epoch": 3.52, "learning_rate": 1.502045449148413e-05, "loss": 1.0276, "step": 4510 }, { "epoch": 3.53, "learning_rate": 1.5018264278238754e-05, "loss": 0.9683, "step": 4511 }, { "epoch": 3.53, "learning_rate": 1.501607374319065e-05, "loss": 0.9257, "step": 4512 }, { "epoch": 3.53, "learning_rate": 1.5013882886480291e-05, "loss": 1.0366, "step": 4513 }, { "epoch": 3.53, "learning_rate": 1.501169170824817e-05, "loss": 0.8687, "step": 4514 }, { "epoch": 3.53, "learning_rate": 1.5009500208634795e-05, "loss": 0.8018, "step": 4515 }, { "epoch": 3.53, "learning_rate": 1.5007308387780704e-05, "loss": 0.9949, "step": 4516 }, { "epoch": 3.53, "learning_rate": 1.500511624582645e-05, "loss": 1.209, "step": 4517 }, { "epoch": 3.53, "learning_rate": 1.5002923782912602e-05, "loss": 1.0425, "step": 4518 }, { "epoch": 3.53, "learning_rate": 1.5000730999179757e-05, "loss": 1.0702, "step": 4519 }, { "epoch": 3.53, "learning_rate": 1.4998537894768533e-05, "loss": 0.9971, "step": 4520 }, { "epoch": 3.53, "learning_rate": 1.499634446981956e-05, "loss": 1.0578, "step": 4521 }, { "epoch": 3.53, "learning_rate": 1.4994150724473498e-05, "loss": 0.9255, "step": 4522 }, { "epoch": 3.53, "learning_rate": 1.4991956658871027e-05, "loss": 0.8936, "step": 4523 }, { "epoch": 3.54, "learning_rate": 1.4989762273152838e-05, "loss": 0.8147, "step": 4524 }, { "epoch": 3.54, "learning_rate": 1.4987567567459652e-05, "loss": 1.009, "step": 4525 }, { "epoch": 3.54, "learning_rate": 1.4985372541932205e-05, "loss": 0.8021, "step": 4526 }, { "epoch": 3.54, "learning_rate": 1.4983177196711258e-05, "loss": 0.9437, "step": 4527 }, { "epoch": 3.54, "learning_rate": 1.4980981531937591e-05, "loss": 0.8439, "step": 4528 }, { "epoch": 3.54, "learning_rate": 1.4978785547752004e-05, "loss": 0.9238, "step": 4529 }, { "epoch": 3.54, "learning_rate": 1.4976589244295315e-05, "loss": 0.9092, "step": 4530 }, { "epoch": 3.54, "learning_rate": 1.4974392621708365e-05, "loss": 0.9896, "step": 4531 }, { "epoch": 3.54, "learning_rate": 1.4972195680132019e-05, "loss": 1.1656, "step": 4532 }, { "epoch": 3.54, "learning_rate": 1.4969998419707153e-05, "loss": 0.7417, "step": 4533 }, { "epoch": 3.54, "learning_rate": 1.4967800840574671e-05, "loss": 0.9363, "step": 4534 }, { "epoch": 3.54, "learning_rate": 1.4965602942875501e-05, "loss": 0.9791, "step": 4535 }, { "epoch": 3.55, "learning_rate": 1.4963404726750581e-05, "loss": 1.1127, "step": 4536 }, { "epoch": 3.55, "learning_rate": 1.4961206192340875e-05, "loss": 0.9255, "step": 4537 }, { "epoch": 3.55, "learning_rate": 1.4959007339787368e-05, "loss": 0.7315, "step": 4538 }, { "epoch": 3.55, "learning_rate": 1.4956808169231058e-05, "loss": 0.8593, "step": 4539 }, { "epoch": 3.55, "learning_rate": 1.4954608680812983e-05, "loss": 0.8629, "step": 4540 }, { "epoch": 3.55, "learning_rate": 1.4952408874674176e-05, "loss": 0.8137, "step": 4541 }, { "epoch": 3.55, "learning_rate": 1.4950208750955707e-05, "loss": 1.081, "step": 4542 }, { "epoch": 3.55, "learning_rate": 1.4948008309798663e-05, "loss": 0.9601, "step": 4543 }, { "epoch": 3.55, "learning_rate": 1.4945807551344146e-05, "loss": 1.2432, "step": 4544 }, { "epoch": 3.55, "learning_rate": 1.4943606475733289e-05, "loss": 0.9456, "step": 4545 }, { "epoch": 3.55, "learning_rate": 1.4941405083107232e-05, "loss": 0.8713, "step": 4546 }, { "epoch": 3.55, "learning_rate": 1.4939203373607145e-05, "loss": 0.9516, "step": 4547 }, { "epoch": 3.55, "learning_rate": 1.4937001347374218e-05, "loss": 1.0342, "step": 4548 }, { "epoch": 3.56, "learning_rate": 1.4934799004549653e-05, "loss": 1.0054, "step": 4549 }, { "epoch": 3.56, "learning_rate": 1.4932596345274686e-05, "loss": 0.9758, "step": 4550 }, { "epoch": 3.56, "learning_rate": 1.4930393369690557e-05, "loss": 0.7546, "step": 4551 }, { "epoch": 3.56, "learning_rate": 1.492819007793854e-05, "loss": 0.8837, "step": 4552 }, { "epoch": 3.56, "learning_rate": 1.492598647015992e-05, "loss": 0.7833, "step": 4553 }, { "epoch": 3.56, "learning_rate": 1.4923782546496015e-05, "loss": 0.9805, "step": 4554 }, { "epoch": 3.56, "learning_rate": 1.4921578307088145e-05, "loss": 1.1796, "step": 4555 }, { "epoch": 3.56, "learning_rate": 1.4919373752077661e-05, "loss": 1.1494, "step": 4556 }, { "epoch": 3.56, "learning_rate": 1.4917168881605936e-05, "loss": 0.9033, "step": 4557 }, { "epoch": 3.56, "learning_rate": 1.491496369581436e-05, "loss": 0.9645, "step": 4558 }, { "epoch": 3.56, "learning_rate": 1.4912758194844341e-05, "loss": 0.8516, "step": 4559 }, { "epoch": 3.56, "learning_rate": 1.4910552378837313e-05, "loss": 1.0391, "step": 4560 }, { "epoch": 3.56, "learning_rate": 1.4908346247934724e-05, "loss": 0.7616, "step": 4561 }, { "epoch": 3.57, "learning_rate": 1.4906139802278045e-05, "loss": 0.8956, "step": 4562 }, { "epoch": 3.57, "learning_rate": 1.4903933042008772e-05, "loss": 0.9397, "step": 4563 }, { "epoch": 3.57, "learning_rate": 1.4901725967268405e-05, "loss": 0.8125, "step": 4564 }, { "epoch": 3.57, "learning_rate": 1.4899518578198489e-05, "loss": 0.9113, "step": 4565 }, { "epoch": 3.57, "learning_rate": 1.489731087494057e-05, "loss": 0.9443, "step": 4566 }, { "epoch": 3.57, "learning_rate": 1.4895102857636217e-05, "loss": 0.9601, "step": 4567 }, { "epoch": 3.57, "learning_rate": 1.4892894526427024e-05, "loss": 1.1848, "step": 4568 }, { "epoch": 3.57, "learning_rate": 1.4890685881454608e-05, "loss": 0.9628, "step": 4569 }, { "epoch": 3.57, "learning_rate": 1.4888476922860591e-05, "loss": 1.0485, "step": 4570 }, { "epoch": 3.57, "learning_rate": 1.4886267650786636e-05, "loss": 0.8829, "step": 4571 }, { "epoch": 3.57, "learning_rate": 1.488405806537441e-05, "loss": 0.7041, "step": 4572 }, { "epoch": 3.57, "learning_rate": 1.4881848166765605e-05, "loss": 0.9156, "step": 4573 }, { "epoch": 3.57, "learning_rate": 1.4879637955101936e-05, "loss": 1.0099, "step": 4574 }, { "epoch": 3.58, "learning_rate": 1.4877427430525134e-05, "loss": 0.7996, "step": 4575 }, { "epoch": 3.58, "learning_rate": 1.4875216593176951e-05, "loss": 0.9947, "step": 4576 }, { "epoch": 3.58, "learning_rate": 1.4873005443199165e-05, "loss": 1.0072, "step": 4577 }, { "epoch": 3.58, "learning_rate": 1.487079398073356e-05, "loss": 0.934, "step": 4578 }, { "epoch": 3.58, "learning_rate": 1.4868582205921957e-05, "loss": 0.7805, "step": 4579 }, { "epoch": 3.58, "learning_rate": 1.4866370118906185e-05, "loss": 0.8758, "step": 4580 }, { "epoch": 3.58, "learning_rate": 1.4864157719828099e-05, "loss": 1.0778, "step": 4581 }, { "epoch": 3.58, "learning_rate": 1.4861945008829569e-05, "loss": 0.908, "step": 4582 }, { "epoch": 3.58, "learning_rate": 1.4859731986052493e-05, "loss": 0.9447, "step": 4583 }, { "epoch": 3.58, "learning_rate": 1.4857518651638776e-05, "loss": 0.6393, "step": 4584 }, { "epoch": 3.58, "learning_rate": 1.4855305005730358e-05, "loss": 1.0158, "step": 4585 }, { "epoch": 3.58, "learning_rate": 1.4853091048469191e-05, "loss": 1.0302, "step": 4586 }, { "epoch": 3.58, "learning_rate": 1.4850876779997245e-05, "loss": 0.969, "step": 4587 }, { "epoch": 3.59, "learning_rate": 1.4848662200456513e-05, "loss": 0.9249, "step": 4588 }, { "epoch": 3.59, "learning_rate": 1.4846447309989011e-05, "loss": 0.9471, "step": 4589 }, { "epoch": 3.59, "learning_rate": 1.4844232108736769e-05, "loss": 0.8875, "step": 4590 }, { "epoch": 3.59, "learning_rate": 1.4842016596841842e-05, "loss": 1.0525, "step": 4591 }, { "epoch": 3.59, "learning_rate": 1.4839800774446303e-05, "loss": 1.0749, "step": 4592 }, { "epoch": 3.59, "learning_rate": 1.4837584641692235e-05, "loss": 0.8818, "step": 4593 }, { "epoch": 3.59, "learning_rate": 1.4835368198721767e-05, "loss": 0.9755, "step": 4594 }, { "epoch": 3.59, "learning_rate": 1.4833151445677017e-05, "loss": 0.908, "step": 4595 }, { "epoch": 3.59, "learning_rate": 1.4830934382700143e-05, "loss": 0.9955, "step": 4596 }, { "epoch": 3.59, "learning_rate": 1.4828717009933317e-05, "loss": 0.8611, "step": 4597 }, { "epoch": 3.59, "learning_rate": 1.4826499327518731e-05, "loss": 1.077, "step": 4598 }, { "epoch": 3.59, "learning_rate": 1.4824281335598593e-05, "loss": 0.8997, "step": 4599 }, { "epoch": 3.6, "learning_rate": 1.4822063034315143e-05, "loss": 1.0723, "step": 4600 }, { "epoch": 3.6, "learning_rate": 1.4819844423810625e-05, "loss": 0.8335, "step": 4601 }, { "epoch": 3.6, "learning_rate": 1.4817625504227313e-05, "loss": 0.8385, "step": 4602 }, { "epoch": 3.6, "learning_rate": 1.4815406275707498e-05, "loss": 0.8762, "step": 4603 }, { "epoch": 3.6, "learning_rate": 1.481318673839349e-05, "loss": 1.0046, "step": 4604 }, { "epoch": 3.6, "learning_rate": 1.4810966892427618e-05, "loss": 0.7969, "step": 4605 }, { "epoch": 3.6, "learning_rate": 1.480874673795224e-05, "loss": 0.9643, "step": 4606 }, { "epoch": 3.6, "learning_rate": 1.4806526275109715e-05, "loss": 0.8182, "step": 4607 }, { "epoch": 3.6, "learning_rate": 1.4804305504042443e-05, "loss": 0.981, "step": 4608 }, { "epoch": 3.6, "learning_rate": 1.4802084424892829e-05, "loss": 0.8946, "step": 4609 }, { "epoch": 3.6, "learning_rate": 1.4799863037803307e-05, "loss": 0.8994, "step": 4610 }, { "epoch": 3.6, "learning_rate": 1.4797641342916318e-05, "loss": 0.9544, "step": 4611 }, { "epoch": 3.6, "learning_rate": 1.4795419340374334e-05, "loss": 0.8841, "step": 4612 }, { "epoch": 3.61, "learning_rate": 1.4793197030319852e-05, "loss": 0.8997, "step": 4613 }, { "epoch": 3.61, "learning_rate": 1.4790974412895373e-05, "loss": 0.9101, "step": 4614 }, { "epoch": 3.61, "learning_rate": 1.4788751488243425e-05, "loss": 0.9847, "step": 4615 }, { "epoch": 3.61, "learning_rate": 1.4786528256506555e-05, "loss": 1.0671, "step": 4616 }, { "epoch": 3.61, "learning_rate": 1.478430471782734e-05, "loss": 0.9195, "step": 4617 }, { "epoch": 3.61, "learning_rate": 1.4782080872348354e-05, "loss": 0.9648, "step": 4618 }, { "epoch": 3.61, "learning_rate": 1.4779856720212213e-05, "loss": 1.021, "step": 4619 }, { "epoch": 3.61, "learning_rate": 1.4777632261561543e-05, "loss": 0.7982, "step": 4620 }, { "epoch": 3.61, "learning_rate": 1.4775407496538988e-05, "loss": 1.1749, "step": 4621 }, { "epoch": 3.61, "learning_rate": 1.4773182425287211e-05, "loss": 0.8325, "step": 4622 }, { "epoch": 3.61, "learning_rate": 1.4770957047948904e-05, "loss": 0.8716, "step": 4623 }, { "epoch": 3.61, "learning_rate": 1.4768731364666767e-05, "loss": 1.0224, "step": 4624 }, { "epoch": 3.61, "learning_rate": 1.4766505375583528e-05, "loss": 1.0267, "step": 4625 }, { "epoch": 3.62, "learning_rate": 1.476427908084193e-05, "loss": 0.8854, "step": 4626 }, { "epoch": 3.62, "learning_rate": 1.4762052480584738e-05, "loss": 0.8719, "step": 4627 }, { "epoch": 3.62, "learning_rate": 1.4759825574954735e-05, "loss": 0.8719, "step": 4628 }, { "epoch": 3.62, "learning_rate": 1.4757598364094726e-05, "loss": 1.0526, "step": 4629 }, { "epoch": 3.62, "learning_rate": 1.4755370848147528e-05, "loss": 0.9784, "step": 4630 }, { "epoch": 3.62, "learning_rate": 1.4753143027255991e-05, "loss": 1.0102, "step": 4631 }, { "epoch": 3.62, "learning_rate": 1.4750914901562971e-05, "loss": 1.0713, "step": 4632 }, { "epoch": 3.62, "learning_rate": 1.4748686471211354e-05, "loss": 0.9258, "step": 4633 }, { "epoch": 3.62, "learning_rate": 1.474645773634404e-05, "loss": 0.8375, "step": 4634 }, { "epoch": 3.62, "learning_rate": 1.4744228697103947e-05, "loss": 1.035, "step": 4635 }, { "epoch": 3.62, "learning_rate": 1.4741999353634014e-05, "loss": 0.8351, "step": 4636 }, { "epoch": 3.62, "learning_rate": 1.4739769706077204e-05, "loss": 0.8312, "step": 4637 }, { "epoch": 3.62, "learning_rate": 1.4737539754576495e-05, "loss": 0.9621, "step": 4638 }, { "epoch": 3.63, "learning_rate": 1.4735309499274888e-05, "loss": 0.9163, "step": 4639 }, { "epoch": 3.63, "learning_rate": 1.4733078940315398e-05, "loss": 0.9634, "step": 4640 }, { "epoch": 3.63, "learning_rate": 1.473084807784106e-05, "loss": 1.135, "step": 4641 }, { "epoch": 3.63, "learning_rate": 1.4728616911994935e-05, "loss": 1.0504, "step": 4642 }, { "epoch": 3.63, "learning_rate": 1.4726385442920102e-05, "loss": 1.0141, "step": 4643 }, { "epoch": 3.63, "learning_rate": 1.4724153670759648e-05, "loss": 0.7651, "step": 4644 }, { "epoch": 3.63, "learning_rate": 1.4721921595656697e-05, "loss": 1.2465, "step": 4645 }, { "epoch": 3.63, "learning_rate": 1.4719689217754382e-05, "loss": 0.9242, "step": 4646 }, { "epoch": 3.63, "learning_rate": 1.4717456537195851e-05, "loss": 1.1231, "step": 4647 }, { "epoch": 3.63, "learning_rate": 1.4715223554124288e-05, "loss": 1.0944, "step": 4648 }, { "epoch": 3.63, "learning_rate": 1.4712990268682875e-05, "loss": 1.0375, "step": 4649 }, { "epoch": 3.63, "learning_rate": 1.4710756681014828e-05, "loss": 0.88, "step": 4650 }, { "epoch": 3.64, "learning_rate": 1.4708522791263385e-05, "loss": 0.8691, "step": 4651 }, { "epoch": 3.64, "learning_rate": 1.470628859957179e-05, "loss": 0.8727, "step": 4652 }, { "epoch": 3.64, "learning_rate": 1.4704054106083312e-05, "loss": 0.989, "step": 4653 }, { "epoch": 3.64, "learning_rate": 1.470181931094125e-05, "loss": 0.9623, "step": 4654 }, { "epoch": 3.64, "learning_rate": 1.4699584214288902e-05, "loss": 0.7078, "step": 4655 }, { "epoch": 3.64, "learning_rate": 1.4697348816269605e-05, "loss": 0.8077, "step": 4656 }, { "epoch": 3.64, "learning_rate": 1.4695113117026705e-05, "loss": 1.1305, "step": 4657 }, { "epoch": 3.64, "learning_rate": 1.4692877116703566e-05, "loss": 0.939, "step": 4658 }, { "epoch": 3.64, "learning_rate": 1.4690640815443576e-05, "loss": 1.0815, "step": 4659 }, { "epoch": 3.64, "learning_rate": 1.4688404213390143e-05, "loss": 1.0401, "step": 4660 }, { "epoch": 3.64, "learning_rate": 1.4686167310686687e-05, "loss": 0.7705, "step": 4661 }, { "epoch": 3.64, "learning_rate": 1.4683930107476659e-05, "loss": 0.9559, "step": 4662 }, { "epoch": 3.64, "learning_rate": 1.4681692603903517e-05, "loss": 0.9535, "step": 4663 }, { "epoch": 3.65, "learning_rate": 1.4679454800110749e-05, "loss": 0.9305, "step": 4664 }, { "epoch": 3.65, "learning_rate": 1.467721669624185e-05, "loss": 0.8486, "step": 4665 }, { "epoch": 3.65, "learning_rate": 1.4674978292440343e-05, "loss": 0.8266, "step": 4666 }, { "epoch": 3.65, "learning_rate": 1.4672739588849774e-05, "loss": 0.9443, "step": 4667 }, { "epoch": 3.65, "learning_rate": 1.46705005856137e-05, "loss": 0.7879, "step": 4668 }, { "epoch": 3.65, "learning_rate": 1.46682612828757e-05, "loss": 1.1283, "step": 4669 }, { "epoch": 3.65, "learning_rate": 1.4666021680779367e-05, "loss": 0.8312, "step": 4670 }, { "epoch": 3.65, "learning_rate": 1.4663781779468329e-05, "loss": 0.8813, "step": 4671 }, { "epoch": 3.65, "learning_rate": 1.4661541579086213e-05, "loss": 0.9579, "step": 4672 }, { "epoch": 3.65, "learning_rate": 1.465930107977668e-05, "loss": 0.9119, "step": 4673 }, { "epoch": 3.65, "learning_rate": 1.4657060281683403e-05, "loss": 1.007, "step": 4674 }, { "epoch": 3.65, "learning_rate": 1.4654819184950077e-05, "loss": 0.9056, "step": 4675 }, { "epoch": 3.65, "learning_rate": 1.4652577789720412e-05, "loss": 0.85, "step": 4676 }, { "epoch": 3.66, "learning_rate": 1.465033609613815e-05, "loss": 0.8313, "step": 4677 }, { "epoch": 3.66, "learning_rate": 1.4648094104347029e-05, "loss": 0.9845, "step": 4678 }, { "epoch": 3.66, "learning_rate": 1.4645851814490827e-05, "loss": 0.8439, "step": 4679 }, { "epoch": 3.66, "learning_rate": 1.4643609226713333e-05, "loss": 0.95, "step": 4680 }, { "epoch": 3.66, "learning_rate": 1.4641366341158358e-05, "loss": 0.9508, "step": 4681 }, { "epoch": 3.66, "learning_rate": 1.4639123157969728e-05, "loss": 0.749, "step": 4682 }, { "epoch": 3.66, "learning_rate": 1.463687967729129e-05, "loss": 0.814, "step": 4683 }, { "epoch": 3.66, "learning_rate": 1.4634635899266907e-05, "loss": 0.9083, "step": 4684 }, { "epoch": 3.66, "learning_rate": 1.4632391824040468e-05, "loss": 0.9247, "step": 4685 }, { "epoch": 3.66, "learning_rate": 1.4630147451755879e-05, "loss": 0.7904, "step": 4686 }, { "epoch": 3.66, "learning_rate": 1.4627902782557057e-05, "loss": 0.8433, "step": 4687 }, { "epoch": 3.66, "learning_rate": 1.462565781658795e-05, "loss": 0.9518, "step": 4688 }, { "epoch": 3.66, "learning_rate": 1.4623412553992518e-05, "loss": 1.0142, "step": 4689 }, { "epoch": 3.67, "learning_rate": 1.4621166994914739e-05, "loss": 1.1957, "step": 4690 }, { "epoch": 3.67, "learning_rate": 1.4618921139498616e-05, "loss": 1.0142, "step": 4691 }, { "epoch": 3.67, "learning_rate": 1.4616674987888164e-05, "loss": 0.9688, "step": 4692 }, { "epoch": 3.67, "learning_rate": 1.4614428540227424e-05, "loss": 0.8811, "step": 4693 }, { "epoch": 3.67, "learning_rate": 1.461218179666045e-05, "loss": 0.9066, "step": 4694 }, { "epoch": 3.67, "learning_rate": 1.4609934757331315e-05, "loss": 0.7388, "step": 4695 }, { "epoch": 3.67, "learning_rate": 1.4607687422384118e-05, "loss": 1.002, "step": 4696 }, { "epoch": 3.67, "learning_rate": 1.460543979196297e-05, "loss": 1.1506, "step": 4697 }, { "epoch": 3.67, "learning_rate": 1.4603191866212006e-05, "loss": 0.9635, "step": 4698 }, { "epoch": 3.67, "learning_rate": 1.460094364527537e-05, "loss": 0.8168, "step": 4699 }, { "epoch": 3.67, "learning_rate": 1.4598695129297243e-05, "loss": 0.9991, "step": 4700 }, { "epoch": 3.67, "learning_rate": 1.45964463184218e-05, "loss": 1.0619, "step": 4701 }, { "epoch": 3.67, "learning_rate": 1.4594197212793264e-05, "loss": 0.9709, "step": 4702 }, { "epoch": 3.68, "learning_rate": 1.4591947812555849e-05, "loss": 1.1055, "step": 4703 }, { "epoch": 3.68, "learning_rate": 1.458969811785381e-05, "loss": 0.8695, "step": 4704 }, { "epoch": 3.68, "learning_rate": 1.4587448128831407e-05, "loss": 0.8367, "step": 4705 }, { "epoch": 3.68, "learning_rate": 1.4585197845632923e-05, "loss": 1.1318, "step": 4706 }, { "epoch": 3.68, "learning_rate": 1.4582947268402661e-05, "loss": 1.0968, "step": 4707 }, { "epoch": 3.68, "learning_rate": 1.4580696397284947e-05, "loss": 0.9878, "step": 4708 }, { "epoch": 3.68, "learning_rate": 1.4578445232424112e-05, "loss": 1.1224, "step": 4709 }, { "epoch": 3.68, "learning_rate": 1.4576193773964523e-05, "loss": 0.8369, "step": 4710 }, { "epoch": 3.68, "learning_rate": 1.4573942022050554e-05, "loss": 0.9093, "step": 4711 }, { "epoch": 3.68, "learning_rate": 1.4571689976826602e-05, "loss": 0.9398, "step": 4712 }, { "epoch": 3.68, "learning_rate": 1.456943763843708e-05, "loss": 1.0687, "step": 4713 }, { "epoch": 3.68, "learning_rate": 1.4567185007026425e-05, "loss": 0.8944, "step": 4714 }, { "epoch": 3.69, "learning_rate": 1.4564932082739094e-05, "loss": 0.8146, "step": 4715 }, { "epoch": 3.69, "learning_rate": 1.4562678865719551e-05, "loss": 0.8411, "step": 4716 }, { "epoch": 3.69, "learning_rate": 1.4560425356112289e-05, "loss": 0.9877, "step": 4717 }, { "epoch": 3.69, "learning_rate": 1.4558171554061815e-05, "loss": 0.9267, "step": 4718 }, { "epoch": 3.69, "learning_rate": 1.4555917459712667e-05, "loss": 0.9141, "step": 4719 }, { "epoch": 3.69, "learning_rate": 1.455366307320938e-05, "loss": 1.0398, "step": 4720 }, { "epoch": 3.69, "learning_rate": 1.4551408394696523e-05, "loss": 0.875, "step": 4721 }, { "epoch": 3.69, "learning_rate": 1.4549153424318683e-05, "loss": 0.9888, "step": 4722 }, { "epoch": 3.69, "learning_rate": 1.4546898162220463e-05, "loss": 0.9322, "step": 4723 }, { "epoch": 3.69, "learning_rate": 1.4544642608546475e-05, "loss": 0.8075, "step": 4724 }, { "epoch": 3.69, "learning_rate": 1.4542386763441376e-05, "loss": 0.9902, "step": 4725 }, { "epoch": 3.69, "learning_rate": 1.454013062704981e-05, "loss": 0.9743, "step": 4726 }, { "epoch": 3.69, "learning_rate": 1.4537874199516463e-05, "loss": 1.0798, "step": 4727 }, { "epoch": 3.7, "learning_rate": 1.4535617480986031e-05, "loss": 0.8775, "step": 4728 }, { "epoch": 3.7, "learning_rate": 1.4533360471603226e-05, "loss": 0.9724, "step": 4729 }, { "epoch": 3.7, "learning_rate": 1.453110317151278e-05, "loss": 0.986, "step": 4730 }, { "epoch": 3.7, "learning_rate": 1.4528845580859452e-05, "loss": 0.9932, "step": 4731 }, { "epoch": 3.7, "learning_rate": 1.4526587699788004e-05, "loss": 0.8007, "step": 4732 }, { "epoch": 3.7, "learning_rate": 1.4524329528443236e-05, "loss": 1.0265, "step": 4733 }, { "epoch": 3.7, "learning_rate": 1.4522071066969947e-05, "loss": 1.0181, "step": 4734 }, { "epoch": 3.7, "learning_rate": 1.4519812315512968e-05, "loss": 0.8573, "step": 4735 }, { "epoch": 3.7, "learning_rate": 1.4517553274217145e-05, "loss": 1.1015, "step": 4736 }, { "epoch": 3.7, "learning_rate": 1.451529394322734e-05, "loss": 0.9767, "step": 4737 }, { "epoch": 3.7, "learning_rate": 1.4513034322688437e-05, "loss": 0.9433, "step": 4738 }, { "epoch": 3.7, "learning_rate": 1.4510774412745337e-05, "loss": 0.9649, "step": 4739 }, { "epoch": 3.7, "learning_rate": 1.4508514213542959e-05, "loss": 0.957, "step": 4740 }, { "epoch": 3.71, "learning_rate": 1.450625372522624e-05, "loss": 1.1064, "step": 4741 }, { "epoch": 3.71, "learning_rate": 1.450399294794014e-05, "loss": 0.937, "step": 4742 }, { "epoch": 3.71, "learning_rate": 1.4501731881829628e-05, "loss": 0.8593, "step": 4743 }, { "epoch": 3.71, "learning_rate": 1.4499470527039707e-05, "loss": 1.1134, "step": 4744 }, { "epoch": 3.71, "learning_rate": 1.4497208883715384e-05, "loss": 0.8863, "step": 4745 }, { "epoch": 3.71, "learning_rate": 1.4494946952001686e-05, "loss": 0.9259, "step": 4746 }, { "epoch": 3.71, "learning_rate": 1.449268473204367e-05, "loss": 0.8641, "step": 4747 }, { "epoch": 3.71, "learning_rate": 1.4490422223986404e-05, "loss": 0.9436, "step": 4748 }, { "epoch": 3.71, "learning_rate": 1.4488159427974965e-05, "loss": 0.9462, "step": 4749 }, { "epoch": 3.71, "learning_rate": 1.4485896344154466e-05, "loss": 1.0488, "step": 4750 }, { "epoch": 3.71, "learning_rate": 1.4483632972670028e-05, "loss": 1.0328, "step": 4751 }, { "epoch": 3.71, "learning_rate": 1.448136931366679e-05, "loss": 1.0926, "step": 4752 }, { "epoch": 3.71, "learning_rate": 1.4479105367289914e-05, "loss": 0.8972, "step": 4753 }, { "epoch": 3.72, "learning_rate": 1.4476841133684585e-05, "loss": 0.8491, "step": 4754 }, { "epoch": 3.72, "learning_rate": 1.4474576612995987e-05, "loss": 0.8918, "step": 4755 }, { "epoch": 3.72, "learning_rate": 1.4472311805369345e-05, "loss": 0.958, "step": 4756 }, { "epoch": 3.72, "learning_rate": 1.447004671094989e-05, "loss": 0.8408, "step": 4757 }, { "epoch": 3.72, "learning_rate": 1.4467781329882874e-05, "loss": 1.1022, "step": 4758 }, { "epoch": 3.72, "learning_rate": 1.4465515662313566e-05, "loss": 0.9458, "step": 4759 }, { "epoch": 3.72, "learning_rate": 1.4463249708387257e-05, "loss": 0.9604, "step": 4760 }, { "epoch": 3.72, "learning_rate": 1.4460983468249254e-05, "loss": 0.6076, "step": 4761 }, { "epoch": 3.72, "learning_rate": 1.4458716942044882e-05, "loss": 0.9305, "step": 4762 }, { "epoch": 3.72, "learning_rate": 1.4456450129919487e-05, "loss": 0.9811, "step": 4763 }, { "epoch": 3.72, "learning_rate": 1.4454183032018428e-05, "loss": 1.1049, "step": 4764 }, { "epoch": 3.72, "learning_rate": 1.4451915648487087e-05, "loss": 0.9978, "step": 4765 }, { "epoch": 3.72, "learning_rate": 1.444964797947086e-05, "loss": 0.753, "step": 4766 }, { "epoch": 3.73, "learning_rate": 1.444738002511517e-05, "loss": 0.7951, "step": 4767 }, { "epoch": 3.73, "learning_rate": 1.444511178556545e-05, "loss": 1.0457, "step": 4768 }, { "epoch": 3.73, "learning_rate": 1.4442843260967154e-05, "loss": 0.8852, "step": 4769 }, { "epoch": 3.73, "learning_rate": 1.4440574451465751e-05, "loss": 0.8569, "step": 4770 }, { "epoch": 3.73, "learning_rate": 1.4438305357206739e-05, "loss": 0.8189, "step": 4771 }, { "epoch": 3.73, "learning_rate": 1.4436035978335617e-05, "loss": 1.0953, "step": 4772 }, { "epoch": 3.73, "learning_rate": 1.4433766314997918e-05, "loss": 0.9906, "step": 4773 }, { "epoch": 3.73, "learning_rate": 1.4431496367339187e-05, "loss": 0.7982, "step": 4774 }, { "epoch": 3.73, "learning_rate": 1.4429226135504984e-05, "loss": 0.9712, "step": 4775 }, { "epoch": 3.73, "learning_rate": 1.4426955619640892e-05, "loss": 0.9666, "step": 4776 }, { "epoch": 3.73, "learning_rate": 1.4424684819892515e-05, "loss": 1.1538, "step": 4777 }, { "epoch": 3.73, "learning_rate": 1.4422413736405462e-05, "loss": 0.9244, "step": 4778 }, { "epoch": 3.74, "learning_rate": 1.442014236932538e-05, "loss": 0.812, "step": 4779 }, { "epoch": 3.74, "learning_rate": 1.4417870718797913e-05, "loss": 0.9056, "step": 4780 }, { "epoch": 3.74, "learning_rate": 1.441559878496874e-05, "loss": 0.9007, "step": 4781 }, { "epoch": 3.74, "learning_rate": 1.441332656798355e-05, "loss": 0.8558, "step": 4782 }, { "epoch": 3.74, "learning_rate": 1.4411054067988052e-05, "loss": 0.9992, "step": 4783 }, { "epoch": 3.74, "learning_rate": 1.4408781285127973e-05, "loss": 0.803, "step": 4784 }, { "epoch": 3.74, "learning_rate": 1.4406508219549063e-05, "loss": 0.9854, "step": 4785 }, { "epoch": 3.74, "learning_rate": 1.4404234871397074e-05, "loss": 0.827, "step": 4786 }, { "epoch": 3.74, "learning_rate": 1.4401961240817795e-05, "loss": 1.1087, "step": 4787 }, { "epoch": 3.74, "learning_rate": 1.4399687327957023e-05, "loss": 0.7912, "step": 4788 }, { "epoch": 3.74, "learning_rate": 1.4397413132960582e-05, "loss": 1.0665, "step": 4789 }, { "epoch": 3.74, "learning_rate": 1.4395138655974296e-05, "loss": 1.0066, "step": 4790 }, { "epoch": 3.74, "learning_rate": 1.4392863897144031e-05, "loss": 1.0732, "step": 4791 }, { "epoch": 3.75, "learning_rate": 1.4390588856615652e-05, "loss": 0.8848, "step": 4792 }, { "epoch": 3.75, "learning_rate": 1.438831353453505e-05, "loss": 0.8598, "step": 4793 }, { "epoch": 3.75, "learning_rate": 1.4386037931048133e-05, "loss": 0.8121, "step": 4794 }, { "epoch": 3.75, "learning_rate": 1.4383762046300827e-05, "loss": 1.0209, "step": 4795 }, { "epoch": 3.75, "learning_rate": 1.4381485880439078e-05, "loss": 0.931, "step": 4796 }, { "epoch": 3.75, "learning_rate": 1.4379209433608845e-05, "loss": 0.9698, "step": 4797 }, { "epoch": 3.75, "learning_rate": 1.4376932705956109e-05, "loss": 1.0277, "step": 4798 }, { "epoch": 3.75, "learning_rate": 1.4374655697626869e-05, "loss": 0.8672, "step": 4799 }, { "epoch": 3.75, "learning_rate": 1.4372378408767143e-05, "loss": 1.2097, "step": 4800 }, { "epoch": 3.75, "learning_rate": 1.437010083952296e-05, "loss": 0.8607, "step": 4801 }, { "epoch": 3.75, "learning_rate": 1.436782299004038e-05, "loss": 1.0065, "step": 4802 }, { "epoch": 3.75, "learning_rate": 1.4365544860465462e-05, "loss": 0.8603, "step": 4803 }, { "epoch": 3.75, "learning_rate": 1.4363266450944302e-05, "loss": 0.9935, "step": 4804 }, { "epoch": 3.76, "learning_rate": 1.4360987761623006e-05, "loss": 1.0902, "step": 4805 }, { "epoch": 3.76, "learning_rate": 1.4358708792647695e-05, "loss": 0.9417, "step": 4806 }, { "epoch": 3.76, "learning_rate": 1.4356429544164509e-05, "loss": 1.0284, "step": 4807 }, { "epoch": 3.76, "learning_rate": 1.4354150016319617e-05, "loss": 0.9326, "step": 4808 }, { "epoch": 3.76, "learning_rate": 1.4351870209259185e-05, "loss": 0.9078, "step": 4809 }, { "epoch": 3.76, "learning_rate": 1.4349590123129415e-05, "loss": 0.8432, "step": 4810 }, { "epoch": 3.76, "learning_rate": 1.4347309758076523e-05, "loss": 1.2413, "step": 4811 }, { "epoch": 3.76, "learning_rate": 1.4345029114246732e-05, "loss": 0.9573, "step": 4812 }, { "epoch": 3.76, "learning_rate": 1.4342748191786298e-05, "loss": 0.849, "step": 4813 }, { "epoch": 3.76, "learning_rate": 1.4340466990841484e-05, "loss": 0.7479, "step": 4814 }, { "epoch": 3.76, "learning_rate": 1.4338185511558579e-05, "loss": 0.735, "step": 4815 }, { "epoch": 3.76, "learning_rate": 1.4335903754083884e-05, "loss": 0.8781, "step": 4816 }, { "epoch": 3.76, "learning_rate": 1.4333621718563721e-05, "loss": 1.0778, "step": 4817 }, { "epoch": 3.77, "learning_rate": 1.4331339405144425e-05, "loss": 0.9028, "step": 4818 }, { "epoch": 3.77, "learning_rate": 1.4329056813972355e-05, "loss": 0.9467, "step": 4819 }, { "epoch": 3.77, "learning_rate": 1.4326773945193884e-05, "loss": 0.7291, "step": 4820 }, { "epoch": 3.77, "learning_rate": 1.4324490798955405e-05, "loss": 0.9264, "step": 4821 }, { "epoch": 3.77, "learning_rate": 1.432220737540333e-05, "loss": 1.0721, "step": 4822 }, { "epoch": 3.77, "learning_rate": 1.431992367468408e-05, "loss": 0.9226, "step": 4823 }, { "epoch": 3.77, "learning_rate": 1.4317639696944103e-05, "loss": 0.7041, "step": 4824 }, { "epoch": 3.77, "learning_rate": 1.431535544232987e-05, "loss": 0.823, "step": 4825 }, { "epoch": 3.77, "learning_rate": 1.4313070910987848e-05, "loss": 1.0406, "step": 4826 }, { "epoch": 3.77, "learning_rate": 1.4310786103064545e-05, "loss": 0.8246, "step": 4827 }, { "epoch": 3.77, "learning_rate": 1.4308501018706475e-05, "loss": 1.0929, "step": 4828 }, { "epoch": 3.77, "learning_rate": 1.4306215658060173e-05, "loss": 0.7709, "step": 4829 }, { "epoch": 3.77, "learning_rate": 1.4303930021272185e-05, "loss": 1.0453, "step": 4830 }, { "epoch": 3.78, "learning_rate": 1.4301644108489089e-05, "loss": 0.9063, "step": 4831 }, { "epoch": 3.78, "learning_rate": 1.4299357919857463e-05, "loss": 0.9054, "step": 4832 }, { "epoch": 3.78, "learning_rate": 1.4297071455523924e-05, "loss": 0.9958, "step": 4833 }, { "epoch": 3.78, "learning_rate": 1.429478471563508e-05, "loss": 1.0011, "step": 4834 }, { "epoch": 3.78, "learning_rate": 1.4292497700337582e-05, "loss": 1.1788, "step": 4835 }, { "epoch": 3.78, "learning_rate": 1.4290210409778085e-05, "loss": 0.8916, "step": 4836 }, { "epoch": 3.78, "learning_rate": 1.4287922844103263e-05, "loss": 0.8671, "step": 4837 }, { "epoch": 3.78, "learning_rate": 1.4285635003459808e-05, "loss": 0.9577, "step": 4838 }, { "epoch": 3.78, "learning_rate": 1.4283346887994436e-05, "loss": 0.8985, "step": 4839 }, { "epoch": 3.78, "learning_rate": 1.428105849785387e-05, "loss": 0.8053, "step": 4840 }, { "epoch": 3.78, "learning_rate": 1.427876983318486e-05, "loss": 0.9529, "step": 4841 }, { "epoch": 3.78, "learning_rate": 1.4276480894134166e-05, "loss": 0.9869, "step": 4842 }, { "epoch": 3.79, "learning_rate": 1.4274191680848568e-05, "loss": 0.8718, "step": 4843 }, { "epoch": 3.79, "learning_rate": 1.4271902193474873e-05, "loss": 0.9604, "step": 4844 }, { "epoch": 3.79, "learning_rate": 1.426961243215989e-05, "loss": 1.2089, "step": 4845 }, { "epoch": 3.79, "learning_rate": 1.4267322397050453e-05, "loss": 1.0818, "step": 4846 }, { "epoch": 3.79, "learning_rate": 1.4265032088293417e-05, "loss": 1.0063, "step": 4847 }, { "epoch": 3.79, "learning_rate": 1.4262741506035652e-05, "loss": 0.9615, "step": 4848 }, { "epoch": 3.79, "learning_rate": 1.4260450650424039e-05, "loss": 1.0096, "step": 4849 }, { "epoch": 3.79, "learning_rate": 1.4258159521605486e-05, "loss": 0.7712, "step": 4850 }, { "epoch": 3.79, "learning_rate": 1.425586811972691e-05, "loss": 0.9453, "step": 4851 }, { "epoch": 3.79, "learning_rate": 1.4253576444935258e-05, "loss": 1.0391, "step": 4852 }, { "epoch": 3.79, "learning_rate": 1.4251284497377482e-05, "loss": 0.8823, "step": 4853 }, { "epoch": 3.79, "learning_rate": 1.4248992277200558e-05, "loss": 1.0653, "step": 4854 }, { "epoch": 3.79, "learning_rate": 1.424669978455147e-05, "loss": 0.8714, "step": 4855 }, { "epoch": 3.8, "learning_rate": 1.424440701957724e-05, "loss": 0.8144, "step": 4856 }, { "epoch": 3.8, "learning_rate": 1.4242113982424883e-05, "loss": 0.792, "step": 4857 }, { "epoch": 3.8, "learning_rate": 1.4239820673241451e-05, "loss": 1.0108, "step": 4858 }, { "epoch": 3.8, "learning_rate": 1.4237527092174001e-05, "loss": 1.0268, "step": 4859 }, { "epoch": 3.8, "learning_rate": 1.4235233239369613e-05, "loss": 1.0812, "step": 4860 }, { "epoch": 3.8, "learning_rate": 1.4232939114975382e-05, "loss": 0.8715, "step": 4861 }, { "epoch": 3.8, "learning_rate": 1.4230644719138427e-05, "loss": 1.0112, "step": 4862 }, { "epoch": 3.8, "learning_rate": 1.4228350052005872e-05, "loss": 0.9316, "step": 4863 }, { "epoch": 3.8, "learning_rate": 1.422605511372487e-05, "loss": 0.8088, "step": 4864 }, { "epoch": 3.8, "learning_rate": 1.4223759904442584e-05, "loss": 1.2714, "step": 4865 }, { "epoch": 3.8, "learning_rate": 1.4221464424306199e-05, "loss": 0.96, "step": 4866 }, { "epoch": 3.8, "learning_rate": 1.4219168673462918e-05, "loss": 0.8975, "step": 4867 }, { "epoch": 3.8, "learning_rate": 1.4216872652059954e-05, "loss": 0.8298, "step": 4868 }, { "epoch": 3.81, "learning_rate": 1.4214576360244548e-05, "loss": 0.7867, "step": 4869 }, { "epoch": 3.81, "learning_rate": 1.4212279798163946e-05, "loss": 0.9533, "step": 4870 }, { "epoch": 3.81, "learning_rate": 1.4209982965965423e-05, "loss": 1.1534, "step": 4871 }, { "epoch": 3.81, "learning_rate": 1.4207685863796265e-05, "loss": 0.8123, "step": 4872 }, { "epoch": 3.81, "learning_rate": 1.4205388491803777e-05, "loss": 0.9704, "step": 4873 }, { "epoch": 3.81, "learning_rate": 1.4203090850135279e-05, "loss": 0.7743, "step": 4874 }, { "epoch": 3.81, "learning_rate": 1.4200792938938115e-05, "loss": 0.9685, "step": 4875 }, { "epoch": 3.81, "learning_rate": 1.4198494758359638e-05, "loss": 0.9503, "step": 4876 }, { "epoch": 3.81, "learning_rate": 1.4196196308547222e-05, "loss": 0.8924, "step": 4877 }, { "epoch": 3.81, "learning_rate": 1.4193897589648256e-05, "loss": 0.7573, "step": 4878 }, { "epoch": 3.81, "learning_rate": 1.4191598601810152e-05, "loss": 0.8651, "step": 4879 }, { "epoch": 3.81, "learning_rate": 1.4189299345180334e-05, "loss": 0.9181, "step": 4880 }, { "epoch": 3.81, "learning_rate": 1.4186999819906247e-05, "loss": 0.98, "step": 4881 }, { "epoch": 3.82, "learning_rate": 1.4184700026135345e-05, "loss": 0.9377, "step": 4882 }, { "epoch": 3.82, "learning_rate": 1.4182399964015115e-05, "loss": 0.8773, "step": 4883 }, { "epoch": 3.82, "learning_rate": 1.4180099633693041e-05, "loss": 0.9695, "step": 4884 }, { "epoch": 3.82, "learning_rate": 1.4177799035316641e-05, "loss": 0.8604, "step": 4885 }, { "epoch": 3.82, "learning_rate": 1.4175498169033444e-05, "loss": 1.2094, "step": 4886 }, { "epoch": 3.82, "learning_rate": 1.4173197034990992e-05, "loss": 0.8663, "step": 4887 }, { "epoch": 3.82, "learning_rate": 1.4170895633336855e-05, "loss": 0.8398, "step": 4888 }, { "epoch": 3.82, "learning_rate": 1.4168593964218605e-05, "loss": 0.804, "step": 4889 }, { "epoch": 3.82, "learning_rate": 1.4166292027783843e-05, "loss": 1.1909, "step": 4890 }, { "epoch": 3.82, "learning_rate": 1.4163989824180187e-05, "loss": 0.9091, "step": 4891 }, { "epoch": 3.82, "learning_rate": 1.4161687353555265e-05, "loss": 1.007, "step": 4892 }, { "epoch": 3.82, "learning_rate": 1.4159384616056725e-05, "loss": 1.0978, "step": 4893 }, { "epoch": 3.82, "learning_rate": 1.415708161183224e-05, "loss": 0.6501, "step": 4894 }, { "epoch": 3.83, "learning_rate": 1.4154778341029483e-05, "loss": 0.931, "step": 4895 }, { "epoch": 3.83, "learning_rate": 1.4152474803796163e-05, "loss": 0.82, "step": 4896 }, { "epoch": 3.83, "learning_rate": 1.4150171000279991e-05, "loss": 0.9906, "step": 4897 }, { "epoch": 3.83, "learning_rate": 1.4147866930628707e-05, "loss": 0.8469, "step": 4898 }, { "epoch": 3.83, "learning_rate": 1.4145562594990057e-05, "loss": 0.8504, "step": 4899 }, { "epoch": 3.83, "learning_rate": 1.4143257993511811e-05, "loss": 1.1455, "step": 4900 }, { "epoch": 3.83, "learning_rate": 1.414095312634176e-05, "loss": 1.0734, "step": 4901 }, { "epoch": 3.83, "learning_rate": 1.4138647993627702e-05, "loss": 1.0695, "step": 4902 }, { "epoch": 3.83, "learning_rate": 1.413634259551745e-05, "loss": 0.8774, "step": 4903 }, { "epoch": 3.83, "learning_rate": 1.4134036932158856e-05, "loss": 0.8703, "step": 4904 }, { "epoch": 3.83, "learning_rate": 1.413173100369976e-05, "loss": 0.9704, "step": 4905 }, { "epoch": 3.83, "learning_rate": 1.4129424810288041e-05, "loss": 0.9271, "step": 4906 }, { "epoch": 3.84, "learning_rate": 1.4127118352071582e-05, "loss": 1.1332, "step": 4907 }, { "epoch": 3.84, "learning_rate": 1.412481162919829e-05, "loss": 0.973, "step": 4908 }, { "epoch": 3.84, "learning_rate": 1.4122504641816082e-05, "loss": 1.025, "step": 4909 }, { "epoch": 3.84, "learning_rate": 1.4120197390072907e-05, "loss": 1.1164, "step": 4910 }, { "epoch": 3.84, "learning_rate": 1.4117889874116708e-05, "loss": 1.0047, "step": 4911 }, { "epoch": 3.84, "learning_rate": 1.4115582094095465e-05, "loss": 0.957, "step": 4912 }, { "epoch": 3.84, "learning_rate": 1.411327405015717e-05, "loss": 1.0847, "step": 4913 }, { "epoch": 3.84, "learning_rate": 1.411096574244982e-05, "loss": 1.2707, "step": 4914 }, { "epoch": 3.84, "learning_rate": 1.4108657171121443e-05, "loss": 0.983, "step": 4915 }, { "epoch": 3.84, "learning_rate": 1.4106348336320082e-05, "loss": 0.6425, "step": 4916 }, { "epoch": 3.84, "learning_rate": 1.410403923819379e-05, "loss": 0.8984, "step": 4917 }, { "epoch": 3.84, "learning_rate": 1.4101729876890642e-05, "loss": 0.7458, "step": 4918 }, { "epoch": 3.84, "learning_rate": 1.409942025255873e-05, "loss": 0.7749, "step": 4919 }, { "epoch": 3.85, "learning_rate": 1.409711036534616e-05, "loss": 0.9577, "step": 4920 }, { "epoch": 3.85, "learning_rate": 1.4094800215401059e-05, "loss": 1.071, "step": 4921 }, { "epoch": 3.85, "learning_rate": 1.4092489802871567e-05, "loss": 0.8869, "step": 4922 }, { "epoch": 3.85, "learning_rate": 1.4090179127905841e-05, "loss": 0.8839, "step": 4923 }, { "epoch": 3.85, "learning_rate": 1.4087868190652056e-05, "loss": 0.8732, "step": 4924 }, { "epoch": 3.85, "learning_rate": 1.4085556991258407e-05, "loss": 0.9968, "step": 4925 }, { "epoch": 3.85, "learning_rate": 1.4083245529873097e-05, "loss": 1.0005, "step": 4926 }, { "epoch": 3.85, "learning_rate": 1.4080933806644361e-05, "loss": 1.0407, "step": 4927 }, { "epoch": 3.85, "learning_rate": 1.407862182172043e-05, "loss": 1.1195, "step": 4928 }, { "epoch": 3.85, "learning_rate": 1.4076309575249569e-05, "loss": 0.9754, "step": 4929 }, { "epoch": 3.85, "learning_rate": 1.4073997067380054e-05, "loss": 0.9973, "step": 4930 }, { "epoch": 3.85, "learning_rate": 1.4071684298260178e-05, "loss": 1.0388, "step": 4931 }, { "epoch": 3.85, "learning_rate": 1.4069371268038245e-05, "loss": 0.7392, "step": 4932 }, { "epoch": 3.86, "learning_rate": 1.406705797686259e-05, "loss": 1.0314, "step": 4933 }, { "epoch": 3.86, "learning_rate": 1.4064744424881547e-05, "loss": 0.8738, "step": 4934 }, { "epoch": 3.86, "learning_rate": 1.4062430612243481e-05, "loss": 0.9786, "step": 4935 }, { "epoch": 3.86, "learning_rate": 1.4060116539096767e-05, "loss": 0.9201, "step": 4936 }, { "epoch": 3.86, "learning_rate": 1.4057802205589797e-05, "loss": 0.884, "step": 4937 }, { "epoch": 3.86, "learning_rate": 1.4055487611870979e-05, "loss": 0.9555, "step": 4938 }, { "epoch": 3.86, "learning_rate": 1.4053172758088746e-05, "loss": 1.2684, "step": 4939 }, { "epoch": 3.86, "learning_rate": 1.4050857644391536e-05, "loss": 0.993, "step": 4940 }, { "epoch": 3.86, "learning_rate": 1.4048542270927807e-05, "loss": 0.9028, "step": 4941 }, { "epoch": 3.86, "learning_rate": 1.4046226637846039e-05, "loss": 0.9382, "step": 4942 }, { "epoch": 3.86, "learning_rate": 1.4043910745294722e-05, "loss": 0.8803, "step": 4943 }, { "epoch": 3.86, "learning_rate": 1.404159459342237e-05, "loss": 1.0745, "step": 4944 }, { "epoch": 3.86, "learning_rate": 1.4039278182377503e-05, "loss": 1.1329, "step": 4945 }, { "epoch": 3.87, "learning_rate": 1.4036961512308669e-05, "loss": 1.1167, "step": 4946 }, { "epoch": 3.87, "learning_rate": 1.4034644583364425e-05, "loss": 1.252, "step": 4947 }, { "epoch": 3.87, "learning_rate": 1.403232739569335e-05, "loss": 1.0431, "step": 4948 }, { "epoch": 3.87, "learning_rate": 1.4030009949444032e-05, "loss": 0.9928, "step": 4949 }, { "epoch": 3.87, "learning_rate": 1.4027692244765086e-05, "loss": 1.0739, "step": 4950 }, { "epoch": 3.87, "learning_rate": 1.4025374281805131e-05, "loss": 0.8181, "step": 4951 }, { "epoch": 3.87, "learning_rate": 1.4023056060712814e-05, "loss": 1.0274, "step": 4952 }, { "epoch": 3.87, "learning_rate": 1.4020737581636797e-05, "loss": 0.9496, "step": 4953 }, { "epoch": 3.87, "learning_rate": 1.4018418844725747e-05, "loss": 0.9546, "step": 4954 }, { "epoch": 3.87, "learning_rate": 1.401609985012836e-05, "loss": 0.9514, "step": 4955 }, { "epoch": 3.87, "learning_rate": 1.401378059799335e-05, "loss": 0.942, "step": 4956 }, { "epoch": 3.87, "learning_rate": 1.4011461088469431e-05, "loss": 1.2904, "step": 4957 }, { "epoch": 3.87, "learning_rate": 1.4009141321705355e-05, "loss": 0.9067, "step": 4958 }, { "epoch": 3.88, "learning_rate": 1.4006821297849875e-05, "loss": 0.9502, "step": 4959 }, { "epoch": 3.88, "learning_rate": 1.4004501017051767e-05, "loss": 0.9887, "step": 4960 }, { "epoch": 3.88, "learning_rate": 1.400218047945982e-05, "loss": 0.9633, "step": 4961 }, { "epoch": 3.88, "learning_rate": 1.3999859685222843e-05, "loss": 0.9102, "step": 4962 }, { "epoch": 3.88, "learning_rate": 1.3997538634489658e-05, "loss": 1.1057, "step": 4963 }, { "epoch": 3.88, "learning_rate": 1.3995217327409108e-05, "loss": 1.0035, "step": 4964 }, { "epoch": 3.88, "learning_rate": 1.399289576413005e-05, "loss": 0.8099, "step": 4965 }, { "epoch": 3.88, "learning_rate": 1.3990573944801358e-05, "loss": 0.8401, "step": 4966 }, { "epoch": 3.88, "learning_rate": 1.3988251869571917e-05, "loss": 0.7197, "step": 4967 }, { "epoch": 3.88, "learning_rate": 1.3985929538590635e-05, "loss": 1.0098, "step": 4968 }, { "epoch": 3.88, "learning_rate": 1.3983606952006436e-05, "loss": 1.0791, "step": 4969 }, { "epoch": 3.88, "learning_rate": 1.3981284109968262e-05, "loss": 0.9521, "step": 4970 }, { "epoch": 3.89, "learning_rate": 1.3978961012625063e-05, "loss": 1.1326, "step": 4971 }, { "epoch": 3.89, "learning_rate": 1.397663766012581e-05, "loss": 1.1476, "step": 4972 }, { "epoch": 3.89, "learning_rate": 1.3974314052619497e-05, "loss": 0.9422, "step": 4973 }, { "epoch": 3.89, "learning_rate": 1.3971990190255122e-05, "loss": 0.8669, "step": 4974 }, { "epoch": 3.89, "learning_rate": 1.3969666073181708e-05, "loss": 1.1638, "step": 4975 }, { "epoch": 3.89, "learning_rate": 1.3967341701548296e-05, "loss": 0.8842, "step": 4976 }, { "epoch": 3.89, "learning_rate": 1.3965017075503933e-05, "loss": 0.8555, "step": 4977 }, { "epoch": 3.89, "learning_rate": 1.3962692195197693e-05, "loss": 0.899, "step": 4978 }, { "epoch": 3.89, "learning_rate": 1.3960367060778658e-05, "loss": 1.0145, "step": 4979 }, { "epoch": 3.89, "learning_rate": 1.3958041672395934e-05, "loss": 0.8907, "step": 4980 }, { "epoch": 3.89, "learning_rate": 1.395571603019864e-05, "loss": 0.9784, "step": 4981 }, { "epoch": 3.89, "learning_rate": 1.3953390134335905e-05, "loss": 1.0532, "step": 4982 }, { "epoch": 3.89, "learning_rate": 1.3951063984956887e-05, "loss": 1.1208, "step": 4983 }, { "epoch": 3.9, "learning_rate": 1.394873758221075e-05, "loss": 0.8665, "step": 4984 }, { "epoch": 3.9, "learning_rate": 1.3946410926246679e-05, "loss": 1.0052, "step": 4985 }, { "epoch": 3.9, "learning_rate": 1.394408401721387e-05, "loss": 1.0633, "step": 4986 }, { "epoch": 3.9, "learning_rate": 1.3941756855261546e-05, "loss": 0.8824, "step": 4987 }, { "epoch": 3.9, "learning_rate": 1.3939429440538932e-05, "loss": 1.1428, "step": 4988 }, { "epoch": 3.9, "learning_rate": 1.3937101773195281e-05, "loss": 0.9558, "step": 4989 }, { "epoch": 3.9, "learning_rate": 1.3934773853379857e-05, "loss": 0.9649, "step": 4990 }, { "epoch": 3.9, "learning_rate": 1.3932445681241942e-05, "loss": 1.032, "step": 4991 }, { "epoch": 3.9, "learning_rate": 1.3930117256930826e-05, "loss": 0.9659, "step": 4992 }, { "epoch": 3.9, "learning_rate": 1.3927788580595833e-05, "loss": 1.1303, "step": 4993 }, { "epoch": 3.9, "learning_rate": 1.3925459652386284e-05, "loss": 0.9013, "step": 4994 }, { "epoch": 3.9, "learning_rate": 1.392313047245153e-05, "loss": 0.9961, "step": 4995 }, { "epoch": 3.9, "learning_rate": 1.392080104094093e-05, "loss": 0.9074, "step": 4996 }, { "epoch": 3.91, "learning_rate": 1.391847135800386e-05, "loss": 0.9229, "step": 4997 }, { "epoch": 3.91, "learning_rate": 1.3916141423789719e-05, "loss": 0.905, "step": 4998 }, { "epoch": 3.91, "learning_rate": 1.3913811238447912e-05, "loss": 1.0693, "step": 4999 }, { "epoch": 3.91, "learning_rate": 1.3911480802127867e-05, "loss": 0.9576, "step": 5000 }, { "epoch": 3.91, "learning_rate": 1.390915011497903e-05, "loss": 1.0642, "step": 5001 }, { "epoch": 3.91, "learning_rate": 1.3906819177150855e-05, "loss": 0.8131, "step": 5002 }, { "epoch": 3.91, "learning_rate": 1.3904487988792811e-05, "loss": 1.0043, "step": 5003 }, { "epoch": 3.91, "learning_rate": 1.3902156550054404e-05, "loss": 0.9819, "step": 5004 }, { "epoch": 3.91, "learning_rate": 1.3899824861085125e-05, "loss": 0.9228, "step": 5005 }, { "epoch": 3.91, "learning_rate": 1.3897492922034506e-05, "loss": 0.8334, "step": 5006 }, { "epoch": 3.91, "learning_rate": 1.3895160733052082e-05, "loss": 0.8636, "step": 5007 }, { "epoch": 3.91, "learning_rate": 1.389282829428741e-05, "loss": 0.9258, "step": 5008 }, { "epoch": 3.91, "learning_rate": 1.3890495605890055e-05, "loss": 0.9463, "step": 5009 }, { "epoch": 3.92, "learning_rate": 1.3888162668009613e-05, "loss": 1.0583, "step": 5010 }, { "epoch": 3.92, "learning_rate": 1.3885829480795677e-05, "loss": 0.9669, "step": 5011 }, { "epoch": 3.92, "learning_rate": 1.388349604439787e-05, "loss": 0.9803, "step": 5012 }, { "epoch": 3.92, "learning_rate": 1.3881162358965829e-05, "loss": 0.8773, "step": 5013 }, { "epoch": 3.92, "learning_rate": 1.38788284246492e-05, "loss": 0.9091, "step": 5014 }, { "epoch": 3.92, "learning_rate": 1.3876494241597653e-05, "loss": 0.8442, "step": 5015 }, { "epoch": 3.92, "learning_rate": 1.3874159809960868e-05, "loss": 1.1411, "step": 5016 }, { "epoch": 3.92, "learning_rate": 1.3871825129888547e-05, "loss": 1.2856, "step": 5017 }, { "epoch": 3.92, "learning_rate": 1.3869490201530403e-05, "loss": 0.8141, "step": 5018 }, { "epoch": 3.92, "learning_rate": 1.3867155025036163e-05, "loss": 0.9175, "step": 5019 }, { "epoch": 3.92, "learning_rate": 1.3864819600555577e-05, "loss": 1.0251, "step": 5020 }, { "epoch": 3.92, "learning_rate": 1.3862483928238404e-05, "loss": 1.1333, "step": 5021 }, { "epoch": 3.92, "learning_rate": 1.3860148008234425e-05, "loss": 0.8507, "step": 5022 }, { "epoch": 3.93, "learning_rate": 1.3857811840693432e-05, "loss": 0.872, "step": 5023 }, { "epoch": 3.93, "learning_rate": 1.3855475425765237e-05, "loss": 0.7971, "step": 5024 }, { "epoch": 3.93, "learning_rate": 1.3853138763599663e-05, "loss": 1.0566, "step": 5025 }, { "epoch": 3.93, "learning_rate": 1.3850801854346552e-05, "loss": 0.9383, "step": 5026 }, { "epoch": 3.93, "learning_rate": 1.3848464698155766e-05, "loss": 0.8547, "step": 5027 }, { "epoch": 3.93, "learning_rate": 1.384612729517717e-05, "loss": 1.13, "step": 5028 }, { "epoch": 3.93, "learning_rate": 1.384378964556066e-05, "loss": 0.7685, "step": 5029 }, { "epoch": 3.93, "learning_rate": 1.3841451749456138e-05, "loss": 0.7231, "step": 5030 }, { "epoch": 3.93, "learning_rate": 1.3839113607013524e-05, "loss": 0.9659, "step": 5031 }, { "epoch": 3.93, "learning_rate": 1.3836775218382754e-05, "loss": 0.8981, "step": 5032 }, { "epoch": 3.93, "learning_rate": 1.3834436583713783e-05, "loss": 0.9432, "step": 5033 }, { "epoch": 3.93, "learning_rate": 1.3832097703156576e-05, "loss": 0.9512, "step": 5034 }, { "epoch": 3.94, "learning_rate": 1.382975857686112e-05, "loss": 0.8631, "step": 5035 }, { "epoch": 3.94, "learning_rate": 1.382741920497741e-05, "loss": 0.8139, "step": 5036 }, { "epoch": 3.94, "learning_rate": 1.3825079587655465e-05, "loss": 1.0018, "step": 5037 }, { "epoch": 3.94, "learning_rate": 1.3822739725045318e-05, "loss": 0.8049, "step": 5038 }, { "epoch": 3.94, "learning_rate": 1.3820399617297008e-05, "loss": 0.8354, "step": 5039 }, { "epoch": 3.94, "learning_rate": 1.3818059264560604e-05, "loss": 0.9743, "step": 5040 }, { "epoch": 3.94, "learning_rate": 1.3815718666986182e-05, "loss": 0.8412, "step": 5041 }, { "epoch": 3.94, "learning_rate": 1.3813377824723837e-05, "loss": 1.1182, "step": 5042 }, { "epoch": 3.94, "learning_rate": 1.3811036737923678e-05, "loss": 1.0206, "step": 5043 }, { "epoch": 3.94, "learning_rate": 1.380869540673583e-05, "loss": 0.7779, "step": 5044 }, { "epoch": 3.94, "learning_rate": 1.380635383131043e-05, "loss": 0.9864, "step": 5045 }, { "epoch": 3.94, "learning_rate": 1.3804012011797642e-05, "loss": 1.0847, "step": 5046 }, { "epoch": 3.94, "learning_rate": 1.3801669948347634e-05, "loss": 0.9959, "step": 5047 }, { "epoch": 3.95, "learning_rate": 1.3799327641110595e-05, "loss": 1.0688, "step": 5048 }, { "epoch": 3.95, "learning_rate": 1.3796985090236725e-05, "loss": 0.8361, "step": 5049 }, { "epoch": 3.95, "learning_rate": 1.379464229587625e-05, "loss": 1.1233, "step": 5050 }, { "epoch": 3.95, "learning_rate": 1.3792299258179399e-05, "loss": 0.8335, "step": 5051 }, { "epoch": 3.95, "learning_rate": 1.3789955977296427e-05, "loss": 1.0535, "step": 5052 }, { "epoch": 3.95, "learning_rate": 1.3787612453377593e-05, "loss": 0.9015, "step": 5053 }, { "epoch": 3.95, "learning_rate": 1.3785268686573183e-05, "loss": 0.711, "step": 5054 }, { "epoch": 3.95, "learning_rate": 1.3782924677033496e-05, "loss": 0.7415, "step": 5055 }, { "epoch": 3.95, "learning_rate": 1.3780580424908843e-05, "loss": 0.9368, "step": 5056 }, { "epoch": 3.95, "learning_rate": 1.3778235930349548e-05, "loss": 0.8827, "step": 5057 }, { "epoch": 3.95, "learning_rate": 1.3775891193505961e-05, "loss": 0.9928, "step": 5058 }, { "epoch": 3.95, "learning_rate": 1.3773546214528436e-05, "loss": 0.9166, "step": 5059 }, { "epoch": 3.95, "learning_rate": 1.3771200993567355e-05, "loss": 1.092, "step": 5060 }, { "epoch": 3.96, "learning_rate": 1.37688555307731e-05, "loss": 0.836, "step": 5061 }, { "epoch": 3.96, "learning_rate": 1.3766509826296082e-05, "loss": 0.8268, "step": 5062 }, { "epoch": 3.96, "learning_rate": 1.376416388028672e-05, "loss": 0.8423, "step": 5063 }, { "epoch": 3.96, "learning_rate": 1.3761817692895456e-05, "loss": 0.8791, "step": 5064 }, { "epoch": 3.96, "learning_rate": 1.3759471264272733e-05, "loss": 0.8958, "step": 5065 }, { "epoch": 3.96, "learning_rate": 1.3757124594569027e-05, "loss": 0.9238, "step": 5066 }, { "epoch": 3.96, "learning_rate": 1.3754777683934817e-05, "loss": 0.7715, "step": 5067 }, { "epoch": 3.96, "learning_rate": 1.3752430532520604e-05, "loss": 1.1051, "step": 5068 }, { "epoch": 3.96, "learning_rate": 1.37500831404769e-05, "loss": 0.9389, "step": 5069 }, { "epoch": 3.96, "learning_rate": 1.3747735507954233e-05, "loss": 0.8293, "step": 5070 }, { "epoch": 3.96, "learning_rate": 1.3745387635103156e-05, "loss": 1.127, "step": 5071 }, { "epoch": 3.96, "learning_rate": 1.3743039522074223e-05, "loss": 1.0914, "step": 5072 }, { "epoch": 3.96, "learning_rate": 1.3740691169018007e-05, "loss": 0.8893, "step": 5073 }, { "epoch": 3.97, "learning_rate": 1.3738342576085107e-05, "loss": 0.9986, "step": 5074 }, { "epoch": 3.97, "learning_rate": 1.3735993743426126e-05, "loss": 1.1182, "step": 5075 }, { "epoch": 3.97, "learning_rate": 1.3733644671191681e-05, "loss": 0.9102, "step": 5076 }, { "epoch": 3.97, "learning_rate": 1.3731295359532418e-05, "loss": 1.0641, "step": 5077 }, { "epoch": 3.97, "learning_rate": 1.3728945808598983e-05, "loss": 0.881, "step": 5078 }, { "epoch": 3.97, "learning_rate": 1.3726596018542048e-05, "loss": 1.0728, "step": 5079 }, { "epoch": 3.97, "learning_rate": 1.3724245989512295e-05, "loss": 0.9232, "step": 5080 }, { "epoch": 3.97, "learning_rate": 1.3721895721660424e-05, "loss": 0.8267, "step": 5081 }, { "epoch": 3.97, "learning_rate": 1.3719545215137144e-05, "loss": 1.0423, "step": 5082 }, { "epoch": 3.97, "learning_rate": 1.371719447009319e-05, "loss": 0.8171, "step": 5083 }, { "epoch": 3.97, "learning_rate": 1.3714843486679304e-05, "loss": 0.9725, "step": 5084 }, { "epoch": 3.97, "learning_rate": 1.3712492265046246e-05, "loss": 1.2636, "step": 5085 }, { "epoch": 3.97, "learning_rate": 1.3710140805344791e-05, "loss": 1.1797, "step": 5086 }, { "epoch": 3.98, "learning_rate": 1.3707789107725733e-05, "loss": 0.9386, "step": 5087 }, { "epoch": 3.98, "learning_rate": 1.370543717233987e-05, "loss": 0.8107, "step": 5088 }, { "epoch": 3.98, "learning_rate": 1.370308499933803e-05, "loss": 0.7612, "step": 5089 }, { "epoch": 3.98, "learning_rate": 1.3700732588871049e-05, "loss": 1.0483, "step": 5090 }, { "epoch": 3.98, "learning_rate": 1.3698379941089773e-05, "loss": 1.1854, "step": 5091 }, { "epoch": 3.98, "learning_rate": 1.3696027056145072e-05, "loss": 1.1097, "step": 5092 }, { "epoch": 3.98, "learning_rate": 1.3693673934187825e-05, "loss": 0.9403, "step": 5093 }, { "epoch": 3.98, "learning_rate": 1.3691320575368934e-05, "loss": 0.9947, "step": 5094 }, { "epoch": 3.98, "learning_rate": 1.3688966979839308e-05, "loss": 0.84, "step": 5095 }, { "epoch": 3.98, "learning_rate": 1.3686613147749877e-05, "loss": 0.8444, "step": 5096 }, { "epoch": 3.98, "learning_rate": 1.3684259079251579e-05, "loss": 0.7613, "step": 5097 }, { "epoch": 3.98, "learning_rate": 1.3681904774495376e-05, "loss": 0.9087, "step": 5098 }, { "epoch": 3.99, "learning_rate": 1.3679550233632234e-05, "loss": 1.0461, "step": 5099 }, { "epoch": 3.99, "learning_rate": 1.367719545681315e-05, "loss": 0.9043, "step": 5100 }, { "epoch": 3.99, "learning_rate": 1.3674840444189125e-05, "loss": 1.1731, "step": 5101 }, { "epoch": 3.99, "learning_rate": 1.3672485195911174e-05, "loss": 1.0392, "step": 5102 }, { "epoch": 3.99, "learning_rate": 1.367012971213033e-05, "loss": 1.0048, "step": 5103 }, { "epoch": 3.99, "learning_rate": 1.3667773992997648e-05, "loss": 0.885, "step": 5104 }, { "epoch": 3.99, "learning_rate": 1.3665418038664181e-05, "loss": 0.7832, "step": 5105 }, { "epoch": 3.99, "learning_rate": 1.3663061849281018e-05, "loss": 1.0521, "step": 5106 }, { "epoch": 3.99, "learning_rate": 1.3660705424999245e-05, "loss": 1.209, "step": 5107 }, { "epoch": 3.99, "learning_rate": 1.3658348765969975e-05, "loss": 0.9415, "step": 5108 }, { "epoch": 3.99, "learning_rate": 1.3655991872344331e-05, "loss": 0.8912, "step": 5109 }, { "epoch": 3.99, "learning_rate": 1.3653634744273452e-05, "loss": 0.9202, "step": 5110 }, { "epoch": 3.99, "learning_rate": 1.365127738190849e-05, "loss": 0.8237, "step": 5111 }, { "epoch": 4.0, "learning_rate": 1.3648919785400619e-05, "loss": 1.1894, "step": 5112 }, { "epoch": 4.0, "learning_rate": 1.3646561954901015e-05, "loss": 0.839, "step": 5113 }, { "epoch": 4.0, "learning_rate": 1.3644203890560883e-05, "loss": 0.8242, "step": 5114 }, { "epoch": 4.0, "learning_rate": 1.3641845592531438e-05, "loss": 1.0256, "step": 5115 }, { "epoch": 4.0, "learning_rate": 1.3639487060963904e-05, "loss": 1.0237, "step": 5116 }, { "epoch": 4.0, "learning_rate": 1.3637128296009523e-05, "loss": 1.001, "step": 5117 }, { "epoch": 4.0, "learning_rate": 1.3634769297819563e-05, "loss": 1.0356, "step": 5118 }, { "epoch": 4.0, "learning_rate": 1.3632410066545293e-05, "loss": 0.3931, "step": 5119 }, { "epoch": 4.0, "learning_rate": 1.3630050602338e-05, "loss": 0.6684, "step": 5120 }, { "epoch": 4.0, "learning_rate": 1.362769090534899e-05, "loss": 0.6391, "step": 5121 }, { "epoch": 4.0, "learning_rate": 1.3625330975729578e-05, "loss": 0.6377, "step": 5122 }, { "epoch": 4.0, "learning_rate": 1.3622970813631105e-05, "loss": 0.5155, "step": 5123 }, { "epoch": 4.0, "learning_rate": 1.3620610419204914e-05, "loss": 0.4226, "step": 5124 }, { "epoch": 4.01, "learning_rate": 1.3618249792602366e-05, "loss": 0.6279, "step": 5125 }, { "epoch": 4.01, "learning_rate": 1.3615888933974848e-05, "loss": 0.3047, "step": 5126 }, { "epoch": 4.01, "learning_rate": 1.3613527843473743e-05, "loss": 0.3609, "step": 5127 }, { "epoch": 4.01, "learning_rate": 1.3611166521250465e-05, "loss": 0.5145, "step": 5128 }, { "epoch": 4.01, "learning_rate": 1.360880496745644e-05, "loss": 0.535, "step": 5129 }, { "epoch": 4.01, "learning_rate": 1.3606443182243094e-05, "loss": 0.4862, "step": 5130 }, { "epoch": 4.01, "learning_rate": 1.360408116576189e-05, "loss": 0.6119, "step": 5131 }, { "epoch": 4.01, "learning_rate": 1.3601718918164296e-05, "loss": 0.5618, "step": 5132 }, { "epoch": 4.01, "learning_rate": 1.3599356439601785e-05, "loss": 0.5273, "step": 5133 }, { "epoch": 4.01, "learning_rate": 1.359699373022586e-05, "loss": 0.4322, "step": 5134 }, { "epoch": 4.01, "learning_rate": 1.3594630790188037e-05, "loss": 0.5235, "step": 5135 }, { "epoch": 4.01, "learning_rate": 1.3592267619639832e-05, "loss": 0.5907, "step": 5136 }, { "epoch": 4.01, "learning_rate": 1.3589904218732795e-05, "loss": 0.5525, "step": 5137 }, { "epoch": 4.02, "learning_rate": 1.358754058761848e-05, "loss": 0.3833, "step": 5138 }, { "epoch": 4.02, "learning_rate": 1.3585176726448457e-05, "loss": 0.535, "step": 5139 }, { "epoch": 4.02, "learning_rate": 1.3582812635374309e-05, "loss": 0.4183, "step": 5140 }, { "epoch": 4.02, "learning_rate": 1.3580448314547646e-05, "loss": 0.5968, "step": 5141 }, { "epoch": 4.02, "learning_rate": 1.357808376412007e-05, "loss": 0.4071, "step": 5142 }, { "epoch": 4.02, "learning_rate": 1.357571898424322e-05, "loss": 0.7133, "step": 5143 }, { "epoch": 4.02, "learning_rate": 1.357335397506874e-05, "loss": 0.4383, "step": 5144 }, { "epoch": 4.02, "learning_rate": 1.3570988736748285e-05, "loss": 0.3798, "step": 5145 }, { "epoch": 4.02, "learning_rate": 1.3568623269433532e-05, "loss": 0.4646, "step": 5146 }, { "epoch": 4.02, "learning_rate": 1.3566257573276168e-05, "loss": 0.4997, "step": 5147 }, { "epoch": 4.02, "learning_rate": 1.3563891648427897e-05, "loss": 0.5164, "step": 5148 }, { "epoch": 4.02, "learning_rate": 1.3561525495040438e-05, "loss": 0.3456, "step": 5149 }, { "epoch": 4.03, "learning_rate": 1.3559159113265524e-05, "loss": 0.5596, "step": 5150 }, { "epoch": 4.03, "learning_rate": 1.3556792503254898e-05, "loss": 0.6153, "step": 5151 }, { "epoch": 4.03, "learning_rate": 1.3554425665160332e-05, "loss": 0.479, "step": 5152 }, { "epoch": 4.03, "learning_rate": 1.3552058599133589e-05, "loss": 0.4696, "step": 5153 }, { "epoch": 4.03, "learning_rate": 1.3549691305326469e-05, "loss": 0.4122, "step": 5154 }, { "epoch": 4.03, "learning_rate": 1.3547323783890777e-05, "loss": 0.3427, "step": 5155 }, { "epoch": 4.03, "learning_rate": 1.3544956034978332e-05, "loss": 0.3719, "step": 5156 }, { "epoch": 4.03, "learning_rate": 1.3542588058740966e-05, "loss": 0.5054, "step": 5157 }, { "epoch": 4.03, "learning_rate": 1.354021985533054e-05, "loss": 0.4678, "step": 5158 }, { "epoch": 4.03, "learning_rate": 1.35378514248989e-05, "loss": 0.4924, "step": 5159 }, { "epoch": 4.03, "learning_rate": 1.3535482767597938e-05, "loss": 0.4506, "step": 5160 }, { "epoch": 4.03, "learning_rate": 1.3533113883579546e-05, "loss": 0.6918, "step": 5161 }, { "epoch": 4.03, "learning_rate": 1.3530744772995628e-05, "loss": 0.3891, "step": 5162 }, { "epoch": 4.04, "learning_rate": 1.352837543599811e-05, "loss": 0.5234, "step": 5163 }, { "epoch": 4.04, "learning_rate": 1.3526005872738923e-05, "loss": 0.5936, "step": 5164 }, { "epoch": 4.04, "learning_rate": 1.3523636083370021e-05, "loss": 0.6021, "step": 5165 }, { "epoch": 4.04, "learning_rate": 1.3521266068043375e-05, "loss": 0.4211, "step": 5166 }, { "epoch": 4.04, "learning_rate": 1.3518895826910961e-05, "loss": 0.4392, "step": 5167 }, { "epoch": 4.04, "learning_rate": 1.3516525360124772e-05, "loss": 0.4698, "step": 5168 }, { "epoch": 4.04, "learning_rate": 1.3514154667836818e-05, "loss": 0.4461, "step": 5169 }, { "epoch": 4.04, "learning_rate": 1.3511783750199126e-05, "loss": 0.5453, "step": 5170 }, { "epoch": 4.04, "learning_rate": 1.3509412607363732e-05, "loss": 0.4549, "step": 5171 }, { "epoch": 4.04, "learning_rate": 1.3507041239482687e-05, "loss": 0.5256, "step": 5172 }, { "epoch": 4.04, "learning_rate": 1.3504669646708061e-05, "loss": 0.4098, "step": 5173 }, { "epoch": 4.04, "learning_rate": 1.3502297829191933e-05, "loss": 0.4016, "step": 5174 }, { "epoch": 4.04, "learning_rate": 1.3499925787086406e-05, "loss": 0.5419, "step": 5175 }, { "epoch": 4.05, "learning_rate": 1.3497553520543577e-05, "loss": 0.3096, "step": 5176 }, { "epoch": 4.05, "learning_rate": 1.349518102971558e-05, "loss": 0.4708, "step": 5177 }, { "epoch": 4.05, "learning_rate": 1.3492808314754552e-05, "loss": 0.4618, "step": 5178 }, { "epoch": 4.05, "learning_rate": 1.349043537581265e-05, "loss": 0.4804, "step": 5179 }, { "epoch": 4.05, "learning_rate": 1.3488062213042037e-05, "loss": 0.4355, "step": 5180 }, { "epoch": 4.05, "learning_rate": 1.3485688826594894e-05, "loss": 0.5038, "step": 5181 }, { "epoch": 4.05, "learning_rate": 1.348331521662342e-05, "loss": 0.3139, "step": 5182 }, { "epoch": 4.05, "learning_rate": 1.348094138327983e-05, "loss": 0.2326, "step": 5183 }, { "epoch": 4.05, "learning_rate": 1.347856732671634e-05, "loss": 0.5144, "step": 5184 }, { "epoch": 4.05, "learning_rate": 1.3476193047085197e-05, "loss": 0.4411, "step": 5185 }, { "epoch": 4.05, "learning_rate": 1.3473818544538653e-05, "loss": 0.4998, "step": 5186 }, { "epoch": 4.05, "learning_rate": 1.3471443819228976e-05, "loss": 0.4091, "step": 5187 }, { "epoch": 4.05, "learning_rate": 1.3469068871308441e-05, "loss": 0.5208, "step": 5188 }, { "epoch": 4.06, "learning_rate": 1.346669370092936e-05, "loss": 0.5217, "step": 5189 }, { "epoch": 4.06, "learning_rate": 1.346431830824403e-05, "loss": 0.4647, "step": 5190 }, { "epoch": 4.06, "learning_rate": 1.3461942693404781e-05, "loss": 0.6047, "step": 5191 }, { "epoch": 4.06, "learning_rate": 1.3459566856563954e-05, "loss": 0.4089, "step": 5192 }, { "epoch": 4.06, "learning_rate": 1.3457190797873904e-05, "loss": 0.4775, "step": 5193 }, { "epoch": 4.06, "learning_rate": 1.3454814517486992e-05, "loss": 0.3894, "step": 5194 }, { "epoch": 4.06, "learning_rate": 1.3452438015555607e-05, "loss": 0.5919, "step": 5195 }, { "epoch": 4.06, "learning_rate": 1.3450061292232142e-05, "loss": 0.4628, "step": 5196 }, { "epoch": 4.06, "learning_rate": 1.3447684347669008e-05, "loss": 0.4758, "step": 5197 }, { "epoch": 4.06, "learning_rate": 1.3445307182018628e-05, "loss": 0.4964, "step": 5198 }, { "epoch": 4.06, "learning_rate": 1.3442929795433444e-05, "loss": 0.5105, "step": 5199 }, { "epoch": 4.06, "learning_rate": 1.3440552188065909e-05, "loss": 0.4475, "step": 5200 }, { "epoch": 4.06, "learning_rate": 1.3438174360068485e-05, "loss": 0.3471, "step": 5201 }, { "epoch": 4.07, "learning_rate": 1.3435796311593658e-05, "loss": 0.3493, "step": 5202 }, { "epoch": 4.07, "learning_rate": 1.3433418042793924e-05, "loss": 0.4031, "step": 5203 }, { "epoch": 4.07, "learning_rate": 1.3431039553821792e-05, "loss": 0.4779, "step": 5204 }, { "epoch": 4.07, "learning_rate": 1.3428660844829782e-05, "loss": 0.4248, "step": 5205 }, { "epoch": 4.07, "learning_rate": 1.3426281915970438e-05, "loss": 0.5875, "step": 5206 }, { "epoch": 4.07, "learning_rate": 1.3423902767396305e-05, "loss": 0.3533, "step": 5207 }, { "epoch": 4.07, "learning_rate": 1.3421523399259952e-05, "loss": 0.5144, "step": 5208 }, { "epoch": 4.07, "learning_rate": 1.3419143811713961e-05, "loss": 0.5496, "step": 5209 }, { "epoch": 4.07, "learning_rate": 1.3416764004910927e-05, "loss": 0.5214, "step": 5210 }, { "epoch": 4.07, "learning_rate": 1.3414383979003452e-05, "loss": 0.456, "step": 5211 }, { "epoch": 4.07, "learning_rate": 1.3412003734144165e-05, "loss": 0.5042, "step": 5212 }, { "epoch": 4.07, "learning_rate": 1.3409623270485697e-05, "loss": 0.4727, "step": 5213 }, { "epoch": 4.08, "learning_rate": 1.3407242588180703e-05, "loss": 0.3224, "step": 5214 }, { "epoch": 4.08, "learning_rate": 1.3404861687381842e-05, "loss": 0.4448, "step": 5215 }, { "epoch": 4.08, "learning_rate": 1.34024805682418e-05, "loss": 0.3355, "step": 5216 }, { "epoch": 4.08, "learning_rate": 1.3400099230913263e-05, "loss": 0.5678, "step": 5217 }, { "epoch": 4.08, "learning_rate": 1.3397717675548939e-05, "loss": 0.413, "step": 5218 }, { "epoch": 4.08, "learning_rate": 1.3395335902301545e-05, "loss": 0.4094, "step": 5219 }, { "epoch": 4.08, "learning_rate": 1.3392953911323822e-05, "loss": 0.5163, "step": 5220 }, { "epoch": 4.08, "learning_rate": 1.3390571702768517e-05, "loss": 0.3808, "step": 5221 }, { "epoch": 4.08, "learning_rate": 1.3388189276788388e-05, "loss": 0.5021, "step": 5222 }, { "epoch": 4.08, "learning_rate": 1.3385806633536214e-05, "loss": 0.436, "step": 5223 }, { "epoch": 4.08, "learning_rate": 1.3383423773164784e-05, "loss": 0.4695, "step": 5224 }, { "epoch": 4.08, "learning_rate": 1.3381040695826904e-05, "loss": 0.4802, "step": 5225 }, { "epoch": 4.08, "learning_rate": 1.3378657401675387e-05, "loss": 0.5499, "step": 5226 }, { "epoch": 4.09, "learning_rate": 1.3376273890863074e-05, "loss": 0.5302, "step": 5227 }, { "epoch": 4.09, "learning_rate": 1.3373890163542799e-05, "loss": 0.629, "step": 5228 }, { "epoch": 4.09, "learning_rate": 1.3371506219867434e-05, "loss": 0.5573, "step": 5229 }, { "epoch": 4.09, "learning_rate": 1.336912205998984e-05, "loss": 0.4282, "step": 5230 }, { "epoch": 4.09, "learning_rate": 1.3366737684062915e-05, "loss": 0.6489, "step": 5231 }, { "epoch": 4.09, "learning_rate": 1.3364353092239555e-05, "loss": 0.5154, "step": 5232 }, { "epoch": 4.09, "learning_rate": 1.3361968284672675e-05, "loss": 0.4865, "step": 5233 }, { "epoch": 4.09, "learning_rate": 1.3359583261515204e-05, "loss": 0.6573, "step": 5234 }, { "epoch": 4.09, "learning_rate": 1.335719802292009e-05, "loss": 0.4817, "step": 5235 }, { "epoch": 4.09, "learning_rate": 1.3354812569040279e-05, "loss": 0.432, "step": 5236 }, { "epoch": 4.09, "learning_rate": 1.3352426900028752e-05, "loss": 0.3948, "step": 5237 }, { "epoch": 4.09, "learning_rate": 1.3350041016038484e-05, "loss": 0.5167, "step": 5238 }, { "epoch": 4.09, "learning_rate": 1.3347654917222482e-05, "loss": 0.554, "step": 5239 }, { "epoch": 4.1, "learning_rate": 1.334526860373375e-05, "loss": 0.4463, "step": 5240 }, { "epoch": 4.1, "learning_rate": 1.3342882075725316e-05, "loss": 0.5495, "step": 5241 }, { "epoch": 4.1, "learning_rate": 1.3340495333350221e-05, "loss": 0.535, "step": 5242 }, { "epoch": 4.1, "learning_rate": 1.3338108376761516e-05, "loss": 0.5091, "step": 5243 }, { "epoch": 4.1, "learning_rate": 1.3335721206112268e-05, "loss": 0.5432, "step": 5244 }, { "epoch": 4.1, "learning_rate": 1.3333333821555558e-05, "loss": 0.4258, "step": 5245 }, { "epoch": 4.1, "learning_rate": 1.3330946223244484e-05, "loss": 0.289, "step": 5246 }, { "epoch": 4.1, "learning_rate": 1.3328558411332142e-05, "loss": 0.3757, "step": 5247 }, { "epoch": 4.1, "learning_rate": 1.3326170385971668e-05, "loss": 0.5597, "step": 5248 }, { "epoch": 4.1, "learning_rate": 1.3323782147316187e-05, "loss": 0.7313, "step": 5249 }, { "epoch": 4.1, "learning_rate": 1.3321393695518853e-05, "loss": 0.5494, "step": 5250 }, { "epoch": 4.1, "learning_rate": 1.3319005030732826e-05, "loss": 0.6183, "step": 5251 }, { "epoch": 4.1, "learning_rate": 1.3316616153111286e-05, "loss": 0.6753, "step": 5252 }, { "epoch": 4.11, "learning_rate": 1.3314227062807416e-05, "loss": 0.5829, "step": 5253 }, { "epoch": 4.11, "learning_rate": 1.3311837759974428e-05, "loss": 0.4958, "step": 5254 }, { "epoch": 4.11, "learning_rate": 1.330944824476553e-05, "loss": 0.4293, "step": 5255 }, { "epoch": 4.11, "learning_rate": 1.3307058517333962e-05, "loss": 0.4154, "step": 5256 }, { "epoch": 4.11, "learning_rate": 1.3304668577832965e-05, "loss": 0.4573, "step": 5257 }, { "epoch": 4.11, "learning_rate": 1.330227842641579e-05, "loss": 0.5155, "step": 5258 }, { "epoch": 4.11, "learning_rate": 1.3299888063235717e-05, "loss": 0.4023, "step": 5259 }, { "epoch": 4.11, "learning_rate": 1.3297497488446034e-05, "loss": 0.5172, "step": 5260 }, { "epoch": 4.11, "learning_rate": 1.3295106702200028e-05, "loss": 0.5035, "step": 5261 }, { "epoch": 4.11, "learning_rate": 1.3292715704651019e-05, "loss": 0.63, "step": 5262 }, { "epoch": 4.11, "learning_rate": 1.3290324495952334e-05, "loss": 0.5199, "step": 5263 }, { "epoch": 4.11, "learning_rate": 1.3287933076257306e-05, "loss": 0.3899, "step": 5264 }, { "epoch": 4.11, "learning_rate": 1.3285541445719291e-05, "loss": 0.2937, "step": 5265 }, { "epoch": 4.12, "learning_rate": 1.3283149604491664e-05, "loss": 0.475, "step": 5266 }, { "epoch": 4.12, "learning_rate": 1.328075755272779e-05, "loss": 0.4248, "step": 5267 }, { "epoch": 4.12, "learning_rate": 1.3278365290581072e-05, "loss": 0.5023, "step": 5268 }, { "epoch": 4.12, "learning_rate": 1.3275972818204914e-05, "loss": 0.4856, "step": 5269 }, { "epoch": 4.12, "learning_rate": 1.3273580135752737e-05, "loss": 0.3941, "step": 5270 }, { "epoch": 4.12, "learning_rate": 1.3271187243377977e-05, "loss": 0.4672, "step": 5271 }, { "epoch": 4.12, "learning_rate": 1.3268794141234073e-05, "loss": 0.4678, "step": 5272 }, { "epoch": 4.12, "learning_rate": 1.3266400829474497e-05, "loss": 0.5898, "step": 5273 }, { "epoch": 4.12, "learning_rate": 1.3264007308252718e-05, "loss": 0.4018, "step": 5274 }, { "epoch": 4.12, "learning_rate": 1.3261613577722222e-05, "loss": 0.5193, "step": 5275 }, { "epoch": 4.12, "learning_rate": 1.3259219638036514e-05, "loss": 0.5047, "step": 5276 }, { "epoch": 4.12, "learning_rate": 1.3256825489349106e-05, "loss": 0.5061, "step": 5277 }, { "epoch": 4.13, "learning_rate": 1.3254431131813524e-05, "loss": 0.4028, "step": 5278 }, { "epoch": 4.13, "learning_rate": 1.3252036565583313e-05, "loss": 0.3555, "step": 5279 }, { "epoch": 4.13, "learning_rate": 1.3249641790812026e-05, "loss": 0.4371, "step": 5280 }, { "epoch": 4.13, "learning_rate": 1.3247246807653232e-05, "loss": 0.4716, "step": 5281 }, { "epoch": 4.13, "learning_rate": 1.324485161626051e-05, "loss": 0.5673, "step": 5282 }, { "epoch": 4.13, "learning_rate": 1.3242456216787458e-05, "loss": 0.3555, "step": 5283 }, { "epoch": 4.13, "learning_rate": 1.3240060609387682e-05, "loss": 0.6849, "step": 5284 }, { "epoch": 4.13, "learning_rate": 1.3237664794214804e-05, "loss": 0.3974, "step": 5285 }, { "epoch": 4.13, "learning_rate": 1.3235268771422457e-05, "loss": 0.65, "step": 5286 }, { "epoch": 4.13, "learning_rate": 1.3232872541164295e-05, "loss": 0.3657, "step": 5287 }, { "epoch": 4.13, "learning_rate": 1.3230476103593972e-05, "loss": 0.4263, "step": 5288 }, { "epoch": 4.13, "learning_rate": 1.3228079458865168e-05, "loss": 0.4843, "step": 5289 }, { "epoch": 4.13, "learning_rate": 1.3225682607131567e-05, "loss": 0.3519, "step": 5290 }, { "epoch": 4.14, "learning_rate": 1.3223285548546873e-05, "loss": 0.5095, "step": 5291 }, { "epoch": 4.14, "learning_rate": 1.3220888283264802e-05, "loss": 0.4851, "step": 5292 }, { "epoch": 4.14, "learning_rate": 1.3218490811439076e-05, "loss": 0.4474, "step": 5293 }, { "epoch": 4.14, "learning_rate": 1.321609313322344e-05, "loss": 0.3827, "step": 5294 }, { "epoch": 4.14, "learning_rate": 1.3213695248771647e-05, "loss": 0.5276, "step": 5295 }, { "epoch": 4.14, "learning_rate": 1.3211297158237466e-05, "loss": 0.5252, "step": 5296 }, { "epoch": 4.14, "learning_rate": 1.3208898861774678e-05, "loss": 0.5197, "step": 5297 }, { "epoch": 4.14, "learning_rate": 1.3206500359537073e-05, "loss": 0.535, "step": 5298 }, { "epoch": 4.14, "learning_rate": 1.3204101651678463e-05, "loss": 0.4649, "step": 5299 }, { "epoch": 4.14, "learning_rate": 1.3201702738352667e-05, "loss": 0.4101, "step": 5300 }, { "epoch": 4.14, "learning_rate": 1.3199303619713513e-05, "loss": 0.4532, "step": 5301 }, { "epoch": 4.14, "learning_rate": 1.3196904295914855e-05, "loss": 0.4725, "step": 5302 }, { "epoch": 4.14, "learning_rate": 1.3194504767110547e-05, "loss": 0.4466, "step": 5303 }, { "epoch": 4.15, "learning_rate": 1.3192105033454468e-05, "loss": 0.568, "step": 5304 }, { "epoch": 4.15, "learning_rate": 1.3189705095100498e-05, "loss": 0.4072, "step": 5305 }, { "epoch": 4.15, "learning_rate": 1.3187304952202543e-05, "loss": 0.4665, "step": 5306 }, { "epoch": 4.15, "learning_rate": 1.3184904604914506e-05, "loss": 0.4115, "step": 5307 }, { "epoch": 4.15, "learning_rate": 1.3182504053390324e-05, "loss": 0.3861, "step": 5308 }, { "epoch": 4.15, "learning_rate": 1.3180103297783922e-05, "loss": 0.296, "step": 5309 }, { "epoch": 4.15, "learning_rate": 1.3177702338249263e-05, "loss": 0.5381, "step": 5310 }, { "epoch": 4.15, "learning_rate": 1.3175301174940307e-05, "loss": 0.4822, "step": 5311 }, { "epoch": 4.15, "learning_rate": 1.317289980801103e-05, "loss": 0.3111, "step": 5312 }, { "epoch": 4.15, "learning_rate": 1.3170498237615425e-05, "loss": 0.4173, "step": 5313 }, { "epoch": 4.15, "learning_rate": 1.3168096463907498e-05, "loss": 0.4833, "step": 5314 }, { "epoch": 4.15, "learning_rate": 1.316569448704126e-05, "loss": 0.4812, "step": 5315 }, { "epoch": 4.15, "learning_rate": 1.3163292307170748e-05, "loss": 0.3963, "step": 5316 }, { "epoch": 4.16, "learning_rate": 1.3160889924449998e-05, "loss": 0.4208, "step": 5317 }, { "epoch": 4.16, "learning_rate": 1.3158487339033073e-05, "loss": 0.4778, "step": 5318 }, { "epoch": 4.16, "learning_rate": 1.3156084551074034e-05, "loss": 0.48, "step": 5319 }, { "epoch": 4.16, "learning_rate": 1.3153681560726967e-05, "loss": 0.4848, "step": 5320 }, { "epoch": 4.16, "learning_rate": 1.315127836814597e-05, "loss": 0.3772, "step": 5321 }, { "epoch": 4.16, "learning_rate": 1.3148874973485147e-05, "loss": 0.478, "step": 5322 }, { "epoch": 4.16, "learning_rate": 1.314647137689862e-05, "loss": 0.4581, "step": 5323 }, { "epoch": 4.16, "learning_rate": 1.3144067578540518e-05, "loss": 0.4363, "step": 5324 }, { "epoch": 4.16, "learning_rate": 1.3141663578564996e-05, "loss": 0.4579, "step": 5325 }, { "epoch": 4.16, "learning_rate": 1.313925937712621e-05, "loss": 0.5306, "step": 5326 }, { "epoch": 4.16, "learning_rate": 1.313685497437833e-05, "loss": 0.3071, "step": 5327 }, { "epoch": 4.16, "learning_rate": 1.3134450370475546e-05, "loss": 0.3753, "step": 5328 }, { "epoch": 4.16, "learning_rate": 1.3132045565572053e-05, "loss": 0.3924, "step": 5329 }, { "epoch": 4.17, "learning_rate": 1.312964055982206e-05, "loss": 0.5197, "step": 5330 }, { "epoch": 4.17, "learning_rate": 1.3127235353379802e-05, "loss": 0.502, "step": 5331 }, { "epoch": 4.17, "learning_rate": 1.3124829946399501e-05, "loss": 0.4444, "step": 5332 }, { "epoch": 4.17, "learning_rate": 1.3122424339035417e-05, "loss": 0.465, "step": 5333 }, { "epoch": 4.17, "learning_rate": 1.312001853144181e-05, "loss": 0.3467, "step": 5334 }, { "epoch": 4.17, "learning_rate": 1.3117612523772956e-05, "loss": 0.5792, "step": 5335 }, { "epoch": 4.17, "learning_rate": 1.311520631618314e-05, "loss": 0.6369, "step": 5336 }, { "epoch": 4.17, "learning_rate": 1.311279990882667e-05, "loss": 0.5599, "step": 5337 }, { "epoch": 4.17, "learning_rate": 1.3110393301857851e-05, "loss": 0.5153, "step": 5338 }, { "epoch": 4.17, "learning_rate": 1.310798649543102e-05, "loss": 0.5568, "step": 5339 }, { "epoch": 4.17, "learning_rate": 1.3105579489700509e-05, "loss": 0.3935, "step": 5340 }, { "epoch": 4.17, "learning_rate": 1.3103172284820673e-05, "loss": 0.6132, "step": 5341 }, { "epoch": 4.18, "learning_rate": 1.3100764880945875e-05, "loss": 0.5418, "step": 5342 }, { "epoch": 4.18, "learning_rate": 1.30983572782305e-05, "loss": 0.4814, "step": 5343 }, { "epoch": 4.18, "learning_rate": 1.3095949476828929e-05, "loss": 0.4987, "step": 5344 }, { "epoch": 4.18, "learning_rate": 1.309354147689557e-05, "loss": 0.5069, "step": 5345 }, { "epoch": 4.18, "learning_rate": 1.309113327858484e-05, "loss": 0.5083, "step": 5346 }, { "epoch": 4.18, "learning_rate": 1.308872488205117e-05, "loss": 0.4696, "step": 5347 }, { "epoch": 4.18, "learning_rate": 1.3086316287448995e-05, "loss": 0.4868, "step": 5348 }, { "epoch": 4.18, "learning_rate": 1.308390749493277e-05, "loss": 0.4457, "step": 5349 }, { "epoch": 4.18, "learning_rate": 1.308149850465697e-05, "loss": 0.7151, "step": 5350 }, { "epoch": 4.18, "learning_rate": 1.3079089316776066e-05, "loss": 0.3969, "step": 5351 }, { "epoch": 4.18, "learning_rate": 1.3076679931444553e-05, "loss": 0.5004, "step": 5352 }, { "epoch": 4.18, "learning_rate": 1.3074270348816937e-05, "loss": 0.3729, "step": 5353 }, { "epoch": 4.18, "learning_rate": 1.3071860569047739e-05, "loss": 0.3895, "step": 5354 }, { "epoch": 4.19, "learning_rate": 1.3069450592291482e-05, "loss": 0.4089, "step": 5355 }, { "epoch": 4.19, "learning_rate": 1.3067040418702712e-05, "loss": 0.6104, "step": 5356 }, { "epoch": 4.19, "learning_rate": 1.3064630048435987e-05, "loss": 0.3572, "step": 5357 }, { "epoch": 4.19, "learning_rate": 1.306221948164587e-05, "loss": 0.4588, "step": 5358 }, { "epoch": 4.19, "learning_rate": 1.3059808718486946e-05, "loss": 0.503, "step": 5359 }, { "epoch": 4.19, "learning_rate": 1.305739775911381e-05, "loss": 0.6108, "step": 5360 }, { "epoch": 4.19, "learning_rate": 1.3054986603681062e-05, "loss": 0.5689, "step": 5361 }, { "epoch": 4.19, "learning_rate": 1.3052575252343322e-05, "loss": 0.6594, "step": 5362 }, { "epoch": 4.19, "learning_rate": 1.3050163705255225e-05, "loss": 0.4129, "step": 5363 }, { "epoch": 4.19, "learning_rate": 1.304775196257141e-05, "loss": 0.4162, "step": 5364 }, { "epoch": 4.19, "learning_rate": 1.3045340024446538e-05, "loss": 0.5697, "step": 5365 }, { "epoch": 4.19, "learning_rate": 1.3042927891035274e-05, "loss": 0.4213, "step": 5366 }, { "epoch": 4.19, "learning_rate": 1.3040515562492297e-05, "loss": 0.4397, "step": 5367 }, { "epoch": 4.2, "learning_rate": 1.3038103038972307e-05, "loss": 0.6168, "step": 5368 }, { "epoch": 4.2, "learning_rate": 1.3035690320630008e-05, "loss": 0.4113, "step": 5369 }, { "epoch": 4.2, "learning_rate": 1.3033277407620115e-05, "loss": 0.644, "step": 5370 }, { "epoch": 4.2, "learning_rate": 1.3030864300097364e-05, "loss": 0.5171, "step": 5371 }, { "epoch": 4.2, "learning_rate": 1.3028450998216492e-05, "loss": 0.5292, "step": 5372 }, { "epoch": 4.2, "learning_rate": 1.3026037502132264e-05, "loss": 0.423, "step": 5373 }, { "epoch": 4.2, "learning_rate": 1.3023623811999444e-05, "loss": 0.516, "step": 5374 }, { "epoch": 4.2, "learning_rate": 1.3021209927972813e-05, "loss": 0.4145, "step": 5375 }, { "epoch": 4.2, "learning_rate": 1.3018795850207164e-05, "loss": 0.5235, "step": 5376 }, { "epoch": 4.2, "learning_rate": 1.3016381578857304e-05, "loss": 0.3866, "step": 5377 }, { "epoch": 4.2, "learning_rate": 1.3013967114078047e-05, "loss": 0.5224, "step": 5378 }, { "epoch": 4.2, "learning_rate": 1.3011552456024232e-05, "loss": 0.491, "step": 5379 }, { "epoch": 4.2, "learning_rate": 1.3009137604850696e-05, "loss": 0.426, "step": 5380 }, { "epoch": 4.21, "learning_rate": 1.3006722560712296e-05, "loss": 0.6421, "step": 5381 }, { "epoch": 4.21, "learning_rate": 1.3004307323763903e-05, "loss": 0.5485, "step": 5382 }, { "epoch": 4.21, "learning_rate": 1.3001891894160392e-05, "loss": 0.365, "step": 5383 }, { "epoch": 4.21, "learning_rate": 1.2999476272056656e-05, "loss": 0.4434, "step": 5384 }, { "epoch": 4.21, "learning_rate": 1.2997060457607607e-05, "loss": 0.4733, "step": 5385 }, { "epoch": 4.21, "learning_rate": 1.299464445096815e-05, "loss": 0.5031, "step": 5386 }, { "epoch": 4.21, "learning_rate": 1.2992228252293226e-05, "loss": 0.7347, "step": 5387 }, { "epoch": 4.21, "learning_rate": 1.2989811861737772e-05, "loss": 0.523, "step": 5388 }, { "epoch": 4.21, "learning_rate": 1.298739527945674e-05, "loss": 0.4536, "step": 5389 }, { "epoch": 4.21, "learning_rate": 1.29849785056051e-05, "loss": 0.6381, "step": 5390 }, { "epoch": 4.21, "learning_rate": 1.2982561540337833e-05, "loss": 0.4359, "step": 5391 }, { "epoch": 4.21, "learning_rate": 1.298014438380992e-05, "loss": 0.4971, "step": 5392 }, { "epoch": 4.21, "learning_rate": 1.2977727036176377e-05, "loss": 0.5535, "step": 5393 }, { "epoch": 4.22, "learning_rate": 1.2975309497592213e-05, "loss": 0.3499, "step": 5394 }, { "epoch": 4.22, "learning_rate": 1.2972891768212455e-05, "loss": 0.3916, "step": 5395 }, { "epoch": 4.22, "learning_rate": 1.2970473848192142e-05, "loss": 0.4451, "step": 5396 }, { "epoch": 4.22, "learning_rate": 1.2968055737686334e-05, "loss": 0.4731, "step": 5397 }, { "epoch": 4.22, "learning_rate": 1.2965637436850086e-05, "loss": 0.3103, "step": 5398 }, { "epoch": 4.22, "learning_rate": 1.2963218945838482e-05, "loss": 0.4196, "step": 5399 }, { "epoch": 4.22, "learning_rate": 1.2960800264806607e-05, "loss": 0.4695, "step": 5400 }, { "epoch": 4.22, "learning_rate": 1.2958381393909563e-05, "loss": 0.5506, "step": 5401 }, { "epoch": 4.22, "learning_rate": 1.2955962333302466e-05, "loss": 0.4464, "step": 5402 }, { "epoch": 4.22, "learning_rate": 1.2953543083140432e-05, "loss": 0.3805, "step": 5403 }, { "epoch": 4.22, "learning_rate": 1.295112364357861e-05, "loss": 0.4862, "step": 5404 }, { "epoch": 4.22, "learning_rate": 1.2948704014772144e-05, "loss": 0.6438, "step": 5405 }, { "epoch": 4.23, "learning_rate": 1.2946284196876196e-05, "loss": 0.4, "step": 5406 }, { "epoch": 4.23, "learning_rate": 1.2943864190045941e-05, "loss": 0.4494, "step": 5407 }, { "epoch": 4.23, "learning_rate": 1.2941443994436567e-05, "loss": 0.3804, "step": 5408 }, { "epoch": 4.23, "learning_rate": 1.2939023610203266e-05, "loss": 0.4794, "step": 5409 }, { "epoch": 4.23, "learning_rate": 1.2936603037501251e-05, "loss": 0.4381, "step": 5410 }, { "epoch": 4.23, "learning_rate": 1.2934182276485752e-05, "loss": 0.4006, "step": 5411 }, { "epoch": 4.23, "learning_rate": 1.2931761327311993e-05, "loss": 0.5293, "step": 5412 }, { "epoch": 4.23, "learning_rate": 1.2929340190135222e-05, "loss": 0.5957, "step": 5413 }, { "epoch": 4.23, "learning_rate": 1.2926918865110706e-05, "loss": 0.4133, "step": 5414 }, { "epoch": 4.23, "learning_rate": 1.2924497352393705e-05, "loss": 0.4687, "step": 5415 }, { "epoch": 4.23, "learning_rate": 1.292207565213951e-05, "loss": 0.4587, "step": 5416 }, { "epoch": 4.23, "learning_rate": 1.2919653764503411e-05, "loss": 0.3639, "step": 5417 }, { "epoch": 4.23, "learning_rate": 1.2917231689640718e-05, "loss": 0.4433, "step": 5418 }, { "epoch": 4.24, "learning_rate": 1.2914809427706742e-05, "loss": 0.5109, "step": 5419 }, { "epoch": 4.24, "learning_rate": 1.2912386978856824e-05, "loss": 0.4332, "step": 5420 }, { "epoch": 4.24, "learning_rate": 1.2909964343246297e-05, "loss": 0.3458, "step": 5421 }, { "epoch": 4.24, "learning_rate": 1.2907541521030525e-05, "loss": 0.5197, "step": 5422 }, { "epoch": 4.24, "learning_rate": 1.2905118512364868e-05, "loss": 0.3983, "step": 5423 }, { "epoch": 4.24, "learning_rate": 1.290269531740471e-05, "loss": 0.4792, "step": 5424 }, { "epoch": 4.24, "learning_rate": 1.2900271936305436e-05, "loss": 0.637, "step": 5425 }, { "epoch": 4.24, "learning_rate": 1.2897848369222448e-05, "loss": 0.4793, "step": 5426 }, { "epoch": 4.24, "learning_rate": 1.2895424616311165e-05, "loss": 0.6661, "step": 5427 }, { "epoch": 4.24, "learning_rate": 1.2893000677727012e-05, "loss": 0.4799, "step": 5428 }, { "epoch": 4.24, "learning_rate": 1.289057655362543e-05, "loss": 0.4944, "step": 5429 }, { "epoch": 4.24, "learning_rate": 1.2888152244161858e-05, "loss": 0.435, "step": 5430 }, { "epoch": 4.24, "learning_rate": 1.2885727749491774e-05, "loss": 0.4572, "step": 5431 }, { "epoch": 4.25, "learning_rate": 1.2883303069770638e-05, "loss": 0.5457, "step": 5432 }, { "epoch": 4.25, "learning_rate": 1.2880878205153943e-05, "loss": 0.5284, "step": 5433 }, { "epoch": 4.25, "learning_rate": 1.2878453155797186e-05, "loss": 0.623, "step": 5434 }, { "epoch": 4.25, "learning_rate": 1.2876027921855876e-05, "loss": 0.48, "step": 5435 }, { "epoch": 4.25, "learning_rate": 1.2873602503485532e-05, "loss": 0.499, "step": 5436 }, { "epoch": 4.25, "learning_rate": 1.2871176900841692e-05, "loss": 0.4841, "step": 5437 }, { "epoch": 4.25, "learning_rate": 1.2868751114079892e-05, "loss": 0.4891, "step": 5438 }, { "epoch": 4.25, "learning_rate": 1.28663251433557e-05, "loss": 0.4238, "step": 5439 }, { "epoch": 4.25, "learning_rate": 1.2863898988824675e-05, "loss": 0.6328, "step": 5440 }, { "epoch": 4.25, "learning_rate": 1.2861472650642401e-05, "loss": 0.5958, "step": 5441 }, { "epoch": 4.25, "learning_rate": 1.2859046128964474e-05, "loss": 0.4366, "step": 5442 }, { "epoch": 4.25, "learning_rate": 1.2856619423946492e-05, "loss": 0.4437, "step": 5443 }, { "epoch": 4.25, "learning_rate": 1.2854192535744073e-05, "loss": 0.4723, "step": 5444 }, { "epoch": 4.26, "learning_rate": 1.2851765464512845e-05, "loss": 0.5128, "step": 5445 }, { "epoch": 4.26, "learning_rate": 1.2849338210408445e-05, "loss": 0.5934, "step": 5446 }, { "epoch": 4.26, "learning_rate": 1.2846910773586527e-05, "loss": 0.5098, "step": 5447 }, { "epoch": 4.26, "learning_rate": 1.2844483154202752e-05, "loss": 0.4905, "step": 5448 }, { "epoch": 4.26, "learning_rate": 1.2842055352412791e-05, "loss": 0.3357, "step": 5449 }, { "epoch": 4.26, "learning_rate": 1.2839627368372335e-05, "loss": 0.558, "step": 5450 }, { "epoch": 4.26, "learning_rate": 1.283719920223708e-05, "loss": 0.3771, "step": 5451 }, { "epoch": 4.26, "learning_rate": 1.2834770854162734e-05, "loss": 0.4606, "step": 5452 }, { "epoch": 4.26, "learning_rate": 1.2832342324305018e-05, "loss": 0.525, "step": 5453 }, { "epoch": 4.26, "learning_rate": 1.2829913612819672e-05, "loss": 0.4727, "step": 5454 }, { "epoch": 4.26, "learning_rate": 1.2827484719862424e-05, "loss": 0.6243, "step": 5455 }, { "epoch": 4.26, "learning_rate": 1.2825055645589046e-05, "loss": 0.4405, "step": 5456 }, { "epoch": 4.26, "learning_rate": 1.2822626390155298e-05, "loss": 0.3749, "step": 5457 }, { "epoch": 4.27, "learning_rate": 1.2820196953716959e-05, "loss": 0.4326, "step": 5458 }, { "epoch": 4.27, "learning_rate": 1.2817767336429822e-05, "loss": 0.5241, "step": 5459 }, { "epoch": 4.27, "learning_rate": 1.281533753844969e-05, "loss": 0.4712, "step": 5460 }, { "epoch": 4.27, "learning_rate": 1.2812907559932373e-05, "loss": 0.4628, "step": 5461 }, { "epoch": 4.27, "learning_rate": 1.2810477401033701e-05, "loss": 0.6838, "step": 5462 }, { "epoch": 4.27, "learning_rate": 1.2808047061909508e-05, "loss": 0.2633, "step": 5463 }, { "epoch": 4.27, "learning_rate": 1.2805616542715645e-05, "loss": 0.4096, "step": 5464 }, { "epoch": 4.27, "learning_rate": 1.280318584360797e-05, "loss": 0.4903, "step": 5465 }, { "epoch": 4.27, "learning_rate": 1.2800754964742355e-05, "loss": 0.5767, "step": 5466 }, { "epoch": 4.27, "learning_rate": 1.2798323906274683e-05, "loss": 0.5066, "step": 5467 }, { "epoch": 4.27, "learning_rate": 1.2795892668360855e-05, "loss": 0.3837, "step": 5468 }, { "epoch": 4.27, "learning_rate": 1.2793461251156767e-05, "loss": 0.382, "step": 5469 }, { "epoch": 4.28, "learning_rate": 1.2791029654818343e-05, "loss": 0.3759, "step": 5470 }, { "epoch": 4.28, "learning_rate": 1.2788597879501512e-05, "loss": 0.4165, "step": 5471 }, { "epoch": 4.28, "learning_rate": 1.2786165925362215e-05, "loss": 0.4141, "step": 5472 }, { "epoch": 4.28, "learning_rate": 1.27837337925564e-05, "loss": 0.6304, "step": 5473 }, { "epoch": 4.28, "learning_rate": 1.2781301481240036e-05, "loss": 0.476, "step": 5474 }, { "epoch": 4.28, "learning_rate": 1.2778868991569096e-05, "loss": 0.2853, "step": 5475 }, { "epoch": 4.28, "learning_rate": 1.2776436323699569e-05, "loss": 0.4719, "step": 5476 }, { "epoch": 4.28, "learning_rate": 1.2774003477787447e-05, "loss": 0.4789, "step": 5477 }, { "epoch": 4.28, "learning_rate": 1.2771570453988739e-05, "loss": 0.5746, "step": 5478 }, { "epoch": 4.28, "learning_rate": 1.2769137252459478e-05, "loss": 0.4594, "step": 5479 }, { "epoch": 4.28, "learning_rate": 1.2766703873355684e-05, "loss": 0.4423, "step": 5480 }, { "epoch": 4.28, "learning_rate": 1.2764270316833404e-05, "loss": 0.5525, "step": 5481 }, { "epoch": 4.28, "learning_rate": 1.2761836583048694e-05, "loss": 0.4867, "step": 5482 }, { "epoch": 4.29, "learning_rate": 1.2759402672157624e-05, "loss": 0.4046, "step": 5483 }, { "epoch": 4.29, "learning_rate": 1.275696858431626e-05, "loss": 0.6352, "step": 5484 }, { "epoch": 4.29, "learning_rate": 1.2754534319680705e-05, "loss": 0.496, "step": 5485 }, { "epoch": 4.29, "learning_rate": 1.275209987840705e-05, "loss": 0.5451, "step": 5486 }, { "epoch": 4.29, "learning_rate": 1.274966526065141e-05, "loss": 0.5207, "step": 5487 }, { "epoch": 4.29, "learning_rate": 1.2747230466569909e-05, "loss": 0.4338, "step": 5488 }, { "epoch": 4.29, "learning_rate": 1.2744795496318681e-05, "loss": 0.4539, "step": 5489 }, { "epoch": 4.29, "learning_rate": 1.2742360350053866e-05, "loss": 0.4137, "step": 5490 }, { "epoch": 4.29, "learning_rate": 1.273992502793163e-05, "loss": 0.4075, "step": 5491 }, { "epoch": 4.29, "learning_rate": 1.2737489530108134e-05, "loss": 0.4473, "step": 5492 }, { "epoch": 4.29, "learning_rate": 1.2735053856739566e-05, "loss": 0.5662, "step": 5493 }, { "epoch": 4.29, "learning_rate": 1.2732618007982105e-05, "loss": 0.522, "step": 5494 }, { "epoch": 4.29, "learning_rate": 1.2730181983991961e-05, "loss": 0.4769, "step": 5495 }, { "epoch": 4.3, "learning_rate": 1.2727745784925346e-05, "loss": 0.4115, "step": 5496 }, { "epoch": 4.3, "learning_rate": 1.2725309410938483e-05, "loss": 0.3976, "step": 5497 }, { "epoch": 4.3, "learning_rate": 1.2722872862187605e-05, "loss": 0.425, "step": 5498 }, { "epoch": 4.3, "learning_rate": 1.2720436138828967e-05, "loss": 0.3908, "step": 5499 }, { "epoch": 4.3, "learning_rate": 1.2717999241018819e-05, "loss": 0.4691, "step": 5500 }, { "epoch": 4.3, "learning_rate": 1.2715562168913435e-05, "loss": 0.5452, "step": 5501 }, { "epoch": 4.3, "learning_rate": 1.2713124922669096e-05, "loss": 0.464, "step": 5502 }, { "epoch": 4.3, "learning_rate": 1.2710687502442087e-05, "loss": 0.4661, "step": 5503 }, { "epoch": 4.3, "learning_rate": 1.270824990838872e-05, "loss": 0.4701, "step": 5504 }, { "epoch": 4.3, "learning_rate": 1.2705812140665302e-05, "loss": 0.4929, "step": 5505 }, { "epoch": 4.3, "learning_rate": 1.2703374199428158e-05, "loss": 0.4266, "step": 5506 }, { "epoch": 4.3, "learning_rate": 1.2700936084833627e-05, "loss": 0.497, "step": 5507 }, { "epoch": 4.3, "learning_rate": 1.2698497797038058e-05, "loss": 0.3912, "step": 5508 }, { "epoch": 4.31, "learning_rate": 1.2696059336197805e-05, "loss": 0.6176, "step": 5509 }, { "epoch": 4.31, "learning_rate": 1.269362070246924e-05, "loss": 0.3799, "step": 5510 }, { "epoch": 4.31, "learning_rate": 1.2691181896008744e-05, "loss": 0.5012, "step": 5511 }, { "epoch": 4.31, "learning_rate": 1.2688742916972708e-05, "loss": 0.4629, "step": 5512 }, { "epoch": 4.31, "learning_rate": 1.2686303765517533e-05, "loss": 0.4022, "step": 5513 }, { "epoch": 4.31, "learning_rate": 1.2683864441799635e-05, "loss": 0.4296, "step": 5514 }, { "epoch": 4.31, "learning_rate": 1.2681424945975434e-05, "loss": 0.4189, "step": 5515 }, { "epoch": 4.31, "learning_rate": 1.2678985278201374e-05, "loss": 0.3644, "step": 5516 }, { "epoch": 4.31, "learning_rate": 1.2676545438633895e-05, "loss": 0.4502, "step": 5517 }, { "epoch": 4.31, "learning_rate": 1.2674105427429459e-05, "loss": 0.529, "step": 5518 }, { "epoch": 4.31, "learning_rate": 1.2671665244744533e-05, "loss": 0.3611, "step": 5519 }, { "epoch": 4.31, "learning_rate": 1.2669224890735598e-05, "loss": 0.5344, "step": 5520 }, { "epoch": 4.31, "learning_rate": 1.2666784365559144e-05, "loss": 0.4122, "step": 5521 }, { "epoch": 4.32, "learning_rate": 1.2664343669371672e-05, "loss": 0.6437, "step": 5522 }, { "epoch": 4.32, "learning_rate": 1.2661902802329698e-05, "loss": 0.4114, "step": 5523 }, { "epoch": 4.32, "learning_rate": 1.265946176458974e-05, "loss": 0.4482, "step": 5524 }, { "epoch": 4.32, "learning_rate": 1.265702055630834e-05, "loss": 0.3071, "step": 5525 }, { "epoch": 4.32, "learning_rate": 1.2654579177642035e-05, "loss": 0.6098, "step": 5526 }, { "epoch": 4.32, "learning_rate": 1.2652137628747391e-05, "loss": 0.4691, "step": 5527 }, { "epoch": 4.32, "learning_rate": 1.264969590978097e-05, "loss": 0.4268, "step": 5528 }, { "epoch": 4.32, "learning_rate": 1.2647254020899351e-05, "loss": 0.512, "step": 5529 }, { "epoch": 4.32, "learning_rate": 1.2644811962259125e-05, "loss": 0.3701, "step": 5530 }, { "epoch": 4.32, "learning_rate": 1.264236973401689e-05, "loss": 0.3996, "step": 5531 }, { "epoch": 4.32, "learning_rate": 1.2639927336329256e-05, "loss": 0.6438, "step": 5532 }, { "epoch": 4.32, "learning_rate": 1.2637484769352852e-05, "loss": 0.5271, "step": 5533 }, { "epoch": 4.33, "learning_rate": 1.26350420332443e-05, "loss": 0.4963, "step": 5534 }, { "epoch": 4.33, "learning_rate": 1.2632599128160253e-05, "loss": 0.4807, "step": 5535 }, { "epoch": 4.33, "learning_rate": 1.263015605425736e-05, "loss": 0.6397, "step": 5536 }, { "epoch": 4.33, "learning_rate": 1.262771281169229e-05, "loss": 0.698, "step": 5537 }, { "epoch": 4.33, "learning_rate": 1.2625269400621716e-05, "loss": 0.6111, "step": 5538 }, { "epoch": 4.33, "learning_rate": 1.2622825821202327e-05, "loss": 0.6026, "step": 5539 }, { "epoch": 4.33, "learning_rate": 1.2620382073590818e-05, "loss": 0.3621, "step": 5540 }, { "epoch": 4.33, "learning_rate": 1.2617938157943901e-05, "loss": 0.5523, "step": 5541 }, { "epoch": 4.33, "learning_rate": 1.2615494074418294e-05, "loss": 0.4268, "step": 5542 }, { "epoch": 4.33, "learning_rate": 1.2613049823170726e-05, "loss": 0.5967, "step": 5543 }, { "epoch": 4.33, "learning_rate": 1.2610605404357936e-05, "loss": 0.5767, "step": 5544 }, { "epoch": 4.33, "learning_rate": 1.260816081813668e-05, "loss": 0.5249, "step": 5545 }, { "epoch": 4.33, "learning_rate": 1.260571606466372e-05, "loss": 0.5264, "step": 5546 }, { "epoch": 4.34, "learning_rate": 1.2603271144095826e-05, "loss": 0.307, "step": 5547 }, { "epoch": 4.34, "learning_rate": 1.260082605658978e-05, "loss": 0.559, "step": 5548 }, { "epoch": 4.34, "learning_rate": 1.2598380802302384e-05, "loss": 0.5343, "step": 5549 }, { "epoch": 4.34, "learning_rate": 1.2595935381390437e-05, "loss": 0.4048, "step": 5550 }, { "epoch": 4.34, "learning_rate": 1.2593489794010754e-05, "loss": 0.4599, "step": 5551 }, { "epoch": 4.34, "learning_rate": 1.2591044040320166e-05, "loss": 0.595, "step": 5552 }, { "epoch": 4.34, "learning_rate": 1.2588598120475508e-05, "loss": 0.3917, "step": 5553 }, { "epoch": 4.34, "learning_rate": 1.2586152034633626e-05, "loss": 0.5571, "step": 5554 }, { "epoch": 4.34, "learning_rate": 1.258370578295138e-05, "loss": 0.4772, "step": 5555 }, { "epoch": 4.34, "learning_rate": 1.2581259365585643e-05, "loss": 0.7369, "step": 5556 }, { "epoch": 4.34, "learning_rate": 1.2578812782693285e-05, "loss": 0.3045, "step": 5557 }, { "epoch": 4.34, "learning_rate": 1.2576366034431207e-05, "loss": 0.5148, "step": 5558 }, { "epoch": 4.34, "learning_rate": 1.2573919120956303e-05, "loss": 0.5942, "step": 5559 }, { "epoch": 4.35, "learning_rate": 1.2571472042425488e-05, "loss": 0.4654, "step": 5560 }, { "epoch": 4.35, "learning_rate": 1.256902479899568e-05, "loss": 0.3178, "step": 5561 }, { "epoch": 4.35, "learning_rate": 1.256657739082382e-05, "loss": 0.3112, "step": 5562 }, { "epoch": 4.35, "learning_rate": 1.256412981806684e-05, "loss": 0.5501, "step": 5563 }, { "epoch": 4.35, "learning_rate": 1.25616820808817e-05, "loss": 0.4584, "step": 5564 }, { "epoch": 4.35, "learning_rate": 1.2559234179425366e-05, "loss": 0.5193, "step": 5565 }, { "epoch": 4.35, "learning_rate": 1.2556786113854811e-05, "loss": 0.4024, "step": 5566 }, { "epoch": 4.35, "learning_rate": 1.2554337884327019e-05, "loss": 0.5215, "step": 5567 }, { "epoch": 4.35, "learning_rate": 1.2551889490998988e-05, "loss": 0.4393, "step": 5568 }, { "epoch": 4.35, "learning_rate": 1.2549440934027718e-05, "loss": 0.2711, "step": 5569 }, { "epoch": 4.35, "learning_rate": 1.2546992213570235e-05, "loss": 0.4949, "step": 5570 }, { "epoch": 4.35, "learning_rate": 1.2544543329783563e-05, "loss": 0.6149, "step": 5571 }, { "epoch": 4.35, "learning_rate": 1.254209428282474e-05, "loss": 0.4348, "step": 5572 }, { "epoch": 4.36, "learning_rate": 1.2539645072850814e-05, "loss": 0.5903, "step": 5573 }, { "epoch": 4.36, "learning_rate": 1.2537195700018843e-05, "loss": 0.3915, "step": 5574 }, { "epoch": 4.36, "learning_rate": 1.2534746164485893e-05, "loss": 0.3476, "step": 5575 }, { "epoch": 4.36, "learning_rate": 1.253229646640905e-05, "loss": 0.4742, "step": 5576 }, { "epoch": 4.36, "learning_rate": 1.2529846605945404e-05, "loss": 0.4656, "step": 5577 }, { "epoch": 4.36, "learning_rate": 1.2527396583252051e-05, "loss": 0.5029, "step": 5578 }, { "epoch": 4.36, "learning_rate": 1.2524946398486105e-05, "loss": 0.4617, "step": 5579 }, { "epoch": 4.36, "learning_rate": 1.2522496051804684e-05, "loss": 0.4264, "step": 5580 }, { "epoch": 4.36, "learning_rate": 1.2520045543364922e-05, "loss": 0.4812, "step": 5581 }, { "epoch": 4.36, "learning_rate": 1.2517594873323961e-05, "loss": 0.4318, "step": 5582 }, { "epoch": 4.36, "learning_rate": 1.2515144041838955e-05, "loss": 0.4593, "step": 5583 }, { "epoch": 4.36, "learning_rate": 1.2512693049067066e-05, "loss": 0.5296, "step": 5584 }, { "epoch": 4.36, "learning_rate": 1.2510241895165465e-05, "loss": 0.4226, "step": 5585 }, { "epoch": 4.37, "learning_rate": 1.2507790580291332e-05, "loss": 0.3397, "step": 5586 }, { "epoch": 4.37, "learning_rate": 1.2505339104601873e-05, "loss": 0.3761, "step": 5587 }, { "epoch": 4.37, "learning_rate": 1.2502887468254278e-05, "loss": 0.5728, "step": 5588 }, { "epoch": 4.37, "learning_rate": 1.2500435671405772e-05, "loss": 0.6353, "step": 5589 }, { "epoch": 4.37, "learning_rate": 1.2497983714213575e-05, "loss": 0.4951, "step": 5590 }, { "epoch": 4.37, "learning_rate": 1.249553159683492e-05, "loss": 0.5931, "step": 5591 }, { "epoch": 4.37, "learning_rate": 1.2493079319427054e-05, "loss": 0.5219, "step": 5592 }, { "epoch": 4.37, "learning_rate": 1.2490626882147238e-05, "loss": 0.544, "step": 5593 }, { "epoch": 4.37, "learning_rate": 1.2488174285152728e-05, "loss": 0.6237, "step": 5594 }, { "epoch": 4.37, "learning_rate": 1.2485721528600806e-05, "loss": 0.3687, "step": 5595 }, { "epoch": 4.37, "learning_rate": 1.248326861264876e-05, "loss": 0.4297, "step": 5596 }, { "epoch": 4.37, "learning_rate": 1.248081553745388e-05, "loss": 0.4902, "step": 5597 }, { "epoch": 4.38, "learning_rate": 1.2478362303173475e-05, "loss": 0.4499, "step": 5598 }, { "epoch": 4.38, "learning_rate": 1.2475908909964867e-05, "loss": 0.4692, "step": 5599 }, { "epoch": 4.38, "learning_rate": 1.2473455357985375e-05, "loss": 0.4607, "step": 5600 }, { "epoch": 4.38, "learning_rate": 1.2471001647392343e-05, "loss": 0.4844, "step": 5601 }, { "epoch": 4.38, "learning_rate": 1.2468547778343115e-05, "loss": 0.4586, "step": 5602 }, { "epoch": 4.38, "learning_rate": 1.2466093750995047e-05, "loss": 0.5424, "step": 5603 }, { "epoch": 4.38, "learning_rate": 1.2463639565505512e-05, "loss": 0.4114, "step": 5604 }, { "epoch": 4.38, "learning_rate": 1.2461185222031882e-05, "loss": 0.4918, "step": 5605 }, { "epoch": 4.38, "learning_rate": 1.2458730720731546e-05, "loss": 0.558, "step": 5606 }, { "epoch": 4.38, "learning_rate": 1.245627606176191e-05, "loss": 0.5055, "step": 5607 }, { "epoch": 4.38, "learning_rate": 1.245382124528037e-05, "loss": 0.5895, "step": 5608 }, { "epoch": 4.38, "learning_rate": 1.2451366271444352e-05, "loss": 0.4782, "step": 5609 }, { "epoch": 4.38, "learning_rate": 1.2448911140411283e-05, "loss": 0.5614, "step": 5610 }, { "epoch": 4.39, "learning_rate": 1.24464558523386e-05, "loss": 0.4546, "step": 5611 }, { "epoch": 4.39, "learning_rate": 1.2444000407383753e-05, "loss": 0.4703, "step": 5612 }, { "epoch": 4.39, "learning_rate": 1.24415448057042e-05, "loss": 0.4003, "step": 5613 }, { "epoch": 4.39, "learning_rate": 1.243908904745741e-05, "loss": 0.3532, "step": 5614 }, { "epoch": 4.39, "learning_rate": 1.2436633132800862e-05, "loss": 0.545, "step": 5615 }, { "epoch": 4.39, "learning_rate": 1.2434177061892046e-05, "loss": 0.4782, "step": 5616 }, { "epoch": 4.39, "learning_rate": 1.2431720834888454e-05, "loss": 0.5702, "step": 5617 }, { "epoch": 4.39, "learning_rate": 1.2429264451947602e-05, "loss": 0.4358, "step": 5618 }, { "epoch": 4.39, "learning_rate": 1.2426807913227009e-05, "loss": 0.4719, "step": 5619 }, { "epoch": 4.39, "learning_rate": 1.2424351218884198e-05, "loss": 0.5848, "step": 5620 }, { "epoch": 4.39, "learning_rate": 1.2421894369076715e-05, "loss": 0.6576, "step": 5621 }, { "epoch": 4.39, "learning_rate": 1.2419437363962102e-05, "loss": 0.7073, "step": 5622 }, { "epoch": 4.39, "learning_rate": 1.2416980203697917e-05, "loss": 0.5033, "step": 5623 }, { "epoch": 4.4, "learning_rate": 1.2414522888441736e-05, "loss": 0.5335, "step": 5624 }, { "epoch": 4.4, "learning_rate": 1.2412065418351133e-05, "loss": 0.3895, "step": 5625 }, { "epoch": 4.4, "learning_rate": 1.2409607793583696e-05, "loss": 0.2982, "step": 5626 }, { "epoch": 4.4, "learning_rate": 1.2407150014297025e-05, "loss": 0.2341, "step": 5627 }, { "epoch": 4.4, "learning_rate": 1.2404692080648726e-05, "loss": 0.3417, "step": 5628 }, { "epoch": 4.4, "learning_rate": 1.240223399279642e-05, "loss": 0.4271, "step": 5629 }, { "epoch": 4.4, "learning_rate": 1.2399775750897733e-05, "loss": 0.404, "step": 5630 }, { "epoch": 4.4, "learning_rate": 1.2397317355110305e-05, "loss": 0.5042, "step": 5631 }, { "epoch": 4.4, "learning_rate": 1.2394858805591781e-05, "loss": 0.3897, "step": 5632 }, { "epoch": 4.4, "learning_rate": 1.2392400102499825e-05, "loss": 0.5709, "step": 5633 }, { "epoch": 4.4, "learning_rate": 1.2389941245992092e-05, "loss": 0.5296, "step": 5634 }, { "epoch": 4.4, "learning_rate": 1.2387482236226273e-05, "loss": 0.718, "step": 5635 }, { "epoch": 4.4, "learning_rate": 1.2385023073360047e-05, "loss": 0.4631, "step": 5636 }, { "epoch": 4.41, "learning_rate": 1.2382563757551116e-05, "loss": 0.3672, "step": 5637 }, { "epoch": 4.41, "learning_rate": 1.238010428895718e-05, "loss": 0.4857, "step": 5638 }, { "epoch": 4.41, "learning_rate": 1.2377644667735966e-05, "loss": 0.4874, "step": 5639 }, { "epoch": 4.41, "learning_rate": 1.237518489404519e-05, "loss": 0.4743, "step": 5640 }, { "epoch": 4.41, "learning_rate": 1.2372724968042595e-05, "loss": 0.5847, "step": 5641 }, { "epoch": 4.41, "learning_rate": 1.2370264889885922e-05, "loss": 0.4475, "step": 5642 }, { "epoch": 4.41, "learning_rate": 1.2367804659732929e-05, "loss": 0.4672, "step": 5643 }, { "epoch": 4.41, "learning_rate": 1.2365344277741382e-05, "loss": 0.5164, "step": 5644 }, { "epoch": 4.41, "learning_rate": 1.2362883744069057e-05, "loss": 0.5069, "step": 5645 }, { "epoch": 4.41, "learning_rate": 1.2360423058873736e-05, "loss": 0.548, "step": 5646 }, { "epoch": 4.41, "learning_rate": 1.2357962222313216e-05, "loss": 0.4607, "step": 5647 }, { "epoch": 4.41, "learning_rate": 1.2355501234545302e-05, "loss": 0.4888, "step": 5648 }, { "epoch": 4.42, "learning_rate": 1.2353040095727806e-05, "loss": 0.4801, "step": 5649 }, { "epoch": 4.42, "learning_rate": 1.2350578806018551e-05, "loss": 0.6061, "step": 5650 }, { "epoch": 4.42, "learning_rate": 1.2348117365575374e-05, "loss": 0.3632, "step": 5651 }, { "epoch": 4.42, "learning_rate": 1.2345655774556114e-05, "loss": 0.3886, "step": 5652 }, { "epoch": 4.42, "learning_rate": 1.2343194033118628e-05, "loss": 0.5521, "step": 5653 }, { "epoch": 4.42, "learning_rate": 1.2340732141420775e-05, "loss": 0.4603, "step": 5654 }, { "epoch": 4.42, "learning_rate": 1.2338270099620429e-05, "loss": 0.4627, "step": 5655 }, { "epoch": 4.42, "learning_rate": 1.233580790787547e-05, "loss": 0.4905, "step": 5656 }, { "epoch": 4.42, "learning_rate": 1.2333345566343788e-05, "loss": 0.413, "step": 5657 }, { "epoch": 4.42, "learning_rate": 1.2330883075183289e-05, "loss": 0.5939, "step": 5658 }, { "epoch": 4.42, "learning_rate": 1.2328420434551878e-05, "loss": 0.36, "step": 5659 }, { "epoch": 4.42, "learning_rate": 1.2325957644607478e-05, "loss": 0.4797, "step": 5660 }, { "epoch": 4.42, "learning_rate": 1.2323494705508016e-05, "loss": 0.4333, "step": 5661 }, { "epoch": 4.43, "learning_rate": 1.2321031617411437e-05, "loss": 0.3996, "step": 5662 }, { "epoch": 4.43, "learning_rate": 1.2318568380475682e-05, "loss": 0.545, "step": 5663 }, { "epoch": 4.43, "learning_rate": 1.2316104994858718e-05, "loss": 0.5759, "step": 5664 }, { "epoch": 4.43, "learning_rate": 1.2313641460718502e-05, "loss": 0.4798, "step": 5665 }, { "epoch": 4.43, "learning_rate": 1.2311177778213022e-05, "loss": 0.3157, "step": 5666 }, { "epoch": 4.43, "learning_rate": 1.2308713947500257e-05, "loss": 0.5639, "step": 5667 }, { "epoch": 4.43, "learning_rate": 1.2306249968738209e-05, "loss": 0.476, "step": 5668 }, { "epoch": 4.43, "learning_rate": 1.2303785842084877e-05, "loss": 0.5251, "step": 5669 }, { "epoch": 4.43, "learning_rate": 1.2301321567698286e-05, "loss": 0.5684, "step": 5670 }, { "epoch": 4.43, "learning_rate": 1.2298857145736451e-05, "loss": 0.5185, "step": 5671 }, { "epoch": 4.43, "learning_rate": 1.2296392576357412e-05, "loss": 0.3898, "step": 5672 }, { "epoch": 4.43, "learning_rate": 1.2293927859719212e-05, "loss": 0.4742, "step": 5673 }, { "epoch": 4.43, "learning_rate": 1.2291462995979903e-05, "loss": 0.4364, "step": 5674 }, { "epoch": 4.44, "learning_rate": 1.2288997985297548e-05, "loss": 0.4968, "step": 5675 }, { "epoch": 4.44, "learning_rate": 1.228653282783022e-05, "loss": 0.4424, "step": 5676 }, { "epoch": 4.44, "learning_rate": 1.2284067523735999e-05, "loss": 0.4672, "step": 5677 }, { "epoch": 4.44, "learning_rate": 1.2281602073172977e-05, "loss": 0.3365, "step": 5678 }, { "epoch": 4.44, "learning_rate": 1.2279136476299253e-05, "loss": 0.5136, "step": 5679 }, { "epoch": 4.44, "learning_rate": 1.2276670733272935e-05, "loss": 0.5312, "step": 5680 }, { "epoch": 4.44, "learning_rate": 1.2274204844252151e-05, "loss": 0.4648, "step": 5681 }, { "epoch": 4.44, "learning_rate": 1.2271738809395015e-05, "loss": 0.5048, "step": 5682 }, { "epoch": 4.44, "learning_rate": 1.2269272628859676e-05, "loss": 0.3903, "step": 5683 }, { "epoch": 4.44, "learning_rate": 1.2266806302804277e-05, "loss": 0.5486, "step": 5684 }, { "epoch": 4.44, "learning_rate": 1.2264339831386975e-05, "loss": 0.4187, "step": 5685 }, { "epoch": 4.44, "learning_rate": 1.2261873214765934e-05, "loss": 0.4533, "step": 5686 }, { "epoch": 4.44, "learning_rate": 1.2259406453099331e-05, "loss": 0.3148, "step": 5687 }, { "epoch": 4.45, "learning_rate": 1.225693954654535e-05, "loss": 0.3899, "step": 5688 }, { "epoch": 4.45, "learning_rate": 1.2254472495262185e-05, "loss": 0.5897, "step": 5689 }, { "epoch": 4.45, "learning_rate": 1.2252005299408037e-05, "loss": 0.4941, "step": 5690 }, { "epoch": 4.45, "learning_rate": 1.2249537959141119e-05, "loss": 0.546, "step": 5691 }, { "epoch": 4.45, "learning_rate": 1.2247070474619651e-05, "loss": 0.696, "step": 5692 }, { "epoch": 4.45, "learning_rate": 1.224460284600187e-05, "loss": 0.3633, "step": 5693 }, { "epoch": 4.45, "learning_rate": 1.2242135073446007e-05, "loss": 0.3525, "step": 5694 }, { "epoch": 4.45, "learning_rate": 1.2239667157110318e-05, "loss": 0.5747, "step": 5695 }, { "epoch": 4.45, "learning_rate": 1.2237199097153057e-05, "loss": 0.3983, "step": 5696 }, { "epoch": 4.45, "learning_rate": 1.2234730893732491e-05, "loss": 0.4491, "step": 5697 }, { "epoch": 4.45, "learning_rate": 1.2232262547006904e-05, "loss": 0.3949, "step": 5698 }, { "epoch": 4.45, "learning_rate": 1.2229794057134574e-05, "loss": 0.3795, "step": 5699 }, { "epoch": 4.45, "learning_rate": 1.2227325424273796e-05, "loss": 0.5653, "step": 5700 }, { "epoch": 4.46, "learning_rate": 1.2224856648582884e-05, "loss": 0.5063, "step": 5701 }, { "epoch": 4.46, "learning_rate": 1.2222387730220142e-05, "loss": 0.4177, "step": 5702 }, { "epoch": 4.46, "learning_rate": 1.2219918669343896e-05, "loss": 0.4757, "step": 5703 }, { "epoch": 4.46, "learning_rate": 1.2217449466112476e-05, "loss": 0.5469, "step": 5704 }, { "epoch": 4.46, "learning_rate": 1.2214980120684224e-05, "loss": 0.446, "step": 5705 }, { "epoch": 4.46, "learning_rate": 1.2212510633217489e-05, "loss": 0.5065, "step": 5706 }, { "epoch": 4.46, "learning_rate": 1.2210041003870634e-05, "loss": 0.6181, "step": 5707 }, { "epoch": 4.46, "learning_rate": 1.2207571232802022e-05, "loss": 0.5339, "step": 5708 }, { "epoch": 4.46, "learning_rate": 1.2205101320170032e-05, "loss": 0.5066, "step": 5709 }, { "epoch": 4.46, "learning_rate": 1.2202631266133053e-05, "loss": 0.428, "step": 5710 }, { "epoch": 4.46, "learning_rate": 1.2200161070849477e-05, "loss": 0.5068, "step": 5711 }, { "epoch": 4.46, "learning_rate": 1.2197690734477711e-05, "loss": 0.3132, "step": 5712 }, { "epoch": 4.47, "learning_rate": 1.2195220257176168e-05, "loss": 0.5247, "step": 5713 }, { "epoch": 4.47, "learning_rate": 1.2192749639103269e-05, "loss": 0.5533, "step": 5714 }, { "epoch": 4.47, "learning_rate": 1.2190278880417447e-05, "loss": 0.4808, "step": 5715 }, { "epoch": 4.47, "learning_rate": 1.2187807981277142e-05, "loss": 0.4128, "step": 5716 }, { "epoch": 4.47, "learning_rate": 1.2185336941840801e-05, "loss": 0.6833, "step": 5717 }, { "epoch": 4.47, "learning_rate": 1.2182865762266892e-05, "loss": 0.48, "step": 5718 }, { "epoch": 4.47, "learning_rate": 1.218039444271387e-05, "loss": 0.7058, "step": 5719 }, { "epoch": 4.47, "learning_rate": 1.2177922983340218e-05, "loss": 0.5618, "step": 5720 }, { "epoch": 4.47, "learning_rate": 1.2175451384304425e-05, "loss": 0.3984, "step": 5721 }, { "epoch": 4.47, "learning_rate": 1.2172979645764977e-05, "loss": 0.4737, "step": 5722 }, { "epoch": 4.47, "learning_rate": 1.2170507767880384e-05, "loss": 0.4192, "step": 5723 }, { "epoch": 4.47, "learning_rate": 1.2168035750809158e-05, "loss": 0.4704, "step": 5724 }, { "epoch": 4.47, "learning_rate": 1.2165563594709818e-05, "loss": 0.4969, "step": 5725 }, { "epoch": 4.48, "learning_rate": 1.2163091299740895e-05, "loss": 0.4597, "step": 5726 }, { "epoch": 4.48, "learning_rate": 1.2160618866060927e-05, "loss": 0.494, "step": 5727 }, { "epoch": 4.48, "learning_rate": 1.2158146293828464e-05, "loss": 0.4886, "step": 5728 }, { "epoch": 4.48, "learning_rate": 1.2155673583202061e-05, "loss": 0.4831, "step": 5729 }, { "epoch": 4.48, "learning_rate": 1.2153200734340286e-05, "loss": 0.4194, "step": 5730 }, { "epoch": 4.48, "learning_rate": 1.2150727747401714e-05, "loss": 0.6007, "step": 5731 }, { "epoch": 4.48, "learning_rate": 1.2148254622544926e-05, "loss": 0.5334, "step": 5732 }, { "epoch": 4.48, "learning_rate": 1.2145781359928514e-05, "loss": 0.4516, "step": 5733 }, { "epoch": 4.48, "learning_rate": 1.214330795971108e-05, "loss": 0.3505, "step": 5734 }, { "epoch": 4.48, "learning_rate": 1.2140834422051239e-05, "loss": 0.559, "step": 5735 }, { "epoch": 4.48, "learning_rate": 1.21383607471076e-05, "loss": 0.4361, "step": 5736 }, { "epoch": 4.48, "learning_rate": 1.2135886935038802e-05, "loss": 0.5045, "step": 5737 }, { "epoch": 4.48, "learning_rate": 1.213341298600347e-05, "loss": 0.4956, "step": 5738 }, { "epoch": 4.49, "learning_rate": 1.2130938900160258e-05, "loss": 0.3449, "step": 5739 }, { "epoch": 4.49, "learning_rate": 1.2128464677667812e-05, "loss": 0.447, "step": 5740 }, { "epoch": 4.49, "learning_rate": 1.2125990318684806e-05, "loss": 0.5461, "step": 5741 }, { "epoch": 4.49, "learning_rate": 1.21235158233699e-05, "loss": 0.7683, "step": 5742 }, { "epoch": 4.49, "learning_rate": 1.2121041191881782e-05, "loss": 0.457, "step": 5743 }, { "epoch": 4.49, "learning_rate": 1.2118566424379138e-05, "loss": 0.5656, "step": 5744 }, { "epoch": 4.49, "learning_rate": 1.2116091521020663e-05, "loss": 0.3853, "step": 5745 }, { "epoch": 4.49, "learning_rate": 1.2113616481965065e-05, "loss": 0.3919, "step": 5746 }, { "epoch": 4.49, "learning_rate": 1.2111141307371064e-05, "loss": 0.3986, "step": 5747 }, { "epoch": 4.49, "learning_rate": 1.2108665997397378e-05, "loss": 0.323, "step": 5748 }, { "epoch": 4.49, "learning_rate": 1.210619055220274e-05, "loss": 0.435, "step": 5749 }, { "epoch": 4.49, "learning_rate": 1.2103714971945896e-05, "loss": 0.5563, "step": 5750 }, { "epoch": 4.49, "learning_rate": 1.2101239256785594e-05, "loss": 0.4969, "step": 5751 }, { "epoch": 4.5, "learning_rate": 1.2098763406880588e-05, "loss": 0.5009, "step": 5752 }, { "epoch": 4.5, "learning_rate": 1.2096287422389646e-05, "loss": 0.4073, "step": 5753 }, { "epoch": 4.5, "learning_rate": 1.2093811303471547e-05, "loss": 0.317, "step": 5754 }, { "epoch": 4.5, "learning_rate": 1.2091335050285076e-05, "loss": 0.4276, "step": 5755 }, { "epoch": 4.5, "learning_rate": 1.2088858662989023e-05, "loss": 0.4271, "step": 5756 }, { "epoch": 4.5, "learning_rate": 1.2086382141742188e-05, "loss": 0.6503, "step": 5757 }, { "epoch": 4.5, "learning_rate": 1.2083905486703388e-05, "loss": 0.4727, "step": 5758 }, { "epoch": 4.5, "learning_rate": 1.2081428698031435e-05, "loss": 0.2949, "step": 5759 }, { "epoch": 4.5, "learning_rate": 1.207895177588516e-05, "loss": 0.5071, "step": 5760 }, { "epoch": 4.5, "learning_rate": 1.2076474720423396e-05, "loss": 0.4843, "step": 5761 }, { "epoch": 4.5, "learning_rate": 1.2073997531804991e-05, "loss": 0.4651, "step": 5762 }, { "epoch": 4.5, "learning_rate": 1.2071520210188794e-05, "loss": 0.3319, "step": 5763 }, { "epoch": 4.5, "learning_rate": 1.206904275573367e-05, "loss": 0.5803, "step": 5764 }, { "epoch": 4.51, "learning_rate": 1.2066565168598486e-05, "loss": 0.5848, "step": 5765 }, { "epoch": 4.51, "learning_rate": 1.2064087448942124e-05, "loss": 0.6393, "step": 5766 }, { "epoch": 4.51, "learning_rate": 1.2061609596923467e-05, "loss": 0.3541, "step": 5767 }, { "epoch": 4.51, "learning_rate": 1.2059131612701412e-05, "loss": 0.4757, "step": 5768 }, { "epoch": 4.51, "learning_rate": 1.2056653496434864e-05, "loss": 0.4989, "step": 5769 }, { "epoch": 4.51, "learning_rate": 1.2054175248282735e-05, "loss": 0.4857, "step": 5770 }, { "epoch": 4.51, "learning_rate": 1.2051696868403945e-05, "loss": 0.4651, "step": 5771 }, { "epoch": 4.51, "learning_rate": 1.2049218356957427e-05, "loss": 0.5762, "step": 5772 }, { "epoch": 4.51, "learning_rate": 1.2046739714102112e-05, "loss": 0.543, "step": 5773 }, { "epoch": 4.51, "learning_rate": 1.2044260939996951e-05, "loss": 0.4522, "step": 5774 }, { "epoch": 4.51, "learning_rate": 1.2041782034800899e-05, "loss": 0.6004, "step": 5775 }, { "epoch": 4.51, "learning_rate": 1.2039302998672918e-05, "loss": 0.7094, "step": 5776 }, { "epoch": 4.52, "learning_rate": 1.2036823831771973e-05, "loss": 0.6763, "step": 5777 }, { "epoch": 4.52, "learning_rate": 1.2034344534257056e-05, "loss": 0.6182, "step": 5778 }, { "epoch": 4.52, "learning_rate": 1.2031865106287148e-05, "loss": 0.4099, "step": 5779 }, { "epoch": 4.52, "learning_rate": 1.2029385548021246e-05, "loss": 0.5338, "step": 5780 }, { "epoch": 4.52, "learning_rate": 1.2026905859618356e-05, "loss": 0.3965, "step": 5781 }, { "epoch": 4.52, "learning_rate": 1.2024426041237486e-05, "loss": 0.4302, "step": 5782 }, { "epoch": 4.52, "learning_rate": 1.2021946093037669e-05, "loss": 0.466, "step": 5783 }, { "epoch": 4.52, "learning_rate": 1.2019466015177925e-05, "loss": 0.4235, "step": 5784 }, { "epoch": 4.52, "learning_rate": 1.2016985807817296e-05, "loss": 0.4077, "step": 5785 }, { "epoch": 4.52, "learning_rate": 1.2014505471114826e-05, "loss": 0.5158, "step": 5786 }, { "epoch": 4.52, "learning_rate": 1.2012025005229578e-05, "loss": 0.546, "step": 5787 }, { "epoch": 4.52, "learning_rate": 1.2009544410320601e-05, "loss": 0.4828, "step": 5788 }, { "epoch": 4.52, "learning_rate": 1.200706368654698e-05, "loss": 0.5093, "step": 5789 }, { "epoch": 4.53, "learning_rate": 1.2004582834067784e-05, "loss": 0.3917, "step": 5790 }, { "epoch": 4.53, "learning_rate": 1.200210185304211e-05, "loss": 0.5358, "step": 5791 }, { "epoch": 4.53, "learning_rate": 1.1999620743629048e-05, "loss": 0.3936, "step": 5792 }, { "epoch": 4.53, "learning_rate": 1.1997139505987705e-05, "loss": 0.6244, "step": 5793 }, { "epoch": 4.53, "learning_rate": 1.199465814027719e-05, "loss": 0.7797, "step": 5794 }, { "epoch": 4.53, "learning_rate": 1.199217664665663e-05, "loss": 0.4911, "step": 5795 }, { "epoch": 4.53, "learning_rate": 1.198969502528515e-05, "loss": 0.6311, "step": 5796 }, { "epoch": 4.53, "learning_rate": 1.1987213276321885e-05, "loss": 0.5014, "step": 5797 }, { "epoch": 4.53, "learning_rate": 1.1984731399925986e-05, "loss": 0.47, "step": 5798 }, { "epoch": 4.53, "learning_rate": 1.1982249396256605e-05, "loss": 0.3994, "step": 5799 }, { "epoch": 4.53, "learning_rate": 1.1979767265472899e-05, "loss": 0.4573, "step": 5800 }, { "epoch": 4.53, "learning_rate": 1.1977285007734043e-05, "loss": 0.3166, "step": 5801 }, { "epoch": 4.53, "learning_rate": 1.1974802623199215e-05, "loss": 0.4352, "step": 5802 }, { "epoch": 4.54, "learning_rate": 1.1972320112027597e-05, "loss": 0.4823, "step": 5803 }, { "epoch": 4.54, "learning_rate": 1.1969837474378388e-05, "loss": 0.6287, "step": 5804 }, { "epoch": 4.54, "learning_rate": 1.1967354710410786e-05, "loss": 0.5738, "step": 5805 }, { "epoch": 4.54, "learning_rate": 1.1964871820284004e-05, "loss": 0.4746, "step": 5806 }, { "epoch": 4.54, "learning_rate": 1.196238880415726e-05, "loss": 0.3917, "step": 5807 }, { "epoch": 4.54, "learning_rate": 1.1959905662189783e-05, "loss": 0.4286, "step": 5808 }, { "epoch": 4.54, "learning_rate": 1.1957422394540805e-05, "loss": 0.3935, "step": 5809 }, { "epoch": 4.54, "learning_rate": 1.1954939001369567e-05, "loss": 0.3314, "step": 5810 }, { "epoch": 4.54, "learning_rate": 1.1952455482835324e-05, "loss": 0.4758, "step": 5811 }, { "epoch": 4.54, "learning_rate": 1.1949971839097334e-05, "loss": 0.4384, "step": 5812 }, { "epoch": 4.54, "learning_rate": 1.1947488070314859e-05, "loss": 0.6359, "step": 5813 }, { "epoch": 4.54, "learning_rate": 1.1945004176647182e-05, "loss": 0.575, "step": 5814 }, { "epoch": 4.54, "learning_rate": 1.1942520158253579e-05, "loss": 0.425, "step": 5815 }, { "epoch": 4.55, "learning_rate": 1.1940036015293344e-05, "loss": 0.5463, "step": 5816 }, { "epoch": 4.55, "learning_rate": 1.1937551747925776e-05, "loss": 0.5097, "step": 5817 }, { "epoch": 4.55, "learning_rate": 1.1935067356310181e-05, "loss": 0.5092, "step": 5818 }, { "epoch": 4.55, "learning_rate": 1.1932582840605874e-05, "loss": 0.5716, "step": 5819 }, { "epoch": 4.55, "learning_rate": 1.1930098200972178e-05, "loss": 0.5312, "step": 5820 }, { "epoch": 4.55, "learning_rate": 1.1927613437568425e-05, "loss": 0.5424, "step": 5821 }, { "epoch": 4.55, "learning_rate": 1.1925128550553952e-05, "loss": 0.3985, "step": 5822 }, { "epoch": 4.55, "learning_rate": 1.1922643540088103e-05, "loss": 0.5395, "step": 5823 }, { "epoch": 4.55, "learning_rate": 1.192015840633024e-05, "loss": 0.3499, "step": 5824 }, { "epoch": 4.55, "learning_rate": 1.191767314943972e-05, "loss": 0.3904, "step": 5825 }, { "epoch": 4.55, "learning_rate": 1.1915187769575911e-05, "loss": 0.5654, "step": 5826 }, { "epoch": 4.55, "learning_rate": 1.1912702266898197e-05, "loss": 0.4741, "step": 5827 }, { "epoch": 4.55, "learning_rate": 1.1910216641565963e-05, "loss": 0.4612, "step": 5828 }, { "epoch": 4.56, "learning_rate": 1.19077308937386e-05, "loss": 0.422, "step": 5829 }, { "epoch": 4.56, "learning_rate": 1.1905245023575511e-05, "loss": 0.6367, "step": 5830 }, { "epoch": 4.56, "learning_rate": 1.1902759031236107e-05, "loss": 0.3532, "step": 5831 }, { "epoch": 4.56, "learning_rate": 1.1900272916879804e-05, "loss": 0.4329, "step": 5832 }, { "epoch": 4.56, "learning_rate": 1.189778668066603e-05, "loss": 0.5313, "step": 5833 }, { "epoch": 4.56, "learning_rate": 1.1895300322754213e-05, "loss": 0.5406, "step": 5834 }, { "epoch": 4.56, "learning_rate": 1.1892813843303798e-05, "loss": 0.3819, "step": 5835 }, { "epoch": 4.56, "learning_rate": 1.1890327242474234e-05, "loss": 0.5964, "step": 5836 }, { "epoch": 4.56, "learning_rate": 1.1887840520424974e-05, "loss": 0.4955, "step": 5837 }, { "epoch": 4.56, "learning_rate": 1.1885353677315488e-05, "loss": 0.5699, "step": 5838 }, { "epoch": 4.56, "learning_rate": 1.1882866713305242e-05, "loss": 0.4204, "step": 5839 }, { "epoch": 4.56, "learning_rate": 1.1880379628553716e-05, "loss": 0.7621, "step": 5840 }, { "epoch": 4.57, "learning_rate": 1.1877892423220405e-05, "loss": 0.5417, "step": 5841 }, { "epoch": 4.57, "learning_rate": 1.1875405097464793e-05, "loss": 0.6143, "step": 5842 }, { "epoch": 4.57, "learning_rate": 1.1872917651446397e-05, "loss": 0.4885, "step": 5843 }, { "epoch": 4.57, "learning_rate": 1.1870430085324713e-05, "loss": 0.4743, "step": 5844 }, { "epoch": 4.57, "learning_rate": 1.1867942399259271e-05, "loss": 0.722, "step": 5845 }, { "epoch": 4.57, "learning_rate": 1.186545459340959e-05, "loss": 0.5601, "step": 5846 }, { "epoch": 4.57, "learning_rate": 1.1862966667935207e-05, "loss": 0.4442, "step": 5847 }, { "epoch": 4.57, "learning_rate": 1.186047862299566e-05, "loss": 0.5291, "step": 5848 }, { "epoch": 4.57, "learning_rate": 1.1857990458750503e-05, "loss": 0.4291, "step": 5849 }, { "epoch": 4.57, "learning_rate": 1.185550217535929e-05, "loss": 0.3836, "step": 5850 }, { "epoch": 4.57, "learning_rate": 1.1853013772981587e-05, "loss": 0.5251, "step": 5851 }, { "epoch": 4.57, "learning_rate": 1.1850525251776965e-05, "loss": 0.5449, "step": 5852 }, { "epoch": 4.57, "learning_rate": 1.1848036611905005e-05, "loss": 0.5669, "step": 5853 }, { "epoch": 4.58, "learning_rate": 1.1845547853525288e-05, "loss": 0.4555, "step": 5854 }, { "epoch": 4.58, "learning_rate": 1.1843058976797418e-05, "loss": 0.5616, "step": 5855 }, { "epoch": 4.58, "learning_rate": 1.1840569981880992e-05, "loss": 0.479, "step": 5856 }, { "epoch": 4.58, "learning_rate": 1.1838080868935621e-05, "loss": 0.5226, "step": 5857 }, { "epoch": 4.58, "learning_rate": 1.1835591638120925e-05, "loss": 0.4311, "step": 5858 }, { "epoch": 4.58, "learning_rate": 1.1833102289596523e-05, "loss": 0.386, "step": 5859 }, { "epoch": 4.58, "learning_rate": 1.1830612823522056e-05, "loss": 0.5309, "step": 5860 }, { "epoch": 4.58, "learning_rate": 1.1828123240057156e-05, "loss": 0.5218, "step": 5861 }, { "epoch": 4.58, "learning_rate": 1.1825633539361476e-05, "loss": 0.5611, "step": 5862 }, { "epoch": 4.58, "learning_rate": 1.1823143721594672e-05, "loss": 0.3835, "step": 5863 }, { "epoch": 4.58, "learning_rate": 1.1820653786916405e-05, "loss": 0.478, "step": 5864 }, { "epoch": 4.58, "learning_rate": 1.1818163735486342e-05, "loss": 0.3749, "step": 5865 }, { "epoch": 4.58, "learning_rate": 1.1815673567464167e-05, "loss": 0.4597, "step": 5866 }, { "epoch": 4.59, "learning_rate": 1.181318328300956e-05, "loss": 0.4901, "step": 5867 }, { "epoch": 4.59, "learning_rate": 1.1810692882282218e-05, "loss": 0.5371, "step": 5868 }, { "epoch": 4.59, "learning_rate": 1.1808202365441839e-05, "loss": 0.3861, "step": 5869 }, { "epoch": 4.59, "learning_rate": 1.180571173264813e-05, "loss": 0.4769, "step": 5870 }, { "epoch": 4.59, "learning_rate": 1.1803220984060807e-05, "loss": 0.4145, "step": 5871 }, { "epoch": 4.59, "learning_rate": 1.1800730119839594e-05, "loss": 0.7301, "step": 5872 }, { "epoch": 4.59, "learning_rate": 1.1798239140144215e-05, "loss": 0.5198, "step": 5873 }, { "epoch": 4.59, "learning_rate": 1.1795748045134414e-05, "loss": 0.5311, "step": 5874 }, { "epoch": 4.59, "learning_rate": 1.1793256834969933e-05, "loss": 0.5665, "step": 5875 }, { "epoch": 4.59, "learning_rate": 1.1790765509810527e-05, "loss": 0.393, "step": 5876 }, { "epoch": 4.59, "learning_rate": 1.1788274069815948e-05, "loss": 0.4451, "step": 5877 }, { "epoch": 4.59, "learning_rate": 1.1785782515145971e-05, "loss": 0.7246, "step": 5878 }, { "epoch": 4.59, "learning_rate": 1.1783290845960367e-05, "loss": 0.4557, "step": 5879 }, { "epoch": 4.6, "learning_rate": 1.1780799062418915e-05, "loss": 0.5714, "step": 5880 }, { "epoch": 4.6, "learning_rate": 1.1778307164681409e-05, "loss": 0.3398, "step": 5881 }, { "epoch": 4.6, "learning_rate": 1.1775815152907639e-05, "loss": 0.6151, "step": 5882 }, { "epoch": 4.6, "learning_rate": 1.1773323027257418e-05, "loss": 0.5516, "step": 5883 }, { "epoch": 4.6, "learning_rate": 1.1770830787890543e-05, "loss": 0.2974, "step": 5884 }, { "epoch": 4.6, "learning_rate": 1.1768338434966846e-05, "loss": 0.4127, "step": 5885 }, { "epoch": 4.6, "learning_rate": 1.1765845968646143e-05, "loss": 0.6039, "step": 5886 }, { "epoch": 4.6, "learning_rate": 1.1763353389088272e-05, "loss": 0.3613, "step": 5887 }, { "epoch": 4.6, "learning_rate": 1.1760860696453066e-05, "loss": 0.4231, "step": 5888 }, { "epoch": 4.6, "learning_rate": 1.1758367890900384e-05, "loss": 0.571, "step": 5889 }, { "epoch": 4.6, "learning_rate": 1.1755874972590068e-05, "loss": 0.573, "step": 5890 }, { "epoch": 4.6, "learning_rate": 1.1753381941681987e-05, "loss": 0.491, "step": 5891 }, { "epoch": 4.6, "learning_rate": 1.1750888798336008e-05, "loss": 0.4833, "step": 5892 }, { "epoch": 4.61, "learning_rate": 1.1748395542712008e-05, "loss": 0.533, "step": 5893 }, { "epoch": 4.61, "learning_rate": 1.1745902174969866e-05, "loss": 0.7053, "step": 5894 }, { "epoch": 4.61, "learning_rate": 1.1743408695269478e-05, "loss": 0.4522, "step": 5895 }, { "epoch": 4.61, "learning_rate": 1.1740915103770736e-05, "loss": 0.6003, "step": 5896 }, { "epoch": 4.61, "learning_rate": 1.1738421400633553e-05, "loss": 0.3167, "step": 5897 }, { "epoch": 4.61, "learning_rate": 1.1735927586017833e-05, "loss": 0.4397, "step": 5898 }, { "epoch": 4.61, "learning_rate": 1.1733433660083498e-05, "loss": 0.55, "step": 5899 }, { "epoch": 4.61, "learning_rate": 1.1730939622990474e-05, "loss": 0.5173, "step": 5900 }, { "epoch": 4.61, "learning_rate": 1.1728445474898696e-05, "loss": 0.4334, "step": 5901 }, { "epoch": 4.61, "learning_rate": 1.1725951215968101e-05, "loss": 0.6001, "step": 5902 }, { "epoch": 4.61, "learning_rate": 1.1723456846358641e-05, "loss": 0.4839, "step": 5903 }, { "epoch": 4.61, "learning_rate": 1.1720962366230266e-05, "loss": 0.5504, "step": 5904 }, { "epoch": 4.62, "learning_rate": 1.1718467775742944e-05, "loss": 0.7406, "step": 5905 }, { "epoch": 4.62, "learning_rate": 1.1715973075056637e-05, "loss": 0.4127, "step": 5906 }, { "epoch": 4.62, "learning_rate": 1.1713478264331324e-05, "loss": 0.5503, "step": 5907 }, { "epoch": 4.62, "learning_rate": 1.1710983343726988e-05, "loss": 0.4758, "step": 5908 }, { "epoch": 4.62, "learning_rate": 1.170848831340362e-05, "loss": 0.5328, "step": 5909 }, { "epoch": 4.62, "learning_rate": 1.1705993173521217e-05, "loss": 0.419, "step": 5910 }, { "epoch": 4.62, "learning_rate": 1.170349792423978e-05, "loss": 0.4742, "step": 5911 }, { "epoch": 4.62, "learning_rate": 1.1701002565719327e-05, "loss": 0.5254, "step": 5912 }, { "epoch": 4.62, "learning_rate": 1.1698507098119865e-05, "loss": 0.5615, "step": 5913 }, { "epoch": 4.62, "learning_rate": 1.169601152160143e-05, "loss": 0.5761, "step": 5914 }, { "epoch": 4.62, "learning_rate": 1.1693515836324047e-05, "loss": 0.6773, "step": 5915 }, { "epoch": 4.62, "learning_rate": 1.169102004244776e-05, "loss": 0.441, "step": 5916 }, { "epoch": 4.62, "learning_rate": 1.1688524140132611e-05, "loss": 0.5519, "step": 5917 }, { "epoch": 4.63, "learning_rate": 1.1686028129538656e-05, "loss": 0.391, "step": 5918 }, { "epoch": 4.63, "learning_rate": 1.168353201082595e-05, "loss": 0.497, "step": 5919 }, { "epoch": 4.63, "learning_rate": 1.168103578415457e-05, "loss": 0.5588, "step": 5920 }, { "epoch": 4.63, "learning_rate": 1.167853944968458e-05, "loss": 0.595, "step": 5921 }, { "epoch": 4.63, "learning_rate": 1.1676043007576062e-05, "loss": 0.5556, "step": 5922 }, { "epoch": 4.63, "learning_rate": 1.1673546457989107e-05, "loss": 0.4889, "step": 5923 }, { "epoch": 4.63, "learning_rate": 1.167104980108381e-05, "loss": 0.4213, "step": 5924 }, { "epoch": 4.63, "learning_rate": 1.1668553037020265e-05, "loss": 0.5542, "step": 5925 }, { "epoch": 4.63, "learning_rate": 1.166605616595859e-05, "loss": 0.5211, "step": 5926 }, { "epoch": 4.63, "learning_rate": 1.1663559188058894e-05, "loss": 0.4597, "step": 5927 }, { "epoch": 4.63, "learning_rate": 1.1661062103481304e-05, "loss": 0.4551, "step": 5928 }, { "epoch": 4.63, "learning_rate": 1.1658564912385944e-05, "loss": 0.3984, "step": 5929 }, { "epoch": 4.63, "learning_rate": 1.1656067614932948e-05, "loss": 0.4864, "step": 5930 }, { "epoch": 4.64, "learning_rate": 1.1653570211282466e-05, "loss": 0.5636, "step": 5931 }, { "epoch": 4.64, "learning_rate": 1.1651072701594642e-05, "loss": 0.6799, "step": 5932 }, { "epoch": 4.64, "learning_rate": 1.164857508602963e-05, "loss": 0.553, "step": 5933 }, { "epoch": 4.64, "learning_rate": 1.1646077364747599e-05, "loss": 0.5763, "step": 5934 }, { "epoch": 4.64, "learning_rate": 1.1643579537908714e-05, "loss": 0.5866, "step": 5935 }, { "epoch": 4.64, "learning_rate": 1.1641081605673151e-05, "loss": 0.4743, "step": 5936 }, { "epoch": 4.64, "learning_rate": 1.1638583568201098e-05, "loss": 0.5292, "step": 5937 }, { "epoch": 4.64, "learning_rate": 1.1636085425652739e-05, "loss": 0.5107, "step": 5938 }, { "epoch": 4.64, "learning_rate": 1.1633587178188277e-05, "loss": 0.4858, "step": 5939 }, { "epoch": 4.64, "learning_rate": 1.1631088825967909e-05, "loss": 0.3877, "step": 5940 }, { "epoch": 4.64, "learning_rate": 1.162859036915185e-05, "loss": 0.4725, "step": 5941 }, { "epoch": 4.64, "learning_rate": 1.162609180790031e-05, "loss": 0.7242, "step": 5942 }, { "epoch": 4.64, "learning_rate": 1.1623593142373524e-05, "loss": 0.4426, "step": 5943 }, { "epoch": 4.65, "learning_rate": 1.1621094372731712e-05, "loss": 0.4144, "step": 5944 }, { "epoch": 4.65, "learning_rate": 1.1618595499135115e-05, "loss": 0.4255, "step": 5945 }, { "epoch": 4.65, "learning_rate": 1.1616096521743974e-05, "loss": 0.439, "step": 5946 }, { "epoch": 4.65, "learning_rate": 1.1613597440718542e-05, "loss": 0.5458, "step": 5947 }, { "epoch": 4.65, "learning_rate": 1.1611098256219073e-05, "loss": 0.5261, "step": 5948 }, { "epoch": 4.65, "learning_rate": 1.1608598968405837e-05, "loss": 0.6044, "step": 5949 }, { "epoch": 4.65, "learning_rate": 1.1606099577439095e-05, "loss": 0.3331, "step": 5950 }, { "epoch": 4.65, "learning_rate": 1.1603600083479129e-05, "loss": 0.3541, "step": 5951 }, { "epoch": 4.65, "learning_rate": 1.1601100486686224e-05, "loss": 0.6196, "step": 5952 }, { "epoch": 4.65, "learning_rate": 1.1598600787220664e-05, "loss": 0.4042, "step": 5953 }, { "epoch": 4.65, "learning_rate": 1.1596100985242751e-05, "loss": 0.455, "step": 5954 }, { "epoch": 4.65, "learning_rate": 1.1593601080912781e-05, "loss": 0.4083, "step": 5955 }, { "epoch": 4.65, "learning_rate": 1.1591101074391073e-05, "loss": 0.4932, "step": 5956 }, { "epoch": 4.66, "learning_rate": 1.1588600965837939e-05, "loss": 0.7259, "step": 5957 }, { "epoch": 4.66, "learning_rate": 1.15861007554137e-05, "loss": 0.4484, "step": 5958 }, { "epoch": 4.66, "learning_rate": 1.1583600443278684e-05, "loss": 0.4038, "step": 5959 }, { "epoch": 4.66, "learning_rate": 1.1581100029593232e-05, "loss": 0.6828, "step": 5960 }, { "epoch": 4.66, "learning_rate": 1.1578599514517684e-05, "loss": 0.3966, "step": 5961 }, { "epoch": 4.66, "learning_rate": 1.1576098898212386e-05, "loss": 0.505, "step": 5962 }, { "epoch": 4.66, "learning_rate": 1.1573598180837697e-05, "loss": 0.5567, "step": 5963 }, { "epoch": 4.66, "learning_rate": 1.1571097362553978e-05, "loss": 0.4706, "step": 5964 }, { "epoch": 4.66, "learning_rate": 1.1568596443521597e-05, "loss": 0.4171, "step": 5965 }, { "epoch": 4.66, "learning_rate": 1.1566095423900927e-05, "loss": 0.4227, "step": 5966 }, { "epoch": 4.66, "learning_rate": 1.156359430385235e-05, "loss": 0.4967, "step": 5967 }, { "epoch": 4.66, "learning_rate": 1.1561093083536254e-05, "loss": 0.4409, "step": 5968 }, { "epoch": 4.67, "learning_rate": 1.1558591763113034e-05, "loss": 0.5001, "step": 5969 }, { "epoch": 4.67, "learning_rate": 1.1556090342743089e-05, "loss": 0.5933, "step": 5970 }, { "epoch": 4.67, "learning_rate": 1.1553588822586827e-05, "loss": 0.4006, "step": 5971 }, { "epoch": 4.67, "learning_rate": 1.1551087202804661e-05, "loss": 0.5816, "step": 5972 }, { "epoch": 4.67, "learning_rate": 1.1548585483557007e-05, "loss": 0.4293, "step": 5973 }, { "epoch": 4.67, "learning_rate": 1.1546083665004298e-05, "loss": 0.5569, "step": 5974 }, { "epoch": 4.67, "learning_rate": 1.1543581747306958e-05, "loss": 0.5932, "step": 5975 }, { "epoch": 4.67, "learning_rate": 1.1541079730625432e-05, "loss": 0.5597, "step": 5976 }, { "epoch": 4.67, "learning_rate": 1.1538577615120165e-05, "loss": 0.3632, "step": 5977 }, { "epoch": 4.67, "learning_rate": 1.1536075400951605e-05, "loss": 0.5073, "step": 5978 }, { "epoch": 4.67, "learning_rate": 1.153357308828021e-05, "loss": 0.5943, "step": 5979 }, { "epoch": 4.67, "learning_rate": 1.153107067726645e-05, "loss": 0.6475, "step": 5980 }, { "epoch": 4.67, "learning_rate": 1.1528568168070789e-05, "loss": 0.4783, "step": 5981 }, { "epoch": 4.68, "learning_rate": 1.1526065560853703e-05, "loss": 0.4319, "step": 5982 }, { "epoch": 4.68, "learning_rate": 1.1523562855775681e-05, "loss": 0.6154, "step": 5983 }, { "epoch": 4.68, "learning_rate": 1.1521060052997204e-05, "loss": 0.6061, "step": 5984 }, { "epoch": 4.68, "learning_rate": 1.1518557152678775e-05, "loss": 0.6379, "step": 5985 }, { "epoch": 4.68, "learning_rate": 1.1516054154980894e-05, "loss": 0.4713, "step": 5986 }, { "epoch": 4.68, "learning_rate": 1.1513551060064067e-05, "loss": 0.5682, "step": 5987 }, { "epoch": 4.68, "learning_rate": 1.1511047868088806e-05, "loss": 0.3858, "step": 5988 }, { "epoch": 4.68, "learning_rate": 1.1508544579215641e-05, "loss": 0.4459, "step": 5989 }, { "epoch": 4.68, "learning_rate": 1.150604119360509e-05, "loss": 0.5113, "step": 5990 }, { "epoch": 4.68, "learning_rate": 1.1503537711417687e-05, "loss": 0.4281, "step": 5991 }, { "epoch": 4.68, "learning_rate": 1.150103413281397e-05, "loss": 0.7035, "step": 5992 }, { "epoch": 4.68, "learning_rate": 1.149853045795449e-05, "loss": 0.6296, "step": 5993 }, { "epoch": 4.68, "learning_rate": 1.1496026686999793e-05, "loss": 0.5494, "step": 5994 }, { "epoch": 4.69, "learning_rate": 1.1493522820110438e-05, "loss": 0.4421, "step": 5995 }, { "epoch": 4.69, "learning_rate": 1.1491018857446987e-05, "loss": 0.4044, "step": 5996 }, { "epoch": 4.69, "learning_rate": 1.1488514799170016e-05, "loss": 0.6358, "step": 5997 }, { "epoch": 4.69, "learning_rate": 1.1486010645440096e-05, "loss": 0.4672, "step": 5998 }, { "epoch": 4.69, "learning_rate": 1.1483506396417807e-05, "loss": 0.5326, "step": 5999 }, { "epoch": 4.69, "learning_rate": 1.1481002052263743e-05, "loss": 0.5655, "step": 6000 }, { "epoch": 4.69, "learning_rate": 1.1478497613138498e-05, "loss": 0.4533, "step": 6001 }, { "epoch": 4.69, "learning_rate": 1.1475993079202664e-05, "loss": 0.4967, "step": 6002 }, { "epoch": 4.69, "learning_rate": 1.1473488450616856e-05, "loss": 0.5654, "step": 6003 }, { "epoch": 4.69, "learning_rate": 1.1470983727541685e-05, "loss": 0.5339, "step": 6004 }, { "epoch": 4.69, "learning_rate": 1.1468478910137768e-05, "loss": 0.6777, "step": 6005 }, { "epoch": 4.69, "learning_rate": 1.1465973998565732e-05, "loss": 0.6502, "step": 6006 }, { "epoch": 4.69, "learning_rate": 1.1463468992986203e-05, "loss": 0.4328, "step": 6007 }, { "epoch": 4.7, "learning_rate": 1.1460963893559825e-05, "loss": 0.5451, "step": 6008 }, { "epoch": 4.7, "learning_rate": 1.1458458700447232e-05, "loss": 0.4846, "step": 6009 }, { "epoch": 4.7, "learning_rate": 1.145595341380908e-05, "loss": 0.6316, "step": 6010 }, { "epoch": 4.7, "learning_rate": 1.145344803380602e-05, "loss": 0.4892, "step": 6011 }, { "epoch": 4.7, "learning_rate": 1.1450942560598714e-05, "loss": 0.6212, "step": 6012 }, { "epoch": 4.7, "learning_rate": 1.1448436994347829e-05, "loss": 0.5818, "step": 6013 }, { "epoch": 4.7, "learning_rate": 1.1445931335214037e-05, "loss": 0.4678, "step": 6014 }, { "epoch": 4.7, "learning_rate": 1.1443425583358016e-05, "loss": 0.4626, "step": 6015 }, { "epoch": 4.7, "learning_rate": 1.1440919738940455e-05, "loss": 0.5061, "step": 6016 }, { "epoch": 4.7, "learning_rate": 1.1438413802122037e-05, "loss": 0.4811, "step": 6017 }, { "epoch": 4.7, "learning_rate": 1.1435907773063467e-05, "loss": 0.4449, "step": 6018 }, { "epoch": 4.7, "learning_rate": 1.1433401651925438e-05, "loss": 0.4303, "step": 6019 }, { "epoch": 4.7, "learning_rate": 1.1430895438868667e-05, "loss": 0.4167, "step": 6020 }, { "epoch": 4.71, "learning_rate": 1.142838913405386e-05, "loss": 0.5255, "step": 6021 }, { "epoch": 4.71, "learning_rate": 1.1425882737641745e-05, "loss": 0.7459, "step": 6022 }, { "epoch": 4.71, "learning_rate": 1.1423376249793043e-05, "loss": 0.4393, "step": 6023 }, { "epoch": 4.71, "learning_rate": 1.1420869670668488e-05, "loss": 0.5289, "step": 6024 }, { "epoch": 4.71, "learning_rate": 1.1418363000428817e-05, "loss": 0.4796, "step": 6025 }, { "epoch": 4.71, "learning_rate": 1.1415856239234773e-05, "loss": 0.4715, "step": 6026 }, { "epoch": 4.71, "learning_rate": 1.1413349387247104e-05, "loss": 0.4717, "step": 6027 }, { "epoch": 4.71, "learning_rate": 1.1410842444626568e-05, "loss": 0.5229, "step": 6028 }, { "epoch": 4.71, "learning_rate": 1.1408335411533923e-05, "loss": 0.5449, "step": 6029 }, { "epoch": 4.71, "learning_rate": 1.1405828288129941e-05, "loss": 0.4635, "step": 6030 }, { "epoch": 4.71, "learning_rate": 1.1403321074575388e-05, "loss": 0.5387, "step": 6031 }, { "epoch": 4.71, "learning_rate": 1.1400813771031046e-05, "loss": 0.4712, "step": 6032 }, { "epoch": 4.72, "learning_rate": 1.13983063776577e-05, "loss": 0.5873, "step": 6033 }, { "epoch": 4.72, "learning_rate": 1.1395798894616138e-05, "loss": 0.4727, "step": 6034 }, { "epoch": 4.72, "learning_rate": 1.1393291322067155e-05, "loss": 0.3235, "step": 6035 }, { "epoch": 4.72, "learning_rate": 1.139078366017155e-05, "loss": 0.4381, "step": 6036 }, { "epoch": 4.72, "learning_rate": 1.138827590909014e-05, "loss": 0.6232, "step": 6037 }, { "epoch": 4.72, "learning_rate": 1.1385768068983725e-05, "loss": 0.5372, "step": 6038 }, { "epoch": 4.72, "learning_rate": 1.1383260140013132e-05, "loss": 0.6556, "step": 6039 }, { "epoch": 4.72, "learning_rate": 1.1380752122339184e-05, "loss": 0.592, "step": 6040 }, { "epoch": 4.72, "learning_rate": 1.137824401612271e-05, "loss": 0.4249, "step": 6041 }, { "epoch": 4.72, "learning_rate": 1.1375735821524543e-05, "loss": 0.5936, "step": 6042 }, { "epoch": 4.72, "learning_rate": 1.137322753870553e-05, "loss": 0.3724, "step": 6043 }, { "epoch": 4.72, "learning_rate": 1.137071916782651e-05, "loss": 0.4921, "step": 6044 }, { "epoch": 4.72, "learning_rate": 1.1368210709048346e-05, "loss": 0.4544, "step": 6045 }, { "epoch": 4.73, "learning_rate": 1.1365702162531884e-05, "loss": 0.677, "step": 6046 }, { "epoch": 4.73, "learning_rate": 1.1363193528437997e-05, "loss": 0.5215, "step": 6047 }, { "epoch": 4.73, "learning_rate": 1.136068480692755e-05, "loss": 0.5612, "step": 6048 }, { "epoch": 4.73, "learning_rate": 1.1358175998161422e-05, "loss": 0.4597, "step": 6049 }, { "epoch": 4.73, "learning_rate": 1.1355667102300489e-05, "loss": 0.4181, "step": 6050 }, { "epoch": 4.73, "learning_rate": 1.135315811950564e-05, "loss": 0.4744, "step": 6051 }, { "epoch": 4.73, "learning_rate": 1.1350649049937766e-05, "loss": 0.3622, "step": 6052 }, { "epoch": 4.73, "learning_rate": 1.1348139893757764e-05, "loss": 0.4369, "step": 6053 }, { "epoch": 4.73, "learning_rate": 1.1345630651126538e-05, "loss": 0.5326, "step": 6054 }, { "epoch": 4.73, "learning_rate": 1.1343121322204998e-05, "loss": 0.634, "step": 6055 }, { "epoch": 4.73, "learning_rate": 1.1340611907154051e-05, "loss": 0.6014, "step": 6056 }, { "epoch": 4.73, "learning_rate": 1.1338102406134624e-05, "loss": 0.6311, "step": 6057 }, { "epoch": 4.73, "learning_rate": 1.133559281930764e-05, "loss": 0.4698, "step": 6058 }, { "epoch": 4.74, "learning_rate": 1.1333083146834026e-05, "loss": 0.6024, "step": 6059 }, { "epoch": 4.74, "learning_rate": 1.1330573388874724e-05, "loss": 0.4352, "step": 6060 }, { "epoch": 4.74, "learning_rate": 1.1328063545590668e-05, "loss": 0.6279, "step": 6061 }, { "epoch": 4.74, "learning_rate": 1.1325553617142813e-05, "loss": 0.4724, "step": 6062 }, { "epoch": 4.74, "learning_rate": 1.1323043603692107e-05, "loss": 0.5417, "step": 6063 }, { "epoch": 4.74, "learning_rate": 1.132053350539951e-05, "loss": 0.6376, "step": 6064 }, { "epoch": 4.74, "learning_rate": 1.1318023322425982e-05, "loss": 0.3269, "step": 6065 }, { "epoch": 4.74, "learning_rate": 1.1315513054932495e-05, "loss": 0.735, "step": 6066 }, { "epoch": 4.74, "learning_rate": 1.1313002703080018e-05, "loss": 0.3755, "step": 6067 }, { "epoch": 4.74, "learning_rate": 1.131049226702954e-05, "loss": 0.5821, "step": 6068 }, { "epoch": 4.74, "learning_rate": 1.1307981746942036e-05, "loss": 0.5858, "step": 6069 }, { "epoch": 4.74, "learning_rate": 1.1305471142978504e-05, "loss": 0.5762, "step": 6070 }, { "epoch": 4.74, "learning_rate": 1.1302960455299934e-05, "loss": 0.5967, "step": 6071 }, { "epoch": 4.75, "learning_rate": 1.1300449684067332e-05, "loss": 0.6942, "step": 6072 }, { "epoch": 4.75, "learning_rate": 1.1297938829441697e-05, "loss": 0.37, "step": 6073 }, { "epoch": 4.75, "learning_rate": 1.1295427891584052e-05, "loss": 0.4603, "step": 6074 }, { "epoch": 4.75, "learning_rate": 1.1292916870655404e-05, "loss": 0.503, "step": 6075 }, { "epoch": 4.75, "learning_rate": 1.1290405766816784e-05, "loss": 0.4651, "step": 6076 }, { "epoch": 4.75, "learning_rate": 1.128789458022921e-05, "loss": 0.4209, "step": 6077 }, { "epoch": 4.75, "learning_rate": 1.1285383311053724e-05, "loss": 0.4589, "step": 6078 }, { "epoch": 4.75, "learning_rate": 1.1282871959451357e-05, "loss": 0.4956, "step": 6079 }, { "epoch": 4.75, "learning_rate": 1.1280360525583161e-05, "loss": 0.3725, "step": 6080 }, { "epoch": 4.75, "learning_rate": 1.127784900961018e-05, "loss": 0.362, "step": 6081 }, { "epoch": 4.75, "learning_rate": 1.1275337411693467e-05, "loss": 0.8813, "step": 6082 }, { "epoch": 4.75, "learning_rate": 1.1272825731994084e-05, "loss": 0.4158, "step": 6083 }, { "epoch": 4.75, "learning_rate": 1.1270313970673095e-05, "loss": 0.4252, "step": 6084 }, { "epoch": 4.76, "learning_rate": 1.1267802127891571e-05, "loss": 0.5148, "step": 6085 }, { "epoch": 4.76, "learning_rate": 1.1265290203810584e-05, "loss": 0.4436, "step": 6086 }, { "epoch": 4.76, "learning_rate": 1.1262778198591216e-05, "loss": 0.5144, "step": 6087 }, { "epoch": 4.76, "learning_rate": 1.1260266112394555e-05, "loss": 0.3538, "step": 6088 }, { "epoch": 4.76, "learning_rate": 1.125775394538169e-05, "loss": 0.4619, "step": 6089 }, { "epoch": 4.76, "learning_rate": 1.1255241697713714e-05, "loss": 0.5435, "step": 6090 }, { "epoch": 4.76, "learning_rate": 1.1252729369551736e-05, "loss": 0.357, "step": 6091 }, { "epoch": 4.76, "learning_rate": 1.1250216961056853e-05, "loss": 0.5418, "step": 6092 }, { "epoch": 4.76, "learning_rate": 1.1247704472390182e-05, "loss": 0.5979, "step": 6093 }, { "epoch": 4.76, "learning_rate": 1.124519190371284e-05, "loss": 0.4549, "step": 6094 }, { "epoch": 4.76, "learning_rate": 1.1242679255185947e-05, "loss": 0.4434, "step": 6095 }, { "epoch": 4.76, "learning_rate": 1.1240166526970626e-05, "loss": 0.4504, "step": 6096 }, { "epoch": 4.77, "learning_rate": 1.1237653719228019e-05, "loss": 0.624, "step": 6097 }, { "epoch": 4.77, "learning_rate": 1.1235140832119251e-05, "loss": 0.4431, "step": 6098 }, { "epoch": 4.77, "learning_rate": 1.1232627865805475e-05, "loss": 0.4804, "step": 6099 }, { "epoch": 4.77, "learning_rate": 1.123011482044783e-05, "loss": 0.4643, "step": 6100 }, { "epoch": 4.77, "learning_rate": 1.1227601696207476e-05, "loss": 0.5791, "step": 6101 }, { "epoch": 4.77, "learning_rate": 1.1225088493245562e-05, "loss": 0.3066, "step": 6102 }, { "epoch": 4.77, "learning_rate": 1.1222575211723256e-05, "loss": 0.6947, "step": 6103 }, { "epoch": 4.77, "learning_rate": 1.1220061851801722e-05, "loss": 0.3542, "step": 6104 }, { "epoch": 4.77, "learning_rate": 1.1217548413642136e-05, "loss": 0.5378, "step": 6105 }, { "epoch": 4.77, "learning_rate": 1.1215034897405675e-05, "loss": 0.7399, "step": 6106 }, { "epoch": 4.77, "learning_rate": 1.121252130325352e-05, "loss": 0.6142, "step": 6107 }, { "epoch": 4.77, "learning_rate": 1.121000763134686e-05, "loss": 0.4235, "step": 6108 }, { "epoch": 4.77, "learning_rate": 1.1207493881846882e-05, "loss": 0.4446, "step": 6109 }, { "epoch": 4.78, "learning_rate": 1.1204980054914791e-05, "loss": 0.5401, "step": 6110 }, { "epoch": 4.78, "learning_rate": 1.1202466150711786e-05, "loss": 0.4917, "step": 6111 }, { "epoch": 4.78, "learning_rate": 1.1199952169399076e-05, "loss": 0.415, "step": 6112 }, { "epoch": 4.78, "learning_rate": 1.1197438111137868e-05, "loss": 0.5748, "step": 6113 }, { "epoch": 4.78, "learning_rate": 1.1194923976089389e-05, "loss": 0.4376, "step": 6114 }, { "epoch": 4.78, "learning_rate": 1.1192409764414849e-05, "loss": 0.5762, "step": 6115 }, { "epoch": 4.78, "learning_rate": 1.1189895476275485e-05, "loss": 0.4568, "step": 6116 }, { "epoch": 4.78, "learning_rate": 1.1187381111832525e-05, "loss": 0.3713, "step": 6117 }, { "epoch": 4.78, "learning_rate": 1.1184866671247208e-05, "loss": 0.5483, "step": 6118 }, { "epoch": 4.78, "learning_rate": 1.1182352154680773e-05, "loss": 0.5176, "step": 6119 }, { "epoch": 4.78, "learning_rate": 1.1179837562294467e-05, "loss": 0.5279, "step": 6120 }, { "epoch": 4.78, "learning_rate": 1.1177322894249541e-05, "loss": 0.5269, "step": 6121 }, { "epoch": 4.78, "learning_rate": 1.1174808150707257e-05, "loss": 0.3231, "step": 6122 }, { "epoch": 4.79, "learning_rate": 1.1172293331828868e-05, "loss": 0.4942, "step": 6123 }, { "epoch": 4.79, "learning_rate": 1.1169778437775644e-05, "loss": 0.3642, "step": 6124 }, { "epoch": 4.79, "learning_rate": 1.116726346870886e-05, "loss": 0.398, "step": 6125 }, { "epoch": 4.79, "learning_rate": 1.1164748424789784e-05, "loss": 0.4765, "step": 6126 }, { "epoch": 4.79, "learning_rate": 1.1162233306179698e-05, "loss": 0.471, "step": 6127 }, { "epoch": 4.79, "learning_rate": 1.1159718113039893e-05, "loss": 0.5753, "step": 6128 }, { "epoch": 4.79, "learning_rate": 1.1157202845531653e-05, "loss": 0.4871, "step": 6129 }, { "epoch": 4.79, "learning_rate": 1.1154687503816277e-05, "loss": 0.695, "step": 6130 }, { "epoch": 4.79, "learning_rate": 1.115217208805506e-05, "loss": 0.4792, "step": 6131 }, { "epoch": 4.79, "learning_rate": 1.1149656598409311e-05, "loss": 0.5533, "step": 6132 }, { "epoch": 4.79, "learning_rate": 1.1147141035040334e-05, "loss": 0.6157, "step": 6133 }, { "epoch": 4.79, "learning_rate": 1.1144625398109448e-05, "loss": 0.5525, "step": 6134 }, { "epoch": 4.79, "learning_rate": 1.1142109687777969e-05, "loss": 0.5387, "step": 6135 }, { "epoch": 4.8, "learning_rate": 1.113959390420722e-05, "loss": 0.4834, "step": 6136 }, { "epoch": 4.8, "learning_rate": 1.1137078047558529e-05, "loss": 0.3827, "step": 6137 }, { "epoch": 4.8, "learning_rate": 1.1134562117993229e-05, "loss": 0.3745, "step": 6138 }, { "epoch": 4.8, "learning_rate": 1.113204611567266e-05, "loss": 0.4381, "step": 6139 }, { "epoch": 4.8, "learning_rate": 1.1129530040758156e-05, "loss": 0.4263, "step": 6140 }, { "epoch": 4.8, "learning_rate": 1.1127013893411072e-05, "loss": 0.5344, "step": 6141 }, { "epoch": 4.8, "learning_rate": 1.1124497673792753e-05, "loss": 0.5965, "step": 6142 }, { "epoch": 4.8, "learning_rate": 1.112198138206456e-05, "loss": 0.4452, "step": 6143 }, { "epoch": 4.8, "learning_rate": 1.1119465018387849e-05, "loss": 0.4283, "step": 6144 }, { "epoch": 4.8, "learning_rate": 1.1116948582923991e-05, "loss": 0.494, "step": 6145 }, { "epoch": 4.8, "learning_rate": 1.1114432075834347e-05, "loss": 0.3417, "step": 6146 }, { "epoch": 4.8, "learning_rate": 1.1111915497280301e-05, "loss": 0.5127, "step": 6147 }, { "epoch": 4.81, "learning_rate": 1.1109398847423225e-05, "loss": 0.5021, "step": 6148 }, { "epoch": 4.81, "learning_rate": 1.1106882126424505e-05, "loss": 0.5654, "step": 6149 }, { "epoch": 4.81, "learning_rate": 1.1104365334445526e-05, "loss": 0.603, "step": 6150 }, { "epoch": 4.81, "learning_rate": 1.1101848471647687e-05, "loss": 0.6798, "step": 6151 }, { "epoch": 4.81, "learning_rate": 1.1099331538192377e-05, "loss": 0.5589, "step": 6152 }, { "epoch": 4.81, "learning_rate": 1.1096814534241005e-05, "loss": 0.5225, "step": 6153 }, { "epoch": 4.81, "learning_rate": 1.1094297459954975e-05, "loss": 0.6021, "step": 6154 }, { "epoch": 4.81, "learning_rate": 1.1091780315495695e-05, "loss": 0.5098, "step": 6155 }, { "epoch": 4.81, "learning_rate": 1.108926310102458e-05, "loss": 0.5622, "step": 6156 }, { "epoch": 4.81, "learning_rate": 1.1086745816703052e-05, "loss": 0.3848, "step": 6157 }, { "epoch": 4.81, "learning_rate": 1.1084228462692536e-05, "loss": 0.4723, "step": 6158 }, { "epoch": 4.81, "learning_rate": 1.1081711039154462e-05, "loss": 0.576, "step": 6159 }, { "epoch": 4.81, "learning_rate": 1.1079193546250255e-05, "loss": 0.4747, "step": 6160 }, { "epoch": 4.82, "learning_rate": 1.107667598414136e-05, "loss": 0.5466, "step": 6161 }, { "epoch": 4.82, "learning_rate": 1.1074158352989219e-05, "loss": 0.588, "step": 6162 }, { "epoch": 4.82, "learning_rate": 1.1071640652955273e-05, "loss": 0.6852, "step": 6163 }, { "epoch": 4.82, "learning_rate": 1.1069122884200976e-05, "loss": 0.4368, "step": 6164 }, { "epoch": 4.82, "learning_rate": 1.1066605046887784e-05, "loss": 0.4646, "step": 6165 }, { "epoch": 4.82, "learning_rate": 1.1064087141177157e-05, "loss": 0.5007, "step": 6166 }, { "epoch": 4.82, "learning_rate": 1.1061569167230554e-05, "loss": 0.488, "step": 6167 }, { "epoch": 4.82, "learning_rate": 1.1059051125209452e-05, "loss": 0.403, "step": 6168 }, { "epoch": 4.82, "learning_rate": 1.1056533015275314e-05, "loss": 0.4687, "step": 6169 }, { "epoch": 4.82, "learning_rate": 1.1054014837589624e-05, "loss": 0.5917, "step": 6170 }, { "epoch": 4.82, "learning_rate": 1.1051496592313862e-05, "loss": 0.4552, "step": 6171 }, { "epoch": 4.82, "learning_rate": 1.1048978279609513e-05, "loss": 0.3908, "step": 6172 }, { "epoch": 4.82, "learning_rate": 1.1046459899638067e-05, "loss": 0.4567, "step": 6173 }, { "epoch": 4.83, "learning_rate": 1.1043941452561018e-05, "loss": 0.4012, "step": 6174 }, { "epoch": 4.83, "learning_rate": 1.104142293853986e-05, "loss": 0.4286, "step": 6175 }, { "epoch": 4.83, "learning_rate": 1.1038904357736111e-05, "loss": 0.7117, "step": 6176 }, { "epoch": 4.83, "learning_rate": 1.103638571031126e-05, "loss": 0.4742, "step": 6177 }, { "epoch": 4.83, "learning_rate": 1.1033866996426831e-05, "loss": 0.6125, "step": 6178 }, { "epoch": 4.83, "learning_rate": 1.1031348216244335e-05, "loss": 0.4459, "step": 6179 }, { "epoch": 4.83, "learning_rate": 1.1028829369925293e-05, "loss": 0.4245, "step": 6180 }, { "epoch": 4.83, "learning_rate": 1.102631045763123e-05, "loss": 0.4052, "step": 6181 }, { "epoch": 4.83, "learning_rate": 1.102379147952367e-05, "loss": 0.3427, "step": 6182 }, { "epoch": 4.83, "learning_rate": 1.1021272435764154e-05, "loss": 0.4817, "step": 6183 }, { "epoch": 4.83, "learning_rate": 1.1018753326514213e-05, "loss": 0.5379, "step": 6184 }, { "epoch": 4.83, "learning_rate": 1.1016234151935387e-05, "loss": 0.7427, "step": 6185 }, { "epoch": 4.83, "learning_rate": 1.1013714912189226e-05, "loss": 0.5842, "step": 6186 }, { "epoch": 4.84, "learning_rate": 1.1011195607437278e-05, "loss": 0.4328, "step": 6187 }, { "epoch": 4.84, "learning_rate": 1.1008676237841095e-05, "loss": 0.4851, "step": 6188 }, { "epoch": 4.84, "learning_rate": 1.1006156803562235e-05, "loss": 0.5649, "step": 6189 }, { "epoch": 4.84, "learning_rate": 1.1003637304762262e-05, "loss": 0.4618, "step": 6190 }, { "epoch": 4.84, "learning_rate": 1.1001117741602743e-05, "loss": 0.38, "step": 6191 }, { "epoch": 4.84, "learning_rate": 1.0998598114245242e-05, "loss": 0.5327, "step": 6192 }, { "epoch": 4.84, "learning_rate": 1.0996078422851344e-05, "loss": 0.5671, "step": 6193 }, { "epoch": 4.84, "learning_rate": 1.0993558667582615e-05, "loss": 0.4411, "step": 6194 }, { "epoch": 4.84, "learning_rate": 1.0991038848600645e-05, "loss": 0.4406, "step": 6195 }, { "epoch": 4.84, "learning_rate": 1.0988518966067023e-05, "loss": 0.4133, "step": 6196 }, { "epoch": 4.84, "learning_rate": 1.0985999020143335e-05, "loss": 0.5765, "step": 6197 }, { "epoch": 4.84, "learning_rate": 1.0983479010991174e-05, "loss": 0.4855, "step": 6198 }, { "epoch": 4.84, "learning_rate": 1.0980958938772145e-05, "loss": 0.5983, "step": 6199 }, { "epoch": 4.85, "learning_rate": 1.0978438803647844e-05, "loss": 0.4167, "step": 6200 }, { "epoch": 4.85, "learning_rate": 1.0975918605779885e-05, "loss": 0.5215, "step": 6201 }, { "epoch": 4.85, "learning_rate": 1.0973398345329875e-05, "loss": 0.5583, "step": 6202 }, { "epoch": 4.85, "learning_rate": 1.0970878022459433e-05, "loss": 0.4695, "step": 6203 }, { "epoch": 4.85, "learning_rate": 1.0968357637330167e-05, "loss": 0.4682, "step": 6204 }, { "epoch": 4.85, "learning_rate": 1.0965837190103715e-05, "loss": 0.6939, "step": 6205 }, { "epoch": 4.85, "learning_rate": 1.0963316680941691e-05, "loss": 0.3906, "step": 6206 }, { "epoch": 4.85, "learning_rate": 1.0960796110005735e-05, "loss": 0.7385, "step": 6207 }, { "epoch": 4.85, "learning_rate": 1.0958275477457478e-05, "loss": 0.5935, "step": 6208 }, { "epoch": 4.85, "learning_rate": 1.095575478345856e-05, "loss": 0.5971, "step": 6209 }, { "epoch": 4.85, "learning_rate": 1.0953234028170624e-05, "loss": 0.5999, "step": 6210 }, { "epoch": 4.85, "learning_rate": 1.095071321175531e-05, "loss": 0.4666, "step": 6211 }, { "epoch": 4.86, "learning_rate": 1.0948192334374278e-05, "loss": 0.6277, "step": 6212 }, { "epoch": 4.86, "learning_rate": 1.094567139618918e-05, "loss": 0.5035, "step": 6213 }, { "epoch": 4.86, "learning_rate": 1.0943150397361672e-05, "loss": 0.5575, "step": 6214 }, { "epoch": 4.86, "learning_rate": 1.0940629338053417e-05, "loss": 0.5963, "step": 6215 }, { "epoch": 4.86, "learning_rate": 1.0938108218426085e-05, "loss": 0.5955, "step": 6216 }, { "epoch": 4.86, "learning_rate": 1.093558703864134e-05, "loss": 0.4692, "step": 6217 }, { "epoch": 4.86, "learning_rate": 1.093306579886086e-05, "loss": 0.5039, "step": 6218 }, { "epoch": 4.86, "learning_rate": 1.093054449924632e-05, "loss": 0.4018, "step": 6219 }, { "epoch": 4.86, "learning_rate": 1.0928023139959407e-05, "loss": 0.4047, "step": 6220 }, { "epoch": 4.86, "learning_rate": 1.0925501721161798e-05, "loss": 0.5507, "step": 6221 }, { "epoch": 4.86, "learning_rate": 1.0922980243015193e-05, "loss": 0.607, "step": 6222 }, { "epoch": 4.86, "learning_rate": 1.0920458705681273e-05, "loss": 0.5346, "step": 6223 }, { "epoch": 4.86, "learning_rate": 1.0917937109321745e-05, "loss": 0.4936, "step": 6224 }, { "epoch": 4.87, "learning_rate": 1.0915415454098303e-05, "loss": 0.5697, "step": 6225 }, { "epoch": 4.87, "learning_rate": 1.0912893740172656e-05, "loss": 0.4362, "step": 6226 }, { "epoch": 4.87, "learning_rate": 1.0910371967706508e-05, "loss": 0.5481, "step": 6227 }, { "epoch": 4.87, "learning_rate": 1.0907850136861577e-05, "loss": 0.7296, "step": 6228 }, { "epoch": 4.87, "learning_rate": 1.0905328247799572e-05, "loss": 0.4319, "step": 6229 }, { "epoch": 4.87, "learning_rate": 1.0902806300682215e-05, "loss": 0.3963, "step": 6230 }, { "epoch": 4.87, "learning_rate": 1.090028429567123e-05, "loss": 0.4576, "step": 6231 }, { "epoch": 4.87, "learning_rate": 1.0897762232928347e-05, "loss": 0.4753, "step": 6232 }, { "epoch": 4.87, "learning_rate": 1.0895240112615288e-05, "loss": 0.5753, "step": 6233 }, { "epoch": 4.87, "learning_rate": 1.089271793489379e-05, "loss": 0.4027, "step": 6234 }, { "epoch": 4.87, "learning_rate": 1.0890195699925599e-05, "loss": 0.5623, "step": 6235 }, { "epoch": 4.87, "learning_rate": 1.0887673407872448e-05, "loss": 0.7205, "step": 6236 }, { "epoch": 4.87, "learning_rate": 1.0885151058896084e-05, "loss": 0.4439, "step": 6237 }, { "epoch": 4.88, "learning_rate": 1.0882628653158256e-05, "loss": 0.5552, "step": 6238 }, { "epoch": 4.88, "learning_rate": 1.088010619082072e-05, "loss": 0.5269, "step": 6239 }, { "epoch": 4.88, "learning_rate": 1.0877583672045226e-05, "loss": 0.4766, "step": 6240 }, { "epoch": 4.88, "learning_rate": 1.0875061096993541e-05, "loss": 0.6113, "step": 6241 }, { "epoch": 4.88, "learning_rate": 1.0872538465827425e-05, "loss": 0.405, "step": 6242 }, { "epoch": 4.88, "learning_rate": 1.0870015778708643e-05, "loss": 0.6237, "step": 6243 }, { "epoch": 4.88, "learning_rate": 1.0867493035798965e-05, "loss": 0.6043, "step": 6244 }, { "epoch": 4.88, "learning_rate": 1.0864970237260171e-05, "loss": 0.5946, "step": 6245 }, { "epoch": 4.88, "learning_rate": 1.0862447383254031e-05, "loss": 0.4069, "step": 6246 }, { "epoch": 4.88, "learning_rate": 1.0859924473942335e-05, "loss": 0.3728, "step": 6247 }, { "epoch": 4.88, "learning_rate": 1.0857401509486862e-05, "loss": 0.4172, "step": 6248 }, { "epoch": 4.88, "learning_rate": 1.08548784900494e-05, "loss": 0.4456, "step": 6249 }, { "epoch": 4.88, "learning_rate": 1.0852355415791745e-05, "loss": 0.4507, "step": 6250 }, { "epoch": 4.89, "learning_rate": 1.084983228687569e-05, "loss": 0.459, "step": 6251 }, { "epoch": 4.89, "learning_rate": 1.0847309103463031e-05, "loss": 0.5349, "step": 6252 }, { "epoch": 4.89, "learning_rate": 1.0844785865715577e-05, "loss": 0.6263, "step": 6253 }, { "epoch": 4.89, "learning_rate": 1.0842262573795125e-05, "loss": 0.3614, "step": 6254 }, { "epoch": 4.89, "learning_rate": 1.0839739227863495e-05, "loss": 0.4488, "step": 6255 }, { "epoch": 4.89, "learning_rate": 1.0837215828082492e-05, "loss": 0.4404, "step": 6256 }, { "epoch": 4.89, "learning_rate": 1.0834692374613937e-05, "loss": 0.5644, "step": 6257 }, { "epoch": 4.89, "learning_rate": 1.0832168867619643e-05, "loss": 0.4783, "step": 6258 }, { "epoch": 4.89, "learning_rate": 1.082964530726144e-05, "loss": 0.4568, "step": 6259 }, { "epoch": 4.89, "learning_rate": 1.0827121693701155e-05, "loss": 0.5565, "step": 6260 }, { "epoch": 4.89, "learning_rate": 1.0824598027100612e-05, "loss": 0.4647, "step": 6261 }, { "epoch": 4.89, "learning_rate": 1.0822074307621651e-05, "loss": 0.3826, "step": 6262 }, { "epoch": 4.89, "learning_rate": 1.08195505354261e-05, "loss": 0.6426, "step": 6263 }, { "epoch": 4.9, "learning_rate": 1.0817026710675809e-05, "loss": 0.5852, "step": 6264 }, { "epoch": 4.9, "learning_rate": 1.0814502833532616e-05, "loss": 0.5666, "step": 6265 }, { "epoch": 4.9, "learning_rate": 1.081197890415837e-05, "loss": 0.626, "step": 6266 }, { "epoch": 4.9, "learning_rate": 1.0809454922714919e-05, "loss": 0.5128, "step": 6267 }, { "epoch": 4.9, "learning_rate": 1.080693088936412e-05, "loss": 0.4885, "step": 6268 }, { "epoch": 4.9, "learning_rate": 1.0804406804267823e-05, "loss": 0.4935, "step": 6269 }, { "epoch": 4.9, "learning_rate": 1.0801882667587899e-05, "loss": 0.4789, "step": 6270 }, { "epoch": 4.9, "learning_rate": 1.0799358479486204e-05, "loss": 0.5415, "step": 6271 }, { "epoch": 4.9, "learning_rate": 1.0796834240124606e-05, "loss": 0.5296, "step": 6272 }, { "epoch": 4.9, "learning_rate": 1.0794309949664974e-05, "loss": 0.424, "step": 6273 }, { "epoch": 4.9, "learning_rate": 1.0791785608269185e-05, "loss": 0.532, "step": 6274 }, { "epoch": 4.9, "learning_rate": 1.0789261216099111e-05, "loss": 0.5018, "step": 6275 }, { "epoch": 4.91, "learning_rate": 1.078673677331664e-05, "loss": 0.4917, "step": 6276 }, { "epoch": 4.91, "learning_rate": 1.0784212280083645e-05, "loss": 0.5972, "step": 6277 }, { "epoch": 4.91, "learning_rate": 1.0781687736562019e-05, "loss": 0.3362, "step": 6278 }, { "epoch": 4.91, "learning_rate": 1.077916314291365e-05, "loss": 0.383, "step": 6279 }, { "epoch": 4.91, "learning_rate": 1.0776638499300432e-05, "loss": 0.599, "step": 6280 }, { "epoch": 4.91, "learning_rate": 1.0774113805884256e-05, "loss": 0.4392, "step": 6281 }, { "epoch": 4.91, "learning_rate": 1.0771589062827028e-05, "loss": 0.5124, "step": 6282 }, { "epoch": 4.91, "learning_rate": 1.0769064270290648e-05, "loss": 0.4095, "step": 6283 }, { "epoch": 4.91, "learning_rate": 1.076653942843702e-05, "loss": 0.5963, "step": 6284 }, { "epoch": 4.91, "learning_rate": 1.0764014537428053e-05, "loss": 0.5675, "step": 6285 }, { "epoch": 4.91, "learning_rate": 1.0761489597425658e-05, "loss": 0.5578, "step": 6286 }, { "epoch": 4.91, "learning_rate": 1.0758964608591758e-05, "loss": 0.4827, "step": 6287 }, { "epoch": 4.91, "learning_rate": 1.0756439571088259e-05, "loss": 0.489, "step": 6288 }, { "epoch": 4.92, "learning_rate": 1.0753914485077092e-05, "loss": 0.481, "step": 6289 }, { "epoch": 4.92, "learning_rate": 1.0751389350720179e-05, "loss": 0.5994, "step": 6290 }, { "epoch": 4.92, "learning_rate": 1.0748864168179443e-05, "loss": 0.5498, "step": 6291 }, { "epoch": 4.92, "learning_rate": 1.0746338937616815e-05, "loss": 0.6088, "step": 6292 }, { "epoch": 4.92, "learning_rate": 1.0743813659194239e-05, "loss": 0.5427, "step": 6293 }, { "epoch": 4.92, "learning_rate": 1.074128833307364e-05, "loss": 0.468, "step": 6294 }, { "epoch": 4.92, "learning_rate": 1.0738762959416964e-05, "loss": 0.4061, "step": 6295 }, { "epoch": 4.92, "learning_rate": 1.0736237538386152e-05, "loss": 0.5183, "step": 6296 }, { "epoch": 4.92, "learning_rate": 1.073371207014315e-05, "loss": 0.5618, "step": 6297 }, { "epoch": 4.92, "learning_rate": 1.0731186554849904e-05, "loss": 0.406, "step": 6298 }, { "epoch": 4.92, "learning_rate": 1.0728660992668372e-05, "loss": 0.4475, "step": 6299 }, { "epoch": 4.92, "learning_rate": 1.0726135383760502e-05, "loss": 0.4035, "step": 6300 }, { "epoch": 4.92, "learning_rate": 1.072360972828826e-05, "loss": 0.6051, "step": 6301 }, { "epoch": 4.93, "learning_rate": 1.0721084026413602e-05, "loss": 0.5158, "step": 6302 }, { "epoch": 4.93, "learning_rate": 1.0718558278298492e-05, "loss": 0.3646, "step": 6303 }, { "epoch": 4.93, "learning_rate": 1.07160324841049e-05, "loss": 0.6068, "step": 6304 }, { "epoch": 4.93, "learning_rate": 1.0713506643994789e-05, "loss": 0.4502, "step": 6305 }, { "epoch": 4.93, "learning_rate": 1.0710980758130136e-05, "loss": 0.4104, "step": 6306 }, { "epoch": 4.93, "learning_rate": 1.070845482667292e-05, "loss": 0.4134, "step": 6307 }, { "epoch": 4.93, "learning_rate": 1.0705928849785117e-05, "loss": 0.5248, "step": 6308 }, { "epoch": 4.93, "learning_rate": 1.0703402827628707e-05, "loss": 0.3906, "step": 6309 }, { "epoch": 4.93, "learning_rate": 1.0700876760365675e-05, "loss": 0.6492, "step": 6310 }, { "epoch": 4.93, "learning_rate": 1.0698350648158006e-05, "loss": 0.5949, "step": 6311 }, { "epoch": 4.93, "learning_rate": 1.0695824491167697e-05, "loss": 0.5454, "step": 6312 }, { "epoch": 4.93, "learning_rate": 1.0693298289556737e-05, "loss": 0.5667, "step": 6313 }, { "epoch": 4.93, "learning_rate": 1.0690772043487122e-05, "loss": 0.5117, "step": 6314 }, { "epoch": 4.94, "learning_rate": 1.0688245753120848e-05, "loss": 0.6974, "step": 6315 }, { "epoch": 4.94, "learning_rate": 1.0685719418619924e-05, "loss": 0.5648, "step": 6316 }, { "epoch": 4.94, "learning_rate": 1.0683193040146347e-05, "loss": 0.4906, "step": 6317 }, { "epoch": 4.94, "learning_rate": 1.068066661786213e-05, "loss": 0.6004, "step": 6318 }, { "epoch": 4.94, "learning_rate": 1.0678140151929279e-05, "loss": 0.4035, "step": 6319 }, { "epoch": 4.94, "learning_rate": 1.0675613642509807e-05, "loss": 0.3955, "step": 6320 }, { "epoch": 4.94, "learning_rate": 1.0673087089765735e-05, "loss": 0.3417, "step": 6321 }, { "epoch": 4.94, "learning_rate": 1.0670560493859077e-05, "loss": 0.6063, "step": 6322 }, { "epoch": 4.94, "learning_rate": 1.0668033854951852e-05, "loss": 0.4329, "step": 6323 }, { "epoch": 4.94, "learning_rate": 1.0665507173206088e-05, "loss": 0.5509, "step": 6324 }, { "epoch": 4.94, "learning_rate": 1.066298044878381e-05, "loss": 0.5714, "step": 6325 }, { "epoch": 4.94, "learning_rate": 1.0660453681847049e-05, "loss": 0.5299, "step": 6326 }, { "epoch": 4.94, "learning_rate": 1.0657926872557837e-05, "loss": 0.4782, "step": 6327 }, { "epoch": 4.95, "learning_rate": 1.0655400021078211e-05, "loss": 0.4913, "step": 6328 }, { "epoch": 4.95, "learning_rate": 1.06528731275702e-05, "loss": 0.4834, "step": 6329 }, { "epoch": 4.95, "learning_rate": 1.0650346192195855e-05, "loss": 0.3635, "step": 6330 }, { "epoch": 4.95, "learning_rate": 1.064781921511721e-05, "loss": 0.6119, "step": 6331 }, { "epoch": 4.95, "learning_rate": 1.064529219649632e-05, "loss": 0.4654, "step": 6332 }, { "epoch": 4.95, "learning_rate": 1.0642765136495226e-05, "loss": 0.4219, "step": 6333 }, { "epoch": 4.95, "learning_rate": 1.064023803527598e-05, "loss": 0.4204, "step": 6334 }, { "epoch": 4.95, "learning_rate": 1.0637710893000636e-05, "loss": 0.4966, "step": 6335 }, { "epoch": 4.95, "learning_rate": 1.0635183709831253e-05, "loss": 0.4237, "step": 6336 }, { "epoch": 4.95, "learning_rate": 1.0632656485929892e-05, "loss": 0.4747, "step": 6337 }, { "epoch": 4.95, "learning_rate": 1.0630129221458607e-05, "loss": 0.6557, "step": 6338 }, { "epoch": 4.95, "learning_rate": 1.0627601916579466e-05, "loss": 0.5155, "step": 6339 }, { "epoch": 4.96, "learning_rate": 1.0625074571454535e-05, "loss": 0.4959, "step": 6340 }, { "epoch": 4.96, "learning_rate": 1.0622547186245886e-05, "loss": 0.4628, "step": 6341 }, { "epoch": 4.96, "learning_rate": 1.0620019761115587e-05, "loss": 0.4795, "step": 6342 }, { "epoch": 4.96, "learning_rate": 1.0617492296225715e-05, "loss": 0.6122, "step": 6343 }, { "epoch": 4.96, "learning_rate": 1.0614964791738344e-05, "loss": 0.7198, "step": 6344 }, { "epoch": 4.96, "learning_rate": 1.0612437247815558e-05, "loss": 0.6921, "step": 6345 }, { "epoch": 4.96, "learning_rate": 1.0609909664619436e-05, "loss": 0.3346, "step": 6346 }, { "epoch": 4.96, "learning_rate": 1.0607382042312067e-05, "loss": 0.5019, "step": 6347 }, { "epoch": 4.96, "learning_rate": 1.0604854381055527e-05, "loss": 0.469, "step": 6348 }, { "epoch": 4.96, "learning_rate": 1.060232668101192e-05, "loss": 0.6604, "step": 6349 }, { "epoch": 4.96, "learning_rate": 1.0599798942343325e-05, "loss": 0.5633, "step": 6350 }, { "epoch": 4.96, "learning_rate": 1.0597271165211847e-05, "loss": 0.4759, "step": 6351 }, { "epoch": 4.96, "learning_rate": 1.0594743349779572e-05, "loss": 0.5401, "step": 6352 }, { "epoch": 4.97, "learning_rate": 1.0592215496208614e-05, "loss": 0.5238, "step": 6353 }, { "epoch": 4.97, "learning_rate": 1.058968760466106e-05, "loss": 0.4977, "step": 6354 }, { "epoch": 4.97, "learning_rate": 1.0587159675299025e-05, "loss": 0.5662, "step": 6355 }, { "epoch": 4.97, "learning_rate": 1.0584631708284614e-05, "loss": 0.5472, "step": 6356 }, { "epoch": 4.97, "learning_rate": 1.0582103703779931e-05, "loss": 0.4476, "step": 6357 }, { "epoch": 4.97, "learning_rate": 1.057957566194709e-05, "loss": 0.4971, "step": 6358 }, { "epoch": 4.97, "learning_rate": 1.0577047582948206e-05, "loss": 0.5211, "step": 6359 }, { "epoch": 4.97, "learning_rate": 1.0574519466945397e-05, "loss": 0.4836, "step": 6360 }, { "epoch": 4.97, "learning_rate": 1.0571991314100778e-05, "loss": 0.5184, "step": 6361 }, { "epoch": 4.97, "learning_rate": 1.0569463124576471e-05, "loss": 0.3656, "step": 6362 }, { "epoch": 4.97, "learning_rate": 1.05669348985346e-05, "loss": 0.5226, "step": 6363 }, { "epoch": 4.97, "learning_rate": 1.0564406636137295e-05, "loss": 0.3897, "step": 6364 }, { "epoch": 4.97, "learning_rate": 1.0561878337546676e-05, "loss": 0.4072, "step": 6365 }, { "epoch": 4.98, "learning_rate": 1.055935000292488e-05, "loss": 0.6055, "step": 6366 }, { "epoch": 4.98, "learning_rate": 1.0556821632434036e-05, "loss": 0.5242, "step": 6367 }, { "epoch": 4.98, "learning_rate": 1.055429322623628e-05, "loss": 0.4629, "step": 6368 }, { "epoch": 4.98, "learning_rate": 1.055176478449375e-05, "loss": 0.4887, "step": 6369 }, { "epoch": 4.98, "learning_rate": 1.054923630736859e-05, "loss": 0.4594, "step": 6370 }, { "epoch": 4.98, "learning_rate": 1.054670779502293e-05, "loss": 0.2975, "step": 6371 }, { "epoch": 4.98, "learning_rate": 1.0544179247618924e-05, "loss": 0.4976, "step": 6372 }, { "epoch": 4.98, "learning_rate": 1.0541650665318716e-05, "loss": 0.5918, "step": 6373 }, { "epoch": 4.98, "learning_rate": 1.0539122048284456e-05, "loss": 0.3872, "step": 6374 }, { "epoch": 4.98, "learning_rate": 1.053659339667829e-05, "loss": 0.4324, "step": 6375 }, { "epoch": 4.98, "learning_rate": 1.0534064710662379e-05, "loss": 0.4281, "step": 6376 }, { "epoch": 4.98, "learning_rate": 1.0531535990398869e-05, "loss": 0.4448, "step": 6377 }, { "epoch": 4.98, "learning_rate": 1.0529007236049927e-05, "loss": 0.6185, "step": 6378 }, { "epoch": 4.99, "learning_rate": 1.0526478447777701e-05, "loss": 0.4824, "step": 6379 }, { "epoch": 4.99, "learning_rate": 1.0523949625744365e-05, "loss": 0.3529, "step": 6380 }, { "epoch": 4.99, "learning_rate": 1.0521420770112076e-05, "loss": 0.5206, "step": 6381 }, { "epoch": 4.99, "learning_rate": 1.0518891881043001e-05, "loss": 0.4222, "step": 6382 }, { "epoch": 4.99, "learning_rate": 1.0516362958699309e-05, "loss": 0.5467, "step": 6383 }, { "epoch": 4.99, "learning_rate": 1.0513834003243171e-05, "loss": 0.5093, "step": 6384 }, { "epoch": 4.99, "learning_rate": 1.0511305014836759e-05, "loss": 0.5553, "step": 6385 }, { "epoch": 4.99, "learning_rate": 1.0508775993642248e-05, "loss": 0.4372, "step": 6386 }, { "epoch": 4.99, "learning_rate": 1.0506246939821814e-05, "loss": 0.4444, "step": 6387 }, { "epoch": 4.99, "learning_rate": 1.0503717853537634e-05, "loss": 0.5564, "step": 6388 }, { "epoch": 4.99, "learning_rate": 1.0501188734951895e-05, "loss": 0.4695, "step": 6389 }, { "epoch": 4.99, "learning_rate": 1.0498659584226775e-05, "loss": 0.5612, "step": 6390 }, { "epoch": 4.99, "learning_rate": 1.049613040152446e-05, "loss": 0.5887, "step": 6391 }, { "epoch": 5.0, "learning_rate": 1.0493601187007138e-05, "loss": 0.5423, "step": 6392 }, { "epoch": 5.0, "learning_rate": 1.0491071940836999e-05, "loss": 0.3504, "step": 6393 }, { "epoch": 5.0, "learning_rate": 1.0488542663176231e-05, "loss": 0.457, "step": 6394 }, { "epoch": 5.0, "learning_rate": 1.0486013354187032e-05, "loss": 0.6087, "step": 6395 }, { "epoch": 5.0, "learning_rate": 1.048348401403159e-05, "loss": 0.7193, "step": 6396 }, { "epoch": 5.0, "learning_rate": 1.0480954642872109e-05, "loss": 0.5456, "step": 6397 }, { "epoch": 5.0, "learning_rate": 1.0478425240870786e-05, "loss": 0.3024, "step": 6398 }, { "epoch": 5.0, "learning_rate": 1.0475895808189823e-05, "loss": 0.2294, "step": 6399 }, { "epoch": 5.0, "learning_rate": 1.0473366344991418e-05, "loss": 0.2144, "step": 6400 }, { "epoch": 5.0, "learning_rate": 1.0470836851437784e-05, "loss": 0.1778, "step": 6401 }, { "epoch": 5.0, "learning_rate": 1.0468307327691123e-05, "loss": 0.2213, "step": 6402 }, { "epoch": 5.0, "learning_rate": 1.0465777773913646e-05, "loss": 0.2766, "step": 6403 }, { "epoch": 5.01, "learning_rate": 1.046324819026756e-05, "loss": 0.2139, "step": 6404 }, { "epoch": 5.01, "learning_rate": 1.0460718576915086e-05, "loss": 0.3475, "step": 6405 }, { "epoch": 5.01, "learning_rate": 1.045818893401843e-05, "loss": 0.2038, "step": 6406 }, { "epoch": 5.01, "learning_rate": 1.0455659261739815e-05, "loss": 0.1551, "step": 6407 }, { "epoch": 5.01, "learning_rate": 1.0453129560241455e-05, "loss": 0.289, "step": 6408 }, { "epoch": 5.01, "learning_rate": 1.0450599829685575e-05, "loss": 0.0888, "step": 6409 }, { "epoch": 5.01, "learning_rate": 1.0448070070234394e-05, "loss": 0.3027, "step": 6410 }, { "epoch": 5.01, "learning_rate": 1.0445540282050135e-05, "loss": 0.2677, "step": 6411 }, { "epoch": 5.01, "learning_rate": 1.0443010465295024e-05, "loss": 0.2003, "step": 6412 }, { "epoch": 5.01, "learning_rate": 1.0440480620131294e-05, "loss": 0.2216, "step": 6413 }, { "epoch": 5.01, "learning_rate": 1.0437950746721172e-05, "loss": 0.3057, "step": 6414 }, { "epoch": 5.01, "learning_rate": 1.043542084522689e-05, "loss": 0.1487, "step": 6415 }, { "epoch": 5.01, "learning_rate": 1.0432890915810676e-05, "loss": 0.2146, "step": 6416 }, { "epoch": 5.02, "learning_rate": 1.0430360958634766e-05, "loss": 0.1387, "step": 6417 }, { "epoch": 5.02, "learning_rate": 1.0427830973861409e-05, "loss": 0.2198, "step": 6418 }, { "epoch": 5.02, "learning_rate": 1.0425300961652824e-05, "loss": 0.2442, "step": 6419 }, { "epoch": 5.02, "learning_rate": 1.0422770922171267e-05, "loss": 0.2463, "step": 6420 }, { "epoch": 5.02, "learning_rate": 1.0420240855578974e-05, "loss": 0.271, "step": 6421 }, { "epoch": 5.02, "learning_rate": 1.041771076203819e-05, "loss": 0.1264, "step": 6422 }, { "epoch": 5.02, "learning_rate": 1.0415180641711156e-05, "loss": 0.2268, "step": 6423 }, { "epoch": 5.02, "learning_rate": 1.0412650494760128e-05, "loss": 0.2676, "step": 6424 }, { "epoch": 5.02, "learning_rate": 1.0410120321347348e-05, "loss": 0.1113, "step": 6425 }, { "epoch": 5.02, "learning_rate": 1.0407590121635069e-05, "loss": 0.2339, "step": 6426 }, { "epoch": 5.02, "learning_rate": 1.0405059895785544e-05, "loss": 0.1543, "step": 6427 }, { "epoch": 5.02, "learning_rate": 1.0402529643961025e-05, "loss": 0.1751, "step": 6428 }, { "epoch": 5.02, "learning_rate": 1.0399999366323768e-05, "loss": 0.2569, "step": 6429 }, { "epoch": 5.03, "learning_rate": 1.0397469063036035e-05, "loss": 0.2409, "step": 6430 }, { "epoch": 5.03, "learning_rate": 1.0394938734260077e-05, "loss": 0.2595, "step": 6431 }, { "epoch": 5.03, "learning_rate": 1.039240838015816e-05, "loss": 0.2707, "step": 6432 }, { "epoch": 5.03, "learning_rate": 1.0389878000892546e-05, "loss": 0.2199, "step": 6433 }, { "epoch": 5.03, "learning_rate": 1.0387347596625498e-05, "loss": 0.3024, "step": 6434 }, { "epoch": 5.03, "learning_rate": 1.0384817167519284e-05, "loss": 0.2186, "step": 6435 }, { "epoch": 5.03, "learning_rate": 1.0382286713736162e-05, "loss": 0.2334, "step": 6436 }, { "epoch": 5.03, "learning_rate": 1.0379756235438411e-05, "loss": 0.1547, "step": 6437 }, { "epoch": 5.03, "learning_rate": 1.0377225732788296e-05, "loss": 0.2391, "step": 6438 }, { "epoch": 5.03, "learning_rate": 1.0374695205948093e-05, "loss": 0.1853, "step": 6439 }, { "epoch": 5.03, "learning_rate": 1.0372164655080067e-05, "loss": 0.1575, "step": 6440 }, { "epoch": 5.03, "learning_rate": 1.0369634080346507e-05, "loss": 0.2908, "step": 6441 }, { "epoch": 5.03, "learning_rate": 1.0367103481909672e-05, "loss": 0.166, "step": 6442 }, { "epoch": 5.04, "learning_rate": 1.0364572859931852e-05, "loss": 0.2064, "step": 6443 }, { "epoch": 5.04, "learning_rate": 1.0362042214575326e-05, "loss": 0.178, "step": 6444 }, { "epoch": 5.04, "learning_rate": 1.0359511546002367e-05, "loss": 0.1848, "step": 6445 }, { "epoch": 5.04, "learning_rate": 1.0356980854375262e-05, "loss": 0.1918, "step": 6446 }, { "epoch": 5.04, "learning_rate": 1.0354450139856302e-05, "loss": 0.2188, "step": 6447 }, { "epoch": 5.04, "learning_rate": 1.035191940260776e-05, "loss": 0.2225, "step": 6448 }, { "epoch": 5.04, "learning_rate": 1.0349388642791931e-05, "loss": 0.1329, "step": 6449 }, { "epoch": 5.04, "learning_rate": 1.0346857860571098e-05, "loss": 0.2004, "step": 6450 }, { "epoch": 5.04, "learning_rate": 1.0344327056107555e-05, "loss": 0.1588, "step": 6451 }, { "epoch": 5.04, "learning_rate": 1.0341796229563592e-05, "loss": 0.2171, "step": 6452 }, { "epoch": 5.04, "learning_rate": 1.0339265381101502e-05, "loss": 0.2469, "step": 6453 }, { "epoch": 5.04, "learning_rate": 1.0336734510883575e-05, "loss": 0.2088, "step": 6454 }, { "epoch": 5.04, "learning_rate": 1.0334203619072114e-05, "loss": 0.1869, "step": 6455 }, { "epoch": 5.05, "learning_rate": 1.0331672705829405e-05, "loss": 0.1052, "step": 6456 }, { "epoch": 5.05, "learning_rate": 1.0329141771317756e-05, "loss": 0.2382, "step": 6457 }, { "epoch": 5.05, "learning_rate": 1.0326610815699463e-05, "loss": 0.2481, "step": 6458 }, { "epoch": 5.05, "learning_rate": 1.0324079839136824e-05, "loss": 0.1662, "step": 6459 }, { "epoch": 5.05, "learning_rate": 1.0321548841792143e-05, "loss": 0.1422, "step": 6460 }, { "epoch": 5.05, "learning_rate": 1.0319017823827728e-05, "loss": 0.1506, "step": 6461 }, { "epoch": 5.05, "learning_rate": 1.031648678540588e-05, "loss": 0.1867, "step": 6462 }, { "epoch": 5.05, "learning_rate": 1.0313955726688902e-05, "loss": 0.2583, "step": 6463 }, { "epoch": 5.05, "learning_rate": 1.0311424647839107e-05, "loss": 0.1774, "step": 6464 }, { "epoch": 5.05, "learning_rate": 1.0308893549018799e-05, "loss": 0.2252, "step": 6465 }, { "epoch": 5.05, "learning_rate": 1.0306362430390293e-05, "loss": 0.181, "step": 6466 }, { "epoch": 5.05, "learning_rate": 1.0303831292115896e-05, "loss": 0.3052, "step": 6467 }, { "epoch": 5.06, "learning_rate": 1.0301300134357924e-05, "loss": 0.2724, "step": 6468 }, { "epoch": 5.06, "learning_rate": 1.0298768957278686e-05, "loss": 0.1951, "step": 6469 }, { "epoch": 5.06, "learning_rate": 1.0296237761040502e-05, "loss": 0.1358, "step": 6470 }, { "epoch": 5.06, "learning_rate": 1.0293706545805684e-05, "loss": 0.2566, "step": 6471 }, { "epoch": 5.06, "learning_rate": 1.0291175311736555e-05, "loss": 0.2193, "step": 6472 }, { "epoch": 5.06, "learning_rate": 1.0288644058995427e-05, "loss": 0.1982, "step": 6473 }, { "epoch": 5.06, "learning_rate": 1.0286112787744625e-05, "loss": 0.2047, "step": 6474 }, { "epoch": 5.06, "learning_rate": 1.0283581498146467e-05, "loss": 0.3858, "step": 6475 }, { "epoch": 5.06, "learning_rate": 1.0281050190363278e-05, "loss": 0.1757, "step": 6476 }, { "epoch": 5.06, "learning_rate": 1.0278518864557377e-05, "loss": 0.2216, "step": 6477 }, { "epoch": 5.06, "learning_rate": 1.0275987520891096e-05, "loss": 0.1718, "step": 6478 }, { "epoch": 5.06, "learning_rate": 1.0273456159526752e-05, "loss": 0.2432, "step": 6479 }, { "epoch": 5.06, "learning_rate": 1.0270924780626678e-05, "loss": 0.1557, "step": 6480 }, { "epoch": 5.07, "learning_rate": 1.02683933843532e-05, "loss": 0.2452, "step": 6481 }, { "epoch": 5.07, "learning_rate": 1.0265861970868647e-05, "loss": 0.2592, "step": 6482 }, { "epoch": 5.07, "learning_rate": 1.0263330540335345e-05, "loss": 0.1539, "step": 6483 }, { "epoch": 5.07, "learning_rate": 1.0260799092915637e-05, "loss": 0.2212, "step": 6484 }, { "epoch": 5.07, "learning_rate": 1.0258267628771843e-05, "loss": 0.2245, "step": 6485 }, { "epoch": 5.07, "learning_rate": 1.0255736148066302e-05, "loss": 0.2358, "step": 6486 }, { "epoch": 5.07, "learning_rate": 1.025320465096135e-05, "loss": 0.1723, "step": 6487 }, { "epoch": 5.07, "learning_rate": 1.0250673137619316e-05, "loss": 0.1001, "step": 6488 }, { "epoch": 5.07, "learning_rate": 1.0248141608202546e-05, "loss": 0.1353, "step": 6489 }, { "epoch": 5.07, "learning_rate": 1.0245610062873368e-05, "loss": 0.2575, "step": 6490 }, { "epoch": 5.07, "learning_rate": 1.0243078501794127e-05, "loss": 0.3056, "step": 6491 }, { "epoch": 5.07, "learning_rate": 1.0240546925127161e-05, "loss": 0.1985, "step": 6492 }, { "epoch": 5.07, "learning_rate": 1.0238015333034812e-05, "loss": 0.2793, "step": 6493 }, { "epoch": 5.08, "learning_rate": 1.0235483725679418e-05, "loss": 0.2331, "step": 6494 }, { "epoch": 5.08, "learning_rate": 1.0232952103223327e-05, "loss": 0.2174, "step": 6495 }, { "epoch": 5.08, "learning_rate": 1.0230420465828874e-05, "loss": 0.2088, "step": 6496 }, { "epoch": 5.08, "learning_rate": 1.0227888813658414e-05, "loss": 0.1691, "step": 6497 }, { "epoch": 5.08, "learning_rate": 1.0225357146874284e-05, "loss": 0.2203, "step": 6498 }, { "epoch": 5.08, "learning_rate": 1.0222825465638834e-05, "loss": 0.2072, "step": 6499 }, { "epoch": 5.08, "learning_rate": 1.022029377011441e-05, "loss": 0.2626, "step": 6500 }, { "epoch": 5.08, "learning_rate": 1.0217762060463365e-05, "loss": 0.1802, "step": 6501 }, { "epoch": 5.08, "learning_rate": 1.0215230336848041e-05, "loss": 0.2265, "step": 6502 }, { "epoch": 5.08, "learning_rate": 1.0212698599430794e-05, "loss": 0.2065, "step": 6503 }, { "epoch": 5.08, "learning_rate": 1.0210166848373973e-05, "loss": 0.249, "step": 6504 }, { "epoch": 5.08, "learning_rate": 1.0207635083839929e-05, "loss": 0.2639, "step": 6505 }, { "epoch": 5.08, "learning_rate": 1.0205103305991013e-05, "loss": 0.2831, "step": 6506 }, { "epoch": 5.09, "learning_rate": 1.0202571514989583e-05, "loss": 0.2616, "step": 6507 }, { "epoch": 5.09, "learning_rate": 1.020003971099799e-05, "loss": 0.2424, "step": 6508 }, { "epoch": 5.09, "learning_rate": 1.0197507894178589e-05, "loss": 0.1522, "step": 6509 }, { "epoch": 5.09, "learning_rate": 1.019497606469374e-05, "loss": 0.2547, "step": 6510 }, { "epoch": 5.09, "learning_rate": 1.0192444222705795e-05, "loss": 0.157, "step": 6511 }, { "epoch": 5.09, "learning_rate": 1.0189912368377116e-05, "loss": 0.309, "step": 6512 }, { "epoch": 5.09, "learning_rate": 1.0187380501870057e-05, "loss": 0.2592, "step": 6513 }, { "epoch": 5.09, "learning_rate": 1.018484862334698e-05, "loss": 0.1015, "step": 6514 }, { "epoch": 5.09, "learning_rate": 1.0182316732970245e-05, "loss": 0.2962, "step": 6515 }, { "epoch": 5.09, "learning_rate": 1.0179784830902213e-05, "loss": 0.248, "step": 6516 }, { "epoch": 5.09, "learning_rate": 1.0177252917305242e-05, "loss": 0.178, "step": 6517 }, { "epoch": 5.09, "learning_rate": 1.0174720992341703e-05, "loss": 0.2814, "step": 6518 }, { "epoch": 5.09, "learning_rate": 1.0172189056173947e-05, "loss": 0.1597, "step": 6519 }, { "epoch": 5.1, "learning_rate": 1.0169657108964345e-05, "loss": 0.2782, "step": 6520 }, { "epoch": 5.1, "learning_rate": 1.0167125150875263e-05, "loss": 0.3195, "step": 6521 }, { "epoch": 5.1, "learning_rate": 1.0164593182069063e-05, "loss": 0.1291, "step": 6522 }, { "epoch": 5.1, "learning_rate": 1.0162061202708109e-05, "loss": 0.2009, "step": 6523 }, { "epoch": 5.1, "learning_rate": 1.015952921295477e-05, "loss": 0.1476, "step": 6524 }, { "epoch": 5.1, "learning_rate": 1.0156997212971411e-05, "loss": 0.293, "step": 6525 }, { "epoch": 5.1, "learning_rate": 1.0154465202920405e-05, "loss": 0.1796, "step": 6526 }, { "epoch": 5.1, "learning_rate": 1.0151933182964113e-05, "loss": 0.1751, "step": 6527 }, { "epoch": 5.1, "learning_rate": 1.014940115326491e-05, "loss": 0.2776, "step": 6528 }, { "epoch": 5.1, "learning_rate": 1.0146869113985166e-05, "loss": 0.1689, "step": 6529 }, { "epoch": 5.1, "learning_rate": 1.0144337065287246e-05, "loss": 0.1807, "step": 6530 }, { "epoch": 5.1, "learning_rate": 1.0141805007333524e-05, "loss": 0.2079, "step": 6531 }, { "epoch": 5.11, "learning_rate": 1.0139272940286374e-05, "loss": 0.1478, "step": 6532 }, { "epoch": 5.11, "learning_rate": 1.013674086430816e-05, "loss": 0.3249, "step": 6533 }, { "epoch": 5.11, "learning_rate": 1.0134208779561266e-05, "loss": 0.2387, "step": 6534 }, { "epoch": 5.11, "learning_rate": 1.0131676686208058e-05, "loss": 0.238, "step": 6535 }, { "epoch": 5.11, "learning_rate": 1.012914458441091e-05, "loss": 0.1489, "step": 6536 }, { "epoch": 5.11, "learning_rate": 1.0126612474332196e-05, "loss": 0.1681, "step": 6537 }, { "epoch": 5.11, "learning_rate": 1.0124080356134293e-05, "loss": 0.2784, "step": 6538 }, { "epoch": 5.11, "learning_rate": 1.0121548229979578e-05, "loss": 0.2526, "step": 6539 }, { "epoch": 5.11, "learning_rate": 1.0119016096030425e-05, "loss": 0.2181, "step": 6540 }, { "epoch": 5.11, "learning_rate": 1.011648395444921e-05, "loss": 0.1493, "step": 6541 }, { "epoch": 5.11, "learning_rate": 1.0113951805398306e-05, "loss": 0.2248, "step": 6542 }, { "epoch": 5.11, "learning_rate": 1.01114196490401e-05, "loss": 0.3585, "step": 6543 }, { "epoch": 5.11, "learning_rate": 1.0108887485536958e-05, "loss": 0.3224, "step": 6544 }, { "epoch": 5.12, "learning_rate": 1.0106355315051266e-05, "loss": 0.2293, "step": 6545 }, { "epoch": 5.12, "learning_rate": 1.0103823137745404e-05, "loss": 0.2208, "step": 6546 }, { "epoch": 5.12, "learning_rate": 1.0101290953781747e-05, "loss": 0.2441, "step": 6547 }, { "epoch": 5.12, "learning_rate": 1.009875876332267e-05, "loss": 0.258, "step": 6548 }, { "epoch": 5.12, "learning_rate": 1.009622656653057e-05, "loss": 0.308, "step": 6549 }, { "epoch": 5.12, "learning_rate": 1.0093694363567808e-05, "loss": 0.1271, "step": 6550 }, { "epoch": 5.12, "learning_rate": 1.0091162154596774e-05, "loss": 0.2254, "step": 6551 }, { "epoch": 5.12, "learning_rate": 1.0088629939779851e-05, "loss": 0.1428, "step": 6552 }, { "epoch": 5.12, "learning_rate": 1.0086097719279415e-05, "loss": 0.2205, "step": 6553 }, { "epoch": 5.12, "learning_rate": 1.008356549325785e-05, "loss": 0.2512, "step": 6554 }, { "epoch": 5.12, "learning_rate": 1.0081033261877545e-05, "loss": 0.1835, "step": 6555 }, { "epoch": 5.12, "learning_rate": 1.007850102530087e-05, "loss": 0.3551, "step": 6556 }, { "epoch": 5.12, "learning_rate": 1.0075968783690218e-05, "loss": 0.2522, "step": 6557 }, { "epoch": 5.13, "learning_rate": 1.0073436537207968e-05, "loss": 0.1947, "step": 6558 }, { "epoch": 5.13, "learning_rate": 1.0070904286016505e-05, "loss": 0.2265, "step": 6559 }, { "epoch": 5.13, "learning_rate": 1.0068372030278213e-05, "loss": 0.1216, "step": 6560 }, { "epoch": 5.13, "learning_rate": 1.0065839770155472e-05, "loss": 0.1614, "step": 6561 }, { "epoch": 5.13, "learning_rate": 1.0063307505810673e-05, "loss": 0.1395, "step": 6562 }, { "epoch": 5.13, "learning_rate": 1.0060775237406196e-05, "loss": 0.2263, "step": 6563 }, { "epoch": 5.13, "learning_rate": 1.0058242965104432e-05, "loss": 0.156, "step": 6564 }, { "epoch": 5.13, "learning_rate": 1.0055710689067757e-05, "loss": 0.2268, "step": 6565 }, { "epoch": 5.13, "learning_rate": 1.0053178409458566e-05, "loss": 0.1762, "step": 6566 }, { "epoch": 5.13, "learning_rate": 1.0050646126439238e-05, "loss": 0.1657, "step": 6567 }, { "epoch": 5.13, "learning_rate": 1.0048113840172162e-05, "loss": 0.2328, "step": 6568 }, { "epoch": 5.13, "learning_rate": 1.0045581550819722e-05, "loss": 0.1764, "step": 6569 }, { "epoch": 5.13, "learning_rate": 1.004304925854431e-05, "loss": 0.225, "step": 6570 }, { "epoch": 5.14, "learning_rate": 1.0040516963508302e-05, "loss": 0.2031, "step": 6571 }, { "epoch": 5.14, "learning_rate": 1.0037984665874097e-05, "loss": 0.2647, "step": 6572 }, { "epoch": 5.14, "learning_rate": 1.003545236580407e-05, "loss": 0.2634, "step": 6573 }, { "epoch": 5.14, "learning_rate": 1.0032920063460616e-05, "loss": 0.2411, "step": 6574 }, { "epoch": 5.14, "learning_rate": 1.0030387759006121e-05, "loss": 0.1349, "step": 6575 }, { "epoch": 5.14, "learning_rate": 1.0027855452602973e-05, "loss": 0.1977, "step": 6576 }, { "epoch": 5.14, "learning_rate": 1.0025323144413554e-05, "loss": 0.2245, "step": 6577 }, { "epoch": 5.14, "learning_rate": 1.002279083460026e-05, "loss": 0.1963, "step": 6578 }, { "epoch": 5.14, "learning_rate": 1.002025852332547e-05, "loss": 0.1555, "step": 6579 }, { "epoch": 5.14, "learning_rate": 1.0017726210751582e-05, "loss": 0.1515, "step": 6580 }, { "epoch": 5.14, "learning_rate": 1.0015193897040969e-05, "loss": 0.1549, "step": 6581 }, { "epoch": 5.14, "learning_rate": 1.0012661582356034e-05, "loss": 0.2628, "step": 6582 }, { "epoch": 5.14, "learning_rate": 1.0010129266859157e-05, "loss": 0.2542, "step": 6583 }, { "epoch": 5.15, "learning_rate": 1.0007596950712726e-05, "loss": 0.2034, "step": 6584 }, { "epoch": 5.15, "learning_rate": 1.0005064634079129e-05, "loss": 0.2141, "step": 6585 }, { "epoch": 5.15, "learning_rate": 1.0002532317120759e-05, "loss": 0.2571, "step": 6586 }, { "epoch": 5.15, "learning_rate": 1e-05, "loss": 0.2598, "step": 6587 }, { "epoch": 5.15, "learning_rate": 9.997467682879243e-06, "loss": 0.187, "step": 6588 }, { "epoch": 5.15, "learning_rate": 9.99493536592087e-06, "loss": 0.2106, "step": 6589 }, { "epoch": 5.15, "learning_rate": 9.99240304928728e-06, "loss": 0.2122, "step": 6590 }, { "epoch": 5.15, "learning_rate": 9.98987073314085e-06, "loss": 0.1749, "step": 6591 }, { "epoch": 5.15, "learning_rate": 9.98733841764397e-06, "loss": 0.1286, "step": 6592 }, { "epoch": 5.15, "learning_rate": 9.984806102959033e-06, "loss": 0.1972, "step": 6593 }, { "epoch": 5.15, "learning_rate": 9.982273789248423e-06, "loss": 0.244, "step": 6594 }, { "epoch": 5.15, "learning_rate": 9.979741476674531e-06, "loss": 0.2003, "step": 6595 }, { "epoch": 5.16, "learning_rate": 9.977209165399744e-06, "loss": 0.1868, "step": 6596 }, { "epoch": 5.16, "learning_rate": 9.974676855586447e-06, "loss": 0.2194, "step": 6597 }, { "epoch": 5.16, "learning_rate": 9.97214454739703e-06, "loss": 0.2116, "step": 6598 }, { "epoch": 5.16, "learning_rate": 9.96961224099388e-06, "loss": 0.1321, "step": 6599 }, { "epoch": 5.16, "learning_rate": 9.967079936539384e-06, "loss": 0.1974, "step": 6600 }, { "epoch": 5.16, "learning_rate": 9.964547634195932e-06, "loss": 0.2197, "step": 6601 }, { "epoch": 5.16, "learning_rate": 9.96201533412591e-06, "loss": 0.2604, "step": 6602 }, { "epoch": 5.16, "learning_rate": 9.959483036491701e-06, "loss": 0.1811, "step": 6603 }, { "epoch": 5.16, "learning_rate": 9.956950741455696e-06, "loss": 0.2649, "step": 6604 }, { "epoch": 5.16, "learning_rate": 9.95441844918028e-06, "loss": 0.2263, "step": 6605 }, { "epoch": 5.16, "learning_rate": 9.95188615982784e-06, "loss": 0.2657, "step": 6606 }, { "epoch": 5.16, "learning_rate": 9.949353873560765e-06, "loss": 0.2677, "step": 6607 }, { "epoch": 5.16, "learning_rate": 9.946821590541437e-06, "loss": 0.2267, "step": 6608 }, { "epoch": 5.17, "learning_rate": 9.944289310932246e-06, "loss": 0.2858, "step": 6609 }, { "epoch": 5.17, "learning_rate": 9.941757034895571e-06, "loss": 0.1397, "step": 6610 }, { "epoch": 5.17, "learning_rate": 9.939224762593805e-06, "loss": 0.1993, "step": 6611 }, { "epoch": 5.17, "learning_rate": 9.936692494189327e-06, "loss": 0.122, "step": 6612 }, { "epoch": 5.17, "learning_rate": 9.934160229844531e-06, "loss": 0.2923, "step": 6613 }, { "epoch": 5.17, "learning_rate": 9.931627969721792e-06, "loss": 0.26, "step": 6614 }, { "epoch": 5.17, "learning_rate": 9.929095713983498e-06, "loss": 0.1689, "step": 6615 }, { "epoch": 5.17, "learning_rate": 9.926563462792034e-06, "loss": 0.1325, "step": 6616 }, { "epoch": 5.17, "learning_rate": 9.924031216309785e-06, "loss": 0.1833, "step": 6617 }, { "epoch": 5.17, "learning_rate": 9.921498974699131e-06, "loss": 0.1552, "step": 6618 }, { "epoch": 5.17, "learning_rate": 9.91896673812246e-06, "loss": 0.1892, "step": 6619 }, { "epoch": 5.17, "learning_rate": 9.916434506742152e-06, "loss": 0.22, "step": 6620 }, { "epoch": 5.17, "learning_rate": 9.913902280720587e-06, "loss": 0.1074, "step": 6621 }, { "epoch": 5.18, "learning_rate": 9.911370060220154e-06, "loss": 0.2587, "step": 6622 }, { "epoch": 5.18, "learning_rate": 9.908837845403226e-06, "loss": 0.2211, "step": 6623 }, { "epoch": 5.18, "learning_rate": 9.906305636432199e-06, "loss": 0.2083, "step": 6624 }, { "epoch": 5.18, "learning_rate": 9.903773433469437e-06, "loss": 0.2143, "step": 6625 }, { "epoch": 5.18, "learning_rate": 9.901241236677331e-06, "loss": 0.2021, "step": 6626 }, { "epoch": 5.18, "learning_rate": 9.898709046218256e-06, "loss": 0.1663, "step": 6627 }, { "epoch": 5.18, "learning_rate": 9.8961768622546e-06, "loss": 0.1669, "step": 6628 }, { "epoch": 5.18, "learning_rate": 9.893644684948734e-06, "loss": 0.2245, "step": 6629 }, { "epoch": 5.18, "learning_rate": 9.891112514463045e-06, "loss": 0.1745, "step": 6630 }, { "epoch": 5.18, "learning_rate": 9.888580350959907e-06, "loss": 0.2305, "step": 6631 }, { "epoch": 5.18, "learning_rate": 9.886048194601697e-06, "loss": 0.2074, "step": 6632 }, { "epoch": 5.18, "learning_rate": 9.883516045550794e-06, "loss": 0.1437, "step": 6633 }, { "epoch": 5.18, "learning_rate": 9.880983903969578e-06, "loss": 0.1751, "step": 6634 }, { "epoch": 5.19, "learning_rate": 9.878451770020422e-06, "loss": 0.3227, "step": 6635 }, { "epoch": 5.19, "learning_rate": 9.875919643865708e-06, "loss": 0.1332, "step": 6636 }, { "epoch": 5.19, "learning_rate": 9.873387525667807e-06, "loss": 0.2518, "step": 6637 }, { "epoch": 5.19, "learning_rate": 9.870855415589093e-06, "loss": 0.1767, "step": 6638 }, { "epoch": 5.19, "learning_rate": 9.868323313791946e-06, "loss": 0.1963, "step": 6639 }, { "epoch": 5.19, "learning_rate": 9.865791220438736e-06, "loss": 0.2516, "step": 6640 }, { "epoch": 5.19, "learning_rate": 9.863259135691838e-06, "loss": 0.2445, "step": 6641 }, { "epoch": 5.19, "learning_rate": 9.86072705971363e-06, "loss": 0.2416, "step": 6642 }, { "epoch": 5.19, "learning_rate": 9.858194992666478e-06, "loss": 0.22, "step": 6643 }, { "epoch": 5.19, "learning_rate": 9.855662934712757e-06, "loss": 0.3572, "step": 6644 }, { "epoch": 5.19, "learning_rate": 9.853130886014836e-06, "loss": 0.1789, "step": 6645 }, { "epoch": 5.19, "learning_rate": 9.85059884673509e-06, "loss": 0.2809, "step": 6646 }, { "epoch": 5.19, "learning_rate": 9.84806681703589e-06, "loss": 0.2666, "step": 6647 }, { "epoch": 5.2, "learning_rate": 9.845534797079598e-06, "loss": 0.2099, "step": 6648 }, { "epoch": 5.2, "learning_rate": 9.843002787028592e-06, "loss": 0.266, "step": 6649 }, { "epoch": 5.2, "learning_rate": 9.840470787045233e-06, "loss": 0.1952, "step": 6650 }, { "epoch": 5.2, "learning_rate": 9.837938797291895e-06, "loss": 0.2811, "step": 6651 }, { "epoch": 5.2, "learning_rate": 9.835406817930939e-06, "loss": 0.1413, "step": 6652 }, { "epoch": 5.2, "learning_rate": 9.832874849124742e-06, "loss": 0.175, "step": 6653 }, { "epoch": 5.2, "learning_rate": 9.830342891035656e-06, "loss": 0.2058, "step": 6654 }, { "epoch": 5.2, "learning_rate": 9.827810943826057e-06, "loss": 0.1335, "step": 6655 }, { "epoch": 5.2, "learning_rate": 9.8252790076583e-06, "loss": 0.1236, "step": 6656 }, { "epoch": 5.2, "learning_rate": 9.822747082694758e-06, "loss": 0.2369, "step": 6657 }, { "epoch": 5.2, "learning_rate": 9.820215169097787e-06, "loss": 0.3051, "step": 6658 }, { "epoch": 5.2, "learning_rate": 9.817683267029757e-06, "loss": 0.2744, "step": 6659 }, { "epoch": 5.21, "learning_rate": 9.815151376653022e-06, "loss": 0.2326, "step": 6660 }, { "epoch": 5.21, "learning_rate": 9.812619498129948e-06, "loss": 0.2319, "step": 6661 }, { "epoch": 5.21, "learning_rate": 9.810087631622886e-06, "loss": 0.2229, "step": 6662 }, { "epoch": 5.21, "learning_rate": 9.807555777294208e-06, "loss": 0.2984, "step": 6663 }, { "epoch": 5.21, "learning_rate": 9.805023935306265e-06, "loss": 0.3293, "step": 6664 }, { "epoch": 5.21, "learning_rate": 9.802492105821413e-06, "loss": 0.3067, "step": 6665 }, { "epoch": 5.21, "learning_rate": 9.799960289002014e-06, "loss": 0.1797, "step": 6666 }, { "epoch": 5.21, "learning_rate": 9.797428485010419e-06, "loss": 0.2065, "step": 6667 }, { "epoch": 5.21, "learning_rate": 9.794896694008989e-06, "loss": 0.1486, "step": 6668 }, { "epoch": 5.21, "learning_rate": 9.792364916160075e-06, "loss": 0.1913, "step": 6669 }, { "epoch": 5.21, "learning_rate": 9.789833151626032e-06, "loss": 0.1988, "step": 6670 }, { "epoch": 5.21, "learning_rate": 9.787301400569209e-06, "loss": 0.2249, "step": 6671 }, { "epoch": 5.21, "learning_rate": 9.78476966315196e-06, "loss": 0.3759, "step": 6672 }, { "epoch": 5.22, "learning_rate": 9.782237939536637e-06, "loss": 0.2723, "step": 6673 }, { "epoch": 5.22, "learning_rate": 9.779706229885591e-06, "loss": 0.2295, "step": 6674 }, { "epoch": 5.22, "learning_rate": 9.777174534361168e-06, "loss": 0.2106, "step": 6675 }, { "epoch": 5.22, "learning_rate": 9.774642853125722e-06, "loss": 0.2413, "step": 6676 }, { "epoch": 5.22, "learning_rate": 9.772111186341591e-06, "loss": 0.2746, "step": 6677 }, { "epoch": 5.22, "learning_rate": 9.769579534171128e-06, "loss": 0.1335, "step": 6678 }, { "epoch": 5.22, "learning_rate": 9.767047896776677e-06, "loss": 0.1787, "step": 6679 }, { "epoch": 5.22, "learning_rate": 9.764516274320583e-06, "loss": 0.2544, "step": 6680 }, { "epoch": 5.22, "learning_rate": 9.761984666965193e-06, "loss": 0.1852, "step": 6681 }, { "epoch": 5.22, "learning_rate": 9.75945307487284e-06, "loss": 0.1267, "step": 6682 }, { "epoch": 5.22, "learning_rate": 9.756921498205874e-06, "loss": 0.2036, "step": 6683 }, { "epoch": 5.22, "learning_rate": 9.754389937126634e-06, "loss": 0.1468, "step": 6684 }, { "epoch": 5.22, "learning_rate": 9.751858391797457e-06, "loss": 0.1094, "step": 6685 }, { "epoch": 5.23, "learning_rate": 9.749326862380684e-06, "loss": 0.2817, "step": 6686 }, { "epoch": 5.23, "learning_rate": 9.746795349038656e-06, "loss": 0.2734, "step": 6687 }, { "epoch": 5.23, "learning_rate": 9.7442638519337e-06, "loss": 0.1257, "step": 6688 }, { "epoch": 5.23, "learning_rate": 9.74173237122816e-06, "loss": 0.215, "step": 6689 }, { "epoch": 5.23, "learning_rate": 9.739200907084368e-06, "loss": 0.1396, "step": 6690 }, { "epoch": 5.23, "learning_rate": 9.736669459664655e-06, "loss": 0.2161, "step": 6691 }, { "epoch": 5.23, "learning_rate": 9.734138029131356e-06, "loss": 0.284, "step": 6692 }, { "epoch": 5.23, "learning_rate": 9.731606615646805e-06, "loss": 0.1798, "step": 6693 }, { "epoch": 5.23, "learning_rate": 9.729075219373325e-06, "loss": 0.2233, "step": 6694 }, { "epoch": 5.23, "learning_rate": 9.726543840473251e-06, "loss": 0.2184, "step": 6695 }, { "epoch": 5.23, "learning_rate": 9.724012479108908e-06, "loss": 0.2148, "step": 6696 }, { "epoch": 5.23, "learning_rate": 9.721481135442625e-06, "loss": 0.1756, "step": 6697 }, { "epoch": 5.23, "learning_rate": 9.718949809636727e-06, "loss": 0.2337, "step": 6698 }, { "epoch": 5.24, "learning_rate": 9.716418501853538e-06, "loss": 0.1745, "step": 6699 }, { "epoch": 5.24, "learning_rate": 9.713887212255379e-06, "loss": 0.1904, "step": 6700 }, { "epoch": 5.24, "learning_rate": 9.711355941004577e-06, "loss": 0.1456, "step": 6701 }, { "epoch": 5.24, "learning_rate": 9.708824688263448e-06, "loss": 0.2236, "step": 6702 }, { "epoch": 5.24, "learning_rate": 9.706293454194318e-06, "loss": 0.1745, "step": 6703 }, { "epoch": 5.24, "learning_rate": 9.703762238959503e-06, "loss": 0.4081, "step": 6704 }, { "epoch": 5.24, "learning_rate": 9.701231042721319e-06, "loss": 0.1782, "step": 6705 }, { "epoch": 5.24, "learning_rate": 9.698699865642081e-06, "loss": 0.2619, "step": 6706 }, { "epoch": 5.24, "learning_rate": 9.696168707884107e-06, "loss": 0.211, "step": 6707 }, { "epoch": 5.24, "learning_rate": 9.693637569609709e-06, "loss": 0.133, "step": 6708 }, { "epoch": 5.24, "learning_rate": 9.691106450981201e-06, "loss": 0.277, "step": 6709 }, { "epoch": 5.24, "learning_rate": 9.688575352160898e-06, "loss": 0.2086, "step": 6710 }, { "epoch": 5.25, "learning_rate": 9.6860442733111e-06, "loss": 0.1851, "step": 6711 }, { "epoch": 5.25, "learning_rate": 9.683513214594124e-06, "loss": 0.1758, "step": 6712 }, { "epoch": 5.25, "learning_rate": 9.680982176172274e-06, "loss": 0.2178, "step": 6713 }, { "epoch": 5.25, "learning_rate": 9.678451158207855e-06, "loss": 0.1461, "step": 6714 }, { "epoch": 5.25, "learning_rate": 9.675920160863178e-06, "loss": 0.2248, "step": 6715 }, { "epoch": 5.25, "learning_rate": 9.673389184300542e-06, "loss": 0.2951, "step": 6716 }, { "epoch": 5.25, "learning_rate": 9.670858228682247e-06, "loss": 0.2574, "step": 6717 }, { "epoch": 5.25, "learning_rate": 9.668327294170598e-06, "loss": 0.1567, "step": 6718 }, { "epoch": 5.25, "learning_rate": 9.66579638092789e-06, "loss": 0.1096, "step": 6719 }, { "epoch": 5.25, "learning_rate": 9.663265489116426e-06, "loss": 0.2696, "step": 6720 }, { "epoch": 5.25, "learning_rate": 9.660734618898503e-06, "loss": 0.1762, "step": 6721 }, { "epoch": 5.25, "learning_rate": 9.658203770436413e-06, "loss": 0.2098, "step": 6722 }, { "epoch": 5.25, "learning_rate": 9.655672943892447e-06, "loss": 0.2449, "step": 6723 }, { "epoch": 5.26, "learning_rate": 9.653142139428905e-06, "loss": 0.2266, "step": 6724 }, { "epoch": 5.26, "learning_rate": 9.65061135720807e-06, "loss": 0.2684, "step": 6725 }, { "epoch": 5.26, "learning_rate": 9.648080597392241e-06, "loss": 0.1578, "step": 6726 }, { "epoch": 5.26, "learning_rate": 9.645549860143704e-06, "loss": 0.2613, "step": 6727 }, { "epoch": 5.26, "learning_rate": 9.64301914562474e-06, "loss": 0.1704, "step": 6728 }, { "epoch": 5.26, "learning_rate": 9.640488453997634e-06, "loss": 0.1561, "step": 6729 }, { "epoch": 5.26, "learning_rate": 9.637957785424679e-06, "loss": 0.2093, "step": 6730 }, { "epoch": 5.26, "learning_rate": 9.635427140068148e-06, "loss": 0.3351, "step": 6731 }, { "epoch": 5.26, "learning_rate": 9.632896518090328e-06, "loss": 0.2873, "step": 6732 }, { "epoch": 5.26, "learning_rate": 9.6303659196535e-06, "loss": 0.1535, "step": 6733 }, { "epoch": 5.26, "learning_rate": 9.627835344919934e-06, "loss": 0.2099, "step": 6734 }, { "epoch": 5.26, "learning_rate": 9.625304794051909e-06, "loss": 0.1751, "step": 6735 }, { "epoch": 5.26, "learning_rate": 9.622774267211706e-06, "loss": 0.1696, "step": 6736 }, { "epoch": 5.27, "learning_rate": 9.62024376456159e-06, "loss": 0.184, "step": 6737 }, { "epoch": 5.27, "learning_rate": 9.617713286263841e-06, "loss": 0.2901, "step": 6738 }, { "epoch": 5.27, "learning_rate": 9.615182832480723e-06, "loss": 0.1839, "step": 6739 }, { "epoch": 5.27, "learning_rate": 9.612652403374503e-06, "loss": 0.1709, "step": 6740 }, { "epoch": 5.27, "learning_rate": 9.610121999107456e-06, "loss": 0.122, "step": 6741 }, { "epoch": 5.27, "learning_rate": 9.607591619841841e-06, "loss": 0.2818, "step": 6742 }, { "epoch": 5.27, "learning_rate": 9.605061265739923e-06, "loss": 0.2335, "step": 6743 }, { "epoch": 5.27, "learning_rate": 9.60253093696397e-06, "loss": 0.2539, "step": 6744 }, { "epoch": 5.27, "learning_rate": 9.600000633676235e-06, "loss": 0.2133, "step": 6745 }, { "epoch": 5.27, "learning_rate": 9.597470356038977e-06, "loss": 0.2085, "step": 6746 }, { "epoch": 5.27, "learning_rate": 9.59494010421446e-06, "loss": 0.2409, "step": 6747 }, { "epoch": 5.27, "learning_rate": 9.592409878364931e-06, "loss": 0.2727, "step": 6748 }, { "epoch": 5.27, "learning_rate": 9.589879678652652e-06, "loss": 0.1318, "step": 6749 }, { "epoch": 5.28, "learning_rate": 9.587349505239875e-06, "loss": 0.2049, "step": 6750 }, { "epoch": 5.28, "learning_rate": 9.584819358288845e-06, "loss": 0.2112, "step": 6751 }, { "epoch": 5.28, "learning_rate": 9.582289237961814e-06, "loss": 0.1991, "step": 6752 }, { "epoch": 5.28, "learning_rate": 9.579759144421029e-06, "loss": 0.2595, "step": 6753 }, { "epoch": 5.28, "learning_rate": 9.577229077828734e-06, "loss": 0.1896, "step": 6754 }, { "epoch": 5.28, "learning_rate": 9.574699038347178e-06, "loss": 0.2994, "step": 6755 }, { "epoch": 5.28, "learning_rate": 9.572169026138598e-06, "loss": 0.2287, "step": 6756 }, { "epoch": 5.28, "learning_rate": 9.569639041365236e-06, "loss": 0.1776, "step": 6757 }, { "epoch": 5.28, "learning_rate": 9.567109084189327e-06, "loss": 0.175, "step": 6758 }, { "epoch": 5.28, "learning_rate": 9.564579154773115e-06, "loss": 0.1449, "step": 6759 }, { "epoch": 5.28, "learning_rate": 9.562049253278828e-06, "loss": 0.2904, "step": 6760 }, { "epoch": 5.28, "learning_rate": 9.559519379868707e-06, "loss": 0.2747, "step": 6761 }, { "epoch": 5.28, "learning_rate": 9.556989534704978e-06, "loss": 0.2359, "step": 6762 }, { "epoch": 5.29, "learning_rate": 9.554459717949867e-06, "loss": 0.2801, "step": 6763 }, { "epoch": 5.29, "learning_rate": 9.551929929765608e-06, "loss": 0.228, "step": 6764 }, { "epoch": 5.29, "learning_rate": 9.549400170314428e-06, "loss": 0.2395, "step": 6765 }, { "epoch": 5.29, "learning_rate": 9.546870439758545e-06, "loss": 0.1842, "step": 6766 }, { "epoch": 5.29, "learning_rate": 9.544340738260189e-06, "loss": 0.1532, "step": 6767 }, { "epoch": 5.29, "learning_rate": 9.541811065981574e-06, "loss": 0.1471, "step": 6768 }, { "epoch": 5.29, "learning_rate": 9.539281423084917e-06, "loss": 0.1534, "step": 6769 }, { "epoch": 5.29, "learning_rate": 9.536751809732441e-06, "loss": 0.2525, "step": 6770 }, { "epoch": 5.29, "learning_rate": 9.534222226086358e-06, "loss": 0.225, "step": 6771 }, { "epoch": 5.29, "learning_rate": 9.531692672308884e-06, "loss": 0.2671, "step": 6772 }, { "epoch": 5.29, "learning_rate": 9.52916314856222e-06, "loss": 0.2366, "step": 6773 }, { "epoch": 5.29, "learning_rate": 9.526633655008586e-06, "loss": 0.1655, "step": 6774 }, { "epoch": 5.3, "learning_rate": 9.524104191810182e-06, "loss": 0.1676, "step": 6775 }, { "epoch": 5.3, "learning_rate": 9.521574759129217e-06, "loss": 0.1886, "step": 6776 }, { "epoch": 5.3, "learning_rate": 9.519045357127893e-06, "loss": 0.2726, "step": 6777 }, { "epoch": 5.3, "learning_rate": 9.516515985968415e-06, "loss": 0.1531, "step": 6778 }, { "epoch": 5.3, "learning_rate": 9.513986645812973e-06, "loss": 0.2227, "step": 6779 }, { "epoch": 5.3, "learning_rate": 9.511457336823772e-06, "loss": 0.2494, "step": 6780 }, { "epoch": 5.3, "learning_rate": 9.508928059163003e-06, "loss": 0.1919, "step": 6781 }, { "epoch": 5.3, "learning_rate": 9.506398812992864e-06, "loss": 0.1977, "step": 6782 }, { "epoch": 5.3, "learning_rate": 9.50386959847554e-06, "loss": 0.3095, "step": 6783 }, { "epoch": 5.3, "learning_rate": 9.501340415773226e-06, "loss": 0.154, "step": 6784 }, { "epoch": 5.3, "learning_rate": 9.498811265048108e-06, "loss": 0.1965, "step": 6785 }, { "epoch": 5.3, "learning_rate": 9.496282146462368e-06, "loss": 0.2155, "step": 6786 }, { "epoch": 5.3, "learning_rate": 9.49375306017819e-06, "loss": 0.2926, "step": 6787 }, { "epoch": 5.31, "learning_rate": 9.491224006357755e-06, "loss": 0.2038, "step": 6788 }, { "epoch": 5.31, "learning_rate": 9.488694985163241e-06, "loss": 0.1005, "step": 6789 }, { "epoch": 5.31, "learning_rate": 9.486165996756832e-06, "loss": 0.1145, "step": 6790 }, { "epoch": 5.31, "learning_rate": 9.483637041300694e-06, "loss": 0.2247, "step": 6791 }, { "epoch": 5.31, "learning_rate": 9.481108118957e-06, "loss": 0.2209, "step": 6792 }, { "epoch": 5.31, "learning_rate": 9.478579229887927e-06, "loss": 0.2805, "step": 6793 }, { "epoch": 5.31, "learning_rate": 9.476050374255638e-06, "loss": 0.2172, "step": 6794 }, { "epoch": 5.31, "learning_rate": 9.473521552222304e-06, "loss": 0.2507, "step": 6795 }, { "epoch": 5.31, "learning_rate": 9.470992763950078e-06, "loss": 0.2216, "step": 6796 }, { "epoch": 5.31, "learning_rate": 9.468464009601134e-06, "loss": 0.2381, "step": 6797 }, { "epoch": 5.31, "learning_rate": 9.465935289337625e-06, "loss": 0.1579, "step": 6798 }, { "epoch": 5.31, "learning_rate": 9.46340660332171e-06, "loss": 0.1719, "step": 6799 }, { "epoch": 5.31, "learning_rate": 9.460877951715545e-06, "loss": 0.3387, "step": 6800 }, { "epoch": 5.32, "learning_rate": 9.458349334681287e-06, "loss": 0.2389, "step": 6801 }, { "epoch": 5.32, "learning_rate": 9.45582075238108e-06, "loss": 0.1807, "step": 6802 }, { "epoch": 5.32, "learning_rate": 9.453292204977072e-06, "loss": 0.0894, "step": 6803 }, { "epoch": 5.32, "learning_rate": 9.450763692631414e-06, "loss": 0.2318, "step": 6804 }, { "epoch": 5.32, "learning_rate": 9.44823521550625e-06, "loss": 0.2125, "step": 6805 }, { "epoch": 5.32, "learning_rate": 9.445706773763718e-06, "loss": 0.1949, "step": 6806 }, { "epoch": 5.32, "learning_rate": 9.443178367565968e-06, "loss": 0.1959, "step": 6807 }, { "epoch": 5.32, "learning_rate": 9.440649997075123e-06, "loss": 0.2083, "step": 6808 }, { "epoch": 5.32, "learning_rate": 9.438121662453327e-06, "loss": 0.2916, "step": 6809 }, { "epoch": 5.32, "learning_rate": 9.435593363862707e-06, "loss": 0.2723, "step": 6810 }, { "epoch": 5.32, "learning_rate": 9.433065101465401e-06, "loss": 0.2154, "step": 6811 }, { "epoch": 5.32, "learning_rate": 9.430536875423534e-06, "loss": 0.3051, "step": 6812 }, { "epoch": 5.32, "learning_rate": 9.428008685899226e-06, "loss": 0.2294, "step": 6813 }, { "epoch": 5.33, "learning_rate": 9.425480533054607e-06, "loss": 0.2307, "step": 6814 }, { "epoch": 5.33, "learning_rate": 9.422952417051798e-06, "loss": 0.2405, "step": 6815 }, { "epoch": 5.33, "learning_rate": 9.420424338052912e-06, "loss": 0.1285, "step": 6816 }, { "epoch": 5.33, "learning_rate": 9.417896296220072e-06, "loss": 0.2686, "step": 6817 }, { "epoch": 5.33, "learning_rate": 9.415368291715393e-06, "loss": 0.2848, "step": 6818 }, { "epoch": 5.33, "learning_rate": 9.412840324700977e-06, "loss": 0.2096, "step": 6819 }, { "epoch": 5.33, "learning_rate": 9.410312395338942e-06, "loss": 0.1703, "step": 6820 }, { "epoch": 5.33, "learning_rate": 9.407784503791388e-06, "loss": 0.2472, "step": 6821 }, { "epoch": 5.33, "learning_rate": 9.405256650220426e-06, "loss": 0.1707, "step": 6822 }, { "epoch": 5.33, "learning_rate": 9.402728834788157e-06, "loss": 0.2724, "step": 6823 }, { "epoch": 5.33, "learning_rate": 9.400201057656679e-06, "loss": 0.1747, "step": 6824 }, { "epoch": 5.33, "learning_rate": 9.397673318988086e-06, "loss": 0.4873, "step": 6825 }, { "epoch": 5.33, "learning_rate": 9.395145618944475e-06, "loss": 0.1287, "step": 6826 }, { "epoch": 5.34, "learning_rate": 9.392617957687938e-06, "loss": 0.2585, "step": 6827 }, { "epoch": 5.34, "learning_rate": 9.390090335380565e-06, "loss": 0.1869, "step": 6828 }, { "epoch": 5.34, "learning_rate": 9.387562752184447e-06, "loss": 0.1814, "step": 6829 }, { "epoch": 5.34, "learning_rate": 9.38503520826166e-06, "loss": 0.3403, "step": 6830 }, { "epoch": 5.34, "learning_rate": 9.382507703774288e-06, "loss": 0.2666, "step": 6831 }, { "epoch": 5.34, "learning_rate": 9.379980238884416e-06, "loss": 0.1918, "step": 6832 }, { "epoch": 5.34, "learning_rate": 9.377452813754115e-06, "loss": 0.2002, "step": 6833 }, { "epoch": 5.34, "learning_rate": 9.374925428545466e-06, "loss": 0.2176, "step": 6834 }, { "epoch": 5.34, "learning_rate": 9.37239808342054e-06, "loss": 0.2712, "step": 6835 }, { "epoch": 5.34, "learning_rate": 9.369870778541395e-06, "loss": 0.2889, "step": 6836 }, { "epoch": 5.34, "learning_rate": 9.367343514070112e-06, "loss": 0.2702, "step": 6837 }, { "epoch": 5.34, "learning_rate": 9.364816290168749e-06, "loss": 0.1952, "step": 6838 }, { "epoch": 5.35, "learning_rate": 9.362289106999364e-06, "loss": 0.3231, "step": 6839 }, { "epoch": 5.35, "learning_rate": 9.359761964724023e-06, "loss": 0.2214, "step": 6840 }, { "epoch": 5.35, "learning_rate": 9.35723486350478e-06, "loss": 0.2461, "step": 6841 }, { "epoch": 5.35, "learning_rate": 9.354707803503685e-06, "loss": 0.2712, "step": 6842 }, { "epoch": 5.35, "learning_rate": 9.352180784882791e-06, "loss": 0.302, "step": 6843 }, { "epoch": 5.35, "learning_rate": 9.349653807804147e-06, "loss": 0.3157, "step": 6844 }, { "epoch": 5.35, "learning_rate": 9.347126872429802e-06, "loss": 0.1831, "step": 6845 }, { "epoch": 5.35, "learning_rate": 9.344599978921795e-06, "loss": 0.272, "step": 6846 }, { "epoch": 5.35, "learning_rate": 9.342073127442168e-06, "loss": 0.164, "step": 6847 }, { "epoch": 5.35, "learning_rate": 9.339546318152953e-06, "loss": 0.2002, "step": 6848 }, { "epoch": 5.35, "learning_rate": 9.337019551216192e-06, "loss": 0.2321, "step": 6849 }, { "epoch": 5.35, "learning_rate": 9.334492826793913e-06, "loss": 0.238, "step": 6850 }, { "epoch": 5.35, "learning_rate": 9.33196614504815e-06, "loss": 0.2379, "step": 6851 }, { "epoch": 5.36, "learning_rate": 9.32943950614093e-06, "loss": 0.1703, "step": 6852 }, { "epoch": 5.36, "learning_rate": 9.32691291023427e-06, "loss": 0.2822, "step": 6853 }, { "epoch": 5.36, "learning_rate": 9.324386357490194e-06, "loss": 0.3461, "step": 6854 }, { "epoch": 5.36, "learning_rate": 9.321859848070724e-06, "loss": 0.2731, "step": 6855 }, { "epoch": 5.36, "learning_rate": 9.319333382137872e-06, "loss": 0.1241, "step": 6856 }, { "epoch": 5.36, "learning_rate": 9.316806959853653e-06, "loss": 0.2985, "step": 6857 }, { "epoch": 5.36, "learning_rate": 9.314280581380081e-06, "loss": 0.2305, "step": 6858 }, { "epoch": 5.36, "learning_rate": 9.311754246879156e-06, "loss": 0.1381, "step": 6859 }, { "epoch": 5.36, "learning_rate": 9.309227956512882e-06, "loss": 0.1827, "step": 6860 }, { "epoch": 5.36, "learning_rate": 9.306701710443267e-06, "loss": 0.3681, "step": 6861 }, { "epoch": 5.36, "learning_rate": 9.304175508832303e-06, "loss": 0.277, "step": 6862 }, { "epoch": 5.36, "learning_rate": 9.301649351841996e-06, "loss": 0.2576, "step": 6863 }, { "epoch": 5.36, "learning_rate": 9.299123239634332e-06, "loss": 0.2281, "step": 6864 }, { "epoch": 5.37, "learning_rate": 9.296597172371296e-06, "loss": 0.2716, "step": 6865 }, { "epoch": 5.37, "learning_rate": 9.294071150214887e-06, "loss": 0.2903, "step": 6866 }, { "epoch": 5.37, "learning_rate": 9.291545173327084e-06, "loss": 0.2592, "step": 6867 }, { "epoch": 5.37, "learning_rate": 9.289019241869864e-06, "loss": 0.1233, "step": 6868 }, { "epoch": 5.37, "learning_rate": 9.286493356005214e-06, "loss": 0.1633, "step": 6869 }, { "epoch": 5.37, "learning_rate": 9.283967515895108e-06, "loss": 0.1301, "step": 6870 }, { "epoch": 5.37, "learning_rate": 9.28144172170151e-06, "loss": 0.3006, "step": 6871 }, { "epoch": 5.37, "learning_rate": 9.278915973586401e-06, "loss": 0.3367, "step": 6872 }, { "epoch": 5.37, "learning_rate": 9.276390271711741e-06, "loss": 0.1152, "step": 6873 }, { "epoch": 5.37, "learning_rate": 9.273864616239498e-06, "loss": 0.244, "step": 6874 }, { "epoch": 5.37, "learning_rate": 9.271339007331633e-06, "loss": 0.2315, "step": 6875 }, { "epoch": 5.37, "learning_rate": 9.2688134451501e-06, "loss": 0.2656, "step": 6876 }, { "epoch": 5.37, "learning_rate": 9.266287929856855e-06, "loss": 0.2088, "step": 6877 }, { "epoch": 5.38, "learning_rate": 9.263762461613853e-06, "loss": 0.3042, "step": 6878 }, { "epoch": 5.38, "learning_rate": 9.261237040583037e-06, "loss": 0.1882, "step": 6879 }, { "epoch": 5.38, "learning_rate": 9.258711666926362e-06, "loss": 0.3245, "step": 6880 }, { "epoch": 5.38, "learning_rate": 9.256186340805764e-06, "loss": 0.1757, "step": 6881 }, { "epoch": 5.38, "learning_rate": 9.253661062383186e-06, "loss": 0.1167, "step": 6882 }, { "epoch": 5.38, "learning_rate": 9.25113583182056e-06, "loss": 0.2181, "step": 6883 }, { "epoch": 5.38, "learning_rate": 9.248610649279825e-06, "loss": 0.1786, "step": 6884 }, { "epoch": 5.38, "learning_rate": 9.246085514922908e-06, "loss": 0.2117, "step": 6885 }, { "epoch": 5.38, "learning_rate": 9.243560428911743e-06, "loss": 0.3455, "step": 6886 }, { "epoch": 5.38, "learning_rate": 9.241035391408246e-06, "loss": 0.2265, "step": 6887 }, { "epoch": 5.38, "learning_rate": 9.238510402574343e-06, "loss": 0.1947, "step": 6888 }, { "epoch": 5.38, "learning_rate": 9.23598546257195e-06, "loss": 0.2835, "step": 6889 }, { "epoch": 5.38, "learning_rate": 9.233460571562984e-06, "loss": 0.2929, "step": 6890 }, { "epoch": 5.39, "learning_rate": 9.230935729709354e-06, "loss": 0.277, "step": 6891 }, { "epoch": 5.39, "learning_rate": 9.228410937172974e-06, "loss": 0.1687, "step": 6892 }, { "epoch": 5.39, "learning_rate": 9.225886194115748e-06, "loss": 0.2401, "step": 6893 }, { "epoch": 5.39, "learning_rate": 9.223361500699571e-06, "loss": 0.2919, "step": 6894 }, { "epoch": 5.39, "learning_rate": 9.220836857086353e-06, "loss": 0.1822, "step": 6895 }, { "epoch": 5.39, "learning_rate": 9.218312263437983e-06, "loss": 0.2545, "step": 6896 }, { "epoch": 5.39, "learning_rate": 9.215787719916357e-06, "loss": 0.1852, "step": 6897 }, { "epoch": 5.39, "learning_rate": 9.213263226683365e-06, "loss": 0.2361, "step": 6898 }, { "epoch": 5.39, "learning_rate": 9.21073878390089e-06, "loss": 0.2311, "step": 6899 }, { "epoch": 5.39, "learning_rate": 9.208214391730818e-06, "loss": 0.2434, "step": 6900 }, { "epoch": 5.39, "learning_rate": 9.205690050335028e-06, "loss": 0.2646, "step": 6901 }, { "epoch": 5.39, "learning_rate": 9.203165759875396e-06, "loss": 0.2997, "step": 6902 }, { "epoch": 5.4, "learning_rate": 9.200641520513803e-06, "loss": 0.2158, "step": 6903 }, { "epoch": 5.4, "learning_rate": 9.198117332412104e-06, "loss": 0.2422, "step": 6904 }, { "epoch": 5.4, "learning_rate": 9.195593195732178e-06, "loss": 0.1384, "step": 6905 }, { "epoch": 5.4, "learning_rate": 9.193069110635883e-06, "loss": 0.2296, "step": 6906 }, { "epoch": 5.4, "learning_rate": 9.190545077285083e-06, "loss": 0.147, "step": 6907 }, { "epoch": 5.4, "learning_rate": 9.18802109584163e-06, "loss": 0.3629, "step": 6908 }, { "epoch": 5.4, "learning_rate": 9.185497166467386e-06, "loss": 0.2282, "step": 6909 }, { "epoch": 5.4, "learning_rate": 9.182973289324194e-06, "loss": 0.2176, "step": 6910 }, { "epoch": 5.4, "learning_rate": 9.180449464573902e-06, "loss": 0.1893, "step": 6911 }, { "epoch": 5.4, "learning_rate": 9.177925692378354e-06, "loss": 0.2061, "step": 6912 }, { "epoch": 5.4, "learning_rate": 9.17540197289939e-06, "loss": 0.1898, "step": 6913 }, { "epoch": 5.4, "learning_rate": 9.172878306298847e-06, "loss": 0.2349, "step": 6914 }, { "epoch": 5.4, "learning_rate": 9.170354692738561e-06, "loss": 0.1811, "step": 6915 }, { "epoch": 5.41, "learning_rate": 9.167831132380359e-06, "loss": 0.1738, "step": 6916 }, { "epoch": 5.41, "learning_rate": 9.165307625386066e-06, "loss": 0.2779, "step": 6917 }, { "epoch": 5.41, "learning_rate": 9.16278417191751e-06, "loss": 0.1913, "step": 6918 }, { "epoch": 5.41, "learning_rate": 9.160260772136508e-06, "loss": 0.2379, "step": 6919 }, { "epoch": 5.41, "learning_rate": 9.157737426204878e-06, "loss": 0.2166, "step": 6920 }, { "epoch": 5.41, "learning_rate": 9.155214134284428e-06, "loss": 0.1543, "step": 6921 }, { "epoch": 5.41, "learning_rate": 9.152690896536974e-06, "loss": 0.3571, "step": 6922 }, { "epoch": 5.41, "learning_rate": 9.150167713124313e-06, "loss": 0.1944, "step": 6923 }, { "epoch": 5.41, "learning_rate": 9.147644584208257e-06, "loss": 0.258, "step": 6924 }, { "epoch": 5.41, "learning_rate": 9.1451215099506e-06, "loss": 0.142, "step": 6925 }, { "epoch": 5.41, "learning_rate": 9.142598490513143e-06, "loss": 0.1238, "step": 6926 }, { "epoch": 5.41, "learning_rate": 9.140075526057668e-06, "loss": 0.1971, "step": 6927 }, { "epoch": 5.41, "learning_rate": 9.13755261674597e-06, "loss": 0.1904, "step": 6928 }, { "epoch": 5.42, "learning_rate": 9.135029762739832e-06, "loss": 0.1591, "step": 6929 }, { "epoch": 5.42, "learning_rate": 9.132506964201037e-06, "loss": 0.2224, "step": 6930 }, { "epoch": 5.42, "learning_rate": 9.129984221291359e-06, "loss": 0.2232, "step": 6931 }, { "epoch": 5.42, "learning_rate": 9.127461534172582e-06, "loss": 0.2273, "step": 6932 }, { "epoch": 5.42, "learning_rate": 9.124938903006462e-06, "loss": 0.1318, "step": 6933 }, { "epoch": 5.42, "learning_rate": 9.122416327954775e-06, "loss": 0.1617, "step": 6934 }, { "epoch": 5.42, "learning_rate": 9.119893809179282e-06, "loss": 0.2505, "step": 6935 }, { "epoch": 5.42, "learning_rate": 9.117371346841744e-06, "loss": 0.1905, "step": 6936 }, { "epoch": 5.42, "learning_rate": 9.11484894110392e-06, "loss": 0.2656, "step": 6937 }, { "epoch": 5.42, "learning_rate": 9.112326592127555e-06, "loss": 0.2192, "step": 6938 }, { "epoch": 5.42, "learning_rate": 9.109804300074405e-06, "loss": 0.1879, "step": 6939 }, { "epoch": 5.42, "learning_rate": 9.10728206510621e-06, "loss": 0.3137, "step": 6940 }, { "epoch": 5.42, "learning_rate": 9.104759887384715e-06, "loss": 0.2894, "step": 6941 }, { "epoch": 5.43, "learning_rate": 9.102237767071658e-06, "loss": 0.2006, "step": 6942 }, { "epoch": 5.43, "learning_rate": 9.099715704328773e-06, "loss": 0.3543, "step": 6943 }, { "epoch": 5.43, "learning_rate": 9.097193699317786e-06, "loss": 0.2451, "step": 6944 }, { "epoch": 5.43, "learning_rate": 9.094671752200431e-06, "loss": 0.3989, "step": 6945 }, { "epoch": 5.43, "learning_rate": 9.092149863138425e-06, "loss": 0.2553, "step": 6946 }, { "epoch": 5.43, "learning_rate": 9.089628032293492e-06, "loss": 0.2918, "step": 6947 }, { "epoch": 5.43, "learning_rate": 9.087106259827346e-06, "loss": 0.2498, "step": 6948 }, { "epoch": 5.43, "learning_rate": 9.0845845459017e-06, "loss": 0.2274, "step": 6949 }, { "epoch": 5.43, "learning_rate": 9.082062890678258e-06, "loss": 0.2548, "step": 6950 }, { "epoch": 5.43, "learning_rate": 9.079541294318728e-06, "loss": 0.1615, "step": 6951 }, { "epoch": 5.43, "learning_rate": 9.07701975698481e-06, "loss": 0.2638, "step": 6952 }, { "epoch": 5.43, "learning_rate": 9.074498278838202e-06, "loss": 0.2411, "step": 6953 }, { "epoch": 5.43, "learning_rate": 9.071976860040595e-06, "loss": 0.3014, "step": 6954 }, { "epoch": 5.44, "learning_rate": 9.069455500753683e-06, "loss": 0.2229, "step": 6955 }, { "epoch": 5.44, "learning_rate": 9.066934201139143e-06, "loss": 0.2854, "step": 6956 }, { "epoch": 5.44, "learning_rate": 9.064412961358663e-06, "loss": 0.1836, "step": 6957 }, { "epoch": 5.44, "learning_rate": 9.061891781573917e-06, "loss": 0.1588, "step": 6958 }, { "epoch": 5.44, "learning_rate": 9.059370661946583e-06, "loss": 0.2848, "step": 6959 }, { "epoch": 5.44, "learning_rate": 9.056849602638333e-06, "loss": 0.1922, "step": 6960 }, { "epoch": 5.44, "learning_rate": 9.054328603810824e-06, "loss": 0.2207, "step": 6961 }, { "epoch": 5.44, "learning_rate": 9.051807665625725e-06, "loss": 0.151, "step": 6962 }, { "epoch": 5.44, "learning_rate": 9.049286788244693e-06, "loss": 0.3197, "step": 6963 }, { "epoch": 5.44, "learning_rate": 9.046765971829381e-06, "loss": 0.2506, "step": 6964 }, { "epoch": 5.44, "learning_rate": 9.044245216541442e-06, "loss": 0.274, "step": 6965 }, { "epoch": 5.44, "learning_rate": 9.041724522542527e-06, "loss": 0.2514, "step": 6966 }, { "epoch": 5.45, "learning_rate": 9.039203889994267e-06, "loss": 0.2591, "step": 6967 }, { "epoch": 5.45, "learning_rate": 9.03668331905831e-06, "loss": 0.1605, "step": 6968 }, { "epoch": 5.45, "learning_rate": 9.03416280989629e-06, "loss": 0.1418, "step": 6969 }, { "epoch": 5.45, "learning_rate": 9.031642362669833e-06, "loss": 0.1889, "step": 6970 }, { "epoch": 5.45, "learning_rate": 9.02912197754057e-06, "loss": 0.2181, "step": 6971 }, { "epoch": 5.45, "learning_rate": 9.026601654670129e-06, "loss": 0.1998, "step": 6972 }, { "epoch": 5.45, "learning_rate": 9.024081394220117e-06, "loss": 0.2379, "step": 6973 }, { "epoch": 5.45, "learning_rate": 9.021561196352158e-06, "loss": 0.2233, "step": 6974 }, { "epoch": 5.45, "learning_rate": 9.019041061227858e-06, "loss": 0.1634, "step": 6975 }, { "epoch": 5.45, "learning_rate": 9.016520989008828e-06, "loss": 0.2453, "step": 6976 }, { "epoch": 5.45, "learning_rate": 9.01400097985667e-06, "loss": 0.2509, "step": 6977 }, { "epoch": 5.45, "learning_rate": 9.011481033932983e-06, "loss": 0.1995, "step": 6978 }, { "epoch": 5.45, "learning_rate": 9.008961151399356e-06, "loss": 0.3378, "step": 6979 }, { "epoch": 5.46, "learning_rate": 9.006441332417388e-06, "loss": 0.2166, "step": 6980 }, { "epoch": 5.46, "learning_rate": 9.00392157714866e-06, "loss": 0.1792, "step": 6981 }, { "epoch": 5.46, "learning_rate": 9.001401885754758e-06, "loss": 0.2555, "step": 6982 }, { "epoch": 5.46, "learning_rate": 8.998882258397262e-06, "loss": 0.1836, "step": 6983 }, { "epoch": 5.46, "learning_rate": 8.996362695237742e-06, "loss": 0.2628, "step": 6984 }, { "epoch": 5.46, "learning_rate": 8.993843196437767e-06, "loss": 0.1752, "step": 6985 }, { "epoch": 5.46, "learning_rate": 8.991323762158907e-06, "loss": 0.199, "step": 6986 }, { "epoch": 5.46, "learning_rate": 8.988804392562724e-06, "loss": 0.3127, "step": 6987 }, { "epoch": 5.46, "learning_rate": 8.986285087810775e-06, "loss": 0.2166, "step": 6988 }, { "epoch": 5.46, "learning_rate": 8.983765848064616e-06, "loss": 0.2247, "step": 6989 }, { "epoch": 5.46, "learning_rate": 8.98124667348579e-06, "loss": 0.2446, "step": 6990 }, { "epoch": 5.46, "learning_rate": 8.97872756423585e-06, "loss": 0.3007, "step": 6991 }, { "epoch": 5.46, "learning_rate": 8.97620852047633e-06, "loss": 0.2549, "step": 6992 }, { "epoch": 5.47, "learning_rate": 8.973689542368772e-06, "loss": 0.3476, "step": 6993 }, { "epoch": 5.47, "learning_rate": 8.97117063007471e-06, "loss": 0.1102, "step": 6994 }, { "epoch": 5.47, "learning_rate": 8.968651783755668e-06, "loss": 0.2732, "step": 6995 }, { "epoch": 5.47, "learning_rate": 8.966133003573172e-06, "loss": 0.2572, "step": 6996 }, { "epoch": 5.47, "learning_rate": 8.963614289688742e-06, "loss": 0.2066, "step": 6997 }, { "epoch": 5.47, "learning_rate": 8.961095642263894e-06, "loss": 0.2494, "step": 6998 }, { "epoch": 5.47, "learning_rate": 8.95857706146014e-06, "loss": 0.1622, "step": 6999 }, { "epoch": 5.47, "learning_rate": 8.956058547438989e-06, "loss": 0.3001, "step": 7000 }, { "epoch": 5.47, "learning_rate": 8.95354010036194e-06, "loss": 0.2224, "step": 7001 }, { "epoch": 5.47, "learning_rate": 8.95102172039049e-06, "loss": 0.2838, "step": 7002 }, { "epoch": 5.47, "learning_rate": 8.948503407686142e-06, "loss": 0.1752, "step": 7003 }, { "epoch": 5.47, "learning_rate": 8.945985162410376e-06, "loss": 0.2466, "step": 7004 }, { "epoch": 5.47, "learning_rate": 8.943466984724687e-06, "loss": 0.2102, "step": 7005 }, { "epoch": 5.48, "learning_rate": 8.940948874790553e-06, "loss": 0.2243, "step": 7006 }, { "epoch": 5.48, "learning_rate": 8.938430832769449e-06, "loss": 0.3072, "step": 7007 }, { "epoch": 5.48, "learning_rate": 8.935912858822846e-06, "loss": 0.2375, "step": 7008 }, { "epoch": 5.48, "learning_rate": 8.933394953112218e-06, "loss": 0.1876, "step": 7009 }, { "epoch": 5.48, "learning_rate": 8.930877115799024e-06, "loss": 0.1677, "step": 7010 }, { "epoch": 5.48, "learning_rate": 8.92835934704473e-06, "loss": 0.1865, "step": 7011 }, { "epoch": 5.48, "learning_rate": 8.925841647010786e-06, "loss": 0.178, "step": 7012 }, { "epoch": 5.48, "learning_rate": 8.923324015858643e-06, "loss": 0.2138, "step": 7013 }, { "epoch": 5.48, "learning_rate": 8.920806453749747e-06, "loss": 0.2506, "step": 7014 }, { "epoch": 5.48, "learning_rate": 8.918288960845542e-06, "loss": 0.3214, "step": 7015 }, { "epoch": 5.48, "learning_rate": 8.915771537307463e-06, "loss": 0.3076, "step": 7016 }, { "epoch": 5.48, "learning_rate": 8.91325418329695e-06, "loss": 0.109, "step": 7017 }, { "epoch": 5.48, "learning_rate": 8.910736898975423e-06, "loss": 0.2241, "step": 7018 }, { "epoch": 5.49, "learning_rate": 8.908219684504309e-06, "loss": 0.1698, "step": 7019 }, { "epoch": 5.49, "learning_rate": 8.905702540045028e-06, "loss": 0.1814, "step": 7020 }, { "epoch": 5.49, "learning_rate": 8.903185465758996e-06, "loss": 0.2845, "step": 7021 }, { "epoch": 5.49, "learning_rate": 8.900668461807623e-06, "loss": 0.224, "step": 7022 }, { "epoch": 5.49, "learning_rate": 8.898151528352318e-06, "loss": 0.1608, "step": 7023 }, { "epoch": 5.49, "learning_rate": 8.895634665554476e-06, "loss": 0.2193, "step": 7024 }, { "epoch": 5.49, "learning_rate": 8.893117873575498e-06, "loss": 0.1393, "step": 7025 }, { "epoch": 5.49, "learning_rate": 8.890601152576777e-06, "loss": 0.418, "step": 7026 }, { "epoch": 5.49, "learning_rate": 8.8880845027197e-06, "loss": 0.3245, "step": 7027 }, { "epoch": 5.49, "learning_rate": 8.885567924165653e-06, "loss": 0.2173, "step": 7028 }, { "epoch": 5.49, "learning_rate": 8.883051417076014e-06, "loss": 0.2389, "step": 7029 }, { "epoch": 5.49, "learning_rate": 8.880534981612153e-06, "loss": 0.1258, "step": 7030 }, { "epoch": 5.5, "learning_rate": 8.878018617935442e-06, "loss": 0.1939, "step": 7031 }, { "epoch": 5.5, "learning_rate": 8.875502326207249e-06, "loss": 0.2859, "step": 7032 }, { "epoch": 5.5, "learning_rate": 8.87298610658893e-06, "loss": 0.192, "step": 7033 }, { "epoch": 5.5, "learning_rate": 8.870469959241847e-06, "loss": 0.2688, "step": 7034 }, { "epoch": 5.5, "learning_rate": 8.867953884327346e-06, "loss": 0.3044, "step": 7035 }, { "epoch": 5.5, "learning_rate": 8.865437882006774e-06, "loss": 0.2663, "step": 7036 }, { "epoch": 5.5, "learning_rate": 8.862921952441473e-06, "loss": 0.3373, "step": 7037 }, { "epoch": 5.5, "learning_rate": 8.860406095792781e-06, "loss": 0.1626, "step": 7038 }, { "epoch": 5.5, "learning_rate": 8.857890312222031e-06, "loss": 0.1824, "step": 7039 }, { "epoch": 5.5, "learning_rate": 8.855374601890556e-06, "loss": 0.192, "step": 7040 }, { "epoch": 5.5, "learning_rate": 8.852858964959669e-06, "loss": 0.2743, "step": 7041 }, { "epoch": 5.5, "learning_rate": 8.850343401590692e-06, "loss": 0.2146, "step": 7042 }, { "epoch": 5.5, "learning_rate": 8.847827911944942e-06, "loss": 0.2769, "step": 7043 }, { "epoch": 5.51, "learning_rate": 8.845312496183726e-06, "loss": 0.3162, "step": 7044 }, { "epoch": 5.51, "learning_rate": 8.842797154468348e-06, "loss": 0.2086, "step": 7045 }, { "epoch": 5.51, "learning_rate": 8.84028188696011e-06, "loss": 0.4422, "step": 7046 }, { "epoch": 5.51, "learning_rate": 8.837766693820304e-06, "loss": 0.2275, "step": 7047 }, { "epoch": 5.51, "learning_rate": 8.83525157521022e-06, "loss": 0.3234, "step": 7048 }, { "epoch": 5.51, "learning_rate": 8.832736531291144e-06, "loss": 0.2218, "step": 7049 }, { "epoch": 5.51, "learning_rate": 8.830221562224358e-06, "loss": 0.2624, "step": 7050 }, { "epoch": 5.51, "learning_rate": 8.827706668171137e-06, "loss": 0.2069, "step": 7051 }, { "epoch": 5.51, "learning_rate": 8.825191849292748e-06, "loss": 0.1751, "step": 7052 }, { "epoch": 5.51, "learning_rate": 8.82267710575046e-06, "loss": 0.1838, "step": 7053 }, { "epoch": 5.51, "learning_rate": 8.820162437705534e-06, "loss": 0.2484, "step": 7054 }, { "epoch": 5.51, "learning_rate": 8.81764784531923e-06, "loss": 0.3083, "step": 7055 }, { "epoch": 5.51, "learning_rate": 8.815133328752792e-06, "loss": 0.1876, "step": 7056 }, { "epoch": 5.52, "learning_rate": 8.81261888816748e-06, "loss": 0.3272, "step": 7057 }, { "epoch": 5.52, "learning_rate": 8.810104523724518e-06, "loss": 0.1936, "step": 7058 }, { "epoch": 5.52, "learning_rate": 8.807590235585155e-06, "loss": 0.2009, "step": 7059 }, { "epoch": 5.52, "learning_rate": 8.805076023910614e-06, "loss": 0.2458, "step": 7060 }, { "epoch": 5.52, "learning_rate": 8.802561888862134e-06, "loss": 0.2339, "step": 7061 }, { "epoch": 5.52, "learning_rate": 8.800047830600926e-06, "loss": 0.2565, "step": 7062 }, { "epoch": 5.52, "learning_rate": 8.797533849288217e-06, "loss": 0.295, "step": 7063 }, { "epoch": 5.52, "learning_rate": 8.795019945085212e-06, "loss": 0.311, "step": 7064 }, { "epoch": 5.52, "learning_rate": 8.792506118153121e-06, "loss": 0.2644, "step": 7065 }, { "epoch": 5.52, "learning_rate": 8.789992368653144e-06, "loss": 0.1982, "step": 7066 }, { "epoch": 5.52, "learning_rate": 8.787478696746483e-06, "loss": 0.2521, "step": 7067 }, { "epoch": 5.52, "learning_rate": 8.784965102594329e-06, "loss": 0.2294, "step": 7068 }, { "epoch": 5.52, "learning_rate": 8.782451586357867e-06, "loss": 0.2734, "step": 7069 }, { "epoch": 5.53, "learning_rate": 8.77993814819828e-06, "loss": 0.162, "step": 7070 }, { "epoch": 5.53, "learning_rate": 8.777424788276746e-06, "loss": 0.2751, "step": 7071 }, { "epoch": 5.53, "learning_rate": 8.774911506754442e-06, "loss": 0.3593, "step": 7072 }, { "epoch": 5.53, "learning_rate": 8.77239830379253e-06, "loss": 0.2257, "step": 7073 }, { "epoch": 5.53, "learning_rate": 8.769885179552175e-06, "loss": 0.2336, "step": 7074 }, { "epoch": 5.53, "learning_rate": 8.767372134194529e-06, "loss": 0.2473, "step": 7075 }, { "epoch": 5.53, "learning_rate": 8.764859167880752e-06, "loss": 0.2453, "step": 7076 }, { "epoch": 5.53, "learning_rate": 8.762346280771984e-06, "loss": 0.1451, "step": 7077 }, { "epoch": 5.53, "learning_rate": 8.759833473029374e-06, "loss": 0.1826, "step": 7078 }, { "epoch": 5.53, "learning_rate": 8.757320744814054e-06, "loss": 0.2158, "step": 7079 }, { "epoch": 5.53, "learning_rate": 8.754808096287165e-06, "loss": 0.1609, "step": 7080 }, { "epoch": 5.53, "learning_rate": 8.75229552760982e-06, "loss": 0.1851, "step": 7081 }, { "epoch": 5.53, "learning_rate": 8.74978303894315e-06, "loss": 0.1861, "step": 7082 }, { "epoch": 5.54, "learning_rate": 8.747270630448267e-06, "loss": 0.2888, "step": 7083 }, { "epoch": 5.54, "learning_rate": 8.744758302286287e-06, "loss": 0.2265, "step": 7084 }, { "epoch": 5.54, "learning_rate": 8.742246054618316e-06, "loss": 0.2555, "step": 7085 }, { "epoch": 5.54, "learning_rate": 8.73973388760545e-06, "loss": 0.2691, "step": 7086 }, { "epoch": 5.54, "learning_rate": 8.737221801408785e-06, "loss": 0.1304, "step": 7087 }, { "epoch": 5.54, "learning_rate": 8.73470979618942e-06, "loss": 0.1517, "step": 7088 }, { "epoch": 5.54, "learning_rate": 8.732197872108432e-06, "loss": 0.2022, "step": 7089 }, { "epoch": 5.54, "learning_rate": 8.729686029326907e-06, "loss": 0.2828, "step": 7090 }, { "epoch": 5.54, "learning_rate": 8.72717426800592e-06, "loss": 0.3254, "step": 7091 }, { "epoch": 5.54, "learning_rate": 8.724662588306536e-06, "loss": 0.177, "step": 7092 }, { "epoch": 5.54, "learning_rate": 8.722150990389824e-06, "loss": 0.2435, "step": 7093 }, { "epoch": 5.54, "learning_rate": 8.71963947441684e-06, "loss": 0.3183, "step": 7094 }, { "epoch": 5.55, "learning_rate": 8.717128040548643e-06, "loss": 0.3085, "step": 7095 }, { "epoch": 5.55, "learning_rate": 8.714616688946277e-06, "loss": 0.2262, "step": 7096 }, { "epoch": 5.55, "learning_rate": 8.712105419770795e-06, "loss": 0.3259, "step": 7097 }, { "epoch": 5.55, "learning_rate": 8.709594233183221e-06, "loss": 0.22, "step": 7098 }, { "epoch": 5.55, "learning_rate": 8.707083129344598e-06, "loss": 0.2638, "step": 7099 }, { "epoch": 5.55, "learning_rate": 8.70457210841595e-06, "loss": 0.3413, "step": 7100 }, { "epoch": 5.55, "learning_rate": 8.702061170558303e-06, "loss": 0.198, "step": 7101 }, { "epoch": 5.55, "learning_rate": 8.699550315932671e-06, "loss": 0.1392, "step": 7102 }, { "epoch": 5.55, "learning_rate": 8.697039544700071e-06, "loss": 0.3176, "step": 7103 }, { "epoch": 5.55, "learning_rate": 8.6945288570215e-06, "loss": 0.2366, "step": 7104 }, { "epoch": 5.55, "learning_rate": 8.692018253057966e-06, "loss": 0.212, "step": 7105 }, { "epoch": 5.55, "learning_rate": 8.689507732970462e-06, "loss": 0.2387, "step": 7106 }, { "epoch": 5.55, "learning_rate": 8.686997296919981e-06, "loss": 0.258, "step": 7107 }, { "epoch": 5.56, "learning_rate": 8.68448694506751e-06, "loss": 0.1409, "step": 7108 }, { "epoch": 5.56, "learning_rate": 8.681976677574023e-06, "loss": 0.2469, "step": 7109 }, { "epoch": 5.56, "learning_rate": 8.679466494600494e-06, "loss": 0.2803, "step": 7110 }, { "epoch": 5.56, "learning_rate": 8.676956396307896e-06, "loss": 0.2149, "step": 7111 }, { "epoch": 5.56, "learning_rate": 8.674446382857188e-06, "loss": 0.2268, "step": 7112 }, { "epoch": 5.56, "learning_rate": 8.671936454409331e-06, "loss": 0.1602, "step": 7113 }, { "epoch": 5.56, "learning_rate": 8.669426611125283e-06, "loss": 0.2342, "step": 7114 }, { "epoch": 5.56, "learning_rate": 8.666916853165976e-06, "loss": 0.1861, "step": 7115 }, { "epoch": 5.56, "learning_rate": 8.664407180692365e-06, "loss": 0.2705, "step": 7116 }, { "epoch": 5.56, "learning_rate": 8.66189759386538e-06, "loss": 0.2396, "step": 7117 }, { "epoch": 5.56, "learning_rate": 8.65938809284595e-06, "loss": 0.2658, "step": 7118 }, { "epoch": 5.56, "learning_rate": 8.656878677795007e-06, "loss": 0.2547, "step": 7119 }, { "epoch": 5.56, "learning_rate": 8.654369348873465e-06, "loss": 0.3282, "step": 7120 }, { "epoch": 5.57, "learning_rate": 8.651860106242238e-06, "loss": 0.1727, "step": 7121 }, { "epoch": 5.57, "learning_rate": 8.649350950062237e-06, "loss": 0.2373, "step": 7122 }, { "epoch": 5.57, "learning_rate": 8.646841880494361e-06, "loss": 0.1464, "step": 7123 }, { "epoch": 5.57, "learning_rate": 8.644332897699513e-06, "loss": 0.1638, "step": 7124 }, { "epoch": 5.57, "learning_rate": 8.641824001838583e-06, "loss": 0.4036, "step": 7125 }, { "epoch": 5.57, "learning_rate": 8.639315193072453e-06, "loss": 0.2795, "step": 7126 }, { "epoch": 5.57, "learning_rate": 8.636806471562007e-06, "loss": 0.1397, "step": 7127 }, { "epoch": 5.57, "learning_rate": 8.634297837468119e-06, "loss": 0.2732, "step": 7128 }, { "epoch": 5.57, "learning_rate": 8.631789290951658e-06, "loss": 0.2303, "step": 7129 }, { "epoch": 5.57, "learning_rate": 8.62928083217349e-06, "loss": 0.2096, "step": 7130 }, { "epoch": 5.57, "learning_rate": 8.626772461294475e-06, "loss": 0.2377, "step": 7131 }, { "epoch": 5.57, "learning_rate": 8.624264178475459e-06, "loss": 0.2848, "step": 7132 }, { "epoch": 5.57, "learning_rate": 8.621755983877293e-06, "loss": 0.2793, "step": 7133 }, { "epoch": 5.58, "learning_rate": 8.619247877660817e-06, "loss": 0.2839, "step": 7134 }, { "epoch": 5.58, "learning_rate": 8.616739859986868e-06, "loss": 0.1996, "step": 7135 }, { "epoch": 5.58, "learning_rate": 8.614231931016275e-06, "loss": 0.2181, "step": 7136 }, { "epoch": 5.58, "learning_rate": 8.611724090909866e-06, "loss": 0.1671, "step": 7137 }, { "epoch": 5.58, "learning_rate": 8.609216339828452e-06, "loss": 0.153, "step": 7138 }, { "epoch": 5.58, "learning_rate": 8.606708677932849e-06, "loss": 0.2308, "step": 7139 }, { "epoch": 5.58, "learning_rate": 8.604201105383866e-06, "loss": 0.1998, "step": 7140 }, { "epoch": 5.58, "learning_rate": 8.601693622342301e-06, "loss": 0.2444, "step": 7141 }, { "epoch": 5.58, "learning_rate": 8.599186228968956e-06, "loss": 0.2389, "step": 7142 }, { "epoch": 5.58, "learning_rate": 8.596678925424615e-06, "loss": 0.1429, "step": 7143 }, { "epoch": 5.58, "learning_rate": 8.594171711870062e-06, "loss": 0.1804, "step": 7144 }, { "epoch": 5.58, "learning_rate": 8.591664588466078e-06, "loss": 0.2848, "step": 7145 }, { "epoch": 5.58, "learning_rate": 8.589157555373435e-06, "loss": 0.1649, "step": 7146 }, { "epoch": 5.59, "learning_rate": 8.586650612752897e-06, "loss": 0.2111, "step": 7147 }, { "epoch": 5.59, "learning_rate": 8.58414376076523e-06, "loss": 0.2495, "step": 7148 }, { "epoch": 5.59, "learning_rate": 8.581636999571188e-06, "loss": 0.2376, "step": 7149 }, { "epoch": 5.59, "learning_rate": 8.579130329331513e-06, "loss": 0.1883, "step": 7150 }, { "epoch": 5.59, "learning_rate": 8.576623750206959e-06, "loss": 0.1354, "step": 7151 }, { "epoch": 5.59, "learning_rate": 8.574117262358255e-06, "loss": 0.3166, "step": 7152 }, { "epoch": 5.59, "learning_rate": 8.571610865946141e-06, "loss": 0.2306, "step": 7153 }, { "epoch": 5.59, "learning_rate": 8.569104561131338e-06, "loss": 0.1843, "step": 7154 }, { "epoch": 5.59, "learning_rate": 8.566598348074566e-06, "loss": 0.2737, "step": 7155 }, { "epoch": 5.59, "learning_rate": 8.564092226936537e-06, "loss": 0.2172, "step": 7156 }, { "epoch": 5.59, "learning_rate": 8.561586197877965e-06, "loss": 0.2898, "step": 7157 }, { "epoch": 5.59, "learning_rate": 8.559080261059547e-06, "loss": 0.3334, "step": 7158 }, { "epoch": 5.6, "learning_rate": 8.556574416641986e-06, "loss": 0.1161, "step": 7159 }, { "epoch": 5.6, "learning_rate": 8.554068664785966e-06, "loss": 0.2552, "step": 7160 }, { "epoch": 5.6, "learning_rate": 8.551563005652175e-06, "loss": 0.1625, "step": 7161 }, { "epoch": 5.6, "learning_rate": 8.549057439401289e-06, "loss": 0.278, "step": 7162 }, { "epoch": 5.6, "learning_rate": 8.546551966193982e-06, "loss": 0.2238, "step": 7163 }, { "epoch": 5.6, "learning_rate": 8.544046586190922e-06, "loss": 0.2428, "step": 7164 }, { "epoch": 5.6, "learning_rate": 8.541541299552771e-06, "loss": 0.3103, "step": 7165 }, { "epoch": 5.6, "learning_rate": 8.53903610644018e-06, "loss": 0.2175, "step": 7166 }, { "epoch": 5.6, "learning_rate": 8.536531007013799e-06, "loss": 0.2381, "step": 7167 }, { "epoch": 5.6, "learning_rate": 8.534026001434272e-06, "loss": 0.1504, "step": 7168 }, { "epoch": 5.6, "learning_rate": 8.531521089862234e-06, "loss": 0.2205, "step": 7169 }, { "epoch": 5.6, "learning_rate": 8.529016272458315e-06, "loss": 0.1863, "step": 7170 }, { "epoch": 5.6, "learning_rate": 8.526511549383146e-06, "loss": 0.2835, "step": 7171 }, { "epoch": 5.61, "learning_rate": 8.52400692079734e-06, "loss": 0.2934, "step": 7172 }, { "epoch": 5.61, "learning_rate": 8.521502386861506e-06, "loss": 0.2406, "step": 7173 }, { "epoch": 5.61, "learning_rate": 8.518997947736258e-06, "loss": 0.4315, "step": 7174 }, { "epoch": 5.61, "learning_rate": 8.516493603582195e-06, "loss": 0.282, "step": 7175 }, { "epoch": 5.61, "learning_rate": 8.513989354559906e-06, "loss": 0.226, "step": 7176 }, { "epoch": 5.61, "learning_rate": 8.511485200829988e-06, "loss": 0.3652, "step": 7177 }, { "epoch": 5.61, "learning_rate": 8.508981142553015e-06, "loss": 0.2191, "step": 7178 }, { "epoch": 5.61, "learning_rate": 8.506477179889565e-06, "loss": 0.311, "step": 7179 }, { "epoch": 5.61, "learning_rate": 8.50397331300021e-06, "loss": 0.2964, "step": 7180 }, { "epoch": 5.61, "learning_rate": 8.501469542045512e-06, "loss": 0.3035, "step": 7181 }, { "epoch": 5.61, "learning_rate": 8.498965867186035e-06, "loss": 0.2844, "step": 7182 }, { "epoch": 5.61, "learning_rate": 8.496462288582318e-06, "loss": 0.2758, "step": 7183 }, { "epoch": 5.61, "learning_rate": 8.493958806394916e-06, "loss": 0.2384, "step": 7184 }, { "epoch": 5.62, "learning_rate": 8.49145542078436e-06, "loss": 0.1955, "step": 7185 }, { "epoch": 5.62, "learning_rate": 8.488952131911194e-06, "loss": 0.2624, "step": 7186 }, { "epoch": 5.62, "learning_rate": 8.486448939935933e-06, "loss": 0.2094, "step": 7187 }, { "epoch": 5.62, "learning_rate": 8.483945845019108e-06, "loss": 0.3347, "step": 7188 }, { "epoch": 5.62, "learning_rate": 8.481442847321228e-06, "loss": 0.2453, "step": 7189 }, { "epoch": 5.62, "learning_rate": 8.478939947002799e-06, "loss": 0.2169, "step": 7190 }, { "epoch": 5.62, "learning_rate": 8.476437144224324e-06, "loss": 0.2149, "step": 7191 }, { "epoch": 5.62, "learning_rate": 8.4739344391463e-06, "loss": 0.2168, "step": 7192 }, { "epoch": 5.62, "learning_rate": 8.471431831929213e-06, "loss": 0.1803, "step": 7193 }, { "epoch": 5.62, "learning_rate": 8.468929322733555e-06, "loss": 0.1411, "step": 7194 }, { "epoch": 5.62, "learning_rate": 8.466426911719792e-06, "loss": 0.2361, "step": 7195 }, { "epoch": 5.62, "learning_rate": 8.463924599048396e-06, "loss": 0.334, "step": 7196 }, { "epoch": 5.62, "learning_rate": 8.461422384879838e-06, "loss": 0.1813, "step": 7197 }, { "epoch": 5.63, "learning_rate": 8.458920269374571e-06, "loss": 0.2666, "step": 7198 }, { "epoch": 5.63, "learning_rate": 8.456418252693047e-06, "loss": 0.1753, "step": 7199 }, { "epoch": 5.63, "learning_rate": 8.453916334995707e-06, "loss": 0.3405, "step": 7200 }, { "epoch": 5.63, "learning_rate": 8.451414516442998e-06, "loss": 0.3035, "step": 7201 }, { "epoch": 5.63, "learning_rate": 8.448912797195344e-06, "loss": 0.2455, "step": 7202 }, { "epoch": 5.63, "learning_rate": 8.446411177413176e-06, "loss": 0.1871, "step": 7203 }, { "epoch": 5.63, "learning_rate": 8.443909657256911e-06, "loss": 0.2378, "step": 7204 }, { "epoch": 5.63, "learning_rate": 8.44140823688697e-06, "loss": 0.3562, "step": 7205 }, { "epoch": 5.63, "learning_rate": 8.43890691646375e-06, "loss": 0.2027, "step": 7206 }, { "epoch": 5.63, "learning_rate": 8.436405696147655e-06, "loss": 0.2333, "step": 7207 }, { "epoch": 5.63, "learning_rate": 8.433904576099076e-06, "loss": 0.1366, "step": 7208 }, { "epoch": 5.63, "learning_rate": 8.431403556478405e-06, "loss": 0.2095, "step": 7209 }, { "epoch": 5.64, "learning_rate": 8.428902637446022e-06, "loss": 0.3775, "step": 7210 }, { "epoch": 5.64, "learning_rate": 8.426401819162306e-06, "loss": 0.2836, "step": 7211 }, { "epoch": 5.64, "learning_rate": 8.423901101787616e-06, "loss": 0.1683, "step": 7212 }, { "epoch": 5.64, "learning_rate": 8.42140048548232e-06, "loss": 0.2517, "step": 7213 }, { "epoch": 5.64, "learning_rate": 8.41889997040677e-06, "loss": 0.1426, "step": 7214 }, { "epoch": 5.64, "learning_rate": 8.416399556721316e-06, "loss": 0.2477, "step": 7215 }, { "epoch": 5.64, "learning_rate": 8.413899244586306e-06, "loss": 0.1782, "step": 7216 }, { "epoch": 5.64, "learning_rate": 8.411399034162065e-06, "loss": 0.1398, "step": 7217 }, { "epoch": 5.64, "learning_rate": 8.408898925608929e-06, "loss": 0.2984, "step": 7218 }, { "epoch": 5.64, "learning_rate": 8.40639891908722e-06, "loss": 0.2278, "step": 7219 }, { "epoch": 5.64, "learning_rate": 8.403899014757252e-06, "loss": 0.2866, "step": 7220 }, { "epoch": 5.64, "learning_rate": 8.401399212779338e-06, "loss": 0.2684, "step": 7221 }, { "epoch": 5.64, "learning_rate": 8.398899513313781e-06, "loss": 0.1668, "step": 7222 }, { "epoch": 5.65, "learning_rate": 8.396399916520873e-06, "loss": 0.1546, "step": 7223 }, { "epoch": 5.65, "learning_rate": 8.393900422560908e-06, "loss": 0.1394, "step": 7224 }, { "epoch": 5.65, "learning_rate": 8.391401031594165e-06, "loss": 0.1275, "step": 7225 }, { "epoch": 5.65, "learning_rate": 8.388901743780927e-06, "loss": 0.1591, "step": 7226 }, { "epoch": 5.65, "learning_rate": 8.386402559281461e-06, "loss": 0.2375, "step": 7227 }, { "epoch": 5.65, "learning_rate": 8.38390347825603e-06, "loss": 0.337, "step": 7228 }, { "epoch": 5.65, "learning_rate": 8.38140450086489e-06, "loss": 0.2042, "step": 7229 }, { "epoch": 5.65, "learning_rate": 8.378905627268291e-06, "loss": 0.218, "step": 7230 }, { "epoch": 5.65, "learning_rate": 8.376406857626478e-06, "loss": 0.1686, "step": 7231 }, { "epoch": 5.65, "learning_rate": 8.37390819209969e-06, "loss": 0.1865, "step": 7232 }, { "epoch": 5.65, "learning_rate": 8.371409630848156e-06, "loss": 0.215, "step": 7233 }, { "epoch": 5.65, "learning_rate": 8.368911174032096e-06, "loss": 0.2325, "step": 7234 }, { "epoch": 5.65, "learning_rate": 8.366412821811728e-06, "loss": 0.1415, "step": 7235 }, { "epoch": 5.66, "learning_rate": 8.363914574347263e-06, "loss": 0.2741, "step": 7236 }, { "epoch": 5.66, "learning_rate": 8.361416431798904e-06, "loss": 0.1749, "step": 7237 }, { "epoch": 5.66, "learning_rate": 8.35891839432685e-06, "loss": 0.2805, "step": 7238 }, { "epoch": 5.66, "learning_rate": 8.35642046209129e-06, "loss": 0.2314, "step": 7239 }, { "epoch": 5.66, "learning_rate": 8.353922635252405e-06, "loss": 0.2322, "step": 7240 }, { "epoch": 5.66, "learning_rate": 8.351424913970373e-06, "loss": 0.198, "step": 7241 }, { "epoch": 5.66, "learning_rate": 8.348927298405363e-06, "loss": 0.2586, "step": 7242 }, { "epoch": 5.66, "learning_rate": 8.346429788717535e-06, "loss": 0.2669, "step": 7243 }, { "epoch": 5.66, "learning_rate": 8.343932385067054e-06, "loss": 0.2362, "step": 7244 }, { "epoch": 5.66, "learning_rate": 8.341435087614061e-06, "loss": 0.2713, "step": 7245 }, { "epoch": 5.66, "learning_rate": 8.3389378965187e-06, "loss": 0.2537, "step": 7246 }, { "epoch": 5.66, "learning_rate": 8.336440811941107e-06, "loss": 0.2405, "step": 7247 }, { "epoch": 5.66, "learning_rate": 8.333943834041413e-06, "loss": 0.2081, "step": 7248 }, { "epoch": 5.67, "learning_rate": 8.331446962979735e-06, "loss": 0.151, "step": 7249 }, { "epoch": 5.67, "learning_rate": 8.328950198916195e-06, "loss": 0.1629, "step": 7250 }, { "epoch": 5.67, "learning_rate": 8.326453542010898e-06, "loss": 0.1909, "step": 7251 }, { "epoch": 5.67, "learning_rate": 8.32395699242394e-06, "loss": 0.1709, "step": 7252 }, { "epoch": 5.67, "learning_rate": 8.321460550315426e-06, "loss": 0.313, "step": 7253 }, { "epoch": 5.67, "learning_rate": 8.318964215845433e-06, "loss": 0.1952, "step": 7254 }, { "epoch": 5.67, "learning_rate": 8.31646798917405e-06, "loss": 0.2889, "step": 7255 }, { "epoch": 5.67, "learning_rate": 8.31397187046135e-06, "loss": 0.1764, "step": 7256 }, { "epoch": 5.67, "learning_rate": 8.311475859867392e-06, "loss": 0.1634, "step": 7257 }, { "epoch": 5.67, "learning_rate": 8.308979957552243e-06, "loss": 0.2679, "step": 7258 }, { "epoch": 5.67, "learning_rate": 8.306484163675955e-06, "loss": 0.317, "step": 7259 }, { "epoch": 5.67, "learning_rate": 8.303988478398572e-06, "loss": 0.2766, "step": 7260 }, { "epoch": 5.67, "learning_rate": 8.301492901880136e-06, "loss": 0.2084, "step": 7261 }, { "epoch": 5.68, "learning_rate": 8.298997434280678e-06, "loss": 0.4344, "step": 7262 }, { "epoch": 5.68, "learning_rate": 8.296502075760224e-06, "loss": 0.2601, "step": 7263 }, { "epoch": 5.68, "learning_rate": 8.294006826478785e-06, "loss": 0.1302, "step": 7264 }, { "epoch": 5.68, "learning_rate": 8.291511686596381e-06, "loss": 0.2516, "step": 7265 }, { "epoch": 5.68, "learning_rate": 8.289016656273013e-06, "loss": 0.4072, "step": 7266 }, { "epoch": 5.68, "learning_rate": 8.286521735668677e-06, "loss": 0.1953, "step": 7267 }, { "epoch": 5.68, "learning_rate": 8.284026924943368e-06, "loss": 0.2182, "step": 7268 }, { "epoch": 5.68, "learning_rate": 8.281532224257061e-06, "loss": 0.2627, "step": 7269 }, { "epoch": 5.68, "learning_rate": 8.279037633769737e-06, "loss": 0.2127, "step": 7270 }, { "epoch": 5.68, "learning_rate": 8.276543153641364e-06, "loss": 0.1484, "step": 7271 }, { "epoch": 5.68, "learning_rate": 8.2740487840319e-06, "loss": 0.1645, "step": 7272 }, { "epoch": 5.68, "learning_rate": 8.27155452510131e-06, "loss": 0.1506, "step": 7273 }, { "epoch": 5.69, "learning_rate": 8.26906037700953e-06, "loss": 0.2139, "step": 7274 }, { "epoch": 5.69, "learning_rate": 8.266566339916505e-06, "loss": 0.2422, "step": 7275 }, { "epoch": 5.69, "learning_rate": 8.264072413982172e-06, "loss": 0.242, "step": 7276 }, { "epoch": 5.69, "learning_rate": 8.261578599366448e-06, "loss": 0.2146, "step": 7277 }, { "epoch": 5.69, "learning_rate": 8.259084896229264e-06, "loss": 0.2021, "step": 7278 }, { "epoch": 5.69, "learning_rate": 8.256591304730527e-06, "loss": 0.3208, "step": 7279 }, { "epoch": 5.69, "learning_rate": 8.254097825030138e-06, "loss": 0.1334, "step": 7280 }, { "epoch": 5.69, "learning_rate": 8.251604457287996e-06, "loss": 0.2198, "step": 7281 }, { "epoch": 5.69, "learning_rate": 8.249111201663995e-06, "loss": 0.2815, "step": 7282 }, { "epoch": 5.69, "learning_rate": 8.246618058318013e-06, "loss": 0.2887, "step": 7283 }, { "epoch": 5.69, "learning_rate": 8.244125027409933e-06, "loss": 0.2011, "step": 7284 }, { "epoch": 5.69, "learning_rate": 8.241632109099619e-06, "loss": 0.113, "step": 7285 }, { "epoch": 5.69, "learning_rate": 8.239139303546936e-06, "loss": 0.2649, "step": 7286 }, { "epoch": 5.7, "learning_rate": 8.236646610911731e-06, "loss": 0.1485, "step": 7287 }, { "epoch": 5.7, "learning_rate": 8.234154031353859e-06, "loss": 0.233, "step": 7288 }, { "epoch": 5.7, "learning_rate": 8.231661565033156e-06, "loss": 0.2454, "step": 7289 }, { "epoch": 5.7, "learning_rate": 8.229169212109458e-06, "loss": 0.2768, "step": 7290 }, { "epoch": 5.7, "learning_rate": 8.226676972742587e-06, "loss": 0.1991, "step": 7291 }, { "epoch": 5.7, "learning_rate": 8.224184847092363e-06, "loss": 0.2671, "step": 7292 }, { "epoch": 5.7, "learning_rate": 8.221692835318593e-06, "loss": 0.1968, "step": 7293 }, { "epoch": 5.7, "learning_rate": 8.219200937581087e-06, "loss": 0.2845, "step": 7294 }, { "epoch": 5.7, "learning_rate": 8.216709154039635e-06, "loss": 0.1959, "step": 7295 }, { "epoch": 5.7, "learning_rate": 8.214217484854032e-06, "loss": 0.2548, "step": 7296 }, { "epoch": 5.7, "learning_rate": 8.211725930184054e-06, "loss": 0.1691, "step": 7297 }, { "epoch": 5.7, "learning_rate": 8.209234490189477e-06, "loss": 0.3201, "step": 7298 }, { "epoch": 5.7, "learning_rate": 8.206743165030068e-06, "loss": 0.217, "step": 7299 }, { "epoch": 5.71, "learning_rate": 8.20425195486559e-06, "loss": 0.1681, "step": 7300 }, { "epoch": 5.71, "learning_rate": 8.201760859855787e-06, "loss": 0.2263, "step": 7301 }, { "epoch": 5.71, "learning_rate": 8.199269880160412e-06, "loss": 0.2383, "step": 7302 }, { "epoch": 5.71, "learning_rate": 8.196779015939198e-06, "loss": 0.1459, "step": 7303 }, { "epoch": 5.71, "learning_rate": 8.194288267351873e-06, "loss": 0.2415, "step": 7304 }, { "epoch": 5.71, "learning_rate": 8.191797634558165e-06, "loss": 0.1358, "step": 7305 }, { "epoch": 5.71, "learning_rate": 8.189307117717783e-06, "loss": 0.318, "step": 7306 }, { "epoch": 5.71, "learning_rate": 8.186816716990445e-06, "loss": 0.2238, "step": 7307 }, { "epoch": 5.71, "learning_rate": 8.184326432535838e-06, "loss": 0.2249, "step": 7308 }, { "epoch": 5.71, "learning_rate": 8.18183626451366e-06, "loss": 0.2643, "step": 7309 }, { "epoch": 5.71, "learning_rate": 8.179346213083598e-06, "loss": 0.207, "step": 7310 }, { "epoch": 5.71, "learning_rate": 8.176856278405331e-06, "loss": 0.2569, "step": 7311 }, { "epoch": 5.71, "learning_rate": 8.174366460638524e-06, "loss": 0.2399, "step": 7312 }, { "epoch": 5.72, "learning_rate": 8.171876759942847e-06, "loss": 0.174, "step": 7313 }, { "epoch": 5.72, "learning_rate": 8.16938717647795e-06, "loss": 0.356, "step": 7314 }, { "epoch": 5.72, "learning_rate": 8.16689771040348e-06, "loss": 0.2947, "step": 7315 }, { "epoch": 5.72, "learning_rate": 8.16440836187908e-06, "loss": 0.1856, "step": 7316 }, { "epoch": 5.72, "learning_rate": 8.161919131064382e-06, "loss": 0.2774, "step": 7317 }, { "epoch": 5.72, "learning_rate": 8.15943001811901e-06, "loss": 0.2134, "step": 7318 }, { "epoch": 5.72, "learning_rate": 8.156941023202584e-06, "loss": 0.2929, "step": 7319 }, { "epoch": 5.72, "learning_rate": 8.154452146474714e-06, "loss": 0.218, "step": 7320 }, { "epoch": 5.72, "learning_rate": 8.151963388094999e-06, "loss": 0.2484, "step": 7321 }, { "epoch": 5.72, "learning_rate": 8.149474748223036e-06, "loss": 0.1968, "step": 7322 }, { "epoch": 5.72, "learning_rate": 8.146986227018415e-06, "loss": 0.1611, "step": 7323 }, { "epoch": 5.72, "learning_rate": 8.144497824640713e-06, "loss": 0.2686, "step": 7324 }, { "epoch": 5.72, "learning_rate": 8.142009541249499e-06, "loss": 0.2764, "step": 7325 }, { "epoch": 5.73, "learning_rate": 8.139521377004342e-06, "loss": 0.1799, "step": 7326 }, { "epoch": 5.73, "learning_rate": 8.137033332064797e-06, "loss": 0.1959, "step": 7327 }, { "epoch": 5.73, "learning_rate": 8.134545406590412e-06, "loss": 0.2817, "step": 7328 }, { "epoch": 5.73, "learning_rate": 8.132057600740732e-06, "loss": 0.1755, "step": 7329 }, { "epoch": 5.73, "learning_rate": 8.129569914675292e-06, "loss": 0.3087, "step": 7330 }, { "epoch": 5.73, "learning_rate": 8.127082348553608e-06, "loss": 0.3284, "step": 7331 }, { "epoch": 5.73, "learning_rate": 8.124594902535208e-06, "loss": 0.2182, "step": 7332 }, { "epoch": 5.73, "learning_rate": 8.122107576779598e-06, "loss": 0.1802, "step": 7333 }, { "epoch": 5.73, "learning_rate": 8.119620371446284e-06, "loss": 0.2294, "step": 7334 }, { "epoch": 5.73, "learning_rate": 8.11713328669476e-06, "loss": 0.2464, "step": 7335 }, { "epoch": 5.73, "learning_rate": 8.114646322684519e-06, "loss": 0.1798, "step": 7336 }, { "epoch": 5.73, "learning_rate": 8.112159479575028e-06, "loss": 0.1981, "step": 7337 }, { "epoch": 5.74, "learning_rate": 8.10967275752577e-06, "loss": 0.1813, "step": 7338 }, { "epoch": 5.74, "learning_rate": 8.107186156696204e-06, "loss": 0.3066, "step": 7339 }, { "epoch": 5.74, "learning_rate": 8.104699677245789e-06, "loss": 0.1879, "step": 7340 }, { "epoch": 5.74, "learning_rate": 8.102213319333971e-06, "loss": 0.2207, "step": 7341 }, { "epoch": 5.74, "learning_rate": 8.099727083120197e-06, "loss": 0.2398, "step": 7342 }, { "epoch": 5.74, "learning_rate": 8.097240968763895e-06, "loss": 0.1641, "step": 7343 }, { "epoch": 5.74, "learning_rate": 8.09475497642449e-06, "loss": 0.203, "step": 7344 }, { "epoch": 5.74, "learning_rate": 8.092269106261402e-06, "loss": 0.12, "step": 7345 }, { "epoch": 5.74, "learning_rate": 8.089783358434038e-06, "loss": 0.2937, "step": 7346 }, { "epoch": 5.74, "learning_rate": 8.087297733101806e-06, "loss": 0.1614, "step": 7347 }, { "epoch": 5.74, "learning_rate": 8.08481223042409e-06, "loss": 0.1689, "step": 7348 }, { "epoch": 5.74, "learning_rate": 8.082326850560285e-06, "loss": 0.2818, "step": 7349 }, { "epoch": 5.74, "learning_rate": 8.079841593669762e-06, "loss": 0.2148, "step": 7350 }, { "epoch": 5.75, "learning_rate": 8.077356459911898e-06, "loss": 0.2338, "step": 7351 }, { "epoch": 5.75, "learning_rate": 8.074871449446051e-06, "loss": 0.3078, "step": 7352 }, { "epoch": 5.75, "learning_rate": 8.07238656243158e-06, "loss": 0.2631, "step": 7353 }, { "epoch": 5.75, "learning_rate": 8.069901799027826e-06, "loss": 0.1502, "step": 7354 }, { "epoch": 5.75, "learning_rate": 8.067417159394128e-06, "loss": 0.3365, "step": 7355 }, { "epoch": 5.75, "learning_rate": 8.06493264368982e-06, "loss": 0.2472, "step": 7356 }, { "epoch": 5.75, "learning_rate": 8.062448252074226e-06, "loss": 0.268, "step": 7357 }, { "epoch": 5.75, "learning_rate": 8.059963984706656e-06, "loss": 0.2505, "step": 7358 }, { "epoch": 5.75, "learning_rate": 8.057479841746426e-06, "loss": 0.1436, "step": 7359 }, { "epoch": 5.75, "learning_rate": 8.054995823352823e-06, "loss": 0.2094, "step": 7360 }, { "epoch": 5.75, "learning_rate": 8.052511929685143e-06, "loss": 0.1404, "step": 7361 }, { "epoch": 5.75, "learning_rate": 8.050028160902669e-06, "loss": 0.2418, "step": 7362 }, { "epoch": 5.75, "learning_rate": 8.047544517164678e-06, "loss": 0.2161, "step": 7363 }, { "epoch": 5.76, "learning_rate": 8.045060998630436e-06, "loss": 0.271, "step": 7364 }, { "epoch": 5.76, "learning_rate": 8.0425776054592e-06, "loss": 0.263, "step": 7365 }, { "epoch": 5.76, "learning_rate": 8.04009433781022e-06, "loss": 0.2525, "step": 7366 }, { "epoch": 5.76, "learning_rate": 8.037611195842741e-06, "loss": 0.2756, "step": 7367 }, { "epoch": 5.76, "learning_rate": 8.035128179715997e-06, "loss": 0.2456, "step": 7368 }, { "epoch": 5.76, "learning_rate": 8.032645289589216e-06, "loss": 0.221, "step": 7369 }, { "epoch": 5.76, "learning_rate": 8.030162525621617e-06, "loss": 0.3232, "step": 7370 }, { "epoch": 5.76, "learning_rate": 8.027679887972405e-06, "loss": 0.3019, "step": 7371 }, { "epoch": 5.76, "learning_rate": 8.025197376800788e-06, "loss": 0.1968, "step": 7372 }, { "epoch": 5.76, "learning_rate": 8.02271499226596e-06, "loss": 0.2513, "step": 7373 }, { "epoch": 5.76, "learning_rate": 8.020232734527103e-06, "loss": 0.1804, "step": 7374 }, { "epoch": 5.76, "learning_rate": 8.017750603743399e-06, "loss": 0.1664, "step": 7375 }, { "epoch": 5.76, "learning_rate": 8.015268600074017e-06, "loss": 0.2416, "step": 7376 }, { "epoch": 5.77, "learning_rate": 8.012786723678117e-06, "loss": 0.2128, "step": 7377 }, { "epoch": 5.77, "learning_rate": 8.010304974714854e-06, "loss": 0.285, "step": 7378 }, { "epoch": 5.77, "learning_rate": 8.007823353343371e-06, "loss": 0.2572, "step": 7379 }, { "epoch": 5.77, "learning_rate": 8.00534185972281e-06, "loss": 0.3537, "step": 7380 }, { "epoch": 5.77, "learning_rate": 8.002860494012302e-06, "loss": 0.296, "step": 7381 }, { "epoch": 5.77, "learning_rate": 8.000379256370957e-06, "loss": 0.1306, "step": 7382 }, { "epoch": 5.77, "learning_rate": 7.997898146957893e-06, "loss": 0.1284, "step": 7383 }, { "epoch": 5.77, "learning_rate": 7.995417165932217e-06, "loss": 0.2131, "step": 7384 }, { "epoch": 5.77, "learning_rate": 7.992936313453022e-06, "loss": 0.2719, "step": 7385 }, { "epoch": 5.77, "learning_rate": 7.9904555896794e-06, "loss": 0.1946, "step": 7386 }, { "epoch": 5.77, "learning_rate": 7.987974994770429e-06, "loss": 0.2789, "step": 7387 }, { "epoch": 5.77, "learning_rate": 7.985494528885175e-06, "loss": 0.2393, "step": 7388 }, { "epoch": 5.77, "learning_rate": 7.983014192182707e-06, "loss": 0.255, "step": 7389 }, { "epoch": 5.78, "learning_rate": 7.980533984822078e-06, "loss": 0.2524, "step": 7390 }, { "epoch": 5.78, "learning_rate": 7.978053906962331e-06, "loss": 0.0943, "step": 7391 }, { "epoch": 5.78, "learning_rate": 7.975573958762512e-06, "loss": 0.2299, "step": 7392 }, { "epoch": 5.78, "learning_rate": 7.97309414038165e-06, "loss": 0.3867, "step": 7393 }, { "epoch": 5.78, "learning_rate": 7.970614451978757e-06, "loss": 0.3068, "step": 7394 }, { "epoch": 5.78, "learning_rate": 7.968134893712856e-06, "loss": 0.2939, "step": 7395 }, { "epoch": 5.78, "learning_rate": 7.965655465742947e-06, "loss": 0.2613, "step": 7396 }, { "epoch": 5.78, "learning_rate": 7.963176168228026e-06, "loss": 0.2773, "step": 7397 }, { "epoch": 5.78, "learning_rate": 7.960697001327089e-06, "loss": 0.3023, "step": 7398 }, { "epoch": 5.78, "learning_rate": 7.958217965199106e-06, "loss": 0.2538, "step": 7399 }, { "epoch": 5.78, "learning_rate": 7.95573906000305e-06, "loss": 0.1821, "step": 7400 }, { "epoch": 5.78, "learning_rate": 7.953260285897892e-06, "loss": 0.2465, "step": 7401 }, { "epoch": 5.79, "learning_rate": 7.950781643042575e-06, "loss": 0.201, "step": 7402 }, { "epoch": 5.79, "learning_rate": 7.948303131596056e-06, "loss": 0.22, "step": 7403 }, { "epoch": 5.79, "learning_rate": 7.945824751717268e-06, "loss": 0.2707, "step": 7404 }, { "epoch": 5.79, "learning_rate": 7.943346503565141e-06, "loss": 0.1982, "step": 7405 }, { "epoch": 5.79, "learning_rate": 7.94086838729859e-06, "loss": 0.188, "step": 7406 }, { "epoch": 5.79, "learning_rate": 7.938390403076536e-06, "loss": 0.2361, "step": 7407 }, { "epoch": 5.79, "learning_rate": 7.935912551057878e-06, "loss": 0.2468, "step": 7408 }, { "epoch": 5.79, "learning_rate": 7.933434831401516e-06, "loss": 0.2789, "step": 7409 }, { "epoch": 5.79, "learning_rate": 7.930957244266335e-06, "loss": 0.2255, "step": 7410 }, { "epoch": 5.79, "learning_rate": 7.92847978981121e-06, "loss": 0.2224, "step": 7411 }, { "epoch": 5.79, "learning_rate": 7.926002468195012e-06, "loss": 0.2395, "step": 7412 }, { "epoch": 5.79, "learning_rate": 7.923525279576605e-06, "loss": 0.3603, "step": 7413 }, { "epoch": 5.79, "learning_rate": 7.92104822411484e-06, "loss": 0.1665, "step": 7414 }, { "epoch": 5.8, "learning_rate": 7.918571301968567e-06, "loss": 0.2, "step": 7415 }, { "epoch": 5.8, "learning_rate": 7.916094513296617e-06, "loss": 0.2637, "step": 7416 }, { "epoch": 5.8, "learning_rate": 7.913617858257814e-06, "loss": 0.198, "step": 7417 }, { "epoch": 5.8, "learning_rate": 7.91114133701098e-06, "loss": 0.2353, "step": 7418 }, { "epoch": 5.8, "learning_rate": 7.908664949714927e-06, "loss": 0.2462, "step": 7419 }, { "epoch": 5.8, "learning_rate": 7.906188696528453e-06, "loss": 0.2446, "step": 7420 }, { "epoch": 5.8, "learning_rate": 7.903712577610357e-06, "loss": 0.2236, "step": 7421 }, { "epoch": 5.8, "learning_rate": 7.901236593119417e-06, "loss": 0.3193, "step": 7422 }, { "epoch": 5.8, "learning_rate": 7.89876074321441e-06, "loss": 0.2733, "step": 7423 }, { "epoch": 5.8, "learning_rate": 7.896285028054106e-06, "loss": 0.2525, "step": 7424 }, { "epoch": 5.8, "learning_rate": 7.893809447797261e-06, "loss": 0.1829, "step": 7425 }, { "epoch": 5.8, "learning_rate": 7.891334002602624e-06, "loss": 0.207, "step": 7426 }, { "epoch": 5.8, "learning_rate": 7.88885869262894e-06, "loss": 0.2337, "step": 7427 }, { "epoch": 5.81, "learning_rate": 7.886383518034939e-06, "loss": 0.2844, "step": 7428 }, { "epoch": 5.81, "learning_rate": 7.88390847897934e-06, "loss": 0.2003, "step": 7429 }, { "epoch": 5.81, "learning_rate": 7.881433575620867e-06, "loss": 0.2335, "step": 7430 }, { "epoch": 5.81, "learning_rate": 7.87895880811822e-06, "loss": 0.2401, "step": 7431 }, { "epoch": 5.81, "learning_rate": 7.876484176630101e-06, "loss": 0.2413, "step": 7432 }, { "epoch": 5.81, "learning_rate": 7.874009681315199e-06, "loss": 0.1225, "step": 7433 }, { "epoch": 5.81, "learning_rate": 7.87153532233219e-06, "loss": 0.2328, "step": 7434 }, { "epoch": 5.81, "learning_rate": 7.869061099839745e-06, "loss": 0.1848, "step": 7435 }, { "epoch": 5.81, "learning_rate": 7.866587013996531e-06, "loss": 0.2306, "step": 7436 }, { "epoch": 5.81, "learning_rate": 7.8641130649612e-06, "loss": 0.2705, "step": 7437 }, { "epoch": 5.81, "learning_rate": 7.8616392528924e-06, "loss": 0.2237, "step": 7438 }, { "epoch": 5.81, "learning_rate": 7.859165577948766e-06, "loss": 0.2478, "step": 7439 }, { "epoch": 5.81, "learning_rate": 7.856692040288923e-06, "loss": 0.1565, "step": 7440 }, { "epoch": 5.82, "learning_rate": 7.854218640071488e-06, "loss": 0.2444, "step": 7441 }, { "epoch": 5.82, "learning_rate": 7.851745377455077e-06, "loss": 0.1707, "step": 7442 }, { "epoch": 5.82, "learning_rate": 7.849272252598287e-06, "loss": 0.2077, "step": 7443 }, { "epoch": 5.82, "learning_rate": 7.846799265659715e-06, "loss": 0.2052, "step": 7444 }, { "epoch": 5.82, "learning_rate": 7.844326416797942e-06, "loss": 0.2256, "step": 7445 }, { "epoch": 5.82, "learning_rate": 7.841853706171541e-06, "loss": 0.1656, "step": 7446 }, { "epoch": 5.82, "learning_rate": 7.839381133939075e-06, "loss": 0.2103, "step": 7447 }, { "epoch": 5.82, "learning_rate": 7.836908700259108e-06, "loss": 0.2985, "step": 7448 }, { "epoch": 5.82, "learning_rate": 7.834436405290184e-06, "loss": 0.2222, "step": 7449 }, { "epoch": 5.82, "learning_rate": 7.831964249190846e-06, "loss": 0.3808, "step": 7450 }, { "epoch": 5.82, "learning_rate": 7.829492232119618e-06, "loss": 0.2523, "step": 7451 }, { "epoch": 5.82, "learning_rate": 7.827020354235024e-06, "loss": 0.3452, "step": 7452 }, { "epoch": 5.82, "learning_rate": 7.82454861569558e-06, "loss": 0.1272, "step": 7453 }, { "epoch": 5.83, "learning_rate": 7.822077016659784e-06, "loss": 0.1894, "step": 7454 }, { "epoch": 5.83, "learning_rate": 7.819605557286136e-06, "loss": 0.3344, "step": 7455 }, { "epoch": 5.83, "learning_rate": 7.817134237733113e-06, "loss": 0.2239, "step": 7456 }, { "epoch": 5.83, "learning_rate": 7.814663058159202e-06, "loss": 0.1359, "step": 7457 }, { "epoch": 5.83, "learning_rate": 7.812192018722861e-06, "loss": 0.1397, "step": 7458 }, { "epoch": 5.83, "learning_rate": 7.809721119582556e-06, "loss": 0.2642, "step": 7459 }, { "epoch": 5.83, "learning_rate": 7.807250360896733e-06, "loss": 0.1772, "step": 7460 }, { "epoch": 5.83, "learning_rate": 7.804779742823837e-06, "loss": 0.2593, "step": 7461 }, { "epoch": 5.83, "learning_rate": 7.80230926552229e-06, "loss": 0.2059, "step": 7462 }, { "epoch": 5.83, "learning_rate": 7.799838929150525e-06, "loss": 0.2044, "step": 7463 }, { "epoch": 5.83, "learning_rate": 7.797368733866948e-06, "loss": 0.2158, "step": 7464 }, { "epoch": 5.83, "learning_rate": 7.794898679829968e-06, "loss": 0.5032, "step": 7465 }, { "epoch": 5.84, "learning_rate": 7.792428767197978e-06, "loss": 0.2287, "step": 7466 }, { "epoch": 5.84, "learning_rate": 7.78995899612937e-06, "loss": 0.2615, "step": 7467 }, { "epoch": 5.84, "learning_rate": 7.787489366782513e-06, "loss": 0.24, "step": 7468 }, { "epoch": 5.84, "learning_rate": 7.785019879315781e-06, "loss": 0.1265, "step": 7469 }, { "epoch": 5.84, "learning_rate": 7.782550533887527e-06, "loss": 0.1656, "step": 7470 }, { "epoch": 5.84, "learning_rate": 7.780081330656108e-06, "loss": 0.2126, "step": 7471 }, { "epoch": 5.84, "learning_rate": 7.777612269779863e-06, "loss": 0.2564, "step": 7472 }, { "epoch": 5.84, "learning_rate": 7.77514335141712e-06, "loss": 0.1979, "step": 7473 }, { "epoch": 5.84, "learning_rate": 7.772674575726205e-06, "loss": 0.2303, "step": 7474 }, { "epoch": 5.84, "learning_rate": 7.770205942865429e-06, "loss": 0.2614, "step": 7475 }, { "epoch": 5.84, "learning_rate": 7.7677374529931e-06, "loss": 0.1735, "step": 7476 }, { "epoch": 5.84, "learning_rate": 7.76526910626751e-06, "loss": 0.195, "step": 7477 }, { "epoch": 5.84, "learning_rate": 7.76280090284695e-06, "loss": 0.3321, "step": 7478 }, { "epoch": 5.85, "learning_rate": 7.760332842889687e-06, "loss": 0.213, "step": 7479 }, { "epoch": 5.85, "learning_rate": 7.757864926553997e-06, "loss": 0.2517, "step": 7480 }, { "epoch": 5.85, "learning_rate": 7.755397153998134e-06, "loss": 0.3072, "step": 7481 }, { "epoch": 5.85, "learning_rate": 7.75292952538035e-06, "loss": 0.1777, "step": 7482 }, { "epoch": 5.85, "learning_rate": 7.750462040858883e-06, "loss": 0.2378, "step": 7483 }, { "epoch": 5.85, "learning_rate": 7.74799470059197e-06, "loss": 0.185, "step": 7484 }, { "epoch": 5.85, "learning_rate": 7.74552750473782e-06, "loss": 0.2366, "step": 7485 }, { "epoch": 5.85, "learning_rate": 7.743060453454654e-06, "loss": 0.177, "step": 7486 }, { "epoch": 5.85, "learning_rate": 7.74059354690067e-06, "loss": 0.3865, "step": 7487 }, { "epoch": 5.85, "learning_rate": 7.738126785234068e-06, "loss": 0.1527, "step": 7488 }, { "epoch": 5.85, "learning_rate": 7.73566016861303e-06, "loss": 0.1716, "step": 7489 }, { "epoch": 5.85, "learning_rate": 7.733193697195728e-06, "loss": 0.2136, "step": 7490 }, { "epoch": 5.85, "learning_rate": 7.730727371140326e-06, "loss": 0.1248, "step": 7491 }, { "epoch": 5.86, "learning_rate": 7.728261190604988e-06, "loss": 0.2796, "step": 7492 }, { "epoch": 5.86, "learning_rate": 7.725795155747852e-06, "loss": 0.2178, "step": 7493 }, { "epoch": 5.86, "learning_rate": 7.723329266727064e-06, "loss": 0.2812, "step": 7494 }, { "epoch": 5.86, "learning_rate": 7.720863523700752e-06, "loss": 0.2843, "step": 7495 }, { "epoch": 5.86, "learning_rate": 7.718397926827026e-06, "loss": 0.2841, "step": 7496 }, { "epoch": 5.86, "learning_rate": 7.715932476264003e-06, "loss": 0.2404, "step": 7497 }, { "epoch": 5.86, "learning_rate": 7.713467172169783e-06, "loss": 0.1999, "step": 7498 }, { "epoch": 5.86, "learning_rate": 7.711002014702454e-06, "loss": 0.1606, "step": 7499 }, { "epoch": 5.86, "learning_rate": 7.708537004020098e-06, "loss": 0.1234, "step": 7500 }, { "epoch": 5.86, "learning_rate": 7.706072140280793e-06, "loss": 0.2537, "step": 7501 }, { "epoch": 5.86, "learning_rate": 7.70360742364259e-06, "loss": 0.2727, "step": 7502 }, { "epoch": 5.86, "learning_rate": 7.701142854263552e-06, "loss": 0.2746, "step": 7503 }, { "epoch": 5.86, "learning_rate": 7.698678432301716e-06, "loss": 0.2137, "step": 7504 }, { "epoch": 5.87, "learning_rate": 7.696214157915123e-06, "loss": 0.2027, "step": 7505 }, { "epoch": 5.87, "learning_rate": 7.693750031261795e-06, "loss": 0.2673, "step": 7506 }, { "epoch": 5.87, "learning_rate": 7.691286052499748e-06, "loss": 0.2725, "step": 7507 }, { "epoch": 5.87, "learning_rate": 7.688822221786982e-06, "loss": 0.2424, "step": 7508 }, { "epoch": 5.87, "learning_rate": 7.6863585392815e-06, "loss": 0.2517, "step": 7509 }, { "epoch": 5.87, "learning_rate": 7.683895005141286e-06, "loss": 0.2929, "step": 7510 }, { "epoch": 5.87, "learning_rate": 7.68143161952432e-06, "loss": 0.213, "step": 7511 }, { "epoch": 5.87, "learning_rate": 7.678968382588568e-06, "loss": 0.3759, "step": 7512 }, { "epoch": 5.87, "learning_rate": 7.676505294491987e-06, "loss": 0.2334, "step": 7513 }, { "epoch": 5.87, "learning_rate": 7.674042355392525e-06, "loss": 0.2433, "step": 7514 }, { "epoch": 5.87, "learning_rate": 7.671579565448127e-06, "loss": 0.2038, "step": 7515 }, { "epoch": 5.87, "learning_rate": 7.669116924816713e-06, "loss": 0.284, "step": 7516 }, { "epoch": 5.87, "learning_rate": 7.666654433656213e-06, "loss": 0.1273, "step": 7517 }, { "epoch": 5.88, "learning_rate": 7.664192092124537e-06, "loss": 0.4443, "step": 7518 }, { "epoch": 5.88, "learning_rate": 7.661729900379574e-06, "loss": 0.2004, "step": 7519 }, { "epoch": 5.88, "learning_rate": 7.659267858579227e-06, "loss": 0.147, "step": 7520 }, { "epoch": 5.88, "learning_rate": 7.656805966881375e-06, "loss": 0.2672, "step": 7521 }, { "epoch": 5.88, "learning_rate": 7.654344225443886e-06, "loss": 0.1811, "step": 7522 }, { "epoch": 5.88, "learning_rate": 7.651882634424628e-06, "loss": 0.3081, "step": 7523 }, { "epoch": 5.88, "learning_rate": 7.649421193981452e-06, "loss": 0.1621, "step": 7524 }, { "epoch": 5.88, "learning_rate": 7.646959904272197e-06, "loss": 0.2379, "step": 7525 }, { "epoch": 5.88, "learning_rate": 7.644498765454702e-06, "loss": 0.1969, "step": 7526 }, { "epoch": 5.88, "learning_rate": 7.642037777686787e-06, "loss": 0.1964, "step": 7527 }, { "epoch": 5.88, "learning_rate": 7.639576941126266e-06, "loss": 0.2451, "step": 7528 }, { "epoch": 5.88, "learning_rate": 7.637116255930948e-06, "loss": 0.2251, "step": 7529 }, { "epoch": 5.89, "learning_rate": 7.634655722258623e-06, "loss": 0.2128, "step": 7530 }, { "epoch": 5.89, "learning_rate": 7.632195340267075e-06, "loss": 0.2422, "step": 7531 }, { "epoch": 5.89, "learning_rate": 7.629735110114083e-06, "loss": 0.1431, "step": 7532 }, { "epoch": 5.89, "learning_rate": 7.6272750319574085e-06, "loss": 0.1205, "step": 7533 }, { "epoch": 5.89, "learning_rate": 7.624815105954812e-06, "loss": 0.3714, "step": 7534 }, { "epoch": 5.89, "learning_rate": 7.62235533226404e-06, "loss": 0.2432, "step": 7535 }, { "epoch": 5.89, "learning_rate": 7.619895711042822e-06, "loss": 0.2277, "step": 7536 }, { "epoch": 5.89, "learning_rate": 7.617436242448887e-06, "loss": 0.1658, "step": 7537 }, { "epoch": 5.89, "learning_rate": 7.614976926639954e-06, "loss": 0.1808, "step": 7538 }, { "epoch": 5.89, "learning_rate": 7.612517763773728e-06, "loss": 0.2207, "step": 7539 }, { "epoch": 5.89, "learning_rate": 7.610058754007907e-06, "loss": 0.1838, "step": 7540 }, { "epoch": 5.89, "learning_rate": 7.607599897500182e-06, "loss": 0.1519, "step": 7541 }, { "epoch": 5.89, "learning_rate": 7.605141194408222e-06, "loss": 0.2049, "step": 7542 }, { "epoch": 5.9, "learning_rate": 7.602682644889697e-06, "loss": 0.2667, "step": 7543 }, { "epoch": 5.9, "learning_rate": 7.600224249102269e-06, "loss": 0.2581, "step": 7544 }, { "epoch": 5.9, "learning_rate": 7.5977660072035805e-06, "loss": 0.2031, "step": 7545 }, { "epoch": 5.9, "learning_rate": 7.595307919351278e-06, "loss": 0.2458, "step": 7546 }, { "epoch": 5.9, "learning_rate": 7.5928499857029805e-06, "loss": 0.2088, "step": 7547 }, { "epoch": 5.9, "learning_rate": 7.5903922064163075e-06, "loss": 0.2682, "step": 7548 }, { "epoch": 5.9, "learning_rate": 7.58793458164887e-06, "loss": 0.1775, "step": 7549 }, { "epoch": 5.9, "learning_rate": 7.585477111558267e-06, "loss": 0.1947, "step": 7550 }, { "epoch": 5.9, "learning_rate": 7.583019796302084e-06, "loss": 0.1971, "step": 7551 }, { "epoch": 5.9, "learning_rate": 7.580562636037903e-06, "loss": 0.2441, "step": 7552 }, { "epoch": 5.9, "learning_rate": 7.578105630923291e-06, "loss": 0.2154, "step": 7553 }, { "epoch": 5.9, "learning_rate": 7.575648781115803e-06, "loss": 0.2248, "step": 7554 }, { "epoch": 5.9, "learning_rate": 7.573192086772994e-06, "loss": 0.2576, "step": 7555 }, { "epoch": 5.91, "learning_rate": 7.570735548052397e-06, "loss": 0.2437, "step": 7556 }, { "epoch": 5.91, "learning_rate": 7.568279165111546e-06, "loss": 0.1861, "step": 7557 }, { "epoch": 5.91, "learning_rate": 7.56582293810796e-06, "loss": 0.2587, "step": 7558 }, { "epoch": 5.91, "learning_rate": 7.563366867199142e-06, "loss": 0.3112, "step": 7559 }, { "epoch": 5.91, "learning_rate": 7.560910952542591e-06, "loss": 0.2227, "step": 7560 }, { "epoch": 5.91, "learning_rate": 7.558455194295802e-06, "loss": 0.2737, "step": 7561 }, { "epoch": 5.91, "learning_rate": 7.555999592616247e-06, "loss": 0.1887, "step": 7562 }, { "epoch": 5.91, "learning_rate": 7.553544147661406e-06, "loss": 0.0888, "step": 7563 }, { "epoch": 5.91, "learning_rate": 7.551088859588721e-06, "loss": 0.2508, "step": 7564 }, { "epoch": 5.91, "learning_rate": 7.548633728555652e-06, "loss": 0.241, "step": 7565 }, { "epoch": 5.91, "learning_rate": 7.546178754719633e-06, "loss": 0.3692, "step": 7566 }, { "epoch": 5.91, "learning_rate": 7.543723938238095e-06, "loss": 0.1452, "step": 7567 }, { "epoch": 5.91, "learning_rate": 7.541269279268453e-06, "loss": 0.1461, "step": 7568 }, { "epoch": 5.92, "learning_rate": 7.538814777968122e-06, "loss": 0.2143, "step": 7569 }, { "epoch": 5.92, "learning_rate": 7.5363604344944925e-06, "loss": 0.1761, "step": 7570 }, { "epoch": 5.92, "learning_rate": 7.533906249004956e-06, "loss": 0.2786, "step": 7571 }, { "epoch": 5.92, "learning_rate": 7.531452221656888e-06, "loss": 0.2371, "step": 7572 }, { "epoch": 5.92, "learning_rate": 7.528998352607659e-06, "loss": 0.2309, "step": 7573 }, { "epoch": 5.92, "learning_rate": 7.526544642014625e-06, "loss": 0.2489, "step": 7574 }, { "epoch": 5.92, "learning_rate": 7.524091090035137e-06, "loss": 0.2249, "step": 7575 }, { "epoch": 5.92, "learning_rate": 7.521637696826527e-06, "loss": 0.1481, "step": 7576 }, { "epoch": 5.92, "learning_rate": 7.519184462546123e-06, "loss": 0.2361, "step": 7577 }, { "epoch": 5.92, "learning_rate": 7.516731387351243e-06, "loss": 0.2531, "step": 7578 }, { "epoch": 5.92, "learning_rate": 7.514278471399195e-06, "loss": 0.2672, "step": 7579 }, { "epoch": 5.92, "learning_rate": 7.5118257148472715e-06, "loss": 0.2353, "step": 7580 }, { "epoch": 5.92, "learning_rate": 7.509373117852766e-06, "loss": 0.3105, "step": 7581 }, { "epoch": 5.93, "learning_rate": 7.506920680572948e-06, "loss": 0.2235, "step": 7582 }, { "epoch": 5.93, "learning_rate": 7.504468403165081e-06, "loss": 0.3512, "step": 7583 }, { "epoch": 5.93, "learning_rate": 7.502016285786429e-06, "loss": 0.2246, "step": 7584 }, { "epoch": 5.93, "learning_rate": 7.499564328594229e-06, "loss": 0.2033, "step": 7585 }, { "epoch": 5.93, "learning_rate": 7.497112531745725e-06, "loss": 0.1932, "step": 7586 }, { "epoch": 5.93, "learning_rate": 7.4946608953981315e-06, "loss": 0.1662, "step": 7587 }, { "epoch": 5.93, "learning_rate": 7.492209419708669e-06, "loss": 0.2667, "step": 7588 }, { "epoch": 5.93, "learning_rate": 7.489758104834538e-06, "loss": 0.2319, "step": 7589 }, { "epoch": 5.93, "learning_rate": 7.487306950932938e-06, "loss": 0.1781, "step": 7590 }, { "epoch": 5.93, "learning_rate": 7.484855958161044e-06, "loss": 0.1306, "step": 7591 }, { "epoch": 5.93, "learning_rate": 7.48240512667604e-06, "loss": 0.2237, "step": 7592 }, { "epoch": 5.93, "learning_rate": 7.47995445663508e-06, "loss": 0.3106, "step": 7593 }, { "epoch": 5.94, "learning_rate": 7.477503948195319e-06, "loss": 0.1592, "step": 7594 }, { "epoch": 5.94, "learning_rate": 7.475053601513898e-06, "loss": 0.1797, "step": 7595 }, { "epoch": 5.94, "learning_rate": 7.472603416747951e-06, "loss": 0.1561, "step": 7596 }, { "epoch": 5.94, "learning_rate": 7.4701533940545966e-06, "loss": 0.1561, "step": 7597 }, { "epoch": 5.94, "learning_rate": 7.467703533590951e-06, "loss": 0.3931, "step": 7598 }, { "epoch": 5.94, "learning_rate": 7.465253835514109e-06, "loss": 0.1904, "step": 7599 }, { "epoch": 5.94, "learning_rate": 7.462804299981161e-06, "loss": 0.1762, "step": 7600 }, { "epoch": 5.94, "learning_rate": 7.460354927149189e-06, "loss": 0.1815, "step": 7601 }, { "epoch": 5.94, "learning_rate": 7.457905717175263e-06, "loss": 0.2979, "step": 7602 }, { "epoch": 5.94, "learning_rate": 7.455456670216441e-06, "loss": 0.1854, "step": 7603 }, { "epoch": 5.94, "learning_rate": 7.4530077864297664e-06, "loss": 0.1192, "step": 7604 }, { "epoch": 5.94, "learning_rate": 7.450559065972285e-06, "loss": 0.2378, "step": 7605 }, { "epoch": 5.94, "learning_rate": 7.448110509001016e-06, "loss": 0.2112, "step": 7606 }, { "epoch": 5.95, "learning_rate": 7.445662115672985e-06, "loss": 0.2498, "step": 7607 }, { "epoch": 5.95, "learning_rate": 7.443213886145193e-06, "loss": 0.2313, "step": 7608 }, { "epoch": 5.95, "learning_rate": 7.440765820574638e-06, "loss": 0.2528, "step": 7609 }, { "epoch": 5.95, "learning_rate": 7.438317919118302e-06, "loss": 0.2717, "step": 7610 }, { "epoch": 5.95, "learning_rate": 7.435870181933164e-06, "loss": 0.2477, "step": 7611 }, { "epoch": 5.95, "learning_rate": 7.433422609176184e-06, "loss": 0.2638, "step": 7612 }, { "epoch": 5.95, "learning_rate": 7.4309752010043205e-06, "loss": 0.2535, "step": 7613 }, { "epoch": 5.95, "learning_rate": 7.428527957574512e-06, "loss": 0.274, "step": 7614 }, { "epoch": 5.95, "learning_rate": 7.4260808790437015e-06, "loss": 0.2748, "step": 7615 }, { "epoch": 5.95, "learning_rate": 7.423633965568796e-06, "loss": 0.2553, "step": 7616 }, { "epoch": 5.95, "learning_rate": 7.4211872173067164e-06, "loss": 0.2412, "step": 7617 }, { "epoch": 5.95, "learning_rate": 7.4187406344143605e-06, "loss": 0.1557, "step": 7618 }, { "epoch": 5.95, "learning_rate": 7.416294217048621e-06, "loss": 0.2653, "step": 7619 }, { "epoch": 5.96, "learning_rate": 7.4138479653663785e-06, "loss": 0.2162, "step": 7620 }, { "epoch": 5.96, "learning_rate": 7.411401879524496e-06, "loss": 0.2481, "step": 7621 }, { "epoch": 5.96, "learning_rate": 7.408955959679838e-06, "loss": 0.1875, "step": 7622 }, { "epoch": 5.96, "learning_rate": 7.406510205989249e-06, "loss": 0.1997, "step": 7623 }, { "epoch": 5.96, "learning_rate": 7.404064618609566e-06, "loss": 0.3429, "step": 7624 }, { "epoch": 5.96, "learning_rate": 7.4016191976976185e-06, "loss": 0.2472, "step": 7625 }, { "epoch": 5.96, "learning_rate": 7.3991739434102226e-06, "loss": 0.2268, "step": 7626 }, { "epoch": 5.96, "learning_rate": 7.396728855904178e-06, "loss": 0.3623, "step": 7627 }, { "epoch": 5.96, "learning_rate": 7.3942839353362835e-06, "loss": 0.3783, "step": 7628 }, { "epoch": 5.96, "learning_rate": 7.39183918186332e-06, "loss": 0.22, "step": 7629 }, { "epoch": 5.96, "learning_rate": 7.389394595642065e-06, "loss": 0.2112, "step": 7630 }, { "epoch": 5.96, "learning_rate": 7.386950176829277e-06, "loss": 0.2472, "step": 7631 }, { "epoch": 5.96, "learning_rate": 7.384505925581712e-06, "loss": 0.2597, "step": 7632 }, { "epoch": 5.97, "learning_rate": 7.382061842056102e-06, "loss": 0.1819, "step": 7633 }, { "epoch": 5.97, "learning_rate": 7.379617926409185e-06, "loss": 0.298, "step": 7634 }, { "epoch": 5.97, "learning_rate": 7.377174178797675e-06, "loss": 0.1532, "step": 7635 }, { "epoch": 5.97, "learning_rate": 7.374730599378286e-06, "loss": 0.1793, "step": 7636 }, { "epoch": 5.97, "learning_rate": 7.372287188307714e-06, "loss": 0.1865, "step": 7637 }, { "epoch": 5.97, "learning_rate": 7.369843945742643e-06, "loss": 0.28, "step": 7638 }, { "epoch": 5.97, "learning_rate": 7.36740087183975e-06, "loss": 0.199, "step": 7639 }, { "epoch": 5.97, "learning_rate": 7.364957966755702e-06, "loss": 0.1975, "step": 7640 }, { "epoch": 5.97, "learning_rate": 7.362515230647152e-06, "loss": 0.1457, "step": 7641 }, { "epoch": 5.97, "learning_rate": 7.360072663670744e-06, "loss": 0.1855, "step": 7642 }, { "epoch": 5.97, "learning_rate": 7.357630265983114e-06, "loss": 0.1479, "step": 7643 }, { "epoch": 5.97, "learning_rate": 7.35518803774088e-06, "loss": 0.253, "step": 7644 }, { "epoch": 5.97, "learning_rate": 7.35274597910065e-06, "loss": 0.301, "step": 7645 }, { "epoch": 5.98, "learning_rate": 7.350304090219033e-06, "loss": 0.1846, "step": 7646 }, { "epoch": 5.98, "learning_rate": 7.34786237125261e-06, "loss": 0.3006, "step": 7647 }, { "epoch": 5.98, "learning_rate": 7.345420822357965e-06, "loss": 0.2194, "step": 7648 }, { "epoch": 5.98, "learning_rate": 7.342979443691666e-06, "loss": 0.3028, "step": 7649 }, { "epoch": 5.98, "learning_rate": 7.340538235410261e-06, "loss": 0.1628, "step": 7650 }, { "epoch": 5.98, "learning_rate": 7.338097197670306e-06, "loss": 0.2381, "step": 7651 }, { "epoch": 5.98, "learning_rate": 7.335656330628331e-06, "loss": 0.3662, "step": 7652 }, { "epoch": 5.98, "learning_rate": 7.333215634440858e-06, "loss": 0.231, "step": 7653 }, { "epoch": 5.98, "learning_rate": 7.3307751092644034e-06, "loss": 0.2338, "step": 7654 }, { "epoch": 5.98, "learning_rate": 7.328334755255471e-06, "loss": 0.2024, "step": 7655 }, { "epoch": 5.98, "learning_rate": 7.3258945725705434e-06, "loss": 0.159, "step": 7656 }, { "epoch": 5.98, "learning_rate": 7.323454561366108e-06, "loss": 0.1446, "step": 7657 }, { "epoch": 5.99, "learning_rate": 7.321014721798627e-06, "loss": 0.2366, "step": 7658 }, { "epoch": 5.99, "learning_rate": 7.318575054024567e-06, "loss": 0.2646, "step": 7659 }, { "epoch": 5.99, "learning_rate": 7.316135558200371e-06, "loss": 0.1908, "step": 7660 }, { "epoch": 5.99, "learning_rate": 7.313696234482472e-06, "loss": 0.2755, "step": 7661 }, { "epoch": 5.99, "learning_rate": 7.3112570830272965e-06, "loss": 0.2598, "step": 7662 }, { "epoch": 5.99, "learning_rate": 7.3088181039912584e-06, "loss": 0.2677, "step": 7663 }, { "epoch": 5.99, "learning_rate": 7.30637929753076e-06, "loss": 0.179, "step": 7664 }, { "epoch": 5.99, "learning_rate": 7.3039406638021956e-06, "loss": 0.2517, "step": 7665 }, { "epoch": 5.99, "learning_rate": 7.301502202961945e-06, "loss": 0.2399, "step": 7666 }, { "epoch": 5.99, "learning_rate": 7.299063915166376e-06, "loss": 0.1421, "step": 7667 }, { "epoch": 5.99, "learning_rate": 7.296625800571844e-06, "loss": 0.2113, "step": 7668 }, { "epoch": 5.99, "learning_rate": 7.294187859334703e-06, "loss": 0.2914, "step": 7669 }, { "epoch": 5.99, "learning_rate": 7.291750091611283e-06, "loss": 0.2775, "step": 7670 }, { "epoch": 6.0, "learning_rate": 7.289312497557913e-06, "loss": 0.2787, "step": 7671 }, { "epoch": 6.0, "learning_rate": 7.286875077330909e-06, "loss": 0.2098, "step": 7672 }, { "epoch": 6.0, "learning_rate": 7.284437831086566e-06, "loss": 0.2241, "step": 7673 }, { "epoch": 6.0, "learning_rate": 7.282000758981182e-06, "loss": 0.1518, "step": 7674 }, { "epoch": 6.0, "learning_rate": 7.279563861171037e-06, "loss": 0.2009, "step": 7675 }, { "epoch": 6.0, "learning_rate": 7.277127137812395e-06, "loss": 0.2288, "step": 7676 }, { "epoch": 6.0, "learning_rate": 7.274690589061523e-06, "loss": 0.251, "step": 7677 }, { "epoch": 6.0, "learning_rate": 7.27225421507466e-06, "loss": 0.0771, "step": 7678 }, { "epoch": 6.0, "learning_rate": 7.269818016008042e-06, "loss": 0.0702, "step": 7679 }, { "epoch": 6.0, "learning_rate": 7.267381992017899e-06, "loss": 0.1047, "step": 7680 }, { "epoch": 6.0, "learning_rate": 7.264946143260439e-06, "loss": 0.1949, "step": 7681 }, { "epoch": 6.0, "learning_rate": 7.262510469891866e-06, "loss": 0.1221, "step": 7682 }, { "epoch": 6.0, "learning_rate": 7.260074972068374e-06, "loss": 0.0964, "step": 7683 }, { "epoch": 6.01, "learning_rate": 7.257639649946137e-06, "loss": 0.117, "step": 7684 }, { "epoch": 6.01, "learning_rate": 7.255204503681323e-06, "loss": 0.0585, "step": 7685 }, { "epoch": 6.01, "learning_rate": 7.252769533430093e-06, "loss": 0.1008, "step": 7686 }, { "epoch": 6.01, "learning_rate": 7.25033473934859e-06, "loss": 0.0559, "step": 7687 }, { "epoch": 6.01, "learning_rate": 7.24790012159295e-06, "loss": 0.1157, "step": 7688 }, { "epoch": 6.01, "learning_rate": 7.245465680319299e-06, "loss": 0.0747, "step": 7689 }, { "epoch": 6.01, "learning_rate": 7.243031415683742e-06, "loss": 0.0802, "step": 7690 }, { "epoch": 6.01, "learning_rate": 7.24059732784238e-06, "loss": 0.0656, "step": 7691 }, { "epoch": 6.01, "learning_rate": 7.238163416951307e-06, "loss": 0.0946, "step": 7692 }, { "epoch": 6.01, "learning_rate": 7.2357296831665955e-06, "loss": 0.2176, "step": 7693 }, { "epoch": 6.01, "learning_rate": 7.233296126644319e-06, "loss": 0.059, "step": 7694 }, { "epoch": 6.01, "learning_rate": 7.230862747540526e-06, "loss": 0.0742, "step": 7695 }, { "epoch": 6.01, "learning_rate": 7.228429546011262e-06, "loss": 0.1146, "step": 7696 }, { "epoch": 6.02, "learning_rate": 7.225996522212557e-06, "loss": 0.1687, "step": 7697 }, { "epoch": 6.02, "learning_rate": 7.2235636763004355e-06, "loss": 0.1733, "step": 7698 }, { "epoch": 6.02, "learning_rate": 7.2211310084309035e-06, "loss": 0.1204, "step": 7699 }, { "epoch": 6.02, "learning_rate": 7.218698518759966e-06, "loss": 0.1964, "step": 7700 }, { "epoch": 6.02, "learning_rate": 7.216266207443603e-06, "loss": 0.1076, "step": 7701 }, { "epoch": 6.02, "learning_rate": 7.213834074637788e-06, "loss": 0.1354, "step": 7702 }, { "epoch": 6.02, "learning_rate": 7.211402120498489e-06, "loss": 0.1764, "step": 7703 }, { "epoch": 6.02, "learning_rate": 7.208970345181659e-06, "loss": 0.078, "step": 7704 }, { "epoch": 6.02, "learning_rate": 7.206538748843235e-06, "loss": 0.1381, "step": 7705 }, { "epoch": 6.02, "learning_rate": 7.20410733163915e-06, "loss": 0.1031, "step": 7706 }, { "epoch": 6.02, "learning_rate": 7.201676093725319e-06, "loss": 0.0873, "step": 7707 }, { "epoch": 6.02, "learning_rate": 7.199245035257647e-06, "loss": 0.0474, "step": 7708 }, { "epoch": 6.03, "learning_rate": 7.1968141563920335e-06, "loss": 0.1412, "step": 7709 }, { "epoch": 6.03, "learning_rate": 7.194383457284357e-06, "loss": 0.0732, "step": 7710 }, { "epoch": 6.03, "learning_rate": 7.191952938090499e-06, "loss": 0.0513, "step": 7711 }, { "epoch": 6.03, "learning_rate": 7.189522598966303e-06, "loss": 0.0901, "step": 7712 }, { "epoch": 6.03, "learning_rate": 7.187092440067631e-06, "loss": 0.0935, "step": 7713 }, { "epoch": 6.03, "learning_rate": 7.184662461550313e-06, "loss": 0.0976, "step": 7714 }, { "epoch": 6.03, "learning_rate": 7.18223266357018e-06, "loss": 0.1443, "step": 7715 }, { "epoch": 6.03, "learning_rate": 7.179803046283042e-06, "loss": 0.0836, "step": 7716 }, { "epoch": 6.03, "learning_rate": 7.177373609844706e-06, "loss": 0.0726, "step": 7717 }, { "epoch": 6.03, "learning_rate": 7.174944354410958e-06, "loss": 0.1148, "step": 7718 }, { "epoch": 6.03, "learning_rate": 7.1725152801375775e-06, "loss": 0.1797, "step": 7719 }, { "epoch": 6.03, "learning_rate": 7.1700863871803325e-06, "loss": 0.0409, "step": 7720 }, { "epoch": 6.03, "learning_rate": 7.167657675694981e-06, "loss": 0.1537, "step": 7721 }, { "epoch": 6.04, "learning_rate": 7.165229145837265e-06, "loss": 0.1156, "step": 7722 }, { "epoch": 6.04, "learning_rate": 7.162800797762922e-06, "loss": 0.0785, "step": 7723 }, { "epoch": 6.04, "learning_rate": 7.160372631627668e-06, "loss": 0.097, "step": 7724 }, { "epoch": 6.04, "learning_rate": 7.157944647587212e-06, "loss": 0.0826, "step": 7725 }, { "epoch": 6.04, "learning_rate": 7.15551684579725e-06, "loss": 0.1115, "step": 7726 }, { "epoch": 6.04, "learning_rate": 7.153089226413475e-06, "loss": 0.1126, "step": 7727 }, { "epoch": 6.04, "learning_rate": 7.1506617895915544e-06, "loss": 0.0765, "step": 7728 }, { "epoch": 6.04, "learning_rate": 7.1482345354871575e-06, "loss": 0.0865, "step": 7729 }, { "epoch": 6.04, "learning_rate": 7.1458074642559295e-06, "loss": 0.0993, "step": 7730 }, { "epoch": 6.04, "learning_rate": 7.143380576053509e-06, "loss": 0.1543, "step": 7731 }, { "epoch": 6.04, "learning_rate": 7.140953871035529e-06, "loss": 0.12, "step": 7732 }, { "epoch": 6.04, "learning_rate": 7.1385273493576e-06, "loss": 0.1395, "step": 7733 }, { "epoch": 6.04, "learning_rate": 7.1361010111753304e-06, "loss": 0.0874, "step": 7734 }, { "epoch": 6.05, "learning_rate": 7.133674856644306e-06, "loss": 0.0889, "step": 7735 }, { "epoch": 6.05, "learning_rate": 7.131248885920111e-06, "loss": 0.1534, "step": 7736 }, { "epoch": 6.05, "learning_rate": 7.128823099158313e-06, "loss": 0.1043, "step": 7737 }, { "epoch": 6.05, "learning_rate": 7.12639749651447e-06, "loss": 0.1064, "step": 7738 }, { "epoch": 6.05, "learning_rate": 7.123972078144126e-06, "loss": 0.0886, "step": 7739 }, { "epoch": 6.05, "learning_rate": 7.121546844202818e-06, "loss": 0.1177, "step": 7740 }, { "epoch": 6.05, "learning_rate": 7.119121794846059e-06, "loss": 0.0884, "step": 7741 }, { "epoch": 6.05, "learning_rate": 7.116696930229365e-06, "loss": 0.169, "step": 7742 }, { "epoch": 6.05, "learning_rate": 7.11427225050823e-06, "loss": 0.0918, "step": 7743 }, { "epoch": 6.05, "learning_rate": 7.111847755838142e-06, "loss": 0.0604, "step": 7744 }, { "epoch": 6.05, "learning_rate": 7.109423446374573e-06, "loss": 0.0925, "step": 7745 }, { "epoch": 6.05, "learning_rate": 7.106999322272989e-06, "loss": 0.1578, "step": 7746 }, { "epoch": 6.05, "learning_rate": 7.104575383688837e-06, "loss": 0.0283, "step": 7747 }, { "epoch": 6.06, "learning_rate": 7.102151630777555e-06, "loss": 0.0828, "step": 7748 }, { "epoch": 6.06, "learning_rate": 7.099728063694568e-06, "loss": 0.0806, "step": 7749 }, { "epoch": 6.06, "learning_rate": 7.097304682595294e-06, "loss": 0.0983, "step": 7750 }, { "epoch": 6.06, "learning_rate": 7.094881487635136e-06, "loss": 0.0846, "step": 7751 }, { "epoch": 6.06, "learning_rate": 7.092458478969477e-06, "loss": 0.0775, "step": 7752 }, { "epoch": 6.06, "learning_rate": 7.090035656753704e-06, "loss": 0.0638, "step": 7753 }, { "epoch": 6.06, "learning_rate": 7.0876130211431785e-06, "loss": 0.1021, "step": 7754 }, { "epoch": 6.06, "learning_rate": 7.085190572293259e-06, "loss": 0.0661, "step": 7755 }, { "epoch": 6.06, "learning_rate": 7.082768310359287e-06, "loss": 0.176, "step": 7756 }, { "epoch": 6.06, "learning_rate": 7.080346235496593e-06, "loss": 0.2196, "step": 7757 }, { "epoch": 6.06, "learning_rate": 7.077924347860492e-06, "loss": 0.1139, "step": 7758 }, { "epoch": 6.06, "learning_rate": 7.075502647606296e-06, "loss": 0.2696, "step": 7759 }, { "epoch": 6.06, "learning_rate": 7.073081134889296e-06, "loss": 0.0651, "step": 7760 }, { "epoch": 6.07, "learning_rate": 7.0706598098647776e-06, "loss": 0.1548, "step": 7761 }, { "epoch": 6.07, "learning_rate": 7.0682386726880085e-06, "loss": 0.0937, "step": 7762 }, { "epoch": 6.07, "learning_rate": 7.065817723514254e-06, "loss": 0.1132, "step": 7763 }, { "epoch": 6.07, "learning_rate": 7.06339696249875e-06, "loss": 0.0912, "step": 7764 }, { "epoch": 6.07, "learning_rate": 7.060976389796739e-06, "loss": 0.0568, "step": 7765 }, { "epoch": 6.07, "learning_rate": 7.058556005563437e-06, "loss": 0.1524, "step": 7766 }, { "epoch": 6.07, "learning_rate": 7.056135809954061e-06, "loss": 0.0687, "step": 7767 }, { "epoch": 6.07, "learning_rate": 7.053715803123809e-06, "loss": 0.0912, "step": 7768 }, { "epoch": 6.07, "learning_rate": 7.051295985227862e-06, "loss": 0.0797, "step": 7769 }, { "epoch": 6.07, "learning_rate": 7.048876356421393e-06, "loss": 0.1073, "step": 7770 }, { "epoch": 6.07, "learning_rate": 7.04645691685957e-06, "loss": 0.0752, "step": 7771 }, { "epoch": 6.07, "learning_rate": 7.044037666697538e-06, "loss": 0.0992, "step": 7772 }, { "epoch": 6.08, "learning_rate": 7.041618606090439e-06, "loss": 0.1052, "step": 7773 }, { "epoch": 6.08, "learning_rate": 7.039199735193396e-06, "loss": 0.1196, "step": 7774 }, { "epoch": 6.08, "learning_rate": 7.03678105416152e-06, "loss": 0.0437, "step": 7775 }, { "epoch": 6.08, "learning_rate": 7.0343625631499144e-06, "loss": 0.0613, "step": 7776 }, { "epoch": 6.08, "learning_rate": 7.03194426231367e-06, "loss": 0.1041, "step": 7777 }, { "epoch": 6.08, "learning_rate": 7.0295261518078576e-06, "loss": 0.1676, "step": 7778 }, { "epoch": 6.08, "learning_rate": 7.027108231787548e-06, "loss": 0.0659, "step": 7779 }, { "epoch": 6.08, "learning_rate": 7.024690502407794e-06, "loss": 0.0866, "step": 7780 }, { "epoch": 6.08, "learning_rate": 7.022272963823627e-06, "loss": 0.0951, "step": 7781 }, { "epoch": 6.08, "learning_rate": 7.019855616190081e-06, "loss": 0.0934, "step": 7782 }, { "epoch": 6.08, "learning_rate": 7.017438459662171e-06, "loss": 0.1723, "step": 7783 }, { "epoch": 6.08, "learning_rate": 7.015021494394902e-06, "loss": 0.107, "step": 7784 }, { "epoch": 6.08, "learning_rate": 7.012604720543265e-06, "loss": 0.0915, "step": 7785 }, { "epoch": 6.09, "learning_rate": 7.010188138262235e-06, "loss": 0.0992, "step": 7786 }, { "epoch": 6.09, "learning_rate": 7.007771747706777e-06, "loss": 0.0744, "step": 7787 }, { "epoch": 6.09, "learning_rate": 7.005355549031852e-06, "loss": 0.0505, "step": 7788 }, { "epoch": 6.09, "learning_rate": 7.002939542392397e-06, "loss": 0.0691, "step": 7789 }, { "epoch": 6.09, "learning_rate": 7.000523727943344e-06, "loss": 0.148, "step": 7790 }, { "epoch": 6.09, "learning_rate": 6.998108105839611e-06, "loss": 0.1232, "step": 7791 }, { "epoch": 6.09, "learning_rate": 6.995692676236101e-06, "loss": 0.088, "step": 7792 }, { "epoch": 6.09, "learning_rate": 6.9932774392877045e-06, "loss": 0.124, "step": 7793 }, { "epoch": 6.09, "learning_rate": 6.990862395149305e-06, "loss": 0.1491, "step": 7794 }, { "epoch": 6.09, "learning_rate": 6.9884475439757685e-06, "loss": 0.1759, "step": 7795 }, { "epoch": 6.09, "learning_rate": 6.9860328859219525e-06, "loss": 0.0938, "step": 7796 }, { "epoch": 6.09, "learning_rate": 6.9836184211427015e-06, "loss": 0.1668, "step": 7797 }, { "epoch": 6.09, "learning_rate": 6.98120414979284e-06, "loss": 0.0855, "step": 7798 }, { "epoch": 6.1, "learning_rate": 6.978790072027191e-06, "loss": 0.0483, "step": 7799 }, { "epoch": 6.1, "learning_rate": 6.97637618800056e-06, "loss": 0.0776, "step": 7800 }, { "epoch": 6.1, "learning_rate": 6.973962497867736e-06, "loss": 0.1124, "step": 7801 }, { "epoch": 6.1, "learning_rate": 6.97154900178351e-06, "loss": 0.1032, "step": 7802 }, { "epoch": 6.1, "learning_rate": 6.969135699902641e-06, "loss": 0.0972, "step": 7803 }, { "epoch": 6.1, "learning_rate": 6.966722592379888e-06, "loss": 0.1493, "step": 7804 }, { "epoch": 6.1, "learning_rate": 6.964309679369996e-06, "loss": 0.1348, "step": 7805 }, { "epoch": 6.1, "learning_rate": 6.961896961027694e-06, "loss": 0.104, "step": 7806 }, { "epoch": 6.1, "learning_rate": 6.959484437507702e-06, "loss": 0.0939, "step": 7807 }, { "epoch": 6.1, "learning_rate": 6.957072108964731e-06, "loss": 0.1414, "step": 7808 }, { "epoch": 6.1, "learning_rate": 6.9546599755534664e-06, "loss": 0.0764, "step": 7809 }, { "epoch": 6.1, "learning_rate": 6.952248037428591e-06, "loss": 0.0612, "step": 7810 }, { "epoch": 6.1, "learning_rate": 6.949836294744777e-06, "loss": 0.0779, "step": 7811 }, { "epoch": 6.11, "learning_rate": 6.947424747656678e-06, "loss": 0.1021, "step": 7812 }, { "epoch": 6.11, "learning_rate": 6.94501339631894e-06, "loss": 0.1478, "step": 7813 }, { "epoch": 6.11, "learning_rate": 6.942602240886196e-06, "loss": 0.1137, "step": 7814 }, { "epoch": 6.11, "learning_rate": 6.940191281513056e-06, "loss": 0.0483, "step": 7815 }, { "epoch": 6.11, "learning_rate": 6.9377805183541315e-06, "loss": 0.1367, "step": 7816 }, { "epoch": 6.11, "learning_rate": 6.935369951564017e-06, "loss": 0.0584, "step": 7817 }, { "epoch": 6.11, "learning_rate": 6.932959581297289e-06, "loss": 0.0543, "step": 7818 }, { "epoch": 6.11, "learning_rate": 6.93054940770852e-06, "loss": 0.139, "step": 7819 }, { "epoch": 6.11, "learning_rate": 6.9281394309522665e-06, "loss": 0.0766, "step": 7820 }, { "epoch": 6.11, "learning_rate": 6.925729651183066e-06, "loss": 0.104, "step": 7821 }, { "epoch": 6.11, "learning_rate": 6.923320068555449e-06, "loss": 0.141, "step": 7822 }, { "epoch": 6.11, "learning_rate": 6.920910683223938e-06, "loss": 0.0597, "step": 7823 }, { "epoch": 6.11, "learning_rate": 6.918501495343033e-06, "loss": 0.0746, "step": 7824 }, { "epoch": 6.12, "learning_rate": 6.916092505067233e-06, "loss": 0.1329, "step": 7825 }, { "epoch": 6.12, "learning_rate": 6.913683712551012e-06, "loss": 0.0679, "step": 7826 }, { "epoch": 6.12, "learning_rate": 6.911275117948835e-06, "loss": 0.0732, "step": 7827 }, { "epoch": 6.12, "learning_rate": 6.908866721415162e-06, "loss": 0.0959, "step": 7828 }, { "epoch": 6.12, "learning_rate": 6.9064585231044335e-06, "loss": 0.0573, "step": 7829 }, { "epoch": 6.12, "learning_rate": 6.9040505231710734e-06, "loss": 0.1705, "step": 7830 }, { "epoch": 6.12, "learning_rate": 6.901642721769506e-06, "loss": 0.086, "step": 7831 }, { "epoch": 6.12, "learning_rate": 6.899235119054127e-06, "loss": 0.118, "step": 7832 }, { "epoch": 6.12, "learning_rate": 6.896827715179329e-06, "loss": 0.0482, "step": 7833 }, { "epoch": 6.12, "learning_rate": 6.8944205102994936e-06, "loss": 0.0464, "step": 7834 }, { "epoch": 6.12, "learning_rate": 6.892013504568982e-06, "loss": 0.1153, "step": 7835 }, { "epoch": 6.12, "learning_rate": 6.889606698142149e-06, "loss": 0.1079, "step": 7836 }, { "epoch": 6.13, "learning_rate": 6.887200091173335e-06, "loss": 0.1019, "step": 7837 }, { "epoch": 6.13, "learning_rate": 6.884793683816864e-06, "loss": 0.1085, "step": 7838 }, { "epoch": 6.13, "learning_rate": 6.8823874762270474e-06, "loss": 0.1506, "step": 7839 }, { "epoch": 6.13, "learning_rate": 6.8799814685581925e-06, "loss": 0.0834, "step": 7840 }, { "epoch": 6.13, "learning_rate": 6.8775756609645836e-06, "loss": 0.0898, "step": 7841 }, { "epoch": 6.13, "learning_rate": 6.8751700536005035e-06, "loss": 0.1369, "step": 7842 }, { "epoch": 6.13, "learning_rate": 6.872764646620205e-06, "loss": 0.121, "step": 7843 }, { "epoch": 6.13, "learning_rate": 6.870359440177942e-06, "loss": 0.1477, "step": 7844 }, { "epoch": 6.13, "learning_rate": 6.8679544344279505e-06, "loss": 0.0637, "step": 7845 }, { "epoch": 6.13, "learning_rate": 6.8655496295244575e-06, "loss": 0.0584, "step": 7846 }, { "epoch": 6.13, "learning_rate": 6.863145025621669e-06, "loss": 0.1043, "step": 7847 }, { "epoch": 6.13, "learning_rate": 6.860740622873794e-06, "loss": 0.0736, "step": 7848 }, { "epoch": 6.13, "learning_rate": 6.858336421435006e-06, "loss": 0.0704, "step": 7849 }, { "epoch": 6.14, "learning_rate": 6.855932421459484e-06, "loss": 0.1034, "step": 7850 }, { "epoch": 6.14, "learning_rate": 6.853528623101385e-06, "loss": 0.0709, "step": 7851 }, { "epoch": 6.14, "learning_rate": 6.851125026514856e-06, "loss": 0.1525, "step": 7852 }, { "epoch": 6.14, "learning_rate": 6.848721631854031e-06, "loss": 0.0689, "step": 7853 }, { "epoch": 6.14, "learning_rate": 6.846318439273034e-06, "loss": 0.1095, "step": 7854 }, { "epoch": 6.14, "learning_rate": 6.8439154489259704e-06, "loss": 0.1125, "step": 7855 }, { "epoch": 6.14, "learning_rate": 6.8415126609669315e-06, "loss": 0.0509, "step": 7856 }, { "epoch": 6.14, "learning_rate": 6.839110075550004e-06, "loss": 0.2188, "step": 7857 }, { "epoch": 6.14, "learning_rate": 6.836707692829257e-06, "loss": 0.1476, "step": 7858 }, { "epoch": 6.14, "learning_rate": 6.834305512958744e-06, "loss": 0.1262, "step": 7859 }, { "epoch": 6.14, "learning_rate": 6.831903536092507e-06, "loss": 0.0864, "step": 7860 }, { "epoch": 6.14, "learning_rate": 6.829501762384578e-06, "loss": 0.0646, "step": 7861 }, { "epoch": 6.14, "learning_rate": 6.8271001919889726e-06, "loss": 0.0457, "step": 7862 }, { "epoch": 6.15, "learning_rate": 6.824698825059697e-06, "loss": 0.2274, "step": 7863 }, { "epoch": 6.15, "learning_rate": 6.822297661750739e-06, "loss": 0.0988, "step": 7864 }, { "epoch": 6.15, "learning_rate": 6.819896702216082e-06, "loss": 0.1439, "step": 7865 }, { "epoch": 6.15, "learning_rate": 6.817495946609682e-06, "loss": 0.0436, "step": 7866 }, { "epoch": 6.15, "learning_rate": 6.815095395085496e-06, "loss": 0.1162, "step": 7867 }, { "epoch": 6.15, "learning_rate": 6.81269504779746e-06, "loss": 0.1197, "step": 7868 }, { "epoch": 6.15, "learning_rate": 6.810294904899502e-06, "loss": 0.0952, "step": 7869 }, { "epoch": 6.15, "learning_rate": 6.807894966545532e-06, "loss": 0.0642, "step": 7870 }, { "epoch": 6.15, "learning_rate": 6.805495232889454e-06, "loss": 0.2251, "step": 7871 }, { "epoch": 6.15, "learning_rate": 6.8030957040851495e-06, "loss": 0.0699, "step": 7872 }, { "epoch": 6.15, "learning_rate": 6.800696380286491e-06, "loss": 0.1447, "step": 7873 }, { "epoch": 6.15, "learning_rate": 6.798297261647337e-06, "loss": 0.1204, "step": 7874 }, { "epoch": 6.15, "learning_rate": 6.795898348321539e-06, "loss": 0.1274, "step": 7875 }, { "epoch": 6.16, "learning_rate": 6.79349964046293e-06, "loss": 0.1188, "step": 7876 }, { "epoch": 6.16, "learning_rate": 6.791101138225326e-06, "loss": 0.0809, "step": 7877 }, { "epoch": 6.16, "learning_rate": 6.788702841762537e-06, "loss": 0.0877, "step": 7878 }, { "epoch": 6.16, "learning_rate": 6.786304751228354e-06, "loss": 0.1631, "step": 7879 }, { "epoch": 6.16, "learning_rate": 6.783906866776562e-06, "loss": 0.1007, "step": 7880 }, { "epoch": 6.16, "learning_rate": 6.7815091885609265e-06, "loss": 0.0837, "step": 7881 }, { "epoch": 6.16, "learning_rate": 6.779111716735205e-06, "loss": 0.0965, "step": 7882 }, { "epoch": 6.16, "learning_rate": 6.7767144514531295e-06, "loss": 0.1338, "step": 7883 }, { "epoch": 6.16, "learning_rate": 6.774317392868435e-06, "loss": 0.1605, "step": 7884 }, { "epoch": 6.16, "learning_rate": 6.7719205411348334e-06, "loss": 0.0883, "step": 7885 }, { "epoch": 6.16, "learning_rate": 6.769523896406029e-06, "loss": 0.0742, "step": 7886 }, { "epoch": 6.16, "learning_rate": 6.7671274588357075e-06, "loss": 0.106, "step": 7887 }, { "epoch": 6.16, "learning_rate": 6.764731228577546e-06, "loss": 0.0737, "step": 7888 }, { "epoch": 6.17, "learning_rate": 6.762335205785199e-06, "loss": 0.1108, "step": 7889 }, { "epoch": 6.17, "learning_rate": 6.759939390612321e-06, "loss": 0.1313, "step": 7890 }, { "epoch": 6.17, "learning_rate": 6.757543783212544e-06, "loss": 0.139, "step": 7891 }, { "epoch": 6.17, "learning_rate": 6.755148383739492e-06, "loss": 0.119, "step": 7892 }, { "epoch": 6.17, "learning_rate": 6.752753192346768e-06, "loss": 0.1084, "step": 7893 }, { "epoch": 6.17, "learning_rate": 6.75035820918798e-06, "loss": 0.1002, "step": 7894 }, { "epoch": 6.17, "learning_rate": 6.74796343441669e-06, "loss": 0.083, "step": 7895 }, { "epoch": 6.17, "learning_rate": 6.74556886818648e-06, "loss": 0.131, "step": 7896 }, { "epoch": 6.17, "learning_rate": 6.7431745106508975e-06, "loss": 0.0773, "step": 7897 }, { "epoch": 6.17, "learning_rate": 6.740780361963488e-06, "loss": 0.0834, "step": 7898 }, { "epoch": 6.17, "learning_rate": 6.738386422277782e-06, "loss": 0.0802, "step": 7899 }, { "epoch": 6.17, "learning_rate": 6.735992691747284e-06, "loss": 0.0895, "step": 7900 }, { "epoch": 6.18, "learning_rate": 6.733599170525505e-06, "loss": 0.0666, "step": 7901 }, { "epoch": 6.18, "learning_rate": 6.731205858765928e-06, "loss": 0.1239, "step": 7902 }, { "epoch": 6.18, "learning_rate": 6.728812756622027e-06, "loss": 0.058, "step": 7903 }, { "epoch": 6.18, "learning_rate": 6.726419864247265e-06, "loss": 0.111, "step": 7904 }, { "epoch": 6.18, "learning_rate": 6.72402718179509e-06, "loss": 0.0584, "step": 7905 }, { "epoch": 6.18, "learning_rate": 6.721634709418931e-06, "loss": 0.0987, "step": 7906 }, { "epoch": 6.18, "learning_rate": 6.719242447272213e-06, "loss": 0.0943, "step": 7907 }, { "epoch": 6.18, "learning_rate": 6.71685039550834e-06, "loss": 0.0849, "step": 7908 }, { "epoch": 6.18, "learning_rate": 6.714458554280708e-06, "loss": 0.1058, "step": 7909 }, { "epoch": 6.18, "learning_rate": 6.712066923742696e-06, "loss": 0.0952, "step": 7910 }, { "epoch": 6.18, "learning_rate": 6.709675504047672e-06, "loss": 0.1228, "step": 7911 }, { "epoch": 6.18, "learning_rate": 6.707284295348984e-06, "loss": 0.0646, "step": 7912 }, { "epoch": 6.18, "learning_rate": 6.704893297799976e-06, "loss": 0.1435, "step": 7913 }, { "epoch": 6.19, "learning_rate": 6.70250251155397e-06, "loss": 0.1182, "step": 7914 }, { "epoch": 6.19, "learning_rate": 6.7001119367642835e-06, "loss": 0.1516, "step": 7915 }, { "epoch": 6.19, "learning_rate": 6.697721573584213e-06, "loss": 0.1363, "step": 7916 }, { "epoch": 6.19, "learning_rate": 6.695331422167042e-06, "loss": 0.0722, "step": 7917 }, { "epoch": 6.19, "learning_rate": 6.69294148266604e-06, "loss": 0.1122, "step": 7918 }, { "epoch": 6.19, "learning_rate": 6.690551755234471e-06, "loss": 0.0805, "step": 7919 }, { "epoch": 6.19, "learning_rate": 6.6881622400255745e-06, "loss": 0.0727, "step": 7920 }, { "epoch": 6.19, "learning_rate": 6.685772937192584e-06, "loss": 0.0632, "step": 7921 }, { "epoch": 6.19, "learning_rate": 6.683383846888718e-06, "loss": 0.0569, "step": 7922 }, { "epoch": 6.19, "learning_rate": 6.680994969267177e-06, "loss": 0.0934, "step": 7923 }, { "epoch": 6.19, "learning_rate": 6.6786063044811485e-06, "loss": 0.1387, "step": 7924 }, { "epoch": 6.19, "learning_rate": 6.676217852683815e-06, "loss": 0.0391, "step": 7925 }, { "epoch": 6.19, "learning_rate": 6.673829614028333e-06, "loss": 0.0771, "step": 7926 }, { "epoch": 6.2, "learning_rate": 6.671441588667857e-06, "loss": 0.0894, "step": 7927 }, { "epoch": 6.2, "learning_rate": 6.669053776755522e-06, "loss": 0.1468, "step": 7928 }, { "epoch": 6.2, "learning_rate": 6.666666178444443e-06, "loss": 0.0417, "step": 7929 }, { "epoch": 6.2, "learning_rate": 6.664278793887734e-06, "loss": 0.0869, "step": 7930 }, { "epoch": 6.2, "learning_rate": 6.6618916232384875e-06, "loss": 0.0955, "step": 7931 }, { "epoch": 6.2, "learning_rate": 6.659504666649781e-06, "loss": 0.1376, "step": 7932 }, { "epoch": 6.2, "learning_rate": 6.657117924274687e-06, "loss": 0.0787, "step": 7933 }, { "epoch": 6.2, "learning_rate": 6.6547313962662555e-06, "loss": 0.1329, "step": 7934 }, { "epoch": 6.2, "learning_rate": 6.6523450827775225e-06, "loss": 0.1854, "step": 7935 }, { "epoch": 6.2, "learning_rate": 6.649958983961518e-06, "loss": 0.087, "step": 7936 }, { "epoch": 6.2, "learning_rate": 6.647573099971251e-06, "loss": 0.1241, "step": 7937 }, { "epoch": 6.2, "learning_rate": 6.645187430959721e-06, "loss": 0.0669, "step": 7938 }, { "epoch": 6.2, "learning_rate": 6.642801977079916e-06, "loss": 0.1376, "step": 7939 }, { "epoch": 6.21, "learning_rate": 6.640416738484799e-06, "loss": 0.1391, "step": 7940 }, { "epoch": 6.21, "learning_rate": 6.638031715327326e-06, "loss": 0.0401, "step": 7941 }, { "epoch": 6.21, "learning_rate": 6.635646907760447e-06, "loss": 0.1022, "step": 7942 }, { "epoch": 6.21, "learning_rate": 6.6332623159370855e-06, "loss": 0.0873, "step": 7943 }, { "epoch": 6.21, "learning_rate": 6.630877940010159e-06, "loss": 0.1119, "step": 7944 }, { "epoch": 6.21, "learning_rate": 6.62849378013257e-06, "loss": 0.0785, "step": 7945 }, { "epoch": 6.21, "learning_rate": 6.626109836457203e-06, "loss": 0.1583, "step": 7946 }, { "epoch": 6.21, "learning_rate": 6.62372610913693e-06, "loss": 0.1659, "step": 7947 }, { "epoch": 6.21, "learning_rate": 6.621342598324614e-06, "loss": 0.0728, "step": 7948 }, { "epoch": 6.21, "learning_rate": 6.618959304173098e-06, "loss": 0.0645, "step": 7949 }, { "epoch": 6.21, "learning_rate": 6.616576226835219e-06, "loss": 0.0922, "step": 7950 }, { "epoch": 6.21, "learning_rate": 6.61419336646379e-06, "loss": 0.067, "step": 7951 }, { "epoch": 6.21, "learning_rate": 6.611810723211613e-06, "loss": 0.0886, "step": 7952 }, { "epoch": 6.22, "learning_rate": 6.609428297231486e-06, "loss": 0.1442, "step": 7953 }, { "epoch": 6.22, "learning_rate": 6.607046088676179e-06, "loss": 0.2182, "step": 7954 }, { "epoch": 6.22, "learning_rate": 6.604664097698454e-06, "loss": 0.1243, "step": 7955 }, { "epoch": 6.22, "learning_rate": 6.6022823244510655e-06, "loss": 0.116, "step": 7956 }, { "epoch": 6.22, "learning_rate": 6.599900769086742e-06, "loss": 0.0495, "step": 7957 }, { "epoch": 6.22, "learning_rate": 6.5975194317582036e-06, "loss": 0.1116, "step": 7958 }, { "epoch": 6.22, "learning_rate": 6.5951383126181585e-06, "loss": 0.1315, "step": 7959 }, { "epoch": 6.22, "learning_rate": 6.592757411819298e-06, "loss": 0.104, "step": 7960 }, { "epoch": 6.22, "learning_rate": 6.5903767295143025e-06, "loss": 0.134, "step": 7961 }, { "epoch": 6.22, "learning_rate": 6.587996265855839e-06, "loss": 0.0843, "step": 7962 }, { "epoch": 6.22, "learning_rate": 6.585616020996551e-06, "loss": 0.1014, "step": 7963 }, { "epoch": 6.22, "learning_rate": 6.583235995089077e-06, "loss": 0.1895, "step": 7964 }, { "epoch": 6.23, "learning_rate": 6.58085618828604e-06, "loss": 0.039, "step": 7965 }, { "epoch": 6.23, "learning_rate": 6.578476600740048e-06, "loss": 0.1331, "step": 7966 }, { "epoch": 6.23, "learning_rate": 6.576097232603698e-06, "loss": 0.0869, "step": 7967 }, { "epoch": 6.23, "learning_rate": 6.573718084029568e-06, "loss": 0.1353, "step": 7968 }, { "epoch": 6.23, "learning_rate": 6.571339155170222e-06, "loss": 0.1303, "step": 7969 }, { "epoch": 6.23, "learning_rate": 6.568960446178211e-06, "loss": 0.0628, "step": 7970 }, { "epoch": 6.23, "learning_rate": 6.566581957206078e-06, "loss": 0.1419, "step": 7971 }, { "epoch": 6.23, "learning_rate": 6.564203688406341e-06, "loss": 0.1006, "step": 7972 }, { "epoch": 6.23, "learning_rate": 6.5618256399315185e-06, "loss": 0.0835, "step": 7973 }, { "epoch": 6.23, "learning_rate": 6.559447811934096e-06, "loss": 0.1134, "step": 7974 }, { "epoch": 6.23, "learning_rate": 6.55707020456656e-06, "loss": 0.0785, "step": 7975 }, { "epoch": 6.23, "learning_rate": 6.554692817981374e-06, "loss": 0.076, "step": 7976 }, { "epoch": 6.23, "learning_rate": 6.552315652330996e-06, "loss": 0.1075, "step": 7977 }, { "epoch": 6.24, "learning_rate": 6.54993870776786e-06, "loss": 0.0982, "step": 7978 }, { "epoch": 6.24, "learning_rate": 6.547561984444396e-06, "loss": 0.1019, "step": 7979 }, { "epoch": 6.24, "learning_rate": 6.54518548251301e-06, "loss": 0.0689, "step": 7980 }, { "epoch": 6.24, "learning_rate": 6.542809202126099e-06, "loss": 0.0478, "step": 7981 }, { "epoch": 6.24, "learning_rate": 6.540433143436047e-06, "loss": 0.0861, "step": 7982 }, { "epoch": 6.24, "learning_rate": 6.53805730659522e-06, "loss": 0.1033, "step": 7983 }, { "epoch": 6.24, "learning_rate": 6.535681691755971e-06, "loss": 0.0975, "step": 7984 }, { "epoch": 6.24, "learning_rate": 6.533306299070645e-06, "loss": 0.0958, "step": 7985 }, { "epoch": 6.24, "learning_rate": 6.53093112869156e-06, "loss": 0.0948, "step": 7986 }, { "epoch": 6.24, "learning_rate": 6.528556180771028e-06, "loss": 0.0958, "step": 7987 }, { "epoch": 6.24, "learning_rate": 6.52618145546135e-06, "loss": 0.1201, "step": 7988 }, { "epoch": 6.24, "learning_rate": 6.523806952914804e-06, "loss": 0.1066, "step": 7989 }, { "epoch": 6.24, "learning_rate": 6.521432673283666e-06, "loss": 0.106, "step": 7990 }, { "epoch": 6.25, "learning_rate": 6.519058616720175e-06, "loss": 0.0837, "step": 7991 }, { "epoch": 6.25, "learning_rate": 6.516684783376583e-06, "loss": 0.0547, "step": 7992 }, { "epoch": 6.25, "learning_rate": 6.514311173405108e-06, "loss": 0.2007, "step": 7993 }, { "epoch": 6.25, "learning_rate": 6.511937786957967e-06, "loss": 0.1442, "step": 7994 }, { "epoch": 6.25, "learning_rate": 6.509564624187351e-06, "loss": 0.08, "step": 7995 }, { "epoch": 6.25, "learning_rate": 6.5071916852454485e-06, "loss": 0.158, "step": 7996 }, { "epoch": 6.25, "learning_rate": 6.5048189702844235e-06, "loss": 0.0714, "step": 7997 }, { "epoch": 6.25, "learning_rate": 6.502446479456427e-06, "loss": 0.088, "step": 7998 }, { "epoch": 6.25, "learning_rate": 6.500074212913599e-06, "loss": 0.0906, "step": 7999 }, { "epoch": 6.25, "learning_rate": 6.4977021708080666e-06, "loss": 0.1062, "step": 8000 }, { "epoch": 6.25, "learning_rate": 6.495330353291939e-06, "loss": 0.0977, "step": 8001 }, { "epoch": 6.25, "learning_rate": 6.492958760517316e-06, "loss": 0.1311, "step": 8002 }, { "epoch": 6.25, "learning_rate": 6.490587392636272e-06, "loss": 0.0653, "step": 8003 }, { "epoch": 6.26, "learning_rate": 6.488216249800878e-06, "loss": 0.0974, "step": 8004 }, { "epoch": 6.26, "learning_rate": 6.485845332163183e-06, "loss": 0.0797, "step": 8005 }, { "epoch": 6.26, "learning_rate": 6.483474639875231e-06, "loss": 0.123, "step": 8006 }, { "epoch": 6.26, "learning_rate": 6.4811041730890455e-06, "loss": 0.0954, "step": 8007 }, { "epoch": 6.26, "learning_rate": 6.478733931956628e-06, "loss": 0.0999, "step": 8008 }, { "epoch": 6.26, "learning_rate": 6.476363916629981e-06, "loss": 0.118, "step": 8009 }, { "epoch": 6.26, "learning_rate": 6.473994127261078e-06, "loss": 0.1925, "step": 8010 }, { "epoch": 6.26, "learning_rate": 6.4716245640018946e-06, "loss": 0.1004, "step": 8011 }, { "epoch": 6.26, "learning_rate": 6.4692552270043744e-06, "loss": 0.1052, "step": 8012 }, { "epoch": 6.26, "learning_rate": 6.466886116420459e-06, "loss": 0.1341, "step": 8013 }, { "epoch": 6.26, "learning_rate": 6.464517232402063e-06, "loss": 0.2233, "step": 8014 }, { "epoch": 6.26, "learning_rate": 6.462148575101103e-06, "loss": 0.0712, "step": 8015 }, { "epoch": 6.26, "learning_rate": 6.4597801446694656e-06, "loss": 0.1145, "step": 8016 }, { "epoch": 6.27, "learning_rate": 6.457411941259034e-06, "loss": 0.0574, "step": 8017 }, { "epoch": 6.27, "learning_rate": 6.455043965021669e-06, "loss": 0.0833, "step": 8018 }, { "epoch": 6.27, "learning_rate": 6.4526762161092285e-06, "loss": 0.147, "step": 8019 }, { "epoch": 6.27, "learning_rate": 6.450308694673534e-06, "loss": 0.1225, "step": 8020 }, { "epoch": 6.27, "learning_rate": 6.447941400866414e-06, "loss": 0.2337, "step": 8021 }, { "epoch": 6.27, "learning_rate": 6.445574334839672e-06, "loss": 0.0918, "step": 8022 }, { "epoch": 6.27, "learning_rate": 6.443207496745103e-06, "loss": 0.103, "step": 8023 }, { "epoch": 6.27, "learning_rate": 6.440840886734481e-06, "loss": 0.0904, "step": 8024 }, { "epoch": 6.27, "learning_rate": 6.438474504959564e-06, "loss": 0.0879, "step": 8025 }, { "epoch": 6.27, "learning_rate": 6.436108351572106e-06, "loss": 0.0778, "step": 8026 }, { "epoch": 6.27, "learning_rate": 6.4337424267238365e-06, "loss": 0.0834, "step": 8027 }, { "epoch": 6.27, "learning_rate": 6.431376730566472e-06, "loss": 0.0942, "step": 8028 }, { "epoch": 6.28, "learning_rate": 6.429011263251718e-06, "loss": 0.0608, "step": 8029 }, { "epoch": 6.28, "learning_rate": 6.426646024931267e-06, "loss": 0.1079, "step": 8030 }, { "epoch": 6.28, "learning_rate": 6.4242810157567815e-06, "loss": 0.0747, "step": 8031 }, { "epoch": 6.28, "learning_rate": 6.4219162358799326e-06, "loss": 0.0993, "step": 8032 }, { "epoch": 6.28, "learning_rate": 6.4195516854523566e-06, "loss": 0.1065, "step": 8033 }, { "epoch": 6.28, "learning_rate": 6.41718736462569e-06, "loss": 0.1221, "step": 8034 }, { "epoch": 6.28, "learning_rate": 6.414823273551545e-06, "loss": 0.0714, "step": 8035 }, { "epoch": 6.28, "learning_rate": 6.4124594123815246e-06, "loss": 0.0865, "step": 8036 }, { "epoch": 6.28, "learning_rate": 6.410095781267207e-06, "loss": 0.1311, "step": 8037 }, { "epoch": 6.28, "learning_rate": 6.40773238036017e-06, "loss": 0.1553, "step": 8038 }, { "epoch": 6.28, "learning_rate": 6.405369209811967e-06, "loss": 0.1236, "step": 8039 }, { "epoch": 6.28, "learning_rate": 6.40300626977414e-06, "loss": 0.0601, "step": 8040 }, { "epoch": 6.28, "learning_rate": 6.4006435603982145e-06, "loss": 0.1033, "step": 8041 }, { "epoch": 6.29, "learning_rate": 6.398281081835711e-06, "loss": 0.099, "step": 8042 }, { "epoch": 6.29, "learning_rate": 6.395918834238111e-06, "loss": 0.1476, "step": 8043 }, { "epoch": 6.29, "learning_rate": 6.393556817756907e-06, "loss": 0.0686, "step": 8044 }, { "epoch": 6.29, "learning_rate": 6.391195032543564e-06, "loss": 0.1269, "step": 8045 }, { "epoch": 6.29, "learning_rate": 6.3888334787495355e-06, "loss": 0.0474, "step": 8046 }, { "epoch": 6.29, "learning_rate": 6.38647215652626e-06, "loss": 0.1348, "step": 8047 }, { "epoch": 6.29, "learning_rate": 6.384111066025158e-06, "loss": 0.0725, "step": 8048 }, { "epoch": 6.29, "learning_rate": 6.3817502073976345e-06, "loss": 0.0889, "step": 8049 }, { "epoch": 6.29, "learning_rate": 6.37938958079509e-06, "loss": 0.0651, "step": 8050 }, { "epoch": 6.29, "learning_rate": 6.377029186368897e-06, "loss": 0.141, "step": 8051 }, { "epoch": 6.29, "learning_rate": 6.374669024270422e-06, "loss": 0.0548, "step": 8052 }, { "epoch": 6.29, "learning_rate": 6.372309094651014e-06, "loss": 0.0572, "step": 8053 }, { "epoch": 6.29, "learning_rate": 6.369949397662003e-06, "loss": 0.0696, "step": 8054 }, { "epoch": 6.3, "learning_rate": 6.367589933454711e-06, "loss": 0.1354, "step": 8055 }, { "epoch": 6.3, "learning_rate": 6.36523070218044e-06, "loss": 0.0959, "step": 8056 }, { "epoch": 6.3, "learning_rate": 6.362871703990476e-06, "loss": 0.1676, "step": 8057 }, { "epoch": 6.3, "learning_rate": 6.3605129390361005e-06, "loss": 0.1092, "step": 8058 }, { "epoch": 6.3, "learning_rate": 6.358154407468568e-06, "loss": 0.1134, "step": 8059 }, { "epoch": 6.3, "learning_rate": 6.355796109439119e-06, "loss": 0.0908, "step": 8060 }, { "epoch": 6.3, "learning_rate": 6.3534380450989875e-06, "loss": 0.1255, "step": 8061 }, { "epoch": 6.3, "learning_rate": 6.351080214599385e-06, "loss": 0.0902, "step": 8062 }, { "epoch": 6.3, "learning_rate": 6.348722618091512e-06, "loss": 0.0955, "step": 8063 }, { "epoch": 6.3, "learning_rate": 6.3463652557265524e-06, "loss": 0.1228, "step": 8064 }, { "epoch": 6.3, "learning_rate": 6.344008127655674e-06, "loss": 0.096, "step": 8065 }, { "epoch": 6.3, "learning_rate": 6.341651234030027e-06, "loss": 0.1895, "step": 8066 }, { "epoch": 6.3, "learning_rate": 6.339294575000758e-06, "loss": 0.0866, "step": 8067 }, { "epoch": 6.31, "learning_rate": 6.336938150718984e-06, "loss": 0.1841, "step": 8068 }, { "epoch": 6.31, "learning_rate": 6.334581961335819e-06, "loss": 0.1036, "step": 8069 }, { "epoch": 6.31, "learning_rate": 6.332226007002357e-06, "loss": 0.1206, "step": 8070 }, { "epoch": 6.31, "learning_rate": 6.329870287869673e-06, "loss": 0.2543, "step": 8071 }, { "epoch": 6.31, "learning_rate": 6.327514804088828e-06, "loss": 0.1322, "step": 8072 }, { "epoch": 6.31, "learning_rate": 6.325159555810878e-06, "loss": 0.0469, "step": 8073 }, { "epoch": 6.31, "learning_rate": 6.322804543186849e-06, "loss": 0.0694, "step": 8074 }, { "epoch": 6.31, "learning_rate": 6.320449766367766e-06, "loss": 0.0921, "step": 8075 }, { "epoch": 6.31, "learning_rate": 6.3180952255046305e-06, "loss": 0.0605, "step": 8076 }, { "epoch": 6.31, "learning_rate": 6.315740920748424e-06, "loss": 0.0852, "step": 8077 }, { "epoch": 6.31, "learning_rate": 6.313386852250128e-06, "loss": 0.0947, "step": 8078 }, { "epoch": 6.31, "learning_rate": 6.311033020160695e-06, "loss": 0.0852, "step": 8079 }, { "epoch": 6.31, "learning_rate": 6.308679424631067e-06, "loss": 0.0626, "step": 8080 }, { "epoch": 6.32, "learning_rate": 6.3063260658121785e-06, "loss": 0.0698, "step": 8081 }, { "epoch": 6.32, "learning_rate": 6.303972943854933e-06, "loss": 0.1332, "step": 8082 }, { "epoch": 6.32, "learning_rate": 6.3016200589102306e-06, "loss": 0.0851, "step": 8083 }, { "epoch": 6.32, "learning_rate": 6.299267411128955e-06, "loss": 0.0656, "step": 8084 }, { "epoch": 6.32, "learning_rate": 6.2969150006619715e-06, "loss": 0.1044, "step": 8085 }, { "epoch": 6.32, "learning_rate": 6.294562827660129e-06, "loss": 0.0775, "step": 8086 }, { "epoch": 6.32, "learning_rate": 6.292210892274271e-06, "loss": 0.2034, "step": 8087 }, { "epoch": 6.32, "learning_rate": 6.289859194655211e-06, "loss": 0.0616, "step": 8088 }, { "epoch": 6.32, "learning_rate": 6.287507734953755e-06, "loss": 0.1067, "step": 8089 }, { "epoch": 6.32, "learning_rate": 6.285156513320698e-06, "loss": 0.1542, "step": 8090 }, { "epoch": 6.32, "learning_rate": 6.28280552990681e-06, "loss": 0.1563, "step": 8091 }, { "epoch": 6.32, "learning_rate": 6.280454784862857e-06, "loss": 0.2416, "step": 8092 }, { "epoch": 6.33, "learning_rate": 6.27810427833958e-06, "loss": 0.1127, "step": 8093 }, { "epoch": 6.33, "learning_rate": 6.275754010487709e-06, "loss": 0.1133, "step": 8094 }, { "epoch": 6.33, "learning_rate": 6.2734039814579525e-06, "loss": 0.1226, "step": 8095 }, { "epoch": 6.33, "learning_rate": 6.271054191401018e-06, "loss": 0.0632, "step": 8096 }, { "epoch": 6.33, "learning_rate": 6.268704640467582e-06, "loss": 0.0642, "step": 8097 }, { "epoch": 6.33, "learning_rate": 6.266355328808321e-06, "loss": 0.0749, "step": 8098 }, { "epoch": 6.33, "learning_rate": 6.264006256573879e-06, "loss": 0.0887, "step": 8099 }, { "epoch": 6.33, "learning_rate": 6.261657423914897e-06, "loss": 0.0863, "step": 8100 }, { "epoch": 6.33, "learning_rate": 6.259308830981994e-06, "loss": 0.1106, "step": 8101 }, { "epoch": 6.33, "learning_rate": 6.256960477925781e-06, "loss": 0.0767, "step": 8102 }, { "epoch": 6.33, "learning_rate": 6.254612364896845e-06, "loss": 0.139, "step": 8103 }, { "epoch": 6.33, "learning_rate": 6.2522644920457675e-06, "loss": 0.0952, "step": 8104 }, { "epoch": 6.33, "learning_rate": 6.249916859523104e-06, "loss": 0.0559, "step": 8105 }, { "epoch": 6.34, "learning_rate": 6.2475694674794e-06, "loss": 0.1817, "step": 8106 }, { "epoch": 6.34, "learning_rate": 6.245222316065185e-06, "loss": 0.1867, "step": 8107 }, { "epoch": 6.34, "learning_rate": 6.242875405430977e-06, "loss": 0.0834, "step": 8108 }, { "epoch": 6.34, "learning_rate": 6.240528735727267e-06, "loss": 0.0914, "step": 8109 }, { "epoch": 6.34, "learning_rate": 6.23818230710455e-06, "loss": 0.203, "step": 8110 }, { "epoch": 6.34, "learning_rate": 6.235836119713283e-06, "loss": 0.1138, "step": 8111 }, { "epoch": 6.34, "learning_rate": 6.23349017370392e-06, "loss": 0.0889, "step": 8112 }, { "epoch": 6.34, "learning_rate": 6.231144469226902e-06, "loss": 0.0672, "step": 8113 }, { "epoch": 6.34, "learning_rate": 6.228799006432646e-06, "loss": 0.0757, "step": 8114 }, { "epoch": 6.34, "learning_rate": 6.226453785471568e-06, "loss": 0.1145, "step": 8115 }, { "epoch": 6.34, "learning_rate": 6.224108806494042e-06, "loss": 0.1562, "step": 8116 }, { "epoch": 6.34, "learning_rate": 6.2217640696504555e-06, "loss": 0.0826, "step": 8117 }, { "epoch": 6.34, "learning_rate": 6.219419575091162e-06, "loss": 0.1018, "step": 8118 }, { "epoch": 6.35, "learning_rate": 6.217075322966506e-06, "loss": 0.0799, "step": 8119 }, { "epoch": 6.35, "learning_rate": 6.214731313426816e-06, "loss": 0.1158, "step": 8120 }, { "epoch": 6.35, "learning_rate": 6.212387546622411e-06, "loss": 0.0746, "step": 8121 }, { "epoch": 6.35, "learning_rate": 6.210044022703578e-06, "loss": 0.237, "step": 8122 }, { "epoch": 6.35, "learning_rate": 6.207700741820604e-06, "loss": 0.0703, "step": 8123 }, { "epoch": 6.35, "learning_rate": 6.205357704123751e-06, "loss": 0.1419, "step": 8124 }, { "epoch": 6.35, "learning_rate": 6.203014909763275e-06, "loss": 0.096, "step": 8125 }, { "epoch": 6.35, "learning_rate": 6.2006723588894054e-06, "loss": 0.0438, "step": 8126 }, { "epoch": 6.35, "learning_rate": 6.198330051652368e-06, "loss": 0.1714, "step": 8127 }, { "epoch": 6.35, "learning_rate": 6.195987988202361e-06, "loss": 0.1159, "step": 8128 }, { "epoch": 6.35, "learning_rate": 6.193646168689572e-06, "loss": 0.0601, "step": 8129 }, { "epoch": 6.35, "learning_rate": 6.191304593264174e-06, "loss": 0.1173, "step": 8130 }, { "epoch": 6.35, "learning_rate": 6.188963262076324e-06, "loss": 0.0688, "step": 8131 }, { "epoch": 6.36, "learning_rate": 6.186622175276162e-06, "loss": 0.1562, "step": 8132 }, { "epoch": 6.36, "learning_rate": 6.1842813330138195e-06, "loss": 0.0957, "step": 8133 }, { "epoch": 6.36, "learning_rate": 6.181940735439398e-06, "loss": 0.0703, "step": 8134 }, { "epoch": 6.36, "learning_rate": 6.1796003827029924e-06, "loss": 0.1347, "step": 8135 }, { "epoch": 6.36, "learning_rate": 6.177260274954685e-06, "loss": 0.1219, "step": 8136 }, { "epoch": 6.36, "learning_rate": 6.1749204123445365e-06, "loss": 0.0881, "step": 8137 }, { "epoch": 6.36, "learning_rate": 6.172580795022594e-06, "loss": 0.0945, "step": 8138 }, { "epoch": 6.36, "learning_rate": 6.170241423138883e-06, "loss": 0.1064, "step": 8139 }, { "epoch": 6.36, "learning_rate": 6.167902296843427e-06, "loss": 0.1018, "step": 8140 }, { "epoch": 6.36, "learning_rate": 6.1655634162862185e-06, "loss": 0.0693, "step": 8141 }, { "epoch": 6.36, "learning_rate": 6.163224781617248e-06, "loss": 0.0644, "step": 8142 }, { "epoch": 6.36, "learning_rate": 6.160886392986477e-06, "loss": 0.1746, "step": 8143 }, { "epoch": 6.36, "learning_rate": 6.158548250543868e-06, "loss": 0.1121, "step": 8144 }, { "epoch": 6.37, "learning_rate": 6.156210354439343e-06, "loss": 0.1331, "step": 8145 }, { "epoch": 6.37, "learning_rate": 6.153872704822832e-06, "loss": 0.1078, "step": 8146 }, { "epoch": 6.37, "learning_rate": 6.1515353018442356e-06, "loss": 0.1193, "step": 8147 }, { "epoch": 6.37, "learning_rate": 6.149198145653448e-06, "loss": 0.1341, "step": 8148 }, { "epoch": 6.37, "learning_rate": 6.1468612364003365e-06, "loss": 0.1029, "step": 8149 }, { "epoch": 6.37, "learning_rate": 6.144524574234767e-06, "loss": 0.0899, "step": 8150 }, { "epoch": 6.37, "learning_rate": 6.142188159306571e-06, "loss": 0.1127, "step": 8151 }, { "epoch": 6.37, "learning_rate": 6.13985199176558e-06, "loss": 0.0969, "step": 8152 }, { "epoch": 6.37, "learning_rate": 6.137516071761599e-06, "loss": 0.0635, "step": 8153 }, { "epoch": 6.37, "learning_rate": 6.135180399444427e-06, "loss": 0.1947, "step": 8154 }, { "epoch": 6.37, "learning_rate": 6.132844974963843e-06, "loss": 0.2136, "step": 8155 }, { "epoch": 6.37, "learning_rate": 6.130509798469601e-06, "loss": 0.0796, "step": 8156 }, { "epoch": 6.38, "learning_rate": 6.1281748701114566e-06, "loss": 0.044, "step": 8157 }, { "epoch": 6.38, "learning_rate": 6.125840190039134e-06, "loss": 0.0914, "step": 8158 }, { "epoch": 6.38, "learning_rate": 6.123505758402348e-06, "loss": 0.1554, "step": 8159 }, { "epoch": 6.38, "learning_rate": 6.121171575350802e-06, "loss": 0.0923, "step": 8160 }, { "epoch": 6.38, "learning_rate": 6.118837641034176e-06, "loss": 0.1902, "step": 8161 }, { "epoch": 6.38, "learning_rate": 6.116503955602132e-06, "loss": 0.0889, "step": 8162 }, { "epoch": 6.38, "learning_rate": 6.114170519204327e-06, "loss": 0.0554, "step": 8163 }, { "epoch": 6.38, "learning_rate": 6.11183733199039e-06, "loss": 0.1324, "step": 8164 }, { "epoch": 6.38, "learning_rate": 6.109504394109947e-06, "loss": 0.0879, "step": 8165 }, { "epoch": 6.38, "learning_rate": 6.1071717057125935e-06, "loss": 0.0958, "step": 8166 }, { "epoch": 6.38, "learning_rate": 6.104839266947922e-06, "loss": 0.1963, "step": 8167 }, { "epoch": 6.38, "learning_rate": 6.102507077965496e-06, "loss": 0.1081, "step": 8168 }, { "epoch": 6.38, "learning_rate": 6.100175138914877e-06, "loss": 0.1191, "step": 8169 }, { "epoch": 6.39, "learning_rate": 6.097843449945599e-06, "loss": 0.1, "step": 8170 }, { "epoch": 6.39, "learning_rate": 6.095512011207188e-06, "loss": 0.1146, "step": 8171 }, { "epoch": 6.39, "learning_rate": 6.093180822849152e-06, "loss": 0.09, "step": 8172 }, { "epoch": 6.39, "learning_rate": 6.090849885020975e-06, "loss": 0.1696, "step": 8173 }, { "epoch": 6.39, "learning_rate": 6.088519197872133e-06, "loss": 0.0999, "step": 8174 }, { "epoch": 6.39, "learning_rate": 6.086188761552091e-06, "loss": 0.0856, "step": 8175 }, { "epoch": 6.39, "learning_rate": 6.083858576210283e-06, "loss": 0.1344, "step": 8176 }, { "epoch": 6.39, "learning_rate": 6.081528641996141e-06, "loss": 0.1539, "step": 8177 }, { "epoch": 6.39, "learning_rate": 6.079198959059075e-06, "loss": 0.1281, "step": 8178 }, { "epoch": 6.39, "learning_rate": 6.076869527548473e-06, "loss": 0.0725, "step": 8179 }, { "epoch": 6.39, "learning_rate": 6.074540347613718e-06, "loss": 0.1303, "step": 8180 }, { "epoch": 6.39, "learning_rate": 6.07221141940417e-06, "loss": 0.1313, "step": 8181 }, { "epoch": 6.39, "learning_rate": 6.0698827430691745e-06, "loss": 0.0748, "step": 8182 }, { "epoch": 6.4, "learning_rate": 6.067554318758062e-06, "loss": 0.1799, "step": 8183 }, { "epoch": 6.4, "learning_rate": 6.0652261466201464e-06, "loss": 0.0765, "step": 8184 }, { "epoch": 6.4, "learning_rate": 6.062898226804721e-06, "loss": 0.1406, "step": 8185 }, { "epoch": 6.4, "learning_rate": 6.060570559461071e-06, "loss": 0.1114, "step": 8186 }, { "epoch": 6.4, "learning_rate": 6.058243144738456e-06, "loss": 0.0852, "step": 8187 }, { "epoch": 6.4, "learning_rate": 6.055915982786132e-06, "loss": 0.0782, "step": 8188 }, { "epoch": 6.4, "learning_rate": 6.053589073753325e-06, "loss": 0.0524, "step": 8189 }, { "epoch": 6.4, "learning_rate": 6.051262417789255e-06, "loss": 0.1591, "step": 8190 }, { "epoch": 6.4, "learning_rate": 6.048936015043116e-06, "loss": 0.1354, "step": 8191 }, { "epoch": 6.4, "learning_rate": 6.046609865664097e-06, "loss": 0.1252, "step": 8192 }, { "epoch": 6.4, "learning_rate": 6.044283969801363e-06, "loss": 0.1052, "step": 8193 }, { "epoch": 6.4, "learning_rate": 6.0419583276040675e-06, "loss": 0.0852, "step": 8194 }, { "epoch": 6.4, "learning_rate": 6.039632939221346e-06, "loss": 0.1161, "step": 8195 }, { "epoch": 6.41, "learning_rate": 6.037307804802313e-06, "loss": 0.0971, "step": 8196 }, { "epoch": 6.41, "learning_rate": 6.034982924496069e-06, "loss": 0.1672, "step": 8197 }, { "epoch": 6.41, "learning_rate": 6.032658298451708e-06, "loss": 0.1113, "step": 8198 }, { "epoch": 6.41, "learning_rate": 6.0303339268182916e-06, "loss": 0.0631, "step": 8199 }, { "epoch": 6.41, "learning_rate": 6.028009809744879e-06, "loss": 0.2085, "step": 8200 }, { "epoch": 6.41, "learning_rate": 6.025685947380506e-06, "loss": 0.1512, "step": 8201 }, { "epoch": 6.41, "learning_rate": 6.023362339874193e-06, "loss": 0.1319, "step": 8202 }, { "epoch": 6.41, "learning_rate": 6.02103898737494e-06, "loss": 0.0973, "step": 8203 }, { "epoch": 6.41, "learning_rate": 6.018715890031741e-06, "loss": 0.0618, "step": 8204 }, { "epoch": 6.41, "learning_rate": 6.0163930479935624e-06, "loss": 0.0907, "step": 8205 }, { "epoch": 6.41, "learning_rate": 6.014070461409365e-06, "loss": 0.0929, "step": 8206 }, { "epoch": 6.41, "learning_rate": 6.011748130428088e-06, "loss": 0.1616, "step": 8207 }, { "epoch": 6.42, "learning_rate": 6.009426055198646e-06, "loss": 0.1767, "step": 8208 }, { "epoch": 6.42, "learning_rate": 6.007104235869952e-06, "loss": 0.1197, "step": 8209 }, { "epoch": 6.42, "learning_rate": 6.004782672590894e-06, "loss": 0.0869, "step": 8210 }, { "epoch": 6.42, "learning_rate": 6.002461365510343e-06, "loss": 0.1605, "step": 8211 }, { "epoch": 6.42, "learning_rate": 6.000140314777163e-06, "loss": 0.1597, "step": 8212 }, { "epoch": 6.42, "learning_rate": 5.997819520540186e-06, "loss": 0.128, "step": 8213 }, { "epoch": 6.42, "learning_rate": 5.995498982948237e-06, "loss": 0.0846, "step": 8214 }, { "epoch": 6.42, "learning_rate": 5.993178702150129e-06, "loss": 0.1064, "step": 8215 }, { "epoch": 6.42, "learning_rate": 5.990858678294645e-06, "loss": 0.1275, "step": 8216 }, { "epoch": 6.42, "learning_rate": 5.9885389115305685e-06, "loss": 0.1454, "step": 8217 }, { "epoch": 6.42, "learning_rate": 5.9862194020066545e-06, "loss": 0.1118, "step": 8218 }, { "epoch": 6.42, "learning_rate": 5.983900149871642e-06, "loss": 0.0971, "step": 8219 }, { "epoch": 6.42, "learning_rate": 5.981581155274256e-06, "loss": 0.0765, "step": 8220 }, { "epoch": 6.43, "learning_rate": 5.979262418363207e-06, "loss": 0.1058, "step": 8221 }, { "epoch": 6.43, "learning_rate": 5.976943939287185e-06, "loss": 0.2584, "step": 8222 }, { "epoch": 6.43, "learning_rate": 5.97462571819487e-06, "loss": 0.1098, "step": 8223 }, { "epoch": 6.43, "learning_rate": 5.9723077552349185e-06, "loss": 0.1528, "step": 8224 }, { "epoch": 6.43, "learning_rate": 5.969990050555972e-06, "loss": 0.1645, "step": 8225 }, { "epoch": 6.43, "learning_rate": 5.967672604306654e-06, "loss": 0.0662, "step": 8226 }, { "epoch": 6.43, "learning_rate": 5.965355416635577e-06, "loss": 0.1244, "step": 8227 }, { "epoch": 6.43, "learning_rate": 5.963038487691333e-06, "loss": 0.0817, "step": 8228 }, { "epoch": 6.43, "learning_rate": 5.960721817622501e-06, "loss": 0.0629, "step": 8229 }, { "epoch": 6.43, "learning_rate": 5.958405406577636e-06, "loss": 0.1138, "step": 8230 }, { "epoch": 6.43, "learning_rate": 5.956089254705279e-06, "loss": 0.1624, "step": 8231 }, { "epoch": 6.43, "learning_rate": 5.953773362153965e-06, "loss": 0.1323, "step": 8232 }, { "epoch": 6.43, "learning_rate": 5.951457729072197e-06, "loss": 0.1293, "step": 8233 }, { "epoch": 6.44, "learning_rate": 5.949142355608466e-06, "loss": 0.0964, "step": 8234 }, { "epoch": 6.44, "learning_rate": 5.946827241911257e-06, "loss": 0.0746, "step": 8235 }, { "epoch": 6.44, "learning_rate": 5.944512388129023e-06, "loss": 0.0944, "step": 8236 }, { "epoch": 6.44, "learning_rate": 5.942197794410205e-06, "loss": 0.062, "step": 8237 }, { "epoch": 6.44, "learning_rate": 5.9398834609032355e-06, "loss": 0.1425, "step": 8238 }, { "epoch": 6.44, "learning_rate": 5.937569387756521e-06, "loss": 0.1101, "step": 8239 }, { "epoch": 6.44, "learning_rate": 5.935255575118453e-06, "loss": 0.1028, "step": 8240 }, { "epoch": 6.44, "learning_rate": 5.932942023137414e-06, "loss": 0.0936, "step": 8241 }, { "epoch": 6.44, "learning_rate": 5.930628731961757e-06, "loss": 0.099, "step": 8242 }, { "epoch": 6.44, "learning_rate": 5.928315701739825e-06, "loss": 0.0972, "step": 8243 }, { "epoch": 6.44, "learning_rate": 5.926002932619947e-06, "loss": 0.0741, "step": 8244 }, { "epoch": 6.44, "learning_rate": 5.923690424750431e-06, "loss": 0.1494, "step": 8245 }, { "epoch": 6.44, "learning_rate": 5.921378178279575e-06, "loss": 0.0496, "step": 8246 }, { "epoch": 6.45, "learning_rate": 5.919066193355645e-06, "loss": 0.1195, "step": 8247 }, { "epoch": 6.45, "learning_rate": 5.916754470126904e-06, "loss": 0.0822, "step": 8248 }, { "epoch": 6.45, "learning_rate": 5.914443008741595e-06, "loss": 0.1199, "step": 8249 }, { "epoch": 6.45, "learning_rate": 5.912131809347945e-06, "loss": 0.0537, "step": 8250 }, { "epoch": 6.45, "learning_rate": 5.90982087209416e-06, "loss": 0.142, "step": 8251 }, { "epoch": 6.45, "learning_rate": 5.907510197128437e-06, "loss": 0.1284, "step": 8252 }, { "epoch": 6.45, "learning_rate": 5.905199784598943e-06, "loss": 0.0626, "step": 8253 }, { "epoch": 6.45, "learning_rate": 5.902889634653842e-06, "loss": 0.0843, "step": 8254 }, { "epoch": 6.45, "learning_rate": 5.9005797474412705e-06, "loss": 0.1285, "step": 8255 }, { "epoch": 6.45, "learning_rate": 5.89827012310936e-06, "loss": 0.0891, "step": 8256 }, { "epoch": 6.45, "learning_rate": 5.895960761806211e-06, "loss": 0.1222, "step": 8257 }, { "epoch": 6.45, "learning_rate": 5.893651663679922e-06, "loss": 0.1397, "step": 8258 }, { "epoch": 6.45, "learning_rate": 5.89134282887856e-06, "loss": 0.0822, "step": 8259 }, { "epoch": 6.46, "learning_rate": 5.889034257550183e-06, "loss": 0.0851, "step": 8260 }, { "epoch": 6.46, "learning_rate": 5.886725949842835e-06, "loss": 0.1188, "step": 8261 }, { "epoch": 6.46, "learning_rate": 5.8844179059045355e-06, "loss": 0.1343, "step": 8262 }, { "epoch": 6.46, "learning_rate": 5.8821101258832955e-06, "loss": 0.1012, "step": 8263 }, { "epoch": 6.46, "learning_rate": 5.879802609927099e-06, "loss": 0.1416, "step": 8264 }, { "epoch": 6.46, "learning_rate": 5.87749535818392e-06, "loss": 0.1087, "step": 8265 }, { "epoch": 6.46, "learning_rate": 5.875188370801713e-06, "loss": 0.2625, "step": 8266 }, { "epoch": 6.46, "learning_rate": 5.872881647928421e-06, "loss": 0.1174, "step": 8267 }, { "epoch": 6.46, "learning_rate": 5.87057518971196e-06, "loss": 0.066, "step": 8268 }, { "epoch": 6.46, "learning_rate": 5.8682689963002435e-06, "loss": 0.079, "step": 8269 }, { "epoch": 6.46, "learning_rate": 5.865963067841148e-06, "loss": 0.0681, "step": 8270 }, { "epoch": 6.46, "learning_rate": 5.863657404482551e-06, "loss": 0.0709, "step": 8271 }, { "epoch": 6.47, "learning_rate": 5.861352006372303e-06, "loss": 0.1167, "step": 8272 }, { "epoch": 6.47, "learning_rate": 5.859046873658243e-06, "loss": 0.171, "step": 8273 }, { "epoch": 6.47, "learning_rate": 5.856742006488188e-06, "loss": 0.0567, "step": 8274 }, { "epoch": 6.47, "learning_rate": 5.854437405009947e-06, "loss": 0.1599, "step": 8275 }, { "epoch": 6.47, "learning_rate": 5.8521330693712976e-06, "loss": 0.0843, "step": 8276 }, { "epoch": 6.47, "learning_rate": 5.849828999720012e-06, "loss": 0.1457, "step": 8277 }, { "epoch": 6.47, "learning_rate": 5.847525196203839e-06, "loss": 0.1794, "step": 8278 }, { "epoch": 6.47, "learning_rate": 5.845221658970519e-06, "loss": 0.072, "step": 8279 }, { "epoch": 6.47, "learning_rate": 5.842918388167762e-06, "loss": 0.1523, "step": 8280 }, { "epoch": 6.47, "learning_rate": 5.840615383943275e-06, "loss": 0.0812, "step": 8281 }, { "epoch": 6.47, "learning_rate": 5.838312646444737e-06, "loss": 0.0654, "step": 8282 }, { "epoch": 6.47, "learning_rate": 5.836010175819815e-06, "loss": 0.1032, "step": 8283 }, { "epoch": 6.47, "learning_rate": 5.833707972216158e-06, "loss": 0.0936, "step": 8284 }, { "epoch": 6.48, "learning_rate": 5.831406035781397e-06, "loss": 0.0945, "step": 8285 }, { "epoch": 6.48, "learning_rate": 5.8291043666631495e-06, "loss": 0.0699, "step": 8286 }, { "epoch": 6.48, "learning_rate": 5.826802965009011e-06, "loss": 0.1613, "step": 8287 }, { "epoch": 6.48, "learning_rate": 5.82450183096656e-06, "loss": 0.1394, "step": 8288 }, { "epoch": 6.48, "learning_rate": 5.822200964683362e-06, "loss": 0.0761, "step": 8289 }, { "epoch": 6.48, "learning_rate": 5.819900366306959e-06, "loss": 0.1719, "step": 8290 }, { "epoch": 6.48, "learning_rate": 5.817600035984887e-06, "loss": 0.1161, "step": 8291 }, { "epoch": 6.48, "learning_rate": 5.815299973864659e-06, "loss": 0.0714, "step": 8292 }, { "epoch": 6.48, "learning_rate": 5.81300018009376e-06, "loss": 0.0848, "step": 8293 }, { "epoch": 6.48, "learning_rate": 5.810700654819668e-06, "loss": 0.0829, "step": 8294 }, { "epoch": 6.48, "learning_rate": 5.808401398189849e-06, "loss": 0.0884, "step": 8295 }, { "epoch": 6.48, "learning_rate": 5.806102410351746e-06, "loss": 0.0556, "step": 8296 }, { "epoch": 6.48, "learning_rate": 5.803803691452782e-06, "loss": 0.1229, "step": 8297 }, { "epoch": 6.49, "learning_rate": 5.801505241640366e-06, "loss": 0.1486, "step": 8298 }, { "epoch": 6.49, "learning_rate": 5.799207061061888e-06, "loss": 0.0785, "step": 8299 }, { "epoch": 6.49, "learning_rate": 5.796909149864722e-06, "loss": 0.0761, "step": 8300 }, { "epoch": 6.49, "learning_rate": 5.794611508196226e-06, "loss": 0.078, "step": 8301 }, { "epoch": 6.49, "learning_rate": 5.7923141362037335e-06, "loss": 0.107, "step": 8302 }, { "epoch": 6.49, "learning_rate": 5.79001703403458e-06, "loss": 0.1235, "step": 8303 }, { "epoch": 6.49, "learning_rate": 5.7877202018360575e-06, "loss": 0.0794, "step": 8304 }, { "epoch": 6.49, "learning_rate": 5.785423639755459e-06, "loss": 0.0391, "step": 8305 }, { "epoch": 6.49, "learning_rate": 5.783127347940047e-06, "loss": 0.1346, "step": 8306 }, { "epoch": 6.49, "learning_rate": 5.780831326537084e-06, "loss": 0.1278, "step": 8307 }, { "epoch": 6.49, "learning_rate": 5.778535575693802e-06, "loss": 0.1212, "step": 8308 }, { "epoch": 6.49, "learning_rate": 5.776240095557418e-06, "loss": 0.0842, "step": 8309 }, { "epoch": 6.49, "learning_rate": 5.7739448862751335e-06, "loss": 0.1034, "step": 8310 }, { "epoch": 6.5, "learning_rate": 5.771649947994131e-06, "loss": 0.0515, "step": 8311 }, { "epoch": 6.5, "learning_rate": 5.769355280861577e-06, "loss": 0.1876, "step": 8312 }, { "epoch": 6.5, "learning_rate": 5.76706088502462e-06, "loss": 0.1419, "step": 8313 }, { "epoch": 6.5, "learning_rate": 5.764766760630387e-06, "loss": 0.0449, "step": 8314 }, { "epoch": 6.5, "learning_rate": 5.762472907826004e-06, "loss": 0.0579, "step": 8315 }, { "epoch": 6.5, "learning_rate": 5.7601793267585545e-06, "loss": 0.1507, "step": 8316 }, { "epoch": 6.5, "learning_rate": 5.7578860175751175e-06, "loss": 0.0899, "step": 8317 }, { "epoch": 6.5, "learning_rate": 5.7555929804227626e-06, "loss": 0.1875, "step": 8318 }, { "epoch": 6.5, "learning_rate": 5.7533002154485294e-06, "loss": 0.0901, "step": 8319 }, { "epoch": 6.5, "learning_rate": 5.751007722799451e-06, "loss": 0.127, "step": 8320 }, { "epoch": 6.5, "learning_rate": 5.748715502622522e-06, "loss": 0.0973, "step": 8321 }, { "epoch": 6.5, "learning_rate": 5.746423555064744e-06, "loss": 0.171, "step": 8322 }, { "epoch": 6.5, "learning_rate": 5.744131880273091e-06, "loss": 0.1305, "step": 8323 }, { "epoch": 6.51, "learning_rate": 5.7418404783945195e-06, "loss": 0.1062, "step": 8324 }, { "epoch": 6.51, "learning_rate": 5.739549349575961e-06, "loss": 0.0447, "step": 8325 }, { "epoch": 6.51, "learning_rate": 5.737258493964353e-06, "loss": 0.1392, "step": 8326 }, { "epoch": 6.51, "learning_rate": 5.734967911706586e-06, "loss": 0.1162, "step": 8327 }, { "epoch": 6.51, "learning_rate": 5.73267760294955e-06, "loss": 0.1452, "step": 8328 }, { "epoch": 6.51, "learning_rate": 5.730387567840112e-06, "loss": 0.1741, "step": 8329 }, { "epoch": 6.51, "learning_rate": 5.728097806525128e-06, "loss": 0.2046, "step": 8330 }, { "epoch": 6.51, "learning_rate": 5.725808319151431e-06, "loss": 0.1629, "step": 8331 }, { "epoch": 6.51, "learning_rate": 5.723519105865837e-06, "loss": 0.0847, "step": 8332 }, { "epoch": 6.51, "learning_rate": 5.721230166815144e-06, "loss": 0.0511, "step": 8333 }, { "epoch": 6.51, "learning_rate": 5.718941502146132e-06, "loss": 0.1469, "step": 8334 }, { "epoch": 6.51, "learning_rate": 5.7166531120055675e-06, "loss": 0.1466, "step": 8335 }, { "epoch": 6.52, "learning_rate": 5.714364996540194e-06, "loss": 0.1446, "step": 8336 }, { "epoch": 6.52, "learning_rate": 5.71207715589674e-06, "loss": 0.1513, "step": 8337 }, { "epoch": 6.52, "learning_rate": 5.709789590221919e-06, "loss": 0.1385, "step": 8338 }, { "epoch": 6.52, "learning_rate": 5.707502299662422e-06, "loss": 0.1004, "step": 8339 }, { "epoch": 6.52, "learning_rate": 5.70521528436492e-06, "loss": 0.0862, "step": 8340 }, { "epoch": 6.52, "learning_rate": 5.702928544476079e-06, "loss": 0.0587, "step": 8341 }, { "epoch": 6.52, "learning_rate": 5.700642080142538e-06, "loss": 0.1571, "step": 8342 }, { "epoch": 6.52, "learning_rate": 5.698355891510918e-06, "loss": 0.1066, "step": 8343 }, { "epoch": 6.52, "learning_rate": 5.696069978727817e-06, "loss": 0.052, "step": 8344 }, { "epoch": 6.52, "learning_rate": 5.693784341939832e-06, "loss": 0.1634, "step": 8345 }, { "epoch": 6.52, "learning_rate": 5.691498981293528e-06, "loss": 0.0593, "step": 8346 }, { "epoch": 6.52, "learning_rate": 5.689213896935458e-06, "loss": 0.112, "step": 8347 }, { "epoch": 6.52, "learning_rate": 5.686929089012151e-06, "loss": 0.1307, "step": 8348 }, { "epoch": 6.53, "learning_rate": 5.6846445576701355e-06, "loss": 0.1349, "step": 8349 }, { "epoch": 6.53, "learning_rate": 5.682360303055898e-06, "loss": 0.1076, "step": 8350 }, { "epoch": 6.53, "learning_rate": 5.680076325315924e-06, "loss": 0.1002, "step": 8351 }, { "epoch": 6.53, "learning_rate": 5.677792624596672e-06, "loss": 0.1106, "step": 8352 }, { "epoch": 6.53, "learning_rate": 5.6755092010445945e-06, "loss": 0.1, "step": 8353 }, { "epoch": 6.53, "learning_rate": 5.673226054806115e-06, "loss": 0.086, "step": 8354 }, { "epoch": 6.53, "learning_rate": 5.670943186027646e-06, "loss": 0.1623, "step": 8355 }, { "epoch": 6.53, "learning_rate": 5.668660594855577e-06, "loss": 0.076, "step": 8356 }, { "epoch": 6.53, "learning_rate": 5.666378281436282e-06, "loss": 0.1006, "step": 8357 }, { "epoch": 6.53, "learning_rate": 5.664096245916117e-06, "loss": 0.1006, "step": 8358 }, { "epoch": 6.53, "learning_rate": 5.661814488441424e-06, "loss": 0.1036, "step": 8359 }, { "epoch": 6.53, "learning_rate": 5.659533009158518e-06, "loss": 0.1537, "step": 8360 }, { "epoch": 6.53, "learning_rate": 5.657251808213706e-06, "loss": 0.1257, "step": 8361 }, { "epoch": 6.54, "learning_rate": 5.6549708857532726e-06, "loss": 0.0725, "step": 8362 }, { "epoch": 6.54, "learning_rate": 5.65269024192348e-06, "loss": 0.1038, "step": 8363 }, { "epoch": 6.54, "learning_rate": 5.650409876870586e-06, "loss": 0.0723, "step": 8364 }, { "epoch": 6.54, "learning_rate": 5.648129790740816e-06, "loss": 0.0788, "step": 8365 }, { "epoch": 6.54, "learning_rate": 5.64584998368039e-06, "loss": 0.0762, "step": 8366 }, { "epoch": 6.54, "learning_rate": 5.643570455835491e-06, "loss": 0.0811, "step": 8367 }, { "epoch": 6.54, "learning_rate": 5.641291207352309e-06, "loss": 0.1823, "step": 8368 }, { "epoch": 6.54, "learning_rate": 5.639012238376998e-06, "loss": 0.2661, "step": 8369 }, { "epoch": 6.54, "learning_rate": 5.636733549055699e-06, "loss": 0.1068, "step": 8370 }, { "epoch": 6.54, "learning_rate": 5.634455139534537e-06, "loss": 0.0531, "step": 8371 }, { "epoch": 6.54, "learning_rate": 5.6321770099596265e-06, "loss": 0.2078, "step": 8372 }, { "epoch": 6.54, "learning_rate": 5.629899160477043e-06, "loss": 0.0966, "step": 8373 }, { "epoch": 6.54, "learning_rate": 5.627621591232861e-06, "loss": 0.1352, "step": 8374 }, { "epoch": 6.55, "learning_rate": 5.62534430237313e-06, "loss": 0.1293, "step": 8375 }, { "epoch": 6.55, "learning_rate": 5.623067294043891e-06, "loss": 0.1556, "step": 8376 }, { "epoch": 6.55, "learning_rate": 5.620790566391161e-06, "loss": 0.0854, "step": 8377 }, { "epoch": 6.55, "learning_rate": 5.618514119560929e-06, "loss": 0.1305, "step": 8378 }, { "epoch": 6.55, "learning_rate": 5.616237953699175e-06, "loss": 0.1326, "step": 8379 }, { "epoch": 6.55, "learning_rate": 5.613962068951869e-06, "loss": 0.0931, "step": 8380 }, { "epoch": 6.55, "learning_rate": 5.611686465464953e-06, "loss": 0.1104, "step": 8381 }, { "epoch": 6.55, "learning_rate": 5.609411143384351e-06, "loss": 0.1157, "step": 8382 }, { "epoch": 6.55, "learning_rate": 5.6071361028559725e-06, "loss": 0.0718, "step": 8383 }, { "epoch": 6.55, "learning_rate": 5.604861344025705e-06, "loss": 0.1203, "step": 8384 }, { "epoch": 6.55, "learning_rate": 5.602586867039424e-06, "loss": 0.0713, "step": 8385 }, { "epoch": 6.55, "learning_rate": 5.60031267204298e-06, "loss": 0.0818, "step": 8386 }, { "epoch": 6.55, "learning_rate": 5.598038759182206e-06, "loss": 0.1056, "step": 8387 }, { "epoch": 6.56, "learning_rate": 5.595765128602928e-06, "loss": 0.0686, "step": 8388 }, { "epoch": 6.56, "learning_rate": 5.593491780450946e-06, "loss": 0.0608, "step": 8389 }, { "epoch": 6.56, "learning_rate": 5.591218714872028e-06, "loss": 0.1022, "step": 8390 }, { "epoch": 6.56, "learning_rate": 5.588945932011948e-06, "loss": 0.1257, "step": 8391 }, { "epoch": 6.56, "learning_rate": 5.5866734320164515e-06, "loss": 0.1773, "step": 8392 }, { "epoch": 6.56, "learning_rate": 5.584401215031263e-06, "loss": 0.0922, "step": 8393 }, { "epoch": 6.56, "learning_rate": 5.58212928120209e-06, "loss": 0.1102, "step": 8394 }, { "epoch": 6.56, "learning_rate": 5.579857630674626e-06, "loss": 0.0994, "step": 8395 }, { "epoch": 6.56, "learning_rate": 5.577586263594541e-06, "loss": 0.1197, "step": 8396 }, { "epoch": 6.56, "learning_rate": 5.57531518010749e-06, "loss": 0.0905, "step": 8397 }, { "epoch": 6.56, "learning_rate": 5.5730443803591076e-06, "loss": 0.0948, "step": 8398 }, { "epoch": 6.56, "learning_rate": 5.570773864495016e-06, "loss": 0.0521, "step": 8399 }, { "epoch": 6.57, "learning_rate": 5.5685036326608184e-06, "loss": 0.1201, "step": 8400 }, { "epoch": 6.57, "learning_rate": 5.566233685002087e-06, "loss": 0.1071, "step": 8401 }, { "epoch": 6.57, "learning_rate": 5.563964021664383e-06, "loss": 0.135, "step": 8402 }, { "epoch": 6.57, "learning_rate": 5.561694642793264e-06, "loss": 0.1008, "step": 8403 }, { "epoch": 6.57, "learning_rate": 5.5594255485342495e-06, "loss": 0.1693, "step": 8404 }, { "epoch": 6.57, "learning_rate": 5.557156739032849e-06, "loss": 0.1546, "step": 8405 }, { "epoch": 6.57, "learning_rate": 5.554888214434551e-06, "loss": 0.079, "step": 8406 }, { "epoch": 6.57, "learning_rate": 5.552619974884832e-06, "loss": 0.097, "step": 8407 }, { "epoch": 6.57, "learning_rate": 5.550352020529142e-06, "loss": 0.1389, "step": 8408 }, { "epoch": 6.57, "learning_rate": 5.548084351512919e-06, "loss": 0.1146, "step": 8409 }, { "epoch": 6.57, "learning_rate": 5.5458169679815745e-06, "loss": 0.1284, "step": 8410 }, { "epoch": 6.57, "learning_rate": 5.543549870080519e-06, "loss": 0.0889, "step": 8411 }, { "epoch": 6.57, "learning_rate": 5.5412830579551224e-06, "loss": 0.149, "step": 8412 }, { "epoch": 6.58, "learning_rate": 5.539016531750747e-06, "loss": 0.0911, "step": 8413 }, { "epoch": 6.58, "learning_rate": 5.536750291612745e-06, "loss": 0.1178, "step": 8414 }, { "epoch": 6.58, "learning_rate": 5.534484337686437e-06, "loss": 0.0865, "step": 8415 }, { "epoch": 6.58, "learning_rate": 5.53221867011713e-06, "loss": 0.0541, "step": 8416 }, { "epoch": 6.58, "learning_rate": 5.529953289050114e-06, "loss": 0.0685, "step": 8417 }, { "epoch": 6.58, "learning_rate": 5.527688194630658e-06, "loss": 0.1112, "step": 8418 }, { "epoch": 6.58, "learning_rate": 5.5254233870040165e-06, "loss": 0.0874, "step": 8419 }, { "epoch": 6.58, "learning_rate": 5.523158866315421e-06, "loss": 0.051, "step": 8420 }, { "epoch": 6.58, "learning_rate": 5.520894632710085e-06, "loss": 0.0652, "step": 8421 }, { "epoch": 6.58, "learning_rate": 5.518630686333211e-06, "loss": 0.1742, "step": 8422 }, { "epoch": 6.58, "learning_rate": 5.516367027329979e-06, "loss": 0.1024, "step": 8423 }, { "epoch": 6.58, "learning_rate": 5.514103655845539e-06, "loss": 0.0532, "step": 8424 }, { "epoch": 6.58, "learning_rate": 5.5118405720250354e-06, "loss": 0.1717, "step": 8425 }, { "epoch": 6.59, "learning_rate": 5.5095777760135995e-06, "loss": 0.1038, "step": 8426 }, { "epoch": 6.59, "learning_rate": 5.50731526795633e-06, "loss": 0.0984, "step": 8427 }, { "epoch": 6.59, "learning_rate": 5.505053047998314e-06, "loss": 0.0773, "step": 8428 }, { "epoch": 6.59, "learning_rate": 5.5027911162846205e-06, "loss": 0.2708, "step": 8429 }, { "epoch": 6.59, "learning_rate": 5.500529472960297e-06, "loss": 0.0851, "step": 8430 }, { "epoch": 6.59, "learning_rate": 5.498268118170373e-06, "loss": 0.1337, "step": 8431 }, { "epoch": 6.59, "learning_rate": 5.496007052059865e-06, "loss": 0.0751, "step": 8432 }, { "epoch": 6.59, "learning_rate": 5.49374627477376e-06, "loss": 0.0631, "step": 8433 }, { "epoch": 6.59, "learning_rate": 5.491485786457047e-06, "loss": 0.0875, "step": 8434 }, { "epoch": 6.59, "learning_rate": 5.489225587254668e-06, "loss": 0.1017, "step": 8435 }, { "epoch": 6.59, "learning_rate": 5.486965677311564e-06, "loss": 0.095, "step": 8436 }, { "epoch": 6.59, "learning_rate": 5.484706056772661e-06, "loss": 0.0707, "step": 8437 }, { "epoch": 6.59, "learning_rate": 5.482446725782856e-06, "loss": 0.0931, "step": 8438 }, { "epoch": 6.6, "learning_rate": 5.480187684487032e-06, "loss": 0.0811, "step": 8439 }, { "epoch": 6.6, "learning_rate": 5.477928933030055e-06, "loss": 0.1116, "step": 8440 }, { "epoch": 6.6, "learning_rate": 5.4756704715567675e-06, "loss": 0.0891, "step": 8441 }, { "epoch": 6.6, "learning_rate": 5.473412300211998e-06, "loss": 0.1041, "step": 8442 }, { "epoch": 6.6, "learning_rate": 5.4711544191405534e-06, "loss": 0.0909, "step": 8443 }, { "epoch": 6.6, "learning_rate": 5.468896828487219e-06, "loss": 0.0588, "step": 8444 }, { "epoch": 6.6, "learning_rate": 5.466639528396775e-06, "loss": 0.1227, "step": 8445 }, { "epoch": 6.6, "learning_rate": 5.464382519013973e-06, "loss": 0.18, "step": 8446 }, { "epoch": 6.6, "learning_rate": 5.46212580048354e-06, "loss": 0.1296, "step": 8447 }, { "epoch": 6.6, "learning_rate": 5.459869372950191e-06, "loss": 0.0903, "step": 8448 }, { "epoch": 6.6, "learning_rate": 5.457613236558627e-06, "loss": 0.109, "step": 8449 }, { "epoch": 6.6, "learning_rate": 5.455357391453524e-06, "loss": 0.1381, "step": 8450 }, { "epoch": 6.6, "learning_rate": 5.453101837779542e-06, "loss": 0.0476, "step": 8451 }, { "epoch": 6.61, "learning_rate": 5.45084657568132e-06, "loss": 0.053, "step": 8452 }, { "epoch": 6.61, "learning_rate": 5.448591605303479e-06, "loss": 0.1396, "step": 8453 }, { "epoch": 6.61, "learning_rate": 5.446336926790625e-06, "loss": 0.0559, "step": 8454 }, { "epoch": 6.61, "learning_rate": 5.444082540287339e-06, "loss": 0.1817, "step": 8455 }, { "epoch": 6.61, "learning_rate": 5.441828445938182e-06, "loss": 0.1092, "step": 8456 }, { "epoch": 6.61, "learning_rate": 5.439574643887716e-06, "loss": 0.1328, "step": 8457 }, { "epoch": 6.61, "learning_rate": 5.437321134280455e-06, "loss": 0.1087, "step": 8458 }, { "epoch": 6.61, "learning_rate": 5.435067917260912e-06, "loss": 0.1084, "step": 8459 }, { "epoch": 6.61, "learning_rate": 5.432814992973574e-06, "loss": 0.096, "step": 8460 }, { "epoch": 6.61, "learning_rate": 5.4305623615629215e-06, "loss": 0.077, "step": 8461 }, { "epoch": 6.61, "learning_rate": 5.428310023173401e-06, "loss": 0.0614, "step": 8462 }, { "epoch": 6.61, "learning_rate": 5.4260579779494485e-06, "loss": 0.0821, "step": 8463 }, { "epoch": 6.62, "learning_rate": 5.42380622603548e-06, "loss": 0.0592, "step": 8464 }, { "epoch": 6.62, "learning_rate": 5.42155476757589e-06, "loss": 0.1724, "step": 8465 }, { "epoch": 6.62, "learning_rate": 5.419303602715058e-06, "loss": 0.1731, "step": 8466 }, { "epoch": 6.62, "learning_rate": 5.417052731597341e-06, "loss": 0.1167, "step": 8467 }, { "epoch": 6.62, "learning_rate": 5.4148021543670806e-06, "loss": 0.1182, "step": 8468 }, { "epoch": 6.62, "learning_rate": 5.412551871168599e-06, "loss": 0.1163, "step": 8469 }, { "epoch": 6.62, "learning_rate": 5.410301882146195e-06, "loss": 0.0497, "step": 8470 }, { "epoch": 6.62, "learning_rate": 5.4080521874441504e-06, "loss": 0.1289, "step": 8471 }, { "epoch": 6.62, "learning_rate": 5.405802787206739e-06, "loss": 0.0884, "step": 8472 }, { "epoch": 6.62, "learning_rate": 5.403553681578199e-06, "loss": 0.1198, "step": 8473 }, { "epoch": 6.62, "learning_rate": 5.401304870702765e-06, "loss": 0.0899, "step": 8474 }, { "epoch": 6.62, "learning_rate": 5.39905635472463e-06, "loss": 0.0926, "step": 8475 }, { "epoch": 6.62, "learning_rate": 5.396808133787998e-06, "loss": 0.0942, "step": 8476 }, { "epoch": 6.63, "learning_rate": 5.394560208037032e-06, "loss": 0.1155, "step": 8477 }, { "epoch": 6.63, "learning_rate": 5.392312577615884e-06, "loss": 0.071, "step": 8478 }, { "epoch": 6.63, "learning_rate": 5.390065242668683e-06, "loss": 0.1051, "step": 8479 }, { "epoch": 6.63, "learning_rate": 5.387818203339554e-06, "loss": 0.0527, "step": 8480 }, { "epoch": 6.63, "learning_rate": 5.385571459772581e-06, "loss": 0.0745, "step": 8481 }, { "epoch": 6.63, "learning_rate": 5.38332501211184e-06, "loss": 0.0799, "step": 8482 }, { "epoch": 6.63, "learning_rate": 5.381078860501385e-06, "loss": 0.0353, "step": 8483 }, { "epoch": 6.63, "learning_rate": 5.378833005085263e-06, "loss": 0.0982, "step": 8484 }, { "epoch": 6.63, "learning_rate": 5.3765874460074885e-06, "loss": 0.0928, "step": 8485 }, { "epoch": 6.63, "learning_rate": 5.374342183412052e-06, "loss": 0.0489, "step": 8486 }, { "epoch": 6.63, "learning_rate": 5.372097217442944e-06, "loss": 0.0793, "step": 8487 }, { "epoch": 6.63, "learning_rate": 5.369852548244125e-06, "loss": 0.1234, "step": 8488 }, { "epoch": 6.63, "learning_rate": 5.367608175959535e-06, "loss": 0.0672, "step": 8489 }, { "epoch": 6.64, "learning_rate": 5.365364100733097e-06, "loss": 0.0692, "step": 8490 }, { "epoch": 6.64, "learning_rate": 5.363120322708716e-06, "loss": 0.1089, "step": 8491 }, { "epoch": 6.64, "learning_rate": 5.360876842030277e-06, "loss": 0.1682, "step": 8492 }, { "epoch": 6.64, "learning_rate": 5.3586336588416455e-06, "loss": 0.0766, "step": 8493 }, { "epoch": 6.64, "learning_rate": 5.356390773286666e-06, "loss": 0.1105, "step": 8494 }, { "epoch": 6.64, "learning_rate": 5.354148185509173e-06, "loss": 0.1015, "step": 8495 }, { "epoch": 6.64, "learning_rate": 5.351905895652974e-06, "loss": 0.1135, "step": 8496 }, { "epoch": 6.64, "learning_rate": 5.349663903861858e-06, "loss": 0.1081, "step": 8497 }, { "epoch": 6.64, "learning_rate": 5.347422210279588e-06, "loss": 0.1929, "step": 8498 }, { "epoch": 6.64, "learning_rate": 5.345180815049926e-06, "loss": 0.1472, "step": 8499 }, { "epoch": 6.64, "learning_rate": 5.3429397183166e-06, "loss": 0.0637, "step": 8500 }, { "epoch": 6.64, "learning_rate": 5.340698920223323e-06, "loss": 0.1074, "step": 8501 }, { "epoch": 6.64, "learning_rate": 5.33845842091379e-06, "loss": 0.084, "step": 8502 }, { "epoch": 6.65, "learning_rate": 5.336218220531675e-06, "loss": 0.0909, "step": 8503 }, { "epoch": 6.65, "learning_rate": 5.333978319220634e-06, "loss": 0.111, "step": 8504 }, { "epoch": 6.65, "learning_rate": 5.331738717124306e-06, "loss": 0.1147, "step": 8505 }, { "epoch": 6.65, "learning_rate": 5.329499414386301e-06, "loss": 0.1932, "step": 8506 }, { "epoch": 6.65, "learning_rate": 5.327260411150226e-06, "loss": 0.1886, "step": 8507 }, { "epoch": 6.65, "learning_rate": 5.325021707559661e-06, "loss": 0.116, "step": 8508 }, { "epoch": 6.65, "learning_rate": 5.322783303758153e-06, "loss": 0.0726, "step": 8509 }, { "epoch": 6.65, "learning_rate": 5.320545199889255e-06, "loss": 0.0764, "step": 8510 }, { "epoch": 6.65, "learning_rate": 5.318307396096485e-06, "loss": 0.0417, "step": 8511 }, { "epoch": 6.65, "learning_rate": 5.316069892523343e-06, "loss": 0.111, "step": 8512 }, { "epoch": 6.65, "learning_rate": 5.313832689313314e-06, "loss": 0.1227, "step": 8513 }, { "epoch": 6.65, "learning_rate": 5.31159578660986e-06, "loss": 0.1681, "step": 8514 }, { "epoch": 6.65, "learning_rate": 5.309359184556427e-06, "loss": 0.0831, "step": 8515 }, { "epoch": 6.66, "learning_rate": 5.307122883296438e-06, "loss": 0.0859, "step": 8516 }, { "epoch": 6.66, "learning_rate": 5.304886882973296e-06, "loss": 0.0648, "step": 8517 }, { "epoch": 6.66, "learning_rate": 5.302651183730394e-06, "loss": 0.097, "step": 8518 }, { "epoch": 6.66, "learning_rate": 5.300415785711097e-06, "loss": 0.155, "step": 8519 }, { "epoch": 6.66, "learning_rate": 5.298180689058757e-06, "loss": 0.1582, "step": 8520 }, { "epoch": 6.66, "learning_rate": 5.295945893916689e-06, "loss": 0.107, "step": 8521 }, { "epoch": 6.66, "learning_rate": 5.293711400428214e-06, "loss": 0.0829, "step": 8522 }, { "epoch": 6.66, "learning_rate": 5.291477208736619e-06, "loss": 0.1003, "step": 8523 }, { "epoch": 6.66, "learning_rate": 5.289243318985173e-06, "loss": 0.1294, "step": 8524 }, { "epoch": 6.66, "learning_rate": 5.28700973131713e-06, "loss": 0.1133, "step": 8525 }, { "epoch": 6.66, "learning_rate": 5.284776445875719e-06, "loss": 0.1025, "step": 8526 }, { "epoch": 6.66, "learning_rate": 5.282543462804152e-06, "loss": 0.1368, "step": 8527 }, { "epoch": 6.67, "learning_rate": 5.280310782245623e-06, "loss": 0.158, "step": 8528 }, { "epoch": 6.67, "learning_rate": 5.278078404343303e-06, "loss": 0.1324, "step": 8529 }, { "epoch": 6.67, "learning_rate": 5.275846329240351e-06, "loss": 0.106, "step": 8530 }, { "epoch": 6.67, "learning_rate": 5.273614557079905e-06, "loss": 0.1052, "step": 8531 }, { "epoch": 6.67, "learning_rate": 5.271383088005064e-06, "loss": 0.1285, "step": 8532 }, { "epoch": 6.67, "learning_rate": 5.269151922158942e-06, "loss": 0.0558, "step": 8533 }, { "epoch": 6.67, "learning_rate": 5.2669210596846065e-06, "loss": 0.1083, "step": 8534 }, { "epoch": 6.67, "learning_rate": 5.264690500725116e-06, "loss": 0.1149, "step": 8535 }, { "epoch": 6.67, "learning_rate": 5.262460245423506e-06, "loss": 0.0756, "step": 8536 }, { "epoch": 6.67, "learning_rate": 5.260230293922799e-06, "loss": 0.0895, "step": 8537 }, { "epoch": 6.67, "learning_rate": 5.2580006463659906e-06, "loss": 0.1458, "step": 8538 }, { "epoch": 6.67, "learning_rate": 5.25577130289606e-06, "loss": 0.1071, "step": 8539 }, { "epoch": 6.67, "learning_rate": 5.253542263655966e-06, "loss": 0.1435, "step": 8540 }, { "epoch": 6.68, "learning_rate": 5.251313528788646e-06, "loss": 0.1329, "step": 8541 }, { "epoch": 6.68, "learning_rate": 5.249085098437032e-06, "loss": 0.0798, "step": 8542 }, { "epoch": 6.68, "learning_rate": 5.246856972744014e-06, "loss": 0.051, "step": 8543 }, { "epoch": 6.68, "learning_rate": 5.244629151852473e-06, "loss": 0.0821, "step": 8544 }, { "epoch": 6.68, "learning_rate": 5.2424016359052776e-06, "loss": 0.119, "step": 8545 }, { "epoch": 6.68, "learning_rate": 5.240174425045267e-06, "loss": 0.1362, "step": 8546 }, { "epoch": 6.68, "learning_rate": 5.237947519415264e-06, "loss": 0.1139, "step": 8547 }, { "epoch": 6.68, "learning_rate": 5.2357209191580715e-06, "loss": 0.1364, "step": 8548 }, { "epoch": 6.68, "learning_rate": 5.233494624416475e-06, "loss": 0.0611, "step": 8549 }, { "epoch": 6.68, "learning_rate": 5.231268635333236e-06, "loss": 0.0841, "step": 8550 }, { "epoch": 6.68, "learning_rate": 5.2290429520511e-06, "loss": 0.1301, "step": 8551 }, { "epoch": 6.68, "learning_rate": 5.226817574712788e-06, "loss": 0.085, "step": 8552 }, { "epoch": 6.68, "learning_rate": 5.224592503461014e-06, "loss": 0.1129, "step": 8553 }, { "epoch": 6.69, "learning_rate": 5.222367738438462e-06, "loss": 0.0572, "step": 8554 }, { "epoch": 6.69, "learning_rate": 5.220143279787791e-06, "loss": 0.1074, "step": 8555 }, { "epoch": 6.69, "learning_rate": 5.217919127651645e-06, "loss": 0.1647, "step": 8556 }, { "epoch": 6.69, "learning_rate": 5.215695282172663e-06, "loss": 0.2126, "step": 8557 }, { "epoch": 6.69, "learning_rate": 5.213471743493443e-06, "loss": 0.0959, "step": 8558 }, { "epoch": 6.69, "learning_rate": 5.211248511756577e-06, "loss": 0.2235, "step": 8559 }, { "epoch": 6.69, "learning_rate": 5.20902558710463e-06, "loss": 0.0758, "step": 8560 }, { "epoch": 6.69, "learning_rate": 5.2068029696801504e-06, "loss": 0.113, "step": 8561 }, { "epoch": 6.69, "learning_rate": 5.204580659625666e-06, "loss": 0.1863, "step": 8562 }, { "epoch": 6.69, "learning_rate": 5.202358657083686e-06, "loss": 0.0867, "step": 8563 }, { "epoch": 6.69, "learning_rate": 5.200136962196696e-06, "loss": 0.0915, "step": 8564 }, { "epoch": 6.69, "learning_rate": 5.197915575107175e-06, "loss": 0.1151, "step": 8565 }, { "epoch": 6.69, "learning_rate": 5.1956944959575615e-06, "loss": 0.1018, "step": 8566 }, { "epoch": 6.7, "learning_rate": 5.193473724890285e-06, "loss": 0.0946, "step": 8567 }, { "epoch": 6.7, "learning_rate": 5.191253262047764e-06, "loss": 0.0807, "step": 8568 }, { "epoch": 6.7, "learning_rate": 5.189033107572383e-06, "loss": 0.1036, "step": 8569 }, { "epoch": 6.7, "learning_rate": 5.1868132616065135e-06, "loss": 0.0864, "step": 8570 }, { "epoch": 6.7, "learning_rate": 5.184593724292505e-06, "loss": 0.1035, "step": 8571 }, { "epoch": 6.7, "learning_rate": 5.18237449577269e-06, "loss": 0.1844, "step": 8572 }, { "epoch": 6.7, "learning_rate": 5.180155576189378e-06, "loss": 0.1265, "step": 8573 }, { "epoch": 6.7, "learning_rate": 5.17793696568486e-06, "loss": 0.0813, "step": 8574 }, { "epoch": 6.7, "learning_rate": 5.175718664401403e-06, "loss": 0.071, "step": 8575 }, { "epoch": 6.7, "learning_rate": 5.173500672481273e-06, "loss": 0.0812, "step": 8576 }, { "epoch": 6.7, "learning_rate": 5.171282990066687e-06, "loss": 0.1428, "step": 8577 }, { "epoch": 6.7, "learning_rate": 5.169065617299862e-06, "loss": 0.1292, "step": 8578 }, { "epoch": 6.7, "learning_rate": 5.1668485543229855e-06, "loss": 0.0821, "step": 8579 }, { "epoch": 6.71, "learning_rate": 5.164631801278236e-06, "loss": 0.1402, "step": 8580 }, { "epoch": 6.71, "learning_rate": 5.162415358307764e-06, "loss": 0.1413, "step": 8581 }, { "epoch": 6.71, "learning_rate": 5.160199225553701e-06, "loss": 0.2138, "step": 8582 }, { "epoch": 6.71, "learning_rate": 5.15798340315816e-06, "loss": 0.0491, "step": 8583 }, { "epoch": 6.71, "learning_rate": 5.155767891263233e-06, "loss": 0.1397, "step": 8584 }, { "epoch": 6.71, "learning_rate": 5.153552690010992e-06, "loss": 0.0998, "step": 8585 }, { "epoch": 6.71, "learning_rate": 5.15133779954349e-06, "loss": 0.1008, "step": 8586 }, { "epoch": 6.71, "learning_rate": 5.149123220002757e-06, "loss": 0.09, "step": 8587 }, { "epoch": 6.71, "learning_rate": 5.146908951530813e-06, "loss": 0.0606, "step": 8588 }, { "epoch": 6.71, "learning_rate": 5.144694994269646e-06, "loss": 0.1498, "step": 8589 }, { "epoch": 6.71, "learning_rate": 5.142481348361225e-06, "loss": 0.0862, "step": 8590 }, { "epoch": 6.71, "learning_rate": 5.14026801394751e-06, "loss": 0.1604, "step": 8591 }, { "epoch": 6.72, "learning_rate": 5.138054991170432e-06, "loss": 0.0756, "step": 8592 }, { "epoch": 6.72, "learning_rate": 5.135842280171903e-06, "loss": 0.0953, "step": 8593 }, { "epoch": 6.72, "learning_rate": 5.133629881093818e-06, "loss": 0.0966, "step": 8594 }, { "epoch": 6.72, "learning_rate": 5.131417794078046e-06, "loss": 0.1532, "step": 8595 }, { "epoch": 6.72, "learning_rate": 5.129206019266442e-06, "loss": 0.1418, "step": 8596 }, { "epoch": 6.72, "learning_rate": 5.1269945568008395e-06, "loss": 0.1172, "step": 8597 }, { "epoch": 6.72, "learning_rate": 5.124783406823048e-06, "loss": 0.0426, "step": 8598 }, { "epoch": 6.72, "learning_rate": 5.122572569474871e-06, "loss": 0.0723, "step": 8599 }, { "epoch": 6.72, "learning_rate": 5.1203620448980685e-06, "loss": 0.1158, "step": 8600 }, { "epoch": 6.72, "learning_rate": 5.118151833234399e-06, "loss": 0.1093, "step": 8601 }, { "epoch": 6.72, "learning_rate": 5.115941934625591e-06, "loss": 0.1281, "step": 8602 }, { "epoch": 6.72, "learning_rate": 5.113732349213365e-06, "loss": 0.1207, "step": 8603 }, { "epoch": 6.72, "learning_rate": 5.111523077139409e-06, "loss": 0.0994, "step": 8604 }, { "epoch": 6.73, "learning_rate": 5.109314118545395e-06, "loss": 0.0894, "step": 8605 }, { "epoch": 6.73, "learning_rate": 5.107105473572978e-06, "loss": 0.105, "step": 8606 }, { "epoch": 6.73, "learning_rate": 5.104897142363785e-06, "loss": 0.1771, "step": 8607 }, { "epoch": 6.73, "learning_rate": 5.102689125059434e-06, "loss": 0.1426, "step": 8608 }, { "epoch": 6.73, "learning_rate": 5.100481421801513e-06, "loss": 0.0849, "step": 8609 }, { "epoch": 6.73, "learning_rate": 5.098274032731592e-06, "loss": 0.1109, "step": 8610 }, { "epoch": 6.73, "learning_rate": 5.096066957991235e-06, "loss": 0.1391, "step": 8611 }, { "epoch": 6.73, "learning_rate": 5.093860197721958e-06, "loss": 0.1146, "step": 8612 }, { "epoch": 6.73, "learning_rate": 5.091653752065277e-06, "loss": 0.0822, "step": 8613 }, { "epoch": 6.73, "learning_rate": 5.0894476211626885e-06, "loss": 0.1244, "step": 8614 }, { "epoch": 6.73, "learning_rate": 5.08724180515566e-06, "loss": 0.1118, "step": 8615 }, { "epoch": 6.73, "learning_rate": 5.085036304185647e-06, "loss": 0.1612, "step": 8616 }, { "epoch": 6.73, "learning_rate": 5.082831118394066e-06, "loss": 0.065, "step": 8617 }, { "epoch": 6.74, "learning_rate": 5.080626247922341e-06, "loss": 0.1791, "step": 8618 }, { "epoch": 6.74, "learning_rate": 5.078421692911859e-06, "loss": 0.1336, "step": 8619 }, { "epoch": 6.74, "learning_rate": 5.076217453503989e-06, "loss": 0.1542, "step": 8620 }, { "epoch": 6.74, "learning_rate": 5.074013529840081e-06, "loss": 0.125, "step": 8621 }, { "epoch": 6.74, "learning_rate": 5.071809922061464e-06, "loss": 0.0896, "step": 8622 }, { "epoch": 6.74, "learning_rate": 5.069606630309447e-06, "loss": 0.0866, "step": 8623 }, { "epoch": 6.74, "learning_rate": 5.067403654725319e-06, "loss": 0.1218, "step": 8624 }, { "epoch": 6.74, "learning_rate": 5.065200995450346e-06, "loss": 0.1689, "step": 8625 }, { "epoch": 6.74, "learning_rate": 5.062998652625783e-06, "loss": 0.0676, "step": 8626 }, { "epoch": 6.74, "learning_rate": 5.060796626392856e-06, "loss": 0.0601, "step": 8627 }, { "epoch": 6.74, "learning_rate": 5.058594916892774e-06, "loss": 0.0494, "step": 8628 }, { "epoch": 6.74, "learning_rate": 5.056393524266714e-06, "loss": 0.1112, "step": 8629 }, { "epoch": 6.74, "learning_rate": 5.054192448655855e-06, "loss": 0.1768, "step": 8630 }, { "epoch": 6.75, "learning_rate": 5.05199169020134e-06, "loss": 0.0833, "step": 8631 }, { "epoch": 6.75, "learning_rate": 5.049791249044296e-06, "loss": 0.1192, "step": 8632 }, { "epoch": 6.75, "learning_rate": 5.047591125325827e-06, "loss": 0.1872, "step": 8633 }, { "epoch": 6.75, "learning_rate": 5.045391319187021e-06, "loss": 0.1464, "step": 8634 }, { "epoch": 6.75, "learning_rate": 5.043191830768943e-06, "loss": 0.0554, "step": 8635 }, { "epoch": 6.75, "learning_rate": 5.040992660212638e-06, "loss": 0.0661, "step": 8636 }, { "epoch": 6.75, "learning_rate": 5.0387938076591255e-06, "loss": 0.072, "step": 8637 }, { "epoch": 6.75, "learning_rate": 5.03659527324942e-06, "loss": 0.0578, "step": 8638 }, { "epoch": 6.75, "learning_rate": 5.034397057124504e-06, "loss": 0.0744, "step": 8639 }, { "epoch": 6.75, "learning_rate": 5.032199159425328e-06, "loss": 0.1083, "step": 8640 }, { "epoch": 6.75, "learning_rate": 5.030001580292849e-06, "loss": 0.0936, "step": 8641 }, { "epoch": 6.75, "learning_rate": 5.027804319867985e-06, "loss": 0.0888, "step": 8642 }, { "epoch": 6.75, "learning_rate": 5.025607378291637e-06, "loss": 0.1125, "step": 8643 }, { "epoch": 6.76, "learning_rate": 5.023410755704689e-06, "loss": 0.1034, "step": 8644 }, { "epoch": 6.76, "learning_rate": 5.021214452248e-06, "loss": 0.0609, "step": 8645 }, { "epoch": 6.76, "learning_rate": 5.019018468062413e-06, "loss": 0.2029, "step": 8646 }, { "epoch": 6.76, "learning_rate": 5.016822803288746e-06, "loss": 0.0863, "step": 8647 }, { "epoch": 6.76, "learning_rate": 5.014627458067795e-06, "loss": 0.116, "step": 8648 }, { "epoch": 6.76, "learning_rate": 5.012432432540351e-06, "loss": 0.121, "step": 8649 }, { "epoch": 6.76, "learning_rate": 5.010237726847168e-06, "loss": 0.1233, "step": 8650 }, { "epoch": 6.76, "learning_rate": 5.00804334112898e-06, "loss": 0.0984, "step": 8651 }, { "epoch": 6.76, "learning_rate": 5.005849275526502e-06, "loss": 0.0431, "step": 8652 }, { "epoch": 6.76, "learning_rate": 5.003655530180442e-06, "loss": 0.1145, "step": 8653 }, { "epoch": 6.76, "learning_rate": 5.001462105231472e-06, "loss": 0.1343, "step": 8654 }, { "epoch": 6.76, "learning_rate": 4.999269000820246e-06, "loss": 0.1551, "step": 8655 }, { "epoch": 6.77, "learning_rate": 4.997076217087402e-06, "loss": 0.1274, "step": 8656 }, { "epoch": 6.77, "learning_rate": 4.994883754173555e-06, "loss": 0.0819, "step": 8657 }, { "epoch": 6.77, "learning_rate": 4.9926916122192995e-06, "loss": 0.1636, "step": 8658 }, { "epoch": 6.77, "learning_rate": 4.990499791365207e-06, "loss": 0.1459, "step": 8659 }, { "epoch": 6.77, "learning_rate": 4.988308291751831e-06, "loss": 0.109, "step": 8660 }, { "epoch": 6.77, "learning_rate": 4.98611711351971e-06, "loss": 0.0936, "step": 8661 }, { "epoch": 6.77, "learning_rate": 4.983926256809355e-06, "loss": 0.0849, "step": 8662 }, { "epoch": 6.77, "learning_rate": 4.981735721761248e-06, "loss": 0.1077, "step": 8663 }, { "epoch": 6.77, "learning_rate": 4.979545508515871e-06, "loss": 0.0696, "step": 8664 }, { "epoch": 6.77, "learning_rate": 4.9773556172136685e-06, "loss": 0.1666, "step": 8665 }, { "epoch": 6.77, "learning_rate": 4.975166047995073e-06, "loss": 0.0697, "step": 8666 }, { "epoch": 6.77, "learning_rate": 4.972976801000491e-06, "loss": 0.0378, "step": 8667 }, { "epoch": 6.77, "learning_rate": 4.970787876370312e-06, "loss": 0.1025, "step": 8668 }, { "epoch": 6.78, "learning_rate": 4.968599274244905e-06, "loss": 0.0801, "step": 8669 }, { "epoch": 6.78, "learning_rate": 4.966410994764614e-06, "loss": 0.1003, "step": 8670 }, { "epoch": 6.78, "learning_rate": 4.964223038069764e-06, "loss": 0.1056, "step": 8671 }, { "epoch": 6.78, "learning_rate": 4.962035404300667e-06, "loss": 0.2162, "step": 8672 }, { "epoch": 6.78, "learning_rate": 4.959848093597608e-06, "loss": 0.0868, "step": 8673 }, { "epoch": 6.78, "learning_rate": 4.957661106100843e-06, "loss": 0.146, "step": 8674 }, { "epoch": 6.78, "learning_rate": 4.955474441950616e-06, "loss": 0.0714, "step": 8675 }, { "epoch": 6.78, "learning_rate": 4.953288101287158e-06, "loss": 0.0915, "step": 8676 }, { "epoch": 6.78, "learning_rate": 4.951102084250666e-06, "loss": 0.081, "step": 8677 }, { "epoch": 6.78, "learning_rate": 4.948916390981322e-06, "loss": 0.1284, "step": 8678 }, { "epoch": 6.78, "learning_rate": 4.9467310216192855e-06, "loss": 0.083, "step": 8679 }, { "epoch": 6.78, "learning_rate": 4.944545976304698e-06, "loss": 0.136, "step": 8680 }, { "epoch": 6.78, "learning_rate": 4.942361255177675e-06, "loss": 0.138, "step": 8681 }, { "epoch": 6.79, "learning_rate": 4.940176858378318e-06, "loss": 0.071, "step": 8682 }, { "epoch": 6.79, "learning_rate": 4.937992786046699e-06, "loss": 0.1435, "step": 8683 }, { "epoch": 6.79, "learning_rate": 4.9358090383228815e-06, "loss": 0.062, "step": 8684 }, { "epoch": 6.79, "learning_rate": 4.933625615346903e-06, "loss": 0.0975, "step": 8685 }, { "epoch": 6.79, "learning_rate": 4.931442517258765e-06, "loss": 0.1685, "step": 8686 }, { "epoch": 6.79, "learning_rate": 4.9292597441984734e-06, "loss": 0.0785, "step": 8687 }, { "epoch": 6.79, "learning_rate": 4.927077296305999e-06, "loss": 0.0724, "step": 8688 }, { "epoch": 6.79, "learning_rate": 4.924895173721293e-06, "loss": 0.0587, "step": 8689 }, { "epoch": 6.79, "learning_rate": 4.9227133765842864e-06, "loss": 0.1323, "step": 8690 }, { "epoch": 6.79, "learning_rate": 4.920531905034889e-06, "loss": 0.1173, "step": 8691 }, { "epoch": 6.79, "learning_rate": 4.918350759212993e-06, "loss": 0.138, "step": 8692 }, { "epoch": 6.79, "learning_rate": 4.916169939258466e-06, "loss": 0.0778, "step": 8693 }, { "epoch": 6.79, "learning_rate": 4.913989445311151e-06, "loss": 0.1685, "step": 8694 }, { "epoch": 6.8, "learning_rate": 4.911809277510885e-06, "loss": 0.055, "step": 8695 }, { "epoch": 6.8, "learning_rate": 4.9096294359974725e-06, "loss": 0.0666, "step": 8696 }, { "epoch": 6.8, "learning_rate": 4.9074499209106905e-06, "loss": 0.1568, "step": 8697 }, { "epoch": 6.8, "learning_rate": 4.905270732390305e-06, "loss": 0.114, "step": 8698 }, { "epoch": 6.8, "learning_rate": 4.9030918705760655e-06, "loss": 0.118, "step": 8699 }, { "epoch": 6.8, "learning_rate": 4.900913335607691e-06, "loss": 0.1112, "step": 8700 }, { "epoch": 6.8, "learning_rate": 4.898735127624883e-06, "loss": 0.2568, "step": 8701 }, { "epoch": 6.8, "learning_rate": 4.896557246767322e-06, "loss": 0.1264, "step": 8702 }, { "epoch": 6.8, "learning_rate": 4.8943796931746675e-06, "loss": 0.0842, "step": 8703 }, { "epoch": 6.8, "learning_rate": 4.892202466986558e-06, "loss": 0.0628, "step": 8704 }, { "epoch": 6.8, "learning_rate": 4.890025568342609e-06, "loss": 0.1457, "step": 8705 }, { "epoch": 6.8, "learning_rate": 4.8878489973824165e-06, "loss": 0.1431, "step": 8706 }, { "epoch": 6.81, "learning_rate": 4.885672754245565e-06, "loss": 0.1393, "step": 8707 }, { "epoch": 6.81, "learning_rate": 4.883496839071599e-06, "loss": 0.0874, "step": 8708 }, { "epoch": 6.81, "learning_rate": 4.881321252000054e-06, "loss": 0.0624, "step": 8709 }, { "epoch": 6.81, "learning_rate": 4.87914599317044e-06, "loss": 0.0556, "step": 8710 }, { "epoch": 6.81, "learning_rate": 4.876971062722254e-06, "loss": 0.0863, "step": 8711 }, { "epoch": 6.81, "learning_rate": 4.874796460794965e-06, "loss": 0.081, "step": 8712 }, { "epoch": 6.81, "learning_rate": 4.872622187528021e-06, "loss": 0.1408, "step": 8713 }, { "epoch": 6.81, "learning_rate": 4.87044824306085e-06, "loss": 0.1775, "step": 8714 }, { "epoch": 6.81, "learning_rate": 4.86827462753286e-06, "loss": 0.103, "step": 8715 }, { "epoch": 6.81, "learning_rate": 4.866101341083435e-06, "loss": 0.1404, "step": 8716 }, { "epoch": 6.81, "learning_rate": 4.86392838385194e-06, "loss": 0.1651, "step": 8717 }, { "epoch": 6.81, "learning_rate": 4.861755755977718e-06, "loss": 0.0513, "step": 8718 }, { "epoch": 6.81, "learning_rate": 4.859583457600099e-06, "loss": 0.0797, "step": 8719 }, { "epoch": 6.82, "learning_rate": 4.857411488858376e-06, "loss": 0.1855, "step": 8720 }, { "epoch": 6.82, "learning_rate": 4.855239849891826e-06, "loss": 0.0961, "step": 8721 }, { "epoch": 6.82, "learning_rate": 4.8530685408397205e-06, "loss": 0.1196, "step": 8722 }, { "epoch": 6.82, "learning_rate": 4.8508975618412904e-06, "loss": 0.1009, "step": 8723 }, { "epoch": 6.82, "learning_rate": 4.848726913035756e-06, "loss": 0.0659, "step": 8724 }, { "epoch": 6.82, "learning_rate": 4.846556594562303e-06, "loss": 0.0643, "step": 8725 }, { "epoch": 6.82, "learning_rate": 4.844386606560117e-06, "loss": 0.1163, "step": 8726 }, { "epoch": 6.82, "learning_rate": 4.842216949168348e-06, "loss": 0.0992, "step": 8727 }, { "epoch": 6.82, "learning_rate": 4.840047622526127e-06, "loss": 0.1394, "step": 8728 }, { "epoch": 6.82, "learning_rate": 4.837878626772563e-06, "loss": 0.134, "step": 8729 }, { "epoch": 6.82, "learning_rate": 4.835709962046754e-06, "loss": 0.1119, "step": 8730 }, { "epoch": 6.82, "learning_rate": 4.83354162848776e-06, "loss": 0.0637, "step": 8731 }, { "epoch": 6.82, "learning_rate": 4.83137362623463e-06, "loss": 0.1249, "step": 8732 }, { "epoch": 6.83, "learning_rate": 4.829205955426387e-06, "loss": 0.137, "step": 8733 }, { "epoch": 6.83, "learning_rate": 4.827038616202044e-06, "loss": 0.082, "step": 8734 }, { "epoch": 6.83, "learning_rate": 4.824871608700579e-06, "loss": 0.1157, "step": 8735 }, { "epoch": 6.83, "learning_rate": 4.8227049330609555e-06, "loss": 0.0899, "step": 8736 }, { "epoch": 6.83, "learning_rate": 4.820538589422114e-06, "loss": 0.1153, "step": 8737 }, { "epoch": 6.83, "learning_rate": 4.818372577922974e-06, "loss": 0.1095, "step": 8738 }, { "epoch": 6.83, "learning_rate": 4.816206898702433e-06, "loss": 0.1794, "step": 8739 }, { "epoch": 6.83, "learning_rate": 4.81404155189937e-06, "loss": 0.1718, "step": 8740 }, { "epoch": 6.83, "learning_rate": 4.81187653765264e-06, "loss": 0.1167, "step": 8741 }, { "epoch": 6.83, "learning_rate": 4.809711856101076e-06, "loss": 0.0973, "step": 8742 }, { "epoch": 6.83, "learning_rate": 4.807547507383492e-06, "loss": 0.0645, "step": 8743 }, { "epoch": 6.83, "learning_rate": 4.8053834916386765e-06, "loss": 0.1124, "step": 8744 }, { "epoch": 6.83, "learning_rate": 4.803219809005406e-06, "loss": 0.0572, "step": 8745 }, { "epoch": 6.84, "learning_rate": 4.801056459622426e-06, "loss": 0.0779, "step": 8746 }, { "epoch": 6.84, "learning_rate": 4.79889344362847e-06, "loss": 0.078, "step": 8747 }, { "epoch": 6.84, "learning_rate": 4.796730761162229e-06, "loss": 0.0778, "step": 8748 }, { "epoch": 6.84, "learning_rate": 4.794568412362404e-06, "loss": 0.1345, "step": 8749 }, { "epoch": 6.84, "learning_rate": 4.792406397367649e-06, "loss": 0.1112, "step": 8750 }, { "epoch": 6.84, "learning_rate": 4.790244716316611e-06, "loss": 0.0695, "step": 8751 }, { "epoch": 6.84, "learning_rate": 4.7880833693479045e-06, "loss": 0.1723, "step": 8752 }, { "epoch": 6.84, "learning_rate": 4.78592235660014e-06, "loss": 0.1275, "step": 8753 }, { "epoch": 6.84, "learning_rate": 4.783761678211884e-06, "loss": 0.1366, "step": 8754 }, { "epoch": 6.84, "learning_rate": 4.781601334321697e-06, "loss": 0.1263, "step": 8755 }, { "epoch": 6.84, "learning_rate": 4.77944132506811e-06, "loss": 0.1289, "step": 8756 }, { "epoch": 6.84, "learning_rate": 4.777281650589644e-06, "loss": 0.0641, "step": 8757 }, { "epoch": 6.84, "learning_rate": 4.775122311024788e-06, "loss": 0.1108, "step": 8758 }, { "epoch": 6.85, "learning_rate": 4.772963306512011e-06, "loss": 0.1547, "step": 8759 }, { "epoch": 6.85, "learning_rate": 4.770804637189762e-06, "loss": 0.1645, "step": 8760 }, { "epoch": 6.85, "learning_rate": 4.768646303196469e-06, "loss": 0.0957, "step": 8761 }, { "epoch": 6.85, "learning_rate": 4.766488304670538e-06, "loss": 0.1366, "step": 8762 }, { "epoch": 6.85, "learning_rate": 4.764330641750353e-06, "loss": 0.0822, "step": 8763 }, { "epoch": 6.85, "learning_rate": 4.7621733145742785e-06, "loss": 0.1049, "step": 8764 }, { "epoch": 6.85, "learning_rate": 4.760016323280654e-06, "loss": 0.2393, "step": 8765 }, { "epoch": 6.85, "learning_rate": 4.757859668007801e-06, "loss": 0.1316, "step": 8766 }, { "epoch": 6.85, "learning_rate": 4.755703348894012e-06, "loss": 0.0818, "step": 8767 }, { "epoch": 6.85, "learning_rate": 4.753547366077573e-06, "loss": 0.0529, "step": 8768 }, { "epoch": 6.85, "learning_rate": 4.7513917196967355e-06, "loss": 0.072, "step": 8769 }, { "epoch": 6.85, "learning_rate": 4.749236409889735e-06, "loss": 0.1007, "step": 8770 }, { "epoch": 6.86, "learning_rate": 4.747081436794773e-06, "loss": 0.1688, "step": 8771 }, { "epoch": 6.86, "learning_rate": 4.744926800550055e-06, "loss": 0.1116, "step": 8772 }, { "epoch": 6.86, "learning_rate": 4.742772501293741e-06, "loss": 0.1283, "step": 8773 }, { "epoch": 6.86, "learning_rate": 4.740618539163979e-06, "loss": 0.0979, "step": 8774 }, { "epoch": 6.86, "learning_rate": 4.738464914298893e-06, "loss": 0.0973, "step": 8775 }, { "epoch": 6.86, "learning_rate": 4.736311626836599e-06, "loss": 0.0591, "step": 8776 }, { "epoch": 6.86, "learning_rate": 4.734158676915165e-06, "loss": 0.064, "step": 8777 }, { "epoch": 6.86, "learning_rate": 4.7320060646726565e-06, "loss": 0.0923, "step": 8778 }, { "epoch": 6.86, "learning_rate": 4.729853790247109e-06, "loss": 0.088, "step": 8779 }, { "epoch": 6.86, "learning_rate": 4.727701853776548e-06, "loss": 0.0571, "step": 8780 }, { "epoch": 6.86, "learning_rate": 4.725550255398968e-06, "loss": 0.0924, "step": 8781 }, { "epoch": 6.86, "learning_rate": 4.723398995252338e-06, "loss": 0.1066, "step": 8782 }, { "epoch": 6.86, "learning_rate": 4.721248073474608e-06, "loss": 0.1285, "step": 8783 }, { "epoch": 6.87, "learning_rate": 4.719097490203717e-06, "loss": 0.2046, "step": 8784 }, { "epoch": 6.87, "learning_rate": 4.716947245577569e-06, "loss": 0.1531, "step": 8785 }, { "epoch": 6.87, "learning_rate": 4.714797339734053e-06, "loss": 0.1387, "step": 8786 }, { "epoch": 6.87, "learning_rate": 4.7126477728110345e-06, "loss": 0.1517, "step": 8787 }, { "epoch": 6.87, "learning_rate": 4.710498544946356e-06, "loss": 0.0982, "step": 8788 }, { "epoch": 6.87, "learning_rate": 4.7083496562778395e-06, "loss": 0.0768, "step": 8789 }, { "epoch": 6.87, "learning_rate": 4.706201106943286e-06, "loss": 0.1379, "step": 8790 }, { "epoch": 6.87, "learning_rate": 4.704052897080471e-06, "loss": 0.1006, "step": 8791 }, { "epoch": 6.87, "learning_rate": 4.701905026827157e-06, "loss": 0.1775, "step": 8792 }, { "epoch": 6.87, "learning_rate": 4.69975749632108e-06, "loss": 0.0637, "step": 8793 }, { "epoch": 6.87, "learning_rate": 4.6976103056999424e-06, "loss": 0.0909, "step": 8794 }, { "epoch": 6.87, "learning_rate": 4.695463455101445e-06, "loss": 0.0863, "step": 8795 }, { "epoch": 6.87, "learning_rate": 4.6933169446632565e-06, "loss": 0.0602, "step": 8796 }, { "epoch": 6.88, "learning_rate": 4.691170774523023e-06, "loss": 0.0713, "step": 8797 }, { "epoch": 6.88, "learning_rate": 4.68902494481837e-06, "loss": 0.0595, "step": 8798 }, { "epoch": 6.88, "learning_rate": 4.686879455686901e-06, "loss": 0.1488, "step": 8799 }, { "epoch": 6.88, "learning_rate": 4.684734307266201e-06, "loss": 0.0634, "step": 8800 }, { "epoch": 6.88, "learning_rate": 4.682589499693828e-06, "loss": 0.1039, "step": 8801 }, { "epoch": 6.88, "learning_rate": 4.6804450331073185e-06, "loss": 0.1197, "step": 8802 }, { "epoch": 6.88, "learning_rate": 4.678300907644195e-06, "loss": 0.0775, "step": 8803 }, { "epoch": 6.88, "learning_rate": 4.6761571234419545e-06, "loss": 0.1014, "step": 8804 }, { "epoch": 6.88, "learning_rate": 4.67401368063806e-06, "loss": 0.0825, "step": 8805 }, { "epoch": 6.88, "learning_rate": 4.671870579369965e-06, "loss": 0.1206, "step": 8806 }, { "epoch": 6.88, "learning_rate": 4.669727819775103e-06, "loss": 0.1112, "step": 8807 }, { "epoch": 6.88, "learning_rate": 4.66758540199088e-06, "loss": 0.0378, "step": 8808 }, { "epoch": 6.88, "learning_rate": 4.665443326154682e-06, "loss": 0.1072, "step": 8809 }, { "epoch": 6.89, "learning_rate": 4.66330159240387e-06, "loss": 0.1686, "step": 8810 }, { "epoch": 6.89, "learning_rate": 4.661160200875787e-06, "loss": 0.0886, "step": 8811 }, { "epoch": 6.89, "learning_rate": 4.659019151707752e-06, "loss": 0.0465, "step": 8812 }, { "epoch": 6.89, "learning_rate": 4.656878445037063e-06, "loss": 0.1053, "step": 8813 }, { "epoch": 6.89, "learning_rate": 4.65473808100099e-06, "loss": 0.0904, "step": 8814 }, { "epoch": 6.89, "learning_rate": 4.652598059736802e-06, "loss": 0.1318, "step": 8815 }, { "epoch": 6.89, "learning_rate": 4.650458381381715e-06, "loss": 0.1049, "step": 8816 }, { "epoch": 6.89, "learning_rate": 4.64831904607294e-06, "loss": 0.1733, "step": 8817 }, { "epoch": 6.89, "learning_rate": 4.646180053947673e-06, "loss": 0.1611, "step": 8818 }, { "epoch": 6.89, "learning_rate": 4.644041405143076e-06, "loss": 0.0574, "step": 8819 }, { "epoch": 6.89, "learning_rate": 4.641903099796292e-06, "loss": 0.1184, "step": 8820 }, { "epoch": 6.89, "learning_rate": 4.639765138044443e-06, "loss": 0.2536, "step": 8821 }, { "epoch": 6.89, "learning_rate": 4.637627520024629e-06, "loss": 0.0992, "step": 8822 }, { "epoch": 6.9, "learning_rate": 4.635490245873926e-06, "loss": 0.1358, "step": 8823 }, { "epoch": 6.9, "learning_rate": 4.63335331572939e-06, "loss": 0.1215, "step": 8824 }, { "epoch": 6.9, "learning_rate": 4.631216729728052e-06, "loss": 0.1272, "step": 8825 }, { "epoch": 6.9, "learning_rate": 4.6290804880069285e-06, "loss": 0.0887, "step": 8826 }, { "epoch": 6.9, "learning_rate": 4.626944590703011e-06, "loss": 0.1353, "step": 8827 }, { "epoch": 6.9, "learning_rate": 4.624809037953258e-06, "loss": 0.0995, "step": 8828 }, { "epoch": 6.9, "learning_rate": 4.622673829894616e-06, "loss": 0.1199, "step": 8829 }, { "epoch": 6.9, "learning_rate": 4.620538966664015e-06, "loss": 0.1165, "step": 8830 }, { "epoch": 6.9, "learning_rate": 4.618404448398349e-06, "loss": 0.0797, "step": 8831 }, { "epoch": 6.9, "learning_rate": 4.616270275234501e-06, "loss": 0.0796, "step": 8832 }, { "epoch": 6.9, "learning_rate": 4.6141364473093265e-06, "loss": 0.1272, "step": 8833 }, { "epoch": 6.9, "learning_rate": 4.6120029647596575e-06, "loss": 0.065, "step": 8834 }, { "epoch": 6.91, "learning_rate": 4.6098698277223095e-06, "loss": 0.087, "step": 8835 }, { "epoch": 6.91, "learning_rate": 4.607737036334072e-06, "loss": 0.0929, "step": 8836 }, { "epoch": 6.91, "learning_rate": 4.605604590731708e-06, "loss": 0.0926, "step": 8837 }, { "epoch": 6.91, "learning_rate": 4.603472491051977e-06, "loss": 0.151, "step": 8838 }, { "epoch": 6.91, "learning_rate": 4.601340737431588e-06, "loss": 0.1195, "step": 8839 }, { "epoch": 6.91, "learning_rate": 4.599209330007245e-06, "loss": 0.086, "step": 8840 }, { "epoch": 6.91, "learning_rate": 4.597078268915633e-06, "loss": 0.0629, "step": 8841 }, { "epoch": 6.91, "learning_rate": 4.594947554293407e-06, "loss": 0.0959, "step": 8842 }, { "epoch": 6.91, "learning_rate": 4.592817186277202e-06, "loss": 0.0679, "step": 8843 }, { "epoch": 6.91, "learning_rate": 4.59068716500363e-06, "loss": 0.109, "step": 8844 }, { "epoch": 6.91, "learning_rate": 4.58855749060928e-06, "loss": 0.1687, "step": 8845 }, { "epoch": 6.91, "learning_rate": 4.586428163230722e-06, "loss": 0.178, "step": 8846 }, { "epoch": 6.91, "learning_rate": 4.5842991830045015e-06, "loss": 0.1332, "step": 8847 }, { "epoch": 6.92, "learning_rate": 4.5821705500671376e-06, "loss": 0.0652, "step": 8848 }, { "epoch": 6.92, "learning_rate": 4.5800422645551405e-06, "loss": 0.1172, "step": 8849 }, { "epoch": 6.92, "learning_rate": 4.577914326604986e-06, "loss": 0.139, "step": 8850 }, { "epoch": 6.92, "learning_rate": 4.5757867363531285e-06, "loss": 0.1285, "step": 8851 }, { "epoch": 6.92, "learning_rate": 4.573659493935999e-06, "loss": 0.1706, "step": 8852 }, { "epoch": 6.92, "learning_rate": 4.571532599490018e-06, "loss": 0.0854, "step": 8853 }, { "epoch": 6.92, "learning_rate": 4.569406053151572e-06, "loss": 0.1076, "step": 8854 }, { "epoch": 6.92, "learning_rate": 4.5672798550570305e-06, "loss": 0.1438, "step": 8855 }, { "epoch": 6.92, "learning_rate": 4.565154005342731e-06, "loss": 0.2074, "step": 8856 }, { "epoch": 6.92, "learning_rate": 4.563028504145004e-06, "loss": 0.0854, "step": 8857 }, { "epoch": 6.92, "learning_rate": 4.560903351600149e-06, "loss": 0.0528, "step": 8858 }, { "epoch": 6.92, "learning_rate": 4.558778547844442e-06, "loss": 0.0658, "step": 8859 }, { "epoch": 6.92, "learning_rate": 4.556654093014137e-06, "loss": 0.102, "step": 8860 }, { "epoch": 6.93, "learning_rate": 4.554529987245476e-06, "loss": 0.0922, "step": 8861 }, { "epoch": 6.93, "learning_rate": 4.5524062306746615e-06, "loss": 0.1073, "step": 8862 }, { "epoch": 6.93, "learning_rate": 4.550282823437886e-06, "loss": 0.1301, "step": 8863 }, { "epoch": 6.93, "learning_rate": 4.5481597656713104e-06, "loss": 0.0697, "step": 8864 }, { "epoch": 6.93, "learning_rate": 4.5460370575110855e-06, "loss": 0.118, "step": 8865 }, { "epoch": 6.93, "learning_rate": 4.543914699093331e-06, "loss": 0.0786, "step": 8866 }, { "epoch": 6.93, "learning_rate": 4.541792690554146e-06, "loss": 0.0794, "step": 8867 }, { "epoch": 6.93, "learning_rate": 4.539671032029604e-06, "loss": 0.1232, "step": 8868 }, { "epoch": 6.93, "learning_rate": 4.537549723655762e-06, "loss": 0.1217, "step": 8869 }, { "epoch": 6.93, "learning_rate": 4.53542876556865e-06, "loss": 0.121, "step": 8870 }, { "epoch": 6.93, "learning_rate": 4.533308157904279e-06, "loss": 0.0747, "step": 8871 }, { "epoch": 6.93, "learning_rate": 4.531187900798634e-06, "loss": 0.1516, "step": 8872 }, { "epoch": 6.93, "learning_rate": 4.52906799438768e-06, "loss": 0.153, "step": 8873 }, { "epoch": 6.94, "learning_rate": 4.526948438807359e-06, "loss": 0.0796, "step": 8874 }, { "epoch": 6.94, "learning_rate": 4.524829234193586e-06, "loss": 0.1273, "step": 8875 }, { "epoch": 6.94, "learning_rate": 4.522710380682266e-06, "loss": 0.0531, "step": 8876 }, { "epoch": 6.94, "learning_rate": 4.5205918784092685e-06, "loss": 0.0739, "step": 8877 }, { "epoch": 6.94, "learning_rate": 4.518473727510449e-06, "loss": 0.0634, "step": 8878 }, { "epoch": 6.94, "learning_rate": 4.516355928121627e-06, "loss": 0.0783, "step": 8879 }, { "epoch": 6.94, "learning_rate": 4.514238480378618e-06, "loss": 0.0885, "step": 8880 }, { "epoch": 6.94, "learning_rate": 4.512121384417205e-06, "loss": 0.1112, "step": 8881 }, { "epoch": 6.94, "learning_rate": 4.510004640373147e-06, "loss": 0.0823, "step": 8882 }, { "epoch": 6.94, "learning_rate": 4.507888248382181e-06, "loss": 0.1533, "step": 8883 }, { "epoch": 6.94, "learning_rate": 4.505772208580034e-06, "loss": 0.0682, "step": 8884 }, { "epoch": 6.94, "learning_rate": 4.5036565211023876e-06, "loss": 0.1105, "step": 8885 }, { "epoch": 6.94, "learning_rate": 4.501541186084918e-06, "loss": 0.1323, "step": 8886 }, { "epoch": 6.95, "learning_rate": 4.49942620366327e-06, "loss": 0.1702, "step": 8887 }, { "epoch": 6.95, "learning_rate": 4.497311573973077e-06, "loss": 0.0996, "step": 8888 }, { "epoch": 6.95, "learning_rate": 4.495197297149941e-06, "loss": 0.0873, "step": 8889 }, { "epoch": 6.95, "learning_rate": 4.493083373329434e-06, "loss": 0.1713, "step": 8890 }, { "epoch": 6.95, "learning_rate": 4.490969802647125e-06, "loss": 0.0703, "step": 8891 }, { "epoch": 6.95, "learning_rate": 4.488856585238544e-06, "loss": 0.0962, "step": 8892 }, { "epoch": 6.95, "learning_rate": 4.486743721239203e-06, "loss": 0.0992, "step": 8893 }, { "epoch": 6.95, "learning_rate": 4.484631210784595e-06, "loss": 0.1707, "step": 8894 }, { "epoch": 6.95, "learning_rate": 4.482519054010187e-06, "loss": 0.0847, "step": 8895 }, { "epoch": 6.95, "learning_rate": 4.480407251051422e-06, "loss": 0.1127, "step": 8896 }, { "epoch": 6.95, "learning_rate": 4.478295802043724e-06, "loss": 0.1281, "step": 8897 }, { "epoch": 6.95, "learning_rate": 4.476184707122487e-06, "loss": 0.1741, "step": 8898 }, { "epoch": 6.96, "learning_rate": 4.474073966423097e-06, "loss": 0.143, "step": 8899 }, { "epoch": 6.96, "learning_rate": 4.471963580080903e-06, "loss": 0.1107, "step": 8900 }, { "epoch": 6.96, "learning_rate": 4.469853548231239e-06, "loss": 0.0566, "step": 8901 }, { "epoch": 6.96, "learning_rate": 4.467743871009405e-06, "loss": 0.1359, "step": 8902 }, { "epoch": 6.96, "learning_rate": 4.4656345485506955e-06, "loss": 0.0997, "step": 8903 }, { "epoch": 6.96, "learning_rate": 4.463525580990372e-06, "loss": 0.1238, "step": 8904 }, { "epoch": 6.96, "learning_rate": 4.461416968463674e-06, "loss": 0.1041, "step": 8905 }, { "epoch": 6.96, "learning_rate": 4.4593087111058145e-06, "loss": 0.1194, "step": 8906 }, { "epoch": 6.96, "learning_rate": 4.457200809052e-06, "loss": 0.1324, "step": 8907 }, { "epoch": 6.96, "learning_rate": 4.455093262437391e-06, "loss": 0.0755, "step": 8908 }, { "epoch": 6.96, "learning_rate": 4.452986071397141e-06, "loss": 0.0814, "step": 8909 }, { "epoch": 6.96, "learning_rate": 4.450879236066372e-06, "loss": 0.0425, "step": 8910 }, { "epoch": 6.96, "learning_rate": 4.448772756580195e-06, "loss": 0.0823, "step": 8911 }, { "epoch": 6.97, "learning_rate": 4.4466666330736916e-06, "loss": 0.2066, "step": 8912 }, { "epoch": 6.97, "learning_rate": 4.4445608656819085e-06, "loss": 0.1427, "step": 8913 }, { "epoch": 6.97, "learning_rate": 4.44245545453989e-06, "loss": 0.2013, "step": 8914 }, { "epoch": 6.97, "learning_rate": 4.440350399782647e-06, "loss": 0.0925, "step": 8915 }, { "epoch": 6.97, "learning_rate": 4.438245701545167e-06, "loss": 0.1708, "step": 8916 }, { "epoch": 6.97, "learning_rate": 4.436141359962418e-06, "loss": 0.1063, "step": 8917 }, { "epoch": 6.97, "learning_rate": 4.434037375169343e-06, "loss": 0.1714, "step": 8918 }, { "epoch": 6.97, "learning_rate": 4.431933747300863e-06, "loss": 0.1392, "step": 8919 }, { "epoch": 6.97, "learning_rate": 4.429830476491876e-06, "loss": 0.1921, "step": 8920 }, { "epoch": 6.97, "learning_rate": 4.427727562877252e-06, "loss": 0.1641, "step": 8921 }, { "epoch": 6.97, "learning_rate": 4.4256250065918515e-06, "loss": 0.1109, "step": 8922 }, { "epoch": 6.97, "learning_rate": 4.4235228077705005e-06, "loss": 0.1254, "step": 8923 }, { "epoch": 6.97, "learning_rate": 4.421420966548008e-06, "loss": 0.0482, "step": 8924 }, { "epoch": 6.98, "learning_rate": 4.419319483059146e-06, "loss": 0.1047, "step": 8925 }, { "epoch": 6.98, "learning_rate": 4.417218357438687e-06, "loss": 0.2284, "step": 8926 }, { "epoch": 6.98, "learning_rate": 4.415117589821364e-06, "loss": 0.1014, "step": 8927 }, { "epoch": 6.98, "learning_rate": 4.413017180341892e-06, "loss": 0.1132, "step": 8928 }, { "epoch": 6.98, "learning_rate": 4.410917129134961e-06, "loss": 0.1089, "step": 8929 }, { "epoch": 6.98, "learning_rate": 4.408817436335242e-06, "loss": 0.0615, "step": 8930 }, { "epoch": 6.98, "learning_rate": 4.406718102077379e-06, "loss": 0.1374, "step": 8931 }, { "epoch": 6.98, "learning_rate": 4.404619126495995e-06, "loss": 0.0951, "step": 8932 }, { "epoch": 6.98, "learning_rate": 4.402520509725685e-06, "loss": 0.1099, "step": 8933 }, { "epoch": 6.98, "learning_rate": 4.400422251901033e-06, "loss": 0.1114, "step": 8934 }, { "epoch": 6.98, "learning_rate": 4.398324353156593e-06, "loss": 0.1274, "step": 8935 }, { "epoch": 6.98, "learning_rate": 4.396226813626889e-06, "loss": 0.1468, "step": 8936 }, { "epoch": 6.98, "learning_rate": 4.394129633446428e-06, "loss": 0.1011, "step": 8937 }, { "epoch": 6.99, "learning_rate": 4.3920328127497e-06, "loss": 0.0853, "step": 8938 }, { "epoch": 6.99, "learning_rate": 4.389936351671165e-06, "loss": 0.0542, "step": 8939 }, { "epoch": 6.99, "learning_rate": 4.38784025034526e-06, "loss": 0.0975, "step": 8940 }, { "epoch": 6.99, "learning_rate": 4.3857445089064e-06, "loss": 0.1207, "step": 8941 }, { "epoch": 6.99, "learning_rate": 4.383649127488978e-06, "loss": 0.1067, "step": 8942 }, { "epoch": 6.99, "learning_rate": 4.381554106227362e-06, "loss": 0.1273, "step": 8943 }, { "epoch": 6.99, "learning_rate": 4.3794594452559e-06, "loss": 0.0999, "step": 8944 }, { "epoch": 6.99, "learning_rate": 4.377365144708909e-06, "loss": 0.0821, "step": 8945 }, { "epoch": 6.99, "learning_rate": 4.375271204720703e-06, "loss": 0.2129, "step": 8946 }, { "epoch": 6.99, "learning_rate": 4.373177625425542e-06, "loss": 0.0757, "step": 8947 }, { "epoch": 6.99, "learning_rate": 4.371084406957684e-06, "loss": 0.1414, "step": 8948 }, { "epoch": 6.99, "learning_rate": 4.368991549451366e-06, "loss": 0.2605, "step": 8949 }, { "epoch": 6.99, "learning_rate": 4.366899053040791e-06, "loss": 0.1254, "step": 8950 }, { "epoch": 7.0, "learning_rate": 4.364806917860141e-06, "loss": 0.1721, "step": 8951 }, { "epoch": 7.0, "learning_rate": 4.362715144043581e-06, "loss": 0.1298, "step": 8952 }, { "epoch": 7.0, "learning_rate": 4.3606237317252466e-06, "loss": 0.0879, "step": 8953 }, { "epoch": 7.0, "learning_rate": 4.358532681039252e-06, "loss": 0.0951, "step": 8954 }, { "epoch": 7.0, "learning_rate": 4.3564419921196885e-06, "loss": 0.1004, "step": 8955 }, { "epoch": 7.0, "learning_rate": 4.354351665100621e-06, "loss": 0.1025, "step": 8956 }, { "epoch": 7.0, "learning_rate": 4.352261700116103e-06, "loss": 0.247, "step": 8957 }, { "epoch": 7.0, "learning_rate": 4.350172097300154e-06, "loss": 0.0393, "step": 8958 }, { "epoch": 7.0, "learning_rate": 4.348082856786766e-06, "loss": 0.0809, "step": 8959 }, { "epoch": 7.0, "learning_rate": 4.345993978709916e-06, "loss": 0.0695, "step": 8960 }, { "epoch": 7.0, "learning_rate": 4.343905463203561e-06, "loss": 0.0636, "step": 8961 }, { "epoch": 7.0, "learning_rate": 4.341817310401626e-06, "loss": 0.0929, "step": 8962 }, { "epoch": 7.01, "learning_rate": 4.339729520438017e-06, "loss": 0.0374, "step": 8963 }, { "epoch": 7.01, "learning_rate": 4.3376420934466175e-06, "loss": 0.049, "step": 8964 }, { "epoch": 7.01, "learning_rate": 4.335555029561287e-06, "loss": 0.0394, "step": 8965 }, { "epoch": 7.01, "learning_rate": 4.333468328915858e-06, "loss": 0.0442, "step": 8966 }, { "epoch": 7.01, "learning_rate": 4.331381991644146e-06, "loss": 0.0226, "step": 8967 }, { "epoch": 7.01, "learning_rate": 4.329296017879935e-06, "loss": 0.0544, "step": 8968 }, { "epoch": 7.01, "learning_rate": 4.327210407757001e-06, "loss": 0.0559, "step": 8969 }, { "epoch": 7.01, "learning_rate": 4.3251251614090775e-06, "loss": 0.0379, "step": 8970 }, { "epoch": 7.01, "learning_rate": 4.323040278969881e-06, "loss": 0.0387, "step": 8971 }, { "epoch": 7.01, "learning_rate": 4.320955760573118e-06, "loss": 0.0498, "step": 8972 }, { "epoch": 7.01, "learning_rate": 4.318871606352454e-06, "loss": 0.0479, "step": 8973 }, { "epoch": 7.01, "learning_rate": 4.31678781644154e-06, "loss": 0.0212, "step": 8974 }, { "epoch": 7.01, "learning_rate": 4.314704390974002e-06, "loss": 0.0438, "step": 8975 }, { "epoch": 7.02, "learning_rate": 4.3126213300834405e-06, "loss": 0.0482, "step": 8976 }, { "epoch": 7.02, "learning_rate": 4.310538633903436e-06, "loss": 0.067, "step": 8977 }, { "epoch": 7.02, "learning_rate": 4.308456302567544e-06, "loss": 0.016, "step": 8978 }, { "epoch": 7.02, "learning_rate": 4.3063743362092926e-06, "loss": 0.0274, "step": 8979 }, { "epoch": 7.02, "learning_rate": 4.304292734962202e-06, "loss": 0.034, "step": 8980 }, { "epoch": 7.02, "learning_rate": 4.302211498959746e-06, "loss": 0.0982, "step": 8981 }, { "epoch": 7.02, "learning_rate": 4.3001306283353914e-06, "loss": 0.0358, "step": 8982 }, { "epoch": 7.02, "learning_rate": 4.298050123222572e-06, "loss": 0.0798, "step": 8983 }, { "epoch": 7.02, "learning_rate": 4.295969983754711e-06, "loss": 0.0641, "step": 8984 }, { "epoch": 7.02, "learning_rate": 4.293890210065195e-06, "loss": 0.0722, "step": 8985 }, { "epoch": 7.02, "learning_rate": 4.2918108022873925e-06, "loss": 0.0509, "step": 8986 }, { "epoch": 7.02, "learning_rate": 4.2897317605546505e-06, "loss": 0.1097, "step": 8987 }, { "epoch": 7.02, "learning_rate": 4.287653085000288e-06, "loss": 0.0372, "step": 8988 }, { "epoch": 7.03, "learning_rate": 4.285574775757603e-06, "loss": 0.0631, "step": 8989 }, { "epoch": 7.03, "learning_rate": 4.2834968329598705e-06, "loss": 0.0578, "step": 8990 }, { "epoch": 7.03, "learning_rate": 4.281419256740337e-06, "loss": 0.0475, "step": 8991 }, { "epoch": 7.03, "learning_rate": 4.27934204723224e-06, "loss": 0.1321, "step": 8992 }, { "epoch": 7.03, "learning_rate": 4.277265204568774e-06, "loss": 0.0383, "step": 8993 }, { "epoch": 7.03, "learning_rate": 4.27518872888312e-06, "loss": 0.0563, "step": 8994 }, { "epoch": 7.03, "learning_rate": 4.27311262030844e-06, "loss": 0.0606, "step": 8995 }, { "epoch": 7.03, "learning_rate": 4.271036878977862e-06, "loss": 0.0285, "step": 8996 }, { "epoch": 7.03, "learning_rate": 4.268961505024499e-06, "loss": 0.0588, "step": 8997 }, { "epoch": 7.03, "learning_rate": 4.266886498581436e-06, "loss": 0.0309, "step": 8998 }, { "epoch": 7.03, "learning_rate": 4.264811859781735e-06, "loss": 0.0395, "step": 8999 }, { "epoch": 7.03, "learning_rate": 4.262737588758437e-06, "loss": 0.0667, "step": 9000 }, { "epoch": 7.03, "learning_rate": 4.260663685644552e-06, "loss": 0.0938, "step": 9001 }, { "epoch": 7.04, "learning_rate": 4.258590150573074e-06, "loss": 0.0972, "step": 9002 }, { "epoch": 7.04, "learning_rate": 4.256516983676979e-06, "loss": 0.0459, "step": 9003 }, { "epoch": 7.04, "learning_rate": 4.254444185089202e-06, "loss": 0.0471, "step": 9004 }, { "epoch": 7.04, "learning_rate": 4.2523717549426655e-06, "loss": 0.0397, "step": 9005 }, { "epoch": 7.04, "learning_rate": 4.250299693370265e-06, "loss": 0.0375, "step": 9006 }, { "epoch": 7.04, "learning_rate": 4.24822800050488e-06, "loss": 0.0309, "step": 9007 }, { "epoch": 7.04, "learning_rate": 4.246156676479358e-06, "loss": 0.0428, "step": 9008 }, { "epoch": 7.04, "learning_rate": 4.244085721426526e-06, "loss": 0.024, "step": 9009 }, { "epoch": 7.04, "learning_rate": 4.242015135479185e-06, "loss": 0.0685, "step": 9010 }, { "epoch": 7.04, "learning_rate": 4.239944918770115e-06, "loss": 0.0461, "step": 9011 }, { "epoch": 7.04, "learning_rate": 4.23787507143207e-06, "loss": 0.0344, "step": 9012 }, { "epoch": 7.04, "learning_rate": 4.235805593597785e-06, "loss": 0.0284, "step": 9013 }, { "epoch": 7.04, "learning_rate": 4.2337364853999595e-06, "loss": 0.0373, "step": 9014 }, { "epoch": 7.05, "learning_rate": 4.231667746971293e-06, "loss": 0.0275, "step": 9015 }, { "epoch": 7.05, "learning_rate": 4.229599378444433e-06, "loss": 0.1063, "step": 9016 }, { "epoch": 7.05, "learning_rate": 4.22753137995202e-06, "loss": 0.0343, "step": 9017 }, { "epoch": 7.05, "learning_rate": 4.225463751626664e-06, "loss": 0.0895, "step": 9018 }, { "epoch": 7.05, "learning_rate": 4.2233964936009605e-06, "loss": 0.0281, "step": 9019 }, { "epoch": 7.05, "learning_rate": 4.221329606007477e-06, "loss": 0.073, "step": 9020 }, { "epoch": 7.05, "learning_rate": 4.2192630889787435e-06, "loss": 0.0481, "step": 9021 }, { "epoch": 7.05, "learning_rate": 4.217196942647289e-06, "loss": 0.0253, "step": 9022 }, { "epoch": 7.05, "learning_rate": 4.215131167145604e-06, "loss": 0.1104, "step": 9023 }, { "epoch": 7.05, "learning_rate": 4.2130657626061584e-06, "loss": 0.0363, "step": 9024 }, { "epoch": 7.05, "learning_rate": 4.211000729161401e-06, "loss": 0.0468, "step": 9025 }, { "epoch": 7.05, "learning_rate": 4.208936066943753e-06, "loss": 0.0719, "step": 9026 }, { "epoch": 7.06, "learning_rate": 4.2068717760856134e-06, "loss": 0.04, "step": 9027 }, { "epoch": 7.06, "learning_rate": 4.204807856719359e-06, "loss": 0.0175, "step": 9028 }, { "epoch": 7.06, "learning_rate": 4.202744308977337e-06, "loss": 0.0343, "step": 9029 }, { "epoch": 7.06, "learning_rate": 4.2006811329918815e-06, "loss": 0.0846, "step": 9030 }, { "epoch": 7.06, "learning_rate": 4.198618328895294e-06, "loss": 0.0309, "step": 9031 }, { "epoch": 7.06, "learning_rate": 4.1965558968198575e-06, "loss": 0.0312, "step": 9032 }, { "epoch": 7.06, "learning_rate": 4.194493836897816e-06, "loss": 0.0698, "step": 9033 }, { "epoch": 7.06, "learning_rate": 4.192432149261416e-06, "loss": 0.045, "step": 9034 }, { "epoch": 7.06, "learning_rate": 4.1903708340428595e-06, "loss": 0.0579, "step": 9035 }, { "epoch": 7.06, "learning_rate": 4.1883098913743316e-06, "loss": 0.0592, "step": 9036 }, { "epoch": 7.06, "learning_rate": 4.186249321387993e-06, "loss": 0.0276, "step": 9037 }, { "epoch": 7.06, "learning_rate": 4.18418912421598e-06, "loss": 0.0322, "step": 9038 }, { "epoch": 7.06, "learning_rate": 4.182129299990406e-06, "loss": 0.03, "step": 9039 }, { "epoch": 7.07, "learning_rate": 4.180069848843361e-06, "loss": 0.0538, "step": 9040 }, { "epoch": 7.07, "learning_rate": 4.178010770906905e-06, "loss": 0.0563, "step": 9041 }, { "epoch": 7.07, "learning_rate": 4.175952066313086e-06, "loss": 0.0668, "step": 9042 }, { "epoch": 7.07, "learning_rate": 4.173893735193921e-06, "loss": 0.089, "step": 9043 }, { "epoch": 7.07, "learning_rate": 4.171835777681394e-06, "loss": 0.0324, "step": 9044 }, { "epoch": 7.07, "learning_rate": 4.169778193907484e-06, "loss": 0.0366, "step": 9045 }, { "epoch": 7.07, "learning_rate": 4.1677209840041325e-06, "loss": 0.0378, "step": 9046 }, { "epoch": 7.07, "learning_rate": 4.16566414810326e-06, "loss": 0.0364, "step": 9047 }, { "epoch": 7.07, "learning_rate": 4.1636076863367654e-06, "loss": 0.0942, "step": 9048 }, { "epoch": 7.07, "learning_rate": 4.161551598836522e-06, "loss": 0.0332, "step": 9049 }, { "epoch": 7.07, "learning_rate": 4.159495885734377e-06, "loss": 0.0282, "step": 9050 }, { "epoch": 7.07, "learning_rate": 4.157440547162157e-06, "loss": 0.0523, "step": 9051 }, { "epoch": 7.07, "learning_rate": 4.15538558325166e-06, "loss": 0.0192, "step": 9052 }, { "epoch": 7.08, "learning_rate": 4.15333099413467e-06, "loss": 0.0532, "step": 9053 }, { "epoch": 7.08, "learning_rate": 4.151276779942941e-06, "loss": 0.0792, "step": 9054 }, { "epoch": 7.08, "learning_rate": 4.149222940808193e-06, "loss": 0.0828, "step": 9055 }, { "epoch": 7.08, "learning_rate": 4.147169476862134e-06, "loss": 0.0246, "step": 9056 }, { "epoch": 7.08, "learning_rate": 4.1451163882364485e-06, "loss": 0.0176, "step": 9057 }, { "epoch": 7.08, "learning_rate": 4.143063675062794e-06, "loss": 0.0511, "step": 9058 }, { "epoch": 7.08, "learning_rate": 4.1410113374728e-06, "loss": 0.0272, "step": 9059 }, { "epoch": 7.08, "learning_rate": 4.1389593755980775e-06, "loss": 0.0176, "step": 9060 }, { "epoch": 7.08, "learning_rate": 4.13690778957021e-06, "loss": 0.0497, "step": 9061 }, { "epoch": 7.08, "learning_rate": 4.1348565795207585e-06, "loss": 0.0518, "step": 9062 }, { "epoch": 7.08, "learning_rate": 4.13280574558126e-06, "loss": 0.0537, "step": 9063 }, { "epoch": 7.08, "learning_rate": 4.130755287883222e-06, "loss": 0.0674, "step": 9064 }, { "epoch": 7.08, "learning_rate": 4.128705206558143e-06, "loss": 0.0252, "step": 9065 }, { "epoch": 7.09, "learning_rate": 4.126655501737482e-06, "loss": 0.018, "step": 9066 }, { "epoch": 7.09, "learning_rate": 4.124606173552672e-06, "loss": 0.0241, "step": 9067 }, { "epoch": 7.09, "learning_rate": 4.122557222135139e-06, "loss": 0.1344, "step": 9068 }, { "epoch": 7.09, "learning_rate": 4.120508647616273e-06, "loss": 0.0353, "step": 9069 }, { "epoch": 7.09, "learning_rate": 4.118460450127436e-06, "loss": 0.0416, "step": 9070 }, { "epoch": 7.09, "learning_rate": 4.116412629799976e-06, "loss": 0.0686, "step": 9071 }, { "epoch": 7.09, "learning_rate": 4.114365186765211e-06, "loss": 0.0314, "step": 9072 }, { "epoch": 7.09, "learning_rate": 4.112318121154436e-06, "loss": 0.0199, "step": 9073 }, { "epoch": 7.09, "learning_rate": 4.110271433098921e-06, "loss": 0.1384, "step": 9074 }, { "epoch": 7.09, "learning_rate": 4.1082251227299105e-06, "loss": 0.0884, "step": 9075 }, { "epoch": 7.09, "learning_rate": 4.1061791901786315e-06, "loss": 0.0728, "step": 9076 }, { "epoch": 7.09, "learning_rate": 4.104133635576285e-06, "loss": 0.0577, "step": 9077 }, { "epoch": 7.09, "learning_rate": 4.1020884590540354e-06, "loss": 0.0336, "step": 9078 }, { "epoch": 7.1, "learning_rate": 4.1000436607430335e-06, "loss": 0.0492, "step": 9079 }, { "epoch": 7.1, "learning_rate": 4.097999240774411e-06, "loss": 0.0226, "step": 9080 }, { "epoch": 7.1, "learning_rate": 4.095955199279266e-06, "loss": 0.0575, "step": 9081 }, { "epoch": 7.1, "learning_rate": 4.093911536388675e-06, "loss": 0.065, "step": 9082 }, { "epoch": 7.1, "learning_rate": 4.0918682522336915e-06, "loss": 0.0233, "step": 9083 }, { "epoch": 7.1, "learning_rate": 4.089825346945342e-06, "loss": 0.0311, "step": 9084 }, { "epoch": 7.1, "learning_rate": 4.087782820654632e-06, "loss": 0.0304, "step": 9085 }, { "epoch": 7.1, "learning_rate": 4.0857406734925405e-06, "loss": 0.0488, "step": 9086 }, { "epoch": 7.1, "learning_rate": 4.083698905590019e-06, "loss": 0.0276, "step": 9087 }, { "epoch": 7.1, "learning_rate": 4.081657517078007e-06, "loss": 0.033, "step": 9088 }, { "epoch": 7.1, "learning_rate": 4.07961650808741e-06, "loss": 0.0506, "step": 9089 }, { "epoch": 7.1, "learning_rate": 4.077575878749098e-06, "loss": 0.1098, "step": 9090 }, { "epoch": 7.11, "learning_rate": 4.075535629193944e-06, "loss": 0.0593, "step": 9091 }, { "epoch": 7.11, "learning_rate": 4.073495759552775e-06, "loss": 0.0239, "step": 9092 }, { "epoch": 7.11, "learning_rate": 4.0714562699564e-06, "loss": 0.0591, "step": 9093 }, { "epoch": 7.11, "learning_rate": 4.069417160535606e-06, "loss": 0.0621, "step": 9094 }, { "epoch": 7.11, "learning_rate": 4.067378431421152e-06, "loss": 0.0608, "step": 9095 }, { "epoch": 7.11, "learning_rate": 4.065340082743774e-06, "loss": 0.0302, "step": 9096 }, { "epoch": 7.11, "learning_rate": 4.063302114634186e-06, "loss": 0.0226, "step": 9097 }, { "epoch": 7.11, "learning_rate": 4.061264527223072e-06, "loss": 0.0289, "step": 9098 }, { "epoch": 7.11, "learning_rate": 4.059227320641094e-06, "loss": 0.0875, "step": 9099 }, { "epoch": 7.11, "learning_rate": 4.0571904950189e-06, "loss": 0.0395, "step": 9100 }, { "epoch": 7.11, "learning_rate": 4.055154050487094e-06, "loss": 0.0512, "step": 9101 }, { "epoch": 7.11, "learning_rate": 4.0531179871762645e-06, "loss": 0.0581, "step": 9102 }, { "epoch": 7.11, "learning_rate": 4.051082305216986e-06, "loss": 0.0702, "step": 9103 }, { "epoch": 7.12, "learning_rate": 4.049047004739793e-06, "loss": 0.0233, "step": 9104 }, { "epoch": 7.12, "learning_rate": 4.047012085875204e-06, "loss": 0.0247, "step": 9105 }, { "epoch": 7.12, "learning_rate": 4.04497754875371e-06, "loss": 0.0355, "step": 9106 }, { "epoch": 7.12, "learning_rate": 4.042943393505779e-06, "loss": 0.0188, "step": 9107 }, { "epoch": 7.12, "learning_rate": 4.040909620261851e-06, "loss": 0.0667, "step": 9108 }, { "epoch": 7.12, "learning_rate": 4.038876229152349e-06, "loss": 0.075, "step": 9109 }, { "epoch": 7.12, "learning_rate": 4.03684322030766e-06, "loss": 0.0605, "step": 9110 }, { "epoch": 7.12, "learning_rate": 4.034810593858165e-06, "loss": 0.1027, "step": 9111 }, { "epoch": 7.12, "learning_rate": 4.032778349934198e-06, "loss": 0.1, "step": 9112 }, { "epoch": 7.12, "learning_rate": 4.030746488666082e-06, "loss": 0.0918, "step": 9113 }, { "epoch": 7.12, "learning_rate": 4.02871501018411e-06, "loss": 0.0239, "step": 9114 }, { "epoch": 7.12, "learning_rate": 4.026683914618561e-06, "loss": 0.0496, "step": 9115 }, { "epoch": 7.12, "learning_rate": 4.024653202099677e-06, "loss": 0.0545, "step": 9116 }, { "epoch": 7.13, "learning_rate": 4.022622872757681e-06, "loss": 0.0454, "step": 9117 }, { "epoch": 7.13, "learning_rate": 4.020592926722771e-06, "loss": 0.0195, "step": 9118 }, { "epoch": 7.13, "learning_rate": 4.018563364125117e-06, "loss": 0.116, "step": 9119 }, { "epoch": 7.13, "learning_rate": 4.0165341850948705e-06, "loss": 0.0152, "step": 9120 }, { "epoch": 7.13, "learning_rate": 4.014505389762154e-06, "loss": 0.0471, "step": 9121 }, { "epoch": 7.13, "learning_rate": 4.012476978257063e-06, "loss": 0.0377, "step": 9122 }, { "epoch": 7.13, "learning_rate": 4.010448950709682e-06, "loss": 0.0293, "step": 9123 }, { "epoch": 7.13, "learning_rate": 4.0084213072500525e-06, "loss": 0.0297, "step": 9124 }, { "epoch": 7.13, "learning_rate": 4.006394048008197e-06, "loss": 0.0499, "step": 9125 }, { "epoch": 7.13, "learning_rate": 4.004367173114125e-06, "loss": 0.0681, "step": 9126 }, { "epoch": 7.13, "learning_rate": 4.002340682697808e-06, "loss": 0.0981, "step": 9127 }, { "epoch": 7.13, "learning_rate": 4.0003145768892025e-06, "loss": 0.0473, "step": 9128 }, { "epoch": 7.13, "learning_rate": 3.998288855818223e-06, "loss": 0.0477, "step": 9129 }, { "epoch": 7.14, "learning_rate": 3.996263519614783e-06, "loss": 0.0368, "step": 9130 }, { "epoch": 7.14, "learning_rate": 3.994238568408755e-06, "loss": 0.0712, "step": 9131 }, { "epoch": 7.14, "learning_rate": 3.9922140023299924e-06, "loss": 0.0812, "step": 9132 }, { "epoch": 7.14, "learning_rate": 3.99018982150832e-06, "loss": 0.1127, "step": 9133 }, { "epoch": 7.14, "learning_rate": 3.9881660260735525e-06, "loss": 0.0271, "step": 9134 }, { "epoch": 7.14, "learning_rate": 3.986142616155455e-06, "loss": 0.0407, "step": 9135 }, { "epoch": 7.14, "learning_rate": 3.9841195918837875e-06, "loss": 0.0307, "step": 9136 }, { "epoch": 7.14, "learning_rate": 3.982096953388274e-06, "loss": 0.0572, "step": 9137 }, { "epoch": 7.14, "learning_rate": 3.980074700798625e-06, "loss": 0.1049, "step": 9138 }, { "epoch": 7.14, "learning_rate": 3.9780528342445205e-06, "loss": 0.0243, "step": 9139 }, { "epoch": 7.14, "learning_rate": 3.97603135385561e-06, "loss": 0.0689, "step": 9140 }, { "epoch": 7.14, "learning_rate": 3.974010259761528e-06, "loss": 0.0777, "step": 9141 }, { "epoch": 7.14, "learning_rate": 3.971989552091878e-06, "loss": 0.0403, "step": 9142 }, { "epoch": 7.15, "learning_rate": 3.969969230976241e-06, "loss": 0.0699, "step": 9143 }, { "epoch": 7.15, "learning_rate": 3.967949296544171e-06, "loss": 0.1027, "step": 9144 }, { "epoch": 7.15, "learning_rate": 3.965929748925198e-06, "loss": 0.0491, "step": 9145 }, { "epoch": 7.15, "learning_rate": 3.963910588248837e-06, "loss": 0.0513, "step": 9146 }, { "epoch": 7.15, "learning_rate": 3.9618918146445585e-06, "loss": 0.0587, "step": 9147 }, { "epoch": 7.15, "learning_rate": 3.959873428241821e-06, "loss": 0.1093, "step": 9148 }, { "epoch": 7.15, "learning_rate": 3.95785542917006e-06, "loss": 0.0483, "step": 9149 }, { "epoch": 7.15, "learning_rate": 3.955837817558681e-06, "loss": 0.0478, "step": 9150 }, { "epoch": 7.15, "learning_rate": 3.95382059353707e-06, "loss": 0.0506, "step": 9151 }, { "epoch": 7.15, "learning_rate": 3.951803757234572e-06, "loss": 0.0772, "step": 9152 }, { "epoch": 7.15, "learning_rate": 3.949787308780531e-06, "loss": 0.0416, "step": 9153 }, { "epoch": 7.15, "learning_rate": 3.94777124830425e-06, "loss": 0.0386, "step": 9154 }, { "epoch": 7.16, "learning_rate": 3.945755575935012e-06, "loss": 0.0792, "step": 9155 }, { "epoch": 7.16, "learning_rate": 3.9437402918020705e-06, "loss": 0.0664, "step": 9156 }, { "epoch": 7.16, "learning_rate": 3.941725396034669e-06, "loss": 0.0362, "step": 9157 }, { "epoch": 7.16, "learning_rate": 3.939710888762006e-06, "loss": 0.0734, "step": 9158 }, { "epoch": 7.16, "learning_rate": 3.9376967701132665e-06, "loss": 0.0539, "step": 9159 }, { "epoch": 7.16, "learning_rate": 3.935683040217605e-06, "loss": 0.0787, "step": 9160 }, { "epoch": 7.16, "learning_rate": 3.9336696992041625e-06, "loss": 0.0213, "step": 9161 }, { "epoch": 7.16, "learning_rate": 3.931656747202042e-06, "loss": 0.032, "step": 9162 }, { "epoch": 7.16, "learning_rate": 3.9296441843403275e-06, "loss": 0.0448, "step": 9163 }, { "epoch": 7.16, "learning_rate": 3.927632010748079e-06, "loss": 0.032, "step": 9164 }, { "epoch": 7.16, "learning_rate": 3.925620226554328e-06, "loss": 0.062, "step": 9165 }, { "epoch": 7.16, "learning_rate": 3.923608831888081e-06, "loss": 0.0522, "step": 9166 }, { "epoch": 7.16, "learning_rate": 3.921597826878325e-06, "loss": 0.0214, "step": 9167 }, { "epoch": 7.17, "learning_rate": 3.919587211654015e-06, "loss": 0.0269, "step": 9168 }, { "epoch": 7.17, "learning_rate": 3.917576986344087e-06, "loss": 0.113, "step": 9169 }, { "epoch": 7.17, "learning_rate": 3.915567151077449e-06, "loss": 0.1047, "step": 9170 }, { "epoch": 7.17, "learning_rate": 3.913557705982978e-06, "loss": 0.0236, "step": 9171 }, { "epoch": 7.17, "learning_rate": 3.9115486511895415e-06, "loss": 0.0482, "step": 9172 }, { "epoch": 7.17, "learning_rate": 3.90953998682597e-06, "loss": 0.0759, "step": 9173 }, { "epoch": 7.17, "learning_rate": 3.907531713021075e-06, "loss": 0.0813, "step": 9174 }, { "epoch": 7.17, "learning_rate": 3.905523829903626e-06, "loss": 0.0687, "step": 9175 }, { "epoch": 7.17, "learning_rate": 3.903516337602396e-06, "loss": 0.0687, "step": 9176 }, { "epoch": 7.17, "learning_rate": 3.901509236246111e-06, "loss": 0.0766, "step": 9177 }, { "epoch": 7.17, "learning_rate": 3.899502525963481e-06, "loss": 0.0208, "step": 9178 }, { "epoch": 7.17, "learning_rate": 3.897496206883189e-06, "loss": 0.076, "step": 9179 }, { "epoch": 7.17, "learning_rate": 3.895490279133892e-06, "loss": 0.0428, "step": 9180 }, { "epoch": 7.18, "learning_rate": 3.893484742844223e-06, "loss": 0.0403, "step": 9181 }, { "epoch": 7.18, "learning_rate": 3.89147959814279e-06, "loss": 0.0625, "step": 9182 }, { "epoch": 7.18, "learning_rate": 3.889474845158171e-06, "loss": 0.0951, "step": 9183 }, { "epoch": 7.18, "learning_rate": 3.887470484018932e-06, "loss": 0.0385, "step": 9184 }, { "epoch": 7.18, "learning_rate": 3.885466514853603e-06, "loss": 0.0525, "step": 9185 }, { "epoch": 7.18, "learning_rate": 3.883462937790685e-06, "loss": 0.0195, "step": 9186 }, { "epoch": 7.18, "learning_rate": 3.8814597529586614e-06, "loss": 0.0899, "step": 9187 }, { "epoch": 7.18, "learning_rate": 3.879456960485996e-06, "loss": 0.0305, "step": 9188 }, { "epoch": 7.18, "learning_rate": 3.877454560501115e-06, "loss": 0.0765, "step": 9189 }, { "epoch": 7.18, "learning_rate": 3.875452553132426e-06, "loss": 0.0272, "step": 9190 }, { "epoch": 7.18, "learning_rate": 3.8734509385083095e-06, "loss": 0.0348, "step": 9191 }, { "epoch": 7.18, "learning_rate": 3.871449716757123e-06, "loss": 0.0443, "step": 9192 }, { "epoch": 7.18, "learning_rate": 3.869448888007198e-06, "loss": 0.0979, "step": 9193 }, { "epoch": 7.19, "learning_rate": 3.867448452386837e-06, "loss": 0.0557, "step": 9194 }, { "epoch": 7.19, "learning_rate": 3.86544841002432e-06, "loss": 0.0829, "step": 9195 }, { "epoch": 7.19, "learning_rate": 3.8634487610479085e-06, "loss": 0.0317, "step": 9196 }, { "epoch": 7.19, "learning_rate": 3.861449505585833e-06, "loss": 0.0916, "step": 9197 }, { "epoch": 7.19, "learning_rate": 3.859450643766287e-06, "loss": 0.0311, "step": 9198 }, { "epoch": 7.19, "learning_rate": 3.857452175717461e-06, "loss": 0.0635, "step": 9199 }, { "epoch": 7.19, "learning_rate": 3.855454101567506e-06, "loss": 0.0687, "step": 9200 }, { "epoch": 7.19, "learning_rate": 3.85345642144455e-06, "loss": 0.0451, "step": 9201 }, { "epoch": 7.19, "learning_rate": 3.851459135476699e-06, "loss": 0.0236, "step": 9202 }, { "epoch": 7.19, "learning_rate": 3.8494622437920305e-06, "loss": 0.1086, "step": 9203 }, { "epoch": 7.19, "learning_rate": 3.847465746518597e-06, "loss": 0.0569, "step": 9204 }, { "epoch": 7.19, "learning_rate": 3.845469643784428e-06, "loss": 0.0473, "step": 9205 }, { "epoch": 7.19, "learning_rate": 3.84347393571752e-06, "loss": 0.0552, "step": 9206 }, { "epoch": 7.2, "learning_rate": 3.84147862244586e-06, "loss": 0.0262, "step": 9207 }, { "epoch": 7.2, "learning_rate": 3.8394837040974e-06, "loss": 0.0625, "step": 9208 }, { "epoch": 7.2, "learning_rate": 3.8374891808000584e-06, "loss": 0.0675, "step": 9209 }, { "epoch": 7.2, "learning_rate": 3.835495052681737e-06, "loss": 0.021, "step": 9210 }, { "epoch": 7.2, "learning_rate": 3.83350131987032e-06, "loss": 0.0319, "step": 9211 }, { "epoch": 7.2, "learning_rate": 3.831507982493653e-06, "loss": 0.0422, "step": 9212 }, { "epoch": 7.2, "learning_rate": 3.829515040679561e-06, "loss": 0.0636, "step": 9213 }, { "epoch": 7.2, "learning_rate": 3.827522494555847e-06, "loss": 0.0298, "step": 9214 }, { "epoch": 7.2, "learning_rate": 3.825530344250282e-06, "loss": 0.0452, "step": 9215 }, { "epoch": 7.2, "learning_rate": 3.823538589890617e-06, "loss": 0.0252, "step": 9216 }, { "epoch": 7.2, "learning_rate": 3.821547231604576e-06, "loss": 0.0599, "step": 9217 }, { "epoch": 7.2, "learning_rate": 3.819556269519854e-06, "loss": 0.033, "step": 9218 }, { "epoch": 7.21, "learning_rate": 3.81756570376413e-06, "loss": 0.0867, "step": 9219 }, { "epoch": 7.21, "learning_rate": 3.815575534465053e-06, "loss": 0.0452, "step": 9220 }, { "epoch": 7.21, "learning_rate": 3.8135857617502337e-06, "loss": 0.057, "step": 9221 }, { "epoch": 7.21, "learning_rate": 3.811596385747279e-06, "loss": 0.04, "step": 9222 }, { "epoch": 7.21, "learning_rate": 3.8096074065837585e-06, "loss": 0.0603, "step": 9223 }, { "epoch": 7.21, "learning_rate": 3.807618824387216e-06, "loss": 0.0699, "step": 9224 }, { "epoch": 7.21, "learning_rate": 3.805630639285174e-06, "loss": 0.0288, "step": 9225 }, { "epoch": 7.21, "learning_rate": 3.803642851405125e-06, "loss": 0.0249, "step": 9226 }, { "epoch": 7.21, "learning_rate": 3.8016554608745405e-06, "loss": 0.0508, "step": 9227 }, { "epoch": 7.21, "learning_rate": 3.799668467820864e-06, "loss": 0.0869, "step": 9228 }, { "epoch": 7.21, "learning_rate": 3.797681872371509e-06, "loss": 0.0426, "step": 9229 }, { "epoch": 7.21, "learning_rate": 3.7956956746538774e-06, "loss": 0.1156, "step": 9230 }, { "epoch": 7.21, "learning_rate": 3.793709874795337e-06, "loss": 0.104, "step": 9231 }, { "epoch": 7.22, "learning_rate": 3.7917244729232218e-06, "loss": 0.0259, "step": 9232 }, { "epoch": 7.22, "learning_rate": 3.789739469164848e-06, "loss": 0.0716, "step": 9233 }, { "epoch": 7.22, "learning_rate": 3.7877548636475137e-06, "loss": 0.1053, "step": 9234 }, { "epoch": 7.22, "learning_rate": 3.785770656498482e-06, "loss": 0.057, "step": 9235 }, { "epoch": 7.22, "learning_rate": 3.7837868478449902e-06, "loss": 0.0344, "step": 9236 }, { "epoch": 7.22, "learning_rate": 3.7818034378142553e-06, "loss": 0.0608, "step": 9237 }, { "epoch": 7.22, "learning_rate": 3.779820426533466e-06, "loss": 0.0561, "step": 9238 }, { "epoch": 7.22, "learning_rate": 3.7778378141297835e-06, "loss": 0.0278, "step": 9239 }, { "epoch": 7.22, "learning_rate": 3.775855600730346e-06, "loss": 0.0422, "step": 9240 }, { "epoch": 7.22, "learning_rate": 3.7738737864622623e-06, "loss": 0.029, "step": 9241 }, { "epoch": 7.22, "learning_rate": 3.77189237145263e-06, "loss": 0.0785, "step": 9242 }, { "epoch": 7.22, "learning_rate": 3.7699113558284985e-06, "loss": 0.0559, "step": 9243 }, { "epoch": 7.22, "learning_rate": 3.767930739716904e-06, "loss": 0.0313, "step": 9244 }, { "epoch": 7.23, "learning_rate": 3.7659505232448614e-06, "loss": 0.0625, "step": 9245 }, { "epoch": 7.23, "learning_rate": 3.7639707065393526e-06, "loss": 0.058, "step": 9246 }, { "epoch": 7.23, "learning_rate": 3.761991289727336e-06, "loss": 0.0224, "step": 9247 }, { "epoch": 7.23, "learning_rate": 3.7600122729357436e-06, "loss": 0.0411, "step": 9248 }, { "epoch": 7.23, "learning_rate": 3.758033656291482e-06, "loss": 0.058, "step": 9249 }, { "epoch": 7.23, "learning_rate": 3.756055439921433e-06, "loss": 0.1176, "step": 9250 }, { "epoch": 7.23, "learning_rate": 3.7540776239524534e-06, "loss": 0.0361, "step": 9251 }, { "epoch": 7.23, "learning_rate": 3.752100208511369e-06, "loss": 0.0635, "step": 9252 }, { "epoch": 7.23, "learning_rate": 3.7501231937249904e-06, "loss": 0.0739, "step": 9253 }, { "epoch": 7.23, "learning_rate": 3.748146579720097e-06, "loss": 0.0253, "step": 9254 }, { "epoch": 7.23, "learning_rate": 3.7461703666234337e-06, "loss": 0.0718, "step": 9255 }, { "epoch": 7.23, "learning_rate": 3.7441945545617286e-06, "loss": 0.0571, "step": 9256 }, { "epoch": 7.23, "learning_rate": 3.7422191436616907e-06, "loss": 0.0372, "step": 9257 }, { "epoch": 7.24, "learning_rate": 3.740244134049992e-06, "loss": 0.0744, "step": 9258 }, { "epoch": 7.24, "learning_rate": 3.7382695258532855e-06, "loss": 0.065, "step": 9259 }, { "epoch": 7.24, "learning_rate": 3.7362953191981856e-06, "loss": 0.0553, "step": 9260 }, { "epoch": 7.24, "learning_rate": 3.734321514211301e-06, "loss": 0.0288, "step": 9261 }, { "epoch": 7.24, "learning_rate": 3.7323481110192005e-06, "loss": 0.0214, "step": 9262 }, { "epoch": 7.24, "learning_rate": 3.730375109748433e-06, "loss": 0.0466, "step": 9263 }, { "epoch": 7.24, "learning_rate": 3.728402510525514e-06, "loss": 0.1082, "step": 9264 }, { "epoch": 7.24, "learning_rate": 3.726430313476951e-06, "loss": 0.1635, "step": 9265 }, { "epoch": 7.24, "learning_rate": 3.7244585187292028e-06, "loss": 0.0205, "step": 9266 }, { "epoch": 7.24, "learning_rate": 3.722487126408716e-06, "loss": 0.0464, "step": 9267 }, { "epoch": 7.24, "learning_rate": 3.7205161366419073e-06, "loss": 0.0536, "step": 9268 }, { "epoch": 7.24, "learning_rate": 3.7185455495551738e-06, "loss": 0.0388, "step": 9269 }, { "epoch": 7.25, "learning_rate": 3.716575365274879e-06, "loss": 0.0679, "step": 9270 }, { "epoch": 7.25, "learning_rate": 3.7146055839273643e-06, "loss": 0.0313, "step": 9271 }, { "epoch": 7.25, "learning_rate": 3.7126362056389442e-06, "loss": 0.0566, "step": 9272 }, { "epoch": 7.25, "learning_rate": 3.710667230535907e-06, "loss": 0.0934, "step": 9273 }, { "epoch": 7.25, "learning_rate": 3.7086986587445163e-06, "loss": 0.0756, "step": 9274 }, { "epoch": 7.25, "learning_rate": 3.7067304903910093e-06, "loss": 0.0392, "step": 9275 }, { "epoch": 7.25, "learning_rate": 3.7047627256015974e-06, "loss": 0.094, "step": 9276 }, { "epoch": 7.25, "learning_rate": 3.702795364502466e-06, "loss": 0.0207, "step": 9277 }, { "epoch": 7.25, "learning_rate": 3.700828407219774e-06, "loss": 0.048, "step": 9278 }, { "epoch": 7.25, "learning_rate": 3.6988618538796537e-06, "loss": 0.0693, "step": 9279 }, { "epoch": 7.25, "learning_rate": 3.6968957046082177e-06, "loss": 0.0669, "step": 9280 }, { "epoch": 7.25, "learning_rate": 3.6949299595315457e-06, "loss": 0.0586, "step": 9281 }, { "epoch": 7.25, "learning_rate": 3.692964618775696e-06, "loss": 0.0547, "step": 9282 }, { "epoch": 7.26, "learning_rate": 3.6909996824666883e-06, "loss": 0.0552, "step": 9283 }, { "epoch": 7.26, "learning_rate": 3.689035150730539e-06, "loss": 0.0349, "step": 9284 }, { "epoch": 7.26, "learning_rate": 3.6870710236932205e-06, "loss": 0.0917, "step": 9285 }, { "epoch": 7.26, "learning_rate": 3.685107301480686e-06, "loss": 0.089, "step": 9286 }, { "epoch": 7.26, "learning_rate": 3.6831439842188575e-06, "loss": 0.0327, "step": 9287 }, { "epoch": 7.26, "learning_rate": 3.6811810720336483e-06, "loss": 0.0624, "step": 9288 }, { "epoch": 7.26, "learning_rate": 3.6792185650509183e-06, "loss": 0.0372, "step": 9289 }, { "epoch": 7.26, "learning_rate": 3.6772564633965225e-06, "loss": 0.0327, "step": 9290 }, { "epoch": 7.26, "learning_rate": 3.675294767196278e-06, "loss": 0.0571, "step": 9291 }, { "epoch": 7.26, "learning_rate": 3.67333347657599e-06, "loss": 0.055, "step": 9292 }, { "epoch": 7.26, "learning_rate": 3.6713725916614263e-06, "loss": 0.125, "step": 9293 }, { "epoch": 7.26, "learning_rate": 3.669412112578321e-06, "loss": 0.0813, "step": 9294 }, { "epoch": 7.26, "learning_rate": 3.6674520394524048e-06, "loss": 0.0316, "step": 9295 }, { "epoch": 7.27, "learning_rate": 3.665492372409366e-06, "loss": 0.0331, "step": 9296 }, { "epoch": 7.27, "learning_rate": 3.6635331115748695e-06, "loss": 0.0268, "step": 9297 }, { "epoch": 7.27, "learning_rate": 3.661574257074556e-06, "loss": 0.0228, "step": 9298 }, { "epoch": 7.27, "learning_rate": 3.6596158090340407e-06, "loss": 0.0239, "step": 9299 }, { "epoch": 7.27, "learning_rate": 3.6576577675789093e-06, "loss": 0.0508, "step": 9300 }, { "epoch": 7.27, "learning_rate": 3.6557001328347265e-06, "loss": 0.0252, "step": 9301 }, { "epoch": 7.27, "learning_rate": 3.653742904927022e-06, "loss": 0.0825, "step": 9302 }, { "epoch": 7.27, "learning_rate": 3.6517860839813147e-06, "loss": 0.0231, "step": 9303 }, { "epoch": 7.27, "learning_rate": 3.649829670123084e-06, "loss": 0.0343, "step": 9304 }, { "epoch": 7.27, "learning_rate": 3.6478736634777902e-06, "loss": 0.0296, "step": 9305 }, { "epoch": 7.27, "learning_rate": 3.645918064170856e-06, "loss": 0.0634, "step": 9306 }, { "epoch": 7.27, "learning_rate": 3.643962872327695e-06, "loss": 0.0195, "step": 9307 }, { "epoch": 7.27, "learning_rate": 3.6420080880736852e-06, "loss": 0.063, "step": 9308 }, { "epoch": 7.28, "learning_rate": 3.6400537115341784e-06, "loss": 0.0511, "step": 9309 }, { "epoch": 7.28, "learning_rate": 3.638099742834499e-06, "loss": 0.0859, "step": 9310 }, { "epoch": 7.28, "learning_rate": 3.6361461820999565e-06, "loss": 0.035, "step": 9311 }, { "epoch": 7.28, "learning_rate": 3.634193029455817e-06, "loss": 0.0497, "step": 9312 }, { "epoch": 7.28, "learning_rate": 3.632240285027331e-06, "loss": 0.0481, "step": 9313 }, { "epoch": 7.28, "learning_rate": 3.630287948939718e-06, "loss": 0.0336, "step": 9314 }, { "epoch": 7.28, "learning_rate": 3.6283360213181805e-06, "loss": 0.1182, "step": 9315 }, { "epoch": 7.28, "learning_rate": 3.6263845022878896e-06, "loss": 0.0358, "step": 9316 }, { "epoch": 7.28, "learning_rate": 3.6244333919739783e-06, "loss": 0.0607, "step": 9317 }, { "epoch": 7.28, "learning_rate": 3.6224826905015732e-06, "loss": 0.0485, "step": 9318 }, { "epoch": 7.28, "learning_rate": 3.620532397995763e-06, "loss": 0.1406, "step": 9319 }, { "epoch": 7.28, "learning_rate": 3.618582514581612e-06, "loss": 0.0892, "step": 9320 }, { "epoch": 7.28, "learning_rate": 3.616633040384161e-06, "loss": 0.0691, "step": 9321 }, { "epoch": 7.29, "learning_rate": 3.61468397552842e-06, "loss": 0.0799, "step": 9322 }, { "epoch": 7.29, "learning_rate": 3.6127353201393767e-06, "loss": 0.1135, "step": 9323 }, { "epoch": 7.29, "learning_rate": 3.6107870743419914e-06, "loss": 0.0257, "step": 9324 }, { "epoch": 7.29, "learning_rate": 3.608839238261195e-06, "loss": 0.0366, "step": 9325 }, { "epoch": 7.29, "learning_rate": 3.6068918120218997e-06, "loss": 0.0407, "step": 9326 }, { "epoch": 7.29, "learning_rate": 3.6049447957489846e-06, "loss": 0.0935, "step": 9327 }, { "epoch": 7.29, "learning_rate": 3.602998189567308e-06, "loss": 0.0419, "step": 9328 }, { "epoch": 7.29, "learning_rate": 3.60105199360169e-06, "loss": 0.0457, "step": 9329 }, { "epoch": 7.29, "learning_rate": 3.599106207976939e-06, "loss": 0.1191, "step": 9330 }, { "epoch": 7.29, "learning_rate": 3.597160832817832e-06, "loss": 0.0775, "step": 9331 }, { "epoch": 7.29, "learning_rate": 3.5952158682491156e-06, "loss": 0.0853, "step": 9332 }, { "epoch": 7.29, "learning_rate": 3.5932713143955156e-06, "loss": 0.0336, "step": 9333 }, { "epoch": 7.3, "learning_rate": 3.591327171381728e-06, "loss": 0.0745, "step": 9334 }, { "epoch": 7.3, "learning_rate": 3.5893834393324224e-06, "loss": 0.1047, "step": 9335 }, { "epoch": 7.3, "learning_rate": 3.587440118372244e-06, "loss": 0.0903, "step": 9336 }, { "epoch": 7.3, "learning_rate": 3.585497208625809e-06, "loss": 0.0411, "step": 9337 }, { "epoch": 7.3, "learning_rate": 3.5835547102177126e-06, "loss": 0.059, "step": 9338 }, { "epoch": 7.3, "learning_rate": 3.5816126232725234e-06, "loss": 0.0684, "step": 9339 }, { "epoch": 7.3, "learning_rate": 3.5796709479147706e-06, "loss": 0.0446, "step": 9340 }, { "epoch": 7.3, "learning_rate": 3.577729684268968e-06, "loss": 0.0621, "step": 9341 }, { "epoch": 7.3, "learning_rate": 3.5757888324596093e-06, "loss": 0.039, "step": 9342 }, { "epoch": 7.3, "learning_rate": 3.573848392611149e-06, "loss": 0.0551, "step": 9343 }, { "epoch": 7.3, "learning_rate": 3.5719083648480213e-06, "loss": 0.0438, "step": 9344 }, { "epoch": 7.3, "learning_rate": 3.5699687492946334e-06, "loss": 0.0593, "step": 9345 }, { "epoch": 7.3, "learning_rate": 3.5680295460753645e-06, "loss": 0.0786, "step": 9346 }, { "epoch": 7.31, "learning_rate": 3.5660907553145695e-06, "loss": 0.0288, "step": 9347 }, { "epoch": 7.31, "learning_rate": 3.564152377136575e-06, "loss": 0.0557, "step": 9348 }, { "epoch": 7.31, "learning_rate": 3.5622144116656796e-06, "loss": 0.0573, "step": 9349 }, { "epoch": 7.31, "learning_rate": 3.560276859026167e-06, "loss": 0.0402, "step": 9350 }, { "epoch": 7.31, "learning_rate": 3.5583397193422763e-06, "loss": 0.041, "step": 9351 }, { "epoch": 7.31, "learning_rate": 3.5564029927382284e-06, "loss": 0.0439, "step": 9352 }, { "epoch": 7.31, "learning_rate": 3.5544666793382253e-06, "loss": 0.0298, "step": 9353 }, { "epoch": 7.31, "learning_rate": 3.552530779266432e-06, "loss": 0.0526, "step": 9354 }, { "epoch": 7.31, "learning_rate": 3.550595292646991e-06, "loss": 0.0563, "step": 9355 }, { "epoch": 7.31, "learning_rate": 3.5486602196040177e-06, "loss": 0.0907, "step": 9356 }, { "epoch": 7.31, "learning_rate": 3.5467255602616013e-06, "loss": 0.0623, "step": 9357 }, { "epoch": 7.31, "learning_rate": 3.544791314743804e-06, "loss": 0.0705, "step": 9358 }, { "epoch": 7.31, "learning_rate": 3.542857483174662e-06, "loss": 0.0205, "step": 9359 }, { "epoch": 7.32, "learning_rate": 3.540924065678182e-06, "loss": 0.0416, "step": 9360 }, { "epoch": 7.32, "learning_rate": 3.5389910623783517e-06, "loss": 0.0498, "step": 9361 }, { "epoch": 7.32, "learning_rate": 3.53705847339913e-06, "loss": 0.0828, "step": 9362 }, { "epoch": 7.32, "learning_rate": 3.5351262988644388e-06, "loss": 0.1118, "step": 9363 }, { "epoch": 7.32, "learning_rate": 3.5331945388981812e-06, "loss": 0.0276, "step": 9364 }, { "epoch": 7.32, "learning_rate": 3.53126319362424e-06, "loss": 0.0498, "step": 9365 }, { "epoch": 7.32, "learning_rate": 3.529332263166464e-06, "loss": 0.0271, "step": 9366 }, { "epoch": 7.32, "learning_rate": 3.5274017476486743e-06, "loss": 0.0362, "step": 9367 }, { "epoch": 7.32, "learning_rate": 3.525471647194668e-06, "loss": 0.0564, "step": 9368 }, { "epoch": 7.32, "learning_rate": 3.5235419619282163e-06, "loss": 0.0466, "step": 9369 }, { "epoch": 7.32, "learning_rate": 3.5216126919730633e-06, "loss": 0.0473, "step": 9370 }, { "epoch": 7.32, "learning_rate": 3.5196838374529243e-06, "loss": 0.0325, "step": 9371 }, { "epoch": 7.32, "learning_rate": 3.517755398491486e-06, "loss": 0.0157, "step": 9372 }, { "epoch": 7.33, "learning_rate": 3.5158273752124238e-06, "loss": 0.0364, "step": 9373 }, { "epoch": 7.33, "learning_rate": 3.5138997677393637e-06, "loss": 0.0445, "step": 9374 }, { "epoch": 7.33, "learning_rate": 3.5119725761959167e-06, "loss": 0.0728, "step": 9375 }, { "epoch": 7.33, "learning_rate": 3.5100458007056724e-06, "loss": 0.017, "step": 9376 }, { "epoch": 7.33, "learning_rate": 3.5081194413921848e-06, "loss": 0.063, "step": 9377 }, { "epoch": 7.33, "learning_rate": 3.506193498378985e-06, "loss": 0.0899, "step": 9378 }, { "epoch": 7.33, "learning_rate": 3.5042679717895745e-06, "loss": 0.0481, "step": 9379 }, { "epoch": 7.33, "learning_rate": 3.502342861747432e-06, "loss": 0.1166, "step": 9380 }, { "epoch": 7.33, "learning_rate": 3.5004181683760075e-06, "loss": 0.0807, "step": 9381 }, { "epoch": 7.33, "learning_rate": 3.4984938917987233e-06, "loss": 0.0301, "step": 9382 }, { "epoch": 7.33, "learning_rate": 3.496570032138974e-06, "loss": 0.0548, "step": 9383 }, { "epoch": 7.33, "learning_rate": 3.4946465895201353e-06, "loss": 0.0423, "step": 9384 }, { "epoch": 7.33, "learning_rate": 3.4927235640655523e-06, "loss": 0.0817, "step": 9385 }, { "epoch": 7.34, "learning_rate": 3.4908009558985324e-06, "loss": 0.0453, "step": 9386 }, { "epoch": 7.34, "learning_rate": 3.4888787651423673e-06, "loss": 0.0472, "step": 9387 }, { "epoch": 7.34, "learning_rate": 3.486956991920325e-06, "loss": 0.0343, "step": 9388 }, { "epoch": 7.34, "learning_rate": 3.4850356363556405e-06, "loss": 0.0716, "step": 9389 }, { "epoch": 7.34, "learning_rate": 3.4831146985715214e-06, "loss": 0.0359, "step": 9390 }, { "epoch": 7.34, "learning_rate": 3.4811941786911506e-06, "loss": 0.0654, "step": 9391 }, { "epoch": 7.34, "learning_rate": 3.479274076837684e-06, "loss": 0.1362, "step": 9392 }, { "epoch": 7.34, "learning_rate": 3.4773543931342513e-06, "loss": 0.0577, "step": 9393 }, { "epoch": 7.34, "learning_rate": 3.4754351277039545e-06, "loss": 0.0789, "step": 9394 }, { "epoch": 7.34, "learning_rate": 3.473516280669865e-06, "loss": 0.0471, "step": 9395 }, { "epoch": 7.34, "learning_rate": 3.471597852155042e-06, "loss": 0.0388, "step": 9396 }, { "epoch": 7.34, "learning_rate": 3.4696798422824973e-06, "loss": 0.104, "step": 9397 }, { "epoch": 7.35, "learning_rate": 3.4677622511752253e-06, "loss": 0.0729, "step": 9398 }, { "epoch": 7.35, "learning_rate": 3.4658450789562017e-06, "loss": 0.0355, "step": 9399 }, { "epoch": 7.35, "learning_rate": 3.4639283257483626e-06, "loss": 0.0664, "step": 9400 }, { "epoch": 7.35, "learning_rate": 3.4620119916746232e-06, "loss": 0.0317, "step": 9401 }, { "epoch": 7.35, "learning_rate": 3.4600960768578706e-06, "loss": 0.0736, "step": 9402 }, { "epoch": 7.35, "learning_rate": 3.458180581420967e-06, "loss": 0.033, "step": 9403 }, { "epoch": 7.35, "learning_rate": 3.4562655054867432e-06, "loss": 0.0417, "step": 9404 }, { "epoch": 7.35, "learning_rate": 3.454350849178009e-06, "loss": 0.0819, "step": 9405 }, { "epoch": 7.35, "learning_rate": 3.452436612617538e-06, "loss": 0.0287, "step": 9406 }, { "epoch": 7.35, "learning_rate": 3.4505227959280953e-06, "loss": 0.0599, "step": 9407 }, { "epoch": 7.35, "learning_rate": 3.4486093992323956e-06, "loss": 0.0212, "step": 9408 }, { "epoch": 7.35, "learning_rate": 3.446696422653142e-06, "loss": 0.0692, "step": 9409 }, { "epoch": 7.35, "learning_rate": 3.4447838663130017e-06, "loss": 0.0593, "step": 9410 }, { "epoch": 7.36, "learning_rate": 3.442871730334629e-06, "loss": 0.0625, "step": 9411 }, { "epoch": 7.36, "learning_rate": 3.4409600148406385e-06, "loss": 0.0266, "step": 9412 }, { "epoch": 7.36, "learning_rate": 3.4390487199536226e-06, "loss": 0.0476, "step": 9413 }, { "epoch": 7.36, "learning_rate": 3.437137845796137e-06, "loss": 0.0623, "step": 9414 }, { "epoch": 7.36, "learning_rate": 3.4352273924907285e-06, "loss": 0.0573, "step": 9415 }, { "epoch": 7.36, "learning_rate": 3.4333173601599055e-06, "loss": 0.0434, "step": 9416 }, { "epoch": 7.36, "learning_rate": 3.43140774892615e-06, "loss": 0.0551, "step": 9417 }, { "epoch": 7.36, "learning_rate": 3.4294985589119144e-06, "loss": 0.0269, "step": 9418 }, { "epoch": 7.36, "learning_rate": 3.427589790239639e-06, "loss": 0.0773, "step": 9419 }, { "epoch": 7.36, "learning_rate": 3.4256814430317165e-06, "loss": 0.0325, "step": 9420 }, { "epoch": 7.36, "learning_rate": 3.423773517410525e-06, "loss": 0.0782, "step": 9421 }, { "epoch": 7.36, "learning_rate": 3.4218660134984082e-06, "loss": 0.0449, "step": 9422 }, { "epoch": 7.36, "learning_rate": 3.419958931417695e-06, "loss": 0.0713, "step": 9423 }, { "epoch": 7.37, "learning_rate": 3.4180522712906807e-06, "loss": 0.023, "step": 9424 }, { "epoch": 7.37, "learning_rate": 3.4161460332396203e-06, "loss": 0.0517, "step": 9425 }, { "epoch": 7.37, "learning_rate": 3.414240217386765e-06, "loss": 0.0479, "step": 9426 }, { "epoch": 7.37, "learning_rate": 3.412334823854323e-06, "loss": 0.0831, "step": 9427 }, { "epoch": 7.37, "learning_rate": 3.410429852764482e-06, "loss": 0.0554, "step": 9428 }, { "epoch": 7.37, "learning_rate": 3.4085253042393994e-06, "loss": 0.0525, "step": 9429 }, { "epoch": 7.37, "learning_rate": 3.4066211784012083e-06, "loss": 0.0583, "step": 9430 }, { "epoch": 7.37, "learning_rate": 3.4047174753720115e-06, "loss": 0.0318, "step": 9431 }, { "epoch": 7.37, "learning_rate": 3.402814195273888e-06, "loss": 0.0644, "step": 9432 }, { "epoch": 7.37, "learning_rate": 3.4009113382288837e-06, "loss": 0.0377, "step": 9433 }, { "epoch": 7.37, "learning_rate": 3.399008904359028e-06, "loss": 0.061, "step": 9434 }, { "epoch": 7.37, "learning_rate": 3.397106893786315e-06, "loss": 0.0682, "step": 9435 }, { "epoch": 7.37, "learning_rate": 3.3952053066327163e-06, "loss": 0.0736, "step": 9436 }, { "epoch": 7.38, "learning_rate": 3.393304143020164e-06, "loss": 0.0531, "step": 9437 }, { "epoch": 7.38, "learning_rate": 3.3914034030705824e-06, "loss": 0.0295, "step": 9438 }, { "epoch": 7.38, "learning_rate": 3.3895030869058554e-06, "loss": 0.0389, "step": 9439 }, { "epoch": 7.38, "learning_rate": 3.387603194647843e-06, "loss": 0.0865, "step": 9440 }, { "epoch": 7.38, "learning_rate": 3.385703726418379e-06, "loss": 0.0882, "step": 9441 }, { "epoch": 7.38, "learning_rate": 3.383804682339269e-06, "loss": 0.0579, "step": 9442 }, { "epoch": 7.38, "learning_rate": 3.3819060625322907e-06, "loss": 0.0228, "step": 9443 }, { "epoch": 7.38, "learning_rate": 3.380007867119197e-06, "loss": 0.0634, "step": 9444 }, { "epoch": 7.38, "learning_rate": 3.3781100962217097e-06, "loss": 0.0327, "step": 9445 }, { "epoch": 7.38, "learning_rate": 3.3762127499615293e-06, "loss": 0.0687, "step": 9446 }, { "epoch": 7.38, "learning_rate": 3.3743158284603284e-06, "loss": 0.0578, "step": 9447 }, { "epoch": 7.38, "learning_rate": 3.372419331839738e-06, "loss": 0.0481, "step": 9448 }, { "epoch": 7.38, "learning_rate": 3.3705232602213833e-06, "loss": 0.0398, "step": 9449 }, { "epoch": 7.39, "learning_rate": 3.3686276137268503e-06, "loss": 0.0645, "step": 9450 }, { "epoch": 7.39, "learning_rate": 3.3667323924776996e-06, "loss": 0.0168, "step": 9451 }, { "epoch": 7.39, "learning_rate": 3.3648375965954637e-06, "loss": 0.0532, "step": 9452 }, { "epoch": 7.39, "learning_rate": 3.3629432262016505e-06, "loss": 0.0775, "step": 9453 }, { "epoch": 7.39, "learning_rate": 3.3610492814177376e-06, "loss": 0.0225, "step": 9454 }, { "epoch": 7.39, "learning_rate": 3.359155762365176e-06, "loss": 0.0613, "step": 9455 }, { "epoch": 7.39, "learning_rate": 3.3572626691653887e-06, "loss": 0.0329, "step": 9456 }, { "epoch": 7.39, "learning_rate": 3.355370001939777e-06, "loss": 0.0559, "step": 9457 }, { "epoch": 7.39, "learning_rate": 3.35347776080971e-06, "loss": 0.016, "step": 9458 }, { "epoch": 7.39, "learning_rate": 3.3515859458965327e-06, "loss": 0.1134, "step": 9459 }, { "epoch": 7.39, "learning_rate": 3.349694557321549e-06, "loss": 0.0743, "step": 9460 }, { "epoch": 7.39, "learning_rate": 3.3478035952060572e-06, "loss": 0.0386, "step": 9461 }, { "epoch": 7.4, "learning_rate": 3.345913059671315e-06, "loss": 0.0755, "step": 9462 }, { "epoch": 7.4, "learning_rate": 3.344022950838556e-06, "loss": 0.075, "step": 9463 }, { "epoch": 7.4, "learning_rate": 3.3421332688289832e-06, "loss": 0.0816, "step": 9464 }, { "epoch": 7.4, "learning_rate": 3.340244013763777e-06, "loss": 0.0317, "step": 9465 }, { "epoch": 7.4, "learning_rate": 3.338355185764088e-06, "loss": 0.102, "step": 9466 }, { "epoch": 7.4, "learning_rate": 3.33646678495104e-06, "loss": 0.0304, "step": 9467 }, { "epoch": 7.4, "learning_rate": 3.334578811445726e-06, "loss": 0.0177, "step": 9468 }, { "epoch": 7.4, "learning_rate": 3.3326912653692202e-06, "loss": 0.0526, "step": 9469 }, { "epoch": 7.4, "learning_rate": 3.3308041468425644e-06, "loss": 0.0441, "step": 9470 }, { "epoch": 7.4, "learning_rate": 3.3289174559867632e-06, "loss": 0.0712, "step": 9471 }, { "epoch": 7.4, "learning_rate": 3.327031192922813e-06, "loss": 0.0435, "step": 9472 }, { "epoch": 7.4, "learning_rate": 3.3251453577716697e-06, "loss": 0.0478, "step": 9473 }, { "epoch": 7.4, "learning_rate": 3.323259950654264e-06, "loss": 0.0873, "step": 9474 }, { "epoch": 7.41, "learning_rate": 3.3213749716915e-06, "loss": 0.0229, "step": 9475 }, { "epoch": 7.41, "learning_rate": 3.3194904210042555e-06, "loss": 0.0582, "step": 9476 }, { "epoch": 7.41, "learning_rate": 3.3176062987133796e-06, "loss": 0.0531, "step": 9477 }, { "epoch": 7.41, "learning_rate": 3.315722604939693e-06, "loss": 0.0511, "step": 9478 }, { "epoch": 7.41, "learning_rate": 3.313839339803988e-06, "loss": 0.0379, "step": 9479 }, { "epoch": 7.41, "learning_rate": 3.3119565034270373e-06, "loss": 0.0884, "step": 9480 }, { "epoch": 7.41, "learning_rate": 3.3100740959295806e-06, "loss": 0.0449, "step": 9481 }, { "epoch": 7.41, "learning_rate": 3.308192117432324e-06, "loss": 0.1526, "step": 9482 }, { "epoch": 7.41, "learning_rate": 3.3063105680559493e-06, "loss": 0.0529, "step": 9483 }, { "epoch": 7.41, "learning_rate": 3.3044294479211223e-06, "loss": 0.0318, "step": 9484 }, { "epoch": 7.41, "learning_rate": 3.302548757148467e-06, "loss": 0.0337, "step": 9485 }, { "epoch": 7.41, "learning_rate": 3.300668495858588e-06, "loss": 0.0731, "step": 9486 }, { "epoch": 7.41, "learning_rate": 3.298788664172057e-06, "loss": 0.0471, "step": 9487 }, { "epoch": 7.42, "learning_rate": 3.29690926220942e-06, "loss": 0.1017, "step": 9488 }, { "epoch": 7.42, "learning_rate": 3.295030290091199e-06, "loss": 0.042, "step": 9489 }, { "epoch": 7.42, "learning_rate": 3.2931517479378836e-06, "loss": 0.026, "step": 9490 }, { "epoch": 7.42, "learning_rate": 3.291273635869935e-06, "loss": 0.0296, "step": 9491 }, { "epoch": 7.42, "learning_rate": 3.289395954007796e-06, "loss": 0.0493, "step": 9492 }, { "epoch": 7.42, "learning_rate": 3.2875187024718746e-06, "loss": 0.0248, "step": 9493 }, { "epoch": 7.42, "learning_rate": 3.2856418813825473e-06, "loss": 0.0196, "step": 9494 }, { "epoch": 7.42, "learning_rate": 3.283765490860167e-06, "loss": 0.0482, "step": 9495 }, { "epoch": 7.42, "learning_rate": 3.2818895310250642e-06, "loss": 0.0549, "step": 9496 }, { "epoch": 7.42, "learning_rate": 3.280014001997537e-06, "loss": 0.0967, "step": 9497 }, { "epoch": 7.42, "learning_rate": 3.278138903897855e-06, "loss": 0.0687, "step": 9498 }, { "epoch": 7.42, "learning_rate": 3.2762642368462606e-06, "loss": 0.1646, "step": 9499 }, { "epoch": 7.42, "learning_rate": 3.2743900009629694e-06, "loss": 0.0277, "step": 9500 }, { "epoch": 7.43, "learning_rate": 3.272516196368171e-06, "loss": 0.0907, "step": 9501 }, { "epoch": 7.43, "learning_rate": 3.270642823182022e-06, "loss": 0.0192, "step": 9502 }, { "epoch": 7.43, "learning_rate": 3.268769881524655e-06, "loss": 0.0421, "step": 9503 }, { "epoch": 7.43, "learning_rate": 3.2668973715161835e-06, "loss": 0.072, "step": 9504 }, { "epoch": 7.43, "learning_rate": 3.2650252932766747e-06, "loss": 0.0604, "step": 9505 }, { "epoch": 7.43, "learning_rate": 3.263153646926178e-06, "loss": 0.0989, "step": 9506 }, { "epoch": 7.43, "learning_rate": 3.2612824325847215e-06, "loss": 0.0619, "step": 9507 }, { "epoch": 7.43, "learning_rate": 3.2594116503722974e-06, "loss": 0.0519, "step": 9508 }, { "epoch": 7.43, "learning_rate": 3.2575413004088696e-06, "loss": 0.021, "step": 9509 }, { "epoch": 7.43, "learning_rate": 3.2556713828143783e-06, "loss": 0.0537, "step": 9510 }, { "epoch": 7.43, "learning_rate": 3.253801897708735e-06, "loss": 0.0641, "step": 9511 }, { "epoch": 7.43, "learning_rate": 3.2519328452118215e-06, "loss": 0.0806, "step": 9512 }, { "epoch": 7.43, "learning_rate": 3.2500642254434935e-06, "loss": 0.0781, "step": 9513 }, { "epoch": 7.44, "learning_rate": 3.2481960385235755e-06, "loss": 0.0549, "step": 9514 }, { "epoch": 7.44, "learning_rate": 3.246328284571878e-06, "loss": 0.0301, "step": 9515 }, { "epoch": 7.44, "learning_rate": 3.2444609637081627e-06, "loss": 0.0378, "step": 9516 }, { "epoch": 7.44, "learning_rate": 3.242594076052177e-06, "loss": 0.0955, "step": 9517 }, { "epoch": 7.44, "learning_rate": 3.2407276217236337e-06, "loss": 0.042, "step": 9518 }, { "epoch": 7.44, "learning_rate": 3.238861600842229e-06, "loss": 0.0196, "step": 9519 }, { "epoch": 7.44, "learning_rate": 3.2369960135276214e-06, "loss": 0.034, "step": 9520 }, { "epoch": 7.44, "learning_rate": 3.2351308598994435e-06, "loss": 0.0567, "step": 9521 }, { "epoch": 7.44, "learning_rate": 3.2332661400773002e-06, "loss": 0.0584, "step": 9522 }, { "epoch": 7.44, "learning_rate": 3.231401854180769e-06, "loss": 0.0676, "step": 9523 }, { "epoch": 7.44, "learning_rate": 3.2295380023294e-06, "loss": 0.0283, "step": 9524 }, { "epoch": 7.44, "learning_rate": 3.227674584642716e-06, "loss": 0.0548, "step": 9525 }, { "epoch": 7.45, "learning_rate": 3.2258116012402064e-06, "loss": 0.0715, "step": 9526 }, { "epoch": 7.45, "learning_rate": 3.2239490522413474e-06, "loss": 0.0179, "step": 9527 }, { "epoch": 7.45, "learning_rate": 3.2220869377655683e-06, "loss": 0.052, "step": 9528 }, { "epoch": 7.45, "learning_rate": 3.22022525793228e-06, "loss": 0.0612, "step": 9529 }, { "epoch": 7.45, "learning_rate": 3.218364012860871e-06, "loss": 0.0207, "step": 9530 }, { "epoch": 7.45, "learning_rate": 3.216503202670692e-06, "loss": 0.0308, "step": 9531 }, { "epoch": 7.45, "learning_rate": 3.2146428274810704e-06, "loss": 0.0962, "step": 9532 }, { "epoch": 7.45, "learning_rate": 3.2127828874113063e-06, "loss": 0.0785, "step": 9533 }, { "epoch": 7.45, "learning_rate": 3.2109233825806694e-06, "loss": 0.079, "step": 9534 }, { "epoch": 7.45, "learning_rate": 3.2090643131084032e-06, "loss": 0.0583, "step": 9535 }, { "epoch": 7.45, "learning_rate": 3.207205679113724e-06, "loss": 0.0414, "step": 9536 }, { "epoch": 7.45, "learning_rate": 3.2053474807158147e-06, "loss": 0.0486, "step": 9537 }, { "epoch": 7.45, "learning_rate": 3.203489718033844e-06, "loss": 0.0368, "step": 9538 }, { "epoch": 7.46, "learning_rate": 3.201632391186935e-06, "loss": 0.0197, "step": 9539 }, { "epoch": 7.46, "learning_rate": 3.199775500294193e-06, "loss": 0.0497, "step": 9540 }, { "epoch": 7.46, "learning_rate": 3.1979190454746924e-06, "loss": 0.081, "step": 9541 }, { "epoch": 7.46, "learning_rate": 3.196063026847486e-06, "loss": 0.0719, "step": 9542 }, { "epoch": 7.46, "learning_rate": 3.1942074445315883e-06, "loss": 0.0527, "step": 9543 }, { "epoch": 7.46, "learning_rate": 3.1923522986459944e-06, "loss": 0.0328, "step": 9544 }, { "epoch": 7.46, "learning_rate": 3.1904975893096657e-06, "loss": 0.0753, "step": 9545 }, { "epoch": 7.46, "learning_rate": 3.188643316641539e-06, "loss": 0.0239, "step": 9546 }, { "epoch": 7.46, "learning_rate": 3.1867894807605205e-06, "loss": 0.0836, "step": 9547 }, { "epoch": 7.46, "learning_rate": 3.18493608178549e-06, "loss": 0.0633, "step": 9548 }, { "epoch": 7.46, "learning_rate": 3.1830831198352985e-06, "loss": 0.0304, "step": 9549 }, { "epoch": 7.46, "learning_rate": 3.181230595028777e-06, "loss": 0.0547, "step": 9550 }, { "epoch": 7.46, "learning_rate": 3.1793785074847104e-06, "loss": 0.0711, "step": 9551 }, { "epoch": 7.47, "learning_rate": 3.1775268573218687e-06, "loss": 0.0556, "step": 9552 }, { "epoch": 7.47, "learning_rate": 3.1756756446589977e-06, "loss": 0.0622, "step": 9553 }, { "epoch": 7.47, "learning_rate": 3.173824869614803e-06, "loss": 0.0453, "step": 9554 }, { "epoch": 7.47, "learning_rate": 3.171974532307973e-06, "loss": 0.0318, "step": 9555 }, { "epoch": 7.47, "learning_rate": 3.1701246328571545e-06, "loss": 0.0811, "step": 9556 }, { "epoch": 7.47, "learning_rate": 3.168275171380981e-06, "loss": 0.0798, "step": 9557 }, { "epoch": 7.47, "learning_rate": 3.1664261479980517e-06, "loss": 0.0886, "step": 9558 }, { "epoch": 7.47, "learning_rate": 3.164577562826936e-06, "loss": 0.0197, "step": 9559 }, { "epoch": 7.47, "learning_rate": 3.1627294159861745e-06, "loss": 0.0795, "step": 9560 }, { "epoch": 7.47, "learning_rate": 3.16088170759429e-06, "loss": 0.0185, "step": 9561 }, { "epoch": 7.47, "learning_rate": 3.159034437769761e-06, "loss": 0.0526, "step": 9562 }, { "epoch": 7.47, "learning_rate": 3.1571876066310503e-06, "loss": 0.0469, "step": 9563 }, { "epoch": 7.47, "learning_rate": 3.155341214296582e-06, "loss": 0.1348, "step": 9564 }, { "epoch": 7.48, "learning_rate": 3.153495260884768e-06, "loss": 0.0569, "step": 9565 }, { "epoch": 7.48, "learning_rate": 3.1516497465139773e-06, "loss": 0.0855, "step": 9566 }, { "epoch": 7.48, "learning_rate": 3.1498046713025564e-06, "loss": 0.1005, "step": 9567 }, { "epoch": 7.48, "learning_rate": 3.1479600353688233e-06, "loss": 0.043, "step": 9568 }, { "epoch": 7.48, "learning_rate": 3.1461158388310675e-06, "loss": 0.0543, "step": 9569 }, { "epoch": 7.48, "learning_rate": 3.144272081807551e-06, "loss": 0.0386, "step": 9570 }, { "epoch": 7.48, "learning_rate": 3.1424287644165063e-06, "loss": 0.0915, "step": 9571 }, { "epoch": 7.48, "learning_rate": 3.1405858867761396e-06, "loss": 0.0775, "step": 9572 }, { "epoch": 7.48, "learning_rate": 3.1387434490046274e-06, "loss": 0.0686, "step": 9573 }, { "epoch": 7.48, "learning_rate": 3.136901451220118e-06, "loss": 0.0668, "step": 9574 }, { "epoch": 7.48, "learning_rate": 3.135059893540732e-06, "loss": 0.0606, "step": 9575 }, { "epoch": 7.48, "learning_rate": 3.133218776084558e-06, "loss": 0.0423, "step": 9576 }, { "epoch": 7.48, "learning_rate": 3.1313780989696674e-06, "loss": 0.0216, "step": 9577 }, { "epoch": 7.49, "learning_rate": 3.129537862314096e-06, "loss": 0.0257, "step": 9578 }, { "epoch": 7.49, "learning_rate": 3.127698066235841e-06, "loss": 0.0627, "step": 9579 }, { "epoch": 7.49, "learning_rate": 3.1258587108528927e-06, "loss": 0.0786, "step": 9580 }, { "epoch": 7.49, "learning_rate": 3.1240197962831965e-06, "loss": 0.1184, "step": 9581 }, { "epoch": 7.49, "learning_rate": 3.1221813226446774e-06, "loss": 0.0286, "step": 9582 }, { "epoch": 7.49, "learning_rate": 3.12034329005523e-06, "loss": 0.0368, "step": 9583 }, { "epoch": 7.49, "learning_rate": 3.118505698632719e-06, "loss": 0.0611, "step": 9584 }, { "epoch": 7.49, "learning_rate": 3.116668548494983e-06, "loss": 0.0794, "step": 9585 }, { "epoch": 7.49, "learning_rate": 3.114831839759832e-06, "loss": 0.0785, "step": 9586 }, { "epoch": 7.49, "learning_rate": 3.1129955725450447e-06, "loss": 0.0578, "step": 9587 }, { "epoch": 7.49, "learning_rate": 3.1111597469683787e-06, "loss": 0.0638, "step": 9588 }, { "epoch": 7.49, "learning_rate": 3.109324363147559e-06, "loss": 0.0645, "step": 9589 }, { "epoch": 7.5, "learning_rate": 3.107489421200277e-06, "loss": 0.0428, "step": 9590 }, { "epoch": 7.5, "learning_rate": 3.105654921244201e-06, "loss": 0.0995, "step": 9591 }, { "epoch": 7.5, "learning_rate": 3.1038208633969745e-06, "loss": 0.1024, "step": 9592 }, { "epoch": 7.5, "learning_rate": 3.1019872477762093e-06, "loss": 0.0335, "step": 9593 }, { "epoch": 7.5, "learning_rate": 3.100154074499484e-06, "loss": 0.0718, "step": 9594 }, { "epoch": 7.5, "learning_rate": 3.0983213436843574e-06, "loss": 0.0287, "step": 9595 }, { "epoch": 7.5, "learning_rate": 3.096489055448353e-06, "loss": 0.0661, "step": 9596 }, { "epoch": 7.5, "learning_rate": 3.09465720990897e-06, "loss": 0.0624, "step": 9597 }, { "epoch": 7.5, "learning_rate": 3.092825807183677e-06, "loss": 0.0713, "step": 9598 }, { "epoch": 7.5, "learning_rate": 3.090994847389912e-06, "loss": 0.0341, "step": 9599 }, { "epoch": 7.5, "learning_rate": 3.0891643306450956e-06, "loss": 0.0422, "step": 9600 }, { "epoch": 7.5, "learning_rate": 3.08733425706661e-06, "loss": 0.0276, "step": 9601 }, { "epoch": 7.5, "learning_rate": 3.0855046267718024e-06, "loss": 0.0318, "step": 9602 }, { "epoch": 7.51, "learning_rate": 3.0836754398780112e-06, "loss": 0.1244, "step": 9603 }, { "epoch": 7.51, "learning_rate": 3.0818466965025297e-06, "loss": 0.061, "step": 9604 }, { "epoch": 7.51, "learning_rate": 3.0800183967626286e-06, "loss": 0.0432, "step": 9605 }, { "epoch": 7.51, "learning_rate": 3.078190540775552e-06, "loss": 0.0345, "step": 9606 }, { "epoch": 7.51, "learning_rate": 3.0763631286585118e-06, "loss": 0.0815, "step": 9607 }, { "epoch": 7.51, "learning_rate": 3.0745361605286937e-06, "loss": 0.1288, "step": 9608 }, { "epoch": 7.51, "learning_rate": 3.072709636503256e-06, "loss": 0.0322, "step": 9609 }, { "epoch": 7.51, "learning_rate": 3.070883556699321e-06, "loss": 0.0578, "step": 9610 }, { "epoch": 7.51, "learning_rate": 3.0690579212339968e-06, "loss": 0.0394, "step": 9611 }, { "epoch": 7.51, "learning_rate": 3.067232730224353e-06, "loss": 0.0223, "step": 9612 }, { "epoch": 7.51, "learning_rate": 3.0654079837874274e-06, "loss": 0.0426, "step": 9613 }, { "epoch": 7.51, "learning_rate": 3.063583682040234e-06, "loss": 0.0972, "step": 9614 }, { "epoch": 7.51, "learning_rate": 3.0617598250997647e-06, "loss": 0.0574, "step": 9615 }, { "epoch": 7.52, "learning_rate": 3.059936413082973e-06, "loss": 0.0723, "step": 9616 }, { "epoch": 7.52, "learning_rate": 3.0581134461067887e-06, "loss": 0.102, "step": 9617 }, { "epoch": 7.52, "learning_rate": 3.05629092428811e-06, "loss": 0.0304, "step": 9618 }, { "epoch": 7.52, "learning_rate": 3.0544688477438113e-06, "loss": 0.0565, "step": 9619 }, { "epoch": 7.52, "learning_rate": 3.052647216590733e-06, "loss": 0.0956, "step": 9620 }, { "epoch": 7.52, "learning_rate": 3.0508260309456905e-06, "loss": 0.0215, "step": 9621 }, { "epoch": 7.52, "learning_rate": 3.0490052909254676e-06, "loss": 0.0342, "step": 9622 }, { "epoch": 7.52, "learning_rate": 3.047184996646827e-06, "loss": 0.0518, "step": 9623 }, { "epoch": 7.52, "learning_rate": 3.045365148226497e-06, "loss": 0.0299, "step": 9624 }, { "epoch": 7.52, "learning_rate": 3.043545745781168e-06, "loss": 0.0888, "step": 9625 }, { "epoch": 7.52, "learning_rate": 3.0417267894275226e-06, "loss": 0.0551, "step": 9626 }, { "epoch": 7.52, "learning_rate": 3.0399082792822e-06, "loss": 0.0738, "step": 9627 }, { "epoch": 7.52, "learning_rate": 3.038090215461814e-06, "loss": 0.0345, "step": 9628 }, { "epoch": 7.53, "learning_rate": 3.03627259808295e-06, "loss": 0.0264, "step": 9629 }, { "epoch": 7.53, "learning_rate": 3.034455427262166e-06, "loss": 0.0711, "step": 9630 }, { "epoch": 7.53, "learning_rate": 3.032638703115991e-06, "loss": 0.0391, "step": 9631 }, { "epoch": 7.53, "learning_rate": 3.030822425760923e-06, "loss": 0.0653, "step": 9632 }, { "epoch": 7.53, "learning_rate": 3.029006595313432e-06, "loss": 0.0646, "step": 9633 }, { "epoch": 7.53, "learning_rate": 3.027191211889964e-06, "loss": 0.0614, "step": 9634 }, { "epoch": 7.53, "learning_rate": 3.025376275606935e-06, "loss": 0.0328, "step": 9635 }, { "epoch": 7.53, "learning_rate": 3.0235617865807243e-06, "loss": 0.0901, "step": 9636 }, { "epoch": 7.53, "learning_rate": 3.0217477449276868e-06, "loss": 0.1334, "step": 9637 }, { "epoch": 7.53, "learning_rate": 3.019934150764157e-06, "loss": 0.0444, "step": 9638 }, { "epoch": 7.53, "learning_rate": 3.018121004206431e-06, "loss": 0.0389, "step": 9639 }, { "epoch": 7.53, "learning_rate": 3.01630830537078e-06, "loss": 0.1201, "step": 9640 }, { "epoch": 7.53, "learning_rate": 3.0144960543734437e-06, "loss": 0.0383, "step": 9641 }, { "epoch": 7.54, "learning_rate": 3.0126842513306377e-06, "loss": 0.0364, "step": 9642 }, { "epoch": 7.54, "learning_rate": 3.0108728963585433e-06, "loss": 0.0526, "step": 9643 }, { "epoch": 7.54, "learning_rate": 3.0090619895733187e-06, "loss": 0.0519, "step": 9644 }, { "epoch": 7.54, "learning_rate": 3.0072515310910853e-06, "loss": 0.0549, "step": 9645 }, { "epoch": 7.54, "learning_rate": 3.005441521027952e-06, "loss": 0.0491, "step": 9646 }, { "epoch": 7.54, "learning_rate": 3.0036319594999785e-06, "loss": 0.1046, "step": 9647 }, { "epoch": 7.54, "learning_rate": 3.0018228466232048e-06, "loss": 0.0602, "step": 9648 }, { "epoch": 7.54, "learning_rate": 3.0000141825136485e-06, "loss": 0.0634, "step": 9649 }, { "epoch": 7.54, "learning_rate": 2.998205967287291e-06, "loss": 0.0401, "step": 9650 }, { "epoch": 7.54, "learning_rate": 2.9963982010600847e-06, "loss": 0.0671, "step": 9651 }, { "epoch": 7.54, "learning_rate": 2.9945908839479554e-06, "loss": 0.0611, "step": 9652 }, { "epoch": 7.54, "learning_rate": 2.9927840160668e-06, "loss": 0.1181, "step": 9653 }, { "epoch": 7.55, "learning_rate": 2.990977597532486e-06, "loss": 0.0681, "step": 9654 }, { "epoch": 7.55, "learning_rate": 2.989171628460853e-06, "loss": 0.0501, "step": 9655 }, { "epoch": 7.55, "learning_rate": 2.9873661089677096e-06, "loss": 0.0314, "step": 9656 }, { "epoch": 7.55, "learning_rate": 2.985561039168836e-06, "loss": 0.0419, "step": 9657 }, { "epoch": 7.55, "learning_rate": 2.983756419179993e-06, "loss": 0.0524, "step": 9658 }, { "epoch": 7.55, "learning_rate": 2.981952249116895e-06, "loss": 0.059, "step": 9659 }, { "epoch": 7.55, "learning_rate": 2.980148529095236e-06, "loss": 0.0856, "step": 9660 }, { "epoch": 7.55, "learning_rate": 2.9783452592306885e-06, "loss": 0.0667, "step": 9661 }, { "epoch": 7.55, "learning_rate": 2.9765424396388874e-06, "loss": 0.0749, "step": 9662 }, { "epoch": 7.55, "learning_rate": 2.9747400704354422e-06, "loss": 0.0675, "step": 9663 }, { "epoch": 7.55, "learning_rate": 2.9729381517359246e-06, "loss": 0.0478, "step": 9664 }, { "epoch": 7.55, "learning_rate": 2.971136683655893e-06, "loss": 0.0567, "step": 9665 }, { "epoch": 7.55, "learning_rate": 2.9693356663108675e-06, "loss": 0.0388, "step": 9666 }, { "epoch": 7.56, "learning_rate": 2.967535099816339e-06, "loss": 0.0869, "step": 9667 }, { "epoch": 7.56, "learning_rate": 2.965734984287768e-06, "loss": 0.0547, "step": 9668 }, { "epoch": 7.56, "learning_rate": 2.9639353198405997e-06, "loss": 0.0504, "step": 9669 }, { "epoch": 7.56, "learning_rate": 2.962136106590229e-06, "loss": 0.0719, "step": 9670 }, { "epoch": 7.56, "learning_rate": 2.9603373446520377e-06, "loss": 0.0216, "step": 9671 }, { "epoch": 7.56, "learning_rate": 2.95853903414137e-06, "loss": 0.0761, "step": 9672 }, { "epoch": 7.56, "learning_rate": 2.9567411751735507e-06, "loss": 0.0416, "step": 9673 }, { "epoch": 7.56, "learning_rate": 2.9549437678638672e-06, "loss": 0.0648, "step": 9674 }, { "epoch": 7.56, "learning_rate": 2.9531468123275795e-06, "loss": 0.0591, "step": 9675 }, { "epoch": 7.56, "learning_rate": 2.9513503086799222e-06, "loss": 0.0222, "step": 9676 }, { "epoch": 7.56, "learning_rate": 2.949554257036096e-06, "loss": 0.053, "step": 9677 }, { "epoch": 7.56, "learning_rate": 2.947758657511276e-06, "loss": 0.0454, "step": 9678 }, { "epoch": 7.56, "learning_rate": 2.9459635102206074e-06, "loss": 0.0458, "step": 9679 }, { "epoch": 7.57, "learning_rate": 2.944168815279206e-06, "loss": 0.0649, "step": 9680 }, { "epoch": 7.57, "learning_rate": 2.942374572802159e-06, "loss": 0.1463, "step": 9681 }, { "epoch": 7.57, "learning_rate": 2.940580782904525e-06, "loss": 0.114, "step": 9682 }, { "epoch": 7.57, "learning_rate": 2.93878744570133e-06, "loss": 0.0599, "step": 9683 }, { "epoch": 7.57, "learning_rate": 2.936994561307579e-06, "loss": 0.0634, "step": 9684 }, { "epoch": 7.57, "learning_rate": 2.9352021298382415e-06, "loss": 0.0593, "step": 9685 }, { "epoch": 7.57, "learning_rate": 2.9334101514082625e-06, "loss": 0.0632, "step": 9686 }, { "epoch": 7.57, "learning_rate": 2.931618626132545e-06, "loss": 0.023, "step": 9687 }, { "epoch": 7.57, "learning_rate": 2.9298275541259823e-06, "loss": 0.0822, "step": 9688 }, { "epoch": 7.57, "learning_rate": 2.9280369355034256e-06, "loss": 0.0428, "step": 9689 }, { "epoch": 7.57, "learning_rate": 2.9262467703797017e-06, "loss": 0.0742, "step": 9690 }, { "epoch": 7.57, "learning_rate": 2.924457058869603e-06, "loss": 0.0563, "step": 9691 }, { "epoch": 7.57, "learning_rate": 2.9226678010879084e-06, "loss": 0.0565, "step": 9692 }, { "epoch": 7.58, "learning_rate": 2.9208789971493457e-06, "loss": 0.0738, "step": 9693 }, { "epoch": 7.58, "learning_rate": 2.9190906471686262e-06, "loss": 0.059, "step": 9694 }, { "epoch": 7.58, "learning_rate": 2.917302751260429e-06, "loss": 0.0631, "step": 9695 }, { "epoch": 7.58, "learning_rate": 2.9155153095394107e-06, "loss": 0.0378, "step": 9696 }, { "epoch": 7.58, "learning_rate": 2.9137283221201908e-06, "loss": 0.046, "step": 9697 }, { "epoch": 7.58, "learning_rate": 2.9119417891173605e-06, "loss": 0.0284, "step": 9698 }, { "epoch": 7.58, "learning_rate": 2.910155710645486e-06, "loss": 0.0699, "step": 9699 }, { "epoch": 7.58, "learning_rate": 2.9083700868191e-06, "loss": 0.0541, "step": 9700 }, { "epoch": 7.58, "learning_rate": 2.9065849177527083e-06, "loss": 0.0935, "step": 9701 }, { "epoch": 7.58, "learning_rate": 2.904800203560788e-06, "loss": 0.0414, "step": 9702 }, { "epoch": 7.58, "learning_rate": 2.9030159443577852e-06, "loss": 0.103, "step": 9703 }, { "epoch": 7.58, "learning_rate": 2.90123214025812e-06, "loss": 0.0737, "step": 9704 }, { "epoch": 7.58, "learning_rate": 2.899448791376178e-06, "loss": 0.048, "step": 9705 }, { "epoch": 7.59, "learning_rate": 2.897665897826317e-06, "loss": 0.0676, "step": 9706 }, { "epoch": 7.59, "learning_rate": 2.8958834597228748e-06, "loss": 0.0835, "step": 9707 }, { "epoch": 7.59, "learning_rate": 2.894101477180148e-06, "loss": 0.0499, "step": 9708 }, { "epoch": 7.59, "learning_rate": 2.892319950312411e-06, "loss": 0.051, "step": 9709 }, { "epoch": 7.59, "learning_rate": 2.8905388792338995e-06, "loss": 0.081, "step": 9710 }, { "epoch": 7.59, "learning_rate": 2.8887582640588353e-06, "loss": 0.0421, "step": 9711 }, { "epoch": 7.59, "learning_rate": 2.886978104901398e-06, "loss": 0.0861, "step": 9712 }, { "epoch": 7.59, "learning_rate": 2.885198401875745e-06, "loss": 0.1053, "step": 9713 }, { "epoch": 7.59, "learning_rate": 2.883419155095998e-06, "loss": 0.0411, "step": 9714 }, { "epoch": 7.59, "learning_rate": 2.8816403646762626e-06, "loss": 0.0868, "step": 9715 }, { "epoch": 7.59, "learning_rate": 2.879862030730596e-06, "loss": 0.0493, "step": 9716 }, { "epoch": 7.59, "learning_rate": 2.878084153373041e-06, "loss": 0.0719, "step": 9717 }, { "epoch": 7.6, "learning_rate": 2.876306732717602e-06, "loss": 0.0242, "step": 9718 }, { "epoch": 7.6, "learning_rate": 2.8745297688782658e-06, "loss": 0.0576, "step": 9719 }, { "epoch": 7.6, "learning_rate": 2.872753261968981e-06, "loss": 0.039, "step": 9720 }, { "epoch": 7.6, "learning_rate": 2.8709772121036604e-06, "loss": 0.0233, "step": 9721 }, { "epoch": 7.6, "learning_rate": 2.8692016193962035e-06, "loss": 0.0787, "step": 9722 }, { "epoch": 7.6, "learning_rate": 2.8674264839604705e-06, "loss": 0.0571, "step": 9723 }, { "epoch": 7.6, "learning_rate": 2.8656518059102944e-06, "loss": 0.0753, "step": 9724 }, { "epoch": 7.6, "learning_rate": 2.863877585359478e-06, "loss": 0.0669, "step": 9725 }, { "epoch": 7.6, "learning_rate": 2.8621038224217955e-06, "loss": 0.0316, "step": 9726 }, { "epoch": 7.6, "learning_rate": 2.8603305172109917e-06, "loss": 0.1045, "step": 9727 }, { "epoch": 7.6, "learning_rate": 2.8585576698407824e-06, "loss": 0.0325, "step": 9728 }, { "epoch": 7.6, "learning_rate": 2.8567852804248542e-06, "loss": 0.0543, "step": 9729 }, { "epoch": 7.6, "learning_rate": 2.8550133490768604e-06, "loss": 0.0163, "step": 9730 }, { "epoch": 7.61, "learning_rate": 2.8532418759104342e-06, "loss": 0.0554, "step": 9731 }, { "epoch": 7.61, "learning_rate": 2.851470861039174e-06, "loss": 0.0576, "step": 9732 }, { "epoch": 7.61, "learning_rate": 2.849700304576639e-06, "loss": 0.1282, "step": 9733 }, { "epoch": 7.61, "learning_rate": 2.847930206636378e-06, "loss": 0.0227, "step": 9734 }, { "epoch": 7.61, "learning_rate": 2.846160567331897e-06, "loss": 0.0476, "step": 9735 }, { "epoch": 7.61, "learning_rate": 2.844391386776678e-06, "loss": 0.057, "step": 9736 }, { "epoch": 7.61, "learning_rate": 2.8426226650841706e-06, "loss": 0.1002, "step": 9737 }, { "epoch": 7.61, "learning_rate": 2.8408544023677963e-06, "loss": 0.0559, "step": 9738 }, { "epoch": 7.61, "learning_rate": 2.8390865987409475e-06, "loss": 0.055, "step": 9739 }, { "epoch": 7.61, "learning_rate": 2.837319254316988e-06, "loss": 0.0337, "step": 9740 }, { "epoch": 7.61, "learning_rate": 2.8355523692092467e-06, "loss": 0.0612, "step": 9741 }, { "epoch": 7.61, "learning_rate": 2.8337859435310344e-06, "loss": 0.0929, "step": 9742 }, { "epoch": 7.61, "learning_rate": 2.8320199773956246e-06, "loss": 0.0767, "step": 9743 }, { "epoch": 7.62, "learning_rate": 2.830254470916256e-06, "loss": 0.0758, "step": 9744 }, { "epoch": 7.62, "learning_rate": 2.8284894242061444e-06, "loss": 0.0728, "step": 9745 }, { "epoch": 7.62, "learning_rate": 2.826724837378483e-06, "loss": 0.0709, "step": 9746 }, { "epoch": 7.62, "learning_rate": 2.8249607105464227e-06, "loss": 0.1286, "step": 9747 }, { "epoch": 7.62, "learning_rate": 2.8231970438230927e-06, "loss": 0.0915, "step": 9748 }, { "epoch": 7.62, "learning_rate": 2.821433837321589e-06, "loss": 0.0217, "step": 9749 }, { "epoch": 7.62, "learning_rate": 2.819671091154981e-06, "loss": 0.0839, "step": 9750 }, { "epoch": 7.62, "learning_rate": 2.8179088054363048e-06, "loss": 0.032, "step": 9751 }, { "epoch": 7.62, "learning_rate": 2.816146980278571e-06, "loss": 0.0521, "step": 9752 }, { "epoch": 7.62, "learning_rate": 2.814385615794756e-06, "loss": 0.0952, "step": 9753 }, { "epoch": 7.62, "learning_rate": 2.812624712097817e-06, "loss": 0.0826, "step": 9754 }, { "epoch": 7.62, "learning_rate": 2.810864269300667e-06, "loss": 0.0469, "step": 9755 }, { "epoch": 7.62, "learning_rate": 2.8091042875161958e-06, "loss": 0.045, "step": 9756 }, { "epoch": 7.63, "learning_rate": 2.807344766857271e-06, "loss": 0.0619, "step": 9757 }, { "epoch": 7.63, "learning_rate": 2.805585707436721e-06, "loss": 0.0783, "step": 9758 }, { "epoch": 7.63, "learning_rate": 2.8038271093673473e-06, "loss": 0.1014, "step": 9759 }, { "epoch": 7.63, "learning_rate": 2.8020689727619234e-06, "loss": 0.134, "step": 9760 }, { "epoch": 7.63, "learning_rate": 2.8003112977331903e-06, "loss": 0.0269, "step": 9761 }, { "epoch": 7.63, "learning_rate": 2.798554084393862e-06, "loss": 0.0574, "step": 9762 }, { "epoch": 7.63, "learning_rate": 2.796797332856623e-06, "loss": 0.0539, "step": 9763 }, { "epoch": 7.63, "learning_rate": 2.7950410432341236e-06, "loss": 0.0843, "step": 9764 }, { "epoch": 7.63, "learning_rate": 2.7932852156389943e-06, "loss": 0.0617, "step": 9765 }, { "epoch": 7.63, "learning_rate": 2.7915298501838294e-06, "loss": 0.0983, "step": 9766 }, { "epoch": 7.63, "learning_rate": 2.7897749469811885e-06, "loss": 0.0378, "step": 9767 }, { "epoch": 7.63, "learning_rate": 2.788020506143607e-06, "loss": 0.0626, "step": 9768 }, { "epoch": 7.64, "learning_rate": 2.7862665277835954e-06, "loss": 0.0245, "step": 9769 }, { "epoch": 7.64, "learning_rate": 2.78451301201363e-06, "loss": 0.0955, "step": 9770 }, { "epoch": 7.64, "learning_rate": 2.782759958946154e-06, "loss": 0.0371, "step": 9771 }, { "epoch": 7.64, "learning_rate": 2.7810073686935846e-06, "loss": 0.0348, "step": 9772 }, { "epoch": 7.64, "learning_rate": 2.7792552413683115e-06, "loss": 0.0417, "step": 9773 }, { "epoch": 7.64, "learning_rate": 2.7775035770826897e-06, "loss": 0.0765, "step": 9774 }, { "epoch": 7.64, "learning_rate": 2.775752375949048e-06, "loss": 0.0206, "step": 9775 }, { "epoch": 7.64, "learning_rate": 2.7740016380796807e-06, "loss": 0.1294, "step": 9776 }, { "epoch": 7.64, "learning_rate": 2.7722513635868655e-06, "loss": 0.0447, "step": 9777 }, { "epoch": 7.64, "learning_rate": 2.770501552582833e-06, "loss": 0.0351, "step": 9778 }, { "epoch": 7.64, "learning_rate": 2.7687522051797897e-06, "loss": 0.0429, "step": 9779 }, { "epoch": 7.64, "learning_rate": 2.767003321489924e-06, "loss": 0.0623, "step": 9780 }, { "epoch": 7.64, "learning_rate": 2.7652549016253795e-06, "loss": 0.0215, "step": 9781 }, { "epoch": 7.65, "learning_rate": 2.763506945698278e-06, "loss": 0.0899, "step": 9782 }, { "epoch": 7.65, "learning_rate": 2.761759453820707e-06, "loss": 0.0922, "step": 9783 }, { "epoch": 7.65, "learning_rate": 2.760012426104729e-06, "loss": 0.0989, "step": 9784 }, { "epoch": 7.65, "learning_rate": 2.758265862662374e-06, "loss": 0.0827, "step": 9785 }, { "epoch": 7.65, "learning_rate": 2.7565197636056417e-06, "loss": 0.0968, "step": 9786 }, { "epoch": 7.65, "learning_rate": 2.754774129046501e-06, "loss": 0.0245, "step": 9787 }, { "epoch": 7.65, "learning_rate": 2.7530289590968983e-06, "loss": 0.0475, "step": 9788 }, { "epoch": 7.65, "learning_rate": 2.7512842538687447e-06, "loss": 0.0487, "step": 9789 }, { "epoch": 7.65, "learning_rate": 2.749540013473917e-06, "loss": 0.0337, "step": 9790 }, { "epoch": 7.65, "learning_rate": 2.7477962380242653e-06, "loss": 0.0262, "step": 9791 }, { "epoch": 7.65, "learning_rate": 2.7460529276316184e-06, "loss": 0.067, "step": 9792 }, { "epoch": 7.65, "learning_rate": 2.744310082407765e-06, "loss": 0.0204, "step": 9793 }, { "epoch": 7.65, "learning_rate": 2.742567702464468e-06, "loss": 0.073, "step": 9794 }, { "epoch": 7.66, "learning_rate": 2.7408257879134583e-06, "loss": 0.0466, "step": 9795 }, { "epoch": 7.66, "learning_rate": 2.73908433886644e-06, "loss": 0.1601, "step": 9796 }, { "epoch": 7.66, "learning_rate": 2.7373433554350847e-06, "loss": 0.0179, "step": 9797 }, { "epoch": 7.66, "learning_rate": 2.735602837731036e-06, "loss": 0.0358, "step": 9798 }, { "epoch": 7.66, "learning_rate": 2.7338627858659038e-06, "loss": 0.0594, "step": 9799 }, { "epoch": 7.66, "learning_rate": 2.7321231999512786e-06, "loss": 0.0688, "step": 9800 }, { "epoch": 7.66, "learning_rate": 2.730384080098707e-06, "loss": 0.0762, "step": 9801 }, { "epoch": 7.66, "learning_rate": 2.7286454264197114e-06, "loss": 0.0674, "step": 9802 }, { "epoch": 7.66, "learning_rate": 2.726907239025791e-06, "loss": 0.1211, "step": 9803 }, { "epoch": 7.66, "learning_rate": 2.7251695180284056e-06, "loss": 0.0588, "step": 9804 }, { "epoch": 7.66, "learning_rate": 2.723432263538991e-06, "loss": 0.0293, "step": 9805 }, { "epoch": 7.66, "learning_rate": 2.7216954756689484e-06, "loss": 0.0674, "step": 9806 }, { "epoch": 7.66, "learning_rate": 2.719959154529653e-06, "loss": 0.1035, "step": 9807 }, { "epoch": 7.67, "learning_rate": 2.7182233002324478e-06, "loss": 0.0488, "step": 9808 }, { "epoch": 7.67, "learning_rate": 2.716487912888648e-06, "loss": 0.0692, "step": 9809 }, { "epoch": 7.67, "learning_rate": 2.714752992609537e-06, "loss": 0.0956, "step": 9810 }, { "epoch": 7.67, "learning_rate": 2.713018539506368e-06, "loss": 0.1309, "step": 9811 }, { "epoch": 7.67, "learning_rate": 2.7112845536903665e-06, "loss": 0.0554, "step": 9812 }, { "epoch": 7.67, "learning_rate": 2.709551035272725e-06, "loss": 0.055, "step": 9813 }, { "epoch": 7.67, "learning_rate": 2.707817984364606e-06, "loss": 0.0478, "step": 9814 }, { "epoch": 7.67, "learning_rate": 2.7060854010771487e-06, "loss": 0.0447, "step": 9815 }, { "epoch": 7.67, "learning_rate": 2.704353285521455e-06, "loss": 0.0483, "step": 9816 }, { "epoch": 7.67, "learning_rate": 2.7026216378086024e-06, "loss": 0.0533, "step": 9817 }, { "epoch": 7.67, "learning_rate": 2.7008904580496244e-06, "loss": 0.0773, "step": 9818 }, { "epoch": 7.67, "learning_rate": 2.6991597463555453e-06, "loss": 0.0302, "step": 9819 }, { "epoch": 7.67, "learning_rate": 2.6974295028373453e-06, "loss": 0.0392, "step": 9820 }, { "epoch": 7.68, "learning_rate": 2.6956997276059803e-06, "loss": 0.0298, "step": 9821 }, { "epoch": 7.68, "learning_rate": 2.6939704207723706e-06, "loss": 0.0319, "step": 9822 }, { "epoch": 7.68, "learning_rate": 2.692241582447418e-06, "loss": 0.0283, "step": 9823 }, { "epoch": 7.68, "learning_rate": 2.6905132127419797e-06, "loss": 0.0858, "step": 9824 }, { "epoch": 7.68, "learning_rate": 2.6887853117668907e-06, "loss": 0.0545, "step": 9825 }, { "epoch": 7.68, "learning_rate": 2.687057879632953e-06, "loss": 0.0458, "step": 9826 }, { "epoch": 7.68, "learning_rate": 2.6853309164509454e-06, "loss": 0.0436, "step": 9827 }, { "epoch": 7.68, "learning_rate": 2.6836044223316136e-06, "loss": 0.0513, "step": 9828 }, { "epoch": 7.68, "learning_rate": 2.68187839738566e-06, "loss": 0.0254, "step": 9829 }, { "epoch": 7.68, "learning_rate": 2.6801528417237788e-06, "loss": 0.1495, "step": 9830 }, { "epoch": 7.68, "learning_rate": 2.6784277554566194e-06, "loss": 0.0183, "step": 9831 }, { "epoch": 7.68, "learning_rate": 2.676703138694806e-06, "loss": 0.0531, "step": 9832 }, { "epoch": 7.69, "learning_rate": 2.6749789915489323e-06, "loss": 0.0315, "step": 9833 }, { "epoch": 7.69, "learning_rate": 2.6732553141295602e-06, "loss": 0.0554, "step": 9834 }, { "epoch": 7.69, "learning_rate": 2.6715321065472243e-06, "loss": 0.0293, "step": 9835 }, { "epoch": 7.69, "learning_rate": 2.669809368912426e-06, "loss": 0.0725, "step": 9836 }, { "epoch": 7.69, "learning_rate": 2.6680871013356347e-06, "loss": 0.0446, "step": 9837 }, { "epoch": 7.69, "learning_rate": 2.6663653039273008e-06, "loss": 0.1066, "step": 9838 }, { "epoch": 7.69, "learning_rate": 2.664643976797834e-06, "loss": 0.1228, "step": 9839 }, { "epoch": 7.69, "learning_rate": 2.6629231200576177e-06, "loss": 0.0614, "step": 9840 }, { "epoch": 7.69, "learning_rate": 2.6612027338169964e-06, "loss": 0.0814, "step": 9841 }, { "epoch": 7.69, "learning_rate": 2.659482818186301e-06, "loss": 0.0777, "step": 9842 }, { "epoch": 7.69, "learning_rate": 2.657763373275819e-06, "loss": 0.0526, "step": 9843 }, { "epoch": 7.69, "learning_rate": 2.656044399195814e-06, "loss": 0.0918, "step": 9844 }, { "epoch": 7.69, "learning_rate": 2.6543258960565166e-06, "loss": 0.0403, "step": 9845 }, { "epoch": 7.7, "learning_rate": 2.6526078639681287e-06, "loss": 0.0851, "step": 9846 }, { "epoch": 7.7, "learning_rate": 2.6508903030408193e-06, "loss": 0.0716, "step": 9847 }, { "epoch": 7.7, "learning_rate": 2.6491732133847314e-06, "loss": 0.077, "step": 9848 }, { "epoch": 7.7, "learning_rate": 2.647456595109972e-06, "loss": 0.0677, "step": 9849 }, { "epoch": 7.7, "learning_rate": 2.645740448326627e-06, "loss": 0.1167, "step": 9850 }, { "epoch": 7.7, "learning_rate": 2.6440247731447465e-06, "loss": 0.0812, "step": 9851 }, { "epoch": 7.7, "learning_rate": 2.6423095696743415e-06, "loss": 0.0828, "step": 9852 }, { "epoch": 7.7, "learning_rate": 2.640594838025412e-06, "loss": 0.071, "step": 9853 }, { "epoch": 7.7, "learning_rate": 2.638880578307913e-06, "loss": 0.1175, "step": 9854 }, { "epoch": 7.7, "learning_rate": 2.6371667906317743e-06, "loss": 0.0444, "step": 9855 }, { "epoch": 7.7, "learning_rate": 2.6354534751068938e-06, "loss": 0.0503, "step": 9856 }, { "epoch": 7.7, "learning_rate": 2.6337406318431404e-06, "loss": 0.0217, "step": 9857 }, { "epoch": 7.7, "learning_rate": 2.632028260950353e-06, "loss": 0.0246, "step": 9858 }, { "epoch": 7.71, "learning_rate": 2.6303163625383397e-06, "loss": 0.0719, "step": 9859 }, { "epoch": 7.71, "learning_rate": 2.6286049367168743e-06, "loss": 0.0409, "step": 9860 }, { "epoch": 7.71, "learning_rate": 2.6268939835957107e-06, "loss": 0.0362, "step": 9861 }, { "epoch": 7.71, "learning_rate": 2.625183503284564e-06, "loss": 0.0395, "step": 9862 }, { "epoch": 7.71, "learning_rate": 2.6234734958931218e-06, "loss": 0.0602, "step": 9863 }, { "epoch": 7.71, "learning_rate": 2.621763961531033e-06, "loss": 0.0768, "step": 9864 }, { "epoch": 7.71, "learning_rate": 2.6200549003079333e-06, "loss": 0.0268, "step": 9865 }, { "epoch": 7.71, "learning_rate": 2.618346312333414e-06, "loss": 0.0365, "step": 9866 }, { "epoch": 7.71, "learning_rate": 2.616638197717041e-06, "loss": 0.0266, "step": 9867 }, { "epoch": 7.71, "learning_rate": 2.6149305565683502e-06, "loss": 0.0361, "step": 9868 }, { "epoch": 7.71, "learning_rate": 2.613223388996845e-06, "loss": 0.0672, "step": 9869 }, { "epoch": 7.71, "learning_rate": 2.6115166951120006e-06, "loss": 0.0944, "step": 9870 }, { "epoch": 7.71, "learning_rate": 2.609810475023261e-06, "loss": 0.0592, "step": 9871 }, { "epoch": 7.72, "learning_rate": 2.6081047288400363e-06, "loss": 0.0616, "step": 9872 }, { "epoch": 7.72, "learning_rate": 2.6063994566717156e-06, "loss": 0.1182, "step": 9873 }, { "epoch": 7.72, "learning_rate": 2.6046946586276524e-06, "loss": 0.0442, "step": 9874 }, { "epoch": 7.72, "learning_rate": 2.6029903348171604e-06, "loss": 0.0479, "step": 9875 }, { "epoch": 7.72, "learning_rate": 2.601286485349539e-06, "loss": 0.0757, "step": 9876 }, { "epoch": 7.72, "learning_rate": 2.5995831103340473e-06, "loss": 0.0507, "step": 9877 }, { "epoch": 7.72, "learning_rate": 2.597880209879917e-06, "loss": 0.0567, "step": 9878 }, { "epoch": 7.72, "learning_rate": 2.5961777840963497e-06, "loss": 0.1174, "step": 9879 }, { "epoch": 7.72, "learning_rate": 2.5944758330925136e-06, "loss": 0.0504, "step": 9880 }, { "epoch": 7.72, "learning_rate": 2.59277435697755e-06, "loss": 0.0622, "step": 9881 }, { "epoch": 7.72, "learning_rate": 2.5910733558605673e-06, "loss": 0.0734, "step": 9882 }, { "epoch": 7.72, "learning_rate": 2.5893728298506427e-06, "loss": 0.0763, "step": 9883 }, { "epoch": 7.72, "learning_rate": 2.587672779056829e-06, "loss": 0.0342, "step": 9884 }, { "epoch": 7.73, "learning_rate": 2.5859732035881447e-06, "loss": 0.0449, "step": 9885 }, { "epoch": 7.73, "learning_rate": 2.584274103553572e-06, "loss": 0.0981, "step": 9886 }, { "epoch": 7.73, "learning_rate": 2.582575479062068e-06, "loss": 0.0657, "step": 9887 }, { "epoch": 7.73, "learning_rate": 2.580877330222564e-06, "loss": 0.028, "step": 9888 }, { "epoch": 7.73, "learning_rate": 2.579179657143954e-06, "loss": 0.1267, "step": 9889 }, { "epoch": 7.73, "learning_rate": 2.5774824599351035e-06, "loss": 0.0701, "step": 9890 }, { "epoch": 7.73, "learning_rate": 2.5757857387048467e-06, "loss": 0.0346, "step": 9891 }, { "epoch": 7.73, "learning_rate": 2.5740894935619885e-06, "loss": 0.0732, "step": 9892 }, { "epoch": 7.73, "learning_rate": 2.572393724615303e-06, "loss": 0.0563, "step": 9893 }, { "epoch": 7.73, "learning_rate": 2.570698431973534e-06, "loss": 0.0534, "step": 9894 }, { "epoch": 7.73, "learning_rate": 2.5690036157453902e-06, "loss": 0.0358, "step": 9895 }, { "epoch": 7.73, "learning_rate": 2.56730927603956e-06, "loss": 0.068, "step": 9896 }, { "epoch": 7.74, "learning_rate": 2.565615412964696e-06, "loss": 0.0271, "step": 9897 }, { "epoch": 7.74, "learning_rate": 2.563922026629414e-06, "loss": 0.1125, "step": 9898 }, { "epoch": 7.74, "learning_rate": 2.562229117142302e-06, "loss": 0.1302, "step": 9899 }, { "epoch": 7.74, "learning_rate": 2.5605366846119283e-06, "loss": 0.0907, "step": 9900 }, { "epoch": 7.74, "learning_rate": 2.5588447291468187e-06, "loss": 0.0617, "step": 9901 }, { "epoch": 7.74, "learning_rate": 2.557153250855472e-06, "loss": 0.0488, "step": 9902 }, { "epoch": 7.74, "learning_rate": 2.5554622498463567e-06, "loss": 0.0267, "step": 9903 }, { "epoch": 7.74, "learning_rate": 2.5537717262279105e-06, "loss": 0.0459, "step": 9904 }, { "epoch": 7.74, "learning_rate": 2.552081680108539e-06, "loss": 0.0332, "step": 9905 }, { "epoch": 7.74, "learning_rate": 2.55039211159662e-06, "loss": 0.0641, "step": 9906 }, { "epoch": 7.74, "learning_rate": 2.548703020800497e-06, "loss": 0.0708, "step": 9907 }, { "epoch": 7.74, "learning_rate": 2.547014407828492e-06, "loss": 0.0587, "step": 9908 }, { "epoch": 7.74, "learning_rate": 2.5453262727888816e-06, "loss": 0.0288, "step": 9909 }, { "epoch": 7.75, "learning_rate": 2.5436386157899206e-06, "loss": 0.0277, "step": 9910 }, { "epoch": 7.75, "learning_rate": 2.541951436939837e-06, "loss": 0.0395, "step": 9911 }, { "epoch": 7.75, "learning_rate": 2.5402647363468202e-06, "loss": 0.0182, "step": 9912 }, { "epoch": 7.75, "learning_rate": 2.5385785141190335e-06, "loss": 0.0544, "step": 9913 }, { "epoch": 7.75, "learning_rate": 2.5368927703646062e-06, "loss": 0.056, "step": 9914 }, { "epoch": 7.75, "learning_rate": 2.5352075051916403e-06, "loss": 0.0584, "step": 9915 }, { "epoch": 7.75, "learning_rate": 2.5335227187082047e-06, "loss": 0.0468, "step": 9916 }, { "epoch": 7.75, "learning_rate": 2.531838411022338e-06, "loss": 0.0762, "step": 9917 }, { "epoch": 7.75, "learning_rate": 2.5301545822420483e-06, "loss": 0.0204, "step": 9918 }, { "epoch": 7.75, "learning_rate": 2.528471232475319e-06, "loss": 0.1366, "step": 9919 }, { "epoch": 7.75, "learning_rate": 2.5267883618300903e-06, "loss": 0.0564, "step": 9920 }, { "epoch": 7.75, "learning_rate": 2.525105970414281e-06, "loss": 0.0461, "step": 9921 }, { "epoch": 7.75, "learning_rate": 2.523424058335773e-06, "loss": 0.0816, "step": 9922 }, { "epoch": 7.76, "learning_rate": 2.521742625702427e-06, "loss": 0.0531, "step": 9923 }, { "epoch": 7.76, "learning_rate": 2.520061672622065e-06, "loss": 0.0376, "step": 9924 }, { "epoch": 7.76, "learning_rate": 2.51838119920248e-06, "loss": 0.0287, "step": 9925 }, { "epoch": 7.76, "learning_rate": 2.516701205551434e-06, "loss": 0.0335, "step": 9926 }, { "epoch": 7.76, "learning_rate": 2.5150216917766603e-06, "loss": 0.0357, "step": 9927 }, { "epoch": 7.76, "learning_rate": 2.5133426579858576e-06, "loss": 0.0659, "step": 9928 }, { "epoch": 7.76, "learning_rate": 2.511664104286697e-06, "loss": 0.0504, "step": 9929 }, { "epoch": 7.76, "learning_rate": 2.509986030786816e-06, "loss": 0.0619, "step": 9930 }, { "epoch": 7.76, "learning_rate": 2.5083084375938317e-06, "loss": 0.0745, "step": 9931 }, { "epoch": 7.76, "learning_rate": 2.506631324815313e-06, "loss": 0.089, "step": 9932 }, { "epoch": 7.76, "learning_rate": 2.5049546925588064e-06, "loss": 0.0333, "step": 9933 }, { "epoch": 7.76, "learning_rate": 2.503278540931834e-06, "loss": 0.0226, "step": 9934 }, { "epoch": 7.76, "learning_rate": 2.5016028700418794e-06, "loss": 0.0371, "step": 9935 }, { "epoch": 7.77, "learning_rate": 2.4999276799963955e-06, "loss": 0.0261, "step": 9936 }, { "epoch": 7.77, "learning_rate": 2.498252970902807e-06, "loss": 0.0334, "step": 9937 }, { "epoch": 7.77, "learning_rate": 2.4965787428685075e-06, "loss": 0.0343, "step": 9938 }, { "epoch": 7.77, "learning_rate": 2.4949049960008576e-06, "loss": 0.062, "step": 9939 }, { "epoch": 7.77, "learning_rate": 2.4932317304071885e-06, "loss": 0.0361, "step": 9940 }, { "epoch": 7.77, "learning_rate": 2.4915589461947997e-06, "loss": 0.0369, "step": 9941 }, { "epoch": 7.77, "learning_rate": 2.489886643470967e-06, "loss": 0.0407, "step": 9942 }, { "epoch": 7.77, "learning_rate": 2.4882148223429216e-06, "loss": 0.0922, "step": 9943 }, { "epoch": 7.77, "learning_rate": 2.4865434829178735e-06, "loss": 0.0705, "step": 9944 }, { "epoch": 7.77, "learning_rate": 2.4848726253029975e-06, "loss": 0.0175, "step": 9945 }, { "epoch": 7.77, "learning_rate": 2.4832022496054454e-06, "loss": 0.1243, "step": 9946 }, { "epoch": 7.77, "learning_rate": 2.4815323559323267e-06, "loss": 0.0817, "step": 9947 }, { "epoch": 7.77, "learning_rate": 2.4798629443907287e-06, "loss": 0.026, "step": 9948 }, { "epoch": 7.78, "learning_rate": 2.4781940150877025e-06, "loss": 0.0555, "step": 9949 }, { "epoch": 7.78, "learning_rate": 2.4765255681302723e-06, "loss": 0.0473, "step": 9950 }, { "epoch": 7.78, "learning_rate": 2.474857603625428e-06, "loss": 0.0607, "step": 9951 }, { "epoch": 7.78, "learning_rate": 2.473190121680129e-06, "loss": 0.0476, "step": 9952 }, { "epoch": 7.78, "learning_rate": 2.471523122401305e-06, "loss": 0.0589, "step": 9953 }, { "epoch": 7.78, "learning_rate": 2.4698566058958596e-06, "loss": 0.0798, "step": 9954 }, { "epoch": 7.78, "learning_rate": 2.4681905722706546e-06, "loss": 0.0967, "step": 9955 }, { "epoch": 7.78, "learning_rate": 2.4665250216325244e-06, "loss": 0.1827, "step": 9956 }, { "epoch": 7.78, "learning_rate": 2.464859954088282e-06, "loss": 0.0608, "step": 9957 }, { "epoch": 7.78, "learning_rate": 2.4631953697446987e-06, "loss": 0.0205, "step": 9958 }, { "epoch": 7.78, "learning_rate": 2.46153126870852e-06, "loss": 0.0797, "step": 9959 }, { "epoch": 7.78, "learning_rate": 2.4598676510864516e-06, "loss": 0.0382, "step": 9960 }, { "epoch": 7.79, "learning_rate": 2.4582045169851822e-06, "loss": 0.0488, "step": 9961 }, { "epoch": 7.79, "learning_rate": 2.4565418665113607e-06, "loss": 0.0592, "step": 9962 }, { "epoch": 7.79, "learning_rate": 2.454879699771606e-06, "loss": 0.0339, "step": 9963 }, { "epoch": 7.79, "learning_rate": 2.453218016872504e-06, "loss": 0.0605, "step": 9964 }, { "epoch": 7.79, "learning_rate": 2.451556817920622e-06, "loss": 0.0185, "step": 9965 }, { "epoch": 7.79, "learning_rate": 2.4498961030224756e-06, "loss": 0.0319, "step": 9966 }, { "epoch": 7.79, "learning_rate": 2.4482358722845656e-06, "loss": 0.0684, "step": 9967 }, { "epoch": 7.79, "learning_rate": 2.4465761258133512e-06, "loss": 0.0452, "step": 9968 }, { "epoch": 7.79, "learning_rate": 2.444916863715273e-06, "loss": 0.0503, "step": 9969 }, { "epoch": 7.79, "learning_rate": 2.4432580860967313e-06, "loss": 0.0799, "step": 9970 }, { "epoch": 7.79, "learning_rate": 2.441599793064099e-06, "loss": 0.0196, "step": 9971 }, { "epoch": 7.79, "learning_rate": 2.439941984723707e-06, "loss": 0.0651, "step": 9972 }, { "epoch": 7.79, "learning_rate": 2.4382846611818755e-06, "loss": 0.0898, "step": 9973 }, { "epoch": 7.8, "learning_rate": 2.436627822544877e-06, "loss": 0.0599, "step": 9974 }, { "epoch": 7.8, "learning_rate": 2.43497146891896e-06, "loss": 0.0751, "step": 9975 }, { "epoch": 7.8, "learning_rate": 2.43331560041034e-06, "loss": 0.0652, "step": 9976 }, { "epoch": 7.8, "learning_rate": 2.4316602171252003e-06, "loss": 0.0818, "step": 9977 }, { "epoch": 7.8, "learning_rate": 2.430005319169697e-06, "loss": 0.1156, "step": 9978 }, { "epoch": 7.8, "learning_rate": 2.428350906649951e-06, "loss": 0.0727, "step": 9979 }, { "epoch": 7.8, "learning_rate": 2.4266969796720518e-06, "loss": 0.0415, "step": 9980 }, { "epoch": 7.8, "learning_rate": 2.4250435383420635e-06, "loss": 0.0488, "step": 9981 }, { "epoch": 7.8, "learning_rate": 2.4233905827660175e-06, "loss": 0.035, "step": 9982 }, { "epoch": 7.8, "learning_rate": 2.421738113049902e-06, "loss": 0.0398, "step": 9983 }, { "epoch": 7.8, "learning_rate": 2.420086129299691e-06, "loss": 0.0422, "step": 9984 }, { "epoch": 7.8, "learning_rate": 2.4184346316213202e-06, "loss": 0.0288, "step": 9985 }, { "epoch": 7.8, "learning_rate": 2.416783620120693e-06, "loss": 0.0436, "step": 9986 }, { "epoch": 7.81, "learning_rate": 2.415133094903681e-06, "loss": 0.062, "step": 9987 }, { "epoch": 7.81, "learning_rate": 2.413483056076127e-06, "loss": 0.06, "step": 9988 }, { "epoch": 7.81, "learning_rate": 2.4118335037438436e-06, "loss": 0.0296, "step": 9989 }, { "epoch": 7.81, "learning_rate": 2.4101844380126084e-06, "loss": 0.0601, "step": 9990 }, { "epoch": 7.81, "learning_rate": 2.408535858988168e-06, "loss": 0.1034, "step": 9991 }, { "epoch": 7.81, "learning_rate": 2.4068877667762448e-06, "loss": 0.0374, "step": 9992 }, { "epoch": 7.81, "learning_rate": 2.4052401614825248e-06, "loss": 0.0706, "step": 9993 }, { "epoch": 7.81, "learning_rate": 2.4035930432126587e-06, "loss": 0.0733, "step": 9994 }, { "epoch": 7.81, "learning_rate": 2.401946412072269e-06, "loss": 0.0379, "step": 9995 }, { "epoch": 7.81, "learning_rate": 2.4003002681669543e-06, "loss": 0.1049, "step": 9996 }, { "epoch": 7.81, "learning_rate": 2.3986546116022703e-06, "loss": 0.0869, "step": 9997 }, { "epoch": 7.81, "learning_rate": 2.397009442483751e-06, "loss": 0.0976, "step": 9998 }, { "epoch": 7.81, "learning_rate": 2.3953647609168916e-06, "loss": 0.0731, "step": 9999 }, { "epoch": 7.82, "learning_rate": 2.3937205670071606e-06, "loss": 0.067, "step": 10000 }, { "epoch": 7.82, "learning_rate": 2.3920768608599954e-06, "loss": 0.0531, "step": 10001 }, { "epoch": 7.82, "learning_rate": 2.3904336425807985e-06, "loss": 0.0252, "step": 10002 }, { "epoch": 7.82, "learning_rate": 2.388790912274943e-06, "loss": 0.0731, "step": 10003 }, { "epoch": 7.82, "learning_rate": 2.387148670047774e-06, "loss": 0.0533, "step": 10004 }, { "epoch": 7.82, "learning_rate": 2.3855069160046053e-06, "loss": 0.0468, "step": 10005 }, { "epoch": 7.82, "learning_rate": 2.383865650250705e-06, "loss": 0.0851, "step": 10006 }, { "epoch": 7.82, "learning_rate": 2.3822248728913335e-06, "loss": 0.0281, "step": 10007 }, { "epoch": 7.82, "learning_rate": 2.380584584031701e-06, "loss": 0.1142, "step": 10008 }, { "epoch": 7.82, "learning_rate": 2.378944783776996e-06, "loss": 0.1145, "step": 10009 }, { "epoch": 7.82, "learning_rate": 2.377305472232373e-06, "loss": 0.0723, "step": 10010 }, { "epoch": 7.82, "learning_rate": 2.3756666495029525e-06, "loss": 0.079, "step": 10011 }, { "epoch": 7.82, "learning_rate": 2.3740283156938284e-06, "loss": 0.0824, "step": 10012 }, { "epoch": 7.83, "learning_rate": 2.3723904709100597e-06, "loss": 0.0901, "step": 10013 }, { "epoch": 7.83, "learning_rate": 2.370753115256673e-06, "loss": 0.0847, "step": 10014 }, { "epoch": 7.83, "learning_rate": 2.369116248838671e-06, "loss": 0.0593, "step": 10015 }, { "epoch": 7.83, "learning_rate": 2.3674798717610215e-06, "loss": 0.0829, "step": 10016 }, { "epoch": 7.83, "learning_rate": 2.3658439841286518e-06, "loss": 0.0246, "step": 10017 }, { "epoch": 7.83, "learning_rate": 2.364208586046466e-06, "loss": 0.0619, "step": 10018 }, { "epoch": 7.83, "learning_rate": 2.3625736776193418e-06, "loss": 0.0453, "step": 10019 }, { "epoch": 7.83, "learning_rate": 2.3609392589521176e-06, "loss": 0.0434, "step": 10020 }, { "epoch": 7.83, "learning_rate": 2.3593053301496018e-06, "loss": 0.0612, "step": 10021 }, { "epoch": 7.83, "learning_rate": 2.357671891316572e-06, "loss": 0.0242, "step": 10022 }, { "epoch": 7.83, "learning_rate": 2.356038942557776e-06, "loss": 0.0346, "step": 10023 }, { "epoch": 7.83, "learning_rate": 2.354406483977927e-06, "loss": 0.0635, "step": 10024 }, { "epoch": 7.84, "learning_rate": 2.3527745156817096e-06, "loss": 0.0406, "step": 10025 }, { "epoch": 7.84, "learning_rate": 2.351143037773773e-06, "loss": 0.067, "step": 10026 }, { "epoch": 7.84, "learning_rate": 2.349512050358743e-06, "loss": 0.0201, "step": 10027 }, { "epoch": 7.84, "learning_rate": 2.347881553541209e-06, "loss": 0.0785, "step": 10028 }, { "epoch": 7.84, "learning_rate": 2.346251547425721e-06, "loss": 0.0407, "step": 10029 }, { "epoch": 7.84, "learning_rate": 2.3446220321168125e-06, "loss": 0.1496, "step": 10030 }, { "epoch": 7.84, "learning_rate": 2.342993007718977e-06, "loss": 0.1083, "step": 10031 }, { "epoch": 7.84, "learning_rate": 2.3413644743366747e-06, "loss": 0.068, "step": 10032 }, { "epoch": 7.84, "learning_rate": 2.3397364320743422e-06, "loss": 0.0181, "step": 10033 }, { "epoch": 7.84, "learning_rate": 2.338108881036375e-06, "loss": 0.1074, "step": 10034 }, { "epoch": 7.84, "learning_rate": 2.3364818213271457e-06, "loss": 0.0571, "step": 10035 }, { "epoch": 7.84, "learning_rate": 2.3348552530509893e-06, "loss": 0.123, "step": 10036 }, { "epoch": 7.84, "learning_rate": 2.3332291763122095e-06, "loss": 0.0387, "step": 10037 }, { "epoch": 7.85, "learning_rate": 2.331603591215087e-06, "loss": 0.0767, "step": 10038 }, { "epoch": 7.85, "learning_rate": 2.329978497863863e-06, "loss": 0.0534, "step": 10039 }, { "epoch": 7.85, "learning_rate": 2.328353896362745e-06, "loss": 0.0962, "step": 10040 }, { "epoch": 7.85, "learning_rate": 2.326729786815911e-06, "loss": 0.0362, "step": 10041 }, { "epoch": 7.85, "learning_rate": 2.3251061693275144e-06, "loss": 0.0299, "step": 10042 }, { "epoch": 7.85, "learning_rate": 2.323483044001672e-06, "loss": 0.1128, "step": 10043 }, { "epoch": 7.85, "learning_rate": 2.3218604109424647e-06, "loss": 0.0952, "step": 10044 }, { "epoch": 7.85, "learning_rate": 2.3202382702539493e-06, "loss": 0.045, "step": 10045 }, { "epoch": 7.85, "learning_rate": 2.318616622040146e-06, "loss": 0.0757, "step": 10046 }, { "epoch": 7.85, "learning_rate": 2.3169954664050455e-06, "loss": 0.0382, "step": 10047 }, { "epoch": 7.85, "learning_rate": 2.315374803452607e-06, "loss": 0.0525, "step": 10048 }, { "epoch": 7.85, "learning_rate": 2.3137546332867544e-06, "loss": 0.0258, "step": 10049 }, { "epoch": 7.85, "learning_rate": 2.3121349560113905e-06, "loss": 0.0635, "step": 10050 }, { "epoch": 7.86, "learning_rate": 2.3105157717303727e-06, "loss": 0.0203, "step": 10051 }, { "epoch": 7.86, "learning_rate": 2.3088970805475353e-06, "loss": 0.0556, "step": 10052 }, { "epoch": 7.86, "learning_rate": 2.307278882566676e-06, "loss": 0.072, "step": 10053 }, { "epoch": 7.86, "learning_rate": 2.3056611778915694e-06, "loss": 0.0891, "step": 10054 }, { "epoch": 7.86, "learning_rate": 2.3040439666259505e-06, "loss": 0.048, "step": 10055 }, { "epoch": 7.86, "learning_rate": 2.3024272488735242e-06, "loss": 0.0575, "step": 10056 }, { "epoch": 7.86, "learning_rate": 2.3008110247379667e-06, "loss": 0.0482, "step": 10057 }, { "epoch": 7.86, "learning_rate": 2.299195294322919e-06, "loss": 0.0986, "step": 10058 }, { "epoch": 7.86, "learning_rate": 2.297580057731991e-06, "loss": 0.0194, "step": 10059 }, { "epoch": 7.86, "learning_rate": 2.295965315068763e-06, "loss": 0.1371, "step": 10060 }, { "epoch": 7.86, "learning_rate": 2.2943510664367807e-06, "loss": 0.051, "step": 10061 }, { "epoch": 7.86, "learning_rate": 2.292737311939566e-06, "loss": 0.0356, "step": 10062 }, { "epoch": 7.86, "learning_rate": 2.2911240516805953e-06, "loss": 0.0192, "step": 10063 }, { "epoch": 7.87, "learning_rate": 2.289511285763323e-06, "loss": 0.037, "step": 10064 }, { "epoch": 7.87, "learning_rate": 2.287899014291173e-06, "loss": 0.036, "step": 10065 }, { "epoch": 7.87, "learning_rate": 2.2862872373675325e-06, "loss": 0.0405, "step": 10066 }, { "epoch": 7.87, "learning_rate": 2.284675955095761e-06, "loss": 0.0264, "step": 10067 }, { "epoch": 7.87, "learning_rate": 2.283065167579177e-06, "loss": 0.05, "step": 10068 }, { "epoch": 7.87, "learning_rate": 2.281454874921082e-06, "loss": 0.0541, "step": 10069 }, { "epoch": 7.87, "learning_rate": 2.2798450772247336e-06, "loss": 0.0278, "step": 10070 }, { "epoch": 7.87, "learning_rate": 2.278235774593365e-06, "loss": 0.0392, "step": 10071 }, { "epoch": 7.87, "learning_rate": 2.276626967130171e-06, "loss": 0.0717, "step": 10072 }, { "epoch": 7.87, "learning_rate": 2.2750186549383257e-06, "loss": 0.1432, "step": 10073 }, { "epoch": 7.87, "learning_rate": 2.2734108381209574e-06, "loss": 0.0604, "step": 10074 }, { "epoch": 7.87, "learning_rate": 2.2718035167811715e-06, "loss": 0.0563, "step": 10075 }, { "epoch": 7.87, "learning_rate": 2.2701966910220375e-06, "loss": 0.0469, "step": 10076 }, { "epoch": 7.88, "learning_rate": 2.2685903609466e-06, "loss": 0.0677, "step": 10077 }, { "epoch": 7.88, "learning_rate": 2.266984526657864e-06, "loss": 0.0177, "step": 10078 }, { "epoch": 7.88, "learning_rate": 2.265379188258806e-06, "loss": 0.0354, "step": 10079 }, { "epoch": 7.88, "learning_rate": 2.2637743458523708e-06, "loss": 0.0546, "step": 10080 }, { "epoch": 7.88, "learning_rate": 2.2621699995414703e-06, "loss": 0.0763, "step": 10081 }, { "epoch": 7.88, "learning_rate": 2.2605661494289853e-06, "loss": 0.0923, "step": 10082 }, { "epoch": 7.88, "learning_rate": 2.2589627956177662e-06, "loss": 0.0505, "step": 10083 }, { "epoch": 7.88, "learning_rate": 2.257359938210626e-06, "loss": 0.0162, "step": 10084 }, { "epoch": 7.88, "learning_rate": 2.255757577310359e-06, "loss": 0.0469, "step": 10085 }, { "epoch": 7.88, "learning_rate": 2.25415571301971e-06, "loss": 0.0507, "step": 10086 }, { "epoch": 7.88, "learning_rate": 2.2525543454414e-06, "loss": 0.0863, "step": 10087 }, { "epoch": 7.88, "learning_rate": 2.250953474678126e-06, "loss": 0.0816, "step": 10088 }, { "epoch": 7.89, "learning_rate": 2.249353100832542e-06, "loss": 0.035, "step": 10089 }, { "epoch": 7.89, "learning_rate": 2.2477532240072765e-06, "loss": 0.0882, "step": 10090 }, { "epoch": 7.89, "learning_rate": 2.246153844304916e-06, "loss": 0.0244, "step": 10091 }, { "epoch": 7.89, "learning_rate": 2.2445549618280315e-06, "loss": 0.0915, "step": 10092 }, { "epoch": 7.89, "learning_rate": 2.2429565766791493e-06, "loss": 0.0369, "step": 10093 }, { "epoch": 7.89, "learning_rate": 2.24135868896077e-06, "loss": 0.0445, "step": 10094 }, { "epoch": 7.89, "learning_rate": 2.2397612987753546e-06, "loss": 0.1518, "step": 10095 }, { "epoch": 7.89, "learning_rate": 2.2381644062253483e-06, "loss": 0.045, "step": 10096 }, { "epoch": 7.89, "learning_rate": 2.2365680114131463e-06, "loss": 0.0647, "step": 10097 }, { "epoch": 7.89, "learning_rate": 2.2349721144411196e-06, "loss": 0.034, "step": 10098 }, { "epoch": 7.89, "learning_rate": 2.233376715411606e-06, "loss": 0.0396, "step": 10099 }, { "epoch": 7.89, "learning_rate": 2.2317818144269176e-06, "loss": 0.039, "step": 10100 }, { "epoch": 7.89, "learning_rate": 2.2301874115893287e-06, "loss": 0.0739, "step": 10101 }, { "epoch": 7.9, "learning_rate": 2.228593507001079e-06, "loss": 0.0463, "step": 10102 }, { "epoch": 7.9, "learning_rate": 2.227000100764383e-06, "loss": 0.0511, "step": 10103 }, { "epoch": 7.9, "learning_rate": 2.225407192981417e-06, "loss": 0.0615, "step": 10104 }, { "epoch": 7.9, "learning_rate": 2.22381478375433e-06, "loss": 0.0647, "step": 10105 }, { "epoch": 7.9, "learning_rate": 2.2222228731852367e-06, "loss": 0.0638, "step": 10106 }, { "epoch": 7.9, "learning_rate": 2.2206314613762216e-06, "loss": 0.0368, "step": 10107 }, { "epoch": 7.9, "learning_rate": 2.2190405484293356e-06, "loss": 0.0278, "step": 10108 }, { "epoch": 7.9, "learning_rate": 2.217450134446597e-06, "loss": 0.0408, "step": 10109 }, { "epoch": 7.9, "learning_rate": 2.2158602195299907e-06, "loss": 0.1422, "step": 10110 }, { "epoch": 7.9, "learning_rate": 2.2142708037814787e-06, "loss": 0.058, "step": 10111 }, { "epoch": 7.9, "learning_rate": 2.2126818873029808e-06, "loss": 0.0619, "step": 10112 }, { "epoch": 7.9, "learning_rate": 2.2110934701963902e-06, "loss": 0.0204, "step": 10113 }, { "epoch": 7.9, "learning_rate": 2.20950555256356e-06, "loss": 0.0766, "step": 10114 }, { "epoch": 7.91, "learning_rate": 2.207918134506325e-06, "loss": 0.056, "step": 10115 }, { "epoch": 7.91, "learning_rate": 2.206331216126476e-06, "loss": 0.0289, "step": 10116 }, { "epoch": 7.91, "learning_rate": 2.2047447975257784e-06, "loss": 0.0635, "step": 10117 }, { "epoch": 7.91, "learning_rate": 2.20315887880596e-06, "loss": 0.1459, "step": 10118 }, { "epoch": 7.91, "learning_rate": 2.201573460068728e-06, "loss": 0.0522, "step": 10119 }, { "epoch": 7.91, "learning_rate": 2.1999885414157394e-06, "loss": 0.0311, "step": 10120 }, { "epoch": 7.91, "learning_rate": 2.1984041229486353e-06, "loss": 0.0993, "step": 10121 }, { "epoch": 7.91, "learning_rate": 2.1968202047690135e-06, "loss": 0.0555, "step": 10122 }, { "epoch": 7.91, "learning_rate": 2.195236786978452e-06, "loss": 0.0929, "step": 10123 }, { "epoch": 7.91, "learning_rate": 2.193653869678487e-06, "loss": 0.0165, "step": 10124 }, { "epoch": 7.91, "learning_rate": 2.192071452970622e-06, "loss": 0.0589, "step": 10125 }, { "epoch": 7.91, "learning_rate": 2.1904895369563305e-06, "loss": 0.1091, "step": 10126 }, { "epoch": 7.91, "learning_rate": 2.188908121737061e-06, "loss": 0.0952, "step": 10127 }, { "epoch": 7.92, "learning_rate": 2.1873272074142206e-06, "loss": 0.0439, "step": 10128 }, { "epoch": 7.92, "learning_rate": 2.1857467940891864e-06, "loss": 0.0356, "step": 10129 }, { "epoch": 7.92, "learning_rate": 2.184166881863307e-06, "loss": 0.0656, "step": 10130 }, { "epoch": 7.92, "learning_rate": 2.1825874708378936e-06, "loss": 0.0262, "step": 10131 }, { "epoch": 7.92, "learning_rate": 2.181008561114231e-06, "loss": 0.0239, "step": 10132 }, { "epoch": 7.92, "learning_rate": 2.1794301527935656e-06, "loss": 0.0842, "step": 10133 }, { "epoch": 7.92, "learning_rate": 2.1778522459771146e-06, "loss": 0.0404, "step": 10134 }, { "epoch": 7.92, "learning_rate": 2.176274840766067e-06, "loss": 0.0357, "step": 10135 }, { "epoch": 7.92, "learning_rate": 2.1746979372615775e-06, "loss": 0.0924, "step": 10136 }, { "epoch": 7.92, "learning_rate": 2.1731215355647582e-06, "loss": 0.0449, "step": 10137 }, { "epoch": 7.92, "learning_rate": 2.1715456357767063e-06, "loss": 0.0438, "step": 10138 }, { "epoch": 7.92, "learning_rate": 2.1699702379984756e-06, "loss": 0.0274, "step": 10139 }, { "epoch": 7.92, "learning_rate": 2.16839534233109e-06, "loss": 0.0597, "step": 10140 }, { "epoch": 7.93, "learning_rate": 2.166820948875542e-06, "loss": 0.0759, "step": 10141 }, { "epoch": 7.93, "learning_rate": 2.1652470577327923e-06, "loss": 0.0589, "step": 10142 }, { "epoch": 7.93, "learning_rate": 2.1636736690037687e-06, "loss": 0.0503, "step": 10143 }, { "epoch": 7.93, "learning_rate": 2.1621007827893658e-06, "loss": 0.0463, "step": 10144 }, { "epoch": 7.93, "learning_rate": 2.1605283991904445e-06, "loss": 0.0588, "step": 10145 }, { "epoch": 7.93, "learning_rate": 2.158956518307843e-06, "loss": 0.0188, "step": 10146 }, { "epoch": 7.93, "learning_rate": 2.1573851402423583e-06, "loss": 0.0493, "step": 10147 }, { "epoch": 7.93, "learning_rate": 2.1558142650947522e-06, "loss": 0.03, "step": 10148 }, { "epoch": 7.93, "learning_rate": 2.154243892965758e-06, "loss": 0.0446, "step": 10149 }, { "epoch": 7.93, "learning_rate": 2.152674023956086e-06, "loss": 0.0404, "step": 10150 }, { "epoch": 7.93, "learning_rate": 2.1511046581664007e-06, "loss": 0.1273, "step": 10151 }, { "epoch": 7.93, "learning_rate": 2.1495357956973417e-06, "loss": 0.0431, "step": 10152 }, { "epoch": 7.94, "learning_rate": 2.147967436649513e-06, "loss": 0.0758, "step": 10153 }, { "epoch": 7.94, "learning_rate": 2.1463995811234885e-06, "loss": 0.087, "step": 10154 }, { "epoch": 7.94, "learning_rate": 2.1448322292198086e-06, "loss": 0.0516, "step": 10155 }, { "epoch": 7.94, "learning_rate": 2.1432653810389813e-06, "loss": 0.1343, "step": 10156 }, { "epoch": 7.94, "learning_rate": 2.1416990366814804e-06, "loss": 0.0916, "step": 10157 }, { "epoch": 7.94, "learning_rate": 2.140133196247759e-06, "loss": 0.0488, "step": 10158 }, { "epoch": 7.94, "learning_rate": 2.1385678598382197e-06, "loss": 0.0779, "step": 10159 }, { "epoch": 7.94, "learning_rate": 2.1370030275532405e-06, "loss": 0.0345, "step": 10160 }, { "epoch": 7.94, "learning_rate": 2.1354386994931754e-06, "loss": 0.0408, "step": 10161 }, { "epoch": 7.94, "learning_rate": 2.133874875758336e-06, "loss": 0.0355, "step": 10162 }, { "epoch": 7.94, "learning_rate": 2.1323115564490036e-06, "loss": 0.0403, "step": 10163 }, { "epoch": 7.94, "learning_rate": 2.1307487416654294e-06, "loss": 0.0596, "step": 10164 }, { "epoch": 7.94, "learning_rate": 2.1291864315078304e-06, "loss": 0.0698, "step": 10165 }, { "epoch": 7.95, "learning_rate": 2.1276246260763913e-06, "loss": 0.1263, "step": 10166 }, { "epoch": 7.95, "learning_rate": 2.1260633254712647e-06, "loss": 0.0149, "step": 10167 }, { "epoch": 7.95, "learning_rate": 2.124502529792569e-06, "loss": 0.0506, "step": 10168 }, { "epoch": 7.95, "learning_rate": 2.1229422391403987e-06, "loss": 0.0365, "step": 10169 }, { "epoch": 7.95, "learning_rate": 2.1213824536148073e-06, "loss": 0.0546, "step": 10170 }, { "epoch": 7.95, "learning_rate": 2.119823173315814e-06, "loss": 0.0425, "step": 10171 }, { "epoch": 7.95, "learning_rate": 2.118264398343409e-06, "loss": 0.0453, "step": 10172 }, { "epoch": 7.95, "learning_rate": 2.1167061287975567e-06, "loss": 0.0449, "step": 10173 }, { "epoch": 7.95, "learning_rate": 2.11514836477818e-06, "loss": 0.0435, "step": 10174 }, { "epoch": 7.95, "learning_rate": 2.113591106385173e-06, "loss": 0.0827, "step": 10175 }, { "epoch": 7.95, "learning_rate": 2.1120343537183973e-06, "loss": 0.1164, "step": 10176 }, { "epoch": 7.95, "learning_rate": 2.1104781068776802e-06, "loss": 0.0459, "step": 10177 }, { "epoch": 7.95, "learning_rate": 2.1089223659628198e-06, "loss": 0.0883, "step": 10178 }, { "epoch": 7.96, "learning_rate": 2.107367131073579e-06, "loss": 0.0662, "step": 10179 }, { "epoch": 7.96, "learning_rate": 2.1058124023096872e-06, "loss": 0.0705, "step": 10180 }, { "epoch": 7.96, "learning_rate": 2.1042581797708495e-06, "loss": 0.0434, "step": 10181 }, { "epoch": 7.96, "learning_rate": 2.1027044635567272e-06, "loss": 0.0809, "step": 10182 }, { "epoch": 7.96, "learning_rate": 2.101151253766953e-06, "loss": 0.0506, "step": 10183 }, { "epoch": 7.96, "learning_rate": 2.099598550501134e-06, "loss": 0.0439, "step": 10184 }, { "epoch": 7.96, "learning_rate": 2.098046353858836e-06, "loss": 0.0784, "step": 10185 }, { "epoch": 7.96, "learning_rate": 2.0964946639395967e-06, "loss": 0.1236, "step": 10186 }, { "epoch": 7.96, "learning_rate": 2.0949434808429204e-06, "loss": 0.0396, "step": 10187 }, { "epoch": 7.96, "learning_rate": 2.093392804668277e-06, "loss": 0.0712, "step": 10188 }, { "epoch": 7.96, "learning_rate": 2.0918426355151068e-06, "loss": 0.0632, "step": 10189 }, { "epoch": 7.96, "learning_rate": 2.090292973482817e-06, "loss": 0.0417, "step": 10190 }, { "epoch": 7.96, "learning_rate": 2.0887438186707776e-06, "loss": 0.0525, "step": 10191 }, { "epoch": 7.97, "learning_rate": 2.0871951711783365e-06, "loss": 0.0286, "step": 10192 }, { "epoch": 7.97, "learning_rate": 2.0856470311048026e-06, "loss": 0.0355, "step": 10193 }, { "epoch": 7.97, "learning_rate": 2.0840993985494472e-06, "loss": 0.079, "step": 10194 }, { "epoch": 7.97, "learning_rate": 2.0825522736115143e-06, "loss": 0.062, "step": 10195 }, { "epoch": 7.97, "learning_rate": 2.0810056563902203e-06, "loss": 0.1032, "step": 10196 }, { "epoch": 7.97, "learning_rate": 2.079459546984741e-06, "loss": 0.0231, "step": 10197 }, { "epoch": 7.97, "learning_rate": 2.077913945494223e-06, "loss": 0.082, "step": 10198 }, { "epoch": 7.97, "learning_rate": 2.0763688520177803e-06, "loss": 0.0366, "step": 10199 }, { "epoch": 7.97, "learning_rate": 2.0748242666544936e-06, "loss": 0.0658, "step": 10200 }, { "epoch": 7.97, "learning_rate": 2.0732801895034117e-06, "loss": 0.0344, "step": 10201 }, { "epoch": 7.97, "learning_rate": 2.0717366206635515e-06, "loss": 0.0498, "step": 10202 }, { "epoch": 7.97, "learning_rate": 2.0701935602338917e-06, "loss": 0.0864, "step": 10203 }, { "epoch": 7.97, "learning_rate": 2.068651008313393e-06, "loss": 0.0306, "step": 10204 }, { "epoch": 7.98, "learning_rate": 2.0671089650009634e-06, "loss": 0.1127, "step": 10205 }, { "epoch": 7.98, "learning_rate": 2.0655674303954932e-06, "loss": 0.0249, "step": 10206 }, { "epoch": 7.98, "learning_rate": 2.0640264045958315e-06, "loss": 0.0652, "step": 10207 }, { "epoch": 7.98, "learning_rate": 2.0624858877008036e-06, "loss": 0.0488, "step": 10208 }, { "epoch": 7.98, "learning_rate": 2.0609458798091953e-06, "loss": 0.0305, "step": 10209 }, { "epoch": 7.98, "learning_rate": 2.0594063810197616e-06, "loss": 0.0421, "step": 10210 }, { "epoch": 7.98, "learning_rate": 2.057867391431225e-06, "loss": 0.0398, "step": 10211 }, { "epoch": 7.98, "learning_rate": 2.0563289111422758e-06, "loss": 0.0222, "step": 10212 }, { "epoch": 7.98, "learning_rate": 2.0547909402515687e-06, "loss": 0.0166, "step": 10213 }, { "epoch": 7.98, "learning_rate": 2.0532534788577296e-06, "loss": 0.0555, "step": 10214 }, { "epoch": 7.98, "learning_rate": 2.051716527059351e-06, "loss": 0.0458, "step": 10215 }, { "epoch": 7.98, "learning_rate": 2.0501800849549903e-06, "loss": 0.0668, "step": 10216 }, { "epoch": 7.99, "learning_rate": 2.0486441526431744e-06, "loss": 0.0955, "step": 10217 }, { "epoch": 7.99, "learning_rate": 2.047108730222395e-06, "loss": 0.0377, "step": 10218 }, { "epoch": 7.99, "learning_rate": 2.045573817791118e-06, "loss": 0.0534, "step": 10219 }, { "epoch": 7.99, "learning_rate": 2.0440394154477685e-06, "loss": 0.0914, "step": 10220 }, { "epoch": 7.99, "learning_rate": 2.0425055232907442e-06, "loss": 0.032, "step": 10221 }, { "epoch": 7.99, "learning_rate": 2.0409721414184026e-06, "loss": 0.0668, "step": 10222 }, { "epoch": 7.99, "learning_rate": 2.0394392699290786e-06, "loss": 0.1187, "step": 10223 }, { "epoch": 7.99, "learning_rate": 2.037906908921069e-06, "loss": 0.046, "step": 10224 }, { "epoch": 7.99, "learning_rate": 2.036375058492638e-06, "loss": 0.0241, "step": 10225 }, { "epoch": 7.99, "learning_rate": 2.0348437187420144e-06, "loss": 0.0327, "step": 10226 }, { "epoch": 7.99, "learning_rate": 2.033312889767405e-06, "loss": 0.0475, "step": 10227 }, { "epoch": 7.99, "learning_rate": 2.031782571666969e-06, "loss": 0.071, "step": 10228 }, { "epoch": 7.99, "learning_rate": 2.030252764538844e-06, "loss": 0.099, "step": 10229 }, { "epoch": 8.0, "learning_rate": 2.028723468481124e-06, "loss": 0.0613, "step": 10230 }, { "epoch": 8.0, "learning_rate": 2.0271946835918864e-06, "loss": 0.0575, "step": 10231 }, { "epoch": 8.0, "learning_rate": 2.0256664099691657e-06, "loss": 0.0387, "step": 10232 }, { "epoch": 8.0, "learning_rate": 2.0241386477109558e-06, "loss": 0.0264, "step": 10233 }, { "epoch": 8.0, "learning_rate": 2.022611396915235e-06, "loss": 0.0487, "step": 10234 }, { "epoch": 8.0, "learning_rate": 2.021084657679936e-06, "loss": 0.0318, "step": 10235 }, { "epoch": 8.0, "learning_rate": 2.0195584301029648e-06, "loss": 0.1245, "step": 10236 }, { "epoch": 8.0, "learning_rate": 2.018032714282192e-06, "loss": 0.0369, "step": 10237 }, { "epoch": 8.0, "learning_rate": 2.0165075103154563e-06, "loss": 0.016, "step": 10238 }, { "epoch": 8.0, "learning_rate": 2.0149828183005628e-06, "loss": 0.0349, "step": 10239 }, { "epoch": 8.0, "learning_rate": 2.013458638335285e-06, "loss": 0.0126, "step": 10240 }, { "epoch": 8.0, "learning_rate": 2.0119349705173607e-06, "loss": 0.0124, "step": 10241 }, { "epoch": 8.0, "learning_rate": 2.0104118149445007e-06, "loss": 0.0211, "step": 10242 }, { "epoch": 8.01, "learning_rate": 2.008889171714379e-06, "loss": 0.0258, "step": 10243 }, { "epoch": 8.01, "learning_rate": 2.007367040924637e-06, "loss": 0.0239, "step": 10244 }, { "epoch": 8.01, "learning_rate": 2.0058454226728785e-06, "loss": 0.0595, "step": 10245 }, { "epoch": 8.01, "learning_rate": 2.004324317056684e-06, "loss": 0.0206, "step": 10246 }, { "epoch": 8.01, "learning_rate": 2.0028037241735965e-06, "loss": 0.0469, "step": 10247 }, { "epoch": 8.01, "learning_rate": 2.0012836441211246e-06, "loss": 0.0526, "step": 10248 }, { "epoch": 8.01, "learning_rate": 1.999764076996742e-06, "loss": 0.0426, "step": 10249 }, { "epoch": 8.01, "learning_rate": 1.998245022897902e-06, "loss": 0.0273, "step": 10250 }, { "epoch": 8.01, "learning_rate": 1.996726481922009e-06, "loss": 0.0283, "step": 10251 }, { "epoch": 8.01, "learning_rate": 1.995208454166442e-06, "loss": 0.0409, "step": 10252 }, { "epoch": 8.01, "learning_rate": 1.9936909397285444e-06, "loss": 0.0261, "step": 10253 }, { "epoch": 8.01, "learning_rate": 1.992173938705635e-06, "loss": 0.0261, "step": 10254 }, { "epoch": 8.01, "learning_rate": 1.9906574511949927e-06, "loss": 0.0246, "step": 10255 }, { "epoch": 8.02, "learning_rate": 1.9891414772938554e-06, "loss": 0.0142, "step": 10256 }, { "epoch": 8.02, "learning_rate": 1.9876260170994464e-06, "loss": 0.0171, "step": 10257 }, { "epoch": 8.02, "learning_rate": 1.9861110707089446e-06, "loss": 0.0126, "step": 10258 }, { "epoch": 8.02, "learning_rate": 1.9845966382194958e-06, "loss": 0.0171, "step": 10259 }, { "epoch": 8.02, "learning_rate": 1.9830827197282156e-06, "loss": 0.0377, "step": 10260 }, { "epoch": 8.02, "learning_rate": 1.981569315332187e-06, "loss": 0.0226, "step": 10261 }, { "epoch": 8.02, "learning_rate": 1.980056425128457e-06, "loss": 0.0195, "step": 10262 }, { "epoch": 8.02, "learning_rate": 1.978544049214044e-06, "loss": 0.0097, "step": 10263 }, { "epoch": 8.02, "learning_rate": 1.9770321876859267e-06, "loss": 0.0091, "step": 10264 }, { "epoch": 8.02, "learning_rate": 1.975520840641062e-06, "loss": 0.036, "step": 10265 }, { "epoch": 8.02, "learning_rate": 1.9740100081763624e-06, "loss": 0.023, "step": 10266 }, { "epoch": 8.02, "learning_rate": 1.9724996903887162e-06, "loss": 0.0276, "step": 10267 }, { "epoch": 8.03, "learning_rate": 1.9709898873749668e-06, "loss": 0.037, "step": 10268 }, { "epoch": 8.03, "learning_rate": 1.9694805992319375e-06, "loss": 0.0645, "step": 10269 }, { "epoch": 8.03, "learning_rate": 1.967971826056415e-06, "loss": 0.0587, "step": 10270 }, { "epoch": 8.03, "learning_rate": 1.9664635679451484e-06, "loss": 0.0319, "step": 10271 }, { "epoch": 8.03, "learning_rate": 1.9649558249948564e-06, "loss": 0.0202, "step": 10272 }, { "epoch": 8.03, "learning_rate": 1.9634485973022266e-06, "loss": 0.0196, "step": 10273 }, { "epoch": 8.03, "learning_rate": 1.9619418849639118e-06, "loss": 0.0215, "step": 10274 }, { "epoch": 8.03, "learning_rate": 1.960435688076531e-06, "loss": 0.0653, "step": 10275 }, { "epoch": 8.03, "learning_rate": 1.9589300067366677e-06, "loss": 0.0199, "step": 10276 }, { "epoch": 8.03, "learning_rate": 1.9574248410408824e-06, "loss": 0.0241, "step": 10277 }, { "epoch": 8.03, "learning_rate": 1.9559201910856963e-06, "loss": 0.0156, "step": 10278 }, { "epoch": 8.03, "learning_rate": 1.9544160569675887e-06, "loss": 0.0194, "step": 10279 }, { "epoch": 8.03, "learning_rate": 1.95291243878302e-06, "loss": 0.0317, "step": 10280 }, { "epoch": 8.04, "learning_rate": 1.9514093366284117e-06, "loss": 0.0098, "step": 10281 }, { "epoch": 8.04, "learning_rate": 1.949906750600151e-06, "loss": 0.017, "step": 10282 }, { "epoch": 8.04, "learning_rate": 1.9484046807945934e-06, "loss": 0.0165, "step": 10283 }, { "epoch": 8.04, "learning_rate": 1.9469031273080608e-06, "loss": 0.0352, "step": 10284 }, { "epoch": 8.04, "learning_rate": 1.945402090236842e-06, "loss": 0.0214, "step": 10285 }, { "epoch": 8.04, "learning_rate": 1.9439015696771946e-06, "loss": 0.0316, "step": 10286 }, { "epoch": 8.04, "learning_rate": 1.942401565725339e-06, "loss": 0.0162, "step": 10287 }, { "epoch": 8.04, "learning_rate": 1.9409020784774645e-06, "loss": 0.0347, "step": 10288 }, { "epoch": 8.04, "learning_rate": 1.9394031080297337e-06, "loss": 0.0111, "step": 10289 }, { "epoch": 8.04, "learning_rate": 1.9379046544782643e-06, "loss": 0.0324, "step": 10290 }, { "epoch": 8.04, "learning_rate": 1.9364067179191437e-06, "loss": 0.0174, "step": 10291 }, { "epoch": 8.04, "learning_rate": 1.9349092984484374e-06, "loss": 0.0229, "step": 10292 }, { "epoch": 8.04, "learning_rate": 1.933412396162164e-06, "loss": 0.0254, "step": 10293 }, { "epoch": 8.05, "learning_rate": 1.9319160111563172e-06, "loss": 0.0425, "step": 10294 }, { "epoch": 8.05, "learning_rate": 1.930420143526852e-06, "loss": 0.0106, "step": 10295 }, { "epoch": 8.05, "learning_rate": 1.928924793369693e-06, "loss": 0.0202, "step": 10296 }, { "epoch": 8.05, "learning_rate": 1.9274299607807334e-06, "loss": 0.0326, "step": 10297 }, { "epoch": 8.05, "learning_rate": 1.92593564585583e-06, "loss": 0.0199, "step": 10298 }, { "epoch": 8.05, "learning_rate": 1.9244418486908057e-06, "loss": 0.0215, "step": 10299 }, { "epoch": 8.05, "learning_rate": 1.9229485693814564e-06, "loss": 0.02, "step": 10300 }, { "epoch": 8.05, "learning_rate": 1.921455808023541e-06, "loss": 0.0288, "step": 10301 }, { "epoch": 8.05, "learning_rate": 1.91996356471278e-06, "loss": 0.0334, "step": 10302 }, { "epoch": 8.05, "learning_rate": 1.9184718395448653e-06, "loss": 0.0306, "step": 10303 }, { "epoch": 8.05, "learning_rate": 1.916980632615462e-06, "loss": 0.0219, "step": 10304 }, { "epoch": 8.05, "learning_rate": 1.9154899440201904e-06, "loss": 0.0098, "step": 10305 }, { "epoch": 8.05, "learning_rate": 1.9139997738546433e-06, "loss": 0.0733, "step": 10306 }, { "epoch": 8.06, "learning_rate": 1.9125101222143828e-06, "loss": 0.0185, "step": 10307 }, { "epoch": 8.06, "learning_rate": 1.9110209891949317e-06, "loss": 0.0235, "step": 10308 }, { "epoch": 8.06, "learning_rate": 1.909532374891784e-06, "loss": 0.0118, "step": 10309 }, { "epoch": 8.06, "learning_rate": 1.9080442794003997e-06, "loss": 0.0222, "step": 10310 }, { "epoch": 8.06, "learning_rate": 1.906556702816199e-06, "loss": 0.0364, "step": 10311 }, { "epoch": 8.06, "learning_rate": 1.9050696452345863e-06, "loss": 0.0284, "step": 10312 }, { "epoch": 8.06, "learning_rate": 1.9035831067509114e-06, "loss": 0.0311, "step": 10313 }, { "epoch": 8.06, "learning_rate": 1.9020970874605016e-06, "loss": 0.0101, "step": 10314 }, { "epoch": 8.06, "learning_rate": 1.9006115874586527e-06, "loss": 0.0118, "step": 10315 }, { "epoch": 8.06, "learning_rate": 1.8991266068406244e-06, "loss": 0.0158, "step": 10316 }, { "epoch": 8.06, "learning_rate": 1.8976421457016414e-06, "loss": 0.0206, "step": 10317 }, { "epoch": 8.06, "learning_rate": 1.8961582041368976e-06, "loss": 0.0146, "step": 10318 }, { "epoch": 8.06, "learning_rate": 1.8946747822415522e-06, "loss": 0.0282, "step": 10319 }, { "epoch": 8.07, "learning_rate": 1.8931918801107319e-06, "loss": 0.0195, "step": 10320 }, { "epoch": 8.07, "learning_rate": 1.891709497839529e-06, "loss": 0.0219, "step": 10321 }, { "epoch": 8.07, "learning_rate": 1.890227635523001e-06, "loss": 0.0117, "step": 10322 }, { "epoch": 8.07, "learning_rate": 1.8887462932561795e-06, "loss": 0.0479, "step": 10323 }, { "epoch": 8.07, "learning_rate": 1.8872654711340588e-06, "loss": 0.0396, "step": 10324 }, { "epoch": 8.07, "learning_rate": 1.8857851692515905e-06, "loss": 0.0116, "step": 10325 }, { "epoch": 8.07, "learning_rate": 1.8843053877037032e-06, "loss": 0.0143, "step": 10326 }, { "epoch": 8.07, "learning_rate": 1.8828261265852944e-06, "loss": 0.0177, "step": 10327 }, { "epoch": 8.07, "learning_rate": 1.8813473859912201e-06, "loss": 0.0409, "step": 10328 }, { "epoch": 8.07, "learning_rate": 1.8798691660163083e-06, "loss": 0.0181, "step": 10329 }, { "epoch": 8.07, "learning_rate": 1.8783914667553493e-06, "loss": 0.0154, "step": 10330 }, { "epoch": 8.07, "learning_rate": 1.8769142883031056e-06, "loss": 0.0138, "step": 10331 }, { "epoch": 8.08, "learning_rate": 1.8754376307542998e-06, "loss": 0.0165, "step": 10332 }, { "epoch": 8.08, "learning_rate": 1.8739614942036276e-06, "loss": 0.0235, "step": 10333 }, { "epoch": 8.08, "learning_rate": 1.8724858787457444e-06, "loss": 0.0233, "step": 10334 }, { "epoch": 8.08, "learning_rate": 1.8710107844752824e-06, "loss": 0.0188, "step": 10335 }, { "epoch": 8.08, "learning_rate": 1.8695362114868275e-06, "loss": 0.023, "step": 10336 }, { "epoch": 8.08, "learning_rate": 1.8680621598749393e-06, "loss": 0.0138, "step": 10337 }, { "epoch": 8.08, "learning_rate": 1.8665886297341474e-06, "loss": 0.0291, "step": 10338 }, { "epoch": 8.08, "learning_rate": 1.8651156211589406e-06, "loss": 0.0115, "step": 10339 }, { "epoch": 8.08, "learning_rate": 1.8636431342437789e-06, "loss": 0.0487, "step": 10340 }, { "epoch": 8.08, "learning_rate": 1.8621711690830868e-06, "loss": 0.0308, "step": 10341 }, { "epoch": 8.08, "learning_rate": 1.8606997257712557e-06, "loss": 0.043, "step": 10342 }, { "epoch": 8.08, "learning_rate": 1.8592288044026441e-06, "loss": 0.0232, "step": 10343 }, { "epoch": 8.08, "learning_rate": 1.8577584050715781e-06, "loss": 0.0182, "step": 10344 }, { "epoch": 8.09, "learning_rate": 1.856288527872343e-06, "loss": 0.0168, "step": 10345 }, { "epoch": 8.09, "learning_rate": 1.8548191728992082e-06, "loss": 0.056, "step": 10346 }, { "epoch": 8.09, "learning_rate": 1.853350340246387e-06, "loss": 0.0235, "step": 10347 }, { "epoch": 8.09, "learning_rate": 1.8518820300080752e-06, "loss": 0.0368, "step": 10348 }, { "epoch": 8.09, "learning_rate": 1.8504142422784266e-06, "loss": 0.0153, "step": 10349 }, { "epoch": 8.09, "learning_rate": 1.84894697715157e-06, "loss": 0.0307, "step": 10350 }, { "epoch": 8.09, "learning_rate": 1.8474802347215937e-06, "loss": 0.0468, "step": 10351 }, { "epoch": 8.09, "learning_rate": 1.8460140150825534e-06, "loss": 0.0083, "step": 10352 }, { "epoch": 8.09, "learning_rate": 1.8445483183284728e-06, "loss": 0.0112, "step": 10353 }, { "epoch": 8.09, "learning_rate": 1.8430831445533425e-06, "loss": 0.0111, "step": 10354 }, { "epoch": 8.09, "learning_rate": 1.8416184938511173e-06, "loss": 0.0361, "step": 10355 }, { "epoch": 8.09, "learning_rate": 1.8401543663157206e-06, "loss": 0.0251, "step": 10356 }, { "epoch": 8.09, "learning_rate": 1.8386907620410388e-06, "loss": 0.0248, "step": 10357 }, { "epoch": 8.1, "learning_rate": 1.8372276811209354e-06, "loss": 0.0311, "step": 10358 }, { "epoch": 8.1, "learning_rate": 1.8357651236492235e-06, "loss": 0.0188, "step": 10359 }, { "epoch": 8.1, "learning_rate": 1.8343030897196934e-06, "loss": 0.0105, "step": 10360 }, { "epoch": 8.1, "learning_rate": 1.8328415794261033e-06, "loss": 0.0117, "step": 10361 }, { "epoch": 8.1, "learning_rate": 1.831380592862173e-06, "loss": 0.0307, "step": 10362 }, { "epoch": 8.1, "learning_rate": 1.829920130121593e-06, "loss": 0.044, "step": 10363 }, { "epoch": 8.1, "learning_rate": 1.8284601912980083e-06, "loss": 0.025, "step": 10364 }, { "epoch": 8.1, "learning_rate": 1.8270007764850472e-06, "loss": 0.0093, "step": 10365 }, { "epoch": 8.1, "learning_rate": 1.8255418857762963e-06, "loss": 0.0428, "step": 10366 }, { "epoch": 8.1, "learning_rate": 1.824083519265306e-06, "loss": 0.0587, "step": 10367 }, { "epoch": 8.1, "learning_rate": 1.8226256770455975e-06, "loss": 0.0266, "step": 10368 }, { "epoch": 8.1, "learning_rate": 1.8211683592106555e-06, "loss": 0.0178, "step": 10369 }, { "epoch": 8.1, "learning_rate": 1.8197115658539344e-06, "loss": 0.026, "step": 10370 }, { "epoch": 8.11, "learning_rate": 1.8182552970688517e-06, "loss": 0.0429, "step": 10371 }, { "epoch": 8.11, "learning_rate": 1.8167995529487904e-06, "loss": 0.0379, "step": 10372 }, { "epoch": 8.11, "learning_rate": 1.815344333587107e-06, "loss": 0.0195, "step": 10373 }, { "epoch": 8.11, "learning_rate": 1.8138896390771155e-06, "loss": 0.0287, "step": 10374 }, { "epoch": 8.11, "learning_rate": 1.812435469512105e-06, "loss": 0.0395, "step": 10375 }, { "epoch": 8.11, "learning_rate": 1.8109818249853173e-06, "loss": 0.0166, "step": 10376 }, { "epoch": 8.11, "learning_rate": 1.8095287055899756e-06, "loss": 0.0219, "step": 10377 }, { "epoch": 8.11, "learning_rate": 1.8080761114192624e-06, "loss": 0.0243, "step": 10378 }, { "epoch": 8.11, "learning_rate": 1.8066240425663272e-06, "loss": 0.0692, "step": 10379 }, { "epoch": 8.11, "learning_rate": 1.8051724991242847e-06, "loss": 0.0142, "step": 10380 }, { "epoch": 8.11, "learning_rate": 1.8037214811862169e-06, "loss": 0.0111, "step": 10381 }, { "epoch": 8.11, "learning_rate": 1.8022709888451727e-06, "loss": 0.0313, "step": 10382 }, { "epoch": 8.11, "learning_rate": 1.8008210221941668e-06, "loss": 0.0226, "step": 10383 }, { "epoch": 8.12, "learning_rate": 1.799371581326178e-06, "loss": 0.0382, "step": 10384 }, { "epoch": 8.12, "learning_rate": 1.7979226663341588e-06, "loss": 0.0505, "step": 10385 }, { "epoch": 8.12, "learning_rate": 1.796474277311021e-06, "loss": 0.035, "step": 10386 }, { "epoch": 8.12, "learning_rate": 1.7950264143496388e-06, "loss": 0.0318, "step": 10387 }, { "epoch": 8.12, "learning_rate": 1.7935790775428652e-06, "loss": 0.015, "step": 10388 }, { "epoch": 8.12, "learning_rate": 1.7921322669835106e-06, "loss": 0.0203, "step": 10389 }, { "epoch": 8.12, "learning_rate": 1.790685982764353e-06, "loss": 0.0324, "step": 10390 }, { "epoch": 8.12, "learning_rate": 1.789240224978137e-06, "loss": 0.0349, "step": 10391 }, { "epoch": 8.12, "learning_rate": 1.787794993717573e-06, "loss": 0.05, "step": 10392 }, { "epoch": 8.12, "learning_rate": 1.7863502890753403e-06, "loss": 0.0115, "step": 10393 }, { "epoch": 8.12, "learning_rate": 1.7849061111440825e-06, "loss": 0.0278, "step": 10394 }, { "epoch": 8.12, "learning_rate": 1.7834624600164042e-06, "loss": 0.0451, "step": 10395 }, { "epoch": 8.13, "learning_rate": 1.782019335784889e-06, "loss": 0.0344, "step": 10396 }, { "epoch": 8.13, "learning_rate": 1.7805767385420757e-06, "loss": 0.0427, "step": 10397 }, { "epoch": 8.13, "learning_rate": 1.7791346683804745e-06, "loss": 0.0137, "step": 10398 }, { "epoch": 8.13, "learning_rate": 1.7776931253925545e-06, "loss": 0.0315, "step": 10399 }, { "epoch": 8.13, "learning_rate": 1.7762521096707619e-06, "loss": 0.0609, "step": 10400 }, { "epoch": 8.13, "learning_rate": 1.7748116213075018e-06, "loss": 0.024, "step": 10401 }, { "epoch": 8.13, "learning_rate": 1.773371660395149e-06, "loss": 0.0105, "step": 10402 }, { "epoch": 8.13, "learning_rate": 1.7719322270260397e-06, "loss": 0.0254, "step": 10403 }, { "epoch": 8.13, "learning_rate": 1.7704933212924824e-06, "loss": 0.011, "step": 10404 }, { "epoch": 8.13, "learning_rate": 1.7690549432867476e-06, "loss": 0.0226, "step": 10405 }, { "epoch": 8.13, "learning_rate": 1.7676170931010738e-06, "loss": 0.0279, "step": 10406 }, { "epoch": 8.13, "learning_rate": 1.7661797708276617e-06, "loss": 0.042, "step": 10407 }, { "epoch": 8.13, "learning_rate": 1.7647429765586865e-06, "loss": 0.0152, "step": 10408 }, { "epoch": 8.14, "learning_rate": 1.7633067103862844e-06, "loss": 0.0292, "step": 10409 }, { "epoch": 8.14, "learning_rate": 1.7618709724025517e-06, "loss": 0.0459, "step": 10410 }, { "epoch": 8.14, "learning_rate": 1.7604357626995626e-06, "loss": 0.041, "step": 10411 }, { "epoch": 8.14, "learning_rate": 1.7590010813693504e-06, "loss": 0.0182, "step": 10412 }, { "epoch": 8.14, "learning_rate": 1.7575669285039154e-06, "loss": 0.0449, "step": 10413 }, { "epoch": 8.14, "learning_rate": 1.7561333041952245e-06, "loss": 0.0195, "step": 10414 }, { "epoch": 8.14, "learning_rate": 1.7547002085352116e-06, "loss": 0.0156, "step": 10415 }, { "epoch": 8.14, "learning_rate": 1.7532676416157746e-06, "loss": 0.0493, "step": 10416 }, { "epoch": 8.14, "learning_rate": 1.7518356035287798e-06, "loss": 0.0248, "step": 10417 }, { "epoch": 8.14, "learning_rate": 1.7504040943660561e-06, "loss": 0.0216, "step": 10418 }, { "epoch": 8.14, "learning_rate": 1.7489731142194044e-06, "loss": 0.0392, "step": 10419 }, { "epoch": 8.14, "learning_rate": 1.7475426631805892e-06, "loss": 0.0633, "step": 10420 }, { "epoch": 8.14, "learning_rate": 1.7461127413413348e-06, "loss": 0.0101, "step": 10421 }, { "epoch": 8.15, "learning_rate": 1.7446833487933367e-06, "loss": 0.0172, "step": 10422 }, { "epoch": 8.15, "learning_rate": 1.743254485628263e-06, "loss": 0.0373, "step": 10423 }, { "epoch": 8.15, "learning_rate": 1.7418261519377367e-06, "loss": 0.029, "step": 10424 }, { "epoch": 8.15, "learning_rate": 1.7403983478133523e-06, "loss": 0.0533, "step": 10425 }, { "epoch": 8.15, "learning_rate": 1.7389710733466702e-06, "loss": 0.0601, "step": 10426 }, { "epoch": 8.15, "learning_rate": 1.737544328629216e-06, "loss": 0.0507, "step": 10427 }, { "epoch": 8.15, "learning_rate": 1.7361181137524818e-06, "loss": 0.0408, "step": 10428 }, { "epoch": 8.15, "learning_rate": 1.7346924288079237e-06, "loss": 0.0194, "step": 10429 }, { "epoch": 8.15, "learning_rate": 1.733267273886966e-06, "loss": 0.0218, "step": 10430 }, { "epoch": 8.15, "learning_rate": 1.7318426490810014e-06, "loss": 0.0137, "step": 10431 }, { "epoch": 8.15, "learning_rate": 1.7304185544813868e-06, "loss": 0.0183, "step": 10432 }, { "epoch": 8.15, "learning_rate": 1.7289949901794356e-06, "loss": 0.0353, "step": 10433 }, { "epoch": 8.15, "learning_rate": 1.7275719562664451e-06, "loss": 0.0642, "step": 10434 }, { "epoch": 8.16, "learning_rate": 1.7261494528336654e-06, "loss": 0.0266, "step": 10435 }, { "epoch": 8.16, "learning_rate": 1.7247274799723157e-06, "loss": 0.0175, "step": 10436 }, { "epoch": 8.16, "learning_rate": 1.7233060377735834e-06, "loss": 0.0519, "step": 10437 }, { "epoch": 8.16, "learning_rate": 1.7218851263286196e-06, "loss": 0.0604, "step": 10438 }, { "epoch": 8.16, "learning_rate": 1.7204647457285417e-06, "loss": 0.0132, "step": 10439 }, { "epoch": 8.16, "learning_rate": 1.7190448960644346e-06, "loss": 0.0206, "step": 10440 }, { "epoch": 8.16, "learning_rate": 1.7176255774273442e-06, "loss": 0.02, "step": 10441 }, { "epoch": 8.16, "learning_rate": 1.7162067899082924e-06, "loss": 0.0225, "step": 10442 }, { "epoch": 8.16, "learning_rate": 1.7147885335982595e-06, "loss": 0.0275, "step": 10443 }, { "epoch": 8.16, "learning_rate": 1.7133708085881883e-06, "loss": 0.0495, "step": 10444 }, { "epoch": 8.16, "learning_rate": 1.7119536149689942e-06, "loss": 0.0283, "step": 10445 }, { "epoch": 8.16, "learning_rate": 1.7105369528315585e-06, "loss": 0.022, "step": 10446 }, { "epoch": 8.16, "learning_rate": 1.7091208222667267e-06, "loss": 0.0278, "step": 10447 }, { "epoch": 8.17, "learning_rate": 1.707705223365308e-06, "loss": 0.0742, "step": 10448 }, { "epoch": 8.17, "learning_rate": 1.70629015621808e-06, "loss": 0.0201, "step": 10449 }, { "epoch": 8.17, "learning_rate": 1.7048756209157869e-06, "loss": 0.0404, "step": 10450 }, { "epoch": 8.17, "learning_rate": 1.703461617549137e-06, "loss": 0.0269, "step": 10451 }, { "epoch": 8.17, "learning_rate": 1.7020481462088045e-06, "loss": 0.0324, "step": 10452 }, { "epoch": 8.17, "learning_rate": 1.7006352069854283e-06, "loss": 0.0505, "step": 10453 }, { "epoch": 8.17, "learning_rate": 1.699222799969622e-06, "loss": 0.0137, "step": 10454 }, { "epoch": 8.17, "learning_rate": 1.6978109252519504e-06, "loss": 0.011, "step": 10455 }, { "epoch": 8.17, "learning_rate": 1.6963995829229541e-06, "loss": 0.0409, "step": 10456 }, { "epoch": 8.17, "learning_rate": 1.6949887730731363e-06, "loss": 0.0709, "step": 10457 }, { "epoch": 8.17, "learning_rate": 1.6935784957929713e-06, "loss": 0.0212, "step": 10458 }, { "epoch": 8.17, "learning_rate": 1.6921687511728912e-06, "loss": 0.0104, "step": 10459 }, { "epoch": 8.18, "learning_rate": 1.6907595393032982e-06, "loss": 0.0289, "step": 10460 }, { "epoch": 8.18, "learning_rate": 1.6893508602745612e-06, "loss": 0.021, "step": 10461 }, { "epoch": 8.18, "learning_rate": 1.6879427141770133e-06, "loss": 0.0292, "step": 10462 }, { "epoch": 8.18, "learning_rate": 1.6865351011009524e-06, "loss": 0.0378, "step": 10463 }, { "epoch": 8.18, "learning_rate": 1.6851280211366439e-06, "loss": 0.0364, "step": 10464 }, { "epoch": 8.18, "learning_rate": 1.6837214743743168e-06, "loss": 0.0267, "step": 10465 }, { "epoch": 8.18, "learning_rate": 1.6823154609041748e-06, "loss": 0.0727, "step": 10466 }, { "epoch": 8.18, "learning_rate": 1.6809099808163721e-06, "loss": 0.0129, "step": 10467 }, { "epoch": 8.18, "learning_rate": 1.6795050342010388e-06, "loss": 0.0297, "step": 10468 }, { "epoch": 8.18, "learning_rate": 1.678100621148273e-06, "loss": 0.0232, "step": 10469 }, { "epoch": 8.18, "learning_rate": 1.6766967417481316e-06, "loss": 0.0126, "step": 10470 }, { "epoch": 8.18, "learning_rate": 1.6752933960906425e-06, "loss": 0.0098, "step": 10471 }, { "epoch": 8.18, "learning_rate": 1.6738905842657904e-06, "loss": 0.0651, "step": 10472 }, { "epoch": 8.19, "learning_rate": 1.67248830636354e-06, "loss": 0.0172, "step": 10473 }, { "epoch": 8.19, "learning_rate": 1.6710865624738105e-06, "loss": 0.0154, "step": 10474 }, { "epoch": 8.19, "learning_rate": 1.669685352686492e-06, "loss": 0.0348, "step": 10475 }, { "epoch": 8.19, "learning_rate": 1.668284677091435e-06, "loss": 0.0369, "step": 10476 }, { "epoch": 8.19, "learning_rate": 1.666884535778469e-06, "loss": 0.0189, "step": 10477 }, { "epoch": 8.19, "learning_rate": 1.665484928837371e-06, "loss": 0.0145, "step": 10478 }, { "epoch": 8.19, "learning_rate": 1.6640858563578943e-06, "loss": 0.0089, "step": 10479 }, { "epoch": 8.19, "learning_rate": 1.6626873184297565e-06, "loss": 0.0379, "step": 10480 }, { "epoch": 8.19, "learning_rate": 1.6612893151426424e-06, "loss": 0.0456, "step": 10481 }, { "epoch": 8.19, "learning_rate": 1.6598918465862013e-06, "loss": 0.0393, "step": 10482 }, { "epoch": 8.19, "learning_rate": 1.6584949128500448e-06, "loss": 0.0096, "step": 10483 }, { "epoch": 8.19, "learning_rate": 1.657098514023755e-06, "loss": 0.0234, "step": 10484 }, { "epoch": 8.19, "learning_rate": 1.6557026501968775e-06, "loss": 0.0103, "step": 10485 }, { "epoch": 8.2, "learning_rate": 1.6543073214589245e-06, "loss": 0.0525, "step": 10486 }, { "epoch": 8.2, "learning_rate": 1.6529125278993719e-06, "loss": 0.0251, "step": 10487 }, { "epoch": 8.2, "learning_rate": 1.6515182696076605e-06, "loss": 0.035, "step": 10488 }, { "epoch": 8.2, "learning_rate": 1.6501245466732063e-06, "loss": 0.0382, "step": 10489 }, { "epoch": 8.2, "learning_rate": 1.648731359185377e-06, "loss": 0.0328, "step": 10490 }, { "epoch": 8.2, "learning_rate": 1.6473387072335124e-06, "loss": 0.0106, "step": 10491 }, { "epoch": 8.2, "learning_rate": 1.6459465909069228e-06, "loss": 0.0298, "step": 10492 }, { "epoch": 8.2, "learning_rate": 1.644555010294876e-06, "loss": 0.0233, "step": 10493 }, { "epoch": 8.2, "learning_rate": 1.6431639654866126e-06, "loss": 0.015, "step": 10494 }, { "epoch": 8.2, "learning_rate": 1.6417734565713283e-06, "loss": 0.0266, "step": 10495 }, { "epoch": 8.2, "learning_rate": 1.640383483638197e-06, "loss": 0.0562, "step": 10496 }, { "epoch": 8.2, "learning_rate": 1.638994046776351e-06, "loss": 0.0215, "step": 10497 }, { "epoch": 8.2, "learning_rate": 1.6376051460748898e-06, "loss": 0.0186, "step": 10498 }, { "epoch": 8.21, "learning_rate": 1.6362167816228768e-06, "loss": 0.0241, "step": 10499 }, { "epoch": 8.21, "learning_rate": 1.6348289535093475e-06, "loss": 0.0304, "step": 10500 }, { "epoch": 8.21, "learning_rate": 1.633441661823294e-06, "loss": 0.0499, "step": 10501 }, { "epoch": 8.21, "learning_rate": 1.6320549066536773e-06, "loss": 0.0476, "step": 10502 }, { "epoch": 8.21, "learning_rate": 1.6306686880894251e-06, "loss": 0.0163, "step": 10503 }, { "epoch": 8.21, "learning_rate": 1.6292830062194353e-06, "loss": 0.0452, "step": 10504 }, { "epoch": 8.21, "learning_rate": 1.6278978611325624e-06, "loss": 0.0181, "step": 10505 }, { "epoch": 8.21, "learning_rate": 1.6265132529176308e-06, "loss": 0.0092, "step": 10506 }, { "epoch": 8.21, "learning_rate": 1.6251291816634318e-06, "loss": 0.0219, "step": 10507 }, { "epoch": 8.21, "learning_rate": 1.623745647458721e-06, "loss": 0.0578, "step": 10508 }, { "epoch": 8.21, "learning_rate": 1.6223626503922162e-06, "loss": 0.0208, "step": 10509 }, { "epoch": 8.21, "learning_rate": 1.6209801905526078e-06, "loss": 0.0219, "step": 10510 }, { "epoch": 8.21, "learning_rate": 1.619598268028545e-06, "loss": 0.0251, "step": 10511 }, { "epoch": 8.22, "learning_rate": 1.6182168829086464e-06, "loss": 0.0472, "step": 10512 }, { "epoch": 8.22, "learning_rate": 1.616836035281496e-06, "loss": 0.0427, "step": 10513 }, { "epoch": 8.22, "learning_rate": 1.6154557252356385e-06, "loss": 0.0115, "step": 10514 }, { "epoch": 8.22, "learning_rate": 1.614075952859593e-06, "loss": 0.015, "step": 10515 }, { "epoch": 8.22, "learning_rate": 1.6126967182418385e-06, "loss": 0.0255, "step": 10516 }, { "epoch": 8.22, "learning_rate": 1.6113180214708213e-06, "loss": 0.0407, "step": 10517 }, { "epoch": 8.22, "learning_rate": 1.6099398626349437e-06, "loss": 0.0202, "step": 10518 }, { "epoch": 8.22, "learning_rate": 1.608562241822592e-06, "loss": 0.0119, "step": 10519 }, { "epoch": 8.22, "learning_rate": 1.6071851591221033e-06, "loss": 0.0421, "step": 10520 }, { "epoch": 8.22, "learning_rate": 1.6058086146217866e-06, "loss": 0.0398, "step": 10521 }, { "epoch": 8.22, "learning_rate": 1.6044326084099105e-06, "loss": 0.0391, "step": 10522 }, { "epoch": 8.22, "learning_rate": 1.6030571405747209e-06, "loss": 0.0278, "step": 10523 }, { "epoch": 8.23, "learning_rate": 1.6016822112044138e-06, "loss": 0.0286, "step": 10524 }, { "epoch": 8.23, "learning_rate": 1.6003078203871624e-06, "loss": 0.0392, "step": 10525 }, { "epoch": 8.23, "learning_rate": 1.5989339682110982e-06, "loss": 0.0314, "step": 10526 }, { "epoch": 8.23, "learning_rate": 1.5975606547643252e-06, "loss": 0.0376, "step": 10527 }, { "epoch": 8.23, "learning_rate": 1.5961878801349084e-06, "loss": 0.0368, "step": 10528 }, { "epoch": 8.23, "learning_rate": 1.5948156444108764e-06, "loss": 0.041, "step": 10529 }, { "epoch": 8.23, "learning_rate": 1.5934439476802233e-06, "loss": 0.0139, "step": 10530 }, { "epoch": 8.23, "learning_rate": 1.5920727900309175e-06, "loss": 0.0689, "step": 10531 }, { "epoch": 8.23, "learning_rate": 1.5907021715508819e-06, "loss": 0.031, "step": 10532 }, { "epoch": 8.23, "learning_rate": 1.5893320923280109e-06, "loss": 0.0143, "step": 10533 }, { "epoch": 8.23, "learning_rate": 1.5879625524501618e-06, "loss": 0.0482, "step": 10534 }, { "epoch": 8.23, "learning_rate": 1.586593552005159e-06, "loss": 0.0099, "step": 10535 }, { "epoch": 8.23, "learning_rate": 1.585225091080791e-06, "loss": 0.0277, "step": 10536 }, { "epoch": 8.24, "learning_rate": 1.5838571697648108e-06, "loss": 0.047, "step": 10537 }, { "epoch": 8.24, "learning_rate": 1.5824897881449374e-06, "loss": 0.0367, "step": 10538 }, { "epoch": 8.24, "learning_rate": 1.5811229463088596e-06, "loss": 0.0147, "step": 10539 }, { "epoch": 8.24, "learning_rate": 1.579756644344229e-06, "loss": 0.0108, "step": 10540 }, { "epoch": 8.24, "learning_rate": 1.5783908823386541e-06, "loss": 0.0632, "step": 10541 }, { "epoch": 8.24, "learning_rate": 1.577025660379722e-06, "loss": 0.0555, "step": 10542 }, { "epoch": 8.24, "learning_rate": 1.5756609785549782e-06, "loss": 0.0229, "step": 10543 }, { "epoch": 8.24, "learning_rate": 1.574296836951935e-06, "loss": 0.0214, "step": 10544 }, { "epoch": 8.24, "learning_rate": 1.5729332356580683e-06, "loss": 0.0427, "step": 10545 }, { "epoch": 8.24, "learning_rate": 1.5715701747608236e-06, "loss": 0.0212, "step": 10546 }, { "epoch": 8.24, "learning_rate": 1.5702076543476052e-06, "loss": 0.0588, "step": 10547 }, { "epoch": 8.24, "learning_rate": 1.568845674505789e-06, "loss": 0.0259, "step": 10548 }, { "epoch": 8.24, "learning_rate": 1.5674842353227104e-06, "loss": 0.0215, "step": 10549 }, { "epoch": 8.25, "learning_rate": 1.5661233368856798e-06, "loss": 0.0389, "step": 10550 }, { "epoch": 8.25, "learning_rate": 1.564762979281964e-06, "loss": 0.0223, "step": 10551 }, { "epoch": 8.25, "learning_rate": 1.5634031625987944e-06, "loss": 0.03, "step": 10552 }, { "epoch": 8.25, "learning_rate": 1.5620438869233711e-06, "loss": 0.0177, "step": 10553 }, { "epoch": 8.25, "learning_rate": 1.5606851523428646e-06, "loss": 0.0286, "step": 10554 }, { "epoch": 8.25, "learning_rate": 1.5593269589444015e-06, "loss": 0.0464, "step": 10555 }, { "epoch": 8.25, "learning_rate": 1.5579693068150793e-06, "loss": 0.0446, "step": 10556 }, { "epoch": 8.25, "learning_rate": 1.5566121960419589e-06, "loss": 0.0155, "step": 10557 }, { "epoch": 8.25, "learning_rate": 1.5552556267120656e-06, "loss": 0.013, "step": 10558 }, { "epoch": 8.25, "learning_rate": 1.5538995989123929e-06, "loss": 0.0175, "step": 10559 }, { "epoch": 8.25, "learning_rate": 1.5525441127298969e-06, "loss": 0.0241, "step": 10560 }, { "epoch": 8.25, "learning_rate": 1.5511891682514967e-06, "loss": 0.0105, "step": 10561 }, { "epoch": 8.25, "learning_rate": 1.5498347655640866e-06, "loss": 0.0248, "step": 10562 }, { "epoch": 8.26, "learning_rate": 1.548480904754518e-06, "loss": 0.0304, "step": 10563 }, { "epoch": 8.26, "learning_rate": 1.5471275859096024e-06, "loss": 0.0169, "step": 10564 }, { "epoch": 8.26, "learning_rate": 1.5457748091161306e-06, "loss": 0.0241, "step": 10565 }, { "epoch": 8.26, "learning_rate": 1.5444225744608487e-06, "loss": 0.0624, "step": 10566 }, { "epoch": 8.26, "learning_rate": 1.5430708820304686e-06, "loss": 0.0169, "step": 10567 }, { "epoch": 8.26, "learning_rate": 1.5417197319116729e-06, "loss": 0.0355, "step": 10568 }, { "epoch": 8.26, "learning_rate": 1.5403691241911022e-06, "loss": 0.029, "step": 10569 }, { "epoch": 8.26, "learning_rate": 1.5390190589553677e-06, "loss": 0.0082, "step": 10570 }, { "epoch": 8.26, "learning_rate": 1.5376695362910453e-06, "loss": 0.0389, "step": 10571 }, { "epoch": 8.26, "learning_rate": 1.5363205562846707e-06, "loss": 0.061, "step": 10572 }, { "epoch": 8.26, "learning_rate": 1.534972119022754e-06, "loss": 0.0112, "step": 10573 }, { "epoch": 8.26, "learning_rate": 1.5336242245917654e-06, "loss": 0.0383, "step": 10574 }, { "epoch": 8.26, "learning_rate": 1.5322768730781369e-06, "loss": 0.0342, "step": 10575 }, { "epoch": 8.27, "learning_rate": 1.5309300645682679e-06, "loss": 0.0837, "step": 10576 }, { "epoch": 8.27, "learning_rate": 1.5295837991485284e-06, "loss": 0.0231, "step": 10577 }, { "epoch": 8.27, "learning_rate": 1.5282380769052496e-06, "loss": 0.0283, "step": 10578 }, { "epoch": 8.27, "learning_rate": 1.5268928979247255e-06, "loss": 0.0318, "step": 10579 }, { "epoch": 8.27, "learning_rate": 1.5255482622932182e-06, "loss": 0.0364, "step": 10580 }, { "epoch": 8.27, "learning_rate": 1.5242041700969533e-06, "loss": 0.0344, "step": 10581 }, { "epoch": 8.27, "learning_rate": 1.5228606214221242e-06, "loss": 0.0188, "step": 10582 }, { "epoch": 8.27, "learning_rate": 1.5215176163548872e-06, "loss": 0.0269, "step": 10583 }, { "epoch": 8.27, "learning_rate": 1.5201751549813605e-06, "loss": 0.0472, "step": 10584 }, { "epoch": 8.27, "learning_rate": 1.518833237387639e-06, "loss": 0.019, "step": 10585 }, { "epoch": 8.27, "learning_rate": 1.517491863659769e-06, "loss": 0.0081, "step": 10586 }, { "epoch": 8.27, "learning_rate": 1.516151033883766e-06, "loss": 0.0346, "step": 10587 }, { "epoch": 8.28, "learning_rate": 1.5148107481456186e-06, "loss": 0.0087, "step": 10588 }, { "epoch": 8.28, "learning_rate": 1.5134710065312708e-06, "loss": 0.0174, "step": 10589 }, { "epoch": 8.28, "learning_rate": 1.512131809126637e-06, "loss": 0.0172, "step": 10590 }, { "epoch": 8.28, "learning_rate": 1.5107931560175937e-06, "loss": 0.0314, "step": 10591 }, { "epoch": 8.28, "learning_rate": 1.5094550472899828e-06, "loss": 0.0163, "step": 10592 }, { "epoch": 8.28, "learning_rate": 1.5081174830296142e-06, "loss": 0.0902, "step": 10593 }, { "epoch": 8.28, "learning_rate": 1.5067804633222605e-06, "loss": 0.0146, "step": 10594 }, { "epoch": 8.28, "learning_rate": 1.5054439882536564e-06, "loss": 0.0131, "step": 10595 }, { "epoch": 8.28, "learning_rate": 1.5041080579095114e-06, "loss": 0.0097, "step": 10596 }, { "epoch": 8.28, "learning_rate": 1.502772672375492e-06, "loss": 0.0349, "step": 10597 }, { "epoch": 8.28, "learning_rate": 1.5014378317372292e-06, "loss": 0.0143, "step": 10598 }, { "epoch": 8.28, "learning_rate": 1.5001035360803184e-06, "loss": 0.0539, "step": 10599 }, { "epoch": 8.28, "learning_rate": 1.4987697854903306e-06, "loss": 0.0166, "step": 10600 }, { "epoch": 8.29, "learning_rate": 1.49743658005279e-06, "loss": 0.0374, "step": 10601 }, { "epoch": 8.29, "learning_rate": 1.4961039198531925e-06, "loss": 0.0192, "step": 10602 }, { "epoch": 8.29, "learning_rate": 1.494771804976991e-06, "loss": 0.0122, "step": 10603 }, { "epoch": 8.29, "learning_rate": 1.4934402355096156e-06, "loss": 0.0392, "step": 10604 }, { "epoch": 8.29, "learning_rate": 1.4921092115364522e-06, "loss": 0.033, "step": 10605 }, { "epoch": 8.29, "learning_rate": 1.4907787331428547e-06, "loss": 0.0684, "step": 10606 }, { "epoch": 8.29, "learning_rate": 1.4894488004141395e-06, "loss": 0.0112, "step": 10607 }, { "epoch": 8.29, "learning_rate": 1.4881194134355958e-06, "loss": 0.0296, "step": 10608 }, { "epoch": 8.29, "learning_rate": 1.486790572292468e-06, "loss": 0.0235, "step": 10609 }, { "epoch": 8.29, "learning_rate": 1.48546227706997e-06, "loss": 0.0324, "step": 10610 }, { "epoch": 8.29, "learning_rate": 1.4841345278532793e-06, "loss": 0.0235, "step": 10611 }, { "epoch": 8.29, "learning_rate": 1.4828073247275432e-06, "loss": 0.021, "step": 10612 }, { "epoch": 8.29, "learning_rate": 1.4814806677778693e-06, "loss": 0.0231, "step": 10613 }, { "epoch": 8.3, "learning_rate": 1.4801545570893294e-06, "loss": 0.0554, "step": 10614 }, { "epoch": 8.3, "learning_rate": 1.4788289927469645e-06, "loss": 0.0293, "step": 10615 }, { "epoch": 8.3, "learning_rate": 1.4775039748357755e-06, "loss": 0.0257, "step": 10616 }, { "epoch": 8.3, "learning_rate": 1.4761795034407322e-06, "loss": 0.011, "step": 10617 }, { "epoch": 8.3, "learning_rate": 1.4748555786467688e-06, "loss": 0.0174, "step": 10618 }, { "epoch": 8.3, "learning_rate": 1.473532200538782e-06, "loss": 0.0282, "step": 10619 }, { "epoch": 8.3, "learning_rate": 1.4722093692016358e-06, "loss": 0.019, "step": 10620 }, { "epoch": 8.3, "learning_rate": 1.4708870847201596e-06, "loss": 0.0514, "step": 10621 }, { "epoch": 8.3, "learning_rate": 1.4695653471791437e-06, "loss": 0.0277, "step": 10622 }, { "epoch": 8.3, "learning_rate": 1.4682441566633499e-06, "loss": 0.0242, "step": 10623 }, { "epoch": 8.3, "learning_rate": 1.4669235132574988e-06, "loss": 0.0206, "step": 10624 }, { "epoch": 8.3, "learning_rate": 1.465603417046283e-06, "loss": 0.0145, "step": 10625 }, { "epoch": 8.3, "learning_rate": 1.4642838681143457e-06, "loss": 0.039, "step": 10626 }, { "epoch": 8.31, "learning_rate": 1.4629648665463136e-06, "loss": 0.0135, "step": 10627 }, { "epoch": 8.31, "learning_rate": 1.461646412426766e-06, "loss": 0.0298, "step": 10628 }, { "epoch": 8.31, "learning_rate": 1.4603285058402516e-06, "loss": 0.0217, "step": 10629 }, { "epoch": 8.31, "learning_rate": 1.4590111468712798e-06, "loss": 0.0549, "step": 10630 }, { "epoch": 8.31, "learning_rate": 1.4576943356043338e-06, "loss": 0.0228, "step": 10631 }, { "epoch": 8.31, "learning_rate": 1.4563780721238508e-06, "loss": 0.0172, "step": 10632 }, { "epoch": 8.31, "learning_rate": 1.4550623565142397e-06, "loss": 0.0253, "step": 10633 }, { "epoch": 8.31, "learning_rate": 1.4537471888598687e-06, "loss": 0.0241, "step": 10634 }, { "epoch": 8.31, "learning_rate": 1.4524325692450813e-06, "loss": 0.0558, "step": 10635 }, { "epoch": 8.31, "learning_rate": 1.451118497754176e-06, "loss": 0.0386, "step": 10636 }, { "epoch": 8.31, "learning_rate": 1.4498049744714193e-06, "loss": 0.0585, "step": 10637 }, { "epoch": 8.31, "learning_rate": 1.4484919994810432e-06, "loss": 0.0624, "step": 10638 }, { "epoch": 8.31, "learning_rate": 1.4471795728672422e-06, "loss": 0.0232, "step": 10639 }, { "epoch": 8.32, "learning_rate": 1.4458676947141791e-06, "loss": 0.0221, "step": 10640 }, { "epoch": 8.32, "learning_rate": 1.444556365105979e-06, "loss": 0.0277, "step": 10641 }, { "epoch": 8.32, "learning_rate": 1.4432455841267335e-06, "loss": 0.0154, "step": 10642 }, { "epoch": 8.32, "learning_rate": 1.4419353518604973e-06, "loss": 0.0508, "step": 10643 }, { "epoch": 8.32, "learning_rate": 1.440625668391291e-06, "loss": 0.0255, "step": 10644 }, { "epoch": 8.32, "learning_rate": 1.4393165338030968e-06, "loss": 0.0188, "step": 10645 }, { "epoch": 8.32, "learning_rate": 1.4380079481798703e-06, "loss": 0.0271, "step": 10646 }, { "epoch": 8.32, "learning_rate": 1.4366999116055237e-06, "loss": 0.035, "step": 10647 }, { "epoch": 8.32, "learning_rate": 1.4353924241639382e-06, "loss": 0.0363, "step": 10648 }, { "epoch": 8.32, "learning_rate": 1.434085485938953e-06, "loss": 0.0147, "step": 10649 }, { "epoch": 8.32, "learning_rate": 1.4327790970143817e-06, "loss": 0.1099, "step": 10650 }, { "epoch": 8.32, "learning_rate": 1.4314732574739965e-06, "loss": 0.061, "step": 10651 }, { "epoch": 8.33, "learning_rate": 1.430167967401538e-06, "loss": 0.0539, "step": 10652 }, { "epoch": 8.33, "learning_rate": 1.4288632268807046e-06, "loss": 0.0139, "step": 10653 }, { "epoch": 8.33, "learning_rate": 1.427559035995173e-06, "loss": 0.0119, "step": 10654 }, { "epoch": 8.33, "learning_rate": 1.4262553948285695e-06, "loss": 0.0275, "step": 10655 }, { "epoch": 8.33, "learning_rate": 1.424952303464493e-06, "loss": 0.0252, "step": 10656 }, { "epoch": 8.33, "learning_rate": 1.4236497619865041e-06, "loss": 0.0199, "step": 10657 }, { "epoch": 8.33, "learning_rate": 1.4223477704781353e-06, "loss": 0.0473, "step": 10658 }, { "epoch": 8.33, "learning_rate": 1.4210463290228771e-06, "loss": 0.044, "step": 10659 }, { "epoch": 8.33, "learning_rate": 1.4197454377041797e-06, "loss": 0.0113, "step": 10660 }, { "epoch": 8.33, "learning_rate": 1.418445096605472e-06, "loss": 0.0216, "step": 10661 }, { "epoch": 8.33, "learning_rate": 1.417145305810137e-06, "loss": 0.0257, "step": 10662 }, { "epoch": 8.33, "learning_rate": 1.415846065401526e-06, "loss": 0.0275, "step": 10663 }, { "epoch": 8.33, "learning_rate": 1.4145473754629545e-06, "loss": 0.0626, "step": 10664 }, { "epoch": 8.34, "learning_rate": 1.4132492360777017e-06, "loss": 0.0209, "step": 10665 }, { "epoch": 8.34, "learning_rate": 1.411951647329014e-06, "loss": 0.0208, "step": 10666 }, { "epoch": 8.34, "learning_rate": 1.4106546093001005e-06, "loss": 0.0173, "step": 10667 }, { "epoch": 8.34, "learning_rate": 1.4093581220741327e-06, "loss": 0.0108, "step": 10668 }, { "epoch": 8.34, "learning_rate": 1.4080621857342535e-06, "loss": 0.028, "step": 10669 }, { "epoch": 8.34, "learning_rate": 1.4067668003635659e-06, "loss": 0.025, "step": 10670 }, { "epoch": 8.34, "learning_rate": 1.405471966045139e-06, "loss": 0.0567, "step": 10671 }, { "epoch": 8.34, "learning_rate": 1.4041776828619991e-06, "loss": 0.0307, "step": 10672 }, { "epoch": 8.34, "learning_rate": 1.4028839508971514e-06, "loss": 0.0144, "step": 10673 }, { "epoch": 8.34, "learning_rate": 1.4015907702335562e-06, "loss": 0.0244, "step": 10674 }, { "epoch": 8.34, "learning_rate": 1.4002981409541383e-06, "loss": 0.0425, "step": 10675 }, { "epoch": 8.34, "learning_rate": 1.3990060631417911e-06, "loss": 0.0127, "step": 10676 }, { "epoch": 8.34, "learning_rate": 1.397714536879371e-06, "loss": 0.05, "step": 10677 }, { "epoch": 8.35, "learning_rate": 1.3964235622496969e-06, "loss": 0.0282, "step": 10678 }, { "epoch": 8.35, "learning_rate": 1.3951331393355562e-06, "loss": 0.0223, "step": 10679 }, { "epoch": 8.35, "learning_rate": 1.3938432682196957e-06, "loss": 0.018, "step": 10680 }, { "epoch": 8.35, "learning_rate": 1.392553948984835e-06, "loss": 0.0283, "step": 10681 }, { "epoch": 8.35, "learning_rate": 1.3912651817136525e-06, "loss": 0.0455, "step": 10682 }, { "epoch": 8.35, "learning_rate": 1.3899769664887885e-06, "loss": 0.0354, "step": 10683 }, { "epoch": 8.35, "learning_rate": 1.3886893033928505e-06, "loss": 0.043, "step": 10684 }, { "epoch": 8.35, "learning_rate": 1.387402192508418e-06, "loss": 0.0165, "step": 10685 }, { "epoch": 8.35, "learning_rate": 1.3861156339180236e-06, "loss": 0.0457, "step": 10686 }, { "epoch": 8.35, "learning_rate": 1.3848296277041718e-06, "loss": 0.0765, "step": 10687 }, { "epoch": 8.35, "learning_rate": 1.3835441739493293e-06, "loss": 0.0098, "step": 10688 }, { "epoch": 8.35, "learning_rate": 1.3822592727359263e-06, "loss": 0.046, "step": 10689 }, { "epoch": 8.35, "learning_rate": 1.38097492414636e-06, "loss": 0.0575, "step": 10690 }, { "epoch": 8.36, "learning_rate": 1.3796911282629899e-06, "loss": 0.0614, "step": 10691 }, { "epoch": 8.36, "learning_rate": 1.378407885168138e-06, "loss": 0.0193, "step": 10692 }, { "epoch": 8.36, "learning_rate": 1.3771251949441034e-06, "loss": 0.0173, "step": 10693 }, { "epoch": 8.36, "learning_rate": 1.3758430576731318e-06, "loss": 0.0385, "step": 10694 }, { "epoch": 8.36, "learning_rate": 1.3745614734374424e-06, "loss": 0.0357, "step": 10695 }, { "epoch": 8.36, "learning_rate": 1.3732804423192226e-06, "loss": 0.0131, "step": 10696 }, { "epoch": 8.36, "learning_rate": 1.3719999644006176e-06, "loss": 0.0452, "step": 10697 }, { "epoch": 8.36, "learning_rate": 1.37072003976374e-06, "loss": 0.0105, "step": 10698 }, { "epoch": 8.36, "learning_rate": 1.369440668490667e-06, "loss": 0.035, "step": 10699 }, { "epoch": 8.36, "learning_rate": 1.3681618506634398e-06, "loss": 0.0236, "step": 10700 }, { "epoch": 8.36, "learning_rate": 1.366883586364064e-06, "loss": 0.0283, "step": 10701 }, { "epoch": 8.36, "learning_rate": 1.3656058756745105e-06, "loss": 0.0306, "step": 10702 }, { "epoch": 8.36, "learning_rate": 1.3643287186767118e-06, "loss": 0.0567, "step": 10703 }, { "epoch": 8.37, "learning_rate": 1.3630521154525711e-06, "loss": 0.0208, "step": 10704 }, { "epoch": 8.37, "learning_rate": 1.361776066083953e-06, "loss": 0.0654, "step": 10705 }, { "epoch": 8.37, "learning_rate": 1.360500570652681e-06, "loss": 0.0091, "step": 10706 }, { "epoch": 8.37, "learning_rate": 1.359225629240547e-06, "loss": 0.023, "step": 10707 }, { "epoch": 8.37, "learning_rate": 1.357951241929315e-06, "loss": 0.0393, "step": 10708 }, { "epoch": 8.37, "learning_rate": 1.3566774088007018e-06, "loss": 0.0565, "step": 10709 }, { "epoch": 8.37, "learning_rate": 1.3554041299363962e-06, "loss": 0.0322, "step": 10710 }, { "epoch": 8.37, "learning_rate": 1.3541314054180465e-06, "loss": 0.1402, "step": 10711 }, { "epoch": 8.37, "learning_rate": 1.3528592353272696e-06, "loss": 0.0926, "step": 10712 }, { "epoch": 8.37, "learning_rate": 1.3515876197456446e-06, "loss": 0.0267, "step": 10713 }, { "epoch": 8.37, "learning_rate": 1.3503165587547152e-06, "loss": 0.0385, "step": 10714 }, { "epoch": 8.37, "learning_rate": 1.3490460524359872e-06, "loss": 0.0129, "step": 10715 }, { "epoch": 8.38, "learning_rate": 1.347776100870941e-06, "loss": 0.0231, "step": 10716 }, { "epoch": 8.38, "learning_rate": 1.3465067041410074e-06, "loss": 0.0274, "step": 10717 }, { "epoch": 8.38, "learning_rate": 1.3452378623275874e-06, "loss": 0.0167, "step": 10718 }, { "epoch": 8.38, "learning_rate": 1.3439695755120518e-06, "loss": 0.0296, "step": 10719 }, { "epoch": 8.38, "learning_rate": 1.342701843775729e-06, "loss": 0.0165, "step": 10720 }, { "epoch": 8.38, "learning_rate": 1.3414346671999134e-06, "loss": 0.0281, "step": 10721 }, { "epoch": 8.38, "learning_rate": 1.340168045865865e-06, "loss": 0.0481, "step": 10722 }, { "epoch": 8.38, "learning_rate": 1.3389019798548065e-06, "loss": 0.0457, "step": 10723 }, { "epoch": 8.38, "learning_rate": 1.3376364692479282e-06, "loss": 0.0163, "step": 10724 }, { "epoch": 8.38, "learning_rate": 1.3363715141263812e-06, "loss": 0.042, "step": 10725 }, { "epoch": 8.38, "learning_rate": 1.3351071145712792e-06, "loss": 0.0309, "step": 10726 }, { "epoch": 8.38, "learning_rate": 1.3338432706637095e-06, "loss": 0.0634, "step": 10727 }, { "epoch": 8.38, "learning_rate": 1.3325799824847173e-06, "loss": 0.0266, "step": 10728 }, { "epoch": 8.39, "learning_rate": 1.3313172501153083e-06, "loss": 0.0746, "step": 10729 }, { "epoch": 8.39, "learning_rate": 1.3300550736364571e-06, "loss": 0.0248, "step": 10730 }, { "epoch": 8.39, "learning_rate": 1.3287934531291058e-06, "loss": 0.0597, "step": 10731 }, { "epoch": 8.39, "learning_rate": 1.3275323886741554e-06, "loss": 0.0548, "step": 10732 }, { "epoch": 8.39, "learning_rate": 1.3262718803524743e-06, "loss": 0.0348, "step": 10733 }, { "epoch": 8.39, "learning_rate": 1.3250119282448936e-06, "loss": 0.0252, "step": 10734 }, { "epoch": 8.39, "learning_rate": 1.3237525324322086e-06, "loss": 0.012, "step": 10735 }, { "epoch": 8.39, "learning_rate": 1.3224936929951816e-06, "loss": 0.0193, "step": 10736 }, { "epoch": 8.39, "learning_rate": 1.3212354100145352e-06, "loss": 0.0101, "step": 10737 }, { "epoch": 8.39, "learning_rate": 1.3199776835709577e-06, "loss": 0.0201, "step": 10738 }, { "epoch": 8.39, "learning_rate": 1.3187205137451086e-06, "loss": 0.0276, "step": 10739 }, { "epoch": 8.39, "learning_rate": 1.3174639006175993e-06, "loss": 0.0291, "step": 10740 }, { "epoch": 8.39, "learning_rate": 1.3162078442690118e-06, "loss": 0.0153, "step": 10741 }, { "epoch": 8.4, "learning_rate": 1.314952344779895e-06, "loss": 0.0352, "step": 10742 }, { "epoch": 8.4, "learning_rate": 1.3136974022307603e-06, "loss": 0.0173, "step": 10743 }, { "epoch": 8.4, "learning_rate": 1.3124430167020797e-06, "loss": 0.0132, "step": 10744 }, { "epoch": 8.4, "learning_rate": 1.3111891882742934e-06, "loss": 0.0281, "step": 10745 }, { "epoch": 8.4, "learning_rate": 1.3099359170278048e-06, "loss": 0.0338, "step": 10746 }, { "epoch": 8.4, "learning_rate": 1.3086832030429819e-06, "loss": 0.0387, "step": 10747 }, { "epoch": 8.4, "learning_rate": 1.307431046400156e-06, "loss": 0.0186, "step": 10748 }, { "epoch": 8.4, "learning_rate": 1.3061794471796207e-06, "loss": 0.0528, "step": 10749 }, { "epoch": 8.4, "learning_rate": 1.304928405461645e-06, "loss": 0.0245, "step": 10750 }, { "epoch": 8.4, "learning_rate": 1.303677921326445e-06, "loss": 0.0438, "step": 10751 }, { "epoch": 8.4, "learning_rate": 1.3024279948542117e-06, "loss": 0.0384, "step": 10752 }, { "epoch": 8.4, "learning_rate": 1.301178626125098e-06, "loss": 0.008, "step": 10753 }, { "epoch": 8.4, "learning_rate": 1.299929815219224e-06, "loss": 0.0479, "step": 10754 }, { "epoch": 8.41, "learning_rate": 1.2986815622166693e-06, "loss": 0.0291, "step": 10755 }, { "epoch": 8.41, "learning_rate": 1.2974338671974806e-06, "loss": 0.019, "step": 10756 }, { "epoch": 8.41, "learning_rate": 1.2961867302416675e-06, "loss": 0.0431, "step": 10757 }, { "epoch": 8.41, "learning_rate": 1.2949401514292036e-06, "loss": 0.0568, "step": 10758 }, { "epoch": 8.41, "learning_rate": 1.2936941308400285e-06, "loss": 0.0301, "step": 10759 }, { "epoch": 8.41, "learning_rate": 1.292448668554045e-06, "loss": 0.0242, "step": 10760 }, { "epoch": 8.41, "learning_rate": 1.2912037646511167e-06, "loss": 0.0247, "step": 10761 }, { "epoch": 8.41, "learning_rate": 1.289959419211082e-06, "loss": 0.0414, "step": 10762 }, { "epoch": 8.41, "learning_rate": 1.2887156323137285e-06, "loss": 0.0247, "step": 10763 }, { "epoch": 8.41, "learning_rate": 1.2874724040388197e-06, "loss": 0.0353, "step": 10764 }, { "epoch": 8.41, "learning_rate": 1.2862297344660758e-06, "loss": 0.0239, "step": 10765 }, { "epoch": 8.41, "learning_rate": 1.284987623675189e-06, "loss": 0.0229, "step": 10766 }, { "epoch": 8.42, "learning_rate": 1.2837460717458117e-06, "loss": 0.0272, "step": 10767 }, { "epoch": 8.42, "learning_rate": 1.2825050787575532e-06, "loss": 0.0716, "step": 10768 }, { "epoch": 8.42, "learning_rate": 1.28126464479e-06, "loss": 0.0275, "step": 10769 }, { "epoch": 8.42, "learning_rate": 1.2800247699226952e-06, "loss": 0.0626, "step": 10770 }, { "epoch": 8.42, "learning_rate": 1.2787854542351464e-06, "loss": 0.0409, "step": 10771 }, { "epoch": 8.42, "learning_rate": 1.2775466978068274e-06, "loss": 0.0266, "step": 10772 }, { "epoch": 8.42, "learning_rate": 1.2763085007171748e-06, "loss": 0.0369, "step": 10773 }, { "epoch": 8.42, "learning_rate": 1.27507086304559e-06, "loss": 0.0243, "step": 10774 }, { "epoch": 8.42, "learning_rate": 1.2738337848714365e-06, "loss": 0.0432, "step": 10775 }, { "epoch": 8.42, "learning_rate": 1.2725972662740416e-06, "loss": 0.0467, "step": 10776 }, { "epoch": 8.42, "learning_rate": 1.2713613073327047e-06, "loss": 0.0496, "step": 10777 }, { "epoch": 8.42, "learning_rate": 1.2701259081266804e-06, "loss": 0.009, "step": 10778 }, { "epoch": 8.42, "learning_rate": 1.2688910687351918e-06, "loss": 0.0222, "step": 10779 }, { "epoch": 8.43, "learning_rate": 1.2676567892374191e-06, "loss": 0.0087, "step": 10780 }, { "epoch": 8.43, "learning_rate": 1.266423069712518e-06, "loss": 0.0293, "step": 10781 }, { "epoch": 8.43, "learning_rate": 1.2651899102395992e-06, "loss": 0.0309, "step": 10782 }, { "epoch": 8.43, "learning_rate": 1.263957310897742e-06, "loss": 0.0342, "step": 10783 }, { "epoch": 8.43, "learning_rate": 1.2627252717659887e-06, "loss": 0.0406, "step": 10784 }, { "epoch": 8.43, "learning_rate": 1.2614937929233452e-06, "loss": 0.0604, "step": 10785 }, { "epoch": 8.43, "learning_rate": 1.26026287444878e-06, "loss": 0.0101, "step": 10786 }, { "epoch": 8.43, "learning_rate": 1.2590325164212292e-06, "loss": 0.0377, "step": 10787 }, { "epoch": 8.43, "learning_rate": 1.2578027189195885e-06, "loss": 0.0254, "step": 10788 }, { "epoch": 8.43, "learning_rate": 1.2565734820227237e-06, "loss": 0.0256, "step": 10789 }, { "epoch": 8.43, "learning_rate": 1.255344805809463e-06, "loss": 0.0167, "step": 10790 }, { "epoch": 8.43, "learning_rate": 1.2541166903585888e-06, "loss": 0.0428, "step": 10791 }, { "epoch": 8.43, "learning_rate": 1.2528891357488627e-06, "loss": 0.0142, "step": 10792 }, { "epoch": 8.44, "learning_rate": 1.2516621420590002e-06, "loss": 0.0222, "step": 10793 }, { "epoch": 8.44, "learning_rate": 1.2504357093676855e-06, "loss": 0.0292, "step": 10794 }, { "epoch": 8.44, "learning_rate": 1.2492098377535644e-06, "loss": 0.028, "step": 10795 }, { "epoch": 8.44, "learning_rate": 1.2479845272952484e-06, "loss": 0.0114, "step": 10796 }, { "epoch": 8.44, "learning_rate": 1.2467597780713103e-06, "loss": 0.0693, "step": 10797 }, { "epoch": 8.44, "learning_rate": 1.2455355901602895e-06, "loss": 0.0307, "step": 10798 }, { "epoch": 8.44, "learning_rate": 1.2443119636406864e-06, "loss": 0.0227, "step": 10799 }, { "epoch": 8.44, "learning_rate": 1.2430888985909727e-06, "loss": 0.0115, "step": 10800 }, { "epoch": 8.44, "learning_rate": 1.241866395089576e-06, "loss": 0.0141, "step": 10801 }, { "epoch": 8.44, "learning_rate": 1.2406444532148942e-06, "loss": 0.046, "step": 10802 }, { "epoch": 8.44, "learning_rate": 1.2394230730452782e-06, "loss": 0.0303, "step": 10803 }, { "epoch": 8.44, "learning_rate": 1.2382022546590577e-06, "loss": 0.0249, "step": 10804 }, { "epoch": 8.44, "learning_rate": 1.2369819981345166e-06, "loss": 0.0091, "step": 10805 }, { "epoch": 8.45, "learning_rate": 1.2357623035499055e-06, "loss": 0.0296, "step": 10806 }, { "epoch": 8.45, "learning_rate": 1.2345431709834399e-06, "loss": 0.0185, "step": 10807 }, { "epoch": 8.45, "learning_rate": 1.2333246005132981e-06, "loss": 0.0389, "step": 10808 }, { "epoch": 8.45, "learning_rate": 1.2321065922176212e-06, "loss": 0.0463, "step": 10809 }, { "epoch": 8.45, "learning_rate": 1.2308891461745165e-06, "loss": 0.0187, "step": 10810 }, { "epoch": 8.45, "learning_rate": 1.2296722624620516e-06, "loss": 0.0293, "step": 10811 }, { "epoch": 8.45, "learning_rate": 1.2284559411582663e-06, "loss": 0.0597, "step": 10812 }, { "epoch": 8.45, "learning_rate": 1.2272401823411573e-06, "loss": 0.0224, "step": 10813 }, { "epoch": 8.45, "learning_rate": 1.2260249860886807e-06, "loss": 0.0273, "step": 10814 }, { "epoch": 8.45, "learning_rate": 1.2248103524787703e-06, "loss": 0.0402, "step": 10815 }, { "epoch": 8.45, "learning_rate": 1.2235962815893121e-06, "loss": 0.0107, "step": 10816 }, { "epoch": 8.45, "learning_rate": 1.2223827734981608e-06, "loss": 0.0252, "step": 10817 }, { "epoch": 8.45, "learning_rate": 1.2211698282831342e-06, "loss": 0.0499, "step": 10818 }, { "epoch": 8.46, "learning_rate": 1.219957446022013e-06, "loss": 0.0232, "step": 10819 }, { "epoch": 8.46, "learning_rate": 1.2187456267925446e-06, "loss": 0.0166, "step": 10820 }, { "epoch": 8.46, "learning_rate": 1.2175343706724374e-06, "loss": 0.0132, "step": 10821 }, { "epoch": 8.46, "learning_rate": 1.2163236777393627e-06, "loss": 0.0131, "step": 10822 }, { "epoch": 8.46, "learning_rate": 1.215113548070962e-06, "loss": 0.0115, "step": 10823 }, { "epoch": 8.46, "learning_rate": 1.2139039817448372e-06, "loss": 0.039, "step": 10824 }, { "epoch": 8.46, "learning_rate": 1.2126949788385478e-06, "loss": 0.0126, "step": 10825 }, { "epoch": 8.46, "learning_rate": 1.2114865394296227e-06, "loss": 0.0312, "step": 10826 }, { "epoch": 8.46, "learning_rate": 1.210278663595561e-06, "loss": 0.0309, "step": 10827 }, { "epoch": 8.46, "learning_rate": 1.209071351413814e-06, "loss": 0.0545, "step": 10828 }, { "epoch": 8.46, "learning_rate": 1.2078646029618047e-06, "loss": 0.0308, "step": 10829 }, { "epoch": 8.46, "learning_rate": 1.2066584183169171e-06, "loss": 0.0535, "step": 10830 }, { "epoch": 8.47, "learning_rate": 1.2054527975564978e-06, "loss": 0.0829, "step": 10831 }, { "epoch": 8.47, "learning_rate": 1.2042477407578611e-06, "loss": 0.0317, "step": 10832 }, { "epoch": 8.47, "learning_rate": 1.20304324799828e-06, "loss": 0.0361, "step": 10833 }, { "epoch": 8.47, "learning_rate": 1.2018393193549948e-06, "loss": 0.0226, "step": 10834 }, { "epoch": 8.47, "learning_rate": 1.2006359549052116e-06, "loss": 0.0311, "step": 10835 }, { "epoch": 8.47, "learning_rate": 1.1994331547260986e-06, "loss": 0.0215, "step": 10836 }, { "epoch": 8.47, "learning_rate": 1.1982309188947793e-06, "loss": 0.0177, "step": 10837 }, { "epoch": 8.47, "learning_rate": 1.197029247488356e-06, "loss": 0.0115, "step": 10838 }, { "epoch": 8.47, "learning_rate": 1.1958281405838857e-06, "loss": 0.0162, "step": 10839 }, { "epoch": 8.47, "learning_rate": 1.1946275982583888e-06, "loss": 0.0298, "step": 10840 }, { "epoch": 8.47, "learning_rate": 1.193427620588855e-06, "loss": 0.0111, "step": 10841 }, { "epoch": 8.47, "learning_rate": 1.1922282076522318e-06, "loss": 0.0428, "step": 10842 }, { "epoch": 8.47, "learning_rate": 1.191029359525434e-06, "loss": 0.0645, "step": 10843 }, { "epoch": 8.48, "learning_rate": 1.1898310762853392e-06, "loss": 0.0154, "step": 10844 }, { "epoch": 8.48, "learning_rate": 1.1886333580087895e-06, "loss": 0.0096, "step": 10845 }, { "epoch": 8.48, "learning_rate": 1.1874362047725873e-06, "loss": 0.0568, "step": 10846 }, { "epoch": 8.48, "learning_rate": 1.1862396166535072e-06, "loss": 0.0174, "step": 10847 }, { "epoch": 8.48, "learning_rate": 1.1850435937282767e-06, "loss": 0.0469, "step": 10848 }, { "epoch": 8.48, "learning_rate": 1.183848136073593e-06, "loss": 0.0161, "step": 10849 }, { "epoch": 8.48, "learning_rate": 1.1826532437661197e-06, "loss": 0.0308, "step": 10850 }, { "epoch": 8.48, "learning_rate": 1.1814589168824786e-06, "loss": 0.0299, "step": 10851 }, { "epoch": 8.48, "learning_rate": 1.1802651554992572e-06, "loss": 0.0275, "step": 10852 }, { "epoch": 8.48, "learning_rate": 1.1790719596930077e-06, "loss": 0.0191, "step": 10853 }, { "epoch": 8.48, "learning_rate": 1.1778793295402447e-06, "loss": 0.0375, "step": 10854 }, { "epoch": 8.48, "learning_rate": 1.1766872651174476e-06, "loss": 0.0219, "step": 10855 }, { "epoch": 8.48, "learning_rate": 1.1754957665010603e-06, "loss": 0.0365, "step": 10856 }, { "epoch": 8.49, "learning_rate": 1.1743048337674845e-06, "loss": 0.0101, "step": 10857 }, { "epoch": 8.49, "learning_rate": 1.1731144669930983e-06, "loss": 0.0716, "step": 10858 }, { "epoch": 8.49, "learning_rate": 1.1719246662542283e-06, "loss": 0.0269, "step": 10859 }, { "epoch": 8.49, "learning_rate": 1.1707354316271758e-06, "loss": 0.0982, "step": 10860 }, { "epoch": 8.49, "learning_rate": 1.1695467631881973e-06, "loss": 0.0286, "step": 10861 }, { "epoch": 8.49, "learning_rate": 1.1683586610135245e-06, "loss": 0.0358, "step": 10862 }, { "epoch": 8.49, "learning_rate": 1.1671711251793427e-06, "loss": 0.0375, "step": 10863 }, { "epoch": 8.49, "learning_rate": 1.1659841557618034e-06, "loss": 0.0137, "step": 10864 }, { "epoch": 8.49, "learning_rate": 1.1647977528370248e-06, "loss": 0.0435, "step": 10865 }, { "epoch": 8.49, "learning_rate": 1.1636119164810834e-06, "loss": 0.0779, "step": 10866 }, { "epoch": 8.49, "learning_rate": 1.1624266467700251e-06, "loss": 0.0157, "step": 10867 }, { "epoch": 8.49, "learning_rate": 1.1612419437798573e-06, "loss": 0.0187, "step": 10868 }, { "epoch": 8.49, "learning_rate": 1.1600578075865454e-06, "loss": 0.0277, "step": 10869 }, { "epoch": 8.5, "learning_rate": 1.1588742382660323e-06, "loss": 0.0225, "step": 10870 }, { "epoch": 8.5, "learning_rate": 1.1576912358942093e-06, "loss": 0.0284, "step": 10871 }, { "epoch": 8.5, "learning_rate": 1.1565088005469383e-06, "loss": 0.0264, "step": 10872 }, { "epoch": 8.5, "learning_rate": 1.155326932300047e-06, "loss": 0.0257, "step": 10873 }, { "epoch": 8.5, "learning_rate": 1.1541456312293241e-06, "loss": 0.0518, "step": 10874 }, { "epoch": 8.5, "learning_rate": 1.1529648974105224e-06, "loss": 0.0241, "step": 10875 }, { "epoch": 8.5, "learning_rate": 1.1517847309193552e-06, "loss": 0.0359, "step": 10876 }, { "epoch": 8.5, "learning_rate": 1.1506051318315048e-06, "loss": 0.0154, "step": 10877 }, { "epoch": 8.5, "learning_rate": 1.1494261002226125e-06, "loss": 0.029, "step": 10878 }, { "epoch": 8.5, "learning_rate": 1.1482476361682882e-06, "loss": 0.0213, "step": 10879 }, { "epoch": 8.5, "learning_rate": 1.1470697397440967e-06, "loss": 0.0232, "step": 10880 }, { "epoch": 8.5, "learning_rate": 1.145892411025582e-06, "loss": 0.0341, "step": 10881 }, { "epoch": 8.5, "learning_rate": 1.144715650088233e-06, "loss": 0.0286, "step": 10882 }, { "epoch": 8.51, "learning_rate": 1.1435394570075132e-06, "loss": 0.0155, "step": 10883 }, { "epoch": 8.51, "learning_rate": 1.142363831858847e-06, "loss": 0.0347, "step": 10884 }, { "epoch": 8.51, "learning_rate": 1.1411887747176253e-06, "loss": 0.0281, "step": 10885 }, { "epoch": 8.51, "learning_rate": 1.1400142856592e-06, "loss": 0.0789, "step": 10886 }, { "epoch": 8.51, "learning_rate": 1.1388403647588853e-06, "loss": 0.0339, "step": 10887 }, { "epoch": 8.51, "learning_rate": 1.1376670120919608e-06, "loss": 0.0693, "step": 10888 }, { "epoch": 8.51, "learning_rate": 1.1364942277336698e-06, "loss": 0.0479, "step": 10889 }, { "epoch": 8.51, "learning_rate": 1.135322011759218e-06, "loss": 0.0472, "step": 10890 }, { "epoch": 8.51, "learning_rate": 1.1341503642437746e-06, "loss": 0.0219, "step": 10891 }, { "epoch": 8.51, "learning_rate": 1.1329792852624721e-06, "loss": 0.0638, "step": 10892 }, { "epoch": 8.51, "learning_rate": 1.131808774890414e-06, "loss": 0.0332, "step": 10893 }, { "epoch": 8.51, "learning_rate": 1.130638833202653e-06, "loss": 0.0312, "step": 10894 }, { "epoch": 8.52, "learning_rate": 1.1294694602742152e-06, "loss": 0.0306, "step": 10895 }, { "epoch": 8.52, "learning_rate": 1.1283006561800901e-06, "loss": 0.0397, "step": 10896 }, { "epoch": 8.52, "learning_rate": 1.1271324209952284e-06, "loss": 0.0661, "step": 10897 }, { "epoch": 8.52, "learning_rate": 1.1259647547945452e-06, "loss": 0.0288, "step": 10898 }, { "epoch": 8.52, "learning_rate": 1.124797657652914e-06, "loss": 0.017, "step": 10899 }, { "epoch": 8.52, "learning_rate": 1.123631129645182e-06, "loss": 0.0106, "step": 10900 }, { "epoch": 8.52, "learning_rate": 1.122465170846152e-06, "loss": 0.0217, "step": 10901 }, { "epoch": 8.52, "learning_rate": 1.1212997813305926e-06, "loss": 0.0424, "step": 10902 }, { "epoch": 8.52, "learning_rate": 1.120134961173235e-06, "loss": 0.0269, "step": 10903 }, { "epoch": 8.52, "learning_rate": 1.118970710448779e-06, "loss": 0.0087, "step": 10904 }, { "epoch": 8.52, "learning_rate": 1.1178070292318798e-06, "loss": 0.0252, "step": 10905 }, { "epoch": 8.52, "learning_rate": 1.11664391759716e-06, "loss": 0.0354, "step": 10906 }, { "epoch": 8.52, "learning_rate": 1.115481375619205e-06, "loss": 0.0286, "step": 10907 }, { "epoch": 8.53, "learning_rate": 1.1143194033725669e-06, "loss": 0.0284, "step": 10908 }, { "epoch": 8.53, "learning_rate": 1.1131580009317588e-06, "loss": 0.0118, "step": 10909 }, { "epoch": 8.53, "learning_rate": 1.111997168371255e-06, "loss": 0.0195, "step": 10910 }, { "epoch": 8.53, "learning_rate": 1.1108369057654967e-06, "loss": 0.0605, "step": 10911 }, { "epoch": 8.53, "learning_rate": 1.1096772131888866e-06, "loss": 0.0324, "step": 10912 }, { "epoch": 8.53, "learning_rate": 1.1085180907157922e-06, "loss": 0.05, "step": 10913 }, { "epoch": 8.53, "learning_rate": 1.1073595384205427e-06, "loss": 0.0566, "step": 10914 }, { "epoch": 8.53, "learning_rate": 1.106201556377433e-06, "loss": 0.0131, "step": 10915 }, { "epoch": 8.53, "learning_rate": 1.1050441446607196e-06, "loss": 0.0298, "step": 10916 }, { "epoch": 8.53, "learning_rate": 1.103887303344623e-06, "loss": 0.0224, "step": 10917 }, { "epoch": 8.53, "learning_rate": 1.1027310325033247e-06, "loss": 0.0237, "step": 10918 }, { "epoch": 8.53, "learning_rate": 1.1015753322109767e-06, "loss": 0.0373, "step": 10919 }, { "epoch": 8.53, "learning_rate": 1.1004202025416876e-06, "loss": 0.0186, "step": 10920 }, { "epoch": 8.54, "learning_rate": 1.0992656435695337e-06, "loss": 0.0154, "step": 10921 }, { "epoch": 8.54, "learning_rate": 1.0981116553685467e-06, "loss": 0.0221, "step": 10922 }, { "epoch": 8.54, "learning_rate": 1.0969582380127343e-06, "loss": 0.0281, "step": 10923 }, { "epoch": 8.54, "learning_rate": 1.0958053915760569e-06, "loss": 0.0621, "step": 10924 }, { "epoch": 8.54, "learning_rate": 1.0946531161324425e-06, "loss": 0.028, "step": 10925 }, { "epoch": 8.54, "learning_rate": 1.0935014117557841e-06, "loss": 0.0254, "step": 10926 }, { "epoch": 8.54, "learning_rate": 1.0923502785199359e-06, "loss": 0.0238, "step": 10927 }, { "epoch": 8.54, "learning_rate": 1.0911997164987142e-06, "loss": 0.0438, "step": 10928 }, { "epoch": 8.54, "learning_rate": 1.0900497257659016e-06, "loss": 0.0392, "step": 10929 }, { "epoch": 8.54, "learning_rate": 1.0889003063952407e-06, "loss": 0.0265, "step": 10930 }, { "epoch": 8.54, "learning_rate": 1.0877514584604432e-06, "loss": 0.0098, "step": 10931 }, { "epoch": 8.54, "learning_rate": 1.0866031820351809e-06, "loss": 0.0427, "step": 10932 }, { "epoch": 8.54, "learning_rate": 1.0854554771930837e-06, "loss": 0.0396, "step": 10933 }, { "epoch": 8.55, "learning_rate": 1.0843083440077496e-06, "loss": 0.0367, "step": 10934 }, { "epoch": 8.55, "learning_rate": 1.0831617825527452e-06, "loss": 0.0241, "step": 10935 }, { "epoch": 8.55, "learning_rate": 1.0820157929015917e-06, "loss": 0.0337, "step": 10936 }, { "epoch": 8.55, "learning_rate": 1.0808703751277795e-06, "loss": 0.0544, "step": 10937 }, { "epoch": 8.55, "learning_rate": 1.0797255293047571e-06, "loss": 0.0315, "step": 10938 }, { "epoch": 8.55, "learning_rate": 1.0785812555059417e-06, "loss": 0.0345, "step": 10939 }, { "epoch": 8.55, "learning_rate": 1.07743755380471e-06, "loss": 0.0093, "step": 10940 }, { "epoch": 8.55, "learning_rate": 1.0762944242744034e-06, "loss": 0.018, "step": 10941 }, { "epoch": 8.55, "learning_rate": 1.0751518669883244e-06, "loss": 0.0297, "step": 10942 }, { "epoch": 8.55, "learning_rate": 1.0740098820197465e-06, "loss": 0.0411, "step": 10943 }, { "epoch": 8.55, "learning_rate": 1.072868469441899e-06, "loss": 0.0572, "step": 10944 }, { "epoch": 8.55, "learning_rate": 1.0717276293279722e-06, "loss": 0.0353, "step": 10945 }, { "epoch": 8.55, "learning_rate": 1.0705873617511287e-06, "loss": 0.036, "step": 10946 }, { "epoch": 8.56, "learning_rate": 1.0694476667844878e-06, "loss": 0.0317, "step": 10947 }, { "epoch": 8.56, "learning_rate": 1.0683085445011342e-06, "loss": 0.0406, "step": 10948 }, { "epoch": 8.56, "learning_rate": 1.0671699949741153e-06, "loss": 0.0257, "step": 10949 }, { "epoch": 8.56, "learning_rate": 1.066032018276444e-06, "loss": 0.0636, "step": 10950 }, { "epoch": 8.56, "learning_rate": 1.0648946144810911e-06, "loss": 0.0292, "step": 10951 }, { "epoch": 8.56, "learning_rate": 1.063757783660997e-06, "loss": 0.0337, "step": 10952 }, { "epoch": 8.56, "learning_rate": 1.0626215258890581e-06, "loss": 0.0133, "step": 10953 }, { "epoch": 8.56, "learning_rate": 1.0614858412381445e-06, "loss": 0.074, "step": 10954 }, { "epoch": 8.56, "learning_rate": 1.0603507297810823e-06, "loss": 0.0428, "step": 10955 }, { "epoch": 8.56, "learning_rate": 1.0592161915906584e-06, "loss": 0.0238, "step": 10956 }, { "epoch": 8.56, "learning_rate": 1.0580822267396262e-06, "loss": 0.0686, "step": 10957 }, { "epoch": 8.56, "learning_rate": 1.0569488353007073e-06, "loss": 0.0369, "step": 10958 }, { "epoch": 8.57, "learning_rate": 1.0558160173465793e-06, "loss": 0.0398, "step": 10959 }, { "epoch": 8.57, "learning_rate": 1.054683772949886e-06, "loss": 0.0437, "step": 10960 }, { "epoch": 8.57, "learning_rate": 1.053552102183234e-06, "loss": 0.0214, "step": 10961 }, { "epoch": 8.57, "learning_rate": 1.052421005119193e-06, "loss": 0.0417, "step": 10962 }, { "epoch": 8.57, "learning_rate": 1.051290481830296e-06, "loss": 0.0222, "step": 10963 }, { "epoch": 8.57, "learning_rate": 1.0501605323890385e-06, "loss": 0.0364, "step": 10964 }, { "epoch": 8.57, "learning_rate": 1.04903115686788e-06, "loss": 0.0215, "step": 10965 }, { "epoch": 8.57, "learning_rate": 1.0479023553392453e-06, "loss": 0.0208, "step": 10966 }, { "epoch": 8.57, "learning_rate": 1.0467741278755218e-06, "loss": 0.0349, "step": 10967 }, { "epoch": 8.57, "learning_rate": 1.0456464745490514e-06, "loss": 0.0124, "step": 10968 }, { "epoch": 8.57, "learning_rate": 1.044519395432153e-06, "loss": 0.0325, "step": 10969 }, { "epoch": 8.57, "learning_rate": 1.0433928905970992e-06, "loss": 0.0311, "step": 10970 }, { "epoch": 8.57, "learning_rate": 1.0422669601161294e-06, "loss": 0.0471, "step": 10971 }, { "epoch": 8.58, "learning_rate": 1.0411416040614442e-06, "loss": 0.0158, "step": 10972 }, { "epoch": 8.58, "learning_rate": 1.040016822505211e-06, "loss": 0.0265, "step": 10973 }, { "epoch": 8.58, "learning_rate": 1.0388926155195545e-06, "loss": 0.0304, "step": 10974 }, { "epoch": 8.58, "learning_rate": 1.0377689831765692e-06, "loss": 0.0126, "step": 10975 }, { "epoch": 8.58, "learning_rate": 1.0366459255483052e-06, "loss": 0.0409, "step": 10976 }, { "epoch": 8.58, "learning_rate": 1.035523442706785e-06, "loss": 0.0354, "step": 10977 }, { "epoch": 8.58, "learning_rate": 1.0344015347239888e-06, "loss": 0.0249, "step": 10978 }, { "epoch": 8.58, "learning_rate": 1.0332802016718568e-06, "loss": 0.0193, "step": 10979 }, { "epoch": 8.58, "learning_rate": 1.0321594436222958e-06, "loss": 0.0137, "step": 10980 }, { "epoch": 8.58, "learning_rate": 1.0310392606471798e-06, "loss": 0.0463, "step": 10981 }, { "epoch": 8.58, "learning_rate": 1.0299196528183408e-06, "loss": 0.0347, "step": 10982 }, { "epoch": 8.58, "learning_rate": 1.028800620207574e-06, "loss": 0.027, "step": 10983 }, { "epoch": 8.58, "learning_rate": 1.0276821628866385e-06, "loss": 0.0144, "step": 10984 }, { "epoch": 8.59, "learning_rate": 1.026564280927258e-06, "loss": 0.0253, "step": 10985 }, { "epoch": 8.59, "learning_rate": 1.0254469744011174e-06, "loss": 0.013, "step": 10986 }, { "epoch": 8.59, "learning_rate": 1.0243302433798663e-06, "loss": 0.0105, "step": 10987 }, { "epoch": 8.59, "learning_rate": 1.0232140879351138e-06, "loss": 0.0422, "step": 10988 }, { "epoch": 8.59, "learning_rate": 1.0220985081384416e-06, "loss": 0.0284, "step": 10989 }, { "epoch": 8.59, "learning_rate": 1.0209835040613802e-06, "loss": 0.0194, "step": 10990 }, { "epoch": 8.59, "learning_rate": 1.0198690757754315e-06, "loss": 0.0474, "step": 10991 }, { "epoch": 8.59, "learning_rate": 1.0187552233520648e-06, "loss": 0.016, "step": 10992 }, { "epoch": 8.59, "learning_rate": 1.017641946862703e-06, "loss": 0.0516, "step": 10993 }, { "epoch": 8.59, "learning_rate": 1.016529246378738e-06, "loss": 0.0467, "step": 10994 }, { "epoch": 8.59, "learning_rate": 1.0154171219715237e-06, "loss": 0.032, "step": 10995 }, { "epoch": 8.59, "learning_rate": 1.0143055737123742e-06, "loss": 0.0196, "step": 10996 }, { "epoch": 8.59, "learning_rate": 1.0131946016725714e-06, "loss": 0.0221, "step": 10997 }, { "epoch": 8.6, "learning_rate": 1.0120842059233572e-06, "loss": 0.0113, "step": 10998 }, { "epoch": 8.6, "learning_rate": 1.0109743865359334e-06, "loss": 0.0313, "step": 10999 }, { "epoch": 8.6, "learning_rate": 1.0098651435814756e-06, "loss": 0.0494, "step": 11000 } ], "max_steps": 12790, "num_train_epochs": 10, "total_flos": 4.651331645792584e+18, "trial_name": null, "trial_params": null }