{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.999458581483487, "global_step": 3692, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.405405405405406e-06, "loss": 2.6714, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.0810810810810812e-05, "loss": 2.4616, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.6216216216216218e-05, "loss": 2.5089, "step": 6 }, { "epoch": 0.01, "learning_rate": 2.1621621621621624e-05, "loss": 2.5389, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.702702702702703e-05, "loss": 2.4892, "step": 10 }, { "epoch": 0.01, "learning_rate": 3.2432432432432436e-05, "loss": 2.4949, "step": 12 }, { "epoch": 0.02, "learning_rate": 3.783783783783784e-05, "loss": 2.5016, "step": 14 }, { "epoch": 0.02, "learning_rate": 4.324324324324325e-05, "loss": 2.5727, "step": 16 }, { "epoch": 0.02, "learning_rate": 4.8648648648648654e-05, "loss": 2.605, "step": 18 }, { "epoch": 0.02, "learning_rate": 5.405405405405406e-05, "loss": 2.5045, "step": 20 }, { "epoch": 0.02, "learning_rate": 5.9459459459459466e-05, "loss": 2.5509, "step": 22 }, { "epoch": 0.03, "learning_rate": 6.486486486486487e-05, "loss": 2.6363, "step": 24 }, { "epoch": 0.03, "learning_rate": 7.027027027027028e-05, "loss": 2.3918, "step": 26 }, { "epoch": 0.03, "learning_rate": 7.567567567567568e-05, "loss": 2.5172, "step": 28 }, { "epoch": 0.03, "learning_rate": 8.108108108108109e-05, "loss": 2.6391, "step": 30 }, { "epoch": 0.03, "learning_rate": 8.64864864864865e-05, "loss": 2.3696, "step": 32 }, { "epoch": 0.04, "learning_rate": 9.18918918918919e-05, "loss": 2.4359, "step": 34 }, { "epoch": 0.04, "learning_rate": 9.729729729729731e-05, "loss": 2.5197, "step": 36 }, { "epoch": 0.04, "learning_rate": 9.999998153008212e-05, "loss": 2.5001, "step": 38 }, { "epoch": 0.04, "learning_rate": 9.999983377082087e-05, "loss": 2.7154, "step": 40 }, { "epoch": 0.05, "learning_rate": 9.999953825273504e-05, "loss": 2.5842, "step": 42 }, { "epoch": 0.05, "learning_rate": 9.999909497669792e-05, "loss": 2.4231, "step": 44 }, { "epoch": 0.05, "learning_rate": 9.99985039440195e-05, "loss": 2.6799, "step": 46 }, { "epoch": 0.05, "learning_rate": 9.999776515644638e-05, "loss": 2.5285, "step": 48 }, { "epoch": 0.05, "learning_rate": 9.999687861616181e-05, "loss": 2.6049, "step": 50 }, { "epoch": 0.06, "learning_rate": 9.999584432578569e-05, "loss": 2.46, "step": 52 }, { "epoch": 0.06, "learning_rate": 9.999466228837451e-05, "loss": 2.5835, "step": 54 }, { "epoch": 0.06, "learning_rate": 9.999333250742145e-05, "loss": 2.6909, "step": 56 }, { "epoch": 0.06, "learning_rate": 9.999185498685624e-05, "loss": 2.4055, "step": 58 }, { "epoch": 0.06, "learning_rate": 9.999022973104525e-05, "loss": 2.5816, "step": 60 }, { "epoch": 0.07, "learning_rate": 9.99884567447914e-05, "loss": 2.5871, "step": 62 }, { "epoch": 0.07, "learning_rate": 9.998653603333418e-05, "loss": 2.701, "step": 64 }, { "epoch": 0.07, "learning_rate": 9.998446760234966e-05, "loss": 2.5853, "step": 66 }, { "epoch": 0.07, "learning_rate": 9.998225145795046e-05, "loss": 2.4068, "step": 68 }, { "epoch": 0.08, "learning_rate": 9.997988760668566e-05, "loss": 2.5417, "step": 70 }, { "epoch": 0.08, "learning_rate": 9.997737605554092e-05, "loss": 2.531, "step": 72 }, { "epoch": 0.08, "learning_rate": 9.997471681193833e-05, "loss": 2.5509, "step": 74 }, { "epoch": 0.08, "learning_rate": 9.997190988373644e-05, "loss": 2.4764, "step": 76 }, { "epoch": 0.08, "learning_rate": 9.996895527923023e-05, "loss": 2.4522, "step": 78 }, { "epoch": 0.09, "learning_rate": 9.996585300715116e-05, "loss": 2.332, "step": 80 }, { "epoch": 0.09, "learning_rate": 9.996260307666696e-05, "loss": 2.518, "step": 82 }, { "epoch": 0.09, "learning_rate": 9.995920549738183e-05, "loss": 2.5257, "step": 84 }, { "epoch": 0.09, "learning_rate": 9.995566027933621e-05, "loss": 2.4631, "step": 86 }, { "epoch": 0.1, "learning_rate": 9.995196743300692e-05, "loss": 2.6791, "step": 88 }, { "epoch": 0.1, "learning_rate": 9.994812696930698e-05, "loss": 2.5043, "step": 90 }, { "epoch": 0.1, "learning_rate": 9.994413889958568e-05, "loss": 2.5534, "step": 92 }, { "epoch": 0.1, "learning_rate": 9.994000323562852e-05, "loss": 2.3725, "step": 94 }, { "epoch": 0.1, "learning_rate": 9.993571998965714e-05, "loss": 2.7813, "step": 96 }, { "epoch": 0.11, "learning_rate": 9.993128917432934e-05, "loss": 2.4455, "step": 98 }, { "epoch": 0.11, "learning_rate": 9.992671080273903e-05, "loss": 2.4049, "step": 100 }, { "epoch": 0.11, "learning_rate": 9.992198488841611e-05, "loss": 2.4881, "step": 102 }, { "epoch": 0.11, "learning_rate": 9.991711144532654e-05, "loss": 2.4836, "step": 104 }, { "epoch": 0.11, "learning_rate": 9.991209048787228e-05, "loss": 2.4244, "step": 106 }, { "epoch": 0.12, "learning_rate": 9.990692203089119e-05, "loss": 2.5105, "step": 108 }, { "epoch": 0.12, "learning_rate": 9.9901606089657e-05, "loss": 2.6217, "step": 110 }, { "epoch": 0.12, "learning_rate": 9.989614267987933e-05, "loss": 2.5529, "step": 112 }, { "epoch": 0.12, "learning_rate": 9.989053181770356e-05, "loss": 2.512, "step": 114 }, { "epoch": 0.13, "learning_rate": 9.988477351971084e-05, "loss": 2.5365, "step": 116 }, { "epoch": 0.13, "learning_rate": 9.9878867802918e-05, "loss": 2.6218, "step": 118 }, { "epoch": 0.13, "learning_rate": 9.987281468477756e-05, "loss": 2.4998, "step": 120 }, { "epoch": 0.13, "learning_rate": 9.986661418317759e-05, "loss": 2.4597, "step": 122 }, { "epoch": 0.13, "learning_rate": 9.986026631644173e-05, "loss": 2.6517, "step": 124 }, { "epoch": 0.14, "learning_rate": 9.985377110332912e-05, "loss": 2.6685, "step": 126 }, { "epoch": 0.14, "learning_rate": 9.984712856303432e-05, "loss": 2.5139, "step": 128 }, { "epoch": 0.14, "learning_rate": 9.984033871518727e-05, "loss": 2.4927, "step": 130 }, { "epoch": 0.14, "learning_rate": 9.983340157985324e-05, "loss": 2.4832, "step": 132 }, { "epoch": 0.15, "learning_rate": 9.982631717753275e-05, "loss": 2.5724, "step": 134 }, { "epoch": 0.15, "learning_rate": 9.981908552916153e-05, "loss": 2.5026, "step": 136 }, { "epoch": 0.15, "learning_rate": 9.981170665611046e-05, "loss": 2.4104, "step": 138 }, { "epoch": 0.15, "learning_rate": 9.980418058018547e-05, "loss": 2.4806, "step": 140 }, { "epoch": 0.15, "learning_rate": 9.979650732362753e-05, "loss": 2.3937, "step": 142 }, { "epoch": 0.16, "learning_rate": 9.978868690911253e-05, "loss": 2.7488, "step": 144 }, { "epoch": 0.16, "learning_rate": 9.978071935975126e-05, "loss": 2.5604, "step": 146 }, { "epoch": 0.16, "learning_rate": 9.977260469908931e-05, "loss": 2.4197, "step": 148 }, { "epoch": 0.16, "learning_rate": 9.976434295110701e-05, "loss": 2.6003, "step": 150 }, { "epoch": 0.16, "learning_rate": 9.975593414021938e-05, "loss": 2.4822, "step": 152 }, { "epoch": 0.17, "learning_rate": 9.974737829127602e-05, "loss": 2.4176, "step": 154 }, { "epoch": 0.17, "learning_rate": 9.973867542956104e-05, "loss": 2.5665, "step": 156 }, { "epoch": 0.17, "learning_rate": 9.972982558079303e-05, "loss": 2.7229, "step": 158 }, { "epoch": 0.17, "learning_rate": 9.972082877112494e-05, "loss": 2.7438, "step": 160 }, { "epoch": 0.18, "learning_rate": 9.971168502714401e-05, "loss": 2.5602, "step": 162 }, { "epoch": 0.18, "learning_rate": 9.970239437587174e-05, "loss": 2.5438, "step": 164 }, { "epoch": 0.18, "learning_rate": 9.96929568447637e-05, "loss": 2.5334, "step": 166 }, { "epoch": 0.18, "learning_rate": 9.968337246170956e-05, "loss": 2.496, "step": 168 }, { "epoch": 0.18, "learning_rate": 9.967364125503295e-05, "loss": 2.4688, "step": 170 }, { "epoch": 0.19, "learning_rate": 9.966376325349143e-05, "loss": 2.5845, "step": 172 }, { "epoch": 0.19, "learning_rate": 9.965373848627631e-05, "loss": 2.3486, "step": 174 }, { "epoch": 0.19, "learning_rate": 9.964356698301264e-05, "loss": 2.6254, "step": 176 }, { "epoch": 0.19, "learning_rate": 9.963324877375912e-05, "loss": 2.2205, "step": 178 }, { "epoch": 0.19, "learning_rate": 9.9622783889008e-05, "loss": 2.4156, "step": 180 }, { "epoch": 0.2, "learning_rate": 9.961217235968494e-05, "loss": 2.6328, "step": 182 }, { "epoch": 0.2, "learning_rate": 9.960141421714897e-05, "loss": 2.5354, "step": 184 }, { "epoch": 0.2, "learning_rate": 9.959050949319244e-05, "loss": 2.4779, "step": 186 }, { "epoch": 0.2, "learning_rate": 9.957945822004083e-05, "loss": 2.4148, "step": 188 }, { "epoch": 0.21, "learning_rate": 9.956826043035268e-05, "loss": 2.5947, "step": 190 }, { "epoch": 0.21, "learning_rate": 9.95569161572196e-05, "loss": 2.6662, "step": 192 }, { "epoch": 0.21, "learning_rate": 9.954542543416599e-05, "loss": 2.4853, "step": 194 }, { "epoch": 0.21, "learning_rate": 9.953378829514908e-05, "loss": 2.4561, "step": 196 }, { "epoch": 0.21, "learning_rate": 9.952200477455881e-05, "loss": 2.5139, "step": 198 }, { "epoch": 0.22, "learning_rate": 9.951007490721766e-05, "loss": 2.5978, "step": 200 }, { "epoch": 0.22, "learning_rate": 9.949799872838061e-05, "loss": 2.6275, "step": 202 }, { "epoch": 0.22, "learning_rate": 9.948577627373503e-05, "loss": 2.4584, "step": 204 }, { "epoch": 0.22, "learning_rate": 9.947340757940053e-05, "loss": 2.3811, "step": 206 }, { "epoch": 0.23, "learning_rate": 9.946089268192895e-05, "loss": 2.4847, "step": 208 }, { "epoch": 0.23, "learning_rate": 9.944823161830407e-05, "loss": 2.5872, "step": 210 }, { "epoch": 0.23, "learning_rate": 9.943542442594177e-05, "loss": 2.7864, "step": 212 }, { "epoch": 0.23, "learning_rate": 9.942247114268964e-05, "loss": 2.5991, "step": 214 }, { "epoch": 0.23, "learning_rate": 9.940937180682706e-05, "loss": 2.4734, "step": 216 }, { "epoch": 0.24, "learning_rate": 9.9396126457065e-05, "loss": 2.6016, "step": 218 }, { "epoch": 0.24, "learning_rate": 9.938273513254597e-05, "loss": 2.5428, "step": 220 }, { "epoch": 0.24, "learning_rate": 9.936919787284378e-05, "loss": 2.6381, "step": 222 }, { "epoch": 0.24, "learning_rate": 9.935551471796358e-05, "loss": 2.6325, "step": 224 }, { "epoch": 0.24, "learning_rate": 9.934168570834165e-05, "loss": 2.5066, "step": 226 }, { "epoch": 0.25, "learning_rate": 9.932771088484527e-05, "loss": 2.5039, "step": 228 }, { "epoch": 0.25, "learning_rate": 9.931359028877267e-05, "loss": 2.7124, "step": 230 }, { "epoch": 0.25, "learning_rate": 9.929932396185281e-05, "loss": 2.4925, "step": 232 }, { "epoch": 0.25, "learning_rate": 9.928491194624539e-05, "loss": 2.579, "step": 234 }, { "epoch": 0.26, "learning_rate": 9.927035428454055e-05, "loss": 2.6093, "step": 236 }, { "epoch": 0.26, "learning_rate": 9.925565101975893e-05, "loss": 2.3589, "step": 238 }, { "epoch": 0.26, "learning_rate": 9.924080219535141e-05, "loss": 2.6058, "step": 240 }, { "epoch": 0.26, "learning_rate": 9.922580785519901e-05, "loss": 2.5016, "step": 242 }, { "epoch": 0.26, "learning_rate": 9.921066804361284e-05, "loss": 2.6041, "step": 244 }, { "epoch": 0.27, "learning_rate": 9.919538280533382e-05, "loss": 2.4222, "step": 246 }, { "epoch": 0.27, "learning_rate": 9.917995218553271e-05, "loss": 2.5596, "step": 248 }, { "epoch": 0.27, "learning_rate": 9.916437622980985e-05, "loss": 2.5427, "step": 250 }, { "epoch": 0.27, "learning_rate": 9.91486549841951e-05, "loss": 2.5865, "step": 252 }, { "epoch": 0.28, "learning_rate": 9.913278849514765e-05, "loss": 2.4464, "step": 254 }, { "epoch": 0.28, "learning_rate": 9.911677680955596e-05, "loss": 2.4279, "step": 256 }, { "epoch": 0.28, "learning_rate": 9.910061997473752e-05, "loss": 2.4858, "step": 258 }, { "epoch": 0.28, "learning_rate": 9.908431803843881e-05, "loss": 2.3309, "step": 260 }, { "epoch": 0.28, "learning_rate": 9.906787104883506e-05, "loss": 2.5427, "step": 262 }, { "epoch": 0.29, "learning_rate": 9.905127905453023e-05, "loss": 2.568, "step": 264 }, { "epoch": 0.29, "learning_rate": 9.90345421045567e-05, "loss": 2.5368, "step": 266 }, { "epoch": 0.29, "learning_rate": 9.90176602483753e-05, "loss": 2.562, "step": 268 }, { "epoch": 0.29, "learning_rate": 9.90006335358751e-05, "loss": 2.43, "step": 270 }, { "epoch": 0.29, "learning_rate": 9.898346201737317e-05, "loss": 2.6544, "step": 272 }, { "epoch": 0.3, "learning_rate": 9.896614574361454e-05, "loss": 2.5701, "step": 274 }, { "epoch": 0.3, "learning_rate": 9.894868476577201e-05, "loss": 2.5294, "step": 276 }, { "epoch": 0.3, "learning_rate": 9.893107913544609e-05, "loss": 2.6586, "step": 278 }, { "epoch": 0.3, "learning_rate": 9.891332890466463e-05, "loss": 2.4904, "step": 280 }, { "epoch": 0.31, "learning_rate": 9.88954341258829e-05, "loss": 2.5686, "step": 282 }, { "epoch": 0.31, "learning_rate": 9.887739485198331e-05, "loss": 2.5522, "step": 284 }, { "epoch": 0.31, "learning_rate": 9.885921113627525e-05, "loss": 2.6227, "step": 286 }, { "epoch": 0.31, "learning_rate": 9.884088303249501e-05, "loss": 2.6265, "step": 288 }, { "epoch": 0.31, "learning_rate": 9.882241059480555e-05, "loss": 2.6851, "step": 290 }, { "epoch": 0.32, "learning_rate": 9.880379387779637e-05, "loss": 2.4501, "step": 292 }, { "epoch": 0.32, "learning_rate": 9.878503293648332e-05, "loss": 2.3563, "step": 294 }, { "epoch": 0.32, "learning_rate": 9.876612782630848e-05, "loss": 2.3076, "step": 296 }, { "epoch": 0.32, "learning_rate": 9.874707860313997e-05, "loss": 2.5158, "step": 298 }, { "epoch": 0.32, "learning_rate": 9.87278853232718e-05, "loss": 2.4241, "step": 300 }, { "epoch": 0.33, "learning_rate": 9.87085480434237e-05, "loss": 2.6496, "step": 302 }, { "epoch": 0.33, "learning_rate": 9.868906682074093e-05, "loss": 2.5175, "step": 304 }, { "epoch": 0.33, "learning_rate": 9.866944171279411e-05, "loss": 2.4658, "step": 306 }, { "epoch": 0.33, "learning_rate": 9.864967277757911e-05, "loss": 2.3329, "step": 308 }, { "epoch": 0.34, "learning_rate": 9.862976007351683e-05, "loss": 2.6255, "step": 310 }, { "epoch": 0.34, "learning_rate": 9.860970365945299e-05, "loss": 2.5342, "step": 312 }, { "epoch": 0.34, "learning_rate": 9.858950359465805e-05, "loss": 2.3472, "step": 314 }, { "epoch": 0.34, "learning_rate": 9.856915993882696e-05, "loss": 2.4315, "step": 316 }, { "epoch": 0.34, "learning_rate": 9.854867275207901e-05, "loss": 2.5811, "step": 318 }, { "epoch": 0.35, "learning_rate": 9.852804209495766e-05, "loss": 2.5231, "step": 320 }, { "epoch": 0.35, "learning_rate": 9.850726802843034e-05, "loss": 2.6642, "step": 322 }, { "epoch": 0.35, "learning_rate": 9.84863506138883e-05, "loss": 2.5686, "step": 324 }, { "epoch": 0.35, "learning_rate": 9.846528991314639e-05, "loss": 2.7031, "step": 326 }, { "epoch": 0.36, "learning_rate": 9.844408598844288e-05, "loss": 2.4843, "step": 328 }, { "epoch": 0.36, "learning_rate": 9.842273890243936e-05, "loss": 2.6201, "step": 330 }, { "epoch": 0.36, "learning_rate": 9.840124871822041e-05, "loss": 2.4424, "step": 332 }, { "epoch": 0.36, "learning_rate": 9.837961549929356e-05, "loss": 2.5032, "step": 334 }, { "epoch": 0.36, "learning_rate": 9.835783930958897e-05, "loss": 2.6527, "step": 336 }, { "epoch": 0.37, "learning_rate": 9.833592021345937e-05, "loss": 2.4956, "step": 338 }, { "epoch": 0.37, "learning_rate": 9.831385827567975e-05, "loss": 2.3891, "step": 340 }, { "epoch": 0.37, "learning_rate": 9.829165356144727e-05, "loss": 2.2116, "step": 342 }, { "epoch": 0.37, "learning_rate": 9.826930613638098e-05, "loss": 2.5029, "step": 344 }, { "epoch": 0.37, "learning_rate": 9.824681606652168e-05, "loss": 2.5519, "step": 346 }, { "epoch": 0.38, "learning_rate": 9.822418341833172e-05, "loss": 2.5432, "step": 348 }, { "epoch": 0.38, "learning_rate": 9.82014082586948e-05, "loss": 2.5587, "step": 350 }, { "epoch": 0.38, "learning_rate": 9.817849065491575e-05, "loss": 2.552, "step": 352 }, { "epoch": 0.38, "learning_rate": 9.815543067472039e-05, "loss": 2.4258, "step": 354 }, { "epoch": 0.39, "learning_rate": 9.813222838625521e-05, "loss": 2.4393, "step": 356 }, { "epoch": 0.39, "learning_rate": 9.810888385808732e-05, "loss": 2.5239, "step": 358 }, { "epoch": 0.39, "learning_rate": 9.808539715920414e-05, "loss": 2.4571, "step": 360 }, { "epoch": 0.39, "learning_rate": 9.806176835901328e-05, "loss": 2.5202, "step": 362 }, { "epoch": 0.39, "learning_rate": 9.803799752734219e-05, "loss": 2.4761, "step": 364 }, { "epoch": 0.4, "learning_rate": 9.801408473443816e-05, "loss": 2.8216, "step": 366 }, { "epoch": 0.4, "learning_rate": 9.79900300509679e-05, "loss": 2.6249, "step": 368 }, { "epoch": 0.4, "learning_rate": 9.796583354801752e-05, "loss": 2.5059, "step": 370 }, { "epoch": 0.4, "learning_rate": 9.794149529709216e-05, "loss": 2.5326, "step": 372 }, { "epoch": 0.4, "learning_rate": 9.791701537011591e-05, "loss": 2.7352, "step": 374 }, { "epoch": 0.41, "learning_rate": 9.789239383943152e-05, "loss": 2.6958, "step": 376 }, { "epoch": 0.41, "learning_rate": 9.78676307778002e-05, "loss": 2.4794, "step": 378 }, { "epoch": 0.41, "learning_rate": 9.784272625840136e-05, "loss": 2.5559, "step": 380 }, { "epoch": 0.41, "learning_rate": 9.781768035483256e-05, "loss": 2.4731, "step": 382 }, { "epoch": 0.42, "learning_rate": 9.779249314110909e-05, "loss": 2.5427, "step": 384 }, { "epoch": 0.42, "learning_rate": 9.776716469166384e-05, "loss": 2.6534, "step": 386 }, { "epoch": 0.42, "learning_rate": 9.774169508134715e-05, "loss": 2.5991, "step": 388 }, { "epoch": 0.42, "learning_rate": 9.771608438542639e-05, "loss": 2.4887, "step": 390 }, { "epoch": 0.42, "learning_rate": 9.769033267958598e-05, "loss": 2.5762, "step": 392 }, { "epoch": 0.43, "learning_rate": 9.766444003992703e-05, "loss": 2.3775, "step": 394 }, { "epoch": 0.43, "learning_rate": 9.763840654296706e-05, "loss": 2.4067, "step": 396 }, { "epoch": 0.43, "learning_rate": 9.761223226563996e-05, "loss": 2.3338, "step": 398 }, { "epoch": 0.43, "learning_rate": 9.758591728529555e-05, "loss": 2.4981, "step": 400 }, { "epoch": 0.44, "learning_rate": 9.755946167969952e-05, "loss": 2.402, "step": 402 }, { "epoch": 0.44, "learning_rate": 9.753286552703312e-05, "loss": 2.7678, "step": 404 }, { "epoch": 0.44, "learning_rate": 9.750612890589293e-05, "loss": 2.5216, "step": 406 }, { "epoch": 0.44, "learning_rate": 9.747925189529063e-05, "loss": 2.3811, "step": 408 }, { "epoch": 0.44, "learning_rate": 9.745223457465282e-05, "loss": 2.4442, "step": 410 }, { "epoch": 0.45, "learning_rate": 9.742507702382071e-05, "loss": 2.4474, "step": 412 }, { "epoch": 0.45, "learning_rate": 9.739777932304992e-05, "loss": 2.5238, "step": 414 }, { "epoch": 0.45, "learning_rate": 9.737034155301024e-05, "loss": 2.4573, "step": 416 }, { "epoch": 0.45, "learning_rate": 9.734276379478538e-05, "loss": 2.5096, "step": 418 }, { "epoch": 0.45, "learning_rate": 9.731504612987279e-05, "loss": 2.5997, "step": 420 }, { "epoch": 0.46, "learning_rate": 9.728718864018329e-05, "loss": 2.3851, "step": 422 }, { "epoch": 0.46, "learning_rate": 9.725919140804099e-05, "loss": 2.5155, "step": 424 }, { "epoch": 0.46, "learning_rate": 9.72310545161829e-05, "loss": 2.614, "step": 426 }, { "epoch": 0.46, "learning_rate": 9.72027780477588e-05, "loss": 2.5027, "step": 428 }, { "epoch": 0.47, "learning_rate": 9.717436208633088e-05, "loss": 2.4011, "step": 430 }, { "epoch": 0.47, "learning_rate": 9.714580671587366e-05, "loss": 2.5327, "step": 432 }, { "epoch": 0.47, "learning_rate": 9.711711202077354e-05, "loss": 2.3772, "step": 434 }, { "epoch": 0.47, "learning_rate": 9.708827808582871e-05, "loss": 2.5332, "step": 436 }, { "epoch": 0.47, "learning_rate": 9.70593049962488e-05, "loss": 2.4211, "step": 438 }, { "epoch": 0.48, "learning_rate": 9.703019283765471e-05, "loss": 2.5811, "step": 440 }, { "epoch": 0.48, "learning_rate": 9.700094169607828e-05, "loss": 2.5859, "step": 442 }, { "epoch": 0.48, "learning_rate": 9.697155165796209e-05, "loss": 2.7103, "step": 444 }, { "epoch": 0.48, "learning_rate": 9.694202281015918e-05, "loss": 2.5435, "step": 446 }, { "epoch": 0.49, "learning_rate": 9.691235523993278e-05, "loss": 2.4327, "step": 448 }, { "epoch": 0.49, "learning_rate": 9.688254903495609e-05, "loss": 2.4772, "step": 450 }, { "epoch": 0.49, "learning_rate": 9.685260428331202e-05, "loss": 2.5445, "step": 452 }, { "epoch": 0.49, "learning_rate": 9.682252107349288e-05, "loss": 2.5334, "step": 454 }, { "epoch": 0.49, "learning_rate": 9.679229949440015e-05, "loss": 2.4343, "step": 456 }, { "epoch": 0.5, "learning_rate": 9.676193963534423e-05, "loss": 2.4341, "step": 458 }, { "epoch": 0.5, "learning_rate": 9.673144158604419e-05, "loss": 2.5318, "step": 460 }, { "epoch": 0.5, "learning_rate": 9.67008054366274e-05, "loss": 2.435, "step": 462 }, { "epoch": 0.5, "learning_rate": 9.667003127762942e-05, "loss": 2.6514, "step": 464 }, { "epoch": 0.5, "learning_rate": 9.663911919999362e-05, "loss": 2.4744, "step": 466 }, { "epoch": 0.51, "learning_rate": 9.660806929507095e-05, "loss": 2.5498, "step": 468 }, { "epoch": 0.51, "learning_rate": 9.657688165461965e-05, "loss": 2.6276, "step": 470 }, { "epoch": 0.51, "learning_rate": 9.654555637080502e-05, "loss": 2.6097, "step": 472 }, { "epoch": 0.51, "learning_rate": 9.65140935361991e-05, "loss": 2.3675, "step": 474 }, { "epoch": 0.52, "learning_rate": 9.648249324378044e-05, "loss": 2.469, "step": 476 }, { "epoch": 0.52, "learning_rate": 9.64507555869338e-05, "loss": 2.5554, "step": 478 }, { "epoch": 0.52, "learning_rate": 9.641888065944984e-05, "loss": 2.35, "step": 480 }, { "epoch": 0.52, "learning_rate": 9.638686855552494e-05, "loss": 2.495, "step": 482 }, { "epoch": 0.52, "learning_rate": 9.635471936976081e-05, "loss": 2.4547, "step": 484 }, { "epoch": 0.53, "learning_rate": 9.63224331971643e-05, "loss": 2.3311, "step": 486 }, { "epoch": 0.53, "learning_rate": 9.629001013314705e-05, "loss": 2.5145, "step": 488 }, { "epoch": 0.53, "learning_rate": 9.625745027352526e-05, "loss": 2.5413, "step": 490 }, { "epoch": 0.53, "learning_rate": 9.622475371451939e-05, "loss": 2.5209, "step": 492 }, { "epoch": 0.53, "learning_rate": 9.619192055275386e-05, "loss": 2.4376, "step": 494 }, { "epoch": 0.54, "learning_rate": 9.615895088525677e-05, "loss": 2.455, "step": 496 }, { "epoch": 0.54, "learning_rate": 9.612584480945964e-05, "loss": 2.3267, "step": 498 }, { "epoch": 0.54, "learning_rate": 9.609260242319709e-05, "loss": 2.6783, "step": 500 }, { "epoch": 0.54, "learning_rate": 9.605922382470658e-05, "loss": 2.4699, "step": 502 }, { "epoch": 0.55, "learning_rate": 9.602570911262805e-05, "loss": 2.4731, "step": 504 }, { "epoch": 0.55, "learning_rate": 9.599205838600375e-05, "loss": 2.4779, "step": 506 }, { "epoch": 0.55, "learning_rate": 9.595827174427786e-05, "loss": 2.6002, "step": 508 }, { "epoch": 0.55, "learning_rate": 9.592434928729616e-05, "loss": 2.5211, "step": 510 }, { "epoch": 0.55, "learning_rate": 9.589029111530586e-05, "loss": 2.5308, "step": 512 }, { "epoch": 0.56, "learning_rate": 9.585609732895517e-05, "loss": 2.5043, "step": 514 }, { "epoch": 0.56, "learning_rate": 9.582176802929315e-05, "loss": 2.4683, "step": 516 }, { "epoch": 0.56, "learning_rate": 9.578730331776924e-05, "loss": 2.4226, "step": 518 }, { "epoch": 0.56, "learning_rate": 9.575270329623309e-05, "loss": 2.3188, "step": 520 }, { "epoch": 0.57, "learning_rate": 9.571796806693422e-05, "loss": 2.6134, "step": 522 }, { "epoch": 0.57, "learning_rate": 9.568309773252171e-05, "loss": 2.3866, "step": 524 }, { "epoch": 0.57, "learning_rate": 9.564809239604388e-05, "loss": 2.6209, "step": 526 }, { "epoch": 0.57, "learning_rate": 9.5612952160948e-05, "loss": 2.5562, "step": 528 }, { "epoch": 0.57, "learning_rate": 9.557767713108009e-05, "loss": 2.6116, "step": 530 }, { "epoch": 0.58, "learning_rate": 9.554226741068432e-05, "loss": 2.5081, "step": 532 }, { "epoch": 0.58, "learning_rate": 9.550672310440311e-05, "loss": 2.7225, "step": 534 }, { "epoch": 0.58, "learning_rate": 9.547104431727647e-05, "loss": 2.6353, "step": 536 }, { "epoch": 0.58, "learning_rate": 9.543523115474187e-05, "loss": 2.5658, "step": 538 }, { "epoch": 0.58, "learning_rate": 9.539928372263387e-05, "loss": 2.5224, "step": 540 }, { "epoch": 0.59, "learning_rate": 9.536320212718382e-05, "loss": 2.702, "step": 542 }, { "epoch": 0.59, "learning_rate": 9.532698647501958e-05, "loss": 2.4106, "step": 544 }, { "epoch": 0.59, "learning_rate": 9.529063687316513e-05, "loss": 2.5009, "step": 546 }, { "epoch": 0.59, "learning_rate": 9.525415342904034e-05, "loss": 2.4723, "step": 548 }, { "epoch": 0.6, "learning_rate": 9.521753625046056e-05, "loss": 2.3118, "step": 550 }, { "epoch": 0.6, "learning_rate": 9.51807854456364e-05, "loss": 2.6302, "step": 552 }, { "epoch": 0.6, "learning_rate": 9.51439011231733e-05, "loss": 2.3981, "step": 554 }, { "epoch": 0.6, "learning_rate": 9.510688339207133e-05, "loss": 2.4194, "step": 556 }, { "epoch": 0.6, "learning_rate": 9.506973236172478e-05, "loss": 2.5114, "step": 558 }, { "epoch": 0.61, "learning_rate": 9.503244814192187e-05, "loss": 2.4885, "step": 560 }, { "epoch": 0.61, "learning_rate": 9.499503084284441e-05, "loss": 2.4262, "step": 562 }, { "epoch": 0.61, "learning_rate": 9.49574805750675e-05, "loss": 2.2484, "step": 564 }, { "epoch": 0.61, "learning_rate": 9.491979744955915e-05, "loss": 2.3817, "step": 566 }, { "epoch": 0.62, "learning_rate": 9.488198157768005e-05, "loss": 2.455, "step": 568 }, { "epoch": 0.62, "learning_rate": 9.484403307118312e-05, "loss": 2.3778, "step": 570 }, { "epoch": 0.62, "learning_rate": 9.480595204221329e-05, "loss": 2.5499, "step": 572 }, { "epoch": 0.62, "learning_rate": 9.47677386033071e-05, "loss": 2.5838, "step": 574 }, { "epoch": 0.62, "learning_rate": 9.472939286739235e-05, "loss": 2.5077, "step": 576 }, { "epoch": 0.63, "learning_rate": 9.469091494778785e-05, "loss": 2.6054, "step": 578 }, { "epoch": 0.63, "learning_rate": 9.465230495820303e-05, "loss": 2.4767, "step": 580 }, { "epoch": 0.63, "learning_rate": 9.461356301273758e-05, "loss": 2.6251, "step": 582 }, { "epoch": 0.63, "learning_rate": 9.45746892258812e-05, "loss": 2.4725, "step": 584 }, { "epoch": 0.63, "learning_rate": 9.453568371251316e-05, "loss": 2.5408, "step": 586 }, { "epoch": 0.64, "learning_rate": 9.4496546587902e-05, "loss": 2.5397, "step": 588 }, { "epoch": 0.64, "learning_rate": 9.445727796770524e-05, "loss": 2.666, "step": 590 }, { "epoch": 0.64, "learning_rate": 9.441787796796896e-05, "loss": 2.3805, "step": 592 }, { "epoch": 0.64, "learning_rate": 9.43783467051275e-05, "loss": 2.57, "step": 594 }, { "epoch": 0.65, "learning_rate": 9.43386842960031e-05, "loss": 2.6776, "step": 596 }, { "epoch": 0.65, "learning_rate": 9.429889085780557e-05, "loss": 2.447, "step": 598 }, { "epoch": 0.65, "learning_rate": 9.425896650813196e-05, "loss": 2.6253, "step": 600 }, { "epoch": 0.65, "learning_rate": 9.421891136496612e-05, "loss": 2.3422, "step": 602 }, { "epoch": 0.65, "learning_rate": 9.41787255466785e-05, "loss": 2.3565, "step": 604 }, { "epoch": 0.66, "learning_rate": 9.413840917202566e-05, "loss": 2.4135, "step": 606 }, { "epoch": 0.66, "learning_rate": 9.409796236014999e-05, "loss": 2.6778, "step": 608 }, { "epoch": 0.66, "learning_rate": 9.405738523057938e-05, "loss": 2.4313, "step": 610 }, { "epoch": 0.66, "learning_rate": 9.401667790322679e-05, "loss": 2.4427, "step": 612 }, { "epoch": 0.66, "learning_rate": 9.397584049838996e-05, "loss": 2.6661, "step": 614 }, { "epoch": 0.67, "learning_rate": 9.393487313675102e-05, "loss": 2.4825, "step": 616 }, { "epoch": 0.67, "learning_rate": 9.389377593937618e-05, "loss": 2.5834, "step": 618 }, { "epoch": 0.67, "learning_rate": 9.38525490277153e-05, "loss": 2.4413, "step": 620 }, { "epoch": 0.67, "learning_rate": 9.38111925236016e-05, "loss": 2.5265, "step": 622 }, { "epoch": 0.68, "learning_rate": 9.376970654925124e-05, "loss": 2.5181, "step": 624 }, { "epoch": 0.68, "learning_rate": 9.372809122726299e-05, "loss": 2.6319, "step": 626 }, { "epoch": 0.68, "learning_rate": 9.368634668061791e-05, "loss": 2.7302, "step": 628 }, { "epoch": 0.68, "learning_rate": 9.364447303267889e-05, "loss": 2.5624, "step": 630 }, { "epoch": 0.68, "learning_rate": 9.360247040719039e-05, "loss": 2.4739, "step": 632 }, { "epoch": 0.69, "learning_rate": 9.356033892827796e-05, "loss": 2.3922, "step": 634 }, { "epoch": 0.69, "learning_rate": 9.3518078720448e-05, "loss": 2.5975, "step": 636 }, { "epoch": 0.69, "learning_rate": 9.347568990858726e-05, "loss": 2.4533, "step": 638 }, { "epoch": 0.69, "learning_rate": 9.343317261796262e-05, "loss": 2.4955, "step": 640 }, { "epoch": 0.7, "learning_rate": 9.339052697422057e-05, "loss": 2.4098, "step": 642 }, { "epoch": 0.7, "learning_rate": 9.334775310338694e-05, "loss": 2.705, "step": 644 }, { "epoch": 0.7, "learning_rate": 9.330485113186648e-05, "loss": 2.4335, "step": 646 }, { "epoch": 0.7, "learning_rate": 9.326182118644254e-05, "loss": 2.6452, "step": 648 }, { "epoch": 0.7, "learning_rate": 9.321866339427658e-05, "loss": 2.4124, "step": 650 }, { "epoch": 0.71, "learning_rate": 9.317537788290794e-05, "loss": 2.3303, "step": 652 }, { "epoch": 0.71, "learning_rate": 9.313196478025337e-05, "loss": 2.4199, "step": 654 }, { "epoch": 0.71, "learning_rate": 9.308842421460667e-05, "loss": 2.4577, "step": 656 }, { "epoch": 0.71, "learning_rate": 9.304475631463834e-05, "loss": 2.5357, "step": 658 }, { "epoch": 0.71, "learning_rate": 9.300096120939516e-05, "loss": 2.418, "step": 660 }, { "epoch": 0.72, "learning_rate": 9.29570390282998e-05, "loss": 2.4678, "step": 662 }, { "epoch": 0.72, "learning_rate": 9.29129899011505e-05, "loss": 2.3826, "step": 664 }, { "epoch": 0.72, "learning_rate": 9.286881395812066e-05, "loss": 2.4426, "step": 666 }, { "epoch": 0.72, "learning_rate": 9.28245113297584e-05, "loss": 2.3528, "step": 668 }, { "epoch": 0.73, "learning_rate": 9.278008214698624e-05, "loss": 2.4743, "step": 670 }, { "epoch": 0.73, "learning_rate": 9.27355265411007e-05, "loss": 2.3887, "step": 672 }, { "epoch": 0.73, "learning_rate": 9.26908446437719e-05, "loss": 2.468, "step": 674 }, { "epoch": 0.73, "learning_rate": 9.264603658704318e-05, "loss": 2.352, "step": 676 }, { "epoch": 0.73, "learning_rate": 9.260110250333066e-05, "loss": 2.4429, "step": 678 }, { "epoch": 0.74, "learning_rate": 9.255604252542296e-05, "loss": 2.3712, "step": 680 }, { "epoch": 0.74, "learning_rate": 9.251085678648072e-05, "loss": 2.5003, "step": 682 }, { "epoch": 0.74, "learning_rate": 9.246554542003618e-05, "loss": 2.5098, "step": 684 }, { "epoch": 0.74, "learning_rate": 9.24201085599929e-05, "loss": 2.6184, "step": 686 }, { "epoch": 0.74, "learning_rate": 9.237454634062525e-05, "loss": 2.4172, "step": 688 }, { "epoch": 0.75, "learning_rate": 9.23288588965781e-05, "loss": 2.4234, "step": 690 }, { "epoch": 0.75, "learning_rate": 9.228304636286633e-05, "loss": 2.6617, "step": 692 }, { "epoch": 0.75, "learning_rate": 9.223710887487453e-05, "loss": 2.422, "step": 694 }, { "epoch": 0.75, "learning_rate": 9.219104656835654e-05, "loss": 2.6878, "step": 696 }, { "epoch": 0.76, "learning_rate": 9.214485957943503e-05, "loss": 2.6575, "step": 698 }, { "epoch": 0.76, "learning_rate": 9.209854804460121e-05, "loss": 2.369, "step": 700 }, { "epoch": 0.76, "learning_rate": 9.205211210071426e-05, "loss": 2.5432, "step": 702 }, { "epoch": 0.76, "learning_rate": 9.200555188500103e-05, "loss": 2.5313, "step": 704 }, { "epoch": 0.76, "learning_rate": 9.195886753505565e-05, "loss": 2.3887, "step": 706 }, { "epoch": 0.77, "learning_rate": 9.191205918883909e-05, "loss": 2.6655, "step": 708 }, { "epoch": 0.77, "learning_rate": 9.18651269846787e-05, "loss": 2.5833, "step": 710 }, { "epoch": 0.77, "learning_rate": 9.181807106126792e-05, "loss": 2.6638, "step": 712 }, { "epoch": 0.77, "learning_rate": 9.177089155766574e-05, "loss": 2.4395, "step": 714 }, { "epoch": 0.78, "learning_rate": 9.172358861329641e-05, "loss": 2.5247, "step": 716 }, { "epoch": 0.78, "learning_rate": 9.167616236794894e-05, "loss": 2.4711, "step": 718 }, { "epoch": 0.78, "learning_rate": 9.162861296177671e-05, "loss": 2.4537, "step": 720 }, { "epoch": 0.78, "learning_rate": 9.158094053529709e-05, "loss": 2.4404, "step": 722 }, { "epoch": 0.78, "learning_rate": 9.153314522939096e-05, "loss": 2.4599, "step": 724 }, { "epoch": 0.79, "learning_rate": 9.148522718530236e-05, "loss": 2.5289, "step": 726 }, { "epoch": 0.79, "learning_rate": 9.143718654463804e-05, "loss": 2.2966, "step": 728 }, { "epoch": 0.79, "learning_rate": 9.138902344936706e-05, "loss": 2.4635, "step": 730 }, { "epoch": 0.79, "learning_rate": 9.134073804182033e-05, "loss": 2.6182, "step": 732 }, { "epoch": 0.79, "learning_rate": 9.129233046469022e-05, "loss": 2.6568, "step": 734 }, { "epoch": 0.8, "learning_rate": 9.124380086103013e-05, "loss": 2.5841, "step": 736 }, { "epoch": 0.8, "learning_rate": 9.11951493742541e-05, "loss": 2.609, "step": 738 }, { "epoch": 0.8, "learning_rate": 9.114637614813634e-05, "loss": 2.3299, "step": 740 }, { "epoch": 0.8, "learning_rate": 9.109748132681082e-05, "loss": 2.5093, "step": 742 }, { "epoch": 0.81, "learning_rate": 9.104846505477083e-05, "loss": 2.4223, "step": 744 }, { "epoch": 0.81, "learning_rate": 9.09993274768686e-05, "loss": 2.4636, "step": 746 }, { "epoch": 0.81, "learning_rate": 9.095006873831479e-05, "loss": 2.3136, "step": 748 }, { "epoch": 0.81, "learning_rate": 9.090068898467823e-05, "loss": 2.557, "step": 750 }, { "epoch": 0.81, "learning_rate": 9.085118836188521e-05, "loss": 2.4634, "step": 752 }, { "epoch": 0.82, "learning_rate": 9.080156701621936e-05, "loss": 2.5238, "step": 754 }, { "epoch": 0.82, "learning_rate": 9.075182509432095e-05, "loss": 2.4833, "step": 756 }, { "epoch": 0.82, "learning_rate": 9.070196274318666e-05, "loss": 2.6603, "step": 758 }, { "epoch": 0.82, "learning_rate": 9.0651980110169e-05, "loss": 2.4763, "step": 760 }, { "epoch": 0.83, "learning_rate": 9.060187734297599e-05, "loss": 2.4662, "step": 762 }, { "epoch": 0.83, "learning_rate": 9.055165458967063e-05, "loss": 2.4409, "step": 764 }, { "epoch": 0.83, "learning_rate": 9.050131199867052e-05, "loss": 2.5474, "step": 766 }, { "epoch": 0.83, "learning_rate": 9.045084971874738e-05, "loss": 2.5071, "step": 768 }, { "epoch": 0.83, "learning_rate": 9.040026789902665e-05, "loss": 2.4774, "step": 770 }, { "epoch": 0.84, "learning_rate": 9.034956668898706e-05, "loss": 2.4119, "step": 772 }, { "epoch": 0.84, "learning_rate": 9.029874623846011e-05, "loss": 2.4335, "step": 774 }, { "epoch": 0.84, "learning_rate": 9.02478066976297e-05, "loss": 2.3666, "step": 776 }, { "epoch": 0.84, "learning_rate": 9.019674821703166e-05, "loss": 2.5817, "step": 778 }, { "epoch": 0.84, "learning_rate": 9.014557094755331e-05, "loss": 2.2798, "step": 780 }, { "epoch": 0.85, "learning_rate": 9.009427504043305e-05, "loss": 2.5065, "step": 782 }, { "epoch": 0.85, "learning_rate": 9.004286064725982e-05, "loss": 2.4121, "step": 784 }, { "epoch": 0.85, "learning_rate": 8.999132791997271e-05, "loss": 2.5618, "step": 786 }, { "epoch": 0.85, "learning_rate": 8.993967701086057e-05, "loss": 2.7772, "step": 788 }, { "epoch": 0.86, "learning_rate": 8.988790807256143e-05, "loss": 2.3717, "step": 790 }, { "epoch": 0.86, "learning_rate": 8.983602125806216e-05, "loss": 2.5273, "step": 792 }, { "epoch": 0.86, "learning_rate": 8.978401672069797e-05, "loss": 2.5246, "step": 794 }, { "epoch": 0.86, "learning_rate": 8.973189461415194e-05, "loss": 2.5115, "step": 796 }, { "epoch": 0.86, "learning_rate": 8.967965509245461e-05, "loss": 2.583, "step": 798 }, { "epoch": 0.87, "learning_rate": 8.962729830998353e-05, "loss": 2.4989, "step": 800 }, { "epoch": 0.87, "learning_rate": 8.957482442146272e-05, "loss": 2.4011, "step": 802 }, { "epoch": 0.87, "learning_rate": 8.952223358196227e-05, "loss": 2.424, "step": 804 }, { "epoch": 0.87, "learning_rate": 8.946952594689797e-05, "loss": 2.5144, "step": 806 }, { "epoch": 0.87, "learning_rate": 8.941670167203067e-05, "loss": 2.4956, "step": 808 }, { "epoch": 0.88, "learning_rate": 8.936376091346595e-05, "loss": 2.5917, "step": 810 }, { "epoch": 0.88, "learning_rate": 8.931070382765359e-05, "loss": 2.3386, "step": 812 }, { "epoch": 0.88, "learning_rate": 8.925753057138719e-05, "loss": 2.4911, "step": 814 }, { "epoch": 0.88, "learning_rate": 8.920424130180363e-05, "loss": 2.5727, "step": 816 }, { "epoch": 0.89, "learning_rate": 8.915083617638262e-05, "loss": 2.4148, "step": 818 }, { "epoch": 0.89, "learning_rate": 8.909731535294628e-05, "loss": 2.4859, "step": 820 }, { "epoch": 0.89, "learning_rate": 8.904367898965857e-05, "loss": 2.4704, "step": 822 }, { "epoch": 0.89, "learning_rate": 8.898992724502498e-05, "loss": 2.5904, "step": 824 }, { "epoch": 0.89, "learning_rate": 8.893606027789192e-05, "loss": 2.5586, "step": 826 }, { "epoch": 0.9, "learning_rate": 8.888207824744629e-05, "loss": 2.4723, "step": 828 }, { "epoch": 0.9, "learning_rate": 8.882798131321508e-05, "loss": 2.5011, "step": 830 }, { "epoch": 0.9, "learning_rate": 8.877376963506477e-05, "loss": 2.6237, "step": 832 }, { "epoch": 0.9, "learning_rate": 8.871944337320102e-05, "loss": 2.548, "step": 834 }, { "epoch": 0.91, "learning_rate": 8.866500268816803e-05, "loss": 2.3527, "step": 836 }, { "epoch": 0.91, "learning_rate": 8.861044774084815e-05, "loss": 2.6638, "step": 838 }, { "epoch": 0.91, "learning_rate": 8.855577869246142e-05, "loss": 2.4873, "step": 840 }, { "epoch": 0.91, "learning_rate": 8.850099570456509e-05, "loss": 2.4461, "step": 842 }, { "epoch": 0.91, "learning_rate": 8.844609893905309e-05, "loss": 2.4031, "step": 844 }, { "epoch": 0.92, "learning_rate": 8.839108855815557e-05, "loss": 2.5516, "step": 846 }, { "epoch": 0.92, "learning_rate": 8.833596472443848e-05, "loss": 2.4283, "step": 848 }, { "epoch": 0.92, "learning_rate": 8.828072760080299e-05, "loss": 2.2932, "step": 850 }, { "epoch": 0.92, "learning_rate": 8.822537735048512e-05, "loss": 2.3761, "step": 852 }, { "epoch": 0.92, "learning_rate": 8.816991413705516e-05, "loss": 2.4804, "step": 854 }, { "epoch": 0.93, "learning_rate": 8.811433812441722e-05, "loss": 2.6496, "step": 856 }, { "epoch": 0.93, "learning_rate": 8.80586494768088e-05, "loss": 2.4868, "step": 858 }, { "epoch": 0.93, "learning_rate": 8.800284835880024e-05, "loss": 2.679, "step": 860 }, { "epoch": 0.93, "learning_rate": 8.79469349352942e-05, "loss": 2.6229, "step": 862 }, { "epoch": 0.94, "learning_rate": 8.78909093715253e-05, "loss": 2.5438, "step": 864 }, { "epoch": 0.94, "learning_rate": 8.783477183305949e-05, "loss": 2.4863, "step": 866 }, { "epoch": 0.94, "learning_rate": 8.777852248579367e-05, "loss": 2.5205, "step": 868 }, { "epoch": 0.94, "learning_rate": 8.772216149595513e-05, "loss": 2.4179, "step": 870 }, { "epoch": 0.94, "learning_rate": 8.766568903010113e-05, "loss": 2.4653, "step": 872 }, { "epoch": 0.95, "learning_rate": 8.76091052551183e-05, "loss": 2.3727, "step": 874 }, { "epoch": 0.95, "learning_rate": 8.755241033822224e-05, "loss": 2.5503, "step": 876 }, { "epoch": 0.95, "learning_rate": 8.7495604446957e-05, "loss": 2.4288, "step": 878 }, { "epoch": 0.95, "learning_rate": 8.743868774919458e-05, "loss": 2.451, "step": 880 }, { "epoch": 0.96, "learning_rate": 8.738166041313439e-05, "loss": 2.3869, "step": 882 }, { "epoch": 0.96, "learning_rate": 8.732452260730286e-05, "loss": 2.5419, "step": 884 }, { "epoch": 0.96, "learning_rate": 8.726727450055287e-05, "loss": 2.3962, "step": 886 }, { "epoch": 0.96, "learning_rate": 8.720991626206321e-05, "loss": 2.4672, "step": 888 }, { "epoch": 0.96, "learning_rate": 8.715244806133816e-05, "loss": 2.4988, "step": 890 }, { "epoch": 0.97, "learning_rate": 8.7094870068207e-05, "loss": 2.2557, "step": 892 }, { "epoch": 0.97, "learning_rate": 8.703718245282337e-05, "loss": 2.5007, "step": 894 }, { "epoch": 0.97, "learning_rate": 8.697938538566499e-05, "loss": 2.4908, "step": 896 }, { "epoch": 0.97, "learning_rate": 8.69214790375329e-05, "loss": 2.4346, "step": 898 }, { "epoch": 0.97, "learning_rate": 8.686346357955117e-05, "loss": 2.2897, "step": 900 }, { "epoch": 0.98, "learning_rate": 8.68053391831663e-05, "loss": 2.3337, "step": 902 }, { "epoch": 0.98, "learning_rate": 8.674710602014671e-05, "loss": 2.4618, "step": 904 }, { "epoch": 0.98, "learning_rate": 8.668876426258221e-05, "loss": 2.5041, "step": 906 }, { "epoch": 0.98, "learning_rate": 8.66303140828836e-05, "loss": 2.4207, "step": 908 }, { "epoch": 0.99, "learning_rate": 8.657175565378206e-05, "loss": 2.4657, "step": 910 }, { "epoch": 0.99, "learning_rate": 8.651308914832862e-05, "loss": 2.5422, "step": 912 }, { "epoch": 0.99, "learning_rate": 8.645431473989376e-05, "loss": 2.4069, "step": 914 }, { "epoch": 0.99, "learning_rate": 8.63954326021668e-05, "loss": 2.6141, "step": 916 }, { "epoch": 0.99, "learning_rate": 8.633644290915545e-05, "loss": 2.7452, "step": 918 }, { "epoch": 1.0, "learning_rate": 8.627734583518521e-05, "loss": 2.4625, "step": 920 }, { "epoch": 1.0, "learning_rate": 8.621814155489895e-05, "loss": 2.3913, "step": 922 }, { "epoch": 1.0, "learning_rate": 8.615883024325636e-05, "loss": 3.1472, "step": 924 }, { "epoch": 1.0, "learning_rate": 8.609941207553342e-05, "loss": 2.4791, "step": 926 }, { "epoch": 1.01, "learning_rate": 8.603988722732186e-05, "loss": 2.4555, "step": 928 }, { "epoch": 1.01, "learning_rate": 8.598025587452873e-05, "loss": 2.5092, "step": 930 }, { "epoch": 1.01, "learning_rate": 8.592051819337579e-05, "loss": 2.5088, "step": 932 }, { "epoch": 1.01, "learning_rate": 8.586067436039899e-05, "loss": 2.5663, "step": 934 }, { "epoch": 1.01, "learning_rate": 8.580072455244801e-05, "loss": 2.5562, "step": 936 }, { "epoch": 1.02, "learning_rate": 8.574066894668573e-05, "loss": 2.4265, "step": 938 }, { "epoch": 1.02, "learning_rate": 8.568050772058762e-05, "loss": 2.473, "step": 940 }, { "epoch": 1.02, "learning_rate": 8.562024105194133e-05, "loss": 2.5223, "step": 942 }, { "epoch": 1.02, "learning_rate": 8.555986911884609e-05, "loss": 2.3263, "step": 944 }, { "epoch": 1.02, "learning_rate": 8.549939209971221e-05, "loss": 2.2938, "step": 946 }, { "epoch": 1.03, "learning_rate": 8.543881017326057e-05, "loss": 2.321, "step": 948 }, { "epoch": 1.03, "learning_rate": 8.537812351852201e-05, "loss": 2.4323, "step": 950 }, { "epoch": 1.03, "learning_rate": 8.531733231483694e-05, "loss": 2.365, "step": 952 }, { "epoch": 1.03, "learning_rate": 8.525643674185466e-05, "loss": 2.4085, "step": 954 }, { "epoch": 1.04, "learning_rate": 8.519543697953296e-05, "loss": 2.4288, "step": 956 }, { "epoch": 1.04, "learning_rate": 8.51343332081375e-05, "loss": 2.6551, "step": 958 }, { "epoch": 1.04, "learning_rate": 8.50731256082413e-05, "loss": 2.4887, "step": 960 }, { "epoch": 1.04, "learning_rate": 8.501181436072422e-05, "loss": 2.6168, "step": 962 }, { "epoch": 1.04, "learning_rate": 8.495039964677241e-05, "loss": 2.4247, "step": 964 }, { "epoch": 1.05, "learning_rate": 8.488888164787782e-05, "loss": 2.5132, "step": 966 }, { "epoch": 1.05, "learning_rate": 8.482726054583761e-05, "loss": 2.5011, "step": 968 }, { "epoch": 1.05, "learning_rate": 8.476553652275356e-05, "loss": 2.4964, "step": 970 }, { "epoch": 1.05, "learning_rate": 8.47037097610317e-05, "loss": 2.3202, "step": 972 }, { "epoch": 1.06, "learning_rate": 8.464178044338162e-05, "loss": 2.2058, "step": 974 }, { "epoch": 1.06, "learning_rate": 8.4579748752816e-05, "loss": 2.585, "step": 976 }, { "epoch": 1.06, "learning_rate": 8.451761487265003e-05, "loss": 2.3743, "step": 978 }, { "epoch": 1.06, "learning_rate": 8.44553789865009e-05, "loss": 2.2927, "step": 980 }, { "epoch": 1.06, "learning_rate": 8.439304127828728e-05, "loss": 2.3899, "step": 982 }, { "epoch": 1.07, "learning_rate": 8.433060193222868e-05, "loss": 2.462, "step": 984 }, { "epoch": 1.07, "learning_rate": 8.426806113284502e-05, "loss": 2.4369, "step": 986 }, { "epoch": 1.07, "learning_rate": 8.420541906495599e-05, "loss": 2.4967, "step": 988 }, { "epoch": 1.07, "learning_rate": 8.414267591368058e-05, "loss": 2.6217, "step": 990 }, { "epoch": 1.07, "learning_rate": 8.407983186443653e-05, "loss": 2.6545, "step": 992 }, { "epoch": 1.08, "learning_rate": 8.401688710293967e-05, "loss": 2.4993, "step": 994 }, { "epoch": 1.08, "learning_rate": 8.395384181520351e-05, "loss": 2.3227, "step": 996 }, { "epoch": 1.08, "learning_rate": 8.389069618753865e-05, "loss": 2.325, "step": 998 }, { "epoch": 1.08, "learning_rate": 8.382745040655212e-05, "loss": 2.6491, "step": 1000 }, { "epoch": 1.09, "learning_rate": 8.376410465914705e-05, "loss": 2.4874, "step": 1002 }, { "epoch": 1.09, "learning_rate": 8.370065913252188e-05, "loss": 2.505, "step": 1004 }, { "epoch": 1.09, "learning_rate": 8.363711401417e-05, "loss": 2.4867, "step": 1006 }, { "epoch": 1.09, "learning_rate": 8.357346949187906e-05, "loss": 2.2378, "step": 1008 }, { "epoch": 1.09, "learning_rate": 8.350972575373047e-05, "loss": 2.372, "step": 1010 }, { "epoch": 1.1, "learning_rate": 8.344588298809887e-05, "loss": 2.3432, "step": 1012 }, { "epoch": 1.1, "learning_rate": 8.338194138365151e-05, "loss": 2.6878, "step": 1014 }, { "epoch": 1.1, "learning_rate": 8.331790112934777e-05, "loss": 2.4083, "step": 1016 }, { "epoch": 1.1, "learning_rate": 8.325376241443849e-05, "loss": 2.4451, "step": 1018 }, { "epoch": 1.11, "learning_rate": 8.318952542846557e-05, "loss": 2.3759, "step": 1020 }, { "epoch": 1.11, "learning_rate": 8.312519036126125e-05, "loss": 2.5355, "step": 1022 }, { "epoch": 1.11, "learning_rate": 8.306075740294763e-05, "loss": 2.4161, "step": 1024 }, { "epoch": 1.11, "learning_rate": 8.299622674393614e-05, "loss": 2.3455, "step": 1026 }, { "epoch": 1.11, "learning_rate": 8.293159857492686e-05, "loss": 2.469, "step": 1028 }, { "epoch": 1.12, "learning_rate": 8.28668730869081e-05, "loss": 2.3113, "step": 1030 }, { "epoch": 1.12, "learning_rate": 8.280205047115572e-05, "loss": 2.4072, "step": 1032 }, { "epoch": 1.12, "learning_rate": 8.273713091923264e-05, "loss": 2.5218, "step": 1034 }, { "epoch": 1.12, "learning_rate": 8.267211462298822e-05, "loss": 2.374, "step": 1036 }, { "epoch": 1.12, "learning_rate": 8.260700177455773e-05, "loss": 2.452, "step": 1038 }, { "epoch": 1.13, "learning_rate": 8.254179256636179e-05, "loss": 2.4523, "step": 1040 }, { "epoch": 1.13, "learning_rate": 8.247648719110572e-05, "loss": 2.5231, "step": 1042 }, { "epoch": 1.13, "learning_rate": 8.241108584177911e-05, "loss": 2.5678, "step": 1044 }, { "epoch": 1.13, "learning_rate": 8.234558871165512e-05, "loss": 2.449, "step": 1046 }, { "epoch": 1.14, "learning_rate": 8.227999599428995e-05, "loss": 2.3786, "step": 1048 }, { "epoch": 1.14, "learning_rate": 8.221430788352233e-05, "loss": 2.3994, "step": 1050 }, { "epoch": 1.14, "learning_rate": 8.214852457347286e-05, "loss": 2.5034, "step": 1052 }, { "epoch": 1.14, "learning_rate": 8.208264625854347e-05, "loss": 2.2819, "step": 1054 }, { "epoch": 1.14, "learning_rate": 8.201667313341685e-05, "loss": 2.4361, "step": 1056 }, { "epoch": 1.15, "learning_rate": 8.19506053930559e-05, "loss": 2.3855, "step": 1058 }, { "epoch": 1.15, "learning_rate": 8.18844432327031e-05, "loss": 2.4898, "step": 1060 }, { "epoch": 1.15, "learning_rate": 8.181818684787992e-05, "loss": 2.5017, "step": 1062 }, { "epoch": 1.15, "learning_rate": 8.175183643438635e-05, "loss": 2.402, "step": 1064 }, { "epoch": 1.15, "learning_rate": 8.168539218830024e-05, "loss": 2.3225, "step": 1066 }, { "epoch": 1.16, "learning_rate": 8.16188543059767e-05, "loss": 2.3171, "step": 1068 }, { "epoch": 1.16, "learning_rate": 8.155222298404756e-05, "loss": 2.5654, "step": 1070 }, { "epoch": 1.16, "learning_rate": 8.148549841942082e-05, "loss": 2.3448, "step": 1072 }, { "epoch": 1.16, "learning_rate": 8.141868080927996e-05, "loss": 2.2422, "step": 1074 }, { "epoch": 1.17, "learning_rate": 8.135177035108352e-05, "loss": 2.6608, "step": 1076 }, { "epoch": 1.17, "learning_rate": 8.128476724256431e-05, "loss": 2.486, "step": 1078 }, { "epoch": 1.17, "learning_rate": 8.121767168172904e-05, "loss": 2.3347, "step": 1080 }, { "epoch": 1.17, "learning_rate": 8.115048386685757e-05, "loss": 2.4229, "step": 1082 }, { "epoch": 1.17, "learning_rate": 8.108320399650244e-05, "loss": 2.6345, "step": 1084 }, { "epoch": 1.18, "learning_rate": 8.101583226948819e-05, "loss": 2.5783, "step": 1086 }, { "epoch": 1.18, "learning_rate": 8.09483688849108e-05, "loss": 2.4985, "step": 1088 }, { "epoch": 1.18, "learning_rate": 8.088081404213718e-05, "loss": 2.3184, "step": 1090 }, { "epoch": 1.18, "learning_rate": 8.081316794080445e-05, "loss": 2.324, "step": 1092 }, { "epoch": 1.19, "learning_rate": 8.074543078081946e-05, "loss": 2.504, "step": 1094 }, { "epoch": 1.19, "learning_rate": 8.067760276235812e-05, "loss": 2.3798, "step": 1096 }, { "epoch": 1.19, "learning_rate": 8.060968408586489e-05, "loss": 2.4197, "step": 1098 }, { "epoch": 1.19, "learning_rate": 8.054167495205207e-05, "loss": 2.4555, "step": 1100 }, { "epoch": 1.19, "learning_rate": 8.047357556189936e-05, "loss": 2.6626, "step": 1102 }, { "epoch": 1.2, "learning_rate": 8.040538611665314e-05, "loss": 2.5664, "step": 1104 }, { "epoch": 1.2, "learning_rate": 8.033710681782592e-05, "loss": 2.4436, "step": 1106 }, { "epoch": 1.2, "learning_rate": 8.026873786719573e-05, "loss": 2.5044, "step": 1108 }, { "epoch": 1.2, "learning_rate": 8.02002794668056e-05, "loss": 2.488, "step": 1110 }, { "epoch": 1.2, "learning_rate": 8.013173181896283e-05, "loss": 2.4565, "step": 1112 }, { "epoch": 1.21, "learning_rate": 8.006309512623848e-05, "loss": 2.5484, "step": 1114 }, { "epoch": 1.21, "learning_rate": 7.99943695914668e-05, "loss": 2.4438, "step": 1116 }, { "epoch": 1.21, "learning_rate": 7.992555541774452e-05, "loss": 2.4668, "step": 1118 }, { "epoch": 1.21, "learning_rate": 7.985665280843035e-05, "loss": 2.5129, "step": 1120 }, { "epoch": 1.22, "learning_rate": 7.978766196714436e-05, "loss": 2.2599, "step": 1122 }, { "epoch": 1.22, "learning_rate": 7.97185830977673e-05, "loss": 2.4388, "step": 1124 }, { "epoch": 1.22, "learning_rate": 7.964941640444014e-05, "loss": 2.5566, "step": 1126 }, { "epoch": 1.22, "learning_rate": 7.958016209156331e-05, "loss": 2.3852, "step": 1128 }, { "epoch": 1.22, "learning_rate": 7.951082036379625e-05, "loss": 2.3447, "step": 1130 }, { "epoch": 1.23, "learning_rate": 7.944139142605665e-05, "loss": 2.471, "step": 1132 }, { "epoch": 1.23, "learning_rate": 7.937187548351996e-05, "loss": 2.4846, "step": 1134 }, { "epoch": 1.23, "learning_rate": 7.930227274161877e-05, "loss": 2.433, "step": 1136 }, { "epoch": 1.23, "learning_rate": 7.923258340604212e-05, "loss": 2.7046, "step": 1138 }, { "epoch": 1.23, "learning_rate": 7.916280768273498e-05, "loss": 2.2928, "step": 1140 }, { "epoch": 1.24, "learning_rate": 7.909294577789766e-05, "loss": 2.5962, "step": 1142 }, { "epoch": 1.24, "learning_rate": 7.902299789798505e-05, "loss": 2.4707, "step": 1144 }, { "epoch": 1.24, "learning_rate": 7.895296424970618e-05, "loss": 2.4212, "step": 1146 }, { "epoch": 1.24, "learning_rate": 7.888284504002352e-05, "loss": 2.5168, "step": 1148 }, { "epoch": 1.25, "learning_rate": 7.881264047615245e-05, "loss": 2.5038, "step": 1150 }, { "epoch": 1.25, "learning_rate": 7.874235076556046e-05, "loss": 2.2647, "step": 1152 }, { "epoch": 1.25, "learning_rate": 7.867197611596683e-05, "loss": 2.5225, "step": 1154 }, { "epoch": 1.25, "learning_rate": 7.860151673534168e-05, "loss": 2.3552, "step": 1156 }, { "epoch": 1.25, "learning_rate": 7.853097283190567e-05, "loss": 2.5299, "step": 1158 }, { "epoch": 1.26, "learning_rate": 7.846034461412912e-05, "loss": 2.476, "step": 1160 }, { "epoch": 1.26, "learning_rate": 7.838963229073162e-05, "loss": 2.3523, "step": 1162 }, { "epoch": 1.26, "learning_rate": 7.831883607068125e-05, "loss": 2.4746, "step": 1164 }, { "epoch": 1.26, "learning_rate": 7.824795616319402e-05, "loss": 2.4551, "step": 1166 }, { "epoch": 1.27, "learning_rate": 7.817699277773325e-05, "loss": 2.4863, "step": 1168 }, { "epoch": 1.27, "learning_rate": 7.810594612400898e-05, "loss": 2.5789, "step": 1170 }, { "epoch": 1.27, "learning_rate": 7.803481641197733e-05, "loss": 2.487, "step": 1172 }, { "epoch": 1.27, "learning_rate": 7.796360385183984e-05, "loss": 2.5997, "step": 1174 }, { "epoch": 1.27, "learning_rate": 7.789230865404287e-05, "loss": 2.3587, "step": 1176 }, { "epoch": 1.28, "learning_rate": 7.782093102927703e-05, "loss": 2.7109, "step": 1178 }, { "epoch": 1.28, "learning_rate": 7.77494711884765e-05, "loss": 2.5783, "step": 1180 }, { "epoch": 1.28, "learning_rate": 7.767792934281843e-05, "loss": 2.4947, "step": 1182 }, { "epoch": 1.28, "learning_rate": 7.76063057037223e-05, "loss": 2.3812, "step": 1184 }, { "epoch": 1.28, "learning_rate": 7.753460048284928e-05, "loss": 2.3337, "step": 1186 }, { "epoch": 1.29, "learning_rate": 7.74628138921017e-05, "loss": 2.5691, "step": 1188 }, { "epoch": 1.29, "learning_rate": 7.739094614362229e-05, "loss": 2.4811, "step": 1190 }, { "epoch": 1.29, "learning_rate": 7.731899744979364e-05, "loss": 2.618, "step": 1192 }, { "epoch": 1.29, "learning_rate": 7.724696802323755e-05, "loss": 2.2892, "step": 1194 }, { "epoch": 1.3, "learning_rate": 7.717485807681437e-05, "loss": 2.3032, "step": 1196 }, { "epoch": 1.3, "learning_rate": 7.710266782362247e-05, "loss": 2.4592, "step": 1198 }, { "epoch": 1.3, "learning_rate": 7.703039747699747e-05, "loss": 2.3496, "step": 1200 }, { "epoch": 1.3, "learning_rate": 7.695804725051172e-05, "loss": 2.423, "step": 1202 }, { "epoch": 1.3, "learning_rate": 7.68856173579736e-05, "loss": 2.4122, "step": 1204 }, { "epoch": 1.31, "learning_rate": 7.681310801342696e-05, "loss": 2.3985, "step": 1206 }, { "epoch": 1.31, "learning_rate": 7.674051943115042e-05, "loss": 2.2799, "step": 1208 }, { "epoch": 1.31, "learning_rate": 7.666785182565677e-05, "loss": 2.3947, "step": 1210 }, { "epoch": 1.31, "learning_rate": 7.65951054116923e-05, "loss": 2.3299, "step": 1212 }, { "epoch": 1.32, "learning_rate": 7.652228040423622e-05, "loss": 2.274, "step": 1214 }, { "epoch": 1.32, "learning_rate": 7.644937701850002e-05, "loss": 2.3697, "step": 1216 }, { "epoch": 1.32, "learning_rate": 7.637639546992677e-05, "loss": 2.3167, "step": 1218 }, { "epoch": 1.32, "learning_rate": 7.630333597419054e-05, "loss": 2.4688, "step": 1220 }, { "epoch": 1.32, "learning_rate": 7.623019874719579e-05, "loss": 2.2979, "step": 1222 }, { "epoch": 1.33, "learning_rate": 7.61569840050766e-05, "loss": 2.4614, "step": 1224 }, { "epoch": 1.33, "learning_rate": 7.60836919641962e-05, "loss": 2.5093, "step": 1226 }, { "epoch": 1.33, "learning_rate": 7.60103228411462e-05, "loss": 2.4832, "step": 1228 }, { "epoch": 1.33, "learning_rate": 7.593687685274609e-05, "loss": 2.4112, "step": 1230 }, { "epoch": 1.33, "learning_rate": 7.586335421604238e-05, "loss": 2.3033, "step": 1232 }, { "epoch": 1.34, "learning_rate": 7.578975514830821e-05, "loss": 2.6554, "step": 1234 }, { "epoch": 1.34, "learning_rate": 7.571607986704252e-05, "loss": 2.3495, "step": 1236 }, { "epoch": 1.34, "learning_rate": 7.564232858996949e-05, "loss": 2.4517, "step": 1238 }, { "epoch": 1.34, "learning_rate": 7.556850153503787e-05, "loss": 2.4985, "step": 1240 }, { "epoch": 1.35, "learning_rate": 7.549459892042041e-05, "loss": 2.5046, "step": 1242 }, { "epoch": 1.35, "learning_rate": 7.542062096451305e-05, "loss": 2.5004, "step": 1244 }, { "epoch": 1.35, "learning_rate": 7.534656788593446e-05, "loss": 2.3215, "step": 1246 }, { "epoch": 1.35, "learning_rate": 7.527243990352529e-05, "loss": 2.5481, "step": 1248 }, { "epoch": 1.35, "learning_rate": 7.519823723634753e-05, "loss": 2.3608, "step": 1250 }, { "epoch": 1.36, "learning_rate": 7.51239601036839e-05, "loss": 2.2113, "step": 1252 }, { "epoch": 1.36, "learning_rate": 7.504960872503715e-05, "loss": 2.6318, "step": 1254 }, { "epoch": 1.36, "learning_rate": 7.497518332012946e-05, "loss": 2.3967, "step": 1256 }, { "epoch": 1.36, "learning_rate": 7.490068410890175e-05, "loss": 2.1024, "step": 1258 }, { "epoch": 1.36, "learning_rate": 7.48261113115131e-05, "loss": 2.5322, "step": 1260 }, { "epoch": 1.37, "learning_rate": 7.475146514834001e-05, "loss": 2.3737, "step": 1262 }, { "epoch": 1.37, "learning_rate": 7.46767458399758e-05, "loss": 2.4803, "step": 1264 }, { "epoch": 1.37, "learning_rate": 7.460195360722995e-05, "loss": 2.1737, "step": 1266 }, { "epoch": 1.37, "learning_rate": 7.452708867112745e-05, "loss": 2.5601, "step": 1268 }, { "epoch": 1.38, "learning_rate": 7.44521512529081e-05, "loss": 2.5452, "step": 1270 }, { "epoch": 1.38, "learning_rate": 7.437714157402598e-05, "loss": 2.3953, "step": 1272 }, { "epoch": 1.38, "learning_rate": 7.430205985614864e-05, "loss": 2.4914, "step": 1274 }, { "epoch": 1.38, "learning_rate": 7.422690632115654e-05, "loss": 2.3997, "step": 1276 }, { "epoch": 1.38, "learning_rate": 7.41516811911424e-05, "loss": 2.2561, "step": 1278 }, { "epoch": 1.39, "learning_rate": 7.407638468841047e-05, "loss": 2.6531, "step": 1280 }, { "epoch": 1.39, "learning_rate": 7.400101703547597e-05, "loss": 2.6299, "step": 1282 }, { "epoch": 1.39, "learning_rate": 7.392557845506432e-05, "loss": 2.4573, "step": 1284 }, { "epoch": 1.39, "learning_rate": 7.385006917011063e-05, "loss": 2.5633, "step": 1286 }, { "epoch": 1.4, "learning_rate": 7.377448940375887e-05, "loss": 2.6371, "step": 1288 }, { "epoch": 1.4, "learning_rate": 7.369883937936136e-05, "loss": 2.2814, "step": 1290 }, { "epoch": 1.4, "learning_rate": 7.362311932047797e-05, "loss": 2.6985, "step": 1292 }, { "epoch": 1.4, "learning_rate": 7.354732945087563e-05, "loss": 2.3274, "step": 1294 }, { "epoch": 1.4, "learning_rate": 7.34714699945275e-05, "loss": 2.2417, "step": 1296 }, { "epoch": 1.41, "learning_rate": 7.33955411756124e-05, "loss": 2.4285, "step": 1298 }, { "epoch": 1.41, "learning_rate": 7.331954321851418e-05, "loss": 2.4677, "step": 1300 }, { "epoch": 1.41, "learning_rate": 7.32434763478209e-05, "loss": 2.4342, "step": 1302 }, { "epoch": 1.41, "learning_rate": 7.316734078832438e-05, "loss": 2.3903, "step": 1304 }, { "epoch": 1.41, "learning_rate": 7.309113676501939e-05, "loss": 2.4379, "step": 1306 }, { "epoch": 1.42, "learning_rate": 7.301486450310298e-05, "loss": 2.4929, "step": 1308 }, { "epoch": 1.42, "learning_rate": 7.293852422797391e-05, "loss": 2.4626, "step": 1310 }, { "epoch": 1.42, "learning_rate": 7.286211616523193e-05, "loss": 2.5199, "step": 1312 }, { "epoch": 1.42, "learning_rate": 7.278564054067709e-05, "loss": 2.3659, "step": 1314 }, { "epoch": 1.43, "learning_rate": 7.270909758030912e-05, "loss": 2.4869, "step": 1316 }, { "epoch": 1.43, "learning_rate": 7.263248751032671e-05, "loss": 2.5166, "step": 1318 }, { "epoch": 1.43, "learning_rate": 7.255581055712688e-05, "loss": 2.139, "step": 1320 }, { "epoch": 1.43, "learning_rate": 7.247906694730437e-05, "loss": 2.4807, "step": 1322 }, { "epoch": 1.43, "learning_rate": 7.24022569076508e-05, "loss": 2.4607, "step": 1324 }, { "epoch": 1.44, "learning_rate": 7.232538066515414e-05, "loss": 2.3367, "step": 1326 }, { "epoch": 1.44, "learning_rate": 7.224843844699803e-05, "loss": 2.6005, "step": 1328 }, { "epoch": 1.44, "learning_rate": 7.217143048056108e-05, "loss": 2.3467, "step": 1330 }, { "epoch": 1.44, "learning_rate": 7.209435699341613e-05, "loss": 2.4132, "step": 1332 }, { "epoch": 1.45, "learning_rate": 7.201721821332973e-05, "loss": 2.3049, "step": 1334 }, { "epoch": 1.45, "learning_rate": 7.194001436826135e-05, "loss": 2.3176, "step": 1336 }, { "epoch": 1.45, "learning_rate": 7.18627456863627e-05, "loss": 2.5401, "step": 1338 }, { "epoch": 1.45, "learning_rate": 7.178541239597717e-05, "loss": 2.4131, "step": 1340 }, { "epoch": 1.45, "learning_rate": 7.170801472563903e-05, "loss": 2.4554, "step": 1342 }, { "epoch": 1.46, "learning_rate": 7.163055290407282e-05, "loss": 2.405, "step": 1344 }, { "epoch": 1.46, "learning_rate": 7.155302716019263e-05, "loss": 2.4435, "step": 1346 }, { "epoch": 1.46, "learning_rate": 7.14754377231015e-05, "loss": 2.4068, "step": 1348 }, { "epoch": 1.46, "learning_rate": 7.139778482209068e-05, "loss": 2.4863, "step": 1350 }, { "epoch": 1.46, "learning_rate": 7.132006868663894e-05, "loss": 2.3856, "step": 1352 }, { "epoch": 1.47, "learning_rate": 7.124228954641196e-05, "loss": 2.3076, "step": 1354 }, { "epoch": 1.47, "learning_rate": 7.116444763126158e-05, "loss": 2.334, "step": 1356 }, { "epoch": 1.47, "learning_rate": 7.108654317122515e-05, "loss": 2.3639, "step": 1358 }, { "epoch": 1.47, "learning_rate": 7.100857639652489e-05, "loss": 2.7099, "step": 1360 }, { "epoch": 1.48, "learning_rate": 7.093054753756713e-05, "loss": 2.6381, "step": 1362 }, { "epoch": 1.48, "learning_rate": 7.085245682494168e-05, "loss": 2.4935, "step": 1364 }, { "epoch": 1.48, "learning_rate": 7.077430448942117e-05, "loss": 2.3986, "step": 1366 }, { "epoch": 1.48, "learning_rate": 7.069609076196029e-05, "loss": 2.5647, "step": 1368 }, { "epoch": 1.48, "learning_rate": 7.061781587369519e-05, "loss": 2.34, "step": 1370 }, { "epoch": 1.49, "learning_rate": 7.053948005594273e-05, "loss": 2.5114, "step": 1372 }, { "epoch": 1.49, "learning_rate": 7.046108354019987e-05, "loss": 2.4023, "step": 1374 }, { "epoch": 1.49, "learning_rate": 7.038262655814291e-05, "loss": 2.5273, "step": 1376 }, { "epoch": 1.49, "learning_rate": 7.030410934162684e-05, "loss": 2.3996, "step": 1378 }, { "epoch": 1.49, "learning_rate": 7.022553212268469e-05, "loss": 2.5281, "step": 1380 }, { "epoch": 1.5, "learning_rate": 7.014689513352675e-05, "loss": 2.2748, "step": 1382 }, { "epoch": 1.5, "learning_rate": 7.006819860654001e-05, "loss": 2.6029, "step": 1384 }, { "epoch": 1.5, "learning_rate": 6.998944277428734e-05, "loss": 2.3091, "step": 1386 }, { "epoch": 1.5, "learning_rate": 6.991062786950691e-05, "loss": 2.4689, "step": 1388 }, { "epoch": 1.51, "learning_rate": 6.983175412511145e-05, "loss": 2.4879, "step": 1390 }, { "epoch": 1.51, "learning_rate": 6.975282177418756e-05, "loss": 2.5369, "step": 1392 }, { "epoch": 1.51, "learning_rate": 6.967383104999505e-05, "loss": 2.4159, "step": 1394 }, { "epoch": 1.51, "learning_rate": 6.959478218596625e-05, "loss": 2.5748, "step": 1396 }, { "epoch": 1.51, "learning_rate": 6.951567541570523e-05, "loss": 2.5503, "step": 1398 }, { "epoch": 1.52, "learning_rate": 6.943651097298727e-05, "loss": 2.5094, "step": 1400 }, { "epoch": 1.52, "learning_rate": 6.935728909175805e-05, "loss": 2.3161, "step": 1402 }, { "epoch": 1.52, "learning_rate": 6.927801000613298e-05, "loss": 2.3559, "step": 1404 }, { "epoch": 1.52, "learning_rate": 6.919867395039652e-05, "loss": 2.4446, "step": 1406 }, { "epoch": 1.53, "learning_rate": 6.91192811590015e-05, "loss": 2.3579, "step": 1408 }, { "epoch": 1.53, "learning_rate": 6.903983186656844e-05, "loss": 2.3263, "step": 1410 }, { "epoch": 1.53, "learning_rate": 6.896032630788476e-05, "loss": 2.4279, "step": 1412 }, { "epoch": 1.53, "learning_rate": 6.888076471790424e-05, "loss": 2.3288, "step": 1414 }, { "epoch": 1.53, "learning_rate": 6.880114733174615e-05, "loss": 2.3714, "step": 1416 }, { "epoch": 1.54, "learning_rate": 6.872147438469476e-05, "loss": 2.5845, "step": 1418 }, { "epoch": 1.54, "learning_rate": 6.864174611219841e-05, "loss": 2.2575, "step": 1420 }, { "epoch": 1.54, "learning_rate": 6.856196274986907e-05, "loss": 2.7716, "step": 1422 }, { "epoch": 1.54, "learning_rate": 6.848212453348137e-05, "loss": 2.408, "step": 1424 }, { "epoch": 1.54, "learning_rate": 6.840223169897217e-05, "loss": 2.5191, "step": 1426 }, { "epoch": 1.55, "learning_rate": 6.832228448243964e-05, "loss": 2.3474, "step": 1428 }, { "epoch": 1.55, "learning_rate": 6.824228312014274e-05, "loss": 2.5852, "step": 1430 }, { "epoch": 1.55, "learning_rate": 6.816222784850038e-05, "loss": 2.5364, "step": 1432 }, { "epoch": 1.55, "learning_rate": 6.80821189040908e-05, "loss": 2.26, "step": 1434 }, { "epoch": 1.56, "learning_rate": 6.800195652365087e-05, "loss": 2.4253, "step": 1436 }, { "epoch": 1.56, "learning_rate": 6.792174094407533e-05, "loss": 2.3855, "step": 1438 }, { "epoch": 1.56, "learning_rate": 6.784147240241619e-05, "loss": 2.2678, "step": 1440 }, { "epoch": 1.56, "learning_rate": 6.776115113588194e-05, "loss": 2.4646, "step": 1442 }, { "epoch": 1.56, "learning_rate": 6.76807773818369e-05, "loss": 2.6316, "step": 1444 }, { "epoch": 1.57, "learning_rate": 6.760035137780046e-05, "loss": 2.5357, "step": 1446 }, { "epoch": 1.57, "learning_rate": 6.751987336144648e-05, "loss": 2.5943, "step": 1448 }, { "epoch": 1.57, "learning_rate": 6.743934357060246e-05, "loss": 2.4468, "step": 1450 }, { "epoch": 1.57, "learning_rate": 6.735876224324895e-05, "loss": 2.3678, "step": 1452 }, { "epoch": 1.57, "learning_rate": 6.72781296175188e-05, "loss": 2.3095, "step": 1454 }, { "epoch": 1.58, "learning_rate": 6.719744593169641e-05, "loss": 2.4335, "step": 1456 }, { "epoch": 1.58, "learning_rate": 6.711671142421714e-05, "loss": 2.5255, "step": 1458 }, { "epoch": 1.58, "learning_rate": 6.703592633366647e-05, "loss": 2.3837, "step": 1460 }, { "epoch": 1.58, "learning_rate": 6.695509089877943e-05, "loss": 2.5474, "step": 1462 }, { "epoch": 1.59, "learning_rate": 6.687420535843975e-05, "loss": 2.5055, "step": 1464 }, { "epoch": 1.59, "learning_rate": 6.679326995167932e-05, "loss": 2.4212, "step": 1466 }, { "epoch": 1.59, "learning_rate": 6.671228491767728e-05, "loss": 2.4475, "step": 1468 }, { "epoch": 1.59, "learning_rate": 6.663125049575956e-05, "loss": 2.5926, "step": 1470 }, { "epoch": 1.59, "learning_rate": 6.655016692539793e-05, "loss": 2.2358, "step": 1472 }, { "epoch": 1.6, "learning_rate": 6.646903444620949e-05, "loss": 2.6164, "step": 1474 }, { "epoch": 1.6, "learning_rate": 6.63878532979558e-05, "loss": 2.5778, "step": 1476 }, { "epoch": 1.6, "learning_rate": 6.630662372054227e-05, "loss": 2.4401, "step": 1478 }, { "epoch": 1.6, "learning_rate": 6.622534595401746e-05, "loss": 2.4327, "step": 1480 }, { "epoch": 1.61, "learning_rate": 6.614402023857232e-05, "loss": 2.3336, "step": 1482 }, { "epoch": 1.61, "learning_rate": 6.606264681453946e-05, "loss": 2.4107, "step": 1484 }, { "epoch": 1.61, "learning_rate": 6.598122592239255e-05, "loss": 2.4793, "step": 1486 }, { "epoch": 1.61, "learning_rate": 6.589975780274544e-05, "loss": 2.6092, "step": 1488 }, { "epoch": 1.61, "learning_rate": 6.581824269635166e-05, "loss": 2.4823, "step": 1490 }, { "epoch": 1.62, "learning_rate": 6.57366808441035e-05, "loss": 2.3623, "step": 1492 }, { "epoch": 1.62, "learning_rate": 6.565507248703144e-05, "loss": 2.5841, "step": 1494 }, { "epoch": 1.62, "learning_rate": 6.557341786630339e-05, "loss": 2.3636, "step": 1496 }, { "epoch": 1.62, "learning_rate": 6.549171722322395e-05, "loss": 2.5033, "step": 1498 }, { "epoch": 1.62, "learning_rate": 6.540997079923376e-05, "loss": 2.5465, "step": 1500 }, { "epoch": 1.63, "learning_rate": 6.532817883590874e-05, "loss": 2.4308, "step": 1502 }, { "epoch": 1.63, "learning_rate": 6.524634157495935e-05, "loss": 2.6063, "step": 1504 }, { "epoch": 1.63, "learning_rate": 6.516445925822997e-05, "loss": 2.3648, "step": 1506 }, { "epoch": 1.63, "learning_rate": 6.508253212769808e-05, "loss": 2.5649, "step": 1508 }, { "epoch": 1.64, "learning_rate": 6.500056042547364e-05, "loss": 2.4303, "step": 1510 }, { "epoch": 1.64, "learning_rate": 6.491854439379827e-05, "loss": 2.2518, "step": 1512 }, { "epoch": 1.64, "learning_rate": 6.483648427504467e-05, "loss": 2.6185, "step": 1514 }, { "epoch": 1.64, "learning_rate": 6.475438031171574e-05, "loss": 2.4631, "step": 1516 }, { "epoch": 1.64, "learning_rate": 6.4672232746444e-05, "loss": 2.5055, "step": 1518 }, { "epoch": 1.65, "learning_rate": 6.459004182199082e-05, "loss": 2.4789, "step": 1520 }, { "epoch": 1.65, "learning_rate": 6.45078077812457e-05, "loss": 2.518, "step": 1522 }, { "epoch": 1.65, "learning_rate": 6.442553086722554e-05, "loss": 2.2487, "step": 1524 }, { "epoch": 1.65, "learning_rate": 6.434321132307394e-05, "loss": 2.4873, "step": 1526 }, { "epoch": 1.66, "learning_rate": 6.426084939206051e-05, "loss": 2.4427, "step": 1528 }, { "epoch": 1.66, "learning_rate": 6.417844531758009e-05, "loss": 2.5523, "step": 1530 }, { "epoch": 1.66, "learning_rate": 6.40959993431521e-05, "loss": 2.4331, "step": 1532 }, { "epoch": 1.66, "learning_rate": 6.401351171241971e-05, "loss": 2.2483, "step": 1534 }, { "epoch": 1.66, "learning_rate": 6.393098266914925e-05, "loss": 2.3769, "step": 1536 }, { "epoch": 1.67, "learning_rate": 6.384841245722945e-05, "loss": 2.4459, "step": 1538 }, { "epoch": 1.67, "learning_rate": 6.376580132067065e-05, "loss": 2.4104, "step": 1540 }, { "epoch": 1.67, "learning_rate": 6.368314950360415e-05, "loss": 2.3963, "step": 1542 }, { "epoch": 1.67, "learning_rate": 6.360045725028146e-05, "loss": 2.4358, "step": 1544 }, { "epoch": 1.67, "learning_rate": 6.351772480507363e-05, "loss": 2.3851, "step": 1546 }, { "epoch": 1.68, "learning_rate": 6.34349524124704e-05, "loss": 2.3434, "step": 1548 }, { "epoch": 1.68, "learning_rate": 6.335214031707965e-05, "loss": 2.3168, "step": 1550 }, { "epoch": 1.68, "learning_rate": 6.326928876362652e-05, "loss": 2.5622, "step": 1552 }, { "epoch": 1.68, "learning_rate": 6.318639799695285e-05, "loss": 2.4061, "step": 1554 }, { "epoch": 1.69, "learning_rate": 6.310346826201621e-05, "loss": 2.6289, "step": 1556 }, { "epoch": 1.69, "learning_rate": 6.302049980388948e-05, "loss": 2.4561, "step": 1558 }, { "epoch": 1.69, "learning_rate": 6.29374928677599e-05, "loss": 2.4697, "step": 1560 }, { "epoch": 1.69, "learning_rate": 6.28544476989284e-05, "loss": 2.4481, "step": 1562 }, { "epoch": 1.69, "learning_rate": 6.277136454280898e-05, "loss": 2.5529, "step": 1564 }, { "epoch": 1.7, "learning_rate": 6.268824364492782e-05, "loss": 2.4358, "step": 1566 }, { "epoch": 1.7, "learning_rate": 6.260508525092266e-05, "loss": 2.3754, "step": 1568 }, { "epoch": 1.7, "learning_rate": 6.252188960654204e-05, "loss": 2.5845, "step": 1570 }, { "epoch": 1.7, "learning_rate": 6.243865695764459e-05, "loss": 2.5552, "step": 1572 }, { "epoch": 1.7, "learning_rate": 6.235538755019832e-05, "loss": 2.4616, "step": 1574 }, { "epoch": 1.71, "learning_rate": 6.227208163027982e-05, "loss": 2.3196, "step": 1576 }, { "epoch": 1.71, "learning_rate": 6.218873944407361e-05, "loss": 2.4119, "step": 1578 }, { "epoch": 1.71, "learning_rate": 6.210536123787138e-05, "loss": 2.2707, "step": 1580 }, { "epoch": 1.71, "learning_rate": 6.202194725807127e-05, "loss": 2.7299, "step": 1582 }, { "epoch": 1.72, "learning_rate": 6.19384977511771e-05, "loss": 2.2659, "step": 1584 }, { "epoch": 1.72, "learning_rate": 6.185501296379777e-05, "loss": 2.5439, "step": 1586 }, { "epoch": 1.72, "learning_rate": 6.177149314264631e-05, "loss": 2.6154, "step": 1588 }, { "epoch": 1.72, "learning_rate": 6.168793853453943e-05, "loss": 2.5537, "step": 1590 }, { "epoch": 1.72, "learning_rate": 6.160434938639648e-05, "loss": 2.3475, "step": 1592 }, { "epoch": 1.73, "learning_rate": 6.152072594523906e-05, "loss": 2.3385, "step": 1594 }, { "epoch": 1.73, "learning_rate": 6.143706845818992e-05, "loss": 2.4313, "step": 1596 }, { "epoch": 1.73, "learning_rate": 6.135337717247261e-05, "loss": 2.3323, "step": 1598 }, { "epoch": 1.73, "learning_rate": 6.12696523354104e-05, "loss": 2.4587, "step": 1600 }, { "epoch": 1.74, "learning_rate": 6.118589419442584e-05, "loss": 2.6458, "step": 1602 }, { "epoch": 1.74, "learning_rate": 6.110210299703982e-05, "loss": 2.5148, "step": 1604 }, { "epoch": 1.74, "learning_rate": 6.101827899087094e-05, "loss": 2.5324, "step": 1606 }, { "epoch": 1.74, "learning_rate": 6.0934422423634744e-05, "loss": 2.4962, "step": 1608 }, { "epoch": 1.74, "learning_rate": 6.085053354314302e-05, "loss": 2.4868, "step": 1610 }, { "epoch": 1.75, "learning_rate": 6.076661259730305e-05, "loss": 2.506, "step": 1612 }, { "epoch": 1.75, "learning_rate": 6.068265983411685e-05, "loss": 2.3774, "step": 1614 }, { "epoch": 1.75, "learning_rate": 6.05986755016805e-05, "loss": 2.4287, "step": 1616 }, { "epoch": 1.75, "learning_rate": 6.051465984818332e-05, "loss": 2.3267, "step": 1618 }, { "epoch": 1.75, "learning_rate": 6.043061312190723e-05, "loss": 2.4453, "step": 1620 }, { "epoch": 1.76, "learning_rate": 6.034653557122598e-05, "loss": 2.4022, "step": 1622 }, { "epoch": 1.76, "learning_rate": 6.0262427444604384e-05, "loss": 2.5554, "step": 1624 }, { "epoch": 1.76, "learning_rate": 6.017828899059763e-05, "loss": 2.4102, "step": 1626 }, { "epoch": 1.76, "learning_rate": 6.009412045785051e-05, "loss": 2.3266, "step": 1628 }, { "epoch": 1.77, "learning_rate": 6.000992209509676e-05, "loss": 2.2382, "step": 1630 }, { "epoch": 1.77, "learning_rate": 5.9925694151158184e-05, "loss": 2.2763, "step": 1632 }, { "epoch": 1.77, "learning_rate": 5.984143687494409e-05, "loss": 2.481, "step": 1634 }, { "epoch": 1.77, "learning_rate": 5.975715051545039e-05, "loss": 2.5598, "step": 1636 }, { "epoch": 1.77, "learning_rate": 5.9672835321759016e-05, "loss": 2.3289, "step": 1638 }, { "epoch": 1.78, "learning_rate": 5.958849154303704e-05, "loss": 2.4317, "step": 1640 }, { "epoch": 1.78, "learning_rate": 5.9504119428536076e-05, "loss": 2.448, "step": 1642 }, { "epoch": 1.78, "learning_rate": 5.9419719227591405e-05, "loss": 2.2034, "step": 1644 }, { "epoch": 1.78, "learning_rate": 5.933529118962138e-05, "loss": 2.4841, "step": 1646 }, { "epoch": 1.79, "learning_rate": 5.925083556412657e-05, "loss": 2.5998, "step": 1648 }, { "epoch": 1.79, "learning_rate": 5.916635260068909e-05, "loss": 2.5288, "step": 1650 }, { "epoch": 1.79, "learning_rate": 5.908184254897182e-05, "loss": 2.5148, "step": 1652 }, { "epoch": 1.79, "learning_rate": 5.899730565871774e-05, "loss": 2.5166, "step": 1654 }, { "epoch": 1.79, "learning_rate": 5.891274217974907e-05, "loss": 2.4235, "step": 1656 }, { "epoch": 1.8, "learning_rate": 5.8828152361966685e-05, "loss": 2.5575, "step": 1658 }, { "epoch": 1.8, "learning_rate": 5.874353645534922e-05, "loss": 2.4232, "step": 1660 }, { "epoch": 1.8, "learning_rate": 5.865889470995248e-05, "loss": 2.2509, "step": 1662 }, { "epoch": 1.8, "learning_rate": 5.857422737590857e-05, "loss": 2.2636, "step": 1664 }, { "epoch": 1.8, "learning_rate": 5.8489534703425256e-05, "loss": 2.4923, "step": 1666 }, { "epoch": 1.81, "learning_rate": 5.8404816942785134e-05, "loss": 2.3899, "step": 1668 }, { "epoch": 1.81, "learning_rate": 5.8320074344345e-05, "loss": 2.4698, "step": 1670 }, { "epoch": 1.81, "learning_rate": 5.8235307158535e-05, "loss": 2.65, "step": 1672 }, { "epoch": 1.81, "learning_rate": 5.8150515635858e-05, "loss": 2.4687, "step": 1674 }, { "epoch": 1.82, "learning_rate": 5.806570002688869e-05, "loss": 2.4793, "step": 1676 }, { "epoch": 1.82, "learning_rate": 5.798086058227304e-05, "loss": 2.2238, "step": 1678 }, { "epoch": 1.82, "learning_rate": 5.78959975527274e-05, "loss": 2.4365, "step": 1680 }, { "epoch": 1.82, "learning_rate": 5.781111118903785e-05, "loss": 2.4891, "step": 1682 }, { "epoch": 1.82, "learning_rate": 5.772620174205938e-05, "loss": 2.3248, "step": 1684 }, { "epoch": 1.83, "learning_rate": 5.764126946271526e-05, "loss": 2.6325, "step": 1686 }, { "epoch": 1.83, "learning_rate": 5.755631460199616e-05, "loss": 2.4281, "step": 1688 }, { "epoch": 1.83, "learning_rate": 5.747133741095956e-05, "loss": 2.4829, "step": 1690 }, { "epoch": 1.83, "learning_rate": 5.738633814072888e-05, "loss": 2.3321, "step": 1692 }, { "epoch": 1.83, "learning_rate": 5.730131704249278e-05, "loss": 2.5413, "step": 1694 }, { "epoch": 1.84, "learning_rate": 5.721627436750449e-05, "loss": 2.0978, "step": 1696 }, { "epoch": 1.84, "learning_rate": 5.713121036708091e-05, "loss": 2.5072, "step": 1698 }, { "epoch": 1.84, "learning_rate": 5.704612529260205e-05, "loss": 2.4096, "step": 1700 }, { "epoch": 1.84, "learning_rate": 5.6961019395510126e-05, "loss": 2.3686, "step": 1702 }, { "epoch": 1.85, "learning_rate": 5.6875892927308936e-05, "loss": 2.475, "step": 1704 }, { "epoch": 1.85, "learning_rate": 5.679074613956307e-05, "loss": 2.375, "step": 1706 }, { "epoch": 1.85, "learning_rate": 5.6705579283897116e-05, "loss": 2.5238, "step": 1708 }, { "epoch": 1.85, "learning_rate": 5.662039261199502e-05, "loss": 2.5842, "step": 1710 }, { "epoch": 1.85, "learning_rate": 5.6535186375599266e-05, "loss": 2.5468, "step": 1712 }, { "epoch": 1.86, "learning_rate": 5.644996082651017e-05, "loss": 2.4626, "step": 1714 }, { "epoch": 1.86, "learning_rate": 5.636471621658508e-05, "loss": 2.5552, "step": 1716 }, { "epoch": 1.86, "learning_rate": 5.627945279773774e-05, "loss": 2.2431, "step": 1718 }, { "epoch": 1.86, "learning_rate": 5.61941708219374e-05, "loss": 2.36, "step": 1720 }, { "epoch": 1.87, "learning_rate": 5.6108870541208224e-05, "loss": 2.3865, "step": 1722 }, { "epoch": 1.87, "learning_rate": 5.602355220762838e-05, "loss": 2.5472, "step": 1724 }, { "epoch": 1.87, "learning_rate": 5.593821607332952e-05, "loss": 2.3935, "step": 1726 }, { "epoch": 1.87, "learning_rate": 5.585286239049574e-05, "loss": 2.5526, "step": 1728 }, { "epoch": 1.87, "learning_rate": 5.576749141136313e-05, "loss": 2.5119, "step": 1730 }, { "epoch": 1.88, "learning_rate": 5.568210338821881e-05, "loss": 2.3868, "step": 1732 }, { "epoch": 1.88, "learning_rate": 5.5596698573400306e-05, "loss": 2.4324, "step": 1734 }, { "epoch": 1.88, "learning_rate": 5.5511277219294765e-05, "loss": 2.4088, "step": 1736 }, { "epoch": 1.88, "learning_rate": 5.54258395783382e-05, "loss": 2.447, "step": 1738 }, { "epoch": 1.88, "learning_rate": 5.534038590301476e-05, "loss": 2.4857, "step": 1740 }, { "epoch": 1.89, "learning_rate": 5.5254916445855974e-05, "loss": 2.3698, "step": 1742 }, { "epoch": 1.89, "learning_rate": 5.5169431459440014e-05, "loss": 2.5048, "step": 1744 }, { "epoch": 1.89, "learning_rate": 5.508393119639094e-05, "loss": 2.5057, "step": 1746 }, { "epoch": 1.89, "learning_rate": 5.499841590937795e-05, "loss": 2.4211, "step": 1748 }, { "epoch": 1.9, "learning_rate": 5.491288585111467e-05, "loss": 2.7328, "step": 1750 }, { "epoch": 1.9, "learning_rate": 5.4827341274358344e-05, "loss": 2.5598, "step": 1752 }, { "epoch": 1.9, "learning_rate": 5.4741782431909136e-05, "loss": 2.2472, "step": 1754 }, { "epoch": 1.9, "learning_rate": 5.465620957660938e-05, "loss": 2.4122, "step": 1756 }, { "epoch": 1.9, "learning_rate": 5.457062296134279e-05, "loss": 2.4685, "step": 1758 }, { "epoch": 1.91, "learning_rate": 5.448502283903377e-05, "loss": 2.5201, "step": 1760 }, { "epoch": 1.91, "learning_rate": 5.439940946264662e-05, "loss": 2.5483, "step": 1762 }, { "epoch": 1.91, "learning_rate": 5.4313783085184825e-05, "loss": 2.4956, "step": 1764 }, { "epoch": 1.91, "learning_rate": 5.422814395969029e-05, "loss": 2.5378, "step": 1766 }, { "epoch": 1.91, "learning_rate": 5.414249233924258e-05, "loss": 2.218, "step": 1768 }, { "epoch": 1.92, "learning_rate": 5.40568284769582e-05, "loss": 2.5364, "step": 1770 }, { "epoch": 1.92, "learning_rate": 5.39711526259898e-05, "loss": 2.4421, "step": 1772 }, { "epoch": 1.92, "learning_rate": 5.388546503952551e-05, "loss": 2.3293, "step": 1774 }, { "epoch": 1.92, "learning_rate": 5.379976597078808e-05, "loss": 2.4037, "step": 1776 }, { "epoch": 1.93, "learning_rate": 5.371405567303428e-05, "loss": 2.4786, "step": 1778 }, { "epoch": 1.93, "learning_rate": 5.362833439955396e-05, "loss": 2.2926, "step": 1780 }, { "epoch": 1.93, "learning_rate": 5.354260240366947e-05, "loss": 2.5112, "step": 1782 }, { "epoch": 1.93, "learning_rate": 5.3456859938734836e-05, "loss": 2.2066, "step": 1784 }, { "epoch": 1.93, "learning_rate": 5.337110725813501e-05, "loss": 2.4357, "step": 1786 }, { "epoch": 1.94, "learning_rate": 5.328534461528515e-05, "loss": 2.502, "step": 1788 }, { "epoch": 1.94, "learning_rate": 5.3199572263629824e-05, "loss": 2.2781, "step": 1790 }, { "epoch": 1.94, "learning_rate": 5.3113790456642345e-05, "loss": 2.3274, "step": 1792 }, { "epoch": 1.94, "learning_rate": 5.3027999447823905e-05, "loss": 2.4531, "step": 1794 }, { "epoch": 1.95, "learning_rate": 5.2942199490702924e-05, "loss": 2.4264, "step": 1796 }, { "epoch": 1.95, "learning_rate": 5.285639083883428e-05, "loss": 2.3976, "step": 1798 }, { "epoch": 1.95, "learning_rate": 5.27705737457985e-05, "loss": 2.3159, "step": 1800 }, { "epoch": 1.95, "learning_rate": 5.268474846520112e-05, "loss": 2.3113, "step": 1802 }, { "epoch": 1.95, "learning_rate": 5.259891525067179e-05, "loss": 2.3999, "step": 1804 }, { "epoch": 1.96, "learning_rate": 5.251307435586368e-05, "loss": 2.4202, "step": 1806 }, { "epoch": 1.96, "learning_rate": 5.2427226034452614e-05, "loss": 2.4635, "step": 1808 }, { "epoch": 1.96, "learning_rate": 5.23413705401364e-05, "loss": 2.588, "step": 1810 }, { "epoch": 1.96, "learning_rate": 5.225550812663399e-05, "loss": 2.3913, "step": 1812 }, { "epoch": 1.96, "learning_rate": 5.216963904768485e-05, "loss": 2.3559, "step": 1814 }, { "epoch": 1.97, "learning_rate": 5.2083763557048056e-05, "loss": 2.3511, "step": 1816 }, { "epoch": 1.97, "learning_rate": 5.1997881908501736e-05, "loss": 2.0888, "step": 1818 }, { "epoch": 1.97, "learning_rate": 5.191199435584211e-05, "loss": 2.3658, "step": 1820 }, { "epoch": 1.97, "learning_rate": 5.182610115288295e-05, "loss": 2.4578, "step": 1822 }, { "epoch": 1.98, "learning_rate": 5.174020255345464e-05, "loss": 2.5353, "step": 1824 }, { "epoch": 1.98, "learning_rate": 5.1654298811403556e-05, "loss": 2.3506, "step": 1826 }, { "epoch": 1.98, "learning_rate": 5.1568390180591265e-05, "loss": 2.43, "step": 1828 }, { "epoch": 1.98, "learning_rate": 5.148247691489377e-05, "loss": 2.5092, "step": 1830 }, { "epoch": 1.98, "learning_rate": 5.139655926820078e-05, "loss": 2.4586, "step": 1832 }, { "epoch": 1.99, "learning_rate": 5.131063749441496e-05, "loss": 2.3623, "step": 1834 }, { "epoch": 1.99, "learning_rate": 5.1224711847451145e-05, "loss": 2.5055, "step": 1836 }, { "epoch": 1.99, "learning_rate": 5.113878258123563e-05, "loss": 2.3857, "step": 1838 }, { "epoch": 1.99, "learning_rate": 5.105284994970543e-05, "loss": 2.6249, "step": 1840 }, { "epoch": 2.0, "learning_rate": 5.096691420680745e-05, "loss": 2.408, "step": 1842 }, { "epoch": 2.0, "learning_rate": 5.088097560649784e-05, "loss": 2.5748, "step": 1844 }, { "epoch": 2.0, "learning_rate": 5.0795034402741185e-05, "loss": 2.2292, "step": 1846 }, { "epoch": 2.0, "learning_rate": 5.06661182712092e-05, "loss": 3.0448, "step": 1848 }, { "epoch": 2.0, "learning_rate": 5.0580171669978546e-05, "loss": 2.4891, "step": 1850 }, { "epoch": 2.01, "learning_rate": 5.049422335423252e-05, "loss": 2.5769, "step": 1852 }, { "epoch": 2.01, "learning_rate": 5.04082735779644e-05, "loss": 2.3243, "step": 1854 }, { "epoch": 2.01, "learning_rate": 5.032232259517179e-05, "loss": 2.5441, "step": 1856 }, { "epoch": 2.01, "learning_rate": 5.023637065985585e-05, "loss": 2.4429, "step": 1858 }, { "epoch": 2.02, "learning_rate": 5.015041802602057e-05, "loss": 2.3943, "step": 1860 }, { "epoch": 2.02, "learning_rate": 5.0064464947672e-05, "loss": 2.3169, "step": 1862 }, { "epoch": 2.02, "learning_rate": 4.9978511678817496e-05, "loss": 2.4604, "step": 1864 }, { "epoch": 2.02, "learning_rate": 4.989255847346499e-05, "loss": 2.3745, "step": 1866 }, { "epoch": 2.02, "learning_rate": 4.980660558562222e-05, "loss": 2.3082, "step": 1868 }, { "epoch": 2.03, "learning_rate": 4.972065326929598e-05, "loss": 2.4983, "step": 1870 }, { "epoch": 2.03, "learning_rate": 4.963470177849135e-05, "loss": 2.3494, "step": 1872 }, { "epoch": 2.03, "learning_rate": 4.954875136721104e-05, "loss": 2.2882, "step": 1874 }, { "epoch": 2.03, "learning_rate": 4.946280228945453e-05, "loss": 2.401, "step": 1876 }, { "epoch": 2.03, "learning_rate": 4.9376854799217327e-05, "loss": 2.3044, "step": 1878 }, { "epoch": 2.04, "learning_rate": 4.929090915049029e-05, "loss": 2.51, "step": 1880 }, { "epoch": 2.04, "learning_rate": 4.920496559725883e-05, "loss": 2.5332, "step": 1882 }, { "epoch": 2.04, "learning_rate": 4.911902439350217e-05, "loss": 2.449, "step": 1884 }, { "epoch": 2.04, "learning_rate": 4.9033085793192574e-05, "loss": 2.4766, "step": 1886 }, { "epoch": 2.05, "learning_rate": 4.894715005029459e-05, "loss": 2.5255, "step": 1888 }, { "epoch": 2.05, "learning_rate": 4.8861217418764374e-05, "loss": 2.4169, "step": 1890 }, { "epoch": 2.05, "learning_rate": 4.8775288152548866e-05, "loss": 2.4542, "step": 1892 }, { "epoch": 2.05, "learning_rate": 4.868936250558506e-05, "loss": 2.3703, "step": 1894 }, { "epoch": 2.05, "learning_rate": 4.8603440731799216e-05, "loss": 2.4712, "step": 1896 }, { "epoch": 2.06, "learning_rate": 4.851752308510624e-05, "loss": 2.3871, "step": 1898 }, { "epoch": 2.06, "learning_rate": 4.843160981940875e-05, "loss": 2.3593, "step": 1900 }, { "epoch": 2.06, "learning_rate": 4.8345701188596456e-05, "loss": 2.3834, "step": 1902 }, { "epoch": 2.06, "learning_rate": 4.825979744654536e-05, "loss": 2.5184, "step": 1904 }, { "epoch": 2.06, "learning_rate": 4.817389884711705e-05, "loss": 2.3947, "step": 1906 }, { "epoch": 2.07, "learning_rate": 4.8088005644157895e-05, "loss": 2.3947, "step": 1908 }, { "epoch": 2.07, "learning_rate": 4.800211809149829e-05, "loss": 2.332, "step": 1910 }, { "epoch": 2.07, "learning_rate": 4.791623644295195e-05, "loss": 2.4736, "step": 1912 }, { "epoch": 2.07, "learning_rate": 4.7830360952315164e-05, "loss": 2.3257, "step": 1914 }, { "epoch": 2.08, "learning_rate": 4.774449187336602e-05, "loss": 2.4029, "step": 1916 }, { "epoch": 2.08, "learning_rate": 4.765862945986362e-05, "loss": 2.2457, "step": 1918 }, { "epoch": 2.08, "learning_rate": 4.7572773965547384e-05, "loss": 2.4841, "step": 1920 }, { "epoch": 2.08, "learning_rate": 4.7486925644136324e-05, "loss": 2.2552, "step": 1922 }, { "epoch": 2.08, "learning_rate": 4.740108474932822e-05, "loss": 2.2952, "step": 1924 }, { "epoch": 2.09, "learning_rate": 4.731525153479891e-05, "loss": 2.6216, "step": 1926 }, { "epoch": 2.09, "learning_rate": 4.72294262542015e-05, "loss": 2.6685, "step": 1928 }, { "epoch": 2.09, "learning_rate": 4.7143609161165736e-05, "loss": 2.5377, "step": 1930 }, { "epoch": 2.09, "learning_rate": 4.705780050929708e-05, "loss": 2.3924, "step": 1932 }, { "epoch": 2.1, "learning_rate": 4.697200055217612e-05, "loss": 2.3375, "step": 1934 }, { "epoch": 2.1, "learning_rate": 4.688620954335766e-05, "loss": 2.5853, "step": 1936 }, { "epoch": 2.1, "learning_rate": 4.680042773637018e-05, "loss": 2.4731, "step": 1938 }, { "epoch": 2.1, "learning_rate": 4.671465538471486e-05, "loss": 2.3143, "step": 1940 }, { "epoch": 2.1, "learning_rate": 4.6628892741865e-05, "loss": 2.3821, "step": 1942 }, { "epoch": 2.11, "learning_rate": 4.654314006126516e-05, "loss": 2.5513, "step": 1944 }, { "epoch": 2.11, "learning_rate": 4.645739759633054e-05, "loss": 2.3581, "step": 1946 }, { "epoch": 2.11, "learning_rate": 4.637166560044605e-05, "loss": 2.3677, "step": 1948 }, { "epoch": 2.11, "learning_rate": 4.628594432696573e-05, "loss": 2.2615, "step": 1950 }, { "epoch": 2.11, "learning_rate": 4.620023402921191e-05, "loss": 2.4965, "step": 1952 }, { "epoch": 2.12, "learning_rate": 4.61145349604745e-05, "loss": 2.3364, "step": 1954 }, { "epoch": 2.12, "learning_rate": 4.602884737401022e-05, "loss": 2.1789, "step": 1956 }, { "epoch": 2.12, "learning_rate": 4.594317152304183e-05, "loss": 2.229, "step": 1958 }, { "epoch": 2.12, "learning_rate": 4.5857507660757424e-05, "loss": 2.7305, "step": 1960 }, { "epoch": 2.13, "learning_rate": 4.5771856040309716e-05, "loss": 2.4481, "step": 1962 }, { "epoch": 2.13, "learning_rate": 4.568621691481519e-05, "loss": 2.5205, "step": 1964 }, { "epoch": 2.13, "learning_rate": 4.5600590537353397e-05, "loss": 2.4775, "step": 1966 }, { "epoch": 2.13, "learning_rate": 4.551497716096624e-05, "loss": 2.5465, "step": 1968 }, { "epoch": 2.13, "learning_rate": 4.5429377038657214e-05, "loss": 2.4793, "step": 1970 }, { "epoch": 2.14, "learning_rate": 4.534379042339063e-05, "loss": 2.5299, "step": 1972 }, { "epoch": 2.14, "learning_rate": 4.5258217568090876e-05, "loss": 2.4299, "step": 1974 }, { "epoch": 2.14, "learning_rate": 4.517265872564167e-05, "loss": 2.4201, "step": 1976 }, { "epoch": 2.14, "learning_rate": 4.508711414888534e-05, "loss": 2.3312, "step": 1978 }, { "epoch": 2.15, "learning_rate": 4.5001584090622065e-05, "loss": 2.3622, "step": 1980 }, { "epoch": 2.15, "learning_rate": 4.491606880360909e-05, "loss": 2.3231, "step": 1982 }, { "epoch": 2.15, "learning_rate": 4.483056854055999e-05, "loss": 2.3689, "step": 1984 }, { "epoch": 2.15, "learning_rate": 4.474508355414404e-05, "loss": 2.4958, "step": 1986 }, { "epoch": 2.15, "learning_rate": 4.465961409698525e-05, "loss": 2.5048, "step": 1988 }, { "epoch": 2.16, "learning_rate": 4.457416042166181e-05, "loss": 2.4204, "step": 1990 }, { "epoch": 2.16, "learning_rate": 4.448872278070523e-05, "loss": 2.5445, "step": 1992 }, { "epoch": 2.16, "learning_rate": 4.4403301426599706e-05, "loss": 2.1568, "step": 1994 }, { "epoch": 2.16, "learning_rate": 4.431789661178121e-05, "loss": 2.2937, "step": 1996 }, { "epoch": 2.16, "learning_rate": 4.423250858863689e-05, "loss": 2.336, "step": 1998 }, { "epoch": 2.17, "learning_rate": 4.4147137609504266e-05, "loss": 2.6334, "step": 2000 }, { "epoch": 2.17, "learning_rate": 4.4061783926670496e-05, "loss": 2.2973, "step": 2002 }, { "epoch": 2.17, "learning_rate": 4.3976447792371624e-05, "loss": 2.5251, "step": 2004 }, { "epoch": 2.17, "learning_rate": 4.38911294587918e-05, "loss": 2.4591, "step": 2006 }, { "epoch": 2.18, "learning_rate": 4.38058291780626e-05, "loss": 2.4621, "step": 2008 }, { "epoch": 2.18, "learning_rate": 4.372054720226227e-05, "loss": 2.3918, "step": 2010 }, { "epoch": 2.18, "learning_rate": 4.3635283783414924e-05, "loss": 2.357, "step": 2012 }, { "epoch": 2.18, "learning_rate": 4.3550039173489845e-05, "loss": 2.3846, "step": 2014 }, { "epoch": 2.18, "learning_rate": 4.346481362440074e-05, "loss": 2.421, "step": 2016 }, { "epoch": 2.19, "learning_rate": 4.337960738800498e-05, "loss": 2.523, "step": 2018 }, { "epoch": 2.19, "learning_rate": 4.3294420716102895e-05, "loss": 2.3806, "step": 2020 }, { "epoch": 2.19, "learning_rate": 4.320925386043696e-05, "loss": 2.4476, "step": 2022 }, { "epoch": 2.19, "learning_rate": 4.3124107072691055e-05, "loss": 2.4976, "step": 2024 }, { "epoch": 2.19, "learning_rate": 4.3038980604489885e-05, "loss": 2.6172, "step": 2026 }, { "epoch": 2.2, "learning_rate": 4.2953874707397964e-05, "loss": 2.4828, "step": 2028 }, { "epoch": 2.2, "learning_rate": 4.28687896329191e-05, "loss": 2.3404, "step": 2030 }, { "epoch": 2.2, "learning_rate": 4.278372563249552e-05, "loss": 2.4298, "step": 2032 }, { "epoch": 2.2, "learning_rate": 4.269868295750722e-05, "loss": 2.5339, "step": 2034 }, { "epoch": 2.21, "learning_rate": 4.261366185927114e-05, "loss": 2.2219, "step": 2036 }, { "epoch": 2.21, "learning_rate": 4.252866258904045e-05, "loss": 2.3277, "step": 2038 }, { "epoch": 2.21, "learning_rate": 4.2443685398003835e-05, "loss": 2.3991, "step": 2040 }, { "epoch": 2.21, "learning_rate": 4.235873053728475e-05, "loss": 2.3344, "step": 2042 }, { "epoch": 2.21, "learning_rate": 4.227379825794063e-05, "loss": 2.3301, "step": 2044 }, { "epoch": 2.22, "learning_rate": 4.218888881096217e-05, "loss": 2.4981, "step": 2046 }, { "epoch": 2.22, "learning_rate": 4.21040024472726e-05, "loss": 2.4976, "step": 2048 }, { "epoch": 2.22, "learning_rate": 4.201913941772696e-05, "loss": 2.6412, "step": 2050 }, { "epoch": 2.22, "learning_rate": 4.193429997311132e-05, "loss": 2.3847, "step": 2052 }, { "epoch": 2.23, "learning_rate": 4.184948436414203e-05, "loss": 2.2447, "step": 2054 }, { "epoch": 2.23, "learning_rate": 4.1764692841464995e-05, "loss": 2.5925, "step": 2056 }, { "epoch": 2.23, "learning_rate": 4.1679925655655e-05, "loss": 2.3348, "step": 2058 }, { "epoch": 2.23, "learning_rate": 4.159518305721487e-05, "loss": 2.5109, "step": 2060 }, { "epoch": 2.23, "learning_rate": 4.151046529657477e-05, "loss": 2.5121, "step": 2062 }, { "epoch": 2.24, "learning_rate": 4.142577262409144e-05, "loss": 2.4563, "step": 2064 }, { "epoch": 2.24, "learning_rate": 4.134110529004753e-05, "loss": 2.4912, "step": 2066 }, { "epoch": 2.24, "learning_rate": 4.1256463544650783e-05, "loss": 2.4457, "step": 2068 }, { "epoch": 2.24, "learning_rate": 4.117184763803334e-05, "loss": 2.4428, "step": 2070 }, { "epoch": 2.24, "learning_rate": 4.108725782025092e-05, "loss": 2.3274, "step": 2072 }, { "epoch": 2.25, "learning_rate": 4.1002694341282276e-05, "loss": 2.4062, "step": 2074 }, { "epoch": 2.25, "learning_rate": 4.0918157451028185e-05, "loss": 2.6018, "step": 2076 }, { "epoch": 2.25, "learning_rate": 4.083364739931092e-05, "loss": 2.4493, "step": 2078 }, { "epoch": 2.25, "learning_rate": 4.0749164435873425e-05, "loss": 2.5798, "step": 2080 }, { "epoch": 2.26, "learning_rate": 4.0664708810378625e-05, "loss": 2.2729, "step": 2082 }, { "epoch": 2.26, "learning_rate": 4.05802807724086e-05, "loss": 2.3844, "step": 2084 }, { "epoch": 2.26, "learning_rate": 4.049588057146394e-05, "loss": 2.402, "step": 2086 }, { "epoch": 2.26, "learning_rate": 4.041150845696296e-05, "loss": 2.4163, "step": 2088 }, { "epoch": 2.26, "learning_rate": 4.032716467824099e-05, "loss": 2.428, "step": 2090 }, { "epoch": 2.27, "learning_rate": 4.0242849484549623e-05, "loss": 2.4803, "step": 2092 }, { "epoch": 2.27, "learning_rate": 4.015856312505593e-05, "loss": 2.2398, "step": 2094 }, { "epoch": 2.27, "learning_rate": 4.0074305848841814e-05, "loss": 2.2521, "step": 2096 }, { "epoch": 2.27, "learning_rate": 3.9990077904903254e-05, "loss": 2.3918, "step": 2098 }, { "epoch": 2.28, "learning_rate": 3.99058795421495e-05, "loss": 2.519, "step": 2100 }, { "epoch": 2.28, "learning_rate": 3.982171100940239e-05, "loss": 2.4067, "step": 2102 }, { "epoch": 2.28, "learning_rate": 3.973757255539562e-05, "loss": 2.4408, "step": 2104 }, { "epoch": 2.28, "learning_rate": 3.965346442877403e-05, "loss": 2.4309, "step": 2106 }, { "epoch": 2.28, "learning_rate": 3.9569386878092774e-05, "loss": 2.2934, "step": 2108 }, { "epoch": 2.29, "learning_rate": 3.94853401518167e-05, "loss": 2.356, "step": 2110 }, { "epoch": 2.29, "learning_rate": 3.94013244983195e-05, "loss": 2.3314, "step": 2112 }, { "epoch": 2.29, "learning_rate": 3.9317340165883156e-05, "loss": 2.4871, "step": 2114 }, { "epoch": 2.29, "learning_rate": 3.923338740269696e-05, "loss": 2.4932, "step": 2116 }, { "epoch": 2.29, "learning_rate": 3.9149466456857e-05, "loss": 2.2894, "step": 2118 }, { "epoch": 2.3, "learning_rate": 3.906557757636526e-05, "loss": 2.4726, "step": 2120 }, { "epoch": 2.3, "learning_rate": 3.898172100912908e-05, "loss": 2.3281, "step": 2122 }, { "epoch": 2.3, "learning_rate": 3.8897897002960195e-05, "loss": 2.6179, "step": 2124 }, { "epoch": 2.3, "learning_rate": 3.8814105805574166e-05, "loss": 2.3778, "step": 2126 }, { "epoch": 2.31, "learning_rate": 3.873034766458959e-05, "loss": 2.4198, "step": 2128 }, { "epoch": 2.31, "learning_rate": 3.86466228275274e-05, "loss": 2.318, "step": 2130 }, { "epoch": 2.31, "learning_rate": 3.856293154181009e-05, "loss": 2.3894, "step": 2132 }, { "epoch": 2.31, "learning_rate": 3.847927405476097e-05, "loss": 2.388, "step": 2134 }, { "epoch": 2.31, "learning_rate": 3.839565061360352e-05, "loss": 2.4026, "step": 2136 }, { "epoch": 2.32, "learning_rate": 3.831206146546059e-05, "loss": 2.3764, "step": 2138 }, { "epoch": 2.32, "learning_rate": 3.82285068573537e-05, "loss": 2.2921, "step": 2140 }, { "epoch": 2.32, "learning_rate": 3.814498703620226e-05, "loss": 2.5344, "step": 2142 }, { "epoch": 2.32, "learning_rate": 3.80615022488229e-05, "loss": 2.3933, "step": 2144 }, { "epoch": 2.32, "learning_rate": 3.797805274192875e-05, "loss": 2.5373, "step": 2146 }, { "epoch": 2.33, "learning_rate": 3.789463876212863e-05, "loss": 2.2517, "step": 2148 }, { "epoch": 2.33, "learning_rate": 3.781126055592641e-05, "loss": 2.5876, "step": 2150 }, { "epoch": 2.33, "learning_rate": 3.772791836972019e-05, "loss": 2.5212, "step": 2152 }, { "epoch": 2.33, "learning_rate": 3.764461244980169e-05, "loss": 2.4556, "step": 2154 }, { "epoch": 2.34, "learning_rate": 3.7561343042355415e-05, "loss": 2.4632, "step": 2156 }, { "epoch": 2.34, "learning_rate": 3.747811039345798e-05, "loss": 2.333, "step": 2158 }, { "epoch": 2.34, "learning_rate": 3.739491474907735e-05, "loss": 2.3092, "step": 2160 }, { "epoch": 2.34, "learning_rate": 3.731175635507219e-05, "loss": 2.4144, "step": 2162 }, { "epoch": 2.34, "learning_rate": 3.722863545719103e-05, "loss": 2.51, "step": 2164 }, { "epoch": 2.35, "learning_rate": 3.7145552301071594e-05, "loss": 2.449, "step": 2166 }, { "epoch": 2.35, "learning_rate": 3.706250713224011e-05, "loss": 2.4989, "step": 2168 }, { "epoch": 2.35, "learning_rate": 3.697950019611054e-05, "loss": 2.4551, "step": 2170 }, { "epoch": 2.35, "learning_rate": 3.689653173798381e-05, "loss": 2.4758, "step": 2172 }, { "epoch": 2.36, "learning_rate": 3.681360200304718e-05, "loss": 2.5718, "step": 2174 }, { "epoch": 2.36, "learning_rate": 3.673071123637347e-05, "loss": 2.4931, "step": 2176 }, { "epoch": 2.36, "learning_rate": 3.664785968292036e-05, "loss": 2.5029, "step": 2178 }, { "epoch": 2.36, "learning_rate": 3.656504758752961e-05, "loss": 2.4353, "step": 2180 }, { "epoch": 2.36, "learning_rate": 3.64822751949264e-05, "loss": 2.2827, "step": 2182 }, { "epoch": 2.37, "learning_rate": 3.639954274971854e-05, "loss": 2.4667, "step": 2184 }, { "epoch": 2.37, "learning_rate": 3.631685049639586e-05, "loss": 2.3599, "step": 2186 }, { "epoch": 2.37, "learning_rate": 3.623419867932937e-05, "loss": 2.3421, "step": 2188 }, { "epoch": 2.37, "learning_rate": 3.6151587542770567e-05, "loss": 2.4358, "step": 2190 }, { "epoch": 2.37, "learning_rate": 3.6069017330850754e-05, "loss": 2.4472, "step": 2192 }, { "epoch": 2.38, "learning_rate": 3.598648828758031e-05, "loss": 2.4576, "step": 2194 }, { "epoch": 2.38, "learning_rate": 3.590400065684792e-05, "loss": 2.5057, "step": 2196 }, { "epoch": 2.38, "learning_rate": 3.582155468241993e-05, "loss": 2.4759, "step": 2198 }, { "epoch": 2.38, "learning_rate": 3.573915060793949e-05, "loss": 2.1946, "step": 2200 }, { "epoch": 2.39, "learning_rate": 3.5656788676926066e-05, "loss": 2.562, "step": 2202 }, { "epoch": 2.39, "learning_rate": 3.557446913277448e-05, "loss": 2.7023, "step": 2204 }, { "epoch": 2.39, "learning_rate": 3.5492192218754326e-05, "loss": 2.5342, "step": 2206 }, { "epoch": 2.39, "learning_rate": 3.540995817800917e-05, "loss": 2.4178, "step": 2208 }, { "epoch": 2.39, "learning_rate": 3.532776725355601e-05, "loss": 2.4072, "step": 2210 }, { "epoch": 2.4, "learning_rate": 3.5245619688284274e-05, "loss": 2.3832, "step": 2212 }, { "epoch": 2.4, "learning_rate": 3.516351572495535e-05, "loss": 2.5958, "step": 2214 }, { "epoch": 2.4, "learning_rate": 3.508145560620173e-05, "loss": 2.3778, "step": 2216 }, { "epoch": 2.4, "learning_rate": 3.499943957452637e-05, "loss": 2.3753, "step": 2218 }, { "epoch": 2.4, "learning_rate": 3.4917467872301934e-05, "loss": 2.4079, "step": 2220 }, { "epoch": 2.41, "learning_rate": 3.4835540741770054e-05, "loss": 2.4669, "step": 2222 }, { "epoch": 2.41, "learning_rate": 3.4753658425040656e-05, "loss": 2.4278, "step": 2224 }, { "epoch": 2.41, "learning_rate": 3.467182116409127e-05, "loss": 2.4575, "step": 2226 }, { "epoch": 2.41, "learning_rate": 3.459002920076625e-05, "loss": 2.5202, "step": 2228 }, { "epoch": 2.42, "learning_rate": 3.450828277677606e-05, "loss": 2.4366, "step": 2230 }, { "epoch": 2.42, "learning_rate": 3.442658213369662e-05, "loss": 2.4061, "step": 2232 }, { "epoch": 2.42, "learning_rate": 3.434492751296856e-05, "loss": 2.393, "step": 2234 }, { "epoch": 2.42, "learning_rate": 3.426331915589651e-05, "loss": 2.3859, "step": 2236 }, { "epoch": 2.42, "learning_rate": 3.418175730364836e-05, "loss": 2.516, "step": 2238 }, { "epoch": 2.43, "learning_rate": 3.4100242197254564e-05, "loss": 2.2877, "step": 2240 }, { "epoch": 2.43, "learning_rate": 3.401877407760747e-05, "loss": 2.3637, "step": 2242 }, { "epoch": 2.43, "learning_rate": 3.393735318546054e-05, "loss": 2.4131, "step": 2244 }, { "epoch": 2.43, "learning_rate": 3.38559797614277e-05, "loss": 2.4939, "step": 2246 }, { "epoch": 2.44, "learning_rate": 3.377465404598253e-05, "loss": 2.4648, "step": 2248 }, { "epoch": 2.44, "learning_rate": 3.369337627945774e-05, "loss": 2.504, "step": 2250 }, { "epoch": 2.44, "learning_rate": 3.3612146702044226e-05, "loss": 2.5889, "step": 2252 }, { "epoch": 2.44, "learning_rate": 3.3530965553790526e-05, "loss": 2.5468, "step": 2254 }, { "epoch": 2.44, "learning_rate": 3.3449833074602064e-05, "loss": 2.3507, "step": 2256 }, { "epoch": 2.45, "learning_rate": 3.336874950424046e-05, "loss": 2.418, "step": 2258 }, { "epoch": 2.45, "learning_rate": 3.328771508232273e-05, "loss": 2.424, "step": 2260 }, { "epoch": 2.45, "learning_rate": 3.320673004832071e-05, "loss": 2.4613, "step": 2262 }, { "epoch": 2.45, "learning_rate": 3.312579464156025e-05, "loss": 2.5197, "step": 2264 }, { "epoch": 2.45, "learning_rate": 3.304490910122058e-05, "loss": 2.3393, "step": 2266 }, { "epoch": 2.46, "learning_rate": 3.2964073666333536e-05, "loss": 2.2088, "step": 2268 }, { "epoch": 2.46, "learning_rate": 3.2883288575782875e-05, "loss": 2.4951, "step": 2270 }, { "epoch": 2.46, "learning_rate": 3.2802554068303596e-05, "loss": 2.3728, "step": 2272 }, { "epoch": 2.46, "learning_rate": 3.272187038248121e-05, "loss": 2.2495, "step": 2274 }, { "epoch": 2.47, "learning_rate": 3.264123775675106e-05, "loss": 2.3507, "step": 2276 }, { "epoch": 2.47, "learning_rate": 3.256065642939756e-05, "loss": 2.3987, "step": 2278 }, { "epoch": 2.47, "learning_rate": 3.248012663855353e-05, "loss": 2.4537, "step": 2280 }, { "epoch": 2.47, "learning_rate": 3.239964862219954e-05, "loss": 2.4883, "step": 2282 }, { "epoch": 2.47, "learning_rate": 3.231922261816311e-05, "loss": 2.1862, "step": 2284 }, { "epoch": 2.48, "learning_rate": 3.223884886411807e-05, "loss": 2.4353, "step": 2286 }, { "epoch": 2.48, "learning_rate": 3.215852759758381e-05, "loss": 2.2626, "step": 2288 }, { "epoch": 2.48, "learning_rate": 3.2078259055924675e-05, "loss": 2.3032, "step": 2290 }, { "epoch": 2.48, "learning_rate": 3.199804347634915e-05, "loss": 2.3917, "step": 2292 }, { "epoch": 2.49, "learning_rate": 3.191788109590922e-05, "loss": 2.4218, "step": 2294 }, { "epoch": 2.49, "learning_rate": 3.183777215149962e-05, "loss": 2.3915, "step": 2296 }, { "epoch": 2.49, "learning_rate": 3.175771687985726e-05, "loss": 2.5188, "step": 2298 }, { "epoch": 2.49, "learning_rate": 3.167771551756036e-05, "loss": 2.4356, "step": 2300 }, { "epoch": 2.49, "learning_rate": 3.159776830102784e-05, "loss": 2.3713, "step": 2302 }, { "epoch": 2.5, "learning_rate": 3.1517875466518626e-05, "loss": 2.488, "step": 2304 }, { "epoch": 2.5, "learning_rate": 3.1438037250130944e-05, "loss": 2.5058, "step": 2306 }, { "epoch": 2.5, "learning_rate": 3.135825388780159e-05, "loss": 2.5503, "step": 2308 }, { "epoch": 2.5, "learning_rate": 3.127852561530526e-05, "loss": 2.4376, "step": 2310 }, { "epoch": 2.5, "learning_rate": 3.1198852668253856e-05, "loss": 2.4054, "step": 2312 }, { "epoch": 2.51, "learning_rate": 3.111923528209577e-05, "loss": 2.3189, "step": 2314 }, { "epoch": 2.51, "learning_rate": 3.103967369211525e-05, "loss": 2.3736, "step": 2316 }, { "epoch": 2.51, "learning_rate": 3.096016813343158e-05, "loss": 2.4304, "step": 2318 }, { "epoch": 2.51, "learning_rate": 3.08807188409985e-05, "loss": 2.3717, "step": 2320 }, { "epoch": 2.52, "learning_rate": 3.080132604960349e-05, "loss": 2.3996, "step": 2322 }, { "epoch": 2.52, "learning_rate": 3.072198999386704e-05, "loss": 2.5024, "step": 2324 }, { "epoch": 2.52, "learning_rate": 3.064271090824197e-05, "loss": 2.3109, "step": 2326 }, { "epoch": 2.52, "learning_rate": 3.056348902701274e-05, "loss": 2.376, "step": 2328 }, { "epoch": 2.52, "learning_rate": 3.0484324584294783e-05, "loss": 2.5198, "step": 2330 }, { "epoch": 2.53, "learning_rate": 3.040521781403377e-05, "loss": 2.2787, "step": 2332 }, { "epoch": 2.53, "learning_rate": 3.0326168950004964e-05, "loss": 2.3356, "step": 2334 }, { "epoch": 2.53, "learning_rate": 3.0247178225812435e-05, "loss": 2.4738, "step": 2336 }, { "epoch": 2.53, "learning_rate": 3.0168245874888557e-05, "loss": 2.41, "step": 2338 }, { "epoch": 2.53, "learning_rate": 3.00893721304931e-05, "loss": 2.4233, "step": 2340 }, { "epoch": 2.54, "learning_rate": 3.0010557225712667e-05, "loss": 2.4596, "step": 2342 }, { "epoch": 2.54, "learning_rate": 2.993180139345999e-05, "loss": 2.5253, "step": 2344 }, { "epoch": 2.54, "learning_rate": 2.9853104866473246e-05, "loss": 2.3779, "step": 2346 }, { "epoch": 2.54, "learning_rate": 2.977446787731532e-05, "loss": 2.397, "step": 2348 }, { "epoch": 2.55, "learning_rate": 2.9695890658373164e-05, "loss": 2.4016, "step": 2350 }, { "epoch": 2.55, "learning_rate": 2.96173734418571e-05, "loss": 2.3168, "step": 2352 }, { "epoch": 2.55, "learning_rate": 2.9538916459800136e-05, "loss": 2.2834, "step": 2354 }, { "epoch": 2.55, "learning_rate": 2.9460519944057284e-05, "loss": 2.4692, "step": 2356 }, { "epoch": 2.55, "learning_rate": 2.9382184126304834e-05, "loss": 2.1485, "step": 2358 }, { "epoch": 2.56, "learning_rate": 2.9303909238039718e-05, "loss": 2.1016, "step": 2360 }, { "epoch": 2.56, "learning_rate": 2.9225695510578843e-05, "loss": 2.1819, "step": 2362 }, { "epoch": 2.56, "learning_rate": 2.9147543175058335e-05, "loss": 2.4526, "step": 2364 }, { "epoch": 2.56, "learning_rate": 2.9069452462432883e-05, "loss": 2.5037, "step": 2366 }, { "epoch": 2.57, "learning_rate": 2.899142360347511e-05, "loss": 2.223, "step": 2368 }, { "epoch": 2.57, "learning_rate": 2.8913456828774854e-05, "loss": 2.4296, "step": 2370 }, { "epoch": 2.57, "learning_rate": 2.883555236873845e-05, "loss": 2.3899, "step": 2372 }, { "epoch": 2.57, "learning_rate": 2.875771045358805e-05, "loss": 2.4758, "step": 2374 }, { "epoch": 2.57, "learning_rate": 2.8679931313361053e-05, "loss": 2.4788, "step": 2376 }, { "epoch": 2.58, "learning_rate": 2.860221517790933e-05, "loss": 2.4869, "step": 2378 }, { "epoch": 2.58, "learning_rate": 2.8524562276898513e-05, "loss": 2.4313, "step": 2380 }, { "epoch": 2.58, "learning_rate": 2.8446972839807384e-05, "loss": 2.2432, "step": 2382 }, { "epoch": 2.58, "learning_rate": 2.8369447095927195e-05, "loss": 2.2748, "step": 2384 }, { "epoch": 2.58, "learning_rate": 2.8291985274360983e-05, "loss": 2.6419, "step": 2386 }, { "epoch": 2.59, "learning_rate": 2.8214587604022847e-05, "loss": 2.4079, "step": 2388 }, { "epoch": 2.59, "learning_rate": 2.8137254313637306e-05, "loss": 2.3603, "step": 2390 }, { "epoch": 2.59, "learning_rate": 2.805998563173866e-05, "loss": 2.2753, "step": 2392 }, { "epoch": 2.59, "learning_rate": 2.798278178667028e-05, "loss": 2.3884, "step": 2394 }, { "epoch": 2.6, "learning_rate": 2.790564300658387e-05, "loss": 2.5817, "step": 2396 }, { "epoch": 2.6, "learning_rate": 2.7828569519438942e-05, "loss": 2.4844, "step": 2398 }, { "epoch": 2.6, "learning_rate": 2.775156155300197e-05, "loss": 2.4505, "step": 2400 }, { "epoch": 2.6, "learning_rate": 2.7674619334845876e-05, "loss": 2.443, "step": 2402 }, { "epoch": 2.6, "learning_rate": 2.7597743092349217e-05, "loss": 2.3359, "step": 2404 }, { "epoch": 2.61, "learning_rate": 2.752093305269565e-05, "loss": 2.4345, "step": 2406 }, { "epoch": 2.61, "learning_rate": 2.7444189442873115e-05, "loss": 2.2828, "step": 2408 }, { "epoch": 2.61, "learning_rate": 2.7367512489673312e-05, "loss": 2.5291, "step": 2410 }, { "epoch": 2.61, "learning_rate": 2.7290902419690895e-05, "loss": 2.391, "step": 2412 }, { "epoch": 2.62, "learning_rate": 2.7214359459322924e-05, "loss": 2.06, "step": 2414 }, { "epoch": 2.62, "learning_rate": 2.7137883834768073e-05, "loss": 2.6396, "step": 2416 }, { "epoch": 2.62, "learning_rate": 2.7061475772026086e-05, "loss": 2.4143, "step": 2418 }, { "epoch": 2.62, "learning_rate": 2.698513549689703e-05, "loss": 2.4331, "step": 2420 }, { "epoch": 2.62, "learning_rate": 2.6908863234980636e-05, "loss": 2.4801, "step": 2422 }, { "epoch": 2.63, "learning_rate": 2.6832659211675627e-05, "loss": 2.3912, "step": 2424 }, { "epoch": 2.63, "learning_rate": 2.67565236521791e-05, "loss": 2.4359, "step": 2426 }, { "epoch": 2.63, "learning_rate": 2.668045678148584e-05, "loss": 2.5352, "step": 2428 }, { "epoch": 2.63, "learning_rate": 2.6604458824387614e-05, "loss": 2.4982, "step": 2430 }, { "epoch": 2.63, "learning_rate": 2.6528530005472518e-05, "loss": 2.4898, "step": 2432 }, { "epoch": 2.64, "learning_rate": 2.6452670549124375e-05, "loss": 2.5291, "step": 2434 }, { "epoch": 2.64, "learning_rate": 2.637688067952204e-05, "loss": 2.3178, "step": 2436 }, { "epoch": 2.64, "learning_rate": 2.630116062063867e-05, "loss": 2.5588, "step": 2438 }, { "epoch": 2.64, "learning_rate": 2.622551059624113e-05, "loss": 2.3862, "step": 2440 }, { "epoch": 2.65, "learning_rate": 2.614993082988937e-05, "loss": 2.3327, "step": 2442 }, { "epoch": 2.65, "learning_rate": 2.607442154493568e-05, "loss": 2.3323, "step": 2444 }, { "epoch": 2.65, "learning_rate": 2.599898296452406e-05, "loss": 2.237, "step": 2446 }, { "epoch": 2.65, "learning_rate": 2.592361531158952e-05, "loss": 2.1117, "step": 2448 }, { "epoch": 2.65, "learning_rate": 2.5848318808857606e-05, "loss": 2.3355, "step": 2450 }, { "epoch": 2.66, "learning_rate": 2.5773093678843473e-05, "loss": 2.3701, "step": 2452 }, { "epoch": 2.66, "learning_rate": 2.5697940143851375e-05, "loss": 2.4158, "step": 2454 }, { "epoch": 2.66, "learning_rate": 2.5622858425974018e-05, "loss": 2.4807, "step": 2456 }, { "epoch": 2.66, "learning_rate": 2.5547848747091897e-05, "loss": 2.2695, "step": 2458 }, { "epoch": 2.66, "learning_rate": 2.5472911328872574e-05, "loss": 2.4957, "step": 2460 }, { "epoch": 2.67, "learning_rate": 2.5398046392770054e-05, "loss": 2.3591, "step": 2462 }, { "epoch": 2.67, "learning_rate": 2.532325416002419e-05, "loss": 2.6757, "step": 2464 }, { "epoch": 2.67, "learning_rate": 2.524853485166e-05, "loss": 2.3271, "step": 2466 }, { "epoch": 2.67, "learning_rate": 2.517388868848692e-05, "loss": 2.3084, "step": 2468 }, { "epoch": 2.68, "learning_rate": 2.5099315891098264e-05, "loss": 2.3597, "step": 2470 }, { "epoch": 2.68, "learning_rate": 2.5024816679870556e-05, "loss": 2.5597, "step": 2472 }, { "epoch": 2.68, "learning_rate": 2.495039127496287e-05, "loss": 2.4667, "step": 2474 }, { "epoch": 2.68, "learning_rate": 2.4876039896316123e-05, "loss": 2.2991, "step": 2476 }, { "epoch": 2.68, "learning_rate": 2.4801762763652474e-05, "loss": 2.27, "step": 2478 }, { "epoch": 2.69, "learning_rate": 2.4727560096474706e-05, "loss": 2.5184, "step": 2480 }, { "epoch": 2.69, "learning_rate": 2.4653432114065544e-05, "loss": 2.5034, "step": 2482 }, { "epoch": 2.69, "learning_rate": 2.457937903548695e-05, "loss": 2.3994, "step": 2484 }, { "epoch": 2.69, "learning_rate": 2.450540107957961e-05, "loss": 2.229, "step": 2486 }, { "epoch": 2.7, "learning_rate": 2.443149846496212e-05, "loss": 2.4133, "step": 2488 }, { "epoch": 2.7, "learning_rate": 2.4357671410030526e-05, "loss": 2.5226, "step": 2490 }, { "epoch": 2.7, "learning_rate": 2.4283920132957482e-05, "loss": 2.3836, "step": 2492 }, { "epoch": 2.7, "learning_rate": 2.42102448516918e-05, "loss": 2.4287, "step": 2494 }, { "epoch": 2.7, "learning_rate": 2.413664578395761e-05, "loss": 2.4322, "step": 2496 }, { "epoch": 2.71, "learning_rate": 2.4063123147253923e-05, "loss": 2.3545, "step": 2498 }, { "epoch": 2.71, "learning_rate": 2.398967715885379e-05, "loss": 2.3359, "step": 2500 }, { "epoch": 2.71, "learning_rate": 2.391630803580382e-05, "loss": 2.4889, "step": 2502 }, { "epoch": 2.71, "learning_rate": 2.3843015994923412e-05, "loss": 2.3731, "step": 2504 }, { "epoch": 2.71, "learning_rate": 2.3769801252804213e-05, "loss": 2.2901, "step": 2506 }, { "epoch": 2.72, "learning_rate": 2.3696664025809458e-05, "loss": 2.3341, "step": 2508 }, { "epoch": 2.72, "learning_rate": 2.3623604530073245e-05, "loss": 2.3624, "step": 2510 }, { "epoch": 2.72, "learning_rate": 2.3550622981499988e-05, "loss": 2.4377, "step": 2512 }, { "epoch": 2.72, "learning_rate": 2.3477719595763774e-05, "loss": 2.2931, "step": 2514 }, { "epoch": 2.73, "learning_rate": 2.340489458830772e-05, "loss": 2.3726, "step": 2516 }, { "epoch": 2.73, "learning_rate": 2.3332148174343254e-05, "loss": 2.2644, "step": 2518 }, { "epoch": 2.73, "learning_rate": 2.3259480568849586e-05, "loss": 2.5434, "step": 2520 }, { "epoch": 2.73, "learning_rate": 2.3186891986573035e-05, "loss": 2.2445, "step": 2522 }, { "epoch": 2.73, "learning_rate": 2.3114382642026404e-05, "loss": 2.3012, "step": 2524 }, { "epoch": 2.74, "learning_rate": 2.3041952749488304e-05, "loss": 2.298, "step": 2526 }, { "epoch": 2.74, "learning_rate": 2.2969602523002543e-05, "loss": 2.3226, "step": 2528 }, { "epoch": 2.74, "learning_rate": 2.2897332176377528e-05, "loss": 2.4809, "step": 2530 }, { "epoch": 2.74, "learning_rate": 2.2825141923185632e-05, "loss": 2.3514, "step": 2532 }, { "epoch": 2.74, "learning_rate": 2.275303197676248e-05, "loss": 2.4344, "step": 2534 }, { "epoch": 2.75, "learning_rate": 2.2681002550206355e-05, "loss": 2.3313, "step": 2536 }, { "epoch": 2.75, "learning_rate": 2.2609053856377714e-05, "loss": 2.1924, "step": 2538 }, { "epoch": 2.75, "learning_rate": 2.2537186107898313e-05, "loss": 2.1984, "step": 2540 }, { "epoch": 2.75, "learning_rate": 2.2465399517150722e-05, "loss": 2.4612, "step": 2542 }, { "epoch": 2.76, "learning_rate": 2.2393694296277707e-05, "loss": 2.3225, "step": 2544 }, { "epoch": 2.76, "learning_rate": 2.2322070657181583e-05, "loss": 2.3635, "step": 2546 }, { "epoch": 2.76, "learning_rate": 2.2250528811523513e-05, "loss": 2.4144, "step": 2548 }, { "epoch": 2.76, "learning_rate": 2.2179068970722978e-05, "loss": 2.5847, "step": 2550 }, { "epoch": 2.76, "learning_rate": 2.2107691345957133e-05, "loss": 2.3221, "step": 2552 }, { "epoch": 2.77, "learning_rate": 2.203639614816017e-05, "loss": 2.4227, "step": 2554 }, { "epoch": 2.77, "learning_rate": 2.196518358802268e-05, "loss": 2.4364, "step": 2556 }, { "epoch": 2.77, "learning_rate": 2.1894053875991017e-05, "loss": 2.4288, "step": 2558 }, { "epoch": 2.77, "learning_rate": 2.182300722226675e-05, "loss": 2.3931, "step": 2560 }, { "epoch": 2.78, "learning_rate": 2.1752043836806002e-05, "loss": 2.4772, "step": 2562 }, { "epoch": 2.78, "learning_rate": 2.1681163929318777e-05, "loss": 2.3936, "step": 2564 }, { "epoch": 2.78, "learning_rate": 2.1610367709268387e-05, "loss": 2.3759, "step": 2566 }, { "epoch": 2.78, "learning_rate": 2.1539655385870877e-05, "loss": 2.4427, "step": 2568 }, { "epoch": 2.78, "learning_rate": 2.1469027168094347e-05, "loss": 2.387, "step": 2570 }, { "epoch": 2.79, "learning_rate": 2.1398483264658313e-05, "loss": 2.2637, "step": 2572 }, { "epoch": 2.79, "learning_rate": 2.132802388403319e-05, "loss": 2.3364, "step": 2574 }, { "epoch": 2.79, "learning_rate": 2.125764923443953e-05, "loss": 2.3348, "step": 2576 }, { "epoch": 2.79, "learning_rate": 2.118735952384757e-05, "loss": 2.34, "step": 2578 }, { "epoch": 2.79, "learning_rate": 2.1117154959976482e-05, "loss": 2.2867, "step": 2580 }, { "epoch": 2.8, "learning_rate": 2.104703575029385e-05, "loss": 2.4191, "step": 2582 }, { "epoch": 2.8, "learning_rate": 2.097700210201497e-05, "loss": 2.2275, "step": 2584 }, { "epoch": 2.8, "learning_rate": 2.090705422210237e-05, "loss": 2.6198, "step": 2586 }, { "epoch": 2.8, "learning_rate": 2.0837192317265016e-05, "loss": 2.364, "step": 2588 }, { "epoch": 2.81, "learning_rate": 2.0767416593957894e-05, "loss": 2.2663, "step": 2590 }, { "epoch": 2.81, "learning_rate": 2.0697727258381238e-05, "loss": 2.2649, "step": 2592 }, { "epoch": 2.81, "learning_rate": 2.0628124516480046e-05, "loss": 2.5761, "step": 2594 }, { "epoch": 2.81, "learning_rate": 2.0558608573943354e-05, "loss": 2.2132, "step": 2596 }, { "epoch": 2.81, "learning_rate": 2.0489179636203766e-05, "loss": 2.3719, "step": 2598 }, { "epoch": 2.82, "learning_rate": 2.0419837908436688e-05, "loss": 2.4978, "step": 2600 }, { "epoch": 2.82, "learning_rate": 2.0350583595559865e-05, "loss": 2.2988, "step": 2602 }, { "epoch": 2.82, "learning_rate": 2.0281416902232708e-05, "loss": 2.255, "step": 2604 }, { "epoch": 2.82, "learning_rate": 2.021233803285567e-05, "loss": 2.2799, "step": 2606 }, { "epoch": 2.83, "learning_rate": 2.014334719156966e-05, "loss": 2.2972, "step": 2608 }, { "epoch": 2.83, "learning_rate": 2.0074444582255485e-05, "loss": 2.4158, "step": 2610 }, { "epoch": 2.83, "learning_rate": 2.0005630408533215e-05, "loss": 2.3353, "step": 2612 }, { "epoch": 2.83, "learning_rate": 1.9936904873761536e-05, "loss": 2.3829, "step": 2614 }, { "epoch": 2.83, "learning_rate": 1.9868268181037185e-05, "loss": 2.1709, "step": 2616 }, { "epoch": 2.84, "learning_rate": 1.9799720533194404e-05, "loss": 2.549, "step": 2618 }, { "epoch": 2.84, "learning_rate": 1.9731262132804274e-05, "loss": 2.5804, "step": 2620 }, { "epoch": 2.84, "learning_rate": 1.966289318217411e-05, "loss": 2.5311, "step": 2622 }, { "epoch": 2.84, "learning_rate": 1.959461388334686e-05, "loss": 2.3825, "step": 2624 }, { "epoch": 2.84, "learning_rate": 1.9526424438100642e-05, "loss": 2.3505, "step": 2626 }, { "epoch": 2.85, "learning_rate": 1.9458325047947938e-05, "loss": 2.3793, "step": 2628 }, { "epoch": 2.85, "learning_rate": 1.9390315914135125e-05, "loss": 2.2617, "step": 2630 }, { "epoch": 2.85, "learning_rate": 1.9322397237641875e-05, "loss": 2.5081, "step": 2632 }, { "epoch": 2.85, "learning_rate": 1.925456921918055e-05, "loss": 2.578, "step": 2634 }, { "epoch": 2.86, "learning_rate": 1.918683205919557e-05, "loss": 2.3566, "step": 2636 }, { "epoch": 2.86, "learning_rate": 1.9119185957862835e-05, "loss": 2.5683, "step": 2638 }, { "epoch": 2.86, "learning_rate": 1.9051631115089196e-05, "loss": 2.213, "step": 2640 }, { "epoch": 2.86, "learning_rate": 1.8984167730511825e-05, "loss": 2.6764, "step": 2642 }, { "epoch": 2.86, "learning_rate": 1.8916796003497572e-05, "loss": 2.422, "step": 2644 }, { "epoch": 2.87, "learning_rate": 1.8849516133142432e-05, "loss": 2.1258, "step": 2646 }, { "epoch": 2.87, "learning_rate": 1.8782328318270964e-05, "loss": 2.505, "step": 2648 }, { "epoch": 2.87, "learning_rate": 1.8715232757435704e-05, "loss": 2.2427, "step": 2650 }, { "epoch": 2.87, "learning_rate": 1.864822964891651e-05, "loss": 2.3599, "step": 2652 }, { "epoch": 2.87, "learning_rate": 1.8581319190720035e-05, "loss": 2.679, "step": 2654 }, { "epoch": 2.88, "learning_rate": 1.851450158057918e-05, "loss": 2.3708, "step": 2656 }, { "epoch": 2.88, "learning_rate": 1.844777701595244e-05, "loss": 2.414, "step": 2658 }, { "epoch": 2.88, "learning_rate": 1.83811456940233e-05, "loss": 2.524, "step": 2660 }, { "epoch": 2.88, "learning_rate": 1.8314607811699762e-05, "loss": 2.4521, "step": 2662 }, { "epoch": 2.89, "learning_rate": 1.824816356561364e-05, "loss": 2.3931, "step": 2664 }, { "epoch": 2.89, "learning_rate": 1.8181813152120092e-05, "loss": 2.3704, "step": 2666 }, { "epoch": 2.89, "learning_rate": 1.8115556767296914e-05, "loss": 2.5238, "step": 2668 }, { "epoch": 2.89, "learning_rate": 1.804939460694411e-05, "loss": 2.5418, "step": 2670 }, { "epoch": 2.89, "learning_rate": 1.7983326866583144e-05, "loss": 2.6173, "step": 2672 }, { "epoch": 2.9, "learning_rate": 1.7917353741456545e-05, "loss": 2.4272, "step": 2674 }, { "epoch": 2.9, "learning_rate": 1.7851475426527142e-05, "loss": 2.4986, "step": 2676 }, { "epoch": 2.9, "learning_rate": 1.7785692116477682e-05, "loss": 2.4365, "step": 2678 }, { "epoch": 2.9, "learning_rate": 1.772000400571005e-05, "loss": 2.5869, "step": 2680 }, { "epoch": 2.91, "learning_rate": 1.76544112883449e-05, "loss": 2.4987, "step": 2682 }, { "epoch": 2.91, "learning_rate": 1.7588914158220898e-05, "loss": 2.4701, "step": 2684 }, { "epoch": 2.91, "learning_rate": 1.7523512808894288e-05, "loss": 2.5142, "step": 2686 }, { "epoch": 2.91, "learning_rate": 1.7458207433638223e-05, "loss": 2.518, "step": 2688 }, { "epoch": 2.91, "learning_rate": 1.7392998225442263e-05, "loss": 2.386, "step": 2690 }, { "epoch": 2.92, "learning_rate": 1.732788537701179e-05, "loss": 2.2214, "step": 2692 }, { "epoch": 2.92, "learning_rate": 1.726286908076738e-05, "loss": 2.358, "step": 2694 }, { "epoch": 2.92, "learning_rate": 1.7197949528844286e-05, "loss": 2.5727, "step": 2696 }, { "epoch": 2.92, "learning_rate": 1.7133126913091903e-05, "loss": 2.5317, "step": 2698 }, { "epoch": 2.92, "learning_rate": 1.706840142507315e-05, "loss": 2.2929, "step": 2700 }, { "epoch": 2.93, "learning_rate": 1.700377325606388e-05, "loss": 2.4207, "step": 2702 }, { "epoch": 2.93, "learning_rate": 1.6939242597052373e-05, "loss": 2.4398, "step": 2704 }, { "epoch": 2.93, "learning_rate": 1.6874809638738754e-05, "loss": 2.3671, "step": 2706 }, { "epoch": 2.93, "learning_rate": 1.681047457153444e-05, "loss": 2.5831, "step": 2708 }, { "epoch": 2.94, "learning_rate": 1.6746237585561524e-05, "loss": 2.414, "step": 2710 }, { "epoch": 2.94, "learning_rate": 1.6682098870652236e-05, "loss": 2.2996, "step": 2712 }, { "epoch": 2.94, "learning_rate": 1.6618058616348492e-05, "loss": 2.4037, "step": 2714 }, { "epoch": 2.94, "learning_rate": 1.655411701190115e-05, "loss": 2.592, "step": 2716 }, { "epoch": 2.94, "learning_rate": 1.6490274246269533e-05, "loss": 2.2498, "step": 2718 }, { "epoch": 2.95, "learning_rate": 1.642653050812094e-05, "loss": 2.2538, "step": 2720 }, { "epoch": 2.95, "learning_rate": 1.636288598583e-05, "loss": 2.3926, "step": 2722 }, { "epoch": 2.95, "learning_rate": 1.629934086747813e-05, "loss": 2.5224, "step": 2724 }, { "epoch": 2.95, "learning_rate": 1.6235895340852964e-05, "loss": 2.3785, "step": 2726 }, { "epoch": 2.96, "learning_rate": 1.6172549593447877e-05, "loss": 2.4254, "step": 2728 }, { "epoch": 2.96, "learning_rate": 1.6109303812461375e-05, "loss": 2.2977, "step": 2730 }, { "epoch": 2.96, "learning_rate": 1.60461581847965e-05, "loss": 2.4296, "step": 2732 }, { "epoch": 2.96, "learning_rate": 1.598311289706033e-05, "loss": 2.1917, "step": 2734 }, { "epoch": 2.96, "learning_rate": 1.592016813556347e-05, "loss": 2.361, "step": 2736 }, { "epoch": 2.97, "learning_rate": 1.5857324086319414e-05, "loss": 2.3198, "step": 2738 }, { "epoch": 2.97, "learning_rate": 1.579458093504403e-05, "loss": 2.3945, "step": 2740 }, { "epoch": 2.97, "learning_rate": 1.5731938867155e-05, "loss": 2.2314, "step": 2742 }, { "epoch": 2.97, "learning_rate": 1.5669398067771324e-05, "loss": 2.4571, "step": 2744 }, { "epoch": 2.97, "learning_rate": 1.560695872171273e-05, "loss": 2.2473, "step": 2746 }, { "epoch": 2.98, "learning_rate": 1.5544621013499094e-05, "loss": 2.4553, "step": 2748 }, { "epoch": 2.98, "learning_rate": 1.548238512734998e-05, "loss": 2.3213, "step": 2750 }, { "epoch": 2.98, "learning_rate": 1.542025124718401e-05, "loss": 2.3302, "step": 2752 }, { "epoch": 2.98, "learning_rate": 1.535821955661839e-05, "loss": 2.2468, "step": 2754 }, { "epoch": 2.99, "learning_rate": 1.5296290238968303e-05, "loss": 2.3087, "step": 2756 }, { "epoch": 2.99, "learning_rate": 1.5234463477246452e-05, "loss": 2.4679, "step": 2758 }, { "epoch": 2.99, "learning_rate": 1.5172739454162405e-05, "loss": 2.3439, "step": 2760 }, { "epoch": 2.99, "learning_rate": 1.5111118352122183e-05, "loss": 2.2882, "step": 2762 }, { "epoch": 2.99, "learning_rate": 1.5049600353227588e-05, "loss": 2.4456, "step": 2764 }, { "epoch": 3.0, "learning_rate": 1.4988185639275798e-05, "loss": 2.3367, "step": 2766 }, { "epoch": 3.0, "learning_rate": 1.4926874391758716e-05, "loss": 2.3341, "step": 2768 }, { "epoch": 3.0, "learning_rate": 1.4865666791862521e-05, "loss": 2.7886, "step": 2770 }, { "epoch": 3.0, "learning_rate": 1.4804563020467044e-05, "loss": 2.6835, "step": 2772 }, { "epoch": 3.01, "learning_rate": 1.4743563258145353e-05, "loss": 2.3864, "step": 2774 }, { "epoch": 3.01, "learning_rate": 1.4682667685163071e-05, "loss": 2.3261, "step": 2776 }, { "epoch": 3.01, "learning_rate": 1.4621876481477987e-05, "loss": 2.4467, "step": 2778 }, { "epoch": 3.01, "learning_rate": 1.4561189826739446e-05, "loss": 2.3331, "step": 2780 }, { "epoch": 3.01, "learning_rate": 1.45006079002878e-05, "loss": 2.2346, "step": 2782 }, { "epoch": 3.02, "learning_rate": 1.4440130881153917e-05, "loss": 2.3942, "step": 2784 }, { "epoch": 3.02, "learning_rate": 1.437975894805867e-05, "loss": 2.441, "step": 2786 }, { "epoch": 3.02, "learning_rate": 1.4319492279412388e-05, "loss": 2.3998, "step": 2788 }, { "epoch": 3.02, "learning_rate": 1.425933105331429e-05, "loss": 2.518, "step": 2790 }, { "epoch": 3.02, "learning_rate": 1.419927544755199e-05, "loss": 2.3147, "step": 2792 }, { "epoch": 3.03, "learning_rate": 1.4139325639601015e-05, "loss": 2.2925, "step": 2794 }, { "epoch": 3.03, "learning_rate": 1.4079481806624217e-05, "loss": 2.5182, "step": 2796 }, { "epoch": 3.03, "learning_rate": 1.4019744125471274e-05, "loss": 2.4969, "step": 2798 }, { "epoch": 3.03, "learning_rate": 1.3960112772678125e-05, "loss": 2.4316, "step": 2800 }, { "epoch": 3.04, "learning_rate": 1.3900587924466585e-05, "loss": 2.3239, "step": 2802 }, { "epoch": 3.04, "learning_rate": 1.3841169756743649e-05, "loss": 2.6349, "step": 2804 }, { "epoch": 3.04, "learning_rate": 1.378185844510107e-05, "loss": 2.3982, "step": 2806 }, { "epoch": 3.04, "learning_rate": 1.3722654164814796e-05, "loss": 2.4663, "step": 2808 }, { "epoch": 3.04, "learning_rate": 1.366355709084456e-05, "loss": 2.4762, "step": 2810 }, { "epoch": 3.05, "learning_rate": 1.3604567397833201e-05, "loss": 2.4103, "step": 2812 }, { "epoch": 3.05, "learning_rate": 1.354568526010624e-05, "loss": 2.4714, "step": 2814 }, { "epoch": 3.05, "learning_rate": 1.3486910851671374e-05, "loss": 2.1582, "step": 2816 }, { "epoch": 3.05, "learning_rate": 1.342824434621795e-05, "loss": 2.3474, "step": 2818 }, { "epoch": 3.06, "learning_rate": 1.3369685917116408e-05, "loss": 2.3022, "step": 2820 }, { "epoch": 3.06, "learning_rate": 1.3311235737417793e-05, "loss": 2.2013, "step": 2822 }, { "epoch": 3.06, "learning_rate": 1.3252893979853304e-05, "loss": 2.5426, "step": 2824 }, { "epoch": 3.06, "learning_rate": 1.319466081683371e-05, "loss": 2.3739, "step": 2826 }, { "epoch": 3.06, "learning_rate": 1.3136536420448841e-05, "loss": 2.3773, "step": 2828 }, { "epoch": 3.07, "learning_rate": 1.307852096246711e-05, "loss": 2.4481, "step": 2830 }, { "epoch": 3.07, "learning_rate": 1.302061461433502e-05, "loss": 2.5957, "step": 2832 }, { "epoch": 3.07, "learning_rate": 1.2962817547176625e-05, "loss": 2.5113, "step": 2834 }, { "epoch": 3.07, "learning_rate": 1.2905129931793009e-05, "loss": 2.3745, "step": 2836 }, { "epoch": 3.07, "learning_rate": 1.2847551938661839e-05, "loss": 2.3667, "step": 2838 }, { "epoch": 3.08, "learning_rate": 1.2790083737936798e-05, "loss": 2.3051, "step": 2840 }, { "epoch": 3.08, "learning_rate": 1.2732725499447146e-05, "loss": 2.2803, "step": 2842 }, { "epoch": 3.08, "learning_rate": 1.2675477392697139e-05, "loss": 2.3317, "step": 2844 }, { "epoch": 3.08, "learning_rate": 1.2618339586865625e-05, "loss": 2.5069, "step": 2846 }, { "epoch": 3.09, "learning_rate": 1.2561312250805435e-05, "loss": 2.403, "step": 2848 }, { "epoch": 3.09, "learning_rate": 1.2504395553043008e-05, "loss": 2.3479, "step": 2850 }, { "epoch": 3.09, "learning_rate": 1.2447589661777759e-05, "loss": 2.2771, "step": 2852 }, { "epoch": 3.09, "learning_rate": 1.239089474488171e-05, "loss": 2.3368, "step": 2854 }, { "epoch": 3.09, "learning_rate": 1.2334310969898871e-05, "loss": 2.3194, "step": 2856 }, { "epoch": 3.1, "learning_rate": 1.227783850404487e-05, "loss": 2.3038, "step": 2858 }, { "epoch": 3.1, "learning_rate": 1.2221477514206337e-05, "loss": 2.5084, "step": 2860 }, { "epoch": 3.1, "learning_rate": 1.216522816694053e-05, "loss": 2.2501, "step": 2862 }, { "epoch": 3.1, "learning_rate": 1.2109090628474718e-05, "loss": 2.2798, "step": 2864 }, { "epoch": 3.11, "learning_rate": 1.2053065064705805e-05, "loss": 2.2456, "step": 2866 }, { "epoch": 3.11, "learning_rate": 1.1997151641199772e-05, "loss": 2.4106, "step": 2868 }, { "epoch": 3.11, "learning_rate": 1.1941350523191208e-05, "loss": 2.4705, "step": 2870 }, { "epoch": 3.11, "learning_rate": 1.1885661875582783e-05, "loss": 2.5891, "step": 2872 }, { "epoch": 3.11, "learning_rate": 1.183008586294485e-05, "loss": 2.2367, "step": 2874 }, { "epoch": 3.12, "learning_rate": 1.1774622649514889e-05, "loss": 2.5675, "step": 2876 }, { "epoch": 3.12, "learning_rate": 1.1719272399197023e-05, "loss": 2.4596, "step": 2878 }, { "epoch": 3.12, "learning_rate": 1.166403527556153e-05, "loss": 2.2995, "step": 2880 }, { "epoch": 3.12, "learning_rate": 1.1608911441844429e-05, "loss": 2.2225, "step": 2882 }, { "epoch": 3.12, "learning_rate": 1.155390106094692e-05, "loss": 2.2498, "step": 2884 }, { "epoch": 3.13, "learning_rate": 1.1499004295434918e-05, "loss": 2.3428, "step": 2886 }, { "epoch": 3.13, "learning_rate": 1.1444221307538571e-05, "loss": 2.3654, "step": 2888 }, { "epoch": 3.13, "learning_rate": 1.1389552259151864e-05, "loss": 2.3089, "step": 2890 }, { "epoch": 3.13, "learning_rate": 1.1334997311832002e-05, "loss": 2.3778, "step": 2892 }, { "epoch": 3.14, "learning_rate": 1.1280556626799005e-05, "loss": 2.3831, "step": 2894 }, { "epoch": 3.14, "learning_rate": 1.1226230364935226e-05, "loss": 2.4711, "step": 2896 }, { "epoch": 3.14, "learning_rate": 1.1172018686784935e-05, "loss": 2.5057, "step": 2898 }, { "epoch": 3.14, "learning_rate": 1.1117921752553723e-05, "loss": 2.3913, "step": 2900 }, { "epoch": 3.14, "learning_rate": 1.106393972210809e-05, "loss": 2.5023, "step": 2902 }, { "epoch": 3.15, "learning_rate": 1.1010072754975014e-05, "loss": 2.3522, "step": 2904 }, { "epoch": 3.15, "learning_rate": 1.095632101034143e-05, "loss": 2.6258, "step": 2906 }, { "epoch": 3.15, "learning_rate": 1.0902684647053735e-05, "loss": 2.3644, "step": 2908 }, { "epoch": 3.15, "learning_rate": 1.0849163823617375e-05, "loss": 2.4708, "step": 2910 }, { "epoch": 3.15, "learning_rate": 1.0795758698196368e-05, "loss": 2.2643, "step": 2912 }, { "epoch": 3.16, "learning_rate": 1.0742469428612816e-05, "loss": 2.4429, "step": 2914 }, { "epoch": 3.16, "learning_rate": 1.0689296172346431e-05, "loss": 2.2625, "step": 2916 }, { "epoch": 3.16, "learning_rate": 1.0636239086534072e-05, "loss": 2.3484, "step": 2918 }, { "epoch": 3.16, "learning_rate": 1.0583298327969338e-05, "loss": 2.4041, "step": 2920 }, { "epoch": 3.17, "learning_rate": 1.0530474053102034e-05, "loss": 2.1622, "step": 2922 }, { "epoch": 3.17, "learning_rate": 1.047776641803772e-05, "loss": 2.417, "step": 2924 }, { "epoch": 3.17, "learning_rate": 1.0425175578537299e-05, "loss": 2.3336, "step": 2926 }, { "epoch": 3.17, "learning_rate": 1.0372701690016474e-05, "loss": 2.4013, "step": 2928 }, { "epoch": 3.17, "learning_rate": 1.0320344907545388e-05, "loss": 2.2072, "step": 2930 }, { "epoch": 3.18, "learning_rate": 1.0268105385848064e-05, "loss": 2.5827, "step": 2932 }, { "epoch": 3.18, "learning_rate": 1.0215983279302049e-05, "loss": 2.5077, "step": 2934 }, { "epoch": 3.18, "learning_rate": 1.0163978741937847e-05, "loss": 2.512, "step": 2936 }, { "epoch": 3.18, "learning_rate": 1.0112091927438583e-05, "loss": 2.457, "step": 2938 }, { "epoch": 3.19, "learning_rate": 1.0060322989139442e-05, "loss": 2.5785, "step": 2940 }, { "epoch": 3.19, "learning_rate": 1.0008672080027298e-05, "loss": 2.4435, "step": 2942 }, { "epoch": 3.19, "learning_rate": 9.957139352740191e-06, "loss": 2.2668, "step": 2944 }, { "epoch": 3.19, "learning_rate": 9.90572495956696e-06, "loss": 2.4811, "step": 2946 }, { "epoch": 3.19, "learning_rate": 9.854429052446684e-06, "loss": 2.4208, "step": 2948 }, { "epoch": 3.2, "learning_rate": 9.803251782968358e-06, "loss": 2.4254, "step": 2950 }, { "epoch": 3.2, "learning_rate": 9.752193302370315e-06, "loss": 2.3277, "step": 2952 }, { "epoch": 3.2, "learning_rate": 9.701253761539897e-06, "loss": 2.2717, "step": 2954 }, { "epoch": 3.2, "learning_rate": 9.650433311012946e-06, "loss": 2.4127, "step": 2956 }, { "epoch": 3.2, "learning_rate": 9.599732100973357e-06, "loss": 2.6305, "step": 2958 }, { "epoch": 3.21, "learning_rate": 9.549150281252633e-06, "loss": 2.3212, "step": 2960 }, { "epoch": 3.21, "learning_rate": 9.498688001329486e-06, "loss": 2.3556, "step": 2962 }, { "epoch": 3.21, "learning_rate": 9.448345410329379e-06, "loss": 2.3342, "step": 2964 }, { "epoch": 3.21, "learning_rate": 9.398122657024022e-06, "loss": 2.454, "step": 2966 }, { "epoch": 3.22, "learning_rate": 9.348019889831006e-06, "loss": 2.6068, "step": 2968 }, { "epoch": 3.22, "learning_rate": 9.298037256813347e-06, "loss": 2.6167, "step": 2970 }, { "epoch": 3.22, "learning_rate": 9.248174905679058e-06, "loss": 2.3684, "step": 2972 }, { "epoch": 3.22, "learning_rate": 9.198432983780658e-06, "loss": 2.5119, "step": 2974 }, { "epoch": 3.22, "learning_rate": 9.14881163811479e-06, "loss": 2.3928, "step": 2976 }, { "epoch": 3.23, "learning_rate": 9.099311015321782e-06, "loss": 2.3656, "step": 2978 }, { "epoch": 3.23, "learning_rate": 9.049931261685207e-06, "loss": 2.3461, "step": 2980 }, { "epoch": 3.23, "learning_rate": 9.000672523131431e-06, "loss": 2.3134, "step": 2982 }, { "epoch": 3.23, "learning_rate": 8.951534945229172e-06, "loss": 2.3456, "step": 2984 }, { "epoch": 3.23, "learning_rate": 8.902518673189192e-06, "loss": 2.3656, "step": 2986 }, { "epoch": 3.24, "learning_rate": 8.853623851863663e-06, "loss": 2.3751, "step": 2988 }, { "epoch": 3.24, "learning_rate": 8.804850625745897e-06, "loss": 2.5872, "step": 2990 }, { "epoch": 3.24, "learning_rate": 8.756199138969866e-06, "loss": 2.2217, "step": 2992 }, { "epoch": 3.24, "learning_rate": 8.707669535309793e-06, "loss": 2.0714, "step": 2994 }, { "epoch": 3.25, "learning_rate": 8.659261958179688e-06, "loss": 2.5951, "step": 2996 }, { "epoch": 3.25, "learning_rate": 8.610976550632943e-06, "loss": 2.3067, "step": 2998 }, { "epoch": 3.25, "learning_rate": 8.562813455361957e-06, "loss": 2.3471, "step": 3000 }, { "epoch": 3.25, "learning_rate": 8.514772814697653e-06, "loss": 2.4585, "step": 3002 }, { "epoch": 3.25, "learning_rate": 8.466854770609062e-06, "loss": 2.199, "step": 3004 }, { "epoch": 3.26, "learning_rate": 8.419059464702927e-06, "loss": 2.2591, "step": 3006 }, { "epoch": 3.26, "learning_rate": 8.371387038223289e-06, "loss": 2.3367, "step": 3008 }, { "epoch": 3.26, "learning_rate": 8.323837632051062e-06, "loss": 2.5848, "step": 3010 }, { "epoch": 3.26, "learning_rate": 8.27641138670358e-06, "loss": 2.2525, "step": 3012 }, { "epoch": 3.27, "learning_rate": 8.229108442334255e-06, "loss": 2.7048, "step": 3014 }, { "epoch": 3.27, "learning_rate": 8.18192893873208e-06, "loss": 2.4397, "step": 3016 }, { "epoch": 3.27, "learning_rate": 8.134873015321303e-06, "loss": 2.3919, "step": 3018 }, { "epoch": 3.27, "learning_rate": 8.087940811160916e-06, "loss": 2.3169, "step": 3020 }, { "epoch": 3.27, "learning_rate": 8.041132464944351e-06, "loss": 2.4048, "step": 3022 }, { "epoch": 3.28, "learning_rate": 7.994448114998975e-06, "loss": 2.5458, "step": 3024 }, { "epoch": 3.28, "learning_rate": 7.947887899285761e-06, "loss": 2.2902, "step": 3026 }, { "epoch": 3.28, "learning_rate": 7.901451955398792e-06, "loss": 2.4315, "step": 3028 }, { "epoch": 3.28, "learning_rate": 7.855140420564965e-06, "loss": 2.5107, "step": 3030 }, { "epoch": 3.28, "learning_rate": 7.808953431643467e-06, "loss": 2.3578, "step": 3032 }, { "epoch": 3.29, "learning_rate": 7.762891125125476e-06, "loss": 2.3267, "step": 3034 }, { "epoch": 3.29, "learning_rate": 7.716953637133677e-06, "loss": 2.3038, "step": 3036 }, { "epoch": 3.29, "learning_rate": 7.671141103421919e-06, "loss": 2.2405, "step": 3038 }, { "epoch": 3.29, "learning_rate": 7.625453659374754e-06, "loss": 2.2669, "step": 3040 }, { "epoch": 3.3, "learning_rate": 7.579891440007103e-06, "loss": 2.4222, "step": 3042 }, { "epoch": 3.3, "learning_rate": 7.534454579963829e-06, "loss": 2.4086, "step": 3044 }, { "epoch": 3.3, "learning_rate": 7.489143213519301e-06, "loss": 2.3461, "step": 3046 }, { "epoch": 3.3, "learning_rate": 7.44395747457704e-06, "loss": 2.224, "step": 3048 }, { "epoch": 3.3, "learning_rate": 7.398897496669338e-06, "loss": 2.316, "step": 3050 }, { "epoch": 3.31, "learning_rate": 7.353963412956838e-06, "loss": 2.4673, "step": 3052 }, { "epoch": 3.31, "learning_rate": 7.309155356228109e-06, "loss": 2.3921, "step": 3054 }, { "epoch": 3.31, "learning_rate": 7.264473458899301e-06, "loss": 2.3709, "step": 3056 }, { "epoch": 3.31, "learning_rate": 7.219917853013764e-06, "loss": 2.5216, "step": 3058 }, { "epoch": 3.32, "learning_rate": 7.175488670241609e-06, "loss": 2.4435, "step": 3060 }, { "epoch": 3.32, "learning_rate": 7.131186041879357e-06, "loss": 2.4123, "step": 3062 }, { "epoch": 3.32, "learning_rate": 7.0870100988495004e-06, "loss": 2.2985, "step": 3064 }, { "epoch": 3.32, "learning_rate": 7.0429609717002076e-06, "loss": 2.4648, "step": 3066 }, { "epoch": 3.32, "learning_rate": 6.999038790604856e-06, "loss": 2.4027, "step": 3068 }, { "epoch": 3.33, "learning_rate": 6.955243685361673e-06, "loss": 2.5828, "step": 3070 }, { "epoch": 3.33, "learning_rate": 6.911575785393326e-06, "loss": 2.2331, "step": 3072 }, { "epoch": 3.33, "learning_rate": 6.868035219746638e-06, "loss": 2.3046, "step": 3074 }, { "epoch": 3.33, "learning_rate": 6.824622117092078e-06, "loss": 2.3877, "step": 3076 }, { "epoch": 3.33, "learning_rate": 6.781336605723432e-06, "loss": 2.307, "step": 3078 }, { "epoch": 3.34, "learning_rate": 6.738178813557472e-06, "loss": 2.4418, "step": 3080 }, { "epoch": 3.34, "learning_rate": 6.695148868133516e-06, "loss": 2.3749, "step": 3082 }, { "epoch": 3.34, "learning_rate": 6.652246896613068e-06, "loss": 2.4227, "step": 3084 }, { "epoch": 3.34, "learning_rate": 6.609473025779434e-06, "loss": 2.5151, "step": 3086 }, { "epoch": 3.35, "learning_rate": 6.566827382037383e-06, "loss": 2.4882, "step": 3088 }, { "epoch": 3.35, "learning_rate": 6.524310091412739e-06, "loss": 2.3111, "step": 3090 }, { "epoch": 3.35, "learning_rate": 6.481921279552023e-06, "loss": 2.3321, "step": 3092 }, { "epoch": 3.35, "learning_rate": 6.439661071722048e-06, "loss": 2.2051, "step": 3094 }, { "epoch": 3.35, "learning_rate": 6.397529592809614e-06, "loss": 2.3448, "step": 3096 }, { "epoch": 3.36, "learning_rate": 6.355526967321112e-06, "loss": 2.4095, "step": 3098 }, { "epoch": 3.36, "learning_rate": 6.313653319382107e-06, "loss": 2.1535, "step": 3100 }, { "epoch": 3.36, "learning_rate": 6.271908772737017e-06, "loss": 2.3662, "step": 3102 }, { "epoch": 3.36, "learning_rate": 6.2302934507487755e-06, "loss": 2.1468, "step": 3104 }, { "epoch": 3.36, "learning_rate": 6.188807476398412e-06, "loss": 2.4795, "step": 3106 }, { "epoch": 3.37, "learning_rate": 6.147450972284696e-06, "loss": 2.353, "step": 3108 }, { "epoch": 3.37, "learning_rate": 6.106224060623822e-06, "loss": 2.3579, "step": 3110 }, { "epoch": 3.37, "learning_rate": 6.065126863248976e-06, "loss": 2.3739, "step": 3112 }, { "epoch": 3.37, "learning_rate": 6.0241595016100545e-06, "loss": 2.3866, "step": 3114 }, { "epoch": 3.38, "learning_rate": 5.98332209677322e-06, "loss": 2.2253, "step": 3116 }, { "epoch": 3.38, "learning_rate": 5.942614769420629e-06, "loss": 2.3605, "step": 3118 }, { "epoch": 3.38, "learning_rate": 5.902037639850011e-06, "loss": 2.3848, "step": 3120 }, { "epoch": 3.38, "learning_rate": 5.86159082797435e-06, "loss": 2.1943, "step": 3122 }, { "epoch": 3.38, "learning_rate": 5.8212744533215016e-06, "loss": 2.3193, "step": 3124 }, { "epoch": 3.39, "learning_rate": 5.781088635033882e-06, "loss": 2.5142, "step": 3126 }, { "epoch": 3.39, "learning_rate": 5.741033491868047e-06, "loss": 2.5701, "step": 3128 }, { "epoch": 3.39, "learning_rate": 5.701109142194422e-06, "loss": 2.5069, "step": 3130 }, { "epoch": 3.39, "learning_rate": 5.6613157039969055e-06, "loss": 2.4168, "step": 3132 }, { "epoch": 3.4, "learning_rate": 5.621653294872514e-06, "loss": 2.4338, "step": 3134 }, { "epoch": 3.4, "learning_rate": 5.582122032031051e-06, "loss": 2.4563, "step": 3136 }, { "epoch": 3.4, "learning_rate": 5.542722032294761e-06, "loss": 2.2138, "step": 3138 }, { "epoch": 3.4, "learning_rate": 5.503453412098003e-06, "loss": 2.6032, "step": 3140 }, { "epoch": 3.4, "learning_rate": 5.464316287486859e-06, "loss": 2.3332, "step": 3142 }, { "epoch": 3.41, "learning_rate": 5.425310774118802e-06, "loss": 2.3154, "step": 3144 }, { "epoch": 3.41, "learning_rate": 5.386436987262416e-06, "loss": 2.6818, "step": 3146 }, { "epoch": 3.41, "learning_rate": 5.347695041796985e-06, "loss": 2.2799, "step": 3148 }, { "epoch": 3.41, "learning_rate": 5.309085052212165e-06, "loss": 2.5646, "step": 3150 }, { "epoch": 3.41, "learning_rate": 5.270607132607663e-06, "loss": 2.3395, "step": 3152 }, { "epoch": 3.42, "learning_rate": 5.232261396692911e-06, "loss": 2.4606, "step": 3154 }, { "epoch": 3.42, "learning_rate": 5.194047957786713e-06, "loss": 2.3552, "step": 3156 }, { "epoch": 3.42, "learning_rate": 5.155966928816885e-06, "loss": 2.5682, "step": 3158 }, { "epoch": 3.42, "learning_rate": 5.118018422319948e-06, "loss": 2.4571, "step": 3160 }, { "epoch": 3.43, "learning_rate": 5.080202550440849e-06, "loss": 2.24, "step": 3162 }, { "epoch": 3.43, "learning_rate": 5.042519424932513e-06, "loss": 2.5308, "step": 3164 }, { "epoch": 3.43, "learning_rate": 5.0049691571555925e-06, "loss": 2.4177, "step": 3166 }, { "epoch": 3.43, "learning_rate": 4.967551858078129e-06, "loss": 2.6177, "step": 3168 }, { "epoch": 3.43, "learning_rate": 4.930267638275221e-06, "loss": 2.505, "step": 3170 }, { "epoch": 3.44, "learning_rate": 4.893116607928677e-06, "loss": 2.5166, "step": 3172 }, { "epoch": 3.44, "learning_rate": 4.856098876826709e-06, "loss": 2.0793, "step": 3174 }, { "epoch": 3.44, "learning_rate": 4.819214554363616e-06, "loss": 2.4421, "step": 3176 }, { "epoch": 3.44, "learning_rate": 4.782463749539446e-06, "loss": 2.3317, "step": 3178 }, { "epoch": 3.45, "learning_rate": 4.745846570959672e-06, "loss": 2.4747, "step": 3180 }, { "epoch": 3.45, "learning_rate": 4.70936312683487e-06, "loss": 2.2323, "step": 3182 }, { "epoch": 3.45, "learning_rate": 4.673013524980424e-06, "loss": 2.3297, "step": 3184 }, { "epoch": 3.45, "learning_rate": 4.63679787281619e-06, "loss": 2.5994, "step": 3186 }, { "epoch": 3.45, "learning_rate": 4.6007162773661515e-06, "loss": 2.2933, "step": 3188 }, { "epoch": 3.46, "learning_rate": 4.564768845258139e-06, "loss": 2.4649, "step": 3190 }, { "epoch": 3.46, "learning_rate": 4.528955682723529e-06, "loss": 2.3754, "step": 3192 }, { "epoch": 3.46, "learning_rate": 4.4932768955968876e-06, "loss": 2.6034, "step": 3194 }, { "epoch": 3.46, "learning_rate": 4.4577325893156715e-06, "loss": 2.5477, "step": 3196 }, { "epoch": 3.46, "learning_rate": 4.422322868919937e-06, "loss": 2.3983, "step": 3198 }, { "epoch": 3.47, "learning_rate": 4.3870478390519884e-06, "loss": 2.3261, "step": 3200 }, { "epoch": 3.47, "learning_rate": 4.3519076039561345e-06, "loss": 2.4168, "step": 3202 }, { "epoch": 3.47, "learning_rate": 4.316902267478296e-06, "loss": 2.4235, "step": 3204 }, { "epoch": 3.47, "learning_rate": 4.2820319330657835e-06, "loss": 2.2992, "step": 3206 }, { "epoch": 3.48, "learning_rate": 4.2472967037669066e-06, "loss": 2.4394, "step": 3208 }, { "epoch": 3.48, "learning_rate": 4.2126966822307715e-06, "loss": 2.374, "step": 3210 }, { "epoch": 3.48, "learning_rate": 4.178231970706858e-06, "loss": 2.4277, "step": 3212 }, { "epoch": 3.48, "learning_rate": 4.1439026710448355e-06, "loss": 2.4958, "step": 3214 }, { "epoch": 3.48, "learning_rate": 4.109708884694158e-06, "loss": 2.3339, "step": 3216 }, { "epoch": 3.49, "learning_rate": 4.075650712703849e-06, "loss": 2.3244, "step": 3218 }, { "epoch": 3.49, "learning_rate": 4.041728255722154e-06, "loss": 2.4202, "step": 3220 }, { "epoch": 3.49, "learning_rate": 4.0079416139962525e-06, "loss": 2.4348, "step": 3222 }, { "epoch": 3.49, "learning_rate": 3.974290887371951e-06, "loss": 2.5305, "step": 3224 }, { "epoch": 3.49, "learning_rate": 3.940776175293431e-06, "loss": 2.3909, "step": 3226 }, { "epoch": 3.5, "learning_rate": 3.9073975768029124e-06, "loss": 2.4669, "step": 3228 }, { "epoch": 3.5, "learning_rate": 3.8741551905403735e-06, "loss": 2.5117, "step": 3230 }, { "epoch": 3.5, "learning_rate": 3.8410491147432395e-06, "loss": 2.3205, "step": 3232 }, { "epoch": 3.5, "learning_rate": 3.808079447246149e-06, "loss": 2.5002, "step": 3234 }, { "epoch": 3.51, "learning_rate": 3.7752462854806213e-06, "loss": 2.5181, "step": 3236 }, { "epoch": 3.51, "learning_rate": 3.7425497264747534e-06, "loss": 2.5206, "step": 3238 }, { "epoch": 3.51, "learning_rate": 3.7099898668529642e-06, "loss": 2.4898, "step": 3240 }, { "epoch": 3.51, "learning_rate": 3.677566802835708e-06, "loss": 2.4225, "step": 3242 }, { "epoch": 3.51, "learning_rate": 3.6452806302392007e-06, "loss": 2.3201, "step": 3244 }, { "epoch": 3.52, "learning_rate": 3.6131314444750765e-06, "loss": 2.4289, "step": 3246 }, { "epoch": 3.52, "learning_rate": 3.58111934055016e-06, "loss": 2.2184, "step": 3248 }, { "epoch": 3.52, "learning_rate": 3.5492444130662108e-06, "loss": 2.5492, "step": 3250 }, { "epoch": 3.52, "learning_rate": 3.517506756219563e-06, "loss": 2.5086, "step": 3252 }, { "epoch": 3.53, "learning_rate": 3.4859064638009033e-06, "loss": 2.4952, "step": 3254 }, { "epoch": 3.53, "learning_rate": 3.4544436291949867e-06, "loss": 2.469, "step": 3256 }, { "epoch": 3.53, "learning_rate": 3.4231183453803604e-06, "loss": 2.3824, "step": 3258 }, { "epoch": 3.53, "learning_rate": 3.391930704929064e-06, "loss": 2.4994, "step": 3260 }, { "epoch": 3.53, "learning_rate": 3.360880800006383e-06, "loss": 2.545, "step": 3262 }, { "epoch": 3.54, "learning_rate": 3.3299687223705745e-06, "loss": 2.3291, "step": 3264 }, { "epoch": 3.54, "learning_rate": 3.299194563372604e-06, "loss": 2.5543, "step": 3266 }, { "epoch": 3.54, "learning_rate": 3.2685584139558243e-06, "loss": 2.3818, "step": 3268 }, { "epoch": 3.54, "learning_rate": 3.238060364655765e-06, "loss": 2.6038, "step": 3270 }, { "epoch": 3.54, "learning_rate": 3.2077005055998533e-06, "loss": 2.4691, "step": 3272 }, { "epoch": 3.55, "learning_rate": 3.177478926507127e-06, "loss": 2.4399, "step": 3274 }, { "epoch": 3.55, "learning_rate": 3.1473957166879897e-06, "loss": 2.5692, "step": 3276 }, { "epoch": 3.55, "learning_rate": 3.117450965043911e-06, "loss": 2.4183, "step": 3278 }, { "epoch": 3.55, "learning_rate": 3.087644760067232e-06, "loss": 2.4085, "step": 3280 }, { "epoch": 3.56, "learning_rate": 3.0579771898408326e-06, "loss": 2.2894, "step": 3282 }, { "epoch": 3.56, "learning_rate": 3.0284483420379097e-06, "loss": 2.2705, "step": 3284 }, { "epoch": 3.56, "learning_rate": 2.9990583039217203e-06, "loss": 2.2714, "step": 3286 }, { "epoch": 3.56, "learning_rate": 2.9698071623452895e-06, "loss": 2.366, "step": 3288 }, { "epoch": 3.56, "learning_rate": 2.940695003751198e-06, "loss": 2.3525, "step": 3290 }, { "epoch": 3.57, "learning_rate": 2.9117219141712947e-06, "loss": 2.3377, "step": 3292 }, { "epoch": 3.57, "learning_rate": 2.8828879792264675e-06, "loss": 2.1998, "step": 3294 }, { "epoch": 3.57, "learning_rate": 2.854193284126344e-06, "loss": 2.3437, "step": 3296 }, { "epoch": 3.57, "learning_rate": 2.825637913669121e-06, "loss": 2.2963, "step": 3298 }, { "epoch": 3.57, "learning_rate": 2.797221952241219e-06, "loss": 2.3955, "step": 3300 }, { "epoch": 3.58, "learning_rate": 2.7689454838171147e-06, "loss": 2.2326, "step": 3302 }, { "epoch": 3.58, "learning_rate": 2.7408085919590264e-06, "loss": 2.3897, "step": 3304 }, { "epoch": 3.58, "learning_rate": 2.7128113598167137e-06, "loss": 2.4245, "step": 3306 }, { "epoch": 3.58, "learning_rate": 2.684953870127227e-06, "loss": 2.488, "step": 3308 }, { "epoch": 3.59, "learning_rate": 2.657236205214625e-06, "loss": 2.3614, "step": 3310 }, { "epoch": 3.59, "learning_rate": 2.6296584469897743e-06, "loss": 2.1686, "step": 3312 }, { "epoch": 3.59, "learning_rate": 2.6022206769500845e-06, "loss": 2.6152, "step": 3314 }, { "epoch": 3.59, "learning_rate": 2.574922976179295e-06, "loss": 2.3362, "step": 3316 }, { "epoch": 3.59, "learning_rate": 2.547765425347187e-06, "loss": 2.382, "step": 3318 }, { "epoch": 3.6, "learning_rate": 2.520748104709375e-06, "loss": 2.4045, "step": 3320 }, { "epoch": 3.6, "learning_rate": 2.493871094107081e-06, "loss": 2.2771, "step": 3322 }, { "epoch": 3.6, "learning_rate": 2.467134472966892e-06, "loss": 2.3296, "step": 3324 }, { "epoch": 3.6, "learning_rate": 2.4405383203004894e-06, "loss": 2.3129, "step": 3326 }, { "epoch": 3.61, "learning_rate": 2.414082714704463e-06, "loss": 2.2268, "step": 3328 }, { "epoch": 3.61, "learning_rate": 2.3877677343600524e-06, "loss": 2.476, "step": 3330 }, { "epoch": 3.61, "learning_rate": 2.36159345703294e-06, "loss": 2.5804, "step": 3332 }, { "epoch": 3.61, "learning_rate": 2.3355599600729915e-06, "loss": 2.4219, "step": 3334 }, { "epoch": 3.61, "learning_rate": 2.3096673204140108e-06, "loss": 2.4168, "step": 3336 }, { "epoch": 3.62, "learning_rate": 2.2839156145736174e-06, "loss": 2.3116, "step": 3338 }, { "epoch": 3.62, "learning_rate": 2.2583049186528704e-06, "loss": 2.3238, "step": 3340 }, { "epoch": 3.62, "learning_rate": 2.2328353083361562e-06, "loss": 2.4897, "step": 3342 }, { "epoch": 3.62, "learning_rate": 2.207506858890912e-06, "loss": 2.307, "step": 3344 }, { "epoch": 3.62, "learning_rate": 2.182319645167441e-06, "loss": 2.3267, "step": 3346 }, { "epoch": 3.63, "learning_rate": 2.1572737415986422e-06, "loss": 2.424, "step": 3348 }, { "epoch": 3.63, "learning_rate": 2.1323692221998257e-06, "loss": 2.4612, "step": 3350 }, { "epoch": 3.63, "learning_rate": 2.1076061605684818e-06, "loss": 2.4219, "step": 3352 }, { "epoch": 3.63, "learning_rate": 2.0829846298840884e-06, "loss": 2.4251, "step": 3354 }, { "epoch": 3.64, "learning_rate": 2.058504702907843e-06, "loss": 2.5063, "step": 3356 }, { "epoch": 3.64, "learning_rate": 2.0341664519824887e-06, "loss": 2.5947, "step": 3358 }, { "epoch": 3.64, "learning_rate": 2.009969949032098e-06, "loss": 2.4399, "step": 3360 }, { "epoch": 3.64, "learning_rate": 1.9859152655618498e-06, "loss": 2.405, "step": 3362 }, { "epoch": 3.64, "learning_rate": 1.962002472657809e-06, "loss": 2.3689, "step": 3364 }, { "epoch": 3.65, "learning_rate": 1.9382316409867264e-06, "loss": 2.451, "step": 3366 }, { "epoch": 3.65, "learning_rate": 1.9146028407958484e-06, "loss": 2.3364, "step": 3368 }, { "epoch": 3.65, "learning_rate": 1.8911161419126854e-06, "loss": 2.3012, "step": 3370 }, { "epoch": 3.65, "learning_rate": 1.8677716137447954e-06, "loss": 2.3833, "step": 3372 }, { "epoch": 3.66, "learning_rate": 1.844569325279627e-06, "loss": 2.5041, "step": 3374 }, { "epoch": 3.66, "learning_rate": 1.8215093450842435e-06, "loss": 2.2862, "step": 3376 }, { "epoch": 3.66, "learning_rate": 1.7985917413052055e-06, "loss": 2.3316, "step": 3378 }, { "epoch": 3.66, "learning_rate": 1.7758165816682826e-06, "loss": 2.2273, "step": 3380 }, { "epoch": 3.66, "learning_rate": 1.7531839334783306e-06, "loss": 2.2901, "step": 3382 }, { "epoch": 3.67, "learning_rate": 1.7306938636190262e-06, "loss": 2.5521, "step": 3384 }, { "epoch": 3.67, "learning_rate": 1.7083464385527325e-06, "loss": 2.3896, "step": 3386 }, { "epoch": 3.67, "learning_rate": 1.686141724320245e-06, "loss": 2.2818, "step": 3388 }, { "epoch": 3.67, "learning_rate": 1.6640797865406288e-06, "loss": 2.2733, "step": 3390 }, { "epoch": 3.67, "learning_rate": 1.6421606904110264e-06, "loss": 2.4238, "step": 3392 }, { "epoch": 3.68, "learning_rate": 1.6203845007064455e-06, "loss": 2.6201, "step": 3394 }, { "epoch": 3.68, "learning_rate": 1.5987512817795924e-06, "loss": 2.422, "step": 3396 }, { "epoch": 3.68, "learning_rate": 1.5772610975606561e-06, "loss": 2.2933, "step": 3398 }, { "epoch": 3.68, "learning_rate": 1.5559140115571246e-06, "loss": 2.4014, "step": 3400 }, { "epoch": 3.69, "learning_rate": 1.5347100868536246e-06, "loss": 2.3193, "step": 3402 }, { "epoch": 3.69, "learning_rate": 1.5136493861117097e-06, "loss": 2.4959, "step": 3404 }, { "epoch": 3.69, "learning_rate": 1.4927319715696607e-06, "loss": 2.3566, "step": 3406 }, { "epoch": 3.69, "learning_rate": 1.4719579050423427e-06, "loss": 2.3291, "step": 3408 }, { "epoch": 3.69, "learning_rate": 1.4513272479209917e-06, "loss": 2.3138, "step": 3410 }, { "epoch": 3.7, "learning_rate": 1.43084006117305e-06, "loss": 2.2497, "step": 3412 }, { "epoch": 3.7, "learning_rate": 1.41049640534196e-06, "loss": 2.2461, "step": 3414 }, { "epoch": 3.7, "learning_rate": 1.3902963405470148e-06, "loss": 2.3886, "step": 3416 }, { "epoch": 3.7, "learning_rate": 1.37023992648318e-06, "loss": 2.2535, "step": 3418 }, { "epoch": 3.7, "learning_rate": 1.3503272224208884e-06, "loss": 2.3367, "step": 3420 }, { "epoch": 3.71, "learning_rate": 1.3305582872058963e-06, "loss": 2.4806, "step": 3422 }, { "epoch": 3.71, "learning_rate": 1.3109331792590773e-06, "loss": 2.4335, "step": 3424 }, { "epoch": 3.71, "learning_rate": 1.2914519565763062e-06, "loss": 2.4195, "step": 3426 }, { "epoch": 3.71, "learning_rate": 1.2721146767282033e-06, "loss": 2.4332, "step": 3428 }, { "epoch": 3.72, "learning_rate": 1.2529213968600406e-06, "loss": 2.2733, "step": 3430 }, { "epoch": 3.72, "learning_rate": 1.233872173691536e-06, "loss": 2.3522, "step": 3432 }, { "epoch": 3.72, "learning_rate": 1.2149670635166976e-06, "loss": 2.5166, "step": 3434 }, { "epoch": 3.72, "learning_rate": 1.196206122203647e-06, "loss": 2.4279, "step": 3436 }, { "epoch": 3.72, "learning_rate": 1.1775894051944514e-06, "loss": 2.2575, "step": 3438 }, { "epoch": 3.73, "learning_rate": 1.1591169675049863e-06, "loss": 2.514, "step": 3440 }, { "epoch": 3.73, "learning_rate": 1.140788863724751e-06, "loss": 2.4809, "step": 3442 }, { "epoch": 3.73, "learning_rate": 1.1226051480167032e-06, "loss": 2.444, "step": 3444 }, { "epoch": 3.73, "learning_rate": 1.1045658741171028e-06, "loss": 2.3813, "step": 3446 }, { "epoch": 3.74, "learning_rate": 1.0866710953353731e-06, "loss": 2.3445, "step": 3448 }, { "epoch": 3.74, "learning_rate": 1.068920864553924e-06, "loss": 2.4029, "step": 3450 }, { "epoch": 3.74, "learning_rate": 1.0513152342279842e-06, "loss": 2.4019, "step": 3452 }, { "epoch": 3.74, "learning_rate": 1.0338542563854748e-06, "loss": 2.3407, "step": 3454 }, { "epoch": 3.74, "learning_rate": 1.0165379826268417e-06, "loss": 2.414, "step": 3456 }, { "epoch": 3.75, "learning_rate": 9.993664641249012e-07, "loss": 2.4748, "step": 3458 }, { "epoch": 3.75, "learning_rate": 9.823397516246834e-07, "loss": 2.2681, "step": 3460 }, { "epoch": 3.75, "learning_rate": 9.654578954433059e-07, "loss": 2.3164, "step": 3462 }, { "epoch": 3.75, "learning_rate": 9.487209454697887e-07, "loss": 2.382, "step": 3464 }, { "epoch": 3.75, "learning_rate": 9.321289511649456e-07, "loss": 2.3299, "step": 3466 }, { "epoch": 3.76, "learning_rate": 9.156819615612044e-07, "loss": 2.3526, "step": 3468 }, { "epoch": 3.76, "learning_rate": 8.993800252624862e-07, "loss": 2.4167, "step": 3470 }, { "epoch": 3.76, "learning_rate": 8.832231904440491e-07, "loss": 2.5703, "step": 3472 }, { "epoch": 3.76, "learning_rate": 8.672115048523554e-07, "loss": 2.5794, "step": 3474 }, { "epoch": 3.77, "learning_rate": 8.513450158049108e-07, "loss": 2.3276, "step": 3476 }, { "epoch": 3.77, "learning_rate": 8.356237701901582e-07, "loss": 2.3394, "step": 3478 }, { "epoch": 3.77, "learning_rate": 8.200478144672952e-07, "loss": 2.3505, "step": 3480 }, { "epoch": 3.77, "learning_rate": 8.046171946661796e-07, "loss": 2.4532, "step": 3482 }, { "epoch": 3.77, "learning_rate": 7.893319563871682e-07, "loss": 2.513, "step": 3484 }, { "epoch": 3.78, "learning_rate": 7.741921448009837e-07, "loss": 2.4655, "step": 3486 }, { "epoch": 3.78, "learning_rate": 7.591978046485926e-07, "loss": 2.605, "step": 3488 }, { "epoch": 3.78, "learning_rate": 7.443489802410663e-07, "loss": 2.4451, "step": 3490 }, { "epoch": 3.78, "learning_rate": 7.296457154594482e-07, "loss": 2.5196, "step": 3492 }, { "epoch": 3.79, "learning_rate": 7.150880537546201e-07, "loss": 2.2368, "step": 3494 }, { "epoch": 3.79, "learning_rate": 7.006760381471856e-07, "loss": 2.4034, "step": 3496 }, { "epoch": 3.79, "learning_rate": 6.86409711227337e-07, "loss": 2.5032, "step": 3498 }, { "epoch": 3.79, "learning_rate": 6.722891151547284e-07, "loss": 2.3998, "step": 3500 }, { "epoch": 3.79, "learning_rate": 6.583142916583574e-07, "loss": 2.4205, "step": 3502 }, { "epoch": 3.8, "learning_rate": 6.444852820364222e-07, "loss": 2.3034, "step": 3504 }, { "epoch": 3.8, "learning_rate": 6.30802127156227e-07, "loss": 2.3639, "step": 3506 }, { "epoch": 3.8, "learning_rate": 6.172648674540426e-07, "loss": 2.3491, "step": 3508 }, { "epoch": 3.8, "learning_rate": 6.038735429349962e-07, "loss": 2.4925, "step": 3510 }, { "epoch": 3.8, "learning_rate": 5.90628193172943e-07, "loss": 2.2371, "step": 3512 }, { "epoch": 3.81, "learning_rate": 5.775288573103666e-07, "loss": 2.3831, "step": 3514 }, { "epoch": 3.81, "learning_rate": 5.645755740582404e-07, "loss": 2.6221, "step": 3516 }, { "epoch": 3.81, "learning_rate": 5.517683816959219e-07, "loss": 2.4171, "step": 3518 }, { "epoch": 3.81, "learning_rate": 5.391073180710638e-07, "loss": 2.3518, "step": 3520 }, { "epoch": 3.82, "learning_rate": 5.265924205994644e-07, "loss": 2.4263, "step": 3522 }, { "epoch": 3.82, "learning_rate": 5.14223726264973e-07, "loss": 2.3707, "step": 3524 }, { "epoch": 3.82, "learning_rate": 5.020012716193901e-07, "loss": 2.2659, "step": 3526 }, { "epoch": 3.82, "learning_rate": 4.899250927823396e-07, "loss": 2.3573, "step": 3528 }, { "epoch": 3.82, "learning_rate": 4.779952254411913e-07, "loss": 2.2359, "step": 3530 }, { "epoch": 3.83, "learning_rate": 4.662117048509218e-07, "loss": 2.3461, "step": 3532 }, { "epoch": 3.83, "learning_rate": 4.545745658340206e-07, "loss": 2.4581, "step": 3534 }, { "epoch": 3.83, "learning_rate": 4.4308384278041183e-07, "loss": 2.4515, "step": 3536 }, { "epoch": 3.83, "learning_rate": 4.317395696473214e-07, "loss": 2.4953, "step": 3538 }, { "epoch": 3.83, "learning_rate": 4.2054177995919374e-07, "loss": 2.5276, "step": 3540 }, { "epoch": 3.84, "learning_rate": 4.094905068075694e-07, "loss": 2.323, "step": 3542 }, { "epoch": 3.84, "learning_rate": 3.985857828510353e-07, "loss": 2.4943, "step": 3544 }, { "epoch": 3.84, "learning_rate": 3.878276403150749e-07, "loss": 2.4179, "step": 3546 }, { "epoch": 3.84, "learning_rate": 3.7721611099200693e-07, "loss": 2.3685, "step": 3548 }, { "epoch": 3.85, "learning_rate": 3.6675122624087454e-07, "loss": 2.4998, "step": 3550 }, { "epoch": 3.85, "learning_rate": 3.5643301698736196e-07, "loss": 2.484, "step": 3552 }, { "epoch": 3.85, "learning_rate": 3.462615137237002e-07, "loss": 2.3272, "step": 3554 }, { "epoch": 3.85, "learning_rate": 3.3623674650857806e-07, "loss": 2.3971, "step": 3556 }, { "epoch": 3.85, "learning_rate": 3.2635874496705356e-07, "loss": 2.544, "step": 3558 }, { "epoch": 3.86, "learning_rate": 3.1662753829045375e-07, "loss": 2.6006, "step": 3560 }, { "epoch": 3.86, "learning_rate": 3.0704315523631953e-07, "loss": 2.3817, "step": 3562 }, { "epoch": 3.86, "learning_rate": 2.976056241282721e-07, "loss": 2.4897, "step": 3564 }, { "epoch": 3.86, "learning_rate": 2.8831497285599085e-07, "loss": 2.3797, "step": 3566 }, { "epoch": 3.87, "learning_rate": 2.7917122887506364e-07, "loss": 2.4418, "step": 3568 }, { "epoch": 3.87, "learning_rate": 2.701744192069755e-07, "loss": 2.4906, "step": 3570 }, { "epoch": 3.87, "learning_rate": 2.613245704389644e-07, "loss": 2.4531, "step": 3572 }, { "epoch": 3.87, "learning_rate": 2.5262170872398796e-07, "loss": 2.3002, "step": 3574 }, { "epoch": 3.87, "learning_rate": 2.440658597806178e-07, "loss": 2.5243, "step": 3576 }, { "epoch": 3.88, "learning_rate": 2.3565704889298434e-07, "loss": 2.4181, "step": 3578 }, { "epoch": 3.88, "learning_rate": 2.2739530091069328e-07, "loss": 2.2211, "step": 3580 }, { "epoch": 3.88, "learning_rate": 2.1928064024874796e-07, "loss": 2.0089, "step": 3582 }, { "epoch": 3.88, "learning_rate": 2.113130908874772e-07, "loss": 2.269, "step": 3584 }, { "epoch": 3.88, "learning_rate": 2.0349267637247982e-07, "loss": 2.4197, "step": 3586 }, { "epoch": 3.89, "learning_rate": 1.9581941981453579e-07, "loss": 2.319, "step": 3588 }, { "epoch": 3.89, "learning_rate": 1.8829334388955067e-07, "loss": 2.2629, "step": 3590 }, { "epoch": 3.89, "learning_rate": 1.80914470838478e-07, "loss": 2.5493, "step": 3592 }, { "epoch": 3.89, "learning_rate": 1.7368282246726376e-07, "loss": 2.3166, "step": 3594 }, { "epoch": 3.9, "learning_rate": 1.6659842014677406e-07, "loss": 2.5148, "step": 3596 }, { "epoch": 3.9, "learning_rate": 1.596612848127399e-07, "loss": 2.339, "step": 3598 }, { "epoch": 3.9, "learning_rate": 1.5287143696568473e-07, "loss": 2.308, "step": 3600 }, { "epoch": 3.9, "learning_rate": 1.462288966708858e-07, "loss": 2.5463, "step": 3602 }, { "epoch": 3.9, "learning_rate": 1.397336835582741e-07, "loss": 2.6243, "step": 3604 }, { "epoch": 3.91, "learning_rate": 1.333858168224178e-07, "loss": 2.4714, "step": 3606 }, { "epoch": 3.91, "learning_rate": 1.2718531522244447e-07, "loss": 2.2877, "step": 3608 }, { "epoch": 3.91, "learning_rate": 1.211321970820023e-07, "loss": 2.3737, "step": 3610 }, { "epoch": 3.91, "learning_rate": 1.1522648028917116e-07, "loss": 2.6491, "step": 3612 }, { "epoch": 3.91, "learning_rate": 1.0946818229644607e-07, "loss": 2.1288, "step": 3614 }, { "epoch": 3.92, "learning_rate": 1.0385732012067607e-07, "loss": 2.4494, "step": 3616 }, { "epoch": 3.92, "learning_rate": 9.839391034300316e-08, "loss": 2.5278, "step": 3618 }, { "epoch": 3.92, "learning_rate": 9.307796910881794e-08, "loss": 2.2621, "step": 3620 }, { "epoch": 3.92, "learning_rate": 8.790951212771514e-08, "loss": 2.2438, "step": 3622 }, { "epoch": 3.93, "learning_rate": 8.28885546734548e-08, "loss": 2.5771, "step": 3624 }, { "epoch": 3.93, "learning_rate": 7.801511158390118e-08, "loss": 2.1801, "step": 3626 }, { "epoch": 3.93, "learning_rate": 7.328919726097838e-08, "loss": 2.3484, "step": 3628 }, { "epoch": 3.93, "learning_rate": 6.871082567065367e-08, "loss": 2.5062, "step": 3630 }, { "epoch": 3.93, "learning_rate": 6.42800103428598e-08, "loss": 2.2535, "step": 3632 }, { "epoch": 3.94, "learning_rate": 5.999676437148938e-08, "loss": 2.4677, "step": 3634 }, { "epoch": 3.94, "learning_rate": 5.5861100414322796e-08, "loss": 2.2292, "step": 3636 }, { "epoch": 3.94, "learning_rate": 5.1873030693028177e-08, "loss": 2.3609, "step": 3638 }, { "epoch": 3.94, "learning_rate": 4.8032566993089225e-08, "loss": 2.4992, "step": 3640 }, { "epoch": 3.95, "learning_rate": 4.4339720663788555e-08, "loss": 2.5409, "step": 3642 }, { "epoch": 3.95, "learning_rate": 4.079450261817997e-08, "loss": 2.4727, "step": 3644 }, { "epoch": 3.95, "learning_rate": 3.739692333304401e-08, "loss": 2.2859, "step": 3646 }, { "epoch": 3.95, "learning_rate": 3.4146992848854695e-08, "loss": 2.3062, "step": 3648 }, { "epoch": 3.95, "learning_rate": 3.104472076976839e-08, "loss": 2.3304, "step": 3650 }, { "epoch": 3.96, "learning_rate": 2.809011626357383e-08, "loss": 2.4489, "step": 3652 }, { "epoch": 3.96, "learning_rate": 2.528318806168106e-08, "loss": 2.3078, "step": 3654 }, { "epoch": 3.96, "learning_rate": 2.2623944459082557e-08, "loss": 2.3173, "step": 3656 }, { "epoch": 3.96, "learning_rate": 2.0112393314336565e-08, "loss": 2.3973, "step": 3658 }, { "epoch": 3.96, "learning_rate": 1.7748542049550453e-08, "loss": 2.4975, "step": 3660 }, { "epoch": 3.97, "learning_rate": 1.553239765034187e-08, "loss": 2.4494, "step": 3662 }, { "epoch": 3.97, "learning_rate": 1.346396666582761e-08, "loss": 2.4442, "step": 3664 }, { "epoch": 3.97, "learning_rate": 1.1543255208612546e-08, "loss": 2.4622, "step": 3666 }, { "epoch": 3.97, "learning_rate": 9.770268954756301e-09, "loss": 2.3676, "step": 3668 }, { "epoch": 3.98, "learning_rate": 8.145013143756597e-09, "loss": 2.5927, "step": 3670 }, { "epoch": 3.98, "learning_rate": 6.6674925785548125e-09, "loss": 2.4922, "step": 3672 }, { "epoch": 3.98, "learning_rate": 5.337711625497121e-09, "loss": 2.1202, "step": 3674 }, { "epoch": 3.98, "learning_rate": 4.155674214328942e-09, "loss": 2.5643, "step": 3676 }, { "epoch": 3.98, "learning_rate": 3.1213838382004867e-09, "loss": 2.2801, "step": 3678 }, { "epoch": 3.99, "learning_rate": 2.234843553627908e-09, "loss": 2.4424, "step": 3680 }, { "epoch": 3.99, "learning_rate": 1.496055980498845e-09, "loss": 2.6128, "step": 3682 }, { "epoch": 3.99, "learning_rate": 9.050233020779786e-10, "loss": 2.3174, "step": 3684 }, { "epoch": 3.99, "learning_rate": 4.6174726496817087e-10, "loss": 2.4364, "step": 3686 }, { "epoch": 4.0, "learning_rate": 1.6622917913267088e-10, "loss": 2.5562, "step": 3688 }, { "epoch": 4.0, "learning_rate": 1.8469917889563094e-11, "loss": 2.2758, "step": 3690 }, { "epoch": 4.0, "learning_rate": 1.8469917889563094e-11, "loss": 2.4287, "step": 3692 }, { "epoch": 4.0, "step": 3692, "total_flos": 8.7881966778581e+16, "train_loss": 2.440685138283933, "train_runtime": 22362.2858, "train_samples_per_second": 10.572, "train_steps_per_second": 0.165 } ], "max_steps": 3692, "num_train_epochs": 4, "total_flos": 8.7881966778581e+16, "trial_name": null, "trial_params": null }