diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11101 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.999458581483487, + "global_step": 3692, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.405405405405406e-06, + "loss": 2.6714, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.0810810810810812e-05, + "loss": 2.4616, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.6216216216216218e-05, + "loss": 2.5089, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 2.1621621621621624e-05, + "loss": 2.5389, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 2.702702702702703e-05, + "loss": 2.4892, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 3.2432432432432436e-05, + "loss": 2.4949, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 3.783783783783784e-05, + "loss": 2.5016, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 4.324324324324325e-05, + "loss": 2.5727, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 4.8648648648648654e-05, + "loss": 2.605, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 5.405405405405406e-05, + "loss": 2.5045, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 5.9459459459459466e-05, + "loss": 2.5509, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 6.486486486486487e-05, + "loss": 2.6363, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 7.027027027027028e-05, + "loss": 2.3918, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 7.567567567567568e-05, + "loss": 2.5172, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 8.108108108108109e-05, + "loss": 2.6391, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 8.64864864864865e-05, + "loss": 2.3696, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 9.18918918918919e-05, + "loss": 2.4359, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 9.729729729729731e-05, + "loss": 2.5197, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 9.999998153008212e-05, + "loss": 2.5001, + "step": 38 + }, + { + "epoch": 0.04, + "learning_rate": 9.999983377082087e-05, + "loss": 2.7154, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 9.999953825273504e-05, + "loss": 2.5842, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 9.999909497669792e-05, + "loss": 2.4231, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 9.99985039440195e-05, + "loss": 2.6799, + "step": 46 + }, + { + "epoch": 0.05, + "learning_rate": 9.999776515644638e-05, + "loss": 2.5285, + "step": 48 + }, + { + "epoch": 0.05, + "learning_rate": 9.999687861616181e-05, + "loss": 2.6049, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 9.999584432578569e-05, + "loss": 2.46, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 9.999466228837451e-05, + "loss": 2.5835, + "step": 54 + }, + { + "epoch": 0.06, + "learning_rate": 9.999333250742145e-05, + "loss": 2.6909, + "step": 56 + }, + { + "epoch": 0.06, + "learning_rate": 9.999185498685624e-05, + "loss": 2.4055, + "step": 58 + }, + { + "epoch": 0.06, + "learning_rate": 9.999022973104525e-05, + "loss": 2.5816, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 9.99884567447914e-05, + "loss": 2.5871, + "step": 62 + }, + { + "epoch": 0.07, + "learning_rate": 9.998653603333418e-05, + "loss": 2.701, + "step": 64 + }, + { + "epoch": 0.07, + "learning_rate": 9.998446760234966e-05, + "loss": 2.5853, + "step": 66 + }, + { + "epoch": 0.07, + "learning_rate": 9.998225145795046e-05, + "loss": 2.4068, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 9.997988760668566e-05, + "loss": 2.5417, + "step": 70 + }, + { + "epoch": 0.08, + "learning_rate": 9.997737605554092e-05, + "loss": 2.531, + "step": 72 + }, + { + "epoch": 0.08, + "learning_rate": 9.997471681193833e-05, + "loss": 2.5509, + "step": 74 + }, + { + "epoch": 0.08, + "learning_rate": 9.997190988373644e-05, + "loss": 2.4764, + "step": 76 + }, + { + "epoch": 0.08, + "learning_rate": 9.996895527923023e-05, + "loss": 2.4522, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 9.996585300715116e-05, + "loss": 2.332, + "step": 80 + }, + { + "epoch": 0.09, + "learning_rate": 9.996260307666696e-05, + "loss": 2.518, + "step": 82 + }, + { + "epoch": 0.09, + "learning_rate": 9.995920549738183e-05, + "loss": 2.5257, + "step": 84 + }, + { + "epoch": 0.09, + "learning_rate": 9.995566027933621e-05, + "loss": 2.4631, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 9.995196743300692e-05, + "loss": 2.6791, + "step": 88 + }, + { + "epoch": 0.1, + "learning_rate": 9.994812696930698e-05, + "loss": 2.5043, + "step": 90 + }, + { + "epoch": 0.1, + "learning_rate": 9.994413889958568e-05, + "loss": 2.5534, + "step": 92 + }, + { + "epoch": 0.1, + "learning_rate": 9.994000323562852e-05, + "loss": 2.3725, + "step": 94 + }, + { + "epoch": 0.1, + "learning_rate": 9.993571998965714e-05, + "loss": 2.7813, + "step": 96 + }, + { + "epoch": 0.11, + "learning_rate": 9.993128917432934e-05, + "loss": 2.4455, + "step": 98 + }, + { + "epoch": 0.11, + "learning_rate": 9.992671080273903e-05, + "loss": 2.4049, + "step": 100 + }, + { + "epoch": 0.11, + "learning_rate": 9.992198488841611e-05, + "loss": 2.4881, + "step": 102 + }, + { + "epoch": 0.11, + "learning_rate": 9.991711144532654e-05, + "loss": 2.4836, + "step": 104 + }, + { + "epoch": 0.11, + "learning_rate": 9.991209048787228e-05, + "loss": 2.4244, + "step": 106 + }, + { + "epoch": 0.12, + "learning_rate": 9.990692203089119e-05, + "loss": 2.5105, + "step": 108 + }, + { + "epoch": 0.12, + "learning_rate": 9.9901606089657e-05, + "loss": 2.6217, + "step": 110 + }, + { + "epoch": 0.12, + "learning_rate": 9.989614267987933e-05, + "loss": 2.5529, + "step": 112 + }, + { + "epoch": 0.12, + "learning_rate": 9.989053181770356e-05, + "loss": 2.512, + "step": 114 + }, + { + "epoch": 0.13, + "learning_rate": 9.988477351971084e-05, + "loss": 2.5365, + "step": 116 + }, + { + "epoch": 0.13, + "learning_rate": 9.9878867802918e-05, + "loss": 2.6218, + "step": 118 + }, + { + "epoch": 0.13, + "learning_rate": 9.987281468477756e-05, + "loss": 2.4998, + "step": 120 + }, + { + "epoch": 0.13, + "learning_rate": 9.986661418317759e-05, + "loss": 2.4597, + "step": 122 + }, + { + "epoch": 0.13, + "learning_rate": 9.986026631644173e-05, + "loss": 2.6517, + "step": 124 + }, + { + "epoch": 0.14, + "learning_rate": 9.985377110332912e-05, + "loss": 2.6685, + "step": 126 + }, + { + "epoch": 0.14, + "learning_rate": 9.984712856303432e-05, + "loss": 2.5139, + "step": 128 + }, + { + "epoch": 0.14, + "learning_rate": 9.984033871518727e-05, + "loss": 2.4927, + "step": 130 + }, + { + "epoch": 0.14, + "learning_rate": 9.983340157985324e-05, + "loss": 2.4832, + "step": 132 + }, + { + "epoch": 0.15, + "learning_rate": 9.982631717753275e-05, + "loss": 2.5724, + "step": 134 + }, + { + "epoch": 0.15, + "learning_rate": 9.981908552916153e-05, + "loss": 2.5026, + "step": 136 + }, + { + "epoch": 0.15, + "learning_rate": 9.981170665611046e-05, + "loss": 2.4104, + "step": 138 + }, + { + "epoch": 0.15, + "learning_rate": 9.980418058018547e-05, + "loss": 2.4806, + "step": 140 + }, + { + "epoch": 0.15, + "learning_rate": 9.979650732362753e-05, + "loss": 2.3937, + "step": 142 + }, + { + "epoch": 0.16, + "learning_rate": 9.978868690911253e-05, + "loss": 2.7488, + "step": 144 + }, + { + "epoch": 0.16, + "learning_rate": 9.978071935975126e-05, + "loss": 2.5604, + "step": 146 + }, + { + "epoch": 0.16, + "learning_rate": 9.977260469908931e-05, + "loss": 2.4197, + "step": 148 + }, + { + "epoch": 0.16, + "learning_rate": 9.976434295110701e-05, + "loss": 2.6003, + "step": 150 + }, + { + "epoch": 0.16, + "learning_rate": 9.975593414021938e-05, + "loss": 2.4822, + "step": 152 + }, + { + "epoch": 0.17, + "learning_rate": 9.974737829127602e-05, + "loss": 2.4176, + "step": 154 + }, + { + "epoch": 0.17, + "learning_rate": 9.973867542956104e-05, + "loss": 2.5665, + "step": 156 + }, + { + "epoch": 0.17, + "learning_rate": 9.972982558079303e-05, + "loss": 2.7229, + "step": 158 + }, + { + "epoch": 0.17, + "learning_rate": 9.972082877112494e-05, + "loss": 2.7438, + "step": 160 + }, + { + "epoch": 0.18, + "learning_rate": 9.971168502714401e-05, + "loss": 2.5602, + "step": 162 + }, + { + "epoch": 0.18, + "learning_rate": 9.970239437587174e-05, + "loss": 2.5438, + "step": 164 + }, + { + "epoch": 0.18, + "learning_rate": 9.96929568447637e-05, + "loss": 2.5334, + "step": 166 + }, + { + "epoch": 0.18, + "learning_rate": 9.968337246170956e-05, + "loss": 2.496, + "step": 168 + }, + { + "epoch": 0.18, + "learning_rate": 9.967364125503295e-05, + "loss": 2.4688, + "step": 170 + }, + { + "epoch": 0.19, + "learning_rate": 9.966376325349143e-05, + "loss": 2.5845, + "step": 172 + }, + { + "epoch": 0.19, + "learning_rate": 9.965373848627631e-05, + "loss": 2.3486, + "step": 174 + }, + { + "epoch": 0.19, + "learning_rate": 9.964356698301264e-05, + "loss": 2.6254, + "step": 176 + }, + { + "epoch": 0.19, + "learning_rate": 9.963324877375912e-05, + "loss": 2.2205, + "step": 178 + }, + { + "epoch": 0.19, + "learning_rate": 9.9622783889008e-05, + "loss": 2.4156, + "step": 180 + }, + { + "epoch": 0.2, + "learning_rate": 9.961217235968494e-05, + "loss": 2.6328, + "step": 182 + }, + { + "epoch": 0.2, + "learning_rate": 9.960141421714897e-05, + "loss": 2.5354, + "step": 184 + }, + { + "epoch": 0.2, + "learning_rate": 9.959050949319244e-05, + "loss": 2.4779, + "step": 186 + }, + { + "epoch": 0.2, + "learning_rate": 9.957945822004083e-05, + "loss": 2.4148, + "step": 188 + }, + { + "epoch": 0.21, + "learning_rate": 9.956826043035268e-05, + "loss": 2.5947, + "step": 190 + }, + { + "epoch": 0.21, + "learning_rate": 9.95569161572196e-05, + "loss": 2.6662, + "step": 192 + }, + { + "epoch": 0.21, + "learning_rate": 9.954542543416599e-05, + "loss": 2.4853, + "step": 194 + }, + { + "epoch": 0.21, + "learning_rate": 9.953378829514908e-05, + "loss": 2.4561, + "step": 196 + }, + { + "epoch": 0.21, + "learning_rate": 9.952200477455881e-05, + "loss": 2.5139, + "step": 198 + }, + { + "epoch": 0.22, + "learning_rate": 9.951007490721766e-05, + "loss": 2.5978, + "step": 200 + }, + { + "epoch": 0.22, + "learning_rate": 9.949799872838061e-05, + "loss": 2.6275, + "step": 202 + }, + { + "epoch": 0.22, + "learning_rate": 9.948577627373503e-05, + "loss": 2.4584, + "step": 204 + }, + { + "epoch": 0.22, + "learning_rate": 9.947340757940053e-05, + "loss": 2.3811, + "step": 206 + }, + { + "epoch": 0.23, + "learning_rate": 9.946089268192895e-05, + "loss": 2.4847, + "step": 208 + }, + { + "epoch": 0.23, + "learning_rate": 9.944823161830407e-05, + "loss": 2.5872, + "step": 210 + }, + { + "epoch": 0.23, + "learning_rate": 9.943542442594177e-05, + "loss": 2.7864, + "step": 212 + }, + { + "epoch": 0.23, + "learning_rate": 9.942247114268964e-05, + "loss": 2.5991, + "step": 214 + }, + { + "epoch": 0.23, + "learning_rate": 9.940937180682706e-05, + "loss": 2.4734, + "step": 216 + }, + { + "epoch": 0.24, + "learning_rate": 9.9396126457065e-05, + "loss": 2.6016, + "step": 218 + }, + { + "epoch": 0.24, + "learning_rate": 9.938273513254597e-05, + "loss": 2.5428, + "step": 220 + }, + { + "epoch": 0.24, + "learning_rate": 9.936919787284378e-05, + "loss": 2.6381, + "step": 222 + }, + { + "epoch": 0.24, + "learning_rate": 9.935551471796358e-05, + "loss": 2.6325, + "step": 224 + }, + { + "epoch": 0.24, + "learning_rate": 9.934168570834165e-05, + "loss": 2.5066, + "step": 226 + }, + { + "epoch": 0.25, + "learning_rate": 9.932771088484527e-05, + "loss": 2.5039, + "step": 228 + }, + { + "epoch": 0.25, + "learning_rate": 9.931359028877267e-05, + "loss": 2.7124, + "step": 230 + }, + { + "epoch": 0.25, + "learning_rate": 9.929932396185281e-05, + "loss": 2.4925, + "step": 232 + }, + { + "epoch": 0.25, + "learning_rate": 9.928491194624539e-05, + "loss": 2.579, + "step": 234 + }, + { + "epoch": 0.26, + "learning_rate": 9.927035428454055e-05, + "loss": 2.6093, + "step": 236 + }, + { + "epoch": 0.26, + "learning_rate": 9.925565101975893e-05, + "loss": 2.3589, + "step": 238 + }, + { + "epoch": 0.26, + "learning_rate": 9.924080219535141e-05, + "loss": 2.6058, + "step": 240 + }, + { + "epoch": 0.26, + "learning_rate": 9.922580785519901e-05, + "loss": 2.5016, + "step": 242 + }, + { + "epoch": 0.26, + "learning_rate": 9.921066804361284e-05, + "loss": 2.6041, + "step": 244 + }, + { + "epoch": 0.27, + "learning_rate": 9.919538280533382e-05, + "loss": 2.4222, + "step": 246 + }, + { + "epoch": 0.27, + "learning_rate": 9.917995218553271e-05, + "loss": 2.5596, + "step": 248 + }, + { + "epoch": 0.27, + "learning_rate": 9.916437622980985e-05, + "loss": 2.5427, + "step": 250 + }, + { + "epoch": 0.27, + "learning_rate": 9.91486549841951e-05, + "loss": 2.5865, + "step": 252 + }, + { + "epoch": 0.28, + "learning_rate": 9.913278849514765e-05, + "loss": 2.4464, + "step": 254 + }, + { + "epoch": 0.28, + "learning_rate": 9.911677680955596e-05, + "loss": 2.4279, + "step": 256 + }, + { + "epoch": 0.28, + "learning_rate": 9.910061997473752e-05, + "loss": 2.4858, + "step": 258 + }, + { + "epoch": 0.28, + "learning_rate": 9.908431803843881e-05, + "loss": 2.3309, + "step": 260 + }, + { + "epoch": 0.28, + "learning_rate": 9.906787104883506e-05, + "loss": 2.5427, + "step": 262 + }, + { + "epoch": 0.29, + "learning_rate": 9.905127905453023e-05, + "loss": 2.568, + "step": 264 + }, + { + "epoch": 0.29, + "learning_rate": 9.90345421045567e-05, + "loss": 2.5368, + "step": 266 + }, + { + "epoch": 0.29, + "learning_rate": 9.90176602483753e-05, + "loss": 2.562, + "step": 268 + }, + { + "epoch": 0.29, + "learning_rate": 9.90006335358751e-05, + "loss": 2.43, + "step": 270 + }, + { + "epoch": 0.29, + "learning_rate": 9.898346201737317e-05, + "loss": 2.6544, + "step": 272 + }, + { + "epoch": 0.3, + "learning_rate": 9.896614574361454e-05, + "loss": 2.5701, + "step": 274 + }, + { + "epoch": 0.3, + "learning_rate": 9.894868476577201e-05, + "loss": 2.5294, + "step": 276 + }, + { + "epoch": 0.3, + "learning_rate": 9.893107913544609e-05, + "loss": 2.6586, + "step": 278 + }, + { + "epoch": 0.3, + "learning_rate": 9.891332890466463e-05, + "loss": 2.4904, + "step": 280 + }, + { + "epoch": 0.31, + "learning_rate": 9.88954341258829e-05, + "loss": 2.5686, + "step": 282 + }, + { + "epoch": 0.31, + "learning_rate": 9.887739485198331e-05, + "loss": 2.5522, + "step": 284 + }, + { + "epoch": 0.31, + "learning_rate": 9.885921113627525e-05, + "loss": 2.6227, + "step": 286 + }, + { + "epoch": 0.31, + "learning_rate": 9.884088303249501e-05, + "loss": 2.6265, + "step": 288 + }, + { + "epoch": 0.31, + "learning_rate": 9.882241059480555e-05, + "loss": 2.6851, + "step": 290 + }, + { + "epoch": 0.32, + "learning_rate": 9.880379387779637e-05, + "loss": 2.4501, + "step": 292 + }, + { + "epoch": 0.32, + "learning_rate": 9.878503293648332e-05, + "loss": 2.3563, + "step": 294 + }, + { + "epoch": 0.32, + "learning_rate": 9.876612782630848e-05, + "loss": 2.3076, + "step": 296 + }, + { + "epoch": 0.32, + "learning_rate": 9.874707860313997e-05, + "loss": 2.5158, + "step": 298 + }, + { + "epoch": 0.32, + "learning_rate": 9.87278853232718e-05, + "loss": 2.4241, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 9.87085480434237e-05, + "loss": 2.6496, + "step": 302 + }, + { + "epoch": 0.33, + "learning_rate": 9.868906682074093e-05, + "loss": 2.5175, + "step": 304 + }, + { + "epoch": 0.33, + "learning_rate": 9.866944171279411e-05, + "loss": 2.4658, + "step": 306 + }, + { + "epoch": 0.33, + "learning_rate": 9.864967277757911e-05, + "loss": 2.3329, + "step": 308 + }, + { + "epoch": 0.34, + "learning_rate": 9.862976007351683e-05, + "loss": 2.6255, + "step": 310 + }, + { + "epoch": 0.34, + "learning_rate": 9.860970365945299e-05, + "loss": 2.5342, + "step": 312 + }, + { + "epoch": 0.34, + "learning_rate": 9.858950359465805e-05, + "loss": 2.3472, + "step": 314 + }, + { + "epoch": 0.34, + "learning_rate": 9.856915993882696e-05, + "loss": 2.4315, + "step": 316 + }, + { + "epoch": 0.34, + "learning_rate": 9.854867275207901e-05, + "loss": 2.5811, + "step": 318 + }, + { + "epoch": 0.35, + "learning_rate": 9.852804209495766e-05, + "loss": 2.5231, + "step": 320 + }, + { + "epoch": 0.35, + "learning_rate": 9.850726802843034e-05, + "loss": 2.6642, + "step": 322 + }, + { + "epoch": 0.35, + "learning_rate": 9.84863506138883e-05, + "loss": 2.5686, + "step": 324 + }, + { + "epoch": 0.35, + "learning_rate": 9.846528991314639e-05, + "loss": 2.7031, + "step": 326 + }, + { + "epoch": 0.36, + "learning_rate": 9.844408598844288e-05, + "loss": 2.4843, + "step": 328 + }, + { + "epoch": 0.36, + "learning_rate": 9.842273890243936e-05, + "loss": 2.6201, + "step": 330 + }, + { + "epoch": 0.36, + "learning_rate": 9.840124871822041e-05, + "loss": 2.4424, + "step": 332 + }, + { + "epoch": 0.36, + "learning_rate": 9.837961549929356e-05, + "loss": 2.5032, + "step": 334 + }, + { + "epoch": 0.36, + "learning_rate": 9.835783930958897e-05, + "loss": 2.6527, + "step": 336 + }, + { + "epoch": 0.37, + "learning_rate": 9.833592021345937e-05, + "loss": 2.4956, + "step": 338 + }, + { + "epoch": 0.37, + "learning_rate": 9.831385827567975e-05, + "loss": 2.3891, + "step": 340 + }, + { + "epoch": 0.37, + "learning_rate": 9.829165356144727e-05, + "loss": 2.2116, + "step": 342 + }, + { + "epoch": 0.37, + "learning_rate": 9.826930613638098e-05, + "loss": 2.5029, + "step": 344 + }, + { + "epoch": 0.37, + "learning_rate": 9.824681606652168e-05, + "loss": 2.5519, + "step": 346 + }, + { + "epoch": 0.38, + "learning_rate": 9.822418341833172e-05, + "loss": 2.5432, + "step": 348 + }, + { + "epoch": 0.38, + "learning_rate": 9.82014082586948e-05, + "loss": 2.5587, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 9.817849065491575e-05, + "loss": 2.552, + "step": 352 + }, + { + "epoch": 0.38, + "learning_rate": 9.815543067472039e-05, + "loss": 2.4258, + "step": 354 + }, + { + "epoch": 0.39, + "learning_rate": 9.813222838625521e-05, + "loss": 2.4393, + "step": 356 + }, + { + "epoch": 0.39, + "learning_rate": 9.810888385808732e-05, + "loss": 2.5239, + "step": 358 + }, + { + "epoch": 0.39, + "learning_rate": 9.808539715920414e-05, + "loss": 2.4571, + "step": 360 + }, + { + "epoch": 0.39, + "learning_rate": 9.806176835901328e-05, + "loss": 2.5202, + "step": 362 + }, + { + "epoch": 0.39, + "learning_rate": 9.803799752734219e-05, + "loss": 2.4761, + "step": 364 + }, + { + "epoch": 0.4, + "learning_rate": 9.801408473443816e-05, + "loss": 2.8216, + "step": 366 + }, + { + "epoch": 0.4, + "learning_rate": 9.79900300509679e-05, + "loss": 2.6249, + "step": 368 + }, + { + "epoch": 0.4, + "learning_rate": 9.796583354801752e-05, + "loss": 2.5059, + "step": 370 + }, + { + "epoch": 0.4, + "learning_rate": 9.794149529709216e-05, + "loss": 2.5326, + "step": 372 + }, + { + "epoch": 0.4, + "learning_rate": 9.791701537011591e-05, + "loss": 2.7352, + "step": 374 + }, + { + "epoch": 0.41, + "learning_rate": 9.789239383943152e-05, + "loss": 2.6958, + "step": 376 + }, + { + "epoch": 0.41, + "learning_rate": 9.78676307778002e-05, + "loss": 2.4794, + "step": 378 + }, + { + "epoch": 0.41, + "learning_rate": 9.784272625840136e-05, + "loss": 2.5559, + "step": 380 + }, + { + "epoch": 0.41, + "learning_rate": 9.781768035483256e-05, + "loss": 2.4731, + "step": 382 + }, + { + "epoch": 0.42, + "learning_rate": 9.779249314110909e-05, + "loss": 2.5427, + "step": 384 + }, + { + "epoch": 0.42, + "learning_rate": 9.776716469166384e-05, + "loss": 2.6534, + "step": 386 + }, + { + "epoch": 0.42, + "learning_rate": 9.774169508134715e-05, + "loss": 2.5991, + "step": 388 + }, + { + "epoch": 0.42, + "learning_rate": 9.771608438542639e-05, + "loss": 2.4887, + "step": 390 + }, + { + "epoch": 0.42, + "learning_rate": 9.769033267958598e-05, + "loss": 2.5762, + "step": 392 + }, + { + "epoch": 0.43, + "learning_rate": 9.766444003992703e-05, + "loss": 2.3775, + "step": 394 + }, + { + "epoch": 0.43, + "learning_rate": 9.763840654296706e-05, + "loss": 2.4067, + "step": 396 + }, + { + "epoch": 0.43, + "learning_rate": 9.761223226563996e-05, + "loss": 2.3338, + "step": 398 + }, + { + "epoch": 0.43, + "learning_rate": 9.758591728529555e-05, + "loss": 2.4981, + "step": 400 + }, + { + "epoch": 0.44, + "learning_rate": 9.755946167969952e-05, + "loss": 2.402, + "step": 402 + }, + { + "epoch": 0.44, + "learning_rate": 9.753286552703312e-05, + "loss": 2.7678, + "step": 404 + }, + { + "epoch": 0.44, + "learning_rate": 9.750612890589293e-05, + "loss": 2.5216, + "step": 406 + }, + { + "epoch": 0.44, + "learning_rate": 9.747925189529063e-05, + "loss": 2.3811, + "step": 408 + }, + { + "epoch": 0.44, + "learning_rate": 9.745223457465282e-05, + "loss": 2.4442, + "step": 410 + }, + { + "epoch": 0.45, + "learning_rate": 9.742507702382071e-05, + "loss": 2.4474, + "step": 412 + }, + { + "epoch": 0.45, + "learning_rate": 9.739777932304992e-05, + "loss": 2.5238, + "step": 414 + }, + { + "epoch": 0.45, + "learning_rate": 9.737034155301024e-05, + "loss": 2.4573, + "step": 416 + }, + { + "epoch": 0.45, + "learning_rate": 9.734276379478538e-05, + "loss": 2.5096, + "step": 418 + }, + { + "epoch": 0.45, + "learning_rate": 9.731504612987279e-05, + "loss": 2.5997, + "step": 420 + }, + { + "epoch": 0.46, + "learning_rate": 9.728718864018329e-05, + "loss": 2.3851, + "step": 422 + }, + { + "epoch": 0.46, + "learning_rate": 9.725919140804099e-05, + "loss": 2.5155, + "step": 424 + }, + { + "epoch": 0.46, + "learning_rate": 9.72310545161829e-05, + "loss": 2.614, + "step": 426 + }, + { + "epoch": 0.46, + "learning_rate": 9.72027780477588e-05, + "loss": 2.5027, + "step": 428 + }, + { + "epoch": 0.47, + "learning_rate": 9.717436208633088e-05, + "loss": 2.4011, + "step": 430 + }, + { + "epoch": 0.47, + "learning_rate": 9.714580671587366e-05, + "loss": 2.5327, + "step": 432 + }, + { + "epoch": 0.47, + "learning_rate": 9.711711202077354e-05, + "loss": 2.3772, + "step": 434 + }, + { + "epoch": 0.47, + "learning_rate": 9.708827808582871e-05, + "loss": 2.5332, + "step": 436 + }, + { + "epoch": 0.47, + "learning_rate": 9.70593049962488e-05, + "loss": 2.4211, + "step": 438 + }, + { + "epoch": 0.48, + "learning_rate": 9.703019283765471e-05, + "loss": 2.5811, + "step": 440 + }, + { + "epoch": 0.48, + "learning_rate": 9.700094169607828e-05, + "loss": 2.5859, + "step": 442 + }, + { + "epoch": 0.48, + "learning_rate": 9.697155165796209e-05, + "loss": 2.7103, + "step": 444 + }, + { + "epoch": 0.48, + "learning_rate": 9.694202281015918e-05, + "loss": 2.5435, + "step": 446 + }, + { + "epoch": 0.49, + "learning_rate": 9.691235523993278e-05, + "loss": 2.4327, + "step": 448 + }, + { + "epoch": 0.49, + "learning_rate": 9.688254903495609e-05, + "loss": 2.4772, + "step": 450 + }, + { + "epoch": 0.49, + "learning_rate": 9.685260428331202e-05, + "loss": 2.5445, + "step": 452 + }, + { + "epoch": 0.49, + "learning_rate": 9.682252107349288e-05, + "loss": 2.5334, + "step": 454 + }, + { + "epoch": 0.49, + "learning_rate": 9.679229949440015e-05, + "loss": 2.4343, + "step": 456 + }, + { + "epoch": 0.5, + "learning_rate": 9.676193963534423e-05, + "loss": 2.4341, + "step": 458 + }, + { + "epoch": 0.5, + "learning_rate": 9.673144158604419e-05, + "loss": 2.5318, + "step": 460 + }, + { + "epoch": 0.5, + "learning_rate": 9.67008054366274e-05, + "loss": 2.435, + "step": 462 + }, + { + "epoch": 0.5, + "learning_rate": 9.667003127762942e-05, + "loss": 2.6514, + "step": 464 + }, + { + "epoch": 0.5, + "learning_rate": 9.663911919999362e-05, + "loss": 2.4744, + "step": 466 + }, + { + "epoch": 0.51, + "learning_rate": 9.660806929507095e-05, + "loss": 2.5498, + "step": 468 + }, + { + "epoch": 0.51, + "learning_rate": 9.657688165461965e-05, + "loss": 2.6276, + "step": 470 + }, + { + "epoch": 0.51, + "learning_rate": 9.654555637080502e-05, + "loss": 2.6097, + "step": 472 + }, + { + "epoch": 0.51, + "learning_rate": 9.65140935361991e-05, + "loss": 2.3675, + "step": 474 + }, + { + "epoch": 0.52, + "learning_rate": 9.648249324378044e-05, + "loss": 2.469, + "step": 476 + }, + { + "epoch": 0.52, + "learning_rate": 9.64507555869338e-05, + "loss": 2.5554, + "step": 478 + }, + { + "epoch": 0.52, + "learning_rate": 9.641888065944984e-05, + "loss": 2.35, + "step": 480 + }, + { + "epoch": 0.52, + "learning_rate": 9.638686855552494e-05, + "loss": 2.495, + "step": 482 + }, + { + "epoch": 0.52, + "learning_rate": 9.635471936976081e-05, + "loss": 2.4547, + "step": 484 + }, + { + "epoch": 0.53, + "learning_rate": 9.63224331971643e-05, + "loss": 2.3311, + "step": 486 + }, + { + "epoch": 0.53, + "learning_rate": 9.629001013314705e-05, + "loss": 2.5145, + "step": 488 + }, + { + "epoch": 0.53, + "learning_rate": 9.625745027352526e-05, + "loss": 2.5413, + "step": 490 + }, + { + "epoch": 0.53, + "learning_rate": 9.622475371451939e-05, + "loss": 2.5209, + "step": 492 + }, + { + "epoch": 0.53, + "learning_rate": 9.619192055275386e-05, + "loss": 2.4376, + "step": 494 + }, + { + "epoch": 0.54, + "learning_rate": 9.615895088525677e-05, + "loss": 2.455, + "step": 496 + }, + { + "epoch": 0.54, + "learning_rate": 9.612584480945964e-05, + "loss": 2.3267, + "step": 498 + }, + { + "epoch": 0.54, + "learning_rate": 9.609260242319709e-05, + "loss": 2.6783, + "step": 500 + }, + { + "epoch": 0.54, + "learning_rate": 9.605922382470658e-05, + "loss": 2.4699, + "step": 502 + }, + { + "epoch": 0.55, + "learning_rate": 9.602570911262805e-05, + "loss": 2.4731, + "step": 504 + }, + { + "epoch": 0.55, + "learning_rate": 9.599205838600375e-05, + "loss": 2.4779, + "step": 506 + }, + { + "epoch": 0.55, + "learning_rate": 9.595827174427786e-05, + "loss": 2.6002, + "step": 508 + }, + { + "epoch": 0.55, + "learning_rate": 9.592434928729616e-05, + "loss": 2.5211, + "step": 510 + }, + { + "epoch": 0.55, + "learning_rate": 9.589029111530586e-05, + "loss": 2.5308, + "step": 512 + }, + { + "epoch": 0.56, + "learning_rate": 9.585609732895517e-05, + "loss": 2.5043, + "step": 514 + }, + { + "epoch": 0.56, + "learning_rate": 9.582176802929315e-05, + "loss": 2.4683, + "step": 516 + }, + { + "epoch": 0.56, + "learning_rate": 9.578730331776924e-05, + "loss": 2.4226, + "step": 518 + }, + { + "epoch": 0.56, + "learning_rate": 9.575270329623309e-05, + "loss": 2.3188, + "step": 520 + }, + { + "epoch": 0.57, + "learning_rate": 9.571796806693422e-05, + "loss": 2.6134, + "step": 522 + }, + { + "epoch": 0.57, + "learning_rate": 9.568309773252171e-05, + "loss": 2.3866, + "step": 524 + }, + { + "epoch": 0.57, + "learning_rate": 9.564809239604388e-05, + "loss": 2.6209, + "step": 526 + }, + { + "epoch": 0.57, + "learning_rate": 9.5612952160948e-05, + "loss": 2.5562, + "step": 528 + }, + { + "epoch": 0.57, + "learning_rate": 9.557767713108009e-05, + "loss": 2.6116, + "step": 530 + }, + { + "epoch": 0.58, + "learning_rate": 9.554226741068432e-05, + "loss": 2.5081, + "step": 532 + }, + { + "epoch": 0.58, + "learning_rate": 9.550672310440311e-05, + "loss": 2.7225, + "step": 534 + }, + { + "epoch": 0.58, + "learning_rate": 9.547104431727647e-05, + "loss": 2.6353, + "step": 536 + }, + { + "epoch": 0.58, + "learning_rate": 9.543523115474187e-05, + "loss": 2.5658, + "step": 538 + }, + { + "epoch": 0.58, + "learning_rate": 9.539928372263387e-05, + "loss": 2.5224, + "step": 540 + }, + { + "epoch": 0.59, + "learning_rate": 9.536320212718382e-05, + "loss": 2.702, + "step": 542 + }, + { + "epoch": 0.59, + "learning_rate": 9.532698647501958e-05, + "loss": 2.4106, + "step": 544 + }, + { + "epoch": 0.59, + "learning_rate": 9.529063687316513e-05, + "loss": 2.5009, + "step": 546 + }, + { + "epoch": 0.59, + "learning_rate": 9.525415342904034e-05, + "loss": 2.4723, + "step": 548 + }, + { + "epoch": 0.6, + "learning_rate": 9.521753625046056e-05, + "loss": 2.3118, + "step": 550 + }, + { + "epoch": 0.6, + "learning_rate": 9.51807854456364e-05, + "loss": 2.6302, + "step": 552 + }, + { + "epoch": 0.6, + "learning_rate": 9.51439011231733e-05, + "loss": 2.3981, + "step": 554 + }, + { + "epoch": 0.6, + "learning_rate": 9.510688339207133e-05, + "loss": 2.4194, + "step": 556 + }, + { + "epoch": 0.6, + "learning_rate": 9.506973236172478e-05, + "loss": 2.5114, + "step": 558 + }, + { + "epoch": 0.61, + "learning_rate": 9.503244814192187e-05, + "loss": 2.4885, + "step": 560 + }, + { + "epoch": 0.61, + "learning_rate": 9.499503084284441e-05, + "loss": 2.4262, + "step": 562 + }, + { + "epoch": 0.61, + "learning_rate": 9.49574805750675e-05, + "loss": 2.2484, + "step": 564 + }, + { + "epoch": 0.61, + "learning_rate": 9.491979744955915e-05, + "loss": 2.3817, + "step": 566 + }, + { + "epoch": 0.62, + "learning_rate": 9.488198157768005e-05, + "loss": 2.455, + "step": 568 + }, + { + "epoch": 0.62, + "learning_rate": 9.484403307118312e-05, + "loss": 2.3778, + "step": 570 + }, + { + "epoch": 0.62, + "learning_rate": 9.480595204221329e-05, + "loss": 2.5499, + "step": 572 + }, + { + "epoch": 0.62, + "learning_rate": 9.47677386033071e-05, + "loss": 2.5838, + "step": 574 + }, + { + "epoch": 0.62, + "learning_rate": 9.472939286739235e-05, + "loss": 2.5077, + "step": 576 + }, + { + "epoch": 0.63, + "learning_rate": 9.469091494778785e-05, + "loss": 2.6054, + "step": 578 + }, + { + "epoch": 0.63, + "learning_rate": 9.465230495820303e-05, + "loss": 2.4767, + "step": 580 + }, + { + "epoch": 0.63, + "learning_rate": 9.461356301273758e-05, + "loss": 2.6251, + "step": 582 + }, + { + "epoch": 0.63, + "learning_rate": 9.45746892258812e-05, + "loss": 2.4725, + "step": 584 + }, + { + "epoch": 0.63, + "learning_rate": 9.453568371251316e-05, + "loss": 2.5408, + "step": 586 + }, + { + "epoch": 0.64, + "learning_rate": 9.4496546587902e-05, + "loss": 2.5397, + "step": 588 + }, + { + "epoch": 0.64, + "learning_rate": 9.445727796770524e-05, + "loss": 2.666, + "step": 590 + }, + { + "epoch": 0.64, + "learning_rate": 9.441787796796896e-05, + "loss": 2.3805, + "step": 592 + }, + { + "epoch": 0.64, + "learning_rate": 9.43783467051275e-05, + "loss": 2.57, + "step": 594 + }, + { + "epoch": 0.65, + "learning_rate": 9.43386842960031e-05, + "loss": 2.6776, + "step": 596 + }, + { + "epoch": 0.65, + "learning_rate": 9.429889085780557e-05, + "loss": 2.447, + "step": 598 + }, + { + "epoch": 0.65, + "learning_rate": 9.425896650813196e-05, + "loss": 2.6253, + "step": 600 + }, + { + "epoch": 0.65, + "learning_rate": 9.421891136496612e-05, + "loss": 2.3422, + "step": 602 + }, + { + "epoch": 0.65, + "learning_rate": 9.41787255466785e-05, + "loss": 2.3565, + "step": 604 + }, + { + "epoch": 0.66, + "learning_rate": 9.413840917202566e-05, + "loss": 2.4135, + "step": 606 + }, + { + "epoch": 0.66, + "learning_rate": 9.409796236014999e-05, + "loss": 2.6778, + "step": 608 + }, + { + "epoch": 0.66, + "learning_rate": 9.405738523057938e-05, + "loss": 2.4313, + "step": 610 + }, + { + "epoch": 0.66, + "learning_rate": 9.401667790322679e-05, + "loss": 2.4427, + "step": 612 + }, + { + "epoch": 0.66, + "learning_rate": 9.397584049838996e-05, + "loss": 2.6661, + "step": 614 + }, + { + "epoch": 0.67, + "learning_rate": 9.393487313675102e-05, + "loss": 2.4825, + "step": 616 + }, + { + "epoch": 0.67, + "learning_rate": 9.389377593937618e-05, + "loss": 2.5834, + "step": 618 + }, + { + "epoch": 0.67, + "learning_rate": 9.38525490277153e-05, + "loss": 2.4413, + "step": 620 + }, + { + "epoch": 0.67, + "learning_rate": 9.38111925236016e-05, + "loss": 2.5265, + "step": 622 + }, + { + "epoch": 0.68, + "learning_rate": 9.376970654925124e-05, + "loss": 2.5181, + "step": 624 + }, + { + "epoch": 0.68, + "learning_rate": 9.372809122726299e-05, + "loss": 2.6319, + "step": 626 + }, + { + "epoch": 0.68, + "learning_rate": 9.368634668061791e-05, + "loss": 2.7302, + "step": 628 + }, + { + "epoch": 0.68, + "learning_rate": 9.364447303267889e-05, + "loss": 2.5624, + "step": 630 + }, + { + "epoch": 0.68, + "learning_rate": 9.360247040719039e-05, + "loss": 2.4739, + "step": 632 + }, + { + "epoch": 0.69, + "learning_rate": 9.356033892827796e-05, + "loss": 2.3922, + "step": 634 + }, + { + "epoch": 0.69, + "learning_rate": 9.3518078720448e-05, + "loss": 2.5975, + "step": 636 + }, + { + "epoch": 0.69, + "learning_rate": 9.347568990858726e-05, + "loss": 2.4533, + "step": 638 + }, + { + "epoch": 0.69, + "learning_rate": 9.343317261796262e-05, + "loss": 2.4955, + "step": 640 + }, + { + "epoch": 0.7, + "learning_rate": 9.339052697422057e-05, + "loss": 2.4098, + "step": 642 + }, + { + "epoch": 0.7, + "learning_rate": 9.334775310338694e-05, + "loss": 2.705, + "step": 644 + }, + { + "epoch": 0.7, + "learning_rate": 9.330485113186648e-05, + "loss": 2.4335, + "step": 646 + }, + { + "epoch": 0.7, + "learning_rate": 9.326182118644254e-05, + "loss": 2.6452, + "step": 648 + }, + { + "epoch": 0.7, + "learning_rate": 9.321866339427658e-05, + "loss": 2.4124, + "step": 650 + }, + { + "epoch": 0.71, + "learning_rate": 9.317537788290794e-05, + "loss": 2.3303, + "step": 652 + }, + { + "epoch": 0.71, + "learning_rate": 9.313196478025337e-05, + "loss": 2.4199, + "step": 654 + }, + { + "epoch": 0.71, + "learning_rate": 9.308842421460667e-05, + "loss": 2.4577, + "step": 656 + }, + { + "epoch": 0.71, + "learning_rate": 9.304475631463834e-05, + "loss": 2.5357, + "step": 658 + }, + { + "epoch": 0.71, + "learning_rate": 9.300096120939516e-05, + "loss": 2.418, + "step": 660 + }, + { + "epoch": 0.72, + "learning_rate": 9.29570390282998e-05, + "loss": 2.4678, + "step": 662 + }, + { + "epoch": 0.72, + "learning_rate": 9.29129899011505e-05, + "loss": 2.3826, + "step": 664 + }, + { + "epoch": 0.72, + "learning_rate": 9.286881395812066e-05, + "loss": 2.4426, + "step": 666 + }, + { + "epoch": 0.72, + "learning_rate": 9.28245113297584e-05, + "loss": 2.3528, + "step": 668 + }, + { + "epoch": 0.73, + "learning_rate": 9.278008214698624e-05, + "loss": 2.4743, + "step": 670 + }, + { + "epoch": 0.73, + "learning_rate": 9.27355265411007e-05, + "loss": 2.3887, + "step": 672 + }, + { + "epoch": 0.73, + "learning_rate": 9.26908446437719e-05, + "loss": 2.468, + "step": 674 + }, + { + "epoch": 0.73, + "learning_rate": 9.264603658704318e-05, + "loss": 2.352, + "step": 676 + }, + { + "epoch": 0.73, + "learning_rate": 9.260110250333066e-05, + "loss": 2.4429, + "step": 678 + }, + { + "epoch": 0.74, + "learning_rate": 9.255604252542296e-05, + "loss": 2.3712, + "step": 680 + }, + { + "epoch": 0.74, + "learning_rate": 9.251085678648072e-05, + "loss": 2.5003, + "step": 682 + }, + { + "epoch": 0.74, + "learning_rate": 9.246554542003618e-05, + "loss": 2.5098, + "step": 684 + }, + { + "epoch": 0.74, + "learning_rate": 9.24201085599929e-05, + "loss": 2.6184, + "step": 686 + }, + { + "epoch": 0.74, + "learning_rate": 9.237454634062525e-05, + "loss": 2.4172, + "step": 688 + }, + { + "epoch": 0.75, + "learning_rate": 9.23288588965781e-05, + "loss": 2.4234, + "step": 690 + }, + { + "epoch": 0.75, + "learning_rate": 9.228304636286633e-05, + "loss": 2.6617, + "step": 692 + }, + { + "epoch": 0.75, + "learning_rate": 9.223710887487453e-05, + "loss": 2.422, + "step": 694 + }, + { + "epoch": 0.75, + "learning_rate": 9.219104656835654e-05, + "loss": 2.6878, + "step": 696 + }, + { + "epoch": 0.76, + "learning_rate": 9.214485957943503e-05, + "loss": 2.6575, + "step": 698 + }, + { + "epoch": 0.76, + "learning_rate": 9.209854804460121e-05, + "loss": 2.369, + "step": 700 + }, + { + "epoch": 0.76, + "learning_rate": 9.205211210071426e-05, + "loss": 2.5432, + "step": 702 + }, + { + "epoch": 0.76, + "learning_rate": 9.200555188500103e-05, + "loss": 2.5313, + "step": 704 + }, + { + "epoch": 0.76, + "learning_rate": 9.195886753505565e-05, + "loss": 2.3887, + "step": 706 + }, + { + "epoch": 0.77, + "learning_rate": 9.191205918883909e-05, + "loss": 2.6655, + "step": 708 + }, + { + "epoch": 0.77, + "learning_rate": 9.18651269846787e-05, + "loss": 2.5833, + "step": 710 + }, + { + "epoch": 0.77, + "learning_rate": 9.181807106126792e-05, + "loss": 2.6638, + "step": 712 + }, + { + "epoch": 0.77, + "learning_rate": 9.177089155766574e-05, + "loss": 2.4395, + "step": 714 + }, + { + "epoch": 0.78, + "learning_rate": 9.172358861329641e-05, + "loss": 2.5247, + "step": 716 + }, + { + "epoch": 0.78, + "learning_rate": 9.167616236794894e-05, + "loss": 2.4711, + "step": 718 + }, + { + "epoch": 0.78, + "learning_rate": 9.162861296177671e-05, + "loss": 2.4537, + "step": 720 + }, + { + "epoch": 0.78, + "learning_rate": 9.158094053529709e-05, + "loss": 2.4404, + "step": 722 + }, + { + "epoch": 0.78, + "learning_rate": 9.153314522939096e-05, + "loss": 2.4599, + "step": 724 + }, + { + "epoch": 0.79, + "learning_rate": 9.148522718530236e-05, + "loss": 2.5289, + "step": 726 + }, + { + "epoch": 0.79, + "learning_rate": 9.143718654463804e-05, + "loss": 2.2966, + "step": 728 + }, + { + "epoch": 0.79, + "learning_rate": 9.138902344936706e-05, + "loss": 2.4635, + "step": 730 + }, + { + "epoch": 0.79, + "learning_rate": 9.134073804182033e-05, + "loss": 2.6182, + "step": 732 + }, + { + "epoch": 0.79, + "learning_rate": 9.129233046469022e-05, + "loss": 2.6568, + "step": 734 + }, + { + "epoch": 0.8, + "learning_rate": 9.124380086103013e-05, + "loss": 2.5841, + "step": 736 + }, + { + "epoch": 0.8, + "learning_rate": 9.11951493742541e-05, + "loss": 2.609, + "step": 738 + }, + { + "epoch": 0.8, + "learning_rate": 9.114637614813634e-05, + "loss": 2.3299, + "step": 740 + }, + { + "epoch": 0.8, + "learning_rate": 9.109748132681082e-05, + "loss": 2.5093, + "step": 742 + }, + { + "epoch": 0.81, + "learning_rate": 9.104846505477083e-05, + "loss": 2.4223, + "step": 744 + }, + { + "epoch": 0.81, + "learning_rate": 9.09993274768686e-05, + "loss": 2.4636, + "step": 746 + }, + { + "epoch": 0.81, + "learning_rate": 9.095006873831479e-05, + "loss": 2.3136, + "step": 748 + }, + { + "epoch": 0.81, + "learning_rate": 9.090068898467823e-05, + "loss": 2.557, + "step": 750 + }, + { + "epoch": 0.81, + "learning_rate": 9.085118836188521e-05, + "loss": 2.4634, + "step": 752 + }, + { + "epoch": 0.82, + "learning_rate": 9.080156701621936e-05, + "loss": 2.5238, + "step": 754 + }, + { + "epoch": 0.82, + "learning_rate": 9.075182509432095e-05, + "loss": 2.4833, + "step": 756 + }, + { + "epoch": 0.82, + "learning_rate": 9.070196274318666e-05, + "loss": 2.6603, + "step": 758 + }, + { + "epoch": 0.82, + "learning_rate": 9.0651980110169e-05, + "loss": 2.4763, + "step": 760 + }, + { + "epoch": 0.83, + "learning_rate": 9.060187734297599e-05, + "loss": 2.4662, + "step": 762 + }, + { + "epoch": 0.83, + "learning_rate": 9.055165458967063e-05, + "loss": 2.4409, + "step": 764 + }, + { + "epoch": 0.83, + "learning_rate": 9.050131199867052e-05, + "loss": 2.5474, + "step": 766 + }, + { + "epoch": 0.83, + "learning_rate": 9.045084971874738e-05, + "loss": 2.5071, + "step": 768 + }, + { + "epoch": 0.83, + "learning_rate": 9.040026789902665e-05, + "loss": 2.4774, + "step": 770 + }, + { + "epoch": 0.84, + "learning_rate": 9.034956668898706e-05, + "loss": 2.4119, + "step": 772 + }, + { + "epoch": 0.84, + "learning_rate": 9.029874623846011e-05, + "loss": 2.4335, + "step": 774 + }, + { + "epoch": 0.84, + "learning_rate": 9.02478066976297e-05, + "loss": 2.3666, + "step": 776 + }, + { + "epoch": 0.84, + "learning_rate": 9.019674821703166e-05, + "loss": 2.5817, + "step": 778 + }, + { + "epoch": 0.84, + "learning_rate": 9.014557094755331e-05, + "loss": 2.2798, + "step": 780 + }, + { + "epoch": 0.85, + "learning_rate": 9.009427504043305e-05, + "loss": 2.5065, + "step": 782 + }, + { + "epoch": 0.85, + "learning_rate": 9.004286064725982e-05, + "loss": 2.4121, + "step": 784 + }, + { + "epoch": 0.85, + "learning_rate": 8.999132791997271e-05, + "loss": 2.5618, + "step": 786 + }, + { + "epoch": 0.85, + "learning_rate": 8.993967701086057e-05, + "loss": 2.7772, + "step": 788 + }, + { + "epoch": 0.86, + "learning_rate": 8.988790807256143e-05, + "loss": 2.3717, + "step": 790 + }, + { + "epoch": 0.86, + "learning_rate": 8.983602125806216e-05, + "loss": 2.5273, + "step": 792 + }, + { + "epoch": 0.86, + "learning_rate": 8.978401672069797e-05, + "loss": 2.5246, + "step": 794 + }, + { + "epoch": 0.86, + "learning_rate": 8.973189461415194e-05, + "loss": 2.5115, + "step": 796 + }, + { + "epoch": 0.86, + "learning_rate": 8.967965509245461e-05, + "loss": 2.583, + "step": 798 + }, + { + "epoch": 0.87, + "learning_rate": 8.962729830998353e-05, + "loss": 2.4989, + "step": 800 + }, + { + "epoch": 0.87, + "learning_rate": 8.957482442146272e-05, + "loss": 2.4011, + "step": 802 + }, + { + "epoch": 0.87, + "learning_rate": 8.952223358196227e-05, + "loss": 2.424, + "step": 804 + }, + { + "epoch": 0.87, + "learning_rate": 8.946952594689797e-05, + "loss": 2.5144, + "step": 806 + }, + { + "epoch": 0.87, + "learning_rate": 8.941670167203067e-05, + "loss": 2.4956, + "step": 808 + }, + { + "epoch": 0.88, + "learning_rate": 8.936376091346595e-05, + "loss": 2.5917, + "step": 810 + }, + { + "epoch": 0.88, + "learning_rate": 8.931070382765359e-05, + "loss": 2.3386, + "step": 812 + }, + { + "epoch": 0.88, + "learning_rate": 8.925753057138719e-05, + "loss": 2.4911, + "step": 814 + }, + { + "epoch": 0.88, + "learning_rate": 8.920424130180363e-05, + "loss": 2.5727, + "step": 816 + }, + { + "epoch": 0.89, + "learning_rate": 8.915083617638262e-05, + "loss": 2.4148, + "step": 818 + }, + { + "epoch": 0.89, + "learning_rate": 8.909731535294628e-05, + "loss": 2.4859, + "step": 820 + }, + { + "epoch": 0.89, + "learning_rate": 8.904367898965857e-05, + "loss": 2.4704, + "step": 822 + }, + { + "epoch": 0.89, + "learning_rate": 8.898992724502498e-05, + "loss": 2.5904, + "step": 824 + }, + { + "epoch": 0.89, + "learning_rate": 8.893606027789192e-05, + "loss": 2.5586, + "step": 826 + }, + { + "epoch": 0.9, + "learning_rate": 8.888207824744629e-05, + "loss": 2.4723, + "step": 828 + }, + { + "epoch": 0.9, + "learning_rate": 8.882798131321508e-05, + "loss": 2.5011, + "step": 830 + }, + { + "epoch": 0.9, + "learning_rate": 8.877376963506477e-05, + "loss": 2.6237, + "step": 832 + }, + { + "epoch": 0.9, + "learning_rate": 8.871944337320102e-05, + "loss": 2.548, + "step": 834 + }, + { + "epoch": 0.91, + "learning_rate": 8.866500268816803e-05, + "loss": 2.3527, + "step": 836 + }, + { + "epoch": 0.91, + "learning_rate": 8.861044774084815e-05, + "loss": 2.6638, + "step": 838 + }, + { + "epoch": 0.91, + "learning_rate": 8.855577869246142e-05, + "loss": 2.4873, + "step": 840 + }, + { + "epoch": 0.91, + "learning_rate": 8.850099570456509e-05, + "loss": 2.4461, + "step": 842 + }, + { + "epoch": 0.91, + "learning_rate": 8.844609893905309e-05, + "loss": 2.4031, + "step": 844 + }, + { + "epoch": 0.92, + "learning_rate": 8.839108855815557e-05, + "loss": 2.5516, + "step": 846 + }, + { + "epoch": 0.92, + "learning_rate": 8.833596472443848e-05, + "loss": 2.4283, + "step": 848 + }, + { + "epoch": 0.92, + "learning_rate": 8.828072760080299e-05, + "loss": 2.2932, + "step": 850 + }, + { + "epoch": 0.92, + "learning_rate": 8.822537735048512e-05, + "loss": 2.3761, + "step": 852 + }, + { + "epoch": 0.92, + "learning_rate": 8.816991413705516e-05, + "loss": 2.4804, + "step": 854 + }, + { + "epoch": 0.93, + "learning_rate": 8.811433812441722e-05, + "loss": 2.6496, + "step": 856 + }, + { + "epoch": 0.93, + "learning_rate": 8.80586494768088e-05, + "loss": 2.4868, + "step": 858 + }, + { + "epoch": 0.93, + "learning_rate": 8.800284835880024e-05, + "loss": 2.679, + "step": 860 + }, + { + "epoch": 0.93, + "learning_rate": 8.79469349352942e-05, + "loss": 2.6229, + "step": 862 + }, + { + "epoch": 0.94, + "learning_rate": 8.78909093715253e-05, + "loss": 2.5438, + "step": 864 + }, + { + "epoch": 0.94, + "learning_rate": 8.783477183305949e-05, + "loss": 2.4863, + "step": 866 + }, + { + "epoch": 0.94, + "learning_rate": 8.777852248579367e-05, + "loss": 2.5205, + "step": 868 + }, + { + "epoch": 0.94, + "learning_rate": 8.772216149595513e-05, + "loss": 2.4179, + "step": 870 + }, + { + "epoch": 0.94, + "learning_rate": 8.766568903010113e-05, + "loss": 2.4653, + "step": 872 + }, + { + "epoch": 0.95, + "learning_rate": 8.76091052551183e-05, + "loss": 2.3727, + "step": 874 + }, + { + "epoch": 0.95, + "learning_rate": 8.755241033822224e-05, + "loss": 2.5503, + "step": 876 + }, + { + "epoch": 0.95, + "learning_rate": 8.7495604446957e-05, + "loss": 2.4288, + "step": 878 + }, + { + "epoch": 0.95, + "learning_rate": 8.743868774919458e-05, + "loss": 2.451, + "step": 880 + }, + { + "epoch": 0.96, + "learning_rate": 8.738166041313439e-05, + "loss": 2.3869, + "step": 882 + }, + { + "epoch": 0.96, + "learning_rate": 8.732452260730286e-05, + "loss": 2.5419, + "step": 884 + }, + { + "epoch": 0.96, + "learning_rate": 8.726727450055287e-05, + "loss": 2.3962, + "step": 886 + }, + { + "epoch": 0.96, + "learning_rate": 8.720991626206321e-05, + "loss": 2.4672, + "step": 888 + }, + { + "epoch": 0.96, + "learning_rate": 8.715244806133816e-05, + "loss": 2.4988, + "step": 890 + }, + { + "epoch": 0.97, + "learning_rate": 8.7094870068207e-05, + "loss": 2.2557, + "step": 892 + }, + { + "epoch": 0.97, + "learning_rate": 8.703718245282337e-05, + "loss": 2.5007, + "step": 894 + }, + { + "epoch": 0.97, + "learning_rate": 8.697938538566499e-05, + "loss": 2.4908, + "step": 896 + }, + { + "epoch": 0.97, + "learning_rate": 8.69214790375329e-05, + "loss": 2.4346, + "step": 898 + }, + { + "epoch": 0.97, + "learning_rate": 8.686346357955117e-05, + "loss": 2.2897, + "step": 900 + }, + { + "epoch": 0.98, + "learning_rate": 8.68053391831663e-05, + "loss": 2.3337, + "step": 902 + }, + { + "epoch": 0.98, + "learning_rate": 8.674710602014671e-05, + "loss": 2.4618, + "step": 904 + }, + { + "epoch": 0.98, + "learning_rate": 8.668876426258221e-05, + "loss": 2.5041, + "step": 906 + }, + { + "epoch": 0.98, + "learning_rate": 8.66303140828836e-05, + "loss": 2.4207, + "step": 908 + }, + { + "epoch": 0.99, + "learning_rate": 8.657175565378206e-05, + "loss": 2.4657, + "step": 910 + }, + { + "epoch": 0.99, + "learning_rate": 8.651308914832862e-05, + "loss": 2.5422, + "step": 912 + }, + { + "epoch": 0.99, + "learning_rate": 8.645431473989376e-05, + "loss": 2.4069, + "step": 914 + }, + { + "epoch": 0.99, + "learning_rate": 8.63954326021668e-05, + "loss": 2.6141, + "step": 916 + }, + { + "epoch": 0.99, + "learning_rate": 8.633644290915545e-05, + "loss": 2.7452, + "step": 918 + }, + { + "epoch": 1.0, + "learning_rate": 8.627734583518521e-05, + "loss": 2.4625, + "step": 920 + }, + { + "epoch": 1.0, + "learning_rate": 8.621814155489895e-05, + "loss": 2.3913, + "step": 922 + }, + { + "epoch": 1.0, + "learning_rate": 8.615883024325636e-05, + "loss": 3.1472, + "step": 924 + }, + { + "epoch": 1.0, + "learning_rate": 8.609941207553342e-05, + "loss": 2.4791, + "step": 926 + }, + { + "epoch": 1.01, + "learning_rate": 8.603988722732186e-05, + "loss": 2.4555, + "step": 928 + }, + { + "epoch": 1.01, + "learning_rate": 8.598025587452873e-05, + "loss": 2.5092, + "step": 930 + }, + { + "epoch": 1.01, + "learning_rate": 8.592051819337579e-05, + "loss": 2.5088, + "step": 932 + }, + { + "epoch": 1.01, + "learning_rate": 8.586067436039899e-05, + "loss": 2.5663, + "step": 934 + }, + { + "epoch": 1.01, + "learning_rate": 8.580072455244801e-05, + "loss": 2.5562, + "step": 936 + }, + { + "epoch": 1.02, + "learning_rate": 8.574066894668573e-05, + "loss": 2.4265, + "step": 938 + }, + { + "epoch": 1.02, + "learning_rate": 8.568050772058762e-05, + "loss": 2.473, + "step": 940 + }, + { + "epoch": 1.02, + "learning_rate": 8.562024105194133e-05, + "loss": 2.5223, + "step": 942 + }, + { + "epoch": 1.02, + "learning_rate": 8.555986911884609e-05, + "loss": 2.3263, + "step": 944 + }, + { + "epoch": 1.02, + "learning_rate": 8.549939209971221e-05, + "loss": 2.2938, + "step": 946 + }, + { + "epoch": 1.03, + "learning_rate": 8.543881017326057e-05, + "loss": 2.321, + "step": 948 + }, + { + "epoch": 1.03, + "learning_rate": 8.537812351852201e-05, + "loss": 2.4323, + "step": 950 + }, + { + "epoch": 1.03, + "learning_rate": 8.531733231483694e-05, + "loss": 2.365, + "step": 952 + }, + { + "epoch": 1.03, + "learning_rate": 8.525643674185466e-05, + "loss": 2.4085, + "step": 954 + }, + { + "epoch": 1.04, + "learning_rate": 8.519543697953296e-05, + "loss": 2.4288, + "step": 956 + }, + { + "epoch": 1.04, + "learning_rate": 8.51343332081375e-05, + "loss": 2.6551, + "step": 958 + }, + { + "epoch": 1.04, + "learning_rate": 8.50731256082413e-05, + "loss": 2.4887, + "step": 960 + }, + { + "epoch": 1.04, + "learning_rate": 8.501181436072422e-05, + "loss": 2.6168, + "step": 962 + }, + { + "epoch": 1.04, + "learning_rate": 8.495039964677241e-05, + "loss": 2.4247, + "step": 964 + }, + { + "epoch": 1.05, + "learning_rate": 8.488888164787782e-05, + "loss": 2.5132, + "step": 966 + }, + { + "epoch": 1.05, + "learning_rate": 8.482726054583761e-05, + "loss": 2.5011, + "step": 968 + }, + { + "epoch": 1.05, + "learning_rate": 8.476553652275356e-05, + "loss": 2.4964, + "step": 970 + }, + { + "epoch": 1.05, + "learning_rate": 8.47037097610317e-05, + "loss": 2.3202, + "step": 972 + }, + { + "epoch": 1.06, + "learning_rate": 8.464178044338162e-05, + "loss": 2.2058, + "step": 974 + }, + { + "epoch": 1.06, + "learning_rate": 8.4579748752816e-05, + "loss": 2.585, + "step": 976 + }, + { + "epoch": 1.06, + "learning_rate": 8.451761487265003e-05, + "loss": 2.3743, + "step": 978 + }, + { + "epoch": 1.06, + "learning_rate": 8.44553789865009e-05, + "loss": 2.2927, + "step": 980 + }, + { + "epoch": 1.06, + "learning_rate": 8.439304127828728e-05, + "loss": 2.3899, + "step": 982 + }, + { + "epoch": 1.07, + "learning_rate": 8.433060193222868e-05, + "loss": 2.462, + "step": 984 + }, + { + "epoch": 1.07, + "learning_rate": 8.426806113284502e-05, + "loss": 2.4369, + "step": 986 + }, + { + "epoch": 1.07, + "learning_rate": 8.420541906495599e-05, + "loss": 2.4967, + "step": 988 + }, + { + "epoch": 1.07, + "learning_rate": 8.414267591368058e-05, + "loss": 2.6217, + "step": 990 + }, + { + "epoch": 1.07, + "learning_rate": 8.407983186443653e-05, + "loss": 2.6545, + "step": 992 + }, + { + "epoch": 1.08, + "learning_rate": 8.401688710293967e-05, + "loss": 2.4993, + "step": 994 + }, + { + "epoch": 1.08, + "learning_rate": 8.395384181520351e-05, + "loss": 2.3227, + "step": 996 + }, + { + "epoch": 1.08, + "learning_rate": 8.389069618753865e-05, + "loss": 2.325, + "step": 998 + }, + { + "epoch": 1.08, + "learning_rate": 8.382745040655212e-05, + "loss": 2.6491, + "step": 1000 + }, + { + "epoch": 1.09, + "learning_rate": 8.376410465914705e-05, + "loss": 2.4874, + "step": 1002 + }, + { + "epoch": 1.09, + "learning_rate": 8.370065913252188e-05, + "loss": 2.505, + "step": 1004 + }, + { + "epoch": 1.09, + "learning_rate": 8.363711401417e-05, + "loss": 2.4867, + "step": 1006 + }, + { + "epoch": 1.09, + "learning_rate": 8.357346949187906e-05, + "loss": 2.2378, + "step": 1008 + }, + { + "epoch": 1.09, + "learning_rate": 8.350972575373047e-05, + "loss": 2.372, + "step": 1010 + }, + { + "epoch": 1.1, + "learning_rate": 8.344588298809887e-05, + "loss": 2.3432, + "step": 1012 + }, + { + "epoch": 1.1, + "learning_rate": 8.338194138365151e-05, + "loss": 2.6878, + "step": 1014 + }, + { + "epoch": 1.1, + "learning_rate": 8.331790112934777e-05, + "loss": 2.4083, + "step": 1016 + }, + { + "epoch": 1.1, + "learning_rate": 8.325376241443849e-05, + "loss": 2.4451, + "step": 1018 + }, + { + "epoch": 1.11, + "learning_rate": 8.318952542846557e-05, + "loss": 2.3759, + "step": 1020 + }, + { + "epoch": 1.11, + "learning_rate": 8.312519036126125e-05, + "loss": 2.5355, + "step": 1022 + }, + { + "epoch": 1.11, + "learning_rate": 8.306075740294763e-05, + "loss": 2.4161, + "step": 1024 + }, + { + "epoch": 1.11, + "learning_rate": 8.299622674393614e-05, + "loss": 2.3455, + "step": 1026 + }, + { + "epoch": 1.11, + "learning_rate": 8.293159857492686e-05, + "loss": 2.469, + "step": 1028 + }, + { + "epoch": 1.12, + "learning_rate": 8.28668730869081e-05, + "loss": 2.3113, + "step": 1030 + }, + { + "epoch": 1.12, + "learning_rate": 8.280205047115572e-05, + "loss": 2.4072, + "step": 1032 + }, + { + "epoch": 1.12, + "learning_rate": 8.273713091923264e-05, + "loss": 2.5218, + "step": 1034 + }, + { + "epoch": 1.12, + "learning_rate": 8.267211462298822e-05, + "loss": 2.374, + "step": 1036 + }, + { + "epoch": 1.12, + "learning_rate": 8.260700177455773e-05, + "loss": 2.452, + "step": 1038 + }, + { + "epoch": 1.13, + "learning_rate": 8.254179256636179e-05, + "loss": 2.4523, + "step": 1040 + }, + { + "epoch": 1.13, + "learning_rate": 8.247648719110572e-05, + "loss": 2.5231, + "step": 1042 + }, + { + "epoch": 1.13, + "learning_rate": 8.241108584177911e-05, + "loss": 2.5678, + "step": 1044 + }, + { + "epoch": 1.13, + "learning_rate": 8.234558871165512e-05, + "loss": 2.449, + "step": 1046 + }, + { + "epoch": 1.14, + "learning_rate": 8.227999599428995e-05, + "loss": 2.3786, + "step": 1048 + }, + { + "epoch": 1.14, + "learning_rate": 8.221430788352233e-05, + "loss": 2.3994, + "step": 1050 + }, + { + "epoch": 1.14, + "learning_rate": 8.214852457347286e-05, + "loss": 2.5034, + "step": 1052 + }, + { + "epoch": 1.14, + "learning_rate": 8.208264625854347e-05, + "loss": 2.2819, + "step": 1054 + }, + { + "epoch": 1.14, + "learning_rate": 8.201667313341685e-05, + "loss": 2.4361, + "step": 1056 + }, + { + "epoch": 1.15, + "learning_rate": 8.19506053930559e-05, + "loss": 2.3855, + "step": 1058 + }, + { + "epoch": 1.15, + "learning_rate": 8.18844432327031e-05, + "loss": 2.4898, + "step": 1060 + }, + { + "epoch": 1.15, + "learning_rate": 8.181818684787992e-05, + "loss": 2.5017, + "step": 1062 + }, + { + "epoch": 1.15, + "learning_rate": 8.175183643438635e-05, + "loss": 2.402, + "step": 1064 + }, + { + "epoch": 1.15, + "learning_rate": 8.168539218830024e-05, + "loss": 2.3225, + "step": 1066 + }, + { + "epoch": 1.16, + "learning_rate": 8.16188543059767e-05, + "loss": 2.3171, + "step": 1068 + }, + { + "epoch": 1.16, + "learning_rate": 8.155222298404756e-05, + "loss": 2.5654, + "step": 1070 + }, + { + "epoch": 1.16, + "learning_rate": 8.148549841942082e-05, + "loss": 2.3448, + "step": 1072 + }, + { + "epoch": 1.16, + "learning_rate": 8.141868080927996e-05, + "loss": 2.2422, + "step": 1074 + }, + { + "epoch": 1.17, + "learning_rate": 8.135177035108352e-05, + "loss": 2.6608, + "step": 1076 + }, + { + "epoch": 1.17, + "learning_rate": 8.128476724256431e-05, + "loss": 2.486, + "step": 1078 + }, + { + "epoch": 1.17, + "learning_rate": 8.121767168172904e-05, + "loss": 2.3347, + "step": 1080 + }, + { + "epoch": 1.17, + "learning_rate": 8.115048386685757e-05, + "loss": 2.4229, + "step": 1082 + }, + { + "epoch": 1.17, + "learning_rate": 8.108320399650244e-05, + "loss": 2.6345, + "step": 1084 + }, + { + "epoch": 1.18, + "learning_rate": 8.101583226948819e-05, + "loss": 2.5783, + "step": 1086 + }, + { + "epoch": 1.18, + "learning_rate": 8.09483688849108e-05, + "loss": 2.4985, + "step": 1088 + }, + { + "epoch": 1.18, + "learning_rate": 8.088081404213718e-05, + "loss": 2.3184, + "step": 1090 + }, + { + "epoch": 1.18, + "learning_rate": 8.081316794080445e-05, + "loss": 2.324, + "step": 1092 + }, + { + "epoch": 1.19, + "learning_rate": 8.074543078081946e-05, + "loss": 2.504, + "step": 1094 + }, + { + "epoch": 1.19, + "learning_rate": 8.067760276235812e-05, + "loss": 2.3798, + "step": 1096 + }, + { + "epoch": 1.19, + "learning_rate": 8.060968408586489e-05, + "loss": 2.4197, + "step": 1098 + }, + { + "epoch": 1.19, + "learning_rate": 8.054167495205207e-05, + "loss": 2.4555, + "step": 1100 + }, + { + "epoch": 1.19, + "learning_rate": 8.047357556189936e-05, + "loss": 2.6626, + "step": 1102 + }, + { + "epoch": 1.2, + "learning_rate": 8.040538611665314e-05, + "loss": 2.5664, + "step": 1104 + }, + { + "epoch": 1.2, + "learning_rate": 8.033710681782592e-05, + "loss": 2.4436, + "step": 1106 + }, + { + "epoch": 1.2, + "learning_rate": 8.026873786719573e-05, + "loss": 2.5044, + "step": 1108 + }, + { + "epoch": 1.2, + "learning_rate": 8.02002794668056e-05, + "loss": 2.488, + "step": 1110 + }, + { + "epoch": 1.2, + "learning_rate": 8.013173181896283e-05, + "loss": 2.4565, + "step": 1112 + }, + { + "epoch": 1.21, + "learning_rate": 8.006309512623848e-05, + "loss": 2.5484, + "step": 1114 + }, + { + "epoch": 1.21, + "learning_rate": 7.99943695914668e-05, + "loss": 2.4438, + "step": 1116 + }, + { + "epoch": 1.21, + "learning_rate": 7.992555541774452e-05, + "loss": 2.4668, + "step": 1118 + }, + { + "epoch": 1.21, + "learning_rate": 7.985665280843035e-05, + "loss": 2.5129, + "step": 1120 + }, + { + "epoch": 1.22, + "learning_rate": 7.978766196714436e-05, + "loss": 2.2599, + "step": 1122 + }, + { + "epoch": 1.22, + "learning_rate": 7.97185830977673e-05, + "loss": 2.4388, + "step": 1124 + }, + { + "epoch": 1.22, + "learning_rate": 7.964941640444014e-05, + "loss": 2.5566, + "step": 1126 + }, + { + "epoch": 1.22, + "learning_rate": 7.958016209156331e-05, + "loss": 2.3852, + "step": 1128 + }, + { + "epoch": 1.22, + "learning_rate": 7.951082036379625e-05, + "loss": 2.3447, + "step": 1130 + }, + { + "epoch": 1.23, + "learning_rate": 7.944139142605665e-05, + "loss": 2.471, + "step": 1132 + }, + { + "epoch": 1.23, + "learning_rate": 7.937187548351996e-05, + "loss": 2.4846, + "step": 1134 + }, + { + "epoch": 1.23, + "learning_rate": 7.930227274161877e-05, + "loss": 2.433, + "step": 1136 + }, + { + "epoch": 1.23, + "learning_rate": 7.923258340604212e-05, + "loss": 2.7046, + "step": 1138 + }, + { + "epoch": 1.23, + "learning_rate": 7.916280768273498e-05, + "loss": 2.2928, + "step": 1140 + }, + { + "epoch": 1.24, + "learning_rate": 7.909294577789766e-05, + "loss": 2.5962, + "step": 1142 + }, + { + "epoch": 1.24, + "learning_rate": 7.902299789798505e-05, + "loss": 2.4707, + "step": 1144 + }, + { + "epoch": 1.24, + "learning_rate": 7.895296424970618e-05, + "loss": 2.4212, + "step": 1146 + }, + { + "epoch": 1.24, + "learning_rate": 7.888284504002352e-05, + "loss": 2.5168, + "step": 1148 + }, + { + "epoch": 1.25, + "learning_rate": 7.881264047615245e-05, + "loss": 2.5038, + "step": 1150 + }, + { + "epoch": 1.25, + "learning_rate": 7.874235076556046e-05, + "loss": 2.2647, + "step": 1152 + }, + { + "epoch": 1.25, + "learning_rate": 7.867197611596683e-05, + "loss": 2.5225, + "step": 1154 + }, + { + "epoch": 1.25, + "learning_rate": 7.860151673534168e-05, + "loss": 2.3552, + "step": 1156 + }, + { + "epoch": 1.25, + "learning_rate": 7.853097283190567e-05, + "loss": 2.5299, + "step": 1158 + }, + { + "epoch": 1.26, + "learning_rate": 7.846034461412912e-05, + "loss": 2.476, + "step": 1160 + }, + { + "epoch": 1.26, + "learning_rate": 7.838963229073162e-05, + "loss": 2.3523, + "step": 1162 + }, + { + "epoch": 1.26, + "learning_rate": 7.831883607068125e-05, + "loss": 2.4746, + "step": 1164 + }, + { + "epoch": 1.26, + "learning_rate": 7.824795616319402e-05, + "loss": 2.4551, + "step": 1166 + }, + { + "epoch": 1.27, + "learning_rate": 7.817699277773325e-05, + "loss": 2.4863, + "step": 1168 + }, + { + "epoch": 1.27, + "learning_rate": 7.810594612400898e-05, + "loss": 2.5789, + "step": 1170 + }, + { + "epoch": 1.27, + "learning_rate": 7.803481641197733e-05, + "loss": 2.487, + "step": 1172 + }, + { + "epoch": 1.27, + "learning_rate": 7.796360385183984e-05, + "loss": 2.5997, + "step": 1174 + }, + { + "epoch": 1.27, + "learning_rate": 7.789230865404287e-05, + "loss": 2.3587, + "step": 1176 + }, + { + "epoch": 1.28, + "learning_rate": 7.782093102927703e-05, + "loss": 2.7109, + "step": 1178 + }, + { + "epoch": 1.28, + "learning_rate": 7.77494711884765e-05, + "loss": 2.5783, + "step": 1180 + }, + { + "epoch": 1.28, + "learning_rate": 7.767792934281843e-05, + "loss": 2.4947, + "step": 1182 + }, + { + "epoch": 1.28, + "learning_rate": 7.76063057037223e-05, + "loss": 2.3812, + "step": 1184 + }, + { + "epoch": 1.28, + "learning_rate": 7.753460048284928e-05, + "loss": 2.3337, + "step": 1186 + }, + { + "epoch": 1.29, + "learning_rate": 7.74628138921017e-05, + "loss": 2.5691, + "step": 1188 + }, + { + "epoch": 1.29, + "learning_rate": 7.739094614362229e-05, + "loss": 2.4811, + "step": 1190 + }, + { + "epoch": 1.29, + "learning_rate": 7.731899744979364e-05, + "loss": 2.618, + "step": 1192 + }, + { + "epoch": 1.29, + "learning_rate": 7.724696802323755e-05, + "loss": 2.2892, + "step": 1194 + }, + { + "epoch": 1.3, + "learning_rate": 7.717485807681437e-05, + "loss": 2.3032, + "step": 1196 + }, + { + "epoch": 1.3, + "learning_rate": 7.710266782362247e-05, + "loss": 2.4592, + "step": 1198 + }, + { + "epoch": 1.3, + "learning_rate": 7.703039747699747e-05, + "loss": 2.3496, + "step": 1200 + }, + { + "epoch": 1.3, + "learning_rate": 7.695804725051172e-05, + "loss": 2.423, + "step": 1202 + }, + { + "epoch": 1.3, + "learning_rate": 7.68856173579736e-05, + "loss": 2.4122, + "step": 1204 + }, + { + "epoch": 1.31, + "learning_rate": 7.681310801342696e-05, + "loss": 2.3985, + "step": 1206 + }, + { + "epoch": 1.31, + "learning_rate": 7.674051943115042e-05, + "loss": 2.2799, + "step": 1208 + }, + { + "epoch": 1.31, + "learning_rate": 7.666785182565677e-05, + "loss": 2.3947, + "step": 1210 + }, + { + "epoch": 1.31, + "learning_rate": 7.65951054116923e-05, + "loss": 2.3299, + "step": 1212 + }, + { + "epoch": 1.32, + "learning_rate": 7.652228040423622e-05, + "loss": 2.274, + "step": 1214 + }, + { + "epoch": 1.32, + "learning_rate": 7.644937701850002e-05, + "loss": 2.3697, + "step": 1216 + }, + { + "epoch": 1.32, + "learning_rate": 7.637639546992677e-05, + "loss": 2.3167, + "step": 1218 + }, + { + "epoch": 1.32, + "learning_rate": 7.630333597419054e-05, + "loss": 2.4688, + "step": 1220 + }, + { + "epoch": 1.32, + "learning_rate": 7.623019874719579e-05, + "loss": 2.2979, + "step": 1222 + }, + { + "epoch": 1.33, + "learning_rate": 7.61569840050766e-05, + "loss": 2.4614, + "step": 1224 + }, + { + "epoch": 1.33, + "learning_rate": 7.60836919641962e-05, + "loss": 2.5093, + "step": 1226 + }, + { + "epoch": 1.33, + "learning_rate": 7.60103228411462e-05, + "loss": 2.4832, + "step": 1228 + }, + { + "epoch": 1.33, + "learning_rate": 7.593687685274609e-05, + "loss": 2.4112, + "step": 1230 + }, + { + "epoch": 1.33, + "learning_rate": 7.586335421604238e-05, + "loss": 2.3033, + "step": 1232 + }, + { + "epoch": 1.34, + "learning_rate": 7.578975514830821e-05, + "loss": 2.6554, + "step": 1234 + }, + { + "epoch": 1.34, + "learning_rate": 7.571607986704252e-05, + "loss": 2.3495, + "step": 1236 + }, + { + "epoch": 1.34, + "learning_rate": 7.564232858996949e-05, + "loss": 2.4517, + "step": 1238 + }, + { + "epoch": 1.34, + "learning_rate": 7.556850153503787e-05, + "loss": 2.4985, + "step": 1240 + }, + { + "epoch": 1.35, + "learning_rate": 7.549459892042041e-05, + "loss": 2.5046, + "step": 1242 + }, + { + "epoch": 1.35, + "learning_rate": 7.542062096451305e-05, + "loss": 2.5004, + "step": 1244 + }, + { + "epoch": 1.35, + "learning_rate": 7.534656788593446e-05, + "loss": 2.3215, + "step": 1246 + }, + { + "epoch": 1.35, + "learning_rate": 7.527243990352529e-05, + "loss": 2.5481, + "step": 1248 + }, + { + "epoch": 1.35, + "learning_rate": 7.519823723634753e-05, + "loss": 2.3608, + "step": 1250 + }, + { + "epoch": 1.36, + "learning_rate": 7.51239601036839e-05, + "loss": 2.2113, + "step": 1252 + }, + { + "epoch": 1.36, + "learning_rate": 7.504960872503715e-05, + "loss": 2.6318, + "step": 1254 + }, + { + "epoch": 1.36, + "learning_rate": 7.497518332012946e-05, + "loss": 2.3967, + "step": 1256 + }, + { + "epoch": 1.36, + "learning_rate": 7.490068410890175e-05, + "loss": 2.1024, + "step": 1258 + }, + { + "epoch": 1.36, + "learning_rate": 7.48261113115131e-05, + "loss": 2.5322, + "step": 1260 + }, + { + "epoch": 1.37, + "learning_rate": 7.475146514834001e-05, + "loss": 2.3737, + "step": 1262 + }, + { + "epoch": 1.37, + "learning_rate": 7.46767458399758e-05, + "loss": 2.4803, + "step": 1264 + }, + { + "epoch": 1.37, + "learning_rate": 7.460195360722995e-05, + "loss": 2.1737, + "step": 1266 + }, + { + "epoch": 1.37, + "learning_rate": 7.452708867112745e-05, + "loss": 2.5601, + "step": 1268 + }, + { + "epoch": 1.38, + "learning_rate": 7.44521512529081e-05, + "loss": 2.5452, + "step": 1270 + }, + { + "epoch": 1.38, + "learning_rate": 7.437714157402598e-05, + "loss": 2.3953, + "step": 1272 + }, + { + "epoch": 1.38, + "learning_rate": 7.430205985614864e-05, + "loss": 2.4914, + "step": 1274 + }, + { + "epoch": 1.38, + "learning_rate": 7.422690632115654e-05, + "loss": 2.3997, + "step": 1276 + }, + { + "epoch": 1.38, + "learning_rate": 7.41516811911424e-05, + "loss": 2.2561, + "step": 1278 + }, + { + "epoch": 1.39, + "learning_rate": 7.407638468841047e-05, + "loss": 2.6531, + "step": 1280 + }, + { + "epoch": 1.39, + "learning_rate": 7.400101703547597e-05, + "loss": 2.6299, + "step": 1282 + }, + { + "epoch": 1.39, + "learning_rate": 7.392557845506432e-05, + "loss": 2.4573, + "step": 1284 + }, + { + "epoch": 1.39, + "learning_rate": 7.385006917011063e-05, + "loss": 2.5633, + "step": 1286 + }, + { + "epoch": 1.4, + "learning_rate": 7.377448940375887e-05, + "loss": 2.6371, + "step": 1288 + }, + { + "epoch": 1.4, + "learning_rate": 7.369883937936136e-05, + "loss": 2.2814, + "step": 1290 + }, + { + "epoch": 1.4, + "learning_rate": 7.362311932047797e-05, + "loss": 2.6985, + "step": 1292 + }, + { + "epoch": 1.4, + "learning_rate": 7.354732945087563e-05, + "loss": 2.3274, + "step": 1294 + }, + { + "epoch": 1.4, + "learning_rate": 7.34714699945275e-05, + "loss": 2.2417, + "step": 1296 + }, + { + "epoch": 1.41, + "learning_rate": 7.33955411756124e-05, + "loss": 2.4285, + "step": 1298 + }, + { + "epoch": 1.41, + "learning_rate": 7.331954321851418e-05, + "loss": 2.4677, + "step": 1300 + }, + { + "epoch": 1.41, + "learning_rate": 7.32434763478209e-05, + "loss": 2.4342, + "step": 1302 + }, + { + "epoch": 1.41, + "learning_rate": 7.316734078832438e-05, + "loss": 2.3903, + "step": 1304 + }, + { + "epoch": 1.41, + "learning_rate": 7.309113676501939e-05, + "loss": 2.4379, + "step": 1306 + }, + { + "epoch": 1.42, + "learning_rate": 7.301486450310298e-05, + "loss": 2.4929, + "step": 1308 + }, + { + "epoch": 1.42, + "learning_rate": 7.293852422797391e-05, + "loss": 2.4626, + "step": 1310 + }, + { + "epoch": 1.42, + "learning_rate": 7.286211616523193e-05, + "loss": 2.5199, + "step": 1312 + }, + { + "epoch": 1.42, + "learning_rate": 7.278564054067709e-05, + "loss": 2.3659, + "step": 1314 + }, + { + "epoch": 1.43, + "learning_rate": 7.270909758030912e-05, + "loss": 2.4869, + "step": 1316 + }, + { + "epoch": 1.43, + "learning_rate": 7.263248751032671e-05, + "loss": 2.5166, + "step": 1318 + }, + { + "epoch": 1.43, + "learning_rate": 7.255581055712688e-05, + "loss": 2.139, + "step": 1320 + }, + { + "epoch": 1.43, + "learning_rate": 7.247906694730437e-05, + "loss": 2.4807, + "step": 1322 + }, + { + "epoch": 1.43, + "learning_rate": 7.24022569076508e-05, + "loss": 2.4607, + "step": 1324 + }, + { + "epoch": 1.44, + "learning_rate": 7.232538066515414e-05, + "loss": 2.3367, + "step": 1326 + }, + { + "epoch": 1.44, + "learning_rate": 7.224843844699803e-05, + "loss": 2.6005, + "step": 1328 + }, + { + "epoch": 1.44, + "learning_rate": 7.217143048056108e-05, + "loss": 2.3467, + "step": 1330 + }, + { + "epoch": 1.44, + "learning_rate": 7.209435699341613e-05, + "loss": 2.4132, + "step": 1332 + }, + { + "epoch": 1.45, + "learning_rate": 7.201721821332973e-05, + "loss": 2.3049, + "step": 1334 + }, + { + "epoch": 1.45, + "learning_rate": 7.194001436826135e-05, + "loss": 2.3176, + "step": 1336 + }, + { + "epoch": 1.45, + "learning_rate": 7.18627456863627e-05, + "loss": 2.5401, + "step": 1338 + }, + { + "epoch": 1.45, + "learning_rate": 7.178541239597717e-05, + "loss": 2.4131, + "step": 1340 + }, + { + "epoch": 1.45, + "learning_rate": 7.170801472563903e-05, + "loss": 2.4554, + "step": 1342 + }, + { + "epoch": 1.46, + "learning_rate": 7.163055290407282e-05, + "loss": 2.405, + "step": 1344 + }, + { + "epoch": 1.46, + "learning_rate": 7.155302716019263e-05, + "loss": 2.4435, + "step": 1346 + }, + { + "epoch": 1.46, + "learning_rate": 7.14754377231015e-05, + "loss": 2.4068, + "step": 1348 + }, + { + "epoch": 1.46, + "learning_rate": 7.139778482209068e-05, + "loss": 2.4863, + "step": 1350 + }, + { + "epoch": 1.46, + "learning_rate": 7.132006868663894e-05, + "loss": 2.3856, + "step": 1352 + }, + { + "epoch": 1.47, + "learning_rate": 7.124228954641196e-05, + "loss": 2.3076, + "step": 1354 + }, + { + "epoch": 1.47, + "learning_rate": 7.116444763126158e-05, + "loss": 2.334, + "step": 1356 + }, + { + "epoch": 1.47, + "learning_rate": 7.108654317122515e-05, + "loss": 2.3639, + "step": 1358 + }, + { + "epoch": 1.47, + "learning_rate": 7.100857639652489e-05, + "loss": 2.7099, + "step": 1360 + }, + { + "epoch": 1.48, + "learning_rate": 7.093054753756713e-05, + "loss": 2.6381, + "step": 1362 + }, + { + "epoch": 1.48, + "learning_rate": 7.085245682494168e-05, + "loss": 2.4935, + "step": 1364 + }, + { + "epoch": 1.48, + "learning_rate": 7.077430448942117e-05, + "loss": 2.3986, + "step": 1366 + }, + { + "epoch": 1.48, + "learning_rate": 7.069609076196029e-05, + "loss": 2.5647, + "step": 1368 + }, + { + "epoch": 1.48, + "learning_rate": 7.061781587369519e-05, + "loss": 2.34, + "step": 1370 + }, + { + "epoch": 1.49, + "learning_rate": 7.053948005594273e-05, + "loss": 2.5114, + "step": 1372 + }, + { + "epoch": 1.49, + "learning_rate": 7.046108354019987e-05, + "loss": 2.4023, + "step": 1374 + }, + { + "epoch": 1.49, + "learning_rate": 7.038262655814291e-05, + "loss": 2.5273, + "step": 1376 + }, + { + "epoch": 1.49, + "learning_rate": 7.030410934162684e-05, + "loss": 2.3996, + "step": 1378 + }, + { + "epoch": 1.49, + "learning_rate": 7.022553212268469e-05, + "loss": 2.5281, + "step": 1380 + }, + { + "epoch": 1.5, + "learning_rate": 7.014689513352675e-05, + "loss": 2.2748, + "step": 1382 + }, + { + "epoch": 1.5, + "learning_rate": 7.006819860654001e-05, + "loss": 2.6029, + "step": 1384 + }, + { + "epoch": 1.5, + "learning_rate": 6.998944277428734e-05, + "loss": 2.3091, + "step": 1386 + }, + { + "epoch": 1.5, + "learning_rate": 6.991062786950691e-05, + "loss": 2.4689, + "step": 1388 + }, + { + "epoch": 1.51, + "learning_rate": 6.983175412511145e-05, + "loss": 2.4879, + "step": 1390 + }, + { + "epoch": 1.51, + "learning_rate": 6.975282177418756e-05, + "loss": 2.5369, + "step": 1392 + }, + { + "epoch": 1.51, + "learning_rate": 6.967383104999505e-05, + "loss": 2.4159, + "step": 1394 + }, + { + "epoch": 1.51, + "learning_rate": 6.959478218596625e-05, + "loss": 2.5748, + "step": 1396 + }, + { + "epoch": 1.51, + "learning_rate": 6.951567541570523e-05, + "loss": 2.5503, + "step": 1398 + }, + { + "epoch": 1.52, + "learning_rate": 6.943651097298727e-05, + "loss": 2.5094, + "step": 1400 + }, + { + "epoch": 1.52, + "learning_rate": 6.935728909175805e-05, + "loss": 2.3161, + "step": 1402 + }, + { + "epoch": 1.52, + "learning_rate": 6.927801000613298e-05, + "loss": 2.3559, + "step": 1404 + }, + { + "epoch": 1.52, + "learning_rate": 6.919867395039652e-05, + "loss": 2.4446, + "step": 1406 + }, + { + "epoch": 1.53, + "learning_rate": 6.91192811590015e-05, + "loss": 2.3579, + "step": 1408 + }, + { + "epoch": 1.53, + "learning_rate": 6.903983186656844e-05, + "loss": 2.3263, + "step": 1410 + }, + { + "epoch": 1.53, + "learning_rate": 6.896032630788476e-05, + "loss": 2.4279, + "step": 1412 + }, + { + "epoch": 1.53, + "learning_rate": 6.888076471790424e-05, + "loss": 2.3288, + "step": 1414 + }, + { + "epoch": 1.53, + "learning_rate": 6.880114733174615e-05, + "loss": 2.3714, + "step": 1416 + }, + { + "epoch": 1.54, + "learning_rate": 6.872147438469476e-05, + "loss": 2.5845, + "step": 1418 + }, + { + "epoch": 1.54, + "learning_rate": 6.864174611219841e-05, + "loss": 2.2575, + "step": 1420 + }, + { + "epoch": 1.54, + "learning_rate": 6.856196274986907e-05, + "loss": 2.7716, + "step": 1422 + }, + { + "epoch": 1.54, + "learning_rate": 6.848212453348137e-05, + "loss": 2.408, + "step": 1424 + }, + { + "epoch": 1.54, + "learning_rate": 6.840223169897217e-05, + "loss": 2.5191, + "step": 1426 + }, + { + "epoch": 1.55, + "learning_rate": 6.832228448243964e-05, + "loss": 2.3474, + "step": 1428 + }, + { + "epoch": 1.55, + "learning_rate": 6.824228312014274e-05, + "loss": 2.5852, + "step": 1430 + }, + { + "epoch": 1.55, + "learning_rate": 6.816222784850038e-05, + "loss": 2.5364, + "step": 1432 + }, + { + "epoch": 1.55, + "learning_rate": 6.80821189040908e-05, + "loss": 2.26, + "step": 1434 + }, + { + "epoch": 1.56, + "learning_rate": 6.800195652365087e-05, + "loss": 2.4253, + "step": 1436 + }, + { + "epoch": 1.56, + "learning_rate": 6.792174094407533e-05, + "loss": 2.3855, + "step": 1438 + }, + { + "epoch": 1.56, + "learning_rate": 6.784147240241619e-05, + "loss": 2.2678, + "step": 1440 + }, + { + "epoch": 1.56, + "learning_rate": 6.776115113588194e-05, + "loss": 2.4646, + "step": 1442 + }, + { + "epoch": 1.56, + "learning_rate": 6.76807773818369e-05, + "loss": 2.6316, + "step": 1444 + }, + { + "epoch": 1.57, + "learning_rate": 6.760035137780046e-05, + "loss": 2.5357, + "step": 1446 + }, + { + "epoch": 1.57, + "learning_rate": 6.751987336144648e-05, + "loss": 2.5943, + "step": 1448 + }, + { + "epoch": 1.57, + "learning_rate": 6.743934357060246e-05, + "loss": 2.4468, + "step": 1450 + }, + { + "epoch": 1.57, + "learning_rate": 6.735876224324895e-05, + "loss": 2.3678, + "step": 1452 + }, + { + "epoch": 1.57, + "learning_rate": 6.72781296175188e-05, + "loss": 2.3095, + "step": 1454 + }, + { + "epoch": 1.58, + "learning_rate": 6.719744593169641e-05, + "loss": 2.4335, + "step": 1456 + }, + { + "epoch": 1.58, + "learning_rate": 6.711671142421714e-05, + "loss": 2.5255, + "step": 1458 + }, + { + "epoch": 1.58, + "learning_rate": 6.703592633366647e-05, + "loss": 2.3837, + "step": 1460 + }, + { + "epoch": 1.58, + "learning_rate": 6.695509089877943e-05, + "loss": 2.5474, + "step": 1462 + }, + { + "epoch": 1.59, + "learning_rate": 6.687420535843975e-05, + "loss": 2.5055, + "step": 1464 + }, + { + "epoch": 1.59, + "learning_rate": 6.679326995167932e-05, + "loss": 2.4212, + "step": 1466 + }, + { + "epoch": 1.59, + "learning_rate": 6.671228491767728e-05, + "loss": 2.4475, + "step": 1468 + }, + { + "epoch": 1.59, + "learning_rate": 6.663125049575956e-05, + "loss": 2.5926, + "step": 1470 + }, + { + "epoch": 1.59, + "learning_rate": 6.655016692539793e-05, + "loss": 2.2358, + "step": 1472 + }, + { + "epoch": 1.6, + "learning_rate": 6.646903444620949e-05, + "loss": 2.6164, + "step": 1474 + }, + { + "epoch": 1.6, + "learning_rate": 6.63878532979558e-05, + "loss": 2.5778, + "step": 1476 + }, + { + "epoch": 1.6, + "learning_rate": 6.630662372054227e-05, + "loss": 2.4401, + "step": 1478 + }, + { + "epoch": 1.6, + "learning_rate": 6.622534595401746e-05, + "loss": 2.4327, + "step": 1480 + }, + { + "epoch": 1.61, + "learning_rate": 6.614402023857232e-05, + "loss": 2.3336, + "step": 1482 + }, + { + "epoch": 1.61, + "learning_rate": 6.606264681453946e-05, + "loss": 2.4107, + "step": 1484 + }, + { + "epoch": 1.61, + "learning_rate": 6.598122592239255e-05, + "loss": 2.4793, + "step": 1486 + }, + { + "epoch": 1.61, + "learning_rate": 6.589975780274544e-05, + "loss": 2.6092, + "step": 1488 + }, + { + "epoch": 1.61, + "learning_rate": 6.581824269635166e-05, + "loss": 2.4823, + "step": 1490 + }, + { + "epoch": 1.62, + "learning_rate": 6.57366808441035e-05, + "loss": 2.3623, + "step": 1492 + }, + { + "epoch": 1.62, + "learning_rate": 6.565507248703144e-05, + "loss": 2.5841, + "step": 1494 + }, + { + "epoch": 1.62, + "learning_rate": 6.557341786630339e-05, + "loss": 2.3636, + "step": 1496 + }, + { + "epoch": 1.62, + "learning_rate": 6.549171722322395e-05, + "loss": 2.5033, + "step": 1498 + }, + { + "epoch": 1.62, + "learning_rate": 6.540997079923376e-05, + "loss": 2.5465, + "step": 1500 + }, + { + "epoch": 1.63, + "learning_rate": 6.532817883590874e-05, + "loss": 2.4308, + "step": 1502 + }, + { + "epoch": 1.63, + "learning_rate": 6.524634157495935e-05, + "loss": 2.6063, + "step": 1504 + }, + { + "epoch": 1.63, + "learning_rate": 6.516445925822997e-05, + "loss": 2.3648, + "step": 1506 + }, + { + "epoch": 1.63, + "learning_rate": 6.508253212769808e-05, + "loss": 2.5649, + "step": 1508 + }, + { + "epoch": 1.64, + "learning_rate": 6.500056042547364e-05, + "loss": 2.4303, + "step": 1510 + }, + { + "epoch": 1.64, + "learning_rate": 6.491854439379827e-05, + "loss": 2.2518, + "step": 1512 + }, + { + "epoch": 1.64, + "learning_rate": 6.483648427504467e-05, + "loss": 2.6185, + "step": 1514 + }, + { + "epoch": 1.64, + "learning_rate": 6.475438031171574e-05, + "loss": 2.4631, + "step": 1516 + }, + { + "epoch": 1.64, + "learning_rate": 6.4672232746444e-05, + "loss": 2.5055, + "step": 1518 + }, + { + "epoch": 1.65, + "learning_rate": 6.459004182199082e-05, + "loss": 2.4789, + "step": 1520 + }, + { + "epoch": 1.65, + "learning_rate": 6.45078077812457e-05, + "loss": 2.518, + "step": 1522 + }, + { + "epoch": 1.65, + "learning_rate": 6.442553086722554e-05, + "loss": 2.2487, + "step": 1524 + }, + { + "epoch": 1.65, + "learning_rate": 6.434321132307394e-05, + "loss": 2.4873, + "step": 1526 + }, + { + "epoch": 1.66, + "learning_rate": 6.426084939206051e-05, + "loss": 2.4427, + "step": 1528 + }, + { + "epoch": 1.66, + "learning_rate": 6.417844531758009e-05, + "loss": 2.5523, + "step": 1530 + }, + { + "epoch": 1.66, + "learning_rate": 6.40959993431521e-05, + "loss": 2.4331, + "step": 1532 + }, + { + "epoch": 1.66, + "learning_rate": 6.401351171241971e-05, + "loss": 2.2483, + "step": 1534 + }, + { + "epoch": 1.66, + "learning_rate": 6.393098266914925e-05, + "loss": 2.3769, + "step": 1536 + }, + { + "epoch": 1.67, + "learning_rate": 6.384841245722945e-05, + "loss": 2.4459, + "step": 1538 + }, + { + "epoch": 1.67, + "learning_rate": 6.376580132067065e-05, + "loss": 2.4104, + "step": 1540 + }, + { + "epoch": 1.67, + "learning_rate": 6.368314950360415e-05, + "loss": 2.3963, + "step": 1542 + }, + { + "epoch": 1.67, + "learning_rate": 6.360045725028146e-05, + "loss": 2.4358, + "step": 1544 + }, + { + "epoch": 1.67, + "learning_rate": 6.351772480507363e-05, + "loss": 2.3851, + "step": 1546 + }, + { + "epoch": 1.68, + "learning_rate": 6.34349524124704e-05, + "loss": 2.3434, + "step": 1548 + }, + { + "epoch": 1.68, + "learning_rate": 6.335214031707965e-05, + "loss": 2.3168, + "step": 1550 + }, + { + "epoch": 1.68, + "learning_rate": 6.326928876362652e-05, + "loss": 2.5622, + "step": 1552 + }, + { + "epoch": 1.68, + "learning_rate": 6.318639799695285e-05, + "loss": 2.4061, + "step": 1554 + }, + { + "epoch": 1.69, + "learning_rate": 6.310346826201621e-05, + "loss": 2.6289, + "step": 1556 + }, + { + "epoch": 1.69, + "learning_rate": 6.302049980388948e-05, + "loss": 2.4561, + "step": 1558 + }, + { + "epoch": 1.69, + "learning_rate": 6.29374928677599e-05, + "loss": 2.4697, + "step": 1560 + }, + { + "epoch": 1.69, + "learning_rate": 6.28544476989284e-05, + "loss": 2.4481, + "step": 1562 + }, + { + "epoch": 1.69, + "learning_rate": 6.277136454280898e-05, + "loss": 2.5529, + "step": 1564 + }, + { + "epoch": 1.7, + "learning_rate": 6.268824364492782e-05, + "loss": 2.4358, + "step": 1566 + }, + { + "epoch": 1.7, + "learning_rate": 6.260508525092266e-05, + "loss": 2.3754, + "step": 1568 + }, + { + "epoch": 1.7, + "learning_rate": 6.252188960654204e-05, + "loss": 2.5845, + "step": 1570 + }, + { + "epoch": 1.7, + "learning_rate": 6.243865695764459e-05, + "loss": 2.5552, + "step": 1572 + }, + { + "epoch": 1.7, + "learning_rate": 6.235538755019832e-05, + "loss": 2.4616, + "step": 1574 + }, + { + "epoch": 1.71, + "learning_rate": 6.227208163027982e-05, + "loss": 2.3196, + "step": 1576 + }, + { + "epoch": 1.71, + "learning_rate": 6.218873944407361e-05, + "loss": 2.4119, + "step": 1578 + }, + { + "epoch": 1.71, + "learning_rate": 6.210536123787138e-05, + "loss": 2.2707, + "step": 1580 + }, + { + "epoch": 1.71, + "learning_rate": 6.202194725807127e-05, + "loss": 2.7299, + "step": 1582 + }, + { + "epoch": 1.72, + "learning_rate": 6.19384977511771e-05, + "loss": 2.2659, + "step": 1584 + }, + { + "epoch": 1.72, + "learning_rate": 6.185501296379777e-05, + "loss": 2.5439, + "step": 1586 + }, + { + "epoch": 1.72, + "learning_rate": 6.177149314264631e-05, + "loss": 2.6154, + "step": 1588 + }, + { + "epoch": 1.72, + "learning_rate": 6.168793853453943e-05, + "loss": 2.5537, + "step": 1590 + }, + { + "epoch": 1.72, + "learning_rate": 6.160434938639648e-05, + "loss": 2.3475, + "step": 1592 + }, + { + "epoch": 1.73, + "learning_rate": 6.152072594523906e-05, + "loss": 2.3385, + "step": 1594 + }, + { + "epoch": 1.73, + "learning_rate": 6.143706845818992e-05, + "loss": 2.4313, + "step": 1596 + }, + { + "epoch": 1.73, + "learning_rate": 6.135337717247261e-05, + "loss": 2.3323, + "step": 1598 + }, + { + "epoch": 1.73, + "learning_rate": 6.12696523354104e-05, + "loss": 2.4587, + "step": 1600 + }, + { + "epoch": 1.74, + "learning_rate": 6.118589419442584e-05, + "loss": 2.6458, + "step": 1602 + }, + { + "epoch": 1.74, + "learning_rate": 6.110210299703982e-05, + "loss": 2.5148, + "step": 1604 + }, + { + "epoch": 1.74, + "learning_rate": 6.101827899087094e-05, + "loss": 2.5324, + "step": 1606 + }, + { + "epoch": 1.74, + "learning_rate": 6.0934422423634744e-05, + "loss": 2.4962, + "step": 1608 + }, + { + "epoch": 1.74, + "learning_rate": 6.085053354314302e-05, + "loss": 2.4868, + "step": 1610 + }, + { + "epoch": 1.75, + "learning_rate": 6.076661259730305e-05, + "loss": 2.506, + "step": 1612 + }, + { + "epoch": 1.75, + "learning_rate": 6.068265983411685e-05, + "loss": 2.3774, + "step": 1614 + }, + { + "epoch": 1.75, + "learning_rate": 6.05986755016805e-05, + "loss": 2.4287, + "step": 1616 + }, + { + "epoch": 1.75, + "learning_rate": 6.051465984818332e-05, + "loss": 2.3267, + "step": 1618 + }, + { + "epoch": 1.75, + "learning_rate": 6.043061312190723e-05, + "loss": 2.4453, + "step": 1620 + }, + { + "epoch": 1.76, + "learning_rate": 6.034653557122598e-05, + "loss": 2.4022, + "step": 1622 + }, + { + "epoch": 1.76, + "learning_rate": 6.0262427444604384e-05, + "loss": 2.5554, + "step": 1624 + }, + { + "epoch": 1.76, + "learning_rate": 6.017828899059763e-05, + "loss": 2.4102, + "step": 1626 + }, + { + "epoch": 1.76, + "learning_rate": 6.009412045785051e-05, + "loss": 2.3266, + "step": 1628 + }, + { + "epoch": 1.77, + "learning_rate": 6.000992209509676e-05, + "loss": 2.2382, + "step": 1630 + }, + { + "epoch": 1.77, + "learning_rate": 5.9925694151158184e-05, + "loss": 2.2763, + "step": 1632 + }, + { + "epoch": 1.77, + "learning_rate": 5.984143687494409e-05, + "loss": 2.481, + "step": 1634 + }, + { + "epoch": 1.77, + "learning_rate": 5.975715051545039e-05, + "loss": 2.5598, + "step": 1636 + }, + { + "epoch": 1.77, + "learning_rate": 5.9672835321759016e-05, + "loss": 2.3289, + "step": 1638 + }, + { + "epoch": 1.78, + "learning_rate": 5.958849154303704e-05, + "loss": 2.4317, + "step": 1640 + }, + { + "epoch": 1.78, + "learning_rate": 5.9504119428536076e-05, + "loss": 2.448, + "step": 1642 + }, + { + "epoch": 1.78, + "learning_rate": 5.9419719227591405e-05, + "loss": 2.2034, + "step": 1644 + }, + { + "epoch": 1.78, + "learning_rate": 5.933529118962138e-05, + "loss": 2.4841, + "step": 1646 + }, + { + "epoch": 1.79, + "learning_rate": 5.925083556412657e-05, + "loss": 2.5998, + "step": 1648 + }, + { + "epoch": 1.79, + "learning_rate": 5.916635260068909e-05, + "loss": 2.5288, + "step": 1650 + }, + { + "epoch": 1.79, + "learning_rate": 5.908184254897182e-05, + "loss": 2.5148, + "step": 1652 + }, + { + "epoch": 1.79, + "learning_rate": 5.899730565871774e-05, + "loss": 2.5166, + "step": 1654 + }, + { + "epoch": 1.79, + "learning_rate": 5.891274217974907e-05, + "loss": 2.4235, + "step": 1656 + }, + { + "epoch": 1.8, + "learning_rate": 5.8828152361966685e-05, + "loss": 2.5575, + "step": 1658 + }, + { + "epoch": 1.8, + "learning_rate": 5.874353645534922e-05, + "loss": 2.4232, + "step": 1660 + }, + { + "epoch": 1.8, + "learning_rate": 5.865889470995248e-05, + "loss": 2.2509, + "step": 1662 + }, + { + "epoch": 1.8, + "learning_rate": 5.857422737590857e-05, + "loss": 2.2636, + "step": 1664 + }, + { + "epoch": 1.8, + "learning_rate": 5.8489534703425256e-05, + "loss": 2.4923, + "step": 1666 + }, + { + "epoch": 1.81, + "learning_rate": 5.8404816942785134e-05, + "loss": 2.3899, + "step": 1668 + }, + { + "epoch": 1.81, + "learning_rate": 5.8320074344345e-05, + "loss": 2.4698, + "step": 1670 + }, + { + "epoch": 1.81, + "learning_rate": 5.8235307158535e-05, + "loss": 2.65, + "step": 1672 + }, + { + "epoch": 1.81, + "learning_rate": 5.8150515635858e-05, + "loss": 2.4687, + "step": 1674 + }, + { + "epoch": 1.82, + "learning_rate": 5.806570002688869e-05, + "loss": 2.4793, + "step": 1676 + }, + { + "epoch": 1.82, + "learning_rate": 5.798086058227304e-05, + "loss": 2.2238, + "step": 1678 + }, + { + "epoch": 1.82, + "learning_rate": 5.78959975527274e-05, + "loss": 2.4365, + "step": 1680 + }, + { + "epoch": 1.82, + "learning_rate": 5.781111118903785e-05, + "loss": 2.4891, + "step": 1682 + }, + { + "epoch": 1.82, + "learning_rate": 5.772620174205938e-05, + "loss": 2.3248, + "step": 1684 + }, + { + "epoch": 1.83, + "learning_rate": 5.764126946271526e-05, + "loss": 2.6325, + "step": 1686 + }, + { + "epoch": 1.83, + "learning_rate": 5.755631460199616e-05, + "loss": 2.4281, + "step": 1688 + }, + { + "epoch": 1.83, + "learning_rate": 5.747133741095956e-05, + "loss": 2.4829, + "step": 1690 + }, + { + "epoch": 1.83, + "learning_rate": 5.738633814072888e-05, + "loss": 2.3321, + "step": 1692 + }, + { + "epoch": 1.83, + "learning_rate": 5.730131704249278e-05, + "loss": 2.5413, + "step": 1694 + }, + { + "epoch": 1.84, + "learning_rate": 5.721627436750449e-05, + "loss": 2.0978, + "step": 1696 + }, + { + "epoch": 1.84, + "learning_rate": 5.713121036708091e-05, + "loss": 2.5072, + "step": 1698 + }, + { + "epoch": 1.84, + "learning_rate": 5.704612529260205e-05, + "loss": 2.4096, + "step": 1700 + }, + { + "epoch": 1.84, + "learning_rate": 5.6961019395510126e-05, + "loss": 2.3686, + "step": 1702 + }, + { + "epoch": 1.85, + "learning_rate": 5.6875892927308936e-05, + "loss": 2.475, + "step": 1704 + }, + { + "epoch": 1.85, + "learning_rate": 5.679074613956307e-05, + "loss": 2.375, + "step": 1706 + }, + { + "epoch": 1.85, + "learning_rate": 5.6705579283897116e-05, + "loss": 2.5238, + "step": 1708 + }, + { + "epoch": 1.85, + "learning_rate": 5.662039261199502e-05, + "loss": 2.5842, + "step": 1710 + }, + { + "epoch": 1.85, + "learning_rate": 5.6535186375599266e-05, + "loss": 2.5468, + "step": 1712 + }, + { + "epoch": 1.86, + "learning_rate": 5.644996082651017e-05, + "loss": 2.4626, + "step": 1714 + }, + { + "epoch": 1.86, + "learning_rate": 5.636471621658508e-05, + "loss": 2.5552, + "step": 1716 + }, + { + "epoch": 1.86, + "learning_rate": 5.627945279773774e-05, + "loss": 2.2431, + "step": 1718 + }, + { + "epoch": 1.86, + "learning_rate": 5.61941708219374e-05, + "loss": 2.36, + "step": 1720 + }, + { + "epoch": 1.87, + "learning_rate": 5.6108870541208224e-05, + "loss": 2.3865, + "step": 1722 + }, + { + "epoch": 1.87, + "learning_rate": 5.602355220762838e-05, + "loss": 2.5472, + "step": 1724 + }, + { + "epoch": 1.87, + "learning_rate": 5.593821607332952e-05, + "loss": 2.3935, + "step": 1726 + }, + { + "epoch": 1.87, + "learning_rate": 5.585286239049574e-05, + "loss": 2.5526, + "step": 1728 + }, + { + "epoch": 1.87, + "learning_rate": 5.576749141136313e-05, + "loss": 2.5119, + "step": 1730 + }, + { + "epoch": 1.88, + "learning_rate": 5.568210338821881e-05, + "loss": 2.3868, + "step": 1732 + }, + { + "epoch": 1.88, + "learning_rate": 5.5596698573400306e-05, + "loss": 2.4324, + "step": 1734 + }, + { + "epoch": 1.88, + "learning_rate": 5.5511277219294765e-05, + "loss": 2.4088, + "step": 1736 + }, + { + "epoch": 1.88, + "learning_rate": 5.54258395783382e-05, + "loss": 2.447, + "step": 1738 + }, + { + "epoch": 1.88, + "learning_rate": 5.534038590301476e-05, + "loss": 2.4857, + "step": 1740 + }, + { + "epoch": 1.89, + "learning_rate": 5.5254916445855974e-05, + "loss": 2.3698, + "step": 1742 + }, + { + "epoch": 1.89, + "learning_rate": 5.5169431459440014e-05, + "loss": 2.5048, + "step": 1744 + }, + { + "epoch": 1.89, + "learning_rate": 5.508393119639094e-05, + "loss": 2.5057, + "step": 1746 + }, + { + "epoch": 1.89, + "learning_rate": 5.499841590937795e-05, + "loss": 2.4211, + "step": 1748 + }, + { + "epoch": 1.9, + "learning_rate": 5.491288585111467e-05, + "loss": 2.7328, + "step": 1750 + }, + { + "epoch": 1.9, + "learning_rate": 5.4827341274358344e-05, + "loss": 2.5598, + "step": 1752 + }, + { + "epoch": 1.9, + "learning_rate": 5.4741782431909136e-05, + "loss": 2.2472, + "step": 1754 + }, + { + "epoch": 1.9, + "learning_rate": 5.465620957660938e-05, + "loss": 2.4122, + "step": 1756 + }, + { + "epoch": 1.9, + "learning_rate": 5.457062296134279e-05, + "loss": 2.4685, + "step": 1758 + }, + { + "epoch": 1.91, + "learning_rate": 5.448502283903377e-05, + "loss": 2.5201, + "step": 1760 + }, + { + "epoch": 1.91, + "learning_rate": 5.439940946264662e-05, + "loss": 2.5483, + "step": 1762 + }, + { + "epoch": 1.91, + "learning_rate": 5.4313783085184825e-05, + "loss": 2.4956, + "step": 1764 + }, + { + "epoch": 1.91, + "learning_rate": 5.422814395969029e-05, + "loss": 2.5378, + "step": 1766 + }, + { + "epoch": 1.91, + "learning_rate": 5.414249233924258e-05, + "loss": 2.218, + "step": 1768 + }, + { + "epoch": 1.92, + "learning_rate": 5.40568284769582e-05, + "loss": 2.5364, + "step": 1770 + }, + { + "epoch": 1.92, + "learning_rate": 5.39711526259898e-05, + "loss": 2.4421, + "step": 1772 + }, + { + "epoch": 1.92, + "learning_rate": 5.388546503952551e-05, + "loss": 2.3293, + "step": 1774 + }, + { + "epoch": 1.92, + "learning_rate": 5.379976597078808e-05, + "loss": 2.4037, + "step": 1776 + }, + { + "epoch": 1.93, + "learning_rate": 5.371405567303428e-05, + "loss": 2.4786, + "step": 1778 + }, + { + "epoch": 1.93, + "learning_rate": 5.362833439955396e-05, + "loss": 2.2926, + "step": 1780 + }, + { + "epoch": 1.93, + "learning_rate": 5.354260240366947e-05, + "loss": 2.5112, + "step": 1782 + }, + { + "epoch": 1.93, + "learning_rate": 5.3456859938734836e-05, + "loss": 2.2066, + "step": 1784 + }, + { + "epoch": 1.93, + "learning_rate": 5.337110725813501e-05, + "loss": 2.4357, + "step": 1786 + }, + { + "epoch": 1.94, + "learning_rate": 5.328534461528515e-05, + "loss": 2.502, + "step": 1788 + }, + { + "epoch": 1.94, + "learning_rate": 5.3199572263629824e-05, + "loss": 2.2781, + "step": 1790 + }, + { + "epoch": 1.94, + "learning_rate": 5.3113790456642345e-05, + "loss": 2.3274, + "step": 1792 + }, + { + "epoch": 1.94, + "learning_rate": 5.3027999447823905e-05, + "loss": 2.4531, + "step": 1794 + }, + { + "epoch": 1.95, + "learning_rate": 5.2942199490702924e-05, + "loss": 2.4264, + "step": 1796 + }, + { + "epoch": 1.95, + "learning_rate": 5.285639083883428e-05, + "loss": 2.3976, + "step": 1798 + }, + { + "epoch": 1.95, + "learning_rate": 5.27705737457985e-05, + "loss": 2.3159, + "step": 1800 + }, + { + "epoch": 1.95, + "learning_rate": 5.268474846520112e-05, + "loss": 2.3113, + "step": 1802 + }, + { + "epoch": 1.95, + "learning_rate": 5.259891525067179e-05, + "loss": 2.3999, + "step": 1804 + }, + { + "epoch": 1.96, + "learning_rate": 5.251307435586368e-05, + "loss": 2.4202, + "step": 1806 + }, + { + "epoch": 1.96, + "learning_rate": 5.2427226034452614e-05, + "loss": 2.4635, + "step": 1808 + }, + { + "epoch": 1.96, + "learning_rate": 5.23413705401364e-05, + "loss": 2.588, + "step": 1810 + }, + { + "epoch": 1.96, + "learning_rate": 5.225550812663399e-05, + "loss": 2.3913, + "step": 1812 + }, + { + "epoch": 1.96, + "learning_rate": 5.216963904768485e-05, + "loss": 2.3559, + "step": 1814 + }, + { + "epoch": 1.97, + "learning_rate": 5.2083763557048056e-05, + "loss": 2.3511, + "step": 1816 + }, + { + "epoch": 1.97, + "learning_rate": 5.1997881908501736e-05, + "loss": 2.0888, + "step": 1818 + }, + { + "epoch": 1.97, + "learning_rate": 5.191199435584211e-05, + "loss": 2.3658, + "step": 1820 + }, + { + "epoch": 1.97, + "learning_rate": 5.182610115288295e-05, + "loss": 2.4578, + "step": 1822 + }, + { + "epoch": 1.98, + "learning_rate": 5.174020255345464e-05, + "loss": 2.5353, + "step": 1824 + }, + { + "epoch": 1.98, + "learning_rate": 5.1654298811403556e-05, + "loss": 2.3506, + "step": 1826 + }, + { + "epoch": 1.98, + "learning_rate": 5.1568390180591265e-05, + "loss": 2.43, + "step": 1828 + }, + { + "epoch": 1.98, + "learning_rate": 5.148247691489377e-05, + "loss": 2.5092, + "step": 1830 + }, + { + "epoch": 1.98, + "learning_rate": 5.139655926820078e-05, + "loss": 2.4586, + "step": 1832 + }, + { + "epoch": 1.99, + "learning_rate": 5.131063749441496e-05, + "loss": 2.3623, + "step": 1834 + }, + { + "epoch": 1.99, + "learning_rate": 5.1224711847451145e-05, + "loss": 2.5055, + "step": 1836 + }, + { + "epoch": 1.99, + "learning_rate": 5.113878258123563e-05, + "loss": 2.3857, + "step": 1838 + }, + { + "epoch": 1.99, + "learning_rate": 5.105284994970543e-05, + "loss": 2.6249, + "step": 1840 + }, + { + "epoch": 2.0, + "learning_rate": 5.096691420680745e-05, + "loss": 2.408, + "step": 1842 + }, + { + "epoch": 2.0, + "learning_rate": 5.088097560649784e-05, + "loss": 2.5748, + "step": 1844 + }, + { + "epoch": 2.0, + "learning_rate": 5.0795034402741185e-05, + "loss": 2.2292, + "step": 1846 + }, + { + "epoch": 2.0, + "learning_rate": 5.06661182712092e-05, + "loss": 3.0448, + "step": 1848 + }, + { + "epoch": 2.0, + "learning_rate": 5.0580171669978546e-05, + "loss": 2.4891, + "step": 1850 + }, + { + "epoch": 2.01, + "learning_rate": 5.049422335423252e-05, + "loss": 2.5769, + "step": 1852 + }, + { + "epoch": 2.01, + "learning_rate": 5.04082735779644e-05, + "loss": 2.3243, + "step": 1854 + }, + { + "epoch": 2.01, + "learning_rate": 5.032232259517179e-05, + "loss": 2.5441, + "step": 1856 + }, + { + "epoch": 2.01, + "learning_rate": 5.023637065985585e-05, + "loss": 2.4429, + "step": 1858 + }, + { + "epoch": 2.02, + "learning_rate": 5.015041802602057e-05, + "loss": 2.3943, + "step": 1860 + }, + { + "epoch": 2.02, + "learning_rate": 5.0064464947672e-05, + "loss": 2.3169, + "step": 1862 + }, + { + "epoch": 2.02, + "learning_rate": 4.9978511678817496e-05, + "loss": 2.4604, + "step": 1864 + }, + { + "epoch": 2.02, + "learning_rate": 4.989255847346499e-05, + "loss": 2.3745, + "step": 1866 + }, + { + "epoch": 2.02, + "learning_rate": 4.980660558562222e-05, + "loss": 2.3082, + "step": 1868 + }, + { + "epoch": 2.03, + "learning_rate": 4.972065326929598e-05, + "loss": 2.4983, + "step": 1870 + }, + { + "epoch": 2.03, + "learning_rate": 4.963470177849135e-05, + "loss": 2.3494, + "step": 1872 + }, + { + "epoch": 2.03, + "learning_rate": 4.954875136721104e-05, + "loss": 2.2882, + "step": 1874 + }, + { + "epoch": 2.03, + "learning_rate": 4.946280228945453e-05, + "loss": 2.401, + "step": 1876 + }, + { + "epoch": 2.03, + "learning_rate": 4.9376854799217327e-05, + "loss": 2.3044, + "step": 1878 + }, + { + "epoch": 2.04, + "learning_rate": 4.929090915049029e-05, + "loss": 2.51, + "step": 1880 + }, + { + "epoch": 2.04, + "learning_rate": 4.920496559725883e-05, + "loss": 2.5332, + "step": 1882 + }, + { + "epoch": 2.04, + "learning_rate": 4.911902439350217e-05, + "loss": 2.449, + "step": 1884 + }, + { + "epoch": 2.04, + "learning_rate": 4.9033085793192574e-05, + "loss": 2.4766, + "step": 1886 + }, + { + "epoch": 2.05, + "learning_rate": 4.894715005029459e-05, + "loss": 2.5255, + "step": 1888 + }, + { + "epoch": 2.05, + "learning_rate": 4.8861217418764374e-05, + "loss": 2.4169, + "step": 1890 + }, + { + "epoch": 2.05, + "learning_rate": 4.8775288152548866e-05, + "loss": 2.4542, + "step": 1892 + }, + { + "epoch": 2.05, + "learning_rate": 4.868936250558506e-05, + "loss": 2.3703, + "step": 1894 + }, + { + "epoch": 2.05, + "learning_rate": 4.8603440731799216e-05, + "loss": 2.4712, + "step": 1896 + }, + { + "epoch": 2.06, + "learning_rate": 4.851752308510624e-05, + "loss": 2.3871, + "step": 1898 + }, + { + "epoch": 2.06, + "learning_rate": 4.843160981940875e-05, + "loss": 2.3593, + "step": 1900 + }, + { + "epoch": 2.06, + "learning_rate": 4.8345701188596456e-05, + "loss": 2.3834, + "step": 1902 + }, + { + "epoch": 2.06, + "learning_rate": 4.825979744654536e-05, + "loss": 2.5184, + "step": 1904 + }, + { + "epoch": 2.06, + "learning_rate": 4.817389884711705e-05, + "loss": 2.3947, + "step": 1906 + }, + { + "epoch": 2.07, + "learning_rate": 4.8088005644157895e-05, + "loss": 2.3947, + "step": 1908 + }, + { + "epoch": 2.07, + "learning_rate": 4.800211809149829e-05, + "loss": 2.332, + "step": 1910 + }, + { + "epoch": 2.07, + "learning_rate": 4.791623644295195e-05, + "loss": 2.4736, + "step": 1912 + }, + { + "epoch": 2.07, + "learning_rate": 4.7830360952315164e-05, + "loss": 2.3257, + "step": 1914 + }, + { + "epoch": 2.08, + "learning_rate": 4.774449187336602e-05, + "loss": 2.4029, + "step": 1916 + }, + { + "epoch": 2.08, + "learning_rate": 4.765862945986362e-05, + "loss": 2.2457, + "step": 1918 + }, + { + "epoch": 2.08, + "learning_rate": 4.7572773965547384e-05, + "loss": 2.4841, + "step": 1920 + }, + { + "epoch": 2.08, + "learning_rate": 4.7486925644136324e-05, + "loss": 2.2552, + "step": 1922 + }, + { + "epoch": 2.08, + "learning_rate": 4.740108474932822e-05, + "loss": 2.2952, + "step": 1924 + }, + { + "epoch": 2.09, + "learning_rate": 4.731525153479891e-05, + "loss": 2.6216, + "step": 1926 + }, + { + "epoch": 2.09, + "learning_rate": 4.72294262542015e-05, + "loss": 2.6685, + "step": 1928 + }, + { + "epoch": 2.09, + "learning_rate": 4.7143609161165736e-05, + "loss": 2.5377, + "step": 1930 + }, + { + "epoch": 2.09, + "learning_rate": 4.705780050929708e-05, + "loss": 2.3924, + "step": 1932 + }, + { + "epoch": 2.1, + "learning_rate": 4.697200055217612e-05, + "loss": 2.3375, + "step": 1934 + }, + { + "epoch": 2.1, + "learning_rate": 4.688620954335766e-05, + "loss": 2.5853, + "step": 1936 + }, + { + "epoch": 2.1, + "learning_rate": 4.680042773637018e-05, + "loss": 2.4731, + "step": 1938 + }, + { + "epoch": 2.1, + "learning_rate": 4.671465538471486e-05, + "loss": 2.3143, + "step": 1940 + }, + { + "epoch": 2.1, + "learning_rate": 4.6628892741865e-05, + "loss": 2.3821, + "step": 1942 + }, + { + "epoch": 2.11, + "learning_rate": 4.654314006126516e-05, + "loss": 2.5513, + "step": 1944 + }, + { + "epoch": 2.11, + "learning_rate": 4.645739759633054e-05, + "loss": 2.3581, + "step": 1946 + }, + { + "epoch": 2.11, + "learning_rate": 4.637166560044605e-05, + "loss": 2.3677, + "step": 1948 + }, + { + "epoch": 2.11, + "learning_rate": 4.628594432696573e-05, + "loss": 2.2615, + "step": 1950 + }, + { + "epoch": 2.11, + "learning_rate": 4.620023402921191e-05, + "loss": 2.4965, + "step": 1952 + }, + { + "epoch": 2.12, + "learning_rate": 4.61145349604745e-05, + "loss": 2.3364, + "step": 1954 + }, + { + "epoch": 2.12, + "learning_rate": 4.602884737401022e-05, + "loss": 2.1789, + "step": 1956 + }, + { + "epoch": 2.12, + "learning_rate": 4.594317152304183e-05, + "loss": 2.229, + "step": 1958 + }, + { + "epoch": 2.12, + "learning_rate": 4.5857507660757424e-05, + "loss": 2.7305, + "step": 1960 + }, + { + "epoch": 2.13, + "learning_rate": 4.5771856040309716e-05, + "loss": 2.4481, + "step": 1962 + }, + { + "epoch": 2.13, + "learning_rate": 4.568621691481519e-05, + "loss": 2.5205, + "step": 1964 + }, + { + "epoch": 2.13, + "learning_rate": 4.5600590537353397e-05, + "loss": 2.4775, + "step": 1966 + }, + { + "epoch": 2.13, + "learning_rate": 4.551497716096624e-05, + "loss": 2.5465, + "step": 1968 + }, + { + "epoch": 2.13, + "learning_rate": 4.5429377038657214e-05, + "loss": 2.4793, + "step": 1970 + }, + { + "epoch": 2.14, + "learning_rate": 4.534379042339063e-05, + "loss": 2.5299, + "step": 1972 + }, + { + "epoch": 2.14, + "learning_rate": 4.5258217568090876e-05, + "loss": 2.4299, + "step": 1974 + }, + { + "epoch": 2.14, + "learning_rate": 4.517265872564167e-05, + "loss": 2.4201, + "step": 1976 + }, + { + "epoch": 2.14, + "learning_rate": 4.508711414888534e-05, + "loss": 2.3312, + "step": 1978 + }, + { + "epoch": 2.15, + "learning_rate": 4.5001584090622065e-05, + "loss": 2.3622, + "step": 1980 + }, + { + "epoch": 2.15, + "learning_rate": 4.491606880360909e-05, + "loss": 2.3231, + "step": 1982 + }, + { + "epoch": 2.15, + "learning_rate": 4.483056854055999e-05, + "loss": 2.3689, + "step": 1984 + }, + { + "epoch": 2.15, + "learning_rate": 4.474508355414404e-05, + "loss": 2.4958, + "step": 1986 + }, + { + "epoch": 2.15, + "learning_rate": 4.465961409698525e-05, + "loss": 2.5048, + "step": 1988 + }, + { + "epoch": 2.16, + "learning_rate": 4.457416042166181e-05, + "loss": 2.4204, + "step": 1990 + }, + { + "epoch": 2.16, + "learning_rate": 4.448872278070523e-05, + "loss": 2.5445, + "step": 1992 + }, + { + "epoch": 2.16, + "learning_rate": 4.4403301426599706e-05, + "loss": 2.1568, + "step": 1994 + }, + { + "epoch": 2.16, + "learning_rate": 4.431789661178121e-05, + "loss": 2.2937, + "step": 1996 + }, + { + "epoch": 2.16, + "learning_rate": 4.423250858863689e-05, + "loss": 2.336, + "step": 1998 + }, + { + "epoch": 2.17, + "learning_rate": 4.4147137609504266e-05, + "loss": 2.6334, + "step": 2000 + }, + { + "epoch": 2.17, + "learning_rate": 4.4061783926670496e-05, + "loss": 2.2973, + "step": 2002 + }, + { + "epoch": 2.17, + "learning_rate": 4.3976447792371624e-05, + "loss": 2.5251, + "step": 2004 + }, + { + "epoch": 2.17, + "learning_rate": 4.38911294587918e-05, + "loss": 2.4591, + "step": 2006 + }, + { + "epoch": 2.18, + "learning_rate": 4.38058291780626e-05, + "loss": 2.4621, + "step": 2008 + }, + { + "epoch": 2.18, + "learning_rate": 4.372054720226227e-05, + "loss": 2.3918, + "step": 2010 + }, + { + "epoch": 2.18, + "learning_rate": 4.3635283783414924e-05, + "loss": 2.357, + "step": 2012 + }, + { + "epoch": 2.18, + "learning_rate": 4.3550039173489845e-05, + "loss": 2.3846, + "step": 2014 + }, + { + "epoch": 2.18, + "learning_rate": 4.346481362440074e-05, + "loss": 2.421, + "step": 2016 + }, + { + "epoch": 2.19, + "learning_rate": 4.337960738800498e-05, + "loss": 2.523, + "step": 2018 + }, + { + "epoch": 2.19, + "learning_rate": 4.3294420716102895e-05, + "loss": 2.3806, + "step": 2020 + }, + { + "epoch": 2.19, + "learning_rate": 4.320925386043696e-05, + "loss": 2.4476, + "step": 2022 + }, + { + "epoch": 2.19, + "learning_rate": 4.3124107072691055e-05, + "loss": 2.4976, + "step": 2024 + }, + { + "epoch": 2.19, + "learning_rate": 4.3038980604489885e-05, + "loss": 2.6172, + "step": 2026 + }, + { + "epoch": 2.2, + "learning_rate": 4.2953874707397964e-05, + "loss": 2.4828, + "step": 2028 + }, + { + "epoch": 2.2, + "learning_rate": 4.28687896329191e-05, + "loss": 2.3404, + "step": 2030 + }, + { + "epoch": 2.2, + "learning_rate": 4.278372563249552e-05, + "loss": 2.4298, + "step": 2032 + }, + { + "epoch": 2.2, + "learning_rate": 4.269868295750722e-05, + "loss": 2.5339, + "step": 2034 + }, + { + "epoch": 2.21, + "learning_rate": 4.261366185927114e-05, + "loss": 2.2219, + "step": 2036 + }, + { + "epoch": 2.21, + "learning_rate": 4.252866258904045e-05, + "loss": 2.3277, + "step": 2038 + }, + { + "epoch": 2.21, + "learning_rate": 4.2443685398003835e-05, + "loss": 2.3991, + "step": 2040 + }, + { + "epoch": 2.21, + "learning_rate": 4.235873053728475e-05, + "loss": 2.3344, + "step": 2042 + }, + { + "epoch": 2.21, + "learning_rate": 4.227379825794063e-05, + "loss": 2.3301, + "step": 2044 + }, + { + "epoch": 2.22, + "learning_rate": 4.218888881096217e-05, + "loss": 2.4981, + "step": 2046 + }, + { + "epoch": 2.22, + "learning_rate": 4.21040024472726e-05, + "loss": 2.4976, + "step": 2048 + }, + { + "epoch": 2.22, + "learning_rate": 4.201913941772696e-05, + "loss": 2.6412, + "step": 2050 + }, + { + "epoch": 2.22, + "learning_rate": 4.193429997311132e-05, + "loss": 2.3847, + "step": 2052 + }, + { + "epoch": 2.23, + "learning_rate": 4.184948436414203e-05, + "loss": 2.2447, + "step": 2054 + }, + { + "epoch": 2.23, + "learning_rate": 4.1764692841464995e-05, + "loss": 2.5925, + "step": 2056 + }, + { + "epoch": 2.23, + "learning_rate": 4.1679925655655e-05, + "loss": 2.3348, + "step": 2058 + }, + { + "epoch": 2.23, + "learning_rate": 4.159518305721487e-05, + "loss": 2.5109, + "step": 2060 + }, + { + "epoch": 2.23, + "learning_rate": 4.151046529657477e-05, + "loss": 2.5121, + "step": 2062 + }, + { + "epoch": 2.24, + "learning_rate": 4.142577262409144e-05, + "loss": 2.4563, + "step": 2064 + }, + { + "epoch": 2.24, + "learning_rate": 4.134110529004753e-05, + "loss": 2.4912, + "step": 2066 + }, + { + "epoch": 2.24, + "learning_rate": 4.1256463544650783e-05, + "loss": 2.4457, + "step": 2068 + }, + { + "epoch": 2.24, + "learning_rate": 4.117184763803334e-05, + "loss": 2.4428, + "step": 2070 + }, + { + "epoch": 2.24, + "learning_rate": 4.108725782025092e-05, + "loss": 2.3274, + "step": 2072 + }, + { + "epoch": 2.25, + "learning_rate": 4.1002694341282276e-05, + "loss": 2.4062, + "step": 2074 + }, + { + "epoch": 2.25, + "learning_rate": 4.0918157451028185e-05, + "loss": 2.6018, + "step": 2076 + }, + { + "epoch": 2.25, + "learning_rate": 4.083364739931092e-05, + "loss": 2.4493, + "step": 2078 + }, + { + "epoch": 2.25, + "learning_rate": 4.0749164435873425e-05, + "loss": 2.5798, + "step": 2080 + }, + { + "epoch": 2.26, + "learning_rate": 4.0664708810378625e-05, + "loss": 2.2729, + "step": 2082 + }, + { + "epoch": 2.26, + "learning_rate": 4.05802807724086e-05, + "loss": 2.3844, + "step": 2084 + }, + { + "epoch": 2.26, + "learning_rate": 4.049588057146394e-05, + "loss": 2.402, + "step": 2086 + }, + { + "epoch": 2.26, + "learning_rate": 4.041150845696296e-05, + "loss": 2.4163, + "step": 2088 + }, + { + "epoch": 2.26, + "learning_rate": 4.032716467824099e-05, + "loss": 2.428, + "step": 2090 + }, + { + "epoch": 2.27, + "learning_rate": 4.0242849484549623e-05, + "loss": 2.4803, + "step": 2092 + }, + { + "epoch": 2.27, + "learning_rate": 4.015856312505593e-05, + "loss": 2.2398, + "step": 2094 + }, + { + "epoch": 2.27, + "learning_rate": 4.0074305848841814e-05, + "loss": 2.2521, + "step": 2096 + }, + { + "epoch": 2.27, + "learning_rate": 3.9990077904903254e-05, + "loss": 2.3918, + "step": 2098 + }, + { + "epoch": 2.28, + "learning_rate": 3.99058795421495e-05, + "loss": 2.519, + "step": 2100 + }, + { + "epoch": 2.28, + "learning_rate": 3.982171100940239e-05, + "loss": 2.4067, + "step": 2102 + }, + { + "epoch": 2.28, + "learning_rate": 3.973757255539562e-05, + "loss": 2.4408, + "step": 2104 + }, + { + "epoch": 2.28, + "learning_rate": 3.965346442877403e-05, + "loss": 2.4309, + "step": 2106 + }, + { + "epoch": 2.28, + "learning_rate": 3.9569386878092774e-05, + "loss": 2.2934, + "step": 2108 + }, + { + "epoch": 2.29, + "learning_rate": 3.94853401518167e-05, + "loss": 2.356, + "step": 2110 + }, + { + "epoch": 2.29, + "learning_rate": 3.94013244983195e-05, + "loss": 2.3314, + "step": 2112 + }, + { + "epoch": 2.29, + "learning_rate": 3.9317340165883156e-05, + "loss": 2.4871, + "step": 2114 + }, + { + "epoch": 2.29, + "learning_rate": 3.923338740269696e-05, + "loss": 2.4932, + "step": 2116 + }, + { + "epoch": 2.29, + "learning_rate": 3.9149466456857e-05, + "loss": 2.2894, + "step": 2118 + }, + { + "epoch": 2.3, + "learning_rate": 3.906557757636526e-05, + "loss": 2.4726, + "step": 2120 + }, + { + "epoch": 2.3, + "learning_rate": 3.898172100912908e-05, + "loss": 2.3281, + "step": 2122 + }, + { + "epoch": 2.3, + "learning_rate": 3.8897897002960195e-05, + "loss": 2.6179, + "step": 2124 + }, + { + "epoch": 2.3, + "learning_rate": 3.8814105805574166e-05, + "loss": 2.3778, + "step": 2126 + }, + { + "epoch": 2.31, + "learning_rate": 3.873034766458959e-05, + "loss": 2.4198, + "step": 2128 + }, + { + "epoch": 2.31, + "learning_rate": 3.86466228275274e-05, + "loss": 2.318, + "step": 2130 + }, + { + "epoch": 2.31, + "learning_rate": 3.856293154181009e-05, + "loss": 2.3894, + "step": 2132 + }, + { + "epoch": 2.31, + "learning_rate": 3.847927405476097e-05, + "loss": 2.388, + "step": 2134 + }, + { + "epoch": 2.31, + "learning_rate": 3.839565061360352e-05, + "loss": 2.4026, + "step": 2136 + }, + { + "epoch": 2.32, + "learning_rate": 3.831206146546059e-05, + "loss": 2.3764, + "step": 2138 + }, + { + "epoch": 2.32, + "learning_rate": 3.82285068573537e-05, + "loss": 2.2921, + "step": 2140 + }, + { + "epoch": 2.32, + "learning_rate": 3.814498703620226e-05, + "loss": 2.5344, + "step": 2142 + }, + { + "epoch": 2.32, + "learning_rate": 3.80615022488229e-05, + "loss": 2.3933, + "step": 2144 + }, + { + "epoch": 2.32, + "learning_rate": 3.797805274192875e-05, + "loss": 2.5373, + "step": 2146 + }, + { + "epoch": 2.33, + "learning_rate": 3.789463876212863e-05, + "loss": 2.2517, + "step": 2148 + }, + { + "epoch": 2.33, + "learning_rate": 3.781126055592641e-05, + "loss": 2.5876, + "step": 2150 + }, + { + "epoch": 2.33, + "learning_rate": 3.772791836972019e-05, + "loss": 2.5212, + "step": 2152 + }, + { + "epoch": 2.33, + "learning_rate": 3.764461244980169e-05, + "loss": 2.4556, + "step": 2154 + }, + { + "epoch": 2.34, + "learning_rate": 3.7561343042355415e-05, + "loss": 2.4632, + "step": 2156 + }, + { + "epoch": 2.34, + "learning_rate": 3.747811039345798e-05, + "loss": 2.333, + "step": 2158 + }, + { + "epoch": 2.34, + "learning_rate": 3.739491474907735e-05, + "loss": 2.3092, + "step": 2160 + }, + { + "epoch": 2.34, + "learning_rate": 3.731175635507219e-05, + "loss": 2.4144, + "step": 2162 + }, + { + "epoch": 2.34, + "learning_rate": 3.722863545719103e-05, + "loss": 2.51, + "step": 2164 + }, + { + "epoch": 2.35, + "learning_rate": 3.7145552301071594e-05, + "loss": 2.449, + "step": 2166 + }, + { + "epoch": 2.35, + "learning_rate": 3.706250713224011e-05, + "loss": 2.4989, + "step": 2168 + }, + { + "epoch": 2.35, + "learning_rate": 3.697950019611054e-05, + "loss": 2.4551, + "step": 2170 + }, + { + "epoch": 2.35, + "learning_rate": 3.689653173798381e-05, + "loss": 2.4758, + "step": 2172 + }, + { + "epoch": 2.36, + "learning_rate": 3.681360200304718e-05, + "loss": 2.5718, + "step": 2174 + }, + { + "epoch": 2.36, + "learning_rate": 3.673071123637347e-05, + "loss": 2.4931, + "step": 2176 + }, + { + "epoch": 2.36, + "learning_rate": 3.664785968292036e-05, + "loss": 2.5029, + "step": 2178 + }, + { + "epoch": 2.36, + "learning_rate": 3.656504758752961e-05, + "loss": 2.4353, + "step": 2180 + }, + { + "epoch": 2.36, + "learning_rate": 3.64822751949264e-05, + "loss": 2.2827, + "step": 2182 + }, + { + "epoch": 2.37, + "learning_rate": 3.639954274971854e-05, + "loss": 2.4667, + "step": 2184 + }, + { + "epoch": 2.37, + "learning_rate": 3.631685049639586e-05, + "loss": 2.3599, + "step": 2186 + }, + { + "epoch": 2.37, + "learning_rate": 3.623419867932937e-05, + "loss": 2.3421, + "step": 2188 + }, + { + "epoch": 2.37, + "learning_rate": 3.6151587542770567e-05, + "loss": 2.4358, + "step": 2190 + }, + { + "epoch": 2.37, + "learning_rate": 3.6069017330850754e-05, + "loss": 2.4472, + "step": 2192 + }, + { + "epoch": 2.38, + "learning_rate": 3.598648828758031e-05, + "loss": 2.4576, + "step": 2194 + }, + { + "epoch": 2.38, + "learning_rate": 3.590400065684792e-05, + "loss": 2.5057, + "step": 2196 + }, + { + "epoch": 2.38, + "learning_rate": 3.582155468241993e-05, + "loss": 2.4759, + "step": 2198 + }, + { + "epoch": 2.38, + "learning_rate": 3.573915060793949e-05, + "loss": 2.1946, + "step": 2200 + }, + { + "epoch": 2.39, + "learning_rate": 3.5656788676926066e-05, + "loss": 2.562, + "step": 2202 + }, + { + "epoch": 2.39, + "learning_rate": 3.557446913277448e-05, + "loss": 2.7023, + "step": 2204 + }, + { + "epoch": 2.39, + "learning_rate": 3.5492192218754326e-05, + "loss": 2.5342, + "step": 2206 + }, + { + "epoch": 2.39, + "learning_rate": 3.540995817800917e-05, + "loss": 2.4178, + "step": 2208 + }, + { + "epoch": 2.39, + "learning_rate": 3.532776725355601e-05, + "loss": 2.4072, + "step": 2210 + }, + { + "epoch": 2.4, + "learning_rate": 3.5245619688284274e-05, + "loss": 2.3832, + "step": 2212 + }, + { + "epoch": 2.4, + "learning_rate": 3.516351572495535e-05, + "loss": 2.5958, + "step": 2214 + }, + { + "epoch": 2.4, + "learning_rate": 3.508145560620173e-05, + "loss": 2.3778, + "step": 2216 + }, + { + "epoch": 2.4, + "learning_rate": 3.499943957452637e-05, + "loss": 2.3753, + "step": 2218 + }, + { + "epoch": 2.4, + "learning_rate": 3.4917467872301934e-05, + "loss": 2.4079, + "step": 2220 + }, + { + "epoch": 2.41, + "learning_rate": 3.4835540741770054e-05, + "loss": 2.4669, + "step": 2222 + }, + { + "epoch": 2.41, + "learning_rate": 3.4753658425040656e-05, + "loss": 2.4278, + "step": 2224 + }, + { + "epoch": 2.41, + "learning_rate": 3.467182116409127e-05, + "loss": 2.4575, + "step": 2226 + }, + { + "epoch": 2.41, + "learning_rate": 3.459002920076625e-05, + "loss": 2.5202, + "step": 2228 + }, + { + "epoch": 2.42, + "learning_rate": 3.450828277677606e-05, + "loss": 2.4366, + "step": 2230 + }, + { + "epoch": 2.42, + "learning_rate": 3.442658213369662e-05, + "loss": 2.4061, + "step": 2232 + }, + { + "epoch": 2.42, + "learning_rate": 3.434492751296856e-05, + "loss": 2.393, + "step": 2234 + }, + { + "epoch": 2.42, + "learning_rate": 3.426331915589651e-05, + "loss": 2.3859, + "step": 2236 + }, + { + "epoch": 2.42, + "learning_rate": 3.418175730364836e-05, + "loss": 2.516, + "step": 2238 + }, + { + "epoch": 2.43, + "learning_rate": 3.4100242197254564e-05, + "loss": 2.2877, + "step": 2240 + }, + { + "epoch": 2.43, + "learning_rate": 3.401877407760747e-05, + "loss": 2.3637, + "step": 2242 + }, + { + "epoch": 2.43, + "learning_rate": 3.393735318546054e-05, + "loss": 2.4131, + "step": 2244 + }, + { + "epoch": 2.43, + "learning_rate": 3.38559797614277e-05, + "loss": 2.4939, + "step": 2246 + }, + { + "epoch": 2.44, + "learning_rate": 3.377465404598253e-05, + "loss": 2.4648, + "step": 2248 + }, + { + "epoch": 2.44, + "learning_rate": 3.369337627945774e-05, + "loss": 2.504, + "step": 2250 + }, + { + "epoch": 2.44, + "learning_rate": 3.3612146702044226e-05, + "loss": 2.5889, + "step": 2252 + }, + { + "epoch": 2.44, + "learning_rate": 3.3530965553790526e-05, + "loss": 2.5468, + "step": 2254 + }, + { + "epoch": 2.44, + "learning_rate": 3.3449833074602064e-05, + "loss": 2.3507, + "step": 2256 + }, + { + "epoch": 2.45, + "learning_rate": 3.336874950424046e-05, + "loss": 2.418, + "step": 2258 + }, + { + "epoch": 2.45, + "learning_rate": 3.328771508232273e-05, + "loss": 2.424, + "step": 2260 + }, + { + "epoch": 2.45, + "learning_rate": 3.320673004832071e-05, + "loss": 2.4613, + "step": 2262 + }, + { + "epoch": 2.45, + "learning_rate": 3.312579464156025e-05, + "loss": 2.5197, + "step": 2264 + }, + { + "epoch": 2.45, + "learning_rate": 3.304490910122058e-05, + "loss": 2.3393, + "step": 2266 + }, + { + "epoch": 2.46, + "learning_rate": 3.2964073666333536e-05, + "loss": 2.2088, + "step": 2268 + }, + { + "epoch": 2.46, + "learning_rate": 3.2883288575782875e-05, + "loss": 2.4951, + "step": 2270 + }, + { + "epoch": 2.46, + "learning_rate": 3.2802554068303596e-05, + "loss": 2.3728, + "step": 2272 + }, + { + "epoch": 2.46, + "learning_rate": 3.272187038248121e-05, + "loss": 2.2495, + "step": 2274 + }, + { + "epoch": 2.47, + "learning_rate": 3.264123775675106e-05, + "loss": 2.3507, + "step": 2276 + }, + { + "epoch": 2.47, + "learning_rate": 3.256065642939756e-05, + "loss": 2.3987, + "step": 2278 + }, + { + "epoch": 2.47, + "learning_rate": 3.248012663855353e-05, + "loss": 2.4537, + "step": 2280 + }, + { + "epoch": 2.47, + "learning_rate": 3.239964862219954e-05, + "loss": 2.4883, + "step": 2282 + }, + { + "epoch": 2.47, + "learning_rate": 3.231922261816311e-05, + "loss": 2.1862, + "step": 2284 + }, + { + "epoch": 2.48, + "learning_rate": 3.223884886411807e-05, + "loss": 2.4353, + "step": 2286 + }, + { + "epoch": 2.48, + "learning_rate": 3.215852759758381e-05, + "loss": 2.2626, + "step": 2288 + }, + { + "epoch": 2.48, + "learning_rate": 3.2078259055924675e-05, + "loss": 2.3032, + "step": 2290 + }, + { + "epoch": 2.48, + "learning_rate": 3.199804347634915e-05, + "loss": 2.3917, + "step": 2292 + }, + { + "epoch": 2.49, + "learning_rate": 3.191788109590922e-05, + "loss": 2.4218, + "step": 2294 + }, + { + "epoch": 2.49, + "learning_rate": 3.183777215149962e-05, + "loss": 2.3915, + "step": 2296 + }, + { + "epoch": 2.49, + "learning_rate": 3.175771687985726e-05, + "loss": 2.5188, + "step": 2298 + }, + { + "epoch": 2.49, + "learning_rate": 3.167771551756036e-05, + "loss": 2.4356, + "step": 2300 + }, + { + "epoch": 2.49, + "learning_rate": 3.159776830102784e-05, + "loss": 2.3713, + "step": 2302 + }, + { + "epoch": 2.5, + "learning_rate": 3.1517875466518626e-05, + "loss": 2.488, + "step": 2304 + }, + { + "epoch": 2.5, + "learning_rate": 3.1438037250130944e-05, + "loss": 2.5058, + "step": 2306 + }, + { + "epoch": 2.5, + "learning_rate": 3.135825388780159e-05, + "loss": 2.5503, + "step": 2308 + }, + { + "epoch": 2.5, + "learning_rate": 3.127852561530526e-05, + "loss": 2.4376, + "step": 2310 + }, + { + "epoch": 2.5, + "learning_rate": 3.1198852668253856e-05, + "loss": 2.4054, + "step": 2312 + }, + { + "epoch": 2.51, + "learning_rate": 3.111923528209577e-05, + "loss": 2.3189, + "step": 2314 + }, + { + "epoch": 2.51, + "learning_rate": 3.103967369211525e-05, + "loss": 2.3736, + "step": 2316 + }, + { + "epoch": 2.51, + "learning_rate": 3.096016813343158e-05, + "loss": 2.4304, + "step": 2318 + }, + { + "epoch": 2.51, + "learning_rate": 3.08807188409985e-05, + "loss": 2.3717, + "step": 2320 + }, + { + "epoch": 2.52, + "learning_rate": 3.080132604960349e-05, + "loss": 2.3996, + "step": 2322 + }, + { + "epoch": 2.52, + "learning_rate": 3.072198999386704e-05, + "loss": 2.5024, + "step": 2324 + }, + { + "epoch": 2.52, + "learning_rate": 3.064271090824197e-05, + "loss": 2.3109, + "step": 2326 + }, + { + "epoch": 2.52, + "learning_rate": 3.056348902701274e-05, + "loss": 2.376, + "step": 2328 + }, + { + "epoch": 2.52, + "learning_rate": 3.0484324584294783e-05, + "loss": 2.5198, + "step": 2330 + }, + { + "epoch": 2.53, + "learning_rate": 3.040521781403377e-05, + "loss": 2.2787, + "step": 2332 + }, + { + "epoch": 2.53, + "learning_rate": 3.0326168950004964e-05, + "loss": 2.3356, + "step": 2334 + }, + { + "epoch": 2.53, + "learning_rate": 3.0247178225812435e-05, + "loss": 2.4738, + "step": 2336 + }, + { + "epoch": 2.53, + "learning_rate": 3.0168245874888557e-05, + "loss": 2.41, + "step": 2338 + }, + { + "epoch": 2.53, + "learning_rate": 3.00893721304931e-05, + "loss": 2.4233, + "step": 2340 + }, + { + "epoch": 2.54, + "learning_rate": 3.0010557225712667e-05, + "loss": 2.4596, + "step": 2342 + }, + { + "epoch": 2.54, + "learning_rate": 2.993180139345999e-05, + "loss": 2.5253, + "step": 2344 + }, + { + "epoch": 2.54, + "learning_rate": 2.9853104866473246e-05, + "loss": 2.3779, + "step": 2346 + }, + { + "epoch": 2.54, + "learning_rate": 2.977446787731532e-05, + "loss": 2.397, + "step": 2348 + }, + { + "epoch": 2.55, + "learning_rate": 2.9695890658373164e-05, + "loss": 2.4016, + "step": 2350 + }, + { + "epoch": 2.55, + "learning_rate": 2.96173734418571e-05, + "loss": 2.3168, + "step": 2352 + }, + { + "epoch": 2.55, + "learning_rate": 2.9538916459800136e-05, + "loss": 2.2834, + "step": 2354 + }, + { + "epoch": 2.55, + "learning_rate": 2.9460519944057284e-05, + "loss": 2.4692, + "step": 2356 + }, + { + "epoch": 2.55, + "learning_rate": 2.9382184126304834e-05, + "loss": 2.1485, + "step": 2358 + }, + { + "epoch": 2.56, + "learning_rate": 2.9303909238039718e-05, + "loss": 2.1016, + "step": 2360 + }, + { + "epoch": 2.56, + "learning_rate": 2.9225695510578843e-05, + "loss": 2.1819, + "step": 2362 + }, + { + "epoch": 2.56, + "learning_rate": 2.9147543175058335e-05, + "loss": 2.4526, + "step": 2364 + }, + { + "epoch": 2.56, + "learning_rate": 2.9069452462432883e-05, + "loss": 2.5037, + "step": 2366 + }, + { + "epoch": 2.57, + "learning_rate": 2.899142360347511e-05, + "loss": 2.223, + "step": 2368 + }, + { + "epoch": 2.57, + "learning_rate": 2.8913456828774854e-05, + "loss": 2.4296, + "step": 2370 + }, + { + "epoch": 2.57, + "learning_rate": 2.883555236873845e-05, + "loss": 2.3899, + "step": 2372 + }, + { + "epoch": 2.57, + "learning_rate": 2.875771045358805e-05, + "loss": 2.4758, + "step": 2374 + }, + { + "epoch": 2.57, + "learning_rate": 2.8679931313361053e-05, + "loss": 2.4788, + "step": 2376 + }, + { + "epoch": 2.58, + "learning_rate": 2.860221517790933e-05, + "loss": 2.4869, + "step": 2378 + }, + { + "epoch": 2.58, + "learning_rate": 2.8524562276898513e-05, + "loss": 2.4313, + "step": 2380 + }, + { + "epoch": 2.58, + "learning_rate": 2.8446972839807384e-05, + "loss": 2.2432, + "step": 2382 + }, + { + "epoch": 2.58, + "learning_rate": 2.8369447095927195e-05, + "loss": 2.2748, + "step": 2384 + }, + { + "epoch": 2.58, + "learning_rate": 2.8291985274360983e-05, + "loss": 2.6419, + "step": 2386 + }, + { + "epoch": 2.59, + "learning_rate": 2.8214587604022847e-05, + "loss": 2.4079, + "step": 2388 + }, + { + "epoch": 2.59, + "learning_rate": 2.8137254313637306e-05, + "loss": 2.3603, + "step": 2390 + }, + { + "epoch": 2.59, + "learning_rate": 2.805998563173866e-05, + "loss": 2.2753, + "step": 2392 + }, + { + "epoch": 2.59, + "learning_rate": 2.798278178667028e-05, + "loss": 2.3884, + "step": 2394 + }, + { + "epoch": 2.6, + "learning_rate": 2.790564300658387e-05, + "loss": 2.5817, + "step": 2396 + }, + { + "epoch": 2.6, + "learning_rate": 2.7828569519438942e-05, + "loss": 2.4844, + "step": 2398 + }, + { + "epoch": 2.6, + "learning_rate": 2.775156155300197e-05, + "loss": 2.4505, + "step": 2400 + }, + { + "epoch": 2.6, + "learning_rate": 2.7674619334845876e-05, + "loss": 2.443, + "step": 2402 + }, + { + "epoch": 2.6, + "learning_rate": 2.7597743092349217e-05, + "loss": 2.3359, + "step": 2404 + }, + { + "epoch": 2.61, + "learning_rate": 2.752093305269565e-05, + "loss": 2.4345, + "step": 2406 + }, + { + "epoch": 2.61, + "learning_rate": 2.7444189442873115e-05, + "loss": 2.2828, + "step": 2408 + }, + { + "epoch": 2.61, + "learning_rate": 2.7367512489673312e-05, + "loss": 2.5291, + "step": 2410 + }, + { + "epoch": 2.61, + "learning_rate": 2.7290902419690895e-05, + "loss": 2.391, + "step": 2412 + }, + { + "epoch": 2.62, + "learning_rate": 2.7214359459322924e-05, + "loss": 2.06, + "step": 2414 + }, + { + "epoch": 2.62, + "learning_rate": 2.7137883834768073e-05, + "loss": 2.6396, + "step": 2416 + }, + { + "epoch": 2.62, + "learning_rate": 2.7061475772026086e-05, + "loss": 2.4143, + "step": 2418 + }, + { + "epoch": 2.62, + "learning_rate": 2.698513549689703e-05, + "loss": 2.4331, + "step": 2420 + }, + { + "epoch": 2.62, + "learning_rate": 2.6908863234980636e-05, + "loss": 2.4801, + "step": 2422 + }, + { + "epoch": 2.63, + "learning_rate": 2.6832659211675627e-05, + "loss": 2.3912, + "step": 2424 + }, + { + "epoch": 2.63, + "learning_rate": 2.67565236521791e-05, + "loss": 2.4359, + "step": 2426 + }, + { + "epoch": 2.63, + "learning_rate": 2.668045678148584e-05, + "loss": 2.5352, + "step": 2428 + }, + { + "epoch": 2.63, + "learning_rate": 2.6604458824387614e-05, + "loss": 2.4982, + "step": 2430 + }, + { + "epoch": 2.63, + "learning_rate": 2.6528530005472518e-05, + "loss": 2.4898, + "step": 2432 + }, + { + "epoch": 2.64, + "learning_rate": 2.6452670549124375e-05, + "loss": 2.5291, + "step": 2434 + }, + { + "epoch": 2.64, + "learning_rate": 2.637688067952204e-05, + "loss": 2.3178, + "step": 2436 + }, + { + "epoch": 2.64, + "learning_rate": 2.630116062063867e-05, + "loss": 2.5588, + "step": 2438 + }, + { + "epoch": 2.64, + "learning_rate": 2.622551059624113e-05, + "loss": 2.3862, + "step": 2440 + }, + { + "epoch": 2.65, + "learning_rate": 2.614993082988937e-05, + "loss": 2.3327, + "step": 2442 + }, + { + "epoch": 2.65, + "learning_rate": 2.607442154493568e-05, + "loss": 2.3323, + "step": 2444 + }, + { + "epoch": 2.65, + "learning_rate": 2.599898296452406e-05, + "loss": 2.237, + "step": 2446 + }, + { + "epoch": 2.65, + "learning_rate": 2.592361531158952e-05, + "loss": 2.1117, + "step": 2448 + }, + { + "epoch": 2.65, + "learning_rate": 2.5848318808857606e-05, + "loss": 2.3355, + "step": 2450 + }, + { + "epoch": 2.66, + "learning_rate": 2.5773093678843473e-05, + "loss": 2.3701, + "step": 2452 + }, + { + "epoch": 2.66, + "learning_rate": 2.5697940143851375e-05, + "loss": 2.4158, + "step": 2454 + }, + { + "epoch": 2.66, + "learning_rate": 2.5622858425974018e-05, + "loss": 2.4807, + "step": 2456 + }, + { + "epoch": 2.66, + "learning_rate": 2.5547848747091897e-05, + "loss": 2.2695, + "step": 2458 + }, + { + "epoch": 2.66, + "learning_rate": 2.5472911328872574e-05, + "loss": 2.4957, + "step": 2460 + }, + { + "epoch": 2.67, + "learning_rate": 2.5398046392770054e-05, + "loss": 2.3591, + "step": 2462 + }, + { + "epoch": 2.67, + "learning_rate": 2.532325416002419e-05, + "loss": 2.6757, + "step": 2464 + }, + { + "epoch": 2.67, + "learning_rate": 2.524853485166e-05, + "loss": 2.3271, + "step": 2466 + }, + { + "epoch": 2.67, + "learning_rate": 2.517388868848692e-05, + "loss": 2.3084, + "step": 2468 + }, + { + "epoch": 2.68, + "learning_rate": 2.5099315891098264e-05, + "loss": 2.3597, + "step": 2470 + }, + { + "epoch": 2.68, + "learning_rate": 2.5024816679870556e-05, + "loss": 2.5597, + "step": 2472 + }, + { + "epoch": 2.68, + "learning_rate": 2.495039127496287e-05, + "loss": 2.4667, + "step": 2474 + }, + { + "epoch": 2.68, + "learning_rate": 2.4876039896316123e-05, + "loss": 2.2991, + "step": 2476 + }, + { + "epoch": 2.68, + "learning_rate": 2.4801762763652474e-05, + "loss": 2.27, + "step": 2478 + }, + { + "epoch": 2.69, + "learning_rate": 2.4727560096474706e-05, + "loss": 2.5184, + "step": 2480 + }, + { + "epoch": 2.69, + "learning_rate": 2.4653432114065544e-05, + "loss": 2.5034, + "step": 2482 + }, + { + "epoch": 2.69, + "learning_rate": 2.457937903548695e-05, + "loss": 2.3994, + "step": 2484 + }, + { + "epoch": 2.69, + "learning_rate": 2.450540107957961e-05, + "loss": 2.229, + "step": 2486 + }, + { + "epoch": 2.7, + "learning_rate": 2.443149846496212e-05, + "loss": 2.4133, + "step": 2488 + }, + { + "epoch": 2.7, + "learning_rate": 2.4357671410030526e-05, + "loss": 2.5226, + "step": 2490 + }, + { + "epoch": 2.7, + "learning_rate": 2.4283920132957482e-05, + "loss": 2.3836, + "step": 2492 + }, + { + "epoch": 2.7, + "learning_rate": 2.42102448516918e-05, + "loss": 2.4287, + "step": 2494 + }, + { + "epoch": 2.7, + "learning_rate": 2.413664578395761e-05, + "loss": 2.4322, + "step": 2496 + }, + { + "epoch": 2.71, + "learning_rate": 2.4063123147253923e-05, + "loss": 2.3545, + "step": 2498 + }, + { + "epoch": 2.71, + "learning_rate": 2.398967715885379e-05, + "loss": 2.3359, + "step": 2500 + }, + { + "epoch": 2.71, + "learning_rate": 2.391630803580382e-05, + "loss": 2.4889, + "step": 2502 + }, + { + "epoch": 2.71, + "learning_rate": 2.3843015994923412e-05, + "loss": 2.3731, + "step": 2504 + }, + { + "epoch": 2.71, + "learning_rate": 2.3769801252804213e-05, + "loss": 2.2901, + "step": 2506 + }, + { + "epoch": 2.72, + "learning_rate": 2.3696664025809458e-05, + "loss": 2.3341, + "step": 2508 + }, + { + "epoch": 2.72, + "learning_rate": 2.3623604530073245e-05, + "loss": 2.3624, + "step": 2510 + }, + { + "epoch": 2.72, + "learning_rate": 2.3550622981499988e-05, + "loss": 2.4377, + "step": 2512 + }, + { + "epoch": 2.72, + "learning_rate": 2.3477719595763774e-05, + "loss": 2.2931, + "step": 2514 + }, + { + "epoch": 2.73, + "learning_rate": 2.340489458830772e-05, + "loss": 2.3726, + "step": 2516 + }, + { + "epoch": 2.73, + "learning_rate": 2.3332148174343254e-05, + "loss": 2.2644, + "step": 2518 + }, + { + "epoch": 2.73, + "learning_rate": 2.3259480568849586e-05, + "loss": 2.5434, + "step": 2520 + }, + { + "epoch": 2.73, + "learning_rate": 2.3186891986573035e-05, + "loss": 2.2445, + "step": 2522 + }, + { + "epoch": 2.73, + "learning_rate": 2.3114382642026404e-05, + "loss": 2.3012, + "step": 2524 + }, + { + "epoch": 2.74, + "learning_rate": 2.3041952749488304e-05, + "loss": 2.298, + "step": 2526 + }, + { + "epoch": 2.74, + "learning_rate": 2.2969602523002543e-05, + "loss": 2.3226, + "step": 2528 + }, + { + "epoch": 2.74, + "learning_rate": 2.2897332176377528e-05, + "loss": 2.4809, + "step": 2530 + }, + { + "epoch": 2.74, + "learning_rate": 2.2825141923185632e-05, + "loss": 2.3514, + "step": 2532 + }, + { + "epoch": 2.74, + "learning_rate": 2.275303197676248e-05, + "loss": 2.4344, + "step": 2534 + }, + { + "epoch": 2.75, + "learning_rate": 2.2681002550206355e-05, + "loss": 2.3313, + "step": 2536 + }, + { + "epoch": 2.75, + "learning_rate": 2.2609053856377714e-05, + "loss": 2.1924, + "step": 2538 + }, + { + "epoch": 2.75, + "learning_rate": 2.2537186107898313e-05, + "loss": 2.1984, + "step": 2540 + }, + { + "epoch": 2.75, + "learning_rate": 2.2465399517150722e-05, + "loss": 2.4612, + "step": 2542 + }, + { + "epoch": 2.76, + "learning_rate": 2.2393694296277707e-05, + "loss": 2.3225, + "step": 2544 + }, + { + "epoch": 2.76, + "learning_rate": 2.2322070657181583e-05, + "loss": 2.3635, + "step": 2546 + }, + { + "epoch": 2.76, + "learning_rate": 2.2250528811523513e-05, + "loss": 2.4144, + "step": 2548 + }, + { + "epoch": 2.76, + "learning_rate": 2.2179068970722978e-05, + "loss": 2.5847, + "step": 2550 + }, + { + "epoch": 2.76, + "learning_rate": 2.2107691345957133e-05, + "loss": 2.3221, + "step": 2552 + }, + { + "epoch": 2.77, + "learning_rate": 2.203639614816017e-05, + "loss": 2.4227, + "step": 2554 + }, + { + "epoch": 2.77, + "learning_rate": 2.196518358802268e-05, + "loss": 2.4364, + "step": 2556 + }, + { + "epoch": 2.77, + "learning_rate": 2.1894053875991017e-05, + "loss": 2.4288, + "step": 2558 + }, + { + "epoch": 2.77, + "learning_rate": 2.182300722226675e-05, + "loss": 2.3931, + "step": 2560 + }, + { + "epoch": 2.78, + "learning_rate": 2.1752043836806002e-05, + "loss": 2.4772, + "step": 2562 + }, + { + "epoch": 2.78, + "learning_rate": 2.1681163929318777e-05, + "loss": 2.3936, + "step": 2564 + }, + { + "epoch": 2.78, + "learning_rate": 2.1610367709268387e-05, + "loss": 2.3759, + "step": 2566 + }, + { + "epoch": 2.78, + "learning_rate": 2.1539655385870877e-05, + "loss": 2.4427, + "step": 2568 + }, + { + "epoch": 2.78, + "learning_rate": 2.1469027168094347e-05, + "loss": 2.387, + "step": 2570 + }, + { + "epoch": 2.79, + "learning_rate": 2.1398483264658313e-05, + "loss": 2.2637, + "step": 2572 + }, + { + "epoch": 2.79, + "learning_rate": 2.132802388403319e-05, + "loss": 2.3364, + "step": 2574 + }, + { + "epoch": 2.79, + "learning_rate": 2.125764923443953e-05, + "loss": 2.3348, + "step": 2576 + }, + { + "epoch": 2.79, + "learning_rate": 2.118735952384757e-05, + "loss": 2.34, + "step": 2578 + }, + { + "epoch": 2.79, + "learning_rate": 2.1117154959976482e-05, + "loss": 2.2867, + "step": 2580 + }, + { + "epoch": 2.8, + "learning_rate": 2.104703575029385e-05, + "loss": 2.4191, + "step": 2582 + }, + { + "epoch": 2.8, + "learning_rate": 2.097700210201497e-05, + "loss": 2.2275, + "step": 2584 + }, + { + "epoch": 2.8, + "learning_rate": 2.090705422210237e-05, + "loss": 2.6198, + "step": 2586 + }, + { + "epoch": 2.8, + "learning_rate": 2.0837192317265016e-05, + "loss": 2.364, + "step": 2588 + }, + { + "epoch": 2.81, + "learning_rate": 2.0767416593957894e-05, + "loss": 2.2663, + "step": 2590 + }, + { + "epoch": 2.81, + "learning_rate": 2.0697727258381238e-05, + "loss": 2.2649, + "step": 2592 + }, + { + "epoch": 2.81, + "learning_rate": 2.0628124516480046e-05, + "loss": 2.5761, + "step": 2594 + }, + { + "epoch": 2.81, + "learning_rate": 2.0558608573943354e-05, + "loss": 2.2132, + "step": 2596 + }, + { + "epoch": 2.81, + "learning_rate": 2.0489179636203766e-05, + "loss": 2.3719, + "step": 2598 + }, + { + "epoch": 2.82, + "learning_rate": 2.0419837908436688e-05, + "loss": 2.4978, + "step": 2600 + }, + { + "epoch": 2.82, + "learning_rate": 2.0350583595559865e-05, + "loss": 2.2988, + "step": 2602 + }, + { + "epoch": 2.82, + "learning_rate": 2.0281416902232708e-05, + "loss": 2.255, + "step": 2604 + }, + { + "epoch": 2.82, + "learning_rate": 2.021233803285567e-05, + "loss": 2.2799, + "step": 2606 + }, + { + "epoch": 2.83, + "learning_rate": 2.014334719156966e-05, + "loss": 2.2972, + "step": 2608 + }, + { + "epoch": 2.83, + "learning_rate": 2.0074444582255485e-05, + "loss": 2.4158, + "step": 2610 + }, + { + "epoch": 2.83, + "learning_rate": 2.0005630408533215e-05, + "loss": 2.3353, + "step": 2612 + }, + { + "epoch": 2.83, + "learning_rate": 1.9936904873761536e-05, + "loss": 2.3829, + "step": 2614 + }, + { + "epoch": 2.83, + "learning_rate": 1.9868268181037185e-05, + "loss": 2.1709, + "step": 2616 + }, + { + "epoch": 2.84, + "learning_rate": 1.9799720533194404e-05, + "loss": 2.549, + "step": 2618 + }, + { + "epoch": 2.84, + "learning_rate": 1.9731262132804274e-05, + "loss": 2.5804, + "step": 2620 + }, + { + "epoch": 2.84, + "learning_rate": 1.966289318217411e-05, + "loss": 2.5311, + "step": 2622 + }, + { + "epoch": 2.84, + "learning_rate": 1.959461388334686e-05, + "loss": 2.3825, + "step": 2624 + }, + { + "epoch": 2.84, + "learning_rate": 1.9526424438100642e-05, + "loss": 2.3505, + "step": 2626 + }, + { + "epoch": 2.85, + "learning_rate": 1.9458325047947938e-05, + "loss": 2.3793, + "step": 2628 + }, + { + "epoch": 2.85, + "learning_rate": 1.9390315914135125e-05, + "loss": 2.2617, + "step": 2630 + }, + { + "epoch": 2.85, + "learning_rate": 1.9322397237641875e-05, + "loss": 2.5081, + "step": 2632 + }, + { + "epoch": 2.85, + "learning_rate": 1.925456921918055e-05, + "loss": 2.578, + "step": 2634 + }, + { + "epoch": 2.86, + "learning_rate": 1.918683205919557e-05, + "loss": 2.3566, + "step": 2636 + }, + { + "epoch": 2.86, + "learning_rate": 1.9119185957862835e-05, + "loss": 2.5683, + "step": 2638 + }, + { + "epoch": 2.86, + "learning_rate": 1.9051631115089196e-05, + "loss": 2.213, + "step": 2640 + }, + { + "epoch": 2.86, + "learning_rate": 1.8984167730511825e-05, + "loss": 2.6764, + "step": 2642 + }, + { + "epoch": 2.86, + "learning_rate": 1.8916796003497572e-05, + "loss": 2.422, + "step": 2644 + }, + { + "epoch": 2.87, + "learning_rate": 1.8849516133142432e-05, + "loss": 2.1258, + "step": 2646 + }, + { + "epoch": 2.87, + "learning_rate": 1.8782328318270964e-05, + "loss": 2.505, + "step": 2648 + }, + { + "epoch": 2.87, + "learning_rate": 1.8715232757435704e-05, + "loss": 2.2427, + "step": 2650 + }, + { + "epoch": 2.87, + "learning_rate": 1.864822964891651e-05, + "loss": 2.3599, + "step": 2652 + }, + { + "epoch": 2.87, + "learning_rate": 1.8581319190720035e-05, + "loss": 2.679, + "step": 2654 + }, + { + "epoch": 2.88, + "learning_rate": 1.851450158057918e-05, + "loss": 2.3708, + "step": 2656 + }, + { + "epoch": 2.88, + "learning_rate": 1.844777701595244e-05, + "loss": 2.414, + "step": 2658 + }, + { + "epoch": 2.88, + "learning_rate": 1.83811456940233e-05, + "loss": 2.524, + "step": 2660 + }, + { + "epoch": 2.88, + "learning_rate": 1.8314607811699762e-05, + "loss": 2.4521, + "step": 2662 + }, + { + "epoch": 2.89, + "learning_rate": 1.824816356561364e-05, + "loss": 2.3931, + "step": 2664 + }, + { + "epoch": 2.89, + "learning_rate": 1.8181813152120092e-05, + "loss": 2.3704, + "step": 2666 + }, + { + "epoch": 2.89, + "learning_rate": 1.8115556767296914e-05, + "loss": 2.5238, + "step": 2668 + }, + { + "epoch": 2.89, + "learning_rate": 1.804939460694411e-05, + "loss": 2.5418, + "step": 2670 + }, + { + "epoch": 2.89, + "learning_rate": 1.7983326866583144e-05, + "loss": 2.6173, + "step": 2672 + }, + { + "epoch": 2.9, + "learning_rate": 1.7917353741456545e-05, + "loss": 2.4272, + "step": 2674 + }, + { + "epoch": 2.9, + "learning_rate": 1.7851475426527142e-05, + "loss": 2.4986, + "step": 2676 + }, + { + "epoch": 2.9, + "learning_rate": 1.7785692116477682e-05, + "loss": 2.4365, + "step": 2678 + }, + { + "epoch": 2.9, + "learning_rate": 1.772000400571005e-05, + "loss": 2.5869, + "step": 2680 + }, + { + "epoch": 2.91, + "learning_rate": 1.76544112883449e-05, + "loss": 2.4987, + "step": 2682 + }, + { + "epoch": 2.91, + "learning_rate": 1.7588914158220898e-05, + "loss": 2.4701, + "step": 2684 + }, + { + "epoch": 2.91, + "learning_rate": 1.7523512808894288e-05, + "loss": 2.5142, + "step": 2686 + }, + { + "epoch": 2.91, + "learning_rate": 1.7458207433638223e-05, + "loss": 2.518, + "step": 2688 + }, + { + "epoch": 2.91, + "learning_rate": 1.7392998225442263e-05, + "loss": 2.386, + "step": 2690 + }, + { + "epoch": 2.92, + "learning_rate": 1.732788537701179e-05, + "loss": 2.2214, + "step": 2692 + }, + { + "epoch": 2.92, + "learning_rate": 1.726286908076738e-05, + "loss": 2.358, + "step": 2694 + }, + { + "epoch": 2.92, + "learning_rate": 1.7197949528844286e-05, + "loss": 2.5727, + "step": 2696 + }, + { + "epoch": 2.92, + "learning_rate": 1.7133126913091903e-05, + "loss": 2.5317, + "step": 2698 + }, + { + "epoch": 2.92, + "learning_rate": 1.706840142507315e-05, + "loss": 2.2929, + "step": 2700 + }, + { + "epoch": 2.93, + "learning_rate": 1.700377325606388e-05, + "loss": 2.4207, + "step": 2702 + }, + { + "epoch": 2.93, + "learning_rate": 1.6939242597052373e-05, + "loss": 2.4398, + "step": 2704 + }, + { + "epoch": 2.93, + "learning_rate": 1.6874809638738754e-05, + "loss": 2.3671, + "step": 2706 + }, + { + "epoch": 2.93, + "learning_rate": 1.681047457153444e-05, + "loss": 2.5831, + "step": 2708 + }, + { + "epoch": 2.94, + "learning_rate": 1.6746237585561524e-05, + "loss": 2.414, + "step": 2710 + }, + { + "epoch": 2.94, + "learning_rate": 1.6682098870652236e-05, + "loss": 2.2996, + "step": 2712 + }, + { + "epoch": 2.94, + "learning_rate": 1.6618058616348492e-05, + "loss": 2.4037, + "step": 2714 + }, + { + "epoch": 2.94, + "learning_rate": 1.655411701190115e-05, + "loss": 2.592, + "step": 2716 + }, + { + "epoch": 2.94, + "learning_rate": 1.6490274246269533e-05, + "loss": 2.2498, + "step": 2718 + }, + { + "epoch": 2.95, + "learning_rate": 1.642653050812094e-05, + "loss": 2.2538, + "step": 2720 + }, + { + "epoch": 2.95, + "learning_rate": 1.636288598583e-05, + "loss": 2.3926, + "step": 2722 + }, + { + "epoch": 2.95, + "learning_rate": 1.629934086747813e-05, + "loss": 2.5224, + "step": 2724 + }, + { + "epoch": 2.95, + "learning_rate": 1.6235895340852964e-05, + "loss": 2.3785, + "step": 2726 + }, + { + "epoch": 2.96, + "learning_rate": 1.6172549593447877e-05, + "loss": 2.4254, + "step": 2728 + }, + { + "epoch": 2.96, + "learning_rate": 1.6109303812461375e-05, + "loss": 2.2977, + "step": 2730 + }, + { + "epoch": 2.96, + "learning_rate": 1.60461581847965e-05, + "loss": 2.4296, + "step": 2732 + }, + { + "epoch": 2.96, + "learning_rate": 1.598311289706033e-05, + "loss": 2.1917, + "step": 2734 + }, + { + "epoch": 2.96, + "learning_rate": 1.592016813556347e-05, + "loss": 2.361, + "step": 2736 + }, + { + "epoch": 2.97, + "learning_rate": 1.5857324086319414e-05, + "loss": 2.3198, + "step": 2738 + }, + { + "epoch": 2.97, + "learning_rate": 1.579458093504403e-05, + "loss": 2.3945, + "step": 2740 + }, + { + "epoch": 2.97, + "learning_rate": 1.5731938867155e-05, + "loss": 2.2314, + "step": 2742 + }, + { + "epoch": 2.97, + "learning_rate": 1.5669398067771324e-05, + "loss": 2.4571, + "step": 2744 + }, + { + "epoch": 2.97, + "learning_rate": 1.560695872171273e-05, + "loss": 2.2473, + "step": 2746 + }, + { + "epoch": 2.98, + "learning_rate": 1.5544621013499094e-05, + "loss": 2.4553, + "step": 2748 + }, + { + "epoch": 2.98, + "learning_rate": 1.548238512734998e-05, + "loss": 2.3213, + "step": 2750 + }, + { + "epoch": 2.98, + "learning_rate": 1.542025124718401e-05, + "loss": 2.3302, + "step": 2752 + }, + { + "epoch": 2.98, + "learning_rate": 1.535821955661839e-05, + "loss": 2.2468, + "step": 2754 + }, + { + "epoch": 2.99, + "learning_rate": 1.5296290238968303e-05, + "loss": 2.3087, + "step": 2756 + }, + { + "epoch": 2.99, + "learning_rate": 1.5234463477246452e-05, + "loss": 2.4679, + "step": 2758 + }, + { + "epoch": 2.99, + "learning_rate": 1.5172739454162405e-05, + "loss": 2.3439, + "step": 2760 + }, + { + "epoch": 2.99, + "learning_rate": 1.5111118352122183e-05, + "loss": 2.2882, + "step": 2762 + }, + { + "epoch": 2.99, + "learning_rate": 1.5049600353227588e-05, + "loss": 2.4456, + "step": 2764 + }, + { + "epoch": 3.0, + "learning_rate": 1.4988185639275798e-05, + "loss": 2.3367, + "step": 2766 + }, + { + "epoch": 3.0, + "learning_rate": 1.4926874391758716e-05, + "loss": 2.3341, + "step": 2768 + }, + { + "epoch": 3.0, + "learning_rate": 1.4865666791862521e-05, + "loss": 2.7886, + "step": 2770 + }, + { + "epoch": 3.0, + "learning_rate": 1.4804563020467044e-05, + "loss": 2.6835, + "step": 2772 + }, + { + "epoch": 3.01, + "learning_rate": 1.4743563258145353e-05, + "loss": 2.3864, + "step": 2774 + }, + { + "epoch": 3.01, + "learning_rate": 1.4682667685163071e-05, + "loss": 2.3261, + "step": 2776 + }, + { + "epoch": 3.01, + "learning_rate": 1.4621876481477987e-05, + "loss": 2.4467, + "step": 2778 + }, + { + "epoch": 3.01, + "learning_rate": 1.4561189826739446e-05, + "loss": 2.3331, + "step": 2780 + }, + { + "epoch": 3.01, + "learning_rate": 1.45006079002878e-05, + "loss": 2.2346, + "step": 2782 + }, + { + "epoch": 3.02, + "learning_rate": 1.4440130881153917e-05, + "loss": 2.3942, + "step": 2784 + }, + { + "epoch": 3.02, + "learning_rate": 1.437975894805867e-05, + "loss": 2.441, + "step": 2786 + }, + { + "epoch": 3.02, + "learning_rate": 1.4319492279412388e-05, + "loss": 2.3998, + "step": 2788 + }, + { + "epoch": 3.02, + "learning_rate": 1.425933105331429e-05, + "loss": 2.518, + "step": 2790 + }, + { + "epoch": 3.02, + "learning_rate": 1.419927544755199e-05, + "loss": 2.3147, + "step": 2792 + }, + { + "epoch": 3.03, + "learning_rate": 1.4139325639601015e-05, + "loss": 2.2925, + "step": 2794 + }, + { + "epoch": 3.03, + "learning_rate": 1.4079481806624217e-05, + "loss": 2.5182, + "step": 2796 + }, + { + "epoch": 3.03, + "learning_rate": 1.4019744125471274e-05, + "loss": 2.4969, + "step": 2798 + }, + { + "epoch": 3.03, + "learning_rate": 1.3960112772678125e-05, + "loss": 2.4316, + "step": 2800 + }, + { + "epoch": 3.04, + "learning_rate": 1.3900587924466585e-05, + "loss": 2.3239, + "step": 2802 + }, + { + "epoch": 3.04, + "learning_rate": 1.3841169756743649e-05, + "loss": 2.6349, + "step": 2804 + }, + { + "epoch": 3.04, + "learning_rate": 1.378185844510107e-05, + "loss": 2.3982, + "step": 2806 + }, + { + "epoch": 3.04, + "learning_rate": 1.3722654164814796e-05, + "loss": 2.4663, + "step": 2808 + }, + { + "epoch": 3.04, + "learning_rate": 1.366355709084456e-05, + "loss": 2.4762, + "step": 2810 + }, + { + "epoch": 3.05, + "learning_rate": 1.3604567397833201e-05, + "loss": 2.4103, + "step": 2812 + }, + { + "epoch": 3.05, + "learning_rate": 1.354568526010624e-05, + "loss": 2.4714, + "step": 2814 + }, + { + "epoch": 3.05, + "learning_rate": 1.3486910851671374e-05, + "loss": 2.1582, + "step": 2816 + }, + { + "epoch": 3.05, + "learning_rate": 1.342824434621795e-05, + "loss": 2.3474, + "step": 2818 + }, + { + "epoch": 3.06, + "learning_rate": 1.3369685917116408e-05, + "loss": 2.3022, + "step": 2820 + }, + { + "epoch": 3.06, + "learning_rate": 1.3311235737417793e-05, + "loss": 2.2013, + "step": 2822 + }, + { + "epoch": 3.06, + "learning_rate": 1.3252893979853304e-05, + "loss": 2.5426, + "step": 2824 + }, + { + "epoch": 3.06, + "learning_rate": 1.319466081683371e-05, + "loss": 2.3739, + "step": 2826 + }, + { + "epoch": 3.06, + "learning_rate": 1.3136536420448841e-05, + "loss": 2.3773, + "step": 2828 + }, + { + "epoch": 3.07, + "learning_rate": 1.307852096246711e-05, + "loss": 2.4481, + "step": 2830 + }, + { + "epoch": 3.07, + "learning_rate": 1.302061461433502e-05, + "loss": 2.5957, + "step": 2832 + }, + { + "epoch": 3.07, + "learning_rate": 1.2962817547176625e-05, + "loss": 2.5113, + "step": 2834 + }, + { + "epoch": 3.07, + "learning_rate": 1.2905129931793009e-05, + "loss": 2.3745, + "step": 2836 + }, + { + "epoch": 3.07, + "learning_rate": 1.2847551938661839e-05, + "loss": 2.3667, + "step": 2838 + }, + { + "epoch": 3.08, + "learning_rate": 1.2790083737936798e-05, + "loss": 2.3051, + "step": 2840 + }, + { + "epoch": 3.08, + "learning_rate": 1.2732725499447146e-05, + "loss": 2.2803, + "step": 2842 + }, + { + "epoch": 3.08, + "learning_rate": 1.2675477392697139e-05, + "loss": 2.3317, + "step": 2844 + }, + { + "epoch": 3.08, + "learning_rate": 1.2618339586865625e-05, + "loss": 2.5069, + "step": 2846 + }, + { + "epoch": 3.09, + "learning_rate": 1.2561312250805435e-05, + "loss": 2.403, + "step": 2848 + }, + { + "epoch": 3.09, + "learning_rate": 1.2504395553043008e-05, + "loss": 2.3479, + "step": 2850 + }, + { + "epoch": 3.09, + "learning_rate": 1.2447589661777759e-05, + "loss": 2.2771, + "step": 2852 + }, + { + "epoch": 3.09, + "learning_rate": 1.239089474488171e-05, + "loss": 2.3368, + "step": 2854 + }, + { + "epoch": 3.09, + "learning_rate": 1.2334310969898871e-05, + "loss": 2.3194, + "step": 2856 + }, + { + "epoch": 3.1, + "learning_rate": 1.227783850404487e-05, + "loss": 2.3038, + "step": 2858 + }, + { + "epoch": 3.1, + "learning_rate": 1.2221477514206337e-05, + "loss": 2.5084, + "step": 2860 + }, + { + "epoch": 3.1, + "learning_rate": 1.216522816694053e-05, + "loss": 2.2501, + "step": 2862 + }, + { + "epoch": 3.1, + "learning_rate": 1.2109090628474718e-05, + "loss": 2.2798, + "step": 2864 + }, + { + "epoch": 3.11, + "learning_rate": 1.2053065064705805e-05, + "loss": 2.2456, + "step": 2866 + }, + { + "epoch": 3.11, + "learning_rate": 1.1997151641199772e-05, + "loss": 2.4106, + "step": 2868 + }, + { + "epoch": 3.11, + "learning_rate": 1.1941350523191208e-05, + "loss": 2.4705, + "step": 2870 + }, + { + "epoch": 3.11, + "learning_rate": 1.1885661875582783e-05, + "loss": 2.5891, + "step": 2872 + }, + { + "epoch": 3.11, + "learning_rate": 1.183008586294485e-05, + "loss": 2.2367, + "step": 2874 + }, + { + "epoch": 3.12, + "learning_rate": 1.1774622649514889e-05, + "loss": 2.5675, + "step": 2876 + }, + { + "epoch": 3.12, + "learning_rate": 1.1719272399197023e-05, + "loss": 2.4596, + "step": 2878 + }, + { + "epoch": 3.12, + "learning_rate": 1.166403527556153e-05, + "loss": 2.2995, + "step": 2880 + }, + { + "epoch": 3.12, + "learning_rate": 1.1608911441844429e-05, + "loss": 2.2225, + "step": 2882 + }, + { + "epoch": 3.12, + "learning_rate": 1.155390106094692e-05, + "loss": 2.2498, + "step": 2884 + }, + { + "epoch": 3.13, + "learning_rate": 1.1499004295434918e-05, + "loss": 2.3428, + "step": 2886 + }, + { + "epoch": 3.13, + "learning_rate": 1.1444221307538571e-05, + "loss": 2.3654, + "step": 2888 + }, + { + "epoch": 3.13, + "learning_rate": 1.1389552259151864e-05, + "loss": 2.3089, + "step": 2890 + }, + { + "epoch": 3.13, + "learning_rate": 1.1334997311832002e-05, + "loss": 2.3778, + "step": 2892 + }, + { + "epoch": 3.14, + "learning_rate": 1.1280556626799005e-05, + "loss": 2.3831, + "step": 2894 + }, + { + "epoch": 3.14, + "learning_rate": 1.1226230364935226e-05, + "loss": 2.4711, + "step": 2896 + }, + { + "epoch": 3.14, + "learning_rate": 1.1172018686784935e-05, + "loss": 2.5057, + "step": 2898 + }, + { + "epoch": 3.14, + "learning_rate": 1.1117921752553723e-05, + "loss": 2.3913, + "step": 2900 + }, + { + "epoch": 3.14, + "learning_rate": 1.106393972210809e-05, + "loss": 2.5023, + "step": 2902 + }, + { + "epoch": 3.15, + "learning_rate": 1.1010072754975014e-05, + "loss": 2.3522, + "step": 2904 + }, + { + "epoch": 3.15, + "learning_rate": 1.095632101034143e-05, + "loss": 2.6258, + "step": 2906 + }, + { + "epoch": 3.15, + "learning_rate": 1.0902684647053735e-05, + "loss": 2.3644, + "step": 2908 + }, + { + "epoch": 3.15, + "learning_rate": 1.0849163823617375e-05, + "loss": 2.4708, + "step": 2910 + }, + { + "epoch": 3.15, + "learning_rate": 1.0795758698196368e-05, + "loss": 2.2643, + "step": 2912 + }, + { + "epoch": 3.16, + "learning_rate": 1.0742469428612816e-05, + "loss": 2.4429, + "step": 2914 + }, + { + "epoch": 3.16, + "learning_rate": 1.0689296172346431e-05, + "loss": 2.2625, + "step": 2916 + }, + { + "epoch": 3.16, + "learning_rate": 1.0636239086534072e-05, + "loss": 2.3484, + "step": 2918 + }, + { + "epoch": 3.16, + "learning_rate": 1.0583298327969338e-05, + "loss": 2.4041, + "step": 2920 + }, + { + "epoch": 3.17, + "learning_rate": 1.0530474053102034e-05, + "loss": 2.1622, + "step": 2922 + }, + { + "epoch": 3.17, + "learning_rate": 1.047776641803772e-05, + "loss": 2.417, + "step": 2924 + }, + { + "epoch": 3.17, + "learning_rate": 1.0425175578537299e-05, + "loss": 2.3336, + "step": 2926 + }, + { + "epoch": 3.17, + "learning_rate": 1.0372701690016474e-05, + "loss": 2.4013, + "step": 2928 + }, + { + "epoch": 3.17, + "learning_rate": 1.0320344907545388e-05, + "loss": 2.2072, + "step": 2930 + }, + { + "epoch": 3.18, + "learning_rate": 1.0268105385848064e-05, + "loss": 2.5827, + "step": 2932 + }, + { + "epoch": 3.18, + "learning_rate": 1.0215983279302049e-05, + "loss": 2.5077, + "step": 2934 + }, + { + "epoch": 3.18, + "learning_rate": 1.0163978741937847e-05, + "loss": 2.512, + "step": 2936 + }, + { + "epoch": 3.18, + "learning_rate": 1.0112091927438583e-05, + "loss": 2.457, + "step": 2938 + }, + { + "epoch": 3.19, + "learning_rate": 1.0060322989139442e-05, + "loss": 2.5785, + "step": 2940 + }, + { + "epoch": 3.19, + "learning_rate": 1.0008672080027298e-05, + "loss": 2.4435, + "step": 2942 + }, + { + "epoch": 3.19, + "learning_rate": 9.957139352740191e-06, + "loss": 2.2668, + "step": 2944 + }, + { + "epoch": 3.19, + "learning_rate": 9.90572495956696e-06, + "loss": 2.4811, + "step": 2946 + }, + { + "epoch": 3.19, + "learning_rate": 9.854429052446684e-06, + "loss": 2.4208, + "step": 2948 + }, + { + "epoch": 3.2, + "learning_rate": 9.803251782968358e-06, + "loss": 2.4254, + "step": 2950 + }, + { + "epoch": 3.2, + "learning_rate": 9.752193302370315e-06, + "loss": 2.3277, + "step": 2952 + }, + { + "epoch": 3.2, + "learning_rate": 9.701253761539897e-06, + "loss": 2.2717, + "step": 2954 + }, + { + "epoch": 3.2, + "learning_rate": 9.650433311012946e-06, + "loss": 2.4127, + "step": 2956 + }, + { + "epoch": 3.2, + "learning_rate": 9.599732100973357e-06, + "loss": 2.6305, + "step": 2958 + }, + { + "epoch": 3.21, + "learning_rate": 9.549150281252633e-06, + "loss": 2.3212, + "step": 2960 + }, + { + "epoch": 3.21, + "learning_rate": 9.498688001329486e-06, + "loss": 2.3556, + "step": 2962 + }, + { + "epoch": 3.21, + "learning_rate": 9.448345410329379e-06, + "loss": 2.3342, + "step": 2964 + }, + { + "epoch": 3.21, + "learning_rate": 9.398122657024022e-06, + "loss": 2.454, + "step": 2966 + }, + { + "epoch": 3.22, + "learning_rate": 9.348019889831006e-06, + "loss": 2.6068, + "step": 2968 + }, + { + "epoch": 3.22, + "learning_rate": 9.298037256813347e-06, + "loss": 2.6167, + "step": 2970 + }, + { + "epoch": 3.22, + "learning_rate": 9.248174905679058e-06, + "loss": 2.3684, + "step": 2972 + }, + { + "epoch": 3.22, + "learning_rate": 9.198432983780658e-06, + "loss": 2.5119, + "step": 2974 + }, + { + "epoch": 3.22, + "learning_rate": 9.14881163811479e-06, + "loss": 2.3928, + "step": 2976 + }, + { + "epoch": 3.23, + "learning_rate": 9.099311015321782e-06, + "loss": 2.3656, + "step": 2978 + }, + { + "epoch": 3.23, + "learning_rate": 9.049931261685207e-06, + "loss": 2.3461, + "step": 2980 + }, + { + "epoch": 3.23, + "learning_rate": 9.000672523131431e-06, + "loss": 2.3134, + "step": 2982 + }, + { + "epoch": 3.23, + "learning_rate": 8.951534945229172e-06, + "loss": 2.3456, + "step": 2984 + }, + { + "epoch": 3.23, + "learning_rate": 8.902518673189192e-06, + "loss": 2.3656, + "step": 2986 + }, + { + "epoch": 3.24, + "learning_rate": 8.853623851863663e-06, + "loss": 2.3751, + "step": 2988 + }, + { + "epoch": 3.24, + "learning_rate": 8.804850625745897e-06, + "loss": 2.5872, + "step": 2990 + }, + { + "epoch": 3.24, + "learning_rate": 8.756199138969866e-06, + "loss": 2.2217, + "step": 2992 + }, + { + "epoch": 3.24, + "learning_rate": 8.707669535309793e-06, + "loss": 2.0714, + "step": 2994 + }, + { + "epoch": 3.25, + "learning_rate": 8.659261958179688e-06, + "loss": 2.5951, + "step": 2996 + }, + { + "epoch": 3.25, + "learning_rate": 8.610976550632943e-06, + "loss": 2.3067, + "step": 2998 + }, + { + "epoch": 3.25, + "learning_rate": 8.562813455361957e-06, + "loss": 2.3471, + "step": 3000 + }, + { + "epoch": 3.25, + "learning_rate": 8.514772814697653e-06, + "loss": 2.4585, + "step": 3002 + }, + { + "epoch": 3.25, + "learning_rate": 8.466854770609062e-06, + "loss": 2.199, + "step": 3004 + }, + { + "epoch": 3.26, + "learning_rate": 8.419059464702927e-06, + "loss": 2.2591, + "step": 3006 + }, + { + "epoch": 3.26, + "learning_rate": 8.371387038223289e-06, + "loss": 2.3367, + "step": 3008 + }, + { + "epoch": 3.26, + "learning_rate": 8.323837632051062e-06, + "loss": 2.5848, + "step": 3010 + }, + { + "epoch": 3.26, + "learning_rate": 8.27641138670358e-06, + "loss": 2.2525, + "step": 3012 + }, + { + "epoch": 3.27, + "learning_rate": 8.229108442334255e-06, + "loss": 2.7048, + "step": 3014 + }, + { + "epoch": 3.27, + "learning_rate": 8.18192893873208e-06, + "loss": 2.4397, + "step": 3016 + }, + { + "epoch": 3.27, + "learning_rate": 8.134873015321303e-06, + "loss": 2.3919, + "step": 3018 + }, + { + "epoch": 3.27, + "learning_rate": 8.087940811160916e-06, + "loss": 2.3169, + "step": 3020 + }, + { + "epoch": 3.27, + "learning_rate": 8.041132464944351e-06, + "loss": 2.4048, + "step": 3022 + }, + { + "epoch": 3.28, + "learning_rate": 7.994448114998975e-06, + "loss": 2.5458, + "step": 3024 + }, + { + "epoch": 3.28, + "learning_rate": 7.947887899285761e-06, + "loss": 2.2902, + "step": 3026 + }, + { + "epoch": 3.28, + "learning_rate": 7.901451955398792e-06, + "loss": 2.4315, + "step": 3028 + }, + { + "epoch": 3.28, + "learning_rate": 7.855140420564965e-06, + "loss": 2.5107, + "step": 3030 + }, + { + "epoch": 3.28, + "learning_rate": 7.808953431643467e-06, + "loss": 2.3578, + "step": 3032 + }, + { + "epoch": 3.29, + "learning_rate": 7.762891125125476e-06, + "loss": 2.3267, + "step": 3034 + }, + { + "epoch": 3.29, + "learning_rate": 7.716953637133677e-06, + "loss": 2.3038, + "step": 3036 + }, + { + "epoch": 3.29, + "learning_rate": 7.671141103421919e-06, + "loss": 2.2405, + "step": 3038 + }, + { + "epoch": 3.29, + "learning_rate": 7.625453659374754e-06, + "loss": 2.2669, + "step": 3040 + }, + { + "epoch": 3.3, + "learning_rate": 7.579891440007103e-06, + "loss": 2.4222, + "step": 3042 + }, + { + "epoch": 3.3, + "learning_rate": 7.534454579963829e-06, + "loss": 2.4086, + "step": 3044 + }, + { + "epoch": 3.3, + "learning_rate": 7.489143213519301e-06, + "loss": 2.3461, + "step": 3046 + }, + { + "epoch": 3.3, + "learning_rate": 7.44395747457704e-06, + "loss": 2.224, + "step": 3048 + }, + { + "epoch": 3.3, + "learning_rate": 7.398897496669338e-06, + "loss": 2.316, + "step": 3050 + }, + { + "epoch": 3.31, + "learning_rate": 7.353963412956838e-06, + "loss": 2.4673, + "step": 3052 + }, + { + "epoch": 3.31, + "learning_rate": 7.309155356228109e-06, + "loss": 2.3921, + "step": 3054 + }, + { + "epoch": 3.31, + "learning_rate": 7.264473458899301e-06, + "loss": 2.3709, + "step": 3056 + }, + { + "epoch": 3.31, + "learning_rate": 7.219917853013764e-06, + "loss": 2.5216, + "step": 3058 + }, + { + "epoch": 3.32, + "learning_rate": 7.175488670241609e-06, + "loss": 2.4435, + "step": 3060 + }, + { + "epoch": 3.32, + "learning_rate": 7.131186041879357e-06, + "loss": 2.4123, + "step": 3062 + }, + { + "epoch": 3.32, + "learning_rate": 7.0870100988495004e-06, + "loss": 2.2985, + "step": 3064 + }, + { + "epoch": 3.32, + "learning_rate": 7.0429609717002076e-06, + "loss": 2.4648, + "step": 3066 + }, + { + "epoch": 3.32, + "learning_rate": 6.999038790604856e-06, + "loss": 2.4027, + "step": 3068 + }, + { + "epoch": 3.33, + "learning_rate": 6.955243685361673e-06, + "loss": 2.5828, + "step": 3070 + }, + { + "epoch": 3.33, + "learning_rate": 6.911575785393326e-06, + "loss": 2.2331, + "step": 3072 + }, + { + "epoch": 3.33, + "learning_rate": 6.868035219746638e-06, + "loss": 2.3046, + "step": 3074 + }, + { + "epoch": 3.33, + "learning_rate": 6.824622117092078e-06, + "loss": 2.3877, + "step": 3076 + }, + { + "epoch": 3.33, + "learning_rate": 6.781336605723432e-06, + "loss": 2.307, + "step": 3078 + }, + { + "epoch": 3.34, + "learning_rate": 6.738178813557472e-06, + "loss": 2.4418, + "step": 3080 + }, + { + "epoch": 3.34, + "learning_rate": 6.695148868133516e-06, + "loss": 2.3749, + "step": 3082 + }, + { + "epoch": 3.34, + "learning_rate": 6.652246896613068e-06, + "loss": 2.4227, + "step": 3084 + }, + { + "epoch": 3.34, + "learning_rate": 6.609473025779434e-06, + "loss": 2.5151, + "step": 3086 + }, + { + "epoch": 3.35, + "learning_rate": 6.566827382037383e-06, + "loss": 2.4882, + "step": 3088 + }, + { + "epoch": 3.35, + "learning_rate": 6.524310091412739e-06, + "loss": 2.3111, + "step": 3090 + }, + { + "epoch": 3.35, + "learning_rate": 6.481921279552023e-06, + "loss": 2.3321, + "step": 3092 + }, + { + "epoch": 3.35, + "learning_rate": 6.439661071722048e-06, + "loss": 2.2051, + "step": 3094 + }, + { + "epoch": 3.35, + "learning_rate": 6.397529592809614e-06, + "loss": 2.3448, + "step": 3096 + }, + { + "epoch": 3.36, + "learning_rate": 6.355526967321112e-06, + "loss": 2.4095, + "step": 3098 + }, + { + "epoch": 3.36, + "learning_rate": 6.313653319382107e-06, + "loss": 2.1535, + "step": 3100 + }, + { + "epoch": 3.36, + "learning_rate": 6.271908772737017e-06, + "loss": 2.3662, + "step": 3102 + }, + { + "epoch": 3.36, + "learning_rate": 6.2302934507487755e-06, + "loss": 2.1468, + "step": 3104 + }, + { + "epoch": 3.36, + "learning_rate": 6.188807476398412e-06, + "loss": 2.4795, + "step": 3106 + }, + { + "epoch": 3.37, + "learning_rate": 6.147450972284696e-06, + "loss": 2.353, + "step": 3108 + }, + { + "epoch": 3.37, + "learning_rate": 6.106224060623822e-06, + "loss": 2.3579, + "step": 3110 + }, + { + "epoch": 3.37, + "learning_rate": 6.065126863248976e-06, + "loss": 2.3739, + "step": 3112 + }, + { + "epoch": 3.37, + "learning_rate": 6.0241595016100545e-06, + "loss": 2.3866, + "step": 3114 + }, + { + "epoch": 3.38, + "learning_rate": 5.98332209677322e-06, + "loss": 2.2253, + "step": 3116 + }, + { + "epoch": 3.38, + "learning_rate": 5.942614769420629e-06, + "loss": 2.3605, + "step": 3118 + }, + { + "epoch": 3.38, + "learning_rate": 5.902037639850011e-06, + "loss": 2.3848, + "step": 3120 + }, + { + "epoch": 3.38, + "learning_rate": 5.86159082797435e-06, + "loss": 2.1943, + "step": 3122 + }, + { + "epoch": 3.38, + "learning_rate": 5.8212744533215016e-06, + "loss": 2.3193, + "step": 3124 + }, + { + "epoch": 3.39, + "learning_rate": 5.781088635033882e-06, + "loss": 2.5142, + "step": 3126 + }, + { + "epoch": 3.39, + "learning_rate": 5.741033491868047e-06, + "loss": 2.5701, + "step": 3128 + }, + { + "epoch": 3.39, + "learning_rate": 5.701109142194422e-06, + "loss": 2.5069, + "step": 3130 + }, + { + "epoch": 3.39, + "learning_rate": 5.6613157039969055e-06, + "loss": 2.4168, + "step": 3132 + }, + { + "epoch": 3.4, + "learning_rate": 5.621653294872514e-06, + "loss": 2.4338, + "step": 3134 + }, + { + "epoch": 3.4, + "learning_rate": 5.582122032031051e-06, + "loss": 2.4563, + "step": 3136 + }, + { + "epoch": 3.4, + "learning_rate": 5.542722032294761e-06, + "loss": 2.2138, + "step": 3138 + }, + { + "epoch": 3.4, + "learning_rate": 5.503453412098003e-06, + "loss": 2.6032, + "step": 3140 + }, + { + "epoch": 3.4, + "learning_rate": 5.464316287486859e-06, + "loss": 2.3332, + "step": 3142 + }, + { + "epoch": 3.41, + "learning_rate": 5.425310774118802e-06, + "loss": 2.3154, + "step": 3144 + }, + { + "epoch": 3.41, + "learning_rate": 5.386436987262416e-06, + "loss": 2.6818, + "step": 3146 + }, + { + "epoch": 3.41, + "learning_rate": 5.347695041796985e-06, + "loss": 2.2799, + "step": 3148 + }, + { + "epoch": 3.41, + "learning_rate": 5.309085052212165e-06, + "loss": 2.5646, + "step": 3150 + }, + { + "epoch": 3.41, + "learning_rate": 5.270607132607663e-06, + "loss": 2.3395, + "step": 3152 + }, + { + "epoch": 3.42, + "learning_rate": 5.232261396692911e-06, + "loss": 2.4606, + "step": 3154 + }, + { + "epoch": 3.42, + "learning_rate": 5.194047957786713e-06, + "loss": 2.3552, + "step": 3156 + }, + { + "epoch": 3.42, + "learning_rate": 5.155966928816885e-06, + "loss": 2.5682, + "step": 3158 + }, + { + "epoch": 3.42, + "learning_rate": 5.118018422319948e-06, + "loss": 2.4571, + "step": 3160 + }, + { + "epoch": 3.43, + "learning_rate": 5.080202550440849e-06, + "loss": 2.24, + "step": 3162 + }, + { + "epoch": 3.43, + "learning_rate": 5.042519424932513e-06, + "loss": 2.5308, + "step": 3164 + }, + { + "epoch": 3.43, + "learning_rate": 5.0049691571555925e-06, + "loss": 2.4177, + "step": 3166 + }, + { + "epoch": 3.43, + "learning_rate": 4.967551858078129e-06, + "loss": 2.6177, + "step": 3168 + }, + { + "epoch": 3.43, + "learning_rate": 4.930267638275221e-06, + "loss": 2.505, + "step": 3170 + }, + { + "epoch": 3.44, + "learning_rate": 4.893116607928677e-06, + "loss": 2.5166, + "step": 3172 + }, + { + "epoch": 3.44, + "learning_rate": 4.856098876826709e-06, + "loss": 2.0793, + "step": 3174 + }, + { + "epoch": 3.44, + "learning_rate": 4.819214554363616e-06, + "loss": 2.4421, + "step": 3176 + }, + { + "epoch": 3.44, + "learning_rate": 4.782463749539446e-06, + "loss": 2.3317, + "step": 3178 + }, + { + "epoch": 3.45, + "learning_rate": 4.745846570959672e-06, + "loss": 2.4747, + "step": 3180 + }, + { + "epoch": 3.45, + "learning_rate": 4.70936312683487e-06, + "loss": 2.2323, + "step": 3182 + }, + { + "epoch": 3.45, + "learning_rate": 4.673013524980424e-06, + "loss": 2.3297, + "step": 3184 + }, + { + "epoch": 3.45, + "learning_rate": 4.63679787281619e-06, + "loss": 2.5994, + "step": 3186 + }, + { + "epoch": 3.45, + "learning_rate": 4.6007162773661515e-06, + "loss": 2.2933, + "step": 3188 + }, + { + "epoch": 3.46, + "learning_rate": 4.564768845258139e-06, + "loss": 2.4649, + "step": 3190 + }, + { + "epoch": 3.46, + "learning_rate": 4.528955682723529e-06, + "loss": 2.3754, + "step": 3192 + }, + { + "epoch": 3.46, + "learning_rate": 4.4932768955968876e-06, + "loss": 2.6034, + "step": 3194 + }, + { + "epoch": 3.46, + "learning_rate": 4.4577325893156715e-06, + "loss": 2.5477, + "step": 3196 + }, + { + "epoch": 3.46, + "learning_rate": 4.422322868919937e-06, + "loss": 2.3983, + "step": 3198 + }, + { + "epoch": 3.47, + "learning_rate": 4.3870478390519884e-06, + "loss": 2.3261, + "step": 3200 + }, + { + "epoch": 3.47, + "learning_rate": 4.3519076039561345e-06, + "loss": 2.4168, + "step": 3202 + }, + { + "epoch": 3.47, + "learning_rate": 4.316902267478296e-06, + "loss": 2.4235, + "step": 3204 + }, + { + "epoch": 3.47, + "learning_rate": 4.2820319330657835e-06, + "loss": 2.2992, + "step": 3206 + }, + { + "epoch": 3.48, + "learning_rate": 4.2472967037669066e-06, + "loss": 2.4394, + "step": 3208 + }, + { + "epoch": 3.48, + "learning_rate": 4.2126966822307715e-06, + "loss": 2.374, + "step": 3210 + }, + { + "epoch": 3.48, + "learning_rate": 4.178231970706858e-06, + "loss": 2.4277, + "step": 3212 + }, + { + "epoch": 3.48, + "learning_rate": 4.1439026710448355e-06, + "loss": 2.4958, + "step": 3214 + }, + { + "epoch": 3.48, + "learning_rate": 4.109708884694158e-06, + "loss": 2.3339, + "step": 3216 + }, + { + "epoch": 3.49, + "learning_rate": 4.075650712703849e-06, + "loss": 2.3244, + "step": 3218 + }, + { + "epoch": 3.49, + "learning_rate": 4.041728255722154e-06, + "loss": 2.4202, + "step": 3220 + }, + { + "epoch": 3.49, + "learning_rate": 4.0079416139962525e-06, + "loss": 2.4348, + "step": 3222 + }, + { + "epoch": 3.49, + "learning_rate": 3.974290887371951e-06, + "loss": 2.5305, + "step": 3224 + }, + { + "epoch": 3.49, + "learning_rate": 3.940776175293431e-06, + "loss": 2.3909, + "step": 3226 + }, + { + "epoch": 3.5, + "learning_rate": 3.9073975768029124e-06, + "loss": 2.4669, + "step": 3228 + }, + { + "epoch": 3.5, + "learning_rate": 3.8741551905403735e-06, + "loss": 2.5117, + "step": 3230 + }, + { + "epoch": 3.5, + "learning_rate": 3.8410491147432395e-06, + "loss": 2.3205, + "step": 3232 + }, + { + "epoch": 3.5, + "learning_rate": 3.808079447246149e-06, + "loss": 2.5002, + "step": 3234 + }, + { + "epoch": 3.51, + "learning_rate": 3.7752462854806213e-06, + "loss": 2.5181, + "step": 3236 + }, + { + "epoch": 3.51, + "learning_rate": 3.7425497264747534e-06, + "loss": 2.5206, + "step": 3238 + }, + { + "epoch": 3.51, + "learning_rate": 3.7099898668529642e-06, + "loss": 2.4898, + "step": 3240 + }, + { + "epoch": 3.51, + "learning_rate": 3.677566802835708e-06, + "loss": 2.4225, + "step": 3242 + }, + { + "epoch": 3.51, + "learning_rate": 3.6452806302392007e-06, + "loss": 2.3201, + "step": 3244 + }, + { + "epoch": 3.52, + "learning_rate": 3.6131314444750765e-06, + "loss": 2.4289, + "step": 3246 + }, + { + "epoch": 3.52, + "learning_rate": 3.58111934055016e-06, + "loss": 2.2184, + "step": 3248 + }, + { + "epoch": 3.52, + "learning_rate": 3.5492444130662108e-06, + "loss": 2.5492, + "step": 3250 + }, + { + "epoch": 3.52, + "learning_rate": 3.517506756219563e-06, + "loss": 2.5086, + "step": 3252 + }, + { + "epoch": 3.53, + "learning_rate": 3.4859064638009033e-06, + "loss": 2.4952, + "step": 3254 + }, + { + "epoch": 3.53, + "learning_rate": 3.4544436291949867e-06, + "loss": 2.469, + "step": 3256 + }, + { + "epoch": 3.53, + "learning_rate": 3.4231183453803604e-06, + "loss": 2.3824, + "step": 3258 + }, + { + "epoch": 3.53, + "learning_rate": 3.391930704929064e-06, + "loss": 2.4994, + "step": 3260 + }, + { + "epoch": 3.53, + "learning_rate": 3.360880800006383e-06, + "loss": 2.545, + "step": 3262 + }, + { + "epoch": 3.54, + "learning_rate": 3.3299687223705745e-06, + "loss": 2.3291, + "step": 3264 + }, + { + "epoch": 3.54, + "learning_rate": 3.299194563372604e-06, + "loss": 2.5543, + "step": 3266 + }, + { + "epoch": 3.54, + "learning_rate": 3.2685584139558243e-06, + "loss": 2.3818, + "step": 3268 + }, + { + "epoch": 3.54, + "learning_rate": 3.238060364655765e-06, + "loss": 2.6038, + "step": 3270 + }, + { + "epoch": 3.54, + "learning_rate": 3.2077005055998533e-06, + "loss": 2.4691, + "step": 3272 + }, + { + "epoch": 3.55, + "learning_rate": 3.177478926507127e-06, + "loss": 2.4399, + "step": 3274 + }, + { + "epoch": 3.55, + "learning_rate": 3.1473957166879897e-06, + "loss": 2.5692, + "step": 3276 + }, + { + "epoch": 3.55, + "learning_rate": 3.117450965043911e-06, + "loss": 2.4183, + "step": 3278 + }, + { + "epoch": 3.55, + "learning_rate": 3.087644760067232e-06, + "loss": 2.4085, + "step": 3280 + }, + { + "epoch": 3.56, + "learning_rate": 3.0579771898408326e-06, + "loss": 2.2894, + "step": 3282 + }, + { + "epoch": 3.56, + "learning_rate": 3.0284483420379097e-06, + "loss": 2.2705, + "step": 3284 + }, + { + "epoch": 3.56, + "learning_rate": 2.9990583039217203e-06, + "loss": 2.2714, + "step": 3286 + }, + { + "epoch": 3.56, + "learning_rate": 2.9698071623452895e-06, + "loss": 2.366, + "step": 3288 + }, + { + "epoch": 3.56, + "learning_rate": 2.940695003751198e-06, + "loss": 2.3525, + "step": 3290 + }, + { + "epoch": 3.57, + "learning_rate": 2.9117219141712947e-06, + "loss": 2.3377, + "step": 3292 + }, + { + "epoch": 3.57, + "learning_rate": 2.8828879792264675e-06, + "loss": 2.1998, + "step": 3294 + }, + { + "epoch": 3.57, + "learning_rate": 2.854193284126344e-06, + "loss": 2.3437, + "step": 3296 + }, + { + "epoch": 3.57, + "learning_rate": 2.825637913669121e-06, + "loss": 2.2963, + "step": 3298 + }, + { + "epoch": 3.57, + "learning_rate": 2.797221952241219e-06, + "loss": 2.3955, + "step": 3300 + }, + { + "epoch": 3.58, + "learning_rate": 2.7689454838171147e-06, + "loss": 2.2326, + "step": 3302 + }, + { + "epoch": 3.58, + "learning_rate": 2.7408085919590264e-06, + "loss": 2.3897, + "step": 3304 + }, + { + "epoch": 3.58, + "learning_rate": 2.7128113598167137e-06, + "loss": 2.4245, + "step": 3306 + }, + { + "epoch": 3.58, + "learning_rate": 2.684953870127227e-06, + "loss": 2.488, + "step": 3308 + }, + { + "epoch": 3.59, + "learning_rate": 2.657236205214625e-06, + "loss": 2.3614, + "step": 3310 + }, + { + "epoch": 3.59, + "learning_rate": 2.6296584469897743e-06, + "loss": 2.1686, + "step": 3312 + }, + { + "epoch": 3.59, + "learning_rate": 2.6022206769500845e-06, + "loss": 2.6152, + "step": 3314 + }, + { + "epoch": 3.59, + "learning_rate": 2.574922976179295e-06, + "loss": 2.3362, + "step": 3316 + }, + { + "epoch": 3.59, + "learning_rate": 2.547765425347187e-06, + "loss": 2.382, + "step": 3318 + }, + { + "epoch": 3.6, + "learning_rate": 2.520748104709375e-06, + "loss": 2.4045, + "step": 3320 + }, + { + "epoch": 3.6, + "learning_rate": 2.493871094107081e-06, + "loss": 2.2771, + "step": 3322 + }, + { + "epoch": 3.6, + "learning_rate": 2.467134472966892e-06, + "loss": 2.3296, + "step": 3324 + }, + { + "epoch": 3.6, + "learning_rate": 2.4405383203004894e-06, + "loss": 2.3129, + "step": 3326 + }, + { + "epoch": 3.61, + "learning_rate": 2.414082714704463e-06, + "loss": 2.2268, + "step": 3328 + }, + { + "epoch": 3.61, + "learning_rate": 2.3877677343600524e-06, + "loss": 2.476, + "step": 3330 + }, + { + "epoch": 3.61, + "learning_rate": 2.36159345703294e-06, + "loss": 2.5804, + "step": 3332 + }, + { + "epoch": 3.61, + "learning_rate": 2.3355599600729915e-06, + "loss": 2.4219, + "step": 3334 + }, + { + "epoch": 3.61, + "learning_rate": 2.3096673204140108e-06, + "loss": 2.4168, + "step": 3336 + }, + { + "epoch": 3.62, + "learning_rate": 2.2839156145736174e-06, + "loss": 2.3116, + "step": 3338 + }, + { + "epoch": 3.62, + "learning_rate": 2.2583049186528704e-06, + "loss": 2.3238, + "step": 3340 + }, + { + "epoch": 3.62, + "learning_rate": 2.2328353083361562e-06, + "loss": 2.4897, + "step": 3342 + }, + { + "epoch": 3.62, + "learning_rate": 2.207506858890912e-06, + "loss": 2.307, + "step": 3344 + }, + { + "epoch": 3.62, + "learning_rate": 2.182319645167441e-06, + "loss": 2.3267, + "step": 3346 + }, + { + "epoch": 3.63, + "learning_rate": 2.1572737415986422e-06, + "loss": 2.424, + "step": 3348 + }, + { + "epoch": 3.63, + "learning_rate": 2.1323692221998257e-06, + "loss": 2.4612, + "step": 3350 + }, + { + "epoch": 3.63, + "learning_rate": 2.1076061605684818e-06, + "loss": 2.4219, + "step": 3352 + }, + { + "epoch": 3.63, + "learning_rate": 2.0829846298840884e-06, + "loss": 2.4251, + "step": 3354 + }, + { + "epoch": 3.64, + "learning_rate": 2.058504702907843e-06, + "loss": 2.5063, + "step": 3356 + }, + { + "epoch": 3.64, + "learning_rate": 2.0341664519824887e-06, + "loss": 2.5947, + "step": 3358 + }, + { + "epoch": 3.64, + "learning_rate": 2.009969949032098e-06, + "loss": 2.4399, + "step": 3360 + }, + { + "epoch": 3.64, + "learning_rate": 1.9859152655618498e-06, + "loss": 2.405, + "step": 3362 + }, + { + "epoch": 3.64, + "learning_rate": 1.962002472657809e-06, + "loss": 2.3689, + "step": 3364 + }, + { + "epoch": 3.65, + "learning_rate": 1.9382316409867264e-06, + "loss": 2.451, + "step": 3366 + }, + { + "epoch": 3.65, + "learning_rate": 1.9146028407958484e-06, + "loss": 2.3364, + "step": 3368 + }, + { + "epoch": 3.65, + "learning_rate": 1.8911161419126854e-06, + "loss": 2.3012, + "step": 3370 + }, + { + "epoch": 3.65, + "learning_rate": 1.8677716137447954e-06, + "loss": 2.3833, + "step": 3372 + }, + { + "epoch": 3.66, + "learning_rate": 1.844569325279627e-06, + "loss": 2.5041, + "step": 3374 + }, + { + "epoch": 3.66, + "learning_rate": 1.8215093450842435e-06, + "loss": 2.2862, + "step": 3376 + }, + { + "epoch": 3.66, + "learning_rate": 1.7985917413052055e-06, + "loss": 2.3316, + "step": 3378 + }, + { + "epoch": 3.66, + "learning_rate": 1.7758165816682826e-06, + "loss": 2.2273, + "step": 3380 + }, + { + "epoch": 3.66, + "learning_rate": 1.7531839334783306e-06, + "loss": 2.2901, + "step": 3382 + }, + { + "epoch": 3.67, + "learning_rate": 1.7306938636190262e-06, + "loss": 2.5521, + "step": 3384 + }, + { + "epoch": 3.67, + "learning_rate": 1.7083464385527325e-06, + "loss": 2.3896, + "step": 3386 + }, + { + "epoch": 3.67, + "learning_rate": 1.686141724320245e-06, + "loss": 2.2818, + "step": 3388 + }, + { + "epoch": 3.67, + "learning_rate": 1.6640797865406288e-06, + "loss": 2.2733, + "step": 3390 + }, + { + "epoch": 3.67, + "learning_rate": 1.6421606904110264e-06, + "loss": 2.4238, + "step": 3392 + }, + { + "epoch": 3.68, + "learning_rate": 1.6203845007064455e-06, + "loss": 2.6201, + "step": 3394 + }, + { + "epoch": 3.68, + "learning_rate": 1.5987512817795924e-06, + "loss": 2.422, + "step": 3396 + }, + { + "epoch": 3.68, + "learning_rate": 1.5772610975606561e-06, + "loss": 2.2933, + "step": 3398 + }, + { + "epoch": 3.68, + "learning_rate": 1.5559140115571246e-06, + "loss": 2.4014, + "step": 3400 + }, + { + "epoch": 3.69, + "learning_rate": 1.5347100868536246e-06, + "loss": 2.3193, + "step": 3402 + }, + { + "epoch": 3.69, + "learning_rate": 1.5136493861117097e-06, + "loss": 2.4959, + "step": 3404 + }, + { + "epoch": 3.69, + "learning_rate": 1.4927319715696607e-06, + "loss": 2.3566, + "step": 3406 + }, + { + "epoch": 3.69, + "learning_rate": 1.4719579050423427e-06, + "loss": 2.3291, + "step": 3408 + }, + { + "epoch": 3.69, + "learning_rate": 1.4513272479209917e-06, + "loss": 2.3138, + "step": 3410 + }, + { + "epoch": 3.7, + "learning_rate": 1.43084006117305e-06, + "loss": 2.2497, + "step": 3412 + }, + { + "epoch": 3.7, + "learning_rate": 1.41049640534196e-06, + "loss": 2.2461, + "step": 3414 + }, + { + "epoch": 3.7, + "learning_rate": 1.3902963405470148e-06, + "loss": 2.3886, + "step": 3416 + }, + { + "epoch": 3.7, + "learning_rate": 1.37023992648318e-06, + "loss": 2.2535, + "step": 3418 + }, + { + "epoch": 3.7, + "learning_rate": 1.3503272224208884e-06, + "loss": 2.3367, + "step": 3420 + }, + { + "epoch": 3.71, + "learning_rate": 1.3305582872058963e-06, + "loss": 2.4806, + "step": 3422 + }, + { + "epoch": 3.71, + "learning_rate": 1.3109331792590773e-06, + "loss": 2.4335, + "step": 3424 + }, + { + "epoch": 3.71, + "learning_rate": 1.2914519565763062e-06, + "loss": 2.4195, + "step": 3426 + }, + { + "epoch": 3.71, + "learning_rate": 1.2721146767282033e-06, + "loss": 2.4332, + "step": 3428 + }, + { + "epoch": 3.72, + "learning_rate": 1.2529213968600406e-06, + "loss": 2.2733, + "step": 3430 + }, + { + "epoch": 3.72, + "learning_rate": 1.233872173691536e-06, + "loss": 2.3522, + "step": 3432 + }, + { + "epoch": 3.72, + "learning_rate": 1.2149670635166976e-06, + "loss": 2.5166, + "step": 3434 + }, + { + "epoch": 3.72, + "learning_rate": 1.196206122203647e-06, + "loss": 2.4279, + "step": 3436 + }, + { + "epoch": 3.72, + "learning_rate": 1.1775894051944514e-06, + "loss": 2.2575, + "step": 3438 + }, + { + "epoch": 3.73, + "learning_rate": 1.1591169675049863e-06, + "loss": 2.514, + "step": 3440 + }, + { + "epoch": 3.73, + "learning_rate": 1.140788863724751e-06, + "loss": 2.4809, + "step": 3442 + }, + { + "epoch": 3.73, + "learning_rate": 1.1226051480167032e-06, + "loss": 2.444, + "step": 3444 + }, + { + "epoch": 3.73, + "learning_rate": 1.1045658741171028e-06, + "loss": 2.3813, + "step": 3446 + }, + { + "epoch": 3.74, + "learning_rate": 1.0866710953353731e-06, + "loss": 2.3445, + "step": 3448 + }, + { + "epoch": 3.74, + "learning_rate": 1.068920864553924e-06, + "loss": 2.4029, + "step": 3450 + }, + { + "epoch": 3.74, + "learning_rate": 1.0513152342279842e-06, + "loss": 2.4019, + "step": 3452 + }, + { + "epoch": 3.74, + "learning_rate": 1.0338542563854748e-06, + "loss": 2.3407, + "step": 3454 + }, + { + "epoch": 3.74, + "learning_rate": 1.0165379826268417e-06, + "loss": 2.414, + "step": 3456 + }, + { + "epoch": 3.75, + "learning_rate": 9.993664641249012e-07, + "loss": 2.4748, + "step": 3458 + }, + { + "epoch": 3.75, + "learning_rate": 9.823397516246834e-07, + "loss": 2.2681, + "step": 3460 + }, + { + "epoch": 3.75, + "learning_rate": 9.654578954433059e-07, + "loss": 2.3164, + "step": 3462 + }, + { + "epoch": 3.75, + "learning_rate": 9.487209454697887e-07, + "loss": 2.382, + "step": 3464 + }, + { + "epoch": 3.75, + "learning_rate": 9.321289511649456e-07, + "loss": 2.3299, + "step": 3466 + }, + { + "epoch": 3.76, + "learning_rate": 9.156819615612044e-07, + "loss": 2.3526, + "step": 3468 + }, + { + "epoch": 3.76, + "learning_rate": 8.993800252624862e-07, + "loss": 2.4167, + "step": 3470 + }, + { + "epoch": 3.76, + "learning_rate": 8.832231904440491e-07, + "loss": 2.5703, + "step": 3472 + }, + { + "epoch": 3.76, + "learning_rate": 8.672115048523554e-07, + "loss": 2.5794, + "step": 3474 + }, + { + "epoch": 3.77, + "learning_rate": 8.513450158049108e-07, + "loss": 2.3276, + "step": 3476 + }, + { + "epoch": 3.77, + "learning_rate": 8.356237701901582e-07, + "loss": 2.3394, + "step": 3478 + }, + { + "epoch": 3.77, + "learning_rate": 8.200478144672952e-07, + "loss": 2.3505, + "step": 3480 + }, + { + "epoch": 3.77, + "learning_rate": 8.046171946661796e-07, + "loss": 2.4532, + "step": 3482 + }, + { + "epoch": 3.77, + "learning_rate": 7.893319563871682e-07, + "loss": 2.513, + "step": 3484 + }, + { + "epoch": 3.78, + "learning_rate": 7.741921448009837e-07, + "loss": 2.4655, + "step": 3486 + }, + { + "epoch": 3.78, + "learning_rate": 7.591978046485926e-07, + "loss": 2.605, + "step": 3488 + }, + { + "epoch": 3.78, + "learning_rate": 7.443489802410663e-07, + "loss": 2.4451, + "step": 3490 + }, + { + "epoch": 3.78, + "learning_rate": 7.296457154594482e-07, + "loss": 2.5196, + "step": 3492 + }, + { + "epoch": 3.79, + "learning_rate": 7.150880537546201e-07, + "loss": 2.2368, + "step": 3494 + }, + { + "epoch": 3.79, + "learning_rate": 7.006760381471856e-07, + "loss": 2.4034, + "step": 3496 + }, + { + "epoch": 3.79, + "learning_rate": 6.86409711227337e-07, + "loss": 2.5032, + "step": 3498 + }, + { + "epoch": 3.79, + "learning_rate": 6.722891151547284e-07, + "loss": 2.3998, + "step": 3500 + }, + { + "epoch": 3.79, + "learning_rate": 6.583142916583574e-07, + "loss": 2.4205, + "step": 3502 + }, + { + "epoch": 3.8, + "learning_rate": 6.444852820364222e-07, + "loss": 2.3034, + "step": 3504 + }, + { + "epoch": 3.8, + "learning_rate": 6.30802127156227e-07, + "loss": 2.3639, + "step": 3506 + }, + { + "epoch": 3.8, + "learning_rate": 6.172648674540426e-07, + "loss": 2.3491, + "step": 3508 + }, + { + "epoch": 3.8, + "learning_rate": 6.038735429349962e-07, + "loss": 2.4925, + "step": 3510 + }, + { + "epoch": 3.8, + "learning_rate": 5.90628193172943e-07, + "loss": 2.2371, + "step": 3512 + }, + { + "epoch": 3.81, + "learning_rate": 5.775288573103666e-07, + "loss": 2.3831, + "step": 3514 + }, + { + "epoch": 3.81, + "learning_rate": 5.645755740582404e-07, + "loss": 2.6221, + "step": 3516 + }, + { + "epoch": 3.81, + "learning_rate": 5.517683816959219e-07, + "loss": 2.4171, + "step": 3518 + }, + { + "epoch": 3.81, + "learning_rate": 5.391073180710638e-07, + "loss": 2.3518, + "step": 3520 + }, + { + "epoch": 3.82, + "learning_rate": 5.265924205994644e-07, + "loss": 2.4263, + "step": 3522 + }, + { + "epoch": 3.82, + "learning_rate": 5.14223726264973e-07, + "loss": 2.3707, + "step": 3524 + }, + { + "epoch": 3.82, + "learning_rate": 5.020012716193901e-07, + "loss": 2.2659, + "step": 3526 + }, + { + "epoch": 3.82, + "learning_rate": 4.899250927823396e-07, + "loss": 2.3573, + "step": 3528 + }, + { + "epoch": 3.82, + "learning_rate": 4.779952254411913e-07, + "loss": 2.2359, + "step": 3530 + }, + { + "epoch": 3.83, + "learning_rate": 4.662117048509218e-07, + "loss": 2.3461, + "step": 3532 + }, + { + "epoch": 3.83, + "learning_rate": 4.545745658340206e-07, + "loss": 2.4581, + "step": 3534 + }, + { + "epoch": 3.83, + "learning_rate": 4.4308384278041183e-07, + "loss": 2.4515, + "step": 3536 + }, + { + "epoch": 3.83, + "learning_rate": 4.317395696473214e-07, + "loss": 2.4953, + "step": 3538 + }, + { + "epoch": 3.83, + "learning_rate": 4.2054177995919374e-07, + "loss": 2.5276, + "step": 3540 + }, + { + "epoch": 3.84, + "learning_rate": 4.094905068075694e-07, + "loss": 2.323, + "step": 3542 + }, + { + "epoch": 3.84, + "learning_rate": 3.985857828510353e-07, + "loss": 2.4943, + "step": 3544 + }, + { + "epoch": 3.84, + "learning_rate": 3.878276403150749e-07, + "loss": 2.4179, + "step": 3546 + }, + { + "epoch": 3.84, + "learning_rate": 3.7721611099200693e-07, + "loss": 2.3685, + "step": 3548 + }, + { + "epoch": 3.85, + "learning_rate": 3.6675122624087454e-07, + "loss": 2.4998, + "step": 3550 + }, + { + "epoch": 3.85, + "learning_rate": 3.5643301698736196e-07, + "loss": 2.484, + "step": 3552 + }, + { + "epoch": 3.85, + "learning_rate": 3.462615137237002e-07, + "loss": 2.3272, + "step": 3554 + }, + { + "epoch": 3.85, + "learning_rate": 3.3623674650857806e-07, + "loss": 2.3971, + "step": 3556 + }, + { + "epoch": 3.85, + "learning_rate": 3.2635874496705356e-07, + "loss": 2.544, + "step": 3558 + }, + { + "epoch": 3.86, + "learning_rate": 3.1662753829045375e-07, + "loss": 2.6006, + "step": 3560 + }, + { + "epoch": 3.86, + "learning_rate": 3.0704315523631953e-07, + "loss": 2.3817, + "step": 3562 + }, + { + "epoch": 3.86, + "learning_rate": 2.976056241282721e-07, + "loss": 2.4897, + "step": 3564 + }, + { + "epoch": 3.86, + "learning_rate": 2.8831497285599085e-07, + "loss": 2.3797, + "step": 3566 + }, + { + "epoch": 3.87, + "learning_rate": 2.7917122887506364e-07, + "loss": 2.4418, + "step": 3568 + }, + { + "epoch": 3.87, + "learning_rate": 2.701744192069755e-07, + "loss": 2.4906, + "step": 3570 + }, + { + "epoch": 3.87, + "learning_rate": 2.613245704389644e-07, + "loss": 2.4531, + "step": 3572 + }, + { + "epoch": 3.87, + "learning_rate": 2.5262170872398796e-07, + "loss": 2.3002, + "step": 3574 + }, + { + "epoch": 3.87, + "learning_rate": 2.440658597806178e-07, + "loss": 2.5243, + "step": 3576 + }, + { + "epoch": 3.88, + "learning_rate": 2.3565704889298434e-07, + "loss": 2.4181, + "step": 3578 + }, + { + "epoch": 3.88, + "learning_rate": 2.2739530091069328e-07, + "loss": 2.2211, + "step": 3580 + }, + { + "epoch": 3.88, + "learning_rate": 2.1928064024874796e-07, + "loss": 2.0089, + "step": 3582 + }, + { + "epoch": 3.88, + "learning_rate": 2.113130908874772e-07, + "loss": 2.269, + "step": 3584 + }, + { + "epoch": 3.88, + "learning_rate": 2.0349267637247982e-07, + "loss": 2.4197, + "step": 3586 + }, + { + "epoch": 3.89, + "learning_rate": 1.9581941981453579e-07, + "loss": 2.319, + "step": 3588 + }, + { + "epoch": 3.89, + "learning_rate": 1.8829334388955067e-07, + "loss": 2.2629, + "step": 3590 + }, + { + "epoch": 3.89, + "learning_rate": 1.80914470838478e-07, + "loss": 2.5493, + "step": 3592 + }, + { + "epoch": 3.89, + "learning_rate": 1.7368282246726376e-07, + "loss": 2.3166, + "step": 3594 + }, + { + "epoch": 3.9, + "learning_rate": 1.6659842014677406e-07, + "loss": 2.5148, + "step": 3596 + }, + { + "epoch": 3.9, + "learning_rate": 1.596612848127399e-07, + "loss": 2.339, + "step": 3598 + }, + { + "epoch": 3.9, + "learning_rate": 1.5287143696568473e-07, + "loss": 2.308, + "step": 3600 + }, + { + "epoch": 3.9, + "learning_rate": 1.462288966708858e-07, + "loss": 2.5463, + "step": 3602 + }, + { + "epoch": 3.9, + "learning_rate": 1.397336835582741e-07, + "loss": 2.6243, + "step": 3604 + }, + { + "epoch": 3.91, + "learning_rate": 1.333858168224178e-07, + "loss": 2.4714, + "step": 3606 + }, + { + "epoch": 3.91, + "learning_rate": 1.2718531522244447e-07, + "loss": 2.2877, + "step": 3608 + }, + { + "epoch": 3.91, + "learning_rate": 1.211321970820023e-07, + "loss": 2.3737, + "step": 3610 + }, + { + "epoch": 3.91, + "learning_rate": 1.1522648028917116e-07, + "loss": 2.6491, + "step": 3612 + }, + { + "epoch": 3.91, + "learning_rate": 1.0946818229644607e-07, + "loss": 2.1288, + "step": 3614 + }, + { + "epoch": 3.92, + "learning_rate": 1.0385732012067607e-07, + "loss": 2.4494, + "step": 3616 + }, + { + "epoch": 3.92, + "learning_rate": 9.839391034300316e-08, + "loss": 2.5278, + "step": 3618 + }, + { + "epoch": 3.92, + "learning_rate": 9.307796910881794e-08, + "loss": 2.2621, + "step": 3620 + }, + { + "epoch": 3.92, + "learning_rate": 8.790951212771514e-08, + "loss": 2.2438, + "step": 3622 + }, + { + "epoch": 3.93, + "learning_rate": 8.28885546734548e-08, + "loss": 2.5771, + "step": 3624 + }, + { + "epoch": 3.93, + "learning_rate": 7.801511158390118e-08, + "loss": 2.1801, + "step": 3626 + }, + { + "epoch": 3.93, + "learning_rate": 7.328919726097838e-08, + "loss": 2.3484, + "step": 3628 + }, + { + "epoch": 3.93, + "learning_rate": 6.871082567065367e-08, + "loss": 2.5062, + "step": 3630 + }, + { + "epoch": 3.93, + "learning_rate": 6.42800103428598e-08, + "loss": 2.2535, + "step": 3632 + }, + { + "epoch": 3.94, + "learning_rate": 5.999676437148938e-08, + "loss": 2.4677, + "step": 3634 + }, + { + "epoch": 3.94, + "learning_rate": 5.5861100414322796e-08, + "loss": 2.2292, + "step": 3636 + }, + { + "epoch": 3.94, + "learning_rate": 5.1873030693028177e-08, + "loss": 2.3609, + "step": 3638 + }, + { + "epoch": 3.94, + "learning_rate": 4.8032566993089225e-08, + "loss": 2.4992, + "step": 3640 + }, + { + "epoch": 3.95, + "learning_rate": 4.4339720663788555e-08, + "loss": 2.5409, + "step": 3642 + }, + { + "epoch": 3.95, + "learning_rate": 4.079450261817997e-08, + "loss": 2.4727, + "step": 3644 + }, + { + "epoch": 3.95, + "learning_rate": 3.739692333304401e-08, + "loss": 2.2859, + "step": 3646 + }, + { + "epoch": 3.95, + "learning_rate": 3.4146992848854695e-08, + "loss": 2.3062, + "step": 3648 + }, + { + "epoch": 3.95, + "learning_rate": 3.104472076976839e-08, + "loss": 2.3304, + "step": 3650 + }, + { + "epoch": 3.96, + "learning_rate": 2.809011626357383e-08, + "loss": 2.4489, + "step": 3652 + }, + { + "epoch": 3.96, + "learning_rate": 2.528318806168106e-08, + "loss": 2.3078, + "step": 3654 + }, + { + "epoch": 3.96, + "learning_rate": 2.2623944459082557e-08, + "loss": 2.3173, + "step": 3656 + }, + { + "epoch": 3.96, + "learning_rate": 2.0112393314336565e-08, + "loss": 2.3973, + "step": 3658 + }, + { + "epoch": 3.96, + "learning_rate": 1.7748542049550453e-08, + "loss": 2.4975, + "step": 3660 + }, + { + "epoch": 3.97, + "learning_rate": 1.553239765034187e-08, + "loss": 2.4494, + "step": 3662 + }, + { + "epoch": 3.97, + "learning_rate": 1.346396666582761e-08, + "loss": 2.4442, + "step": 3664 + }, + { + "epoch": 3.97, + "learning_rate": 1.1543255208612546e-08, + "loss": 2.4622, + "step": 3666 + }, + { + "epoch": 3.97, + "learning_rate": 9.770268954756301e-09, + "loss": 2.3676, + "step": 3668 + }, + { + "epoch": 3.98, + "learning_rate": 8.145013143756597e-09, + "loss": 2.5927, + "step": 3670 + }, + { + "epoch": 3.98, + "learning_rate": 6.6674925785548125e-09, + "loss": 2.4922, + "step": 3672 + }, + { + "epoch": 3.98, + "learning_rate": 5.337711625497121e-09, + "loss": 2.1202, + "step": 3674 + }, + { + "epoch": 3.98, + "learning_rate": 4.155674214328942e-09, + "loss": 2.5643, + "step": 3676 + }, + { + "epoch": 3.98, + "learning_rate": 3.1213838382004867e-09, + "loss": 2.2801, + "step": 3678 + }, + { + "epoch": 3.99, + "learning_rate": 2.234843553627908e-09, + "loss": 2.4424, + "step": 3680 + }, + { + "epoch": 3.99, + "learning_rate": 1.496055980498845e-09, + "loss": 2.6128, + "step": 3682 + }, + { + "epoch": 3.99, + "learning_rate": 9.050233020779786e-10, + "loss": 2.3174, + "step": 3684 + }, + { + "epoch": 3.99, + "learning_rate": 4.6174726496817087e-10, + "loss": 2.4364, + "step": 3686 + }, + { + "epoch": 4.0, + "learning_rate": 1.6622917913267088e-10, + "loss": 2.5562, + "step": 3688 + }, + { + "epoch": 4.0, + "learning_rate": 1.8469917889563094e-11, + "loss": 2.2758, + "step": 3690 + }, + { + "epoch": 4.0, + "learning_rate": 1.8469917889563094e-11, + "loss": 2.4287, + "step": 3692 + }, + { + "epoch": 4.0, + "step": 3692, + "total_flos": 8.7881966778581e+16, + "train_loss": 2.440685138283933, + "train_runtime": 22362.2858, + "train_samples_per_second": 10.572, + "train_steps_per_second": 0.165 + } + ], + "max_steps": 3692, + "num_train_epochs": 4, + "total_flos": 8.7881966778581e+16, + "trial_name": null, + "trial_params": null +}