{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.998925020155872, "eval_steps": 500, "global_step": 3720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.571428571428571e-08, "loss": 1.7752, "step": 2 }, { "epoch": 0.0, "learning_rate": 7.142857142857142e-08, "loss": 1.7952, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.0714285714285713e-07, "loss": 1.7784, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.4285714285714285e-07, "loss": 1.7848, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.7857142857142858e-07, "loss": 1.7954, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.1428571428571426e-07, "loss": 1.7604, "step": 12 }, { "epoch": 0.02, "learning_rate": 2.5e-07, "loss": 1.8112, "step": 14 }, { "epoch": 0.02, "learning_rate": 2.857142857142857e-07, "loss": 1.7883, "step": 16 }, { "epoch": 0.02, "learning_rate": 3.2142857142857145e-07, "loss": 1.8288, "step": 18 }, { "epoch": 0.02, "learning_rate": 3.5714285714285716e-07, "loss": 1.838, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.928571428571428e-07, "loss": 1.7953, "step": 22 }, { "epoch": 0.03, "learning_rate": 4.285714285714285e-07, "loss": 1.751, "step": 24 }, { "epoch": 0.03, "learning_rate": 4.6428571428571427e-07, "loss": 1.8237, "step": 26 }, { "epoch": 0.03, "learning_rate": 5e-07, "loss": 1.8142, "step": 28 }, { "epoch": 0.03, "learning_rate": 5.357142857142857e-07, "loss": 1.8103, "step": 30 }, { "epoch": 0.03, "learning_rate": 5.714285714285714e-07, "loss": 1.7695, "step": 32 }, { "epoch": 0.04, "learning_rate": 6.071428571428571e-07, "loss": 1.8141, "step": 34 }, { "epoch": 0.04, "learning_rate": 6.428571428571429e-07, "loss": 1.8245, "step": 36 }, { "epoch": 0.04, "learning_rate": 6.785714285714286e-07, "loss": 1.8322, "step": 38 }, { "epoch": 0.04, "learning_rate": 7.142857142857143e-07, "loss": 1.8062, "step": 40 }, { "epoch": 0.05, "learning_rate": 7.5e-07, "loss": 1.8008, "step": 42 }, { "epoch": 0.05, "learning_rate": 7.857142857142856e-07, "loss": 1.8011, "step": 44 }, { "epoch": 0.05, "learning_rate": 8.214285714285713e-07, "loss": 1.7714, "step": 46 }, { "epoch": 0.05, "learning_rate": 8.57142857142857e-07, "loss": 1.7806, "step": 48 }, { "epoch": 0.05, "learning_rate": 8.928571428571428e-07, "loss": 1.8029, "step": 50 }, { "epoch": 0.06, "learning_rate": 9.285714285714285e-07, "loss": 1.7956, "step": 52 }, { "epoch": 0.06, "learning_rate": 9.642857142857142e-07, "loss": 1.759, "step": 54 }, { "epoch": 0.06, "learning_rate": 1e-06, "loss": 1.8218, "step": 56 }, { "epoch": 0.06, "learning_rate": 1.0357142857142857e-06, "loss": 1.7866, "step": 58 }, { "epoch": 0.06, "learning_rate": 1.0714285714285714e-06, "loss": 1.7776, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.107142857142857e-06, "loss": 1.7683, "step": 62 }, { "epoch": 0.07, "learning_rate": 1.1428571428571428e-06, "loss": 1.7662, "step": 64 }, { "epoch": 0.07, "learning_rate": 1.1785714285714285e-06, "loss": 1.844, "step": 66 }, { "epoch": 0.07, "learning_rate": 1.2142857142857142e-06, "loss": 1.8085, "step": 68 }, { "epoch": 0.08, "learning_rate": 1.2499999999999999e-06, "loss": 1.7617, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.2857142857142858e-06, "loss": 1.7718, "step": 72 }, { "epoch": 0.08, "learning_rate": 1.3214285714285713e-06, "loss": 1.8005, "step": 74 }, { "epoch": 0.08, "learning_rate": 1.3571428571428572e-06, "loss": 1.8495, "step": 76 }, { "epoch": 0.08, "learning_rate": 1.3928571428571427e-06, "loss": 1.77, "step": 78 }, { "epoch": 0.09, "learning_rate": 1.4285714285714286e-06, "loss": 1.8365, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.4642857142857141e-06, "loss": 1.8015, "step": 82 }, { "epoch": 0.09, "learning_rate": 1.5e-06, "loss": 1.7715, "step": 84 }, { "epoch": 0.09, "learning_rate": 1.5357142857142857e-06, "loss": 1.7716, "step": 86 }, { "epoch": 0.09, "learning_rate": 1.5714285714285712e-06, "loss": 1.8093, "step": 88 }, { "epoch": 0.1, "learning_rate": 1.6071428571428572e-06, "loss": 1.7822, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.6428571428571426e-06, "loss": 1.8427, "step": 92 }, { "epoch": 0.1, "learning_rate": 1.6785714285714286e-06, "loss": 1.7447, "step": 94 }, { "epoch": 0.1, "learning_rate": 1.714285714285714e-06, "loss": 1.8466, "step": 96 }, { "epoch": 0.11, "learning_rate": 1.75e-06, "loss": 1.7943, "step": 98 }, { "epoch": 0.11, "learning_rate": 1.7857142857142857e-06, "loss": 1.7368, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8214285714285714e-06, "loss": 1.7748, "step": 102 }, { "epoch": 0.11, "learning_rate": 1.857142857142857e-06, "loss": 1.7385, "step": 104 }, { "epoch": 0.11, "learning_rate": 1.8928571428571428e-06, "loss": 1.7912, "step": 106 }, { "epoch": 0.12, "learning_rate": 1.9285714285714285e-06, "loss": 1.8207, "step": 108 }, { "epoch": 0.12, "learning_rate": 1.964285714285714e-06, "loss": 1.8504, "step": 110 }, { "epoch": 0.12, "learning_rate": 2e-06, "loss": 1.7297, "step": 112 }, { "epoch": 0.12, "learning_rate": 1.9999984836600627e-06, "loss": 1.804, "step": 114 }, { "epoch": 0.12, "learning_rate": 1.9999939346448493e-06, "loss": 1.7596, "step": 116 }, { "epoch": 0.13, "learning_rate": 1.9999863529681556e-06, "loss": 1.7427, "step": 118 }, { "epoch": 0.13, "learning_rate": 1.9999757386529745e-06, "loss": 1.7642, "step": 120 }, { "epoch": 0.13, "learning_rate": 1.9999620917314953e-06, "loss": 1.7923, "step": 122 }, { "epoch": 0.13, "learning_rate": 1.9999454122451056e-06, "loss": 1.8078, "step": 124 }, { "epoch": 0.14, "learning_rate": 1.9999257002443882e-06, "loss": 1.7529, "step": 126 }, { "epoch": 0.14, "learning_rate": 1.9999029557891238e-06, "loss": 1.8209, "step": 128 }, { "epoch": 0.14, "learning_rate": 1.9998771789482887e-06, "loss": 1.7966, "step": 130 }, { "epoch": 0.14, "learning_rate": 1.999848369800056e-06, "loss": 1.7624, "step": 132 }, { "epoch": 0.14, "learning_rate": 1.9998165284317942e-06, "loss": 1.7955, "step": 134 }, { "epoch": 0.15, "learning_rate": 1.9997816549400686e-06, "loss": 1.7967, "step": 136 }, { "epoch": 0.15, "learning_rate": 1.999743749430639e-06, "loss": 1.812, "step": 138 }, { "epoch": 0.15, "learning_rate": 1.999702812018461e-06, "loss": 1.7924, "step": 140 }, { "epoch": 0.15, "learning_rate": 1.999658842827684e-06, "loss": 1.8119, "step": 142 }, { "epoch": 0.15, "learning_rate": 1.999611841991653e-06, "loss": 1.7845, "step": 144 }, { "epoch": 0.16, "learning_rate": 1.9995618096529063e-06, "loss": 1.7908, "step": 146 }, { "epoch": 0.16, "learning_rate": 1.999508745963176e-06, "loss": 1.7558, "step": 148 }, { "epoch": 0.16, "learning_rate": 1.999452651083388e-06, "loss": 1.7613, "step": 150 }, { "epoch": 0.16, "learning_rate": 1.9993935251836588e-06, "loss": 1.7936, "step": 152 }, { "epoch": 0.17, "learning_rate": 1.999331368443299e-06, "loss": 1.8252, "step": 154 }, { "epoch": 0.17, "learning_rate": 1.99926618105081e-06, "loss": 1.7885, "step": 156 }, { "epoch": 0.17, "learning_rate": 1.9991979632038844e-06, "loss": 1.7688, "step": 158 }, { "epoch": 0.17, "learning_rate": 1.999126715109405e-06, "loss": 1.7978, "step": 160 }, { "epoch": 0.17, "learning_rate": 1.9990524369834445e-06, "loss": 1.7914, "step": 162 }, { "epoch": 0.18, "learning_rate": 1.9989751290512647e-06, "loss": 1.7627, "step": 164 }, { "epoch": 0.18, "learning_rate": 1.998894791547316e-06, "loss": 1.7832, "step": 166 }, { "epoch": 0.18, "learning_rate": 1.998811424715236e-06, "loss": 1.7679, "step": 168 }, { "epoch": 0.18, "learning_rate": 1.99872502880785e-06, "loss": 1.7889, "step": 170 }, { "epoch": 0.18, "learning_rate": 1.9986356040871684e-06, "loss": 1.755, "step": 172 }, { "epoch": 0.19, "learning_rate": 1.9985431508243884e-06, "loss": 1.8065, "step": 174 }, { "epoch": 0.19, "learning_rate": 1.998447669299891e-06, "loss": 1.7217, "step": 176 }, { "epoch": 0.19, "learning_rate": 1.998349159803241e-06, "loss": 1.79, "step": 178 }, { "epoch": 0.19, "learning_rate": 1.9982476226331863e-06, "loss": 1.7365, "step": 180 }, { "epoch": 0.2, "learning_rate": 1.9981430580976567e-06, "loss": 1.7904, "step": 182 }, { "epoch": 0.2, "learning_rate": 1.9980354665137626e-06, "loss": 1.7985, "step": 184 }, { "epoch": 0.2, "learning_rate": 1.997924848207795e-06, "loss": 1.7578, "step": 186 }, { "epoch": 0.2, "learning_rate": 1.997811203515224e-06, "loss": 1.7971, "step": 188 }, { "epoch": 0.2, "learning_rate": 1.997694532780697e-06, "loss": 1.7861, "step": 190 }, { "epoch": 0.21, "learning_rate": 1.9975748363580403e-06, "loss": 1.8196, "step": 192 }, { "epoch": 0.21, "learning_rate": 1.9974521146102534e-06, "loss": 1.7824, "step": 194 }, { "epoch": 0.21, "learning_rate": 1.9973263679095126e-06, "loss": 1.7837, "step": 196 }, { "epoch": 0.21, "learning_rate": 1.9971975966371677e-06, "loss": 1.7168, "step": 198 }, { "epoch": 0.21, "learning_rate": 1.9970658011837403e-06, "loss": 1.786, "step": 200 }, { "epoch": 0.22, "learning_rate": 1.996930981948924e-06, "loss": 1.7517, "step": 202 }, { "epoch": 0.22, "learning_rate": 1.9967931393415824e-06, "loss": 1.7935, "step": 204 }, { "epoch": 0.22, "learning_rate": 1.996652273779748e-06, "loss": 1.7614, "step": 206 }, { "epoch": 0.22, "learning_rate": 1.996508385690621e-06, "loss": 1.7422, "step": 208 }, { "epoch": 0.23, "learning_rate": 1.9963614755105675e-06, "loss": 1.7906, "step": 210 }, { "epoch": 0.23, "learning_rate": 1.9962115436851197e-06, "loss": 1.7935, "step": 212 }, { "epoch": 0.23, "learning_rate": 1.9960585906689724e-06, "loss": 1.8088, "step": 214 }, { "epoch": 0.23, "learning_rate": 1.995902616925983e-06, "loss": 1.7836, "step": 216 }, { "epoch": 0.23, "learning_rate": 1.995743622929171e-06, "loss": 1.8082, "step": 218 }, { "epoch": 0.24, "learning_rate": 1.9955816091607123e-06, "loss": 1.7421, "step": 220 }, { "epoch": 0.24, "learning_rate": 1.9954165761119447e-06, "loss": 1.7582, "step": 222 }, { "epoch": 0.24, "learning_rate": 1.99524852428336e-06, "loss": 1.7798, "step": 224 }, { "epoch": 0.24, "learning_rate": 1.9950774541846052e-06, "loss": 1.7439, "step": 226 }, { "epoch": 0.25, "learning_rate": 1.9949033663344813e-06, "loss": 1.8031, "step": 228 }, { "epoch": 0.25, "learning_rate": 1.9947262612609412e-06, "loss": 1.8042, "step": 230 }, { "epoch": 0.25, "learning_rate": 1.994546139501088e-06, "loss": 1.723, "step": 232 }, { "epoch": 0.25, "learning_rate": 1.994363001601173e-06, "loss": 1.7698, "step": 234 }, { "epoch": 0.25, "learning_rate": 1.994176848116595e-06, "loss": 1.8292, "step": 236 }, { "epoch": 0.26, "learning_rate": 1.993987679611898e-06, "loss": 1.7419, "step": 238 }, { "epoch": 0.26, "learning_rate": 1.993795496660769e-06, "loss": 1.7854, "step": 240 }, { "epoch": 0.26, "learning_rate": 1.9936002998460383e-06, "loss": 1.7535, "step": 242 }, { "epoch": 0.26, "learning_rate": 1.9934020897596747e-06, "loss": 1.7828, "step": 244 }, { "epoch": 0.26, "learning_rate": 1.9932008670027864e-06, "loss": 1.7446, "step": 246 }, { "epoch": 0.27, "learning_rate": 1.992996632185617e-06, "loss": 1.7538, "step": 248 }, { "epoch": 0.27, "learning_rate": 1.9927893859275457e-06, "loss": 1.7395, "step": 250 }, { "epoch": 0.27, "learning_rate": 1.9925791288570837e-06, "loss": 1.8124, "step": 252 }, { "epoch": 0.27, "learning_rate": 1.992365861611874e-06, "loss": 1.8087, "step": 254 }, { "epoch": 0.28, "learning_rate": 1.9921495848386874e-06, "loss": 1.8488, "step": 256 }, { "epoch": 0.28, "learning_rate": 1.9919302991934224e-06, "loss": 1.7739, "step": 258 }, { "epoch": 0.28, "learning_rate": 1.991708005341102e-06, "loss": 1.8297, "step": 260 }, { "epoch": 0.28, "learning_rate": 1.991482703955872e-06, "loss": 1.718, "step": 262 }, { "epoch": 0.28, "learning_rate": 1.9912543957209997e-06, "loss": 1.8038, "step": 264 }, { "epoch": 0.29, "learning_rate": 1.991023081328871e-06, "loss": 1.7552, "step": 266 }, { "epoch": 0.29, "learning_rate": 1.9907887614809888e-06, "loss": 1.7854, "step": 268 }, { "epoch": 0.29, "learning_rate": 1.990551436887969e-06, "loss": 1.7622, "step": 270 }, { "epoch": 0.29, "learning_rate": 1.990311108269542e-06, "loss": 1.7976, "step": 272 }, { "epoch": 0.29, "learning_rate": 1.990067776354547e-06, "loss": 1.7512, "step": 274 }, { "epoch": 0.3, "learning_rate": 1.9898214418809326e-06, "loss": 1.7963, "step": 276 }, { "epoch": 0.3, "learning_rate": 1.989572105595752e-06, "loss": 1.7836, "step": 278 }, { "epoch": 0.3, "learning_rate": 1.989319768255162e-06, "loss": 1.8183, "step": 280 }, { "epoch": 0.3, "learning_rate": 1.9890644306244213e-06, "loss": 1.7846, "step": 282 }, { "epoch": 0.31, "learning_rate": 1.9888060934778874e-06, "loss": 1.7769, "step": 284 }, { "epoch": 0.31, "learning_rate": 1.988544757599014e-06, "loss": 1.7932, "step": 286 }, { "epoch": 0.31, "learning_rate": 1.9882804237803485e-06, "loss": 1.8283, "step": 288 }, { "epoch": 0.31, "learning_rate": 1.9880130928235315e-06, "loss": 1.7194, "step": 290 }, { "epoch": 0.31, "learning_rate": 1.9877427655392924e-06, "loss": 1.7643, "step": 292 }, { "epoch": 0.32, "learning_rate": 1.9874694427474464e-06, "loss": 1.8056, "step": 294 }, { "epoch": 0.32, "learning_rate": 1.9871931252768952e-06, "loss": 1.8192, "step": 296 }, { "epoch": 0.32, "learning_rate": 1.98691381396562e-06, "loss": 1.7687, "step": 298 }, { "epoch": 0.32, "learning_rate": 1.986631509660684e-06, "loss": 1.7932, "step": 300 }, { "epoch": 0.32, "learning_rate": 1.9863462132182247e-06, "loss": 1.8684, "step": 302 }, { "epoch": 0.33, "learning_rate": 1.986057925503455e-06, "loss": 1.8455, "step": 304 }, { "epoch": 0.33, "learning_rate": 1.985766647390659e-06, "loss": 1.7187, "step": 306 }, { "epoch": 0.33, "learning_rate": 1.9854723797631912e-06, "loss": 1.7546, "step": 308 }, { "epoch": 0.33, "learning_rate": 1.98517512351347e-06, "loss": 1.7731, "step": 310 }, { "epoch": 0.34, "learning_rate": 1.9848748795429785e-06, "loss": 1.7852, "step": 312 }, { "epoch": 0.34, "learning_rate": 1.984571648762261e-06, "loss": 1.7534, "step": 314 }, { "epoch": 0.34, "learning_rate": 1.9842654320909194e-06, "loss": 1.8198, "step": 316 }, { "epoch": 0.34, "learning_rate": 1.98395623045761e-06, "loss": 1.7499, "step": 318 }, { "epoch": 0.34, "learning_rate": 1.983644044800044e-06, "loss": 1.741, "step": 320 }, { "epoch": 0.35, "learning_rate": 1.9833288760649787e-06, "loss": 1.7557, "step": 322 }, { "epoch": 0.35, "learning_rate": 1.983010725208221e-06, "loss": 1.7607, "step": 324 }, { "epoch": 0.35, "learning_rate": 1.9826895931946206e-06, "loss": 1.7521, "step": 326 }, { "epoch": 0.35, "learning_rate": 1.9823654809980682e-06, "loss": 1.8065, "step": 328 }, { "epoch": 0.35, "learning_rate": 1.9820383896014917e-06, "loss": 1.7385, "step": 330 }, { "epoch": 0.36, "learning_rate": 1.981708319996855e-06, "loss": 1.866, "step": 332 }, { "epoch": 0.36, "learning_rate": 1.9813752731851535e-06, "loss": 1.7722, "step": 334 }, { "epoch": 0.36, "learning_rate": 1.9810392501764116e-06, "loss": 1.7947, "step": 336 }, { "epoch": 0.36, "learning_rate": 1.9807002519896793e-06, "loss": 1.7951, "step": 338 }, { "epoch": 0.37, "learning_rate": 1.98035827965303e-06, "loss": 1.735, "step": 340 }, { "epoch": 0.37, "learning_rate": 1.980013334203556e-06, "loss": 1.7485, "step": 342 }, { "epoch": 0.37, "learning_rate": 1.9796654166873666e-06, "loss": 1.7587, "step": 344 }, { "epoch": 0.37, "learning_rate": 1.979314528159584e-06, "loss": 1.8017, "step": 346 }, { "epoch": 0.37, "learning_rate": 1.978960669684341e-06, "loss": 1.7886, "step": 348 }, { "epoch": 0.38, "learning_rate": 1.9786038423347772e-06, "loss": 1.8133, "step": 350 }, { "epoch": 0.38, "learning_rate": 1.978244047193035e-06, "loss": 1.8114, "step": 352 }, { "epoch": 0.38, "learning_rate": 1.977881285350259e-06, "loss": 1.7753, "step": 354 }, { "epoch": 0.38, "learning_rate": 1.9775155579065892e-06, "loss": 1.8068, "step": 356 }, { "epoch": 0.38, "learning_rate": 1.9771468659711594e-06, "loss": 1.7784, "step": 358 }, { "epoch": 0.39, "learning_rate": 1.9767752106620947e-06, "loss": 1.8183, "step": 360 }, { "epoch": 0.39, "learning_rate": 1.976400593106507e-06, "loss": 1.7066, "step": 362 }, { "epoch": 0.39, "learning_rate": 1.9760230144404907e-06, "loss": 1.7062, "step": 364 }, { "epoch": 0.39, "learning_rate": 1.9756424758091217e-06, "loss": 1.7605, "step": 366 }, { "epoch": 0.4, "learning_rate": 1.975258978366451e-06, "loss": 1.7559, "step": 368 }, { "epoch": 0.4, "learning_rate": 1.974872523275504e-06, "loss": 1.8098, "step": 370 }, { "epoch": 0.4, "learning_rate": 1.9744831117082755e-06, "loss": 1.772, "step": 372 }, { "epoch": 0.4, "learning_rate": 1.974090744845726e-06, "loss": 1.8103, "step": 374 }, { "epoch": 0.4, "learning_rate": 1.973695423877779e-06, "loss": 1.7503, "step": 376 }, { "epoch": 0.41, "learning_rate": 1.9732971500033156e-06, "loss": 1.7739, "step": 378 }, { "epoch": 0.41, "learning_rate": 1.9728959244301735e-06, "loss": 1.7637, "step": 380 }, { "epoch": 0.41, "learning_rate": 1.972491748375141e-06, "loss": 1.8106, "step": 382 }, { "epoch": 0.41, "learning_rate": 1.9720846230639556e-06, "loss": 1.743, "step": 384 }, { "epoch": 0.41, "learning_rate": 1.971674549731297e-06, "loss": 1.7469, "step": 386 }, { "epoch": 0.42, "learning_rate": 1.971261529620787e-06, "loss": 1.7519, "step": 388 }, { "epoch": 0.42, "learning_rate": 1.9708455639849825e-06, "loss": 1.7682, "step": 390 }, { "epoch": 0.42, "learning_rate": 1.970426654085375e-06, "loss": 1.7515, "step": 392 }, { "epoch": 0.42, "learning_rate": 1.970004801192384e-06, "loss": 1.7138, "step": 394 }, { "epoch": 0.43, "learning_rate": 1.9695800065853547e-06, "loss": 1.8249, "step": 396 }, { "epoch": 0.43, "learning_rate": 1.9691522715525517e-06, "loss": 1.7805, "step": 398 }, { "epoch": 0.43, "learning_rate": 1.9687215973911596e-06, "loss": 1.7202, "step": 400 }, { "epoch": 0.43, "learning_rate": 1.9682879854072753e-06, "loss": 1.7721, "step": 402 }, { "epoch": 0.43, "learning_rate": 1.9678514369159046e-06, "loss": 1.7675, "step": 404 }, { "epoch": 0.44, "learning_rate": 1.9674119532409598e-06, "loss": 1.779, "step": 406 }, { "epoch": 0.44, "learning_rate": 1.9669695357152538e-06, "loss": 1.8394, "step": 408 }, { "epoch": 0.44, "learning_rate": 1.9665241856804975e-06, "loss": 1.748, "step": 410 }, { "epoch": 0.44, "learning_rate": 1.9660759044872946e-06, "loss": 1.7679, "step": 412 }, { "epoch": 0.45, "learning_rate": 1.965624693495139e-06, "loss": 1.7867, "step": 414 }, { "epoch": 0.45, "learning_rate": 1.965170554072409e-06, "loss": 1.7682, "step": 416 }, { "epoch": 0.45, "learning_rate": 1.964713487596364e-06, "loss": 1.7262, "step": 418 }, { "epoch": 0.45, "learning_rate": 1.964253495453141e-06, "loss": 1.7713, "step": 420 }, { "epoch": 0.45, "learning_rate": 1.963790579037748e-06, "loss": 1.7083, "step": 422 }, { "epoch": 0.46, "learning_rate": 1.9633247397540626e-06, "loss": 1.8342, "step": 424 }, { "epoch": 0.46, "learning_rate": 1.962855979014826e-06, "loss": 1.7704, "step": 426 }, { "epoch": 0.46, "learning_rate": 1.96238429824164e-06, "loss": 1.7844, "step": 428 }, { "epoch": 0.46, "learning_rate": 1.961909698864961e-06, "loss": 1.7696, "step": 430 }, { "epoch": 0.46, "learning_rate": 1.961432182324097e-06, "loss": 1.7241, "step": 432 }, { "epoch": 0.47, "learning_rate": 1.960951750067203e-06, "loss": 1.7886, "step": 434 }, { "epoch": 0.47, "learning_rate": 1.9604684035512757e-06, "loss": 1.7559, "step": 436 }, { "epoch": 0.47, "learning_rate": 1.9599821442421505e-06, "loss": 1.7642, "step": 438 }, { "epoch": 0.47, "learning_rate": 1.9594929736144973e-06, "loss": 1.7509, "step": 440 }, { "epoch": 0.48, "learning_rate": 1.959000893151813e-06, "loss": 1.8134, "step": 442 }, { "epoch": 0.48, "learning_rate": 1.95850590434642e-06, "loss": 1.6866, "step": 444 }, { "epoch": 0.48, "learning_rate": 1.9580080086994616e-06, "loss": 1.8187, "step": 446 }, { "epoch": 0.48, "learning_rate": 1.9575072077208952e-06, "loss": 1.7784, "step": 448 }, { "epoch": 0.48, "learning_rate": 1.95700350292949e-06, "loss": 1.761, "step": 450 }, { "epoch": 0.49, "learning_rate": 1.9564968958528217e-06, "loss": 1.7806, "step": 452 }, { "epoch": 0.49, "learning_rate": 1.9559873880272677e-06, "loss": 1.7697, "step": 454 }, { "epoch": 0.49, "learning_rate": 1.955474980998001e-06, "loss": 1.7629, "step": 456 }, { "epoch": 0.49, "learning_rate": 1.954959676318989e-06, "loss": 1.8016, "step": 458 }, { "epoch": 0.49, "learning_rate": 1.9544414755529855e-06, "loss": 1.7795, "step": 460 }, { "epoch": 0.5, "learning_rate": 1.9539203802715277e-06, "loss": 1.7224, "step": 462 }, { "epoch": 0.5, "learning_rate": 1.9533963920549303e-06, "loss": 1.7114, "step": 464 }, { "epoch": 0.5, "learning_rate": 1.9528695124922823e-06, "loss": 1.7711, "step": 466 }, { "epoch": 0.5, "learning_rate": 1.952339743181441e-06, "loss": 1.7241, "step": 468 }, { "epoch": 0.51, "learning_rate": 1.9518070857290267e-06, "loss": 1.7445, "step": 470 }, { "epoch": 0.51, "learning_rate": 1.951271541750419e-06, "loss": 1.7339, "step": 472 }, { "epoch": 0.51, "learning_rate": 1.9507331128697513e-06, "loss": 1.7487, "step": 474 }, { "epoch": 0.51, "learning_rate": 1.950191800719906e-06, "loss": 1.7585, "step": 476 }, { "epoch": 0.51, "learning_rate": 1.9496476069425093e-06, "loss": 1.7481, "step": 478 }, { "epoch": 0.52, "learning_rate": 1.9491005331879276e-06, "loss": 1.7295, "step": 480 }, { "epoch": 0.52, "learning_rate": 1.9485505811152596e-06, "loss": 1.802, "step": 482 }, { "epoch": 0.52, "learning_rate": 1.9479977523923344e-06, "loss": 1.7432, "step": 484 }, { "epoch": 0.52, "learning_rate": 1.947442048695704e-06, "loss": 1.7524, "step": 486 }, { "epoch": 0.52, "learning_rate": 1.9468834717106405e-06, "loss": 1.7055, "step": 488 }, { "epoch": 0.53, "learning_rate": 1.946322023131129e-06, "loss": 1.7729, "step": 490 }, { "epoch": 0.53, "learning_rate": 1.9457577046598623e-06, "loss": 1.7301, "step": 492 }, { "epoch": 0.53, "learning_rate": 1.9451905180082392e-06, "loss": 1.7957, "step": 494 }, { "epoch": 0.53, "learning_rate": 1.9446204648963537e-06, "loss": 1.7317, "step": 496 }, { "epoch": 0.54, "learning_rate": 1.9440475470529956e-06, "loss": 1.8001, "step": 498 }, { "epoch": 0.54, "learning_rate": 1.9434717662156406e-06, "loss": 1.794, "step": 500 }, { "epoch": 0.54, "learning_rate": 1.9428931241304487e-06, "loss": 1.7253, "step": 502 }, { "epoch": 0.54, "learning_rate": 1.9423116225522545e-06, "loss": 1.7507, "step": 504 }, { "epoch": 0.54, "learning_rate": 1.9417272632445675e-06, "loss": 1.7617, "step": 506 }, { "epoch": 0.55, "learning_rate": 1.9411400479795615e-06, "loss": 1.7473, "step": 508 }, { "epoch": 0.55, "learning_rate": 1.940549978538073e-06, "loss": 1.7441, "step": 510 }, { "epoch": 0.55, "learning_rate": 1.9399570567095935e-06, "loss": 1.7785, "step": 512 }, { "epoch": 0.55, "learning_rate": 1.939361284292265e-06, "loss": 1.7945, "step": 514 }, { "epoch": 0.55, "learning_rate": 1.9387626630928748e-06, "loss": 1.7766, "step": 516 }, { "epoch": 0.56, "learning_rate": 1.9381611949268493e-06, "loss": 1.7683, "step": 518 }, { "epoch": 0.56, "learning_rate": 1.9375568816182486e-06, "loss": 1.7753, "step": 520 }, { "epoch": 0.56, "learning_rate": 1.936949724999762e-06, "loss": 1.7494, "step": 522 }, { "epoch": 0.56, "learning_rate": 1.9363397269127003e-06, "loss": 1.7311, "step": 524 }, { "epoch": 0.57, "learning_rate": 1.9357268892069932e-06, "loss": 1.7668, "step": 526 }, { "epoch": 0.57, "learning_rate": 1.935111213741181e-06, "loss": 1.7202, "step": 528 }, { "epoch": 0.57, "learning_rate": 1.934492702382411e-06, "loss": 1.7074, "step": 530 }, { "epoch": 0.57, "learning_rate": 1.933871357006429e-06, "loss": 1.7274, "step": 532 }, { "epoch": 0.57, "learning_rate": 1.9332471794975773e-06, "loss": 1.7251, "step": 534 }, { "epoch": 0.58, "learning_rate": 1.9326201717487864e-06, "loss": 1.7558, "step": 536 }, { "epoch": 0.58, "learning_rate": 1.9319903356615692e-06, "loss": 1.7585, "step": 538 }, { "epoch": 0.58, "learning_rate": 1.9313576731460187e-06, "loss": 1.7493, "step": 540 }, { "epoch": 0.58, "learning_rate": 1.9307221861207964e-06, "loss": 1.7689, "step": 542 }, { "epoch": 0.58, "learning_rate": 1.930083876513131e-06, "loss": 1.7846, "step": 544 }, { "epoch": 0.59, "learning_rate": 1.929442746258812e-06, "loss": 1.7653, "step": 546 }, { "epoch": 0.59, "learning_rate": 1.928798797302182e-06, "loss": 1.7985, "step": 548 }, { "epoch": 0.59, "learning_rate": 1.928152031596132e-06, "loss": 1.7336, "step": 550 }, { "epoch": 0.59, "learning_rate": 1.927502451102095e-06, "loss": 1.7383, "step": 552 }, { "epoch": 0.6, "learning_rate": 1.92685005779004e-06, "loss": 1.7086, "step": 554 }, { "epoch": 0.6, "learning_rate": 1.926194853638469e-06, "loss": 1.6971, "step": 556 }, { "epoch": 0.6, "learning_rate": 1.925536840634405e-06, "loss": 1.752, "step": 558 }, { "epoch": 0.6, "learning_rate": 1.9248760207733917e-06, "loss": 1.7625, "step": 560 }, { "epoch": 0.6, "learning_rate": 1.924212396059483e-06, "loss": 1.7666, "step": 562 }, { "epoch": 0.61, "learning_rate": 1.9235459685052414e-06, "loss": 1.7895, "step": 564 }, { "epoch": 0.61, "learning_rate": 1.9228767401317273e-06, "loss": 1.7431, "step": 566 }, { "epoch": 0.61, "learning_rate": 1.922204712968497e-06, "loss": 1.7777, "step": 568 }, { "epoch": 0.61, "learning_rate": 1.9215298890535935e-06, "loss": 1.8083, "step": 570 }, { "epoch": 0.61, "learning_rate": 1.9208522704335415e-06, "loss": 1.7462, "step": 572 }, { "epoch": 0.62, "learning_rate": 1.9201718591633418e-06, "loss": 1.789, "step": 574 }, { "epoch": 0.62, "learning_rate": 1.919488657306463e-06, "loss": 1.7564, "step": 576 }, { "epoch": 0.62, "learning_rate": 1.918802666934839e-06, "loss": 1.796, "step": 578 }, { "epoch": 0.62, "learning_rate": 1.9181138901288575e-06, "loss": 1.8037, "step": 580 }, { "epoch": 0.63, "learning_rate": 1.9174223289773593e-06, "loss": 1.7606, "step": 582 }, { "epoch": 0.63, "learning_rate": 1.9167279855776273e-06, "loss": 1.7393, "step": 584 }, { "epoch": 0.63, "learning_rate": 1.916030862035383e-06, "loss": 1.782, "step": 586 }, { "epoch": 0.63, "learning_rate": 1.9153309604647786e-06, "loss": 1.7636, "step": 588 }, { "epoch": 0.63, "learning_rate": 1.9146282829883923e-06, "loss": 1.8191, "step": 590 }, { "epoch": 0.64, "learning_rate": 1.9139228317372193e-06, "loss": 1.75, "step": 592 }, { "epoch": 0.64, "learning_rate": 1.913214608850667e-06, "loss": 1.7738, "step": 594 }, { "epoch": 0.64, "learning_rate": 1.91250361647655e-06, "loss": 1.7473, "step": 596 }, { "epoch": 0.64, "learning_rate": 1.9117898567710796e-06, "loss": 1.7738, "step": 598 }, { "epoch": 0.64, "learning_rate": 1.9110733318988605e-06, "loss": 1.7436, "step": 600 }, { "epoch": 0.65, "learning_rate": 1.910354044032883e-06, "loss": 1.7629, "step": 602 }, { "epoch": 0.65, "learning_rate": 1.9096319953545185e-06, "loss": 1.7607, "step": 604 }, { "epoch": 0.65, "learning_rate": 1.9089071880535074e-06, "loss": 1.7382, "step": 606 }, { "epoch": 0.65, "learning_rate": 1.9081796243279597e-06, "loss": 1.7465, "step": 608 }, { "epoch": 0.66, "learning_rate": 1.907449306384343e-06, "loss": 1.7443, "step": 610 }, { "epoch": 0.66, "learning_rate": 1.906716236437477e-06, "loss": 1.7323, "step": 612 }, { "epoch": 0.66, "learning_rate": 1.905980416710529e-06, "loss": 1.7313, "step": 614 }, { "epoch": 0.66, "learning_rate": 1.9052418494350046e-06, "loss": 1.7307, "step": 616 }, { "epoch": 0.66, "learning_rate": 1.9045005368507417e-06, "loss": 1.7547, "step": 618 }, { "epoch": 0.67, "learning_rate": 1.9037564812059039e-06, "loss": 1.7434, "step": 620 }, { "epoch": 0.67, "learning_rate": 1.903009684756974e-06, "loss": 1.8041, "step": 622 }, { "epoch": 0.67, "learning_rate": 1.902260149768747e-06, "loss": 1.7097, "step": 624 }, { "epoch": 0.67, "learning_rate": 1.9015078785143217e-06, "loss": 1.7547, "step": 626 }, { "epoch": 0.68, "learning_rate": 1.9007528732750967e-06, "loss": 1.7094, "step": 628 }, { "epoch": 0.68, "learning_rate": 1.8999951363407609e-06, "loss": 1.7195, "step": 630 }, { "epoch": 0.68, "learning_rate": 1.8992346700092879e-06, "loss": 1.7275, "step": 632 }, { "epoch": 0.68, "learning_rate": 1.8984714765869284e-06, "loss": 1.6978, "step": 634 }, { "epoch": 0.68, "learning_rate": 1.897705558388204e-06, "loss": 1.7924, "step": 636 }, { "epoch": 0.69, "learning_rate": 1.8969369177358994e-06, "loss": 1.7121, "step": 638 }, { "epoch": 0.69, "learning_rate": 1.8961655569610556e-06, "loss": 1.714, "step": 640 }, { "epoch": 0.69, "learning_rate": 1.8953914784029627e-06, "loss": 1.7649, "step": 642 }, { "epoch": 0.69, "learning_rate": 1.8946146844091535e-06, "loss": 1.7804, "step": 644 }, { "epoch": 0.69, "learning_rate": 1.8938351773353954e-06, "loss": 1.7319, "step": 646 }, { "epoch": 0.7, "learning_rate": 1.8930529595456837e-06, "loss": 1.7672, "step": 648 }, { "epoch": 0.7, "learning_rate": 1.8922680334122347e-06, "loss": 1.7306, "step": 650 }, { "epoch": 0.7, "learning_rate": 1.8914804013154782e-06, "loss": 1.8346, "step": 652 }, { "epoch": 0.7, "learning_rate": 1.8906900656440498e-06, "loss": 1.7535, "step": 654 }, { "epoch": 0.71, "learning_rate": 1.8898970287947847e-06, "loss": 1.7585, "step": 656 }, { "epoch": 0.71, "learning_rate": 1.8891012931727102e-06, "loss": 1.7482, "step": 658 }, { "epoch": 0.71, "learning_rate": 1.888302861191037e-06, "loss": 1.7485, "step": 660 }, { "epoch": 0.71, "learning_rate": 1.8875017352711545e-06, "loss": 1.7369, "step": 662 }, { "epoch": 0.71, "learning_rate": 1.8866979178426204e-06, "loss": 1.7972, "step": 664 }, { "epoch": 0.72, "learning_rate": 1.8858914113431562e-06, "loss": 1.7787, "step": 666 }, { "epoch": 0.72, "learning_rate": 1.8850822182186379e-06, "loss": 1.7233, "step": 668 }, { "epoch": 0.72, "learning_rate": 1.8842703409230888e-06, "loss": 1.8037, "step": 670 }, { "epoch": 0.72, "learning_rate": 1.883455781918673e-06, "loss": 1.7835, "step": 672 }, { "epoch": 0.72, "learning_rate": 1.8826385436756874e-06, "loss": 1.7171, "step": 674 }, { "epoch": 0.73, "learning_rate": 1.8818186286725538e-06, "loss": 1.7468, "step": 676 }, { "epoch": 0.73, "learning_rate": 1.880996039395812e-06, "loss": 1.752, "step": 678 }, { "epoch": 0.73, "learning_rate": 1.880170778340112e-06, "loss": 1.7464, "step": 680 }, { "epoch": 0.73, "learning_rate": 1.879342848008206e-06, "loss": 1.7679, "step": 682 }, { "epoch": 0.74, "learning_rate": 1.8785122509109423e-06, "loss": 1.733, "step": 684 }, { "epoch": 0.74, "learning_rate": 1.8776789895672556e-06, "loss": 1.7939, "step": 686 }, { "epoch": 0.74, "learning_rate": 1.8768430665041607e-06, "loss": 1.7427, "step": 688 }, { "epoch": 0.74, "learning_rate": 1.8760044842567449e-06, "loss": 1.7692, "step": 690 }, { "epoch": 0.74, "learning_rate": 1.8751632453681595e-06, "loss": 1.7502, "step": 692 }, { "epoch": 0.75, "learning_rate": 1.8743193523896132e-06, "loss": 1.7305, "step": 694 }, { "epoch": 0.75, "learning_rate": 1.8734728078803627e-06, "loss": 1.7461, "step": 696 }, { "epoch": 0.75, "learning_rate": 1.8726236144077068e-06, "loss": 1.7059, "step": 698 }, { "epoch": 0.75, "learning_rate": 1.8717717745469774e-06, "loss": 1.7649, "step": 700 }, { "epoch": 0.75, "learning_rate": 1.870917290881532e-06, "loss": 1.7414, "step": 702 }, { "epoch": 0.76, "learning_rate": 1.870060166002746e-06, "loss": 1.7796, "step": 704 }, { "epoch": 0.76, "learning_rate": 1.8692004025100051e-06, "loss": 1.7181, "step": 706 }, { "epoch": 0.76, "learning_rate": 1.8683380030106966e-06, "loss": 1.7578, "step": 708 }, { "epoch": 0.76, "learning_rate": 1.8674729701202017e-06, "loss": 1.721, "step": 710 }, { "epoch": 0.77, "learning_rate": 1.8666053064618886e-06, "loss": 1.7622, "step": 712 }, { "epoch": 0.77, "learning_rate": 1.8657350146671034e-06, "loss": 1.7699, "step": 714 }, { "epoch": 0.77, "learning_rate": 1.8648620973751625e-06, "loss": 1.7665, "step": 716 }, { "epoch": 0.77, "learning_rate": 1.8639865572333446e-06, "loss": 1.7573, "step": 718 }, { "epoch": 0.77, "learning_rate": 1.8631083968968825e-06, "loss": 1.7599, "step": 720 }, { "epoch": 0.78, "learning_rate": 1.8622276190289559e-06, "loss": 1.7141, "step": 722 }, { "epoch": 0.78, "learning_rate": 1.8613442263006812e-06, "loss": 1.8069, "step": 724 }, { "epoch": 0.78, "learning_rate": 1.8604582213911066e-06, "loss": 1.7608, "step": 726 }, { "epoch": 0.78, "learning_rate": 1.859569606987201e-06, "loss": 1.7121, "step": 728 }, { "epoch": 0.78, "learning_rate": 1.8586783857838476e-06, "loss": 1.7228, "step": 730 }, { "epoch": 0.79, "learning_rate": 1.8577845604838347e-06, "loss": 1.7938, "step": 732 }, { "epoch": 0.79, "learning_rate": 1.8568881337978483e-06, "loss": 1.7717, "step": 734 }, { "epoch": 0.79, "learning_rate": 1.855989108444464e-06, "loss": 1.7354, "step": 736 }, { "epoch": 0.79, "learning_rate": 1.8550874871501377e-06, "loss": 1.7209, "step": 738 }, { "epoch": 0.8, "learning_rate": 1.8541832726491979e-06, "loss": 1.7432, "step": 740 }, { "epoch": 0.8, "learning_rate": 1.8532764676838378e-06, "loss": 1.7413, "step": 742 }, { "epoch": 0.8, "learning_rate": 1.852367075004107e-06, "loss": 1.724, "step": 744 }, { "epoch": 0.8, "learning_rate": 1.8514550973679022e-06, "loss": 1.7774, "step": 746 }, { "epoch": 0.8, "learning_rate": 1.8505405375409587e-06, "loss": 1.7286, "step": 748 }, { "epoch": 0.81, "learning_rate": 1.8496233982968455e-06, "loss": 1.7957, "step": 750 }, { "epoch": 0.81, "learning_rate": 1.8487036824169505e-06, "loss": 1.6974, "step": 752 }, { "epoch": 0.81, "learning_rate": 1.8477813926904786e-06, "loss": 1.7072, "step": 754 }, { "epoch": 0.81, "learning_rate": 1.846856531914439e-06, "loss": 1.7808, "step": 756 }, { "epoch": 0.81, "learning_rate": 1.8459291028936383e-06, "loss": 1.7283, "step": 758 }, { "epoch": 0.82, "learning_rate": 1.844999108440672e-06, "loss": 1.7755, "step": 760 }, { "epoch": 0.82, "learning_rate": 1.8440665513759153e-06, "loss": 1.7294, "step": 762 }, { "epoch": 0.82, "learning_rate": 1.8431314345275157e-06, "loss": 1.7471, "step": 764 }, { "epoch": 0.82, "learning_rate": 1.8421937607313826e-06, "loss": 1.732, "step": 766 }, { "epoch": 0.83, "learning_rate": 1.8412535328311812e-06, "loss": 1.7594, "step": 768 }, { "epoch": 0.83, "learning_rate": 1.840310753678321e-06, "loss": 1.7588, "step": 770 }, { "epoch": 0.83, "learning_rate": 1.83936542613195e-06, "loss": 1.7023, "step": 772 }, { "epoch": 0.83, "learning_rate": 1.8384175530589434e-06, "loss": 1.7175, "step": 774 }, { "epoch": 0.83, "learning_rate": 1.8374671373338973e-06, "loss": 1.7447, "step": 776 }, { "epoch": 0.84, "learning_rate": 1.836514181839118e-06, "loss": 1.7687, "step": 778 }, { "epoch": 0.84, "learning_rate": 1.835558689464615e-06, "loss": 1.7562, "step": 780 }, { "epoch": 0.84, "learning_rate": 1.8346006631080902e-06, "loss": 1.7792, "step": 782 }, { "epoch": 0.84, "learning_rate": 1.833640105674931e-06, "loss": 1.7616, "step": 784 }, { "epoch": 0.84, "learning_rate": 1.8326770200782007e-06, "loss": 1.7163, "step": 786 }, { "epoch": 0.85, "learning_rate": 1.8317114092386295e-06, "loss": 1.7233, "step": 788 }, { "epoch": 0.85, "learning_rate": 1.830743276084606e-06, "loss": 1.7349, "step": 790 }, { "epoch": 0.85, "learning_rate": 1.8297726235521682e-06, "loss": 1.7296, "step": 792 }, { "epoch": 0.85, "learning_rate": 1.8287994545849945e-06, "loss": 1.7412, "step": 794 }, { "epoch": 0.86, "learning_rate": 1.8278237721343946e-06, "loss": 1.7284, "step": 796 }, { "epoch": 0.86, "learning_rate": 1.8268455791593014e-06, "loss": 1.7835, "step": 798 }, { "epoch": 0.86, "learning_rate": 1.8258648786262608e-06, "loss": 1.7264, "step": 800 }, { "epoch": 0.86, "learning_rate": 1.8248816735094236e-06, "loss": 1.7499, "step": 802 }, { "epoch": 0.86, "learning_rate": 1.8238959667905365e-06, "loss": 1.6927, "step": 804 }, { "epoch": 0.87, "learning_rate": 1.8229077614589318e-06, "loss": 1.75, "step": 806 }, { "epoch": 0.87, "learning_rate": 1.8219170605115206e-06, "loss": 1.7551, "step": 808 }, { "epoch": 0.87, "learning_rate": 1.8209238669527812e-06, "loss": 1.7534, "step": 810 }, { "epoch": 0.87, "learning_rate": 1.8199281837947517e-06, "loss": 1.7627, "step": 812 }, { "epoch": 0.88, "learning_rate": 1.8189300140570207e-06, "loss": 1.7738, "step": 814 }, { "epoch": 0.88, "learning_rate": 1.8179293607667177e-06, "loss": 1.7273, "step": 816 }, { "epoch": 0.88, "learning_rate": 1.816926226958503e-06, "loss": 1.7357, "step": 818 }, { "epoch": 0.88, "learning_rate": 1.815920615674561e-06, "loss": 1.7591, "step": 820 }, { "epoch": 0.88, "learning_rate": 1.8149125299645886e-06, "loss": 1.7488, "step": 822 }, { "epoch": 0.89, "learning_rate": 1.8139019728857869e-06, "loss": 1.7682, "step": 824 }, { "epoch": 0.89, "learning_rate": 1.8128889475028522e-06, "loss": 1.7349, "step": 826 }, { "epoch": 0.89, "learning_rate": 1.8118734568879658e-06, "loss": 1.6782, "step": 828 }, { "epoch": 0.89, "learning_rate": 1.8108555041207865e-06, "loss": 1.7851, "step": 830 }, { "epoch": 0.89, "learning_rate": 1.8098350922884383e-06, "loss": 1.7103, "step": 832 }, { "epoch": 0.9, "learning_rate": 1.808812224485504e-06, "loss": 1.7015, "step": 834 }, { "epoch": 0.9, "learning_rate": 1.807786903814014e-06, "loss": 1.8041, "step": 836 }, { "epoch": 0.9, "learning_rate": 1.806759133383438e-06, "loss": 1.701, "step": 838 }, { "epoch": 0.9, "learning_rate": 1.8057289163106745e-06, "loss": 1.7549, "step": 840 }, { "epoch": 0.91, "learning_rate": 1.8046962557200423e-06, "loss": 1.8104, "step": 842 }, { "epoch": 0.91, "learning_rate": 1.80366115474327e-06, "loss": 1.7538, "step": 844 }, { "epoch": 0.91, "learning_rate": 1.8026236165194879e-06, "loss": 1.7609, "step": 846 }, { "epoch": 0.91, "learning_rate": 1.801583644195217e-06, "loss": 1.7476, "step": 848 }, { "epoch": 0.91, "learning_rate": 1.8005412409243603e-06, "loss": 1.7166, "step": 850 }, { "epoch": 0.92, "learning_rate": 1.7994964098681936e-06, "loss": 1.734, "step": 852 }, { "epoch": 0.92, "learning_rate": 1.7984491541953548e-06, "loss": 1.756, "step": 854 }, { "epoch": 0.92, "learning_rate": 1.7973994770818355e-06, "loss": 1.7508, "step": 856 }, { "epoch": 0.92, "learning_rate": 1.7963473817109697e-06, "loss": 1.7495, "step": 858 }, { "epoch": 0.92, "learning_rate": 1.7952928712734265e-06, "loss": 1.7661, "step": 860 }, { "epoch": 0.93, "learning_rate": 1.7942359489671976e-06, "loss": 1.74, "step": 862 }, { "epoch": 0.93, "learning_rate": 1.7931766179975912e-06, "loss": 1.7775, "step": 864 }, { "epoch": 0.93, "learning_rate": 1.792114881577218e-06, "loss": 1.7375, "step": 866 }, { "epoch": 0.93, "learning_rate": 1.7910507429259854e-06, "loss": 1.7299, "step": 868 }, { "epoch": 0.94, "learning_rate": 1.7899842052710844e-06, "loss": 1.708, "step": 870 }, { "epoch": 0.94, "learning_rate": 1.7889152718469833e-06, "loss": 1.7349, "step": 872 }, { "epoch": 0.94, "learning_rate": 1.7878439458954145e-06, "loss": 1.7186, "step": 874 }, { "epoch": 0.94, "learning_rate": 1.7867702306653664e-06, "loss": 1.7884, "step": 876 }, { "epoch": 0.94, "learning_rate": 1.785694129413074e-06, "loss": 1.7809, "step": 878 }, { "epoch": 0.95, "learning_rate": 1.7846156454020073e-06, "loss": 1.6939, "step": 880 }, { "epoch": 0.95, "learning_rate": 1.783534781902864e-06, "loss": 1.7329, "step": 882 }, { "epoch": 0.95, "learning_rate": 1.7824515421935564e-06, "loss": 1.7574, "step": 884 }, { "epoch": 0.95, "learning_rate": 1.781365929559204e-06, "loss": 1.7442, "step": 886 }, { "epoch": 0.95, "learning_rate": 1.780277947292122e-06, "loss": 1.7395, "step": 888 }, { "epoch": 0.96, "learning_rate": 1.779187598691813e-06, "loss": 1.7084, "step": 890 }, { "epoch": 0.96, "learning_rate": 1.7780948870649549e-06, "loss": 1.7761, "step": 892 }, { "epoch": 0.96, "learning_rate": 1.776999815725392e-06, "loss": 1.7553, "step": 894 }, { "epoch": 0.96, "learning_rate": 1.7759023879941256e-06, "loss": 1.7694, "step": 896 }, { "epoch": 0.97, "learning_rate": 1.7748026071993026e-06, "loss": 1.7368, "step": 898 }, { "epoch": 0.97, "learning_rate": 1.7737004766762053e-06, "loss": 1.724, "step": 900 }, { "epoch": 0.97, "learning_rate": 1.772595999767244e-06, "loss": 1.7354, "step": 902 }, { "epoch": 0.97, "learning_rate": 1.771489179821943e-06, "loss": 1.7073, "step": 904 }, { "epoch": 0.97, "learning_rate": 1.7703800201969326e-06, "loss": 1.7193, "step": 906 }, { "epoch": 0.98, "learning_rate": 1.7692685242559394e-06, "loss": 1.782, "step": 908 }, { "epoch": 0.98, "learning_rate": 1.768154695369774e-06, "loss": 1.7302, "step": 910 }, { "epoch": 0.98, "learning_rate": 1.767038536916324e-06, "loss": 1.815, "step": 912 }, { "epoch": 0.98, "learning_rate": 1.7659200522805399e-06, "loss": 1.7186, "step": 914 }, { "epoch": 0.98, "learning_rate": 1.7647992448544274e-06, "loss": 1.7699, "step": 916 }, { "epoch": 0.99, "learning_rate": 1.7636761180370373e-06, "loss": 1.7206, "step": 918 }, { "epoch": 0.99, "learning_rate": 1.762550675234453e-06, "loss": 1.7246, "step": 920 }, { "epoch": 0.99, "learning_rate": 1.7614229198597825e-06, "loss": 1.7262, "step": 922 }, { "epoch": 0.99, "learning_rate": 1.760292855333147e-06, "loss": 1.7184, "step": 924 }, { "epoch": 1.0, "learning_rate": 1.7591604850816704e-06, "loss": 1.7639, "step": 926 }, { "epoch": 1.0, "learning_rate": 1.7580258125394691e-06, "loss": 1.715, "step": 928 }, { "epoch": 1.0, "learning_rate": 1.7568888411476416e-06, "loss": 1.7317, "step": 930 }, { "epoch": 1.0, "learning_rate": 1.7557495743542582e-06, "loss": 1.7356, "step": 932 }, { "epoch": 1.0, "learning_rate": 1.7546080156143503e-06, "loss": 1.6646, "step": 934 }, { "epoch": 1.01, "learning_rate": 1.7534641683899006e-06, "loss": 1.6957, "step": 936 }, { "epoch": 1.01, "learning_rate": 1.752318036149831e-06, "loss": 1.6826, "step": 938 }, { "epoch": 1.01, "learning_rate": 1.7511696223699937e-06, "loss": 1.7156, "step": 940 }, { "epoch": 1.01, "learning_rate": 1.7500189305331605e-06, "loss": 1.7372, "step": 942 }, { "epoch": 1.01, "learning_rate": 1.7488659641290108e-06, "loss": 1.7314, "step": 944 }, { "epoch": 1.02, "learning_rate": 1.747710726654123e-06, "loss": 1.6861, "step": 946 }, { "epoch": 1.02, "learning_rate": 1.7465532216119624e-06, "loss": 1.7126, "step": 948 }, { "epoch": 1.02, "learning_rate": 1.7453934525128715e-06, "loss": 1.7377, "step": 950 }, { "epoch": 1.02, "learning_rate": 1.7442314228740584e-06, "loss": 1.7103, "step": 952 }, { "epoch": 1.03, "learning_rate": 1.743067136219587e-06, "loss": 1.7215, "step": 954 }, { "epoch": 1.03, "learning_rate": 1.7419005960803663e-06, "loss": 1.71, "step": 956 }, { "epoch": 1.03, "learning_rate": 1.7407318059941386e-06, "loss": 1.6762, "step": 958 }, { "epoch": 1.03, "learning_rate": 1.7395607695054709e-06, "loss": 1.663, "step": 960 }, { "epoch": 1.03, "learning_rate": 1.7383874901657412e-06, "loss": 1.7282, "step": 962 }, { "epoch": 1.04, "learning_rate": 1.7372119715331301e-06, "loss": 1.6706, "step": 964 }, { "epoch": 1.04, "learning_rate": 1.7360342171726102e-06, "loss": 1.6852, "step": 966 }, { "epoch": 1.04, "learning_rate": 1.7348542306559325e-06, "loss": 1.7062, "step": 968 }, { "epoch": 1.04, "learning_rate": 1.7336720155616185e-06, "loss": 1.716, "step": 970 }, { "epoch": 1.04, "learning_rate": 1.7324875754749484e-06, "loss": 1.7045, "step": 972 }, { "epoch": 1.05, "learning_rate": 1.7313009139879503e-06, "loss": 1.7872, "step": 974 }, { "epoch": 1.05, "learning_rate": 1.7301120346993875e-06, "loss": 1.7124, "step": 976 }, { "epoch": 1.05, "learning_rate": 1.728920941214751e-06, "loss": 1.6671, "step": 978 }, { "epoch": 1.05, "learning_rate": 1.727727637146246e-06, "loss": 1.7266, "step": 980 }, { "epoch": 1.06, "learning_rate": 1.7265321261127816e-06, "loss": 1.7231, "step": 982 }, { "epoch": 1.06, "learning_rate": 1.72533441173996e-06, "loss": 1.7419, "step": 984 }, { "epoch": 1.06, "learning_rate": 1.7241344976600655e-06, "loss": 1.7027, "step": 986 }, { "epoch": 1.06, "learning_rate": 1.7229323875120536e-06, "loss": 1.6814, "step": 988 }, { "epoch": 1.06, "learning_rate": 1.7217280849415392e-06, "loss": 1.7554, "step": 990 }, { "epoch": 1.07, "learning_rate": 1.7205215936007869e-06, "loss": 1.7154, "step": 992 }, { "epoch": 1.07, "learning_rate": 1.7193129171486985e-06, "loss": 1.7149, "step": 994 }, { "epoch": 1.07, "learning_rate": 1.7181020592508025e-06, "loss": 1.6895, "step": 996 }, { "epoch": 1.07, "learning_rate": 1.7168890235792434e-06, "loss": 1.7698, "step": 998 }, { "epoch": 1.07, "learning_rate": 1.7156738138127704e-06, "loss": 1.7474, "step": 1000 }, { "epoch": 1.08, "learning_rate": 1.7144564336367254e-06, "loss": 1.7258, "step": 1002 }, { "epoch": 1.08, "learning_rate": 1.713236886743033e-06, "loss": 1.7011, "step": 1004 }, { "epoch": 1.08, "learning_rate": 1.712015176830188e-06, "loss": 1.7504, "step": 1006 }, { "epoch": 1.08, "learning_rate": 1.7107913076032458e-06, "loss": 1.7323, "step": 1008 }, { "epoch": 1.09, "learning_rate": 1.7095652827738103e-06, "loss": 1.7371, "step": 1010 }, { "epoch": 1.09, "learning_rate": 1.7083371060600218e-06, "loss": 1.7088, "step": 1012 }, { "epoch": 1.09, "learning_rate": 1.7071067811865474e-06, "loss": 1.6849, "step": 1014 }, { "epoch": 1.09, "learning_rate": 1.7058743118845685e-06, "loss": 1.7234, "step": 1016 }, { "epoch": 1.09, "learning_rate": 1.70463970189177e-06, "loss": 1.7471, "step": 1018 }, { "epoch": 1.1, "learning_rate": 1.7034029549523284e-06, "loss": 1.749, "step": 1020 }, { "epoch": 1.1, "learning_rate": 1.7021640748169022e-06, "loss": 1.7243, "step": 1022 }, { "epoch": 1.1, "learning_rate": 1.700923065242617e-06, "loss": 1.6974, "step": 1024 }, { "epoch": 1.1, "learning_rate": 1.6996799299930586e-06, "loss": 1.7965, "step": 1026 }, { "epoch": 1.11, "learning_rate": 1.6984346728382574e-06, "loss": 1.7427, "step": 1028 }, { "epoch": 1.11, "learning_rate": 1.6971872975546804e-06, "loss": 1.709, "step": 1030 }, { "epoch": 1.11, "learning_rate": 1.6959378079252174e-06, "loss": 1.759, "step": 1032 }, { "epoch": 1.11, "learning_rate": 1.6946862077391702e-06, "loss": 1.7407, "step": 1034 }, { "epoch": 1.11, "learning_rate": 1.6934325007922417e-06, "loss": 1.7409, "step": 1036 }, { "epoch": 1.12, "learning_rate": 1.6921766908865235e-06, "loss": 1.708, "step": 1038 }, { "epoch": 1.12, "learning_rate": 1.6909187818304853e-06, "loss": 1.7044, "step": 1040 }, { "epoch": 1.12, "learning_rate": 1.6896587774389625e-06, "loss": 1.694, "step": 1042 }, { "epoch": 1.12, "learning_rate": 1.688396681533145e-06, "loss": 1.6885, "step": 1044 }, { "epoch": 1.12, "learning_rate": 1.6871324979405654e-06, "loss": 1.7031, "step": 1046 }, { "epoch": 1.13, "learning_rate": 1.6858662304950884e-06, "loss": 1.7578, "step": 1048 }, { "epoch": 1.13, "learning_rate": 1.6845978830368974e-06, "loss": 1.7447, "step": 1050 }, { "epoch": 1.13, "learning_rate": 1.6833274594124843e-06, "loss": 1.7132, "step": 1052 }, { "epoch": 1.13, "learning_rate": 1.6820549634746372e-06, "loss": 1.7471, "step": 1054 }, { "epoch": 1.14, "learning_rate": 1.6807803990824292e-06, "loss": 1.7291, "step": 1056 }, { "epoch": 1.14, "learning_rate": 1.6795037701012055e-06, "loss": 1.7702, "step": 1058 }, { "epoch": 1.14, "learning_rate": 1.6782250804025738e-06, "loss": 1.6608, "step": 1060 }, { "epoch": 1.14, "learning_rate": 1.6769443338643903e-06, "loss": 1.7129, "step": 1062 }, { "epoch": 1.14, "learning_rate": 1.6756615343707492e-06, "loss": 1.7026, "step": 1064 }, { "epoch": 1.15, "learning_rate": 1.6743766858119707e-06, "loss": 1.6642, "step": 1066 }, { "epoch": 1.15, "learning_rate": 1.6730897920845895e-06, "loss": 1.737, "step": 1068 }, { "epoch": 1.15, "learning_rate": 1.6718008570913418e-06, "loss": 1.7233, "step": 1070 }, { "epoch": 1.15, "learning_rate": 1.6705098847411549e-06, "loss": 1.6943, "step": 1072 }, { "epoch": 1.15, "learning_rate": 1.6692168789491352e-06, "loss": 1.7018, "step": 1074 }, { "epoch": 1.16, "learning_rate": 1.6679218436365545e-06, "loss": 1.7279, "step": 1076 }, { "epoch": 1.16, "learning_rate": 1.6666247827308412e-06, "loss": 1.7212, "step": 1078 }, { "epoch": 1.16, "learning_rate": 1.665325700165565e-06, "loss": 1.718, "step": 1080 }, { "epoch": 1.16, "learning_rate": 1.6640245998804283e-06, "loss": 1.7408, "step": 1082 }, { "epoch": 1.17, "learning_rate": 1.6627214858212513e-06, "loss": 1.726, "step": 1084 }, { "epoch": 1.17, "learning_rate": 1.6614163619399614e-06, "loss": 1.7318, "step": 1086 }, { "epoch": 1.17, "learning_rate": 1.660109232194582e-06, "loss": 1.7315, "step": 1088 }, { "epoch": 1.17, "learning_rate": 1.6588001005492194e-06, "loss": 1.7284, "step": 1090 }, { "epoch": 1.17, "learning_rate": 1.6574889709740502e-06, "loss": 1.6915, "step": 1092 }, { "epoch": 1.18, "learning_rate": 1.656175847445311e-06, "loss": 1.687, "step": 1094 }, { "epoch": 1.18, "learning_rate": 1.6548607339452852e-06, "loss": 1.7136, "step": 1096 }, { "epoch": 1.18, "learning_rate": 1.6535436344622907e-06, "loss": 1.6719, "step": 1098 }, { "epoch": 1.18, "learning_rate": 1.6522245529906687e-06, "loss": 1.7783, "step": 1100 }, { "epoch": 1.18, "learning_rate": 1.6509034935307714e-06, "loss": 1.7075, "step": 1102 }, { "epoch": 1.19, "learning_rate": 1.6495804600889485e-06, "loss": 1.7239, "step": 1104 }, { "epoch": 1.19, "learning_rate": 1.6482554566775378e-06, "loss": 1.6832, "step": 1106 }, { "epoch": 1.19, "learning_rate": 1.6469284873148497e-06, "loss": 1.698, "step": 1108 }, { "epoch": 1.19, "learning_rate": 1.6455995560251582e-06, "loss": 1.7136, "step": 1110 }, { "epoch": 1.2, "learning_rate": 1.6442686668386858e-06, "loss": 1.7593, "step": 1112 }, { "epoch": 1.2, "learning_rate": 1.6429358237915936e-06, "loss": 1.7345, "step": 1114 }, { "epoch": 1.2, "learning_rate": 1.641601030925968e-06, "loss": 1.752, "step": 1116 }, { "epoch": 1.2, "learning_rate": 1.6402642922898084e-06, "loss": 1.7478, "step": 1118 }, { "epoch": 1.2, "learning_rate": 1.638925611937015e-06, "loss": 1.6655, "step": 1120 }, { "epoch": 1.21, "learning_rate": 1.637584993927377e-06, "loss": 1.7483, "step": 1122 }, { "epoch": 1.21, "learning_rate": 1.6362424423265597e-06, "loss": 1.744, "step": 1124 }, { "epoch": 1.21, "learning_rate": 1.634897961206092e-06, "loss": 1.7287, "step": 1126 }, { "epoch": 1.21, "learning_rate": 1.6335515546433551e-06, "loss": 1.6842, "step": 1128 }, { "epoch": 1.21, "learning_rate": 1.6322032267215688e-06, "loss": 1.7137, "step": 1130 }, { "epoch": 1.22, "learning_rate": 1.6308529815297803e-06, "loss": 1.6969, "step": 1132 }, { "epoch": 1.22, "learning_rate": 1.6295008231628507e-06, "loss": 1.7182, "step": 1134 }, { "epoch": 1.22, "learning_rate": 1.6281467557214436e-06, "loss": 1.7306, "step": 1136 }, { "epoch": 1.22, "learning_rate": 1.6267907833120122e-06, "loss": 1.6912, "step": 1138 }, { "epoch": 1.23, "learning_rate": 1.6254329100467868e-06, "loss": 1.7322, "step": 1140 }, { "epoch": 1.23, "learning_rate": 1.624073140043762e-06, "loss": 1.7041, "step": 1142 }, { "epoch": 1.23, "learning_rate": 1.6227114774266852e-06, "loss": 1.7464, "step": 1144 }, { "epoch": 1.23, "learning_rate": 1.6213479263250432e-06, "loss": 1.6828, "step": 1146 }, { "epoch": 1.23, "learning_rate": 1.6199824908740497e-06, "loss": 1.71, "step": 1148 }, { "epoch": 1.24, "learning_rate": 1.6186151752146334e-06, "loss": 1.7388, "step": 1150 }, { "epoch": 1.24, "learning_rate": 1.6172459834934253e-06, "loss": 1.7104, "step": 1152 }, { "epoch": 1.24, "learning_rate": 1.6158749198627454e-06, "loss": 1.7244, "step": 1154 }, { "epoch": 1.24, "learning_rate": 1.6145019884805908e-06, "loss": 1.7424, "step": 1156 }, { "epoch": 1.24, "learning_rate": 1.6131271935106227e-06, "loss": 1.7338, "step": 1158 }, { "epoch": 1.25, "learning_rate": 1.6117505391221542e-06, "loss": 1.7144, "step": 1160 }, { "epoch": 1.25, "learning_rate": 1.6103720294901377e-06, "loss": 1.7206, "step": 1162 }, { "epoch": 1.25, "learning_rate": 1.6089916687951511e-06, "loss": 1.721, "step": 1164 }, { "epoch": 1.25, "learning_rate": 1.6076094612233871e-06, "loss": 1.7459, "step": 1166 }, { "epoch": 1.26, "learning_rate": 1.606225410966638e-06, "loss": 1.6682, "step": 1168 }, { "epoch": 1.26, "learning_rate": 1.6048395222222859e-06, "loss": 1.6769, "step": 1170 }, { "epoch": 1.26, "learning_rate": 1.6034517991932871e-06, "loss": 1.6905, "step": 1172 }, { "epoch": 1.26, "learning_rate": 1.6020622460881614e-06, "loss": 1.7261, "step": 1174 }, { "epoch": 1.26, "learning_rate": 1.6006708671209792e-06, "loss": 1.7343, "step": 1176 }, { "epoch": 1.27, "learning_rate": 1.5992776665113468e-06, "loss": 1.726, "step": 1178 }, { "epoch": 1.27, "learning_rate": 1.5978826484843958e-06, "loss": 1.755, "step": 1180 }, { "epoch": 1.27, "learning_rate": 1.5964858172707695e-06, "loss": 1.7536, "step": 1182 }, { "epoch": 1.27, "learning_rate": 1.5950871771066096e-06, "loss": 1.7137, "step": 1184 }, { "epoch": 1.27, "learning_rate": 1.5936867322335444e-06, "loss": 1.7038, "step": 1186 }, { "epoch": 1.28, "learning_rate": 1.5922844868986743e-06, "loss": 1.7289, "step": 1188 }, { "epoch": 1.28, "learning_rate": 1.5908804453545606e-06, "loss": 1.667, "step": 1190 }, { "epoch": 1.28, "learning_rate": 1.5894746118592121e-06, "loss": 1.7183, "step": 1192 }, { "epoch": 1.28, "learning_rate": 1.5880669906760714e-06, "loss": 1.712, "step": 1194 }, { "epoch": 1.29, "learning_rate": 1.5866575860740034e-06, "loss": 1.7129, "step": 1196 }, { "epoch": 1.29, "learning_rate": 1.5852464023272807e-06, "loss": 1.7167, "step": 1198 }, { "epoch": 1.29, "learning_rate": 1.583833443715572e-06, "loss": 1.7159, "step": 1200 }, { "epoch": 1.29, "learning_rate": 1.5824187145239284e-06, "loss": 1.7041, "step": 1202 }, { "epoch": 1.29, "learning_rate": 1.5810022190427708e-06, "loss": 1.7068, "step": 1204 }, { "epoch": 1.3, "learning_rate": 1.5795839615678763e-06, "loss": 1.7819, "step": 1206 }, { "epoch": 1.3, "learning_rate": 1.578163946400366e-06, "loss": 1.7145, "step": 1208 }, { "epoch": 1.3, "learning_rate": 1.576742177846691e-06, "loss": 1.7034, "step": 1210 }, { "epoch": 1.3, "learning_rate": 1.5753186602186206e-06, "loss": 1.721, "step": 1212 }, { "epoch": 1.31, "learning_rate": 1.5738933978332277e-06, "loss": 1.6848, "step": 1214 }, { "epoch": 1.31, "learning_rate": 1.5724663950128774e-06, "loss": 1.6854, "step": 1216 }, { "epoch": 1.31, "learning_rate": 1.5710376560852116e-06, "loss": 1.7422, "step": 1218 }, { "epoch": 1.31, "learning_rate": 1.5696071853831387e-06, "loss": 1.7145, "step": 1220 }, { "epoch": 1.31, "learning_rate": 1.5681749872448182e-06, "loss": 1.6522, "step": 1222 }, { "epoch": 1.32, "learning_rate": 1.5667410660136487e-06, "loss": 1.708, "step": 1224 }, { "epoch": 1.32, "learning_rate": 1.5653054260382544e-06, "loss": 1.7109, "step": 1226 }, { "epoch": 1.32, "learning_rate": 1.5638680716724712e-06, "loss": 1.722, "step": 1228 }, { "epoch": 1.32, "learning_rate": 1.5624290072753352e-06, "loss": 1.6766, "step": 1230 }, { "epoch": 1.32, "learning_rate": 1.560988237211068e-06, "loss": 1.7269, "step": 1232 }, { "epoch": 1.33, "learning_rate": 1.559545765849064e-06, "loss": 1.7164, "step": 1234 }, { "epoch": 1.33, "learning_rate": 1.5581015975638767e-06, "loss": 1.7223, "step": 1236 }, { "epoch": 1.33, "learning_rate": 1.5566557367352068e-06, "loss": 1.6917, "step": 1238 }, { "epoch": 1.33, "learning_rate": 1.5552081877478868e-06, "loss": 1.733, "step": 1240 }, { "epoch": 1.34, "learning_rate": 1.5537589549918699e-06, "loss": 1.7121, "step": 1242 }, { "epoch": 1.34, "learning_rate": 1.5523080428622146e-06, "loss": 1.748, "step": 1244 }, { "epoch": 1.34, "learning_rate": 1.550855455759073e-06, "loss": 1.7165, "step": 1246 }, { "epoch": 1.34, "learning_rate": 1.5494011980876769e-06, "loss": 1.6621, "step": 1248 }, { "epoch": 1.34, "learning_rate": 1.5479452742583245e-06, "loss": 1.7292, "step": 1250 }, { "epoch": 1.35, "learning_rate": 1.5464876886863664e-06, "loss": 1.7089, "step": 1252 }, { "epoch": 1.35, "learning_rate": 1.545028445792193e-06, "loss": 1.7536, "step": 1254 }, { "epoch": 1.35, "learning_rate": 1.5435675500012212e-06, "loss": 1.7184, "step": 1256 }, { "epoch": 1.35, "learning_rate": 1.5421050057438799e-06, "loss": 1.7835, "step": 1258 }, { "epoch": 1.35, "learning_rate": 1.5406408174555977e-06, "loss": 1.7032, "step": 1260 }, { "epoch": 1.36, "learning_rate": 1.539174989576789e-06, "loss": 1.6932, "step": 1262 }, { "epoch": 1.36, "learning_rate": 1.5377075265528405e-06, "loss": 1.7308, "step": 1264 }, { "epoch": 1.36, "learning_rate": 1.5362384328340978e-06, "loss": 1.7066, "step": 1266 }, { "epoch": 1.36, "learning_rate": 1.5347677128758516e-06, "loss": 1.6998, "step": 1268 }, { "epoch": 1.37, "learning_rate": 1.5332953711383252e-06, "loss": 1.6986, "step": 1270 }, { "epoch": 1.37, "learning_rate": 1.5318214120866598e-06, "loss": 1.7444, "step": 1272 }, { "epoch": 1.37, "learning_rate": 1.530345840190901e-06, "loss": 1.7171, "step": 1274 }, { "epoch": 1.37, "learning_rate": 1.5288686599259855e-06, "loss": 1.6862, "step": 1276 }, { "epoch": 1.37, "learning_rate": 1.5273898757717292e-06, "loss": 1.7048, "step": 1278 }, { "epoch": 1.38, "learning_rate": 1.5259094922128107e-06, "loss": 1.6982, "step": 1280 }, { "epoch": 1.38, "learning_rate": 1.5244275137387592e-06, "loss": 1.6649, "step": 1282 }, { "epoch": 1.38, "learning_rate": 1.5229439448439409e-06, "loss": 1.6761, "step": 1284 }, { "epoch": 1.38, "learning_rate": 1.5214587900275455e-06, "loss": 1.7277, "step": 1286 }, { "epoch": 1.38, "learning_rate": 1.5199720537935725e-06, "loss": 1.754, "step": 1288 }, { "epoch": 1.39, "learning_rate": 1.5184837406508163e-06, "loss": 1.7415, "step": 1290 }, { "epoch": 1.39, "learning_rate": 1.5169938551128545e-06, "loss": 1.7301, "step": 1292 }, { "epoch": 1.39, "learning_rate": 1.5155024016980331e-06, "loss": 1.7108, "step": 1294 }, { "epoch": 1.39, "learning_rate": 1.5140093849294528e-06, "loss": 1.6945, "step": 1296 }, { "epoch": 1.4, "learning_rate": 1.5125148093349553e-06, "loss": 1.6618, "step": 1298 }, { "epoch": 1.4, "learning_rate": 1.5110186794471103e-06, "loss": 1.7243, "step": 1300 }, { "epoch": 1.4, "learning_rate": 1.5095209998032004e-06, "loss": 1.7369, "step": 1302 }, { "epoch": 1.4, "learning_rate": 1.5080217749452092e-06, "loss": 1.7114, "step": 1304 }, { "epoch": 1.4, "learning_rate": 1.5065210094198047e-06, "loss": 1.6971, "step": 1306 }, { "epoch": 1.41, "learning_rate": 1.505018707778329e-06, "loss": 1.7366, "step": 1308 }, { "epoch": 1.41, "learning_rate": 1.503514874576782e-06, "loss": 1.7264, "step": 1310 }, { "epoch": 1.41, "learning_rate": 1.5020095143758082e-06, "loss": 1.784, "step": 1312 }, { "epoch": 1.41, "learning_rate": 1.5005026317406833e-06, "loss": 1.7189, "step": 1314 }, { "epoch": 1.41, "learning_rate": 1.4989942312412999e-06, "loss": 1.6925, "step": 1316 }, { "epoch": 1.42, "learning_rate": 1.497484317452154e-06, "loss": 1.6767, "step": 1318 }, { "epoch": 1.42, "learning_rate": 1.4959728949523305e-06, "loss": 1.7302, "step": 1320 }, { "epoch": 1.42, "learning_rate": 1.49445996832549e-06, "loss": 1.7876, "step": 1322 }, { "epoch": 1.42, "learning_rate": 1.4929455421598552e-06, "loss": 1.735, "step": 1324 }, { "epoch": 1.43, "learning_rate": 1.4914296210481951e-06, "loss": 1.6793, "step": 1326 }, { "epoch": 1.43, "learning_rate": 1.4899122095878136e-06, "loss": 1.7335, "step": 1328 }, { "epoch": 1.43, "learning_rate": 1.4883933123805337e-06, "loss": 1.7311, "step": 1330 }, { "epoch": 1.43, "learning_rate": 1.4868729340326844e-06, "loss": 1.7139, "step": 1332 }, { "epoch": 1.43, "learning_rate": 1.4853510791550865e-06, "loss": 1.7346, "step": 1334 }, { "epoch": 1.44, "learning_rate": 1.4838277523630387e-06, "loss": 1.7138, "step": 1336 }, { "epoch": 1.44, "learning_rate": 1.4823029582763038e-06, "loss": 1.7414, "step": 1338 }, { "epoch": 1.44, "learning_rate": 1.480776701519094e-06, "loss": 1.6534, "step": 1340 }, { "epoch": 1.44, "learning_rate": 1.4792489867200568e-06, "loss": 1.685, "step": 1342 }, { "epoch": 1.44, "learning_rate": 1.4777198185122628e-06, "loss": 1.7148, "step": 1344 }, { "epoch": 1.45, "learning_rate": 1.4761892015331895e-06, "loss": 1.6957, "step": 1346 }, { "epoch": 1.45, "learning_rate": 1.4746571404247082e-06, "loss": 1.704, "step": 1348 }, { "epoch": 1.45, "learning_rate": 1.4731236398330703e-06, "loss": 1.7824, "step": 1350 }, { "epoch": 1.45, "learning_rate": 1.471588704408891e-06, "loss": 1.7159, "step": 1352 }, { "epoch": 1.46, "learning_rate": 1.470052338807139e-06, "loss": 1.7172, "step": 1354 }, { "epoch": 1.46, "learning_rate": 1.4685145476871192e-06, "loss": 1.7338, "step": 1356 }, { "epoch": 1.46, "learning_rate": 1.4669753357124596e-06, "loss": 1.7265, "step": 1358 }, { "epoch": 1.46, "learning_rate": 1.4654347075510974e-06, "loss": 1.7153, "step": 1360 }, { "epoch": 1.46, "learning_rate": 1.4638926678752648e-06, "loss": 1.6877, "step": 1362 }, { "epoch": 1.47, "learning_rate": 1.4623492213614742e-06, "loss": 1.7213, "step": 1364 }, { "epoch": 1.47, "learning_rate": 1.4608043726905049e-06, "loss": 1.7088, "step": 1366 }, { "epoch": 1.47, "learning_rate": 1.4592581265473881e-06, "loss": 1.7151, "step": 1368 }, { "epoch": 1.47, "learning_rate": 1.4577104876213944e-06, "loss": 1.7175, "step": 1370 }, { "epoch": 1.47, "learning_rate": 1.456161460606016e-06, "loss": 1.727, "step": 1372 }, { "epoch": 1.48, "learning_rate": 1.4546110501989569e-06, "loss": 1.7406, "step": 1374 }, { "epoch": 1.48, "learning_rate": 1.4530592611021143e-06, "loss": 1.6604, "step": 1376 }, { "epoch": 1.48, "learning_rate": 1.4515060980215692e-06, "loss": 1.7018, "step": 1378 }, { "epoch": 1.48, "learning_rate": 1.4499515656675675e-06, "loss": 1.6778, "step": 1380 }, { "epoch": 1.49, "learning_rate": 1.4483956687545074e-06, "loss": 1.7269, "step": 1382 }, { "epoch": 1.49, "learning_rate": 1.4468384120009271e-06, "loss": 1.7276, "step": 1384 }, { "epoch": 1.49, "learning_rate": 1.4452798001294878e-06, "loss": 1.7092, "step": 1386 }, { "epoch": 1.49, "learning_rate": 1.4437198378669597e-06, "loss": 1.7161, "step": 1388 }, { "epoch": 1.49, "learning_rate": 1.4421585299442094e-06, "loss": 1.7091, "step": 1390 }, { "epoch": 1.5, "learning_rate": 1.440595881096184e-06, "loss": 1.6838, "step": 1392 }, { "epoch": 1.5, "learning_rate": 1.4390318960618971e-06, "loss": 1.6945, "step": 1394 }, { "epoch": 1.5, "learning_rate": 1.437466579584415e-06, "loss": 1.6958, "step": 1396 }, { "epoch": 1.5, "learning_rate": 1.435899936410841e-06, "loss": 1.6436, "step": 1398 }, { "epoch": 1.5, "learning_rate": 1.4343319712923024e-06, "loss": 1.6958, "step": 1400 }, { "epoch": 1.51, "learning_rate": 1.4327626889839355e-06, "loss": 1.7065, "step": 1402 }, { "epoch": 1.51, "learning_rate": 1.4311920942448716e-06, "loss": 1.6859, "step": 1404 }, { "epoch": 1.51, "learning_rate": 1.429620191838221e-06, "loss": 1.7051, "step": 1406 }, { "epoch": 1.51, "learning_rate": 1.4280469865310612e-06, "loss": 1.7125, "step": 1408 }, { "epoch": 1.52, "learning_rate": 1.4264724830944197e-06, "loss": 1.7075, "step": 1410 }, { "epoch": 1.52, "learning_rate": 1.4248966863032617e-06, "loss": 1.6968, "step": 1412 }, { "epoch": 1.52, "learning_rate": 1.4233196009364745e-06, "loss": 1.7106, "step": 1414 }, { "epoch": 1.52, "learning_rate": 1.421741231776853e-06, "loss": 1.7062, "step": 1416 }, { "epoch": 1.52, "learning_rate": 1.4201615836110854e-06, "loss": 1.7371, "step": 1418 }, { "epoch": 1.53, "learning_rate": 1.4185806612297394e-06, "loss": 1.7413, "step": 1420 }, { "epoch": 1.53, "learning_rate": 1.4169984694272457e-06, "loss": 1.6971, "step": 1422 }, { "epoch": 1.53, "learning_rate": 1.4154150130018865e-06, "loss": 1.6919, "step": 1424 }, { "epoch": 1.53, "learning_rate": 1.4138302967557776e-06, "loss": 1.6432, "step": 1426 }, { "epoch": 1.54, "learning_rate": 1.4122443254948559e-06, "loss": 1.6771, "step": 1428 }, { "epoch": 1.54, "learning_rate": 1.4106571040288653e-06, "loss": 1.7331, "step": 1430 }, { "epoch": 1.54, "learning_rate": 1.40906863717134e-06, "loss": 1.6976, "step": 1432 }, { "epoch": 1.54, "learning_rate": 1.4074789297395912e-06, "loss": 1.756, "step": 1434 }, { "epoch": 1.54, "learning_rate": 1.4058879865546929e-06, "loss": 1.6803, "step": 1436 }, { "epoch": 1.55, "learning_rate": 1.4042958124414663e-06, "loss": 1.7093, "step": 1438 }, { "epoch": 1.55, "learning_rate": 1.4027024122284662e-06, "loss": 1.6884, "step": 1440 }, { "epoch": 1.55, "learning_rate": 1.4011077907479647e-06, "loss": 1.7701, "step": 1442 }, { "epoch": 1.55, "learning_rate": 1.3995119528359388e-06, "loss": 1.7824, "step": 1444 }, { "epoch": 1.55, "learning_rate": 1.3979149033320538e-06, "loss": 1.6869, "step": 1446 }, { "epoch": 1.56, "learning_rate": 1.39631664707965e-06, "loss": 1.6744, "step": 1448 }, { "epoch": 1.56, "learning_rate": 1.3947171889257266e-06, "loss": 1.6944, "step": 1450 }, { "epoch": 1.56, "learning_rate": 1.3931165337209277e-06, "loss": 1.6467, "step": 1452 }, { "epoch": 1.56, "learning_rate": 1.391514686319529e-06, "loss": 1.7144, "step": 1454 }, { "epoch": 1.57, "learning_rate": 1.3899116515794203e-06, "loss": 1.7199, "step": 1456 }, { "epoch": 1.57, "learning_rate": 1.388307434362093e-06, "loss": 1.7072, "step": 1458 }, { "epoch": 1.57, "learning_rate": 1.3867020395326246e-06, "loss": 1.7341, "step": 1460 }, { "epoch": 1.57, "learning_rate": 1.3850954719596632e-06, "loss": 1.6839, "step": 1462 }, { "epoch": 1.57, "learning_rate": 1.3834877365154142e-06, "loss": 1.7171, "step": 1464 }, { "epoch": 1.58, "learning_rate": 1.3818788380756243e-06, "loss": 1.7735, "step": 1466 }, { "epoch": 1.58, "learning_rate": 1.380268781519568e-06, "loss": 1.707, "step": 1468 }, { "epoch": 1.58, "learning_rate": 1.3786575717300308e-06, "loss": 1.7312, "step": 1470 }, { "epoch": 1.58, "learning_rate": 1.3770452135932967e-06, "loss": 1.6706, "step": 1472 }, { "epoch": 1.58, "learning_rate": 1.3754317119991312e-06, "loss": 1.6678, "step": 1474 }, { "epoch": 1.59, "learning_rate": 1.3738170718407686e-06, "loss": 1.766, "step": 1476 }, { "epoch": 1.59, "learning_rate": 1.3722012980148955e-06, "loss": 1.7033, "step": 1478 }, { "epoch": 1.59, "learning_rate": 1.3705843954216366e-06, "loss": 1.741, "step": 1480 }, { "epoch": 1.59, "learning_rate": 1.3689663689645398e-06, "loss": 1.7144, "step": 1482 }, { "epoch": 1.6, "learning_rate": 1.3673472235505616e-06, "loss": 1.7407, "step": 1484 }, { "epoch": 1.6, "learning_rate": 1.3657269640900516e-06, "loss": 1.6924, "step": 1486 }, { "epoch": 1.6, "learning_rate": 1.3641055954967375e-06, "loss": 1.7044, "step": 1488 }, { "epoch": 1.6, "learning_rate": 1.3624831226877118e-06, "loss": 1.7388, "step": 1490 }, { "epoch": 1.6, "learning_rate": 1.3608595505834153e-06, "loss": 1.7409, "step": 1492 }, { "epoch": 1.61, "learning_rate": 1.3592348841076223e-06, "loss": 1.7766, "step": 1494 }, { "epoch": 1.61, "learning_rate": 1.3576091281874255e-06, "loss": 1.695, "step": 1496 }, { "epoch": 1.61, "learning_rate": 1.3559822877532232e-06, "loss": 1.7264, "step": 1498 }, { "epoch": 1.61, "learning_rate": 1.354354367738701e-06, "loss": 1.6905, "step": 1500 }, { "epoch": 1.61, "learning_rate": 1.3527253730808192e-06, "loss": 1.6954, "step": 1502 }, { "epoch": 1.62, "learning_rate": 1.3510953087197972e-06, "loss": 1.7274, "step": 1504 }, { "epoch": 1.62, "learning_rate": 1.3494641795990985e-06, "loss": 1.648, "step": 1506 }, { "epoch": 1.62, "learning_rate": 1.3478319906654151e-06, "loss": 1.6577, "step": 1508 }, { "epoch": 1.62, "learning_rate": 1.346198746868654e-06, "loss": 1.6769, "step": 1510 }, { "epoch": 1.63, "learning_rate": 1.3445644531619209e-06, "loss": 1.6664, "step": 1512 }, { "epoch": 1.63, "learning_rate": 1.3429291145015047e-06, "loss": 1.7119, "step": 1514 }, { "epoch": 1.63, "learning_rate": 1.3412927358468648e-06, "loss": 1.6691, "step": 1516 }, { "epoch": 1.63, "learning_rate": 1.3396553221606137e-06, "loss": 1.7531, "step": 1518 }, { "epoch": 1.63, "learning_rate": 1.3380168784085026e-06, "loss": 1.7171, "step": 1520 }, { "epoch": 1.64, "learning_rate": 1.3363774095594074e-06, "loss": 1.6915, "step": 1522 }, { "epoch": 1.64, "learning_rate": 1.3347369205853116e-06, "loss": 1.7239, "step": 1524 }, { "epoch": 1.64, "learning_rate": 1.3330954164612936e-06, "loss": 1.7342, "step": 1526 }, { "epoch": 1.64, "learning_rate": 1.3314529021655097e-06, "loss": 1.7195, "step": 1528 }, { "epoch": 1.64, "learning_rate": 1.32980938267918e-06, "loss": 1.6779, "step": 1530 }, { "epoch": 1.65, "learning_rate": 1.3281648629865732e-06, "loss": 1.7145, "step": 1532 }, { "epoch": 1.65, "learning_rate": 1.3265193480749904e-06, "loss": 1.6962, "step": 1534 }, { "epoch": 1.65, "learning_rate": 1.3248728429347525e-06, "loss": 1.6629, "step": 1536 }, { "epoch": 1.65, "learning_rate": 1.3232253525591819e-06, "loss": 1.7328, "step": 1538 }, { "epoch": 1.66, "learning_rate": 1.3215768819445894e-06, "loss": 1.7226, "step": 1540 }, { "epoch": 1.66, "learning_rate": 1.3199274360902588e-06, "loss": 1.7535, "step": 1542 }, { "epoch": 1.66, "learning_rate": 1.318277019998432e-06, "loss": 1.7136, "step": 1544 }, { "epoch": 1.66, "learning_rate": 1.3166256386742919e-06, "loss": 1.7045, "step": 1546 }, { "epoch": 1.66, "learning_rate": 1.3149732971259493e-06, "loss": 1.7004, "step": 1548 }, { "epoch": 1.67, "learning_rate": 1.3133200003644276e-06, "loss": 1.7544, "step": 1550 }, { "epoch": 1.67, "learning_rate": 1.3116657534036466e-06, "loss": 1.6561, "step": 1552 }, { "epoch": 1.67, "learning_rate": 1.3100105612604076e-06, "loss": 1.7337, "step": 1554 }, { "epoch": 1.67, "learning_rate": 1.3083544289543784e-06, "loss": 1.6645, "step": 1556 }, { "epoch": 1.67, "learning_rate": 1.3066973615080785e-06, "loss": 1.7252, "step": 1558 }, { "epoch": 1.68, "learning_rate": 1.3050393639468627e-06, "loss": 1.7016, "step": 1560 }, { "epoch": 1.68, "learning_rate": 1.3033804412989069e-06, "loss": 1.6807, "step": 1562 }, { "epoch": 1.68, "learning_rate": 1.3017205985951924e-06, "loss": 1.6845, "step": 1564 }, { "epoch": 1.68, "learning_rate": 1.3000598408694904e-06, "loss": 1.7144, "step": 1566 }, { "epoch": 1.69, "learning_rate": 1.2983981731583483e-06, "loss": 1.717, "step": 1568 }, { "epoch": 1.69, "learning_rate": 1.2967356005010718e-06, "loss": 1.7302, "step": 1570 }, { "epoch": 1.69, "learning_rate": 1.2950721279397114e-06, "loss": 1.6868, "step": 1572 }, { "epoch": 1.69, "learning_rate": 1.2934077605190471e-06, "loss": 1.6902, "step": 1574 }, { "epoch": 1.69, "learning_rate": 1.2917425032865728e-06, "loss": 1.7324, "step": 1576 }, { "epoch": 1.7, "learning_rate": 1.29007636129248e-06, "loss": 1.6848, "step": 1578 }, { "epoch": 1.7, "learning_rate": 1.288409339589644e-06, "loss": 1.714, "step": 1580 }, { "epoch": 1.7, "learning_rate": 1.286741443233608e-06, "loss": 1.6321, "step": 1582 }, { "epoch": 1.7, "learning_rate": 1.2850726772825684e-06, "loss": 1.682, "step": 1584 }, { "epoch": 1.7, "learning_rate": 1.2834030467973571e-06, "loss": 1.7173, "step": 1586 }, { "epoch": 1.71, "learning_rate": 1.2817325568414297e-06, "loss": 1.7706, "step": 1588 }, { "epoch": 1.71, "learning_rate": 1.280061212480847e-06, "loss": 1.7157, "step": 1590 }, { "epoch": 1.71, "learning_rate": 1.2783890187842615e-06, "loss": 1.7145, "step": 1592 }, { "epoch": 1.71, "learning_rate": 1.2767159808229018e-06, "loss": 1.6997, "step": 1594 }, { "epoch": 1.72, "learning_rate": 1.2750421036705556e-06, "loss": 1.7341, "step": 1596 }, { "epoch": 1.72, "learning_rate": 1.2733673924035572e-06, "loss": 1.7162, "step": 1598 }, { "epoch": 1.72, "learning_rate": 1.2716918521007695e-06, "loss": 1.7477, "step": 1600 }, { "epoch": 1.72, "learning_rate": 1.2700154878435697e-06, "loss": 1.7039, "step": 1602 }, { "epoch": 1.72, "learning_rate": 1.2683383047158343e-06, "loss": 1.7734, "step": 1604 }, { "epoch": 1.73, "learning_rate": 1.2666603078039223e-06, "loss": 1.7188, "step": 1606 }, { "epoch": 1.73, "learning_rate": 1.264981502196662e-06, "loss": 1.6747, "step": 1608 }, { "epoch": 1.73, "learning_rate": 1.2633018929853322e-06, "loss": 1.6853, "step": 1610 }, { "epoch": 1.73, "learning_rate": 1.2616214852636507e-06, "loss": 1.696, "step": 1612 }, { "epoch": 1.74, "learning_rate": 1.2599402841277563e-06, "loss": 1.7188, "step": 1614 }, { "epoch": 1.74, "learning_rate": 1.2582582946761938e-06, "loss": 1.7015, "step": 1616 }, { "epoch": 1.74, "learning_rate": 1.2565755220098981e-06, "loss": 1.709, "step": 1618 }, { "epoch": 1.74, "learning_rate": 1.2548919712321807e-06, "loss": 1.7432, "step": 1620 }, { "epoch": 1.74, "learning_rate": 1.2532076474487121e-06, "loss": 1.666, "step": 1622 }, { "epoch": 1.75, "learning_rate": 1.251522555767507e-06, "loss": 1.7293, "step": 1624 }, { "epoch": 1.75, "learning_rate": 1.2498367012989085e-06, "loss": 1.67, "step": 1626 }, { "epoch": 1.75, "learning_rate": 1.2481500891555746e-06, "loss": 1.7527, "step": 1628 }, { "epoch": 1.75, "learning_rate": 1.2464627244524593e-06, "loss": 1.7247, "step": 1630 }, { "epoch": 1.75, "learning_rate": 1.2447746123067995e-06, "loss": 1.7901, "step": 1632 }, { "epoch": 1.76, "learning_rate": 1.2430857578380994e-06, "loss": 1.7128, "step": 1634 }, { "epoch": 1.76, "learning_rate": 1.2413961661681133e-06, "loss": 1.745, "step": 1636 }, { "epoch": 1.76, "learning_rate": 1.2397058424208326e-06, "loss": 1.7129, "step": 1638 }, { "epoch": 1.76, "learning_rate": 1.2380147917224677e-06, "loss": 1.682, "step": 1640 }, { "epoch": 1.77, "learning_rate": 1.2363230192014343e-06, "loss": 1.7325, "step": 1642 }, { "epoch": 1.77, "learning_rate": 1.2346305299883364e-06, "loss": 1.7165, "step": 1644 }, { "epoch": 1.77, "learning_rate": 1.2329373292159524e-06, "loss": 1.7265, "step": 1646 }, { "epoch": 1.77, "learning_rate": 1.2312434220192176e-06, "loss": 1.711, "step": 1648 }, { "epoch": 1.77, "learning_rate": 1.2295488135352113e-06, "loss": 1.6986, "step": 1650 }, { "epoch": 1.78, "learning_rate": 1.2278535089031377e-06, "loss": 1.7186, "step": 1652 }, { "epoch": 1.78, "learning_rate": 1.2261575132643134e-06, "loss": 1.693, "step": 1654 }, { "epoch": 1.78, "learning_rate": 1.2244608317621499e-06, "loss": 1.6866, "step": 1656 }, { "epoch": 1.78, "learning_rate": 1.2227634695421393e-06, "loss": 1.7142, "step": 1658 }, { "epoch": 1.78, "learning_rate": 1.221065431751838e-06, "loss": 1.747, "step": 1660 }, { "epoch": 1.79, "learning_rate": 1.2193667235408507e-06, "loss": 1.6544, "step": 1662 }, { "epoch": 1.79, "learning_rate": 1.2176673500608154e-06, "loss": 1.687, "step": 1664 }, { "epoch": 1.79, "learning_rate": 1.215967316465389e-06, "loss": 1.7248, "step": 1666 }, { "epoch": 1.79, "learning_rate": 1.214266627910228e-06, "loss": 1.7385, "step": 1668 }, { "epoch": 1.8, "learning_rate": 1.2125652895529766e-06, "loss": 1.722, "step": 1670 }, { "epoch": 1.8, "learning_rate": 1.2108633065532497e-06, "loss": 1.7037, "step": 1672 }, { "epoch": 1.8, "learning_rate": 1.2091606840726167e-06, "loss": 1.7116, "step": 1674 }, { "epoch": 1.8, "learning_rate": 1.2074574272745868e-06, "loss": 1.6718, "step": 1676 }, { "epoch": 1.8, "learning_rate": 1.2057535413245918e-06, "loss": 1.6715, "step": 1678 }, { "epoch": 1.81, "learning_rate": 1.2040490313899735e-06, "loss": 1.6836, "step": 1680 }, { "epoch": 1.81, "learning_rate": 1.202343902639964e-06, "loss": 1.6968, "step": 1682 }, { "epoch": 1.81, "learning_rate": 1.2006381602456732e-06, "loss": 1.6733, "step": 1684 }, { "epoch": 1.81, "learning_rate": 1.1989318093800713e-06, "loss": 1.6851, "step": 1686 }, { "epoch": 1.81, "learning_rate": 1.1972248552179753e-06, "loss": 1.7461, "step": 1688 }, { "epoch": 1.82, "learning_rate": 1.19551730293603e-06, "loss": 1.6481, "step": 1690 }, { "epoch": 1.82, "learning_rate": 1.193809157712695e-06, "loss": 1.6965, "step": 1692 }, { "epoch": 1.82, "learning_rate": 1.1921004247282275e-06, "loss": 1.6584, "step": 1694 }, { "epoch": 1.82, "learning_rate": 1.1903911091646684e-06, "loss": 1.7731, "step": 1696 }, { "epoch": 1.83, "learning_rate": 1.1886812162058241e-06, "loss": 1.7779, "step": 1698 }, { "epoch": 1.83, "learning_rate": 1.1869707510372526e-06, "loss": 1.7142, "step": 1700 }, { "epoch": 1.83, "learning_rate": 1.1852597188462474e-06, "loss": 1.6581, "step": 1702 }, { "epoch": 1.83, "learning_rate": 1.1835481248218213e-06, "loss": 1.6806, "step": 1704 }, { "epoch": 1.83, "learning_rate": 1.1818359741546912e-06, "loss": 1.7324, "step": 1706 }, { "epoch": 1.84, "learning_rate": 1.1801232720372617e-06, "loss": 1.7549, "step": 1708 }, { "epoch": 1.84, "learning_rate": 1.1784100236636097e-06, "loss": 1.7423, "step": 1710 }, { "epoch": 1.84, "learning_rate": 1.17669623422947e-06, "loss": 1.7045, "step": 1712 }, { "epoch": 1.84, "learning_rate": 1.1749819089322165e-06, "loss": 1.7012, "step": 1714 }, { "epoch": 1.84, "learning_rate": 1.1732670529708494e-06, "loss": 1.6738, "step": 1716 }, { "epoch": 1.85, "learning_rate": 1.1715516715459784e-06, "loss": 1.7019, "step": 1718 }, { "epoch": 1.85, "learning_rate": 1.1698357698598052e-06, "loss": 1.6911, "step": 1720 }, { "epoch": 1.85, "learning_rate": 1.168119353116111e-06, "loss": 1.7288, "step": 1722 }, { "epoch": 1.85, "learning_rate": 1.1664024265202376e-06, "loss": 1.696, "step": 1724 }, { "epoch": 1.86, "learning_rate": 1.1646849952790744e-06, "loss": 1.676, "step": 1726 }, { "epoch": 1.86, "learning_rate": 1.1629670646010405e-06, "loss": 1.6942, "step": 1728 }, { "epoch": 1.86, "learning_rate": 1.1612486396960694e-06, "loss": 1.6838, "step": 1730 }, { "epoch": 1.86, "learning_rate": 1.159529725775594e-06, "loss": 1.7286, "step": 1732 }, { "epoch": 1.86, "learning_rate": 1.1578103280525295e-06, "loss": 1.7121, "step": 1734 }, { "epoch": 1.87, "learning_rate": 1.1560904517412597e-06, "loss": 1.7135, "step": 1736 }, { "epoch": 1.87, "learning_rate": 1.154370102057618e-06, "loss": 1.69, "step": 1738 }, { "epoch": 1.87, "learning_rate": 1.1526492842188744e-06, "loss": 1.7195, "step": 1740 }, { "epoch": 1.87, "learning_rate": 1.150928003443719e-06, "loss": 1.6468, "step": 1742 }, { "epoch": 1.87, "learning_rate": 1.149206264952245e-06, "loss": 1.7329, "step": 1744 }, { "epoch": 1.88, "learning_rate": 1.1474840739659337e-06, "loss": 1.6914, "step": 1746 }, { "epoch": 1.88, "learning_rate": 1.1457614357076398e-06, "loss": 1.6938, "step": 1748 }, { "epoch": 1.88, "learning_rate": 1.1440383554015733e-06, "loss": 1.6753, "step": 1750 }, { "epoch": 1.88, "learning_rate": 1.1423148382732853e-06, "loss": 1.6927, "step": 1752 }, { "epoch": 1.89, "learning_rate": 1.1405908895496511e-06, "loss": 1.7203, "step": 1754 }, { "epoch": 1.89, "learning_rate": 1.1388665144588558e-06, "loss": 1.7106, "step": 1756 }, { "epoch": 1.89, "learning_rate": 1.1371417182303769e-06, "loss": 1.7114, "step": 1758 }, { "epoch": 1.89, "learning_rate": 1.135416506094969e-06, "loss": 1.6941, "step": 1760 }, { "epoch": 1.89, "learning_rate": 1.1336908832846483e-06, "loss": 1.6957, "step": 1762 }, { "epoch": 1.9, "learning_rate": 1.1319648550326769e-06, "loss": 1.7461, "step": 1764 }, { "epoch": 1.9, "learning_rate": 1.1302384265735451e-06, "loss": 1.7403, "step": 1766 }, { "epoch": 1.9, "learning_rate": 1.1285116031429583e-06, "loss": 1.7075, "step": 1768 }, { "epoch": 1.9, "learning_rate": 1.1267843899778188e-06, "loss": 1.691, "step": 1770 }, { "epoch": 1.9, "learning_rate": 1.1250567923162116e-06, "loss": 1.6923, "step": 1772 }, { "epoch": 1.91, "learning_rate": 1.1233288153973871e-06, "loss": 1.6711, "step": 1774 }, { "epoch": 1.91, "learning_rate": 1.121600464461746e-06, "loss": 1.7347, "step": 1776 }, { "epoch": 1.91, "learning_rate": 1.1198717447508238e-06, "loss": 1.7163, "step": 1778 }, { "epoch": 1.91, "learning_rate": 1.1181426615072738e-06, "loss": 1.7165, "step": 1780 }, { "epoch": 1.92, "learning_rate": 1.1164132199748517e-06, "loss": 1.7139, "step": 1782 }, { "epoch": 1.92, "learning_rate": 1.1146834253984005e-06, "loss": 1.6898, "step": 1784 }, { "epoch": 1.92, "learning_rate": 1.1129532830238328e-06, "loss": 1.6665, "step": 1786 }, { "epoch": 1.92, "learning_rate": 1.1112227980981173e-06, "loss": 1.7527, "step": 1788 }, { "epoch": 1.92, "learning_rate": 1.1094919758692603e-06, "loss": 1.773, "step": 1790 }, { "epoch": 1.93, "learning_rate": 1.1077608215862913e-06, "loss": 1.6996, "step": 1792 }, { "epoch": 1.93, "learning_rate": 1.1060293404992478e-06, "loss": 1.7005, "step": 1794 }, { "epoch": 1.93, "learning_rate": 1.1042975378591572e-06, "loss": 1.6702, "step": 1796 }, { "epoch": 1.93, "learning_rate": 1.1025654189180225e-06, "loss": 1.6519, "step": 1798 }, { "epoch": 1.93, "learning_rate": 1.1008329889288059e-06, "loss": 1.7702, "step": 1800 }, { "epoch": 1.94, "learning_rate": 1.0991002531454133e-06, "loss": 1.6652, "step": 1802 }, { "epoch": 1.94, "learning_rate": 1.0973672168226773e-06, "loss": 1.6596, "step": 1804 }, { "epoch": 1.94, "learning_rate": 1.0956338852163423e-06, "loss": 1.6831, "step": 1806 }, { "epoch": 1.94, "learning_rate": 1.0939002635830484e-06, "loss": 1.6836, "step": 1808 }, { "epoch": 1.95, "learning_rate": 1.0921663571803148e-06, "loss": 1.7197, "step": 1810 }, { "epoch": 1.95, "learning_rate": 1.0904321712665247e-06, "loss": 1.617, "step": 1812 }, { "epoch": 1.95, "learning_rate": 1.0886977111009088e-06, "loss": 1.7049, "step": 1814 }, { "epoch": 1.95, "learning_rate": 1.0869629819435295e-06, "loss": 1.7344, "step": 1816 }, { "epoch": 1.95, "learning_rate": 1.085227989055265e-06, "loss": 1.715, "step": 1818 }, { "epoch": 1.96, "learning_rate": 1.0834927376977937e-06, "loss": 1.7326, "step": 1820 }, { "epoch": 1.96, "learning_rate": 1.0817572331335766e-06, "loss": 1.7108, "step": 1822 }, { "epoch": 1.96, "learning_rate": 1.0800214806258443e-06, "loss": 1.6798, "step": 1824 }, { "epoch": 1.96, "learning_rate": 1.078285485438578e-06, "loss": 1.7543, "step": 1826 }, { "epoch": 1.97, "learning_rate": 1.076549252836496e-06, "loss": 1.7059, "step": 1828 }, { "epoch": 1.97, "learning_rate": 1.0748127880850348e-06, "loss": 1.7489, "step": 1830 }, { "epoch": 1.97, "learning_rate": 1.073076096450337e-06, "loss": 1.7203, "step": 1832 }, { "epoch": 1.97, "learning_rate": 1.0713391831992323e-06, "loss": 1.6616, "step": 1834 }, { "epoch": 1.97, "learning_rate": 1.0696020535992225e-06, "loss": 1.681, "step": 1836 }, { "epoch": 1.98, "learning_rate": 1.0678647129184652e-06, "loss": 1.6962, "step": 1838 }, { "epoch": 1.98, "learning_rate": 1.0661271664257591e-06, "loss": 1.6594, "step": 1840 }, { "epoch": 1.98, "learning_rate": 1.0643894193905264e-06, "loss": 1.7364, "step": 1842 }, { "epoch": 1.98, "learning_rate": 1.0626514770827971e-06, "loss": 1.7061, "step": 1844 }, { "epoch": 1.98, "learning_rate": 1.0609133447731941e-06, "loss": 1.7167, "step": 1846 }, { "epoch": 1.99, "learning_rate": 1.0591750277329165e-06, "loss": 1.6882, "step": 1848 }, { "epoch": 1.99, "learning_rate": 1.0574365312337234e-06, "loss": 1.6871, "step": 1850 }, { "epoch": 1.99, "learning_rate": 1.0556978605479174e-06, "loss": 1.6935, "step": 1852 }, { "epoch": 1.99, "learning_rate": 1.053959020948331e-06, "loss": 1.7205, "step": 1854 }, { "epoch": 2.0, "learning_rate": 1.0522200177083075e-06, "loss": 1.6969, "step": 1856 }, { "epoch": 2.0, "learning_rate": 1.0504808561016875e-06, "loss": 1.7253, "step": 1858 }, { "epoch": 2.0, "learning_rate": 1.048741541402791e-06, "loss": 1.7234, "step": 1860 }, { "epoch": 2.0, "learning_rate": 1.047002078886403e-06, "loss": 1.6991, "step": 1862 }, { "epoch": 2.0, "learning_rate": 1.0452624738277563e-06, "loss": 1.6794, "step": 1864 }, { "epoch": 2.01, "learning_rate": 1.043522731502516e-06, "loss": 1.6694, "step": 1866 }, { "epoch": 2.01, "learning_rate": 1.0417828571867637e-06, "loss": 1.6975, "step": 1868 }, { "epoch": 2.01, "learning_rate": 1.0400428561569817e-06, "loss": 1.7029, "step": 1870 }, { "epoch": 2.01, "learning_rate": 1.0383027336900353e-06, "loss": 1.7138, "step": 1872 }, { "epoch": 2.01, "learning_rate": 1.0365624950631596e-06, "loss": 1.6758, "step": 1874 }, { "epoch": 2.02, "learning_rate": 1.0348221455539407e-06, "loss": 1.6867, "step": 1876 }, { "epoch": 2.02, "learning_rate": 1.0330816904403019e-06, "loss": 1.6861, "step": 1878 }, { "epoch": 2.02, "learning_rate": 1.0313411350004862e-06, "loss": 1.6985, "step": 1880 }, { "epoch": 2.02, "learning_rate": 1.0296004845130412e-06, "loss": 1.7534, "step": 1882 }, { "epoch": 2.03, "learning_rate": 1.0278597442568024e-06, "loss": 1.6866, "step": 1884 }, { "epoch": 2.03, "learning_rate": 1.026118919510878e-06, "loss": 1.6866, "step": 1886 }, { "epoch": 2.03, "learning_rate": 1.0243780155546322e-06, "loss": 1.659, "step": 1888 }, { "epoch": 2.03, "learning_rate": 1.022637037667669e-06, "loss": 1.7039, "step": 1890 }, { "epoch": 2.03, "learning_rate": 1.0208959911298173e-06, "loss": 1.6146, "step": 1892 }, { "epoch": 2.04, "learning_rate": 1.0191548812211142e-06, "loss": 1.7017, "step": 1894 }, { "epoch": 2.04, "learning_rate": 1.0174137132217882e-06, "loss": 1.7409, "step": 1896 }, { "epoch": 2.04, "learning_rate": 1.0156724924122442e-06, "loss": 1.6915, "step": 1898 }, { "epoch": 2.04, "learning_rate": 1.013931224073048e-06, "loss": 1.6647, "step": 1900 }, { "epoch": 2.04, "learning_rate": 1.012189913484909e-06, "loss": 1.6729, "step": 1902 }, { "epoch": 2.05, "learning_rate": 1.0104485659286647e-06, "loss": 1.7372, "step": 1904 }, { "epoch": 2.05, "learning_rate": 1.0087071866852645e-06, "loss": 1.6982, "step": 1906 }, { "epoch": 2.05, "learning_rate": 1.006965781035755e-06, "loss": 1.7022, "step": 1908 }, { "epoch": 2.05, "learning_rate": 1.0052243542612613e-06, "loss": 1.737, "step": 1910 }, { "epoch": 2.06, "learning_rate": 1.0034829116429738e-06, "loss": 1.6449, "step": 1912 }, { "epoch": 2.06, "learning_rate": 1.0017414584621299e-06, "loss": 1.6763, "step": 1914 }, { "epoch": 2.06, "learning_rate": 1e-06, "loss": 1.6913, "step": 1916 }, { "epoch": 2.06, "learning_rate": 9.9825854153787e-07, "loss": 1.7034, "step": 1918 }, { "epoch": 2.06, "learning_rate": 9.965170883570263e-07, "loss": 1.6671, "step": 1920 }, { "epoch": 2.07, "learning_rate": 9.947756457387386e-07, "loss": 1.6979, "step": 1922 }, { "epoch": 2.07, "learning_rate": 9.93034218964245e-07, "loss": 1.6894, "step": 1924 }, { "epoch": 2.07, "learning_rate": 9.912928133147352e-07, "loss": 1.6678, "step": 1926 }, { "epoch": 2.07, "learning_rate": 9.895514340713352e-07, "loss": 1.6519, "step": 1928 }, { "epoch": 2.07, "learning_rate": 9.87810086515091e-07, "loss": 1.6842, "step": 1930 }, { "epoch": 2.08, "learning_rate": 9.860687759269523e-07, "loss": 1.6717, "step": 1932 }, { "epoch": 2.08, "learning_rate": 9.84327507587756e-07, "loss": 1.6812, "step": 1934 }, { "epoch": 2.08, "learning_rate": 9.825862867782123e-07, "loss": 1.694, "step": 1936 }, { "epoch": 2.08, "learning_rate": 9.80845118778886e-07, "loss": 1.6862, "step": 1938 }, { "epoch": 2.09, "learning_rate": 9.791040088701828e-07, "loss": 1.6877, "step": 1940 }, { "epoch": 2.09, "learning_rate": 9.77362962332331e-07, "loss": 1.6804, "step": 1942 }, { "epoch": 2.09, "learning_rate": 9.756219844453675e-07, "loss": 1.6541, "step": 1944 }, { "epoch": 2.09, "learning_rate": 9.738810804891218e-07, "loss": 1.6688, "step": 1946 }, { "epoch": 2.09, "learning_rate": 9.721402557431973e-07, "loss": 1.6979, "step": 1948 }, { "epoch": 2.1, "learning_rate": 9.703995154869587e-07, "loss": 1.69, "step": 1950 }, { "epoch": 2.1, "learning_rate": 9.686588649995137e-07, "loss": 1.7195, "step": 1952 }, { "epoch": 2.1, "learning_rate": 9.669183095596982e-07, "loss": 1.6727, "step": 1954 }, { "epoch": 2.1, "learning_rate": 9.651778544460594e-07, "loss": 1.6705, "step": 1956 }, { "epoch": 2.1, "learning_rate": 9.634375049368405e-07, "loss": 1.6648, "step": 1958 }, { "epoch": 2.11, "learning_rate": 9.616972663099646e-07, "loss": 1.6725, "step": 1960 }, { "epoch": 2.11, "learning_rate": 9.599571438430186e-07, "loss": 1.6684, "step": 1962 }, { "epoch": 2.11, "learning_rate": 9.582171428132362e-07, "loss": 1.7053, "step": 1964 }, { "epoch": 2.11, "learning_rate": 9.564772684974838e-07, "loss": 1.7238, "step": 1966 }, { "epoch": 2.12, "learning_rate": 9.547375261722436e-07, "loss": 1.7292, "step": 1968 }, { "epoch": 2.12, "learning_rate": 9.529979211135968e-07, "loss": 1.6638, "step": 1970 }, { "epoch": 2.12, "learning_rate": 9.512584585972089e-07, "loss": 1.5997, "step": 1972 }, { "epoch": 2.12, "learning_rate": 9.495191438983121e-07, "loss": 1.7083, "step": 1974 }, { "epoch": 2.12, "learning_rate": 9.477799822916923e-07, "loss": 1.7368, "step": 1976 }, { "epoch": 2.13, "learning_rate": 9.460409790516689e-07, "loss": 1.7246, "step": 1978 }, { "epoch": 2.13, "learning_rate": 9.443021394520825e-07, "loss": 1.6992, "step": 1980 }, { "epoch": 2.13, "learning_rate": 9.425634687662766e-07, "loss": 1.6767, "step": 1982 }, { "epoch": 2.13, "learning_rate": 9.408249722670836e-07, "loss": 1.6429, "step": 1984 }, { "epoch": 2.13, "learning_rate": 9.390866552268058e-07, "loss": 1.6825, "step": 1986 }, { "epoch": 2.14, "learning_rate": 9.373485229172029e-07, "loss": 1.6945, "step": 1988 }, { "epoch": 2.14, "learning_rate": 9.356105806094736e-07, "loss": 1.6499, "step": 1990 }, { "epoch": 2.14, "learning_rate": 9.338728335742405e-07, "loss": 1.7397, "step": 1992 }, { "epoch": 2.14, "learning_rate": 9.321352870815347e-07, "loss": 1.6727, "step": 1994 }, { "epoch": 2.15, "learning_rate": 9.303979464007775e-07, "loss": 1.6525, "step": 1996 }, { "epoch": 2.15, "learning_rate": 9.286608168007676e-07, "loss": 1.698, "step": 1998 }, { "epoch": 2.15, "learning_rate": 9.269239035496628e-07, "loss": 1.7011, "step": 2000 }, { "epoch": 2.15, "learning_rate": 9.260555297010704e-07, "loss": 1.6581, "step": 2002 }, { "epoch": 2.15, "learning_rate": 9.251872119149656e-07, "loss": 1.6775, "step": 2004 }, { "epoch": 2.16, "learning_rate": 9.234507471635042e-07, "loss": 1.6328, "step": 2006 }, { "epoch": 2.16, "learning_rate": 9.217145145614221e-07, "loss": 1.6563, "step": 2008 }, { "epoch": 2.16, "learning_rate": 9.199785193741557e-07, "loss": 1.6793, "step": 2010 }, { "epoch": 2.16, "learning_rate": 9.182427668664233e-07, "loss": 1.652, "step": 2012 }, { "epoch": 2.17, "learning_rate": 9.165072623022063e-07, "loss": 1.6535, "step": 2014 }, { "epoch": 2.17, "learning_rate": 9.147720109447345e-07, "loss": 1.6903, "step": 2016 }, { "epoch": 2.17, "learning_rate": 9.130370180564705e-07, "loss": 1.6495, "step": 2018 }, { "epoch": 2.17, "learning_rate": 9.113022888990911e-07, "loss": 1.6845, "step": 2020 }, { "epoch": 2.17, "learning_rate": 9.095678287334753e-07, "loss": 1.7318, "step": 2022 }, { "epoch": 2.18, "learning_rate": 9.078336428196851e-07, "loss": 1.6889, "step": 2024 }, { "epoch": 2.18, "learning_rate": 9.060997364169519e-07, "loss": 1.6974, "step": 2026 }, { "epoch": 2.18, "learning_rate": 9.043661147836578e-07, "loss": 1.6742, "step": 2028 }, { "epoch": 2.18, "learning_rate": 9.026327831773229e-07, "loss": 1.6707, "step": 2030 }, { "epoch": 2.18, "learning_rate": 9.008997468545868e-07, "loss": 1.725, "step": 2032 }, { "epoch": 2.19, "learning_rate": 8.991670110711943e-07, "loss": 1.6996, "step": 2034 }, { "epoch": 2.19, "learning_rate": 8.974345810819775e-07, "loss": 1.6642, "step": 2036 }, { "epoch": 2.19, "learning_rate": 8.957024621408431e-07, "loss": 1.6846, "step": 2038 }, { "epoch": 2.19, "learning_rate": 8.939706595007522e-07, "loss": 1.6958, "step": 2040 }, { "epoch": 2.2, "learning_rate": 8.922391784137084e-07, "loss": 1.7046, "step": 2042 }, { "epoch": 2.2, "learning_rate": 8.905080241307397e-07, "loss": 1.6949, "step": 2044 }, { "epoch": 2.2, "learning_rate": 8.887772019018825e-07, "loss": 1.7253, "step": 2046 }, { "epoch": 2.2, "learning_rate": 8.870467169761671e-07, "loss": 1.7006, "step": 2048 }, { "epoch": 2.2, "learning_rate": 8.853165746015995e-07, "loss": 1.6521, "step": 2050 }, { "epoch": 2.21, "learning_rate": 8.835867800251483e-07, "loss": 1.651, "step": 2052 }, { "epoch": 2.21, "learning_rate": 8.818573384927262e-07, "loss": 1.7189, "step": 2054 }, { "epoch": 2.21, "learning_rate": 8.801282552491763e-07, "loss": 1.6733, "step": 2056 }, { "epoch": 2.21, "learning_rate": 8.78399535538254e-07, "loss": 1.6779, "step": 2058 }, { "epoch": 2.21, "learning_rate": 8.766711846026131e-07, "loss": 1.6881, "step": 2060 }, { "epoch": 2.22, "learning_rate": 8.749432076837884e-07, "loss": 1.6704, "step": 2062 }, { "epoch": 2.22, "learning_rate": 8.732156100221815e-07, "loss": 1.6909, "step": 2064 }, { "epoch": 2.22, "learning_rate": 8.714883968570417e-07, "loss": 1.6817, "step": 2066 }, { "epoch": 2.22, "learning_rate": 8.697615734264547e-07, "loss": 1.6882, "step": 2068 }, { "epoch": 2.23, "learning_rate": 8.680351449673234e-07, "loss": 1.6907, "step": 2070 }, { "epoch": 2.23, "learning_rate": 8.663091167153514e-07, "loss": 1.6665, "step": 2072 }, { "epoch": 2.23, "learning_rate": 8.64583493905031e-07, "loss": 1.6925, "step": 2074 }, { "epoch": 2.23, "learning_rate": 8.62858281769623e-07, "loss": 1.6217, "step": 2076 }, { "epoch": 2.23, "learning_rate": 8.611334855411444e-07, "loss": 1.6762, "step": 2078 }, { "epoch": 2.24, "learning_rate": 8.594091104503489e-07, "loss": 1.6979, "step": 2080 }, { "epoch": 2.24, "learning_rate": 8.576851617267149e-07, "loss": 1.7207, "step": 2082 }, { "epoch": 2.24, "learning_rate": 8.559616445984267e-07, "loss": 1.6718, "step": 2084 }, { "epoch": 2.24, "learning_rate": 8.542385642923604e-07, "loss": 1.66, "step": 2086 }, { "epoch": 2.24, "learning_rate": 8.525159260340665e-07, "loss": 1.6588, "step": 2088 }, { "epoch": 2.25, "learning_rate": 8.507937350477552e-07, "loss": 1.663, "step": 2090 }, { "epoch": 2.25, "learning_rate": 8.490719965562812e-07, "loss": 1.6708, "step": 2092 }, { "epoch": 2.25, "learning_rate": 8.473507157811254e-07, "loss": 1.7245, "step": 2094 }, { "epoch": 2.25, "learning_rate": 8.45629897942382e-07, "loss": 1.6641, "step": 2096 }, { "epoch": 2.26, "learning_rate": 8.439095482587402e-07, "loss": 1.6388, "step": 2098 }, { "epoch": 2.26, "learning_rate": 8.421896719474704e-07, "loss": 1.643, "step": 2100 }, { "epoch": 2.26, "learning_rate": 8.404702742244061e-07, "loss": 1.6584, "step": 2102 }, { "epoch": 2.26, "learning_rate": 8.387513603039306e-07, "loss": 1.7217, "step": 2104 }, { "epoch": 2.26, "learning_rate": 8.370329353989595e-07, "loss": 1.6549, "step": 2106 }, { "epoch": 2.27, "learning_rate": 8.353150047209259e-07, "loss": 1.6791, "step": 2108 }, { "epoch": 2.27, "learning_rate": 8.335975734797626e-07, "loss": 1.664, "step": 2110 }, { "epoch": 2.27, "learning_rate": 8.318806468838895e-07, "loss": 1.7046, "step": 2112 }, { "epoch": 2.27, "learning_rate": 8.301642301401949e-07, "loss": 1.6804, "step": 2114 }, { "epoch": 2.27, "learning_rate": 8.284483284540216e-07, "loss": 1.6934, "step": 2116 }, { "epoch": 2.28, "learning_rate": 8.267329470291505e-07, "loss": 1.6658, "step": 2118 }, { "epoch": 2.28, "learning_rate": 8.250180910677833e-07, "loss": 1.6732, "step": 2120 }, { "epoch": 2.28, "learning_rate": 8.233037657705302e-07, "loss": 1.7052, "step": 2122 }, { "epoch": 2.28, "learning_rate": 8.215899763363902e-07, "loss": 1.6471, "step": 2124 }, { "epoch": 2.29, "learning_rate": 8.198767279627385e-07, "loss": 1.7186, "step": 2126 }, { "epoch": 2.29, "learning_rate": 8.181640258453088e-07, "loss": 1.7106, "step": 2128 }, { "epoch": 2.29, "learning_rate": 8.164518751781788e-07, "loss": 1.6692, "step": 2130 }, { "epoch": 2.29, "learning_rate": 8.147402811537525e-07, "loss": 1.6377, "step": 2132 }, { "epoch": 2.29, "learning_rate": 8.130292489627474e-07, "loss": 1.7158, "step": 2134 }, { "epoch": 2.3, "learning_rate": 8.11318783794176e-07, "loss": 1.6664, "step": 2136 }, { "epoch": 2.3, "learning_rate": 8.096088908353315e-07, "loss": 1.7066, "step": 2138 }, { "epoch": 2.3, "learning_rate": 8.078995752717725e-07, "loss": 1.6674, "step": 2140 }, { "epoch": 2.3, "learning_rate": 8.061908422873051e-07, "loss": 1.674, "step": 2142 }, { "epoch": 2.3, "learning_rate": 8.0448269706397e-07, "loss": 1.6951, "step": 2144 }, { "epoch": 2.31, "learning_rate": 8.027751447820246e-07, "loss": 1.6946, "step": 2146 }, { "epoch": 2.31, "learning_rate": 8.010681906199287e-07, "loss": 1.6304, "step": 2148 }, { "epoch": 2.31, "learning_rate": 7.993618397543268e-07, "loss": 1.709, "step": 2150 }, { "epoch": 2.31, "learning_rate": 7.976560973600363e-07, "loss": 1.7127, "step": 2152 }, { "epoch": 2.32, "learning_rate": 7.959509686100267e-07, "loss": 1.6559, "step": 2154 }, { "epoch": 2.32, "learning_rate": 7.942464586754082e-07, "loss": 1.6825, "step": 2156 }, { "epoch": 2.32, "learning_rate": 7.925425727254134e-07, "loss": 1.6144, "step": 2158 }, { "epoch": 2.32, "learning_rate": 7.908393159273836e-07, "loss": 1.7109, "step": 2160 }, { "epoch": 2.32, "learning_rate": 7.891366934467503e-07, "loss": 1.696, "step": 2162 }, { "epoch": 2.33, "learning_rate": 7.874347104470232e-07, "loss": 1.6669, "step": 2164 }, { "epoch": 2.33, "learning_rate": 7.857333720897721e-07, "loss": 1.7349, "step": 2166 }, { "epoch": 2.33, "learning_rate": 7.84032683534611e-07, "loss": 1.7119, "step": 2168 }, { "epoch": 2.33, "learning_rate": 7.823326499391845e-07, "loss": 1.6871, "step": 2170 }, { "epoch": 2.33, "learning_rate": 7.806332764591495e-07, "loss": 1.7483, "step": 2172 }, { "epoch": 2.34, "learning_rate": 7.789345682481622e-07, "loss": 1.699, "step": 2174 }, { "epoch": 2.34, "learning_rate": 7.772365304578608e-07, "loss": 1.7096, "step": 2176 }, { "epoch": 2.34, "learning_rate": 7.755391682378505e-07, "loss": 1.63, "step": 2178 }, { "epoch": 2.34, "learning_rate": 7.738424867356867e-07, "loss": 1.6633, "step": 2180 }, { "epoch": 2.35, "learning_rate": 7.721464910968626e-07, "loss": 1.7003, "step": 2182 }, { "epoch": 2.35, "learning_rate": 7.704511864647889e-07, "loss": 1.6877, "step": 2184 }, { "epoch": 2.35, "learning_rate": 7.687565779807823e-07, "loss": 1.636, "step": 2186 }, { "epoch": 2.35, "learning_rate": 7.670626707840477e-07, "loss": 1.6685, "step": 2188 }, { "epoch": 2.35, "learning_rate": 7.653694700116636e-07, "loss": 1.6634, "step": 2190 }, { "epoch": 2.36, "learning_rate": 7.63676980798566e-07, "loss": 1.7052, "step": 2192 }, { "epoch": 2.36, "learning_rate": 7.619852082775322e-07, "loss": 1.6881, "step": 2194 }, { "epoch": 2.36, "learning_rate": 7.602941575791674e-07, "loss": 1.6235, "step": 2196 }, { "epoch": 2.36, "learning_rate": 7.586038338318864e-07, "loss": 1.6522, "step": 2198 }, { "epoch": 2.36, "learning_rate": 7.569142421619009e-07, "loss": 1.7054, "step": 2200 }, { "epoch": 2.37, "learning_rate": 7.552253876932005e-07, "loss": 1.6686, "step": 2202 }, { "epoch": 2.37, "learning_rate": 7.53537275547541e-07, "loss": 1.6714, "step": 2204 }, { "epoch": 2.37, "learning_rate": 7.518499108444255e-07, "loss": 1.6668, "step": 2206 }, { "epoch": 2.37, "learning_rate": 7.501632987010916e-07, "loss": 1.6984, "step": 2208 }, { "epoch": 2.38, "learning_rate": 7.484774442324931e-07, "loss": 1.6506, "step": 2210 }, { "epoch": 2.38, "learning_rate": 7.467923525512878e-07, "loss": 1.6957, "step": 2212 }, { "epoch": 2.38, "learning_rate": 7.451080287678194e-07, "loss": 1.6763, "step": 2214 }, { "epoch": 2.38, "learning_rate": 7.434244779901018e-07, "loss": 1.7088, "step": 2216 }, { "epoch": 2.38, "learning_rate": 7.417417053238064e-07, "loss": 1.6602, "step": 2218 }, { "epoch": 2.39, "learning_rate": 7.400597158722435e-07, "loss": 1.6661, "step": 2220 }, { "epoch": 2.39, "learning_rate": 7.383785147363493e-07, "loss": 1.7322, "step": 2222 }, { "epoch": 2.39, "learning_rate": 7.366981070146678e-07, "loss": 1.6725, "step": 2224 }, { "epoch": 2.39, "learning_rate": 7.350184978033385e-07, "loss": 1.6806, "step": 2226 }, { "epoch": 2.4, "learning_rate": 7.333396921960776e-07, "loss": 1.6918, "step": 2228 }, { "epoch": 2.4, "learning_rate": 7.316616952841661e-07, "loss": 1.6206, "step": 2230 }, { "epoch": 2.4, "learning_rate": 7.299845121564303e-07, "loss": 1.6846, "step": 2232 }, { "epoch": 2.4, "learning_rate": 7.283081478992307e-07, "loss": 1.689, "step": 2234 }, { "epoch": 2.4, "learning_rate": 7.266326075964428e-07, "loss": 1.6578, "step": 2236 }, { "epoch": 2.41, "learning_rate": 7.249578963294441e-07, "loss": 1.7333, "step": 2238 }, { "epoch": 2.41, "learning_rate": 7.232840191770983e-07, "loss": 1.675, "step": 2240 }, { "epoch": 2.41, "learning_rate": 7.216109812157382e-07, "loss": 1.6838, "step": 2242 }, { "epoch": 2.41, "learning_rate": 7.19938787519153e-07, "loss": 1.6929, "step": 2244 }, { "epoch": 2.41, "learning_rate": 7.182674431585702e-07, "loss": 1.6498, "step": 2246 }, { "epoch": 2.42, "learning_rate": 7.165969532026429e-07, "loss": 1.6689, "step": 2248 }, { "epoch": 2.42, "learning_rate": 7.149273227174318e-07, "loss": 1.6738, "step": 2250 }, { "epoch": 2.42, "learning_rate": 7.132585567663922e-07, "loss": 1.6882, "step": 2252 }, { "epoch": 2.42, "learning_rate": 7.115906604103563e-07, "loss": 1.7063, "step": 2254 }, { "epoch": 2.43, "learning_rate": 7.099236387075203e-07, "loss": 1.6674, "step": 2256 }, { "epoch": 2.43, "learning_rate": 7.082574967134274e-07, "loss": 1.7112, "step": 2258 }, { "epoch": 2.43, "learning_rate": 7.065922394809525e-07, "loss": 1.6887, "step": 2260 }, { "epoch": 2.43, "learning_rate": 7.049278720602886e-07, "loss": 1.6402, "step": 2262 }, { "epoch": 2.43, "learning_rate": 7.032643994989282e-07, "loss": 1.659, "step": 2264 }, { "epoch": 2.44, "learning_rate": 7.016018268416517e-07, "loss": 1.6611, "step": 2266 }, { "epoch": 2.44, "learning_rate": 6.999401591305092e-07, "loss": 1.7139, "step": 2268 }, { "epoch": 2.44, "learning_rate": 6.982794014048077e-07, "loss": 1.6484, "step": 2270 }, { "epoch": 2.44, "learning_rate": 6.96619558701093e-07, "loss": 1.6803, "step": 2272 }, { "epoch": 2.44, "learning_rate": 6.949606360531375e-07, "loss": 1.6501, "step": 2274 }, { "epoch": 2.45, "learning_rate": 6.933026384919215e-07, "loss": 1.6921, "step": 2276 }, { "epoch": 2.45, "learning_rate": 6.916455710456215e-07, "loss": 1.668, "step": 2278 }, { "epoch": 2.45, "learning_rate": 6.899894387395924e-07, "loss": 1.6566, "step": 2280 }, { "epoch": 2.45, "learning_rate": 6.883342465963536e-07, "loss": 1.6592, "step": 2282 }, { "epoch": 2.46, "learning_rate": 6.866799996355724e-07, "loss": 1.6973, "step": 2284 }, { "epoch": 2.46, "learning_rate": 6.850267028740506e-07, "loss": 1.6754, "step": 2286 }, { "epoch": 2.46, "learning_rate": 6.833743613257084e-07, "loss": 1.7442, "step": 2288 }, { "epoch": 2.46, "learning_rate": 6.817229800015681e-07, "loss": 1.7135, "step": 2290 }, { "epoch": 2.46, "learning_rate": 6.800725639097411e-07, "loss": 1.6156, "step": 2292 }, { "epoch": 2.47, "learning_rate": 6.784231180554106e-07, "loss": 1.6559, "step": 2294 }, { "epoch": 2.47, "learning_rate": 6.767746474408185e-07, "loss": 1.6719, "step": 2296 }, { "epoch": 2.47, "learning_rate": 6.751271570652476e-07, "loss": 1.7022, "step": 2298 }, { "epoch": 2.47, "learning_rate": 6.734806519250095e-07, "loss": 1.6923, "step": 2300 }, { "epoch": 2.47, "learning_rate": 6.71835137013427e-07, "loss": 1.6442, "step": 2302 }, { "epoch": 2.48, "learning_rate": 6.701906173208203e-07, "loss": 1.6474, "step": 2304 }, { "epoch": 2.48, "learning_rate": 6.685470978344905e-07, "loss": 1.6584, "step": 2306 }, { "epoch": 2.48, "learning_rate": 6.669045835387066e-07, "loss": 1.6675, "step": 2308 }, { "epoch": 2.48, "learning_rate": 6.652630794146884e-07, "loss": 1.6566, "step": 2310 }, { "epoch": 2.49, "learning_rate": 6.636225904405925e-07, "loss": 1.7168, "step": 2312 }, { "epoch": 2.49, "learning_rate": 6.619831215914973e-07, "loss": 1.7003, "step": 2314 }, { "epoch": 2.49, "learning_rate": 6.603446778393862e-07, "loss": 1.6721, "step": 2316 }, { "epoch": 2.49, "learning_rate": 6.58707264153135e-07, "loss": 1.6589, "step": 2318 }, { "epoch": 2.49, "learning_rate": 6.57070885498495e-07, "loss": 1.666, "step": 2320 }, { "epoch": 2.5, "learning_rate": 6.554355468380795e-07, "loss": 1.7522, "step": 2322 }, { "epoch": 2.5, "learning_rate": 6.538012531313459e-07, "loss": 1.6406, "step": 2324 }, { "epoch": 2.5, "learning_rate": 6.521680093345851e-07, "loss": 1.6841, "step": 2326 }, { "epoch": 2.5, "learning_rate": 6.505358204009017e-07, "loss": 1.6939, "step": 2328 }, { "epoch": 2.5, "learning_rate": 6.48904691280203e-07, "loss": 1.7099, "step": 2330 }, { "epoch": 2.51, "learning_rate": 6.472746269191808e-07, "loss": 1.6797, "step": 2332 }, { "epoch": 2.51, "learning_rate": 6.456456322612989e-07, "loss": 1.6792, "step": 2334 }, { "epoch": 2.51, "learning_rate": 6.440177122467768e-07, "loss": 1.7058, "step": 2336 }, { "epoch": 2.51, "learning_rate": 6.423908718125742e-07, "loss": 1.7208, "step": 2338 }, { "epoch": 2.52, "learning_rate": 6.407651158923777e-07, "loss": 1.6497, "step": 2340 }, { "epoch": 2.52, "learning_rate": 6.391404494165844e-07, "loss": 1.6367, "step": 2342 }, { "epoch": 2.52, "learning_rate": 6.375168773122881e-07, "loss": 1.6693, "step": 2344 }, { "epoch": 2.52, "learning_rate": 6.358944045032626e-07, "loss": 1.6637, "step": 2346 }, { "epoch": 2.52, "learning_rate": 6.342730359099489e-07, "loss": 1.683, "step": 2348 }, { "epoch": 2.53, "learning_rate": 6.326527764494384e-07, "loss": 1.7098, "step": 2350 }, { "epoch": 2.53, "learning_rate": 6.310336310354604e-07, "loss": 1.6868, "step": 2352 }, { "epoch": 2.53, "learning_rate": 6.294156045783634e-07, "loss": 1.7512, "step": 2354 }, { "epoch": 2.53, "learning_rate": 6.277987019851045e-07, "loss": 1.6898, "step": 2356 }, { "epoch": 2.53, "learning_rate": 6.261829281592312e-07, "loss": 1.6715, "step": 2358 }, { "epoch": 2.54, "learning_rate": 6.245682880008685e-07, "loss": 1.7164, "step": 2360 }, { "epoch": 2.54, "learning_rate": 6.229547864067033e-07, "loss": 1.6406, "step": 2362 }, { "epoch": 2.54, "learning_rate": 6.213424282699688e-07, "loss": 1.686, "step": 2364 }, { "epoch": 2.54, "learning_rate": 6.19731218480432e-07, "loss": 1.6529, "step": 2366 }, { "epoch": 2.55, "learning_rate": 6.181211619243756e-07, "loss": 1.6868, "step": 2368 }, { "epoch": 2.55, "learning_rate": 6.165122634845859e-07, "loss": 1.6871, "step": 2370 }, { "epoch": 2.55, "learning_rate": 6.149045280403369e-07, "loss": 1.6746, "step": 2372 }, { "epoch": 2.55, "learning_rate": 6.132979604673758e-07, "loss": 1.669, "step": 2374 }, { "epoch": 2.55, "learning_rate": 6.11692565637907e-07, "loss": 1.648, "step": 2376 }, { "epoch": 2.56, "learning_rate": 6.100883484205799e-07, "loss": 1.6893, "step": 2378 }, { "epoch": 2.56, "learning_rate": 6.084853136804711e-07, "loss": 1.7075, "step": 2380 }, { "epoch": 2.56, "learning_rate": 6.068834662790722e-07, "loss": 1.6784, "step": 2382 }, { "epoch": 2.56, "learning_rate": 6.052828110742736e-07, "loss": 1.6885, "step": 2384 }, { "epoch": 2.56, "learning_rate": 6.036833529203499e-07, "loss": 1.6594, "step": 2386 }, { "epoch": 2.57, "learning_rate": 6.02085096667946e-07, "loss": 1.6347, "step": 2388 }, { "epoch": 2.57, "learning_rate": 6.004880471640611e-07, "loss": 1.6649, "step": 2390 }, { "epoch": 2.57, "learning_rate": 5.988922092520353e-07, "loss": 1.6519, "step": 2392 }, { "epoch": 2.57, "learning_rate": 5.972975877715338e-07, "loss": 1.6736, "step": 2394 }, { "epoch": 2.58, "learning_rate": 5.957041875585339e-07, "loss": 1.6814, "step": 2396 }, { "epoch": 2.58, "learning_rate": 5.941120134453073e-07, "loss": 1.6911, "step": 2398 }, { "epoch": 2.58, "learning_rate": 5.92521070260409e-07, "loss": 1.6841, "step": 2400 }, { "epoch": 2.58, "learning_rate": 5.9093136282866e-07, "loss": 1.7432, "step": 2402 }, { "epoch": 2.58, "learning_rate": 5.893428959711349e-07, "loss": 1.6946, "step": 2404 }, { "epoch": 2.59, "learning_rate": 5.877556745051439e-07, "loss": 1.6804, "step": 2406 }, { "epoch": 2.59, "learning_rate": 5.861697032442226e-07, "loss": 1.6391, "step": 2408 }, { "epoch": 2.59, "learning_rate": 5.845849869981136e-07, "loss": 1.7019, "step": 2410 }, { "epoch": 2.59, "learning_rate": 5.830015305727542e-07, "loss": 1.6807, "step": 2412 }, { "epoch": 2.6, "learning_rate": 5.814193387702609e-07, "loss": 1.6717, "step": 2414 }, { "epoch": 2.6, "learning_rate": 5.798384163889147e-07, "loss": 1.6516, "step": 2416 }, { "epoch": 2.6, "learning_rate": 5.782587682231472e-07, "loss": 1.704, "step": 2418 }, { "epoch": 2.6, "learning_rate": 5.766803990635254e-07, "loss": 1.6612, "step": 2420 }, { "epoch": 2.6, "learning_rate": 5.751033136967384e-07, "loss": 1.6555, "step": 2422 }, { "epoch": 2.61, "learning_rate": 5.735275169055803e-07, "loss": 1.7314, "step": 2424 }, { "epoch": 2.61, "learning_rate": 5.719530134689389e-07, "loss": 1.696, "step": 2426 }, { "epoch": 2.61, "learning_rate": 5.703798081617789e-07, "loss": 1.6956, "step": 2428 }, { "epoch": 2.61, "learning_rate": 5.688079057551282e-07, "loss": 1.7311, "step": 2430 }, { "epoch": 2.61, "learning_rate": 5.672373110160647e-07, "loss": 1.687, "step": 2432 }, { "epoch": 2.62, "learning_rate": 5.656680287076976e-07, "loss": 1.6902, "step": 2434 }, { "epoch": 2.62, "learning_rate": 5.641000635891591e-07, "loss": 1.6872, "step": 2436 }, { "epoch": 2.62, "learning_rate": 5.625334204155852e-07, "loss": 1.678, "step": 2438 }, { "epoch": 2.62, "learning_rate": 5.609681039381029e-07, "loss": 1.68, "step": 2440 }, { "epoch": 2.63, "learning_rate": 5.594041189038157e-07, "loss": 1.7455, "step": 2442 }, { "epoch": 2.63, "learning_rate": 5.578414700557907e-07, "loss": 1.7074, "step": 2444 }, { "epoch": 2.63, "learning_rate": 5.562801621330402e-07, "loss": 1.6827, "step": 2446 }, { "epoch": 2.63, "learning_rate": 5.547201998705123e-07, "loss": 1.691, "step": 2448 }, { "epoch": 2.63, "learning_rate": 5.531615879990729e-07, "loss": 1.6659, "step": 2450 }, { "epoch": 2.64, "learning_rate": 5.516043312454927e-07, "loss": 1.7509, "step": 2452 }, { "epoch": 2.64, "learning_rate": 5.50048434332433e-07, "loss": 1.7094, "step": 2454 }, { "epoch": 2.64, "learning_rate": 5.484939019784305e-07, "loss": 1.6719, "step": 2456 }, { "epoch": 2.64, "learning_rate": 5.469407388978854e-07, "loss": 1.6651, "step": 2458 }, { "epoch": 2.64, "learning_rate": 5.453889498010433e-07, "loss": 1.7097, "step": 2460 }, { "epoch": 2.65, "learning_rate": 5.43838539393984e-07, "loss": 1.6689, "step": 2462 }, { "epoch": 2.65, "learning_rate": 5.422895123786058e-07, "loss": 1.6613, "step": 2464 }, { "epoch": 2.65, "learning_rate": 5.407418734526118e-07, "loss": 1.6762, "step": 2466 }, { "epoch": 2.65, "learning_rate": 5.391956273094951e-07, "loss": 1.693, "step": 2468 }, { "epoch": 2.66, "learning_rate": 5.376507786385263e-07, "loss": 1.6739, "step": 2470 }, { "epoch": 2.66, "learning_rate": 5.361073321247354e-07, "loss": 1.6348, "step": 2472 }, { "epoch": 2.66, "learning_rate": 5.345652924489027e-07, "loss": 1.6836, "step": 2474 }, { "epoch": 2.66, "learning_rate": 5.330246642875406e-07, "loss": 1.7196, "step": 2476 }, { "epoch": 2.66, "learning_rate": 5.31485452312881e-07, "loss": 1.6465, "step": 2478 }, { "epoch": 2.67, "learning_rate": 5.299476611928607e-07, "loss": 1.715, "step": 2480 }, { "epoch": 2.67, "learning_rate": 5.284112955911088e-07, "loss": 1.6288, "step": 2482 }, { "epoch": 2.67, "learning_rate": 5.268763601669299e-07, "loss": 1.6751, "step": 2484 }, { "epoch": 2.67, "learning_rate": 5.253428595752916e-07, "loss": 1.6313, "step": 2486 }, { "epoch": 2.67, "learning_rate": 5.238107984668105e-07, "loss": 1.7374, "step": 2488 }, { "epoch": 2.68, "learning_rate": 5.222801814877369e-07, "loss": 1.7189, "step": 2490 }, { "epoch": 2.68, "learning_rate": 5.207510132799436e-07, "loss": 1.6231, "step": 2492 }, { "epoch": 2.68, "learning_rate": 5.192232984809062e-07, "loss": 1.7164, "step": 2494 }, { "epoch": 2.68, "learning_rate": 5.17697041723696e-07, "loss": 1.6455, "step": 2496 }, { "epoch": 2.69, "learning_rate": 5.161722476369612e-07, "loss": 1.7, "step": 2498 }, { "epoch": 2.69, "learning_rate": 5.146489208449136e-07, "loss": 1.67, "step": 2500 }, { "epoch": 2.69, "learning_rate": 5.131270659673155e-07, "loss": 1.6286, "step": 2502 }, { "epoch": 2.69, "learning_rate": 5.116066876194662e-07, "loss": 1.6824, "step": 2504 }, { "epoch": 2.69, "learning_rate": 5.100877904121864e-07, "loss": 1.7038, "step": 2506 }, { "epoch": 2.7, "learning_rate": 5.085703789518049e-07, "loss": 1.6302, "step": 2508 }, { "epoch": 2.7, "learning_rate": 5.07054457840145e-07, "loss": 1.6829, "step": 2510 }, { "epoch": 2.7, "learning_rate": 5.055400316745095e-07, "loss": 1.6355, "step": 2512 }, { "epoch": 2.7, "learning_rate": 5.040271050476697e-07, "loss": 1.6683, "step": 2514 }, { "epoch": 2.7, "learning_rate": 5.02515682547846e-07, "loss": 1.6439, "step": 2516 }, { "epoch": 2.71, "learning_rate": 5.010057687587e-07, "loss": 1.6893, "step": 2518 }, { "epoch": 2.71, "learning_rate": 4.994973682593167e-07, "loss": 1.6663, "step": 2520 }, { "epoch": 2.71, "learning_rate": 4.97990485624192e-07, "loss": 1.6837, "step": 2522 }, { "epoch": 2.71, "learning_rate": 4.964851254232183e-07, "loss": 1.6524, "step": 2524 }, { "epoch": 2.72, "learning_rate": 4.949812922216713e-07, "loss": 1.7032, "step": 2526 }, { "epoch": 2.72, "learning_rate": 4.934789905801954e-07, "loss": 1.6978, "step": 2528 }, { "epoch": 2.72, "learning_rate": 4.919782250547911e-07, "loss": 1.6881, "step": 2530 }, { "epoch": 2.72, "learning_rate": 4.904790001967996e-07, "loss": 1.671, "step": 2532 }, { "epoch": 2.72, "learning_rate": 4.889813205528894e-07, "loss": 1.7022, "step": 2534 }, { "epoch": 2.73, "learning_rate": 4.874851906650448e-07, "loss": 1.7007, "step": 2536 }, { "epoch": 2.73, "learning_rate": 4.859906150705471e-07, "loss": 1.6365, "step": 2538 }, { "epoch": 2.73, "learning_rate": 4.844975983019668e-07, "loss": 1.6813, "step": 2540 }, { "epoch": 2.73, "learning_rate": 4.830061448871454e-07, "loss": 1.6612, "step": 2542 }, { "epoch": 2.73, "learning_rate": 4.815162593491838e-07, "loss": 1.6307, "step": 2544 }, { "epoch": 2.74, "learning_rate": 4.800279462064278e-07, "loss": 1.6695, "step": 2546 }, { "epoch": 2.74, "learning_rate": 4.785412099724546e-07, "loss": 1.6348, "step": 2548 }, { "epoch": 2.74, "learning_rate": 4.770560551560589e-07, "loss": 1.6561, "step": 2550 }, { "epoch": 2.74, "learning_rate": 4.7557248626124093e-07, "loss": 1.6805, "step": 2552 }, { "epoch": 2.75, "learning_rate": 4.740905077871894e-07, "loss": 1.6929, "step": 2554 }, { "epoch": 2.75, "learning_rate": 4.7261012422827074e-07, "loss": 1.6704, "step": 2556 }, { "epoch": 2.75, "learning_rate": 4.7113134007401443e-07, "loss": 1.7108, "step": 2558 }, { "epoch": 2.75, "learning_rate": 4.696541598090991e-07, "loss": 1.6612, "step": 2560 }, { "epoch": 2.75, "learning_rate": 4.681785879133402e-07, "loss": 1.6299, "step": 2562 }, { "epoch": 2.76, "learning_rate": 4.667046288616746e-07, "loss": 1.6696, "step": 2564 }, { "epoch": 2.76, "learning_rate": 4.652322871241483e-07, "loss": 1.6444, "step": 2566 }, { "epoch": 2.76, "learning_rate": 4.637615671659024e-07, "loss": 1.6816, "step": 2568 }, { "epoch": 2.76, "learning_rate": 4.6229247344715983e-07, "loss": 1.6689, "step": 2570 }, { "epoch": 2.76, "learning_rate": 4.60825010423211e-07, "loss": 1.6677, "step": 2572 }, { "epoch": 2.77, "learning_rate": 4.5935918254440274e-07, "loss": 1.6505, "step": 2574 }, { "epoch": 2.77, "learning_rate": 4.578949942561202e-07, "loss": 1.6733, "step": 2576 }, { "epoch": 2.77, "learning_rate": 4.5643244999877896e-07, "loss": 1.68, "step": 2578 }, { "epoch": 2.77, "learning_rate": 4.5497155420780696e-07, "loss": 1.6563, "step": 2580 }, { "epoch": 2.78, "learning_rate": 4.5351231131363333e-07, "loss": 1.6426, "step": 2582 }, { "epoch": 2.78, "learning_rate": 4.5205472574167567e-07, "loss": 1.717, "step": 2584 }, { "epoch": 2.78, "learning_rate": 4.505988019123228e-07, "loss": 1.7117, "step": 2586 }, { "epoch": 2.78, "learning_rate": 4.4914454424092696e-07, "loss": 1.7123, "step": 2588 }, { "epoch": 2.78, "learning_rate": 4.4769195713778554e-07, "loss": 1.6705, "step": 2590 }, { "epoch": 2.79, "learning_rate": 4.4624104500813033e-07, "loss": 1.6447, "step": 2592 }, { "epoch": 2.79, "learning_rate": 4.447918122521128e-07, "loss": 1.681, "step": 2594 }, { "epoch": 2.79, "learning_rate": 4.4334426326479336e-07, "loss": 1.6716, "step": 2596 }, { "epoch": 2.79, "learning_rate": 4.418984024361231e-07, "loss": 1.6941, "step": 2598 }, { "epoch": 2.79, "learning_rate": 4.40454234150936e-07, "loss": 1.6666, "step": 2600 }, { "epoch": 2.8, "learning_rate": 4.3901176278893194e-07, "loss": 1.6906, "step": 2602 }, { "epoch": 2.8, "learning_rate": 4.3757099272466445e-07, "loss": 1.6618, "step": 2604 }, { "epoch": 2.8, "learning_rate": 4.361319283275289e-07, "loss": 1.6624, "step": 2606 }, { "epoch": 2.8, "learning_rate": 4.3469457396174556e-07, "loss": 1.6755, "step": 2608 }, { "epoch": 2.81, "learning_rate": 4.332589339863512e-07, "loss": 1.7124, "step": 2610 }, { "epoch": 2.81, "learning_rate": 4.318250127551817e-07, "loss": 1.6608, "step": 2612 }, { "epoch": 2.81, "learning_rate": 4.303928146168614e-07, "loss": 1.7228, "step": 2614 }, { "epoch": 2.81, "learning_rate": 4.2896234391478815e-07, "loss": 1.6907, "step": 2616 }, { "epoch": 2.81, "learning_rate": 4.27533604987123e-07, "loss": 1.6645, "step": 2618 }, { "epoch": 2.82, "learning_rate": 4.2610660216677206e-07, "loss": 1.6969, "step": 2620 }, { "epoch": 2.82, "learning_rate": 4.246813397813794e-07, "loss": 1.6414, "step": 2622 }, { "epoch": 2.82, "learning_rate": 4.2325782215330897e-07, "loss": 1.7107, "step": 2624 }, { "epoch": 2.82, "learning_rate": 4.218360535996338e-07, "loss": 1.7069, "step": 2626 }, { "epoch": 2.83, "learning_rate": 4.2041603843212395e-07, "loss": 1.6569, "step": 2628 }, { "epoch": 2.83, "learning_rate": 4.1899778095722915e-07, "loss": 1.7065, "step": 2630 }, { "epoch": 2.83, "learning_rate": 4.1758128547607155e-07, "loss": 1.6701, "step": 2632 }, { "epoch": 2.83, "learning_rate": 4.16166556284428e-07, "loss": 1.6951, "step": 2634 }, { "epoch": 2.83, "learning_rate": 4.1475359767271934e-07, "loss": 1.7141, "step": 2636 }, { "epoch": 2.84, "learning_rate": 4.133424139259968e-07, "loss": 1.6782, "step": 2638 }, { "epoch": 2.84, "learning_rate": 4.119330093239287e-07, "loss": 1.672, "step": 2640 }, { "epoch": 2.84, "learning_rate": 4.1052538814078784e-07, "loss": 1.6418, "step": 2642 }, { "epoch": 2.84, "learning_rate": 4.0911955464543976e-07, "loss": 1.6769, "step": 2644 }, { "epoch": 2.84, "learning_rate": 4.077155131013258e-07, "loss": 1.7021, "step": 2646 }, { "epoch": 2.85, "learning_rate": 4.063132677664557e-07, "loss": 1.6438, "step": 2648 }, { "epoch": 2.85, "learning_rate": 4.049128228933902e-07, "loss": 1.6945, "step": 2650 }, { "epoch": 2.85, "learning_rate": 4.035141827292301e-07, "loss": 1.6318, "step": 2652 }, { "epoch": 2.85, "learning_rate": 4.0211735151560386e-07, "loss": 1.7213, "step": 2654 }, { "epoch": 2.86, "learning_rate": 4.0072233348865304e-07, "loss": 1.7055, "step": 2656 }, { "epoch": 2.86, "learning_rate": 3.993291328790208e-07, "loss": 1.6711, "step": 2658 }, { "epoch": 2.86, "learning_rate": 3.9793775391183846e-07, "loss": 1.7406, "step": 2660 }, { "epoch": 2.86, "learning_rate": 3.9654820080671314e-07, "loss": 1.7186, "step": 2662 }, { "epoch": 2.86, "learning_rate": 3.951604777777141e-07, "loss": 1.6811, "step": 2664 }, { "epoch": 2.87, "learning_rate": 3.9377458903336223e-07, "loss": 1.679, "step": 2666 }, { "epoch": 2.87, "learning_rate": 3.92390538776613e-07, "loss": 1.6272, "step": 2668 }, { "epoch": 2.87, "learning_rate": 3.9100833120484876e-07, "loss": 1.639, "step": 2670 }, { "epoch": 2.87, "learning_rate": 3.896279705098623e-07, "loss": 1.6719, "step": 2672 }, { "epoch": 2.87, "learning_rate": 3.8824946087784536e-07, "loss": 1.6864, "step": 2674 }, { "epoch": 2.88, "learning_rate": 3.8687280648937703e-07, "loss": 1.6651, "step": 2676 }, { "epoch": 2.88, "learning_rate": 3.8549801151940906e-07, "loss": 1.7015, "step": 2678 }, { "epoch": 2.88, "learning_rate": 3.841250801372544e-07, "loss": 1.6805, "step": 2680 }, { "epoch": 2.88, "learning_rate": 3.827540165065746e-07, "loss": 1.6918, "step": 2682 }, { "epoch": 2.89, "learning_rate": 3.813848247853665e-07, "loss": 1.6806, "step": 2684 }, { "epoch": 2.89, "learning_rate": 3.800175091259501e-07, "loss": 1.6735, "step": 2686 }, { "epoch": 2.89, "learning_rate": 3.786520736749571e-07, "loss": 1.7098, "step": 2688 }, { "epoch": 2.89, "learning_rate": 3.7728852257331467e-07, "loss": 1.6358, "step": 2690 }, { "epoch": 2.89, "learning_rate": 3.75926859956238e-07, "loss": 1.6875, "step": 2692 }, { "epoch": 2.9, "learning_rate": 3.7456708995321327e-07, "loss": 1.6994, "step": 2694 }, { "epoch": 2.9, "learning_rate": 3.7320921668798775e-07, "loss": 1.6525, "step": 2696 }, { "epoch": 2.9, "learning_rate": 3.7185324427855647e-07, "loss": 1.7098, "step": 2698 }, { "epoch": 2.9, "learning_rate": 3.7049917683714915e-07, "loss": 1.6688, "step": 2700 }, { "epoch": 2.9, "learning_rate": 3.691470184702197e-07, "loss": 1.6341, "step": 2702 }, { "epoch": 2.91, "learning_rate": 3.6779677327843105e-07, "loss": 1.6446, "step": 2704 }, { "epoch": 2.91, "learning_rate": 3.664484453566449e-07, "loss": 1.6291, "step": 2706 }, { "epoch": 2.91, "learning_rate": 3.6510203879390756e-07, "loss": 1.6933, "step": 2708 }, { "epoch": 2.91, "learning_rate": 3.6375755767344043e-07, "loss": 1.6932, "step": 2710 }, { "epoch": 2.92, "learning_rate": 3.624150060726227e-07, "loss": 1.6898, "step": 2712 }, { "epoch": 2.92, "learning_rate": 3.6107438806298487e-07, "loss": 1.6837, "step": 2714 }, { "epoch": 2.92, "learning_rate": 3.5973570771019155e-07, "loss": 1.7272, "step": 2716 }, { "epoch": 2.92, "learning_rate": 3.583989690740321e-07, "loss": 1.6672, "step": 2718 }, { "epoch": 2.92, "learning_rate": 3.570641762084066e-07, "loss": 1.6944, "step": 2720 }, { "epoch": 2.93, "learning_rate": 3.5573133316131445e-07, "loss": 1.6733, "step": 2722 }, { "epoch": 2.93, "learning_rate": 3.544004439748418e-07, "loss": 1.7539, "step": 2724 }, { "epoch": 2.93, "learning_rate": 3.5307151268515024e-07, "loss": 1.6343, "step": 2726 }, { "epoch": 2.93, "learning_rate": 3.517445433224623e-07, "loss": 1.6285, "step": 2728 }, { "epoch": 2.93, "learning_rate": 3.5041953991105154e-07, "loss": 1.7435, "step": 2730 }, { "epoch": 2.94, "learning_rate": 3.4909650646922894e-07, "loss": 1.6805, "step": 2732 }, { "epoch": 2.94, "learning_rate": 3.4777544700933114e-07, "loss": 1.6832, "step": 2734 }, { "epoch": 2.94, "learning_rate": 3.464563655377094e-07, "loss": 1.6731, "step": 2736 }, { "epoch": 2.94, "learning_rate": 3.45139266054715e-07, "loss": 1.6178, "step": 2738 }, { "epoch": 2.95, "learning_rate": 3.43824152554689e-07, "loss": 1.6611, "step": 2740 }, { "epoch": 2.95, "learning_rate": 3.4251102902594985e-07, "loss": 1.6671, "step": 2742 }, { "epoch": 2.95, "learning_rate": 3.411998994507808e-07, "loss": 1.6669, "step": 2744 }, { "epoch": 2.95, "learning_rate": 3.398907678054177e-07, "loss": 1.6837, "step": 2746 }, { "epoch": 2.95, "learning_rate": 3.385836380600384e-07, "loss": 1.6484, "step": 2748 }, { "epoch": 2.96, "learning_rate": 3.3727851417874875e-07, "loss": 1.6734, "step": 2750 }, { "epoch": 2.96, "learning_rate": 3.359754001195716e-07, "loss": 1.6938, "step": 2752 }, { "epoch": 2.96, "learning_rate": 3.3467429983443476e-07, "loss": 1.7249, "step": 2754 }, { "epoch": 2.96, "learning_rate": 3.3337521726915853e-07, "loss": 1.6563, "step": 2756 }, { "epoch": 2.96, "learning_rate": 3.320781563634455e-07, "loss": 1.6845, "step": 2758 }, { "epoch": 2.97, "learning_rate": 3.307831210508648e-07, "loss": 1.6449, "step": 2760 }, { "epoch": 2.97, "learning_rate": 3.2949011525884497e-07, "loss": 1.709, "step": 2762 }, { "epoch": 2.97, "learning_rate": 3.2819914290865835e-07, "loss": 1.7084, "step": 2764 }, { "epoch": 2.97, "learning_rate": 3.269102079154107e-07, "loss": 1.6734, "step": 2766 }, { "epoch": 2.98, "learning_rate": 3.25623314188029e-07, "loss": 1.6561, "step": 2768 }, { "epoch": 2.98, "learning_rate": 3.2433846562925103e-07, "loss": 1.7016, "step": 2770 }, { "epoch": 2.98, "learning_rate": 3.2305566613560964e-07, "loss": 1.6527, "step": 2772 }, { "epoch": 2.98, "learning_rate": 3.217749195974262e-07, "loss": 1.7127, "step": 2774 }, { "epoch": 2.98, "learning_rate": 3.204962298987944e-07, "loss": 1.662, "step": 2776 }, { "epoch": 2.99, "learning_rate": 3.1921960091757073e-07, "loss": 1.6959, "step": 2778 }, { "epoch": 2.99, "learning_rate": 3.17945036525363e-07, "loss": 1.68, "step": 2780 }, { "epoch": 2.99, "learning_rate": 3.166725405875157e-07, "loss": 1.6603, "step": 2782 }, { "epoch": 2.99, "learning_rate": 3.154021169631026e-07, "loss": 1.6363, "step": 2784 }, { "epoch": 2.99, "learning_rate": 3.1413376950491166e-07, "loss": 1.6702, "step": 2786 }, { "epoch": 3.0, "learning_rate": 3.128675020594347e-07, "loss": 1.6898, "step": 2788 }, { "epoch": 3.0, "learning_rate": 3.1160331846685526e-07, "loss": 1.688, "step": 2790 }, { "epoch": 3.0, "learning_rate": 3.103412225610378e-07, "loss": 1.6445, "step": 2792 }, { "epoch": 3.0, "learning_rate": 3.090812181695146e-07, "loss": 1.6745, "step": 2794 }, { "epoch": 3.01, "learning_rate": 3.078233091134764e-07, "loss": 1.6506, "step": 2796 }, { "epoch": 3.01, "learning_rate": 3.065674992077584e-07, "loss": 1.6474, "step": 2798 }, { "epoch": 3.01, "learning_rate": 3.053137922608295e-07, "loss": 1.661, "step": 2800 }, { "epoch": 3.01, "learning_rate": 3.040621920747827e-07, "loss": 1.6831, "step": 2802 }, { "epoch": 3.01, "learning_rate": 3.028127024453193e-07, "loss": 1.6901, "step": 2804 }, { "epoch": 3.02, "learning_rate": 3.0156532716174243e-07, "loss": 1.6924, "step": 2806 }, { "epoch": 3.02, "learning_rate": 3.003200700069415e-07, "loss": 1.6815, "step": 2808 }, { "epoch": 3.02, "learning_rate": 2.9907693475738303e-07, "loss": 1.6765, "step": 2810 }, { "epoch": 3.02, "learning_rate": 2.978359251830981e-07, "loss": 1.6304, "step": 2812 }, { "epoch": 3.02, "learning_rate": 2.9659704504767157e-07, "loss": 1.6442, "step": 2814 }, { "epoch": 3.03, "learning_rate": 2.9536029810822994e-07, "loss": 1.6585, "step": 2816 }, { "epoch": 3.03, "learning_rate": 2.941256881154317e-07, "loss": 1.6403, "step": 2818 }, { "epoch": 3.03, "learning_rate": 2.9289321881345254e-07, "loss": 1.6504, "step": 2820 }, { "epoch": 3.03, "learning_rate": 2.916628939399779e-07, "loss": 1.672, "step": 2822 }, { "epoch": 3.04, "learning_rate": 2.904347172261897e-07, "loss": 1.653, "step": 2824 }, { "epoch": 3.04, "learning_rate": 2.8920869239675383e-07, "loss": 1.6278, "step": 2826 }, { "epoch": 3.04, "learning_rate": 2.879848231698119e-07, "loss": 1.6327, "step": 2828 }, { "epoch": 3.04, "learning_rate": 2.867631132569671e-07, "loss": 1.6616, "step": 2830 }, { "epoch": 3.04, "learning_rate": 2.855435663632746e-07, "loss": 1.6865, "step": 2832 }, { "epoch": 3.05, "learning_rate": 2.843261861872296e-07, "loss": 1.6742, "step": 2834 }, { "epoch": 3.05, "learning_rate": 2.8311097642075657e-07, "loss": 1.6369, "step": 2836 }, { "epoch": 3.05, "learning_rate": 2.8189794074919735e-07, "loss": 1.6254, "step": 2838 }, { "epoch": 3.05, "learning_rate": 2.8068708285130184e-07, "loss": 1.6118, "step": 2840 }, { "epoch": 3.06, "learning_rate": 2.7947840639921303e-07, "loss": 1.677, "step": 2842 }, { "epoch": 3.06, "learning_rate": 2.782719150584607e-07, "loss": 1.6502, "step": 2844 }, { "epoch": 3.06, "learning_rate": 2.770676124879464e-07, "loss": 1.6279, "step": 2846 }, { "epoch": 3.06, "learning_rate": 2.758655023399342e-07, "loss": 1.615, "step": 2848 }, { "epoch": 3.06, "learning_rate": 2.7466558826003996e-07, "loss": 1.6452, "step": 2850 }, { "epoch": 3.07, "learning_rate": 2.7346787388721835e-07, "loss": 1.6349, "step": 2852 }, { "epoch": 3.07, "learning_rate": 2.72272362853754e-07, "loss": 1.7027, "step": 2854 }, { "epoch": 3.07, "learning_rate": 2.710790587852491e-07, "loss": 1.7175, "step": 2856 }, { "epoch": 3.07, "learning_rate": 2.6988796530061265e-07, "loss": 1.6837, "step": 2858 }, { "epoch": 3.07, "learning_rate": 2.686990860120497e-07, "loss": 1.678, "step": 2860 }, { "epoch": 3.08, "learning_rate": 2.6751242452505163e-07, "loss": 1.691, "step": 2862 }, { "epoch": 3.08, "learning_rate": 2.6632798443838145e-07, "loss": 1.6405, "step": 2864 }, { "epoch": 3.08, "learning_rate": 2.651457693440677e-07, "loss": 1.6452, "step": 2866 }, { "epoch": 3.08, "learning_rate": 2.6396578282739015e-07, "loss": 1.6385, "step": 2868 }, { "epoch": 3.09, "learning_rate": 2.6278802846686966e-07, "loss": 1.6936, "step": 2870 }, { "epoch": 3.09, "learning_rate": 2.616125098342591e-07, "loss": 1.6382, "step": 2872 }, { "epoch": 3.09, "learning_rate": 2.604392304945291e-07, "loss": 1.6935, "step": 2874 }, { "epoch": 3.09, "learning_rate": 2.592681940058611e-07, "loss": 1.6619, "step": 2876 }, { "epoch": 3.09, "learning_rate": 2.580994039196337e-07, "loss": 1.65, "step": 2878 }, { "epoch": 3.1, "learning_rate": 2.5693286378041293e-07, "loss": 1.7102, "step": 2880 }, { "epoch": 3.1, "learning_rate": 2.5576857712594135e-07, "loss": 1.6367, "step": 2882 }, { "epoch": 3.1, "learning_rate": 2.5460654748712864e-07, "loss": 1.6511, "step": 2884 }, { "epoch": 3.1, "learning_rate": 2.534467783880373e-07, "loss": 1.6729, "step": 2886 }, { "epoch": 3.1, "learning_rate": 2.522892733458769e-07, "loss": 1.7258, "step": 2888 }, { "epoch": 3.11, "learning_rate": 2.5113403587098913e-07, "loss": 1.6821, "step": 2890 }, { "epoch": 3.11, "learning_rate": 2.499810694668396e-07, "loss": 1.6606, "step": 2892 }, { "epoch": 3.11, "learning_rate": 2.4883037763000635e-07, "loss": 1.6669, "step": 2894 }, { "epoch": 3.11, "learning_rate": 2.476819638501689e-07, "loss": 1.6648, "step": 2896 }, { "epoch": 3.12, "learning_rate": 2.465358316100994e-07, "loss": 1.6439, "step": 2898 }, { "epoch": 3.12, "learning_rate": 2.4539198438564944e-07, "loss": 1.6422, "step": 2900 }, { "epoch": 3.12, "learning_rate": 2.4425042564574185e-07, "loss": 1.731, "step": 2902 }, { "epoch": 3.12, "learning_rate": 2.4311115885235843e-07, "loss": 1.6503, "step": 2904 }, { "epoch": 3.12, "learning_rate": 2.41974187460531e-07, "loss": 1.6699, "step": 2906 }, { "epoch": 3.13, "learning_rate": 2.408395149183294e-07, "loss": 1.672, "step": 2908 }, { "epoch": 3.13, "learning_rate": 2.397071446668528e-07, "loss": 1.6862, "step": 2910 }, { "epoch": 3.13, "learning_rate": 2.3857708014021736e-07, "loss": 1.6478, "step": 2912 }, { "epoch": 3.13, "learning_rate": 2.3744932476554714e-07, "loss": 1.6619, "step": 2914 }, { "epoch": 3.13, "learning_rate": 2.3632388196296294e-07, "loss": 1.712, "step": 2916 }, { "epoch": 3.14, "learning_rate": 2.3520075514557235e-07, "loss": 1.6427, "step": 2918 }, { "epoch": 3.14, "learning_rate": 2.3407994771946016e-07, "loss": 1.6813, "step": 2920 }, { "epoch": 3.14, "learning_rate": 2.3296146308367593e-07, "loss": 1.6614, "step": 2922 }, { "epoch": 3.14, "learning_rate": 2.3184530463022577e-07, "loss": 1.664, "step": 2924 }, { "epoch": 3.15, "learning_rate": 2.3073147574406083e-07, "loss": 1.6342, "step": 2926 }, { "epoch": 3.15, "learning_rate": 2.2961997980306745e-07, "loss": 1.6329, "step": 2928 }, { "epoch": 3.15, "learning_rate": 2.28510820178057e-07, "loss": 1.6314, "step": 2930 }, { "epoch": 3.15, "learning_rate": 2.274040002327562e-07, "loss": 1.6135, "step": 2932 }, { "epoch": 3.15, "learning_rate": 2.2629952332379444e-07, "loss": 1.6362, "step": 2934 }, { "epoch": 3.16, "learning_rate": 2.2519739280069762e-07, "loss": 1.633, "step": 2936 }, { "epoch": 3.16, "learning_rate": 2.240976120058745e-07, "loss": 1.6842, "step": 2938 }, { "epoch": 3.16, "learning_rate": 2.2300018427460809e-07, "loss": 1.6551, "step": 2940 }, { "epoch": 3.16, "learning_rate": 2.219051129350451e-07, "loss": 1.645, "step": 2942 }, { "epoch": 3.16, "learning_rate": 2.208124013081869e-07, "loss": 1.6249, "step": 2944 }, { "epoch": 3.17, "learning_rate": 2.197220527078778e-07, "loss": 1.645, "step": 2946 }, { "epoch": 3.17, "learning_rate": 2.1863407044079606e-07, "loss": 1.6616, "step": 2948 }, { "epoch": 3.17, "learning_rate": 2.175484578064436e-07, "loss": 1.638, "step": 2950 }, { "epoch": 3.17, "learning_rate": 2.164652180971358e-07, "loss": 1.6651, "step": 2952 }, { "epoch": 3.18, "learning_rate": 2.1538435459799264e-07, "loss": 1.6273, "step": 2954 }, { "epoch": 3.18, "learning_rate": 2.1430587058692606e-07, "loss": 1.6759, "step": 2956 }, { "epoch": 3.18, "learning_rate": 2.1322976933463354e-07, "loss": 1.6511, "step": 2958 }, { "epoch": 3.18, "learning_rate": 2.121560541045856e-07, "loss": 1.6723, "step": 2960 }, { "epoch": 3.18, "learning_rate": 2.110847281530167e-07, "loss": 1.6751, "step": 2962 }, { "epoch": 3.19, "learning_rate": 2.100157947289155e-07, "loss": 1.6742, "step": 2964 }, { "epoch": 3.19, "learning_rate": 2.0894925707401488e-07, "loss": 1.6711, "step": 2966 }, { "epoch": 3.19, "learning_rate": 2.0788511842278177e-07, "loss": 1.6633, "step": 2968 }, { "epoch": 3.19, "learning_rate": 2.0682338200240878e-07, "loss": 1.6559, "step": 2970 }, { "epoch": 3.19, "learning_rate": 2.0576405103280213e-07, "loss": 1.6424, "step": 2972 }, { "epoch": 3.2, "learning_rate": 2.0470712872657348e-07, "loss": 1.6524, "step": 2974 }, { "epoch": 3.2, "learning_rate": 2.0365261828903035e-07, "loss": 1.68, "step": 2976 }, { "epoch": 3.2, "learning_rate": 2.0260052291816443e-07, "loss": 1.6301, "step": 2978 }, { "epoch": 3.2, "learning_rate": 2.0155084580464498e-07, "loss": 1.6836, "step": 2980 }, { "epoch": 3.21, "learning_rate": 2.005035901318063e-07, "loss": 1.6594, "step": 2982 }, { "epoch": 3.21, "learning_rate": 1.9945875907563968e-07, "loss": 1.672, "step": 2984 }, { "epoch": 3.21, "learning_rate": 1.9841635580478322e-07, "loss": 1.688, "step": 2986 }, { "epoch": 3.21, "learning_rate": 1.9737638348051233e-07, "loss": 1.6405, "step": 2988 }, { "epoch": 3.21, "learning_rate": 1.9633884525672983e-07, "loss": 1.6533, "step": 2990 }, { "epoch": 3.22, "learning_rate": 1.9530374427995766e-07, "loss": 1.637, "step": 2992 }, { "epoch": 3.22, "learning_rate": 1.9427108368932533e-07, "loss": 1.6396, "step": 2994 }, { "epoch": 3.22, "learning_rate": 1.9324086661656168e-07, "loss": 1.6993, "step": 2996 }, { "epoch": 3.22, "learning_rate": 1.9221309618598602e-07, "loss": 1.7117, "step": 2998 }, { "epoch": 3.22, "learning_rate": 1.9118777551449595e-07, "loss": 1.6908, "step": 3000 }, { "epoch": 3.23, "learning_rate": 1.901649077115617e-07, "loss": 1.6728, "step": 3002 }, { "epoch": 3.23, "learning_rate": 1.8914449587921367e-07, "loss": 1.662, "step": 3004 }, { "epoch": 3.23, "learning_rate": 1.8812654311203412e-07, "loss": 1.6658, "step": 3006 }, { "epoch": 3.23, "learning_rate": 1.8711105249714798e-07, "loss": 1.698, "step": 3008 }, { "epoch": 3.24, "learning_rate": 1.866042314595e-07, "loss": 1.65, "step": 3010 }, { "epoch": 3.24, "learning_rate": 1.8559243984507645e-07, "loss": 1.6631, "step": 3012 }, { "epoch": 3.24, "learning_rate": 1.845831180680706e-07, "loss": 1.6182, "step": 3014 }, { "epoch": 3.24, "learning_rate": 1.8357626918943204e-07, "loss": 1.6959, "step": 3016 }, { "epoch": 3.24, "learning_rate": 1.8257189626261105e-07, "loss": 1.6473, "step": 3018 }, { "epoch": 3.25, "learning_rate": 1.8157000233354915e-07, "loss": 1.6782, "step": 3020 }, { "epoch": 3.25, "learning_rate": 1.8106998594297917e-07, "loss": 1.6507, "step": 3022 }, { "epoch": 3.25, "learning_rate": 1.8007181620524804e-07, "loss": 1.6444, "step": 3024 }, { "epoch": 3.25, "learning_rate": 1.7907613304721903e-07, "loss": 1.6327, "step": 3026 }, { "epoch": 3.26, "learning_rate": 1.780829394884794e-07, "loss": 1.6667, "step": 3028 }, { "epoch": 3.26, "learning_rate": 1.7709223854106802e-07, "loss": 1.6786, "step": 3030 }, { "epoch": 3.26, "learning_rate": 1.7610403320946353e-07, "loss": 1.6811, "step": 3032 }, { "epoch": 3.26, "learning_rate": 1.7511832649057624e-07, "loss": 1.6612, "step": 3034 }, { "epoch": 3.26, "learning_rate": 1.7413512137373897e-07, "loss": 1.6821, "step": 3036 }, { "epoch": 3.27, "learning_rate": 1.7315442084069865e-07, "loss": 1.6305, "step": 3038 }, { "epoch": 3.27, "learning_rate": 1.7217622786560525e-07, "loss": 1.6646, "step": 3040 }, { "epoch": 3.27, "learning_rate": 1.712005454150055e-07, "loss": 1.6486, "step": 3042 }, { "epoch": 3.27, "learning_rate": 1.702273764478318e-07, "loss": 1.6482, "step": 3044 }, { "epoch": 3.27, "learning_rate": 1.6925672391539382e-07, "loss": 1.6928, "step": 3046 }, { "epoch": 3.28, "learning_rate": 1.682885907613707e-07, "loss": 1.7189, "step": 3048 }, { "epoch": 3.28, "learning_rate": 1.6732297992179933e-07, "loss": 1.6629, "step": 3050 }, { "epoch": 3.28, "learning_rate": 1.6635989432506904e-07, "loss": 1.6371, "step": 3052 }, { "epoch": 3.28, "learning_rate": 1.6539933689190988e-07, "loss": 1.7218, "step": 3054 }, { "epoch": 3.29, "learning_rate": 1.6444131053538512e-07, "loss": 1.6245, "step": 3056 }, { "epoch": 3.29, "learning_rate": 1.634858181608816e-07, "loss": 1.6936, "step": 3058 }, { "epoch": 3.29, "learning_rate": 1.6253286266610278e-07, "loss": 1.6722, "step": 3060 }, { "epoch": 3.29, "learning_rate": 1.615824469410565e-07, "loss": 1.6761, "step": 3062 }, { "epoch": 3.29, "learning_rate": 1.6063457386805003e-07, "loss": 1.6712, "step": 3064 }, { "epoch": 3.3, "learning_rate": 1.596892463216789e-07, "loss": 1.6428, "step": 3066 }, { "epoch": 3.3, "learning_rate": 1.5874646716881868e-07, "loss": 1.6976, "step": 3068 }, { "epoch": 3.3, "learning_rate": 1.5780623926861736e-07, "loss": 1.6576, "step": 3070 }, { "epoch": 3.3, "learning_rate": 1.5686856547248428e-07, "loss": 1.6432, "step": 3072 }, { "epoch": 3.3, "learning_rate": 1.5593344862408454e-07, "loss": 1.6876, "step": 3074 }, { "epoch": 3.31, "learning_rate": 1.5500089155932804e-07, "loss": 1.6723, "step": 3076 }, { "epoch": 3.31, "learning_rate": 1.540708971063618e-07, "loss": 1.6702, "step": 3078 }, { "epoch": 3.31, "learning_rate": 1.5314346808556111e-07, "loss": 1.7136, "step": 3080 }, { "epoch": 3.31, "learning_rate": 1.522186073095215e-07, "loss": 1.685, "step": 3082 }, { "epoch": 3.32, "learning_rate": 1.512963175830494e-07, "loss": 1.6599, "step": 3084 }, { "epoch": 3.32, "learning_rate": 1.503766017031547e-07, "loss": 1.639, "step": 3086 }, { "epoch": 3.32, "learning_rate": 1.4945946245904095e-07, "loss": 1.6334, "step": 3088 }, { "epoch": 3.32, "learning_rate": 1.4854490263209797e-07, "loss": 1.6169, "step": 3090 }, { "epoch": 3.32, "learning_rate": 1.4763292499589298e-07, "loss": 1.6248, "step": 3092 }, { "epoch": 3.33, "learning_rate": 1.4672353231616186e-07, "loss": 1.6857, "step": 3094 }, { "epoch": 3.33, "learning_rate": 1.4581672735080198e-07, "loss": 1.6417, "step": 3096 }, { "epoch": 3.33, "learning_rate": 1.4491251284986227e-07, "loss": 1.7102, "step": 3098 }, { "epoch": 3.33, "learning_rate": 1.440108915555358e-07, "loss": 1.6613, "step": 3100 }, { "epoch": 3.33, "learning_rate": 1.4311186620215154e-07, "loss": 1.7211, "step": 3102 }, { "epoch": 3.34, "learning_rate": 1.4221543951616532e-07, "loss": 1.6401, "step": 3104 }, { "epoch": 3.34, "learning_rate": 1.413216142161523e-07, "loss": 1.6696, "step": 3106 }, { "epoch": 3.34, "learning_rate": 1.4043039301279903e-07, "loss": 1.7063, "step": 3108 }, { "epoch": 3.34, "learning_rate": 1.3954177860889327e-07, "loss": 1.6578, "step": 3110 }, { "epoch": 3.35, "learning_rate": 1.3865577369931868e-07, "loss": 1.6273, "step": 3112 }, { "epoch": 3.35, "learning_rate": 1.3777238097104426e-07, "loss": 1.6556, "step": 3114 }, { "epoch": 3.35, "learning_rate": 1.368916031031172e-07, "loss": 1.6406, "step": 3116 }, { "epoch": 3.35, "learning_rate": 1.3601344276665527e-07, "loss": 1.6864, "step": 3118 }, { "epoch": 3.35, "learning_rate": 1.3513790262483738e-07, "loss": 1.6016, "step": 3120 }, { "epoch": 3.36, "learning_rate": 1.3426498533289654e-07, "loss": 1.6372, "step": 3122 }, { "epoch": 3.36, "learning_rate": 1.3339469353811138e-07, "loss": 1.6766, "step": 3124 }, { "epoch": 3.36, "learning_rate": 1.3252702987979836e-07, "loss": 1.6493, "step": 3126 }, { "epoch": 3.36, "learning_rate": 1.3166199698930337e-07, "loss": 1.7053, "step": 3128 }, { "epoch": 3.36, "learning_rate": 1.3079959748999493e-07, "loss": 1.6686, "step": 3130 }, { "epoch": 3.37, "learning_rate": 1.2993983399725372e-07, "loss": 1.6379, "step": 3132 }, { "epoch": 3.37, "learning_rate": 1.2908270911846785e-07, "loss": 1.6551, "step": 3134 }, { "epoch": 3.37, "learning_rate": 1.282282254530226e-07, "loss": 1.6568, "step": 3136 }, { "epoch": 3.37, "learning_rate": 1.2737638559229314e-07, "loss": 1.6266, "step": 3138 }, { "epoch": 3.38, "learning_rate": 1.2652719211963725e-07, "loss": 1.6982, "step": 3140 }, { "epoch": 3.38, "learning_rate": 1.2568064761038665e-07, "loss": 1.6939, "step": 3142 }, { "epoch": 3.38, "learning_rate": 1.2483675463184018e-07, "loss": 1.6788, "step": 3144 }, { "epoch": 3.38, "learning_rate": 1.2399551574325496e-07, "loss": 1.6979, "step": 3146 }, { "epoch": 3.38, "learning_rate": 1.2315693349583923e-07, "loss": 1.6756, "step": 3148 }, { "epoch": 3.39, "learning_rate": 1.2232101043274435e-07, "loss": 1.6593, "step": 3150 }, { "epoch": 3.39, "learning_rate": 1.2148774908905778e-07, "loss": 1.6466, "step": 3152 }, { "epoch": 3.39, "learning_rate": 1.2065715199179383e-07, "loss": 1.6645, "step": 3154 }, { "epoch": 3.39, "learning_rate": 1.1982922165988807e-07, "loss": 1.686, "step": 3156 }, { "epoch": 3.39, "learning_rate": 1.1900396060418794e-07, "loss": 1.6871, "step": 3158 }, { "epoch": 3.4, "learning_rate": 1.1818137132744621e-07, "loss": 1.6692, "step": 3160 }, { "epoch": 3.4, "learning_rate": 1.173614563243126e-07, "loss": 1.6918, "step": 3162 }, { "epoch": 3.4, "learning_rate": 1.1654421808132686e-07, "loss": 1.6722, "step": 3164 }, { "epoch": 3.4, "learning_rate": 1.1572965907691124e-07, "loss": 1.6424, "step": 3166 }, { "epoch": 3.41, "learning_rate": 1.1491778178136224e-07, "loss": 1.6632, "step": 3168 }, { "epoch": 3.41, "learning_rate": 1.141085886568437e-07, "loss": 1.6738, "step": 3170 }, { "epoch": 3.41, "learning_rate": 1.1330208215737935e-07, "loss": 1.6415, "step": 3172 }, { "epoch": 3.41, "learning_rate": 1.1249826472884571e-07, "loss": 1.7036, "step": 3174 }, { "epoch": 3.41, "learning_rate": 1.1169713880896281e-07, "loss": 1.6395, "step": 3176 }, { "epoch": 3.42, "learning_rate": 1.1089870682728985e-07, "loss": 1.6563, "step": 3178 }, { "epoch": 3.42, "learning_rate": 1.1010297120521528e-07, "loss": 1.6361, "step": 3180 }, { "epoch": 3.42, "learning_rate": 1.0930993435595026e-07, "loss": 1.6285, "step": 3182 }, { "epoch": 3.42, "learning_rate": 1.0851959868452198e-07, "loss": 1.6754, "step": 3184 }, { "epoch": 3.42, "learning_rate": 1.0773196658776529e-07, "loss": 1.6357, "step": 3186 }, { "epoch": 3.43, "learning_rate": 1.0694704045431602e-07, "loss": 1.6388, "step": 3188 }, { "epoch": 3.43, "learning_rate": 1.0616482266460447e-07, "loss": 1.6697, "step": 3190 }, { "epoch": 3.43, "learning_rate": 1.0538531559084641e-07, "loss": 1.7182, "step": 3192 }, { "epoch": 3.43, "learning_rate": 1.0460852159703715e-07, "loss": 1.6484, "step": 3194 }, { "epoch": 3.44, "learning_rate": 1.038344430389445e-07, "loss": 1.7152, "step": 3196 }, { "epoch": 3.44, "learning_rate": 1.0306308226410054e-07, "loss": 1.7203, "step": 3198 }, { "epoch": 3.44, "learning_rate": 1.0229444161179612e-07, "loss": 1.6617, "step": 3200 }, { "epoch": 3.44, "learning_rate": 1.015285234130716e-07, "loss": 1.6696, "step": 3202 }, { "epoch": 3.44, "learning_rate": 1.0076532999071219e-07, "loss": 1.6612, "step": 3204 }, { "epoch": 3.45, "learning_rate": 1.000048636592391e-07, "loss": 1.7077, "step": 3206 }, { "epoch": 3.45, "learning_rate": 9.924712672490331e-08, "loss": 1.6499, "step": 3208 }, { "epoch": 3.45, "learning_rate": 9.849212148567798e-08, "loss": 1.6717, "step": 3210 }, { "epoch": 3.45, "learning_rate": 9.773985023125308e-08, "loss": 1.6788, "step": 3212 }, { "epoch": 3.45, "learning_rate": 9.69903152430257e-08, "loss": 1.6249, "step": 3214 }, { "epoch": 3.46, "learning_rate": 9.624351879409598e-08, "loss": 1.6898, "step": 3216 }, { "epoch": 3.46, "learning_rate": 9.549946314925839e-08, "loss": 1.641, "step": 3218 }, { "epoch": 3.46, "learning_rate": 9.475815056499526e-08, "loss": 1.635, "step": 3220 }, { "epoch": 3.46, "learning_rate": 9.401958328947102e-08, "loss": 1.6742, "step": 3222 }, { "epoch": 3.47, "learning_rate": 9.328376356252288e-08, "loss": 1.6689, "step": 3224 }, { "epoch": 3.47, "learning_rate": 9.255069361565715e-08, "loss": 1.7212, "step": 3226 }, { "epoch": 3.47, "learning_rate": 9.182037567204016e-08, "loss": 1.6297, "step": 3228 }, { "epoch": 3.47, "learning_rate": 9.109281194649243e-08, "loss": 1.6644, "step": 3230 }, { "epoch": 3.47, "learning_rate": 9.036800464548156e-08, "loss": 1.7154, "step": 3232 }, { "epoch": 3.48, "learning_rate": 8.964595596711667e-08, "loss": 1.7058, "step": 3234 }, { "epoch": 3.48, "learning_rate": 8.892666810113958e-08, "loss": 1.6416, "step": 3236 }, { "epoch": 3.48, "learning_rate": 8.821014322892051e-08, "loss": 1.649, "step": 3238 }, { "epoch": 3.48, "learning_rate": 8.749638352345001e-08, "loss": 1.6482, "step": 3240 }, { "epoch": 3.49, "learning_rate": 8.678539114933259e-08, "loss": 1.6535, "step": 3242 }, { "epoch": 3.49, "learning_rate": 8.607716826278089e-08, "loss": 1.6195, "step": 3244 }, { "epoch": 3.49, "learning_rate": 8.537171701160762e-08, "loss": 1.6657, "step": 3246 }, { "epoch": 3.49, "learning_rate": 8.466903953522109e-08, "loss": 1.6363, "step": 3248 }, { "epoch": 3.49, "learning_rate": 8.396913796461703e-08, "loss": 1.6807, "step": 3250 }, { "epoch": 3.5, "learning_rate": 8.327201442237274e-08, "loss": 1.6893, "step": 3252 }, { "epoch": 3.5, "learning_rate": 8.257767102264079e-08, "loss": 1.6344, "step": 3254 }, { "epoch": 3.5, "learning_rate": 8.188610987114241e-08, "loss": 1.6344, "step": 3256 }, { "epoch": 3.5, "learning_rate": 8.119733306516108e-08, "loss": 1.7071, "step": 3258 }, { "epoch": 3.5, "learning_rate": 8.051134269353687e-08, "loss": 1.6781, "step": 3260 }, { "epoch": 3.51, "learning_rate": 7.982814083665823e-08, "loss": 1.7103, "step": 3262 }, { "epoch": 3.51, "learning_rate": 7.91477295664581e-08, "loss": 1.6994, "step": 3264 }, { "epoch": 3.51, "learning_rate": 7.847011094640633e-08, "loss": 1.6686, "step": 3266 }, { "epoch": 3.51, "learning_rate": 7.779528703150262e-08, "loss": 1.6597, "step": 3268 }, { "epoch": 3.52, "learning_rate": 7.71232598682724e-08, "loss": 1.6923, "step": 3270 }, { "epoch": 3.52, "learning_rate": 7.64540314947586e-08, "loss": 1.7059, "step": 3272 }, { "epoch": 3.52, "learning_rate": 7.578760394051687e-08, "loss": 1.6724, "step": 3274 }, { "epoch": 3.52, "learning_rate": 7.512397922660852e-08, "loss": 1.6546, "step": 3276 }, { "epoch": 3.52, "learning_rate": 7.446315936559488e-08, "loss": 1.6656, "step": 3278 }, { "epoch": 3.53, "learning_rate": 7.380514636153079e-08, "loss": 1.6757, "step": 3280 }, { "epoch": 3.53, "learning_rate": 7.314994220995974e-08, "loss": 1.6955, "step": 3282 }, { "epoch": 3.53, "learning_rate": 7.249754889790538e-08, "loss": 1.7442, "step": 3284 }, { "epoch": 3.53, "learning_rate": 7.184796840386809e-08, "loss": 1.6814, "step": 3286 }, { "epoch": 3.53, "learning_rate": 7.120120269781792e-08, "loss": 1.7133, "step": 3288 }, { "epoch": 3.54, "learning_rate": 7.05572537411876e-08, "loss": 1.6284, "step": 3290 }, { "epoch": 3.54, "learning_rate": 6.99161234868686e-08, "loss": 1.6831, "step": 3292 }, { "epoch": 3.54, "learning_rate": 6.927781387920362e-08, "loss": 1.6694, "step": 3294 }, { "epoch": 3.54, "learning_rate": 6.864232685398141e-08, "loss": 1.7051, "step": 3296 }, { "epoch": 3.55, "learning_rate": 6.800966433843048e-08, "loss": 1.7096, "step": 3298 }, { "epoch": 3.55, "learning_rate": 6.737982825121391e-08, "loss": 1.6188, "step": 3300 }, { "epoch": 3.55, "learning_rate": 6.67528205024227e-08, "loss": 1.6744, "step": 3302 }, { "epoch": 3.55, "learning_rate": 6.612864299357112e-08, "loss": 1.5937, "step": 3304 }, { "epoch": 3.55, "learning_rate": 6.550729761758899e-08, "loss": 1.6218, "step": 3306 }, { "epoch": 3.56, "learning_rate": 6.488878625881866e-08, "loss": 1.6318, "step": 3308 }, { "epoch": 3.56, "learning_rate": 6.427311079300668e-08, "loss": 1.6842, "step": 3310 }, { "epoch": 3.56, "learning_rate": 6.36602730872996e-08, "loss": 1.6758, "step": 3312 }, { "epoch": 3.56, "learning_rate": 6.30502750002384e-08, "loss": 1.6575, "step": 3314 }, { "epoch": 3.56, "learning_rate": 6.244311838175143e-08, "loss": 1.6651, "step": 3316 }, { "epoch": 3.57, "learning_rate": 6.183880507315075e-08, "loss": 1.7021, "step": 3318 }, { "epoch": 3.57, "learning_rate": 6.123733690712518e-08, "loss": 1.6429, "step": 3320 }, { "epoch": 3.57, "learning_rate": 6.063871570773493e-08, "loss": 1.7242, "step": 3322 }, { "epoch": 3.57, "learning_rate": 6.004294329040638e-08, "loss": 1.6261, "step": 3324 }, { "epoch": 3.58, "learning_rate": 5.9450021461927125e-08, "loss": 1.6618, "step": 3326 }, { "epoch": 3.58, "learning_rate": 5.885995202043847e-08, "loss": 1.6459, "step": 3328 }, { "epoch": 3.58, "learning_rate": 5.827273675543265e-08, "loss": 1.6774, "step": 3330 }, { "epoch": 3.58, "learning_rate": 5.7688377447745465e-08, "loss": 1.678, "step": 3332 }, { "epoch": 3.58, "learning_rate": 5.710687586955143e-08, "loss": 1.6966, "step": 3334 }, { "epoch": 3.59, "learning_rate": 5.652823378435911e-08, "loss": 1.6546, "step": 3336 }, { "epoch": 3.59, "learning_rate": 5.595245294700424e-08, "loss": 1.6564, "step": 3338 }, { "epoch": 3.59, "learning_rate": 5.5379535103646125e-08, "loss": 1.6682, "step": 3340 }, { "epoch": 3.59, "learning_rate": 5.4809481991761056e-08, "loss": 1.7469, "step": 3342 }, { "epoch": 3.59, "learning_rate": 5.4242295340137576e-08, "loss": 1.6954, "step": 3344 }, { "epoch": 3.6, "learning_rate": 5.36779768688711e-08, "loss": 1.6665, "step": 3346 }, { "epoch": 3.6, "learning_rate": 5.311652828935942e-08, "loss": 1.6482, "step": 3348 }, { "epoch": 3.6, "learning_rate": 5.2557951304295747e-08, "loss": 1.6741, "step": 3350 }, { "epoch": 3.6, "learning_rate": 5.2002247607665586e-08, "loss": 1.6335, "step": 3352 }, { "epoch": 3.61, "learning_rate": 5.14494188847403e-08, "loss": 1.645, "step": 3354 }, { "epoch": 3.61, "learning_rate": 5.0899466812072464e-08, "loss": 1.6584, "step": 3356 }, { "epoch": 3.61, "learning_rate": 5.035239305749062e-08, "loss": 1.6438, "step": 3358 }, { "epoch": 3.61, "learning_rate": 4.9808199280094055e-08, "loss": 1.6484, "step": 3360 }, { "epoch": 3.61, "learning_rate": 4.9266887130248734e-08, "loss": 1.6708, "step": 3362 }, { "epoch": 3.62, "learning_rate": 4.872845824958105e-08, "loss": 1.6236, "step": 3364 }, { "epoch": 3.62, "learning_rate": 4.819291427097327e-08, "loss": 1.6732, "step": 3366 }, { "epoch": 3.62, "learning_rate": 4.7660256818558783e-08, "loss": 1.7199, "step": 3368 }, { "epoch": 3.62, "learning_rate": 4.713048750771731e-08, "loss": 1.7204, "step": 3370 }, { "epoch": 3.62, "learning_rate": 4.6603607945069456e-08, "loss": 1.6991, "step": 3372 }, { "epoch": 3.63, "learning_rate": 4.6079619728472515e-08, "loss": 1.6393, "step": 3374 }, { "epoch": 3.63, "learning_rate": 4.555852444701447e-08, "loss": 1.6464, "step": 3376 }, { "epoch": 3.63, "learning_rate": 4.5040323681011074e-08, "loss": 1.6666, "step": 3378 }, { "epoch": 3.63, "learning_rate": 4.452501900199901e-08, "loss": 1.6701, "step": 3380 }, { "epoch": 3.64, "learning_rate": 4.401261197273254e-08, "loss": 1.7052, "step": 3382 }, { "epoch": 3.64, "learning_rate": 4.350310414717806e-08, "loss": 1.6852, "step": 3384 }, { "epoch": 3.64, "learning_rate": 4.299649707050979e-08, "loss": 1.6899, "step": 3386 }, { "epoch": 3.64, "learning_rate": 4.249279227910485e-08, "loss": 1.6644, "step": 3388 }, { "epoch": 3.64, "learning_rate": 4.199199130053854e-08, "loss": 1.6361, "step": 3390 }, { "epoch": 3.65, "learning_rate": 4.1494095653579974e-08, "loss": 1.6708, "step": 3392 }, { "epoch": 3.65, "learning_rate": 4.099910684818697e-08, "loss": 1.6374, "step": 3394 }, { "epoch": 3.65, "learning_rate": 4.050702638550274e-08, "loss": 1.6507, "step": 3396 }, { "epoch": 3.65, "learning_rate": 4.0017855757849105e-08, "loss": 1.6768, "step": 3398 }, { "epoch": 3.65, "learning_rate": 3.953159644872439e-08, "loss": 1.6593, "step": 3400 }, { "epoch": 3.66, "learning_rate": 3.9048249932797425e-08, "loss": 1.6431, "step": 3402 }, { "epoch": 3.66, "learning_rate": 3.856781767590334e-08, "loss": 1.7, "step": 3404 }, { "epoch": 3.66, "learning_rate": 3.809030113503919e-08, "loss": 1.6935, "step": 3406 }, { "epoch": 3.66, "learning_rate": 3.761570175836015e-08, "loss": 1.647, "step": 3408 }, { "epoch": 3.67, "learning_rate": 3.7144020985173994e-08, "loss": 1.6749, "step": 3410 }, { "epoch": 3.67, "learning_rate": 3.667526024593759e-08, "loss": 1.6753, "step": 3412 }, { "epoch": 3.67, "learning_rate": 3.6209420962252104e-08, "loss": 1.6501, "step": 3414 }, { "epoch": 3.67, "learning_rate": 3.574650454685901e-08, "loss": 1.6958, "step": 3416 }, { "epoch": 3.67, "learning_rate": 3.528651240363567e-08, "loss": 1.6502, "step": 3418 }, { "epoch": 3.68, "learning_rate": 3.482944592759085e-08, "loss": 1.6681, "step": 3420 }, { "epoch": 3.68, "learning_rate": 3.437530650486098e-08, "loss": 1.6767, "step": 3422 }, { "epoch": 3.68, "learning_rate": 3.3924095512705477e-08, "loss": 1.6495, "step": 3424 }, { "epoch": 3.68, "learning_rate": 3.347581431950286e-08, "loss": 1.6932, "step": 3426 }, { "epoch": 3.69, "learning_rate": 3.303046428474643e-08, "loss": 1.6783, "step": 3428 }, { "epoch": 3.69, "learning_rate": 3.258804675904037e-08, "loss": 1.6615, "step": 3430 }, { "epoch": 3.69, "learning_rate": 3.2148563084095306e-08, "loss": 1.7301, "step": 3432 }, { "epoch": 3.69, "learning_rate": 3.1712014592724656e-08, "loss": 1.7104, "step": 3434 }, { "epoch": 3.69, "learning_rate": 3.127840260884018e-08, "loss": 1.6831, "step": 3436 }, { "epoch": 3.7, "learning_rate": 3.08477284474481e-08, "loss": 1.6193, "step": 3438 }, { "epoch": 3.7, "learning_rate": 3.041999341464563e-08, "loss": 1.645, "step": 3440 }, { "epoch": 3.7, "learning_rate": 2.9995198807615695e-08, "loss": 1.6467, "step": 3442 }, { "epoch": 3.7, "learning_rate": 2.9573345914624794e-08, "loss": 1.6273, "step": 3444 }, { "epoch": 3.7, "learning_rate": 2.9154436015017435e-08, "loss": 1.6477, "step": 3446 }, { "epoch": 3.71, "learning_rate": 2.8738470379213398e-08, "loss": 1.662, "step": 3448 }, { "epoch": 3.71, "learning_rate": 2.8325450268703145e-08, "loss": 1.6946, "step": 3450 }, { "epoch": 3.71, "learning_rate": 2.7915376936044622e-08, "loss": 1.7115, "step": 3452 }, { "epoch": 3.71, "learning_rate": 2.75082516248587e-08, "loss": 1.6926, "step": 3454 }, { "epoch": 3.72, "learning_rate": 2.7104075569826413e-08, "loss": 1.7017, "step": 3456 }, { "epoch": 3.72, "learning_rate": 2.6702849996684263e-08, "loss": 1.6817, "step": 3458 }, { "epoch": 3.72, "learning_rate": 2.6304576122221034e-08, "loss": 1.6707, "step": 3460 }, { "epoch": 3.72, "learning_rate": 2.5909255154273667e-08, "loss": 1.6643, "step": 3462 }, { "epoch": 3.72, "learning_rate": 2.551688829172416e-08, "loss": 1.6317, "step": 3464 }, { "epoch": 3.73, "learning_rate": 2.5127476724495778e-08, "loss": 1.6635, "step": 3466 }, { "epoch": 3.73, "learning_rate": 2.4741021633549076e-08, "loss": 1.6439, "step": 3468 }, { "epoch": 3.73, "learning_rate": 2.4357524190878665e-08, "loss": 1.6555, "step": 3470 }, { "epoch": 3.73, "learning_rate": 2.3976985559509333e-08, "loss": 1.6431, "step": 3472 }, { "epoch": 3.73, "learning_rate": 2.3599406893493157e-08, "loss": 1.654, "step": 3474 }, { "epoch": 3.74, "learning_rate": 2.322478933790506e-08, "loss": 1.6237, "step": 3476 }, { "epoch": 3.74, "learning_rate": 2.2853134028840594e-08, "loss": 1.6238, "step": 3478 }, { "epoch": 3.74, "learning_rate": 2.2484442093410826e-08, "loss": 1.6644, "step": 3480 }, { "epoch": 3.74, "learning_rate": 2.211871464974091e-08, "loss": 1.6945, "step": 3482 }, { "epoch": 3.75, "learning_rate": 2.1755952806964627e-08, "loss": 1.647, "step": 3484 }, { "epoch": 3.75, "learning_rate": 2.1396157665222737e-08, "loss": 1.6786, "step": 3486 }, { "epoch": 3.75, "learning_rate": 2.1039330315658964e-08, "loss": 1.6438, "step": 3488 }, { "epoch": 3.75, "learning_rate": 2.0685471840415913e-08, "loss": 1.6849, "step": 3490 }, { "epoch": 3.75, "learning_rate": 2.0334583312633378e-08, "loss": 1.6909, "step": 3492 }, { "epoch": 3.76, "learning_rate": 1.9986665796443926e-08, "loss": 1.6418, "step": 3494 }, { "epoch": 3.76, "learning_rate": 1.9641720346969982e-08, "loss": 1.6534, "step": 3496 }, { "epoch": 3.76, "learning_rate": 1.9299748010320527e-08, "loss": 1.6602, "step": 3498 }, { "epoch": 3.76, "learning_rate": 1.8960749823588527e-08, "loss": 1.6439, "step": 3500 }, { "epoch": 3.76, "learning_rate": 1.8624726814846504e-08, "loss": 1.7139, "step": 3502 }, { "epoch": 3.77, "learning_rate": 1.8291680003145073e-08, "loss": 1.6126, "step": 3504 }, { "epoch": 3.77, "learning_rate": 1.796161039850841e-08, "loss": 1.6808, "step": 3506 }, { "epoch": 3.77, "learning_rate": 1.7634519001931914e-08, "loss": 1.7076, "step": 3508 }, { "epoch": 3.77, "learning_rate": 1.7310406805379207e-08, "loss": 1.6675, "step": 3510 }, { "epoch": 3.78, "learning_rate": 1.6989274791778697e-08, "loss": 1.6708, "step": 3512 }, { "epoch": 3.78, "learning_rate": 1.6671123935021125e-08, "loss": 1.6062, "step": 3514 }, { "epoch": 3.78, "learning_rate": 1.635595519995614e-08, "loss": 1.6837, "step": 3516 }, { "epoch": 3.78, "learning_rate": 1.6043769542389617e-08, "loss": 1.6469, "step": 3518 }, { "epoch": 3.78, "learning_rate": 1.5734567909080565e-08, "loss": 1.6466, "step": 3520 }, { "epoch": 3.79, "learning_rate": 1.542835123773889e-08, "loss": 1.7045, "step": 3522 }, { "epoch": 3.79, "learning_rate": 1.5125120457021302e-08, "loss": 1.6338, "step": 3524 }, { "epoch": 3.79, "learning_rate": 1.482487648653008e-08, "loss": 1.6408, "step": 3526 }, { "epoch": 3.79, "learning_rate": 1.4527620236808868e-08, "loss": 1.6534, "step": 3528 }, { "epoch": 3.79, "learning_rate": 1.4233352609340665e-08, "loss": 1.6566, "step": 3530 }, { "epoch": 3.8, "learning_rate": 1.3942074496545165e-08, "loss": 1.6992, "step": 3532 }, { "epoch": 3.8, "learning_rate": 1.3653786781775422e-08, "loss": 1.6856, "step": 3534 }, { "epoch": 3.8, "learning_rate": 1.3368490339315974e-08, "loss": 1.6908, "step": 3536 }, { "epoch": 3.8, "learning_rate": 1.308618603437961e-08, "loss": 1.6859, "step": 3538 }, { "epoch": 3.81, "learning_rate": 1.2806874723104822e-08, "loss": 1.7119, "step": 3540 }, { "epoch": 3.81, "learning_rate": 1.2530557252553364e-08, "loss": 1.6799, "step": 3542 }, { "epoch": 3.81, "learning_rate": 1.2257234460707699e-08, "loss": 1.6392, "step": 3544 }, { "epoch": 3.81, "learning_rate": 1.198690717646833e-08, "loss": 1.6594, "step": 3546 }, { "epoch": 3.81, "learning_rate": 1.1719576219651584e-08, "loss": 1.6087, "step": 3548 }, { "epoch": 3.82, "learning_rate": 1.1455242400986276e-08, "loss": 1.7065, "step": 3550 }, { "epoch": 3.82, "learning_rate": 1.1193906522112607e-08, "loss": 1.6663, "step": 3552 }, { "epoch": 3.82, "learning_rate": 1.0935569375578602e-08, "loss": 1.6866, "step": 3554 }, { "epoch": 3.82, "learning_rate": 1.0680231744837897e-08, "loss": 1.6843, "step": 3556 }, { "epoch": 3.82, "learning_rate": 1.0427894404248072e-08, "loss": 1.6836, "step": 3558 }, { "epoch": 3.83, "learning_rate": 1.0178558119067315e-08, "loss": 1.6905, "step": 3560 }, { "epoch": 3.83, "learning_rate": 9.932223645452763e-09, "loss": 1.6477, "step": 3562 }, { "epoch": 3.83, "learning_rate": 9.68889173045806e-09, "loss": 1.6528, "step": 3564 }, { "epoch": 3.83, "learning_rate": 9.448563112031127e-09, "loss": 1.6646, "step": 3566 }, { "epoch": 3.84, "learning_rate": 9.2112385190114e-09, "loss": 1.6845, "step": 3568 }, { "epoch": 3.84, "learning_rate": 8.97691867112882e-09, "loss": 1.7005, "step": 3570 }, { "epoch": 3.84, "learning_rate": 8.745604279000175e-09, "loss": 1.7016, "step": 3572 }, { "epoch": 3.84, "learning_rate": 8.517296044127986e-09, "loss": 1.6726, "step": 3574 }, { "epoch": 3.84, "learning_rate": 8.291994658898182e-09, "loss": 1.6214, "step": 3576 }, { "epoch": 3.85, "learning_rate": 8.06970080657765e-09, "loss": 1.6927, "step": 3578 }, { "epoch": 3.85, "learning_rate": 7.850415161312462e-09, "loss": 1.642, "step": 3580 }, { "epoch": 3.85, "learning_rate": 7.634138388125877e-09, "loss": 1.6652, "step": 3582 }, { "epoch": 3.85, "learning_rate": 7.420871142916119e-09, "loss": 1.6784, "step": 3584 }, { "epoch": 3.85, "learning_rate": 7.210614072454269e-09, "loss": 1.6558, "step": 3586 }, { "epoch": 3.86, "learning_rate": 7.003367814382933e-09, "loss": 1.6801, "step": 3588 }, { "epoch": 3.86, "learning_rate": 6.799132997213464e-09, "loss": 1.7099, "step": 3590 }, { "epoch": 3.86, "learning_rate": 6.5979102403249664e-09, "loss": 1.6444, "step": 3592 }, { "epoch": 3.86, "learning_rate": 6.3997001539614074e-09, "loss": 1.6845, "step": 3594 }, { "epoch": 3.87, "learning_rate": 6.204503339230504e-09, "loss": 1.6861, "step": 3596 }, { "epoch": 3.87, "learning_rate": 6.012320388101955e-09, "loss": 1.652, "step": 3598 }, { "epoch": 3.87, "learning_rate": 5.823151883404876e-09, "loss": 1.7028, "step": 3600 }, { "epoch": 3.87, "learning_rate": 5.6369983988269195e-09, "loss": 1.6658, "step": 3602 }, { "epoch": 3.87, "learning_rate": 5.453860498911944e-09, "loss": 1.6543, "step": 3604 }, { "epoch": 3.88, "learning_rate": 5.273738739058675e-09, "loss": 1.6666, "step": 3606 }, { "epoch": 3.88, "learning_rate": 5.096633665518601e-09, "loss": 1.6808, "step": 3608 }, { "epoch": 3.88, "learning_rate": 4.922545815394863e-09, "loss": 1.6442, "step": 3610 }, { "epoch": 3.88, "learning_rate": 4.75147571664003e-09, "loss": 1.6584, "step": 3612 }, { "epoch": 3.88, "learning_rate": 4.583423888055105e-09, "loss": 1.6805, "step": 3614 }, { "epoch": 3.89, "learning_rate": 4.4183908392873005e-09, "loss": 1.6294, "step": 3616 }, { "epoch": 3.89, "learning_rate": 4.256377070829264e-09, "loss": 1.688, "step": 3618 }, { "epoch": 3.89, "learning_rate": 4.097383074016636e-09, "loss": 1.7435, "step": 3620 }, { "epoch": 3.89, "learning_rate": 3.9414093310274895e-09, "loss": 1.6952, "step": 3622 }, { "epoch": 3.9, "learning_rate": 3.7884563148802286e-09, "loss": 1.6994, "step": 3624 }, { "epoch": 3.9, "learning_rate": 3.6385244894323596e-09, "loss": 1.636, "step": 3626 }, { "epoch": 3.9, "learning_rate": 3.4916143093790538e-09, "loss": 1.6797, "step": 3628 }, { "epoch": 3.9, "learning_rate": 3.347726220251923e-09, "loss": 1.6591, "step": 3630 }, { "epoch": 3.9, "learning_rate": 3.2068606584174652e-09, "loss": 1.6905, "step": 3632 }, { "epoch": 3.91, "learning_rate": 3.0690180510758444e-09, "loss": 1.6511, "step": 3634 }, { "epoch": 3.91, "learning_rate": 2.934198816259559e-09, "loss": 1.6138, "step": 3636 }, { "epoch": 3.91, "learning_rate": 2.8024033628321066e-09, "loss": 1.6458, "step": 3638 }, { "epoch": 3.91, "learning_rate": 2.673632090487099e-09, "loss": 1.6346, "step": 3640 }, { "epoch": 3.92, "learning_rate": 2.5478853897464848e-09, "loss": 1.6801, "step": 3642 }, { "epoch": 3.92, "learning_rate": 2.42516364195966e-09, "loss": 1.6812, "step": 3644 }, { "epoch": 3.92, "learning_rate": 2.3054672193024704e-09, "loss": 1.6319, "step": 3646 }, { "epoch": 3.92, "learning_rate": 2.18879648477599e-09, "loss": 1.6834, "step": 3648 }, { "epoch": 3.92, "learning_rate": 2.0751517922048546e-09, "loss": 1.7166, "step": 3650 }, { "epoch": 3.93, "learning_rate": 1.9645334862373743e-09, "loss": 1.7328, "step": 3652 }, { "epoch": 3.93, "learning_rate": 1.8569419023433119e-09, "loss": 1.6523, "step": 3654 }, { "epoch": 3.93, "learning_rate": 1.7523773668135512e-09, "loss": 1.6912, "step": 3656 }, { "epoch": 3.93, "learning_rate": 1.6508401967588736e-09, "loss": 1.7194, "step": 3658 }, { "epoch": 3.93, "learning_rate": 1.5523307001088503e-09, "loss": 1.6586, "step": 3660 }, { "epoch": 3.94, "learning_rate": 1.4568491756115075e-09, "loss": 1.6649, "step": 3662 }, { "epoch": 3.94, "learning_rate": 1.3643959128314398e-09, "loss": 1.693, "step": 3664 }, { "epoch": 3.94, "learning_rate": 1.2749711921500318e-09, "loss": 1.6872, "step": 3666 }, { "epoch": 3.94, "learning_rate": 1.188575284763793e-09, "loss": 1.68, "step": 3668 }, { "epoch": 3.95, "learning_rate": 1.1052084526838035e-09, "loss": 1.6039, "step": 3670 }, { "epoch": 3.95, "learning_rate": 1.0248709487349349e-09, "loss": 1.6443, "step": 3672 }, { "epoch": 3.95, "learning_rate": 9.475630165552973e-10, "loss": 1.7098, "step": 3674 }, { "epoch": 3.95, "learning_rate": 8.732848905947942e-10, "loss": 1.6615, "step": 3676 }, { "epoch": 3.95, "learning_rate": 8.020367961155683e-10, "loss": 1.6977, "step": 3678 }, { "epoch": 3.96, "learning_rate": 7.338189491900015e-10, "loss": 1.6328, "step": 3680 }, { "epoch": 3.96, "learning_rate": 6.686315567010492e-10, "loss": 1.6473, "step": 3682 }, { "epoch": 3.96, "learning_rate": 6.064748163413513e-10, "loss": 1.6573, "step": 3684 }, { "epoch": 3.96, "learning_rate": 5.473489166122335e-10, "loss": 1.6143, "step": 3686 }, { "epoch": 3.96, "learning_rate": 4.912540368237072e-10, "loss": 1.6866, "step": 3688 }, { "epoch": 3.97, "learning_rate": 4.3819034709358105e-10, "loss": 1.6848, "step": 3690 }, { "epoch": 3.97, "learning_rate": 3.88158008346906e-10, "loss": 1.6417, "step": 3692 }, { "epoch": 3.97, "learning_rate": 3.4115717231597564e-10, "loss": 1.6711, "step": 3694 }, { "epoch": 3.97, "learning_rate": 2.971879815391043e-10, "loss": 1.6467, "step": 3696 }, { "epoch": 3.98, "learning_rate": 2.5625056936084966e-10, "loss": 1.649, "step": 3698 }, { "epoch": 3.98, "learning_rate": 2.1834505993134633e-10, "loss": 1.6476, "step": 3700 }, { "epoch": 3.98, "learning_rate": 1.834715682056398e-10, "loss": 1.637, "step": 3702 }, { "epoch": 3.98, "learning_rate": 1.516301999441305e-10, "loss": 1.7084, "step": 3704 }, { "epoch": 3.98, "learning_rate": 1.228210517113526e-10, "loss": 1.7026, "step": 3706 }, { "epoch": 3.99, "learning_rate": 9.704421087619596e-11, "loss": 1.6253, "step": 3708 }, { "epoch": 3.99, "learning_rate": 7.42997556115732e-11, "loss": 1.6609, "step": 3710 }, { "epoch": 3.99, "learning_rate": 5.458775489430856e-11, "loss": 1.6664, "step": 3712 }, { "epoch": 3.99, "learning_rate": 3.7908268504471816e-11, "loss": 1.6594, "step": 3714 }, { "epoch": 3.99, "learning_rate": 2.426134702548932e-11, "loss": 1.6718, "step": 3716 }, { "epoch": 4.0, "learning_rate": 1.3647031844365997e-11, "loss": 1.6577, "step": 3718 }, { "epoch": 4.0, "learning_rate": 6.06535515068618e-12, "loss": 1.6686, "step": 3720 }, { "epoch": 4.0, "step": 3720, "total_flos": 2.1177725398863053e+17, "train_loss": 1.7057791815650079, "train_runtime": 39113.1965, "train_samples_per_second": 6.088, "train_steps_per_second": 0.095 } ], "logging_steps": 2, "max_steps": 3720, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 4000, "total_flos": 2.1177725398863053e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }