|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9961464354527938, |
|
"eval_steps": 500, |
|
"global_step": 518, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0038535645472061657, |
|
"grad_norm": 1432.749671594044, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 11.5124, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007707129094412331, |
|
"grad_norm": 1357.8414028391733, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 11.4455, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.011560693641618497, |
|
"grad_norm": 1337.1958013405376, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 11.2831, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.015414258188824663, |
|
"grad_norm": 916.8677401776845, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 9.6523, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.019267822736030827, |
|
"grad_norm": 668.9521736359394, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 7.3813, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.023121387283236993, |
|
"grad_norm": 409.96263800148665, |
|
"learning_rate": 4.615384615384616e-06, |
|
"loss": 5.8322, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02697495183044316, |
|
"grad_norm": 226.39376976378787, |
|
"learning_rate": 5.384615384615385e-06, |
|
"loss": 4.2464, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.030828516377649325, |
|
"grad_norm": 165.82496194680013, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 3.7425, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03468208092485549, |
|
"grad_norm": 173.89210974421533, |
|
"learning_rate": 6.923076923076923e-06, |
|
"loss": 4.0184, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.038535645472061654, |
|
"grad_norm": 168.95311819558103, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 3.7993, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04238921001926782, |
|
"grad_norm": 76.30995564433688, |
|
"learning_rate": 8.461538461538462e-06, |
|
"loss": 4.4087, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.046242774566473986, |
|
"grad_norm": 74.48219270936819, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 4.141, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05009633911368015, |
|
"grad_norm": 80.53194275608223, |
|
"learning_rate": 1e-05, |
|
"loss": 4.1239, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05394990366088632, |
|
"grad_norm": 33.55587129310653, |
|
"learning_rate": 1.076923076923077e-05, |
|
"loss": 3.6328, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.057803468208092484, |
|
"grad_norm": 63.09531505114449, |
|
"learning_rate": 1.1538461538461538e-05, |
|
"loss": 3.8233, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06165703275529865, |
|
"grad_norm": 67.77031346149326, |
|
"learning_rate": 1.230769230769231e-05, |
|
"loss": 3.8786, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06551059730250482, |
|
"grad_norm": 82.90135582492816, |
|
"learning_rate": 1.3076923076923078e-05, |
|
"loss": 4.1657, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06936416184971098, |
|
"grad_norm": 41.832941343610734, |
|
"learning_rate": 1.3846153846153847e-05, |
|
"loss": 3.408, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.07321772639691715, |
|
"grad_norm": 48.26728390692674, |
|
"learning_rate": 1.4615384615384615e-05, |
|
"loss": 3.3997, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.07707129094412331, |
|
"grad_norm": 68.06129545630621, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 3.7407, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08092485549132948, |
|
"grad_norm": 22.59279364181322, |
|
"learning_rate": 1.6153846153846154e-05, |
|
"loss": 3.2956, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.08477842003853564, |
|
"grad_norm": 15.9339021622544, |
|
"learning_rate": 1.6923076923076924e-05, |
|
"loss": 2.8921, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08863198458574181, |
|
"grad_norm": 41.43770891931384, |
|
"learning_rate": 1.7692307692307694e-05, |
|
"loss": 3.2055, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.09248554913294797, |
|
"grad_norm": 42.58954954740387, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 3.2822, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.09633911368015415, |
|
"grad_norm": 23.174039496957583, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 2.7902, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1001926782273603, |
|
"grad_norm": 21.687527912513175, |
|
"learning_rate": 2e-05, |
|
"loss": 2.7725, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.10404624277456648, |
|
"grad_norm": 25.192833588413695, |
|
"learning_rate": 2.0769230769230772e-05, |
|
"loss": 2.6381, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.10789980732177264, |
|
"grad_norm": 27.608898717578253, |
|
"learning_rate": 2.153846153846154e-05, |
|
"loss": 2.633, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.11175337186897881, |
|
"grad_norm": 23.800035559481127, |
|
"learning_rate": 2.230769230769231e-05, |
|
"loss": 2.5231, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.11560693641618497, |
|
"grad_norm": 13.963396003674896, |
|
"learning_rate": 2.3076923076923076e-05, |
|
"loss": 2.1474, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11946050096339114, |
|
"grad_norm": 13.74744780299132, |
|
"learning_rate": 2.384615384615385e-05, |
|
"loss": 2.0851, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1233140655105973, |
|
"grad_norm": 14.540640312216865, |
|
"learning_rate": 2.461538461538462e-05, |
|
"loss": 2.0825, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.12716763005780346, |
|
"grad_norm": 13.210931532751994, |
|
"learning_rate": 2.5384615384615386e-05, |
|
"loss": 1.8231, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.13102119460500963, |
|
"grad_norm": 14.18049338122164, |
|
"learning_rate": 2.6153846153846157e-05, |
|
"loss": 2.0161, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.1348747591522158, |
|
"grad_norm": 7.365239232178003, |
|
"learning_rate": 2.6923076923076927e-05, |
|
"loss": 1.636, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.13872832369942195, |
|
"grad_norm": 11.42487604515666, |
|
"learning_rate": 2.7692307692307694e-05, |
|
"loss": 1.7636, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.14258188824662812, |
|
"grad_norm": 10.521730704250018, |
|
"learning_rate": 2.8461538461538464e-05, |
|
"loss": 1.6425, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1464354527938343, |
|
"grad_norm": 15.837868821185545, |
|
"learning_rate": 2.923076923076923e-05, |
|
"loss": 1.7335, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.15028901734104047, |
|
"grad_norm": 10.63875357372187, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 1.4748, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.15414258188824662, |
|
"grad_norm": 15.842407967828168, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 1.4917, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1579961464354528, |
|
"grad_norm": 8.822203607549136, |
|
"learning_rate": 3.153846153846154e-05, |
|
"loss": 1.4943, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.16184971098265896, |
|
"grad_norm": 13.809138343712586, |
|
"learning_rate": 3.230769230769231e-05, |
|
"loss": 1.5356, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.16570327552986513, |
|
"grad_norm": 9.367395780583822, |
|
"learning_rate": 3.307692307692308e-05, |
|
"loss": 1.4704, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.16955684007707128, |
|
"grad_norm": 10.227364292661408, |
|
"learning_rate": 3.384615384615385e-05, |
|
"loss": 1.3102, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.17341040462427745, |
|
"grad_norm": 9.413833051288245, |
|
"learning_rate": 3.461538461538462e-05, |
|
"loss": 1.255, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17726396917148363, |
|
"grad_norm": 9.94687228320031, |
|
"learning_rate": 3.538461538461539e-05, |
|
"loss": 1.3163, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1811175337186898, |
|
"grad_norm": 7.092554527238564, |
|
"learning_rate": 3.615384615384616e-05, |
|
"loss": 1.2354, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.18497109826589594, |
|
"grad_norm": 9.149268401400676, |
|
"learning_rate": 3.692307692307693e-05, |
|
"loss": 1.2504, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.18882466281310212, |
|
"grad_norm": 6.289853231412649, |
|
"learning_rate": 3.769230769230769e-05, |
|
"loss": 1.1468, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1926782273603083, |
|
"grad_norm": 9.45344983093482, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 1.2058, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19653179190751446, |
|
"grad_norm": 10.360186688388032, |
|
"learning_rate": 3.923076923076923e-05, |
|
"loss": 1.3109, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2003853564547206, |
|
"grad_norm": 9.072775454723184, |
|
"learning_rate": 4e-05, |
|
"loss": 1.2695, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.20423892100192678, |
|
"grad_norm": 8.438625780983935, |
|
"learning_rate": 3.999954550797489e-05, |
|
"loss": 1.3253, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.20809248554913296, |
|
"grad_norm": 7.900240323931455, |
|
"learning_rate": 3.999818205255586e-05, |
|
"loss": 1.1021, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.2119460500963391, |
|
"grad_norm": 5.82014263071836, |
|
"learning_rate": 3.9995909695710856e-05, |
|
"loss": 1.0841, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.21579961464354527, |
|
"grad_norm": 7.201881900115451, |
|
"learning_rate": 3.999272854071669e-05, |
|
"loss": 1.133, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.21965317919075145, |
|
"grad_norm": 7.6958656122670295, |
|
"learning_rate": 3.998863873215434e-05, |
|
"loss": 1.0334, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.22350674373795762, |
|
"grad_norm": 5.905685554617661, |
|
"learning_rate": 3.998364045590232e-05, |
|
"loss": 1.017, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.22736030828516376, |
|
"grad_norm": 6.204779741056835, |
|
"learning_rate": 3.9977733939128304e-05, |
|
"loss": 1.0027, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.23121387283236994, |
|
"grad_norm": 4.951471635667849, |
|
"learning_rate": 3.997091945027878e-05, |
|
"loss": 0.9547, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2350674373795761, |
|
"grad_norm": 7.304094138753475, |
|
"learning_rate": 3.996319729906682e-05, |
|
"loss": 1.1246, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.23892100192678228, |
|
"grad_norm": 6.298474879300807, |
|
"learning_rate": 3.995456783645805e-05, |
|
"loss": 1.0221, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.24277456647398843, |
|
"grad_norm": 7.776498792672179, |
|
"learning_rate": 3.994503145465464e-05, |
|
"loss": 1.097, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2466281310211946, |
|
"grad_norm": 5.607410872804594, |
|
"learning_rate": 3.993458858707756e-05, |
|
"loss": 1.0371, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.2504816955684008, |
|
"grad_norm": 4.77643068735735, |
|
"learning_rate": 3.992323970834682e-05, |
|
"loss": 1.004, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2543352601156069, |
|
"grad_norm": 7.743150531664644, |
|
"learning_rate": 3.991098533425988e-05, |
|
"loss": 1.0132, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.2581888246628131, |
|
"grad_norm": 5.8368050703956245, |
|
"learning_rate": 3.989782602176829e-05, |
|
"loss": 0.9401, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.26204238921001927, |
|
"grad_norm": 3.2101782475861, |
|
"learning_rate": 3.988376236895231e-05, |
|
"loss": 0.9025, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2658959537572254, |
|
"grad_norm": 13.049333458920938, |
|
"learning_rate": 3.986879501499373e-05, |
|
"loss": 1.0269, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.2697495183044316, |
|
"grad_norm": 7.880498887675296, |
|
"learning_rate": 3.985292464014686e-05, |
|
"loss": 1.0398, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.27360308285163776, |
|
"grad_norm": 5.313107024369285, |
|
"learning_rate": 3.9836151965707585e-05, |
|
"loss": 0.958, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.2774566473988439, |
|
"grad_norm": 4.395056829854694, |
|
"learning_rate": 3.9818477753980566e-05, |
|
"loss": 0.9323, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.2813102119460501, |
|
"grad_norm": 6.266655645573501, |
|
"learning_rate": 3.979990280824465e-05, |
|
"loss": 0.9909, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.28516377649325625, |
|
"grad_norm": 3.957118496983222, |
|
"learning_rate": 3.9780427972716296e-05, |
|
"loss": 1.0391, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.28901734104046245, |
|
"grad_norm": 3.508018525774183, |
|
"learning_rate": 3.976005413251125e-05, |
|
"loss": 1.0032, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2928709055876686, |
|
"grad_norm": 10.69825193127433, |
|
"learning_rate": 3.9738782213604305e-05, |
|
"loss": 1.0472, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.29672447013487474, |
|
"grad_norm": 6.501782774298941, |
|
"learning_rate": 3.971661318278721e-05, |
|
"loss": 1.0126, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.30057803468208094, |
|
"grad_norm": 5.11168856069891, |
|
"learning_rate": 3.969354804762473e-05, |
|
"loss": 0.96, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3044315992292871, |
|
"grad_norm": 5.039914432332962, |
|
"learning_rate": 3.966958785640887e-05, |
|
"loss": 0.9067, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.30828516377649323, |
|
"grad_norm": 6.647066379901366, |
|
"learning_rate": 3.9644733698111206e-05, |
|
"loss": 0.9367, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.31213872832369943, |
|
"grad_norm": 3.5432939083746655, |
|
"learning_rate": 3.9618986702333424e-05, |
|
"loss": 0.854, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.3159922928709056, |
|
"grad_norm": 2.9831981250415525, |
|
"learning_rate": 3.959234803925594e-05, |
|
"loss": 0.8707, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3198458574181118, |
|
"grad_norm": 4.000857879722409, |
|
"learning_rate": 3.956481891958475e-05, |
|
"loss": 0.8518, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3236994219653179, |
|
"grad_norm": 3.4582769025055793, |
|
"learning_rate": 3.9536400594496386e-05, |
|
"loss": 0.8765, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.32755298651252407, |
|
"grad_norm": 3.930610795014948, |
|
"learning_rate": 3.950709435558106e-05, |
|
"loss": 0.9758, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.33140655105973027, |
|
"grad_norm": 3.7172875758901154, |
|
"learning_rate": 3.947690153478396e-05, |
|
"loss": 0.91, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.3352601156069364, |
|
"grad_norm": 4.5575804046102455, |
|
"learning_rate": 3.9445823504344725e-05, |
|
"loss": 0.8402, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.33911368015414256, |
|
"grad_norm": 3.8988704845029223, |
|
"learning_rate": 3.9413861676735034e-05, |
|
"loss": 0.9235, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.34296724470134876, |
|
"grad_norm": 3.965085606406442, |
|
"learning_rate": 3.938101750459447e-05, |
|
"loss": 0.8447, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.3468208092485549, |
|
"grad_norm": 3.046774416650305, |
|
"learning_rate": 3.9347292480664465e-05, |
|
"loss": 0.8227, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35067437379576105, |
|
"grad_norm": 4.720525497029076, |
|
"learning_rate": 3.931268813772047e-05, |
|
"loss": 0.8303, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.35452793834296725, |
|
"grad_norm": 3.961704867922078, |
|
"learning_rate": 3.927720604850226e-05, |
|
"loss": 0.8955, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.3583815028901734, |
|
"grad_norm": 3.3173574867131967, |
|
"learning_rate": 3.92408478256425e-05, |
|
"loss": 0.8748, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.3622350674373796, |
|
"grad_norm": 4.270129625253774, |
|
"learning_rate": 3.920361512159343e-05, |
|
"loss": 0.8354, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.36608863198458574, |
|
"grad_norm": 4.083301813465707, |
|
"learning_rate": 3.916550962855174e-05, |
|
"loss": 0.8428, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.3699421965317919, |
|
"grad_norm": 3.8426066258082745, |
|
"learning_rate": 3.912653307838173e-05, |
|
"loss": 0.882, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.3737957610789981, |
|
"grad_norm": 4.858960264331484, |
|
"learning_rate": 3.908668724253649e-05, |
|
"loss": 0.883, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.37764932562620424, |
|
"grad_norm": 3.529748790118145, |
|
"learning_rate": 3.9045973931977495e-05, |
|
"loss": 0.8818, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.3815028901734104, |
|
"grad_norm": 4.3956107314706685, |
|
"learning_rate": 3.900439499709224e-05, |
|
"loss": 0.8682, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.3853564547206166, |
|
"grad_norm": 4.190396012770089, |
|
"learning_rate": 3.896195232761016e-05, |
|
"loss": 0.8228, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3892100192678227, |
|
"grad_norm": 3.10753800197593, |
|
"learning_rate": 3.891864785251673e-05, |
|
"loss": 0.8403, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.3930635838150289, |
|
"grad_norm": 2.9288118038221125, |
|
"learning_rate": 3.887448353996582e-05, |
|
"loss": 0.8621, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.3969171483622351, |
|
"grad_norm": 5.08781315771577, |
|
"learning_rate": 3.88294613971902e-05, |
|
"loss": 0.9015, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4007707129094412, |
|
"grad_norm": 2.825525117803277, |
|
"learning_rate": 3.8783583470410365e-05, |
|
"loss": 0.746, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4046242774566474, |
|
"grad_norm": 3.7918665790803674, |
|
"learning_rate": 3.87368518447415e-05, |
|
"loss": 0.7484, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.40847784200385356, |
|
"grad_norm": 2.600804604880562, |
|
"learning_rate": 3.8689268644098715e-05, |
|
"loss": 0.8912, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.4123314065510597, |
|
"grad_norm": 3.3941246553611775, |
|
"learning_rate": 3.864083603110053e-05, |
|
"loss": 0.7785, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4161849710982659, |
|
"grad_norm": 4.397542143481628, |
|
"learning_rate": 3.8591556206970594e-05, |
|
"loss": 0.8569, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.42003853564547206, |
|
"grad_norm": 3.0636662940182124, |
|
"learning_rate": 3.8541431411437616e-05, |
|
"loss": 0.8718, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.4238921001926782, |
|
"grad_norm": 4.18117825185653, |
|
"learning_rate": 3.8490463922633564e-05, |
|
"loss": 0.7702, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4277456647398844, |
|
"grad_norm": 2.43373547813132, |
|
"learning_rate": 3.843865605699017e-05, |
|
"loss": 0.7936, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.43159922928709055, |
|
"grad_norm": 3.943685910342036, |
|
"learning_rate": 3.8386010169133596e-05, |
|
"loss": 0.8022, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.43545279383429675, |
|
"grad_norm": 3.655514814817605, |
|
"learning_rate": 3.833252865177748e-05, |
|
"loss": 0.774, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.4393063583815029, |
|
"grad_norm": 3.0272004910923487, |
|
"learning_rate": 3.8278213935614126e-05, |
|
"loss": 0.7132, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.44315992292870904, |
|
"grad_norm": 4.609405687465504, |
|
"learning_rate": 3.8223068489204064e-05, |
|
"loss": 0.9129, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.44701348747591524, |
|
"grad_norm": 3.223236986816773, |
|
"learning_rate": 3.816709481886386e-05, |
|
"loss": 0.766, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.4508670520231214, |
|
"grad_norm": 3.4852321075320933, |
|
"learning_rate": 3.81102954685522e-05, |
|
"loss": 0.7169, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.45472061657032753, |
|
"grad_norm": 2.6796073181657745, |
|
"learning_rate": 3.805267301975424e-05, |
|
"loss": 0.7362, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.45857418111753373, |
|
"grad_norm": 2.839001944033485, |
|
"learning_rate": 3.799423009136434e-05, |
|
"loss": 0.8818, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.4624277456647399, |
|
"grad_norm": 2.328668662319875, |
|
"learning_rate": 3.793496933956699e-05, |
|
"loss": 0.7686, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4662813102119461, |
|
"grad_norm": 3.5890624141236764, |
|
"learning_rate": 3.7874893457716086e-05, |
|
"loss": 0.7887, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.4701348747591522, |
|
"grad_norm": 3.2377955419056494, |
|
"learning_rate": 3.7814005176212555e-05, |
|
"loss": 0.8295, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.47398843930635837, |
|
"grad_norm": 3.1936213076859197, |
|
"learning_rate": 3.775230726238023e-05, |
|
"loss": 0.7653, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.47784200385356457, |
|
"grad_norm": 3.314436046941277, |
|
"learning_rate": 3.7689802520340103e-05, |
|
"loss": 0.7326, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.4816955684007707, |
|
"grad_norm": 1.6311346221294762, |
|
"learning_rate": 3.7626493790882846e-05, |
|
"loss": 0.7467, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.48554913294797686, |
|
"grad_norm": 3.364013140417832, |
|
"learning_rate": 3.756238395133972e-05, |
|
"loss": 0.7601, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.48940269749518306, |
|
"grad_norm": 2.347603254239086, |
|
"learning_rate": 3.7497475915451806e-05, |
|
"loss": 0.823, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.4932562620423892, |
|
"grad_norm": 3.847030422820461, |
|
"learning_rate": 3.743177263323758e-05, |
|
"loss": 0.7091, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.49710982658959535, |
|
"grad_norm": 3.463187618636158, |
|
"learning_rate": 3.7365277090858815e-05, |
|
"loss": 0.7412, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5009633911368016, |
|
"grad_norm": 2.818735989314964, |
|
"learning_rate": 3.729799231048488e-05, |
|
"loss": 0.7571, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5048169556840078, |
|
"grad_norm": 3.5601441226127655, |
|
"learning_rate": 3.722992135015539e-05, |
|
"loss": 0.6498, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.5086705202312138, |
|
"grad_norm": 3.3243689525232716, |
|
"learning_rate": 3.71610673036412e-05, |
|
"loss": 0.7063, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.51252408477842, |
|
"grad_norm": 2.79620087521176, |
|
"learning_rate": 3.709143330030383e-05, |
|
"loss": 0.7519, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5163776493256262, |
|
"grad_norm": 2.6274884881066813, |
|
"learning_rate": 3.702102250495318e-05, |
|
"loss": 0.6862, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5202312138728323, |
|
"grad_norm": 1.310261156652054, |
|
"learning_rate": 3.694983811770375e-05, |
|
"loss": 0.5968, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5240847784200385, |
|
"grad_norm": 2.961796401692403, |
|
"learning_rate": 3.687788337382918e-05, |
|
"loss": 0.6842, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.5279383429672447, |
|
"grad_norm": 3.715435487194137, |
|
"learning_rate": 3.6805161543615186e-05, |
|
"loss": 0.7836, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.5317919075144508, |
|
"grad_norm": 1.9101920792437495, |
|
"learning_rate": 3.673167593221097e-05, |
|
"loss": 0.6828, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.535645472061657, |
|
"grad_norm": 4.2524126868327885, |
|
"learning_rate": 3.665742987947895e-05, |
|
"loss": 0.7685, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.5394990366088632, |
|
"grad_norm": 2.3826617460275217, |
|
"learning_rate": 3.658242675984302e-05, |
|
"loss": 0.6548, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5433526011560693, |
|
"grad_norm": 2.7362234194986823, |
|
"learning_rate": 3.6506669982135166e-05, |
|
"loss": 0.7131, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.5472061657032755, |
|
"grad_norm": 2.839379036151834, |
|
"learning_rate": 3.6430162989440495e-05, |
|
"loss": 0.6846, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.5510597302504817, |
|
"grad_norm": 2.217944430478911, |
|
"learning_rate": 3.635290925894083e-05, |
|
"loss": 0.7183, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.5549132947976878, |
|
"grad_norm": 2.2686355763545203, |
|
"learning_rate": 3.627491230175661e-05, |
|
"loss": 0.6742, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.558766859344894, |
|
"grad_norm": 2.8612073083735075, |
|
"learning_rate": 3.6196175662787326e-05, |
|
"loss": 0.6969, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5626204238921002, |
|
"grad_norm": 2.379426816508497, |
|
"learning_rate": 3.6116702920550445e-05, |
|
"loss": 0.7102, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.5664739884393064, |
|
"grad_norm": 3.5635505745818774, |
|
"learning_rate": 3.6036497687018704e-05, |
|
"loss": 0.737, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.5703275529865125, |
|
"grad_norm": 2.5565000760425094, |
|
"learning_rate": 3.5955563607456025e-05, |
|
"loss": 0.6599, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.5741811175337187, |
|
"grad_norm": 2.7437675382633326, |
|
"learning_rate": 3.5873904360251766e-05, |
|
"loss": 0.7464, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.5780346820809249, |
|
"grad_norm": 2.3771605731220986, |
|
"learning_rate": 3.579152365675359e-05, |
|
"loss": 0.6092, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.581888246628131, |
|
"grad_norm": 2.393218132886115, |
|
"learning_rate": 3.570842524109878e-05, |
|
"loss": 0.711, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.5857418111753372, |
|
"grad_norm": 2.4377648761435196, |
|
"learning_rate": 3.562461289004406e-05, |
|
"loss": 0.6765, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.5895953757225434, |
|
"grad_norm": 1.726127421558755, |
|
"learning_rate": 3.5540090412793926e-05, |
|
"loss": 0.6122, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.5934489402697495, |
|
"grad_norm": 2.2397924893238628, |
|
"learning_rate": 3.545486165082759e-05, |
|
"loss": 0.6207, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.5973025048169557, |
|
"grad_norm": 2.0431721405303764, |
|
"learning_rate": 3.53689304777243e-05, |
|
"loss": 0.621, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6011560693641619, |
|
"grad_norm": 1.8373936110598785, |
|
"learning_rate": 3.528230079898734e-05, |
|
"loss": 0.6832, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.605009633911368, |
|
"grad_norm": 3.218900986872675, |
|
"learning_rate": 3.5194976551866535e-05, |
|
"loss": 0.6867, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6088631984585742, |
|
"grad_norm": 2.7681277459165186, |
|
"learning_rate": 3.510696170517927e-05, |
|
"loss": 0.6388, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.6127167630057804, |
|
"grad_norm": 2.789942125405049, |
|
"learning_rate": 3.5018260259130134e-05, |
|
"loss": 0.65, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.6165703275529865, |
|
"grad_norm": 2.3067452480491193, |
|
"learning_rate": 3.492887624512912e-05, |
|
"loss": 0.5852, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6204238921001927, |
|
"grad_norm": 1.5893723542941685, |
|
"learning_rate": 3.483881372560837e-05, |
|
"loss": 0.6286, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.6242774566473989, |
|
"grad_norm": 2.8053299102587146, |
|
"learning_rate": 3.474807679383758e-05, |
|
"loss": 0.6866, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.628131021194605, |
|
"grad_norm": 1.9932305081667505, |
|
"learning_rate": 3.4656669573737934e-05, |
|
"loss": 0.6618, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.6319845857418112, |
|
"grad_norm": 2.5322390617691677, |
|
"learning_rate": 3.456459621969469e-05, |
|
"loss": 0.6699, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.6358381502890174, |
|
"grad_norm": 2.714324952697276, |
|
"learning_rate": 3.447186091636836e-05, |
|
"loss": 0.567, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.6396917148362236, |
|
"grad_norm": 2.3260631874283795, |
|
"learning_rate": 3.437846787850454e-05, |
|
"loss": 0.6787, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.6435452793834296, |
|
"grad_norm": 3.244130833526928, |
|
"learning_rate": 3.42844213507423e-05, |
|
"loss": 0.6283, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.6473988439306358, |
|
"grad_norm": 2.2697249458670403, |
|
"learning_rate": 3.418972560742133e-05, |
|
"loss": 0.6043, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.651252408477842, |
|
"grad_norm": 2.261002336792345, |
|
"learning_rate": 3.409438495238765e-05, |
|
"loss": 0.7039, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.6551059730250481, |
|
"grad_norm": 2.9798712765043542, |
|
"learning_rate": 3.3998403718798005e-05, |
|
"loss": 0.687, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.6589595375722543, |
|
"grad_norm": 1.7575260995525863, |
|
"learning_rate": 3.390178626892291e-05, |
|
"loss": 0.5704, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.6628131021194605, |
|
"grad_norm": 1.959592501385033, |
|
"learning_rate": 3.38045369939484e-05, |
|
"loss": 0.6365, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 2.7459302559428127, |
|
"learning_rate": 3.370666031377648e-05, |
|
"loss": 0.6081, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.6705202312138728, |
|
"grad_norm": 1.868927815180204, |
|
"learning_rate": 3.3608160676824216e-05, |
|
"loss": 0.5927, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.674373795761079, |
|
"grad_norm": 3.4467192352082856, |
|
"learning_rate": 3.350904255982154e-05, |
|
"loss": 0.6956, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6782273603082851, |
|
"grad_norm": 1.740260469132678, |
|
"learning_rate": 3.3409310467607824e-05, |
|
"loss": 0.5574, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.6820809248554913, |
|
"grad_norm": 2.055938978641809, |
|
"learning_rate": 3.330896893292714e-05, |
|
"loss": 0.5761, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.6859344894026975, |
|
"grad_norm": 2.2106935577605427, |
|
"learning_rate": 3.3208022516222195e-05, |
|
"loss": 0.5955, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.6897880539499036, |
|
"grad_norm": 1.8460288906944335, |
|
"learning_rate": 3.310647580542715e-05, |
|
"loss": 0.5769, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.6936416184971098, |
|
"grad_norm": 2.1583501173847024, |
|
"learning_rate": 3.300433341575901e-05, |
|
"loss": 0.5949, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.697495183044316, |
|
"grad_norm": 2.748430083318313, |
|
"learning_rate": 3.2901599989507935e-05, |
|
"loss": 0.5629, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.7013487475915221, |
|
"grad_norm": 1.942682422315833, |
|
"learning_rate": 3.279828019582621e-05, |
|
"loss": 0.612, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.7052023121387283, |
|
"grad_norm": 3.1214108190867487, |
|
"learning_rate": 3.2694378730516074e-05, |
|
"loss": 0.5638, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.7090558766859345, |
|
"grad_norm": 2.5088639648861912, |
|
"learning_rate": 3.2589900315816266e-05, |
|
"loss": 0.5643, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.7129094412331407, |
|
"grad_norm": 1.6797637949182582, |
|
"learning_rate": 3.24848497001874e-05, |
|
"loss": 0.5467, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.7167630057803468, |
|
"grad_norm": 2.2031923107978004, |
|
"learning_rate": 3.237923165809619e-05, |
|
"loss": 0.6403, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.720616570327553, |
|
"grad_norm": 2.217707978859453, |
|
"learning_rate": 3.227305098979842e-05, |
|
"loss": 0.5575, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.7244701348747592, |
|
"grad_norm": 1.8030994444840251, |
|
"learning_rate": 3.2166312521120775e-05, |
|
"loss": 0.5964, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.7283236994219653, |
|
"grad_norm": 2.7881810961948035, |
|
"learning_rate": 3.2059021103241556e-05, |
|
"loss": 0.5627, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.7321772639691715, |
|
"grad_norm": 1.6025626542354654, |
|
"learning_rate": 3.195118161247011e-05, |
|
"loss": 0.5366, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.7360308285163777, |
|
"grad_norm": 2.7898667044495196, |
|
"learning_rate": 3.184279895002533e-05, |
|
"loss": 0.6269, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.7398843930635838, |
|
"grad_norm": 2.012521948946768, |
|
"learning_rate": 3.1733878041812756e-05, |
|
"loss": 0.578, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.74373795761079, |
|
"grad_norm": 1.7202183467823577, |
|
"learning_rate": 3.1624423838200824e-05, |
|
"loss": 0.6021, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.7475915221579962, |
|
"grad_norm": 2.391244509621585, |
|
"learning_rate": 3.151444131379579e-05, |
|
"loss": 0.6227, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.7514450867052023, |
|
"grad_norm": 1.290881388687675, |
|
"learning_rate": 3.140393546721569e-05, |
|
"loss": 0.4976, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.7552986512524085, |
|
"grad_norm": 2.747140614154264, |
|
"learning_rate": 3.1292911320863104e-05, |
|
"loss": 0.5795, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.7591522157996147, |
|
"grad_norm": 2.1090959982249022, |
|
"learning_rate": 3.118137392069696e-05, |
|
"loss": 0.5987, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.7630057803468208, |
|
"grad_norm": 1.920242941952473, |
|
"learning_rate": 3.106932833600314e-05, |
|
"loss": 0.5862, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.766859344894027, |
|
"grad_norm": 2.4556835621447477, |
|
"learning_rate": 3.095677965916411e-05, |
|
"loss": 0.5432, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.7707129094412332, |
|
"grad_norm": 1.7404959875815949, |
|
"learning_rate": 3.084373300542748e-05, |
|
"loss": 0.4656, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7745664739884393, |
|
"grad_norm": 2.4156874663926433, |
|
"learning_rate": 3.0730193512673515e-05, |
|
"loss": 0.6341, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.7784200385356455, |
|
"grad_norm": 1.9237119705970884, |
|
"learning_rate": 3.06161663411816e-05, |
|
"loss": 0.4988, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.7822736030828517, |
|
"grad_norm": 2.1640461913240245, |
|
"learning_rate": 3.0501656673395756e-05, |
|
"loss": 0.5871, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.7861271676300579, |
|
"grad_norm": 2.7705536239605704, |
|
"learning_rate": 3.0386669713689057e-05, |
|
"loss": 0.6961, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.789980732177264, |
|
"grad_norm": 2.361536461003985, |
|
"learning_rate": 3.0271210688127123e-05, |
|
"loss": 0.5827, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.7938342967244701, |
|
"grad_norm": 1.724308147378069, |
|
"learning_rate": 3.015528484423059e-05, |
|
"loss": 0.5157, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.7976878612716763, |
|
"grad_norm": 1.8279294004024718, |
|
"learning_rate": 3.0038897450736612e-05, |
|
"loss": 0.5094, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.8015414258188824, |
|
"grad_norm": 1.836316615161725, |
|
"learning_rate": 2.9922053797359406e-05, |
|
"loss": 0.5172, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.8053949903660886, |
|
"grad_norm": 1.6458484857811015, |
|
"learning_rate": 2.980475919454984e-05, |
|
"loss": 0.5253, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.8092485549132948, |
|
"grad_norm": 2.2344584087944535, |
|
"learning_rate": 2.9687018973254055e-05, |
|
"loss": 0.5379, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.8131021194605009, |
|
"grad_norm": 1.8412670292971436, |
|
"learning_rate": 2.956883848467123e-05, |
|
"loss": 0.5655, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.8169556840077071, |
|
"grad_norm": 1.6198673535897559, |
|
"learning_rate": 2.945022310001032e-05, |
|
"loss": 0.4977, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.8208092485549133, |
|
"grad_norm": 1.6875535729885185, |
|
"learning_rate": 2.9331178210245962e-05, |
|
"loss": 0.4592, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.8246628131021194, |
|
"grad_norm": 1.5196518137741946, |
|
"learning_rate": 2.921170922587346e-05, |
|
"loss": 0.5223, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.8285163776493256, |
|
"grad_norm": 1.5091415438145863, |
|
"learning_rate": 2.909182157666287e-05, |
|
"loss": 0.4706, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.8323699421965318, |
|
"grad_norm": 2.1212922277105, |
|
"learning_rate": 2.897152071141225e-05, |
|
"loss": 0.4369, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.8362235067437379, |
|
"grad_norm": 1.6767049591987147, |
|
"learning_rate": 2.885081209769998e-05, |
|
"loss": 0.5098, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.8400770712909441, |
|
"grad_norm": 1.6518844332576945, |
|
"learning_rate": 2.8729701221636294e-05, |
|
"loss": 0.5113, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.8439306358381503, |
|
"grad_norm": 2.0287593345047665, |
|
"learning_rate": 2.8608193587613917e-05, |
|
"loss": 0.5342, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.8477842003853564, |
|
"grad_norm": 1.4891909790069897, |
|
"learning_rate": 2.8486294718057936e-05, |
|
"loss": 0.4461, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.8516377649325626, |
|
"grad_norm": 1.52500498490846, |
|
"learning_rate": 2.8364010153174733e-05, |
|
"loss": 0.543, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.8554913294797688, |
|
"grad_norm": 1.5785692053698055, |
|
"learning_rate": 2.8241345450700275e-05, |
|
"loss": 0.4329, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.859344894026975, |
|
"grad_norm": 2.2507626884600187, |
|
"learning_rate": 2.8118306185647458e-05, |
|
"loss": 0.5709, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.8631984585741811, |
|
"grad_norm": 1.7477813182417148, |
|
"learning_rate": 2.7994897950052764e-05, |
|
"loss": 0.5607, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.8670520231213873, |
|
"grad_norm": 1.5393581266661793, |
|
"learning_rate": 2.7871126352722086e-05, |
|
"loss": 0.4671, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.8709055876685935, |
|
"grad_norm": 1.5948515405066201, |
|
"learning_rate": 2.7746997018975804e-05, |
|
"loss": 0.5141, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.8747591522157996, |
|
"grad_norm": 1.6725721797685633, |
|
"learning_rate": 2.7622515590393158e-05, |
|
"loss": 0.5134, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.8786127167630058, |
|
"grad_norm": 1.9430866061833014, |
|
"learning_rate": 2.74976877245558e-05, |
|
"loss": 0.4567, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.882466281310212, |
|
"grad_norm": 1.7951935855325456, |
|
"learning_rate": 2.737251909479068e-05, |
|
"loss": 0.5176, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.8863198458574181, |
|
"grad_norm": 1.5515427698063549, |
|
"learning_rate": 2.7247015389912203e-05, |
|
"loss": 0.4987, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8901734104046243, |
|
"grad_norm": 2.423785012348166, |
|
"learning_rate": 2.7121182313963666e-05, |
|
"loss": 0.4493, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.8940269749518305, |
|
"grad_norm": 1.5248030223399374, |
|
"learning_rate": 2.6995025585958026e-05, |
|
"loss": 0.5154, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.8978805394990366, |
|
"grad_norm": 1.9085369464839403, |
|
"learning_rate": 2.686855093961795e-05, |
|
"loss": 0.4695, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.9017341040462428, |
|
"grad_norm": 2.4921294993771657, |
|
"learning_rate": 2.674176412311527e-05, |
|
"loss": 0.5108, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.905587668593449, |
|
"grad_norm": 1.016492898729909, |
|
"learning_rate": 2.6614670898809675e-05, |
|
"loss": 0.4305, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.9094412331406551, |
|
"grad_norm": 1.7973135593288705, |
|
"learning_rate": 2.648727704298685e-05, |
|
"loss": 0.5085, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.9132947976878613, |
|
"grad_norm": 1.298610518247185, |
|
"learning_rate": 2.6359588345595956e-05, |
|
"loss": 0.4336, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.9171483622350675, |
|
"grad_norm": 1.7615802157799905, |
|
"learning_rate": 2.6231610609986442e-05, |
|
"loss": 0.4425, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.9210019267822736, |
|
"grad_norm": 2.0393603639058404, |
|
"learning_rate": 2.6103349652644356e-05, |
|
"loss": 0.5329, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.9248554913294798, |
|
"grad_norm": 1.1646102704038852, |
|
"learning_rate": 2.5974811302927907e-05, |
|
"loss": 0.4098, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.928709055876686, |
|
"grad_norm": 1.6746235496159276, |
|
"learning_rate": 2.5846001402802594e-05, |
|
"loss": 0.4589, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.9325626204238922, |
|
"grad_norm": 1.4114912644400681, |
|
"learning_rate": 2.5716925806575628e-05, |
|
"loss": 0.4426, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.9364161849710982, |
|
"grad_norm": 1.5300374827507783, |
|
"learning_rate": 2.5587590380629947e-05, |
|
"loss": 0.4389, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.9402697495183044, |
|
"grad_norm": 2.084072213353826, |
|
"learning_rate": 2.54580010031575e-05, |
|
"loss": 0.5058, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.9441233140655106, |
|
"grad_norm": 1.3650050749477864, |
|
"learning_rate": 2.5328163563892162e-05, |
|
"loss": 0.4072, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.9479768786127167, |
|
"grad_norm": 1.3576937516102847, |
|
"learning_rate": 2.5198083963841988e-05, |
|
"loss": 0.4471, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.9518304431599229, |
|
"grad_norm": 1.5006654890833853, |
|
"learning_rate": 2.5067768115021077e-05, |
|
"loss": 0.4013, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.9556840077071291, |
|
"grad_norm": 1.3335516949676123, |
|
"learning_rate": 2.493722194018082e-05, |
|
"loss": 0.4171, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.9595375722543352, |
|
"grad_norm": 1.7188267137418751, |
|
"learning_rate": 2.4806451372540767e-05, |
|
"loss": 0.436, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.9633911368015414, |
|
"grad_norm": 1.5038230569609343, |
|
"learning_rate": 2.467546235551892e-05, |
|
"loss": 0.4305, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.9672447013487476, |
|
"grad_norm": 1.2898240292343814, |
|
"learning_rate": 2.4544260842461638e-05, |
|
"loss": 0.4476, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.9710982658959537, |
|
"grad_norm": 1.7360498495478183, |
|
"learning_rate": 2.441285279637307e-05, |
|
"loss": 0.4324, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.9749518304431599, |
|
"grad_norm": 1.8924393082812891, |
|
"learning_rate": 2.4281244189644108e-05, |
|
"loss": 0.439, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.9788053949903661, |
|
"grad_norm": 1.2832735489854135, |
|
"learning_rate": 2.414944100378097e-05, |
|
"loss": 0.3963, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.9826589595375722, |
|
"grad_norm": 1.9408813544452537, |
|
"learning_rate": 2.401744922913334e-05, |
|
"loss": 0.4501, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.9865125240847784, |
|
"grad_norm": 1.2867999932148788, |
|
"learning_rate": 2.388527486462212e-05, |
|
"loss": 0.3981, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.9903660886319846, |
|
"grad_norm": 1.0196210656417135, |
|
"learning_rate": 2.3752923917466763e-05, |
|
"loss": 0.4396, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.9942196531791907, |
|
"grad_norm": 1.2740978791601063, |
|
"learning_rate": 2.362040240291227e-05, |
|
"loss": 0.3905, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.9980732177263969, |
|
"grad_norm": 1.050993597160996, |
|
"learning_rate": 2.34877163439558e-05, |
|
"loss": 0.3971, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.9980732177263969, |
|
"eval_loss": 0.4258769750595093, |
|
"eval_runtime": 151.7734, |
|
"eval_samples_per_second": 13.164, |
|
"eval_steps_per_second": 0.415, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.001926782273603, |
|
"grad_norm": 1.3018898740308236, |
|
"learning_rate": 2.3354871771072906e-05, |
|
"loss": 0.3663, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.0057803468208093, |
|
"grad_norm": 1.768289755543873, |
|
"learning_rate": 2.3221874721943495e-05, |
|
"loss": 0.3706, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.0096339113680155, |
|
"grad_norm": 1.3709089238690337, |
|
"learning_rate": 2.3088731241177378e-05, |
|
"loss": 0.4303, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.0134874759152215, |
|
"grad_norm": 1.2747609251876415, |
|
"learning_rate": 2.2955447380039576e-05, |
|
"loss": 0.3878, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.0173410404624277, |
|
"grad_norm": 1.833982622948961, |
|
"learning_rate": 2.282202919617529e-05, |
|
"loss": 0.4726, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.0211946050096339, |
|
"grad_norm": 1.0257892835642075, |
|
"learning_rate": 2.2688482753334568e-05, |
|
"loss": 0.4311, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.02504816955684, |
|
"grad_norm": 1.211081382753947, |
|
"learning_rate": 2.2554814121096748e-05, |
|
"loss": 0.3797, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.0289017341040463, |
|
"grad_norm": 1.603141836779412, |
|
"learning_rate": 2.242102937459456e-05, |
|
"loss": 0.4334, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.0327552986512525, |
|
"grad_norm": 0.9082356976857431, |
|
"learning_rate": 2.228713459423804e-05, |
|
"loss": 0.3477, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.0366088631984587, |
|
"grad_norm": 1.4556959201763473, |
|
"learning_rate": 2.215313586543818e-05, |
|
"loss": 0.4605, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.0404624277456647, |
|
"grad_norm": 1.157918758481043, |
|
"learning_rate": 2.2019039278330324e-05, |
|
"loss": 0.3749, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.0443159922928709, |
|
"grad_norm": 1.2365434381583742, |
|
"learning_rate": 2.188485092749744e-05, |
|
"loss": 0.3438, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.048169556840077, |
|
"grad_norm": 1.3583710499888804, |
|
"learning_rate": 2.1750576911693043e-05, |
|
"loss": 0.4338, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.0520231213872833, |
|
"grad_norm": 1.373322024670892, |
|
"learning_rate": 2.161622333356408e-05, |
|
"loss": 0.4425, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.0558766859344895, |
|
"grad_norm": 1.5222033465026896, |
|
"learning_rate": 2.148179629937352e-05, |
|
"loss": 0.3746, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.0597302504816957, |
|
"grad_norm": 0.9301197570829759, |
|
"learning_rate": 2.134730191872288e-05, |
|
"loss": 0.3603, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.0635838150289016, |
|
"grad_norm": 1.3349261365465133, |
|
"learning_rate": 2.1212746304274482e-05, |
|
"loss": 0.3896, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.0674373795761078, |
|
"grad_norm": 1.6542379771126683, |
|
"learning_rate": 2.1078135571473712e-05, |
|
"loss": 0.4346, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.071290944123314, |
|
"grad_norm": 1.2625424774815728, |
|
"learning_rate": 2.094347583827102e-05, |
|
"loss": 0.4369, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.0751445086705202, |
|
"grad_norm": 1.242814350522191, |
|
"learning_rate": 2.0808773224843882e-05, |
|
"loss": 0.4306, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.0789980732177264, |
|
"grad_norm": 1.441960169277262, |
|
"learning_rate": 2.0674033853318666e-05, |
|
"loss": 0.3905, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.0828516377649327, |
|
"grad_norm": 1.176801789036799, |
|
"learning_rate": 2.0539263847492355e-05, |
|
"loss": 0.3745, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.0867052023121386, |
|
"grad_norm": 1.0478011807250012, |
|
"learning_rate": 2.040446933255423e-05, |
|
"loss": 0.4304, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.0905587668593448, |
|
"grad_norm": 0.991803808778807, |
|
"learning_rate": 2.0269656434807504e-05, |
|
"loss": 0.3328, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.094412331406551, |
|
"grad_norm": 1.536330372789938, |
|
"learning_rate": 2.013483128139086e-05, |
|
"loss": 0.391, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.0982658959537572, |
|
"grad_norm": 1.1995638320125537, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4014, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.1021194605009634, |
|
"grad_norm": 1.0998808179080417, |
|
"learning_rate": 1.9865168718609142e-05, |
|
"loss": 0.3467, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.1059730250481696, |
|
"grad_norm": 1.1533276354863014, |
|
"learning_rate": 1.9730343565192506e-05, |
|
"loss": 0.3654, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.1098265895953756, |
|
"grad_norm": 1.395165430926343, |
|
"learning_rate": 1.9595530667445775e-05, |
|
"loss": 0.3897, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.1136801541425818, |
|
"grad_norm": 1.3829869394831174, |
|
"learning_rate": 1.946073615250765e-05, |
|
"loss": 0.3664, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.117533718689788, |
|
"grad_norm": 1.6694598636029911, |
|
"learning_rate": 1.9325966146681337e-05, |
|
"loss": 0.3937, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.1213872832369942, |
|
"grad_norm": 1.3093898924216434, |
|
"learning_rate": 1.919122677515612e-05, |
|
"loss": 0.4475, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.1252408477842004, |
|
"grad_norm": 1.2830702205218405, |
|
"learning_rate": 1.905652416172899e-05, |
|
"loss": 0.4055, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.1290944123314066, |
|
"grad_norm": 1.422016127363369, |
|
"learning_rate": 1.8921864428526295e-05, |
|
"loss": 0.4306, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.1329479768786128, |
|
"grad_norm": 1.0443832316958135, |
|
"learning_rate": 1.8787253695725524e-05, |
|
"loss": 0.306, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.1368015414258188, |
|
"grad_norm": 1.0396170075258768, |
|
"learning_rate": 1.8652698081277127e-05, |
|
"loss": 0.3808, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.140655105973025, |
|
"grad_norm": 1.4482799449738568, |
|
"learning_rate": 1.851820370062648e-05, |
|
"loss": 0.3766, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.1445086705202312, |
|
"grad_norm": 1.4383150667662055, |
|
"learning_rate": 1.8383776666435927e-05, |
|
"loss": 0.3807, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.1483622350674374, |
|
"grad_norm": 1.052851787398113, |
|
"learning_rate": 1.824942308830696e-05, |
|
"loss": 0.3193, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.1522157996146436, |
|
"grad_norm": 1.1901475083434114, |
|
"learning_rate": 1.8115149072502564e-05, |
|
"loss": 0.3644, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.1560693641618498, |
|
"grad_norm": 1.0719741190000858, |
|
"learning_rate": 1.798096072166968e-05, |
|
"loss": 0.3265, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1599229287090558, |
|
"grad_norm": 0.9399484435440966, |
|
"learning_rate": 1.7846864134561828e-05, |
|
"loss": 0.3013, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.163776493256262, |
|
"grad_norm": 0.8649780684652991, |
|
"learning_rate": 1.7712865405761967e-05, |
|
"loss": 0.331, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.1676300578034682, |
|
"grad_norm": 1.093013914822741, |
|
"learning_rate": 1.757897062540545e-05, |
|
"loss": 0.3672, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.1714836223506744, |
|
"grad_norm": 1.3242259881261507, |
|
"learning_rate": 1.7445185878903252e-05, |
|
"loss": 0.3461, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.1753371868978806, |
|
"grad_norm": 1.5294924031862211, |
|
"learning_rate": 1.7311517246665435e-05, |
|
"loss": 0.3381, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.1791907514450868, |
|
"grad_norm": 1.4097073214129088, |
|
"learning_rate": 1.7177970803824714e-05, |
|
"loss": 0.4501, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.183044315992293, |
|
"grad_norm": 1.1405636655876248, |
|
"learning_rate": 1.7044552619960434e-05, |
|
"loss": 0.322, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.186897880539499, |
|
"grad_norm": 1.172637190005047, |
|
"learning_rate": 1.691126875882263e-05, |
|
"loss": 0.3453, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.1907514450867052, |
|
"grad_norm": 1.0104423854938296, |
|
"learning_rate": 1.677812527805651e-05, |
|
"loss": 0.3248, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.1946050096339114, |
|
"grad_norm": 0.9585927088271504, |
|
"learning_rate": 1.6645128228927104e-05, |
|
"loss": 0.3153, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.1984585741811176, |
|
"grad_norm": 0.7642180964221266, |
|
"learning_rate": 1.6512283656044207e-05, |
|
"loss": 0.3576, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.2023121387283238, |
|
"grad_norm": 1.064288446414666, |
|
"learning_rate": 1.637959759708774e-05, |
|
"loss": 0.3842, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.2061657032755297, |
|
"grad_norm": 1.1471701151580358, |
|
"learning_rate": 1.6247076082533244e-05, |
|
"loss": 0.3682, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.210019267822736, |
|
"grad_norm": 0.7613384576508663, |
|
"learning_rate": 1.6114725135377883e-05, |
|
"loss": 0.3671, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.2138728323699421, |
|
"grad_norm": 1.1314879376679692, |
|
"learning_rate": 1.5982550770866665e-05, |
|
"loss": 0.3509, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.2177263969171483, |
|
"grad_norm": 1.0721877801971562, |
|
"learning_rate": 1.585055899621904e-05, |
|
"loss": 0.388, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.2215799614643545, |
|
"grad_norm": 1.261955038248264, |
|
"learning_rate": 1.5718755810355895e-05, |
|
"loss": 0.3453, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.2254335260115607, |
|
"grad_norm": 1.2728238431643502, |
|
"learning_rate": 1.5587147203626934e-05, |
|
"loss": 0.3638, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.229287090558767, |
|
"grad_norm": 1.3228194347927702, |
|
"learning_rate": 1.5455739157538362e-05, |
|
"loss": 0.3922, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.2331406551059731, |
|
"grad_norm": 0.759365184137883, |
|
"learning_rate": 1.532453764448109e-05, |
|
"loss": 0.3769, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.2369942196531791, |
|
"grad_norm": 1.0387794799239043, |
|
"learning_rate": 1.5193548627459238e-05, |
|
"loss": 0.3487, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.2408477842003853, |
|
"grad_norm": 1.653174900496495, |
|
"learning_rate": 1.5062778059819184e-05, |
|
"loss": 0.3325, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.2447013487475915, |
|
"grad_norm": 0.9071493490028995, |
|
"learning_rate": 1.493223188497893e-05, |
|
"loss": 0.3099, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.2485549132947977, |
|
"grad_norm": 0.909128453253325, |
|
"learning_rate": 1.4801916036158017e-05, |
|
"loss": 0.3484, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.252408477842004, |
|
"grad_norm": 0.9104702137645303, |
|
"learning_rate": 1.4671836436107851e-05, |
|
"loss": 0.3151, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.25626204238921, |
|
"grad_norm": 1.0657384235342173, |
|
"learning_rate": 1.4541998996842503e-05, |
|
"loss": 0.3328, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.260115606936416, |
|
"grad_norm": 1.0115714194064442, |
|
"learning_rate": 1.4412409619370058e-05, |
|
"loss": 0.3114, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.2639691714836223, |
|
"grad_norm": 0.9001086745125582, |
|
"learning_rate": 1.4283074193424379e-05, |
|
"loss": 0.3188, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.2678227360308285, |
|
"grad_norm": 1.2276345166542122, |
|
"learning_rate": 1.4153998597197417e-05, |
|
"loss": 0.3498, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.2716763005780347, |
|
"grad_norm": 0.7533021116197035, |
|
"learning_rate": 1.4025188697072098e-05, |
|
"loss": 0.3418, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.275529865125241, |
|
"grad_norm": 0.7620164921410112, |
|
"learning_rate": 1.3896650347355652e-05, |
|
"loss": 0.2843, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.2793834296724471, |
|
"grad_norm": 1.085725296514296, |
|
"learning_rate": 1.3768389390013558e-05, |
|
"loss": 0.3698, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.2832369942196533, |
|
"grad_norm": 0.7656462946423266, |
|
"learning_rate": 1.3640411654404058e-05, |
|
"loss": 0.3526, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.2870905587668593, |
|
"grad_norm": 0.8688261241761966, |
|
"learning_rate": 1.3512722957013157e-05, |
|
"loss": 0.3287, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.2909441233140655, |
|
"grad_norm": 0.9102316741848134, |
|
"learning_rate": 1.3385329101190338e-05, |
|
"loss": 0.3328, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.2947976878612717, |
|
"grad_norm": 0.8532527486919501, |
|
"learning_rate": 1.3258235876884735e-05, |
|
"loss": 0.3231, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.298651252408478, |
|
"grad_norm": 0.6725300585239861, |
|
"learning_rate": 1.3131449060382053e-05, |
|
"loss": 0.2852, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.3025048169556839, |
|
"grad_norm": 1.0698268373783526, |
|
"learning_rate": 1.3004974414041987e-05, |
|
"loss": 0.3087, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.30635838150289, |
|
"grad_norm": 1.1722160127956573, |
|
"learning_rate": 1.287881768603634e-05, |
|
"loss": 0.3365, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.3102119460500963, |
|
"grad_norm": 0.6736626507697272, |
|
"learning_rate": 1.27529846100878e-05, |
|
"loss": 0.2999, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.3140655105973025, |
|
"grad_norm": 1.1063913885843315, |
|
"learning_rate": 1.2627480905209328e-05, |
|
"loss": 0.2894, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.3179190751445087, |
|
"grad_norm": 1.109602932151094, |
|
"learning_rate": 1.2502312275444205e-05, |
|
"loss": 0.3495, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.3217726396917149, |
|
"grad_norm": 1.1957687821810759, |
|
"learning_rate": 1.2377484409606848e-05, |
|
"loss": 0.3601, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.325626204238921, |
|
"grad_norm": 1.128237352664405, |
|
"learning_rate": 1.22530029810242e-05, |
|
"loss": 0.3514, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.3294797687861273, |
|
"grad_norm": 1.415323100911558, |
|
"learning_rate": 1.2128873647277919e-05, |
|
"loss": 0.3287, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 1.236826446216501, |
|
"learning_rate": 1.200510204994724e-05, |
|
"loss": 0.3062, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.3371868978805395, |
|
"grad_norm": 1.0869697615225693, |
|
"learning_rate": 1.1881693814352543e-05, |
|
"loss": 0.3372, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.3410404624277457, |
|
"grad_norm": 0.8579173376211442, |
|
"learning_rate": 1.1758654549299735e-05, |
|
"loss": 0.3162, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.3448940269749519, |
|
"grad_norm": 1.1076519772349536, |
|
"learning_rate": 1.1635989846825275e-05, |
|
"loss": 0.3595, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.348747591522158, |
|
"grad_norm": 1.0968809462254943, |
|
"learning_rate": 1.1513705281942072e-05, |
|
"loss": 0.3138, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.352601156069364, |
|
"grad_norm": 0.8097631352039958, |
|
"learning_rate": 1.1391806412386086e-05, |
|
"loss": 0.3064, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.3564547206165702, |
|
"grad_norm": 0.9302059439245486, |
|
"learning_rate": 1.127029877836371e-05, |
|
"loss": 0.3361, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.3603082851637764, |
|
"grad_norm": 1.1085708625474628, |
|
"learning_rate": 1.1149187902300032e-05, |
|
"loss": 0.2918, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.3641618497109826, |
|
"grad_norm": 1.2982099303179524, |
|
"learning_rate": 1.102847928858776e-05, |
|
"loss": 0.3243, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.3680154142581888, |
|
"grad_norm": 0.8325178281772523, |
|
"learning_rate": 1.0908178423337135e-05, |
|
"loss": 0.3042, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.371868978805395, |
|
"grad_norm": 1.0264691032333937, |
|
"learning_rate": 1.0788290774126549e-05, |
|
"loss": 0.3674, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.3757225433526012, |
|
"grad_norm": 1.5611140030270088, |
|
"learning_rate": 1.0668821789754041e-05, |
|
"loss": 0.3395, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.3795761078998074, |
|
"grad_norm": 0.6905356535042156, |
|
"learning_rate": 1.0549776899989686e-05, |
|
"loss": 0.326, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.3834296724470134, |
|
"grad_norm": 0.813432697566474, |
|
"learning_rate": 1.043116151532877e-05, |
|
"loss": 0.3308, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.3872832369942196, |
|
"grad_norm": 0.947125986929793, |
|
"learning_rate": 1.0312981026745952e-05, |
|
"loss": 0.3079, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.3911368015414258, |
|
"grad_norm": 0.9541134784519973, |
|
"learning_rate": 1.019524080545017e-05, |
|
"loss": 0.3527, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.394990366088632, |
|
"grad_norm": 0.650621794786191, |
|
"learning_rate": 1.0077946202640603e-05, |
|
"loss": 0.2879, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.3988439306358382, |
|
"grad_norm": 0.7999542405054426, |
|
"learning_rate": 9.961102549263393e-06, |
|
"loss": 0.293, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.4026974951830442, |
|
"grad_norm": 1.0074632544627469, |
|
"learning_rate": 9.844715155769418e-06, |
|
"loss": 0.3679, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.4065510597302504, |
|
"grad_norm": 0.7914057577416254, |
|
"learning_rate": 9.72878931187288e-06, |
|
"loss": 0.2646, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.4104046242774566, |
|
"grad_norm": 0.8463237752662545, |
|
"learning_rate": 9.613330286310952e-06, |
|
"loss": 0.2932, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.4142581888246628, |
|
"grad_norm": 0.932880991782926, |
|
"learning_rate": 9.498343326604249e-06, |
|
"loss": 0.3013, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.418111753371869, |
|
"grad_norm": 0.9821817374697017, |
|
"learning_rate": 9.3838336588184e-06, |
|
"loss": 0.3028, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.4219653179190752, |
|
"grad_norm": 0.8469145630358891, |
|
"learning_rate": 9.269806487326491e-06, |
|
"loss": 0.3345, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.4258188824662814, |
|
"grad_norm": 1.037169772414413, |
|
"learning_rate": 9.156266994572518e-06, |
|
"loss": 0.355, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.4296724470134876, |
|
"grad_norm": 0.7213160294660059, |
|
"learning_rate": 9.043220340835895e-06, |
|
"loss": 0.2769, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.4335260115606936, |
|
"grad_norm": 0.8831900688609602, |
|
"learning_rate": 8.930671663996864e-06, |
|
"loss": 0.2811, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.4373795761078998, |
|
"grad_norm": 0.9088605343264572, |
|
"learning_rate": 8.818626079303038e-06, |
|
"loss": 0.3326, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.441233140655106, |
|
"grad_norm": 1.0151812161616465, |
|
"learning_rate": 8.707088679136898e-06, |
|
"loss": 0.3964, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.4450867052023122, |
|
"grad_norm": 0.721796968080708, |
|
"learning_rate": 8.59606453278432e-06, |
|
"loss": 0.2616, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.4489402697495182, |
|
"grad_norm": 0.7436852548835157, |
|
"learning_rate": 8.485558686204215e-06, |
|
"loss": 0.3289, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.4527938342967244, |
|
"grad_norm": 0.9085710653664684, |
|
"learning_rate": 8.37557616179918e-06, |
|
"loss": 0.3263, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.4566473988439306, |
|
"grad_norm": 0.7242098860139589, |
|
"learning_rate": 8.266121958187246e-06, |
|
"loss": 0.3063, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.4605009633911368, |
|
"grad_norm": 0.8037417491106659, |
|
"learning_rate": 8.15720104997468e-06, |
|
"loss": 0.2835, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.464354527938343, |
|
"grad_norm": 0.6693841689504254, |
|
"learning_rate": 8.048818387529888e-06, |
|
"loss": 0.3096, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.4682080924855492, |
|
"grad_norm": 0.6386913396187637, |
|
"learning_rate": 7.940978896758449e-06, |
|
"loss": 0.2871, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.4720616570327554, |
|
"grad_norm": 0.7334763653626704, |
|
"learning_rate": 7.833687478879228e-06, |
|
"loss": 0.32, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.4759152215799616, |
|
"grad_norm": 0.7171482705229991, |
|
"learning_rate": 7.726949010201585e-06, |
|
"loss": 0.3153, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.4797687861271676, |
|
"grad_norm": 0.786019322707822, |
|
"learning_rate": 7.620768341903817e-06, |
|
"loss": 0.2843, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.4836223506743738, |
|
"grad_norm": 0.9217824425126812, |
|
"learning_rate": 7.5151502998126035e-06, |
|
"loss": 0.342, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.48747591522158, |
|
"grad_norm": 0.9974009492954946, |
|
"learning_rate": 7.410099684183738e-06, |
|
"loss": 0.2469, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.4913294797687862, |
|
"grad_norm": 0.7613952875838962, |
|
"learning_rate": 7.305621269483927e-06, |
|
"loss": 0.311, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.4951830443159924, |
|
"grad_norm": 0.8389199365953313, |
|
"learning_rate": 7.201719804173797e-06, |
|
"loss": 0.321, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.4990366088631983, |
|
"grad_norm": 0.8436343238052573, |
|
"learning_rate": 7.098400010492079e-06, |
|
"loss": 0.3375, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.5028901734104045, |
|
"grad_norm": 0.7057291557928584, |
|
"learning_rate": 6.995666584240998e-06, |
|
"loss": 0.2924, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.5067437379576107, |
|
"grad_norm": 0.7636872711492277, |
|
"learning_rate": 6.893524194572856e-06, |
|
"loss": 0.293, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.510597302504817, |
|
"grad_norm": 0.926804984511261, |
|
"learning_rate": 6.791977483777808e-06, |
|
"loss": 0.304, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.5144508670520231, |
|
"grad_norm": 0.8113244891141366, |
|
"learning_rate": 6.691031067072866e-06, |
|
"loss": 0.3202, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.5183044315992293, |
|
"grad_norm": 0.6667861770618242, |
|
"learning_rate": 6.5906895323921805e-06, |
|
"loss": 0.2816, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.5221579961464355, |
|
"grad_norm": 0.8795394551278755, |
|
"learning_rate": 6.490957440178467e-06, |
|
"loss": 0.2892, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.5260115606936417, |
|
"grad_norm": 0.9108462944261728, |
|
"learning_rate": 6.391839323175788e-06, |
|
"loss": 0.3503, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.529865125240848, |
|
"grad_norm": 0.8980380594575577, |
|
"learning_rate": 6.293339686223521e-06, |
|
"loss": 0.3322, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.533718689788054, |
|
"grad_norm": 0.6868397537542229, |
|
"learning_rate": 6.1954630060516005e-06, |
|
"loss": 0.2826, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.5375722543352601, |
|
"grad_norm": 0.7791737364861953, |
|
"learning_rate": 6.098213731077101e-06, |
|
"loss": 0.3134, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.5414258188824663, |
|
"grad_norm": 0.6867571528771647, |
|
"learning_rate": 6.001596281201998e-06, |
|
"loss": 0.2865, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5452793834296723, |
|
"grad_norm": 0.9804075340302699, |
|
"learning_rate": 5.905615047612352e-06, |
|
"loss": 0.3528, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.5491329479768785, |
|
"grad_norm": 0.6720710667167724, |
|
"learning_rate": 5.810274392578672e-06, |
|
"loss": 0.2754, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.5529865125240847, |
|
"grad_norm": 0.6802925027816661, |
|
"learning_rate": 5.715578649257709e-06, |
|
"loss": 0.2516, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.556840077071291, |
|
"grad_norm": 0.7494941138748478, |
|
"learning_rate": 5.621532121495468e-06, |
|
"loss": 0.2928, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.560693641618497, |
|
"grad_norm": 0.5855579359311635, |
|
"learning_rate": 5.528139083631641e-06, |
|
"loss": 0.2755, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.5645472061657033, |
|
"grad_norm": 0.8765684000753372, |
|
"learning_rate": 5.4354037803053124e-06, |
|
"loss": 0.2721, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.5684007707129095, |
|
"grad_norm": 0.8693102037593315, |
|
"learning_rate": 5.343330426262075e-06, |
|
"loss": 0.2847, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.5722543352601157, |
|
"grad_norm": 1.0221938673812996, |
|
"learning_rate": 5.2519232061624255e-06, |
|
"loss": 0.3375, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.576107899807322, |
|
"grad_norm": 0.6916381894839094, |
|
"learning_rate": 5.161186274391632e-06, |
|
"loss": 0.2952, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.579961464354528, |
|
"grad_norm": 0.6607350250192912, |
|
"learning_rate": 5.071123754870888e-06, |
|
"loss": 0.2892, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.583815028901734, |
|
"grad_norm": 0.709997113458217, |
|
"learning_rate": 4.981739740869866e-06, |
|
"loss": 0.3006, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.5876685934489403, |
|
"grad_norm": 0.8317299871071954, |
|
"learning_rate": 4.893038294820736e-06, |
|
"loss": 0.2681, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.5915221579961463, |
|
"grad_norm": 0.6514943944174995, |
|
"learning_rate": 4.805023448133468e-06, |
|
"loss": 0.3109, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.5953757225433525, |
|
"grad_norm": 0.7060506760393167, |
|
"learning_rate": 4.717699201012658e-06, |
|
"loss": 0.2561, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.5992292870905587, |
|
"grad_norm": 0.6560682680927723, |
|
"learning_rate": 4.6310695222757065e-06, |
|
"loss": 0.351, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.6030828516377649, |
|
"grad_norm": 0.7519900581847846, |
|
"learning_rate": 4.545138349172418e-06, |
|
"loss": 0.2967, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.606936416184971, |
|
"grad_norm": 0.7734834025884938, |
|
"learning_rate": 4.459909587206082e-06, |
|
"loss": 0.2859, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.6107899807321773, |
|
"grad_norm": 0.6021137879538668, |
|
"learning_rate": 4.375387109955953e-06, |
|
"loss": 0.2766, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.6146435452793835, |
|
"grad_norm": 0.617528729879879, |
|
"learning_rate": 4.291574758901224e-06, |
|
"loss": 0.2882, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.6184971098265897, |
|
"grad_norm": 0.6024787320260311, |
|
"learning_rate": 4.208476343246417e-06, |
|
"loss": 0.3055, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.6223506743737959, |
|
"grad_norm": 0.7809194181983308, |
|
"learning_rate": 4.12609563974824e-06, |
|
"loss": 0.3413, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.626204238921002, |
|
"grad_norm": 0.7428919044347968, |
|
"learning_rate": 4.0444363925439845e-06, |
|
"loss": 0.2822, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.630057803468208, |
|
"grad_norm": 0.8313087879713988, |
|
"learning_rate": 3.963502312981298e-06, |
|
"loss": 0.3037, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.6339113680154143, |
|
"grad_norm": 0.7485349105988456, |
|
"learning_rate": 3.883297079449559e-06, |
|
"loss": 0.2736, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.6377649325626205, |
|
"grad_norm": 0.6278539139461561, |
|
"learning_rate": 3.803824337212678e-06, |
|
"loss": 0.2834, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.6416184971098264, |
|
"grad_norm": 0.6237587967099586, |
|
"learning_rate": 3.7250876982433947e-06, |
|
"loss": 0.275, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.6454720616570326, |
|
"grad_norm": 0.6345631909381717, |
|
"learning_rate": 3.6470907410591695e-06, |
|
"loss": 0.2961, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.6493256262042388, |
|
"grad_norm": 0.6867481603368362, |
|
"learning_rate": 3.569837010559505e-06, |
|
"loss": 0.3018, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.653179190751445, |
|
"grad_norm": 0.5911022328472578, |
|
"learning_rate": 3.4933300178648423e-06, |
|
"loss": 0.257, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.6570327552986512, |
|
"grad_norm": 0.6804693404362794, |
|
"learning_rate": 3.417573240156984e-06, |
|
"loss": 0.296, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.6608863198458574, |
|
"grad_norm": 0.72735026642116, |
|
"learning_rate": 3.3425701205210557e-06, |
|
"loss": 0.3169, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.6647398843930636, |
|
"grad_norm": 0.6635536624459238, |
|
"learning_rate": 3.2683240677890373e-06, |
|
"loss": 0.2652, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.6685934489402698, |
|
"grad_norm": 0.607762952833936, |
|
"learning_rate": 3.194838456384819e-06, |
|
"loss": 0.2476, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.672447013487476, |
|
"grad_norm": 0.5435405578589255, |
|
"learning_rate": 3.122116626170826e-06, |
|
"loss": 0.2365, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.6763005780346822, |
|
"grad_norm": 0.6817577487553824, |
|
"learning_rate": 3.0501618822962566e-06, |
|
"loss": 0.3021, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.6801541425818882, |
|
"grad_norm": 0.7502351560290864, |
|
"learning_rate": 2.9789774950468265e-06, |
|
"loss": 0.3289, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.6840077071290944, |
|
"grad_norm": 0.6694176059458067, |
|
"learning_rate": 2.908566699696174e-06, |
|
"loss": 0.2915, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.6878612716763006, |
|
"grad_norm": 0.6629312059099088, |
|
"learning_rate": 2.838932696358798e-06, |
|
"loss": 0.3103, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.6917148362235066, |
|
"grad_norm": 0.8349480949645633, |
|
"learning_rate": 2.77007864984461e-06, |
|
"loss": 0.2938, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.6955684007707128, |
|
"grad_norm": 0.7489107344916376, |
|
"learning_rate": 2.7020076895151226e-06, |
|
"loss": 0.3284, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.699421965317919, |
|
"grad_norm": 0.6851569068577846, |
|
"learning_rate": 2.6347229091411876e-06, |
|
"loss": 0.3074, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.7032755298651252, |
|
"grad_norm": 0.4757754476528696, |
|
"learning_rate": 2.5682273667624235e-06, |
|
"loss": 0.2425, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.7071290944123314, |
|
"grad_norm": 0.6049269606790216, |
|
"learning_rate": 2.5025240845481945e-06, |
|
"loss": 0.2709, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.7109826589595376, |
|
"grad_norm": 0.6082751240069765, |
|
"learning_rate": 2.4376160486602875e-06, |
|
"loss": 0.3106, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.7148362235067438, |
|
"grad_norm": 0.6511104858946544, |
|
"learning_rate": 2.37350620911716e-06, |
|
"loss": 0.2983, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.71868978805395, |
|
"grad_norm": 0.6008366910511428, |
|
"learning_rate": 2.3101974796599015e-06, |
|
"loss": 0.2836, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.7225433526011562, |
|
"grad_norm": 0.6288330836855687, |
|
"learning_rate": 2.247692737619769e-06, |
|
"loss": 0.2594, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.7263969171483622, |
|
"grad_norm": 0.6073567738874573, |
|
"learning_rate": 2.1859948237874517e-06, |
|
"loss": 0.2655, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.7302504816955684, |
|
"grad_norm": 0.5540646853483305, |
|
"learning_rate": 2.1251065422839212e-06, |
|
"loss": 0.2721, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.7341040462427746, |
|
"grad_norm": 0.6179650224299514, |
|
"learning_rate": 2.0650306604330163e-06, |
|
"loss": 0.2871, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.7379576107899806, |
|
"grad_norm": 0.784590739172643, |
|
"learning_rate": 2.005769908635662e-06, |
|
"loss": 0.3636, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.7418111753371868, |
|
"grad_norm": 0.48504899559732223, |
|
"learning_rate": 1.947326980245763e-06, |
|
"loss": 0.2493, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.745664739884393, |
|
"grad_norm": 0.8987552328931523, |
|
"learning_rate": 1.889704531447809e-06, |
|
"loss": 0.2573, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.7495183044315992, |
|
"grad_norm": 0.5767751215303322, |
|
"learning_rate": 1.832905181136142e-06, |
|
"loss": 0.2385, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.7533718689788054, |
|
"grad_norm": 0.6027911760471953, |
|
"learning_rate": 1.7769315107959385e-06, |
|
"loss": 0.2577, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.7572254335260116, |
|
"grad_norm": 0.5310368311114125, |
|
"learning_rate": 1.7217860643858797e-06, |
|
"loss": 0.2632, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.7610789980732178, |
|
"grad_norm": 0.907578227845979, |
|
"learning_rate": 1.6674713482225246e-06, |
|
"loss": 0.2815, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.764932562620424, |
|
"grad_norm": 0.6172664735840684, |
|
"learning_rate": 1.6139898308664093e-06, |
|
"loss": 0.2779, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.7687861271676302, |
|
"grad_norm": 0.6470799229179937, |
|
"learning_rate": 1.5613439430098388e-06, |
|
"loss": 0.3075, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.7726396917148364, |
|
"grad_norm": 0.6173914059389243, |
|
"learning_rate": 1.5095360773664402e-06, |
|
"loss": 0.2604, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.7764932562620424, |
|
"grad_norm": 0.6555070912013644, |
|
"learning_rate": 1.4585685885623901e-06, |
|
"loss": 0.3061, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.7803468208092486, |
|
"grad_norm": 0.5423585126994777, |
|
"learning_rate": 1.4084437930294059e-06, |
|
"loss": 0.2416, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.7842003853564548, |
|
"grad_norm": 0.5128625019538622, |
|
"learning_rate": 1.359163968899473e-06, |
|
"loss": 0.2296, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.7880539499036607, |
|
"grad_norm": 0.8828336205245328, |
|
"learning_rate": 1.3107313559012936e-06, |
|
"loss": 0.2975, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.791907514450867, |
|
"grad_norm": 0.6056697475279614, |
|
"learning_rate": 1.2631481552585067e-06, |
|
"loss": 0.2689, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.7957610789980731, |
|
"grad_norm": 0.7746740424893049, |
|
"learning_rate": 1.2164165295896392e-06, |
|
"loss": 0.2669, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.7996146435452793, |
|
"grad_norm": 0.6006232506779448, |
|
"learning_rate": 1.1705386028098009e-06, |
|
"loss": 0.3039, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.8034682080924855, |
|
"grad_norm": 0.5403526649265826, |
|
"learning_rate": 1.1255164600341816e-06, |
|
"loss": 0.2754, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.8073217726396917, |
|
"grad_norm": 0.5862736861808353, |
|
"learning_rate": 1.08135214748327e-06, |
|
"loss": 0.287, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.811175337186898, |
|
"grad_norm": 0.7474202062547819, |
|
"learning_rate": 1.0380476723898458e-06, |
|
"loss": 0.3006, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.8150289017341041, |
|
"grad_norm": 0.5942504408819616, |
|
"learning_rate": 9.956050029077646e-07, |
|
"loss": 0.2911, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.8188824662813103, |
|
"grad_norm": 0.5958984883417842, |
|
"learning_rate": 9.540260680225133e-07, |
|
"loss": 0.3008, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.8227360308285165, |
|
"grad_norm": 0.5546378435963755, |
|
"learning_rate": 9.133127574635181e-07, |
|
"loss": 0.2325, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.8265895953757225, |
|
"grad_norm": 0.6588191099653756, |
|
"learning_rate": 8.734669216182779e-07, |
|
"loss": 0.2822, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.8304431599229287, |
|
"grad_norm": 0.5229342432978052, |
|
"learning_rate": 8.344903714482555e-07, |
|
"loss": 0.248, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.834296724470135, |
|
"grad_norm": 0.5268589130032248, |
|
"learning_rate": 7.963848784065753e-07, |
|
"loss": 0.3033, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.838150289017341, |
|
"grad_norm": 0.6011796025596555, |
|
"learning_rate": 7.591521743575003e-07, |
|
"loss": 0.289, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.842003853564547, |
|
"grad_norm": 0.6073026645436564, |
|
"learning_rate": 7.227939514977422e-07, |
|
"loss": 0.3053, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.8458574181117533, |
|
"grad_norm": 0.5341540712274524, |
|
"learning_rate": 6.87311862279536e-07, |
|
"loss": 0.2535, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.8497109826589595, |
|
"grad_norm": 0.6384475776837815, |
|
"learning_rate": 6.527075193355337e-07, |
|
"loss": 0.3018, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8535645472061657, |
|
"grad_norm": 0.6168993774079564, |
|
"learning_rate": 6.189824954055335e-07, |
|
"loss": 0.3332, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.857418111753372, |
|
"grad_norm": 0.6689061938191962, |
|
"learning_rate": 5.861383232649708e-07, |
|
"loss": 0.3018, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.861271676300578, |
|
"grad_norm": 0.5773513436403104, |
|
"learning_rate": 5.541764956552831e-07, |
|
"loss": 0.3086, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.8651252408477843, |
|
"grad_norm": 0.5674684683333645, |
|
"learning_rate": 5.230984652160387e-07, |
|
"loss": 0.2904, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.8689788053949905, |
|
"grad_norm": 0.6136551285882403, |
|
"learning_rate": 4.92905644418944e-07, |
|
"loss": 0.2868, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.8728323699421965, |
|
"grad_norm": 0.5814884659334725, |
|
"learning_rate": 4.635994055036208e-07, |
|
"loss": 0.2837, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.8766859344894027, |
|
"grad_norm": 0.5724578775003472, |
|
"learning_rate": 4.3518108041525675e-07, |
|
"loss": 0.266, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.8805394990366089, |
|
"grad_norm": 0.5758555299044388, |
|
"learning_rate": 4.0765196074406433e-07, |
|
"loss": 0.2733, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.8843930635838149, |
|
"grad_norm": 1.583650395260831, |
|
"learning_rate": 3.8101329766657924e-07, |
|
"loss": 0.2487, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.888246628131021, |
|
"grad_norm": 0.5001989878794011, |
|
"learning_rate": 3.5526630188879475e-07, |
|
"loss": 0.2928, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.8921001926782273, |
|
"grad_norm": 0.5835273327136795, |
|
"learning_rate": 3.304121435911345e-07, |
|
"loss": 0.2951, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.8959537572254335, |
|
"grad_norm": 0.6058658063907555, |
|
"learning_rate": 3.064519523752751e-07, |
|
"loss": 0.3092, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.8998073217726397, |
|
"grad_norm": 0.48508060199041647, |
|
"learning_rate": 2.8338681721279627e-07, |
|
"loss": 0.2587, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.9036608863198459, |
|
"grad_norm": 0.6173948412532628, |
|
"learning_rate": 2.612177863956977e-07, |
|
"loss": 0.3553, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.907514450867052, |
|
"grad_norm": 0.5583615292360055, |
|
"learning_rate": 2.3994586748875116e-07, |
|
"loss": 0.2629, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.9113680154142583, |
|
"grad_norm": 0.6358847375735043, |
|
"learning_rate": 2.1957202728370542e-07, |
|
"loss": 0.3183, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.9152215799614645, |
|
"grad_norm": 0.5168808695590897, |
|
"learning_rate": 2.000971917553529e-07, |
|
"loss": 0.2417, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.9190751445086707, |
|
"grad_norm": 0.6311552147683044, |
|
"learning_rate": 1.8152224601943435e-07, |
|
"loss": 0.297, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.9229287090558767, |
|
"grad_norm": 0.6268922330585508, |
|
"learning_rate": 1.6384803429242202e-07, |
|
"loss": 0.3107, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.9267822736030829, |
|
"grad_norm": 0.4990465421997129, |
|
"learning_rate": 1.4707535985314158e-07, |
|
"loss": 0.2362, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.930635838150289, |
|
"grad_norm": 0.5842077868087064, |
|
"learning_rate": 1.3120498500627243e-07, |
|
"loss": 0.2877, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.934489402697495, |
|
"grad_norm": 0.6539135809078528, |
|
"learning_rate": 1.1623763104769536e-07, |
|
"loss": 0.2189, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.9383429672447012, |
|
"grad_norm": 0.5738597010304898, |
|
"learning_rate": 1.0217397823170771e-07, |
|
"loss": 0.3352, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.9421965317919074, |
|
"grad_norm": 0.5459102574082527, |
|
"learning_rate": 8.901466574011919e-08, |
|
"loss": 0.2929, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.9460500963391136, |
|
"grad_norm": 0.5914038878762246, |
|
"learning_rate": 7.676029165318622e-08, |
|
"loss": 0.2649, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.9499036608863198, |
|
"grad_norm": 0.48711166138829143, |
|
"learning_rate": 6.541141292243814e-08, |
|
"loss": 0.2734, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.953757225433526, |
|
"grad_norm": 0.5110998668531642, |
|
"learning_rate": 5.496854534536189e-08, |
|
"loss": 0.2689, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.9576107899807322, |
|
"grad_norm": 0.580604888687543, |
|
"learning_rate": 4.5432163541960785e-08, |
|
"loss": 0.2636, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.9614643545279384, |
|
"grad_norm": 0.5478455993832356, |
|
"learning_rate": 3.680270093318505e-08, |
|
"loss": 0.2662, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.9653179190751446, |
|
"grad_norm": 0.5860745443181511, |
|
"learning_rate": 2.9080549721225426e-08, |
|
"loss": 0.2812, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.9691714836223508, |
|
"grad_norm": 0.5859204655633964, |
|
"learning_rate": 2.226606087169847e-08, |
|
"loss": 0.2682, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.9730250481695568, |
|
"grad_norm": 0.552238848617358, |
|
"learning_rate": 1.6359544097686033e-08, |
|
"loss": 0.2589, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.976878612716763, |
|
"grad_norm": 0.6319565374117269, |
|
"learning_rate": 1.136126784566649e-08, |
|
"loss": 0.28, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.9807321772639692, |
|
"grad_norm": 0.6410471677739641, |
|
"learning_rate": 7.271459283308968e-09, |
|
"loss": 0.3306, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.9845857418111752, |
|
"grad_norm": 0.5612641252616729, |
|
"learning_rate": 4.090304289150471e-09, |
|
"loss": 0.2771, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.9884393063583814, |
|
"grad_norm": 0.49679899268137545, |
|
"learning_rate": 1.817947444149315e-09, |
|
"loss": 0.2396, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.9922928709055876, |
|
"grad_norm": 0.559672250420135, |
|
"learning_rate": 4.544920251126073e-10, |
|
"loss": 0.2364, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.9961464354527938, |
|
"grad_norm": 0.5094885850047902, |
|
"learning_rate": 0.0, |
|
"loss": 0.2353, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.9961464354527938, |
|
"eval_loss": 0.27941030263900757, |
|
"eval_runtime": 147.0149, |
|
"eval_samples_per_second": 13.59, |
|
"eval_steps_per_second": 0.429, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.9961464354527938, |
|
"step": 518, |
|
"total_flos": 2.6836433607911014e+17, |
|
"train_loss": 0.7686227933723033, |
|
"train_runtime": 16387.3806, |
|
"train_samples_per_second": 4.053, |
|
"train_steps_per_second": 0.032 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 518, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.6836433607911014e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|