|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9995258416311048, |
|
"eval_steps": 500, |
|
"global_step": 1054, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000948316737790422, |
|
"grad_norm": 12.246086019020522, |
|
"learning_rate": 3.1545741324921134e-08, |
|
"loss": 0.3675, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.001896633475580844, |
|
"grad_norm": 11.694026905196086, |
|
"learning_rate": 6.309148264984227e-08, |
|
"loss": 0.3513, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002844950213371266, |
|
"grad_norm": 10.688899348005608, |
|
"learning_rate": 9.463722397476342e-08, |
|
"loss": 0.3177, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003793266951161688, |
|
"grad_norm": 11.921298417211082, |
|
"learning_rate": 1.2618296529968454e-07, |
|
"loss": 0.3556, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00474158368895211, |
|
"grad_norm": 11.282902382225787, |
|
"learning_rate": 1.5772870662460568e-07, |
|
"loss": 0.3662, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005689900426742532, |
|
"grad_norm": 12.449826286939778, |
|
"learning_rate": 1.8927444794952683e-07, |
|
"loss": 0.3546, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006638217164532954, |
|
"grad_norm": 11.727097480206721, |
|
"learning_rate": 2.2082018927444798e-07, |
|
"loss": 0.3635, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.007586533902323376, |
|
"grad_norm": 11.441751312661113, |
|
"learning_rate": 2.5236593059936907e-07, |
|
"loss": 0.3636, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008534850640113799, |
|
"grad_norm": 10.632930884848795, |
|
"learning_rate": 2.8391167192429027e-07, |
|
"loss": 0.2923, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00948316737790422, |
|
"grad_norm": 11.025857208188647, |
|
"learning_rate": 3.1545741324921137e-07, |
|
"loss": 0.3449, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010431484115694643, |
|
"grad_norm": 11.86359857266447, |
|
"learning_rate": 3.470031545741325e-07, |
|
"loss": 0.3354, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011379800853485065, |
|
"grad_norm": 11.01751351812872, |
|
"learning_rate": 3.7854889589905366e-07, |
|
"loss": 0.3369, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012328117591275486, |
|
"grad_norm": 9.502190495628849, |
|
"learning_rate": 4.100946372239748e-07, |
|
"loss": 0.3179, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.013276434329065908, |
|
"grad_norm": 7.858408977040966, |
|
"learning_rate": 4.4164037854889596e-07, |
|
"loss": 0.2565, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01422475106685633, |
|
"grad_norm": 8.154333698814211, |
|
"learning_rate": 4.7318611987381705e-07, |
|
"loss": 0.2589, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015173067804646752, |
|
"grad_norm": 8.475444781638856, |
|
"learning_rate": 5.047318611987381e-07, |
|
"loss": 0.3001, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.016121384542437174, |
|
"grad_norm": 7.13737669899092, |
|
"learning_rate": 5.362776025236594e-07, |
|
"loss": 0.2641, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.017069701280227598, |
|
"grad_norm": 5.554684184392653, |
|
"learning_rate": 5.678233438485805e-07, |
|
"loss": 0.1902, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"grad_norm": 4.568211300813283, |
|
"learning_rate": 5.993690851735017e-07, |
|
"loss": 0.208, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01896633475580844, |
|
"grad_norm": 4.7579569152913646, |
|
"learning_rate": 6.309148264984227e-07, |
|
"loss": 0.1994, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01991465149359886, |
|
"grad_norm": 4.7128465676673486, |
|
"learning_rate": 6.62460567823344e-07, |
|
"loss": 0.229, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.020862968231389285, |
|
"grad_norm": 4.005405411985473, |
|
"learning_rate": 6.94006309148265e-07, |
|
"loss": 0.2095, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.021811284969179705, |
|
"grad_norm": 4.676075338959145, |
|
"learning_rate": 7.255520504731863e-07, |
|
"loss": 0.2178, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02275960170697013, |
|
"grad_norm": 2.3652635706654435, |
|
"learning_rate": 7.570977917981073e-07, |
|
"loss": 0.1524, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02370791844476055, |
|
"grad_norm": 2.685337556789167, |
|
"learning_rate": 7.886435331230284e-07, |
|
"loss": 0.1672, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.024656235182550973, |
|
"grad_norm": 2.430942973848189, |
|
"learning_rate": 8.201892744479496e-07, |
|
"loss": 0.1526, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.025604551920341393, |
|
"grad_norm": 2.6467399445694286, |
|
"learning_rate": 8.517350157728707e-07, |
|
"loss": 0.1605, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.026552868658131817, |
|
"grad_norm": 1.9805248826128374, |
|
"learning_rate": 8.832807570977919e-07, |
|
"loss": 0.1223, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.027501185395922237, |
|
"grad_norm": 2.2695664454959785, |
|
"learning_rate": 9.148264984227131e-07, |
|
"loss": 0.1991, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02844950213371266, |
|
"grad_norm": 2.221333597086963, |
|
"learning_rate": 9.463722397476341e-07, |
|
"loss": 0.1466, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02939781887150308, |
|
"grad_norm": 2.4412316593782633, |
|
"learning_rate": 9.779179810725552e-07, |
|
"loss": 0.1757, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.030346135609293504, |
|
"grad_norm": 2.3894901293863198, |
|
"learning_rate": 1.0094637223974763e-06, |
|
"loss": 0.1467, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.031294452347083924, |
|
"grad_norm": 2.2254181707911003, |
|
"learning_rate": 1.0410094637223975e-06, |
|
"loss": 0.1403, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03224276908487435, |
|
"grad_norm": 2.0835670435267573, |
|
"learning_rate": 1.0725552050473188e-06, |
|
"loss": 0.1295, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03319108582266477, |
|
"grad_norm": 2.490534255553767, |
|
"learning_rate": 1.1041009463722398e-06, |
|
"loss": 0.1781, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.034139402560455195, |
|
"grad_norm": 2.4852979753797526, |
|
"learning_rate": 1.135646687697161e-06, |
|
"loss": 0.1635, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03508771929824561, |
|
"grad_norm": 2.1821267836376093, |
|
"learning_rate": 1.1671924290220821e-06, |
|
"loss": 0.1536, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.036036036036036036, |
|
"grad_norm": 2.0995981360286193, |
|
"learning_rate": 1.1987381703470034e-06, |
|
"loss": 0.1892, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03698435277382646, |
|
"grad_norm": 1.7671849396147046, |
|
"learning_rate": 1.2302839116719244e-06, |
|
"loss": 0.1441, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03793266951161688, |
|
"grad_norm": 1.9145592423222202, |
|
"learning_rate": 1.2618296529968455e-06, |
|
"loss": 0.1515, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0388809862494073, |
|
"grad_norm": 2.909485299628588, |
|
"learning_rate": 1.2933753943217667e-06, |
|
"loss": 0.176, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03982930298719772, |
|
"grad_norm": 1.6871888183428478, |
|
"learning_rate": 1.324921135646688e-06, |
|
"loss": 0.1215, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.04077761972498815, |
|
"grad_norm": 1.5396564532901138, |
|
"learning_rate": 1.3564668769716088e-06, |
|
"loss": 0.1334, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04172593646277857, |
|
"grad_norm": 2.205033481070129, |
|
"learning_rate": 1.38801261829653e-06, |
|
"loss": 0.1397, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04267425320056899, |
|
"grad_norm": 1.8497757762613358, |
|
"learning_rate": 1.4195583596214513e-06, |
|
"loss": 0.1274, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04362256993835941, |
|
"grad_norm": 1.9376215434540043, |
|
"learning_rate": 1.4511041009463726e-06, |
|
"loss": 0.1228, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.044570886676149835, |
|
"grad_norm": 1.594889970345864, |
|
"learning_rate": 1.4826498422712934e-06, |
|
"loss": 0.1137, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04551920341394026, |
|
"grad_norm": 1.7592570423176281, |
|
"learning_rate": 1.5141955835962146e-06, |
|
"loss": 0.141, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.046467520151730675, |
|
"grad_norm": 1.6146283602515956, |
|
"learning_rate": 1.545741324921136e-06, |
|
"loss": 0.1428, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0474158368895211, |
|
"grad_norm": 1.503278573378982, |
|
"learning_rate": 1.5772870662460567e-06, |
|
"loss": 0.1318, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04836415362731152, |
|
"grad_norm": 1.37572777178569, |
|
"learning_rate": 1.608832807570978e-06, |
|
"loss": 0.1315, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.049312470365101946, |
|
"grad_norm": 1.6002275154635794, |
|
"learning_rate": 1.6403785488958992e-06, |
|
"loss": 0.0935, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.05026078710289237, |
|
"grad_norm": 1.9567696662008847, |
|
"learning_rate": 1.6719242902208203e-06, |
|
"loss": 0.1271, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.051209103840682786, |
|
"grad_norm": 1.601626063178932, |
|
"learning_rate": 1.7034700315457413e-06, |
|
"loss": 0.0959, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.05215742057847321, |
|
"grad_norm": 1.886431535590579, |
|
"learning_rate": 1.7350157728706626e-06, |
|
"loss": 0.1218, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.05310573731626363, |
|
"grad_norm": 1.5354494166136305, |
|
"learning_rate": 1.7665615141955838e-06, |
|
"loss": 0.1139, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.05405405405405406, |
|
"grad_norm": 2.311230053300576, |
|
"learning_rate": 1.7981072555205049e-06, |
|
"loss": 0.1426, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.055002370791844474, |
|
"grad_norm": 1.6253071180005185, |
|
"learning_rate": 1.8296529968454261e-06, |
|
"loss": 0.1175, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0559506875296349, |
|
"grad_norm": 1.3821063491809322, |
|
"learning_rate": 1.8611987381703472e-06, |
|
"loss": 0.132, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05689900426742532, |
|
"grad_norm": 1.7624392868013044, |
|
"learning_rate": 1.8927444794952682e-06, |
|
"loss": 0.1221, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.057847321005215745, |
|
"grad_norm": 1.3398437874784876, |
|
"learning_rate": 1.9242902208201892e-06, |
|
"loss": 0.125, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05879563774300616, |
|
"grad_norm": 1.562570182505017, |
|
"learning_rate": 1.9558359621451105e-06, |
|
"loss": 0.1413, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.059743954480796585, |
|
"grad_norm": 1.6769755616188486, |
|
"learning_rate": 1.9873817034700317e-06, |
|
"loss": 0.1559, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06069227121858701, |
|
"grad_norm": 1.3917364499829268, |
|
"learning_rate": 2.0189274447949526e-06, |
|
"loss": 0.1377, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.06164058795637743, |
|
"grad_norm": 1.8502674559797263, |
|
"learning_rate": 2.050473186119874e-06, |
|
"loss": 0.1487, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06258890469416785, |
|
"grad_norm": 3.158783977874437, |
|
"learning_rate": 2.082018927444795e-06, |
|
"loss": 0.119, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06353722143195828, |
|
"grad_norm": 1.811584236109641, |
|
"learning_rate": 2.1135646687697163e-06, |
|
"loss": 0.122, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0644855381697487, |
|
"grad_norm": 2.917344328319794, |
|
"learning_rate": 2.1451104100946376e-06, |
|
"loss": 0.1313, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06543385490753911, |
|
"grad_norm": 1.8029019845335916, |
|
"learning_rate": 2.1766561514195584e-06, |
|
"loss": 0.1138, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06638217164532954, |
|
"grad_norm": 1.6898543330406532, |
|
"learning_rate": 2.2082018927444797e-06, |
|
"loss": 0.1191, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06733048838311996, |
|
"grad_norm": 2.2925732127308214, |
|
"learning_rate": 2.239747634069401e-06, |
|
"loss": 0.1306, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.06827880512091039, |
|
"grad_norm": 1.4433490292568716, |
|
"learning_rate": 2.271293375394322e-06, |
|
"loss": 0.1055, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.06922712185870081, |
|
"grad_norm": 1.3862506183642664, |
|
"learning_rate": 2.302839116719243e-06, |
|
"loss": 0.1075, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.07017543859649122, |
|
"grad_norm": 1.2816575561632197, |
|
"learning_rate": 2.3343848580441643e-06, |
|
"loss": 0.1028, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07112375533428165, |
|
"grad_norm": 1.893923472034316, |
|
"learning_rate": 2.3659305993690855e-06, |
|
"loss": 0.1011, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07207207207207207, |
|
"grad_norm": 1.6025824634915868, |
|
"learning_rate": 2.3974763406940068e-06, |
|
"loss": 0.1317, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07302038880986249, |
|
"grad_norm": 1.7176261068301808, |
|
"learning_rate": 2.4290220820189276e-06, |
|
"loss": 0.1447, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07396870554765292, |
|
"grad_norm": 2.4231160050612863, |
|
"learning_rate": 2.460567823343849e-06, |
|
"loss": 0.1384, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07491702228544334, |
|
"grad_norm": 1.2193411546548798, |
|
"learning_rate": 2.49211356466877e-06, |
|
"loss": 0.0992, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07586533902323377, |
|
"grad_norm": 1.5164983059809367, |
|
"learning_rate": 2.523659305993691e-06, |
|
"loss": 0.1001, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07681365576102418, |
|
"grad_norm": 1.6017905795769134, |
|
"learning_rate": 2.5552050473186126e-06, |
|
"loss": 0.1048, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0777619724988146, |
|
"grad_norm": 1.1836761079302904, |
|
"learning_rate": 2.5867507886435334e-06, |
|
"loss": 0.0982, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07871028923660503, |
|
"grad_norm": 3.3493572839513566, |
|
"learning_rate": 2.6182965299684543e-06, |
|
"loss": 0.1184, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07965860597439545, |
|
"grad_norm": 2.0313461386241722, |
|
"learning_rate": 2.649842271293376e-06, |
|
"loss": 0.1174, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08060692271218586, |
|
"grad_norm": 1.7152579326543271, |
|
"learning_rate": 2.6813880126182968e-06, |
|
"loss": 0.117, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0815552394499763, |
|
"grad_norm": 1.9082671591126898, |
|
"learning_rate": 2.7129337539432176e-06, |
|
"loss": 0.1538, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08250355618776671, |
|
"grad_norm": 1.1544236926306861, |
|
"learning_rate": 2.7444794952681393e-06, |
|
"loss": 0.0906, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.08345187292555714, |
|
"grad_norm": 1.2516823902614436, |
|
"learning_rate": 2.77602523659306e-06, |
|
"loss": 0.1452, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08440018966334756, |
|
"grad_norm": 1.0339206815219761, |
|
"learning_rate": 2.807570977917981e-06, |
|
"loss": 0.0836, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08534850640113797, |
|
"grad_norm": 1.668394516826565, |
|
"learning_rate": 2.8391167192429026e-06, |
|
"loss": 0.1129, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0862968231389284, |
|
"grad_norm": 1.497152077632149, |
|
"learning_rate": 2.8706624605678234e-06, |
|
"loss": 0.1062, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08724513987671882, |
|
"grad_norm": 1.197731872894548, |
|
"learning_rate": 2.902208201892745e-06, |
|
"loss": 0.1102, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.08819345661450925, |
|
"grad_norm": 1.4271367098608596, |
|
"learning_rate": 2.933753943217666e-06, |
|
"loss": 0.1175, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08914177335229967, |
|
"grad_norm": 1.6337936419255448, |
|
"learning_rate": 2.9652996845425868e-06, |
|
"loss": 0.1148, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.09009009009009009, |
|
"grad_norm": 1.7427850789821318, |
|
"learning_rate": 2.9968454258675085e-06, |
|
"loss": 0.1246, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09103840682788052, |
|
"grad_norm": 1.2870967429199511, |
|
"learning_rate": 3.0283911671924293e-06, |
|
"loss": 0.087, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.09198672356567093, |
|
"grad_norm": 1.1311991406490958, |
|
"learning_rate": 3.05993690851735e-06, |
|
"loss": 0.0996, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09293504030346135, |
|
"grad_norm": 1.3742928108454626, |
|
"learning_rate": 3.091482649842272e-06, |
|
"loss": 0.1293, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09388335704125178, |
|
"grad_norm": 1.516028333204866, |
|
"learning_rate": 3.1230283911671926e-06, |
|
"loss": 0.1078, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0948316737790422, |
|
"grad_norm": 1.1404699421620854, |
|
"learning_rate": 3.1545741324921135e-06, |
|
"loss": 0.1142, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09577999051683263, |
|
"grad_norm": 1.7924070029612504, |
|
"learning_rate": 3.186119873817035e-06, |
|
"loss": 0.1419, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.09672830725462304, |
|
"grad_norm": 1.7297435544466835, |
|
"learning_rate": 3.217665615141956e-06, |
|
"loss": 0.117, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09767662399241346, |
|
"grad_norm": 1.2304625316537265, |
|
"learning_rate": 3.2492113564668772e-06, |
|
"loss": 0.0834, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.09862494073020389, |
|
"grad_norm": 1.6554297434059837, |
|
"learning_rate": 3.2807570977917985e-06, |
|
"loss": 0.1251, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09957325746799431, |
|
"grad_norm": 1.9749022078409877, |
|
"learning_rate": 3.3123028391167193e-06, |
|
"loss": 0.1485, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10052157420578474, |
|
"grad_norm": 1.7816458729316766, |
|
"learning_rate": 3.3438485804416405e-06, |
|
"loss": 0.1168, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.10146989094357516, |
|
"grad_norm": 1.6366795934026652, |
|
"learning_rate": 3.375394321766562e-06, |
|
"loss": 0.1221, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.10241820768136557, |
|
"grad_norm": 1.0846701931516913, |
|
"learning_rate": 3.4069400630914826e-06, |
|
"loss": 0.086, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.103366524419156, |
|
"grad_norm": 1.374499027535318, |
|
"learning_rate": 3.4384858044164043e-06, |
|
"loss": 0.1097, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.10431484115694642, |
|
"grad_norm": 1.7733166976712489, |
|
"learning_rate": 3.470031545741325e-06, |
|
"loss": 0.1098, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10526315789473684, |
|
"grad_norm": 2.980678296553409, |
|
"learning_rate": 3.5015772870662464e-06, |
|
"loss": 0.0917, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.10621147463252727, |
|
"grad_norm": 0.9904577188744437, |
|
"learning_rate": 3.5331230283911676e-06, |
|
"loss": 0.0777, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.10715979137031768, |
|
"grad_norm": 1.4107631975145143, |
|
"learning_rate": 3.5646687697160885e-06, |
|
"loss": 0.0902, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.10810810810810811, |
|
"grad_norm": 1.786967710835369, |
|
"learning_rate": 3.5962145110410097e-06, |
|
"loss": 0.0974, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.10905642484589853, |
|
"grad_norm": 1.6278373703409408, |
|
"learning_rate": 3.627760252365931e-06, |
|
"loss": 0.1031, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11000474158368895, |
|
"grad_norm": 1.0856958566494381, |
|
"learning_rate": 3.6593059936908522e-06, |
|
"loss": 0.0872, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.11095305832147938, |
|
"grad_norm": 1.0746142572780863, |
|
"learning_rate": 3.690851735015773e-06, |
|
"loss": 0.0753, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.1119013750592698, |
|
"grad_norm": 1.794687772916648, |
|
"learning_rate": 3.7223974763406943e-06, |
|
"loss": 0.094, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.11284969179706021, |
|
"grad_norm": 2.0574961246450543, |
|
"learning_rate": 3.7539432176656156e-06, |
|
"loss": 0.1032, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.11379800853485064, |
|
"grad_norm": 1.0887603543641189, |
|
"learning_rate": 3.7854889589905364e-06, |
|
"loss": 0.0869, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11474632527264106, |
|
"grad_norm": 1.4381581196511768, |
|
"learning_rate": 3.817034700315458e-06, |
|
"loss": 0.105, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.11569464201043149, |
|
"grad_norm": 2.0884869908112984, |
|
"learning_rate": 3.8485804416403785e-06, |
|
"loss": 0.1072, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.1166429587482219, |
|
"grad_norm": 1.3918016525882038, |
|
"learning_rate": 3.8801261829653e-06, |
|
"loss": 0.0995, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.11759127548601232, |
|
"grad_norm": 1.1199618265144746, |
|
"learning_rate": 3.911671924290221e-06, |
|
"loss": 0.0693, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.11853959222380275, |
|
"grad_norm": 2.913976726787567, |
|
"learning_rate": 3.943217665615142e-06, |
|
"loss": 0.1203, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11948790896159317, |
|
"grad_norm": 1.4548880466216083, |
|
"learning_rate": 3.9747634069400635e-06, |
|
"loss": 0.0891, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1204362256993836, |
|
"grad_norm": 3.1711394720986235, |
|
"learning_rate": 4.006309148264985e-06, |
|
"loss": 0.1223, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.12138454243717402, |
|
"grad_norm": 1.888765811166245, |
|
"learning_rate": 4.037854889589905e-06, |
|
"loss": 0.13, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.12233285917496443, |
|
"grad_norm": 1.2398551211997078, |
|
"learning_rate": 4.069400630914827e-06, |
|
"loss": 0.1103, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.12328117591275486, |
|
"grad_norm": 1.7438322556304724, |
|
"learning_rate": 4.100946372239748e-06, |
|
"loss": 0.1147, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12422949265054528, |
|
"grad_norm": 0.9363387889716617, |
|
"learning_rate": 4.132492113564669e-06, |
|
"loss": 0.0995, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.1251778093883357, |
|
"grad_norm": 1.446859084810851, |
|
"learning_rate": 4.16403785488959e-06, |
|
"loss": 0.0994, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.12612612612612611, |
|
"grad_norm": 1.1856203072681963, |
|
"learning_rate": 4.195583596214511e-06, |
|
"loss": 0.0927, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.12707444286391656, |
|
"grad_norm": 1.103336827372462, |
|
"learning_rate": 4.227129337539433e-06, |
|
"loss": 0.0815, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.12802275960170698, |
|
"grad_norm": 1.897384655096208, |
|
"learning_rate": 4.258675078864354e-06, |
|
"loss": 0.1248, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1289710763394974, |
|
"grad_norm": 1.6223901695891558, |
|
"learning_rate": 4.290220820189275e-06, |
|
"loss": 0.1456, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1299193930772878, |
|
"grad_norm": 1.93689861193564, |
|
"learning_rate": 4.321766561514196e-06, |
|
"loss": 0.1236, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.13086770981507823, |
|
"grad_norm": 1.7202395942479507, |
|
"learning_rate": 4.353312302839117e-06, |
|
"loss": 0.0994, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.13181602655286867, |
|
"grad_norm": 2.1336251410837717, |
|
"learning_rate": 4.384858044164038e-06, |
|
"loss": 0.0963, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1327643432906591, |
|
"grad_norm": 2.086908025505944, |
|
"learning_rate": 4.416403785488959e-06, |
|
"loss": 0.1397, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1337126600284495, |
|
"grad_norm": 1.903049841336412, |
|
"learning_rate": 4.447949526813881e-06, |
|
"loss": 0.1188, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.13466097676623992, |
|
"grad_norm": 1.237639055790405, |
|
"learning_rate": 4.479495268138802e-06, |
|
"loss": 0.0864, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.13560929350403034, |
|
"grad_norm": 1.533833989919448, |
|
"learning_rate": 4.511041009463723e-06, |
|
"loss": 0.1188, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.13655761024182078, |
|
"grad_norm": 1.7546010414420699, |
|
"learning_rate": 4.542586750788644e-06, |
|
"loss": 0.1364, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.1375059269796112, |
|
"grad_norm": 2.9799276151902645, |
|
"learning_rate": 4.574132492113565e-06, |
|
"loss": 0.1226, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.13845424371740161, |
|
"grad_norm": 1.1723876001348499, |
|
"learning_rate": 4.605678233438486e-06, |
|
"loss": 0.086, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.13940256045519203, |
|
"grad_norm": 2.069220754870492, |
|
"learning_rate": 4.637223974763407e-06, |
|
"loss": 0.1196, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.14035087719298245, |
|
"grad_norm": 3.9795001087139124, |
|
"learning_rate": 4.6687697160883285e-06, |
|
"loss": 0.1152, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.14129919393077287, |
|
"grad_norm": 1.4634422746453415, |
|
"learning_rate": 4.70031545741325e-06, |
|
"loss": 0.0916, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.1422475106685633, |
|
"grad_norm": 1.3185726560010742, |
|
"learning_rate": 4.731861198738171e-06, |
|
"loss": 0.0904, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14319582740635373, |
|
"grad_norm": 1.5552910898557228, |
|
"learning_rate": 4.7634069400630914e-06, |
|
"loss": 0.0899, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.14414414414414414, |
|
"grad_norm": 1.0997805514097108, |
|
"learning_rate": 4.7949526813880135e-06, |
|
"loss": 0.0795, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.14509246088193456, |
|
"grad_norm": 1.7076641753438397, |
|
"learning_rate": 4.826498422712934e-06, |
|
"loss": 0.1081, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.14604077761972498, |
|
"grad_norm": 1.6735518250841006, |
|
"learning_rate": 4.858044164037855e-06, |
|
"loss": 0.1068, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.14698909435751542, |
|
"grad_norm": 1.2033878521779449, |
|
"learning_rate": 4.8895899053627764e-06, |
|
"loss": 0.0934, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.14793741109530584, |
|
"grad_norm": 1.4908718795559122, |
|
"learning_rate": 4.921135646687698e-06, |
|
"loss": 0.1072, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.14888572783309625, |
|
"grad_norm": 1.3234990953707453, |
|
"learning_rate": 4.952681388012618e-06, |
|
"loss": 0.104, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.14983404457088667, |
|
"grad_norm": 1.3354249814975963, |
|
"learning_rate": 4.98422712933754e-06, |
|
"loss": 0.1189, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.1507823613086771, |
|
"grad_norm": 1.224445144859879, |
|
"learning_rate": 5.015772870662461e-06, |
|
"loss": 0.1114, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.15173067804646753, |
|
"grad_norm": 1.3554440133569026, |
|
"learning_rate": 5.047318611987382e-06, |
|
"loss": 0.1278, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.15267899478425795, |
|
"grad_norm": 1.4393478098545054, |
|
"learning_rate": 5.078864353312303e-06, |
|
"loss": 0.1201, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.15362731152204837, |
|
"grad_norm": 1.032684740456474, |
|
"learning_rate": 5.110410094637225e-06, |
|
"loss": 0.0841, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.15457562825983878, |
|
"grad_norm": 1.2508286920209446, |
|
"learning_rate": 5.141955835962146e-06, |
|
"loss": 0.0863, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.1555239449976292, |
|
"grad_norm": 1.899093372512286, |
|
"learning_rate": 5.173501577287067e-06, |
|
"loss": 0.1662, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.15647226173541964, |
|
"grad_norm": 1.4000014551423334, |
|
"learning_rate": 5.205047318611987e-06, |
|
"loss": 0.0909, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.15742057847321006, |
|
"grad_norm": 1.9418542456678585, |
|
"learning_rate": 5.2365930599369085e-06, |
|
"loss": 0.1013, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.15836889521100048, |
|
"grad_norm": 1.5538903766146939, |
|
"learning_rate": 5.268138801261831e-06, |
|
"loss": 0.1177, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1593172119487909, |
|
"grad_norm": 1.3035129364423688, |
|
"learning_rate": 5.299684542586752e-06, |
|
"loss": 0.0961, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1602655286865813, |
|
"grad_norm": 1.273421849890499, |
|
"learning_rate": 5.331230283911672e-06, |
|
"loss": 0.1252, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.16121384542437173, |
|
"grad_norm": 1.123016604976548, |
|
"learning_rate": 5.3627760252365935e-06, |
|
"loss": 0.0999, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16216216216216217, |
|
"grad_norm": 1.2409364166994, |
|
"learning_rate": 5.394321766561515e-06, |
|
"loss": 0.1095, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.1631104788999526, |
|
"grad_norm": 1.1082140455460585, |
|
"learning_rate": 5.425867507886435e-06, |
|
"loss": 0.0736, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.164058795637743, |
|
"grad_norm": 1.2872459579560394, |
|
"learning_rate": 5.457413249211357e-06, |
|
"loss": 0.0928, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.16500711237553342, |
|
"grad_norm": 1.3830237110418746, |
|
"learning_rate": 5.4889589905362786e-06, |
|
"loss": 0.0973, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.16595542911332384, |
|
"grad_norm": 1.2546887092347754, |
|
"learning_rate": 5.520504731861199e-06, |
|
"loss": 0.0832, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.16690374585111428, |
|
"grad_norm": 1.1708284069676944, |
|
"learning_rate": 5.55205047318612e-06, |
|
"loss": 0.1075, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.1678520625889047, |
|
"grad_norm": 1.101853335061695, |
|
"learning_rate": 5.5835962145110415e-06, |
|
"loss": 0.0897, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.16880037932669512, |
|
"grad_norm": 1.015907357215909, |
|
"learning_rate": 5.615141955835962e-06, |
|
"loss": 0.0819, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.16974869606448553, |
|
"grad_norm": 1.8752154604515816, |
|
"learning_rate": 5.646687697160884e-06, |
|
"loss": 0.1021, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.17069701280227595, |
|
"grad_norm": 1.6971011710183759, |
|
"learning_rate": 5.678233438485805e-06, |
|
"loss": 0.0996, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1716453295400664, |
|
"grad_norm": 1.2212507178791898, |
|
"learning_rate": 5.709779179810726e-06, |
|
"loss": 0.1079, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1725936462778568, |
|
"grad_norm": 1.7343284525300247, |
|
"learning_rate": 5.741324921135647e-06, |
|
"loss": 0.1292, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.17354196301564723, |
|
"grad_norm": 1.4376592014404461, |
|
"learning_rate": 5.772870662460568e-06, |
|
"loss": 0.1312, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.17449027975343764, |
|
"grad_norm": 1.2528619821880524, |
|
"learning_rate": 5.80441640378549e-06, |
|
"loss": 0.0762, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 1.9247297159171304, |
|
"learning_rate": 5.835962145110411e-06, |
|
"loss": 0.1403, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1763869132290185, |
|
"grad_norm": 1.5028101353474104, |
|
"learning_rate": 5.867507886435332e-06, |
|
"loss": 0.1147, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.17733522996680892, |
|
"grad_norm": 2.4179600186213714, |
|
"learning_rate": 5.899053627760253e-06, |
|
"loss": 0.0913, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.17828354670459934, |
|
"grad_norm": 1.518835105924909, |
|
"learning_rate": 5.9305993690851736e-06, |
|
"loss": 0.0918, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.17923186344238975, |
|
"grad_norm": 1.6543687104918372, |
|
"learning_rate": 5.962145110410095e-06, |
|
"loss": 0.122, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.18018018018018017, |
|
"grad_norm": 1.4531807393638785, |
|
"learning_rate": 5.993690851735017e-06, |
|
"loss": 0.1228, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1811284969179706, |
|
"grad_norm": 1.4665808153812976, |
|
"learning_rate": 6.025236593059937e-06, |
|
"loss": 0.1014, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.18207681365576103, |
|
"grad_norm": 1.2889682170490027, |
|
"learning_rate": 6.056782334384859e-06, |
|
"loss": 0.1055, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.18302513039355145, |
|
"grad_norm": 1.3310497561635966, |
|
"learning_rate": 6.08832807570978e-06, |
|
"loss": 0.119, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.18397344713134187, |
|
"grad_norm": 1.3246051325093873, |
|
"learning_rate": 6.1198738170347e-06, |
|
"loss": 0.1288, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.18492176386913228, |
|
"grad_norm": 1.1979924093987135, |
|
"learning_rate": 6.1514195583596215e-06, |
|
"loss": 0.0877, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1858700806069227, |
|
"grad_norm": 1.1280419900810446, |
|
"learning_rate": 6.182965299684544e-06, |
|
"loss": 0.1085, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.18681839734471314, |
|
"grad_norm": 1.3307017446168579, |
|
"learning_rate": 6.214511041009465e-06, |
|
"loss": 0.0853, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.18776671408250356, |
|
"grad_norm": 1.1814823672365349, |
|
"learning_rate": 6.246056782334385e-06, |
|
"loss": 0.1066, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.18871503082029398, |
|
"grad_norm": 0.7829348670836794, |
|
"learning_rate": 6.2776025236593065e-06, |
|
"loss": 0.0662, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1896633475580844, |
|
"grad_norm": 1.2435224715978643, |
|
"learning_rate": 6.309148264984227e-06, |
|
"loss": 0.088, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1906116642958748, |
|
"grad_norm": 1.0014149948809556, |
|
"learning_rate": 6.340694006309149e-06, |
|
"loss": 0.0975, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.19155998103366526, |
|
"grad_norm": 0.9250673471848995, |
|
"learning_rate": 6.37223974763407e-06, |
|
"loss": 0.0877, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.19250829777145567, |
|
"grad_norm": 1.056412139362465, |
|
"learning_rate": 6.4037854889589915e-06, |
|
"loss": 0.0763, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.1934566145092461, |
|
"grad_norm": 0.9891782097788515, |
|
"learning_rate": 6.435331230283912e-06, |
|
"loss": 0.0834, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1944049312470365, |
|
"grad_norm": 1.0792725374885792, |
|
"learning_rate": 6.466876971608833e-06, |
|
"loss": 0.0885, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.19535324798482692, |
|
"grad_norm": 1.2366811021393578, |
|
"learning_rate": 6.4984227129337544e-06, |
|
"loss": 0.0954, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.19630156472261737, |
|
"grad_norm": 1.024115365006771, |
|
"learning_rate": 6.529968454258676e-06, |
|
"loss": 0.1215, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.19724988146040778, |
|
"grad_norm": 1.2203185957532192, |
|
"learning_rate": 6.561514195583597e-06, |
|
"loss": 0.1202, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.1981981981981982, |
|
"grad_norm": 0.9501403270885721, |
|
"learning_rate": 6.593059936908518e-06, |
|
"loss": 0.0715, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.19914651493598862, |
|
"grad_norm": 1.5511308370546482, |
|
"learning_rate": 6.624605678233439e-06, |
|
"loss": 0.1089, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20009483167377903, |
|
"grad_norm": 0.9433860573102355, |
|
"learning_rate": 6.65615141955836e-06, |
|
"loss": 0.0648, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.20104314841156948, |
|
"grad_norm": 1.0981902231687461, |
|
"learning_rate": 6.687697160883281e-06, |
|
"loss": 0.0663, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.2019914651493599, |
|
"grad_norm": 1.064443363672458, |
|
"learning_rate": 6.719242902208203e-06, |
|
"loss": 0.077, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.2029397818871503, |
|
"grad_norm": 1.3753290546304533, |
|
"learning_rate": 6.750788643533124e-06, |
|
"loss": 0.1093, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.20388809862494073, |
|
"grad_norm": 1.2200081175269764, |
|
"learning_rate": 6.782334384858045e-06, |
|
"loss": 0.1094, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.20483641536273114, |
|
"grad_norm": 0.9141258918864384, |
|
"learning_rate": 6.813880126182965e-06, |
|
"loss": 0.0911, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.20578473210052156, |
|
"grad_norm": 2.528170753397052, |
|
"learning_rate": 6.8454258675078865e-06, |
|
"loss": 0.1079, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.206733048838312, |
|
"grad_norm": 1.4430688823297448, |
|
"learning_rate": 6.876971608832809e-06, |
|
"loss": 0.1053, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.20768136557610242, |
|
"grad_norm": 1.0186932336289805, |
|
"learning_rate": 6.90851735015773e-06, |
|
"loss": 0.0861, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.20862968231389284, |
|
"grad_norm": 1.1420742589304766, |
|
"learning_rate": 6.94006309148265e-06, |
|
"loss": 0.094, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.20957799905168326, |
|
"grad_norm": 1.2741420533987797, |
|
"learning_rate": 6.9716088328075715e-06, |
|
"loss": 0.0951, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.21052631578947367, |
|
"grad_norm": 0.9075216722351295, |
|
"learning_rate": 7.003154574132493e-06, |
|
"loss": 0.0866, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.21147463252726412, |
|
"grad_norm": 1.1980754719122302, |
|
"learning_rate": 7.034700315457413e-06, |
|
"loss": 0.0914, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.21242294926505453, |
|
"grad_norm": 1.1939921471415105, |
|
"learning_rate": 7.066246056782335e-06, |
|
"loss": 0.1047, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.21337126600284495, |
|
"grad_norm": 0.8519438677271276, |
|
"learning_rate": 7.0977917981072565e-06, |
|
"loss": 0.0941, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.21431958274063537, |
|
"grad_norm": 0.789532854502906, |
|
"learning_rate": 7.129337539432177e-06, |
|
"loss": 0.0819, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.21526789947842578, |
|
"grad_norm": 1.2111156014392817, |
|
"learning_rate": 7.160883280757098e-06, |
|
"loss": 0.1027, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.21621621621621623, |
|
"grad_norm": 1.0588737043402552, |
|
"learning_rate": 7.1924290220820195e-06, |
|
"loss": 0.0952, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.21716453295400664, |
|
"grad_norm": 0.933483217055125, |
|
"learning_rate": 7.22397476340694e-06, |
|
"loss": 0.0763, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.21811284969179706, |
|
"grad_norm": 1.049586247769339, |
|
"learning_rate": 7.255520504731862e-06, |
|
"loss": 0.0789, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.21906116642958748, |
|
"grad_norm": 1.1220808424289264, |
|
"learning_rate": 7.287066246056783e-06, |
|
"loss": 0.074, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.2200094831673779, |
|
"grad_norm": 1.254391611101815, |
|
"learning_rate": 7.3186119873817045e-06, |
|
"loss": 0.093, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.22095779990516834, |
|
"grad_norm": 1.274839766592392, |
|
"learning_rate": 7.350157728706625e-06, |
|
"loss": 0.0938, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.22190611664295876, |
|
"grad_norm": 1.2629251738997191, |
|
"learning_rate": 7.381703470031546e-06, |
|
"loss": 0.1129, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.22285443338074917, |
|
"grad_norm": 1.3595829605121952, |
|
"learning_rate": 7.413249211356468e-06, |
|
"loss": 0.1062, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.2238027501185396, |
|
"grad_norm": 1.353026352957774, |
|
"learning_rate": 7.444794952681389e-06, |
|
"loss": 0.117, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.22475106685633, |
|
"grad_norm": 1.3472351125895725, |
|
"learning_rate": 7.47634069400631e-06, |
|
"loss": 0.0827, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.22569938359412042, |
|
"grad_norm": 0.9510770172761661, |
|
"learning_rate": 7.507886435331231e-06, |
|
"loss": 0.0759, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.22664770033191087, |
|
"grad_norm": 1.2025915899822757, |
|
"learning_rate": 7.5394321766561515e-06, |
|
"loss": 0.0807, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.22759601706970128, |
|
"grad_norm": 1.1640028047547857, |
|
"learning_rate": 7.570977917981073e-06, |
|
"loss": 0.0709, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2285443338074917, |
|
"grad_norm": 1.5223127858935517, |
|
"learning_rate": 7.602523659305995e-06, |
|
"loss": 0.1018, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.22949265054528212, |
|
"grad_norm": 1.8495916864800697, |
|
"learning_rate": 7.634069400630916e-06, |
|
"loss": 0.0968, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.23044096728307253, |
|
"grad_norm": 1.8476848640745251, |
|
"learning_rate": 7.665615141955837e-06, |
|
"loss": 0.086, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.23138928402086298, |
|
"grad_norm": 1.4644626825262619, |
|
"learning_rate": 7.697160883280757e-06, |
|
"loss": 0.0974, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.2323376007586534, |
|
"grad_norm": 1.8857810882326624, |
|
"learning_rate": 7.728706624605679e-06, |
|
"loss": 0.1036, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2332859174964438, |
|
"grad_norm": 1.7638762752182895, |
|
"learning_rate": 7.7602523659306e-06, |
|
"loss": 0.1097, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.23423423423423423, |
|
"grad_norm": 1.2348758426158113, |
|
"learning_rate": 7.791798107255522e-06, |
|
"loss": 0.0866, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.23518255097202465, |
|
"grad_norm": 1.1223471436540764, |
|
"learning_rate": 7.823343848580442e-06, |
|
"loss": 0.0564, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.2361308677098151, |
|
"grad_norm": 0.8821001750676984, |
|
"learning_rate": 7.854889589905364e-06, |
|
"loss": 0.0696, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.2370791844476055, |
|
"grad_norm": 0.9899264223411232, |
|
"learning_rate": 7.886435331230284e-06, |
|
"loss": 0.0702, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23802750118539592, |
|
"grad_norm": 0.9289219027994224, |
|
"learning_rate": 7.917981072555205e-06, |
|
"loss": 0.0843, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.23897581792318634, |
|
"grad_norm": 1.0579670590751298, |
|
"learning_rate": 7.949526813880127e-06, |
|
"loss": 0.0921, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.23992413466097676, |
|
"grad_norm": 1.4593486745973783, |
|
"learning_rate": 7.981072555205049e-06, |
|
"loss": 0.1229, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2408724513987672, |
|
"grad_norm": 0.9496576247693762, |
|
"learning_rate": 8.01261829652997e-06, |
|
"loss": 0.0861, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.24182076813655762, |
|
"grad_norm": 1.1030565317688061, |
|
"learning_rate": 8.04416403785489e-06, |
|
"loss": 0.0893, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.24276908487434803, |
|
"grad_norm": 0.9907604990146169, |
|
"learning_rate": 8.07570977917981e-06, |
|
"loss": 0.0928, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.24371740161213845, |
|
"grad_norm": 0.9460810229319789, |
|
"learning_rate": 8.107255520504732e-06, |
|
"loss": 0.0974, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.24466571834992887, |
|
"grad_norm": 0.8329291976282354, |
|
"learning_rate": 8.138801261829655e-06, |
|
"loss": 0.077, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.24561403508771928, |
|
"grad_norm": 0.8587085474520708, |
|
"learning_rate": 8.170347003154575e-06, |
|
"loss": 0.0837, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.24656235182550973, |
|
"grad_norm": 0.9113223159844124, |
|
"learning_rate": 8.201892744479495e-06, |
|
"loss": 0.088, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.24751066856330015, |
|
"grad_norm": 0.8328940868524983, |
|
"learning_rate": 8.233438485804417e-06, |
|
"loss": 0.091, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.24845898530109056, |
|
"grad_norm": 1.4264090310082065, |
|
"learning_rate": 8.264984227129338e-06, |
|
"loss": 0.1354, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.24940730203888098, |
|
"grad_norm": 1.0550225951223755, |
|
"learning_rate": 8.296529968454258e-06, |
|
"loss": 0.0972, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.2503556187766714, |
|
"grad_norm": 1.053508559451355, |
|
"learning_rate": 8.32807570977918e-06, |
|
"loss": 0.1035, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.25130393551446184, |
|
"grad_norm": 1.4971087544821369, |
|
"learning_rate": 8.359621451104102e-06, |
|
"loss": 0.1001, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.25225225225225223, |
|
"grad_norm": 1.075521297085326, |
|
"learning_rate": 8.391167192429023e-06, |
|
"loss": 0.0923, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2532005689900427, |
|
"grad_norm": 1.6910075728505873, |
|
"learning_rate": 8.422712933753943e-06, |
|
"loss": 0.1212, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2541488857278331, |
|
"grad_norm": 1.5073460991202734, |
|
"learning_rate": 8.454258675078865e-06, |
|
"loss": 0.087, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.2550972024656235, |
|
"grad_norm": 1.0201575671512444, |
|
"learning_rate": 8.485804416403787e-06, |
|
"loss": 0.0871, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.25604551920341395, |
|
"grad_norm": 1.1193230353064818, |
|
"learning_rate": 8.517350157728708e-06, |
|
"loss": 0.1031, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.25699383594120434, |
|
"grad_norm": 1.3593779355277376, |
|
"learning_rate": 8.548895899053628e-06, |
|
"loss": 0.0861, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.2579421526789948, |
|
"grad_norm": 1.5824627519870196, |
|
"learning_rate": 8.58044164037855e-06, |
|
"loss": 0.0998, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.25889046941678523, |
|
"grad_norm": 2.316620691088296, |
|
"learning_rate": 8.61198738170347e-06, |
|
"loss": 0.1237, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.2598387861545756, |
|
"grad_norm": 1.3708391836342668, |
|
"learning_rate": 8.643533123028391e-06, |
|
"loss": 0.0806, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.26078710289236606, |
|
"grad_norm": 1.259879695037933, |
|
"learning_rate": 8.675078864353313e-06, |
|
"loss": 0.088, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.26173541963015645, |
|
"grad_norm": 1.236718933875791, |
|
"learning_rate": 8.706624605678234e-06, |
|
"loss": 0.0842, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.2626837363679469, |
|
"grad_norm": 1.438488419989871, |
|
"learning_rate": 8.738170347003156e-06, |
|
"loss": 0.0955, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.26363205310573734, |
|
"grad_norm": 0.9563516338397714, |
|
"learning_rate": 8.769716088328076e-06, |
|
"loss": 0.0761, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.26458036984352773, |
|
"grad_norm": 1.2728124128011007, |
|
"learning_rate": 8.801261829652997e-06, |
|
"loss": 0.0805, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.2655286865813182, |
|
"grad_norm": 1.2205595373118223, |
|
"learning_rate": 8.832807570977919e-06, |
|
"loss": 0.0879, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.26647700331910856, |
|
"grad_norm": 0.959493141925286, |
|
"learning_rate": 8.86435331230284e-06, |
|
"loss": 0.0728, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.267425320056899, |
|
"grad_norm": 1.4340945839201555, |
|
"learning_rate": 8.895899053627761e-06, |
|
"loss": 0.0897, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.26837363679468945, |
|
"grad_norm": 1.0061297486879381, |
|
"learning_rate": 8.927444794952682e-06, |
|
"loss": 0.0857, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.26932195353247984, |
|
"grad_norm": 1.5459293734675696, |
|
"learning_rate": 8.958990536277604e-06, |
|
"loss": 0.1029, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.2702702702702703, |
|
"grad_norm": 1.3222303946698841, |
|
"learning_rate": 8.990536277602524e-06, |
|
"loss": 0.084, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2712185870080607, |
|
"grad_norm": 1.185863549947665, |
|
"learning_rate": 9.022082018927446e-06, |
|
"loss": 0.1311, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.2721669037458511, |
|
"grad_norm": 0.8959238307125761, |
|
"learning_rate": 9.053627760252367e-06, |
|
"loss": 0.067, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.27311522048364156, |
|
"grad_norm": 1.369443136318961, |
|
"learning_rate": 9.085173501577289e-06, |
|
"loss": 0.1093, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.27406353722143195, |
|
"grad_norm": 1.1052390238476015, |
|
"learning_rate": 9.116719242902209e-06, |
|
"loss": 0.103, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2750118539592224, |
|
"grad_norm": 1.325059650748033, |
|
"learning_rate": 9.14826498422713e-06, |
|
"loss": 0.1111, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2759601706970128, |
|
"grad_norm": 1.3248936963910136, |
|
"learning_rate": 9.17981072555205e-06, |
|
"loss": 0.0933, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.27690848743480323, |
|
"grad_norm": 1.127118183479871, |
|
"learning_rate": 9.211356466876972e-06, |
|
"loss": 0.0891, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2778568041725936, |
|
"grad_norm": 1.3108916887707827, |
|
"learning_rate": 9.242902208201894e-06, |
|
"loss": 0.0939, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.27880512091038406, |
|
"grad_norm": 1.0013886049046197, |
|
"learning_rate": 9.274447949526815e-06, |
|
"loss": 0.0692, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2797534376481745, |
|
"grad_norm": 1.1156101698361054, |
|
"learning_rate": 9.305993690851735e-06, |
|
"loss": 0.0868, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2807017543859649, |
|
"grad_norm": 1.2522202479933553, |
|
"learning_rate": 9.337539432176657e-06, |
|
"loss": 0.0914, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.28165007112375534, |
|
"grad_norm": 1.3755827124206237, |
|
"learning_rate": 9.369085173501577e-06, |
|
"loss": 0.0936, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.28259838786154573, |
|
"grad_norm": 1.4694162511089293, |
|
"learning_rate": 9.4006309148265e-06, |
|
"loss": 0.1071, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2835467045993362, |
|
"grad_norm": 1.255879045911956, |
|
"learning_rate": 9.43217665615142e-06, |
|
"loss": 0.0815, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.2844950213371266, |
|
"grad_norm": 1.560204819302283, |
|
"learning_rate": 9.463722397476342e-06, |
|
"loss": 0.1234, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.285443338074917, |
|
"grad_norm": 1.0121817898281276, |
|
"learning_rate": 9.495268138801262e-06, |
|
"loss": 0.0595, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.28639165481270745, |
|
"grad_norm": 1.0711466156341418, |
|
"learning_rate": 9.526813880126183e-06, |
|
"loss": 0.0641, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.28733997155049784, |
|
"grad_norm": 1.1496695710149105, |
|
"learning_rate": 9.558359621451105e-06, |
|
"loss": 0.0761, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2882882882882883, |
|
"grad_norm": 1.2059272704315518, |
|
"learning_rate": 9.589905362776027e-06, |
|
"loss": 0.0756, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.28923660502607873, |
|
"grad_norm": 1.0424292745296735, |
|
"learning_rate": 9.621451104100947e-06, |
|
"loss": 0.0855, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2901849217638691, |
|
"grad_norm": 1.1497786768197902, |
|
"learning_rate": 9.652996845425868e-06, |
|
"loss": 0.071, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.29113323850165956, |
|
"grad_norm": 1.3472444992692172, |
|
"learning_rate": 9.68454258675079e-06, |
|
"loss": 0.0934, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.29208155523944995, |
|
"grad_norm": 1.3345310370843513, |
|
"learning_rate": 9.71608832807571e-06, |
|
"loss": 0.0998, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2930298719772404, |
|
"grad_norm": 1.01109508034154, |
|
"learning_rate": 9.747634069400632e-06, |
|
"loss": 0.0762, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.29397818871503084, |
|
"grad_norm": 0.9249973635125475, |
|
"learning_rate": 9.779179810725553e-06, |
|
"loss": 0.074, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.29492650545282123, |
|
"grad_norm": 0.804446344253587, |
|
"learning_rate": 9.810725552050473e-06, |
|
"loss": 0.0517, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2958748221906117, |
|
"grad_norm": 0.965596925556689, |
|
"learning_rate": 9.842271293375395e-06, |
|
"loss": 0.098, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.29682313892840206, |
|
"grad_norm": 2.012807451707843, |
|
"learning_rate": 9.873817034700316e-06, |
|
"loss": 0.1038, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2977714556661925, |
|
"grad_norm": 1.2864066063043205, |
|
"learning_rate": 9.905362776025236e-06, |
|
"loss": 0.1102, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.29871977240398295, |
|
"grad_norm": 0.8775284858258785, |
|
"learning_rate": 9.936908517350158e-06, |
|
"loss": 0.0913, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.29966808914177334, |
|
"grad_norm": 0.9395466275555749, |
|
"learning_rate": 9.96845425867508e-06, |
|
"loss": 0.1156, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.3006164058795638, |
|
"grad_norm": 1.031977177693936, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0772, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.3015647226173542, |
|
"grad_norm": 0.906696222035988, |
|
"learning_rate": 9.999996951577431e-06, |
|
"loss": 0.0745, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.3025130393551446, |
|
"grad_norm": 1.6486632782552955, |
|
"learning_rate": 9.999987806313436e-06, |
|
"loss": 0.1295, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.30346135609293506, |
|
"grad_norm": 1.0682004904191784, |
|
"learning_rate": 9.999972564219169e-06, |
|
"loss": 0.089, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.30440967283072545, |
|
"grad_norm": 1.0160084965418597, |
|
"learning_rate": 9.999951225313217e-06, |
|
"loss": 0.0795, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.3053579895685159, |
|
"grad_norm": 1.1229797355618714, |
|
"learning_rate": 9.999923789621598e-06, |
|
"loss": 0.0924, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.3063063063063063, |
|
"grad_norm": 0.9925832526069106, |
|
"learning_rate": 9.999890257177766e-06, |
|
"loss": 0.0803, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.30725462304409673, |
|
"grad_norm": 1.1785860516178814, |
|
"learning_rate": 9.999850628022611e-06, |
|
"loss": 0.0797, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.3082029397818872, |
|
"grad_norm": 1.1520304204509717, |
|
"learning_rate": 9.999804902204455e-06, |
|
"loss": 0.0775, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.30915125651967756, |
|
"grad_norm": 1.0880132191910508, |
|
"learning_rate": 9.999753079779054e-06, |
|
"loss": 0.0906, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.310099573257468, |
|
"grad_norm": 1.5767657455822397, |
|
"learning_rate": 9.999695160809598e-06, |
|
"loss": 0.0956, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.3110478899952584, |
|
"grad_norm": 0.7125012678361342, |
|
"learning_rate": 9.999631145366713e-06, |
|
"loss": 0.0661, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.31199620673304884, |
|
"grad_norm": 1.088584252037159, |
|
"learning_rate": 9.999561033528457e-06, |
|
"loss": 0.1149, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.3129445234708393, |
|
"grad_norm": 0.8523222222870042, |
|
"learning_rate": 9.999484825380323e-06, |
|
"loss": 0.0913, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3138928402086297, |
|
"grad_norm": 1.0164571883774136, |
|
"learning_rate": 9.999402521015236e-06, |
|
"loss": 0.0878, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.3148411569464201, |
|
"grad_norm": 0.7164573705993513, |
|
"learning_rate": 9.999314120533557e-06, |
|
"loss": 0.0866, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.3157894736842105, |
|
"grad_norm": 0.7954216406429697, |
|
"learning_rate": 9.999219624043075e-06, |
|
"loss": 0.0702, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.31673779042200095, |
|
"grad_norm": 0.7996263107367133, |
|
"learning_rate": 9.99911903165902e-06, |
|
"loss": 0.0758, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.3176861071597914, |
|
"grad_norm": 1.101451187378474, |
|
"learning_rate": 9.999012343504049e-06, |
|
"loss": 0.0957, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3186344238975818, |
|
"grad_norm": 0.7265535166036453, |
|
"learning_rate": 9.998899559708254e-06, |
|
"loss": 0.0743, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.31958274063537223, |
|
"grad_norm": 1.272801256055057, |
|
"learning_rate": 9.998780680409161e-06, |
|
"loss": 0.0952, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.3205310573731626, |
|
"grad_norm": 0.8770881337944402, |
|
"learning_rate": 9.99865570575173e-06, |
|
"loss": 0.066, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.32147937411095306, |
|
"grad_norm": 1.0607119132841634, |
|
"learning_rate": 9.998524635888347e-06, |
|
"loss": 0.0913, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.32242769084874345, |
|
"grad_norm": 0.9189346974278031, |
|
"learning_rate": 9.998387470978837e-06, |
|
"loss": 0.0881, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3233760075865339, |
|
"grad_norm": 0.7272168469454553, |
|
"learning_rate": 9.998244211190454e-06, |
|
"loss": 0.0713, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.32432432432432434, |
|
"grad_norm": 0.9819255696828616, |
|
"learning_rate": 9.998094856697885e-06, |
|
"loss": 0.0834, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.32527264106211473, |
|
"grad_norm": 0.6857773270509248, |
|
"learning_rate": 9.997939407683249e-06, |
|
"loss": 0.0524, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.3262209577999052, |
|
"grad_norm": 1.0324591704355464, |
|
"learning_rate": 9.99777786433609e-06, |
|
"loss": 0.1108, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.32716927453769556, |
|
"grad_norm": 1.1264206703681527, |
|
"learning_rate": 9.997610226853399e-06, |
|
"loss": 0.0987, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.328117591275486, |
|
"grad_norm": 0.95789066514891, |
|
"learning_rate": 9.997436495439581e-06, |
|
"loss": 0.093, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.32906590801327645, |
|
"grad_norm": 1.0448222803112024, |
|
"learning_rate": 9.997256670306478e-06, |
|
"loss": 0.0983, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.33001422475106684, |
|
"grad_norm": 0.7737283316563024, |
|
"learning_rate": 9.997070751673367e-06, |
|
"loss": 0.0706, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.3309625414888573, |
|
"grad_norm": 0.9596984880180834, |
|
"learning_rate": 9.99687873976695e-06, |
|
"loss": 0.0991, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.3319108582266477, |
|
"grad_norm": 0.8411109119380658, |
|
"learning_rate": 9.99668063482136e-06, |
|
"loss": 0.0678, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3328591749644381, |
|
"grad_norm": 1.136491883808786, |
|
"learning_rate": 9.996476437078162e-06, |
|
"loss": 0.0986, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.33380749170222856, |
|
"grad_norm": 3.03438587624818, |
|
"learning_rate": 9.996266146786344e-06, |
|
"loss": 0.0969, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.33475580844001895, |
|
"grad_norm": 1.2333568047254937, |
|
"learning_rate": 9.996049764202332e-06, |
|
"loss": 0.0832, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.3357041251778094, |
|
"grad_norm": 1.1301139087376384, |
|
"learning_rate": 9.995827289589974e-06, |
|
"loss": 0.0994, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.3366524419155998, |
|
"grad_norm": 1.0303329732235522, |
|
"learning_rate": 9.995598723220548e-06, |
|
"loss": 0.0757, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.33760075865339023, |
|
"grad_norm": 1.0605991674508604, |
|
"learning_rate": 9.995364065372762e-06, |
|
"loss": 0.0815, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.3385490753911807, |
|
"grad_norm": 0.7941030771981634, |
|
"learning_rate": 9.995123316332752e-06, |
|
"loss": 0.0747, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.33949739212897106, |
|
"grad_norm": 1.2313896272302265, |
|
"learning_rate": 9.994876476394075e-06, |
|
"loss": 0.0769, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.3404457088667615, |
|
"grad_norm": 1.1944743493159886, |
|
"learning_rate": 9.994623545857727e-06, |
|
"loss": 0.0979, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.3413940256045519, |
|
"grad_norm": 0.8285281294809631, |
|
"learning_rate": 9.994364525032116e-06, |
|
"loss": 0.0793, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.34234234234234234, |
|
"grad_norm": 1.4761389910370195, |
|
"learning_rate": 9.994099414233091e-06, |
|
"loss": 0.0913, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.3432906590801328, |
|
"grad_norm": 1.5408966458771916, |
|
"learning_rate": 9.993828213783915e-06, |
|
"loss": 0.0973, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.3442389758179232, |
|
"grad_norm": 1.4559933930399096, |
|
"learning_rate": 9.993550924015283e-06, |
|
"loss": 0.0999, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.3451872925557136, |
|
"grad_norm": 0.8454336561992738, |
|
"learning_rate": 9.993267545265314e-06, |
|
"loss": 0.0655, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.346135609293504, |
|
"grad_norm": 0.796992439441769, |
|
"learning_rate": 9.992978077879552e-06, |
|
"loss": 0.0696, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.34708392603129445, |
|
"grad_norm": 1.0553149426590827, |
|
"learning_rate": 9.992682522210963e-06, |
|
"loss": 0.0787, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.3480322427690849, |
|
"grad_norm": 1.4860431297237584, |
|
"learning_rate": 9.992380878619939e-06, |
|
"loss": 0.106, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.3489805595068753, |
|
"grad_norm": 1.3032907057151817, |
|
"learning_rate": 9.992073147474292e-06, |
|
"loss": 0.1021, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.34992887624466573, |
|
"grad_norm": 1.0894704335759804, |
|
"learning_rate": 9.991759329149266e-06, |
|
"loss": 0.0905, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 1.1130576081628205, |
|
"learning_rate": 9.991439424027518e-06, |
|
"loss": 0.0846, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.35182550972024657, |
|
"grad_norm": 0.9253664091514998, |
|
"learning_rate": 9.991113432499128e-06, |
|
"loss": 0.0882, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.352773826458037, |
|
"grad_norm": 0.841899923853967, |
|
"learning_rate": 9.990781354961605e-06, |
|
"loss": 0.0806, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.3537221431958274, |
|
"grad_norm": 0.9407729946270026, |
|
"learning_rate": 9.99044319181987e-06, |
|
"loss": 0.0939, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.35467045993361784, |
|
"grad_norm": 0.9090058769044609, |
|
"learning_rate": 9.99009894348627e-06, |
|
"loss": 0.0891, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.35561877667140823, |
|
"grad_norm": 0.6294083333837054, |
|
"learning_rate": 9.989748610380571e-06, |
|
"loss": 0.0706, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3565670934091987, |
|
"grad_norm": 0.9163781177038506, |
|
"learning_rate": 9.98939219292996e-06, |
|
"loss": 0.0697, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3575154101469891, |
|
"grad_norm": 1.1693511630739546, |
|
"learning_rate": 9.989029691569037e-06, |
|
"loss": 0.1056, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3584637268847795, |
|
"grad_norm": 1.0414233510818562, |
|
"learning_rate": 9.988661106739827e-06, |
|
"loss": 0.0988, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.35941204362256995, |
|
"grad_norm": 1.2822153621266594, |
|
"learning_rate": 9.988286438891774e-06, |
|
"loss": 0.1189, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.36036036036036034, |
|
"grad_norm": 0.63669429794073, |
|
"learning_rate": 9.987905688481732e-06, |
|
"loss": 0.0828, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3613086770981508, |
|
"grad_norm": 0.826754093590745, |
|
"learning_rate": 9.98751885597398e-06, |
|
"loss": 0.0848, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3622569938359412, |
|
"grad_norm": 0.8825949393702691, |
|
"learning_rate": 9.987125941840205e-06, |
|
"loss": 0.092, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3632053105737316, |
|
"grad_norm": 0.6103241173744877, |
|
"learning_rate": 9.986726946559517e-06, |
|
"loss": 0.08, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.36415362731152207, |
|
"grad_norm": 0.7105367439957658, |
|
"learning_rate": 9.986321870618441e-06, |
|
"loss": 0.0685, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.36510194404931245, |
|
"grad_norm": 1.802287343988455, |
|
"learning_rate": 9.985910714510908e-06, |
|
"loss": 0.0818, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3660502607871029, |
|
"grad_norm": 0.7732813708584271, |
|
"learning_rate": 9.985493478738275e-06, |
|
"loss": 0.07, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3669985775248933, |
|
"grad_norm": 0.8451643375246307, |
|
"learning_rate": 9.985070163809306e-06, |
|
"loss": 0.0744, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.36794689426268373, |
|
"grad_norm": 1.126067442650852, |
|
"learning_rate": 9.984640770240173e-06, |
|
"loss": 0.1101, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.3688952110004742, |
|
"grad_norm": 0.6652401258855057, |
|
"learning_rate": 9.984205298554467e-06, |
|
"loss": 0.0663, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.36984352773826457, |
|
"grad_norm": 1.0802552975196003, |
|
"learning_rate": 9.983763749283193e-06, |
|
"loss": 0.0975, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.370791844476055, |
|
"grad_norm": 0.7496808510910429, |
|
"learning_rate": 9.983316122964757e-06, |
|
"loss": 0.0701, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.3717401612138454, |
|
"grad_norm": 0.6248602765248035, |
|
"learning_rate": 9.982862420144986e-06, |
|
"loss": 0.0643, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.37268847795163584, |
|
"grad_norm": 1.7058022738803864, |
|
"learning_rate": 9.982402641377105e-06, |
|
"loss": 0.0936, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3736367946894263, |
|
"grad_norm": 1.205579756742393, |
|
"learning_rate": 9.98193678722176e-06, |
|
"loss": 0.0811, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.3745851114272167, |
|
"grad_norm": 0.8021701752607538, |
|
"learning_rate": 9.981464858246993e-06, |
|
"loss": 0.0719, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3755334281650071, |
|
"grad_norm": 0.9210208736552777, |
|
"learning_rate": 9.980986855028267e-06, |
|
"loss": 0.0589, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3764817449027975, |
|
"grad_norm": 1.0458476195224804, |
|
"learning_rate": 9.980502778148438e-06, |
|
"loss": 0.0696, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.37743006164058795, |
|
"grad_norm": 1.5095103680379303, |
|
"learning_rate": 9.980012628197778e-06, |
|
"loss": 0.0909, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3783783783783784, |
|
"grad_norm": 0.9521689001456719, |
|
"learning_rate": 9.979516405773956e-06, |
|
"loss": 0.0844, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3793266951161688, |
|
"grad_norm": 0.9909335290642662, |
|
"learning_rate": 9.979014111482057e-06, |
|
"loss": 0.079, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.38027501185395923, |
|
"grad_norm": 1.300023515267878, |
|
"learning_rate": 9.978505745934559e-06, |
|
"loss": 0.1087, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3812233285917496, |
|
"grad_norm": 0.8905160216053487, |
|
"learning_rate": 9.977991309751347e-06, |
|
"loss": 0.0654, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.38217164532954007, |
|
"grad_norm": 0.7908744916198801, |
|
"learning_rate": 9.97747080355971e-06, |
|
"loss": 0.0697, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3831199620673305, |
|
"grad_norm": 1.0819522254088034, |
|
"learning_rate": 9.976944227994337e-06, |
|
"loss": 0.0729, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3840682788051209, |
|
"grad_norm": 0.9319836261266163, |
|
"learning_rate": 9.976411583697316e-06, |
|
"loss": 0.077, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.38501659554291134, |
|
"grad_norm": 0.7209233770781128, |
|
"learning_rate": 9.97587287131814e-06, |
|
"loss": 0.0708, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.38596491228070173, |
|
"grad_norm": 0.8430932582390814, |
|
"learning_rate": 9.975328091513696e-06, |
|
"loss": 0.07, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3869132290184922, |
|
"grad_norm": 0.7932090811238357, |
|
"learning_rate": 9.974777244948271e-06, |
|
"loss": 0.0648, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3878615457562826, |
|
"grad_norm": 0.9213278429313838, |
|
"learning_rate": 9.974220332293554e-06, |
|
"loss": 0.0737, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.388809862494073, |
|
"grad_norm": 0.4369389269684112, |
|
"learning_rate": 9.973657354228623e-06, |
|
"loss": 0.0509, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.38975817923186346, |
|
"grad_norm": 0.7988805293653696, |
|
"learning_rate": 9.973088311439957e-06, |
|
"loss": 0.0684, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.39070649596965384, |
|
"grad_norm": 0.9648310793568026, |
|
"learning_rate": 9.97251320462143e-06, |
|
"loss": 0.0849, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3916548127074443, |
|
"grad_norm": 0.7585613690692753, |
|
"learning_rate": 9.97193203447431e-06, |
|
"loss": 0.077, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.39260312944523473, |
|
"grad_norm": 0.9380377046145346, |
|
"learning_rate": 9.971344801707256e-06, |
|
"loss": 0.0771, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3935514461830251, |
|
"grad_norm": 0.9822247506181627, |
|
"learning_rate": 9.970751507036323e-06, |
|
"loss": 0.1123, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.39449976292081557, |
|
"grad_norm": 0.7156423865364446, |
|
"learning_rate": 9.970152151184956e-06, |
|
"loss": 0.0801, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.39544807965860596, |
|
"grad_norm": 1.05912629502688, |
|
"learning_rate": 9.96954673488399e-06, |
|
"loss": 0.0804, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3963963963963964, |
|
"grad_norm": 1.1230479850270394, |
|
"learning_rate": 9.968935258871652e-06, |
|
"loss": 0.0799, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.39734471313418684, |
|
"grad_norm": 1.0054642393242061, |
|
"learning_rate": 9.968317723893556e-06, |
|
"loss": 0.082, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.39829302987197723, |
|
"grad_norm": 1.227859524837509, |
|
"learning_rate": 9.967694130702706e-06, |
|
"loss": 0.1069, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3992413466097677, |
|
"grad_norm": 1.2136272659300074, |
|
"learning_rate": 9.96706448005949e-06, |
|
"loss": 0.1112, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.40018966334755807, |
|
"grad_norm": 0.9692912194018656, |
|
"learning_rate": 9.96642877273169e-06, |
|
"loss": 0.0837, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.4011379800853485, |
|
"grad_norm": 0.7181203670103851, |
|
"learning_rate": 9.965787009494458e-06, |
|
"loss": 0.0648, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.40208629682313896, |
|
"grad_norm": 0.9389223502528147, |
|
"learning_rate": 9.96513919113035e-06, |
|
"loss": 0.0846, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.40303461356092934, |
|
"grad_norm": 0.6566856036851983, |
|
"learning_rate": 9.964485318429292e-06, |
|
"loss": 0.0776, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4039829302987198, |
|
"grad_norm": 1.0028156563396406, |
|
"learning_rate": 9.963825392188595e-06, |
|
"loss": 0.0719, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.4049312470365102, |
|
"grad_norm": 0.9682157984093804, |
|
"learning_rate": 9.963159413212952e-06, |
|
"loss": 0.1058, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.4058795637743006, |
|
"grad_norm": 1.1561667939356075, |
|
"learning_rate": 9.96248738231444e-06, |
|
"loss": 0.0982, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.406827880512091, |
|
"grad_norm": 0.7960344078481167, |
|
"learning_rate": 9.961809300312512e-06, |
|
"loss": 0.0643, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.40777619724988146, |
|
"grad_norm": 0.914323773268032, |
|
"learning_rate": 9.961125168034e-06, |
|
"loss": 0.0835, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4087245139876719, |
|
"grad_norm": 0.7441869330920762, |
|
"learning_rate": 9.960434986313113e-06, |
|
"loss": 0.0559, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.4096728307254623, |
|
"grad_norm": 2.4732017252552367, |
|
"learning_rate": 9.959738755991437e-06, |
|
"loss": 0.1445, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.41062114746325273, |
|
"grad_norm": 0.8533585342555405, |
|
"learning_rate": 9.959036477917935e-06, |
|
"loss": 0.0575, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.4115694642010431, |
|
"grad_norm": 0.8190438451317316, |
|
"learning_rate": 9.95832815294894e-06, |
|
"loss": 0.0794, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.41251778093883357, |
|
"grad_norm": 1.0046620676404385, |
|
"learning_rate": 9.957613781948164e-06, |
|
"loss": 0.0686, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.413466097676624, |
|
"grad_norm": 0.9887051267008984, |
|
"learning_rate": 9.956893365786691e-06, |
|
"loss": 0.0618, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4144144144144144, |
|
"grad_norm": 0.6105909207601089, |
|
"learning_rate": 9.95616690534297e-06, |
|
"loss": 0.0572, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.41536273115220484, |
|
"grad_norm": 1.5234824479103468, |
|
"learning_rate": 9.955434401502825e-06, |
|
"loss": 0.0994, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.41631104788999523, |
|
"grad_norm": 1.1295839815001452, |
|
"learning_rate": 9.954695855159454e-06, |
|
"loss": 0.073, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.4172593646277857, |
|
"grad_norm": 0.6583329952843571, |
|
"learning_rate": 9.95395126721341e-06, |
|
"loss": 0.0699, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4182076813655761, |
|
"grad_norm": 0.955937586299997, |
|
"learning_rate": 9.953200638572625e-06, |
|
"loss": 0.0815, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.4191559981033665, |
|
"grad_norm": 1.5323108400108396, |
|
"learning_rate": 9.95244397015239e-06, |
|
"loss": 0.0732, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.42010431484115696, |
|
"grad_norm": 1.677920724371183, |
|
"learning_rate": 9.951681262875365e-06, |
|
"loss": 0.0944, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 0.8926328574943209, |
|
"learning_rate": 9.95091251767157e-06, |
|
"loss": 0.0731, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.4220009483167378, |
|
"grad_norm": 1.2692898943255297, |
|
"learning_rate": 9.950137735478389e-06, |
|
"loss": 0.1029, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.42294926505452823, |
|
"grad_norm": 0.7345506207483801, |
|
"learning_rate": 9.949356917240569e-06, |
|
"loss": 0.0748, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.4238975817923186, |
|
"grad_norm": 1.2435473519034808, |
|
"learning_rate": 9.948570063910216e-06, |
|
"loss": 0.1009, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.42484589853010907, |
|
"grad_norm": 0.7650866909769807, |
|
"learning_rate": 9.947777176446792e-06, |
|
"loss": 0.0746, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.42579421526789946, |
|
"grad_norm": 1.3807429981979404, |
|
"learning_rate": 9.946978255817121e-06, |
|
"loss": 0.0701, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.4267425320056899, |
|
"grad_norm": 0.5315623424461096, |
|
"learning_rate": 9.946173302995382e-06, |
|
"loss": 0.0574, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.42769084874348035, |
|
"grad_norm": 0.8562951763201797, |
|
"learning_rate": 9.94536231896311e-06, |
|
"loss": 0.0951, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.42863916548127073, |
|
"grad_norm": 1.1965590998104225, |
|
"learning_rate": 9.944545304709192e-06, |
|
"loss": 0.0877, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4295874822190612, |
|
"grad_norm": 1.2735339749816497, |
|
"learning_rate": 9.943722261229872e-06, |
|
"loss": 0.0768, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.43053579895685157, |
|
"grad_norm": 0.9370658659046329, |
|
"learning_rate": 9.942893189528743e-06, |
|
"loss": 0.0782, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.431484115694642, |
|
"grad_norm": 1.5520551397042521, |
|
"learning_rate": 9.942058090616748e-06, |
|
"loss": 0.1039, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.43243243243243246, |
|
"grad_norm": 1.3529615602541014, |
|
"learning_rate": 9.941216965512183e-06, |
|
"loss": 0.0867, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.43338074917022285, |
|
"grad_norm": 1.192234505990805, |
|
"learning_rate": 9.940369815240688e-06, |
|
"loss": 0.0809, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.4343290659080133, |
|
"grad_norm": 0.9763205758532367, |
|
"learning_rate": 9.939516640835254e-06, |
|
"loss": 0.0652, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4352773826458037, |
|
"grad_norm": 1.3415645605638937, |
|
"learning_rate": 9.938657443336212e-06, |
|
"loss": 0.109, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.4362256993835941, |
|
"grad_norm": 1.1595154129634277, |
|
"learning_rate": 9.937792223791244e-06, |
|
"loss": 0.1002, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.43717401612138457, |
|
"grad_norm": 1.33436975844217, |
|
"learning_rate": 9.936920983255372e-06, |
|
"loss": 0.114, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.43812233285917496, |
|
"grad_norm": 1.0009653043703806, |
|
"learning_rate": 9.936043722790956e-06, |
|
"loss": 0.0827, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.4390706495969654, |
|
"grad_norm": 1.1900315382859075, |
|
"learning_rate": 9.935160443467704e-06, |
|
"loss": 0.0991, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.4400189663347558, |
|
"grad_norm": 0.7796648666540394, |
|
"learning_rate": 9.934271146362658e-06, |
|
"loss": 0.0729, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.44096728307254623, |
|
"grad_norm": 0.7692033539386839, |
|
"learning_rate": 9.933375832560199e-06, |
|
"loss": 0.0752, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4419155998103367, |
|
"grad_norm": 0.7898679053377281, |
|
"learning_rate": 9.932474503152047e-06, |
|
"loss": 0.0557, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.44286391654812707, |
|
"grad_norm": 1.308054442070126, |
|
"learning_rate": 9.931567159237252e-06, |
|
"loss": 0.1, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.4438122332859175, |
|
"grad_norm": 0.8281027248286734, |
|
"learning_rate": 9.930653801922205e-06, |
|
"loss": 0.1066, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.4447605500237079, |
|
"grad_norm": 0.6589498594732086, |
|
"learning_rate": 9.929734432320621e-06, |
|
"loss": 0.061, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.44570886676149835, |
|
"grad_norm": 1.0105820136512023, |
|
"learning_rate": 9.928809051553554e-06, |
|
"loss": 0.0771, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4466571834992888, |
|
"grad_norm": 1.174475732403723, |
|
"learning_rate": 9.927877660749385e-06, |
|
"loss": 0.1029, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.4476055002370792, |
|
"grad_norm": 0.7007588523937572, |
|
"learning_rate": 9.92694026104382e-06, |
|
"loss": 0.0548, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.4485538169748696, |
|
"grad_norm": 0.7548622992450297, |
|
"learning_rate": 9.925996853579897e-06, |
|
"loss": 0.071, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.44950213371266, |
|
"grad_norm": 0.9151211373906433, |
|
"learning_rate": 9.92504743950798e-06, |
|
"loss": 0.0728, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.45045045045045046, |
|
"grad_norm": 1.3188113799099948, |
|
"learning_rate": 9.924092019985751e-06, |
|
"loss": 0.071, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.45139876718824085, |
|
"grad_norm": 0.834826643366671, |
|
"learning_rate": 9.923130596178221e-06, |
|
"loss": 0.0827, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.4523470839260313, |
|
"grad_norm": 0.8853088211117691, |
|
"learning_rate": 9.922163169257722e-06, |
|
"loss": 0.0714, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.45329540066382173, |
|
"grad_norm": 0.9773650061711494, |
|
"learning_rate": 9.921189740403902e-06, |
|
"loss": 0.0902, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.4542437174016121, |
|
"grad_norm": 0.8530429782086267, |
|
"learning_rate": 9.92021031080373e-06, |
|
"loss": 0.0896, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.45519203413940257, |
|
"grad_norm": 0.6841245724165017, |
|
"learning_rate": 9.919224881651494e-06, |
|
"loss": 0.0574, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.45614035087719296, |
|
"grad_norm": 0.8751901827667304, |
|
"learning_rate": 9.918233454148795e-06, |
|
"loss": 0.0712, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.4570886676149834, |
|
"grad_norm": 0.8605318101074332, |
|
"learning_rate": 9.917236029504549e-06, |
|
"loss": 0.0758, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.45803698435277385, |
|
"grad_norm": 0.6297402738230038, |
|
"learning_rate": 9.916232608934982e-06, |
|
"loss": 0.0835, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.45898530109056423, |
|
"grad_norm": 1.2633792305334934, |
|
"learning_rate": 9.915223193663639e-06, |
|
"loss": 0.097, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.4599336178283547, |
|
"grad_norm": 0.9453282561376489, |
|
"learning_rate": 9.914207784921366e-06, |
|
"loss": 0.0813, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.46088193456614507, |
|
"grad_norm": 1.0981998450683066, |
|
"learning_rate": 9.913186383946322e-06, |
|
"loss": 0.0831, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.4618302513039355, |
|
"grad_norm": 0.9453607555522517, |
|
"learning_rate": 9.91215899198397e-06, |
|
"loss": 0.0668, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.46277856804172596, |
|
"grad_norm": 0.8480655824160724, |
|
"learning_rate": 9.911125610287085e-06, |
|
"loss": 0.0803, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.46372688477951635, |
|
"grad_norm": 0.7365032755805906, |
|
"learning_rate": 9.910086240115738e-06, |
|
"loss": 0.0503, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.4646752015173068, |
|
"grad_norm": 0.9926545138390478, |
|
"learning_rate": 9.909040882737301e-06, |
|
"loss": 0.0785, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4656235182550972, |
|
"grad_norm": 1.078153469225969, |
|
"learning_rate": 9.907989539426455e-06, |
|
"loss": 0.0942, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.4665718349928876, |
|
"grad_norm": 0.891582918999742, |
|
"learning_rate": 9.906932211465173e-06, |
|
"loss": 0.0713, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.46752015173067807, |
|
"grad_norm": 0.8352029023952229, |
|
"learning_rate": 9.90586890014273e-06, |
|
"loss": 0.0871, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.46846846846846846, |
|
"grad_norm": 1.4543230270611818, |
|
"learning_rate": 9.904799606755695e-06, |
|
"loss": 0.1049, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.4694167852062589, |
|
"grad_norm": 0.9571877161884975, |
|
"learning_rate": 9.90372433260793e-06, |
|
"loss": 0.0856, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4703651019440493, |
|
"grad_norm": 0.6657483404024113, |
|
"learning_rate": 9.90264307901059e-06, |
|
"loss": 0.0631, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.47131341868183974, |
|
"grad_norm": 1.2493973473928695, |
|
"learning_rate": 9.901555847282123e-06, |
|
"loss": 0.0973, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.4722617354196302, |
|
"grad_norm": 0.6689914382563446, |
|
"learning_rate": 9.900462638748266e-06, |
|
"loss": 0.0582, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.47321005215742057, |
|
"grad_norm": 0.8246501895880392, |
|
"learning_rate": 9.899363454742044e-06, |
|
"loss": 0.0727, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.474158368895211, |
|
"grad_norm": 1.442170890658491, |
|
"learning_rate": 9.898258296603769e-06, |
|
"loss": 0.0931, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4751066856330014, |
|
"grad_norm": 0.7582565389247256, |
|
"learning_rate": 9.897147165681034e-06, |
|
"loss": 0.0722, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.47605500237079185, |
|
"grad_norm": 0.627525129279453, |
|
"learning_rate": 9.896030063328718e-06, |
|
"loss": 0.0597, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.4770033191085823, |
|
"grad_norm": 0.6342149242840518, |
|
"learning_rate": 9.894906990908982e-06, |
|
"loss": 0.0725, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.4779516358463727, |
|
"grad_norm": 0.8212079234115165, |
|
"learning_rate": 9.893777949791266e-06, |
|
"loss": 0.0649, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.4788999525841631, |
|
"grad_norm": 0.8923951454231676, |
|
"learning_rate": 9.89264294135229e-06, |
|
"loss": 0.0595, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4798482693219535, |
|
"grad_norm": 1.0318440665130484, |
|
"learning_rate": 9.891501966976041e-06, |
|
"loss": 0.0842, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.48079658605974396, |
|
"grad_norm": 0.6944537972828242, |
|
"learning_rate": 9.890355028053793e-06, |
|
"loss": 0.0752, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4817449027975344, |
|
"grad_norm": 1.0705584030604105, |
|
"learning_rate": 9.889202125984088e-06, |
|
"loss": 0.0647, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4826932195353248, |
|
"grad_norm": 0.9754252622446561, |
|
"learning_rate": 9.88804326217274e-06, |
|
"loss": 0.0687, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.48364153627311524, |
|
"grad_norm": 0.9660762094606946, |
|
"learning_rate": 9.886878438032828e-06, |
|
"loss": 0.0789, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4845898530109056, |
|
"grad_norm": 0.5832722133461282, |
|
"learning_rate": 9.885707654984703e-06, |
|
"loss": 0.0636, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.48553816974869607, |
|
"grad_norm": 0.7052006552554221, |
|
"learning_rate": 9.884530914455984e-06, |
|
"loss": 0.0586, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4864864864864865, |
|
"grad_norm": 0.9822072228951928, |
|
"learning_rate": 9.88334821788155e-06, |
|
"loss": 0.0645, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.4874348032242769, |
|
"grad_norm": 0.9641946540266126, |
|
"learning_rate": 9.882159566703547e-06, |
|
"loss": 0.0885, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.48838311996206735, |
|
"grad_norm": 0.6403136140606015, |
|
"learning_rate": 9.880964962371378e-06, |
|
"loss": 0.0678, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.48933143669985774, |
|
"grad_norm": 0.7486541793123711, |
|
"learning_rate": 9.879764406341705e-06, |
|
"loss": 0.0741, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4902797534376482, |
|
"grad_norm": 0.5779229700891555, |
|
"learning_rate": 9.87855790007845e-06, |
|
"loss": 0.0646, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.49122807017543857, |
|
"grad_norm": 0.7611283230447122, |
|
"learning_rate": 9.87734544505279e-06, |
|
"loss": 0.0768, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.492176386913229, |
|
"grad_norm": 0.5823535883100547, |
|
"learning_rate": 9.876127042743155e-06, |
|
"loss": 0.0703, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.49312470365101946, |
|
"grad_norm": 0.6827829977739827, |
|
"learning_rate": 9.874902694635226e-06, |
|
"loss": 0.0772, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.49407302038880985, |
|
"grad_norm": 0.7254200544564426, |
|
"learning_rate": 9.873672402221937e-06, |
|
"loss": 0.0634, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4950213371266003, |
|
"grad_norm": 0.6425214796651868, |
|
"learning_rate": 9.872436167003468e-06, |
|
"loss": 0.064, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.4959696538643907, |
|
"grad_norm": 0.623192525545158, |
|
"learning_rate": 9.871193990487242e-06, |
|
"loss": 0.077, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.4969179706021811, |
|
"grad_norm": 0.7225947749173619, |
|
"learning_rate": 9.869945874187936e-06, |
|
"loss": 0.075, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.49786628733997157, |
|
"grad_norm": 2.0516616577595435, |
|
"learning_rate": 9.868691819627462e-06, |
|
"loss": 0.0867, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.49881460407776196, |
|
"grad_norm": 1.0257158284306434, |
|
"learning_rate": 9.867431828334974e-06, |
|
"loss": 0.0588, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.4997629208155524, |
|
"grad_norm": 0.8403229438927825, |
|
"learning_rate": 9.86616590184687e-06, |
|
"loss": 0.0823, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.5007112375533428, |
|
"grad_norm": 0.6449240492145598, |
|
"learning_rate": 9.864894041706779e-06, |
|
"loss": 0.0567, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.5016595542911333, |
|
"grad_norm": 0.8789018684523284, |
|
"learning_rate": 9.863616249465567e-06, |
|
"loss": 0.0713, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.5026078710289237, |
|
"grad_norm": 0.9524887983478211, |
|
"learning_rate": 9.862332526681336e-06, |
|
"loss": 0.0835, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5035561877667141, |
|
"grad_norm": 0.6422268170348604, |
|
"learning_rate": 9.861042874919417e-06, |
|
"loss": 0.0606, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.5045045045045045, |
|
"grad_norm": 0.9032374038451735, |
|
"learning_rate": 9.859747295752374e-06, |
|
"loss": 0.0773, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.505452821242295, |
|
"grad_norm": 0.9269404822199643, |
|
"learning_rate": 9.858445790759992e-06, |
|
"loss": 0.0822, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.5064011379800853, |
|
"grad_norm": 0.7043514434980399, |
|
"learning_rate": 9.857138361529288e-06, |
|
"loss": 0.0688, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.5073494547178757, |
|
"grad_norm": 0.8239211698855243, |
|
"learning_rate": 9.8558250096545e-06, |
|
"loss": 0.0542, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5082977714556662, |
|
"grad_norm": 0.8633975590563754, |
|
"learning_rate": 9.85450573673709e-06, |
|
"loss": 0.0744, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.5092460881934566, |
|
"grad_norm": 0.6985004021466871, |
|
"learning_rate": 9.853180544385737e-06, |
|
"loss": 0.047, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.510194404931247, |
|
"grad_norm": 0.5889042803503781, |
|
"learning_rate": 9.851849434216338e-06, |
|
"loss": 0.0557, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.5111427216690374, |
|
"grad_norm": 0.7765705663935071, |
|
"learning_rate": 9.850512407852012e-06, |
|
"loss": 0.0669, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.5120910384068279, |
|
"grad_norm": 0.8204550382112847, |
|
"learning_rate": 9.849169466923086e-06, |
|
"loss": 0.0685, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5130393551446183, |
|
"grad_norm": 0.5256883407913393, |
|
"learning_rate": 9.847820613067098e-06, |
|
"loss": 0.0537, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.5139876718824087, |
|
"grad_norm": 0.6838576750776693, |
|
"learning_rate": 9.8464658479288e-06, |
|
"loss": 0.0704, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.5149359886201992, |
|
"grad_norm": 0.8974806559813661, |
|
"learning_rate": 9.845105173160152e-06, |
|
"loss": 0.0899, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.5158843053579896, |
|
"grad_norm": 0.7219053990698988, |
|
"learning_rate": 9.843738590420317e-06, |
|
"loss": 0.0468, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.51683262209578, |
|
"grad_norm": 1.032987889739876, |
|
"learning_rate": 9.842366101375664e-06, |
|
"loss": 0.0562, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5177809388335705, |
|
"grad_norm": 0.7651951768284668, |
|
"learning_rate": 9.840987707699765e-06, |
|
"loss": 0.0669, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.5187292555713608, |
|
"grad_norm": 0.6813496832389402, |
|
"learning_rate": 9.839603411073388e-06, |
|
"loss": 0.0706, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.5196775723091512, |
|
"grad_norm": 0.7229692269198181, |
|
"learning_rate": 9.838213213184505e-06, |
|
"loss": 0.0771, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.5206258890469416, |
|
"grad_norm": 1.157471128375012, |
|
"learning_rate": 9.836817115728277e-06, |
|
"loss": 0.0932, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.5215742057847321, |
|
"grad_norm": 0.8058138449457062, |
|
"learning_rate": 9.835415120407063e-06, |
|
"loss": 0.0539, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5225225225225225, |
|
"grad_norm": 0.6915528599019737, |
|
"learning_rate": 9.834007228930414e-06, |
|
"loss": 0.0688, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.5234708392603129, |
|
"grad_norm": 0.8835152385091712, |
|
"learning_rate": 9.832593443015068e-06, |
|
"loss": 0.0605, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.5244191559981034, |
|
"grad_norm": 0.6896706794263241, |
|
"learning_rate": 9.83117376438495e-06, |
|
"loss": 0.0668, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.5253674727358938, |
|
"grad_norm": 0.7651857964351815, |
|
"learning_rate": 9.829748194771175e-06, |
|
"loss": 0.064, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.6216741056003758, |
|
"learning_rate": 9.828316735912037e-06, |
|
"loss": 0.0541, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5272641062114747, |
|
"grad_norm": 0.6813673301708452, |
|
"learning_rate": 9.826879389553014e-06, |
|
"loss": 0.0574, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.5282124229492651, |
|
"grad_norm": 0.7147998418504048, |
|
"learning_rate": 9.825436157446761e-06, |
|
"loss": 0.0576, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.5291607396870555, |
|
"grad_norm": 0.6352148290105686, |
|
"learning_rate": 9.82398704135311e-06, |
|
"loss": 0.066, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5301090564248458, |
|
"grad_norm": 0.8511240887028577, |
|
"learning_rate": 9.822532043039068e-06, |
|
"loss": 0.0687, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5310573731626363, |
|
"grad_norm": 0.6876408977841421, |
|
"learning_rate": 9.821071164278815e-06, |
|
"loss": 0.0838, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5320056899004267, |
|
"grad_norm": 0.7354217835184531, |
|
"learning_rate": 9.819604406853703e-06, |
|
"loss": 0.0552, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5329540066382171, |
|
"grad_norm": 0.9572067784227991, |
|
"learning_rate": 9.818131772552249e-06, |
|
"loss": 0.1099, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5339023233760076, |
|
"grad_norm": 0.7931127239607592, |
|
"learning_rate": 9.816653263170137e-06, |
|
"loss": 0.0706, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.534850640113798, |
|
"grad_norm": 0.8242420526129728, |
|
"learning_rate": 9.815168880510218e-06, |
|
"loss": 0.0946, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5357989568515884, |
|
"grad_norm": 1.0330372476146157, |
|
"learning_rate": 9.8136786263825e-06, |
|
"loss": 0.0951, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5367472735893789, |
|
"grad_norm": 0.7553297432270302, |
|
"learning_rate": 9.812182502604151e-06, |
|
"loss": 0.0663, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5376955903271693, |
|
"grad_norm": 0.8446853429895546, |
|
"learning_rate": 9.810680510999505e-06, |
|
"loss": 0.0728, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.5386439070649597, |
|
"grad_norm": 0.5089680701907852, |
|
"learning_rate": 9.809172653400036e-06, |
|
"loss": 0.0501, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5395922238027501, |
|
"grad_norm": 0.7258180066288827, |
|
"learning_rate": 9.807658931644382e-06, |
|
"loss": 0.0752, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5405405405405406, |
|
"grad_norm": 0.7028402619162881, |
|
"learning_rate": 9.806139347578331e-06, |
|
"loss": 0.059, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.541488857278331, |
|
"grad_norm": 0.7248854010393692, |
|
"learning_rate": 9.804613903054813e-06, |
|
"loss": 0.0851, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5424371740161213, |
|
"grad_norm": 0.7176555652391681, |
|
"learning_rate": 9.803082599933911e-06, |
|
"loss": 0.0697, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5433854907539118, |
|
"grad_norm": 0.4808404612456389, |
|
"learning_rate": 9.801545440082845e-06, |
|
"loss": 0.0569, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.5443338074917022, |
|
"grad_norm": 0.8731137568130377, |
|
"learning_rate": 9.800002425375984e-06, |
|
"loss": 0.0657, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.5452821242294926, |
|
"grad_norm": 0.7816194292982013, |
|
"learning_rate": 9.798453557694828e-06, |
|
"loss": 0.0724, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5462304409672831, |
|
"grad_norm": 0.9042436959378762, |
|
"learning_rate": 9.796898838928022e-06, |
|
"loss": 0.0784, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.5471787577050735, |
|
"grad_norm": 1.0293154765529384, |
|
"learning_rate": 9.79533827097134e-06, |
|
"loss": 0.098, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.5481270744428639, |
|
"grad_norm": 0.8678391414260259, |
|
"learning_rate": 9.793771855727691e-06, |
|
"loss": 0.0635, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5490753911806543, |
|
"grad_norm": 0.6041409950077287, |
|
"learning_rate": 9.792199595107115e-06, |
|
"loss": 0.0524, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.5500237079184448, |
|
"grad_norm": 1.0292476772898875, |
|
"learning_rate": 9.790621491026773e-06, |
|
"loss": 0.0829, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5509720246562352, |
|
"grad_norm": 0.7074515600768486, |
|
"learning_rate": 9.78903754541096e-06, |
|
"loss": 0.0704, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5519203413940256, |
|
"grad_norm": 0.7603340975922476, |
|
"learning_rate": 9.787447760191092e-06, |
|
"loss": 0.0788, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5528686581318161, |
|
"grad_norm": 1.0766706695954442, |
|
"learning_rate": 9.785852137305699e-06, |
|
"loss": 0.079, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5538169748696065, |
|
"grad_norm": 0.7555731931730972, |
|
"learning_rate": 9.784250678700435e-06, |
|
"loss": 0.0705, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.5547652916073968, |
|
"grad_norm": 0.7010961175305198, |
|
"learning_rate": 9.782643386328073e-06, |
|
"loss": 0.0713, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5557136083451872, |
|
"grad_norm": 1.0580272254821363, |
|
"learning_rate": 9.781030262148492e-06, |
|
"loss": 0.0671, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5566619250829777, |
|
"grad_norm": 0.6594876081209583, |
|
"learning_rate": 9.779411308128685e-06, |
|
"loss": 0.0867, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.5576102418207681, |
|
"grad_norm": 1.3649847896410103, |
|
"learning_rate": 9.777786526242759e-06, |
|
"loss": 0.0847, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.5585585585585585, |
|
"grad_norm": 0.6223880228627037, |
|
"learning_rate": 9.776155918471916e-06, |
|
"loss": 0.0579, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.559506875296349, |
|
"grad_norm": 0.6862572922646061, |
|
"learning_rate": 9.774519486804476e-06, |
|
"loss": 0.053, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5604551920341394, |
|
"grad_norm": 0.6562455064809456, |
|
"learning_rate": 9.772877233235848e-06, |
|
"loss": 0.0651, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5614035087719298, |
|
"grad_norm": 0.7150505236504866, |
|
"learning_rate": 9.771229159768547e-06, |
|
"loss": 0.0697, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5623518255097203, |
|
"grad_norm": 0.7505406859172821, |
|
"learning_rate": 9.769575268412182e-06, |
|
"loss": 0.0691, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5633001422475107, |
|
"grad_norm": 0.7340490905887499, |
|
"learning_rate": 9.767915561183456e-06, |
|
"loss": 0.0748, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5642484589853011, |
|
"grad_norm": 0.7987611706335997, |
|
"learning_rate": 9.766250040106166e-06, |
|
"loss": 0.0682, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5651967757230915, |
|
"grad_norm": 1.2974449597341617, |
|
"learning_rate": 9.764578707211199e-06, |
|
"loss": 0.0751, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.566145092460882, |
|
"grad_norm": 0.6191420122018653, |
|
"learning_rate": 9.762901564536523e-06, |
|
"loss": 0.0667, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.5670934091986723, |
|
"grad_norm": 0.6903639931399153, |
|
"learning_rate": 9.761218614127193e-06, |
|
"loss": 0.0653, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.5680417259364627, |
|
"grad_norm": 0.7974449669867185, |
|
"learning_rate": 9.759529858035351e-06, |
|
"loss": 0.0662, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5689900426742532, |
|
"grad_norm": 1.6445977802603875, |
|
"learning_rate": 9.75783529832021e-06, |
|
"loss": 0.0781, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5699383594120436, |
|
"grad_norm": 0.7682344601188886, |
|
"learning_rate": 9.756134937048066e-06, |
|
"loss": 0.0516, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.570886676149834, |
|
"grad_norm": 0.6505039594954853, |
|
"learning_rate": 9.754428776292287e-06, |
|
"loss": 0.0522, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.5718349928876245, |
|
"grad_norm": 1.0748139183671632, |
|
"learning_rate": 9.752716818133309e-06, |
|
"loss": 0.0787, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5727833096254149, |
|
"grad_norm": 0.7575374337239762, |
|
"learning_rate": 9.750999064658644e-06, |
|
"loss": 0.0618, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.5737316263632053, |
|
"grad_norm": 0.5005741056916544, |
|
"learning_rate": 9.749275517962868e-06, |
|
"loss": 0.0579, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5746799431009957, |
|
"grad_norm": 0.9747236186565804, |
|
"learning_rate": 9.747546180147618e-06, |
|
"loss": 0.1137, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5756282598387862, |
|
"grad_norm": 0.5945741852680105, |
|
"learning_rate": 9.745811053321597e-06, |
|
"loss": 0.0528, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5765765765765766, |
|
"grad_norm": 0.8767385416979725, |
|
"learning_rate": 9.744070139600564e-06, |
|
"loss": 0.0756, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.577524893314367, |
|
"grad_norm": 0.805183732938404, |
|
"learning_rate": 9.742323441107335e-06, |
|
"loss": 0.0796, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5784732100521575, |
|
"grad_norm": 0.4622182813428181, |
|
"learning_rate": 9.74057095997178e-06, |
|
"loss": 0.0466, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5794215267899478, |
|
"grad_norm": 1.323185570736391, |
|
"learning_rate": 9.738812698330821e-06, |
|
"loss": 0.0803, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.5803698435277382, |
|
"grad_norm": 0.6017510939556475, |
|
"learning_rate": 9.737048658328428e-06, |
|
"loss": 0.0473, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.5813181602655287, |
|
"grad_norm": 0.9340483579893749, |
|
"learning_rate": 9.735278842115616e-06, |
|
"loss": 0.0726, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5822664770033191, |
|
"grad_norm": 0.8017302866486061, |
|
"learning_rate": 9.733503251850443e-06, |
|
"loss": 0.0508, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5832147937411095, |
|
"grad_norm": 0.4915103436956615, |
|
"learning_rate": 9.73172188969801e-06, |
|
"loss": 0.0511, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5841631104788999, |
|
"grad_norm": 0.5454251857464146, |
|
"learning_rate": 9.729934757830455e-06, |
|
"loss": 0.043, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5851114272166904, |
|
"grad_norm": 0.45382702737394764, |
|
"learning_rate": 9.728141858426953e-06, |
|
"loss": 0.046, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5860597439544808, |
|
"grad_norm": 0.5609546349379012, |
|
"learning_rate": 9.726343193673707e-06, |
|
"loss": 0.0528, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5870080606922712, |
|
"grad_norm": 0.600673482298699, |
|
"learning_rate": 9.724538765763953e-06, |
|
"loss": 0.0539, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5879563774300617, |
|
"grad_norm": 0.9417089865736203, |
|
"learning_rate": 9.722728576897956e-06, |
|
"loss": 0.0583, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5889046941678521, |
|
"grad_norm": 0.4653439643190733, |
|
"learning_rate": 9.720912629283004e-06, |
|
"loss": 0.05, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5898530109056425, |
|
"grad_norm": 1.026549188147293, |
|
"learning_rate": 9.719090925133408e-06, |
|
"loss": 0.0643, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.590801327643433, |
|
"grad_norm": 0.7947545630855374, |
|
"learning_rate": 9.717263466670496e-06, |
|
"loss": 0.0827, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5917496443812233, |
|
"grad_norm": 0.5505357789361721, |
|
"learning_rate": 9.715430256122616e-06, |
|
"loss": 0.057, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5926979611190137, |
|
"grad_norm": 0.6227650085275758, |
|
"learning_rate": 9.713591295725126e-06, |
|
"loss": 0.0613, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5936462778568041, |
|
"grad_norm": 0.8089764410308476, |
|
"learning_rate": 9.711746587720398e-06, |
|
"loss": 0.0575, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5945945945945946, |
|
"grad_norm": 0.8681782262186932, |
|
"learning_rate": 9.709896134357815e-06, |
|
"loss": 0.0664, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.595542911332385, |
|
"grad_norm": 0.682165737662686, |
|
"learning_rate": 9.708039937893759e-06, |
|
"loss": 0.0558, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5964912280701754, |
|
"grad_norm": 0.6331915650172267, |
|
"learning_rate": 9.706178000591617e-06, |
|
"loss": 0.0628, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5974395448079659, |
|
"grad_norm": 0.5712611189361939, |
|
"learning_rate": 9.704310324721782e-06, |
|
"loss": 0.0741, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5983878615457563, |
|
"grad_norm": 0.4974903145873453, |
|
"learning_rate": 9.70243691256164e-06, |
|
"loss": 0.0569, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5993361782835467, |
|
"grad_norm": 0.8755421451427193, |
|
"learning_rate": 9.700557766395567e-06, |
|
"loss": 0.0884, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.6002844950213371, |
|
"grad_norm": 0.5236784076286586, |
|
"learning_rate": 9.698672888514938e-06, |
|
"loss": 0.0493, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.6012328117591276, |
|
"grad_norm": 0.6525012362182552, |
|
"learning_rate": 9.696782281218117e-06, |
|
"loss": 0.0683, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.602181128496918, |
|
"grad_norm": 0.5119217968942416, |
|
"learning_rate": 9.69488594681045e-06, |
|
"loss": 0.0449, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6031294452347084, |
|
"grad_norm": 0.6576021927278618, |
|
"learning_rate": 9.692983887604269e-06, |
|
"loss": 0.0674, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.6040777619724989, |
|
"grad_norm": 0.7157400695119305, |
|
"learning_rate": 9.691076105918885e-06, |
|
"loss": 0.0692, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.6050260787102892, |
|
"grad_norm": 0.873028935018846, |
|
"learning_rate": 9.689162604080589e-06, |
|
"loss": 0.0999, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.6059743954480796, |
|
"grad_norm": 0.8384167589559871, |
|
"learning_rate": 9.687243384422646e-06, |
|
"loss": 0.0771, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.6069227121858701, |
|
"grad_norm": 0.5020655439555515, |
|
"learning_rate": 9.685318449285292e-06, |
|
"loss": 0.0512, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6078710289236605, |
|
"grad_norm": 0.36608001502573706, |
|
"learning_rate": 9.683387801015733e-06, |
|
"loss": 0.0377, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.6088193456614509, |
|
"grad_norm": 0.7919506442179929, |
|
"learning_rate": 9.681451441968144e-06, |
|
"loss": 0.0775, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.6097676623992413, |
|
"grad_norm": 0.6274619623629013, |
|
"learning_rate": 9.67950937450366e-06, |
|
"loss": 0.0645, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.6107159791370318, |
|
"grad_norm": 0.5896565427831529, |
|
"learning_rate": 9.677561600990378e-06, |
|
"loss": 0.0595, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.6116642958748222, |
|
"grad_norm": 0.5142338666265971, |
|
"learning_rate": 9.67560812380335e-06, |
|
"loss": 0.0597, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6126126126126126, |
|
"grad_norm": 0.6109668570207277, |
|
"learning_rate": 9.67364894532459e-06, |
|
"loss": 0.07, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.6135609293504031, |
|
"grad_norm": 0.6756478515313759, |
|
"learning_rate": 9.671684067943056e-06, |
|
"loss": 0.0612, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.6145092460881935, |
|
"grad_norm": 0.6142876685386528, |
|
"learning_rate": 9.669713494054662e-06, |
|
"loss": 0.06, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.6154575628259839, |
|
"grad_norm": 0.8252522199066464, |
|
"learning_rate": 9.667737226062262e-06, |
|
"loss": 0.118, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.6164058795637744, |
|
"grad_norm": 0.48924053020562824, |
|
"learning_rate": 9.665755266375657e-06, |
|
"loss": 0.0542, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6173541963015647, |
|
"grad_norm": 0.9087121397095356, |
|
"learning_rate": 9.663767617411587e-06, |
|
"loss": 0.0611, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.6183025130393551, |
|
"grad_norm": 0.7764764902550111, |
|
"learning_rate": 9.66177428159373e-06, |
|
"loss": 0.0676, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.6192508297771455, |
|
"grad_norm": 0.44918893065172116, |
|
"learning_rate": 9.659775261352697e-06, |
|
"loss": 0.0474, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.620199146514936, |
|
"grad_norm": 0.9162652994629981, |
|
"learning_rate": 9.657770559126034e-06, |
|
"loss": 0.0981, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.6211474632527264, |
|
"grad_norm": 0.6543823860401999, |
|
"learning_rate": 9.655760177358208e-06, |
|
"loss": 0.0744, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6220957799905168, |
|
"grad_norm": 0.44085186666179094, |
|
"learning_rate": 9.653744118500623e-06, |
|
"loss": 0.0532, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.6230440967283073, |
|
"grad_norm": 0.7980175435844092, |
|
"learning_rate": 9.651722385011592e-06, |
|
"loss": 0.0807, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.6239924134660977, |
|
"grad_norm": 0.4853866988799319, |
|
"learning_rate": 9.649694979356358e-06, |
|
"loss": 0.0454, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.6249407302038881, |
|
"grad_norm": 0.5662361885259662, |
|
"learning_rate": 9.647661904007076e-06, |
|
"loss": 0.0621, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.6258890469416786, |
|
"grad_norm": 0.8127269026146419, |
|
"learning_rate": 9.645623161442814e-06, |
|
"loss": 0.0773, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.626837363679469, |
|
"grad_norm": 0.6294162739235921, |
|
"learning_rate": 9.643578754149552e-06, |
|
"loss": 0.0599, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.6277856804172594, |
|
"grad_norm": 0.6965237350859914, |
|
"learning_rate": 9.641528684620179e-06, |
|
"loss": 0.0542, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.6287339971550497, |
|
"grad_norm": 0.5265921422928361, |
|
"learning_rate": 9.639472955354483e-06, |
|
"loss": 0.0496, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.6296823138928402, |
|
"grad_norm": 0.8663040094375097, |
|
"learning_rate": 9.63741156885916e-06, |
|
"loss": 0.0733, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.6306306306306306, |
|
"grad_norm": 0.7508837936313448, |
|
"learning_rate": 9.635344527647798e-06, |
|
"loss": 0.08, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.631578947368421, |
|
"grad_norm": 0.6827540936282853, |
|
"learning_rate": 9.633271834240885e-06, |
|
"loss": 0.0732, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.6325272641062115, |
|
"grad_norm": 0.7441700461651841, |
|
"learning_rate": 9.631193491165798e-06, |
|
"loss": 0.0555, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.6334755808440019, |
|
"grad_norm": 0.8313881844290032, |
|
"learning_rate": 9.629109500956803e-06, |
|
"loss": 0.0782, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.6344238975817923, |
|
"grad_norm": 0.47754915650781987, |
|
"learning_rate": 9.627019866155056e-06, |
|
"loss": 0.0547, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.6353722143195828, |
|
"grad_norm": 0.6618532396312571, |
|
"learning_rate": 9.624924589308591e-06, |
|
"loss": 0.0515, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6363205310573732, |
|
"grad_norm": 1.147117197534475, |
|
"learning_rate": 9.622823672972323e-06, |
|
"loss": 0.0882, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.6372688477951636, |
|
"grad_norm": 0.5779383814129484, |
|
"learning_rate": 9.620717119708047e-06, |
|
"loss": 0.0659, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.638217164532954, |
|
"grad_norm": 0.5799389859663083, |
|
"learning_rate": 9.618604932084427e-06, |
|
"loss": 0.0606, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6391654812707445, |
|
"grad_norm": 6.608545253943764, |
|
"learning_rate": 9.616487112677e-06, |
|
"loss": 0.066, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6401137980085349, |
|
"grad_norm": 0.7235578117181891, |
|
"learning_rate": 9.614363664068168e-06, |
|
"loss": 0.0628, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6410621147463252, |
|
"grad_norm": 0.6994528460712487, |
|
"learning_rate": 9.6122345888472e-06, |
|
"loss": 0.0628, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.6420104314841157, |
|
"grad_norm": 0.6208663188504899, |
|
"learning_rate": 9.610099889610224e-06, |
|
"loss": 0.0554, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6429587482219061, |
|
"grad_norm": 0.6345977149189366, |
|
"learning_rate": 9.607959568960226e-06, |
|
"loss": 0.0632, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.6439070649596965, |
|
"grad_norm": 0.8061055021711904, |
|
"learning_rate": 9.605813629507046e-06, |
|
"loss": 0.0684, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6448553816974869, |
|
"grad_norm": 0.6913423639588181, |
|
"learning_rate": 9.603662073867375e-06, |
|
"loss": 0.0673, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6458036984352774, |
|
"grad_norm": 0.7586179752230898, |
|
"learning_rate": 9.601504904664758e-06, |
|
"loss": 0.0702, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.6467520151730678, |
|
"grad_norm": 0.5215807067369997, |
|
"learning_rate": 9.599342124529576e-06, |
|
"loss": 0.0484, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6477003319108582, |
|
"grad_norm": 0.4193899811291156, |
|
"learning_rate": 9.597173736099056e-06, |
|
"loss": 0.0455, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.6486486486486487, |
|
"grad_norm": 1.0231627903377674, |
|
"learning_rate": 9.594999742017267e-06, |
|
"loss": 0.0755, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.6495969653864391, |
|
"grad_norm": 0.5818860445113369, |
|
"learning_rate": 9.592820144935107e-06, |
|
"loss": 0.0457, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6505452821242295, |
|
"grad_norm": 0.8523614115619248, |
|
"learning_rate": 9.590634947510312e-06, |
|
"loss": 0.0666, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.65149359886202, |
|
"grad_norm": 0.6819462318103672, |
|
"learning_rate": 9.588444152407441e-06, |
|
"loss": 0.0621, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.6524419155998104, |
|
"grad_norm": 0.7350860734842137, |
|
"learning_rate": 9.586247762297882e-06, |
|
"loss": 0.0616, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.6533902323376007, |
|
"grad_norm": 0.6877200427996193, |
|
"learning_rate": 9.584045779859848e-06, |
|
"loss": 0.0691, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.6543385490753911, |
|
"grad_norm": 0.7777410132259543, |
|
"learning_rate": 9.581838207778367e-06, |
|
"loss": 0.0672, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6552868658131816, |
|
"grad_norm": 1.0340407583447775, |
|
"learning_rate": 9.579625048745281e-06, |
|
"loss": 0.0692, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.656235182550972, |
|
"grad_norm": 0.6061769180463831, |
|
"learning_rate": 9.577406305459251e-06, |
|
"loss": 0.0519, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.6571834992887624, |
|
"grad_norm": 0.7287017758175208, |
|
"learning_rate": 9.575181980625743e-06, |
|
"loss": 0.0626, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6581318160265529, |
|
"grad_norm": 0.6923184185544935, |
|
"learning_rate": 9.57295207695703e-06, |
|
"loss": 0.0602, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.6590801327643433, |
|
"grad_norm": 0.7441802004305137, |
|
"learning_rate": 9.570716597172187e-06, |
|
"loss": 0.0785, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6600284495021337, |
|
"grad_norm": 0.5600328414907927, |
|
"learning_rate": 9.568475543997088e-06, |
|
"loss": 0.0525, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.6609767662399242, |
|
"grad_norm": 0.6179093672887623, |
|
"learning_rate": 9.566228920164405e-06, |
|
"loss": 0.0498, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.6619250829777146, |
|
"grad_norm": 1.0001632318997007, |
|
"learning_rate": 9.563976728413602e-06, |
|
"loss": 0.1065, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.662873399715505, |
|
"grad_norm": 0.6197443639375237, |
|
"learning_rate": 9.56171897149093e-06, |
|
"loss": 0.0429, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.6638217164532954, |
|
"grad_norm": 0.7426532648337794, |
|
"learning_rate": 9.55945565214943e-06, |
|
"loss": 0.0603, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6647700331910859, |
|
"grad_norm": 0.9809220324323352, |
|
"learning_rate": 9.557186773148922e-06, |
|
"loss": 0.0844, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.6657183499288762, |
|
"grad_norm": 0.6596268576375636, |
|
"learning_rate": 9.554912337256007e-06, |
|
"loss": 0.0627, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.6445430375796782, |
|
"learning_rate": 9.552632347244062e-06, |
|
"loss": 0.0621, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.6676149834044571, |
|
"grad_norm": 0.6854389668990125, |
|
"learning_rate": 9.550346805893236e-06, |
|
"loss": 0.0709, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.6685633001422475, |
|
"grad_norm": 0.9157472924094435, |
|
"learning_rate": 9.548055715990448e-06, |
|
"loss": 0.0669, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6695116168800379, |
|
"grad_norm": 0.6210182918721243, |
|
"learning_rate": 9.545759080329381e-06, |
|
"loss": 0.0642, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.6704599336178284, |
|
"grad_norm": 0.5811606762164421, |
|
"learning_rate": 9.543456901710483e-06, |
|
"loss": 0.0734, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.6714082503556188, |
|
"grad_norm": 0.6797271720519124, |
|
"learning_rate": 9.541149182940958e-06, |
|
"loss": 0.0543, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.6723565670934092, |
|
"grad_norm": 0.5126068611905316, |
|
"learning_rate": 9.538835926834766e-06, |
|
"loss": 0.0504, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.6733048838311996, |
|
"grad_norm": 0.6464058845065579, |
|
"learning_rate": 9.536517136212623e-06, |
|
"loss": 0.0596, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6742532005689901, |
|
"grad_norm": 0.5987248394746172, |
|
"learning_rate": 9.534192813901986e-06, |
|
"loss": 0.0561, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.6752015173067805, |
|
"grad_norm": 0.5757268664620699, |
|
"learning_rate": 9.531862962737065e-06, |
|
"loss": 0.0662, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.6761498340445709, |
|
"grad_norm": 0.6884820373956889, |
|
"learning_rate": 9.529527585558806e-06, |
|
"loss": 0.0734, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.6770981507823614, |
|
"grad_norm": 0.5599551362853026, |
|
"learning_rate": 9.5271866852149e-06, |
|
"loss": 0.0497, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.6780464675201517, |
|
"grad_norm": 1.2727013612767513, |
|
"learning_rate": 9.524840264559762e-06, |
|
"loss": 0.0806, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6789947842579421, |
|
"grad_norm": 0.5125594480614294, |
|
"learning_rate": 9.522488326454551e-06, |
|
"loss": 0.0464, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.6799431009957326, |
|
"grad_norm": 0.9279881234599379, |
|
"learning_rate": 9.520130873767141e-06, |
|
"loss": 0.0466, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.680891417733523, |
|
"grad_norm": 0.5884738866592291, |
|
"learning_rate": 9.517767909372143e-06, |
|
"loss": 0.0463, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.6818397344713134, |
|
"grad_norm": 0.6405987798189022, |
|
"learning_rate": 9.515399436150879e-06, |
|
"loss": 0.0646, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.6827880512091038, |
|
"grad_norm": 0.6141893191288851, |
|
"learning_rate": 9.513025456991394e-06, |
|
"loss": 0.0713, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6837363679468943, |
|
"grad_norm": 0.5294631004623913, |
|
"learning_rate": 9.510645974788441e-06, |
|
"loss": 0.0533, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6846846846846847, |
|
"grad_norm": 0.5983803884552171, |
|
"learning_rate": 9.508260992443492e-06, |
|
"loss": 0.0574, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.6856330014224751, |
|
"grad_norm": 0.7168015362345571, |
|
"learning_rate": 9.505870512864715e-06, |
|
"loss": 0.0622, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6865813181602656, |
|
"grad_norm": 0.8061703745318712, |
|
"learning_rate": 9.503474538966992e-06, |
|
"loss": 0.072, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.687529634898056, |
|
"grad_norm": 0.6410612258118752, |
|
"learning_rate": 9.501073073671896e-06, |
|
"loss": 0.0454, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6884779516358464, |
|
"grad_norm": 0.790215058142473, |
|
"learning_rate": 9.498666119907701e-06, |
|
"loss": 0.0677, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6894262683736367, |
|
"grad_norm": 0.6299133472058956, |
|
"learning_rate": 9.496253680609371e-06, |
|
"loss": 0.0585, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6903745851114272, |
|
"grad_norm": 1.0623017139889208, |
|
"learning_rate": 9.493835758718561e-06, |
|
"loss": 0.069, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.6913229018492176, |
|
"grad_norm": 0.5536012592608316, |
|
"learning_rate": 9.491412357183607e-06, |
|
"loss": 0.0686, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.692271218587008, |
|
"grad_norm": 0.6038206755461478, |
|
"learning_rate": 9.488983478959534e-06, |
|
"loss": 0.0706, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6932195353247985, |
|
"grad_norm": 0.6342419868913964, |
|
"learning_rate": 9.486549127008037e-06, |
|
"loss": 0.0496, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.6941678520625889, |
|
"grad_norm": 1.1555208683238716, |
|
"learning_rate": 9.484109304297493e-06, |
|
"loss": 0.0834, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.6951161688003793, |
|
"grad_norm": 0.8509380581545992, |
|
"learning_rate": 9.481664013802943e-06, |
|
"loss": 0.0794, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.6960644855381698, |
|
"grad_norm": 0.8224046322343856, |
|
"learning_rate": 9.479213258506102e-06, |
|
"loss": 0.0869, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.6970128022759602, |
|
"grad_norm": 0.6505920471844966, |
|
"learning_rate": 9.476757041395342e-06, |
|
"loss": 0.0642, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6979611190137506, |
|
"grad_norm": 0.5162948092375159, |
|
"learning_rate": 9.474295365465697e-06, |
|
"loss": 0.0539, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.698909435751541, |
|
"grad_norm": 0.7194486779836317, |
|
"learning_rate": 9.471828233718863e-06, |
|
"loss": 0.0585, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.6998577524893315, |
|
"grad_norm": 0.9014549238602243, |
|
"learning_rate": 9.46935564916318e-06, |
|
"loss": 0.0874, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.7008060692271219, |
|
"grad_norm": 0.7378312572460828, |
|
"learning_rate": 9.466877614813645e-06, |
|
"loss": 0.0657, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.815800968244944, |
|
"learning_rate": 9.464394133691891e-06, |
|
"loss": 0.0538, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7027027027027027, |
|
"grad_norm": 0.5271528573688194, |
|
"learning_rate": 9.461905208826202e-06, |
|
"loss": 0.0619, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.7036510194404931, |
|
"grad_norm": 0.9062594050922635, |
|
"learning_rate": 9.459410843251496e-06, |
|
"loss": 0.0659, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.7045993361782835, |
|
"grad_norm": 0.6578698656781865, |
|
"learning_rate": 9.456911040009323e-06, |
|
"loss": 0.0577, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.705547652916074, |
|
"grad_norm": 0.6791351680766123, |
|
"learning_rate": 9.454405802147864e-06, |
|
"loss": 0.0669, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.7064959696538644, |
|
"grad_norm": 0.7662019136887008, |
|
"learning_rate": 9.451895132721933e-06, |
|
"loss": 0.0692, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7074442863916548, |
|
"grad_norm": 0.6997379483885225, |
|
"learning_rate": 9.449379034792961e-06, |
|
"loss": 0.0609, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.7083926031294452, |
|
"grad_norm": 0.6231531262832446, |
|
"learning_rate": 9.446857511429e-06, |
|
"loss": 0.0568, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.7093409198672357, |
|
"grad_norm": 0.638618143024491, |
|
"learning_rate": 9.444330565704715e-06, |
|
"loss": 0.0391, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.7102892366050261, |
|
"grad_norm": 0.6101709327712237, |
|
"learning_rate": 9.441798200701388e-06, |
|
"loss": 0.0692, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.7112375533428165, |
|
"grad_norm": 0.7771396965466206, |
|
"learning_rate": 9.439260419506906e-06, |
|
"loss": 0.0616, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.712185870080607, |
|
"grad_norm": 0.663533581873393, |
|
"learning_rate": 9.436717225215761e-06, |
|
"loss": 0.0706, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.7131341868183974, |
|
"grad_norm": 0.7406791150442034, |
|
"learning_rate": 9.434168620929045e-06, |
|
"loss": 0.0759, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.7140825035561877, |
|
"grad_norm": 0.6589932311994989, |
|
"learning_rate": 9.431614609754446e-06, |
|
"loss": 0.0676, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.7150308202939782, |
|
"grad_norm": 0.7873737037891946, |
|
"learning_rate": 9.429055194806247e-06, |
|
"loss": 0.0661, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.7159791370317686, |
|
"grad_norm": 0.6588547169267579, |
|
"learning_rate": 9.42649037920532e-06, |
|
"loss": 0.068, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.716927453769559, |
|
"grad_norm": 0.8208102856389554, |
|
"learning_rate": 9.423920166079122e-06, |
|
"loss": 0.0829, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.7178757705073494, |
|
"grad_norm": 0.5652492127213, |
|
"learning_rate": 9.421344558561689e-06, |
|
"loss": 0.0754, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.7188240872451399, |
|
"grad_norm": 2.03543668980321, |
|
"learning_rate": 9.418763559793639e-06, |
|
"loss": 0.0469, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.7197724039829303, |
|
"grad_norm": 0.7132600676949169, |
|
"learning_rate": 9.41617717292216e-06, |
|
"loss": 0.058, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.7207207207207207, |
|
"grad_norm": 0.5814418519545377, |
|
"learning_rate": 9.413585401101014e-06, |
|
"loss": 0.0676, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7216690374585112, |
|
"grad_norm": 0.778087468578043, |
|
"learning_rate": 9.410988247490527e-06, |
|
"loss": 0.0565, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.7226173541963016, |
|
"grad_norm": 0.5978506887698309, |
|
"learning_rate": 9.408385715257589e-06, |
|
"loss": 0.0526, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.723565670934092, |
|
"grad_norm": 0.7345386180038043, |
|
"learning_rate": 9.405777807575643e-06, |
|
"loss": 0.0779, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.7245139876718824, |
|
"grad_norm": 0.6765882263629432, |
|
"learning_rate": 9.403164527624695e-06, |
|
"loss": 0.0739, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.7254623044096729, |
|
"grad_norm": 0.6200059319183251, |
|
"learning_rate": 9.400545878591297e-06, |
|
"loss": 0.0425, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7264106211474632, |
|
"grad_norm": 0.5764913642807622, |
|
"learning_rate": 9.397921863668545e-06, |
|
"loss": 0.0525, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.7273589378852536, |
|
"grad_norm": 0.5072870053545583, |
|
"learning_rate": 9.395292486056087e-06, |
|
"loss": 0.0466, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.7283072546230441, |
|
"grad_norm": 0.6266493674563252, |
|
"learning_rate": 9.3926577489601e-06, |
|
"loss": 0.0564, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.7292555713608345, |
|
"grad_norm": 0.6781903020718192, |
|
"learning_rate": 9.390017655593303e-06, |
|
"loss": 0.0625, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.7302038880986249, |
|
"grad_norm": 0.6970906328583575, |
|
"learning_rate": 9.387372209174943e-06, |
|
"loss": 0.0499, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7311522048364154, |
|
"grad_norm": 0.4830643779006922, |
|
"learning_rate": 9.384721412930797e-06, |
|
"loss": 0.0522, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.7321005215742058, |
|
"grad_norm": 0.5981146539751457, |
|
"learning_rate": 9.382065270093164e-06, |
|
"loss": 0.0503, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.7330488383119962, |
|
"grad_norm": 0.6288690777841561, |
|
"learning_rate": 9.37940378390086e-06, |
|
"loss": 0.0505, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.7339971550497866, |
|
"grad_norm": 0.6043657243192845, |
|
"learning_rate": 9.376736957599219e-06, |
|
"loss": 0.048, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.7349454717875771, |
|
"grad_norm": 1.3199303132586044, |
|
"learning_rate": 9.37406479444009e-06, |
|
"loss": 0.0787, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.7358937885253675, |
|
"grad_norm": 0.9970354985082576, |
|
"learning_rate": 9.37138729768182e-06, |
|
"loss": 0.0593, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.7368421052631579, |
|
"grad_norm": 0.6154243426982743, |
|
"learning_rate": 9.36870447058927e-06, |
|
"loss": 0.0552, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.7377904220009484, |
|
"grad_norm": 0.688917247579616, |
|
"learning_rate": 9.366016316433796e-06, |
|
"loss": 0.0688, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.7387387387387387, |
|
"grad_norm": 0.8890574424533809, |
|
"learning_rate": 9.363322838493252e-06, |
|
"loss": 0.0616, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.7396870554765291, |
|
"grad_norm": 0.5256518464793154, |
|
"learning_rate": 9.360624040051975e-06, |
|
"loss": 0.0449, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7406353722143196, |
|
"grad_norm": 0.7015686604630017, |
|
"learning_rate": 9.357919924400802e-06, |
|
"loss": 0.0744, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.74158368895211, |
|
"grad_norm": 0.5444389461448026, |
|
"learning_rate": 9.355210494837046e-06, |
|
"loss": 0.058, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.7425320056899004, |
|
"grad_norm": 0.8635005280396899, |
|
"learning_rate": 9.352495754664501e-06, |
|
"loss": 0.0817, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.7434803224276908, |
|
"grad_norm": 0.3975227023619501, |
|
"learning_rate": 9.349775707193439e-06, |
|
"loss": 0.0325, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.7444286391654813, |
|
"grad_norm": 0.9671794171858287, |
|
"learning_rate": 9.347050355740598e-06, |
|
"loss": 0.0942, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7453769559032717, |
|
"grad_norm": 0.8627076848581986, |
|
"learning_rate": 9.34431970362919e-06, |
|
"loss": 0.0603, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.7463252726410621, |
|
"grad_norm": 0.676971569472859, |
|
"learning_rate": 9.341583754188887e-06, |
|
"loss": 0.0609, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.7472735893788526, |
|
"grad_norm": 0.6234019106033082, |
|
"learning_rate": 9.338842510755822e-06, |
|
"loss": 0.0527, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.748221906116643, |
|
"grad_norm": 0.5688808355503273, |
|
"learning_rate": 9.336095976672578e-06, |
|
"loss": 0.0746, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.7491702228544334, |
|
"grad_norm": 0.8927220033190019, |
|
"learning_rate": 9.3333441552882e-06, |
|
"loss": 0.0663, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7501185395922239, |
|
"grad_norm": 0.6760705893906477, |
|
"learning_rate": 9.33058704995817e-06, |
|
"loss": 0.0607, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.7510668563300142, |
|
"grad_norm": 0.6421619908578323, |
|
"learning_rate": 9.327824664044418e-06, |
|
"loss": 0.0601, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.7520151730678046, |
|
"grad_norm": 0.7064042205046658, |
|
"learning_rate": 9.32505700091531e-06, |
|
"loss": 0.0656, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.752963489805595, |
|
"grad_norm": 0.6789456621715841, |
|
"learning_rate": 9.322284063945651e-06, |
|
"loss": 0.0754, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.7539118065433855, |
|
"grad_norm": 0.6349001762224292, |
|
"learning_rate": 9.319505856516674e-06, |
|
"loss": 0.055, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.7548601232811759, |
|
"grad_norm": 0.7970733715082516, |
|
"learning_rate": 9.316722382016037e-06, |
|
"loss": 0.0606, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.7558084400189663, |
|
"grad_norm": 0.6989262918440643, |
|
"learning_rate": 9.313933643837825e-06, |
|
"loss": 0.0419, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.7567567567567568, |
|
"grad_norm": 0.45444386596780545, |
|
"learning_rate": 9.311139645382539e-06, |
|
"loss": 0.0499, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.7577050734945472, |
|
"grad_norm": 0.7340919059070612, |
|
"learning_rate": 9.308340390057091e-06, |
|
"loss": 0.0605, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.7586533902323376, |
|
"grad_norm": 0.49624254277855845, |
|
"learning_rate": 9.305535881274812e-06, |
|
"loss": 0.038, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7596017069701281, |
|
"grad_norm": 0.5558026345234958, |
|
"learning_rate": 9.302726122455425e-06, |
|
"loss": 0.0477, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.7605500237079185, |
|
"grad_norm": 0.7377034794768454, |
|
"learning_rate": 9.299911117025071e-06, |
|
"loss": 0.0798, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.7614983404457089, |
|
"grad_norm": 0.6642309640857783, |
|
"learning_rate": 9.297090868416276e-06, |
|
"loss": 0.0578, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.7624466571834992, |
|
"grad_norm": 0.4901567357915514, |
|
"learning_rate": 9.294265380067965e-06, |
|
"loss": 0.0546, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.7633949739212897, |
|
"grad_norm": 0.5504015183910195, |
|
"learning_rate": 9.291434655425452e-06, |
|
"loss": 0.0476, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7643432906590801, |
|
"grad_norm": 0.7880325712467479, |
|
"learning_rate": 9.288598697940433e-06, |
|
"loss": 0.0967, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.7652916073968705, |
|
"grad_norm": 1.0094413699993006, |
|
"learning_rate": 9.285757511070987e-06, |
|
"loss": 0.0547, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.766239924134661, |
|
"grad_norm": 0.5462824953438216, |
|
"learning_rate": 9.28291109828157e-06, |
|
"loss": 0.0622, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.7671882408724514, |
|
"grad_norm": 0.6095693174069973, |
|
"learning_rate": 9.28005946304301e-06, |
|
"loss": 0.054, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.7681365576102418, |
|
"grad_norm": 0.5522598480936777, |
|
"learning_rate": 9.277202608832502e-06, |
|
"loss": 0.0608, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7690848743480322, |
|
"grad_norm": 0.8887551561479244, |
|
"learning_rate": 9.274340539133604e-06, |
|
"loss": 0.0733, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.7700331910858227, |
|
"grad_norm": 0.6536519593388536, |
|
"learning_rate": 9.271473257436239e-06, |
|
"loss": 0.0704, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7709815078236131, |
|
"grad_norm": 0.6903014054311826, |
|
"learning_rate": 9.268600767236677e-06, |
|
"loss": 0.0839, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.7719298245614035, |
|
"grad_norm": 0.5929159416904847, |
|
"learning_rate": 9.265723072037546e-06, |
|
"loss": 0.0592, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.772878141299194, |
|
"grad_norm": 0.7439638317959937, |
|
"learning_rate": 9.26284017534782e-06, |
|
"loss": 0.0568, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7738264580369844, |
|
"grad_norm": 0.5860050856048022, |
|
"learning_rate": 9.259952080682812e-06, |
|
"loss": 0.0667, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7747747747747747, |
|
"grad_norm": 0.4842910654706692, |
|
"learning_rate": 9.257058791564175e-06, |
|
"loss": 0.0513, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7757230915125652, |
|
"grad_norm": 0.789038697553299, |
|
"learning_rate": 9.254160311519896e-06, |
|
"loss": 0.0557, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7766714082503556, |
|
"grad_norm": 0.5387139258318481, |
|
"learning_rate": 9.251256644084292e-06, |
|
"loss": 0.0558, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.777619724988146, |
|
"grad_norm": 0.8887946106511906, |
|
"learning_rate": 9.248347792798006e-06, |
|
"loss": 0.0776, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7785680417259364, |
|
"grad_norm": 0.7477907494684204, |
|
"learning_rate": 9.245433761208e-06, |
|
"loss": 0.0706, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.7795163584637269, |
|
"grad_norm": 0.8176178183928178, |
|
"learning_rate": 9.242514552867556e-06, |
|
"loss": 0.0806, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.7804646752015173, |
|
"grad_norm": 0.5104409829727489, |
|
"learning_rate": 9.239590171336262e-06, |
|
"loss": 0.0427, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7814129919393077, |
|
"grad_norm": 0.5922185838285359, |
|
"learning_rate": 9.236660620180024e-06, |
|
"loss": 0.0553, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.7823613086770982, |
|
"grad_norm": 0.9414341871189567, |
|
"learning_rate": 9.23372590297104e-06, |
|
"loss": 0.0678, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7833096254148886, |
|
"grad_norm": 0.49939628701466243, |
|
"learning_rate": 9.230786023287819e-06, |
|
"loss": 0.0437, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.784257942152679, |
|
"grad_norm": 0.519425273825053, |
|
"learning_rate": 9.227840984715154e-06, |
|
"loss": 0.0497, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7852062588904695, |
|
"grad_norm": 0.5443123255099412, |
|
"learning_rate": 9.224890790844137e-06, |
|
"loss": 0.0612, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7861545756282599, |
|
"grad_norm": 0.511905527310258, |
|
"learning_rate": 9.221935445272144e-06, |
|
"loss": 0.0449, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7871028923660502, |
|
"grad_norm": 0.6705781452415145, |
|
"learning_rate": 9.218974951602829e-06, |
|
"loss": 0.063, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7880512091038406, |
|
"grad_norm": 0.47754646141190604, |
|
"learning_rate": 9.216009313446125e-06, |
|
"loss": 0.0688, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.7889995258416311, |
|
"grad_norm": 0.5705276893342319, |
|
"learning_rate": 9.213038534418244e-06, |
|
"loss": 0.0686, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7899478425794215, |
|
"grad_norm": 0.4253509537520698, |
|
"learning_rate": 9.21006261814166e-06, |
|
"loss": 0.0427, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.7908961593172119, |
|
"grad_norm": 0.533220697742502, |
|
"learning_rate": 9.207081568245112e-06, |
|
"loss": 0.0394, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.7918444760550024, |
|
"grad_norm": 0.5786737951816707, |
|
"learning_rate": 9.2040953883636e-06, |
|
"loss": 0.0556, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7927927927927928, |
|
"grad_norm": 1.05765776588404, |
|
"learning_rate": 9.20110408213838e-06, |
|
"loss": 0.0388, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.7937411095305832, |
|
"grad_norm": 0.809530041430475, |
|
"learning_rate": 9.19810765321696e-06, |
|
"loss": 0.1042, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.7946894262683737, |
|
"grad_norm": 0.4767483114016521, |
|
"learning_rate": 9.19510610525309e-06, |
|
"loss": 0.0586, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.7956377430061641, |
|
"grad_norm": 0.6212000890855088, |
|
"learning_rate": 9.192099441906765e-06, |
|
"loss": 0.063, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.7965860597439545, |
|
"grad_norm": 0.5793471462839893, |
|
"learning_rate": 9.189087666844219e-06, |
|
"loss": 0.0599, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7975343764817449, |
|
"grad_norm": 0.6109133021965912, |
|
"learning_rate": 9.186070783737915e-06, |
|
"loss": 0.0655, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.7984826932195354, |
|
"grad_norm": 1.7579309929430755, |
|
"learning_rate": 9.183048796266547e-06, |
|
"loss": 0.0531, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.7994310099573257, |
|
"grad_norm": 0.6305893305402994, |
|
"learning_rate": 9.180021708115034e-06, |
|
"loss": 0.069, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.8003793266951161, |
|
"grad_norm": 0.5799218206040034, |
|
"learning_rate": 9.176989522974512e-06, |
|
"loss": 0.0548, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.8013276434329066, |
|
"grad_norm": 0.5205329821796497, |
|
"learning_rate": 9.173952244542335e-06, |
|
"loss": 0.0551, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.802275960170697, |
|
"grad_norm": 0.6401356176971456, |
|
"learning_rate": 9.170909876522067e-06, |
|
"loss": 0.0613, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.8032242769084874, |
|
"grad_norm": 0.6283553782308525, |
|
"learning_rate": 9.167862422623474e-06, |
|
"loss": 0.0681, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.8041725936462779, |
|
"grad_norm": 0.5291087716357314, |
|
"learning_rate": 9.164809886562532e-06, |
|
"loss": 0.0428, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.8051209103840683, |
|
"grad_norm": 0.6176212098121372, |
|
"learning_rate": 9.161752272061405e-06, |
|
"loss": 0.0607, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.8060692271218587, |
|
"grad_norm": 0.5258734780929885, |
|
"learning_rate": 9.158689582848454e-06, |
|
"loss": 0.0555, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8070175438596491, |
|
"grad_norm": 0.5473102285657928, |
|
"learning_rate": 9.155621822658229e-06, |
|
"loss": 0.0461, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.8079658605974396, |
|
"grad_norm": 0.7147069989389465, |
|
"learning_rate": 9.15254899523146e-06, |
|
"loss": 0.0699, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.80891417733523, |
|
"grad_norm": 0.5116476113725856, |
|
"learning_rate": 9.14947110431506e-06, |
|
"loss": 0.0593, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.8098624940730204, |
|
"grad_norm": 0.599625799358922, |
|
"learning_rate": 9.146388153662109e-06, |
|
"loss": 0.0719, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.8108108108108109, |
|
"grad_norm": 0.5657265833927722, |
|
"learning_rate": 9.143300147031866e-06, |
|
"loss": 0.0539, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8117591275486012, |
|
"grad_norm": 0.490285928003467, |
|
"learning_rate": 9.14020708818975e-06, |
|
"loss": 0.0551, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.8127074442863916, |
|
"grad_norm": 0.5667257690255696, |
|
"learning_rate": 9.137108980907341e-06, |
|
"loss": 0.0485, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.813655761024182, |
|
"grad_norm": 0.7288808283591064, |
|
"learning_rate": 9.134005828962373e-06, |
|
"loss": 0.0464, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.8146040777619725, |
|
"grad_norm": 0.6578159612053353, |
|
"learning_rate": 9.130897636138736e-06, |
|
"loss": 0.0458, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.8155523944997629, |
|
"grad_norm": 0.6699312438910595, |
|
"learning_rate": 9.127784406226462e-06, |
|
"loss": 0.0484, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8165007112375533, |
|
"grad_norm": 0.7577555099867996, |
|
"learning_rate": 9.124666143021728e-06, |
|
"loss": 0.0552, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.8174490279753438, |
|
"grad_norm": 0.6571718231580975, |
|
"learning_rate": 9.121542850326849e-06, |
|
"loss": 0.0418, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.8183973447131342, |
|
"grad_norm": 0.7375729716381728, |
|
"learning_rate": 9.118414531950268e-06, |
|
"loss": 0.0586, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.8193456614509246, |
|
"grad_norm": 0.7778186396499422, |
|
"learning_rate": 9.115281191706563e-06, |
|
"loss": 0.0638, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.8202939781887151, |
|
"grad_norm": 1.1753642296648885, |
|
"learning_rate": 9.11214283341643e-06, |
|
"loss": 0.0914, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.8212422949265055, |
|
"grad_norm": 0.6221136965708439, |
|
"learning_rate": 9.108999460906687e-06, |
|
"loss": 0.0513, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.8221906116642959, |
|
"grad_norm": 1.0126156537474953, |
|
"learning_rate": 9.105851078010265e-06, |
|
"loss": 0.0511, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.8231389284020862, |
|
"grad_norm": 0.609505398312846, |
|
"learning_rate": 9.102697688566204e-06, |
|
"loss": 0.0607, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.8240872451398767, |
|
"grad_norm": 0.6781545775462046, |
|
"learning_rate": 9.09953929641965e-06, |
|
"loss": 0.0537, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.8250355618776671, |
|
"grad_norm": 0.6162914997785193, |
|
"learning_rate": 9.096375905421849e-06, |
|
"loss": 0.0514, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8259838786154575, |
|
"grad_norm": 0.9380195573648793, |
|
"learning_rate": 9.093207519430138e-06, |
|
"loss": 0.0592, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.826932195353248, |
|
"grad_norm": 0.6891518456384623, |
|
"learning_rate": 9.090034142307955e-06, |
|
"loss": 0.0611, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.8278805120910384, |
|
"grad_norm": 0.6860355795137043, |
|
"learning_rate": 9.086855777924813e-06, |
|
"loss": 0.0651, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.8288288288288288, |
|
"grad_norm": 0.5941193542193252, |
|
"learning_rate": 9.083672430156313e-06, |
|
"loss": 0.0561, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.8297771455666193, |
|
"grad_norm": 0.9859763647912905, |
|
"learning_rate": 9.080484102884132e-06, |
|
"loss": 0.0558, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8307254623044097, |
|
"grad_norm": 0.6607364577205248, |
|
"learning_rate": 9.077290799996015e-06, |
|
"loss": 0.0445, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.8316737790422001, |
|
"grad_norm": 0.4579344621348973, |
|
"learning_rate": 9.074092525385777e-06, |
|
"loss": 0.0532, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.8326220957799905, |
|
"grad_norm": 0.44020290978074095, |
|
"learning_rate": 9.070889282953297e-06, |
|
"loss": 0.0432, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.833570412517781, |
|
"grad_norm": 0.7817453278171299, |
|
"learning_rate": 9.067681076604507e-06, |
|
"loss": 0.0622, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.8345187292555714, |
|
"grad_norm": 0.6106825636941368, |
|
"learning_rate": 9.064467910251396e-06, |
|
"loss": 0.0499, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8354670459933617, |
|
"grad_norm": 0.5733918003298187, |
|
"learning_rate": 9.061249787812e-06, |
|
"loss": 0.058, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.8364153627311522, |
|
"grad_norm": 0.734104839469145, |
|
"learning_rate": 9.058026713210396e-06, |
|
"loss": 0.0603, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.8373636794689426, |
|
"grad_norm": 0.5863205921902287, |
|
"learning_rate": 9.054798690376702e-06, |
|
"loss": 0.0542, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.838311996206733, |
|
"grad_norm": 0.6529541400114963, |
|
"learning_rate": 9.051565723247072e-06, |
|
"loss": 0.0546, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.8392603129445235, |
|
"grad_norm": 0.8496840763418192, |
|
"learning_rate": 9.048327815763682e-06, |
|
"loss": 0.0499, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8402086296823139, |
|
"grad_norm": 0.4879463969986272, |
|
"learning_rate": 9.045084971874738e-06, |
|
"loss": 0.0404, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.8411569464201043, |
|
"grad_norm": 0.48366631890428774, |
|
"learning_rate": 9.041837195534462e-06, |
|
"loss": 0.0438, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 0.5668428745474414, |
|
"learning_rate": 9.038584490703095e-06, |
|
"loss": 0.0577, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.8430535798956852, |
|
"grad_norm": 0.6139669391301276, |
|
"learning_rate": 9.03532686134688e-06, |
|
"loss": 0.0699, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.8440018966334756, |
|
"grad_norm": 0.6191388281587789, |
|
"learning_rate": 9.032064311438073e-06, |
|
"loss": 0.0588, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.844950213371266, |
|
"grad_norm": 0.6152385003685913, |
|
"learning_rate": 9.028796844954924e-06, |
|
"loss": 0.0537, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.8458985301090565, |
|
"grad_norm": 0.755005932732524, |
|
"learning_rate": 9.025524465881683e-06, |
|
"loss": 0.0649, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.8468468468468469, |
|
"grad_norm": 0.7676513218085563, |
|
"learning_rate": 9.022247178208585e-06, |
|
"loss": 0.0635, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.8477951635846372, |
|
"grad_norm": 0.5920047067355723, |
|
"learning_rate": 9.018964985931856e-06, |
|
"loss": 0.06, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.8487434803224277, |
|
"grad_norm": 0.5807083572715754, |
|
"learning_rate": 9.015677893053695e-06, |
|
"loss": 0.0505, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8496917970602181, |
|
"grad_norm": 0.7897487160161104, |
|
"learning_rate": 9.012385903582286e-06, |
|
"loss": 0.0714, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.8506401137980085, |
|
"grad_norm": 0.5382652341176712, |
|
"learning_rate": 9.009089021531777e-06, |
|
"loss": 0.0512, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.8515884305357989, |
|
"grad_norm": 0.8441756486986386, |
|
"learning_rate": 9.005787250922285e-06, |
|
"loss": 0.0766, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.8525367472735894, |
|
"grad_norm": 0.42966299233294036, |
|
"learning_rate": 9.002480595779883e-06, |
|
"loss": 0.0469, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.8534850640113798, |
|
"grad_norm": 0.5779848432711783, |
|
"learning_rate": 8.999169060136609e-06, |
|
"loss": 0.0549, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8544333807491702, |
|
"grad_norm": 0.49828391414464324, |
|
"learning_rate": 8.995852648030444e-06, |
|
"loss": 0.0513, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.8553816974869607, |
|
"grad_norm": 0.5712972033755797, |
|
"learning_rate": 8.99253136350532e-06, |
|
"loss": 0.0642, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.8563300142247511, |
|
"grad_norm": 0.7463859566833713, |
|
"learning_rate": 8.989205210611106e-06, |
|
"loss": 0.0669, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.8572783309625415, |
|
"grad_norm": 0.6015754760898006, |
|
"learning_rate": 8.98587419340361e-06, |
|
"loss": 0.0518, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.8582266477003319, |
|
"grad_norm": 0.7279488477743896, |
|
"learning_rate": 8.982538315944573e-06, |
|
"loss": 0.0603, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8591749644381224, |
|
"grad_norm": 0.41210687518386613, |
|
"learning_rate": 8.979197582301662e-06, |
|
"loss": 0.0508, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.8601232811759127, |
|
"grad_norm": 1.4900739335277513, |
|
"learning_rate": 8.97585199654846e-06, |
|
"loss": 0.072, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.8610715979137031, |
|
"grad_norm": 0.5450963951689192, |
|
"learning_rate": 8.972501562764476e-06, |
|
"loss": 0.0566, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.8620199146514936, |
|
"grad_norm": 0.5356916948533633, |
|
"learning_rate": 8.969146285035119e-06, |
|
"loss": 0.0471, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.862968231389284, |
|
"grad_norm": 0.6064958608566305, |
|
"learning_rate": 8.965786167451713e-06, |
|
"loss": 0.0586, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8639165481270744, |
|
"grad_norm": 0.6550030676781202, |
|
"learning_rate": 8.962421214111486e-06, |
|
"loss": 0.0622, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.8648648648648649, |
|
"grad_norm": 0.5789487697080219, |
|
"learning_rate": 8.959051429117551e-06, |
|
"loss": 0.0587, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.8658131816026553, |
|
"grad_norm": 0.6480466907010984, |
|
"learning_rate": 8.955676816578922e-06, |
|
"loss": 0.0596, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.8667614983404457, |
|
"grad_norm": 0.703037972481164, |
|
"learning_rate": 8.9522973806105e-06, |
|
"loss": 0.0836, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.8677098150782361, |
|
"grad_norm": 0.49499510899266297, |
|
"learning_rate": 8.94891312533306e-06, |
|
"loss": 0.0493, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8686581318160266, |
|
"grad_norm": 0.4679737716122778, |
|
"learning_rate": 8.945524054873261e-06, |
|
"loss": 0.0473, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.869606448553817, |
|
"grad_norm": 0.4868047238192127, |
|
"learning_rate": 8.942130173363628e-06, |
|
"loss": 0.0617, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.8705547652916074, |
|
"grad_norm": 0.48143223119722567, |
|
"learning_rate": 8.938731484942557e-06, |
|
"loss": 0.0459, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.8715030820293979, |
|
"grad_norm": 0.5109365563225756, |
|
"learning_rate": 8.935327993754307e-06, |
|
"loss": 0.0603, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.8724513987671882, |
|
"grad_norm": 0.5946328530954544, |
|
"learning_rate": 8.931919703948981e-06, |
|
"loss": 0.0663, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8733997155049786, |
|
"grad_norm": 0.6675396299202498, |
|
"learning_rate": 8.928506619682549e-06, |
|
"loss": 0.0522, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.8743480322427691, |
|
"grad_norm": 0.5242785281728278, |
|
"learning_rate": 8.925088745116817e-06, |
|
"loss": 0.0477, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.8752963489805595, |
|
"grad_norm": 0.4607255100157249, |
|
"learning_rate": 8.921666084419435e-06, |
|
"loss": 0.0444, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.8762446657183499, |
|
"grad_norm": 0.6127086410246447, |
|
"learning_rate": 8.918238641763894e-06, |
|
"loss": 0.0505, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.7108664485212953, |
|
"learning_rate": 8.914806421329505e-06, |
|
"loss": 0.0372, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8781412991939308, |
|
"grad_norm": 0.48171514690034495, |
|
"learning_rate": 8.911369427301418e-06, |
|
"loss": 0.0467, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.8790896159317212, |
|
"grad_norm": 0.5032020795283936, |
|
"learning_rate": 8.907927663870592e-06, |
|
"loss": 0.0383, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.8800379326695116, |
|
"grad_norm": 0.6490864569323296, |
|
"learning_rate": 8.90448113523381e-06, |
|
"loss": 0.0703, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.8809862494073021, |
|
"grad_norm": 0.5274849878368799, |
|
"learning_rate": 8.901029845593658e-06, |
|
"loss": 0.0497, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.8819345661450925, |
|
"grad_norm": 0.7209898569229573, |
|
"learning_rate": 8.897573799158534e-06, |
|
"loss": 0.0845, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8828828828828829, |
|
"grad_norm": 0.653701403062353, |
|
"learning_rate": 8.894113000142636e-06, |
|
"loss": 0.0528, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.8838311996206734, |
|
"grad_norm": 0.5252034559155617, |
|
"learning_rate": 8.890647452765954e-06, |
|
"loss": 0.054, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.8847795163584637, |
|
"grad_norm": 0.6597062824750437, |
|
"learning_rate": 8.887177161254267e-06, |
|
"loss": 0.0508, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.8857278330962541, |
|
"grad_norm": 0.9841434864966624, |
|
"learning_rate": 8.883702129839144e-06, |
|
"loss": 0.06, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.8866761498340445, |
|
"grad_norm": 0.4716559195813748, |
|
"learning_rate": 8.880222362757928e-06, |
|
"loss": 0.0484, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.887624466571835, |
|
"grad_norm": 0.6275887169553205, |
|
"learning_rate": 8.87673786425374e-06, |
|
"loss": 0.055, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.8885727833096254, |
|
"grad_norm": 0.5480616561224483, |
|
"learning_rate": 8.87324863857547e-06, |
|
"loss": 0.0512, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.8895211000474158, |
|
"grad_norm": 0.5716073816122306, |
|
"learning_rate": 8.869754689977774e-06, |
|
"loss": 0.0575, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.8904694167852063, |
|
"grad_norm": 0.8761043849726794, |
|
"learning_rate": 8.866256022721062e-06, |
|
"loss": 0.0508, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.8914177335229967, |
|
"grad_norm": 0.7017157731117182, |
|
"learning_rate": 8.862752641071499e-06, |
|
"loss": 0.0546, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8923660502607871, |
|
"grad_norm": 1.5138916151321196, |
|
"learning_rate": 8.859244549301005e-06, |
|
"loss": 0.0658, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.8933143669985776, |
|
"grad_norm": 0.8433261605133346, |
|
"learning_rate": 8.855731751687233e-06, |
|
"loss": 0.0553, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.894262683736368, |
|
"grad_norm": 0.5494966721887847, |
|
"learning_rate": 8.852214252513582e-06, |
|
"loss": 0.0494, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.8952110004741584, |
|
"grad_norm": 0.6006177701179363, |
|
"learning_rate": 8.848692056069184e-06, |
|
"loss": 0.0612, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.8961593172119487, |
|
"grad_norm": 0.6876171031491582, |
|
"learning_rate": 8.84516516664889e-06, |
|
"loss": 0.0609, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.8971076339497392, |
|
"grad_norm": 0.846588378426009, |
|
"learning_rate": 8.841633588553287e-06, |
|
"loss": 0.0593, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.8980559506875296, |
|
"grad_norm": 1.175631640532978, |
|
"learning_rate": 8.838097326088667e-06, |
|
"loss": 0.0767, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.89900426742532, |
|
"grad_norm": 0.7010270158444133, |
|
"learning_rate": 8.834556383567042e-06, |
|
"loss": 0.0637, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.8999525841631105, |
|
"grad_norm": 0.7103962193756044, |
|
"learning_rate": 8.831010765306124e-06, |
|
"loss": 0.047, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.9009009009009009, |
|
"grad_norm": 0.9919713077792982, |
|
"learning_rate": 8.827460475629334e-06, |
|
"loss": 0.0699, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9018492176386913, |
|
"grad_norm": 0.9438936607800321, |
|
"learning_rate": 8.823905518865782e-06, |
|
"loss": 0.0962, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.9027975343764817, |
|
"grad_norm": 0.41357107371942303, |
|
"learning_rate": 8.820345899350275e-06, |
|
"loss": 0.0393, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.9037458511142722, |
|
"grad_norm": 0.6094306471098007, |
|
"learning_rate": 8.8167816214233e-06, |
|
"loss": 0.0547, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.9046941678520626, |
|
"grad_norm": 0.45434395748515616, |
|
"learning_rate": 8.81321268943103e-06, |
|
"loss": 0.0458, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.905642484589853, |
|
"grad_norm": 0.584662000585842, |
|
"learning_rate": 8.809639107725308e-06, |
|
"loss": 0.0684, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.9065908013276435, |
|
"grad_norm": 0.6281479664499341, |
|
"learning_rate": 8.80606088066365e-06, |
|
"loss": 0.0485, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.9075391180654339, |
|
"grad_norm": 0.5220137398785665, |
|
"learning_rate": 8.802478012609235e-06, |
|
"loss": 0.0478, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.9084874348032242, |
|
"grad_norm": 0.7613507347001472, |
|
"learning_rate": 8.798890507930899e-06, |
|
"loss": 0.0534, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.9094357515410147, |
|
"grad_norm": 0.5338153539509801, |
|
"learning_rate": 8.795298371003138e-06, |
|
"loss": 0.0467, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.9103840682788051, |
|
"grad_norm": 0.508435320780577, |
|
"learning_rate": 8.791701606206092e-06, |
|
"loss": 0.05, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9113323850165955, |
|
"grad_norm": 0.6801979027503147, |
|
"learning_rate": 8.788100217925541e-06, |
|
"loss": 0.0654, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.9122807017543859, |
|
"grad_norm": 0.5472159955708181, |
|
"learning_rate": 8.78449421055291e-06, |
|
"loss": 0.0566, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.9132290184921764, |
|
"grad_norm": 0.5546852372370231, |
|
"learning_rate": 8.78088358848525e-06, |
|
"loss": 0.0544, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.9141773352299668, |
|
"grad_norm": 0.7376086419870055, |
|
"learning_rate": 8.777268356125244e-06, |
|
"loss": 0.0618, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.9151256519677572, |
|
"grad_norm": 0.461174714622349, |
|
"learning_rate": 8.773648517881194e-06, |
|
"loss": 0.0527, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.9160739687055477, |
|
"grad_norm": 1.100649311314461, |
|
"learning_rate": 8.770024078167017e-06, |
|
"loss": 0.075, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.9170222854433381, |
|
"grad_norm": 0.5385193734337945, |
|
"learning_rate": 8.766395041402245e-06, |
|
"loss": 0.056, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.9179706021811285, |
|
"grad_norm": 0.4215583451342763, |
|
"learning_rate": 8.762761412012011e-06, |
|
"loss": 0.045, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.918918918918919, |
|
"grad_norm": 0.5690890175604749, |
|
"learning_rate": 8.75912319442705e-06, |
|
"loss": 0.0568, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.9198672356567094, |
|
"grad_norm": 0.5598668678593514, |
|
"learning_rate": 8.755480393083694e-06, |
|
"loss": 0.0629, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9208155523944997, |
|
"grad_norm": 0.4230299561301444, |
|
"learning_rate": 8.751833012423861e-06, |
|
"loss": 0.0402, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.9217638691322901, |
|
"grad_norm": 0.8504416588391118, |
|
"learning_rate": 8.74818105689505e-06, |
|
"loss": 0.0521, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.9227121858700806, |
|
"grad_norm": 0.461086821346764, |
|
"learning_rate": 8.744524530950351e-06, |
|
"loss": 0.0426, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.923660502607871, |
|
"grad_norm": 0.5086789755859074, |
|
"learning_rate": 8.740863439048412e-06, |
|
"loss": 0.0487, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.9246088193456614, |
|
"grad_norm": 0.45915883182777006, |
|
"learning_rate": 8.737197785653457e-06, |
|
"loss": 0.0444, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9255571360834519, |
|
"grad_norm": 0.6701095989032753, |
|
"learning_rate": 8.73352757523527e-06, |
|
"loss": 0.0707, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.9265054528212423, |
|
"grad_norm": 0.460793794881083, |
|
"learning_rate": 8.729852812269192e-06, |
|
"loss": 0.0462, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.9274537695590327, |
|
"grad_norm": 0.42146552351647865, |
|
"learning_rate": 8.726173501236115e-06, |
|
"loss": 0.0413, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.9284020862968232, |
|
"grad_norm": 0.4515670497285217, |
|
"learning_rate": 8.722489646622477e-06, |
|
"loss": 0.0486, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.9293504030346136, |
|
"grad_norm": 0.7298661971153528, |
|
"learning_rate": 8.718801252920257e-06, |
|
"loss": 0.0728, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.930298719772404, |
|
"grad_norm": 0.6123325398467794, |
|
"learning_rate": 8.715108324626967e-06, |
|
"loss": 0.0528, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.9312470365101944, |
|
"grad_norm": 0.5334963078534037, |
|
"learning_rate": 8.711410866245648e-06, |
|
"loss": 0.0409, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.9321953532479849, |
|
"grad_norm": 0.44851971952458897, |
|
"learning_rate": 8.70770888228487e-06, |
|
"loss": 0.0509, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.9331436699857752, |
|
"grad_norm": 0.9770313333004932, |
|
"learning_rate": 8.704002377258714e-06, |
|
"loss": 0.0463, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.9340919867235656, |
|
"grad_norm": 0.7370636377202378, |
|
"learning_rate": 8.700291355686779e-06, |
|
"loss": 0.0637, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.9350403034613561, |
|
"grad_norm": 0.6070776528057518, |
|
"learning_rate": 8.69657582209417e-06, |
|
"loss": 0.0488, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.9359886201991465, |
|
"grad_norm": 0.7278417266877663, |
|
"learning_rate": 8.692855781011494e-06, |
|
"loss": 0.0501, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.9369369369369369, |
|
"grad_norm": 0.4731052806759658, |
|
"learning_rate": 8.689131236974853e-06, |
|
"loss": 0.0417, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.9378852536747273, |
|
"grad_norm": 0.45598792555472306, |
|
"learning_rate": 8.68540219452584e-06, |
|
"loss": 0.0396, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.9388335704125178, |
|
"grad_norm": 0.5661429908370399, |
|
"learning_rate": 8.681668658211535e-06, |
|
"loss": 0.0577, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9397818871503082, |
|
"grad_norm": 0.41955875165931145, |
|
"learning_rate": 8.677930632584496e-06, |
|
"loss": 0.0432, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.9407302038880986, |
|
"grad_norm": 0.4107826749470781, |
|
"learning_rate": 8.674188122202756e-06, |
|
"loss": 0.0535, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.9416785206258891, |
|
"grad_norm": 0.47653411892607034, |
|
"learning_rate": 8.670441131629816e-06, |
|
"loss": 0.0586, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.9426268373636795, |
|
"grad_norm": 0.53171021829938, |
|
"learning_rate": 8.66668966543464e-06, |
|
"loss": 0.0518, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.9435751541014699, |
|
"grad_norm": 0.43148473645836083, |
|
"learning_rate": 8.662933728191651e-06, |
|
"loss": 0.0431, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.9445234708392604, |
|
"grad_norm": 0.4471351558402442, |
|
"learning_rate": 8.659173324480722e-06, |
|
"loss": 0.0438, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.9454717875770507, |
|
"grad_norm": 0.5782265716940447, |
|
"learning_rate": 8.65540845888717e-06, |
|
"loss": 0.0719, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.9464201043148411, |
|
"grad_norm": 0.4141433604011682, |
|
"learning_rate": 8.651639136001762e-06, |
|
"loss": 0.0469, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.9473684210526315, |
|
"grad_norm": 0.6061219180547935, |
|
"learning_rate": 8.647865360420686e-06, |
|
"loss": 0.0489, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.948316737790422, |
|
"grad_norm": 0.48916698447496854, |
|
"learning_rate": 8.644087136745572e-06, |
|
"loss": 0.0513, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9492650545282124, |
|
"grad_norm": 0.37441669864478105, |
|
"learning_rate": 8.640304469583469e-06, |
|
"loss": 0.0412, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.9502133712660028, |
|
"grad_norm": 1.0623152293680482, |
|
"learning_rate": 8.636517363546838e-06, |
|
"loss": 0.0655, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.9511616880037933, |
|
"grad_norm": 0.7061581986197312, |
|
"learning_rate": 8.63272582325357e-06, |
|
"loss": 0.0499, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.9521100047415837, |
|
"grad_norm": 0.5399127227606683, |
|
"learning_rate": 8.62892985332694e-06, |
|
"loss": 0.0518, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.9530583214793741, |
|
"grad_norm": 0.4559892605058489, |
|
"learning_rate": 8.625129458395643e-06, |
|
"loss": 0.0459, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9540066382171646, |
|
"grad_norm": 0.485355373272851, |
|
"learning_rate": 8.621324643093762e-06, |
|
"loss": 0.0454, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.954954954954955, |
|
"grad_norm": 0.7459047370537332, |
|
"learning_rate": 8.617515412060771e-06, |
|
"loss": 0.06, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.9559032716927454, |
|
"grad_norm": 0.7374476556281685, |
|
"learning_rate": 8.613701769941526e-06, |
|
"loss": 0.0677, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.9568515884305357, |
|
"grad_norm": 0.5640575902917073, |
|
"learning_rate": 8.609883721386266e-06, |
|
"loss": 0.0464, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.9577999051683262, |
|
"grad_norm": 0.5329518829334081, |
|
"learning_rate": 8.606061271050601e-06, |
|
"loss": 0.0422, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9587482219061166, |
|
"grad_norm": 0.5672285885118362, |
|
"learning_rate": 8.602234423595509e-06, |
|
"loss": 0.0432, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.959696538643907, |
|
"grad_norm": 0.49279890911522445, |
|
"learning_rate": 8.598403183687328e-06, |
|
"loss": 0.0411, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.9606448553816975, |
|
"grad_norm": 0.5195118583178678, |
|
"learning_rate": 8.594567555997755e-06, |
|
"loss": 0.0575, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.9615931721194879, |
|
"grad_norm": 1.372925234445775, |
|
"learning_rate": 8.590727545203833e-06, |
|
"loss": 0.0615, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.9625414888572783, |
|
"grad_norm": 0.7147315054833345, |
|
"learning_rate": 8.586883155987955e-06, |
|
"loss": 0.0712, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9634898055950688, |
|
"grad_norm": 0.5802509696174448, |
|
"learning_rate": 8.583034393037848e-06, |
|
"loss": 0.0552, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.9644381223328592, |
|
"grad_norm": 0.49007583048635933, |
|
"learning_rate": 8.579181261046576e-06, |
|
"loss": 0.0449, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.9653864390706496, |
|
"grad_norm": 0.48751614831454176, |
|
"learning_rate": 8.57532376471253e-06, |
|
"loss": 0.0475, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.96633475580844, |
|
"grad_norm": 0.6496160692100631, |
|
"learning_rate": 8.571461908739415e-06, |
|
"loss": 0.0523, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.9672830725462305, |
|
"grad_norm": 0.481345745516473, |
|
"learning_rate": 8.567595697836266e-06, |
|
"loss": 0.0515, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9682313892840209, |
|
"grad_norm": 0.5247818144993567, |
|
"learning_rate": 8.563725136717419e-06, |
|
"loss": 0.0494, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.9691797060218112, |
|
"grad_norm": 0.8474516614825078, |
|
"learning_rate": 8.559850230102513e-06, |
|
"loss": 0.0578, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.9701280227596017, |
|
"grad_norm": 0.7494686751693889, |
|
"learning_rate": 8.555970982716492e-06, |
|
"loss": 0.0613, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.9710763394973921, |
|
"grad_norm": 0.528161959351856, |
|
"learning_rate": 8.55208739928959e-06, |
|
"loss": 0.0446, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.9720246562351825, |
|
"grad_norm": 0.7556057248494816, |
|
"learning_rate": 8.54819948455733e-06, |
|
"loss": 0.0611, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.972972972972973, |
|
"grad_norm": 0.4857201457975449, |
|
"learning_rate": 8.54430724326051e-06, |
|
"loss": 0.0396, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.9739212897107634, |
|
"grad_norm": 0.4633933638270801, |
|
"learning_rate": 8.540410680145213e-06, |
|
"loss": 0.045, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.9748696064485538, |
|
"grad_norm": 0.5215732727679809, |
|
"learning_rate": 8.536509799962784e-06, |
|
"loss": 0.047, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.9758179231863442, |
|
"grad_norm": 1.5449712519877792, |
|
"learning_rate": 8.532604607469839e-06, |
|
"loss": 0.0717, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.9767662399241347, |
|
"grad_norm": 0.46693259860172376, |
|
"learning_rate": 8.528695107428247e-06, |
|
"loss": 0.0458, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9777145566619251, |
|
"grad_norm": 0.5388054089062692, |
|
"learning_rate": 8.52478130460513e-06, |
|
"loss": 0.047, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.9786628733997155, |
|
"grad_norm": 0.5283181708144433, |
|
"learning_rate": 8.520863203772858e-06, |
|
"loss": 0.0496, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.979611190137506, |
|
"grad_norm": 0.5890035811704775, |
|
"learning_rate": 8.516940809709044e-06, |
|
"loss": 0.0437, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.9805595068752964, |
|
"grad_norm": 0.446739345865473, |
|
"learning_rate": 8.513014127196533e-06, |
|
"loss": 0.042, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.9815078236130867, |
|
"grad_norm": 0.49851759898580866, |
|
"learning_rate": 8.509083161023399e-06, |
|
"loss": 0.0553, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.9824561403508771, |
|
"grad_norm": 0.8986990099986447, |
|
"learning_rate": 8.505147915982943e-06, |
|
"loss": 0.0491, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.9834044570886676, |
|
"grad_norm": 0.4813313700157437, |
|
"learning_rate": 8.501208396873677e-06, |
|
"loss": 0.0524, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.984352773826458, |
|
"grad_norm": 0.7823009578163489, |
|
"learning_rate": 8.497264608499332e-06, |
|
"loss": 0.0542, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.9853010905642484, |
|
"grad_norm": 0.5256393060960738, |
|
"learning_rate": 8.49331655566884e-06, |
|
"loss": 0.0545, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.9862494073020389, |
|
"grad_norm": 0.5400471979930811, |
|
"learning_rate": 8.489364243196334e-06, |
|
"loss": 0.0495, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9871977240398293, |
|
"grad_norm": 0.5862041954662611, |
|
"learning_rate": 8.485407675901142e-06, |
|
"loss": 0.0442, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.9881460407776197, |
|
"grad_norm": 0.6834922008296388, |
|
"learning_rate": 8.48144685860778e-06, |
|
"loss": 0.064, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.9890943575154102, |
|
"grad_norm": 0.8002369541010694, |
|
"learning_rate": 8.477481796145945e-06, |
|
"loss": 0.0464, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.9900426742532006, |
|
"grad_norm": 0.47393154077930216, |
|
"learning_rate": 8.47351249335051e-06, |
|
"loss": 0.0485, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.990990990990991, |
|
"grad_norm": 0.4987272807246751, |
|
"learning_rate": 8.469538955061525e-06, |
|
"loss": 0.0478, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9919393077287814, |
|
"grad_norm": 0.6406968710094035, |
|
"learning_rate": 8.465561186124193e-06, |
|
"loss": 0.0494, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.9928876244665719, |
|
"grad_norm": 0.5319476049591959, |
|
"learning_rate": 8.46157919138889e-06, |
|
"loss": 0.038, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.9938359412043622, |
|
"grad_norm": 0.5377926003236448, |
|
"learning_rate": 8.457592975711128e-06, |
|
"loss": 0.0415, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.9947842579421526, |
|
"grad_norm": 0.5054973123174826, |
|
"learning_rate": 8.45360254395158e-06, |
|
"loss": 0.0509, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.9957325746799431, |
|
"grad_norm": 0.6511826899131821, |
|
"learning_rate": 8.449607900976056e-06, |
|
"loss": 0.0496, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9966808914177335, |
|
"grad_norm": 0.34335574918053036, |
|
"learning_rate": 8.445609051655497e-06, |
|
"loss": 0.0322, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.9976292081555239, |
|
"grad_norm": 0.5324023086103392, |
|
"learning_rate": 8.441606000865978e-06, |
|
"loss": 0.0465, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.9985775248933144, |
|
"grad_norm": 0.3971741987281817, |
|
"learning_rate": 8.437598753488693e-06, |
|
"loss": 0.0316, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.9995258416311048, |
|
"grad_norm": 0.4702644191912913, |
|
"learning_rate": 8.43358731440996e-06, |
|
"loss": 0.0424, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.9995258416311048, |
|
"eval_loss": 0.05579984560608864, |
|
"eval_runtime": 205.6016, |
|
"eval_samples_per_second": 34.547, |
|
"eval_steps_per_second": 1.08, |
|
"step": 1054 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 3162, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.8007437509892506e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|