|
{ |
|
"best_metric": 0.0158307533711195, |
|
"best_model_checkpoint": "autotrain-ltphq-1aue6/checkpoint-7029", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 7029, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010670081092616303, |
|
"grad_norm": 10.576315879821777, |
|
"learning_rate": 1.7780938833570414e-06, |
|
"loss": 0.5983, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.021340162185232606, |
|
"grad_norm": 6.441564559936523, |
|
"learning_rate": 3.556187766714083e-06, |
|
"loss": 0.5878, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03201024327784891, |
|
"grad_norm": 5.742478370666504, |
|
"learning_rate": 5.334281650071124e-06, |
|
"loss": 0.4551, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04268032437046521, |
|
"grad_norm": 3.9501702785491943, |
|
"learning_rate": 7.112375533428166e-06, |
|
"loss": 0.3351, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05335040546308152, |
|
"grad_norm": 3.156757354736328, |
|
"learning_rate": 8.890469416785207e-06, |
|
"loss": 0.2165, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.06402048655569782, |
|
"grad_norm": 1.531072735786438, |
|
"learning_rate": 1.0668563300142247e-05, |
|
"loss": 0.1089, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07469056764831412, |
|
"grad_norm": 0.6878227591514587, |
|
"learning_rate": 1.244665718349929e-05, |
|
"loss": 0.1164, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.08536064874093043, |
|
"grad_norm": 0.3712044358253479, |
|
"learning_rate": 1.4224751066856332e-05, |
|
"loss": 0.0563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09603072983354674, |
|
"grad_norm": 5.825244903564453, |
|
"learning_rate": 1.6002844950213374e-05, |
|
"loss": 0.0577, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10670081092616304, |
|
"grad_norm": 7.699280261993408, |
|
"learning_rate": 1.7780938833570414e-05, |
|
"loss": 0.0995, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11737089201877934, |
|
"grad_norm": 1.6869832277297974, |
|
"learning_rate": 1.9559032716927454e-05, |
|
"loss": 0.0223, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12804097311139565, |
|
"grad_norm": 0.5479092597961426, |
|
"learning_rate": 2.1337126600284495e-05, |
|
"loss": 0.0396, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13871105420401195, |
|
"grad_norm": 17.6248836517334, |
|
"learning_rate": 2.3115220483641535e-05, |
|
"loss": 0.0629, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.14938113529662825, |
|
"grad_norm": 1.2864736318588257, |
|
"learning_rate": 2.489331436699858e-05, |
|
"loss": 0.0815, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.16005121638924455, |
|
"grad_norm": 14.023919105529785, |
|
"learning_rate": 2.6671408250355616e-05, |
|
"loss": 0.1471, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.17072129748186085, |
|
"grad_norm": 0.7266956567764282, |
|
"learning_rate": 2.8449502133712663e-05, |
|
"loss": 0.1421, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18139137857447715, |
|
"grad_norm": 1.591202735900879, |
|
"learning_rate": 3.0227596017069704e-05, |
|
"loss": 0.0837, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.19206145966709348, |
|
"grad_norm": 0.03316282480955124, |
|
"learning_rate": 3.200568990042675e-05, |
|
"loss": 0.0419, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.20273154075970978, |
|
"grad_norm": 0.0206840131431818, |
|
"learning_rate": 3.3783783783783784e-05, |
|
"loss": 0.0621, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.21340162185232608, |
|
"grad_norm": 0.017792997881770134, |
|
"learning_rate": 3.556187766714083e-05, |
|
"loss": 0.1231, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22407170294494239, |
|
"grad_norm": 5.664891242980957, |
|
"learning_rate": 3.7339971550497865e-05, |
|
"loss": 0.1255, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2347417840375587, |
|
"grad_norm": 0.08337491750717163, |
|
"learning_rate": 3.911806543385491e-05, |
|
"loss": 0.1177, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.245411865130175, |
|
"grad_norm": 11.999979019165039, |
|
"learning_rate": 4.089615931721195e-05, |
|
"loss": 0.0796, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.2560819462227913, |
|
"grad_norm": 0.007278278470039368, |
|
"learning_rate": 4.267425320056899e-05, |
|
"loss": 0.0677, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2667520273154076, |
|
"grad_norm": 16.901777267456055, |
|
"learning_rate": 4.4452347083926033e-05, |
|
"loss": 0.2031, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2774221084080239, |
|
"grad_norm": 8.973575592041016, |
|
"learning_rate": 4.623044096728307e-05, |
|
"loss": 0.0724, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2880921895006402, |
|
"grad_norm": 0.5288171768188477, |
|
"learning_rate": 4.8008534850640114e-05, |
|
"loss": 0.0616, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.2987622705932565, |
|
"grad_norm": 0.03448805212974548, |
|
"learning_rate": 4.978662873399716e-05, |
|
"loss": 0.0951, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3094323516858728, |
|
"grad_norm": 37.160011291503906, |
|
"learning_rate": 4.982611444830857e-05, |
|
"loss": 0.2065, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.3201024327784891, |
|
"grad_norm": 0.19236384332180023, |
|
"learning_rate": 4.9628517230477395e-05, |
|
"loss": 0.2069, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3307725138711054, |
|
"grad_norm": 0.1479603499174118, |
|
"learning_rate": 4.9430920012646225e-05, |
|
"loss": 0.1349, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.3414425949637217, |
|
"grad_norm": 0.3465137779712677, |
|
"learning_rate": 4.923332279481505e-05, |
|
"loss": 0.13, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.352112676056338, |
|
"grad_norm": 0.05050384998321533, |
|
"learning_rate": 4.903572557698388e-05, |
|
"loss": 0.137, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3627827571489543, |
|
"grad_norm": 0.16721266508102417, |
|
"learning_rate": 4.883812835915271e-05, |
|
"loss": 0.1692, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.37345283824157066, |
|
"grad_norm": 0.015766268596053123, |
|
"learning_rate": 4.864053114132153e-05, |
|
"loss": 0.1324, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.38412291933418696, |
|
"grad_norm": 0.04455697163939476, |
|
"learning_rate": 4.8442933923490356e-05, |
|
"loss": 0.1173, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.39479300042680326, |
|
"grad_norm": 0.045407216995954514, |
|
"learning_rate": 4.8245336705659186e-05, |
|
"loss": 0.1282, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.40546308151941957, |
|
"grad_norm": 0.014451881870627403, |
|
"learning_rate": 4.804773948782802e-05, |
|
"loss": 0.156, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.41613316261203587, |
|
"grad_norm": 10.56628131866455, |
|
"learning_rate": 4.785014226999684e-05, |
|
"loss": 0.1354, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.42680324370465217, |
|
"grad_norm": 9.949240684509277, |
|
"learning_rate": 4.765254505216567e-05, |
|
"loss": 0.1674, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.43747332479726847, |
|
"grad_norm": 0.8956235647201538, |
|
"learning_rate": 4.7454947834334494e-05, |
|
"loss": 0.0663, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.44814340588988477, |
|
"grad_norm": 11.43295669555664, |
|
"learning_rate": 4.725735061650332e-05, |
|
"loss": 0.106, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4588134869825011, |
|
"grad_norm": 0.10002151876688004, |
|
"learning_rate": 4.7059753398672155e-05, |
|
"loss": 0.0652, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4694835680751174, |
|
"grad_norm": 0.018603714182972908, |
|
"learning_rate": 4.686215618084098e-05, |
|
"loss": 0.0544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4801536491677337, |
|
"grad_norm": 0.002447799313813448, |
|
"learning_rate": 4.66645589630098e-05, |
|
"loss": 0.0874, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.49082373026035, |
|
"grad_norm": 0.029736997559666634, |
|
"learning_rate": 4.6466961745178625e-05, |
|
"loss": 0.1893, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5014938113529663, |
|
"grad_norm": 10.314343452453613, |
|
"learning_rate": 4.6269364527347456e-05, |
|
"loss": 0.1898, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5121638924455826, |
|
"grad_norm": 0.6686179041862488, |
|
"learning_rate": 4.6071767309516286e-05, |
|
"loss": 0.0865, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5228339735381989, |
|
"grad_norm": 1.8905715942382812, |
|
"learning_rate": 4.587417009168511e-05, |
|
"loss": 0.1412, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5335040546308152, |
|
"grad_norm": 2.919725179672241, |
|
"learning_rate": 4.567657287385394e-05, |
|
"loss": 0.1305, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5441741357234315, |
|
"grad_norm": 0.24073974788188934, |
|
"learning_rate": 4.547897565602276e-05, |
|
"loss": 0.1203, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5548442168160478, |
|
"grad_norm": 0.010738029144704342, |
|
"learning_rate": 4.5281378438191594e-05, |
|
"loss": 0.0627, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5655142979086641, |
|
"grad_norm": 0.008832808583974838, |
|
"learning_rate": 4.5083781220360424e-05, |
|
"loss": 0.1611, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5761843790012804, |
|
"grad_norm": 3.6089112758636475, |
|
"learning_rate": 4.488618400252925e-05, |
|
"loss": 0.146, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5868544600938967, |
|
"grad_norm": 0.08832165598869324, |
|
"learning_rate": 4.468858678469807e-05, |
|
"loss": 0.1005, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.597524541186513, |
|
"grad_norm": 0.03827153891324997, |
|
"learning_rate": 4.44909895668669e-05, |
|
"loss": 0.1597, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6081946222791293, |
|
"grad_norm": 0.02119293250143528, |
|
"learning_rate": 4.429339234903573e-05, |
|
"loss": 0.0933, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.6188647033717456, |
|
"grad_norm": 0.004805543925613165, |
|
"learning_rate": 4.4095795131204555e-05, |
|
"loss": 0.0904, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.629534784464362, |
|
"grad_norm": 0.560352623462677, |
|
"learning_rate": 4.3898197913373385e-05, |
|
"loss": 0.1021, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.6402048655569782, |
|
"grad_norm": 0.13053584098815918, |
|
"learning_rate": 4.370060069554221e-05, |
|
"loss": 0.1822, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6508749466495946, |
|
"grad_norm": 0.020129365846514702, |
|
"learning_rate": 4.350300347771103e-05, |
|
"loss": 0.1116, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.6615450277422108, |
|
"grad_norm": 0.08490480482578278, |
|
"learning_rate": 4.330540625987986e-05, |
|
"loss": 0.1514, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6722151088348272, |
|
"grad_norm": 17.20114517211914, |
|
"learning_rate": 4.310780904204869e-05, |
|
"loss": 0.1683, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.6828851899274434, |
|
"grad_norm": 20.063180923461914, |
|
"learning_rate": 4.2910211824217516e-05, |
|
"loss": 0.0969, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6935552710200598, |
|
"grad_norm": 10.146587371826172, |
|
"learning_rate": 4.271261460638634e-05, |
|
"loss": 0.1382, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 0.04244920238852501, |
|
"learning_rate": 4.251501738855517e-05, |
|
"loss": 0.102, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7148954332052924, |
|
"grad_norm": 3.3513338565826416, |
|
"learning_rate": 4.2317420170724e-05, |
|
"loss": 0.0844, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.7255655142979086, |
|
"grad_norm": 9.942954063415527, |
|
"learning_rate": 4.2119822952892824e-05, |
|
"loss": 0.1169, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.736235595390525, |
|
"grad_norm": 0.02176540717482567, |
|
"learning_rate": 4.1922225735061654e-05, |
|
"loss": 0.0546, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.7469056764831413, |
|
"grad_norm": 0.02619314193725586, |
|
"learning_rate": 4.172462851723048e-05, |
|
"loss": 0.1489, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"grad_norm": 4.979767799377441, |
|
"learning_rate": 4.152703129939931e-05, |
|
"loss": 0.1087, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7682458386683739, |
|
"grad_norm": 0.22245188057422638, |
|
"learning_rate": 4.132943408156814e-05, |
|
"loss": 0.0505, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7789159197609902, |
|
"grad_norm": 3.7733798027038574, |
|
"learning_rate": 4.113183686373696e-05, |
|
"loss": 0.1281, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.7895860008536065, |
|
"grad_norm": 0.004993762820959091, |
|
"learning_rate": 4.0934239645905786e-05, |
|
"loss": 0.0967, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8002560819462228, |
|
"grad_norm": 6.705662250518799, |
|
"learning_rate": 4.0736642428074616e-05, |
|
"loss": 0.088, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8109261630388391, |
|
"grad_norm": 0.09842755645513535, |
|
"learning_rate": 4.053904521024344e-05, |
|
"loss": 0.09, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8215962441314554, |
|
"grad_norm": 0.006683106068521738, |
|
"learning_rate": 4.034144799241227e-05, |
|
"loss": 0.0329, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8322663252240717, |
|
"grad_norm": 0.8531517386436462, |
|
"learning_rate": 4.014385077458109e-05, |
|
"loss": 0.1405, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.842936406316688, |
|
"grad_norm": 0.011011715978384018, |
|
"learning_rate": 3.9946253556749924e-05, |
|
"loss": 0.1381, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.8536064874093043, |
|
"grad_norm": 0.20137256383895874, |
|
"learning_rate": 3.974865633891875e-05, |
|
"loss": 0.1315, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8642765685019206, |
|
"grad_norm": 4.466578006744385, |
|
"learning_rate": 3.955105912108758e-05, |
|
"loss": 0.0826, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.8749466495945369, |
|
"grad_norm": 0.007189568132162094, |
|
"learning_rate": 3.935346190325641e-05, |
|
"loss": 0.0696, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.8856167306871532, |
|
"grad_norm": 0.13521578907966614, |
|
"learning_rate": 3.915586468542523e-05, |
|
"loss": 0.0647, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.8962868117797695, |
|
"grad_norm": 0.09942985326051712, |
|
"learning_rate": 3.8958267467594055e-05, |
|
"loss": 0.0337, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9069568928723858, |
|
"grad_norm": 0.003063550451770425, |
|
"learning_rate": 3.8760670249762885e-05, |
|
"loss": 0.1349, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9176269739650021, |
|
"grad_norm": 0.03615418076515198, |
|
"learning_rate": 3.8563073031931715e-05, |
|
"loss": 0.1884, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9282970550576184, |
|
"grad_norm": 2.4063897132873535, |
|
"learning_rate": 3.836547581410054e-05, |
|
"loss": 0.0942, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9389671361502347, |
|
"grad_norm": 0.10824164003133774, |
|
"learning_rate": 3.816787859626937e-05, |
|
"loss": 0.1643, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.949637217242851, |
|
"grad_norm": 0.23877990245819092, |
|
"learning_rate": 3.797028137843819e-05, |
|
"loss": 0.2041, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.9603072983354674, |
|
"grad_norm": 0.12965470552444458, |
|
"learning_rate": 3.7772684160607016e-05, |
|
"loss": 0.0686, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.9709773794280837, |
|
"grad_norm": 9.515990257263184, |
|
"learning_rate": 3.757508694277585e-05, |
|
"loss": 0.1288, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.9816474605207, |
|
"grad_norm": 0.010902081616222858, |
|
"learning_rate": 3.737748972494468e-05, |
|
"loss": 0.1204, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9923175416133163, |
|
"grad_norm": 0.12074346095323563, |
|
"learning_rate": 3.71798925071135e-05, |
|
"loss": 0.0735, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9942369263607257, |
|
"eval_auc": 0.9994051665223928, |
|
"eval_f1": 0.9942369263607257, |
|
"eval_loss": 0.02018207497894764, |
|
"eval_precision": 0.9910638297872341, |
|
"eval_recall": 0.9974304068522484, |
|
"eval_runtime": 1286.73, |
|
"eval_samples_per_second": 3.641, |
|
"eval_steps_per_second": 0.228, |
|
"step": 2343 |
|
}, |
|
{ |
|
"epoch": 1.0029876227059327, |
|
"grad_norm": 0.13857701420783997, |
|
"learning_rate": 3.6982295289282324e-05, |
|
"loss": 0.1404, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.0136577037985488, |
|
"grad_norm": 16.29566764831543, |
|
"learning_rate": 3.6784698071451154e-05, |
|
"loss": 0.1023, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.0243277848911652, |
|
"grad_norm": 0.007060299627482891, |
|
"learning_rate": 3.6587100853619984e-05, |
|
"loss": 0.0314, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0349978659837815, |
|
"grad_norm": 0.1959122270345688, |
|
"learning_rate": 3.638950363578881e-05, |
|
"loss": 0.0698, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.0456679470763979, |
|
"grad_norm": 2.1727123260498047, |
|
"learning_rate": 3.619190641795764e-05, |
|
"loss": 0.1026, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.056338028169014, |
|
"grad_norm": 0.14948531985282898, |
|
"learning_rate": 3.599430920012646e-05, |
|
"loss": 0.1336, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.0670081092616304, |
|
"grad_norm": 0.0020017814822494984, |
|
"learning_rate": 3.579671198229529e-05, |
|
"loss": 0.0718, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0776781903542467, |
|
"grad_norm": 0.040247637778520584, |
|
"learning_rate": 3.559911476446412e-05, |
|
"loss": 0.1003, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.088348271446863, |
|
"grad_norm": 0.0026400748174637556, |
|
"learning_rate": 3.5401517546632946e-05, |
|
"loss": 0.0103, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.0990183525394792, |
|
"grad_norm": 13.71839714050293, |
|
"learning_rate": 3.520392032880177e-05, |
|
"loss": 0.1549, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.1096884336320956, |
|
"grad_norm": 0.016973601654171944, |
|
"learning_rate": 3.50063231109706e-05, |
|
"loss": 0.0833, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.120358514724712, |
|
"grad_norm": 0.023715652525424957, |
|
"learning_rate": 3.480872589313943e-05, |
|
"loss": 0.1606, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.1310285958173283, |
|
"grad_norm": 0.12024948000907898, |
|
"learning_rate": 3.4611128675308254e-05, |
|
"loss": 0.0659, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.1416986769099444, |
|
"grad_norm": 0.013869931921362877, |
|
"learning_rate": 3.4413531457477084e-05, |
|
"loss": 0.1145, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.1523687580025608, |
|
"grad_norm": 0.003865574486553669, |
|
"learning_rate": 3.421593423964591e-05, |
|
"loss": 0.0742, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.1630388390951771, |
|
"grad_norm": 6.1942524909973145, |
|
"learning_rate": 3.401833702181473e-05, |
|
"loss": 0.1685, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.1737089201877935, |
|
"grad_norm": 13.037029266357422, |
|
"learning_rate": 3.382073980398356e-05, |
|
"loss": 0.0882, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.1843790012804098, |
|
"grad_norm": 0.0354326069355011, |
|
"learning_rate": 3.362314258615239e-05, |
|
"loss": 0.1236, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.195049082373026, |
|
"grad_norm": 0.18212293088436127, |
|
"learning_rate": 3.3425545368321215e-05, |
|
"loss": 0.0612, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2057191634656423, |
|
"grad_norm": 0.019807470962405205, |
|
"learning_rate": 3.322794815049004e-05, |
|
"loss": 0.0237, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.2163892445582587, |
|
"grad_norm": 0.48490288853645325, |
|
"learning_rate": 3.303035093265887e-05, |
|
"loss": 0.1864, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.2270593256508748, |
|
"grad_norm": 15.739009857177734, |
|
"learning_rate": 3.28327537148277e-05, |
|
"loss": 0.0783, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.2377294067434912, |
|
"grad_norm": 0.00520035345107317, |
|
"learning_rate": 3.263515649699652e-05, |
|
"loss": 0.056, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.2483994878361075, |
|
"grad_norm": 0.007584866136312485, |
|
"learning_rate": 3.243755927916535e-05, |
|
"loss": 0.1054, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.259069568928724, |
|
"grad_norm": 0.009660612791776657, |
|
"learning_rate": 3.2239962061334176e-05, |
|
"loss": 0.1001, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.2697396500213403, |
|
"grad_norm": 5.9183735847473145, |
|
"learning_rate": 3.2042364843503e-05, |
|
"loss": 0.0959, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.2804097311139564, |
|
"grad_norm": 0.017659351229667664, |
|
"learning_rate": 3.184476762567184e-05, |
|
"loss": 0.0771, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2910798122065728, |
|
"grad_norm": 0.05887264013290405, |
|
"learning_rate": 3.164717040784066e-05, |
|
"loss": 0.0527, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.301749893299189, |
|
"grad_norm": 47.84782028198242, |
|
"learning_rate": 3.1449573190009484e-05, |
|
"loss": 0.0658, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.3124199743918052, |
|
"grad_norm": 0.046731848269701004, |
|
"learning_rate": 3.1251975972178314e-05, |
|
"loss": 0.0446, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.3230900554844216, |
|
"grad_norm": 23.92389678955078, |
|
"learning_rate": 3.105437875434714e-05, |
|
"loss": 0.1645, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.333760136577038, |
|
"grad_norm": 0.23928241431713104, |
|
"learning_rate": 3.085678153651597e-05, |
|
"loss": 0.0808, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.3444302176696543, |
|
"grad_norm": 0.004650407936424017, |
|
"learning_rate": 3.06591843186848e-05, |
|
"loss": 0.0788, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.3551002987622707, |
|
"grad_norm": 2.407900333404541, |
|
"learning_rate": 3.0461587100853622e-05, |
|
"loss": 0.0663, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.365770379854887, |
|
"grad_norm": 0.024215010926127434, |
|
"learning_rate": 3.026398988302245e-05, |
|
"loss": 0.0836, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.3764404609475032, |
|
"grad_norm": 8.639642715454102, |
|
"learning_rate": 3.0066392665191273e-05, |
|
"loss": 0.0826, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.3871105420401195, |
|
"grad_norm": 0.4214279353618622, |
|
"learning_rate": 2.9868795447360103e-05, |
|
"loss": 0.0407, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.3977806231327359, |
|
"grad_norm": 3.026334285736084, |
|
"learning_rate": 2.967119822952893e-05, |
|
"loss": 0.0902, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 5.5475568771362305, |
|
"learning_rate": 2.9473601011697753e-05, |
|
"loss": 0.0213, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4191207853179684, |
|
"grad_norm": 0.4343937337398529, |
|
"learning_rate": 2.9276003793866587e-05, |
|
"loss": 0.0485, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.4297908664105847, |
|
"grad_norm": 4.497000217437744, |
|
"learning_rate": 2.907840657603541e-05, |
|
"loss": 0.0254, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.440460947503201, |
|
"grad_norm": 9.44819164276123, |
|
"learning_rate": 2.8880809358204237e-05, |
|
"loss": 0.1441, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.4511310285958174, |
|
"grad_norm": 0.5778021812438965, |
|
"learning_rate": 2.8683212140373068e-05, |
|
"loss": 0.0601, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.4618011096884336, |
|
"grad_norm": 0.0012221585493534803, |
|
"learning_rate": 2.848561492254189e-05, |
|
"loss": 0.0134, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.47247119078105, |
|
"grad_norm": 0.003332477994263172, |
|
"learning_rate": 2.8288017704710718e-05, |
|
"loss": 0.0872, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.4831412718736663, |
|
"grad_norm": 19.613975524902344, |
|
"learning_rate": 2.809042048687955e-05, |
|
"loss": 0.0975, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.4938113529662824, |
|
"grad_norm": 0.0008805744582787156, |
|
"learning_rate": 2.7892823269048375e-05, |
|
"loss": 0.0348, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5044814340588988, |
|
"grad_norm": 0.745171070098877, |
|
"learning_rate": 2.76952260512172e-05, |
|
"loss": 0.0709, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 0.0018973586848005652, |
|
"learning_rate": 2.749762883338603e-05, |
|
"loss": 0.1068, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.5258215962441315, |
|
"grad_norm": 7.711709022521973, |
|
"learning_rate": 2.7300031615554856e-05, |
|
"loss": 0.1518, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.5364916773367479, |
|
"grad_norm": 0.08238033205270767, |
|
"learning_rate": 2.710243439772368e-05, |
|
"loss": 0.0806, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5471617584293642, |
|
"grad_norm": 0.00853784941136837, |
|
"learning_rate": 2.6904837179892507e-05, |
|
"loss": 0.0889, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.5578318395219803, |
|
"grad_norm": 0.06024482846260071, |
|
"learning_rate": 2.6707239962061337e-05, |
|
"loss": 0.0724, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.5685019206145967, |
|
"grad_norm": 0.06635627895593643, |
|
"learning_rate": 2.650964274423016e-05, |
|
"loss": 0.0477, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.5791720017072128, |
|
"grad_norm": 0.057497624307870865, |
|
"learning_rate": 2.6312045526398987e-05, |
|
"loss": 0.0362, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.5898420827998292, |
|
"grad_norm": 5.420880317687988, |
|
"learning_rate": 2.6114448308567818e-05, |
|
"loss": 0.0996, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.6005121638924455, |
|
"grad_norm": 0.03688732162117958, |
|
"learning_rate": 2.5916851090736644e-05, |
|
"loss": 0.1065, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.611182244985062, |
|
"grad_norm": 0.01889336109161377, |
|
"learning_rate": 2.5719253872905468e-05, |
|
"loss": 0.0675, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.6218523260776783, |
|
"grad_norm": 0.041272666305303574, |
|
"learning_rate": 2.5521656655074298e-05, |
|
"loss": 0.0783, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.6325224071702946, |
|
"grad_norm": 0.0283421128988266, |
|
"learning_rate": 2.5324059437243125e-05, |
|
"loss": 0.0535, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.6431924882629108, |
|
"grad_norm": 0.03971688076853752, |
|
"learning_rate": 2.512646221941195e-05, |
|
"loss": 0.0916, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6538625693555271, |
|
"grad_norm": 0.7453581690788269, |
|
"learning_rate": 2.492886500158078e-05, |
|
"loss": 0.0275, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.6645326504481432, |
|
"grad_norm": 0.0038267953786998987, |
|
"learning_rate": 2.4731267783749606e-05, |
|
"loss": 0.0636, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.6752027315407596, |
|
"grad_norm": 17.677406311035156, |
|
"learning_rate": 2.4533670565918433e-05, |
|
"loss": 0.0811, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.685872812633376, |
|
"grad_norm": 0.056949540972709656, |
|
"learning_rate": 2.433607334808726e-05, |
|
"loss": 0.1143, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.6965428937259923, |
|
"grad_norm": 0.06619152426719666, |
|
"learning_rate": 2.4138476130256087e-05, |
|
"loss": 0.0825, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.7072129748186087, |
|
"grad_norm": 29.96984100341797, |
|
"learning_rate": 2.3940878912424914e-05, |
|
"loss": 0.1266, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.717883055911225, |
|
"grad_norm": 7.958868026733398, |
|
"learning_rate": 2.374328169459374e-05, |
|
"loss": 0.0979, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.7285531370038414, |
|
"grad_norm": 0.0707533210515976, |
|
"learning_rate": 2.354568447676257e-05, |
|
"loss": 0.1238, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7392232180964575, |
|
"grad_norm": 5.3426408767700195, |
|
"learning_rate": 2.3348087258931394e-05, |
|
"loss": 0.0941, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.7498932991890739, |
|
"grad_norm": 0.8626427054405212, |
|
"learning_rate": 2.315049004110022e-05, |
|
"loss": 0.037, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.76056338028169, |
|
"grad_norm": 0.007791485637426376, |
|
"learning_rate": 2.2952892823269048e-05, |
|
"loss": 0.039, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.7712334613743064, |
|
"grad_norm": 0.0019313797820359468, |
|
"learning_rate": 2.2755295605437875e-05, |
|
"loss": 0.0719, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.7819035424669227, |
|
"grad_norm": 0.058322928845882416, |
|
"learning_rate": 2.2557698387606705e-05, |
|
"loss": 0.0242, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.792573623559539, |
|
"grad_norm": 6.136843204498291, |
|
"learning_rate": 2.236010116977553e-05, |
|
"loss": 0.1471, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.8032437046521554, |
|
"grad_norm": 16.353532791137695, |
|
"learning_rate": 2.216250395194436e-05, |
|
"loss": 0.034, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.8139137857447718, |
|
"grad_norm": 18.50251007080078, |
|
"learning_rate": 2.1964906734113186e-05, |
|
"loss": 0.0398, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.824583866837388, |
|
"grad_norm": 0.3170296251773834, |
|
"learning_rate": 2.176730951628201e-05, |
|
"loss": 0.0841, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.8352539479300043, |
|
"grad_norm": 0.0018119533779099584, |
|
"learning_rate": 2.156971229845084e-05, |
|
"loss": 0.0634, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.8459240290226204, |
|
"grad_norm": 0.035546980798244476, |
|
"learning_rate": 2.1372115080619663e-05, |
|
"loss": 0.1411, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.8565941101152368, |
|
"grad_norm": 0.5771517753601074, |
|
"learning_rate": 2.1174517862788494e-05, |
|
"loss": 0.0917, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.8672641912078531, |
|
"grad_norm": 0.013703244738280773, |
|
"learning_rate": 2.097692064495732e-05, |
|
"loss": 0.0859, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.8779342723004695, |
|
"grad_norm": 20.912160873413086, |
|
"learning_rate": 2.0779323427126148e-05, |
|
"loss": 0.0813, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.8886043533930859, |
|
"grad_norm": 0.2195858508348465, |
|
"learning_rate": 2.0581726209294974e-05, |
|
"loss": 0.1836, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.8992744344857022, |
|
"grad_norm": 8.048083305358887, |
|
"learning_rate": 2.03841289914638e-05, |
|
"loss": 0.0874, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9099445155783183, |
|
"grad_norm": 0.010916252620518208, |
|
"learning_rate": 2.0186531773632628e-05, |
|
"loss": 0.0122, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.9206145966709347, |
|
"grad_norm": 29.587345123291016, |
|
"learning_rate": 1.9988934555801455e-05, |
|
"loss": 0.0956, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.9312846777635508, |
|
"grad_norm": 24.480087280273438, |
|
"learning_rate": 1.9791337337970282e-05, |
|
"loss": 0.0754, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.9419547588561672, |
|
"grad_norm": 15.161417961120605, |
|
"learning_rate": 1.959374012013911e-05, |
|
"loss": 0.0414, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.9526248399487836, |
|
"grad_norm": 0.2126699984073639, |
|
"learning_rate": 1.9396142902307936e-05, |
|
"loss": 0.0727, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.9632949210414, |
|
"grad_norm": 9.748570442199707, |
|
"learning_rate": 1.9198545684476763e-05, |
|
"loss": 0.0911, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.9739650021340163, |
|
"grad_norm": 0.005681836046278477, |
|
"learning_rate": 1.900094846664559e-05, |
|
"loss": 0.0652, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.9846350832266326, |
|
"grad_norm": 0.010802343487739563, |
|
"learning_rate": 1.880335124881442e-05, |
|
"loss": 0.0306, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.995305164319249, |
|
"grad_norm": 0.0069721778854727745, |
|
"learning_rate": 1.8605754030983244e-05, |
|
"loss": 0.0705, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9955176093916756, |
|
"eval_auc": 0.9994811608729328, |
|
"eval_f1": 0.9955137791070284, |
|
"eval_loss": 0.01761646941304207, |
|
"eval_precision": 0.9931798806479113, |
|
"eval_recall": 0.9978586723768736, |
|
"eval_runtime": 1381.3155, |
|
"eval_samples_per_second": 3.392, |
|
"eval_steps_per_second": 0.212, |
|
"step": 4686 |
|
}, |
|
{ |
|
"epoch": 2.0059752454118653, |
|
"grad_norm": 0.010495180264115334, |
|
"learning_rate": 1.840815681315207e-05, |
|
"loss": 0.0779, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.0166453265044813, |
|
"grad_norm": 1.1236854791641235, |
|
"learning_rate": 1.8210559595320897e-05, |
|
"loss": 0.07, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.0273154075970976, |
|
"grad_norm": 0.09067221730947495, |
|
"learning_rate": 1.8012962377489724e-05, |
|
"loss": 0.0346, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.037985488689714, |
|
"grad_norm": 0.005663533229380846, |
|
"learning_rate": 1.7815365159658555e-05, |
|
"loss": 0.0374, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.0486555697823303, |
|
"grad_norm": 0.004197725094854832, |
|
"learning_rate": 1.7617767941827378e-05, |
|
"loss": 0.0368, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.0593256508749467, |
|
"grad_norm": 44.16444778442383, |
|
"learning_rate": 1.742017072399621e-05, |
|
"loss": 0.0744, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.069995731967563, |
|
"grad_norm": 0.021498629823327065, |
|
"learning_rate": 1.7222573506165035e-05, |
|
"loss": 0.1031, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.0806658130601794, |
|
"grad_norm": 0.057749390602111816, |
|
"learning_rate": 1.702497628833386e-05, |
|
"loss": 0.0597, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.0913358941527957, |
|
"grad_norm": 0.15275585651397705, |
|
"learning_rate": 1.682737907050269e-05, |
|
"loss": 0.0305, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.1020059752454117, |
|
"grad_norm": 0.13239429891109467, |
|
"learning_rate": 1.6629781852671516e-05, |
|
"loss": 0.0338, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 9.17479419708252, |
|
"learning_rate": 1.6432184634840343e-05, |
|
"loss": 0.0517, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.1233461374306444, |
|
"grad_norm": 0.002612267853692174, |
|
"learning_rate": 1.623458741700917e-05, |
|
"loss": 0.0472, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.1340162185232607, |
|
"grad_norm": 6.946547031402588, |
|
"learning_rate": 1.6036990199177997e-05, |
|
"loss": 0.0854, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.144686299615877, |
|
"grad_norm": 0.0008439666125923395, |
|
"learning_rate": 1.5839392981346824e-05, |
|
"loss": 0.0394, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.1553563807084934, |
|
"grad_norm": 0.03294059634208679, |
|
"learning_rate": 1.564179576351565e-05, |
|
"loss": 0.0745, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.16602646180111, |
|
"grad_norm": 5.746143817901611, |
|
"learning_rate": 1.5444198545684478e-05, |
|
"loss": 0.0595, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.176696542893726, |
|
"grad_norm": 0.020633000880479813, |
|
"learning_rate": 1.5246601327853304e-05, |
|
"loss": 0.0027, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.1873666239863425, |
|
"grad_norm": 0.08342266827821732, |
|
"learning_rate": 1.5049004110022133e-05, |
|
"loss": 0.0944, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.1980367050789584, |
|
"grad_norm": 6.828884124755859, |
|
"learning_rate": 1.4851406892190958e-05, |
|
"loss": 0.0776, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.208706786171575, |
|
"grad_norm": 2.5025949478149414, |
|
"learning_rate": 1.4653809674359787e-05, |
|
"loss": 0.0812, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.219376867264191, |
|
"grad_norm": 0.010205933824181557, |
|
"learning_rate": 1.4456212456528612e-05, |
|
"loss": 0.0416, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.2300469483568075, |
|
"grad_norm": 0.014995824545621872, |
|
"learning_rate": 1.4258615238697439e-05, |
|
"loss": 0.0476, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.240717029449424, |
|
"grad_norm": 0.011430823244154453, |
|
"learning_rate": 1.4061018020866268e-05, |
|
"loss": 0.0688, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.25138711054204, |
|
"grad_norm": 0.024417445063591003, |
|
"learning_rate": 1.3863420803035093e-05, |
|
"loss": 0.0445, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.2620571916346566, |
|
"grad_norm": 0.04339329153299332, |
|
"learning_rate": 1.3665823585203921e-05, |
|
"loss": 0.0443, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 0.006649048998951912, |
|
"learning_rate": 1.346822636737275e-05, |
|
"loss": 0.076, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.283397353819889, |
|
"grad_norm": 11.57873821258545, |
|
"learning_rate": 1.3270629149541575e-05, |
|
"loss": 0.0266, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.294067434912505, |
|
"grad_norm": 0.12145959585905075, |
|
"learning_rate": 1.3073031931710402e-05, |
|
"loss": 0.0953, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.3047375160051216, |
|
"grad_norm": 0.03219663351774216, |
|
"learning_rate": 1.2875434713879227e-05, |
|
"loss": 0.0797, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.315407597097738, |
|
"grad_norm": 0.011474654078483582, |
|
"learning_rate": 1.2677837496048056e-05, |
|
"loss": 0.0274, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.3260776781903543, |
|
"grad_norm": 0.001972693484276533, |
|
"learning_rate": 1.2480240278216883e-05, |
|
"loss": 0.0563, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.3367477592829706, |
|
"grad_norm": 0.9134419560432434, |
|
"learning_rate": 1.2282643060385712e-05, |
|
"loss": 0.0292, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.347417840375587, |
|
"grad_norm": 0.002546141389757395, |
|
"learning_rate": 1.2085045842554538e-05, |
|
"loss": 0.0428, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.3580879214682033, |
|
"grad_norm": 0.0010626994771882892, |
|
"learning_rate": 1.1887448624723364e-05, |
|
"loss": 0.076, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.3687580025608197, |
|
"grad_norm": 0.0040688286535441875, |
|
"learning_rate": 1.168985140689219e-05, |
|
"loss": 0.0782, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.3794280836534356, |
|
"grad_norm": 0.02786325477063656, |
|
"learning_rate": 1.149225418906102e-05, |
|
"loss": 0.0123, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.390098164746052, |
|
"grad_norm": 0.4415593147277832, |
|
"learning_rate": 1.1294656971229846e-05, |
|
"loss": 0.0589, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.4007682458386683, |
|
"grad_norm": 0.0095005938783288, |
|
"learning_rate": 1.1097059753398673e-05, |
|
"loss": 0.1002, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.4114383269312847, |
|
"grad_norm": 0.11727124452590942, |
|
"learning_rate": 1.08994625355675e-05, |
|
"loss": 0.0324, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.422108408023901, |
|
"grad_norm": 3.638735294342041, |
|
"learning_rate": 1.0701865317736327e-05, |
|
"loss": 0.1456, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.4327784891165174, |
|
"grad_norm": 0.025659436360001564, |
|
"learning_rate": 1.0504268099905154e-05, |
|
"loss": 0.0323, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.4434485702091338, |
|
"grad_norm": 0.000763273739721626, |
|
"learning_rate": 1.030667088207398e-05, |
|
"loss": 0.019, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.4541186513017497, |
|
"grad_norm": 13.73189926147461, |
|
"learning_rate": 1.0109073664242808e-05, |
|
"loss": 0.0804, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.464788732394366, |
|
"grad_norm": 13.580681800842285, |
|
"learning_rate": 9.911476446411636e-06, |
|
"loss": 0.0628, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.4754588134869824, |
|
"grad_norm": 0.0029777430463582277, |
|
"learning_rate": 9.713879228580461e-06, |
|
"loss": 0.0807, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.4861288945795987, |
|
"grad_norm": 0.003513498930260539, |
|
"learning_rate": 9.516282010749288e-06, |
|
"loss": 0.0826, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.496798975672215, |
|
"grad_norm": 0.009089035913348198, |
|
"learning_rate": 9.318684792918115e-06, |
|
"loss": 0.0406, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.5074690567648314, |
|
"grad_norm": 0.0068649169988930225, |
|
"learning_rate": 9.121087575086944e-06, |
|
"loss": 0.1065, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.518139137857448, |
|
"grad_norm": 20.251689910888672, |
|
"learning_rate": 8.92349035725577e-06, |
|
"loss": 0.1227, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.528809218950064, |
|
"grad_norm": 0.1939096450805664, |
|
"learning_rate": 8.725893139424598e-06, |
|
"loss": 0.0343, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.5394793000426805, |
|
"grad_norm": 0.6377553939819336, |
|
"learning_rate": 8.528295921593425e-06, |
|
"loss": 0.0247, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.550149381135297, |
|
"grad_norm": 1.8269633054733276, |
|
"learning_rate": 8.330698703762251e-06, |
|
"loss": 0.057, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.560819462227913, |
|
"grad_norm": 0.004431420471519232, |
|
"learning_rate": 8.133101485931078e-06, |
|
"loss": 0.0184, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.571489543320529, |
|
"grad_norm": 0.0886378139257431, |
|
"learning_rate": 7.935504268099905e-06, |
|
"loss": 0.1559, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.5821596244131455, |
|
"grad_norm": 0.011996462009847164, |
|
"learning_rate": 7.737907050268732e-06, |
|
"loss": 0.1207, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.592829705505762, |
|
"grad_norm": 0.025645237416028976, |
|
"learning_rate": 7.54030983243756e-06, |
|
"loss": 0.0327, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.603499786598378, |
|
"grad_norm": 0.05047876015305519, |
|
"learning_rate": 7.342712614606387e-06, |
|
"loss": 0.07, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.6141698676909946, |
|
"grad_norm": 0.24467694759368896, |
|
"learning_rate": 7.145115396775214e-06, |
|
"loss": 0.0104, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.6248399487836105, |
|
"grad_norm": 0.07860807329416275, |
|
"learning_rate": 6.94751817894404e-06, |
|
"loss": 0.073, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.635510029876227, |
|
"grad_norm": 0.03573083132505417, |
|
"learning_rate": 6.7499209611128685e-06, |
|
"loss": 0.0616, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.646180110968843, |
|
"grad_norm": 1.0572718381881714, |
|
"learning_rate": 6.552323743281695e-06, |
|
"loss": 0.0872, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.6568501920614596, |
|
"grad_norm": 0.009367382153868675, |
|
"learning_rate": 6.3547265254505215e-06, |
|
"loss": 0.0388, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.667520273154076, |
|
"grad_norm": 0.02948431856930256, |
|
"learning_rate": 6.157129307619349e-06, |
|
"loss": 0.0831, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.6781903542466923, |
|
"grad_norm": 0.0797591432929039, |
|
"learning_rate": 5.959532089788176e-06, |
|
"loss": 0.049, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.6888604353393086, |
|
"grad_norm": 0.034841641783714294, |
|
"learning_rate": 5.761934871957003e-06, |
|
"loss": 0.0055, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.699530516431925, |
|
"grad_norm": 0.07419008761644363, |
|
"learning_rate": 5.56433765412583e-06, |
|
"loss": 0.1128, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.7102005975245413, |
|
"grad_norm": 0.0030936244875192642, |
|
"learning_rate": 5.366740436294658e-06, |
|
"loss": 0.0564, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.7208706786171577, |
|
"grad_norm": 0.003594167297706008, |
|
"learning_rate": 5.169143218463484e-06, |
|
"loss": 0.0928, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.731540759709774, |
|
"grad_norm": 0.03685923293232918, |
|
"learning_rate": 4.9715460006323115e-06, |
|
"loss": 0.0502, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.74221084080239, |
|
"grad_norm": 0.019324608147144318, |
|
"learning_rate": 4.7739487828011385e-06, |
|
"loss": 0.0204, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.7528809218950063, |
|
"grad_norm": 22.45241355895996, |
|
"learning_rate": 4.576351564969965e-06, |
|
"loss": 0.0867, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.7635510029876227, |
|
"grad_norm": 0.14366178214550018, |
|
"learning_rate": 4.378754347138792e-06, |
|
"loss": 0.0366, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.774221084080239, |
|
"grad_norm": 1.7778751850128174, |
|
"learning_rate": 4.181157129307619e-06, |
|
"loss": 0.097, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.7848911651728554, |
|
"grad_norm": 4.860283851623535, |
|
"learning_rate": 3.983559911476446e-06, |
|
"loss": 0.0884, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.7955612462654718, |
|
"grad_norm": 0.00165728444699198, |
|
"learning_rate": 3.785962693645274e-06, |
|
"loss": 0.0195, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.8062313273580877, |
|
"grad_norm": 0.03730342909693718, |
|
"learning_rate": 3.5883654758141003e-06, |
|
"loss": 0.0412, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 0.0034714387729763985, |
|
"learning_rate": 3.3907682579829277e-06, |
|
"loss": 0.0714, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.8275714895433204, |
|
"grad_norm": 0.06972959637641907, |
|
"learning_rate": 3.1931710401517546e-06, |
|
"loss": 0.0082, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.8382415706359367, |
|
"grad_norm": 0.022793615236878395, |
|
"learning_rate": 2.995573822320582e-06, |
|
"loss": 0.0705, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.848911651728553, |
|
"grad_norm": 0.05906020104885101, |
|
"learning_rate": 2.797976604489409e-06, |
|
"loss": 0.0435, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.8595817328211695, |
|
"grad_norm": 0.012936658225953579, |
|
"learning_rate": 2.600379386658236e-06, |
|
"loss": 0.0852, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.870251813913786, |
|
"grad_norm": 0.004626471549272537, |
|
"learning_rate": 2.402782168827063e-06, |
|
"loss": 0.03, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.880921895006402, |
|
"grad_norm": 0.08372853696346283, |
|
"learning_rate": 2.20518495099589e-06, |
|
"loss": 0.0152, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.8915919760990185, |
|
"grad_norm": 1.3282454013824463, |
|
"learning_rate": 2.0075877331647173e-06, |
|
"loss": 0.0816, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.902262057191635, |
|
"grad_norm": 3.8434557914733887, |
|
"learning_rate": 1.8099905153335442e-06, |
|
"loss": 0.05, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.9129321382842512, |
|
"grad_norm": 0.008098805323243141, |
|
"learning_rate": 1.6123932975023712e-06, |
|
"loss": 0.0347, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.923602219376867, |
|
"grad_norm": 0.0022435523569583893, |
|
"learning_rate": 1.4147960796711983e-06, |
|
"loss": 0.0214, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.9342723004694835, |
|
"grad_norm": 7.002129554748535, |
|
"learning_rate": 1.2171988618400254e-06, |
|
"loss": 0.0121, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.9449423815621, |
|
"grad_norm": 0.11791983246803284, |
|
"learning_rate": 1.0196016440088523e-06, |
|
"loss": 0.1015, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.955612462654716, |
|
"grad_norm": 0.0031008380465209484, |
|
"learning_rate": 8.220044261776794e-07, |
|
"loss": 0.0016, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 2.9662825437473326, |
|
"grad_norm": 0.00397999444976449, |
|
"learning_rate": 6.244072083465065e-07, |
|
"loss": 0.0238, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.976952624839949, |
|
"grad_norm": 0.14921312034130096, |
|
"learning_rate": 4.2680999051533353e-07, |
|
"loss": 0.0147, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 2.987622705932565, |
|
"grad_norm": 0.005675365682691336, |
|
"learning_rate": 2.2921277268416063e-07, |
|
"loss": 0.0175, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.998292787025181, |
|
"grad_norm": 0.009994860738515854, |
|
"learning_rate": 3.1615554852987675e-08, |
|
"loss": 0.0694, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9961579509071505, |
|
"eval_auc": 0.9994886327395326, |
|
"eval_f1": 0.9961538461538462, |
|
"eval_loss": 0.0158307533711195, |
|
"eval_precision": 0.9940298507462687, |
|
"eval_recall": 0.9982869379014989, |
|
"eval_runtime": 1329.756, |
|
"eval_samples_per_second": 3.523, |
|
"eval_steps_per_second": 0.22, |
|
"step": 7029 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 7029, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.355902228225831e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
}