{ "best_metric": 0.0158307533711195, "best_model_checkpoint": "autotrain-ltphq-1aue6/checkpoint-7029", "epoch": 3.0, "eval_steps": 500, "global_step": 7029, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010670081092616303, "grad_norm": 10.576315879821777, "learning_rate": 1.7780938833570414e-06, "loss": 0.5983, "step": 25 }, { "epoch": 0.021340162185232606, "grad_norm": 6.441564559936523, "learning_rate": 3.556187766714083e-06, "loss": 0.5878, "step": 50 }, { "epoch": 0.03201024327784891, "grad_norm": 5.742478370666504, "learning_rate": 5.334281650071124e-06, "loss": 0.4551, "step": 75 }, { "epoch": 0.04268032437046521, "grad_norm": 3.9501702785491943, "learning_rate": 7.112375533428166e-06, "loss": 0.3351, "step": 100 }, { "epoch": 0.05335040546308152, "grad_norm": 3.156757354736328, "learning_rate": 8.890469416785207e-06, "loss": 0.2165, "step": 125 }, { "epoch": 0.06402048655569782, "grad_norm": 1.531072735786438, "learning_rate": 1.0668563300142247e-05, "loss": 0.1089, "step": 150 }, { "epoch": 0.07469056764831412, "grad_norm": 0.6878227591514587, "learning_rate": 1.244665718349929e-05, "loss": 0.1164, "step": 175 }, { "epoch": 0.08536064874093043, "grad_norm": 0.3712044358253479, "learning_rate": 1.4224751066856332e-05, "loss": 0.0563, "step": 200 }, { "epoch": 0.09603072983354674, "grad_norm": 5.825244903564453, "learning_rate": 1.6002844950213374e-05, "loss": 0.0577, "step": 225 }, { "epoch": 0.10670081092616304, "grad_norm": 7.699280261993408, "learning_rate": 1.7780938833570414e-05, "loss": 0.0995, "step": 250 }, { "epoch": 0.11737089201877934, "grad_norm": 1.6869832277297974, "learning_rate": 1.9559032716927454e-05, "loss": 0.0223, "step": 275 }, { "epoch": 0.12804097311139565, "grad_norm": 0.5479092597961426, "learning_rate": 2.1337126600284495e-05, "loss": 0.0396, "step": 300 }, { "epoch": 0.13871105420401195, "grad_norm": 17.6248836517334, "learning_rate": 2.3115220483641535e-05, "loss": 0.0629, "step": 325 }, { "epoch": 0.14938113529662825, "grad_norm": 1.2864736318588257, "learning_rate": 2.489331436699858e-05, "loss": 0.0815, "step": 350 }, { "epoch": 0.16005121638924455, "grad_norm": 14.023919105529785, "learning_rate": 2.6671408250355616e-05, "loss": 0.1471, "step": 375 }, { "epoch": 0.17072129748186085, "grad_norm": 0.7266956567764282, "learning_rate": 2.8449502133712663e-05, "loss": 0.1421, "step": 400 }, { "epoch": 0.18139137857447715, "grad_norm": 1.591202735900879, "learning_rate": 3.0227596017069704e-05, "loss": 0.0837, "step": 425 }, { "epoch": 0.19206145966709348, "grad_norm": 0.03316282480955124, "learning_rate": 3.200568990042675e-05, "loss": 0.0419, "step": 450 }, { "epoch": 0.20273154075970978, "grad_norm": 0.0206840131431818, "learning_rate": 3.3783783783783784e-05, "loss": 0.0621, "step": 475 }, { "epoch": 0.21340162185232608, "grad_norm": 0.017792997881770134, "learning_rate": 3.556187766714083e-05, "loss": 0.1231, "step": 500 }, { "epoch": 0.22407170294494239, "grad_norm": 5.664891242980957, "learning_rate": 3.7339971550497865e-05, "loss": 0.1255, "step": 525 }, { "epoch": 0.2347417840375587, "grad_norm": 0.08337491750717163, "learning_rate": 3.911806543385491e-05, "loss": 0.1177, "step": 550 }, { "epoch": 0.245411865130175, "grad_norm": 11.999979019165039, "learning_rate": 4.089615931721195e-05, "loss": 0.0796, "step": 575 }, { "epoch": 0.2560819462227913, "grad_norm": 0.007278278470039368, "learning_rate": 4.267425320056899e-05, "loss": 0.0677, "step": 600 }, { "epoch": 0.2667520273154076, "grad_norm": 16.901777267456055, "learning_rate": 4.4452347083926033e-05, "loss": 0.2031, "step": 625 }, { "epoch": 0.2774221084080239, "grad_norm": 8.973575592041016, "learning_rate": 4.623044096728307e-05, "loss": 0.0724, "step": 650 }, { "epoch": 0.2880921895006402, "grad_norm": 0.5288171768188477, "learning_rate": 4.8008534850640114e-05, "loss": 0.0616, "step": 675 }, { "epoch": 0.2987622705932565, "grad_norm": 0.03448805212974548, "learning_rate": 4.978662873399716e-05, "loss": 0.0951, "step": 700 }, { "epoch": 0.3094323516858728, "grad_norm": 37.160011291503906, "learning_rate": 4.982611444830857e-05, "loss": 0.2065, "step": 725 }, { "epoch": 0.3201024327784891, "grad_norm": 0.19236384332180023, "learning_rate": 4.9628517230477395e-05, "loss": 0.2069, "step": 750 }, { "epoch": 0.3307725138711054, "grad_norm": 0.1479603499174118, "learning_rate": 4.9430920012646225e-05, "loss": 0.1349, "step": 775 }, { "epoch": 0.3414425949637217, "grad_norm": 0.3465137779712677, "learning_rate": 4.923332279481505e-05, "loss": 0.13, "step": 800 }, { "epoch": 0.352112676056338, "grad_norm": 0.05050384998321533, "learning_rate": 4.903572557698388e-05, "loss": 0.137, "step": 825 }, { "epoch": 0.3627827571489543, "grad_norm": 0.16721266508102417, "learning_rate": 4.883812835915271e-05, "loss": 0.1692, "step": 850 }, { "epoch": 0.37345283824157066, "grad_norm": 0.015766268596053123, "learning_rate": 4.864053114132153e-05, "loss": 0.1324, "step": 875 }, { "epoch": 0.38412291933418696, "grad_norm": 0.04455697163939476, "learning_rate": 4.8442933923490356e-05, "loss": 0.1173, "step": 900 }, { "epoch": 0.39479300042680326, "grad_norm": 0.045407216995954514, "learning_rate": 4.8245336705659186e-05, "loss": 0.1282, "step": 925 }, { "epoch": 0.40546308151941957, "grad_norm": 0.014451881870627403, "learning_rate": 4.804773948782802e-05, "loss": 0.156, "step": 950 }, { "epoch": 0.41613316261203587, "grad_norm": 10.56628131866455, "learning_rate": 4.785014226999684e-05, "loss": 0.1354, "step": 975 }, { "epoch": 0.42680324370465217, "grad_norm": 9.949240684509277, "learning_rate": 4.765254505216567e-05, "loss": 0.1674, "step": 1000 }, { "epoch": 0.43747332479726847, "grad_norm": 0.8956235647201538, "learning_rate": 4.7454947834334494e-05, "loss": 0.0663, "step": 1025 }, { "epoch": 0.44814340588988477, "grad_norm": 11.43295669555664, "learning_rate": 4.725735061650332e-05, "loss": 0.106, "step": 1050 }, { "epoch": 0.4588134869825011, "grad_norm": 0.10002151876688004, "learning_rate": 4.7059753398672155e-05, "loss": 0.0652, "step": 1075 }, { "epoch": 0.4694835680751174, "grad_norm": 0.018603714182972908, "learning_rate": 4.686215618084098e-05, "loss": 0.0544, "step": 1100 }, { "epoch": 0.4801536491677337, "grad_norm": 0.002447799313813448, "learning_rate": 4.66645589630098e-05, "loss": 0.0874, "step": 1125 }, { "epoch": 0.49082373026035, "grad_norm": 0.029736997559666634, "learning_rate": 4.6466961745178625e-05, "loss": 0.1893, "step": 1150 }, { "epoch": 0.5014938113529663, "grad_norm": 10.314343452453613, "learning_rate": 4.6269364527347456e-05, "loss": 0.1898, "step": 1175 }, { "epoch": 0.5121638924455826, "grad_norm": 0.6686179041862488, "learning_rate": 4.6071767309516286e-05, "loss": 0.0865, "step": 1200 }, { "epoch": 0.5228339735381989, "grad_norm": 1.8905715942382812, "learning_rate": 4.587417009168511e-05, "loss": 0.1412, "step": 1225 }, { "epoch": 0.5335040546308152, "grad_norm": 2.919725179672241, "learning_rate": 4.567657287385394e-05, "loss": 0.1305, "step": 1250 }, { "epoch": 0.5441741357234315, "grad_norm": 0.24073974788188934, "learning_rate": 4.547897565602276e-05, "loss": 0.1203, "step": 1275 }, { "epoch": 0.5548442168160478, "grad_norm": 0.010738029144704342, "learning_rate": 4.5281378438191594e-05, "loss": 0.0627, "step": 1300 }, { "epoch": 0.5655142979086641, "grad_norm": 0.008832808583974838, "learning_rate": 4.5083781220360424e-05, "loss": 0.1611, "step": 1325 }, { "epoch": 0.5761843790012804, "grad_norm": 3.6089112758636475, "learning_rate": 4.488618400252925e-05, "loss": 0.146, "step": 1350 }, { "epoch": 0.5868544600938967, "grad_norm": 0.08832165598869324, "learning_rate": 4.468858678469807e-05, "loss": 0.1005, "step": 1375 }, { "epoch": 0.597524541186513, "grad_norm": 0.03827153891324997, "learning_rate": 4.44909895668669e-05, "loss": 0.1597, "step": 1400 }, { "epoch": 0.6081946222791293, "grad_norm": 0.02119293250143528, "learning_rate": 4.429339234903573e-05, "loss": 0.0933, "step": 1425 }, { "epoch": 0.6188647033717456, "grad_norm": 0.004805543925613165, "learning_rate": 4.4095795131204555e-05, "loss": 0.0904, "step": 1450 }, { "epoch": 0.629534784464362, "grad_norm": 0.560352623462677, "learning_rate": 4.3898197913373385e-05, "loss": 0.1021, "step": 1475 }, { "epoch": 0.6402048655569782, "grad_norm": 0.13053584098815918, "learning_rate": 4.370060069554221e-05, "loss": 0.1822, "step": 1500 }, { "epoch": 0.6508749466495946, "grad_norm": 0.020129365846514702, "learning_rate": 4.350300347771103e-05, "loss": 0.1116, "step": 1525 }, { "epoch": 0.6615450277422108, "grad_norm": 0.08490480482578278, "learning_rate": 4.330540625987986e-05, "loss": 0.1514, "step": 1550 }, { "epoch": 0.6722151088348272, "grad_norm": 17.20114517211914, "learning_rate": 4.310780904204869e-05, "loss": 0.1683, "step": 1575 }, { "epoch": 0.6828851899274434, "grad_norm": 20.063180923461914, "learning_rate": 4.2910211824217516e-05, "loss": 0.0969, "step": 1600 }, { "epoch": 0.6935552710200598, "grad_norm": 10.146587371826172, "learning_rate": 4.271261460638634e-05, "loss": 0.1382, "step": 1625 }, { "epoch": 0.704225352112676, "grad_norm": 0.04244920238852501, "learning_rate": 4.251501738855517e-05, "loss": 0.102, "step": 1650 }, { "epoch": 0.7148954332052924, "grad_norm": 3.3513338565826416, "learning_rate": 4.2317420170724e-05, "loss": 0.0844, "step": 1675 }, { "epoch": 0.7255655142979086, "grad_norm": 9.942954063415527, "learning_rate": 4.2119822952892824e-05, "loss": 0.1169, "step": 1700 }, { "epoch": 0.736235595390525, "grad_norm": 0.02176540717482567, "learning_rate": 4.1922225735061654e-05, "loss": 0.0546, "step": 1725 }, { "epoch": 0.7469056764831413, "grad_norm": 0.02619314193725586, "learning_rate": 4.172462851723048e-05, "loss": 0.1489, "step": 1750 }, { "epoch": 0.7575757575757576, "grad_norm": 4.979767799377441, "learning_rate": 4.152703129939931e-05, "loss": 0.1087, "step": 1775 }, { "epoch": 0.7682458386683739, "grad_norm": 0.22245188057422638, "learning_rate": 4.132943408156814e-05, "loss": 0.0505, "step": 1800 }, { "epoch": 0.7789159197609902, "grad_norm": 3.7733798027038574, "learning_rate": 4.113183686373696e-05, "loss": 0.1281, "step": 1825 }, { "epoch": 0.7895860008536065, "grad_norm": 0.004993762820959091, "learning_rate": 4.0934239645905786e-05, "loss": 0.0967, "step": 1850 }, { "epoch": 0.8002560819462228, "grad_norm": 6.705662250518799, "learning_rate": 4.0736642428074616e-05, "loss": 0.088, "step": 1875 }, { "epoch": 0.8109261630388391, "grad_norm": 0.09842755645513535, "learning_rate": 4.053904521024344e-05, "loss": 0.09, "step": 1900 }, { "epoch": 0.8215962441314554, "grad_norm": 0.006683106068521738, "learning_rate": 4.034144799241227e-05, "loss": 0.0329, "step": 1925 }, { "epoch": 0.8322663252240717, "grad_norm": 0.8531517386436462, "learning_rate": 4.014385077458109e-05, "loss": 0.1405, "step": 1950 }, { "epoch": 0.842936406316688, "grad_norm": 0.011011715978384018, "learning_rate": 3.9946253556749924e-05, "loss": 0.1381, "step": 1975 }, { "epoch": 0.8536064874093043, "grad_norm": 0.20137256383895874, "learning_rate": 3.974865633891875e-05, "loss": 0.1315, "step": 2000 }, { "epoch": 0.8642765685019206, "grad_norm": 4.466578006744385, "learning_rate": 3.955105912108758e-05, "loss": 0.0826, "step": 2025 }, { "epoch": 0.8749466495945369, "grad_norm": 0.007189568132162094, "learning_rate": 3.935346190325641e-05, "loss": 0.0696, "step": 2050 }, { "epoch": 0.8856167306871532, "grad_norm": 0.13521578907966614, "learning_rate": 3.915586468542523e-05, "loss": 0.0647, "step": 2075 }, { "epoch": 0.8962868117797695, "grad_norm": 0.09942985326051712, "learning_rate": 3.8958267467594055e-05, "loss": 0.0337, "step": 2100 }, { "epoch": 0.9069568928723858, "grad_norm": 0.003063550451770425, "learning_rate": 3.8760670249762885e-05, "loss": 0.1349, "step": 2125 }, { "epoch": 0.9176269739650021, "grad_norm": 0.03615418076515198, "learning_rate": 3.8563073031931715e-05, "loss": 0.1884, "step": 2150 }, { "epoch": 0.9282970550576184, "grad_norm": 2.4063897132873535, "learning_rate": 3.836547581410054e-05, "loss": 0.0942, "step": 2175 }, { "epoch": 0.9389671361502347, "grad_norm": 0.10824164003133774, "learning_rate": 3.816787859626937e-05, "loss": 0.1643, "step": 2200 }, { "epoch": 0.949637217242851, "grad_norm": 0.23877990245819092, "learning_rate": 3.797028137843819e-05, "loss": 0.2041, "step": 2225 }, { "epoch": 0.9603072983354674, "grad_norm": 0.12965470552444458, "learning_rate": 3.7772684160607016e-05, "loss": 0.0686, "step": 2250 }, { "epoch": 0.9709773794280837, "grad_norm": 9.515990257263184, "learning_rate": 3.757508694277585e-05, "loss": 0.1288, "step": 2275 }, { "epoch": 0.9816474605207, "grad_norm": 0.010902081616222858, "learning_rate": 3.737748972494468e-05, "loss": 0.1204, "step": 2300 }, { "epoch": 0.9923175416133163, "grad_norm": 0.12074346095323563, "learning_rate": 3.71798925071135e-05, "loss": 0.0735, "step": 2325 }, { "epoch": 1.0, "eval_accuracy": 0.9942369263607257, "eval_auc": 0.9994051665223928, "eval_f1": 0.9942369263607257, "eval_loss": 0.02018207497894764, "eval_precision": 0.9910638297872341, "eval_recall": 0.9974304068522484, "eval_runtime": 1286.73, "eval_samples_per_second": 3.641, "eval_steps_per_second": 0.228, "step": 2343 }, { "epoch": 1.0029876227059327, "grad_norm": 0.13857701420783997, "learning_rate": 3.6982295289282324e-05, "loss": 0.1404, "step": 2350 }, { "epoch": 1.0136577037985488, "grad_norm": 16.29566764831543, "learning_rate": 3.6784698071451154e-05, "loss": 0.1023, "step": 2375 }, { "epoch": 1.0243277848911652, "grad_norm": 0.007060299627482891, "learning_rate": 3.6587100853619984e-05, "loss": 0.0314, "step": 2400 }, { "epoch": 1.0349978659837815, "grad_norm": 0.1959122270345688, "learning_rate": 3.638950363578881e-05, "loss": 0.0698, "step": 2425 }, { "epoch": 1.0456679470763979, "grad_norm": 2.1727123260498047, "learning_rate": 3.619190641795764e-05, "loss": 0.1026, "step": 2450 }, { "epoch": 1.056338028169014, "grad_norm": 0.14948531985282898, "learning_rate": 3.599430920012646e-05, "loss": 0.1336, "step": 2475 }, { "epoch": 1.0670081092616304, "grad_norm": 0.0020017814822494984, "learning_rate": 3.579671198229529e-05, "loss": 0.0718, "step": 2500 }, { "epoch": 1.0776781903542467, "grad_norm": 0.040247637778520584, "learning_rate": 3.559911476446412e-05, "loss": 0.1003, "step": 2525 }, { "epoch": 1.088348271446863, "grad_norm": 0.0026400748174637556, "learning_rate": 3.5401517546632946e-05, "loss": 0.0103, "step": 2550 }, { "epoch": 1.0990183525394792, "grad_norm": 13.71839714050293, "learning_rate": 3.520392032880177e-05, "loss": 0.1549, "step": 2575 }, { "epoch": 1.1096884336320956, "grad_norm": 0.016973601654171944, "learning_rate": 3.50063231109706e-05, "loss": 0.0833, "step": 2600 }, { "epoch": 1.120358514724712, "grad_norm": 0.023715652525424957, "learning_rate": 3.480872589313943e-05, "loss": 0.1606, "step": 2625 }, { "epoch": 1.1310285958173283, "grad_norm": 0.12024948000907898, "learning_rate": 3.4611128675308254e-05, "loss": 0.0659, "step": 2650 }, { "epoch": 1.1416986769099444, "grad_norm": 0.013869931921362877, "learning_rate": 3.4413531457477084e-05, "loss": 0.1145, "step": 2675 }, { "epoch": 1.1523687580025608, "grad_norm": 0.003865574486553669, "learning_rate": 3.421593423964591e-05, "loss": 0.0742, "step": 2700 }, { "epoch": 1.1630388390951771, "grad_norm": 6.1942524909973145, "learning_rate": 3.401833702181473e-05, "loss": 0.1685, "step": 2725 }, { "epoch": 1.1737089201877935, "grad_norm": 13.037029266357422, "learning_rate": 3.382073980398356e-05, "loss": 0.0882, "step": 2750 }, { "epoch": 1.1843790012804098, "grad_norm": 0.0354326069355011, "learning_rate": 3.362314258615239e-05, "loss": 0.1236, "step": 2775 }, { "epoch": 1.195049082373026, "grad_norm": 0.18212293088436127, "learning_rate": 3.3425545368321215e-05, "loss": 0.0612, "step": 2800 }, { "epoch": 1.2057191634656423, "grad_norm": 0.019807470962405205, "learning_rate": 3.322794815049004e-05, "loss": 0.0237, "step": 2825 }, { "epoch": 1.2163892445582587, "grad_norm": 0.48490288853645325, "learning_rate": 3.303035093265887e-05, "loss": 0.1864, "step": 2850 }, { "epoch": 1.2270593256508748, "grad_norm": 15.739009857177734, "learning_rate": 3.28327537148277e-05, "loss": 0.0783, "step": 2875 }, { "epoch": 1.2377294067434912, "grad_norm": 0.00520035345107317, "learning_rate": 3.263515649699652e-05, "loss": 0.056, "step": 2900 }, { "epoch": 1.2483994878361075, "grad_norm": 0.007584866136312485, "learning_rate": 3.243755927916535e-05, "loss": 0.1054, "step": 2925 }, { "epoch": 1.259069568928724, "grad_norm": 0.009660612791776657, "learning_rate": 3.2239962061334176e-05, "loss": 0.1001, "step": 2950 }, { "epoch": 1.2697396500213403, "grad_norm": 5.9183735847473145, "learning_rate": 3.2042364843503e-05, "loss": 0.0959, "step": 2975 }, { "epoch": 1.2804097311139564, "grad_norm": 0.017659351229667664, "learning_rate": 3.184476762567184e-05, "loss": 0.0771, "step": 3000 }, { "epoch": 1.2910798122065728, "grad_norm": 0.05887264013290405, "learning_rate": 3.164717040784066e-05, "loss": 0.0527, "step": 3025 }, { "epoch": 1.301749893299189, "grad_norm": 47.84782028198242, "learning_rate": 3.1449573190009484e-05, "loss": 0.0658, "step": 3050 }, { "epoch": 1.3124199743918052, "grad_norm": 0.046731848269701004, "learning_rate": 3.1251975972178314e-05, "loss": 0.0446, "step": 3075 }, { "epoch": 1.3230900554844216, "grad_norm": 23.92389678955078, "learning_rate": 3.105437875434714e-05, "loss": 0.1645, "step": 3100 }, { "epoch": 1.333760136577038, "grad_norm": 0.23928241431713104, "learning_rate": 3.085678153651597e-05, "loss": 0.0808, "step": 3125 }, { "epoch": 1.3444302176696543, "grad_norm": 0.004650407936424017, "learning_rate": 3.06591843186848e-05, "loss": 0.0788, "step": 3150 }, { "epoch": 1.3551002987622707, "grad_norm": 2.407900333404541, "learning_rate": 3.0461587100853622e-05, "loss": 0.0663, "step": 3175 }, { "epoch": 1.365770379854887, "grad_norm": 0.024215010926127434, "learning_rate": 3.026398988302245e-05, "loss": 0.0836, "step": 3200 }, { "epoch": 1.3764404609475032, "grad_norm": 8.639642715454102, "learning_rate": 3.0066392665191273e-05, "loss": 0.0826, "step": 3225 }, { "epoch": 1.3871105420401195, "grad_norm": 0.4214279353618622, "learning_rate": 2.9868795447360103e-05, "loss": 0.0407, "step": 3250 }, { "epoch": 1.3977806231327359, "grad_norm": 3.026334285736084, "learning_rate": 2.967119822952893e-05, "loss": 0.0902, "step": 3275 }, { "epoch": 1.408450704225352, "grad_norm": 5.5475568771362305, "learning_rate": 2.9473601011697753e-05, "loss": 0.0213, "step": 3300 }, { "epoch": 1.4191207853179684, "grad_norm": 0.4343937337398529, "learning_rate": 2.9276003793866587e-05, "loss": 0.0485, "step": 3325 }, { "epoch": 1.4297908664105847, "grad_norm": 4.497000217437744, "learning_rate": 2.907840657603541e-05, "loss": 0.0254, "step": 3350 }, { "epoch": 1.440460947503201, "grad_norm": 9.44819164276123, "learning_rate": 2.8880809358204237e-05, "loss": 0.1441, "step": 3375 }, { "epoch": 1.4511310285958174, "grad_norm": 0.5778021812438965, "learning_rate": 2.8683212140373068e-05, "loss": 0.0601, "step": 3400 }, { "epoch": 1.4618011096884336, "grad_norm": 0.0012221585493534803, "learning_rate": 2.848561492254189e-05, "loss": 0.0134, "step": 3425 }, { "epoch": 1.47247119078105, "grad_norm": 0.003332477994263172, "learning_rate": 2.8288017704710718e-05, "loss": 0.0872, "step": 3450 }, { "epoch": 1.4831412718736663, "grad_norm": 19.613975524902344, "learning_rate": 2.809042048687955e-05, "loss": 0.0975, "step": 3475 }, { "epoch": 1.4938113529662824, "grad_norm": 0.0008805744582787156, "learning_rate": 2.7892823269048375e-05, "loss": 0.0348, "step": 3500 }, { "epoch": 1.5044814340588988, "grad_norm": 0.745171070098877, "learning_rate": 2.76952260512172e-05, "loss": 0.0709, "step": 3525 }, { "epoch": 1.5151515151515151, "grad_norm": 0.0018973586848005652, "learning_rate": 2.749762883338603e-05, "loss": 0.1068, "step": 3550 }, { "epoch": 1.5258215962441315, "grad_norm": 7.711709022521973, "learning_rate": 2.7300031615554856e-05, "loss": 0.1518, "step": 3575 }, { "epoch": 1.5364916773367479, "grad_norm": 0.08238033205270767, "learning_rate": 2.710243439772368e-05, "loss": 0.0806, "step": 3600 }, { "epoch": 1.5471617584293642, "grad_norm": 0.00853784941136837, "learning_rate": 2.6904837179892507e-05, "loss": 0.0889, "step": 3625 }, { "epoch": 1.5578318395219803, "grad_norm": 0.06024482846260071, "learning_rate": 2.6707239962061337e-05, "loss": 0.0724, "step": 3650 }, { "epoch": 1.5685019206145967, "grad_norm": 0.06635627895593643, "learning_rate": 2.650964274423016e-05, "loss": 0.0477, "step": 3675 }, { "epoch": 1.5791720017072128, "grad_norm": 0.057497624307870865, "learning_rate": 2.6312045526398987e-05, "loss": 0.0362, "step": 3700 }, { "epoch": 1.5898420827998292, "grad_norm": 5.420880317687988, "learning_rate": 2.6114448308567818e-05, "loss": 0.0996, "step": 3725 }, { "epoch": 1.6005121638924455, "grad_norm": 0.03688732162117958, "learning_rate": 2.5916851090736644e-05, "loss": 0.1065, "step": 3750 }, { "epoch": 1.611182244985062, "grad_norm": 0.01889336109161377, "learning_rate": 2.5719253872905468e-05, "loss": 0.0675, "step": 3775 }, { "epoch": 1.6218523260776783, "grad_norm": 0.041272666305303574, "learning_rate": 2.5521656655074298e-05, "loss": 0.0783, "step": 3800 }, { "epoch": 1.6325224071702946, "grad_norm": 0.0283421128988266, "learning_rate": 2.5324059437243125e-05, "loss": 0.0535, "step": 3825 }, { "epoch": 1.6431924882629108, "grad_norm": 0.03971688076853752, "learning_rate": 2.512646221941195e-05, "loss": 0.0916, "step": 3850 }, { "epoch": 1.6538625693555271, "grad_norm": 0.7453581690788269, "learning_rate": 2.492886500158078e-05, "loss": 0.0275, "step": 3875 }, { "epoch": 1.6645326504481432, "grad_norm": 0.0038267953786998987, "learning_rate": 2.4731267783749606e-05, "loss": 0.0636, "step": 3900 }, { "epoch": 1.6752027315407596, "grad_norm": 17.677406311035156, "learning_rate": 2.4533670565918433e-05, "loss": 0.0811, "step": 3925 }, { "epoch": 1.685872812633376, "grad_norm": 0.056949540972709656, "learning_rate": 2.433607334808726e-05, "loss": 0.1143, "step": 3950 }, { "epoch": 1.6965428937259923, "grad_norm": 0.06619152426719666, "learning_rate": 2.4138476130256087e-05, "loss": 0.0825, "step": 3975 }, { "epoch": 1.7072129748186087, "grad_norm": 29.96984100341797, "learning_rate": 2.3940878912424914e-05, "loss": 0.1266, "step": 4000 }, { "epoch": 1.717883055911225, "grad_norm": 7.958868026733398, "learning_rate": 2.374328169459374e-05, "loss": 0.0979, "step": 4025 }, { "epoch": 1.7285531370038414, "grad_norm": 0.0707533210515976, "learning_rate": 2.354568447676257e-05, "loss": 0.1238, "step": 4050 }, { "epoch": 1.7392232180964575, "grad_norm": 5.3426408767700195, "learning_rate": 2.3348087258931394e-05, "loss": 0.0941, "step": 4075 }, { "epoch": 1.7498932991890739, "grad_norm": 0.8626427054405212, "learning_rate": 2.315049004110022e-05, "loss": 0.037, "step": 4100 }, { "epoch": 1.76056338028169, "grad_norm": 0.007791485637426376, "learning_rate": 2.2952892823269048e-05, "loss": 0.039, "step": 4125 }, { "epoch": 1.7712334613743064, "grad_norm": 0.0019313797820359468, "learning_rate": 2.2755295605437875e-05, "loss": 0.0719, "step": 4150 }, { "epoch": 1.7819035424669227, "grad_norm": 0.058322928845882416, "learning_rate": 2.2557698387606705e-05, "loss": 0.0242, "step": 4175 }, { "epoch": 1.792573623559539, "grad_norm": 6.136843204498291, "learning_rate": 2.236010116977553e-05, "loss": 0.1471, "step": 4200 }, { "epoch": 1.8032437046521554, "grad_norm": 16.353532791137695, "learning_rate": 2.216250395194436e-05, "loss": 0.034, "step": 4225 }, { "epoch": 1.8139137857447718, "grad_norm": 18.50251007080078, "learning_rate": 2.1964906734113186e-05, "loss": 0.0398, "step": 4250 }, { "epoch": 1.824583866837388, "grad_norm": 0.3170296251773834, "learning_rate": 2.176730951628201e-05, "loss": 0.0841, "step": 4275 }, { "epoch": 1.8352539479300043, "grad_norm": 0.0018119533779099584, "learning_rate": 2.156971229845084e-05, "loss": 0.0634, "step": 4300 }, { "epoch": 1.8459240290226204, "grad_norm": 0.035546980798244476, "learning_rate": 2.1372115080619663e-05, "loss": 0.1411, "step": 4325 }, { "epoch": 1.8565941101152368, "grad_norm": 0.5771517753601074, "learning_rate": 2.1174517862788494e-05, "loss": 0.0917, "step": 4350 }, { "epoch": 1.8672641912078531, "grad_norm": 0.013703244738280773, "learning_rate": 2.097692064495732e-05, "loss": 0.0859, "step": 4375 }, { "epoch": 1.8779342723004695, "grad_norm": 20.912160873413086, "learning_rate": 2.0779323427126148e-05, "loss": 0.0813, "step": 4400 }, { "epoch": 1.8886043533930859, "grad_norm": 0.2195858508348465, "learning_rate": 2.0581726209294974e-05, "loss": 0.1836, "step": 4425 }, { "epoch": 1.8992744344857022, "grad_norm": 8.048083305358887, "learning_rate": 2.03841289914638e-05, "loss": 0.0874, "step": 4450 }, { "epoch": 1.9099445155783183, "grad_norm": 0.010916252620518208, "learning_rate": 2.0186531773632628e-05, "loss": 0.0122, "step": 4475 }, { "epoch": 1.9206145966709347, "grad_norm": 29.587345123291016, "learning_rate": 1.9988934555801455e-05, "loss": 0.0956, "step": 4500 }, { "epoch": 1.9312846777635508, "grad_norm": 24.480087280273438, "learning_rate": 1.9791337337970282e-05, "loss": 0.0754, "step": 4525 }, { "epoch": 1.9419547588561672, "grad_norm": 15.161417961120605, "learning_rate": 1.959374012013911e-05, "loss": 0.0414, "step": 4550 }, { "epoch": 1.9526248399487836, "grad_norm": 0.2126699984073639, "learning_rate": 1.9396142902307936e-05, "loss": 0.0727, "step": 4575 }, { "epoch": 1.9632949210414, "grad_norm": 9.748570442199707, "learning_rate": 1.9198545684476763e-05, "loss": 0.0911, "step": 4600 }, { "epoch": 1.9739650021340163, "grad_norm": 0.005681836046278477, "learning_rate": 1.900094846664559e-05, "loss": 0.0652, "step": 4625 }, { "epoch": 1.9846350832266326, "grad_norm": 0.010802343487739563, "learning_rate": 1.880335124881442e-05, "loss": 0.0306, "step": 4650 }, { "epoch": 1.995305164319249, "grad_norm": 0.0069721778854727745, "learning_rate": 1.8605754030983244e-05, "loss": 0.0705, "step": 4675 }, { "epoch": 2.0, "eval_accuracy": 0.9955176093916756, "eval_auc": 0.9994811608729328, "eval_f1": 0.9955137791070284, "eval_loss": 0.01761646941304207, "eval_precision": 0.9931798806479113, "eval_recall": 0.9978586723768736, "eval_runtime": 1381.3155, "eval_samples_per_second": 3.392, "eval_steps_per_second": 0.212, "step": 4686 }, { "epoch": 2.0059752454118653, "grad_norm": 0.010495180264115334, "learning_rate": 1.840815681315207e-05, "loss": 0.0779, "step": 4700 }, { "epoch": 2.0166453265044813, "grad_norm": 1.1236854791641235, "learning_rate": 1.8210559595320897e-05, "loss": 0.07, "step": 4725 }, { "epoch": 2.0273154075970976, "grad_norm": 0.09067221730947495, "learning_rate": 1.8012962377489724e-05, "loss": 0.0346, "step": 4750 }, { "epoch": 2.037985488689714, "grad_norm": 0.005663533229380846, "learning_rate": 1.7815365159658555e-05, "loss": 0.0374, "step": 4775 }, { "epoch": 2.0486555697823303, "grad_norm": 0.004197725094854832, "learning_rate": 1.7617767941827378e-05, "loss": 0.0368, "step": 4800 }, { "epoch": 2.0593256508749467, "grad_norm": 44.16444778442383, "learning_rate": 1.742017072399621e-05, "loss": 0.0744, "step": 4825 }, { "epoch": 2.069995731967563, "grad_norm": 0.021498629823327065, "learning_rate": 1.7222573506165035e-05, "loss": 0.1031, "step": 4850 }, { "epoch": 2.0806658130601794, "grad_norm": 0.057749390602111816, "learning_rate": 1.702497628833386e-05, "loss": 0.0597, "step": 4875 }, { "epoch": 2.0913358941527957, "grad_norm": 0.15275585651397705, "learning_rate": 1.682737907050269e-05, "loss": 0.0305, "step": 4900 }, { "epoch": 2.1020059752454117, "grad_norm": 0.13239429891109467, "learning_rate": 1.6629781852671516e-05, "loss": 0.0338, "step": 4925 }, { "epoch": 2.112676056338028, "grad_norm": 9.17479419708252, "learning_rate": 1.6432184634840343e-05, "loss": 0.0517, "step": 4950 }, { "epoch": 2.1233461374306444, "grad_norm": 0.002612267853692174, "learning_rate": 1.623458741700917e-05, "loss": 0.0472, "step": 4975 }, { "epoch": 2.1340162185232607, "grad_norm": 6.946547031402588, "learning_rate": 1.6036990199177997e-05, "loss": 0.0854, "step": 5000 }, { "epoch": 2.144686299615877, "grad_norm": 0.0008439666125923395, "learning_rate": 1.5839392981346824e-05, "loss": 0.0394, "step": 5025 }, { "epoch": 2.1553563807084934, "grad_norm": 0.03294059634208679, "learning_rate": 1.564179576351565e-05, "loss": 0.0745, "step": 5050 }, { "epoch": 2.16602646180111, "grad_norm": 5.746143817901611, "learning_rate": 1.5444198545684478e-05, "loss": 0.0595, "step": 5075 }, { "epoch": 2.176696542893726, "grad_norm": 0.020633000880479813, "learning_rate": 1.5246601327853304e-05, "loss": 0.0027, "step": 5100 }, { "epoch": 2.1873666239863425, "grad_norm": 0.08342266827821732, "learning_rate": 1.5049004110022133e-05, "loss": 0.0944, "step": 5125 }, { "epoch": 2.1980367050789584, "grad_norm": 6.828884124755859, "learning_rate": 1.4851406892190958e-05, "loss": 0.0776, "step": 5150 }, { "epoch": 2.208706786171575, "grad_norm": 2.5025949478149414, "learning_rate": 1.4653809674359787e-05, "loss": 0.0812, "step": 5175 }, { "epoch": 2.219376867264191, "grad_norm": 0.010205933824181557, "learning_rate": 1.4456212456528612e-05, "loss": 0.0416, "step": 5200 }, { "epoch": 2.2300469483568075, "grad_norm": 0.014995824545621872, "learning_rate": 1.4258615238697439e-05, "loss": 0.0476, "step": 5225 }, { "epoch": 2.240717029449424, "grad_norm": 0.011430823244154453, "learning_rate": 1.4061018020866268e-05, "loss": 0.0688, "step": 5250 }, { "epoch": 2.25138711054204, "grad_norm": 0.024417445063591003, "learning_rate": 1.3863420803035093e-05, "loss": 0.0445, "step": 5275 }, { "epoch": 2.2620571916346566, "grad_norm": 0.04339329153299332, "learning_rate": 1.3665823585203921e-05, "loss": 0.0443, "step": 5300 }, { "epoch": 2.2727272727272725, "grad_norm": 0.006649048998951912, "learning_rate": 1.346822636737275e-05, "loss": 0.076, "step": 5325 }, { "epoch": 2.283397353819889, "grad_norm": 11.57873821258545, "learning_rate": 1.3270629149541575e-05, "loss": 0.0266, "step": 5350 }, { "epoch": 2.294067434912505, "grad_norm": 0.12145959585905075, "learning_rate": 1.3073031931710402e-05, "loss": 0.0953, "step": 5375 }, { "epoch": 2.3047375160051216, "grad_norm": 0.03219663351774216, "learning_rate": 1.2875434713879227e-05, "loss": 0.0797, "step": 5400 }, { "epoch": 2.315407597097738, "grad_norm": 0.011474654078483582, "learning_rate": 1.2677837496048056e-05, "loss": 0.0274, "step": 5425 }, { "epoch": 2.3260776781903543, "grad_norm": 0.001972693484276533, "learning_rate": 1.2480240278216883e-05, "loss": 0.0563, "step": 5450 }, { "epoch": 2.3367477592829706, "grad_norm": 0.9134419560432434, "learning_rate": 1.2282643060385712e-05, "loss": 0.0292, "step": 5475 }, { "epoch": 2.347417840375587, "grad_norm": 0.002546141389757395, "learning_rate": 1.2085045842554538e-05, "loss": 0.0428, "step": 5500 }, { "epoch": 2.3580879214682033, "grad_norm": 0.0010626994771882892, "learning_rate": 1.1887448624723364e-05, "loss": 0.076, "step": 5525 }, { "epoch": 2.3687580025608197, "grad_norm": 0.0040688286535441875, "learning_rate": 1.168985140689219e-05, "loss": 0.0782, "step": 5550 }, { "epoch": 2.3794280836534356, "grad_norm": 0.02786325477063656, "learning_rate": 1.149225418906102e-05, "loss": 0.0123, "step": 5575 }, { "epoch": 2.390098164746052, "grad_norm": 0.4415593147277832, "learning_rate": 1.1294656971229846e-05, "loss": 0.0589, "step": 5600 }, { "epoch": 2.4007682458386683, "grad_norm": 0.0095005938783288, "learning_rate": 1.1097059753398673e-05, "loss": 0.1002, "step": 5625 }, { "epoch": 2.4114383269312847, "grad_norm": 0.11727124452590942, "learning_rate": 1.08994625355675e-05, "loss": 0.0324, "step": 5650 }, { "epoch": 2.422108408023901, "grad_norm": 3.638735294342041, "learning_rate": 1.0701865317736327e-05, "loss": 0.1456, "step": 5675 }, { "epoch": 2.4327784891165174, "grad_norm": 0.025659436360001564, "learning_rate": 1.0504268099905154e-05, "loss": 0.0323, "step": 5700 }, { "epoch": 2.4434485702091338, "grad_norm": 0.000763273739721626, "learning_rate": 1.030667088207398e-05, "loss": 0.019, "step": 5725 }, { "epoch": 2.4541186513017497, "grad_norm": 13.73189926147461, "learning_rate": 1.0109073664242808e-05, "loss": 0.0804, "step": 5750 }, { "epoch": 2.464788732394366, "grad_norm": 13.580681800842285, "learning_rate": 9.911476446411636e-06, "loss": 0.0628, "step": 5775 }, { "epoch": 2.4754588134869824, "grad_norm": 0.0029777430463582277, "learning_rate": 9.713879228580461e-06, "loss": 0.0807, "step": 5800 }, { "epoch": 2.4861288945795987, "grad_norm": 0.003513498930260539, "learning_rate": 9.516282010749288e-06, "loss": 0.0826, "step": 5825 }, { "epoch": 2.496798975672215, "grad_norm": 0.009089035913348198, "learning_rate": 9.318684792918115e-06, "loss": 0.0406, "step": 5850 }, { "epoch": 2.5074690567648314, "grad_norm": 0.0068649169988930225, "learning_rate": 9.121087575086944e-06, "loss": 0.1065, "step": 5875 }, { "epoch": 2.518139137857448, "grad_norm": 20.251689910888672, "learning_rate": 8.92349035725577e-06, "loss": 0.1227, "step": 5900 }, { "epoch": 2.528809218950064, "grad_norm": 0.1939096450805664, "learning_rate": 8.725893139424598e-06, "loss": 0.0343, "step": 5925 }, { "epoch": 2.5394793000426805, "grad_norm": 0.6377553939819336, "learning_rate": 8.528295921593425e-06, "loss": 0.0247, "step": 5950 }, { "epoch": 2.550149381135297, "grad_norm": 1.8269633054733276, "learning_rate": 8.330698703762251e-06, "loss": 0.057, "step": 5975 }, { "epoch": 2.560819462227913, "grad_norm": 0.004431420471519232, "learning_rate": 8.133101485931078e-06, "loss": 0.0184, "step": 6000 }, { "epoch": 2.571489543320529, "grad_norm": 0.0886378139257431, "learning_rate": 7.935504268099905e-06, "loss": 0.1559, "step": 6025 }, { "epoch": 2.5821596244131455, "grad_norm": 0.011996462009847164, "learning_rate": 7.737907050268732e-06, "loss": 0.1207, "step": 6050 }, { "epoch": 2.592829705505762, "grad_norm": 0.025645237416028976, "learning_rate": 7.54030983243756e-06, "loss": 0.0327, "step": 6075 }, { "epoch": 2.603499786598378, "grad_norm": 0.05047876015305519, "learning_rate": 7.342712614606387e-06, "loss": 0.07, "step": 6100 }, { "epoch": 2.6141698676909946, "grad_norm": 0.24467694759368896, "learning_rate": 7.145115396775214e-06, "loss": 0.0104, "step": 6125 }, { "epoch": 2.6248399487836105, "grad_norm": 0.07860807329416275, "learning_rate": 6.94751817894404e-06, "loss": 0.073, "step": 6150 }, { "epoch": 2.635510029876227, "grad_norm": 0.03573083132505417, "learning_rate": 6.7499209611128685e-06, "loss": 0.0616, "step": 6175 }, { "epoch": 2.646180110968843, "grad_norm": 1.0572718381881714, "learning_rate": 6.552323743281695e-06, "loss": 0.0872, "step": 6200 }, { "epoch": 2.6568501920614596, "grad_norm": 0.009367382153868675, "learning_rate": 6.3547265254505215e-06, "loss": 0.0388, "step": 6225 }, { "epoch": 2.667520273154076, "grad_norm": 0.02948431856930256, "learning_rate": 6.157129307619349e-06, "loss": 0.0831, "step": 6250 }, { "epoch": 2.6781903542466923, "grad_norm": 0.0797591432929039, "learning_rate": 5.959532089788176e-06, "loss": 0.049, "step": 6275 }, { "epoch": 2.6888604353393086, "grad_norm": 0.034841641783714294, "learning_rate": 5.761934871957003e-06, "loss": 0.0055, "step": 6300 }, { "epoch": 2.699530516431925, "grad_norm": 0.07419008761644363, "learning_rate": 5.56433765412583e-06, "loss": 0.1128, "step": 6325 }, { "epoch": 2.7102005975245413, "grad_norm": 0.0030936244875192642, "learning_rate": 5.366740436294658e-06, "loss": 0.0564, "step": 6350 }, { "epoch": 2.7208706786171577, "grad_norm": 0.003594167297706008, "learning_rate": 5.169143218463484e-06, "loss": 0.0928, "step": 6375 }, { "epoch": 2.731540759709774, "grad_norm": 0.03685923293232918, "learning_rate": 4.9715460006323115e-06, "loss": 0.0502, "step": 6400 }, { "epoch": 2.74221084080239, "grad_norm": 0.019324608147144318, "learning_rate": 4.7739487828011385e-06, "loss": 0.0204, "step": 6425 }, { "epoch": 2.7528809218950063, "grad_norm": 22.45241355895996, "learning_rate": 4.576351564969965e-06, "loss": 0.0867, "step": 6450 }, { "epoch": 2.7635510029876227, "grad_norm": 0.14366178214550018, "learning_rate": 4.378754347138792e-06, "loss": 0.0366, "step": 6475 }, { "epoch": 2.774221084080239, "grad_norm": 1.7778751850128174, "learning_rate": 4.181157129307619e-06, "loss": 0.097, "step": 6500 }, { "epoch": 2.7848911651728554, "grad_norm": 4.860283851623535, "learning_rate": 3.983559911476446e-06, "loss": 0.0884, "step": 6525 }, { "epoch": 2.7955612462654718, "grad_norm": 0.00165728444699198, "learning_rate": 3.785962693645274e-06, "loss": 0.0195, "step": 6550 }, { "epoch": 2.8062313273580877, "grad_norm": 0.03730342909693718, "learning_rate": 3.5883654758141003e-06, "loss": 0.0412, "step": 6575 }, { "epoch": 2.816901408450704, "grad_norm": 0.0034714387729763985, "learning_rate": 3.3907682579829277e-06, "loss": 0.0714, "step": 6600 }, { "epoch": 2.8275714895433204, "grad_norm": 0.06972959637641907, "learning_rate": 3.1931710401517546e-06, "loss": 0.0082, "step": 6625 }, { "epoch": 2.8382415706359367, "grad_norm": 0.022793615236878395, "learning_rate": 2.995573822320582e-06, "loss": 0.0705, "step": 6650 }, { "epoch": 2.848911651728553, "grad_norm": 0.05906020104885101, "learning_rate": 2.797976604489409e-06, "loss": 0.0435, "step": 6675 }, { "epoch": 2.8595817328211695, "grad_norm": 0.012936658225953579, "learning_rate": 2.600379386658236e-06, "loss": 0.0852, "step": 6700 }, { "epoch": 2.870251813913786, "grad_norm": 0.004626471549272537, "learning_rate": 2.402782168827063e-06, "loss": 0.03, "step": 6725 }, { "epoch": 2.880921895006402, "grad_norm": 0.08372853696346283, "learning_rate": 2.20518495099589e-06, "loss": 0.0152, "step": 6750 }, { "epoch": 2.8915919760990185, "grad_norm": 1.3282454013824463, "learning_rate": 2.0075877331647173e-06, "loss": 0.0816, "step": 6775 }, { "epoch": 2.902262057191635, "grad_norm": 3.8434557914733887, "learning_rate": 1.8099905153335442e-06, "loss": 0.05, "step": 6800 }, { "epoch": 2.9129321382842512, "grad_norm": 0.008098805323243141, "learning_rate": 1.6123932975023712e-06, "loss": 0.0347, "step": 6825 }, { "epoch": 2.923602219376867, "grad_norm": 0.0022435523569583893, "learning_rate": 1.4147960796711983e-06, "loss": 0.0214, "step": 6850 }, { "epoch": 2.9342723004694835, "grad_norm": 7.002129554748535, "learning_rate": 1.2171988618400254e-06, "loss": 0.0121, "step": 6875 }, { "epoch": 2.9449423815621, "grad_norm": 0.11791983246803284, "learning_rate": 1.0196016440088523e-06, "loss": 0.1015, "step": 6900 }, { "epoch": 2.955612462654716, "grad_norm": 0.0031008380465209484, "learning_rate": 8.220044261776794e-07, "loss": 0.0016, "step": 6925 }, { "epoch": 2.9662825437473326, "grad_norm": 0.00397999444976449, "learning_rate": 6.244072083465065e-07, "loss": 0.0238, "step": 6950 }, { "epoch": 2.976952624839949, "grad_norm": 0.14921312034130096, "learning_rate": 4.2680999051533353e-07, "loss": 0.0147, "step": 6975 }, { "epoch": 2.987622705932565, "grad_norm": 0.005675365682691336, "learning_rate": 2.2921277268416063e-07, "loss": 0.0175, "step": 7000 }, { "epoch": 2.998292787025181, "grad_norm": 0.009994860738515854, "learning_rate": 3.1615554852987675e-08, "loss": 0.0694, "step": 7025 }, { "epoch": 3.0, "eval_accuracy": 0.9961579509071505, "eval_auc": 0.9994886327395326, "eval_f1": 0.9961538461538462, "eval_loss": 0.0158307533711195, "eval_precision": 0.9940298507462687, "eval_recall": 0.9982869379014989, "eval_runtime": 1329.756, "eval_samples_per_second": 3.523, "eval_steps_per_second": 0.22, "step": 7029 } ], "logging_steps": 25, "max_steps": 7029, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.355902228225831e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }