{ "best_metric": 26.439685364873398, "best_model_checkpoint": "./whisper-ft-2/checkpoint-5000", "epoch": 1.0, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 26.441381454467773, "learning_rate": 6.875e-06, "loss": 5.6669, "step": 25 }, { "epoch": 0.01, "grad_norm": 16.109895706176758, "learning_rate": 9.969806763285025e-06, "loss": 3.5576, "step": 50 }, { "epoch": 0.01, "grad_norm": 17.341266632080078, "learning_rate": 9.919484702093398e-06, "loss": 2.3173, "step": 75 }, { "epoch": 0.02, "grad_norm": 14.84139347076416, "learning_rate": 9.869162640901772e-06, "loss": 1.732, "step": 100 }, { "epoch": 0.03, "grad_norm": 13.36408519744873, "learning_rate": 9.818840579710146e-06, "loss": 1.4347, "step": 125 }, { "epoch": 0.03, "grad_norm": 15.8803071975708, "learning_rate": 9.768518518518519e-06, "loss": 1.2313, "step": 150 }, { "epoch": 0.04, "grad_norm": 17.588510513305664, "learning_rate": 9.718196457326893e-06, "loss": 1.0921, "step": 175 }, { "epoch": 0.04, "grad_norm": 14.752327919006348, "learning_rate": 9.667874396135266e-06, "loss": 0.9493, "step": 200 }, { "epoch": 0.04, "grad_norm": 13.243648529052734, "learning_rate": 9.61755233494364e-06, "loss": 0.8546, "step": 225 }, { "epoch": 0.05, "grad_norm": 15.082148551940918, "learning_rate": 9.567230273752013e-06, "loss": 0.8029, "step": 250 }, { "epoch": 0.06, "grad_norm": 7.932609558105469, "learning_rate": 9.516908212560388e-06, "loss": 0.7795, "step": 275 }, { "epoch": 0.06, "grad_norm": 14.98747444152832, "learning_rate": 9.46658615136876e-06, "loss": 0.7094, "step": 300 }, { "epoch": 0.07, "grad_norm": 10.299836158752441, "learning_rate": 9.416264090177135e-06, "loss": 0.6991, "step": 325 }, { "epoch": 0.07, "grad_norm": 9.431194305419922, "learning_rate": 9.36594202898551e-06, "loss": 0.6727, "step": 350 }, { "epoch": 0.07, "grad_norm": 12.089103698730469, "learning_rate": 9.315619967793882e-06, "loss": 0.6613, "step": 375 }, { "epoch": 0.08, "grad_norm": 11.374141693115234, "learning_rate": 9.265297906602255e-06, "loss": 0.6281, "step": 400 }, { "epoch": 0.09, "grad_norm": 10.616567611694336, "learning_rate": 9.214975845410629e-06, "loss": 0.5975, "step": 425 }, { "epoch": 0.09, "grad_norm": 12.249051094055176, "learning_rate": 9.164653784219002e-06, "loss": 0.5734, "step": 450 }, { "epoch": 0.1, "grad_norm": 12.755301475524902, "learning_rate": 9.114331723027376e-06, "loss": 0.525, "step": 475 }, { "epoch": 0.1, "grad_norm": 8.402365684509277, "learning_rate": 9.06400966183575e-06, "loss": 0.5021, "step": 500 }, { "epoch": 0.1, "grad_norm": 8.66573429107666, "learning_rate": 9.013687600644123e-06, "loss": 0.5239, "step": 525 }, { "epoch": 0.11, "grad_norm": 11.849873542785645, "learning_rate": 8.963365539452496e-06, "loss": 0.4548, "step": 550 }, { "epoch": 0.12, "grad_norm": 8.986794471740723, "learning_rate": 8.91304347826087e-06, "loss": 0.4634, "step": 575 }, { "epoch": 0.12, "grad_norm": 18.423568725585938, "learning_rate": 8.862721417069245e-06, "loss": 0.4796, "step": 600 }, { "epoch": 0.12, "grad_norm": 7.8966217041015625, "learning_rate": 8.812399355877618e-06, "loss": 0.4142, "step": 625 }, { "epoch": 0.13, "grad_norm": 8.645583152770996, "learning_rate": 8.76207729468599e-06, "loss": 0.438, "step": 650 }, { "epoch": 0.14, "grad_norm": 8.235858917236328, "learning_rate": 8.711755233494365e-06, "loss": 0.448, "step": 675 }, { "epoch": 0.14, "grad_norm": 10.634264945983887, "learning_rate": 8.661433172302737e-06, "loss": 0.454, "step": 700 }, { "epoch": 0.14, "grad_norm": 7.304662704467773, "learning_rate": 8.611111111111112e-06, "loss": 0.441, "step": 725 }, { "epoch": 0.15, "grad_norm": 7.09723424911499, "learning_rate": 8.560789049919486e-06, "loss": 0.4172, "step": 750 }, { "epoch": 0.15, "grad_norm": 11.544793128967285, "learning_rate": 8.510466988727859e-06, "loss": 0.4305, "step": 775 }, { "epoch": 0.16, "grad_norm": 8.155645370483398, "learning_rate": 8.460144927536232e-06, "loss": 0.4273, "step": 800 }, { "epoch": 0.17, "grad_norm": 7.164479732513428, "learning_rate": 8.409822866344606e-06, "loss": 0.4339, "step": 825 }, { "epoch": 0.17, "grad_norm": 8.852354049682617, "learning_rate": 8.35950080515298e-06, "loss": 0.4595, "step": 850 }, { "epoch": 0.17, "grad_norm": 9.475948333740234, "learning_rate": 8.309178743961353e-06, "loss": 0.4104, "step": 875 }, { "epoch": 0.18, "grad_norm": 9.111871719360352, "learning_rate": 8.258856682769728e-06, "loss": 0.4573, "step": 900 }, { "epoch": 0.18, "grad_norm": 8.254072189331055, "learning_rate": 8.2085346215781e-06, "loss": 0.5042, "step": 925 }, { "epoch": 0.19, "grad_norm": 9.376124382019043, "learning_rate": 8.158212560386473e-06, "loss": 0.488, "step": 950 }, { "epoch": 0.2, "grad_norm": 8.461128234863281, "learning_rate": 8.107890499194848e-06, "loss": 0.4621, "step": 975 }, { "epoch": 0.2, "grad_norm": 8.678207397460938, "learning_rate": 8.057568438003222e-06, "loss": 0.4632, "step": 1000 }, { "epoch": 0.2, "eval_loss": 0.5091106295585632, "eval_runtime": 245.5352, "eval_samples_per_second": 4.073, "eval_steps_per_second": 0.509, "eval_wer": 38.22701957857266, "step": 1000 }, { "epoch": 0.2, "grad_norm": 8.345236778259277, "learning_rate": 8.007246376811595e-06, "loss": 0.4577, "step": 1025 }, { "epoch": 0.21, "grad_norm": 8.78304386138916, "learning_rate": 7.956924315619969e-06, "loss": 0.4378, "step": 1050 }, { "epoch": 0.21, "grad_norm": 9.15285873413086, "learning_rate": 7.906602254428342e-06, "loss": 0.4534, "step": 1075 }, { "epoch": 0.22, "grad_norm": 10.154984474182129, "learning_rate": 7.856280193236716e-06, "loss": 0.4185, "step": 1100 }, { "epoch": 0.23, "grad_norm": 8.451964378356934, "learning_rate": 7.805958132045089e-06, "loss": 0.4098, "step": 1125 }, { "epoch": 0.23, "grad_norm": 7.336400508880615, "learning_rate": 7.755636070853463e-06, "loss": 0.3994, "step": 1150 }, { "epoch": 0.23, "grad_norm": 8.46390151977539, "learning_rate": 7.705314009661836e-06, "loss": 0.4028, "step": 1175 }, { "epoch": 0.24, "grad_norm": 10.172577857971191, "learning_rate": 7.654991948470209e-06, "loss": 0.3936, "step": 1200 }, { "epoch": 0.24, "grad_norm": 10.022027015686035, "learning_rate": 7.604669887278584e-06, "loss": 0.4025, "step": 1225 }, { "epoch": 0.25, "grad_norm": 10.936142921447754, "learning_rate": 7.5543478260869576e-06, "loss": 0.3701, "step": 1250 }, { "epoch": 0.26, "grad_norm": 8.6853609085083, "learning_rate": 7.504025764895331e-06, "loss": 0.3529, "step": 1275 }, { "epoch": 0.26, "grad_norm": 8.691036224365234, "learning_rate": 7.453703703703704e-06, "loss": 0.3525, "step": 1300 }, { "epoch": 0.27, "grad_norm": 7.8596978187561035, "learning_rate": 7.403381642512077e-06, "loss": 0.3481, "step": 1325 }, { "epoch": 0.27, "grad_norm": 6.271942615509033, "learning_rate": 7.353059581320452e-06, "loss": 0.3225, "step": 1350 }, { "epoch": 0.28, "grad_norm": 7.836112022399902, "learning_rate": 7.302737520128825e-06, "loss": 0.3821, "step": 1375 }, { "epoch": 0.28, "grad_norm": 9.443171501159668, "learning_rate": 7.252415458937199e-06, "loss": 0.4297, "step": 1400 }, { "epoch": 0.28, "grad_norm": 8.32925033569336, "learning_rate": 7.202093397745572e-06, "loss": 0.4633, "step": 1425 }, { "epoch": 0.29, "grad_norm": 10.218457221984863, "learning_rate": 7.151771336553945e-06, "loss": 0.4489, "step": 1450 }, { "epoch": 0.29, "grad_norm": 8.612972259521484, "learning_rate": 7.10144927536232e-06, "loss": 0.4022, "step": 1475 }, { "epoch": 0.3, "grad_norm": 9.206517219543457, "learning_rate": 7.051127214170693e-06, "loss": 0.3668, "step": 1500 }, { "epoch": 0.3, "grad_norm": 10.358244895935059, "learning_rate": 7.000805152979067e-06, "loss": 0.4162, "step": 1525 }, { "epoch": 0.31, "grad_norm": 12.02837085723877, "learning_rate": 6.95048309178744e-06, "loss": 0.436, "step": 1550 }, { "epoch": 0.32, "grad_norm": 10.476570129394531, "learning_rate": 6.900161030595813e-06, "loss": 0.3599, "step": 1575 }, { "epoch": 0.32, "grad_norm": 8.006072044372559, "learning_rate": 6.8498389694041875e-06, "loss": 0.3669, "step": 1600 }, { "epoch": 0.33, "grad_norm": 16.798221588134766, "learning_rate": 6.799516908212561e-06, "loss": 0.3632, "step": 1625 }, { "epoch": 0.33, "grad_norm": 8.768495559692383, "learning_rate": 6.749194847020935e-06, "loss": 0.3225, "step": 1650 }, { "epoch": 0.34, "grad_norm": 10.018780708312988, "learning_rate": 6.698872785829308e-06, "loss": 0.325, "step": 1675 }, { "epoch": 0.34, "grad_norm": 10.418109893798828, "learning_rate": 6.648550724637681e-06, "loss": 0.3321, "step": 1700 }, { "epoch": 0.34, "grad_norm": 6.952179908752441, "learning_rate": 6.598228663446056e-06, "loss": 0.3242, "step": 1725 }, { "epoch": 0.35, "grad_norm": 10.024521827697754, "learning_rate": 6.547906602254429e-06, "loss": 0.3291, "step": 1750 }, { "epoch": 0.35, "grad_norm": 7.440296173095703, "learning_rate": 6.497584541062802e-06, "loss": 0.329, "step": 1775 }, { "epoch": 0.36, "grad_norm": 8.184727668762207, "learning_rate": 6.447262479871176e-06, "loss": 0.3228, "step": 1800 }, { "epoch": 0.36, "grad_norm": 8.101869583129883, "learning_rate": 6.3969404186795495e-06, "loss": 0.3222, "step": 1825 }, { "epoch": 0.37, "grad_norm": 12.450462341308594, "learning_rate": 6.346618357487924e-06, "loss": 0.35, "step": 1850 }, { "epoch": 0.38, "grad_norm": 13.891828536987305, "learning_rate": 6.296296296296297e-06, "loss": 0.3659, "step": 1875 }, { "epoch": 0.38, "grad_norm": 9.327531814575195, "learning_rate": 6.24597423510467e-06, "loss": 0.3609, "step": 1900 }, { "epoch": 0.39, "grad_norm": 7.067734241485596, "learning_rate": 6.195652173913044e-06, "loss": 0.3558, "step": 1925 }, { "epoch": 0.39, "grad_norm": 11.90934944152832, "learning_rate": 6.145330112721417e-06, "loss": 0.3312, "step": 1950 }, { "epoch": 0.4, "grad_norm": 8.565199851989746, "learning_rate": 6.095008051529792e-06, "loss": 0.3347, "step": 1975 }, { "epoch": 0.4, "grad_norm": 6.947161674499512, "learning_rate": 6.044685990338165e-06, "loss": 0.2988, "step": 2000 }, { "epoch": 0.4, "eval_loss": 0.42369821667671204, "eval_runtime": 243.0792, "eval_samples_per_second": 4.114, "eval_steps_per_second": 0.514, "eval_wer": 32.68071424470345, "step": 2000 }, { "epoch": 0.41, "grad_norm": 10.25965404510498, "learning_rate": 5.994363929146538e-06, "loss": 0.3133, "step": 2025 }, { "epoch": 0.41, "grad_norm": 12.943211555480957, "learning_rate": 5.944041867954912e-06, "loss": 0.3662, "step": 2050 }, { "epoch": 0.41, "grad_norm": 8.808210372924805, "learning_rate": 5.893719806763285e-06, "loss": 0.3933, "step": 2075 }, { "epoch": 0.42, "grad_norm": 8.858198165893555, "learning_rate": 5.84339774557166e-06, "loss": 0.4115, "step": 2100 }, { "epoch": 0.42, "grad_norm": 8.931889533996582, "learning_rate": 5.793075684380033e-06, "loss": 0.3585, "step": 2125 }, { "epoch": 0.43, "grad_norm": 9.787882804870605, "learning_rate": 5.742753623188406e-06, "loss": 0.351, "step": 2150 }, { "epoch": 0.43, "grad_norm": 8.339137077331543, "learning_rate": 5.692431561996779e-06, "loss": 0.3425, "step": 2175 }, { "epoch": 0.44, "grad_norm": 7.447598457336426, "learning_rate": 5.642109500805153e-06, "loss": 0.3567, "step": 2200 }, { "epoch": 0.45, "grad_norm": 7.393462181091309, "learning_rate": 5.591787439613527e-06, "loss": 0.3852, "step": 2225 }, { "epoch": 0.45, "grad_norm": 9.472132682800293, "learning_rate": 5.541465378421901e-06, "loss": 0.3978, "step": 2250 }, { "epoch": 0.46, "grad_norm": 9.781373023986816, "learning_rate": 5.4911433172302745e-06, "loss": 0.3917, "step": 2275 }, { "epoch": 0.46, "grad_norm": 10.88880443572998, "learning_rate": 5.440821256038647e-06, "loss": 0.4059, "step": 2300 }, { "epoch": 0.47, "grad_norm": 10.626157760620117, "learning_rate": 5.390499194847021e-06, "loss": 0.3995, "step": 2325 }, { "epoch": 0.47, "grad_norm": 10.283743858337402, "learning_rate": 5.340177133655395e-06, "loss": 0.4168, "step": 2350 }, { "epoch": 0.47, "grad_norm": 9.085090637207031, "learning_rate": 5.289855072463769e-06, "loss": 0.3932, "step": 2375 }, { "epoch": 0.48, "grad_norm": 9.050111770629883, "learning_rate": 5.239533011272142e-06, "loss": 0.386, "step": 2400 }, { "epoch": 0.48, "grad_norm": 7.001188278198242, "learning_rate": 5.189210950080515e-06, "loss": 0.3305, "step": 2425 }, { "epoch": 0.49, "grad_norm": 8.82314395904541, "learning_rate": 5.138888888888889e-06, "loss": 0.318, "step": 2450 }, { "epoch": 0.49, "grad_norm": 8.965357780456543, "learning_rate": 5.088566827697263e-06, "loss": 0.3568, "step": 2475 }, { "epoch": 0.5, "grad_norm": 8.673226356506348, "learning_rate": 5.038244766505637e-06, "loss": 0.3693, "step": 2500 }, { "epoch": 0.51, "grad_norm": 9.710456848144531, "learning_rate": 4.98792270531401e-06, "loss": 0.4114, "step": 2525 }, { "epoch": 0.51, "grad_norm": 11.537069320678711, "learning_rate": 4.937600644122384e-06, "loss": 0.4652, "step": 2550 }, { "epoch": 0.52, "grad_norm": 11.736916542053223, "learning_rate": 4.887278582930757e-06, "loss": 0.4348, "step": 2575 }, { "epoch": 0.52, "grad_norm": 8.418719291687012, "learning_rate": 4.836956521739131e-06, "loss": 0.3498, "step": 2600 }, { "epoch": 0.53, "grad_norm": 8.387438774108887, "learning_rate": 4.786634460547504e-06, "loss": 0.33, "step": 2625 }, { "epoch": 0.53, "grad_norm": 7.15954065322876, "learning_rate": 4.736312399355878e-06, "loss": 0.3454, "step": 2650 }, { "epoch": 0.54, "grad_norm": 7.480624675750732, "learning_rate": 4.6859903381642516e-06, "loss": 0.3317, "step": 2675 }, { "epoch": 0.54, "grad_norm": 7.614017963409424, "learning_rate": 4.635668276972625e-06, "loss": 0.3202, "step": 2700 }, { "epoch": 0.55, "grad_norm": 9.156942367553711, "learning_rate": 4.585346215780999e-06, "loss": 0.2789, "step": 2725 }, { "epoch": 0.55, "grad_norm": 10.527360916137695, "learning_rate": 4.535024154589372e-06, "loss": 0.2546, "step": 2750 }, { "epoch": 0.56, "grad_norm": 7.232975959777832, "learning_rate": 4.484702093397746e-06, "loss": 0.2724, "step": 2775 }, { "epoch": 0.56, "grad_norm": 6.251858711242676, "learning_rate": 4.434380032206119e-06, "loss": 0.2791, "step": 2800 }, { "epoch": 0.56, "grad_norm": 8.357397079467773, "learning_rate": 4.384057971014493e-06, "loss": 0.307, "step": 2825 }, { "epoch": 0.57, "grad_norm": 8.629735946655273, "learning_rate": 4.3337359098228665e-06, "loss": 0.3721, "step": 2850 }, { "epoch": 0.57, "grad_norm": 8.04019832611084, "learning_rate": 4.28341384863124e-06, "loss": 0.3302, "step": 2875 }, { "epoch": 0.58, "grad_norm": 10.451481819152832, "learning_rate": 4.233091787439614e-06, "loss": 0.3338, "step": 2900 }, { "epoch": 0.58, "grad_norm": 7.338428020477295, "learning_rate": 4.182769726247988e-06, "loss": 0.3381, "step": 2925 }, { "epoch": 0.59, "grad_norm": 6.68162727355957, "learning_rate": 4.132447665056361e-06, "loss": 0.2993, "step": 2950 }, { "epoch": 0.59, "grad_norm": 7.178064823150635, "learning_rate": 4.082125603864734e-06, "loss": 0.2728, "step": 2975 }, { "epoch": 0.6, "grad_norm": 6.970361709594727, "learning_rate": 4.031803542673109e-06, "loss": 0.2715, "step": 3000 }, { "epoch": 0.6, "eval_loss": 0.3627614378929138, "eval_runtime": 242.2219, "eval_samples_per_second": 4.128, "eval_steps_per_second": 0.516, "eval_wer": 28.190848022047426, "step": 3000 }, { "epoch": 0.6, "grad_norm": 7.680355072021484, "learning_rate": 3.9814814814814814e-06, "loss": 0.2641, "step": 3025 }, { "epoch": 0.61, "grad_norm": 7.352539539337158, "learning_rate": 3.931159420289856e-06, "loss": 0.2947, "step": 3050 }, { "epoch": 0.61, "grad_norm": 7.088489055633545, "learning_rate": 3.880837359098229e-06, "loss": 0.3204, "step": 3075 }, { "epoch": 0.62, "grad_norm": 9.058192253112793, "learning_rate": 3.830515297906602e-06, "loss": 0.3085, "step": 3100 }, { "epoch": 0.62, "grad_norm": 10.828897476196289, "learning_rate": 3.780193236714976e-06, "loss": 0.2772, "step": 3125 }, { "epoch": 0.63, "grad_norm": 7.811698913574219, "learning_rate": 3.7298711755233497e-06, "loss": 0.2449, "step": 3150 }, { "epoch": 0.64, "grad_norm": 7.802531719207764, "learning_rate": 3.6795491143317237e-06, "loss": 0.2515, "step": 3175 }, { "epoch": 0.64, "grad_norm": 7.107741832733154, "learning_rate": 3.629227053140097e-06, "loss": 0.2964, "step": 3200 }, { "epoch": 0.65, "grad_norm": 7.613478183746338, "learning_rate": 3.5789049919484704e-06, "loss": 0.2901, "step": 3225 }, { "epoch": 0.65, "grad_norm": 6.808854103088379, "learning_rate": 3.5285829307568444e-06, "loss": 0.2764, "step": 3250 }, { "epoch": 0.66, "grad_norm": 8.42264461517334, "learning_rate": 3.4782608695652175e-06, "loss": 0.2439, "step": 3275 }, { "epoch": 0.66, "grad_norm": 6.853939533233643, "learning_rate": 3.4279388083735915e-06, "loss": 0.263, "step": 3300 }, { "epoch": 0.67, "grad_norm": 7.323888301849365, "learning_rate": 3.377616747181965e-06, "loss": 0.3071, "step": 3325 }, { "epoch": 0.67, "grad_norm": 8.018033027648926, "learning_rate": 3.327294685990338e-06, "loss": 0.3161, "step": 3350 }, { "epoch": 0.68, "grad_norm": 6.292661666870117, "learning_rate": 3.276972624798712e-06, "loss": 0.2889, "step": 3375 }, { "epoch": 0.68, "grad_norm": 7.282548904418945, "learning_rate": 3.2266505636070853e-06, "loss": 0.2909, "step": 3400 }, { "epoch": 0.69, "grad_norm": 9.771224975585938, "learning_rate": 3.1763285024154593e-06, "loss": 0.2804, "step": 3425 }, { "epoch": 0.69, "grad_norm": 6.97499418258667, "learning_rate": 3.126006441223833e-06, "loss": 0.3079, "step": 3450 }, { "epoch": 0.69, "grad_norm": 6.536733627319336, "learning_rate": 3.075684380032206e-06, "loss": 0.2727, "step": 3475 }, { "epoch": 0.7, "grad_norm": 9.328429222106934, "learning_rate": 3.02536231884058e-06, "loss": 0.2927, "step": 3500 }, { "epoch": 0.7, "grad_norm": 10.615299224853516, "learning_rate": 2.9750402576489536e-06, "loss": 0.2957, "step": 3525 }, { "epoch": 0.71, "grad_norm": 8.456421852111816, "learning_rate": 2.9247181964573276e-06, "loss": 0.3089, "step": 3550 }, { "epoch": 0.71, "grad_norm": 7.571265697479248, "learning_rate": 2.8743961352657007e-06, "loss": 0.3166, "step": 3575 }, { "epoch": 0.72, "grad_norm": 7.515317440032959, "learning_rate": 2.8240740740740743e-06, "loss": 0.3027, "step": 3600 }, { "epoch": 0.72, "grad_norm": 9.137214660644531, "learning_rate": 2.773752012882448e-06, "loss": 0.2972, "step": 3625 }, { "epoch": 0.73, "grad_norm": 8.118337631225586, "learning_rate": 2.7234299516908214e-06, "loss": 0.3027, "step": 3650 }, { "epoch": 0.73, "grad_norm": 11.383882522583008, "learning_rate": 2.6731078904991954e-06, "loss": 0.3401, "step": 3675 }, { "epoch": 0.74, "grad_norm": 8.050200462341309, "learning_rate": 2.6227858293075685e-06, "loss": 0.3462, "step": 3700 }, { "epoch": 0.74, "grad_norm": 8.983549118041992, "learning_rate": 2.572463768115942e-06, "loss": 0.3346, "step": 3725 }, { "epoch": 0.75, "grad_norm": 5.890308380126953, "learning_rate": 2.522141706924316e-06, "loss": 0.3202, "step": 3750 }, { "epoch": 0.76, "grad_norm": 7.097532272338867, "learning_rate": 2.4718196457326892e-06, "loss": 0.2893, "step": 3775 }, { "epoch": 0.76, "grad_norm": 8.50672721862793, "learning_rate": 2.4214975845410628e-06, "loss": 0.2999, "step": 3800 }, { "epoch": 0.77, "grad_norm": 6.510353088378906, "learning_rate": 2.3711755233494368e-06, "loss": 0.3054, "step": 3825 }, { "epoch": 0.77, "grad_norm": 7.819746971130371, "learning_rate": 2.3208534621578103e-06, "loss": 0.2801, "step": 3850 }, { "epoch": 0.78, "grad_norm": 9.897961616516113, "learning_rate": 2.270531400966184e-06, "loss": 0.2668, "step": 3875 }, { "epoch": 0.78, "grad_norm": 6.692742347717285, "learning_rate": 2.220209339774557e-06, "loss": 0.2632, "step": 3900 }, { "epoch": 0.79, "grad_norm": 6.9810404777526855, "learning_rate": 2.169887278582931e-06, "loss": 0.24, "step": 3925 }, { "epoch": 0.79, "grad_norm": 6.765547275543213, "learning_rate": 2.1195652173913046e-06, "loss": 0.2831, "step": 3950 }, { "epoch": 0.8, "grad_norm": 8.033112525939941, "learning_rate": 2.069243156199678e-06, "loss": 0.316, "step": 3975 }, { "epoch": 0.8, "grad_norm": 9.17114543914795, "learning_rate": 2.0189210950080517e-06, "loss": 0.3315, "step": 4000 }, { "epoch": 0.8, "eval_loss": 0.3504044711589813, "eval_runtime": 243.0184, "eval_samples_per_second": 4.115, "eval_steps_per_second": 0.514, "eval_wer": 28.288453809496467, "step": 4000 }, { "epoch": 0.81, "grad_norm": 8.472304344177246, "learning_rate": 1.9685990338164253e-06, "loss": 0.3486, "step": 4025 }, { "epoch": 0.81, "grad_norm": 8.651259422302246, "learning_rate": 1.918276972624799e-06, "loss": 0.3579, "step": 4050 }, { "epoch": 0.81, "grad_norm": 7.6784772872924805, "learning_rate": 1.8679549114331724e-06, "loss": 0.34, "step": 4075 }, { "epoch": 0.82, "grad_norm": 8.391368865966797, "learning_rate": 1.817632850241546e-06, "loss": 0.3065, "step": 4100 }, { "epoch": 0.82, "grad_norm": 6.728703022003174, "learning_rate": 1.7673107890499197e-06, "loss": 0.2958, "step": 4125 }, { "epoch": 0.83, "grad_norm": 6.842586517333984, "learning_rate": 1.7169887278582933e-06, "loss": 0.2566, "step": 4150 }, { "epoch": 0.83, "grad_norm": 6.050861835479736, "learning_rate": 1.6666666666666667e-06, "loss": 0.2687, "step": 4175 }, { "epoch": 0.84, "grad_norm": 9.184779167175293, "learning_rate": 1.6163446054750404e-06, "loss": 0.2652, "step": 4200 }, { "epoch": 0.84, "grad_norm": 7.263152122497559, "learning_rate": 1.566022544283414e-06, "loss": 0.2604, "step": 4225 }, { "epoch": 0.85, "grad_norm": 7.312118053436279, "learning_rate": 1.5157004830917876e-06, "loss": 0.2352, "step": 4250 }, { "epoch": 0.85, "grad_norm": 10.993212699890137, "learning_rate": 1.4653784219001613e-06, "loss": 0.253, "step": 4275 }, { "epoch": 0.86, "grad_norm": 5.131811618804932, "learning_rate": 1.4150563607085347e-06, "loss": 0.2723, "step": 4300 }, { "epoch": 0.86, "grad_norm": 7.325843334197998, "learning_rate": 1.3647342995169083e-06, "loss": 0.2579, "step": 4325 }, { "epoch": 0.87, "grad_norm": 7.593961715698242, "learning_rate": 1.3144122383252818e-06, "loss": 0.2666, "step": 4350 }, { "epoch": 0.88, "grad_norm": 8.811367988586426, "learning_rate": 1.2640901771336556e-06, "loss": 0.2799, "step": 4375 }, { "epoch": 0.88, "grad_norm": 10.823134422302246, "learning_rate": 1.2137681159420292e-06, "loss": 0.2999, "step": 4400 }, { "epoch": 0.89, "grad_norm": 7.269660949707031, "learning_rate": 1.1634460547504027e-06, "loss": 0.3029, "step": 4425 }, { "epoch": 0.89, "grad_norm": 9.235569953918457, "learning_rate": 1.1131239935587763e-06, "loss": 0.3232, "step": 4450 }, { "epoch": 0.9, "grad_norm": 8.534402847290039, "learning_rate": 1.0628019323671499e-06, "loss": 0.3101, "step": 4475 }, { "epoch": 0.9, "grad_norm": 9.410504341125488, "learning_rate": 1.0124798711755234e-06, "loss": 0.3178, "step": 4500 }, { "epoch": 0.91, "grad_norm": 8.4865083694458, "learning_rate": 9.62157809983897e-07, "loss": 0.2748, "step": 4525 }, { "epoch": 0.91, "grad_norm": 7.169732093811035, "learning_rate": 9.118357487922707e-07, "loss": 0.281, "step": 4550 }, { "epoch": 0.92, "grad_norm": 7.270644187927246, "learning_rate": 8.615136876006441e-07, "loss": 0.2714, "step": 4575 }, { "epoch": 0.92, "grad_norm": 6.573908805847168, "learning_rate": 8.111916264090178e-07, "loss": 0.278, "step": 4600 }, { "epoch": 0.93, "grad_norm": 9.489112854003906, "learning_rate": 7.608695652173914e-07, "loss": 0.2941, "step": 4625 }, { "epoch": 0.93, "grad_norm": 6.89200496673584, "learning_rate": 7.105475040257649e-07, "loss": 0.2792, "step": 4650 }, { "epoch": 0.94, "grad_norm": 8.81523323059082, "learning_rate": 6.602254428341386e-07, "loss": 0.2586, "step": 4675 }, { "epoch": 0.94, "grad_norm": 6.394697666168213, "learning_rate": 6.099033816425121e-07, "loss": 0.3147, "step": 4700 }, { "epoch": 0.94, "grad_norm": 7.134545803070068, "learning_rate": 5.595813204508857e-07, "loss": 0.3337, "step": 4725 }, { "epoch": 0.95, "grad_norm": 8.418858528137207, "learning_rate": 5.092592592592593e-07, "loss": 0.3001, "step": 4750 }, { "epoch": 0.95, "grad_norm": 7.931731224060059, "learning_rate": 4.5893719806763294e-07, "loss": 0.31, "step": 4775 }, { "epoch": 0.96, "grad_norm": 8.287907600402832, "learning_rate": 4.086151368760065e-07, "loss": 0.2927, "step": 4800 }, { "epoch": 0.96, "grad_norm": 6.868620872497559, "learning_rate": 3.5829307568438007e-07, "loss": 0.2962, "step": 4825 }, { "epoch": 0.97, "grad_norm": 8.824197769165039, "learning_rate": 3.079710144927537e-07, "loss": 0.2618, "step": 4850 }, { "epoch": 0.97, "grad_norm": 7.290480136871338, "learning_rate": 2.5764895330112725e-07, "loss": 0.248, "step": 4875 }, { "epoch": 0.98, "grad_norm": 5.285947799682617, "learning_rate": 2.073268921095008e-07, "loss": 0.2223, "step": 4900 }, { "epoch": 0.98, "grad_norm": 6.8819169998168945, "learning_rate": 1.570048309178744e-07, "loss": 0.2055, "step": 4925 }, { "epoch": 0.99, "grad_norm": 6.588069915771484, "learning_rate": 1.0668276972624801e-07, "loss": 0.2279, "step": 4950 }, { "epoch": 0.99, "grad_norm": 11.339116096496582, "learning_rate": 5.6360708534621584e-08, "loss": 0.2176, "step": 4975 }, { "epoch": 1.0, "grad_norm": 6.7692036628723145, "learning_rate": 6.0386473429951695e-09, "loss": 0.2065, "step": 5000 }, { "epoch": 1.0, "eval_loss": 0.3357778787612915, "eval_runtime": 244.8142, "eval_samples_per_second": 4.085, "eval_steps_per_second": 0.511, "eval_wer": 26.439685364873398, "step": 5000 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "total_flos": 1.9695108096e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }