whisper-base-fr-1 / trainer_state.json
arun100's picture
End of training
b9a0bc5 verified
raw
history blame contribute delete
No virus
37.7 kB
{
"best_metric": 27.650982108014144,
"best_model_checkpoint": "./checkpoint-7000",
"epoch": 13.063071428571428,
"eval_steps": 500,
"global_step": 7000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.8e-08,
"loss": 1.3628,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 9.8e-08,
"loss": 1.3146,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.4799999999999998e-07,
"loss": 1.3827,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 1.98e-07,
"loss": 1.288,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 2.48e-07,
"loss": 1.2545,
"step": 125
},
{
"epoch": 0.02,
"learning_rate": 2.98e-07,
"loss": 1.2401,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 3.4799999999999994e-07,
"loss": 1.2529,
"step": 175
},
{
"epoch": 0.03,
"learning_rate": 3.98e-07,
"loss": 1.25,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 4.48e-07,
"loss": 1.1481,
"step": 225
},
{
"epoch": 0.04,
"learning_rate": 4.979999999999999e-07,
"loss": 1.0013,
"step": 250
},
{
"epoch": 0.04,
"learning_rate": 5.48e-07,
"loss": 0.9133,
"step": 275
},
{
"epoch": 0.04,
"learning_rate": 5.979999999999999e-07,
"loss": 0.9555,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 6.48e-07,
"loss": 0.8325,
"step": 325
},
{
"epoch": 0.05,
"learning_rate": 6.979999999999999e-07,
"loss": 0.7611,
"step": 350
},
{
"epoch": 0.05,
"learning_rate": 7.48e-07,
"loss": 0.7607,
"step": 375
},
{
"epoch": 0.06,
"learning_rate": 7.98e-07,
"loss": 0.6794,
"step": 400
},
{
"epoch": 0.06,
"learning_rate": 8.48e-07,
"loss": 0.6333,
"step": 425
},
{
"epoch": 0.06,
"learning_rate": 8.98e-07,
"loss": 0.6445,
"step": 450
},
{
"epoch": 0.07,
"learning_rate": 9.479999999999999e-07,
"loss": 0.6742,
"step": 475
},
{
"epoch": 0.07,
"learning_rate": 9.98e-07,
"loss": 0.739,
"step": 500
},
{
"epoch": 0.07,
"eval_loss": 0.7505670785903931,
"eval_runtime": 1365.9553,
"eval_samples_per_second": 11.817,
"eval_steps_per_second": 0.37,
"eval_wer": 35.00880999964524,
"step": 500
},
{
"epoch": 1.0,
"learning_rate": 9.963076923076924e-07,
"loss": 0.6733,
"step": 525
},
{
"epoch": 1.01,
"learning_rate": 9.924615384615386e-07,
"loss": 0.6337,
"step": 550
},
{
"epoch": 1.01,
"learning_rate": 9.886153846153845e-07,
"loss": 0.6459,
"step": 575
},
{
"epoch": 1.01,
"learning_rate": 9.847692307692307e-07,
"loss": 0.6253,
"step": 600
},
{
"epoch": 1.02,
"learning_rate": 9.809230769230769e-07,
"loss": 0.6447,
"step": 625
},
{
"epoch": 1.02,
"learning_rate": 9.77076923076923e-07,
"loss": 0.6424,
"step": 650
},
{
"epoch": 1.02,
"learning_rate": 9.732307692307692e-07,
"loss": 0.6403,
"step": 675
},
{
"epoch": 1.03,
"learning_rate": 9.693846153846154e-07,
"loss": 0.6501,
"step": 700
},
{
"epoch": 1.03,
"learning_rate": 9.655384615384616e-07,
"loss": 0.619,
"step": 725
},
{
"epoch": 1.04,
"learning_rate": 9.616923076923077e-07,
"loss": 0.6014,
"step": 750
},
{
"epoch": 1.04,
"learning_rate": 9.578461538461537e-07,
"loss": 0.581,
"step": 775
},
{
"epoch": 1.04,
"learning_rate": 9.539999999999999e-07,
"loss": 0.561,
"step": 800
},
{
"epoch": 1.05,
"learning_rate": 9.501538461538461e-07,
"loss": 0.5794,
"step": 825
},
{
"epoch": 1.05,
"learning_rate": 9.463076923076922e-07,
"loss": 0.6063,
"step": 850
},
{
"epoch": 1.05,
"learning_rate": 9.424615384615384e-07,
"loss": 0.5932,
"step": 875
},
{
"epoch": 1.06,
"learning_rate": 9.386153846153846e-07,
"loss": 0.5755,
"step": 900
},
{
"epoch": 1.06,
"learning_rate": 9.347692307692308e-07,
"loss": 0.5826,
"step": 925
},
{
"epoch": 1.06,
"learning_rate": 9.309230769230769e-07,
"loss": 0.6204,
"step": 950
},
{
"epoch": 1.07,
"learning_rate": 9.270769230769231e-07,
"loss": 0.5591,
"step": 975
},
{
"epoch": 1.07,
"learning_rate": 9.232307692307692e-07,
"loss": 0.6131,
"step": 1000
},
{
"epoch": 1.07,
"eval_loss": 0.6595470905303955,
"eval_runtime": 1267.4973,
"eval_samples_per_second": 12.735,
"eval_steps_per_second": 0.398,
"eval_wer": 31.429821553161545,
"step": 1000
},
{
"epoch": 2.0,
"learning_rate": 9.193846153846154e-07,
"loss": 0.5829,
"step": 1025
},
{
"epoch": 2.01,
"learning_rate": 9.155384615384615e-07,
"loss": 0.5177,
"step": 1050
},
{
"epoch": 2.01,
"learning_rate": 9.116923076923077e-07,
"loss": 0.5542,
"step": 1075
},
{
"epoch": 2.01,
"learning_rate": 9.078461538461538e-07,
"loss": 0.591,
"step": 1100
},
{
"epoch": 2.02,
"learning_rate": 9.039999999999999e-07,
"loss": 0.5383,
"step": 1125
},
{
"epoch": 2.02,
"learning_rate": 9.001538461538461e-07,
"loss": 0.5493,
"step": 1150
},
{
"epoch": 2.02,
"learning_rate": 8.963076923076923e-07,
"loss": 0.5522,
"step": 1175
},
{
"epoch": 2.03,
"learning_rate": 8.924615384615384e-07,
"loss": 0.5528,
"step": 1200
},
{
"epoch": 2.03,
"learning_rate": 8.886153846153845e-07,
"loss": 0.5702,
"step": 1225
},
{
"epoch": 2.03,
"learning_rate": 8.847692307692307e-07,
"loss": 0.5457,
"step": 1250
},
{
"epoch": 2.04,
"learning_rate": 8.809230769230769e-07,
"loss": 0.5805,
"step": 1275
},
{
"epoch": 2.04,
"learning_rate": 8.770769230769231e-07,
"loss": 0.5246,
"step": 1300
},
{
"epoch": 2.05,
"learning_rate": 8.732307692307692e-07,
"loss": 0.5805,
"step": 1325
},
{
"epoch": 2.05,
"learning_rate": 8.693846153846154e-07,
"loss": 0.541,
"step": 1350
},
{
"epoch": 2.05,
"learning_rate": 8.655384615384616e-07,
"loss": 0.5333,
"step": 1375
},
{
"epoch": 2.06,
"learning_rate": 8.616923076923076e-07,
"loss": 0.5509,
"step": 1400
},
{
"epoch": 2.06,
"learning_rate": 8.578461538461538e-07,
"loss": 0.5224,
"step": 1425
},
{
"epoch": 2.06,
"learning_rate": 8.539999999999999e-07,
"loss": 0.5498,
"step": 1450
},
{
"epoch": 2.07,
"learning_rate": 8.501538461538461e-07,
"loss": 0.501,
"step": 1475
},
{
"epoch": 2.07,
"learning_rate": 8.463076923076922e-07,
"loss": 0.5311,
"step": 1500
},
{
"epoch": 2.07,
"eval_loss": 0.6301009654998779,
"eval_runtime": 1277.0428,
"eval_samples_per_second": 12.639,
"eval_steps_per_second": 0.395,
"eval_wer": 30.623322256778966,
"step": 1500
},
{
"epoch": 3.0,
"learning_rate": 8.424615384615384e-07,
"loss": 0.62,
"step": 1525
},
{
"epoch": 3.01,
"learning_rate": 8.386153846153846e-07,
"loss": 0.5652,
"step": 1550
},
{
"epoch": 3.01,
"learning_rate": 8.347692307692308e-07,
"loss": 0.5905,
"step": 1575
},
{
"epoch": 3.01,
"learning_rate": 8.309230769230768e-07,
"loss": 0.5598,
"step": 1600
},
{
"epoch": 3.02,
"learning_rate": 8.27076923076923e-07,
"loss": 0.5169,
"step": 1625
},
{
"epoch": 3.02,
"learning_rate": 8.232307692307692e-07,
"loss": 0.5144,
"step": 1650
},
{
"epoch": 3.02,
"learning_rate": 8.193846153846154e-07,
"loss": 0.5033,
"step": 1675
},
{
"epoch": 3.03,
"learning_rate": 8.155384615384615e-07,
"loss": 0.5355,
"step": 1700
},
{
"epoch": 3.03,
"learning_rate": 8.116923076923077e-07,
"loss": 0.5146,
"step": 1725
},
{
"epoch": 3.03,
"learning_rate": 8.078461538461539e-07,
"loss": 0.5071,
"step": 1750
},
{
"epoch": 3.04,
"learning_rate": 8.04e-07,
"loss": 0.5108,
"step": 1775
},
{
"epoch": 3.04,
"learning_rate": 8.001538461538461e-07,
"loss": 0.4895,
"step": 1800
},
{
"epoch": 3.04,
"learning_rate": 7.963076923076922e-07,
"loss": 0.4777,
"step": 1825
},
{
"epoch": 3.05,
"learning_rate": 7.924615384615384e-07,
"loss": 0.5107,
"step": 1850
},
{
"epoch": 3.05,
"learning_rate": 7.886153846153845e-07,
"loss": 0.5348,
"step": 1875
},
{
"epoch": 3.06,
"learning_rate": 7.847692307692307e-07,
"loss": 0.4912,
"step": 1900
},
{
"epoch": 3.06,
"learning_rate": 7.809230769230769e-07,
"loss": 0.5226,
"step": 1925
},
{
"epoch": 3.06,
"learning_rate": 7.770769230769231e-07,
"loss": 0.5073,
"step": 1950
},
{
"epoch": 3.07,
"learning_rate": 7.732307692307692e-07,
"loss": 0.4799,
"step": 1975
},
{
"epoch": 3.07,
"learning_rate": 7.693846153846153e-07,
"loss": 0.551,
"step": 2000
},
{
"epoch": 3.07,
"eval_loss": 0.6141447424888611,
"eval_runtime": 1258.2269,
"eval_samples_per_second": 12.828,
"eval_steps_per_second": 0.401,
"eval_wer": 29.781937726901837,
"step": 2000
},
{
"epoch": 4.0,
"learning_rate": 7.655384615384615e-07,
"loss": 0.5604,
"step": 2025
},
{
"epoch": 4.0,
"learning_rate": 7.616923076923077e-07,
"loss": 0.4793,
"step": 2050
},
{
"epoch": 4.01,
"learning_rate": 7.578461538461538e-07,
"loss": 0.4757,
"step": 2075
},
{
"epoch": 4.01,
"learning_rate": 7.54e-07,
"loss": 0.501,
"step": 2100
},
{
"epoch": 4.02,
"learning_rate": 7.501538461538462e-07,
"loss": 0.5085,
"step": 2125
},
{
"epoch": 4.02,
"learning_rate": 7.463076923076924e-07,
"loss": 0.5199,
"step": 2150
},
{
"epoch": 4.02,
"learning_rate": 7.424615384615385e-07,
"loss": 0.5725,
"step": 2175
},
{
"epoch": 4.03,
"learning_rate": 7.386153846153845e-07,
"loss": 0.5193,
"step": 2200
},
{
"epoch": 4.03,
"learning_rate": 7.347692307692307e-07,
"loss": 0.5119,
"step": 2225
},
{
"epoch": 4.03,
"learning_rate": 7.309230769230768e-07,
"loss": 0.5094,
"step": 2250
},
{
"epoch": 4.04,
"learning_rate": 7.27076923076923e-07,
"loss": 0.5565,
"step": 2275
},
{
"epoch": 4.04,
"learning_rate": 7.232307692307692e-07,
"loss": 0.5109,
"step": 2300
},
{
"epoch": 4.04,
"learning_rate": 7.193846153846154e-07,
"loss": 0.4878,
"step": 2325
},
{
"epoch": 4.05,
"learning_rate": 7.155384615384615e-07,
"loss": 0.4965,
"step": 2350
},
{
"epoch": 4.05,
"learning_rate": 7.116923076923077e-07,
"loss": 0.482,
"step": 2375
},
{
"epoch": 4.05,
"learning_rate": 7.078461538461538e-07,
"loss": 0.4418,
"step": 2400
},
{
"epoch": 4.06,
"learning_rate": 7.04e-07,
"loss": 0.4697,
"step": 2425
},
{
"epoch": 4.06,
"learning_rate": 7.001538461538461e-07,
"loss": 0.4584,
"step": 2450
},
{
"epoch": 4.07,
"learning_rate": 6.963076923076923e-07,
"loss": 0.4697,
"step": 2475
},
{
"epoch": 4.07,
"learning_rate": 6.924615384615385e-07,
"loss": 0.4588,
"step": 2500
},
{
"epoch": 4.07,
"eval_loss": 0.6003413200378418,
"eval_runtime": 1263.8901,
"eval_samples_per_second": 12.771,
"eval_steps_per_second": 0.4,
"eval_wer": 29.252746473043768,
"step": 2500
},
{
"epoch": 5.0,
"learning_rate": 6.886153846153846e-07,
"loss": 0.5057,
"step": 2525
},
{
"epoch": 5.0,
"learning_rate": 6.847692307692307e-07,
"loss": 0.4379,
"step": 2550
},
{
"epoch": 5.01,
"learning_rate": 6.809230769230769e-07,
"loss": 0.4401,
"step": 2575
},
{
"epoch": 5.01,
"learning_rate": 6.770769230769231e-07,
"loss": 0.4427,
"step": 2600
},
{
"epoch": 5.01,
"learning_rate": 6.732307692307691e-07,
"loss": 0.4519,
"step": 2625
},
{
"epoch": 5.02,
"learning_rate": 6.693846153846153e-07,
"loss": 0.4947,
"step": 2650
},
{
"epoch": 5.02,
"learning_rate": 6.655384615384615e-07,
"loss": 0.4607,
"step": 2675
},
{
"epoch": 5.03,
"learning_rate": 6.616923076923077e-07,
"loss": 0.4355,
"step": 2700
},
{
"epoch": 5.03,
"learning_rate": 6.578461538461538e-07,
"loss": 0.4545,
"step": 2725
},
{
"epoch": 5.03,
"learning_rate": 6.54e-07,
"loss": 0.4304,
"step": 2750
},
{
"epoch": 5.04,
"learning_rate": 6.501538461538462e-07,
"loss": 0.5341,
"step": 2775
},
{
"epoch": 5.04,
"learning_rate": 6.463076923076924e-07,
"loss": 0.5021,
"step": 2800
},
{
"epoch": 5.04,
"learning_rate": 6.424615384615384e-07,
"loss": 0.4787,
"step": 2825
},
{
"epoch": 5.05,
"learning_rate": 6.386153846153846e-07,
"loss": 0.4878,
"step": 2850
},
{
"epoch": 5.05,
"learning_rate": 6.347692307692308e-07,
"loss": 0.474,
"step": 2875
},
{
"epoch": 5.05,
"learning_rate": 6.309230769230768e-07,
"loss": 0.4823,
"step": 2900
},
{
"epoch": 5.06,
"learning_rate": 6.27076923076923e-07,
"loss": 0.4788,
"step": 2925
},
{
"epoch": 5.06,
"learning_rate": 6.232307692307692e-07,
"loss": 0.457,
"step": 2950
},
{
"epoch": 5.06,
"learning_rate": 6.193846153846154e-07,
"loss": 0.4343,
"step": 2975
},
{
"epoch": 5.07,
"learning_rate": 6.155384615384615e-07,
"loss": 0.4163,
"step": 3000
},
{
"epoch": 5.07,
"eval_loss": 0.5935855507850647,
"eval_runtime": 1271.4925,
"eval_samples_per_second": 12.695,
"eval_steps_per_second": 0.397,
"eval_wer": 29.02924446862103,
"step": 3000
},
{
"epoch": 5.07,
"learning_rate": 6.118461538461538e-07,
"loss": 0.4411,
"step": 3025
},
{
"epoch": 6.0,
"learning_rate": 6.079999999999999e-07,
"loss": 0.5187,
"step": 3050
},
{
"epoch": 6.01,
"learning_rate": 6.041538461538461e-07,
"loss": 0.521,
"step": 3075
},
{
"epoch": 6.01,
"learning_rate": 6.003076923076923e-07,
"loss": 0.5194,
"step": 3100
},
{
"epoch": 6.01,
"learning_rate": 5.964615384615385e-07,
"loss": 0.5188,
"step": 3125
},
{
"epoch": 6.02,
"learning_rate": 5.926153846153846e-07,
"loss": 0.5057,
"step": 3150
},
{
"epoch": 6.02,
"learning_rate": 5.887692307692308e-07,
"loss": 0.4798,
"step": 3175
},
{
"epoch": 6.02,
"learning_rate": 5.849230769230769e-07,
"loss": 0.4956,
"step": 3200
},
{
"epoch": 6.03,
"learning_rate": 5.810769230769231e-07,
"loss": 0.4896,
"step": 3225
},
{
"epoch": 6.03,
"learning_rate": 5.772307692307692e-07,
"loss": 0.5033,
"step": 3250
},
{
"epoch": 6.04,
"learning_rate": 5.733846153846154e-07,
"loss": 0.5011,
"step": 3275
},
{
"epoch": 6.04,
"learning_rate": 5.695384615384615e-07,
"loss": 0.4566,
"step": 3300
},
{
"epoch": 6.04,
"learning_rate": 5.656923076923076e-07,
"loss": 0.4661,
"step": 3325
},
{
"epoch": 6.05,
"learning_rate": 5.618461538461538e-07,
"loss": 0.4607,
"step": 3350
},
{
"epoch": 6.05,
"learning_rate": 5.58e-07,
"loss": 0.4659,
"step": 3375
},
{
"epoch": 6.05,
"learning_rate": 5.541538461538461e-07,
"loss": 0.4577,
"step": 3400
},
{
"epoch": 6.06,
"learning_rate": 5.503076923076922e-07,
"loss": 0.4721,
"step": 3425
},
{
"epoch": 6.06,
"learning_rate": 5.464615384615384e-07,
"loss": 0.469,
"step": 3450
},
{
"epoch": 6.06,
"learning_rate": 5.426153846153846e-07,
"loss": 0.4759,
"step": 3475
},
{
"epoch": 6.07,
"learning_rate": 5.387692307692308e-07,
"loss": 0.4553,
"step": 3500
},
{
"epoch": 6.07,
"eval_loss": 0.5837779641151428,
"eval_runtime": 1248.1855,
"eval_samples_per_second": 12.932,
"eval_steps_per_second": 0.405,
"eval_wer": 28.479949859867794,
"step": 3500
},
{
"epoch": 6.07,
"learning_rate": 5.349230769230769e-07,
"loss": 0.5038,
"step": 3525
},
{
"epoch": 7.0,
"learning_rate": 5.310769230769231e-07,
"loss": 0.5066,
"step": 3550
},
{
"epoch": 7.01,
"learning_rate": 5.272307692307693e-07,
"loss": 0.4496,
"step": 3575
},
{
"epoch": 7.01,
"learning_rate": 5.233846153846154e-07,
"loss": 0.4628,
"step": 3600
},
{
"epoch": 7.01,
"learning_rate": 5.195384615384615e-07,
"loss": 0.4837,
"step": 3625
},
{
"epoch": 7.02,
"learning_rate": 5.156923076923077e-07,
"loss": 0.4728,
"step": 3650
},
{
"epoch": 7.02,
"learning_rate": 5.118461538461538e-07,
"loss": 0.4741,
"step": 3675
},
{
"epoch": 7.02,
"learning_rate": 5.079999999999999e-07,
"loss": 0.4565,
"step": 3700
},
{
"epoch": 7.03,
"learning_rate": 5.041538461538461e-07,
"loss": 0.4867,
"step": 3725
},
{
"epoch": 7.03,
"learning_rate": 5.003076923076923e-07,
"loss": 0.4573,
"step": 3750
},
{
"epoch": 7.03,
"learning_rate": 4.964615384615385e-07,
"loss": 0.4872,
"step": 3775
},
{
"epoch": 7.04,
"learning_rate": 4.926153846153846e-07,
"loss": 0.4499,
"step": 3800
},
{
"epoch": 7.04,
"learning_rate": 4.887692307692308e-07,
"loss": 0.4456,
"step": 3825
},
{
"epoch": 7.05,
"learning_rate": 4.849230769230769e-07,
"loss": 0.437,
"step": 3850
},
{
"epoch": 7.05,
"learning_rate": 4.810769230769231e-07,
"loss": 0.4494,
"step": 3875
},
{
"epoch": 7.05,
"learning_rate": 4.772307692307692e-07,
"loss": 0.4497,
"step": 3900
},
{
"epoch": 7.06,
"learning_rate": 4.733846153846154e-07,
"loss": 0.4457,
"step": 3925
},
{
"epoch": 7.06,
"learning_rate": 4.695384615384615e-07,
"loss": 0.4571,
"step": 3950
},
{
"epoch": 7.06,
"learning_rate": 4.6569230769230765e-07,
"loss": 0.4555,
"step": 3975
},
{
"epoch": 7.07,
"learning_rate": 4.618461538461538e-07,
"loss": 0.4395,
"step": 4000
},
{
"epoch": 7.07,
"eval_loss": 0.5783179998397827,
"eval_runtime": 1220.7231,
"eval_samples_per_second": 13.222,
"eval_steps_per_second": 0.414,
"eval_wer": 28.24876127857337,
"step": 4000
},
{
"epoch": 7.07,
"learning_rate": 4.58e-07,
"loss": 0.483,
"step": 4025
},
{
"epoch": 8.0,
"learning_rate": 4.541538461538461e-07,
"loss": 0.5286,
"step": 4050
},
{
"epoch": 8.01,
"learning_rate": 4.503076923076923e-07,
"loss": 0.513,
"step": 4075
},
{
"epoch": 8.01,
"learning_rate": 4.4646153846153847e-07,
"loss": 0.492,
"step": 4100
},
{
"epoch": 8.01,
"learning_rate": 4.426153846153846e-07,
"loss": 0.4849,
"step": 4125
},
{
"epoch": 8.02,
"learning_rate": 4.387692307692307e-07,
"loss": 0.4528,
"step": 4150
},
{
"epoch": 8.02,
"learning_rate": 4.349230769230769e-07,
"loss": 0.4383,
"step": 4175
},
{
"epoch": 8.02,
"learning_rate": 4.3107692307692306e-07,
"loss": 0.4397,
"step": 4200
},
{
"epoch": 8.03,
"learning_rate": 4.2723076923076923e-07,
"loss": 0.4659,
"step": 4225
},
{
"epoch": 8.03,
"learning_rate": 4.2338461538461536e-07,
"loss": 0.4536,
"step": 4250
},
{
"epoch": 8.03,
"learning_rate": 4.1953846153846153e-07,
"loss": 0.4476,
"step": 4275
},
{
"epoch": 8.04,
"learning_rate": 4.1569230769230765e-07,
"loss": 0.4319,
"step": 4300
},
{
"epoch": 8.04,
"learning_rate": 4.118461538461538e-07,
"loss": 0.4332,
"step": 4325
},
{
"epoch": 8.04,
"learning_rate": 4.0799999999999995e-07,
"loss": 0.4134,
"step": 4350
},
{
"epoch": 8.05,
"learning_rate": 4.041538461538461e-07,
"loss": 0.4494,
"step": 4375
},
{
"epoch": 8.05,
"learning_rate": 4.003076923076923e-07,
"loss": 0.4804,
"step": 4400
},
{
"epoch": 8.06,
"learning_rate": 3.9646153846153847e-07,
"loss": 0.4146,
"step": 4425
},
{
"epoch": 8.06,
"learning_rate": 3.926153846153846e-07,
"loss": 0.4724,
"step": 4450
},
{
"epoch": 8.06,
"learning_rate": 3.887692307692307e-07,
"loss": 0.4462,
"step": 4475
},
{
"epoch": 8.07,
"learning_rate": 3.849230769230769e-07,
"loss": 0.4233,
"step": 4500
},
{
"epoch": 8.07,
"eval_loss": 0.5746914148330688,
"eval_runtime": 1237.3512,
"eval_samples_per_second": 13.045,
"eval_steps_per_second": 0.408,
"eval_wer": 28.07788276196445,
"step": 4500
},
{
"epoch": 8.07,
"learning_rate": 3.8107692307692306e-07,
"loss": 0.4978,
"step": 4525
},
{
"epoch": 9.0,
"learning_rate": 3.7723076923076924e-07,
"loss": 0.4922,
"step": 4550
},
{
"epoch": 9.0,
"learning_rate": 3.7338461538461536e-07,
"loss": 0.4241,
"step": 4575
},
{
"epoch": 9.01,
"learning_rate": 3.6953846153846153e-07,
"loss": 0.4194,
"step": 4600
},
{
"epoch": 9.01,
"learning_rate": 3.656923076923077e-07,
"loss": 0.4515,
"step": 4625
},
{
"epoch": 9.02,
"learning_rate": 3.618461538461539e-07,
"loss": 0.4537,
"step": 4650
},
{
"epoch": 9.02,
"learning_rate": 3.5799999999999995e-07,
"loss": 0.4676,
"step": 4675
},
{
"epoch": 9.02,
"learning_rate": 3.541538461538461e-07,
"loss": 0.5138,
"step": 4700
},
{
"epoch": 9.03,
"learning_rate": 3.503076923076923e-07,
"loss": 0.4551,
"step": 4725
},
{
"epoch": 9.03,
"learning_rate": 3.4646153846153847e-07,
"loss": 0.4621,
"step": 4750
},
{
"epoch": 9.03,
"learning_rate": 3.426153846153846e-07,
"loss": 0.4554,
"step": 4775
},
{
"epoch": 9.04,
"learning_rate": 3.3876923076923077e-07,
"loss": 0.5044,
"step": 4800
},
{
"epoch": 9.04,
"learning_rate": 3.3492307692307694e-07,
"loss": 0.4592,
"step": 4825
},
{
"epoch": 9.04,
"learning_rate": 3.3107692307692306e-07,
"loss": 0.4401,
"step": 4850
},
{
"epoch": 9.05,
"learning_rate": 3.272307692307692e-07,
"loss": 0.4405,
"step": 4875
},
{
"epoch": 9.05,
"learning_rate": 3.2338461538461536e-07,
"loss": 0.4307,
"step": 4900
},
{
"epoch": 9.05,
"learning_rate": 3.1953846153846153e-07,
"loss": 0.3942,
"step": 4925
},
{
"epoch": 9.06,
"learning_rate": 3.156923076923077e-07,
"loss": 0.4354,
"step": 4950
},
{
"epoch": 9.06,
"learning_rate": 3.1184615384615383e-07,
"loss": 0.4048,
"step": 4975
},
{
"epoch": 9.07,
"learning_rate": 3.08e-07,
"loss": 0.4204,
"step": 5000
},
{
"epoch": 9.07,
"eval_loss": 0.5712096691131592,
"eval_runtime": 1254.0999,
"eval_samples_per_second": 12.871,
"eval_steps_per_second": 0.403,
"eval_wer": 28.11217672031503,
"step": 5000
},
{
"epoch": 9.07,
"learning_rate": 3.0415384615384613e-07,
"loss": 0.4269,
"step": 5025
},
{
"epoch": 10.0,
"learning_rate": 3.0046153846153846e-07,
"loss": 0.4425,
"step": 5050
},
{
"epoch": 10.0,
"learning_rate": 2.966153846153846e-07,
"loss": 0.3973,
"step": 5075
},
{
"epoch": 10.01,
"learning_rate": 2.9276923076923075e-07,
"loss": 0.3994,
"step": 5100
},
{
"epoch": 10.01,
"learning_rate": 2.8892307692307693e-07,
"loss": 0.4144,
"step": 5125
},
{
"epoch": 10.02,
"learning_rate": 2.8507692307692305e-07,
"loss": 0.4165,
"step": 5150
},
{
"epoch": 10.02,
"learning_rate": 2.812307692307692e-07,
"loss": 0.4084,
"step": 5175
},
{
"epoch": 10.02,
"learning_rate": 2.773846153846154e-07,
"loss": 0.3953,
"step": 5200
},
{
"epoch": 10.03,
"learning_rate": 2.7353846153846157e-07,
"loss": 0.3909,
"step": 5225
},
{
"epoch": 10.03,
"learning_rate": 2.6969230769230764e-07,
"loss": 0.3984,
"step": 5250
},
{
"epoch": 10.03,
"learning_rate": 2.658461538461538e-07,
"loss": 0.3863,
"step": 5275
},
{
"epoch": 10.04,
"learning_rate": 2.62e-07,
"loss": 0.4402,
"step": 5300
},
{
"epoch": 10.04,
"learning_rate": 2.5815384615384616e-07,
"loss": 0.4355,
"step": 5325
},
{
"epoch": 10.04,
"learning_rate": 2.543076923076923e-07,
"loss": 0.4346,
"step": 5350
},
{
"epoch": 10.05,
"learning_rate": 2.5046153846153846e-07,
"loss": 0.44,
"step": 5375
},
{
"epoch": 10.05,
"learning_rate": 2.4661538461538463e-07,
"loss": 0.4339,
"step": 5400
},
{
"epoch": 10.05,
"learning_rate": 2.4276923076923076e-07,
"loss": 0.4363,
"step": 5425
},
{
"epoch": 10.06,
"learning_rate": 2.3892307692307693e-07,
"loss": 0.4694,
"step": 5450
},
{
"epoch": 10.06,
"learning_rate": 2.3507692307692305e-07,
"loss": 0.4834,
"step": 5475
},
{
"epoch": 10.06,
"learning_rate": 2.3123076923076923e-07,
"loss": 0.4378,
"step": 5500
},
{
"epoch": 10.06,
"eval_loss": 0.5695297718048096,
"eval_runtime": 1232.4207,
"eval_samples_per_second": 13.097,
"eval_steps_per_second": 0.41,
"eval_wer": 28.057779407069283,
"step": 5500
},
{
"epoch": 10.07,
"learning_rate": 2.2738461538461537e-07,
"loss": 0.4635,
"step": 5525
},
{
"epoch": 11.0,
"learning_rate": 2.2353846153846155e-07,
"loss": 0.4252,
"step": 5550
},
{
"epoch": 11.0,
"learning_rate": 2.1969230769230767e-07,
"loss": 0.4115,
"step": 5575
},
{
"epoch": 11.01,
"learning_rate": 2.1584615384615384e-07,
"loss": 0.4215,
"step": 5600
},
{
"epoch": 11.01,
"learning_rate": 2.12e-07,
"loss": 0.4793,
"step": 5625
},
{
"epoch": 11.01,
"learning_rate": 2.0815384615384614e-07,
"loss": 0.4638,
"step": 5650
},
{
"epoch": 11.02,
"learning_rate": 2.043076923076923e-07,
"loss": 0.4225,
"step": 5675
},
{
"epoch": 11.02,
"learning_rate": 2.0046153846153846e-07,
"loss": 0.436,
"step": 5700
},
{
"epoch": 11.03,
"learning_rate": 1.966153846153846e-07,
"loss": 0.4274,
"step": 5725
},
{
"epoch": 11.03,
"learning_rate": 1.9276923076923076e-07,
"loss": 0.4437,
"step": 5750
},
{
"epoch": 11.03,
"learning_rate": 1.889230769230769e-07,
"loss": 0.4447,
"step": 5775
},
{
"epoch": 11.04,
"learning_rate": 1.8507692307692308e-07,
"loss": 0.4578,
"step": 5800
},
{
"epoch": 11.04,
"learning_rate": 1.812307692307692e-07,
"loss": 0.6333,
"step": 5825
},
{
"epoch": 11.04,
"learning_rate": 1.7738461538461538e-07,
"loss": 0.4166,
"step": 5850
},
{
"epoch": 11.05,
"learning_rate": 1.7353846153846152e-07,
"loss": 0.4178,
"step": 5875
},
{
"epoch": 11.05,
"learning_rate": 1.696923076923077e-07,
"loss": 0.3826,
"step": 5900
},
{
"epoch": 11.05,
"learning_rate": 1.6584615384615382e-07,
"loss": 0.4209,
"step": 5925
},
{
"epoch": 11.06,
"learning_rate": 1.62e-07,
"loss": 0.4674,
"step": 5950
},
{
"epoch": 11.06,
"learning_rate": 1.5815384615384614e-07,
"loss": 0.4828,
"step": 5975
},
{
"epoch": 11.06,
"learning_rate": 1.5430769230769232e-07,
"loss": 0.4337,
"step": 6000
},
{
"epoch": 11.06,
"eval_loss": 0.5673043727874756,
"eval_runtime": 1224.2663,
"eval_samples_per_second": 13.184,
"eval_steps_per_second": 0.412,
"eval_wer": 27.781653914832727,
"step": 6000
},
{
"epoch": 11.07,
"learning_rate": 1.5046153846153844e-07,
"loss": 0.3946,
"step": 6025
},
{
"epoch": 11.07,
"learning_rate": 1.466153846153846e-07,
"loss": 0.4453,
"step": 6050
},
{
"epoch": 12.0,
"learning_rate": 1.4276923076923076e-07,
"loss": 0.4741,
"step": 6075
},
{
"epoch": 12.01,
"learning_rate": 1.389230769230769e-07,
"loss": 0.4201,
"step": 6100
},
{
"epoch": 12.01,
"learning_rate": 1.3507692307692306e-07,
"loss": 0.4456,
"step": 6125
},
{
"epoch": 12.01,
"learning_rate": 1.3123076923076923e-07,
"loss": 0.4489,
"step": 6150
},
{
"epoch": 12.02,
"learning_rate": 1.2738461538461538e-07,
"loss": 0.4399,
"step": 6175
},
{
"epoch": 12.02,
"learning_rate": 1.2353846153846153e-07,
"loss": 0.4538,
"step": 6200
},
{
"epoch": 12.02,
"learning_rate": 1.196923076923077e-07,
"loss": 0.4297,
"step": 6225
},
{
"epoch": 12.03,
"learning_rate": 1.1584615384615385e-07,
"loss": 0.4568,
"step": 6250
},
{
"epoch": 12.03,
"learning_rate": 1.12e-07,
"loss": 0.4262,
"step": 6275
},
{
"epoch": 12.04,
"learning_rate": 1.0815384615384614e-07,
"loss": 0.4578,
"step": 6300
},
{
"epoch": 12.04,
"learning_rate": 1.043076923076923e-07,
"loss": 0.425,
"step": 6325
},
{
"epoch": 12.04,
"learning_rate": 1.0046153846153845e-07,
"loss": 0.4336,
"step": 6350
},
{
"epoch": 12.05,
"learning_rate": 9.661538461538462e-08,
"loss": 0.3991,
"step": 6375
},
{
"epoch": 12.05,
"learning_rate": 9.276923076923076e-08,
"loss": 0.4253,
"step": 6400
},
{
"epoch": 12.05,
"learning_rate": 8.892307692307692e-08,
"loss": 0.427,
"step": 6425
},
{
"epoch": 12.06,
"learning_rate": 8.507692307692307e-08,
"loss": 0.428,
"step": 6450
},
{
"epoch": 12.06,
"learning_rate": 8.123076923076923e-08,
"loss": 0.4389,
"step": 6475
},
{
"epoch": 12.06,
"learning_rate": 7.738461538461538e-08,
"loss": 0.4277,
"step": 6500
},
{
"epoch": 12.06,
"eval_loss": 0.565819501876831,
"eval_runtime": 1225.0325,
"eval_samples_per_second": 13.176,
"eval_steps_per_second": 0.412,
"eval_wer": 27.663398886037626,
"step": 6500
},
{
"epoch": 12.07,
"learning_rate": 7.353846153846153e-08,
"loss": 0.4094,
"step": 6525
},
{
"epoch": 12.07,
"learning_rate": 6.969230769230769e-08,
"loss": 0.4744,
"step": 6550
},
{
"epoch": 13.0,
"learning_rate": 6.584615384615384e-08,
"loss": 0.4288,
"step": 6575
},
{
"epoch": 13.01,
"learning_rate": 6.2e-08,
"loss": 0.4053,
"step": 6600
},
{
"epoch": 13.01,
"learning_rate": 5.8153846153846154e-08,
"loss": 0.4049,
"step": 6625
},
{
"epoch": 13.01,
"learning_rate": 5.43076923076923e-08,
"loss": 0.4309,
"step": 6650
},
{
"epoch": 13.02,
"learning_rate": 5.0461538461538456e-08,
"loss": 0.3928,
"step": 6675
},
{
"epoch": 13.02,
"learning_rate": 4.661538461538461e-08,
"loss": 0.4274,
"step": 6700
},
{
"epoch": 13.02,
"learning_rate": 4.2769230769230765e-08,
"loss": 0.423,
"step": 6725
},
{
"epoch": 13.03,
"learning_rate": 3.892307692307692e-08,
"loss": 0.4922,
"step": 6750
},
{
"epoch": 13.03,
"learning_rate": 3.5076923076923074e-08,
"loss": 0.4777,
"step": 6775
},
{
"epoch": 13.03,
"learning_rate": 3.123076923076923e-08,
"loss": 0.4448,
"step": 6800
},
{
"epoch": 13.04,
"learning_rate": 2.7384615384615383e-08,
"loss": 0.4551,
"step": 6825
},
{
"epoch": 13.04,
"learning_rate": 2.3538461538461538e-08,
"loss": 0.5006,
"step": 6850
},
{
"epoch": 13.05,
"learning_rate": 1.9692307692307693e-08,
"loss": 0.5345,
"step": 6875
},
{
"epoch": 13.05,
"learning_rate": 1.5846153846153847e-08,
"loss": 0.4159,
"step": 6900
},
{
"epoch": 13.05,
"learning_rate": 1.2e-08,
"loss": 0.4167,
"step": 6925
},
{
"epoch": 13.06,
"learning_rate": 8.153846153846153e-09,
"loss": 0.4118,
"step": 6950
},
{
"epoch": 13.06,
"learning_rate": 4.307692307692307e-09,
"loss": 0.3877,
"step": 6975
},
{
"epoch": 13.06,
"learning_rate": 4.615384615384615e-10,
"loss": 0.419,
"step": 7000
},
{
"epoch": 13.06,
"eval_loss": 0.5653703808784485,
"eval_runtime": 1226.1688,
"eval_samples_per_second": 13.164,
"eval_steps_per_second": 0.412,
"eval_wer": 27.650982108014144,
"step": 7000
},
{
"epoch": 13.06,
"step": 7000,
"total_flos": 2.905221919997952e+19,
"train_loss": 0.5182813051768712,
"train_runtime": 39943.7657,
"train_samples_per_second": 11.216,
"train_steps_per_second": 0.175
}
],
"logging_steps": 25,
"max_steps": 7000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 2.905221919997952e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}