BTX24's picture
End of training
fc02bbe verified
raw
history blame contribute delete
No virus
11 kB
{
"best_metric": 0.5785123966942148,
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-tekno24/checkpoint-224",
"epoch": 14.782608695652174,
"eval_steps": 500,
"global_step": 255,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5797101449275363,
"grad_norm": 4.828278064727783,
"learning_rate": 1.923076923076923e-05,
"loss": 1.4008,
"step": 10
},
{
"epoch": 0.9855072463768116,
"eval_accuracy": 0.40587695133149676,
"eval_f1": 0.3220119812956281,
"eval_loss": 1.2967476844787598,
"eval_precision": 0.3790936482459676,
"eval_recall": 0.40587695133149676,
"eval_runtime": 5.7887,
"eval_samples_per_second": 188.127,
"eval_steps_per_second": 3.11,
"step": 17
},
{
"epoch": 1.1594202898550725,
"grad_norm": 3.097071409225464,
"learning_rate": 3.846153846153846e-05,
"loss": 1.3258,
"step": 20
},
{
"epoch": 1.7391304347826086,
"grad_norm": 3.920624256134033,
"learning_rate": 4.912663755458515e-05,
"loss": 1.2363,
"step": 30
},
{
"epoch": 1.971014492753623,
"eval_accuracy": 0.5032139577594124,
"eval_f1": 0.41868311008776393,
"eval_loss": 1.130901575088501,
"eval_precision": 0.48705100222787945,
"eval_recall": 0.5032139577594124,
"eval_runtime": 6.0169,
"eval_samples_per_second": 180.99,
"eval_steps_per_second": 2.992,
"step": 34
},
{
"epoch": 2.318840579710145,
"grad_norm": 4.518988132476807,
"learning_rate": 4.6943231441048036e-05,
"loss": 1.1413,
"step": 40
},
{
"epoch": 2.898550724637681,
"grad_norm": 2.2280020713806152,
"learning_rate": 4.475982532751092e-05,
"loss": 1.1716,
"step": 50
},
{
"epoch": 2.9565217391304346,
"eval_accuracy": 0.5160697887970616,
"eval_f1": 0.43845341130360466,
"eval_loss": 1.0983200073242188,
"eval_precision": 0.46095184546832213,
"eval_recall": 0.5160697887970616,
"eval_runtime": 5.9715,
"eval_samples_per_second": 182.365,
"eval_steps_per_second": 3.014,
"step": 51
},
{
"epoch": 3.4782608695652173,
"grad_norm": 4.956761837005615,
"learning_rate": 4.2576419213973804e-05,
"loss": 1.1479,
"step": 60
},
{
"epoch": 4.0,
"eval_accuracy": 0.5408631772268135,
"eval_f1": 0.5013630393836427,
"eval_loss": 1.0550026893615723,
"eval_precision": 0.5066784174960421,
"eval_recall": 0.5408631772268135,
"eval_runtime": 6.0323,
"eval_samples_per_second": 180.528,
"eval_steps_per_second": 2.984,
"step": 69
},
{
"epoch": 4.057971014492754,
"grad_norm": 2.8600962162017822,
"learning_rate": 4.039301310043668e-05,
"loss": 1.098,
"step": 70
},
{
"epoch": 4.63768115942029,
"grad_norm": 2.5968716144561768,
"learning_rate": 3.8209606986899565e-05,
"loss": 1.1058,
"step": 80
},
{
"epoch": 4.9855072463768115,
"eval_accuracy": 0.5500459136822773,
"eval_f1": 0.4942077921670986,
"eval_loss": 1.039696455001831,
"eval_precision": 0.5207943123811102,
"eval_recall": 0.5500459136822773,
"eval_runtime": 6.023,
"eval_samples_per_second": 180.807,
"eval_steps_per_second": 2.989,
"step": 86
},
{
"epoch": 5.217391304347826,
"grad_norm": 4.285628318786621,
"learning_rate": 3.602620087336245e-05,
"loss": 1.0882,
"step": 90
},
{
"epoch": 5.797101449275362,
"grad_norm": 2.5915679931640625,
"learning_rate": 3.384279475982533e-05,
"loss": 1.0656,
"step": 100
},
{
"epoch": 5.971014492753623,
"eval_accuracy": 0.5555555555555556,
"eval_f1": 0.539618105421093,
"eval_loss": 1.055788278579712,
"eval_precision": 0.5486152071828155,
"eval_recall": 0.5555555555555556,
"eval_runtime": 6.049,
"eval_samples_per_second": 180.029,
"eval_steps_per_second": 2.976,
"step": 103
},
{
"epoch": 6.3768115942028984,
"grad_norm": 3.3794102668762207,
"learning_rate": 3.165938864628821e-05,
"loss": 1.0333,
"step": 110
},
{
"epoch": 6.956521739130435,
"grad_norm": 3.542375326156616,
"learning_rate": 2.9475982532751094e-05,
"loss": 1.0328,
"step": 120
},
{
"epoch": 6.956521739130435,
"eval_accuracy": 0.5730027548209367,
"eval_f1": 0.5465172742823081,
"eval_loss": 1.0216330289840698,
"eval_precision": 0.5512656679762724,
"eval_recall": 0.5730027548209367,
"eval_runtime": 5.949,
"eval_samples_per_second": 183.057,
"eval_steps_per_second": 3.026,
"step": 120
},
{
"epoch": 7.536231884057971,
"grad_norm": 4.52678108215332,
"learning_rate": 2.7510917030567685e-05,
"loss": 1.0116,
"step": 130
},
{
"epoch": 8.0,
"eval_accuracy": 0.5362718089990818,
"eval_f1": 0.5186530261287737,
"eval_loss": 1.0468671321868896,
"eval_precision": 0.5119022081965966,
"eval_recall": 0.5362718089990818,
"eval_runtime": 6.032,
"eval_samples_per_second": 180.536,
"eval_steps_per_second": 2.984,
"step": 138
},
{
"epoch": 8.115942028985508,
"grad_norm": 3.202193021774292,
"learning_rate": 2.5327510917030566e-05,
"loss": 1.0344,
"step": 140
},
{
"epoch": 8.695652173913043,
"grad_norm": 4.579655647277832,
"learning_rate": 2.3144104803493453e-05,
"loss": 1.012,
"step": 150
},
{
"epoch": 8.985507246376812,
"eval_accuracy": 0.5629017447199265,
"eval_f1": 0.5226383764667623,
"eval_loss": 1.0215554237365723,
"eval_precision": 0.5344130335534324,
"eval_recall": 0.5629017447199265,
"eval_runtime": 6.005,
"eval_samples_per_second": 181.348,
"eval_steps_per_second": 2.997,
"step": 155
},
{
"epoch": 9.27536231884058,
"grad_norm": 4.717169284820557,
"learning_rate": 2.096069868995633e-05,
"loss": 1.0312,
"step": 160
},
{
"epoch": 9.855072463768115,
"grad_norm": 4.781270503997803,
"learning_rate": 1.8777292576419214e-05,
"loss": 1.0076,
"step": 170
},
{
"epoch": 9.971014492753623,
"eval_accuracy": 0.5674931129476584,
"eval_f1": 0.5274537313386477,
"eval_loss": 1.0186196565628052,
"eval_precision": 0.5379365299560036,
"eval_recall": 0.5674931129476584,
"eval_runtime": 6.0992,
"eval_samples_per_second": 178.549,
"eval_steps_per_second": 2.951,
"step": 172
},
{
"epoch": 10.434782608695652,
"grad_norm": 4.275968551635742,
"learning_rate": 1.6593886462882098e-05,
"loss": 0.9714,
"step": 180
},
{
"epoch": 10.956521739130435,
"eval_accuracy": 0.5638200183654729,
"eval_f1": 0.5498769817492152,
"eval_loss": 1.0204566717147827,
"eval_precision": 0.5549239770702264,
"eval_recall": 0.5638200183654729,
"eval_runtime": 5.9924,
"eval_samples_per_second": 181.73,
"eval_steps_per_second": 3.004,
"step": 189
},
{
"epoch": 11.014492753623188,
"grad_norm": 10.067936897277832,
"learning_rate": 1.4410480349344979e-05,
"loss": 0.9775,
"step": 190
},
{
"epoch": 11.594202898550725,
"grad_norm": 3.973658323287964,
"learning_rate": 1.222707423580786e-05,
"loss": 0.9843,
"step": 200
},
{
"epoch": 12.0,
"eval_accuracy": 0.5656565656565656,
"eval_f1": 0.548837023367577,
"eval_loss": 1.011660099029541,
"eval_precision": 0.5494958647042864,
"eval_recall": 0.5656565656565656,
"eval_runtime": 5.945,
"eval_samples_per_second": 183.179,
"eval_steps_per_second": 3.028,
"step": 207
},
{
"epoch": 12.173913043478262,
"grad_norm": 3.4741806983947754,
"learning_rate": 1.0043668122270743e-05,
"loss": 0.947,
"step": 210
},
{
"epoch": 12.753623188405797,
"grad_norm": 3.447770118713379,
"learning_rate": 7.860262008733626e-06,
"loss": 0.9427,
"step": 220
},
{
"epoch": 12.985507246376812,
"eval_accuracy": 0.5785123966942148,
"eval_f1": 0.5642688943190026,
"eval_loss": 1.007211446762085,
"eval_precision": 0.5601763323294238,
"eval_recall": 0.5785123966942148,
"eval_runtime": 5.9174,
"eval_samples_per_second": 184.034,
"eval_steps_per_second": 3.042,
"step": 224
},
{
"epoch": 13.333333333333334,
"grad_norm": 3.590308666229248,
"learning_rate": 5.676855895196507e-06,
"loss": 0.9503,
"step": 230
},
{
"epoch": 13.91304347826087,
"grad_norm": 3.6937625408172607,
"learning_rate": 3.493449781659389e-06,
"loss": 0.9268,
"step": 240
},
{
"epoch": 13.971014492753623,
"eval_accuracy": 0.5785123966942148,
"eval_f1": 0.565188401679607,
"eval_loss": 1.0067576169967651,
"eval_precision": 0.562081206107597,
"eval_recall": 0.5785123966942148,
"eval_runtime": 5.9299,
"eval_samples_per_second": 183.645,
"eval_steps_per_second": 3.035,
"step": 241
},
{
"epoch": 14.492753623188406,
"grad_norm": 4.805427074432373,
"learning_rate": 1.3100436681222706e-06,
"loss": 0.9525,
"step": 250
},
{
"epoch": 14.782608695652174,
"eval_accuracy": 0.5785123966942148,
"eval_f1": 0.5641357580424653,
"eval_loss": 1.0072529315948486,
"eval_precision": 0.5641234911012597,
"eval_recall": 0.5785123966942148,
"eval_runtime": 6.1926,
"eval_samples_per_second": 175.856,
"eval_steps_per_second": 2.907,
"step": 255
},
{
"epoch": 14.782608695652174,
"step": 255,
"total_flos": 4.988285676013732e+18,
"train_loss": 1.0613567090501972,
"train_runtime": 1231.6208,
"train_samples_per_second": 53.016,
"train_steps_per_second": 0.207
}
],
"logging_steps": 10,
"max_steps": 255,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.988285676013732e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}