vit-base-renovation / trainer_state.json
rshrott's picture
🍻 cheers
763b2b8
raw
history blame
No virus
7 kB
{
"best_metric": 0.8943801522254944,
"best_model_checkpoint": "./vit-base-renovation/checkpoint-200",
"epoch": 8.0,
"global_step": 456,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 0.0001956140350877193,
"loss": 0.9993,
"step": 10
},
{
"epoch": 0.35,
"learning_rate": 0.0001912280701754386,
"loss": 0.9628,
"step": 20
},
{
"epoch": 0.53,
"learning_rate": 0.00018684210526315792,
"loss": 1.058,
"step": 30
},
{
"epoch": 0.7,
"learning_rate": 0.0001824561403508772,
"loss": 0.9951,
"step": 40
},
{
"epoch": 0.88,
"learning_rate": 0.0001780701754385965,
"loss": 1.0107,
"step": 50
},
{
"epoch": 1.05,
"learning_rate": 0.0001736842105263158,
"loss": 1.0481,
"step": 60
},
{
"epoch": 1.23,
"learning_rate": 0.0001692982456140351,
"loss": 0.9934,
"step": 70
},
{
"epoch": 1.4,
"learning_rate": 0.0001649122807017544,
"loss": 0.9848,
"step": 80
},
{
"epoch": 1.58,
"learning_rate": 0.0001605263157894737,
"loss": 0.8844,
"step": 90
},
{
"epoch": 1.75,
"learning_rate": 0.00015614035087719297,
"loss": 0.8483,
"step": 100
},
{
"epoch": 1.75,
"eval_accuracy": 0.5445544554455446,
"eval_loss": 0.9965408444404602,
"eval_runtime": 4.6358,
"eval_samples_per_second": 21.787,
"eval_steps_per_second": 2.804,
"step": 100
},
{
"epoch": 1.93,
"learning_rate": 0.0001517543859649123,
"loss": 0.9321,
"step": 110
},
{
"epoch": 2.11,
"learning_rate": 0.00014736842105263158,
"loss": 0.7607,
"step": 120
},
{
"epoch": 2.28,
"learning_rate": 0.00014298245614035088,
"loss": 0.6632,
"step": 130
},
{
"epoch": 2.46,
"learning_rate": 0.00013859649122807018,
"loss": 0.7359,
"step": 140
},
{
"epoch": 2.63,
"learning_rate": 0.00013421052631578948,
"loss": 0.7393,
"step": 150
},
{
"epoch": 2.81,
"learning_rate": 0.0001298245614035088,
"loss": 0.6817,
"step": 160
},
{
"epoch": 2.98,
"learning_rate": 0.00012543859649122806,
"loss": 0.6913,
"step": 170
},
{
"epoch": 3.16,
"learning_rate": 0.00012149122807017544,
"loss": 0.5017,
"step": 180
},
{
"epoch": 3.33,
"learning_rate": 0.00011710526315789474,
"loss": 0.3448,
"step": 190
},
{
"epoch": 3.51,
"learning_rate": 0.00011271929824561404,
"loss": 0.3474,
"step": 200
},
{
"epoch": 3.51,
"eval_accuracy": 0.6831683168316832,
"eval_loss": 0.8943801522254944,
"eval_runtime": 4.5642,
"eval_samples_per_second": 22.129,
"eval_steps_per_second": 2.848,
"step": 200
},
{
"epoch": 3.68,
"learning_rate": 0.00010833333333333333,
"loss": 0.3598,
"step": 210
},
{
"epoch": 3.86,
"learning_rate": 0.00010394736842105264,
"loss": 0.3214,
"step": 220
},
{
"epoch": 4.04,
"learning_rate": 9.956140350877193e-05,
"loss": 0.2831,
"step": 230
},
{
"epoch": 4.21,
"learning_rate": 9.517543859649123e-05,
"loss": 0.1586,
"step": 240
},
{
"epoch": 4.39,
"learning_rate": 9.078947368421054e-05,
"loss": 0.0929,
"step": 250
},
{
"epoch": 4.56,
"learning_rate": 8.640350877192982e-05,
"loss": 0.1087,
"step": 260
},
{
"epoch": 4.74,
"learning_rate": 8.201754385964913e-05,
"loss": 0.1019,
"step": 270
},
{
"epoch": 4.91,
"learning_rate": 7.763157894736843e-05,
"loss": 0.0528,
"step": 280
},
{
"epoch": 5.09,
"learning_rate": 7.324561403508772e-05,
"loss": 0.1051,
"step": 290
},
{
"epoch": 5.26,
"learning_rate": 6.885964912280702e-05,
"loss": 0.0328,
"step": 300
},
{
"epoch": 5.26,
"eval_accuracy": 0.6633663366336634,
"eval_loss": 1.158254861831665,
"eval_runtime": 4.4598,
"eval_samples_per_second": 22.647,
"eval_steps_per_second": 2.915,
"step": 300
},
{
"epoch": 5.44,
"learning_rate": 6.447368421052632e-05,
"loss": 0.0686,
"step": 310
},
{
"epoch": 5.61,
"learning_rate": 6.0087719298245616e-05,
"loss": 0.0358,
"step": 320
},
{
"epoch": 5.79,
"learning_rate": 5.570175438596491e-05,
"loss": 0.0255,
"step": 330
},
{
"epoch": 5.96,
"learning_rate": 5.131578947368422e-05,
"loss": 0.0401,
"step": 340
},
{
"epoch": 6.14,
"learning_rate": 4.6929824561403515e-05,
"loss": 0.0217,
"step": 350
},
{
"epoch": 6.32,
"learning_rate": 4.254385964912281e-05,
"loss": 0.0208,
"step": 360
},
{
"epoch": 6.49,
"learning_rate": 3.815789473684211e-05,
"loss": 0.0203,
"step": 370
},
{
"epoch": 6.67,
"learning_rate": 3.377192982456141e-05,
"loss": 0.0196,
"step": 380
},
{
"epoch": 6.84,
"learning_rate": 2.9385964912280706e-05,
"loss": 0.0176,
"step": 390
},
{
"epoch": 7.02,
"learning_rate": 2.5e-05,
"loss": 0.0176,
"step": 400
},
{
"epoch": 7.02,
"eval_accuracy": 0.6831683168316832,
"eval_loss": 1.0844857692718506,
"eval_runtime": 4.6632,
"eval_samples_per_second": 21.659,
"eval_steps_per_second": 2.788,
"step": 400
},
{
"epoch": 7.19,
"learning_rate": 2.06140350877193e-05,
"loss": 0.0168,
"step": 410
},
{
"epoch": 7.37,
"learning_rate": 1.62280701754386e-05,
"loss": 0.0165,
"step": 420
},
{
"epoch": 7.54,
"learning_rate": 1.1842105263157895e-05,
"loss": 0.0161,
"step": 430
},
{
"epoch": 7.72,
"learning_rate": 7.456140350877193e-06,
"loss": 0.0163,
"step": 440
},
{
"epoch": 7.89,
"learning_rate": 3.070175438596491e-06,
"loss": 0.0162,
"step": 450
},
{
"epoch": 8.0,
"step": 456,
"total_flos": 5.6352679914450125e+17,
"train_loss": 0.3986694132745789,
"train_runtime": 506.3898,
"train_samples_per_second": 14.36,
"train_steps_per_second": 0.9
}
],
"max_steps": 456,
"num_train_epochs": 8,
"total_flos": 5.6352679914450125e+17,
"trial_name": null,
"trial_params": null
}