|
{ |
|
"best_metric": 0.6884405016899109, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_qqp_128/checkpoint-31273", |
|
"epoch": 16.0, |
|
"global_step": 45488, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.9518, |
|
"step": 2843 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7536482809794707, |
|
"eval_combined_score": 0.7033049767266691, |
|
"eval_f1": 0.6529616724738676, |
|
"eval_loss": 0.8352172374725342, |
|
"eval_runtime": 71.9208, |
|
"eval_samples_per_second": 562.146, |
|
"eval_steps_per_second": 4.394, |
|
"step": 2843 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.8249, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7606727677467228, |
|
"eval_combined_score": 0.6912633809032191, |
|
"eval_f1": 0.6218539940597155, |
|
"eval_loss": 0.7766293883323669, |
|
"eval_runtime": 71.8035, |
|
"eval_samples_per_second": 563.064, |
|
"eval_steps_per_second": 4.401, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.7847, |
|
"step": 8529 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.764828097947069, |
|
"eval_combined_score": 0.7025018479441673, |
|
"eval_f1": 0.6401755979412656, |
|
"eval_loss": 0.7624924778938293, |
|
"eval_runtime": 71.7415, |
|
"eval_samples_per_second": 563.551, |
|
"eval_steps_per_second": 4.405, |
|
"step": 8529 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.7498, |
|
"step": 11372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7637892653969824, |
|
"eval_combined_score": 0.6917464963493354, |
|
"eval_f1": 0.6197037273016884, |
|
"eval_loss": 0.7550554275512695, |
|
"eval_runtime": 71.5767, |
|
"eval_samples_per_second": 564.848, |
|
"eval_steps_per_second": 4.415, |
|
"step": 11372 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.7137, |
|
"step": 14215 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7691318327974277, |
|
"eval_combined_score": 0.7118036449838919, |
|
"eval_f1": 0.6544754571703562, |
|
"eval_loss": 0.7386809587478638, |
|
"eval_runtime": 71.6477, |
|
"eval_samples_per_second": 564.289, |
|
"eval_steps_per_second": 4.41, |
|
"step": 14215 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6762, |
|
"step": 17058 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7753153598812763, |
|
"eval_combined_score": 0.7236628792473973, |
|
"eval_f1": 0.6720103986135182, |
|
"eval_loss": 0.7165194153785706, |
|
"eval_runtime": 71.8883, |
|
"eval_samples_per_second": 562.4, |
|
"eval_steps_per_second": 4.396, |
|
"step": 17058 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.6373, |
|
"step": 19901 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.778308186989859, |
|
"eval_combined_score": 0.7273846100454836, |
|
"eval_f1": 0.6764610331011082, |
|
"eval_loss": 0.7042196989059448, |
|
"eval_runtime": 71.6003, |
|
"eval_samples_per_second": 564.662, |
|
"eval_steps_per_second": 4.413, |
|
"step": 19901 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.6045, |
|
"step": 22744 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7798664358149888, |
|
"eval_combined_score": 0.7350428593972265, |
|
"eval_f1": 0.690219282979464, |
|
"eval_loss": 0.7075350880622864, |
|
"eval_runtime": 71.5758, |
|
"eval_samples_per_second": 564.856, |
|
"eval_steps_per_second": 4.415, |
|
"step": 22744 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.5729, |
|
"step": 25587 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7791738807815978, |
|
"eval_combined_score": 0.7215273063270988, |
|
"eval_f1": 0.6638807318725999, |
|
"eval_loss": 0.7232872843742371, |
|
"eval_runtime": 71.3607, |
|
"eval_samples_per_second": 566.558, |
|
"eval_steps_per_second": 4.428, |
|
"step": 25587 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.545, |
|
"step": 28430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7805342567400445, |
|
"eval_combined_score": 0.7492781557955792, |
|
"eval_f1": 0.7180220548511138, |
|
"eval_loss": 0.708789587020874, |
|
"eval_runtime": 71.5867, |
|
"eval_samples_per_second": 564.77, |
|
"eval_steps_per_second": 4.414, |
|
"step": 28430 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.5183, |
|
"step": 31273 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7871877318822657, |
|
"eval_combined_score": 0.7466776716921062, |
|
"eval_f1": 0.7061676115019466, |
|
"eval_loss": 0.6884405016899109, |
|
"eval_runtime": 71.6715, |
|
"eval_samples_per_second": 564.102, |
|
"eval_steps_per_second": 4.409, |
|
"step": 31273 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.4948, |
|
"step": 34116 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7868661884739055, |
|
"eval_combined_score": 0.7472385300891464, |
|
"eval_f1": 0.7076108717043874, |
|
"eval_loss": 0.7063745260238647, |
|
"eval_runtime": 71.4754, |
|
"eval_samples_per_second": 565.649, |
|
"eval_steps_per_second": 4.421, |
|
"step": 34116 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.4724, |
|
"step": 36959 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7883502349740292, |
|
"eval_combined_score": 0.7501615189141915, |
|
"eval_f1": 0.7119728028543537, |
|
"eval_loss": 0.7052645683288574, |
|
"eval_runtime": 71.6405, |
|
"eval_samples_per_second": 564.345, |
|
"eval_steps_per_second": 4.411, |
|
"step": 36959 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.4514, |
|
"step": 39802 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7903042295325253, |
|
"eval_combined_score": 0.7463735102456954, |
|
"eval_f1": 0.7024427909588656, |
|
"eval_loss": 0.7314090728759766, |
|
"eval_runtime": 71.7172, |
|
"eval_samples_per_second": 563.742, |
|
"eval_steps_per_second": 4.406, |
|
"step": 39802 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4321, |
|
"step": 42645 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7891169923324264, |
|
"eval_combined_score": 0.7559694194431963, |
|
"eval_f1": 0.7228218465539662, |
|
"eval_loss": 0.7111542820930481, |
|
"eval_runtime": 71.6308, |
|
"eval_samples_per_second": 564.422, |
|
"eval_steps_per_second": 4.412, |
|
"step": 42645 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.4152, |
|
"step": 45488 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7908978481325748, |
|
"eval_combined_score": 0.7560176006727, |
|
"eval_f1": 0.721137353212825, |
|
"eval_loss": 0.7409694194793701, |
|
"eval_runtime": 71.6151, |
|
"eval_samples_per_second": 564.546, |
|
"eval_steps_per_second": 4.412, |
|
"step": 45488 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 45488, |
|
"total_flos": 1.3875395219895091e+17, |
|
"train_loss": 0.6153143427550604, |
|
"train_runtime": 24971.032, |
|
"train_samples_per_second": 728.536, |
|
"train_steps_per_second": 5.693 |
|
} |
|
], |
|
"max_steps": 142150, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.3875395219895091e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|