{ "best_metric": 0.4276963144473042, "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-8552", "epoch": 4.0, "eval_steps": 500, "global_step": 8552, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23386342376052385, "grad_norm": 3.80633282661438, "learning_rate": 1.4038087770081304e-06, "loss": 0.6287, "step": 500 }, { "epoch": 0.4677268475210477, "grad_norm": 5.112864017486572, "learning_rate": 1.3349270902069464e-06, "loss": 0.5844, "step": 1000 }, { "epoch": 0.7015902712815716, "grad_norm": 16.39091682434082, "learning_rate": 1.2660454034057625e-06, "loss": 0.5781, "step": 1500 }, { "epoch": 0.9354536950420954, "grad_norm": 7.839633464813232, "learning_rate": 1.1971637166045783e-06, "loss": 0.5482, "step": 2000 }, { "epoch": 1.0, "eval_loss": 0.5578071475028992, "eval_matthews_correlation": 0.25376727839543856, "eval_runtime": 1.0111, "eval_samples_per_second": 1031.593, "eval_steps_per_second": 129.567, "step": 2138 }, { "epoch": 1.1693171188026192, "grad_norm": 17.102230072021484, "learning_rate": 1.1282820298033944e-06, "loss": 0.5349, "step": 2500 }, { "epoch": 1.4031805425631432, "grad_norm": 14.366960525512695, "learning_rate": 1.0594003430022104e-06, "loss": 0.5129, "step": 3000 }, { "epoch": 1.637043966323667, "grad_norm": 26.894271850585938, "learning_rate": 9.905186562010263e-07, "loss": 0.482, "step": 3500 }, { "epoch": 1.8709073900841908, "grad_norm": 9.88732624053955, "learning_rate": 9.216369693998422e-07, "loss": 0.4997, "step": 4000 }, { "epoch": 2.0, "eval_loss": 0.5702487230300903, "eval_matthews_correlation": 0.4033061591161787, "eval_runtime": 1.0209, "eval_samples_per_second": 1021.619, "eval_steps_per_second": 128.315, "step": 4276 }, { "epoch": 2.1047708138447145, "grad_norm": 10.997356414794922, "learning_rate": 8.527552825986582e-07, "loss": 0.4925, "step": 4500 }, { "epoch": 2.3386342376052385, "grad_norm": 10.581222534179688, "learning_rate": 7.838735957974742e-07, "loss": 0.4627, "step": 5000 }, { "epoch": 2.5724976613657624, "grad_norm": 23.820741653442383, "learning_rate": 7.149919089962902e-07, "loss": 0.4931, "step": 5500 }, { "epoch": 2.8063610851262863, "grad_norm": 34.82048416137695, "learning_rate": 6.461102221951062e-07, "loss": 0.4764, "step": 6000 }, { "epoch": 3.0, "eval_loss": 0.6053777933120728, "eval_matthews_correlation": 0.42419730237002035, "eval_runtime": 1.0601, "eval_samples_per_second": 983.872, "eval_steps_per_second": 123.574, "step": 6414 }, { "epoch": 3.0402245088868103, "grad_norm": 14.74622917175293, "learning_rate": 5.772285353939221e-07, "loss": 0.4914, "step": 6500 }, { "epoch": 3.2740879326473338, "grad_norm": 14.342729568481445, "learning_rate": 5.083468485927381e-07, "loss": 0.4473, "step": 7000 }, { "epoch": 3.5079513564078577, "grad_norm": 49.85002517700195, "learning_rate": 4.3946516179155406e-07, "loss": 0.4811, "step": 7500 }, { "epoch": 3.7418147801683816, "grad_norm": 42.889713287353516, "learning_rate": 3.705834749903701e-07, "loss": 0.4755, "step": 8000 }, { "epoch": 3.9756782039289056, "grad_norm": 8.599506378173828, "learning_rate": 3.01701788189186e-07, "loss": 0.4762, "step": 8500 }, { "epoch": 4.0, "eval_loss": 0.6376412510871887, "eval_matthews_correlation": 0.4276963144473042, "eval_runtime": 1.1398, "eval_samples_per_second": 915.085, "eval_steps_per_second": 114.934, "step": 8552 } ], "logging_steps": 500, "max_steps": 10690, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 138658697190840.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 1.4726904638093145e-06, "num_train_epochs": 5, "per_device_train_batch_size": 4, "seed": 37 } }