{ "best_metric": 0.5105542540550232, "best_model_checkpoint": "./beans_outputs/checkpoint-1170", "epoch": 10.0, "eval_steps": 500, "global_step": 1300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7692307692307693, "grad_norm": 7.213888168334961, "learning_rate": 5e-05, "loss": 1.094, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.518796992481203, "eval_loss": 1.0090891122817993, "eval_runtime": 1.1391, "eval_samples_per_second": 116.756, "eval_steps_per_second": 14.924, "step": 130 }, { "epoch": 1.5384615384615383, "grad_norm": 2.6927850246429443, "learning_rate": 5e-05, "loss": 1.0, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.6090225563909775, "eval_loss": 0.880857527256012, "eval_runtime": 1.158, "eval_samples_per_second": 114.856, "eval_steps_per_second": 14.681, "step": 260 }, { "epoch": 2.3076923076923075, "grad_norm": 6.594575881958008, "learning_rate": 5e-05, "loss": 0.8928, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.6390977443609023, "eval_loss": 0.8694148063659668, "eval_runtime": 1.1236, "eval_samples_per_second": 118.367, "eval_steps_per_second": 15.13, "step": 390 }, { "epoch": 3.076923076923077, "grad_norm": 7.34794807434082, "learning_rate": 5e-05, "loss": 0.7855, "step": 400 }, { "epoch": 3.8461538461538463, "grad_norm": 4.2354817390441895, "learning_rate": 5e-05, "loss": 0.7179, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.6974284052848816, "eval_runtime": 1.1571, "eval_samples_per_second": 114.947, "eval_steps_per_second": 14.692, "step": 520 }, { "epoch": 4.615384615384615, "grad_norm": 5.142866611480713, "learning_rate": 5e-05, "loss": 0.6783, "step": 600 }, { "epoch": 5.0, "eval_accuracy": 0.7518796992481203, "eval_loss": 0.669804573059082, "eval_runtime": 1.0102, "eval_samples_per_second": 131.662, "eval_steps_per_second": 16.829, "step": 650 }, { "epoch": 5.384615384615385, "grad_norm": 6.277930736541748, "learning_rate": 5e-05, "loss": 0.7052, "step": 700 }, { "epoch": 6.0, "eval_accuracy": 0.7518796992481203, "eval_loss": 0.6211526989936829, "eval_runtime": 1.1627, "eval_samples_per_second": 114.39, "eval_steps_per_second": 14.621, "step": 780 }, { "epoch": 6.153846153846154, "grad_norm": 7.9038262367248535, "learning_rate": 5e-05, "loss": 0.636, "step": 800 }, { "epoch": 6.923076923076923, "grad_norm": 15.989542007446289, "learning_rate": 5e-05, "loss": 0.608, "step": 900 }, { "epoch": 7.0, "eval_accuracy": 0.706766917293233, "eval_loss": 0.7636638283729553, "eval_runtime": 1.1935, "eval_samples_per_second": 111.438, "eval_steps_per_second": 14.244, "step": 910 }, { "epoch": 7.6923076923076925, "grad_norm": 5.844383716583252, "learning_rate": 5e-05, "loss": 0.6457, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.8195488721804511, "eval_loss": 0.5296058654785156, "eval_runtime": 1.1689, "eval_samples_per_second": 113.785, "eval_steps_per_second": 14.544, "step": 1040 }, { "epoch": 8.461538461538462, "grad_norm": 21.204557418823242, "learning_rate": 5e-05, "loss": 0.5747, "step": 1100 }, { "epoch": 9.0, "eval_accuracy": 0.7969924812030075, "eval_loss": 0.5105542540550232, "eval_runtime": 1.037, "eval_samples_per_second": 128.258, "eval_steps_per_second": 16.394, "step": 1170 }, { "epoch": 9.23076923076923, "grad_norm": 15.454485893249512, "learning_rate": 5e-05, "loss": 0.5939, "step": 1200 }, { "epoch": 10.0, "grad_norm": 12.411216735839844, "learning_rate": 5e-05, "loss": 0.5294, "step": 1300 }, { "epoch": 10.0, "eval_accuracy": 0.6917293233082706, "eval_loss": 0.7639761567115784, "eval_runtime": 1.0303, "eval_samples_per_second": 129.085, "eval_steps_per_second": 16.5, "step": 1300 }, { "epoch": 10.0, "step": 1300, "total_flos": 8.772706474360013e+17, "train_loss": 0.7277990575937124, "train_runtime": 205.3588, "train_samples_per_second": 50.351, "train_steps_per_second": 6.33 } ], "logging_steps": 100, "max_steps": 1300, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.772706474360013e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }