{ "best_metric": 0.6879923855830649, "best_model_checkpoint": "output/pretraining/vihealthbert-w_unsup-SynPD/lr3e-5_wr0.1_wd0.0/checkpoint-55000", "epoch": 10.0, "eval_steps": 5000, "global_step": 58030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017232465965879716, "grad_norm": 41.827266693115234, "learning_rate": 5.169739789763915e-09, "loss": 13.7242, "step": 1 }, { "epoch": 0.8616232982939859, "grad_norm": 5.6151957511901855, "learning_rate": 2.584869894881958e-05, "loss": 7.0234, "step": 5000 }, { "epoch": 0.8616232982939859, "eval_accuracy": 0.5576033171446898, "eval_loss": 2.590928077697754, "eval_runtime": 37.589, "eval_samples_per_second": 260.023, "eval_steps_per_second": 16.255, "step": 5000 }, { "epoch": 1.7232465965879717, "grad_norm": 4.410743713378906, "learning_rate": 2.758917801137343e-05, "loss": 5.2736, "step": 10000 }, { "epoch": 1.7232465965879717, "eval_accuracy": 0.596153624823046, "eval_loss": 2.188957452774048, "eval_runtime": 37.6297, "eval_samples_per_second": 259.741, "eval_steps_per_second": 16.237, "step": 10000 }, { "epoch": 2.5848698948819577, "grad_norm": 4.754213809967041, "learning_rate": 2.4717100350393475e-05, "loss": 4.9126, "step": 15000 }, { "epoch": 2.5848698948819577, "eval_accuracy": 0.6381037526075694, "eval_loss": 1.909491777420044, "eval_runtime": 36.8556, "eval_samples_per_second": 265.197, "eval_steps_per_second": 16.578, "step": 15000 }, { "epoch": 3.4464931931759435, "grad_norm": 4.2047200202941895, "learning_rate": 2.184502268941352e-05, "loss": 4.791, "step": 20000 }, { "epoch": 3.4464931931759435, "eval_accuracy": 0.6468589272593681, "eval_loss": 1.8286069631576538, "eval_runtime": 35.9342, "eval_samples_per_second": 271.997, "eval_steps_per_second": 17.003, "step": 20000 }, { "epoch": 4.30811649146993, "grad_norm": 4.629519462585449, "learning_rate": 1.897294502843357e-05, "loss": 4.6538, "step": 25000 }, { "epoch": 4.30811649146993, "eval_accuracy": 0.6644416567883901, "eval_loss": 1.714424967765808, "eval_runtime": 35.6122, "eval_samples_per_second": 274.456, "eval_steps_per_second": 17.157, "step": 25000 }, { "epoch": 5.1697397897639155, "grad_norm": 3.993448257446289, "learning_rate": 1.6100867367453616e-05, "loss": 4.5846, "step": 30000 }, { "epoch": 5.1697397897639155, "eval_accuracy": 0.6703549228453575, "eval_loss": 1.6779303550720215, "eval_runtime": 36.1572, "eval_samples_per_second": 270.32, "eval_steps_per_second": 16.898, "step": 30000 }, { "epoch": 6.031363088057901, "grad_norm": 4.6959357261657715, "learning_rate": 1.3228789706473663e-05, "loss": 4.5568, "step": 35000 }, { "epoch": 6.031363088057901, "eval_accuracy": 0.6765665494261385, "eval_loss": 1.6362268924713135, "eval_runtime": 36.2906, "eval_samples_per_second": 269.326, "eval_steps_per_second": 16.836, "step": 35000 }, { "epoch": 6.892986386351887, "grad_norm": 4.4082441329956055, "learning_rate": 1.035671204549371e-05, "loss": 4.5079, "step": 40000 }, { "epoch": 6.892986386351887, "eval_accuracy": 0.681425908452811, "eval_loss": 1.6008453369140625, "eval_runtime": 36.1284, "eval_samples_per_second": 270.535, "eval_steps_per_second": 16.912, "step": 40000 }, { "epoch": 7.754609684645873, "grad_norm": 5.832913398742676, "learning_rate": 7.484634384513757e-06, "loss": 4.469, "step": 45000 }, { "epoch": 7.754609684645873, "eval_accuracy": 0.6805218690729352, "eval_loss": 1.6063588857650757, "eval_runtime": 36.0752, "eval_samples_per_second": 270.934, "eval_steps_per_second": 16.937, "step": 45000 }, { "epoch": 8.61623298293986, "grad_norm": 3.991995096206665, "learning_rate": 4.612556723533804e-06, "loss": 4.4514, "step": 50000 }, { "epoch": 8.61623298293986, "eval_accuracy": 0.6852411542306852, "eval_loss": 1.5799689292907715, "eval_runtime": 37.5803, "eval_samples_per_second": 260.083, "eval_steps_per_second": 16.259, "step": 50000 }, { "epoch": 9.477856281233844, "grad_norm": 4.424502849578857, "learning_rate": 1.7404790625538515e-06, "loss": 4.4317, "step": 55000 }, { "epoch": 9.477856281233844, "eval_accuracy": 0.6879923855830649, "eval_loss": 1.5539859533309937, "eval_runtime": 35.7439, "eval_samples_per_second": 273.445, "eval_steps_per_second": 17.094, "step": 55000 }, { "epoch": 10.0, "step": 58030, "total_flos": 0.0, "train_loss": 4.85471958516693, "train_runtime": 21406.4835, "train_samples_per_second": 86.742, "train_steps_per_second": 2.711 } ], "logging_steps": 5000, "max_steps": 58030, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 5000, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }