{ "best_metric": 0.030865401029586792, "best_model_checkpoint": "./vit-brain-tumour/checkpoint-900", "epoch": 4.0, "eval_steps": 100, "global_step": 940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0425531914893617, "grad_norm": 2.4241483211517334, "learning_rate": 0.00019787234042553193, "loss": 0.9058, "step": 10 }, { "epoch": 0.0851063829787234, "grad_norm": 1.0025970935821533, "learning_rate": 0.00019574468085106384, "loss": 0.4081, "step": 20 }, { "epoch": 0.1276595744680851, "grad_norm": 1.7178714275360107, "learning_rate": 0.00019361702127659576, "loss": 0.221, "step": 30 }, { "epoch": 0.1702127659574468, "grad_norm": 1.0210686922073364, "learning_rate": 0.00019148936170212768, "loss": 0.4859, "step": 40 }, { "epoch": 0.2127659574468085, "grad_norm": 1.8148099184036255, "learning_rate": 0.00018936170212765957, "loss": 0.1614, "step": 50 }, { "epoch": 0.2553191489361702, "grad_norm": 2.7210752964019775, "learning_rate": 0.0001872340425531915, "loss": 0.1979, "step": 60 }, { "epoch": 0.2978723404255319, "grad_norm": 1.6892952919006348, "learning_rate": 0.0001851063829787234, "loss": 0.1603, "step": 70 }, { "epoch": 0.3404255319148936, "grad_norm": 0.6372403502464294, "learning_rate": 0.00018297872340425532, "loss": 0.2073, "step": 80 }, { "epoch": 0.3829787234042553, "grad_norm": 0.22926707565784454, "learning_rate": 0.00018085106382978726, "loss": 0.1834, "step": 90 }, { "epoch": 0.425531914893617, "grad_norm": 3.5747313499450684, "learning_rate": 0.00017872340425531915, "loss": 0.227, "step": 100 }, { "epoch": 0.425531914893617, "eval_accuracy": 0.8909599254426841, "eval_loss": 0.3066631555557251, "eval_runtime": 4.984, "eval_samples_per_second": 215.29, "eval_steps_per_second": 27.087, "step": 100 }, { "epoch": 0.46808510638297873, "grad_norm": 2.685378074645996, "learning_rate": 0.00017659574468085107, "loss": 0.1576, "step": 110 }, { "epoch": 0.5106382978723404, "grad_norm": 2.99109148979187, "learning_rate": 0.00017446808510638298, "loss": 0.1967, "step": 120 }, { "epoch": 0.5531914893617021, "grad_norm": 0.5496222376823425, "learning_rate": 0.0001723404255319149, "loss": 0.2398, "step": 130 }, { "epoch": 0.5957446808510638, "grad_norm": 2.8761274814605713, "learning_rate": 0.00017021276595744682, "loss": 0.1012, "step": 140 }, { "epoch": 0.6382978723404256, "grad_norm": 1.6604396104812622, "learning_rate": 0.00016808510638297873, "loss": 0.0942, "step": 150 }, { "epoch": 0.6808510638297872, "grad_norm": 0.680061399936676, "learning_rate": 0.00016595744680851065, "loss": 0.1741, "step": 160 }, { "epoch": 0.723404255319149, "grad_norm": 0.24274976551532745, "learning_rate": 0.00016382978723404257, "loss": 0.1793, "step": 170 }, { "epoch": 0.7659574468085106, "grad_norm": 2.806928873062134, "learning_rate": 0.00016170212765957446, "loss": 0.1423, "step": 180 }, { "epoch": 0.8085106382978723, "grad_norm": 1.0465861558914185, "learning_rate": 0.00015957446808510637, "loss": 0.1028, "step": 190 }, { "epoch": 0.851063829787234, "grad_norm": 0.5258079171180725, "learning_rate": 0.00015744680851063832, "loss": 0.0659, "step": 200 }, { "epoch": 0.851063829787234, "eval_accuracy": 0.9627213420316869, "eval_loss": 0.11090704798698425, "eval_runtime": 5.4644, "eval_samples_per_second": 196.362, "eval_steps_per_second": 24.705, "step": 200 }, { "epoch": 0.8936170212765957, "grad_norm": 6.45848274230957, "learning_rate": 0.0001553191489361702, "loss": 0.0879, "step": 210 }, { "epoch": 0.9361702127659575, "grad_norm": 1.5740026235580444, "learning_rate": 0.00015319148936170213, "loss": 0.081, "step": 220 }, { "epoch": 0.9787234042553191, "grad_norm": 1.8585551977157593, "learning_rate": 0.00015106382978723407, "loss": 0.095, "step": 230 }, { "epoch": 1.0212765957446808, "grad_norm": 0.10410909354686737, "learning_rate": 0.00014893617021276596, "loss": 0.1254, "step": 240 }, { "epoch": 1.0638297872340425, "grad_norm": 0.05694637447595596, "learning_rate": 0.00014680851063829788, "loss": 0.025, "step": 250 }, { "epoch": 1.1063829787234043, "grad_norm": 0.053941983729600906, "learning_rate": 0.0001446808510638298, "loss": 0.0336, "step": 260 }, { "epoch": 1.148936170212766, "grad_norm": 5.261789798736572, "learning_rate": 0.0001425531914893617, "loss": 0.0965, "step": 270 }, { "epoch": 1.1914893617021276, "grad_norm": 1.4031267166137695, "learning_rate": 0.00014042553191489363, "loss": 0.0207, "step": 280 }, { "epoch": 1.2340425531914894, "grad_norm": 0.06696026772260666, "learning_rate": 0.00013829787234042554, "loss": 0.0992, "step": 290 }, { "epoch": 1.2765957446808511, "grad_norm": 0.055372320115566254, "learning_rate": 0.00013617021276595746, "loss": 0.0404, "step": 300 }, { "epoch": 1.2765957446808511, "eval_accuracy": 0.9776328052190121, "eval_loss": 0.09002945572137833, "eval_runtime": 5.5899, "eval_samples_per_second": 191.953, "eval_steps_per_second": 24.151, "step": 300 }, { "epoch": 1.3191489361702127, "grad_norm": 0.5205884575843811, "learning_rate": 0.00013404255319148938, "loss": 0.1412, "step": 310 }, { "epoch": 1.3617021276595744, "grad_norm": 0.23773987591266632, "learning_rate": 0.00013191489361702127, "loss": 0.0442, "step": 320 }, { "epoch": 1.4042553191489362, "grad_norm": 4.623290061950684, "learning_rate": 0.00012978723404255318, "loss": 0.0839, "step": 330 }, { "epoch": 1.4468085106382977, "grad_norm": 0.059595488011837006, "learning_rate": 0.00012765957446808513, "loss": 0.0131, "step": 340 }, { "epoch": 1.4893617021276595, "grad_norm": 5.406118392944336, "learning_rate": 0.00012553191489361702, "loss": 0.0347, "step": 350 }, { "epoch": 1.5319148936170213, "grad_norm": 0.03830511495471001, "learning_rate": 0.00012340425531914893, "loss": 0.0145, "step": 360 }, { "epoch": 1.574468085106383, "grad_norm": 0.043490536510944366, "learning_rate": 0.00012127659574468086, "loss": 0.0299, "step": 370 }, { "epoch": 1.6170212765957448, "grad_norm": 0.046317484229803085, "learning_rate": 0.00011914893617021277, "loss": 0.0225, "step": 380 }, { "epoch": 1.6595744680851063, "grad_norm": 0.2598080337047577, "learning_rate": 0.00011702127659574468, "loss": 0.0475, "step": 390 }, { "epoch": 1.702127659574468, "grad_norm": 0.43866652250289917, "learning_rate": 0.00011489361702127661, "loss": 0.05, "step": 400 }, { "epoch": 1.702127659574468, "eval_accuracy": 0.9748369058713886, "eval_loss": 0.10819696635007858, "eval_runtime": 5.1592, "eval_samples_per_second": 207.977, "eval_steps_per_second": 26.167, "step": 400 }, { "epoch": 1.7446808510638299, "grad_norm": 4.7892985343933105, "learning_rate": 0.00011276595744680852, "loss": 0.059, "step": 410 }, { "epoch": 1.7872340425531914, "grad_norm": 0.0325588583946228, "learning_rate": 0.00011063829787234043, "loss": 0.0592, "step": 420 }, { "epoch": 1.8297872340425532, "grad_norm": 14.177051544189453, "learning_rate": 0.00010851063829787234, "loss": 0.1221, "step": 430 }, { "epoch": 1.872340425531915, "grad_norm": 0.03597332164645195, "learning_rate": 0.00010638297872340425, "loss": 0.071, "step": 440 }, { "epoch": 1.9148936170212765, "grad_norm": 0.058201372623443604, "learning_rate": 0.00010425531914893618, "loss": 0.0227, "step": 450 }, { "epoch": 1.9574468085106385, "grad_norm": 1.1982567310333252, "learning_rate": 0.00010212765957446809, "loss": 0.0354, "step": 460 }, { "epoch": 2.0, "grad_norm": 0.06208239495754242, "learning_rate": 0.0001, "loss": 0.0258, "step": 470 }, { "epoch": 2.0425531914893615, "grad_norm": 0.04629204794764519, "learning_rate": 9.787234042553192e-05, "loss": 0.0322, "step": 480 }, { "epoch": 2.0851063829787235, "grad_norm": 0.029703835025429726, "learning_rate": 9.574468085106384e-05, "loss": 0.0064, "step": 490 }, { "epoch": 2.127659574468085, "grad_norm": 0.036140162497758865, "learning_rate": 9.361702127659576e-05, "loss": 0.006, "step": 500 }, { "epoch": 2.127659574468085, "eval_accuracy": 0.9888164026095061, "eval_loss": 0.037363313138484955, "eval_runtime": 5.6218, "eval_samples_per_second": 190.863, "eval_steps_per_second": 24.013, "step": 500 }, { "epoch": 2.1702127659574466, "grad_norm": 0.023606618866324425, "learning_rate": 9.148936170212766e-05, "loss": 0.02, "step": 510 }, { "epoch": 2.2127659574468086, "grad_norm": 0.04733359441161156, "learning_rate": 8.936170212765958e-05, "loss": 0.0371, "step": 520 }, { "epoch": 2.25531914893617, "grad_norm": 0.03030354529619217, "learning_rate": 8.723404255319149e-05, "loss": 0.0056, "step": 530 }, { "epoch": 2.297872340425532, "grad_norm": 0.21325312554836273, "learning_rate": 8.510638297872341e-05, "loss": 0.0146, "step": 540 }, { "epoch": 2.3404255319148937, "grad_norm": 0.018344180658459663, "learning_rate": 8.297872340425533e-05, "loss": 0.0103, "step": 550 }, { "epoch": 2.382978723404255, "grad_norm": 0.022901207208633423, "learning_rate": 8.085106382978723e-05, "loss": 0.0052, "step": 560 }, { "epoch": 2.425531914893617, "grad_norm": 0.023875948041677475, "learning_rate": 7.872340425531916e-05, "loss": 0.0048, "step": 570 }, { "epoch": 2.4680851063829787, "grad_norm": 0.01968464069068432, "learning_rate": 7.659574468085106e-05, "loss": 0.0047, "step": 580 }, { "epoch": 2.5106382978723403, "grad_norm": 0.11908008903265, "learning_rate": 7.446808510638298e-05, "loss": 0.0047, "step": 590 }, { "epoch": 2.5531914893617023, "grad_norm": 0.020715612918138504, "learning_rate": 7.23404255319149e-05, "loss": 0.0147, "step": 600 }, { "epoch": 2.5531914893617023, "eval_accuracy": 0.9888164026095061, "eval_loss": 0.0540616437792778, "eval_runtime": 5.1006, "eval_samples_per_second": 210.367, "eval_steps_per_second": 26.467, "step": 600 }, { "epoch": 2.595744680851064, "grad_norm": 0.019655458629131317, "learning_rate": 7.021276595744681e-05, "loss": 0.0041, "step": 610 }, { "epoch": 2.6382978723404253, "grad_norm": 0.04054981470108032, "learning_rate": 6.808510638297873e-05, "loss": 0.0625, "step": 620 }, { "epoch": 2.6808510638297873, "grad_norm": 0.18255342543125153, "learning_rate": 6.595744680851063e-05, "loss": 0.0171, "step": 630 }, { "epoch": 2.723404255319149, "grad_norm": 0.02170875295996666, "learning_rate": 6.382978723404256e-05, "loss": 0.0044, "step": 640 }, { "epoch": 2.7659574468085104, "grad_norm": 0.01993757300078869, "learning_rate": 6.170212765957447e-05, "loss": 0.0042, "step": 650 }, { "epoch": 2.8085106382978724, "grad_norm": 0.017473889514803886, "learning_rate": 5.9574468085106384e-05, "loss": 0.0302, "step": 660 }, { "epoch": 2.851063829787234, "grad_norm": 0.018037663772702217, "learning_rate": 5.744680851063831e-05, "loss": 0.0044, "step": 670 }, { "epoch": 2.8936170212765955, "grad_norm": 0.016940144822001457, "learning_rate": 5.531914893617022e-05, "loss": 0.0314, "step": 680 }, { "epoch": 2.9361702127659575, "grad_norm": 0.018456028774380684, "learning_rate": 5.319148936170213e-05, "loss": 0.0039, "step": 690 }, { "epoch": 2.978723404255319, "grad_norm": 0.02064073272049427, "learning_rate": 5.1063829787234044e-05, "loss": 0.0105, "step": 700 }, { "epoch": 2.978723404255319, "eval_accuracy": 0.9906803355079217, "eval_loss": 0.03594699874520302, "eval_runtime": 5.0939, "eval_samples_per_second": 210.644, "eval_steps_per_second": 26.502, "step": 700 }, { "epoch": 3.021276595744681, "grad_norm": 0.018245166167616844, "learning_rate": 4.893617021276596e-05, "loss": 0.0041, "step": 710 }, { "epoch": 3.0638297872340425, "grad_norm": 0.01676538586616516, "learning_rate": 4.680851063829788e-05, "loss": 0.0047, "step": 720 }, { "epoch": 3.106382978723404, "grad_norm": 0.018977968022227287, "learning_rate": 4.468085106382979e-05, "loss": 0.0035, "step": 730 }, { "epoch": 3.148936170212766, "grad_norm": 0.01823911815881729, "learning_rate": 4.2553191489361704e-05, "loss": 0.0048, "step": 740 }, { "epoch": 3.1914893617021276, "grad_norm": 0.017579803243279457, "learning_rate": 4.0425531914893614e-05, "loss": 0.0035, "step": 750 }, { "epoch": 3.2340425531914896, "grad_norm": 0.013467966578900814, "learning_rate": 3.829787234042553e-05, "loss": 0.004, "step": 760 }, { "epoch": 3.276595744680851, "grad_norm": 0.03895196318626404, "learning_rate": 3.617021276595745e-05, "loss": 0.0036, "step": 770 }, { "epoch": 3.3191489361702127, "grad_norm": 0.019369514659047127, "learning_rate": 3.4042553191489365e-05, "loss": 0.0034, "step": 780 }, { "epoch": 3.3617021276595747, "grad_norm": 0.014404061250388622, "learning_rate": 3.191489361702128e-05, "loss": 0.0042, "step": 790 }, { "epoch": 3.404255319148936, "grad_norm": 0.016735393553972244, "learning_rate": 2.9787234042553192e-05, "loss": 0.0032, "step": 800 }, { "epoch": 3.404255319148936, "eval_accuracy": 0.9906803355079217, "eval_loss": 0.039232004433870316, "eval_runtime": 5.1085, "eval_samples_per_second": 210.041, "eval_steps_per_second": 26.426, "step": 800 }, { "epoch": 3.4468085106382977, "grad_norm": 0.01882547326385975, "learning_rate": 2.765957446808511e-05, "loss": 0.0033, "step": 810 }, { "epoch": 3.4893617021276597, "grad_norm": 0.018505167216062546, "learning_rate": 2.5531914893617022e-05, "loss": 0.0039, "step": 820 }, { "epoch": 3.5319148936170213, "grad_norm": 0.013403504155576229, "learning_rate": 2.340425531914894e-05, "loss": 0.008, "step": 830 }, { "epoch": 3.574468085106383, "grad_norm": 0.013858008198440075, "learning_rate": 2.1276595744680852e-05, "loss": 0.0033, "step": 840 }, { "epoch": 3.617021276595745, "grad_norm": 0.01901717483997345, "learning_rate": 1.9148936170212766e-05, "loss": 0.0031, "step": 850 }, { "epoch": 3.6595744680851063, "grad_norm": 0.015291731804609299, "learning_rate": 1.7021276595744682e-05, "loss": 0.0031, "step": 860 }, { "epoch": 3.702127659574468, "grad_norm": 0.015405308455228806, "learning_rate": 1.4893617021276596e-05, "loss": 0.0037, "step": 870 }, { "epoch": 3.74468085106383, "grad_norm": 0.01782970316708088, "learning_rate": 1.2765957446808511e-05, "loss": 0.0032, "step": 880 }, { "epoch": 3.7872340425531914, "grad_norm": 0.013772828504443169, "learning_rate": 1.0638297872340426e-05, "loss": 0.0032, "step": 890 }, { "epoch": 3.829787234042553, "grad_norm": 3.2476401329040527, "learning_rate": 8.510638297872341e-06, "loss": 0.0055, "step": 900 }, { "epoch": 3.829787234042553, "eval_accuracy": 0.9925442684063374, "eval_loss": 0.030865401029586792, "eval_runtime": 5.6777, "eval_samples_per_second": 188.986, "eval_steps_per_second": 23.777, "step": 900 }, { "epoch": 3.872340425531915, "grad_norm": 0.01216566190123558, "learning_rate": 6.3829787234042555e-06, "loss": 0.0029, "step": 910 }, { "epoch": 3.9148936170212765, "grad_norm": 0.02088315039873123, "learning_rate": 4.255319148936171e-06, "loss": 0.0032, "step": 920 }, { "epoch": 3.9574468085106385, "grad_norm": 0.014219781383872032, "learning_rate": 2.1276595744680853e-06, "loss": 0.0031, "step": 930 }, { "epoch": 4.0, "grad_norm": 0.048915717750787735, "learning_rate": 0.0, "loss": 0.0031, "step": 940 }, { "epoch": 4.0, "step": 940, "total_flos": 1.1651904419373711e+18, "train_loss": 0.07046630619926021, "train_runtime": 463.0785, "train_samples_per_second": 32.47, "train_steps_per_second": 2.03 } ], "logging_steps": 10, "max_steps": 940, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1651904419373711e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }