{ "best_metric": 0.1927209496498108, "best_model_checkpoint": "./vit-base-brain-tumor-detection3/checkpoint-1500", "epoch": 60.0, "eval_steps": 500, "global_step": 3840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.78125, "grad_norm": 0.014016176573932171, "learning_rate": 5.000000000000001e-07, "loss": 0.0028, "step": 50 }, { "epoch": 1.5625, "grad_norm": 0.013060510158538818, "learning_rate": 1.0000000000000002e-06, "loss": 0.0028, "step": 100 }, { "epoch": 2.34375, "grad_norm": 0.013337934389710426, "learning_rate": 1.5e-06, "loss": 0.0028, "step": 150 }, { "epoch": 3.125, "grad_norm": 0.013658256269991398, "learning_rate": 2.0000000000000003e-06, "loss": 0.0027, "step": 200 }, { "epoch": 3.90625, "grad_norm": 0.012454299256205559, "learning_rate": 2.5e-06, "loss": 0.0026, "step": 250 }, { "epoch": 4.6875, "grad_norm": 0.013191607780754566, "learning_rate": 3e-06, "loss": 0.0026, "step": 300 }, { "epoch": 5.46875, "grad_norm": 0.0118486937135458, "learning_rate": 3.5e-06, "loss": 0.0025, "step": 350 }, { "epoch": 6.25, "grad_norm": 0.0345335379242897, "learning_rate": 4.000000000000001e-06, "loss": 0.0024, "step": 400 }, { "epoch": 7.03125, "grad_norm": 0.013763554394245148, "learning_rate": 4.5e-06, "loss": 0.0137, "step": 450 }, { "epoch": 7.8125, "grad_norm": 0.014735482633113861, "learning_rate": 5e-06, "loss": 0.0048, "step": 500 }, { "epoch": 7.8125, "eval_accuracy": 0.947265625, "eval_loss": 0.2336536943912506, "eval_runtime": 5.8997, "eval_samples_per_second": 173.569, "eval_steps_per_second": 21.696, "step": 500 }, { "epoch": 8.59375, "grad_norm": 0.013056355528533459, "learning_rate": 5.500000000000001e-06, "loss": 0.0022, "step": 550 }, { "epoch": 9.375, "grad_norm": 0.009187333285808563, "learning_rate": 6e-06, "loss": 0.0021, "step": 600 }, { "epoch": 10.15625, "grad_norm": 0.0087556978687644, "learning_rate": 6.5000000000000004e-06, "loss": 0.0019, "step": 650 }, { "epoch": 10.9375, "grad_norm": 0.008410913869738579, "learning_rate": 7e-06, "loss": 0.0018, "step": 700 }, { "epoch": 11.71875, "grad_norm": 0.008203917182981968, "learning_rate": 7.500000000000001e-06, "loss": 0.0017, "step": 750 }, { "epoch": 12.5, "grad_norm": 0.007246215827763081, "learning_rate": 8.000000000000001e-06, "loss": 0.0016, "step": 800 }, { "epoch": 13.28125, "grad_norm": 0.006727874744683504, "learning_rate": 8.5e-06, "loss": 0.0015, "step": 850 }, { "epoch": 14.0625, "grad_norm": 0.007697463966906071, "learning_rate": 9e-06, "loss": 0.0014, "step": 900 }, { "epoch": 14.84375, "grad_norm": 0.005949131678789854, "learning_rate": 9.5e-06, "loss": 0.0013, "step": 950 }, { "epoch": 15.625, "grad_norm": 0.0054717655293643475, "learning_rate": 1e-05, "loss": 0.0012, "step": 1000 }, { "epoch": 15.625, "eval_accuracy": 0.953125, "eval_loss": 0.19501826167106628, "eval_runtime": 5.9147, "eval_samples_per_second": 173.128, "eval_steps_per_second": 21.641, "step": 1000 }, { "epoch": 16.40625, "grad_norm": 0.005219893530011177, "learning_rate": 9.965181058495823e-06, "loss": 0.0011, "step": 1050 }, { "epoch": 17.1875, "grad_norm": 0.004757468122988939, "learning_rate": 9.930362116991644e-06, "loss": 0.0011, "step": 1100 }, { "epoch": 17.96875, "grad_norm": 0.004971610382199287, "learning_rate": 9.895543175487466e-06, "loss": 0.001, "step": 1150 }, { "epoch": 18.75, "grad_norm": 0.0046828743070364, "learning_rate": 9.860724233983288e-06, "loss": 0.0009, "step": 1200 }, { "epoch": 19.53125, "grad_norm": 0.004280711989849806, "learning_rate": 9.82590529247911e-06, "loss": 0.0009, "step": 1250 }, { "epoch": 20.3125, "grad_norm": 0.004425444174557924, "learning_rate": 9.79108635097493e-06, "loss": 0.0008, "step": 1300 }, { "epoch": 21.09375, "grad_norm": 0.0037732652854174376, "learning_rate": 9.756267409470753e-06, "loss": 0.0008, "step": 1350 }, { "epoch": 21.875, "grad_norm": 0.0033754699397832155, "learning_rate": 9.721448467966575e-06, "loss": 0.0007, "step": 1400 }, { "epoch": 22.65625, "grad_norm": 0.003637350630015135, "learning_rate": 9.686629526462397e-06, "loss": 0.0007, "step": 1450 }, { "epoch": 23.4375, "grad_norm": 0.003412399208173156, "learning_rate": 9.651810584958218e-06, "loss": 0.0007, "step": 1500 }, { "epoch": 23.4375, "eval_accuracy": 0.9580078125, "eval_loss": 0.1927209496498108, "eval_runtime": 5.2401, "eval_samples_per_second": 195.416, "eval_steps_per_second": 24.427, "step": 1500 }, { "epoch": 24.21875, "grad_norm": 0.002839893801137805, "learning_rate": 9.61699164345404e-06, "loss": 0.0006, "step": 1550 }, { "epoch": 25.0, "grad_norm": 0.0031008291989564896, "learning_rate": 9.58217270194986e-06, "loss": 0.0006, "step": 1600 }, { "epoch": 25.78125, "grad_norm": 0.002541514113545418, "learning_rate": 9.547353760445683e-06, "loss": 0.0006, "step": 1650 }, { "epoch": 26.5625, "grad_norm": 0.0025104843080043793, "learning_rate": 9.512534818941505e-06, "loss": 0.0005, "step": 1700 }, { "epoch": 27.34375, "grad_norm": 0.0023143806029111147, "learning_rate": 9.477715877437327e-06, "loss": 0.0005, "step": 1750 }, { "epoch": 28.125, "grad_norm": 0.0023780674673616886, "learning_rate": 9.442896935933148e-06, "loss": 0.0005, "step": 1800 }, { "epoch": 28.90625, "grad_norm": 0.002274406375363469, "learning_rate": 9.40807799442897e-06, "loss": 0.0005, "step": 1850 }, { "epoch": 29.6875, "grad_norm": 0.002076026052236557, "learning_rate": 9.373259052924792e-06, "loss": 0.0005, "step": 1900 }, { "epoch": 30.46875, "grad_norm": 0.0024436817038804293, "learning_rate": 9.338440111420614e-06, "loss": 0.0004, "step": 1950 }, { "epoch": 31.25, "grad_norm": 0.0018446892499923706, "learning_rate": 9.303621169916436e-06, "loss": 0.0004, "step": 2000 }, { "epoch": 31.25, "eval_accuracy": 0.962890625, "eval_loss": 0.1969820261001587, "eval_runtime": 5.2387, "eval_samples_per_second": 195.469, "eval_steps_per_second": 24.434, "step": 2000 }, { "epoch": 32.03125, "grad_norm": 0.0020159403793513775, "learning_rate": 9.268802228412257e-06, "loss": 0.0004, "step": 2050 }, { "epoch": 32.8125, "grad_norm": 0.0019202978583052754, "learning_rate": 9.23398328690808e-06, "loss": 0.0004, "step": 2100 }, { "epoch": 33.59375, "grad_norm": 0.0030681404750794172, "learning_rate": 9.1991643454039e-06, "loss": 0.0004, "step": 2150 }, { "epoch": 34.375, "grad_norm": 0.0016341815935447812, "learning_rate": 9.164345403899722e-06, "loss": 0.0004, "step": 2200 }, { "epoch": 35.15625, "grad_norm": 0.0016691142227500677, "learning_rate": 9.129526462395544e-06, "loss": 0.0003, "step": 2250 }, { "epoch": 35.9375, "grad_norm": 0.0017921621911227703, "learning_rate": 9.094707520891366e-06, "loss": 0.0003, "step": 2300 }, { "epoch": 36.71875, "grad_norm": 0.00160547427367419, "learning_rate": 9.059888579387187e-06, "loss": 0.0003, "step": 2350 }, { "epoch": 37.5, "grad_norm": 0.0014217059360817075, "learning_rate": 9.025069637883009e-06, "loss": 0.0003, "step": 2400 }, { "epoch": 38.28125, "grad_norm": 0.001448018359951675, "learning_rate": 8.990250696378831e-06, "loss": 0.0003, "step": 2450 }, { "epoch": 39.0625, "grad_norm": 0.0017675248673185706, "learning_rate": 8.955431754874653e-06, "loss": 0.0003, "step": 2500 }, { "epoch": 39.0625, "eval_accuracy": 0.962890625, "eval_loss": 0.20403626561164856, "eval_runtime": 5.1962, "eval_samples_per_second": 197.067, "eval_steps_per_second": 24.633, "step": 2500 }, { "epoch": 39.84375, "grad_norm": 0.0017623680178076029, "learning_rate": 8.920612813370474e-06, "loss": 0.0003, "step": 2550 }, { "epoch": 40.625, "grad_norm": 0.0011810092255473137, "learning_rate": 8.885793871866296e-06, "loss": 0.0003, "step": 2600 }, { "epoch": 41.40625, "grad_norm": 0.001152553828433156, "learning_rate": 8.850974930362117e-06, "loss": 0.0003, "step": 2650 }, { "epoch": 42.1875, "grad_norm": 0.0012170104309916496, "learning_rate": 8.816155988857939e-06, "loss": 0.0003, "step": 2700 }, { "epoch": 42.96875, "grad_norm": 0.0010642099659889936, "learning_rate": 8.781337047353761e-06, "loss": 0.0002, "step": 2750 }, { "epoch": 43.75, "grad_norm": 0.0010462955106049776, "learning_rate": 8.746518105849583e-06, "loss": 0.0002, "step": 2800 }, { "epoch": 44.53125, "grad_norm": 0.0010893407743424177, "learning_rate": 8.711699164345404e-06, "loss": 0.0002, "step": 2850 }, { "epoch": 45.3125, "grad_norm": 0.0010920371860265732, "learning_rate": 8.676880222841226e-06, "loss": 0.0002, "step": 2900 }, { "epoch": 46.09375, "grad_norm": 0.0010040885536000133, "learning_rate": 8.642061281337048e-06, "loss": 0.0002, "step": 2950 }, { "epoch": 46.875, "grad_norm": 0.0009422469302080572, "learning_rate": 8.60724233983287e-06, "loss": 0.0002, "step": 3000 }, { "epoch": 46.875, "eval_accuracy": 0.962890625, "eval_loss": 0.21138769388198853, "eval_runtime": 5.8076, "eval_samples_per_second": 176.32, "eval_steps_per_second": 22.04, "step": 3000 }, { "epoch": 47.65625, "grad_norm": 0.0011073002824559808, "learning_rate": 8.572423398328693e-06, "loss": 0.0002, "step": 3050 }, { "epoch": 48.4375, "grad_norm": 0.000991741195321083, "learning_rate": 8.537604456824513e-06, "loss": 0.0002, "step": 3100 }, { "epoch": 49.21875, "grad_norm": 0.0008712337585166097, "learning_rate": 8.502785515320335e-06, "loss": 0.0002, "step": 3150 }, { "epoch": 50.0, "grad_norm": 0.0008826220873743296, "learning_rate": 8.467966573816156e-06, "loss": 0.0002, "step": 3200 }, { "epoch": 50.78125, "grad_norm": 0.0009179635089822114, "learning_rate": 8.433147632311978e-06, "loss": 0.0002, "step": 3250 }, { "epoch": 51.5625, "grad_norm": 0.0008320676279254258, "learning_rate": 8.3983286908078e-06, "loss": 0.0002, "step": 3300 }, { "epoch": 52.34375, "grad_norm": 0.0007437244057655334, "learning_rate": 8.363509749303623e-06, "loss": 0.0002, "step": 3350 }, { "epoch": 53.125, "grad_norm": 0.0007439731853082776, "learning_rate": 8.328690807799443e-06, "loss": 0.0002, "step": 3400 }, { "epoch": 53.90625, "grad_norm": 0.0007023093639872968, "learning_rate": 8.293871866295265e-06, "loss": 0.0002, "step": 3450 }, { "epoch": 54.6875, "grad_norm": 0.0011785700917243958, "learning_rate": 8.259052924791087e-06, "loss": 0.0002, "step": 3500 }, { "epoch": 54.6875, "eval_accuracy": 0.96484375, "eval_loss": 0.217063769698143, "eval_runtime": 5.3451, "eval_samples_per_second": 191.577, "eval_steps_per_second": 23.947, "step": 3500 }, { "epoch": 55.46875, "grad_norm": 0.0007988162687979639, "learning_rate": 8.22423398328691e-06, "loss": 0.0001, "step": 3550 }, { "epoch": 56.25, "grad_norm": 0.0009737128275446594, "learning_rate": 8.18941504178273e-06, "loss": 0.0001, "step": 3600 }, { "epoch": 57.03125, "grad_norm": 0.0006344786379486322, "learning_rate": 8.154596100278552e-06, "loss": 0.0001, "step": 3650 }, { "epoch": 57.8125, "grad_norm": 0.0009238629718311131, "learning_rate": 8.119777158774373e-06, "loss": 0.0001, "step": 3700 }, { "epoch": 58.59375, "grad_norm": 0.000863746739923954, "learning_rate": 8.084958217270195e-06, "loss": 0.0001, "step": 3750 }, { "epoch": 59.375, "grad_norm": 0.0005797584308311343, "learning_rate": 8.050139275766017e-06, "loss": 0.0001, "step": 3800 }, { "epoch": 60.0, "step": 3840, "total_flos": 1.904477274611122e+19, "train_loss": 0.0010260362852325974, "train_runtime": 2806.5312, "train_samples_per_second": 87.567, "train_steps_per_second": 1.368 } ], "logging_steps": 50, "max_steps": 3840, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.904477274611122e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }