|
{ |
|
"best_metric": 0.09645664691925049, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned_v2024-7-24-frost/checkpoint-500", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 1920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 0.3123115003108978, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.0743, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 0.10650705546140671, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0699, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 0.40636828541755676, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.0732, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.42912840843200684, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.075, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.3166373372077942, |
|
"learning_rate": 5.208333333333334e-05, |
|
"loss": 0.0695, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 0.8551476001739502, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0883, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"grad_norm": 0.18066875636577606, |
|
"learning_rate": 7.291666666666667e-05, |
|
"loss": 0.0699, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.28325945138931274, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.0627, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.40625, |
|
"grad_norm": 0.3701513409614563, |
|
"learning_rate": 9.375e-05, |
|
"loss": 0.0866, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 0.35587912797927856, |
|
"learning_rate": 0.00010416666666666667, |
|
"loss": 0.0728, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"eval_accuracy": 0.984070796460177, |
|
"eval_f1": 0.9606986899563319, |
|
"eval_loss": 0.06593623757362366, |
|
"eval_precision": 0.9691629955947136, |
|
"eval_recall": 0.9523809523809523, |
|
"eval_runtime": 0.9039, |
|
"eval_samples_per_second": 125.009, |
|
"eval_steps_per_second": 16.594, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.71875, |
|
"grad_norm": 0.23759329319000244, |
|
"learning_rate": 0.00011458333333333333, |
|
"loss": 0.0653, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 0.6092272996902466, |
|
"learning_rate": 0.000125, |
|
"loss": 0.1015, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.03125, |
|
"grad_norm": 0.15887708961963654, |
|
"learning_rate": 0.0001354166666666667, |
|
"loss": 0.09, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.1875, |
|
"grad_norm": 0.3399417996406555, |
|
"learning_rate": 0.00014583333333333335, |
|
"loss": 0.0847, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.34375, |
|
"grad_norm": 0.2599344253540039, |
|
"learning_rate": 0.00015625, |
|
"loss": 0.0722, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.20714014768600464, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 0.0915, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.65625, |
|
"grad_norm": 0.7900287508964539, |
|
"learning_rate": 0.00017708333333333335, |
|
"loss": 0.1008, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.8125, |
|
"grad_norm": 0.23315797746181488, |
|
"learning_rate": 0.0001875, |
|
"loss": 0.1142, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.96875, |
|
"grad_norm": 1.258319616317749, |
|
"learning_rate": 0.0001979166666666667, |
|
"loss": 0.1027, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 1.0168662071228027, |
|
"learning_rate": 0.0001990740740740741, |
|
"loss": 0.0871, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"eval_accuracy": 0.9566371681415929, |
|
"eval_f1": 0.8941684665226782, |
|
"eval_loss": 0.12436065077781677, |
|
"eval_precision": 0.8922413793103449, |
|
"eval_recall": 0.8961038961038961, |
|
"eval_runtime": 0.8601, |
|
"eval_samples_per_second": 131.383, |
|
"eval_steps_per_second": 17.44, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.28125, |
|
"grad_norm": 0.38566353917121887, |
|
"learning_rate": 0.0001979166666666667, |
|
"loss": 0.1166, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.4375, |
|
"grad_norm": 0.4687894284725189, |
|
"learning_rate": 0.00019675925925925926, |
|
"loss": 0.108, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.59375, |
|
"grad_norm": 0.5190223455429077, |
|
"learning_rate": 0.00019560185185185186, |
|
"loss": 0.0901, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.5094243288040161, |
|
"learning_rate": 0.00019444444444444446, |
|
"loss": 0.1144, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.90625, |
|
"grad_norm": 0.5921277403831482, |
|
"learning_rate": 0.00019328703703703706, |
|
"loss": 0.1196, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0625, |
|
"grad_norm": 0.23840609192848206, |
|
"learning_rate": 0.00019212962962962963, |
|
"loss": 0.1122, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.21875, |
|
"grad_norm": 0.9276812672615051, |
|
"learning_rate": 0.00019097222222222223, |
|
"loss": 0.1147, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.375, |
|
"grad_norm": 0.7325614094734192, |
|
"learning_rate": 0.00018981481481481483, |
|
"loss": 0.1075, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.53125, |
|
"grad_norm": 0.5574468374252319, |
|
"learning_rate": 0.00018865740740740743, |
|
"loss": 0.0958, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.6875, |
|
"grad_norm": 0.3893429934978485, |
|
"learning_rate": 0.0001875, |
|
"loss": 0.0999, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.6875, |
|
"eval_accuracy": 0.963716814159292, |
|
"eval_f1": 0.9125799573560768, |
|
"eval_loss": 0.10427873581647873, |
|
"eval_precision": 0.8991596638655462, |
|
"eval_recall": 0.9264069264069265, |
|
"eval_runtime": 0.8493, |
|
"eval_samples_per_second": 133.049, |
|
"eval_steps_per_second": 17.661, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.84375, |
|
"grad_norm": 0.49028488993644714, |
|
"learning_rate": 0.0001863425925925926, |
|
"loss": 0.1087, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.6510241627693176, |
|
"learning_rate": 0.0001851851851851852, |
|
"loss": 0.0949, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.15625, |
|
"grad_norm": 0.3244408667087555, |
|
"learning_rate": 0.00018402777777777778, |
|
"loss": 0.0957, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.3125, |
|
"grad_norm": 0.32894158363342285, |
|
"learning_rate": 0.00018287037037037038, |
|
"loss": 0.0761, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.46875, |
|
"grad_norm": 0.4168912470340729, |
|
"learning_rate": 0.00018171296296296297, |
|
"loss": 0.1014, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.625, |
|
"grad_norm": 0.30746978521347046, |
|
"learning_rate": 0.00018055555555555557, |
|
"loss": 0.0789, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.78125, |
|
"grad_norm": 0.3337535262107849, |
|
"learning_rate": 0.00017939814814814815, |
|
"loss": 0.0891, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.9375, |
|
"grad_norm": 0.2659320533275604, |
|
"learning_rate": 0.00017824074074074075, |
|
"loss": 0.0798, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.09375, |
|
"grad_norm": 0.28791913390159607, |
|
"learning_rate": 0.00017708333333333335, |
|
"loss": 0.0961, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 0.41803187131881714, |
|
"learning_rate": 0.00017592592592592595, |
|
"loss": 0.0743, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_accuracy": 0.9610619469026549, |
|
"eval_f1": 0.9043478260869565, |
|
"eval_loss": 0.10431604832410812, |
|
"eval_precision": 0.9082969432314411, |
|
"eval_recall": 0.9004329004329005, |
|
"eval_runtime": 1.3126, |
|
"eval_samples_per_second": 86.086, |
|
"eval_steps_per_second": 11.427, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.40625, |
|
"grad_norm": 0.398034930229187, |
|
"learning_rate": 0.00017476851851851852, |
|
"loss": 0.0798, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.5625, |
|
"grad_norm": 0.533364474773407, |
|
"learning_rate": 0.00017361111111111112, |
|
"loss": 0.0808, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.71875, |
|
"grad_norm": 0.6189862489700317, |
|
"learning_rate": 0.00017245370370370372, |
|
"loss": 0.091, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.875, |
|
"grad_norm": 0.31593209505081177, |
|
"learning_rate": 0.00017129629629629632, |
|
"loss": 0.0729, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.03125, |
|
"grad_norm": 0.3167741000652313, |
|
"learning_rate": 0.0001701388888888889, |
|
"loss": 0.0796, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.1875, |
|
"grad_norm": 0.30901169776916504, |
|
"learning_rate": 0.0001689814814814815, |
|
"loss": 0.0867, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.34375, |
|
"grad_norm": 0.13378705084323883, |
|
"learning_rate": 0.0001678240740740741, |
|
"loss": 0.0701, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 0.15507709980010986, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 0.0789, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.65625, |
|
"grad_norm": 0.21113860607147217, |
|
"learning_rate": 0.00016550925925925926, |
|
"loss": 0.0647, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"grad_norm": 0.15848499536514282, |
|
"learning_rate": 0.00016435185185185186, |
|
"loss": 0.0655, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"eval_accuracy": 0.963716814159292, |
|
"eval_f1": 0.9118279569892475, |
|
"eval_loss": 0.09645664691925049, |
|
"eval_precision": 0.905982905982906, |
|
"eval_recall": 0.9177489177489178, |
|
"eval_runtime": 2.2304, |
|
"eval_samples_per_second": 50.664, |
|
"eval_steps_per_second": 6.725, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.96875, |
|
"grad_norm": 0.19086486101150513, |
|
"learning_rate": 0.00016319444444444446, |
|
"loss": 0.0502, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.125, |
|
"grad_norm": 0.4851354956626892, |
|
"learning_rate": 0.00016203703703703706, |
|
"loss": 0.0646, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.28125, |
|
"grad_norm": 0.43803560733795166, |
|
"learning_rate": 0.00016087962962962963, |
|
"loss": 0.0668, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.4375, |
|
"grad_norm": 0.26552197337150574, |
|
"learning_rate": 0.00015972222222222223, |
|
"loss": 0.0549, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.59375, |
|
"grad_norm": 0.18909405171871185, |
|
"learning_rate": 0.00015856481481481483, |
|
"loss": 0.07, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 0.2485276311635971, |
|
"learning_rate": 0.00015740740740740743, |
|
"loss": 0.0525, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.90625, |
|
"grad_norm": 0.4424391984939575, |
|
"learning_rate": 0.00015625, |
|
"loss": 0.0775, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.0625, |
|
"grad_norm": 0.2584344446659088, |
|
"learning_rate": 0.0001550925925925926, |
|
"loss": 0.064, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.21875, |
|
"grad_norm": 0.6115286946296692, |
|
"learning_rate": 0.0001539351851851852, |
|
"loss": 0.0599, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 0.5155323147773743, |
|
"learning_rate": 0.00015277777777777777, |
|
"loss": 0.0559, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"eval_accuracy": 0.9619469026548673, |
|
"eval_f1": 0.9087048832271762, |
|
"eval_loss": 0.10384609550237656, |
|
"eval_precision": 0.8916666666666667, |
|
"eval_recall": 0.9264069264069265, |
|
"eval_runtime": 2.2545, |
|
"eval_samples_per_second": 50.123, |
|
"eval_steps_per_second": 6.653, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.53125, |
|
"grad_norm": 0.40844494104385376, |
|
"learning_rate": 0.00015162037037037037, |
|
"loss": 0.072, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.6875, |
|
"grad_norm": 0.2091340720653534, |
|
"learning_rate": 0.00015046296296296297, |
|
"loss": 0.0513, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.84375, |
|
"grad_norm": 0.22117160260677338, |
|
"learning_rate": 0.00014930555555555557, |
|
"loss": 0.0623, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.31236401200294495, |
|
"learning_rate": 0.00014814814814814815, |
|
"loss": 0.0534, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 10.15625, |
|
"grad_norm": 0.31281912326812744, |
|
"learning_rate": 0.00014699074074074075, |
|
"loss": 0.0644, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.3125, |
|
"grad_norm": 0.5201927423477173, |
|
"learning_rate": 0.00014583333333333335, |
|
"loss": 0.057, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 10.46875, |
|
"grad_norm": 0.2596763074398041, |
|
"learning_rate": 0.00014467592592592594, |
|
"loss": 0.0542, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 10.625, |
|
"grad_norm": 0.3063810467720032, |
|
"learning_rate": 0.00014351851851851852, |
|
"loss": 0.0389, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.78125, |
|
"grad_norm": 0.48713332414627075, |
|
"learning_rate": 0.00014236111111111112, |
|
"loss": 0.0742, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.9375, |
|
"grad_norm": 0.21316884458065033, |
|
"learning_rate": 0.00014120370370370372, |
|
"loss": 0.0517, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.9375, |
|
"eval_accuracy": 0.9584070796460177, |
|
"eval_f1": 0.8997867803837952, |
|
"eval_loss": 0.09719711542129517, |
|
"eval_precision": 0.8865546218487395, |
|
"eval_recall": 0.9134199134199135, |
|
"eval_runtime": 0.8764, |
|
"eval_samples_per_second": 128.937, |
|
"eval_steps_per_second": 17.115, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.09375, |
|
"grad_norm": 0.9216361045837402, |
|
"learning_rate": 0.00014004629629629632, |
|
"loss": 0.0623, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"grad_norm": 0.31130528450012207, |
|
"learning_rate": 0.0001388888888888889, |
|
"loss": 0.0641, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 11.40625, |
|
"grad_norm": 1.03948974609375, |
|
"learning_rate": 0.0001377314814814815, |
|
"loss": 0.0594, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 11.5625, |
|
"grad_norm": 0.12757237255573273, |
|
"learning_rate": 0.0001365740740740741, |
|
"loss": 0.0572, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 11.71875, |
|
"grad_norm": 0.25488346815109253, |
|
"learning_rate": 0.0001354166666666667, |
|
"loss": 0.0533, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.875, |
|
"grad_norm": 0.2517576813697815, |
|
"learning_rate": 0.00013425925925925926, |
|
"loss": 0.0557, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 12.03125, |
|
"grad_norm": 0.14332328736782074, |
|
"learning_rate": 0.00013310185185185186, |
|
"loss": 0.0433, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 12.1875, |
|
"grad_norm": 0.7062014937400818, |
|
"learning_rate": 0.00013194444444444446, |
|
"loss": 0.0569, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 12.34375, |
|
"grad_norm": 0.727057158946991, |
|
"learning_rate": 0.00013078703703703706, |
|
"loss": 0.0443, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 0.17331984639167786, |
|
"learning_rate": 0.00012962962962962963, |
|
"loss": 0.0407, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_accuracy": 0.963716814159292, |
|
"eval_f1": 0.9110629067245118, |
|
"eval_loss": 0.11198227852582932, |
|
"eval_precision": 0.9130434782608695, |
|
"eval_recall": 0.9090909090909091, |
|
"eval_runtime": 0.9019, |
|
"eval_samples_per_second": 125.294, |
|
"eval_steps_per_second": 16.632, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.65625, |
|
"grad_norm": 0.104576975107193, |
|
"learning_rate": 0.00012847222222222223, |
|
"loss": 0.0465, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 12.8125, |
|
"grad_norm": 0.6552168726921082, |
|
"learning_rate": 0.00012731481481481483, |
|
"loss": 0.0536, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 12.96875, |
|
"grad_norm": 0.39452189207077026, |
|
"learning_rate": 0.00012615740740740743, |
|
"loss": 0.0514, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 13.125, |
|
"grad_norm": 0.16756129264831543, |
|
"learning_rate": 0.000125, |
|
"loss": 0.0417, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 13.28125, |
|
"grad_norm": 0.13866697251796722, |
|
"learning_rate": 0.00012384259259259258, |
|
"loss": 0.0419, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 13.4375, |
|
"grad_norm": 0.9053749442100525, |
|
"learning_rate": 0.0001226851851851852, |
|
"loss": 0.0548, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 13.59375, |
|
"grad_norm": 0.43149927258491516, |
|
"learning_rate": 0.00012152777777777777, |
|
"loss": 0.0503, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"grad_norm": 0.49532395601272583, |
|
"learning_rate": 0.00012037037037037037, |
|
"loss": 0.0476, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 13.90625, |
|
"grad_norm": 0.12025842815637589, |
|
"learning_rate": 0.00011921296296296296, |
|
"loss": 0.049, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 14.0625, |
|
"grad_norm": 0.8570975065231323, |
|
"learning_rate": 0.00011805555555555556, |
|
"loss": 0.0513, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.0625, |
|
"eval_accuracy": 0.9557522123893806, |
|
"eval_f1": 0.8893805309734513, |
|
"eval_loss": 0.1092919260263443, |
|
"eval_precision": 0.9095022624434389, |
|
"eval_recall": 0.8701298701298701, |
|
"eval_runtime": 1.2237, |
|
"eval_samples_per_second": 92.344, |
|
"eval_steps_per_second": 12.258, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.21875, |
|
"grad_norm": 0.4120664596557617, |
|
"learning_rate": 0.00011689814814814815, |
|
"loss": 0.0552, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 14.375, |
|
"grad_norm": 0.24265483021736145, |
|
"learning_rate": 0.00011574074074074075, |
|
"loss": 0.0434, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 14.53125, |
|
"grad_norm": 0.23618777096271515, |
|
"learning_rate": 0.00011458333333333333, |
|
"loss": 0.0482, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 14.6875, |
|
"grad_norm": 0.1366555392742157, |
|
"learning_rate": 0.00011342592592592593, |
|
"loss": 0.045, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 14.84375, |
|
"grad_norm": 0.1841152310371399, |
|
"learning_rate": 0.00011226851851851852, |
|
"loss": 0.0539, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.6849538087844849, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 0.0363, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 15.15625, |
|
"grad_norm": 0.5442699790000916, |
|
"learning_rate": 0.0001099537037037037, |
|
"loss": 0.0372, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 15.3125, |
|
"grad_norm": 0.3825988173484802, |
|
"learning_rate": 0.0001087962962962963, |
|
"loss": 0.0405, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 15.46875, |
|
"grad_norm": 0.0459093414247036, |
|
"learning_rate": 0.00010763888888888889, |
|
"loss": 0.0386, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 0.2602522373199463, |
|
"learning_rate": 0.00010648148148148149, |
|
"loss": 0.0378, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"eval_accuracy": 0.9548672566371681, |
|
"eval_f1": 0.8888888888888888, |
|
"eval_loss": 0.11969945579767227, |
|
"eval_precision": 0.8947368421052632, |
|
"eval_recall": 0.8831168831168831, |
|
"eval_runtime": 0.8768, |
|
"eval_samples_per_second": 128.871, |
|
"eval_steps_per_second": 17.107, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.78125, |
|
"grad_norm": 0.07926033437252045, |
|
"learning_rate": 0.00010532407407407407, |
|
"loss": 0.0419, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 15.9375, |
|
"grad_norm": 0.2084084302186966, |
|
"learning_rate": 0.00010416666666666667, |
|
"loss": 0.0336, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 16.09375, |
|
"grad_norm": 0.11587415635585785, |
|
"learning_rate": 0.00010300925925925926, |
|
"loss": 0.0293, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"grad_norm": 0.4128260314464569, |
|
"learning_rate": 0.00010185185185185186, |
|
"loss": 0.0346, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 16.40625, |
|
"grad_norm": 0.2051563411951065, |
|
"learning_rate": 0.00010069444444444445, |
|
"loss": 0.0404, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 16.5625, |
|
"grad_norm": 1.0257600545883179, |
|
"learning_rate": 9.953703703703704e-05, |
|
"loss": 0.0521, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 16.71875, |
|
"grad_norm": 0.13610199093818665, |
|
"learning_rate": 9.837962962962963e-05, |
|
"loss": 0.0513, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 16.875, |
|
"grad_norm": 0.5424107909202576, |
|
"learning_rate": 9.722222222222223e-05, |
|
"loss": 0.0662, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 17.03125, |
|
"grad_norm": 0.1417212188243866, |
|
"learning_rate": 9.606481481481482e-05, |
|
"loss": 0.0364, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 17.1875, |
|
"grad_norm": 0.15864621102809906, |
|
"learning_rate": 9.490740740740742e-05, |
|
"loss": 0.0487, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.1875, |
|
"eval_accuracy": 0.9646017699115044, |
|
"eval_f1": 0.9137931034482759, |
|
"eval_loss": 0.09552007168531418, |
|
"eval_precision": 0.9098712446351931, |
|
"eval_recall": 0.9177489177489178, |
|
"eval_runtime": 0.8603, |
|
"eval_samples_per_second": 131.352, |
|
"eval_steps_per_second": 17.436, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.34375, |
|
"grad_norm": 0.5382766127586365, |
|
"learning_rate": 9.375e-05, |
|
"loss": 0.0315, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 0.16078180074691772, |
|
"learning_rate": 9.25925925925926e-05, |
|
"loss": 0.0245, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 17.65625, |
|
"grad_norm": 0.05187100172042847, |
|
"learning_rate": 9.143518518518519e-05, |
|
"loss": 0.0342, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 17.8125, |
|
"grad_norm": 0.4016551077365875, |
|
"learning_rate": 9.027777777777779e-05, |
|
"loss": 0.0285, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 17.96875, |
|
"grad_norm": 0.22832362353801727, |
|
"learning_rate": 8.912037037037037e-05, |
|
"loss": 0.0379, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 18.125, |
|
"grad_norm": 1.3720444440841675, |
|
"learning_rate": 8.796296296296297e-05, |
|
"loss": 0.0369, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 18.28125, |
|
"grad_norm": 0.23146755993366241, |
|
"learning_rate": 8.680555555555556e-05, |
|
"loss": 0.0343, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 18.4375, |
|
"grad_norm": 0.2672041356563568, |
|
"learning_rate": 8.564814814814816e-05, |
|
"loss": 0.0353, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 18.59375, |
|
"grad_norm": 0.17212288081645966, |
|
"learning_rate": 8.449074074074074e-05, |
|
"loss": 0.0315, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 0.12989170849323273, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.0272, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"eval_accuracy": 0.9566371681415929, |
|
"eval_f1": 0.8927789934354485, |
|
"eval_loss": 0.10875095427036285, |
|
"eval_precision": 0.9026548672566371, |
|
"eval_recall": 0.8831168831168831, |
|
"eval_runtime": 1.2152, |
|
"eval_samples_per_second": 92.985, |
|
"eval_steps_per_second": 12.343, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 18.90625, |
|
"grad_norm": 0.15251386165618896, |
|
"learning_rate": 8.217592592592593e-05, |
|
"loss": 0.0353, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 19.0625, |
|
"grad_norm": 0.1500956416130066, |
|
"learning_rate": 8.101851851851853e-05, |
|
"loss": 0.0258, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 19.21875, |
|
"grad_norm": 0.16236737370491028, |
|
"learning_rate": 7.986111111111112e-05, |
|
"loss": 0.0318, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 19.375, |
|
"grad_norm": 0.5188699960708618, |
|
"learning_rate": 7.870370370370372e-05, |
|
"loss": 0.0388, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 19.53125, |
|
"grad_norm": 0.14171747863292694, |
|
"learning_rate": 7.75462962962963e-05, |
|
"loss": 0.0371, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 19.6875, |
|
"grad_norm": 0.355496883392334, |
|
"learning_rate": 7.638888888888889e-05, |
|
"loss": 0.0278, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 19.84375, |
|
"grad_norm": 0.30447283387184143, |
|
"learning_rate": 7.523148148148149e-05, |
|
"loss": 0.0307, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.8779629468917847, |
|
"learning_rate": 7.407407407407407e-05, |
|
"loss": 0.0378, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 20.15625, |
|
"grad_norm": 0.24115116894245148, |
|
"learning_rate": 7.291666666666667e-05, |
|
"loss": 0.0234, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 20.3125, |
|
"grad_norm": 0.07465353608131409, |
|
"learning_rate": 7.175925925925926e-05, |
|
"loss": 0.0241, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.3125, |
|
"eval_accuracy": 0.963716814159292, |
|
"eval_f1": 0.9114470842332613, |
|
"eval_loss": 0.09792255610227585, |
|
"eval_precision": 0.9094827586206896, |
|
"eval_recall": 0.9134199134199135, |
|
"eval_runtime": 0.8811, |
|
"eval_samples_per_second": 128.253, |
|
"eval_steps_per_second": 17.025, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.46875, |
|
"grad_norm": 0.44642359018325806, |
|
"learning_rate": 7.060185185185186e-05, |
|
"loss": 0.0271, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 20.625, |
|
"grad_norm": 0.16677480936050415, |
|
"learning_rate": 6.944444444444444e-05, |
|
"loss": 0.0199, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 20.78125, |
|
"grad_norm": 0.05306961014866829, |
|
"learning_rate": 6.828703703703704e-05, |
|
"loss": 0.028, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 20.9375, |
|
"grad_norm": 0.7962948679924011, |
|
"learning_rate": 6.712962962962963e-05, |
|
"loss": 0.0238, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 21.09375, |
|
"grad_norm": 0.19253899157047272, |
|
"learning_rate": 6.597222222222223e-05, |
|
"loss": 0.0367, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"grad_norm": 0.22666649520397186, |
|
"learning_rate": 6.481481481481482e-05, |
|
"loss": 0.021, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 21.40625, |
|
"grad_norm": 0.09341959655284882, |
|
"learning_rate": 6.365740740740742e-05, |
|
"loss": 0.0217, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 21.5625, |
|
"grad_norm": 0.40562504529953003, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0268, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 21.71875, |
|
"grad_norm": 0.20743058621883392, |
|
"learning_rate": 6.13425925925926e-05, |
|
"loss": 0.0394, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 21.875, |
|
"grad_norm": 0.16062897443771362, |
|
"learning_rate": 6.018518518518519e-05, |
|
"loss": 0.0311, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 21.875, |
|
"eval_accuracy": 0.9654867256637168, |
|
"eval_f1": 0.9157667386609072, |
|
"eval_loss": 0.11342811584472656, |
|
"eval_precision": 0.9137931034482759, |
|
"eval_recall": 0.9177489177489178, |
|
"eval_runtime": 0.8884, |
|
"eval_samples_per_second": 127.2, |
|
"eval_steps_per_second": 16.885, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.03125, |
|
"grad_norm": 0.08394443988800049, |
|
"learning_rate": 5.902777777777778e-05, |
|
"loss": 0.0312, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 22.1875, |
|
"grad_norm": 0.6736553311347961, |
|
"learning_rate": 5.787037037037037e-05, |
|
"loss": 0.0383, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 22.34375, |
|
"grad_norm": 0.563914954662323, |
|
"learning_rate": 5.6712962962962965e-05, |
|
"loss": 0.0287, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"grad_norm": 0.08304356783628464, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.026, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 22.65625, |
|
"grad_norm": 0.6314889788627625, |
|
"learning_rate": 5.439814814814815e-05, |
|
"loss": 0.0337, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 22.8125, |
|
"grad_norm": 0.1526585817337036, |
|
"learning_rate": 5.3240740740740744e-05, |
|
"loss": 0.0386, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 22.96875, |
|
"grad_norm": 0.4352094829082489, |
|
"learning_rate": 5.208333333333334e-05, |
|
"loss": 0.0225, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 23.125, |
|
"grad_norm": 0.07802680879831314, |
|
"learning_rate": 5.092592592592593e-05, |
|
"loss": 0.028, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 23.28125, |
|
"grad_norm": 0.06631523370742798, |
|
"learning_rate": 4.976851851851852e-05, |
|
"loss": 0.0216, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"grad_norm": 0.4568875730037689, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.0303, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"eval_accuracy": 0.9628318584070796, |
|
"eval_f1": 0.9078947368421053, |
|
"eval_loss": 0.10922601819038391, |
|
"eval_precision": 0.92, |
|
"eval_recall": 0.8961038961038961, |
|
"eval_runtime": 1.1366, |
|
"eval_samples_per_second": 99.417, |
|
"eval_steps_per_second": 13.197, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.59375, |
|
"grad_norm": 0.16732257604599, |
|
"learning_rate": 4.745370370370371e-05, |
|
"loss": 0.0182, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"grad_norm": 0.8489612340927124, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.0419, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 23.90625, |
|
"grad_norm": 0.23256537318229675, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.0344, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 24.0625, |
|
"grad_norm": 0.23274816572666168, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.0283, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 24.21875, |
|
"grad_norm": 0.1935439109802246, |
|
"learning_rate": 4.282407407407408e-05, |
|
"loss": 0.0293, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 24.375, |
|
"grad_norm": 0.4433891773223877, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0341, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 24.53125, |
|
"grad_norm": 0.3329981863498688, |
|
"learning_rate": 4.0509259259259265e-05, |
|
"loss": 0.022, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 24.6875, |
|
"grad_norm": 0.774336040019989, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.0304, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 24.84375, |
|
"grad_norm": 0.45676717162132263, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 0.0213, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.18224991858005524, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.0225, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9628318584070796, |
|
"eval_f1": 0.908296943231441, |
|
"eval_loss": 0.11213955283164978, |
|
"eval_precision": 0.9162995594713657, |
|
"eval_recall": 0.9004329004329005, |
|
"eval_runtime": 1.05, |
|
"eval_samples_per_second": 107.622, |
|
"eval_steps_per_second": 14.286, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.15625, |
|
"grad_norm": 0.45673811435699463, |
|
"learning_rate": 3.587962962962963e-05, |
|
"loss": 0.0253, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 25.3125, |
|
"grad_norm": 0.09005212038755417, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.0127, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 25.46875, |
|
"grad_norm": 0.20682398974895477, |
|
"learning_rate": 3.3564814814814815e-05, |
|
"loss": 0.0231, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 25.625, |
|
"grad_norm": 0.7664525508880615, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.0174, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 25.78125, |
|
"grad_norm": 0.20978455245494843, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.0203, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 25.9375, |
|
"grad_norm": 0.5540274977684021, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.0205, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 26.09375, |
|
"grad_norm": 0.1240416169166565, |
|
"learning_rate": 2.8935185185185186e-05, |
|
"loss": 0.0168, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 26.25, |
|
"grad_norm": 0.04385749623179436, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0162, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 26.40625, |
|
"grad_norm": 0.1799972951412201, |
|
"learning_rate": 2.6620370370370372e-05, |
|
"loss": 0.0196, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 26.5625, |
|
"grad_norm": 0.24593585729599, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.0292, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 26.5625, |
|
"eval_accuracy": 0.9619469026548673, |
|
"eval_f1": 0.9071274298056156, |
|
"eval_loss": 0.11493521183729172, |
|
"eval_precision": 0.9051724137931034, |
|
"eval_recall": 0.9090909090909091, |
|
"eval_runtime": 0.8719, |
|
"eval_samples_per_second": 129.607, |
|
"eval_steps_per_second": 17.204, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 26.71875, |
|
"grad_norm": 0.2344673126935959, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.0177, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 26.875, |
|
"grad_norm": 0.263621062040329, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.0288, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 27.03125, |
|
"grad_norm": 0.27248746156692505, |
|
"learning_rate": 2.1990740740740743e-05, |
|
"loss": 0.0158, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 27.1875, |
|
"grad_norm": 0.35065901279449463, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0198, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 27.34375, |
|
"grad_norm": 0.23319651186466217, |
|
"learning_rate": 1.967592592592593e-05, |
|
"loss": 0.0208, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"grad_norm": 0.15196481347084045, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0161, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 27.65625, |
|
"grad_norm": 0.17273353040218353, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 0.0213, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 27.8125, |
|
"grad_norm": 0.31511059403419495, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.012, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 27.96875, |
|
"grad_norm": 0.09265203773975372, |
|
"learning_rate": 1.5046296296296297e-05, |
|
"loss": 0.0218, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 28.125, |
|
"grad_norm": 0.2638147473335266, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0261, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.125, |
|
"eval_accuracy": 0.9619469026548673, |
|
"eval_f1": 0.9079229122055675, |
|
"eval_loss": 0.11067904531955719, |
|
"eval_precision": 0.8983050847457628, |
|
"eval_recall": 0.9177489177489178, |
|
"eval_runtime": 0.9722, |
|
"eval_samples_per_second": 116.235, |
|
"eval_steps_per_second": 15.429, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.28125, |
|
"grad_norm": 0.2734526991844177, |
|
"learning_rate": 1.2731481481481482e-05, |
|
"loss": 0.0175, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 28.4375, |
|
"grad_norm": 0.06026133522391319, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.0168, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 28.59375, |
|
"grad_norm": 0.02611556649208069, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.0177, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 28.75, |
|
"grad_norm": 0.23434928059577942, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0252, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 28.90625, |
|
"grad_norm": 0.07623735815286636, |
|
"learning_rate": 8.101851851851852e-06, |
|
"loss": 0.0235, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 29.0625, |
|
"grad_norm": 0.05061192065477371, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.0185, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 29.21875, |
|
"grad_norm": 0.03355779871344566, |
|
"learning_rate": 5.787037037037038e-06, |
|
"loss": 0.0157, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 29.375, |
|
"grad_norm": 0.023396974429488182, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.016, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 29.53125, |
|
"grad_norm": 0.31754446029663086, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.0213, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 29.6875, |
|
"grad_norm": 0.04646694287657738, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.0166, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 29.6875, |
|
"eval_accuracy": 0.9610619469026549, |
|
"eval_f1": 0.9051724137931034, |
|
"eval_loss": 0.11101004481315613, |
|
"eval_precision": 0.9012875536480687, |
|
"eval_recall": 0.9090909090909091, |
|
"eval_runtime": 0.8549, |
|
"eval_samples_per_second": 132.182, |
|
"eval_steps_per_second": 17.546, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 29.84375, |
|
"grad_norm": 0.05433151125907898, |
|
"learning_rate": 1.1574074074074074e-06, |
|
"loss": 0.0174, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.3085578382015228, |
|
"learning_rate": 0.0, |
|
"loss": 0.0208, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 1920, |
|
"total_flos": 2.352825493649326e+18, |
|
"train_loss": 0.05003170374160012, |
|
"train_runtime": 517.118, |
|
"train_samples_per_second": 58.71, |
|
"train_steps_per_second": 3.713 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.352825493649326e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|