{ "best_metric": 0.9590698631546456, "best_model_checkpoint": ".//debugged_es_gsd_ses_udpipe_8_0.1_0.00002_20_04-22-24_22-38/checkpoint-32020", "epoch": 19.99968779269435, "global_step": 32020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.6659729448491154e-05, "loss": 0.9658, "step": 1601 }, { "epoch": 1.0, "eval_accuracy": 0.9632624280173934, "eval_f1": 0.9191161314259801, "eval_loss": 0.153659850358963, "eval_precision": 0.9176029962546817, "eval_recall": 0.9206342651856134, "eval_runtime": 12.5305, "eval_samples_per_second": 127.848, "eval_steps_per_second": 16.041, "step": 1601 }, { "epoch": 2.0, "learning_rate": 1.9149445145856868e-05, "loss": 0.118, "step": 3202 }, { "epoch": 2.0, "eval_accuracy": 0.9749441767540251, "eval_f1": 0.9426544669476715, "eval_loss": 0.09417378902435303, "eval_precision": 0.9387746744205094, "eval_recall": 0.9465664615765784, "eval_runtime": 12.5537, "eval_samples_per_second": 127.612, "eval_steps_per_second": 16.011, "step": 3202 }, { "epoch": 3.0, "learning_rate": 1.8085587082198154e-05, "loss": 0.0727, "step": 4803 }, { "epoch": 3.0, "eval_accuracy": 0.9784228463979316, "eval_f1": 0.9501459044017919, "eval_loss": 0.07907041162252426, "eval_precision": 0.9457148701165883, "eval_recall": 0.9546186563158112, "eval_runtime": 12.5508, "eval_samples_per_second": 127.642, "eval_steps_per_second": 16.015, "step": 4803 }, { "epoch": 4.0, "learning_rate": 1.702172901853944e-05, "loss": 0.0479, "step": 6404 }, { "epoch": 4.0, "eval_accuracy": 0.9803267128922317, "eval_f1": 0.9543441998393706, "eval_loss": 0.07787933200597763, "eval_precision": 0.9518938460274423, "eval_recall": 0.9568072015526283, "eval_runtime": 12.5524, "eval_samples_per_second": 127.625, "eval_steps_per_second": 16.013, "step": 6404 }, { "epoch": 5.0, "learning_rate": 1.5957870954880726e-05, "loss": 0.0342, "step": 8005 }, { "epoch": 5.0, "eval_accuracy": 0.9791984957104243, "eval_f1": 0.951995205720072, "eval_loss": 0.08329462260007858, "eval_precision": 0.9528418962521717, "eval_recall": 0.9511500185819879, "eval_runtime": 12.5712, "eval_samples_per_second": 127.435, "eval_steps_per_second": 15.989, "step": 8005 }, { "epoch": 6.0, "learning_rate": 1.489401289122201e-05, "loss": 0.0233, "step": 9606 }, { "epoch": 6.0, "eval_accuracy": 0.9811023622047244, "eval_f1": 0.956315778633658, "eval_loss": 0.08429370075464249, "eval_precision": 0.9539776462853385, "eval_recall": 0.9586654003386051, "eval_runtime": 12.5589, "eval_samples_per_second": 127.559, "eval_steps_per_second": 16.005, "step": 9606 }, { "epoch": 7.0, "learning_rate": 1.3830154827563293e-05, "loss": 0.0168, "step": 11207 }, { "epoch": 7.0, "eval_accuracy": 0.9807262898107886, "eval_f1": 0.9542881840821619, "eval_loss": 0.09856697916984558, "eval_precision": 0.9512925728354534, "eval_recall": 0.9573027212288888, "eval_runtime": 12.7018, "eval_samples_per_second": 126.124, "eval_steps_per_second": 15.825, "step": 11207 }, { "epoch": 8.0, "learning_rate": 1.276629676390458e-05, "loss": 0.0121, "step": 12808 }, { "epoch": 8.0, "eval_accuracy": 0.9815254436479022, "eval_f1": 0.9566986580077682, "eval_loss": 0.10770849883556366, "eval_precision": 0.9522952295229523, "eval_recall": 0.9611429987199075, "eval_runtime": 12.5218, "eval_samples_per_second": 127.937, "eval_steps_per_second": 16.052, "step": 12808 }, { "epoch": 9.0, "learning_rate": 1.1702438700245863e-05, "loss": 0.0102, "step": 14409 }, { "epoch": 9.0, "eval_accuracy": 0.9800916676460218, "eval_f1": 0.9545660129106535, "eval_loss": 0.12106840312480927, "eval_precision": 0.9505015353121802, "eval_recall": 0.9586654003386051, "eval_runtime": 12.5371, "eval_samples_per_second": 127.781, "eval_steps_per_second": 16.032, "step": 14409 }, { "epoch": 10.0, "learning_rate": 1.063858063658715e-05, "loss": 0.0073, "step": 16010 }, { "epoch": 10.0, "eval_accuracy": 0.9807732988600305, "eval_f1": 0.9554837514663813, "eval_loss": 0.12773701548576355, "eval_precision": 0.9524454291810274, "eval_recall": 0.95854152041954, "eval_runtime": 12.7247, "eval_samples_per_second": 125.897, "eval_steps_per_second": 15.796, "step": 16010 }, { "epoch": 11.0, "learning_rate": 9.574722572928434e-06, "loss": 0.0068, "step": 17611 }, { "epoch": 11.0, "eval_accuracy": 0.9810083441062405, "eval_f1": 0.9555354647817867, "eval_loss": 0.13228633999824524, "eval_precision": 0.9509631507913787, "eval_recall": 0.9601519593673865, "eval_runtime": 12.7411, "eval_samples_per_second": 125.735, "eval_steps_per_second": 15.776, "step": 17611 }, { "epoch": 12.0, "learning_rate": 8.51086450926972e-06, "loss": 0.0051, "step": 19212 }, { "epoch": 12.0, "eval_accuracy": 0.9815724526971442, "eval_f1": 0.95762694407251, "eval_loss": 0.12687553465366364, "eval_precision": 0.9554422887208155, "eval_recall": 0.9598216129165462, "eval_runtime": 12.7033, "eval_samples_per_second": 126.109, "eval_steps_per_second": 15.823, "step": 19212 }, { "epoch": 13.0, "learning_rate": 7.447006445611005e-06, "loss": 0.0035, "step": 20813 }, { "epoch": 13.0, "eval_accuracy": 0.982089552238806, "eval_f1": 0.9584635630860339, "eval_loss": 0.13741779327392578, "eval_precision": 0.9545771042391972, "eval_recall": 0.9623817979105587, "eval_runtime": 12.6726, "eval_samples_per_second": 126.414, "eval_steps_per_second": 15.861, "step": 20813 }, { "epoch": 14.0, "learning_rate": 6.38314838195229e-06, "loss": 0.0027, "step": 22414 }, { "epoch": 14.0, "eval_accuracy": 0.9823481020096368, "eval_f1": 0.9589226973684211, "eval_loss": 0.14472806453704834, "eval_precision": 0.9548785980428285, "eval_recall": 0.9630011975058843, "eval_runtime": 12.6834, "eval_samples_per_second": 126.306, "eval_steps_per_second": 15.847, "step": 22414 }, { "epoch": 15.0, "learning_rate": 5.319290318293575e-06, "loss": 0.003, "step": 24015 }, { "epoch": 15.0, "eval_accuracy": 0.9819015160418381, "eval_f1": 0.9580811200329422, "eval_loss": 0.1390347182750702, "eval_precision": 0.955405904816655, "eval_recall": 0.9607713589627122, "eval_runtime": 13.4492, "eval_samples_per_second": 119.115, "eval_steps_per_second": 14.945, "step": 24015 }, { "epoch": 16.0, "learning_rate": 4.25543225463486e-06, "loss": 0.0025, "step": 25616 }, { "epoch": 16.0, "eval_accuracy": 0.9812433893524504, "eval_f1": 0.9562028758600799, "eval_loss": 0.1537328064441681, "eval_precision": 0.9540801644398766, "eval_recall": 0.9583350538877647, "eval_runtime": 12.9219, "eval_samples_per_second": 123.976, "eval_steps_per_second": 15.555, "step": 25616 }, { "epoch": 17.0, "learning_rate": 3.191574190976145e-06, "loss": 0.0029, "step": 27217 }, { "epoch": 17.0, "eval_accuracy": 0.982207074861911, "eval_f1": 0.9580219463487947, "eval_loss": 0.15050023794174194, "eval_precision": 0.9552882246674331, "eval_recall": 0.9607713589627122, "eval_runtime": 12.7344, "eval_samples_per_second": 125.801, "eval_steps_per_second": 15.784, "step": 27217 }, { "epoch": 18.0, "learning_rate": 2.12771612731743e-06, "loss": 0.0022, "step": 28818 }, { "epoch": 18.0, "eval_accuracy": 0.9823245974850159, "eval_f1": 0.9589283509609449, "eval_loss": 0.14898641407489777, "eval_precision": 0.955703211517165, "eval_recall": 0.9621753313787835, "eval_runtime": 12.8609, "eval_samples_per_second": 124.563, "eval_steps_per_second": 15.629, "step": 28818 }, { "epoch": 19.0, "learning_rate": 1.063858063658715e-06, "loss": 0.0021, "step": 30419 }, { "epoch": 19.0, "eval_accuracy": 0.982019038664943, "eval_f1": 0.9582288369033425, "eval_loss": 0.15266422927379608, "eval_precision": 0.954070981210856, "eval_recall": 0.9624230912169137, "eval_runtime": 12.7956, "eval_samples_per_second": 125.199, "eval_steps_per_second": 15.709, "step": 30419 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.0018, "step": 32020 }, { "epoch": 20.0, "eval_accuracy": 0.9824421201081208, "eval_f1": 0.9590698631546456, "eval_loss": 0.15378834307193756, "eval_precision": 0.9559028632373452, "eval_recall": 0.9622579179914936, "eval_runtime": 12.9229, "eval_samples_per_second": 123.966, "eval_steps_per_second": 15.554, "step": 32020 }, { "epoch": 20.0, "step": 32020, "total_flos": 2.8281562629741216e+16, "train_loss": 0.06704426395528247, "train_runtime": 9262.2202, "train_samples_per_second": 27.663, "train_steps_per_second": 3.457 } ], "max_steps": 32020, "num_train_epochs": 20, "total_flos": 2.8281562629741216e+16, "trial_name": null, "trial_params": null }