{ "epoch": 0.9983510322538092, "eval_logits/chosen": -2.7162837982177734, "eval_logits/rejected": -2.5735130310058594, "eval_logps/chosen": -592.9717407226562, "eval_logps/rejected": -654.3297729492188, "eval_loss": 0.625907838344574, "eval_rewards/accuracies": 0.659919023513794, "eval_rewards/chosen": -3.2950997352600098, "eval_rewards/margins": 0.5043820142745972, "eval_rewards/rejected": -3.7994813919067383, "eval_runtime": 307.1902, "eval_samples": 1976, "eval_samples_per_second": 6.432, "eval_steps_per_second": 0.804, "total_flos": 0.0, "train_loss": 0.5460537698505796, "train_runtime": 22947.8791, "train_samples": 60643, "train_samples_per_second": 2.643, "train_steps_per_second": 0.021 }