{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1563, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.1847133757961784e-09, "logits/generated": -0.09026163071393967, "logits/real": -0.800382137298584, "logps/generated": -180.3804931640625, "logps/real": -164.2542724609375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 3.184713375796178e-08, "logits/generated": -0.08320371806621552, "logits/real": -0.7675037980079651, "logps/generated": -161.61961364746094, "logps/real": -178.428466796875, "loss": 0.6945, "rewards/accuracies": 0.4444444477558136, "rewards/generated": 0.007918823510408401, "rewards/margins": -0.007639557123184204, "rewards/real": 0.00027926763868890703, "step": 10 }, { "epoch": 0.01, "learning_rate": 6.369426751592356e-08, "logits/generated": -0.1008232831954956, "logits/real": -0.9005411863327026, "logps/generated": -165.965576171875, "logps/real": -174.21055603027344, "loss": 0.6805, "rewards/accuracies": 0.612500011920929, "rewards/generated": -0.011105736717581749, "rewards/margins": 0.017230339348316193, "rewards/real": 0.006124601699411869, "step": 20 }, { "epoch": 0.02, "learning_rate": 9.554140127388536e-08, "logits/generated": -0.08152450621128082, "logits/real": -0.7754586935043335, "logps/generated": -165.53176879882812, "logps/real": -185.11846923828125, "loss": 0.6398, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -0.0970459133386612, "rewards/margins": 0.10573717206716537, "rewards/real": 0.008691254071891308, "step": 30 }, { "epoch": 0.03, "learning_rate": 1.2738853503184713e-07, "logits/generated": 0.019896607846021652, "logits/real": -0.8798272013664246, "logps/generated": -153.7320556640625, "logps/real": -180.03219604492188, "loss": 0.5699, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.25305813550949097, "rewards/margins": 0.2757338285446167, "rewards/real": 0.022675666958093643, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.592356687898089e-07, "logits/generated": -0.05750712752342224, "logits/real": -0.8368139266967773, "logps/generated": -161.1537322998047, "logps/real": -172.9757537841797, "loss": 0.4647, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.4674338400363922, "rewards/margins": 0.5483574271202087, "rewards/real": 0.0809236392378807, "step": 50 }, { "epoch": 0.04, "learning_rate": 1.9108280254777072e-07, "logits/generated": -0.011790583841502666, "logits/real": -0.7276524305343628, "logps/generated": -175.3116455078125, "logps/real": -182.06173706054688, "loss": 0.3557, "rewards/accuracies": 1.0, "rewards/generated": -0.9212247729301453, "rewards/margins": 1.0005762577056885, "rewards/real": 0.07935139536857605, "step": 60 }, { "epoch": 0.04, "learning_rate": 2.2292993630573247e-07, "logits/generated": 0.037871506065130234, "logits/real": -0.757712721824646, "logps/generated": -168.0603790283203, "logps/real": -185.0572509765625, "loss": 0.2403, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.2728922367095947, "rewards/margins": 1.4110310077667236, "rewards/real": 0.1381385624408722, "step": 70 }, { "epoch": 0.05, "learning_rate": 2.5477707006369425e-07, "logits/generated": 0.008935372345149517, "logits/real": -0.8579050302505493, "logps/generated": -176.29393005371094, "logps/real": -171.60939025878906, "loss": 0.1815, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.7076524496078491, "rewards/margins": 1.8567367792129517, "rewards/real": 0.1490844190120697, "step": 80 }, { "epoch": 0.06, "learning_rate": 2.86624203821656e-07, "logits/generated": -0.0024472028017044067, "logits/real": -0.8354307413101196, "logps/generated": -194.20306396484375, "logps/real": -181.1865997314453, "loss": 0.1284, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.3786566257476807, "rewards/margins": 2.5542044639587402, "rewards/real": 0.17554807662963867, "step": 90 }, { "epoch": 0.06, "learning_rate": 3.184713375796178e-07, "logits/generated": -0.007845225743949413, "logits/real": -0.8014926910400391, "logps/generated": -193.57733154296875, "logps/real": -172.54800415039062, "loss": 0.117, "rewards/accuracies": 1.0, "rewards/generated": -2.8054349422454834, "rewards/margins": 2.9194352626800537, "rewards/real": 0.11400020122528076, "step": 100 }, { "epoch": 0.07, "learning_rate": 3.5031847133757957e-07, "logits/generated": 0.023908555507659912, "logits/real": -0.7339428663253784, "logps/generated": -195.67071533203125, "logps/real": -188.1604461669922, "loss": 0.0891, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.3142905235290527, "rewards/margins": 3.3457164764404297, "rewards/real": 0.03142569214105606, "step": 110 }, { "epoch": 0.08, "learning_rate": 3.8216560509554143e-07, "logits/generated": 0.04019797593355179, "logits/real": -0.6734101176261902, "logps/generated": -209.1021728515625, "logps/real": -176.04254150390625, "loss": 0.0794, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.002293109893799, "rewards/margins": 4.102808475494385, "rewards/real": 0.1005152240395546, "step": 120 }, { "epoch": 0.08, "learning_rate": 4.140127388535032e-07, "logits/generated": 0.07181330770254135, "logits/real": -0.6598816514015198, "logps/generated": -198.8852081298828, "logps/real": -184.90480041503906, "loss": 0.0761, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.574056625366211, "rewards/margins": 4.542202949523926, "rewards/real": -0.031853675842285156, "step": 130 }, { "epoch": 0.09, "learning_rate": 4.4585987261146494e-07, "logits/generated": 0.09077299386262894, "logits/real": -0.7375579476356506, "logps/generated": -217.79299926757812, "logps/real": -169.71145629882812, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/generated": -5.316611289978027, "rewards/margins": 5.399137020111084, "rewards/real": 0.08252569288015366, "step": 140 }, { "epoch": 0.1, "learning_rate": 4.777070063694267e-07, "logits/generated": 0.019507689401507378, "logits/real": -0.529100775718689, "logps/generated": -227.7717742919922, "logps/real": -181.4638214111328, "loss": 0.0624, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -5.985389709472656, "rewards/margins": 5.939720630645752, "rewards/real": -0.04566919058561325, "step": 150 }, { "epoch": 0.1, "learning_rate": 4.989331436699858e-07, "logits/generated": 0.07675327360630035, "logits/real": -0.6792179346084595, "logps/generated": -230.45321655273438, "logps/real": -177.54190063476562, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/generated": -6.596798896789551, "rewards/margins": 6.574460506439209, "rewards/real": -0.02233867719769478, "step": 160 }, { "epoch": 0.11, "learning_rate": 4.953769559032717e-07, "logits/generated": 0.10339117050170898, "logits/real": -0.6523188352584839, "logps/generated": -238.82418823242188, "logps/real": -172.30154418945312, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/generated": -7.214089870452881, "rewards/margins": 7.0066819190979, "rewards/real": -0.2074071168899536, "step": 170 }, { "epoch": 0.12, "learning_rate": 4.918207681365576e-07, "logits/generated": -0.02095809206366539, "logits/real": -0.5748814344406128, "logps/generated": -243.3128204345703, "logps/real": -193.81964111328125, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/generated": -7.754981994628906, "rewards/margins": 7.589665412902832, "rewards/real": -0.16531690955162048, "step": 180 }, { "epoch": 0.12, "learning_rate": 4.882645803698435e-07, "logits/generated": 0.08188272267580032, "logits/real": -0.6443125009536743, "logps/generated": -239.46932983398438, "logps/real": -188.2861328125, "loss": 0.0393, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -7.951255798339844, "rewards/margins": 7.745802402496338, "rewards/real": -0.2054535448551178, "step": 190 }, { "epoch": 0.13, "learning_rate": 4.847083926031294e-07, "logits/generated": 0.0788329690694809, "logits/real": -0.6880910396575928, "logps/generated": -249.65542602539062, "logps/real": -176.37173461914062, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/generated": -8.179863929748535, "rewards/margins": 7.825772762298584, "rewards/real": -0.35409015417099, "step": 200 }, { "epoch": 0.13, "learning_rate": 4.811522048364154e-07, "logits/generated": 0.08456435799598694, "logits/real": -0.7073934674263, "logps/generated": -243.7119903564453, "logps/real": -164.15887451171875, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/generated": -8.219891548156738, "rewards/margins": 8.312192916870117, "rewards/real": 0.09230276197195053, "step": 210 }, { "epoch": 0.14, "learning_rate": 4.775960170697012e-07, "logits/generated": 0.05936474725604057, "logits/real": -0.6907894015312195, "logps/generated": -248.00094604492188, "logps/real": -162.20657348632812, "loss": 0.0321, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -8.413338661193848, "rewards/margins": 8.484495162963867, "rewards/real": 0.07115854322910309, "step": 220 }, { "epoch": 0.15, "learning_rate": 4.7403982930298717e-07, "logits/generated": 0.10569562762975693, "logits/real": -0.7800209522247314, "logps/generated": -250.5619659423828, "logps/real": -172.96253967285156, "loss": 0.0278, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.249897003173828, "rewards/margins": 9.181459426879883, "rewards/real": -0.06843843311071396, "step": 230 }, { "epoch": 0.15, "learning_rate": 4.7048364153627306e-07, "logits/generated": 0.053275883197784424, "logits/real": -0.5331145524978638, "logps/generated": -257.272705078125, "logps/real": -204.5064239501953, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/generated": -9.000371932983398, "rewards/margins": 8.605985641479492, "rewards/real": -0.39438483119010925, "step": 240 }, { "epoch": 0.16, "learning_rate": 4.66927453769559e-07, "logits/generated": 0.03677482530474663, "logits/real": -0.6608942151069641, "logps/generated": -256.04937744140625, "logps/real": -171.3863067626953, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/generated": -9.227904319763184, "rewards/margins": 8.95788860321045, "rewards/real": -0.2700158953666687, "step": 250 }, { "epoch": 0.17, "learning_rate": 4.633712660028449e-07, "logits/generated": 0.047906339168548584, "logits/real": -0.762579083442688, "logps/generated": -265.7643127441406, "logps/real": -178.68174743652344, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/generated": -9.940900802612305, "rewards/margins": 9.152986526489258, "rewards/real": -0.7879153490066528, "step": 260 }, { "epoch": 0.17, "learning_rate": 4.5981507823613085e-07, "logits/generated": 0.0478428415954113, "logits/real": -0.6810993552207947, "logps/generated": -258.02484130859375, "logps/real": -183.0740509033203, "loss": 0.0224, "rewards/accuracies": 0.987500011920929, "rewards/generated": -10.063767433166504, "rewards/margins": 9.119011878967285, "rewards/real": -0.9447552561759949, "step": 270 }, { "epoch": 0.18, "learning_rate": 4.562588904694168e-07, "logits/generated": 0.07144404947757721, "logits/real": -0.6452735662460327, "logps/generated": -260.16082763671875, "logps/real": -188.64410400390625, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/generated": -10.373169898986816, "rewards/margins": 9.765576362609863, "rewards/real": -0.6075931787490845, "step": 280 }, { "epoch": 0.19, "learning_rate": 4.5270270270270264e-07, "logits/generated": 0.08266101777553558, "logits/real": -0.716413140296936, "logps/generated": -276.4801330566406, "logps/real": -185.527099609375, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/generated": -11.44568920135498, "rewards/margins": 10.838994979858398, "rewards/real": -0.6066935062408447, "step": 290 }, { "epoch": 0.19, "learning_rate": 4.491465149359886e-07, "logits/generated": 0.0009769715834408998, "logits/real": -0.7033424973487854, "logps/generated": -272.52093505859375, "logps/real": -179.6094512939453, "loss": 0.0218, "rewards/accuracies": 0.987500011920929, "rewards/generated": -11.34181022644043, "rewards/margins": 10.347066879272461, "rewards/real": -0.9947425127029419, "step": 300 }, { "epoch": 0.2, "learning_rate": 4.4559032716927454e-07, "logits/generated": 0.07658599317073822, "logits/real": -0.5730828046798706, "logps/generated": -276.45294189453125, "logps/real": -200.97845458984375, "loss": 0.0238, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.649636268615723, "rewards/margins": 10.637258529663086, "rewards/real": -1.0123790502548218, "step": 310 }, { "epoch": 0.2, "learning_rate": 4.420341394025605e-07, "logits/generated": 0.019550871104002, "logits/real": -0.5840874910354614, "logps/generated": -280.53192138671875, "logps/real": -189.03172302246094, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/generated": -11.912101745605469, "rewards/margins": 11.156845092773438, "rewards/real": -0.7552580237388611, "step": 320 }, { "epoch": 0.21, "learning_rate": 4.384779516358463e-07, "logits/generated": 0.016371339559555054, "logits/real": -0.5726695656776428, "logps/generated": -266.34918212890625, "logps/real": -202.59814453125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/generated": -11.33076286315918, "rewards/margins": 9.75381851196289, "rewards/real": -1.576944351196289, "step": 330 }, { "epoch": 0.22, "learning_rate": 4.3492176386913227e-07, "logits/generated": 0.0978037491440773, "logits/real": -0.6194095611572266, "logps/generated": -282.8346862792969, "logps/real": -204.97592163085938, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/generated": -12.424463272094727, "rewards/margins": 11.069517135620117, "rewards/real": -1.3549461364746094, "step": 340 }, { "epoch": 0.22, "learning_rate": 4.313655761024182e-07, "logits/generated": 0.10168097913265228, "logits/real": -0.625900149345398, "logps/generated": -289.7073974609375, "logps/real": -203.98751831054688, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/generated": -13.3988676071167, "rewards/margins": 11.112601280212402, "rewards/real": -2.2862656116485596, "step": 350 }, { "epoch": 0.23, "learning_rate": 4.278093883357041e-07, "logits/generated": 0.05643658712506294, "logits/real": -0.613519549369812, "logps/generated": -285.1255187988281, "logps/real": -188.86361694335938, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/generated": -12.26307487487793, "rewards/margins": 11.37584400177002, "rewards/real": -0.8872316479682922, "step": 360 }, { "epoch": 0.24, "learning_rate": 4.2425320056899e-07, "logits/generated": 0.017720462754368782, "logits/real": -0.5340021848678589, "logps/generated": -296.48681640625, "logps/real": -216.097900390625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/generated": -13.157635688781738, "rewards/margins": 11.516908645629883, "rewards/real": -1.640728235244751, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.2069701280227595e-07, "logits/generated": -0.021676432341337204, "logits/real": -0.7345054745674133, "logps/generated": -295.46429443359375, "logps/real": -198.0674591064453, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/generated": -12.902421951293945, "rewards/margins": 11.71843147277832, "rewards/real": -1.1839900016784668, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.1714082503556185e-07, "logits/generated": 0.07925084233283997, "logits/real": -0.5841912031173706, "logps/generated": -298.8299255371094, "logps/real": -189.8162384033203, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/generated": -13.550666809082031, "rewards/margins": 12.233985900878906, "rewards/real": -1.3166826963424683, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.135846372688478e-07, "logits/generated": 0.025105977430939674, "logits/real": -0.6952486634254456, "logps/generated": -279.43560791015625, "logps/real": -197.62535095214844, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/generated": -12.729209899902344, "rewards/margins": 11.284834861755371, "rewards/real": -1.444373369216919, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.100284495021337e-07, "logits/generated": 0.05239884927868843, "logits/real": -0.6132751703262329, "logps/generated": -305.1339416503906, "logps/real": -191.66676330566406, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -14.304582595825195, "rewards/margins": 12.9876127243042, "rewards/real": -1.3169682025909424, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.064722617354196e-07, "logits/generated": 0.034467507153749466, "logits/real": -0.7083422541618347, "logps/generated": -304.05780029296875, "logps/real": -200.8745880126953, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/generated": -14.695897102355957, "rewards/margins": 12.881891250610352, "rewards/real": -1.8140056133270264, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.0291607396870553e-07, "logits/generated": 0.03394109755754471, "logits/real": -0.6564615964889526, "logps/generated": -291.5844421386719, "logps/real": -188.79075622558594, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -13.64738941192627, "rewards/margins": 12.21278190612793, "rewards/real": -1.4346075057983398, "step": 430 }, { "epoch": 0.28, "learning_rate": 3.993598862019915e-07, "logits/generated": 0.07299565523862839, "logits/real": -0.6604090332984924, "logps/generated": -309.156982421875, "logps/real": -203.24072265625, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/generated": -15.03246784210205, "rewards/margins": 12.468481063842773, "rewards/real": -2.563986301422119, "step": 440 }, { "epoch": 0.29, "learning_rate": 3.9580369843527737e-07, "logits/generated": 0.0038179433904588223, "logits/real": -0.6290857195854187, "logps/generated": -326.712890625, "logps/real": -202.99484252929688, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -15.489263534545898, "rewards/margins": 12.454556465148926, "rewards/real": -3.0347084999084473, "step": 450 }, { "epoch": 0.29, "learning_rate": 3.9224751066856327e-07, "logits/generated": 0.031186867505311966, "logits/real": -0.4273042678833008, "logps/generated": -323.9992980957031, "logps/real": -225.23263549804688, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/generated": -15.867002487182617, "rewards/margins": 13.411462783813477, "rewards/real": -2.455543041229248, "step": 460 }, { "epoch": 0.3, "learning_rate": 3.886913229018492e-07, "logits/generated": 0.011080889031291008, "logits/real": -0.6169866919517517, "logps/generated": -313.5555114746094, "logps/real": -195.68914794921875, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/generated": -15.070566177368164, "rewards/margins": 13.439569473266602, "rewards/real": -1.630995750427246, "step": 470 }, { "epoch": 0.31, "learning_rate": 3.851351351351351e-07, "logits/generated": -0.029481088742613792, "logits/real": -0.6428096890449524, "logps/generated": -306.30755615234375, "logps/real": -198.16749572753906, "loss": 0.0107, "rewards/accuracies": 0.987500011920929, "rewards/generated": -14.478108406066895, "rewards/margins": 12.663415908813477, "rewards/real": -1.814692497253418, "step": 480 }, { "epoch": 0.31, "learning_rate": 3.8157894736842105e-07, "logits/generated": 0.015122579410672188, "logits/real": -0.6262849569320679, "logps/generated": -301.0884704589844, "logps/real": -190.44667053222656, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/generated": -13.957537651062012, "rewards/margins": 11.782486915588379, "rewards/real": -2.1750526428222656, "step": 490 }, { "epoch": 0.32, "learning_rate": 3.7802275960170695e-07, "logits/generated": -0.011642997153103352, "logits/real": -0.5468995571136475, "logps/generated": -322.72308349609375, "logps/real": -188.96719360351562, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -15.367193222045898, "rewards/margins": 14.392413139343262, "rewards/real": -0.9747812151908875, "step": 500 }, { "epoch": 0.32, "eval_logits/generated": 0.015148750506341457, "eval_logits/real": -0.618992805480957, "eval_logps/generated": -304.2368469238281, "eval_logps/real": -186.9757843017578, "eval_loss": 0.010758413933217525, "eval_rewards/accuracies": 0.9976114630699158, "eval_rewards/generated": -14.384950637817383, "eval_rewards/margins": 13.620210647583008, "eval_rewards/real": -0.7647396922111511, "eval_runtime": 424.798, "eval_samples_per_second": 11.77, "eval_steps_per_second": 0.37, "step": 500 }, { "epoch": 0.33, "learning_rate": 3.7446657183499284e-07, "logits/generated": 0.04711627587676048, "logits/real": -0.6923630833625793, "logps/generated": -311.3076171875, "logps/real": -192.34048461914062, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -15.26708698272705, "rewards/margins": 13.872920036315918, "rewards/real": -1.394165277481079, "step": 510 }, { "epoch": 0.33, "learning_rate": 3.709103840682788e-07, "logits/generated": 0.03908165544271469, "logits/real": -0.631860613822937, "logps/generated": -310.2327880859375, "logps/real": -186.8871612548828, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -15.100445747375488, "rewards/margins": 13.6294527053833, "rewards/real": -1.470994234085083, "step": 520 }, { "epoch": 0.34, "learning_rate": 3.6735419630156474e-07, "logits/generated": 0.04315485060214996, "logits/real": -0.5955843329429626, "logps/generated": -324.2378845214844, "logps/real": -187.12576293945312, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -16.153406143188477, "rewards/margins": 14.313261032104492, "rewards/real": -1.8401434421539307, "step": 530 }, { "epoch": 0.35, "learning_rate": 3.637980085348506e-07, "logits/generated": -0.03953739255666733, "logits/real": -0.6422590017318726, "logps/generated": -323.29638671875, "logps/real": -203.27786254882812, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/generated": -16.259048461914062, "rewards/margins": 13.929720878601074, "rewards/real": -2.3293280601501465, "step": 540 }, { "epoch": 0.35, "learning_rate": 3.602418207681365e-07, "logits/generated": -0.04372464120388031, "logits/real": -0.6528729796409607, "logps/generated": -336.2181701660156, "logps/real": -208.175048828125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -16.822994232177734, "rewards/margins": 14.49010944366455, "rewards/real": -2.3328843116760254, "step": 550 }, { "epoch": 0.36, "learning_rate": 3.5668563300142247e-07, "logits/generated": -0.020463664084672928, "logits/real": -0.5609344244003296, "logps/generated": -317.5655822753906, "logps/real": -197.87765502929688, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/generated": -15.073542594909668, "rewards/margins": 12.639741897583008, "rewards/real": -2.433799982070923, "step": 560 }, { "epoch": 0.36, "learning_rate": 3.5312944523470837e-07, "logits/generated": 0.016961723566055298, "logits/real": -0.7112401723861694, "logps/generated": -322.7782897949219, "logps/real": -191.416015625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -15.618112564086914, "rewards/margins": 14.197979927062988, "rewards/real": -1.4201303720474243, "step": 570 }, { "epoch": 0.37, "learning_rate": 3.495732574679943e-07, "logits/generated": 0.03125763684511185, "logits/real": -0.6455451250076294, "logps/generated": -309.74517822265625, "logps/real": -192.3853302001953, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -15.057307243347168, "rewards/margins": 13.615495681762695, "rewards/real": -1.4418113231658936, "step": 580 }, { "epoch": 0.38, "learning_rate": 3.460170697012802e-07, "logits/generated": 0.030627410858869553, "logits/real": -0.6639117002487183, "logps/generated": -330.22894287109375, "logps/real": -195.18409729003906, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -16.0391902923584, "rewards/margins": 14.437360763549805, "rewards/real": -1.6018317937850952, "step": 590 }, { "epoch": 0.38, "learning_rate": 3.424608819345661e-07, "logits/generated": 0.06100524589419365, "logits/real": -0.6973519325256348, "logps/generated": -319.52166748046875, "logps/real": -179.44314575195312, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -15.46537971496582, "rewards/margins": 14.726943969726562, "rewards/real": -0.7384368777275085, "step": 600 }, { "epoch": 0.39, "learning_rate": 3.3890469416785205e-07, "logits/generated": 0.06144358962774277, "logits/real": -0.6208174228668213, "logps/generated": -311.03912353515625, "logps/real": -192.3145751953125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -15.110832214355469, "rewards/margins": 13.566309928894043, "rewards/real": -1.544521689414978, "step": 610 }, { "epoch": 0.4, "learning_rate": 3.35348506401138e-07, "logits/generated": -0.018809977918863297, "logits/real": -0.6647375822067261, "logps/generated": -331.81158447265625, "logps/real": -182.822021484375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/generated": -16.408363342285156, "rewards/margins": 15.543344497680664, "rewards/real": -0.8650201559066772, "step": 620 }, { "epoch": 0.4, "learning_rate": 3.3179231863442384e-07, "logits/generated": -0.014424433931708336, "logits/real": -0.5232574939727783, "logps/generated": -321.45355224609375, "logps/real": -204.66917419433594, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -16.32326889038086, "rewards/margins": 14.821706771850586, "rewards/real": -1.501560091972351, "step": 630 }, { "epoch": 0.41, "learning_rate": 3.282361308677098e-07, "logits/generated": -0.04307156428694725, "logits/real": -0.6383107900619507, "logps/generated": -332.80963134765625, "logps/real": -191.7883758544922, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -15.911900520324707, "rewards/margins": 14.832717895507812, "rewards/real": -1.0791819095611572, "step": 640 }, { "epoch": 0.42, "learning_rate": 3.2467994310099573e-07, "logits/generated": 0.020891521126031876, "logits/real": -0.6430577039718628, "logps/generated": -350.6064147949219, "logps/real": -215.30593872070312, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -18.383892059326172, "rewards/margins": 16.04793357849121, "rewards/real": -2.3359580039978027, "step": 650 }, { "epoch": 0.42, "learning_rate": 3.211237553342817e-07, "logits/generated": -0.024233415722846985, "logits/real": -0.6739251017570496, "logps/generated": -333.5208435058594, "logps/real": -194.91732788085938, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -16.731473922729492, "rewards/margins": 14.82574462890625, "rewards/real": -1.9057306051254272, "step": 660 }, { "epoch": 0.43, "learning_rate": 3.175675675675675e-07, "logits/generated": 0.023840907961130142, "logits/real": -0.6880885362625122, "logps/generated": -318.2210998535156, "logps/real": -184.1695556640625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/generated": -16.254070281982422, "rewards/margins": 14.820034980773926, "rewards/real": -1.4340364933013916, "step": 670 }, { "epoch": 0.44, "learning_rate": 3.1401137980085347e-07, "logits/generated": 0.040734268724918365, "logits/real": -0.6580570340156555, "logps/generated": -342.2274475097656, "logps/real": -205.65689086914062, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/generated": -17.80494499206543, "rewards/margins": 15.115242004394531, "rewards/real": -2.689703941345215, "step": 680 }, { "epoch": 0.44, "learning_rate": 3.104551920341394e-07, "logits/generated": -0.019141068682074547, "logits/real": -0.6394578814506531, "logps/generated": -324.29833984375, "logps/real": -201.87010192871094, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -16.063854217529297, "rewards/margins": 14.041638374328613, "rewards/real": -2.0222160816192627, "step": 690 }, { "epoch": 0.45, "learning_rate": 3.068990042674253e-07, "logits/generated": -0.02943194843828678, "logits/real": -0.6647931337356567, "logps/generated": -309.67327880859375, "logps/real": -181.3977508544922, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/generated": -15.244012832641602, "rewards/margins": 13.764185905456543, "rewards/real": -1.4798262119293213, "step": 700 }, { "epoch": 0.45, "learning_rate": 3.033428165007112e-07, "logits/generated": -0.0134804155677557, "logits/real": -0.688398003578186, "logps/generated": -333.3070373535156, "logps/real": -196.06004333496094, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/generated": -16.678394317626953, "rewards/margins": 14.339719772338867, "rewards/real": -2.3386740684509277, "step": 710 }, { "epoch": 0.46, "learning_rate": 2.9978662873399715e-07, "logits/generated": -0.01525292731821537, "logits/real": -0.5911905169487, "logps/generated": -330.7987060546875, "logps/real": -198.35397338867188, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -16.70512580871582, "rewards/margins": 14.928197860717773, "rewards/real": -1.776925802230835, "step": 720 }, { "epoch": 0.47, "learning_rate": 2.9623044096728305e-07, "logits/generated": 0.047141142189502716, "logits/real": -0.5441254377365112, "logps/generated": -320.4557800292969, "logps/real": -211.35848999023438, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -16.464683532714844, "rewards/margins": 14.595657348632812, "rewards/real": -1.8690249919891357, "step": 730 }, { "epoch": 0.47, "learning_rate": 2.92674253200569e-07, "logits/generated": 0.018107902258634567, "logits/real": -0.6069762110710144, "logps/generated": -335.17401123046875, "logps/real": -191.33583068847656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -17.258358001708984, "rewards/margins": 15.983779907226562, "rewards/real": -1.2745764255523682, "step": 740 }, { "epoch": 0.48, "learning_rate": 2.8911806543385494e-07, "logits/generated": 0.05721588060259819, "logits/real": -0.6501365900039673, "logps/generated": -334.0870666503906, "logps/real": -201.9585418701172, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -16.955425262451172, "rewards/margins": 16.156780242919922, "rewards/real": -0.7986453175544739, "step": 750 }, { "epoch": 0.49, "learning_rate": 2.855618776671408e-07, "logits/generated": -0.00029001757502555847, "logits/real": -0.4777015745639801, "logps/generated": -349.4190368652344, "logps/real": -214.5007781982422, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -18.192649841308594, "rewards/margins": 16.606849670410156, "rewards/real": -1.5857971906661987, "step": 760 }, { "epoch": 0.49, "learning_rate": 2.8200568990042673e-07, "logits/generated": 0.006303996779024601, "logits/real": -0.6401196718215942, "logps/generated": -335.5325622558594, "logps/real": -194.22637939453125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -17.122852325439453, "rewards/margins": 15.594779968261719, "rewards/real": -1.5280735492706299, "step": 770 }, { "epoch": 0.5, "learning_rate": 2.784495021337127e-07, "logits/generated": -0.015740731731057167, "logits/real": -0.6430305242538452, "logps/generated": -333.03778076171875, "logps/real": -185.57615661621094, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -17.422563552856445, "rewards/margins": 15.94238567352295, "rewards/real": -1.4801769256591797, "step": 780 }, { "epoch": 0.51, "learning_rate": 2.7489331436699857e-07, "logits/generated": -0.06366153061389923, "logits/real": -0.5953705310821533, "logps/generated": -346.43719482421875, "logps/real": -207.64895629882812, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -17.68976402282715, "rewards/margins": 15.903286933898926, "rewards/real": -1.7864751815795898, "step": 790 }, { "epoch": 0.51, "learning_rate": 2.7133712660028446e-07, "logits/generated": 0.05433814972639084, "logits/real": -0.659256637096405, "logps/generated": -347.67437744140625, "logps/real": -188.8865509033203, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -18.18773651123047, "rewards/margins": 16.599082946777344, "rewards/real": -1.5886526107788086, "step": 800 }, { "epoch": 0.52, "learning_rate": 2.677809388335704e-07, "logits/generated": -0.006709927227348089, "logits/real": -0.6219618916511536, "logps/generated": -339.08404541015625, "logps/real": -192.36767578125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -17.780960083007812, "rewards/margins": 15.594240188598633, "rewards/real": -2.186721086502075, "step": 810 }, { "epoch": 0.52, "learning_rate": 2.642247510668563e-07, "logits/generated": 0.024327615275979042, "logits/real": -0.6300166845321655, "logps/generated": -359.64288330078125, "logps/real": -189.8053741455078, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -19.835926055908203, "rewards/margins": 18.566604614257812, "rewards/real": -1.2693183422088623, "step": 820 }, { "epoch": 0.53, "learning_rate": 2.6066856330014225e-07, "logits/generated": 0.060337960720062256, "logits/real": -0.6221122145652771, "logps/generated": -346.8661193847656, "logps/real": -193.4240264892578, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/generated": -18.56460952758789, "rewards/margins": 17.463274002075195, "rewards/real": -1.1013351678848267, "step": 830 }, { "epoch": 0.54, "learning_rate": 2.5711237553342815e-07, "logits/generated": -0.023245109245181084, "logits/real": -0.5685423612594604, "logps/generated": -363.9253234863281, "logps/real": -206.6332244873047, "loss": 0.0058, "rewards/accuracies": 0.987500011920929, "rewards/generated": -18.62310791015625, "rewards/margins": 17.27718162536621, "rewards/real": -1.3459270000457764, "step": 840 }, { "epoch": 0.54, "learning_rate": 2.5355618776671404e-07, "logits/generated": 0.05930706113576889, "logits/real": -0.6789587736129761, "logps/generated": -344.4968566894531, "logps/real": -199.61949157714844, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -19.401254653930664, "rewards/margins": 17.86998748779297, "rewards/real": -1.5312663316726685, "step": 850 }, { "epoch": 0.55, "learning_rate": 2.5e-07, "logits/generated": -0.02314385026693344, "logits/real": -0.5737181305885315, "logps/generated": -353.2101135253906, "logps/real": -192.0513458251953, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -19.10390853881836, "rewards/margins": 17.52272605895996, "rewards/real": -1.581182599067688, "step": 860 }, { "epoch": 0.56, "learning_rate": 2.4644381223328594e-07, "logits/generated": 0.058255720883607864, "logits/real": -0.5887473821640015, "logps/generated": -358.34857177734375, "logps/real": -195.61085510253906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -19.416332244873047, "rewards/margins": 18.28666877746582, "rewards/real": -1.129664421081543, "step": 870 }, { "epoch": 0.56, "learning_rate": 2.4288762446657183e-07, "logits/generated": 0.08289220184087753, "logits/real": -0.5633417367935181, "logps/generated": -371.24578857421875, "logps/real": -209.87020874023438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -21.660175323486328, "rewards/margins": 19.35919189453125, "rewards/real": -2.3009822368621826, "step": 880 }, { "epoch": 0.57, "learning_rate": 2.393314366998578e-07, "logits/generated": 0.05838945508003235, "logits/real": -0.5614827871322632, "logps/generated": -385.99591064453125, "logps/real": -205.49008178710938, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/generated": -22.40130043029785, "rewards/margins": 19.79316520690918, "rewards/real": -2.6081345081329346, "step": 890 }, { "epoch": 0.58, "learning_rate": 2.3577524893314365e-07, "logits/generated": 0.0925709456205368, "logits/real": -0.6390553712844849, "logps/generated": -385.9515075683594, "logps/real": -199.3788299560547, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -22.699363708496094, "rewards/margins": 20.273067474365234, "rewards/real": -2.4262948036193848, "step": 900 }, { "epoch": 0.58, "learning_rate": 2.322190611664296e-07, "logits/generated": -0.001485310262069106, "logits/real": -0.4676692485809326, "logps/generated": -368.5473937988281, "logps/real": -213.4370880126953, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -21.21763801574707, "rewards/margins": 19.124008178710938, "rewards/real": -2.0936279296875, "step": 910 }, { "epoch": 0.59, "learning_rate": 2.2866287339971549e-07, "logits/generated": -0.01602059043943882, "logits/real": -0.6552165150642395, "logps/generated": -399.9144592285156, "logps/real": -208.2056121826172, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -23.339561462402344, "rewards/margins": 20.945171356201172, "rewards/real": -2.3943886756896973, "step": 920 }, { "epoch": 0.6, "learning_rate": 2.251066856330014e-07, "logits/generated": 0.05472123622894287, "logits/real": -0.5271707773208618, "logps/generated": -377.61981201171875, "logps/real": -199.87130737304688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -22.324268341064453, "rewards/margins": 19.352123260498047, "rewards/real": -2.972146511077881, "step": 930 }, { "epoch": 0.6, "learning_rate": 2.2155049786628733e-07, "logits/generated": -0.025077398866415024, "logits/real": -0.5260539054870605, "logps/generated": -394.1059265136719, "logps/real": -210.8734130859375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -23.064285278320312, "rewards/margins": 20.856571197509766, "rewards/real": -2.2077155113220215, "step": 940 }, { "epoch": 0.61, "learning_rate": 2.1799431009957325e-07, "logits/generated": 0.008644811809062958, "logits/real": -0.5587931275367737, "logps/generated": -402.0521545410156, "logps/real": -202.02291870117188, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -23.23007583618164, "rewards/margins": 19.87685203552246, "rewards/real": -3.3532238006591797, "step": 950 }, { "epoch": 0.61, "learning_rate": 2.1443812233285914e-07, "logits/generated": -0.009537003934383392, "logits/real": -0.4732537269592285, "logps/generated": -380.66107177734375, "logps/real": -210.02407836914062, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -22.10177993774414, "rewards/margins": 19.873910903930664, "rewards/real": -2.2278692722320557, "step": 960 }, { "epoch": 0.62, "learning_rate": 2.108819345661451e-07, "logits/generated": 0.04497765749692917, "logits/real": -0.5100663900375366, "logps/generated": -383.2145080566406, "logps/real": -200.04031372070312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -21.68323516845703, "rewards/margins": 19.536602020263672, "rewards/real": -2.146634340286255, "step": 970 }, { "epoch": 0.63, "learning_rate": 2.0732574679943098e-07, "logits/generated": 0.051282238215208054, "logits/real": -0.7591557502746582, "logps/generated": -358.7184143066406, "logps/real": -178.8624267578125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/generated": -19.63443374633789, "rewards/margins": 18.222341537475586, "rewards/real": -1.4120899438858032, "step": 980 }, { "epoch": 0.63, "learning_rate": 2.0376955903271693e-07, "logits/generated": 0.05088866874575615, "logits/real": -0.4944595694541931, "logps/generated": -370.34417724609375, "logps/real": -208.4033966064453, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -21.147663116455078, "rewards/margins": 19.88173484802246, "rewards/real": -1.2659282684326172, "step": 990 }, { "epoch": 0.64, "learning_rate": 2.0021337126600283e-07, "logits/generated": 0.0677376538515091, "logits/real": -0.5833539366722107, "logps/generated": -366.4508361816406, "logps/real": -182.74696350097656, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -20.16263198852539, "rewards/margins": 18.523616790771484, "rewards/real": -1.6390106678009033, "step": 1000 }, { "epoch": 0.64, "eval_logits/generated": 0.04800041764974594, "eval_logits/real": -0.5656154155731201, "eval_logps/generated": -376.97998046875, "eval_logps/real": -197.0876007080078, "eval_loss": 0.004381492733955383, "eval_rewards/accuracies": 0.9984076619148254, "eval_rewards/generated": -21.65926742553711, "eval_rewards/margins": 19.883346557617188, "eval_rewards/real": -1.7759193181991577, "eval_runtime": 321.6683, "eval_samples_per_second": 15.544, "eval_steps_per_second": 0.488, "step": 1000 }, { "epoch": 0.65, "learning_rate": 1.9665718349928875e-07, "logits/generated": 0.10137276351451874, "logits/real": -0.5880488753318787, "logps/generated": -336.6202392578125, "logps/real": -179.52365112304688, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/generated": -18.403759002685547, "rewards/margins": 17.433141708374023, "rewards/real": -0.9706158638000488, "step": 1010 }, { "epoch": 0.65, "learning_rate": 1.931009957325747e-07, "logits/generated": 0.07191314548254013, "logits/real": -0.7045632600784302, "logps/generated": -371.539306640625, "logps/real": -174.90870666503906, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -20.966075897216797, "rewards/margins": 19.68502426147461, "rewards/real": -1.2810522317886353, "step": 1020 }, { "epoch": 0.66, "learning_rate": 1.895448079658606e-07, "logits/generated": 0.021189400926232338, "logits/real": -0.5944398641586304, "logps/generated": -345.79901123046875, "logps/real": -197.9308319091797, "loss": 0.0123, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -18.253501892089844, "rewards/margins": 16.816665649414062, "rewards/real": -1.436837077140808, "step": 1030 }, { "epoch": 0.67, "learning_rate": 1.859886201991465e-07, "logits/generated": 0.028367796912789345, "logits/real": -0.5693169832229614, "logps/generated": -369.0507507324219, "logps/real": -203.1673126220703, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -20.233728408813477, "rewards/margins": 18.871437072753906, "rewards/real": -1.3622897863388062, "step": 1040 }, { "epoch": 0.67, "learning_rate": 1.8243243243243243e-07, "logits/generated": 0.11130674183368683, "logits/real": -0.6199926137924194, "logps/generated": -396.66668701171875, "logps/real": -199.72732543945312, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -22.905460357666016, "rewards/margins": 21.137920379638672, "rewards/real": -1.767538070678711, "step": 1050 }, { "epoch": 0.68, "learning_rate": 1.7887624466571835e-07, "logits/generated": 0.051538754254579544, "logits/real": -0.5638888478279114, "logps/generated": -382.5877380371094, "logps/real": -193.47872924804688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/generated": -22.123483657836914, "rewards/margins": 20.473840713500977, "rewards/real": -1.6496423482894897, "step": 1060 }, { "epoch": 0.68, "learning_rate": 1.7532005689900424e-07, "logits/generated": 0.056547343730926514, "logits/real": -0.5736340284347534, "logps/generated": -399.03253173828125, "logps/real": -205.837158203125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -23.35310173034668, "rewards/margins": 20.6170597076416, "rewards/real": -2.7360422611236572, "step": 1070 }, { "epoch": 0.69, "learning_rate": 1.717638691322902e-07, "logits/generated": 0.12326414883136749, "logits/real": -0.5854828357696533, "logps/generated": -368.6696472167969, "logps/real": -194.57887268066406, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/generated": -21.72028923034668, "rewards/margins": 19.422595977783203, "rewards/real": -2.297696352005005, "step": 1080 }, { "epoch": 0.7, "learning_rate": 1.6820768136557609e-07, "logits/generated": 0.03324466198682785, "logits/real": -0.5589950680732727, "logps/generated": -379.6975402832031, "logps/real": -189.21792602539062, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -20.778305053710938, "rewards/margins": 19.09261131286621, "rewards/real": -1.6856931447982788, "step": 1090 }, { "epoch": 0.7, "learning_rate": 1.64651493598862e-07, "logits/generated": 0.02137361653149128, "logits/real": -0.4791427552700043, "logps/generated": -372.0145263671875, "logps/real": -201.42239379882812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -20.45681381225586, "rewards/margins": 19.019607543945312, "rewards/real": -1.4372069835662842, "step": 1100 }, { "epoch": 0.71, "learning_rate": 1.6109530583214793e-07, "logits/generated": 0.06219879537820816, "logits/real": -0.6077834367752075, "logps/generated": -361.52496337890625, "logps/real": -187.40945434570312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -20.687042236328125, "rewards/margins": 19.435745239257812, "rewards/real": -1.2512991428375244, "step": 1110 }, { "epoch": 0.72, "learning_rate": 1.5753911806543385e-07, "logits/generated": 0.02082439325749874, "logits/real": -0.6671017408370972, "logps/generated": -399.9078369140625, "logps/real": -197.09945678710938, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -23.697898864746094, "rewards/margins": 21.734453201293945, "rewards/real": -1.9634456634521484, "step": 1120 }, { "epoch": 0.72, "learning_rate": 1.5398293029871974e-07, "logits/generated": 0.07103635370731354, "logits/real": -0.5315567851066589, "logps/generated": -390.80413818359375, "logps/real": -209.8661651611328, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -23.044010162353516, "rewards/margins": 20.588970184326172, "rewards/real": -2.4550397396087646, "step": 1130 }, { "epoch": 0.73, "learning_rate": 1.504267425320057e-07, "logits/generated": 0.05514199659228325, "logits/real": -0.5959967374801636, "logps/generated": -372.26470947265625, "logps/real": -187.91561889648438, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -21.262130737304688, "rewards/margins": 19.89595603942871, "rewards/real": -1.366172194480896, "step": 1140 }, { "epoch": 0.74, "learning_rate": 1.4687055476529158e-07, "logits/generated": 0.03651849180459976, "logits/real": -0.5296968817710876, "logps/generated": -375.4384765625, "logps/real": -198.07846069335938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -20.608938217163086, "rewards/margins": 19.235652923583984, "rewards/real": -1.373286247253418, "step": 1150 }, { "epoch": 0.74, "learning_rate": 1.4331436699857753e-07, "logits/generated": -0.031901903450489044, "logits/real": -0.42245230078697205, "logps/generated": -385.30987548828125, "logps/real": -198.95516967773438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -21.40415382385254, "rewards/margins": 19.745981216430664, "rewards/real": -1.6581722497940063, "step": 1160 }, { "epoch": 0.75, "learning_rate": 1.3975817923186345e-07, "logits/generated": 0.07458638399839401, "logits/real": -0.4577251970767975, "logps/generated": -376.2591552734375, "logps/real": -211.70458984375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -21.238889694213867, "rewards/margins": 19.84615135192871, "rewards/real": -1.3927379846572876, "step": 1170 }, { "epoch": 0.75, "learning_rate": 1.3620199146514935e-07, "logits/generated": 0.06895387917757034, "logits/real": -0.4532155990600586, "logps/generated": -361.93939208984375, "logps/real": -179.66732788085938, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -20.205707550048828, "rewards/margins": 18.50905990600586, "rewards/real": -1.6966466903686523, "step": 1180 }, { "epoch": 0.76, "learning_rate": 1.326458036984353e-07, "logits/generated": 0.026944806799292564, "logits/real": -0.5042958855628967, "logps/generated": -378.512939453125, "logps/real": -193.09017944335938, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -21.360944747924805, "rewards/margins": 19.692447662353516, "rewards/real": -1.6684958934783936, "step": 1190 }, { "epoch": 0.77, "learning_rate": 1.290896159317212e-07, "logits/generated": 0.05960095673799515, "logits/real": -0.5585105419158936, "logps/generated": -394.94696044921875, "logps/real": -191.21163940429688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -23.144710540771484, "rewards/margins": 21.34160041809082, "rewards/real": -1.8031113147735596, "step": 1200 }, { "epoch": 0.77, "learning_rate": 1.255334281650071e-07, "logits/generated": 0.04382283240556717, "logits/real": -0.4478573203086853, "logps/generated": -375.7854309082031, "logps/real": -205.15869140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -21.472270965576172, "rewards/margins": 19.42158317565918, "rewards/real": -2.050690174102783, "step": 1210 }, { "epoch": 0.78, "learning_rate": 1.2197724039829303e-07, "logits/generated": 0.07887273281812668, "logits/real": -0.6188726425170898, "logps/generated": -378.6775817871094, "logps/real": -190.72250366210938, "loss": 0.0053, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.472759246826172, "rewards/margins": 19.55636215209961, "rewards/real": -1.916398286819458, "step": 1220 }, { "epoch": 0.79, "learning_rate": 1.1842105263157894e-07, "logits/generated": 0.0605277419090271, "logits/real": -0.6036852598190308, "logps/generated": -369.0652160644531, "logps/real": -183.35617065429688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -20.453426361083984, "rewards/margins": 18.384937286376953, "rewards/real": -2.0684916973114014, "step": 1230 }, { "epoch": 0.79, "learning_rate": 1.1486486486486487e-07, "logits/generated": 0.06586415320634842, "logits/real": -0.5749965906143188, "logps/generated": -372.5237121582031, "logps/real": -195.25076293945312, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -20.654064178466797, "rewards/margins": 19.203500747680664, "rewards/real": -1.450567364692688, "step": 1240 }, { "epoch": 0.8, "learning_rate": 1.1130867709815078e-07, "logits/generated": 0.08540566265583038, "logits/real": -0.6170912981033325, "logps/generated": -374.17303466796875, "logps/real": -194.3673095703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -21.099042892456055, "rewards/margins": 19.65339469909668, "rewards/real": -1.4456470012664795, "step": 1250 }, { "epoch": 0.81, "learning_rate": 1.077524893314367e-07, "logits/generated": 0.06357467174530029, "logits/real": -0.687986433506012, "logps/generated": -392.5172424316406, "logps/real": -191.45401000976562, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -22.80368423461914, "rewards/margins": 20.335935592651367, "rewards/real": -2.467747688293457, "step": 1260 }, { "epoch": 0.81, "learning_rate": 1.0419630156472262e-07, "logits/generated": 0.016886264085769653, "logits/real": -0.4550386965274811, "logps/generated": -390.584228515625, "logps/real": -218.6328887939453, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -23.16832733154297, "rewards/margins": 20.595144271850586, "rewards/real": -2.5731775760650635, "step": 1270 }, { "epoch": 0.82, "learning_rate": 1.0064011379800854e-07, "logits/generated": -0.022652573883533478, "logits/real": -0.4286680817604065, "logps/generated": -379.67706298828125, "logps/real": -206.49093627929688, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -21.629016876220703, "rewards/margins": 19.87604522705078, "rewards/real": -1.7529706954956055, "step": 1280 }, { "epoch": 0.83, "learning_rate": 9.708392603129445e-08, "logits/generated": 0.056550562381744385, "logits/real": -0.4019811749458313, "logps/generated": -370.08251953125, "logps/real": -197.77896118164062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -21.446491241455078, "rewards/margins": 19.191186904907227, "rewards/real": -2.255300521850586, "step": 1290 }, { "epoch": 0.83, "learning_rate": 9.352773826458037e-08, "logits/generated": 0.008305387571454048, "logits/real": -0.5809749364852905, "logps/generated": -400.37347412109375, "logps/real": -197.43136596679688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -23.287565231323242, "rewards/margins": 21.73343276977539, "rewards/real": -1.5541306734085083, "step": 1300 }, { "epoch": 0.84, "learning_rate": 8.997155049786629e-08, "logits/generated": 0.046849604696035385, "logits/real": -0.6843993663787842, "logps/generated": -390.62030029296875, "logps/real": -181.0294647216797, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -22.432353973388672, "rewards/margins": 21.127017974853516, "rewards/real": -1.3053334951400757, "step": 1310 }, { "epoch": 0.84, "learning_rate": 8.64153627311522e-08, "logits/generated": 0.052729617804288864, "logits/real": -0.5115618109703064, "logps/generated": -375.7951354980469, "logps/real": -196.12313842773438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -21.45965003967285, "rewards/margins": 19.496532440185547, "rewards/real": -1.9631179571151733, "step": 1320 }, { "epoch": 0.85, "learning_rate": 8.285917496443812e-08, "logits/generated": 0.035291388630867004, "logits/real": -0.5863553881645203, "logps/generated": -395.09197998046875, "logps/real": -202.11160278320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -22.766231536865234, "rewards/margins": 20.85399627685547, "rewards/real": -1.912235975265503, "step": 1330 }, { "epoch": 0.86, "learning_rate": 7.930298719772404e-08, "logits/generated": 0.07267922163009644, "logits/real": -0.49688243865966797, "logps/generated": -373.9420166015625, "logps/real": -205.9117889404297, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -21.421688079833984, "rewards/margins": 19.645835876464844, "rewards/real": -1.7758514881134033, "step": 1340 }, { "epoch": 0.86, "learning_rate": 7.574679943100994e-08, "logits/generated": 0.07065759599208832, "logits/real": -0.5725045204162598, "logps/generated": -378.9009704589844, "logps/real": -181.6143035888672, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -22.113445281982422, "rewards/margins": 20.17637062072754, "rewards/real": -1.937076210975647, "step": 1350 }, { "epoch": 0.87, "learning_rate": 7.219061166429587e-08, "logits/generated": 0.05919628217816353, "logits/real": -0.5659859776496887, "logps/generated": -379.78924560546875, "logps/real": -190.56930541992188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -22.432254791259766, "rewards/margins": 20.832565307617188, "rewards/real": -1.5996865034103394, "step": 1360 }, { "epoch": 0.88, "learning_rate": 6.863442389758179e-08, "logits/generated": 0.044001154601573944, "logits/real": -0.5443329215049744, "logps/generated": -378.08587646484375, "logps/real": -204.49148559570312, "loss": 0.0139, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.367244720458984, "rewards/margins": 19.974727630615234, "rewards/real": -1.3925195932388306, "step": 1370 }, { "epoch": 0.88, "learning_rate": 6.507823613086771e-08, "logits/generated": 0.046902261674404144, "logits/real": -0.5738197565078735, "logps/generated": -380.58148193359375, "logps/real": -207.26724243164062, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -21.340457916259766, "rewards/margins": 19.454242706298828, "rewards/real": -1.886214017868042, "step": 1380 }, { "epoch": 0.89, "learning_rate": 6.152204836415363e-08, "logits/generated": 0.0785381942987442, "logits/real": -0.569561243057251, "logps/generated": -367.63519287109375, "logps/real": -203.30282592773438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -21.134796142578125, "rewards/margins": 18.931886672973633, "rewards/real": -2.2029080390930176, "step": 1390 }, { "epoch": 0.9, "learning_rate": 5.796586059743954e-08, "logits/generated": -0.007580602075904608, "logits/real": -0.5636885762214661, "logps/generated": -381.52642822265625, "logps/real": -198.91770935058594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -21.588598251342773, "rewards/margins": 19.854793548583984, "rewards/real": -1.7338052988052368, "step": 1400 }, { "epoch": 0.9, "learning_rate": 5.4409672830725456e-08, "logits/generated": 0.0493854358792305, "logits/real": -0.5976940989494324, "logps/generated": -399.3151550292969, "logps/real": -194.88201904296875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -22.92904281616211, "rewards/margins": 21.451683044433594, "rewards/real": -1.477359652519226, "step": 1410 }, { "epoch": 0.91, "learning_rate": 5.0853485064011376e-08, "logits/generated": 0.08968095481395721, "logits/real": -0.6684913039207458, "logps/generated": -380.35552978515625, "logps/real": -183.67074584960938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -21.802627563476562, "rewards/margins": 20.21030616760254, "rewards/real": -1.5923227071762085, "step": 1420 }, { "epoch": 0.91, "learning_rate": 4.72972972972973e-08, "logits/generated": 0.08673722296953201, "logits/real": -0.6326942443847656, "logps/generated": -379.32373046875, "logps/real": -193.1202392578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -22.219440460205078, "rewards/margins": 20.84255599975586, "rewards/real": -1.3768887519836426, "step": 1430 }, { "epoch": 0.92, "learning_rate": 4.374110953058322e-08, "logits/generated": 0.031193479895591736, "logits/real": -0.5564336776733398, "logps/generated": -359.17181396484375, "logps/real": -193.7860107421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -20.49233055114746, "rewards/margins": 18.665729522705078, "rewards/real": -1.8266017436981201, "step": 1440 }, { "epoch": 0.93, "learning_rate": 4.018492176386913e-08, "logits/generated": 0.09109187871217728, "logits/real": -0.5564282536506653, "logps/generated": -374.38702392578125, "logps/real": -194.70132446289062, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -20.670825958251953, "rewards/margins": 19.197032928466797, "rewards/real": -1.4737932682037354, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.6628733997155046e-08, "logits/generated": 0.08201099932193756, "logits/real": -0.6359506845474243, "logps/generated": -386.39190673828125, "logps/real": -176.69708251953125, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.43024444580078, "rewards/margins": 20.56548309326172, "rewards/real": -1.8647606372833252, "step": 1460 }, { "epoch": 0.94, "learning_rate": 3.3072546230440967e-08, "logits/generated": 0.08995040506124496, "logits/real": -0.5263134241104126, "logps/generated": -378.1429138183594, "logps/real": -191.90821838378906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -21.608402252197266, "rewards/margins": 19.824888229370117, "rewards/real": -1.7835137844085693, "step": 1470 }, { "epoch": 0.95, "learning_rate": 2.9516358463726884e-08, "logits/generated": 0.030845308676362038, "logits/real": -0.5490936040878296, "logps/generated": -369.69525146484375, "logps/real": -200.53517150878906, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -20.388917922973633, "rewards/margins": 19.097196578979492, "rewards/real": -1.2917201519012451, "step": 1480 }, { "epoch": 0.95, "learning_rate": 2.59601706970128e-08, "logits/generated": 0.0783102884888649, "logits/real": -0.5921775698661804, "logps/generated": -361.94158935546875, "logps/real": -206.4454803466797, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -20.625545501708984, "rewards/margins": 19.025144577026367, "rewards/real": -1.6004012823104858, "step": 1490 }, { "epoch": 0.96, "learning_rate": 2.240398293029872e-08, "logits/generated": 0.06931595504283905, "logits/real": -0.5738247632980347, "logps/generated": -371.99847412109375, "logps/real": -194.58804321289062, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -21.12423324584961, "rewards/margins": 19.16390037536621, "rewards/real": -1.9603347778320312, "step": 1500 }, { "epoch": 0.96, "eval_logits/generated": 0.05270044878125191, "eval_logits/real": -0.5586805939674377, "eval_logps/generated": -369.19976806640625, "eval_logps/real": -194.6499481201172, "eval_loss": 0.0032336623407900333, "eval_rewards/accuracies": 0.9992038011550903, "eval_rewards/generated": -20.88124656677246, "eval_rewards/margins": 19.349092483520508, "eval_rewards/real": -1.5321544408798218, "eval_runtime": 319.8593, "eval_samples_per_second": 15.632, "eval_steps_per_second": 0.491, "step": 1500 }, { "epoch": 0.97, "learning_rate": 1.8847795163584636e-08, "logits/generated": 0.010430006310343742, "logits/real": -0.5986403226852417, "logps/generated": -369.27630615234375, "logps/real": -194.7587127685547, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -20.733909606933594, "rewards/margins": 19.032808303833008, "rewards/real": -1.7011024951934814, "step": 1510 }, { "epoch": 0.97, "learning_rate": 1.5291607396870554e-08, "logits/generated": 0.0546521432697773, "logits/real": -0.5425094366073608, "logps/generated": -366.5072021484375, "logps/real": -192.2476348876953, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -21.005374908447266, "rewards/margins": 19.410686492919922, "rewards/real": -1.594689965248108, "step": 1520 }, { "epoch": 0.98, "learning_rate": 1.1735419630156473e-08, "logits/generated": 0.07052431255578995, "logits/real": -0.591436505317688, "logps/generated": -371.2902526855469, "logps/real": -192.4247589111328, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -21.630722045898438, "rewards/margins": 20.07946014404297, "rewards/real": -1.5512605905532837, "step": 1530 }, { "epoch": 0.99, "learning_rate": 8.179231863442388e-09, "logits/generated": 0.043054401874542236, "logits/real": -0.547429084777832, "logps/generated": -377.97113037109375, "logps/real": -199.3291473388672, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -21.497386932373047, "rewards/margins": 19.968860626220703, "rewards/real": -1.5285276174545288, "step": 1540 }, { "epoch": 0.99, "learning_rate": 4.623044096728307e-09, "logits/generated": 0.04375555366277695, "logits/real": -0.5209413766860962, "logps/generated": -369.3624267578125, "logps/real": -201.68338012695312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -21.231218338012695, "rewards/margins": 19.51310157775879, "rewards/real": -1.718117356300354, "step": 1550 }, { "epoch": 1.0, "learning_rate": 1.0668563300142248e-09, "logits/generated": 0.011072209104895592, "logits/real": -0.5893142223358154, "logps/generated": -360.9320983886719, "logps/real": -193.64450073242188, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -19.670141220092773, "rewards/margins": 17.939659118652344, "rewards/real": -1.7304834127426147, "step": 1560 }, { "epoch": 1.0, "step": 1563, "total_flos": 0.0, "train_loss": 0.03590757104014634, "train_runtime": 9394.7927, "train_samples_per_second": 5.322, "train_steps_per_second": 0.166 } ], "logging_steps": 10, "max_steps": 1563, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }