|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1563, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.1847133757961784e-09, |
|
"logits/generated": -0.09026163071393967, |
|
"logits/real": -0.800382137298584, |
|
"logps/generated": -180.3804931640625, |
|
"logps/real": -164.2542724609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logits/generated": -0.08320371806621552, |
|
"logits/real": -0.7675037980079651, |
|
"logps/generated": -161.61961364746094, |
|
"logps/real": -178.428466796875, |
|
"loss": 0.6945, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/generated": 0.007918823510408401, |
|
"rewards/margins": -0.007639557123184204, |
|
"rewards/real": 0.00027926763868890703, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logits/generated": -0.1008232831954956, |
|
"logits/real": -0.9005411863327026, |
|
"logps/generated": -165.965576171875, |
|
"logps/real": -174.21055603027344, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/generated": -0.011105736717581749, |
|
"rewards/margins": 0.017230339348316193, |
|
"rewards/real": 0.006124601699411869, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logits/generated": -0.08152450621128082, |
|
"logits/real": -0.7754586935043335, |
|
"logps/generated": -165.53176879882812, |
|
"logps/real": -185.11846923828125, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -0.0970459133386612, |
|
"rewards/margins": 0.10573717206716537, |
|
"rewards/real": 0.008691254071891308, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logits/generated": 0.019896607846021652, |
|
"logits/real": -0.8798272013664246, |
|
"logps/generated": -153.7320556640625, |
|
"logps/real": -180.03219604492188, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.25305813550949097, |
|
"rewards/margins": 0.2757338285446167, |
|
"rewards/real": 0.022675666958093643, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logits/generated": -0.05750712752342224, |
|
"logits/real": -0.8368139266967773, |
|
"logps/generated": -161.1537322998047, |
|
"logps/real": -172.9757537841797, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.4674338400363922, |
|
"rewards/margins": 0.5483574271202087, |
|
"rewards/real": 0.0809236392378807, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logits/generated": -0.011790583841502666, |
|
"logits/real": -0.7276524305343628, |
|
"logps/generated": -175.3116455078125, |
|
"logps/real": -182.06173706054688, |
|
"loss": 0.3557, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.9212247729301453, |
|
"rewards/margins": 1.0005762577056885, |
|
"rewards/real": 0.07935139536857605, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logits/generated": 0.037871506065130234, |
|
"logits/real": -0.757712721824646, |
|
"logps/generated": -168.0603790283203, |
|
"logps/real": -185.0572509765625, |
|
"loss": 0.2403, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.2728922367095947, |
|
"rewards/margins": 1.4110310077667236, |
|
"rewards/real": 0.1381385624408722, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logits/generated": 0.008935372345149517, |
|
"logits/real": -0.8579050302505493, |
|
"logps/generated": -176.29393005371094, |
|
"logps/real": -171.60939025878906, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.7076524496078491, |
|
"rewards/margins": 1.8567367792129517, |
|
"rewards/real": 0.1490844190120697, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logits/generated": -0.0024472028017044067, |
|
"logits/real": -0.8354307413101196, |
|
"logps/generated": -194.20306396484375, |
|
"logps/real": -181.1865997314453, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.3786566257476807, |
|
"rewards/margins": 2.5542044639587402, |
|
"rewards/real": 0.17554807662963867, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logits/generated": -0.007845225743949413, |
|
"logits/real": -0.8014926910400391, |
|
"logps/generated": -193.57733154296875, |
|
"logps/real": -172.54800415039062, |
|
"loss": 0.117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.8054349422454834, |
|
"rewards/margins": 2.9194352626800537, |
|
"rewards/real": 0.11400020122528076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logits/generated": 0.023908555507659912, |
|
"logits/real": -0.7339428663253784, |
|
"logps/generated": -195.67071533203125, |
|
"logps/real": -188.1604461669922, |
|
"loss": 0.0891, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.3142905235290527, |
|
"rewards/margins": 3.3457164764404297, |
|
"rewards/real": 0.03142569214105606, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logits/generated": 0.04019797593355179, |
|
"logits/real": -0.6734101176261902, |
|
"logps/generated": -209.1021728515625, |
|
"logps/real": -176.04254150390625, |
|
"loss": 0.0794, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.002293109893799, |
|
"rewards/margins": 4.102808475494385, |
|
"rewards/real": 0.1005152240395546, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logits/generated": 0.07181330770254135, |
|
"logits/real": -0.6598816514015198, |
|
"logps/generated": -198.8852081298828, |
|
"logps/real": -184.90480041503906, |
|
"loss": 0.0761, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.574056625366211, |
|
"rewards/margins": 4.542202949523926, |
|
"rewards/real": -0.031853675842285156, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logits/generated": 0.09077299386262894, |
|
"logits/real": -0.7375579476356506, |
|
"logps/generated": -217.79299926757812, |
|
"logps/real": -169.71145629882812, |
|
"loss": 0.048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.316611289978027, |
|
"rewards/margins": 5.399137020111084, |
|
"rewards/real": 0.08252569288015366, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logits/generated": 0.019507689401507378, |
|
"logits/real": -0.529100775718689, |
|
"logps/generated": -227.7717742919922, |
|
"logps/real": -181.4638214111328, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -5.985389709472656, |
|
"rewards/margins": 5.939720630645752, |
|
"rewards/real": -0.04566919058561325, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.989331436699858e-07, |
|
"logits/generated": 0.07675327360630035, |
|
"logits/real": -0.6792179346084595, |
|
"logps/generated": -230.45321655273438, |
|
"logps/real": -177.54190063476562, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.596798896789551, |
|
"rewards/margins": 6.574460506439209, |
|
"rewards/real": -0.02233867719769478, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.953769559032717e-07, |
|
"logits/generated": 0.10339117050170898, |
|
"logits/real": -0.6523188352584839, |
|
"logps/generated": -238.82418823242188, |
|
"logps/real": -172.30154418945312, |
|
"loss": 0.044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.214089870452881, |
|
"rewards/margins": 7.0066819190979, |
|
"rewards/real": -0.2074071168899536, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.918207681365576e-07, |
|
"logits/generated": -0.02095809206366539, |
|
"logits/real": -0.5748814344406128, |
|
"logps/generated": -243.3128204345703, |
|
"logps/real": -193.81964111328125, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.754981994628906, |
|
"rewards/margins": 7.589665412902832, |
|
"rewards/real": -0.16531690955162048, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.882645803698435e-07, |
|
"logits/generated": 0.08188272267580032, |
|
"logits/real": -0.6443125009536743, |
|
"logps/generated": -239.46932983398438, |
|
"logps/real": -188.2861328125, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -7.951255798339844, |
|
"rewards/margins": 7.745802402496338, |
|
"rewards/real": -0.2054535448551178, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.847083926031294e-07, |
|
"logits/generated": 0.0788329690694809, |
|
"logits/real": -0.6880910396575928, |
|
"logps/generated": -249.65542602539062, |
|
"logps/real": -176.37173461914062, |
|
"loss": 0.045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.179863929748535, |
|
"rewards/margins": 7.825772762298584, |
|
"rewards/real": -0.35409015417099, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.811522048364154e-07, |
|
"logits/generated": 0.08456435799598694, |
|
"logits/real": -0.7073934674263, |
|
"logps/generated": -243.7119903564453, |
|
"logps/real": -164.15887451171875, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.219891548156738, |
|
"rewards/margins": 8.312192916870117, |
|
"rewards/real": 0.09230276197195053, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.775960170697012e-07, |
|
"logits/generated": 0.05936474725604057, |
|
"logits/real": -0.6907894015312195, |
|
"logps/generated": -248.00094604492188, |
|
"logps/real": -162.20657348632812, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -8.413338661193848, |
|
"rewards/margins": 8.484495162963867, |
|
"rewards/real": 0.07115854322910309, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7403982930298717e-07, |
|
"logits/generated": 0.10569562762975693, |
|
"logits/real": -0.7800209522247314, |
|
"logps/generated": -250.5619659423828, |
|
"logps/real": -172.96253967285156, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.249897003173828, |
|
"rewards/margins": 9.181459426879883, |
|
"rewards/real": -0.06843843311071396, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7048364153627306e-07, |
|
"logits/generated": 0.053275883197784424, |
|
"logits/real": -0.5331145524978638, |
|
"logps/generated": -257.272705078125, |
|
"logps/real": -204.5064239501953, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.000371932983398, |
|
"rewards/margins": 8.605985641479492, |
|
"rewards/real": -0.39438483119010925, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.66927453769559e-07, |
|
"logits/generated": 0.03677482530474663, |
|
"logits/real": -0.6608942151069641, |
|
"logps/generated": -256.04937744140625, |
|
"logps/real": -171.3863067626953, |
|
"loss": 0.027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.227904319763184, |
|
"rewards/margins": 8.95788860321045, |
|
"rewards/real": -0.2700158953666687, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.633712660028449e-07, |
|
"logits/generated": 0.047906339168548584, |
|
"logits/real": -0.762579083442688, |
|
"logps/generated": -265.7643127441406, |
|
"logps/real": -178.68174743652344, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.940900802612305, |
|
"rewards/margins": 9.152986526489258, |
|
"rewards/real": -0.7879153490066528, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5981507823613085e-07, |
|
"logits/generated": 0.0478428415954113, |
|
"logits/real": -0.6810993552207947, |
|
"logps/generated": -258.02484130859375, |
|
"logps/real": -183.0740509033203, |
|
"loss": 0.0224, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -10.063767433166504, |
|
"rewards/margins": 9.119011878967285, |
|
"rewards/real": -0.9447552561759949, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.562588904694168e-07, |
|
"logits/generated": 0.07144404947757721, |
|
"logits/real": -0.6452735662460327, |
|
"logps/generated": -260.16082763671875, |
|
"logps/real": -188.64410400390625, |
|
"loss": 0.0228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.373169898986816, |
|
"rewards/margins": 9.765576362609863, |
|
"rewards/real": -0.6075931787490845, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5270270270270264e-07, |
|
"logits/generated": 0.08266101777553558, |
|
"logits/real": -0.716413140296936, |
|
"logps/generated": -276.4801330566406, |
|
"logps/real": -185.527099609375, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.44568920135498, |
|
"rewards/margins": 10.838994979858398, |
|
"rewards/real": -0.6066935062408447, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.491465149359886e-07, |
|
"logits/generated": 0.0009769715834408998, |
|
"logits/real": -0.7033424973487854, |
|
"logps/generated": -272.52093505859375, |
|
"logps/real": -179.6094512939453, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -11.34181022644043, |
|
"rewards/margins": 10.347066879272461, |
|
"rewards/real": -0.9947425127029419, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4559032716927454e-07, |
|
"logits/generated": 0.07658599317073822, |
|
"logits/real": -0.5730828046798706, |
|
"logps/generated": -276.45294189453125, |
|
"logps/real": -200.97845458984375, |
|
"loss": 0.0238, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -11.649636268615723, |
|
"rewards/margins": 10.637258529663086, |
|
"rewards/real": -1.0123790502548218, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.420341394025605e-07, |
|
"logits/generated": 0.019550871104002, |
|
"logits/real": -0.5840874910354614, |
|
"logps/generated": -280.53192138671875, |
|
"logps/real": -189.03172302246094, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.912101745605469, |
|
"rewards/margins": 11.156845092773438, |
|
"rewards/real": -0.7552580237388611, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.384779516358463e-07, |
|
"logits/generated": 0.016371339559555054, |
|
"logits/real": -0.5726695656776428, |
|
"logps/generated": -266.34918212890625, |
|
"logps/real": -202.59814453125, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.33076286315918, |
|
"rewards/margins": 9.75381851196289, |
|
"rewards/real": -1.576944351196289, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3492176386913227e-07, |
|
"logits/generated": 0.0978037491440773, |
|
"logits/real": -0.6194095611572266, |
|
"logps/generated": -282.8346862792969, |
|
"logps/real": -204.97592163085938, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.424463272094727, |
|
"rewards/margins": 11.069517135620117, |
|
"rewards/real": -1.3549461364746094, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.313655761024182e-07, |
|
"logits/generated": 0.10168097913265228, |
|
"logits/real": -0.625900149345398, |
|
"logps/generated": -289.7073974609375, |
|
"logps/real": -203.98751831054688, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.3988676071167, |
|
"rewards/margins": 11.112601280212402, |
|
"rewards/real": -2.2862656116485596, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.278093883357041e-07, |
|
"logits/generated": 0.05643658712506294, |
|
"logits/real": -0.613519549369812, |
|
"logps/generated": -285.1255187988281, |
|
"logps/real": -188.86361694335938, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.26307487487793, |
|
"rewards/margins": 11.37584400177002, |
|
"rewards/real": -0.8872316479682922, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2425320056899e-07, |
|
"logits/generated": 0.017720462754368782, |
|
"logits/real": -0.5340021848678589, |
|
"logps/generated": -296.48681640625, |
|
"logps/real": -216.097900390625, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.157635688781738, |
|
"rewards/margins": 11.516908645629883, |
|
"rewards/real": -1.640728235244751, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2069701280227595e-07, |
|
"logits/generated": -0.021676432341337204, |
|
"logits/real": -0.7345054745674133, |
|
"logps/generated": -295.46429443359375, |
|
"logps/real": -198.0674591064453, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.902421951293945, |
|
"rewards/margins": 11.71843147277832, |
|
"rewards/real": -1.1839900016784668, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1714082503556185e-07, |
|
"logits/generated": 0.07925084233283997, |
|
"logits/real": -0.5841912031173706, |
|
"logps/generated": -298.8299255371094, |
|
"logps/real": -189.8162384033203, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.550666809082031, |
|
"rewards/margins": 12.233985900878906, |
|
"rewards/real": -1.3166826963424683, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.135846372688478e-07, |
|
"logits/generated": 0.025105977430939674, |
|
"logits/real": -0.6952486634254456, |
|
"logps/generated": -279.43560791015625, |
|
"logps/real": -197.62535095214844, |
|
"loss": 0.0192, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.729209899902344, |
|
"rewards/margins": 11.284834861755371, |
|
"rewards/real": -1.444373369216919, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.100284495021337e-07, |
|
"logits/generated": 0.05239884927868843, |
|
"logits/real": -0.6132751703262329, |
|
"logps/generated": -305.1339416503906, |
|
"logps/real": -191.66676330566406, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.304582595825195, |
|
"rewards/margins": 12.9876127243042, |
|
"rewards/real": -1.3169682025909424, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.064722617354196e-07, |
|
"logits/generated": 0.034467507153749466, |
|
"logits/real": -0.7083422541618347, |
|
"logps/generated": -304.05780029296875, |
|
"logps/real": -200.8745880126953, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.695897102355957, |
|
"rewards/margins": 12.881891250610352, |
|
"rewards/real": -1.8140056133270264, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0291607396870553e-07, |
|
"logits/generated": 0.03394109755754471, |
|
"logits/real": -0.6564615964889526, |
|
"logps/generated": -291.5844421386719, |
|
"logps/real": -188.79075622558594, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.64738941192627, |
|
"rewards/margins": 12.21278190612793, |
|
"rewards/real": -1.4346075057983398, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.993598862019915e-07, |
|
"logits/generated": 0.07299565523862839, |
|
"logits/real": -0.6604090332984924, |
|
"logps/generated": -309.156982421875, |
|
"logps/real": -203.24072265625, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.03246784210205, |
|
"rewards/margins": 12.468481063842773, |
|
"rewards/real": -2.563986301422119, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9580369843527737e-07, |
|
"logits/generated": 0.0038179433904588223, |
|
"logits/real": -0.6290857195854187, |
|
"logps/generated": -326.712890625, |
|
"logps/real": -202.99484252929688, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.489263534545898, |
|
"rewards/margins": 12.454556465148926, |
|
"rewards/real": -3.0347084999084473, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9224751066856327e-07, |
|
"logits/generated": 0.031186867505311966, |
|
"logits/real": -0.4273042678833008, |
|
"logps/generated": -323.9992980957031, |
|
"logps/real": -225.23263549804688, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.867002487182617, |
|
"rewards/margins": 13.411462783813477, |
|
"rewards/real": -2.455543041229248, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.886913229018492e-07, |
|
"logits/generated": 0.011080889031291008, |
|
"logits/real": -0.6169866919517517, |
|
"logps/generated": -313.5555114746094, |
|
"logps/real": -195.68914794921875, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.070566177368164, |
|
"rewards/margins": 13.439569473266602, |
|
"rewards/real": -1.630995750427246, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.851351351351351e-07, |
|
"logits/generated": -0.029481088742613792, |
|
"logits/real": -0.6428096890449524, |
|
"logps/generated": -306.30755615234375, |
|
"logps/real": -198.16749572753906, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -14.478108406066895, |
|
"rewards/margins": 12.663415908813477, |
|
"rewards/real": -1.814692497253418, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8157894736842105e-07, |
|
"logits/generated": 0.015122579410672188, |
|
"logits/real": -0.6262849569320679, |
|
"logps/generated": -301.0884704589844, |
|
"logps/real": -190.44667053222656, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.957537651062012, |
|
"rewards/margins": 11.782486915588379, |
|
"rewards/real": -2.1750526428222656, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7802275960170695e-07, |
|
"logits/generated": -0.011642997153103352, |
|
"logits/real": -0.5468995571136475, |
|
"logps/generated": -322.72308349609375, |
|
"logps/real": -188.96719360351562, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.367193222045898, |
|
"rewards/margins": 14.392413139343262, |
|
"rewards/real": -0.9747812151908875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/generated": 0.015148750506341457, |
|
"eval_logits/real": -0.618992805480957, |
|
"eval_logps/generated": -304.2368469238281, |
|
"eval_logps/real": -186.9757843017578, |
|
"eval_loss": 0.010758413933217525, |
|
"eval_rewards/accuracies": 0.9976114630699158, |
|
"eval_rewards/generated": -14.384950637817383, |
|
"eval_rewards/margins": 13.620210647583008, |
|
"eval_rewards/real": -0.7647396922111511, |
|
"eval_runtime": 424.798, |
|
"eval_samples_per_second": 11.77, |
|
"eval_steps_per_second": 0.37, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7446657183499284e-07, |
|
"logits/generated": 0.04711627587676048, |
|
"logits/real": -0.6923630833625793, |
|
"logps/generated": -311.3076171875, |
|
"logps/real": -192.34048461914062, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.26708698272705, |
|
"rewards/margins": 13.872920036315918, |
|
"rewards/real": -1.394165277481079, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.709103840682788e-07, |
|
"logits/generated": 0.03908165544271469, |
|
"logits/real": -0.631860613822937, |
|
"logps/generated": -310.2327880859375, |
|
"logps/real": -186.8871612548828, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.100445747375488, |
|
"rewards/margins": 13.6294527053833, |
|
"rewards/real": -1.470994234085083, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6735419630156474e-07, |
|
"logits/generated": 0.04315485060214996, |
|
"logits/real": -0.5955843329429626, |
|
"logps/generated": -324.2378845214844, |
|
"logps/real": -187.12576293945312, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.153406143188477, |
|
"rewards/margins": 14.313261032104492, |
|
"rewards/real": -1.8401434421539307, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.637980085348506e-07, |
|
"logits/generated": -0.03953739255666733, |
|
"logits/real": -0.6422590017318726, |
|
"logps/generated": -323.29638671875, |
|
"logps/real": -203.27786254882812, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.259048461914062, |
|
"rewards/margins": 13.929720878601074, |
|
"rewards/real": -2.3293280601501465, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.602418207681365e-07, |
|
"logits/generated": -0.04372464120388031, |
|
"logits/real": -0.6528729796409607, |
|
"logps/generated": -336.2181701660156, |
|
"logps/real": -208.175048828125, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.822994232177734, |
|
"rewards/margins": 14.49010944366455, |
|
"rewards/real": -2.3328843116760254, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5668563300142247e-07, |
|
"logits/generated": -0.020463664084672928, |
|
"logits/real": -0.5609344244003296, |
|
"logps/generated": -317.5655822753906, |
|
"logps/real": -197.87765502929688, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.073542594909668, |
|
"rewards/margins": 12.639741897583008, |
|
"rewards/real": -2.433799982070923, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5312944523470837e-07, |
|
"logits/generated": 0.016961723566055298, |
|
"logits/real": -0.7112401723861694, |
|
"logps/generated": -322.7782897949219, |
|
"logps/real": -191.416015625, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.618112564086914, |
|
"rewards/margins": 14.197979927062988, |
|
"rewards/real": -1.4201303720474243, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.495732574679943e-07, |
|
"logits/generated": 0.03125763684511185, |
|
"logits/real": -0.6455451250076294, |
|
"logps/generated": -309.74517822265625, |
|
"logps/real": -192.3853302001953, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.057307243347168, |
|
"rewards/margins": 13.615495681762695, |
|
"rewards/real": -1.4418113231658936, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.460170697012802e-07, |
|
"logits/generated": 0.030627410858869553, |
|
"logits/real": -0.6639117002487183, |
|
"logps/generated": -330.22894287109375, |
|
"logps/real": -195.18409729003906, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.0391902923584, |
|
"rewards/margins": 14.437360763549805, |
|
"rewards/real": -1.6018317937850952, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.424608819345661e-07, |
|
"logits/generated": 0.06100524589419365, |
|
"logits/real": -0.6973519325256348, |
|
"logps/generated": -319.52166748046875, |
|
"logps/real": -179.44314575195312, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.46537971496582, |
|
"rewards/margins": 14.726943969726562, |
|
"rewards/real": -0.7384368777275085, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3890469416785205e-07, |
|
"logits/generated": 0.06144358962774277, |
|
"logits/real": -0.6208174228668213, |
|
"logps/generated": -311.03912353515625, |
|
"logps/real": -192.3145751953125, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.110832214355469, |
|
"rewards/margins": 13.566309928894043, |
|
"rewards/real": -1.544521689414978, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.35348506401138e-07, |
|
"logits/generated": -0.018809977918863297, |
|
"logits/real": -0.6647375822067261, |
|
"logps/generated": -331.81158447265625, |
|
"logps/real": -182.822021484375, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.408363342285156, |
|
"rewards/margins": 15.543344497680664, |
|
"rewards/real": -0.8650201559066772, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3179231863442384e-07, |
|
"logits/generated": -0.014424433931708336, |
|
"logits/real": -0.5232574939727783, |
|
"logps/generated": -321.45355224609375, |
|
"logps/real": -204.66917419433594, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.32326889038086, |
|
"rewards/margins": 14.821706771850586, |
|
"rewards/real": -1.501560091972351, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.282361308677098e-07, |
|
"logits/generated": -0.04307156428694725, |
|
"logits/real": -0.6383107900619507, |
|
"logps/generated": -332.80963134765625, |
|
"logps/real": -191.7883758544922, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.911900520324707, |
|
"rewards/margins": 14.832717895507812, |
|
"rewards/real": -1.0791819095611572, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2467994310099573e-07, |
|
"logits/generated": 0.020891521126031876, |
|
"logits/real": -0.6430577039718628, |
|
"logps/generated": -350.6064147949219, |
|
"logps/real": -215.30593872070312, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.383892059326172, |
|
"rewards/margins": 16.04793357849121, |
|
"rewards/real": -2.3359580039978027, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.211237553342817e-07, |
|
"logits/generated": -0.024233415722846985, |
|
"logits/real": -0.6739251017570496, |
|
"logps/generated": -333.5208435058594, |
|
"logps/real": -194.91732788085938, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.731473922729492, |
|
"rewards/margins": 14.82574462890625, |
|
"rewards/real": -1.9057306051254272, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.175675675675675e-07, |
|
"logits/generated": 0.023840907961130142, |
|
"logits/real": -0.6880885362625122, |
|
"logps/generated": -318.2210998535156, |
|
"logps/real": -184.1695556640625, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.254070281982422, |
|
"rewards/margins": 14.820034980773926, |
|
"rewards/real": -1.4340364933013916, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1401137980085347e-07, |
|
"logits/generated": 0.040734268724918365, |
|
"logits/real": -0.6580570340156555, |
|
"logps/generated": -342.2274475097656, |
|
"logps/real": -205.65689086914062, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -17.80494499206543, |
|
"rewards/margins": 15.115242004394531, |
|
"rewards/real": -2.689703941345215, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.104551920341394e-07, |
|
"logits/generated": -0.019141068682074547, |
|
"logits/real": -0.6394578814506531, |
|
"logps/generated": -324.29833984375, |
|
"logps/real": -201.87010192871094, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.063854217529297, |
|
"rewards/margins": 14.041638374328613, |
|
"rewards/real": -2.0222160816192627, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.068990042674253e-07, |
|
"logits/generated": -0.02943194843828678, |
|
"logits/real": -0.6647931337356567, |
|
"logps/generated": -309.67327880859375, |
|
"logps/real": -181.3977508544922, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.244012832641602, |
|
"rewards/margins": 13.764185905456543, |
|
"rewards/real": -1.4798262119293213, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.033428165007112e-07, |
|
"logits/generated": -0.0134804155677557, |
|
"logits/real": -0.688398003578186, |
|
"logps/generated": -333.3070373535156, |
|
"logps/real": -196.06004333496094, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.678394317626953, |
|
"rewards/margins": 14.339719772338867, |
|
"rewards/real": -2.3386740684509277, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9978662873399715e-07, |
|
"logits/generated": -0.01525292731821537, |
|
"logits/real": -0.5911905169487, |
|
"logps/generated": -330.7987060546875, |
|
"logps/real": -198.35397338867188, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.70512580871582, |
|
"rewards/margins": 14.928197860717773, |
|
"rewards/real": -1.776925802230835, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9623044096728305e-07, |
|
"logits/generated": 0.047141142189502716, |
|
"logits/real": -0.5441254377365112, |
|
"logps/generated": -320.4557800292969, |
|
"logps/real": -211.35848999023438, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.464683532714844, |
|
"rewards/margins": 14.595657348632812, |
|
"rewards/real": -1.8690249919891357, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.92674253200569e-07, |
|
"logits/generated": 0.018107902258634567, |
|
"logits/real": -0.6069762110710144, |
|
"logps/generated": -335.17401123046875, |
|
"logps/real": -191.33583068847656, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.258358001708984, |
|
"rewards/margins": 15.983779907226562, |
|
"rewards/real": -1.2745764255523682, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8911806543385494e-07, |
|
"logits/generated": 0.05721588060259819, |
|
"logits/real": -0.6501365900039673, |
|
"logps/generated": -334.0870666503906, |
|
"logps/real": -201.9585418701172, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.955425262451172, |
|
"rewards/margins": 16.156780242919922, |
|
"rewards/real": -0.7986453175544739, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.855618776671408e-07, |
|
"logits/generated": -0.00029001757502555847, |
|
"logits/real": -0.4777015745639801, |
|
"logps/generated": -349.4190368652344, |
|
"logps/real": -214.5007781982422, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.192649841308594, |
|
"rewards/margins": 16.606849670410156, |
|
"rewards/real": -1.5857971906661987, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8200568990042673e-07, |
|
"logits/generated": 0.006303996779024601, |
|
"logits/real": -0.6401196718215942, |
|
"logps/generated": -335.5325622558594, |
|
"logps/real": -194.22637939453125, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.122852325439453, |
|
"rewards/margins": 15.594779968261719, |
|
"rewards/real": -1.5280735492706299, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.784495021337127e-07, |
|
"logits/generated": -0.015740731731057167, |
|
"logits/real": -0.6430305242538452, |
|
"logps/generated": -333.03778076171875, |
|
"logps/real": -185.57615661621094, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.422563552856445, |
|
"rewards/margins": 15.94238567352295, |
|
"rewards/real": -1.4801769256591797, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7489331436699857e-07, |
|
"logits/generated": -0.06366153061389923, |
|
"logits/real": -0.5953705310821533, |
|
"logps/generated": -346.43719482421875, |
|
"logps/real": -207.64895629882812, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.68976402282715, |
|
"rewards/margins": 15.903286933898926, |
|
"rewards/real": -1.7864751815795898, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7133712660028446e-07, |
|
"logits/generated": 0.05433814972639084, |
|
"logits/real": -0.659256637096405, |
|
"logps/generated": -347.67437744140625, |
|
"logps/real": -188.8865509033203, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.18773651123047, |
|
"rewards/margins": 16.599082946777344, |
|
"rewards/real": -1.5886526107788086, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.677809388335704e-07, |
|
"logits/generated": -0.006709927227348089, |
|
"logits/real": -0.6219618916511536, |
|
"logps/generated": -339.08404541015625, |
|
"logps/real": -192.36767578125, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.780960083007812, |
|
"rewards/margins": 15.594240188598633, |
|
"rewards/real": -2.186721086502075, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.642247510668563e-07, |
|
"logits/generated": 0.024327615275979042, |
|
"logits/real": -0.6300166845321655, |
|
"logps/generated": -359.64288330078125, |
|
"logps/real": -189.8053741455078, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.835926055908203, |
|
"rewards/margins": 18.566604614257812, |
|
"rewards/real": -1.2693183422088623, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6066856330014225e-07, |
|
"logits/generated": 0.060337960720062256, |
|
"logits/real": -0.6221122145652771, |
|
"logps/generated": -346.8661193847656, |
|
"logps/real": -193.4240264892578, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.56460952758789, |
|
"rewards/margins": 17.463274002075195, |
|
"rewards/real": -1.1013351678848267, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5711237553342815e-07, |
|
"logits/generated": -0.023245109245181084, |
|
"logits/real": -0.5685423612594604, |
|
"logps/generated": -363.9253234863281, |
|
"logps/real": -206.6332244873047, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -18.62310791015625, |
|
"rewards/margins": 17.27718162536621, |
|
"rewards/real": -1.3459270000457764, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5355618776671404e-07, |
|
"logits/generated": 0.05930706113576889, |
|
"logits/real": -0.6789587736129761, |
|
"logps/generated": -344.4968566894531, |
|
"logps/real": -199.61949157714844, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.401254653930664, |
|
"rewards/margins": 17.86998748779297, |
|
"rewards/real": -1.5312663316726685, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5e-07, |
|
"logits/generated": -0.02314385026693344, |
|
"logits/real": -0.5737181305885315, |
|
"logps/generated": -353.2101135253906, |
|
"logps/real": -192.0513458251953, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.10390853881836, |
|
"rewards/margins": 17.52272605895996, |
|
"rewards/real": -1.581182599067688, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4644381223328594e-07, |
|
"logits/generated": 0.058255720883607864, |
|
"logits/real": -0.5887473821640015, |
|
"logps/generated": -358.34857177734375, |
|
"logps/real": -195.61085510253906, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.416332244873047, |
|
"rewards/margins": 18.28666877746582, |
|
"rewards/real": -1.129664421081543, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4288762446657183e-07, |
|
"logits/generated": 0.08289220184087753, |
|
"logits/real": -0.5633417367935181, |
|
"logps/generated": -371.24578857421875, |
|
"logps/real": -209.87020874023438, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.660175323486328, |
|
"rewards/margins": 19.35919189453125, |
|
"rewards/real": -2.3009822368621826, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.393314366998578e-07, |
|
"logits/generated": 0.05838945508003235, |
|
"logits/real": -0.5614827871322632, |
|
"logps/generated": -385.99591064453125, |
|
"logps/real": -205.49008178710938, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.40130043029785, |
|
"rewards/margins": 19.79316520690918, |
|
"rewards/real": -2.6081345081329346, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3577524893314365e-07, |
|
"logits/generated": 0.0925709456205368, |
|
"logits/real": -0.6390553712844849, |
|
"logps/generated": -385.9515075683594, |
|
"logps/real": -199.3788299560547, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.699363708496094, |
|
"rewards/margins": 20.273067474365234, |
|
"rewards/real": -2.4262948036193848, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.322190611664296e-07, |
|
"logits/generated": -0.001485310262069106, |
|
"logits/real": -0.4676692485809326, |
|
"logps/generated": -368.5473937988281, |
|
"logps/real": -213.4370880126953, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.21763801574707, |
|
"rewards/margins": 19.124008178710938, |
|
"rewards/real": -2.0936279296875, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2866287339971549e-07, |
|
"logits/generated": -0.01602059043943882, |
|
"logits/real": -0.6552165150642395, |
|
"logps/generated": -399.9144592285156, |
|
"logps/real": -208.2056121826172, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.339561462402344, |
|
"rewards/margins": 20.945171356201172, |
|
"rewards/real": -2.3943886756896973, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.251066856330014e-07, |
|
"logits/generated": 0.05472123622894287, |
|
"logits/real": -0.5271707773208618, |
|
"logps/generated": -377.61981201171875, |
|
"logps/real": -199.87130737304688, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.324268341064453, |
|
"rewards/margins": 19.352123260498047, |
|
"rewards/real": -2.972146511077881, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2155049786628733e-07, |
|
"logits/generated": -0.025077398866415024, |
|
"logits/real": -0.5260539054870605, |
|
"logps/generated": -394.1059265136719, |
|
"logps/real": -210.8734130859375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.064285278320312, |
|
"rewards/margins": 20.856571197509766, |
|
"rewards/real": -2.2077155113220215, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1799431009957325e-07, |
|
"logits/generated": 0.008644811809062958, |
|
"logits/real": -0.5587931275367737, |
|
"logps/generated": -402.0521545410156, |
|
"logps/real": -202.02291870117188, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.23007583618164, |
|
"rewards/margins": 19.87685203552246, |
|
"rewards/real": -3.3532238006591797, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1443812233285914e-07, |
|
"logits/generated": -0.009537003934383392, |
|
"logits/real": -0.4732537269592285, |
|
"logps/generated": -380.66107177734375, |
|
"logps/real": -210.02407836914062, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.10177993774414, |
|
"rewards/margins": 19.873910903930664, |
|
"rewards/real": -2.2278692722320557, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.108819345661451e-07, |
|
"logits/generated": 0.04497765749692917, |
|
"logits/real": -0.5100663900375366, |
|
"logps/generated": -383.2145080566406, |
|
"logps/real": -200.04031372070312, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.68323516845703, |
|
"rewards/margins": 19.536602020263672, |
|
"rewards/real": -2.146634340286255, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0732574679943098e-07, |
|
"logits/generated": 0.051282238215208054, |
|
"logits/real": -0.7591557502746582, |
|
"logps/generated": -358.7184143066406, |
|
"logps/real": -178.8624267578125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.63443374633789, |
|
"rewards/margins": 18.222341537475586, |
|
"rewards/real": -1.4120899438858032, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0376955903271693e-07, |
|
"logits/generated": 0.05088866874575615, |
|
"logits/real": -0.4944595694541931, |
|
"logps/generated": -370.34417724609375, |
|
"logps/real": -208.4033966064453, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.147663116455078, |
|
"rewards/margins": 19.88173484802246, |
|
"rewards/real": -1.2659282684326172, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0021337126600283e-07, |
|
"logits/generated": 0.0677376538515091, |
|
"logits/real": -0.5833539366722107, |
|
"logps/generated": -366.4508361816406, |
|
"logps/real": -182.74696350097656, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.16263198852539, |
|
"rewards/margins": 18.523616790771484, |
|
"rewards/real": -1.6390106678009033, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/generated": 0.04800041764974594, |
|
"eval_logits/real": -0.5656154155731201, |
|
"eval_logps/generated": -376.97998046875, |
|
"eval_logps/real": -197.0876007080078, |
|
"eval_loss": 0.004381492733955383, |
|
"eval_rewards/accuracies": 0.9984076619148254, |
|
"eval_rewards/generated": -21.65926742553711, |
|
"eval_rewards/margins": 19.883346557617188, |
|
"eval_rewards/real": -1.7759193181991577, |
|
"eval_runtime": 321.6683, |
|
"eval_samples_per_second": 15.544, |
|
"eval_steps_per_second": 0.488, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9665718349928875e-07, |
|
"logits/generated": 0.10137276351451874, |
|
"logits/real": -0.5880488753318787, |
|
"logps/generated": -336.6202392578125, |
|
"logps/real": -179.52365112304688, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.403759002685547, |
|
"rewards/margins": 17.433141708374023, |
|
"rewards/real": -0.9706158638000488, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.931009957325747e-07, |
|
"logits/generated": 0.07191314548254013, |
|
"logits/real": -0.7045632600784302, |
|
"logps/generated": -371.539306640625, |
|
"logps/real": -174.90870666503906, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.966075897216797, |
|
"rewards/margins": 19.68502426147461, |
|
"rewards/real": -1.2810522317886353, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.895448079658606e-07, |
|
"logits/generated": 0.021189400926232338, |
|
"logits/real": -0.5944398641586304, |
|
"logps/generated": -345.79901123046875, |
|
"logps/real": -197.9308319091797, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -18.253501892089844, |
|
"rewards/margins": 16.816665649414062, |
|
"rewards/real": -1.436837077140808, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.859886201991465e-07, |
|
"logits/generated": 0.028367796912789345, |
|
"logits/real": -0.5693169832229614, |
|
"logps/generated": -369.0507507324219, |
|
"logps/real": -203.1673126220703, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.233728408813477, |
|
"rewards/margins": 18.871437072753906, |
|
"rewards/real": -1.3622897863388062, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8243243243243243e-07, |
|
"logits/generated": 0.11130674183368683, |
|
"logits/real": -0.6199926137924194, |
|
"logps/generated": -396.66668701171875, |
|
"logps/real": -199.72732543945312, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.905460357666016, |
|
"rewards/margins": 21.137920379638672, |
|
"rewards/real": -1.767538070678711, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7887624466571835e-07, |
|
"logits/generated": 0.051538754254579544, |
|
"logits/real": -0.5638888478279114, |
|
"logps/generated": -382.5877380371094, |
|
"logps/real": -193.47872924804688, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.123483657836914, |
|
"rewards/margins": 20.473840713500977, |
|
"rewards/real": -1.6496423482894897, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7532005689900424e-07, |
|
"logits/generated": 0.056547343730926514, |
|
"logits/real": -0.5736340284347534, |
|
"logps/generated": -399.03253173828125, |
|
"logps/real": -205.837158203125, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.35310173034668, |
|
"rewards/margins": 20.6170597076416, |
|
"rewards/real": -2.7360422611236572, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.717638691322902e-07, |
|
"logits/generated": 0.12326414883136749, |
|
"logits/real": -0.5854828357696533, |
|
"logps/generated": -368.6696472167969, |
|
"logps/real": -194.57887268066406, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.72028923034668, |
|
"rewards/margins": 19.422595977783203, |
|
"rewards/real": -2.297696352005005, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6820768136557609e-07, |
|
"logits/generated": 0.03324466198682785, |
|
"logits/real": -0.5589950680732727, |
|
"logps/generated": -379.6975402832031, |
|
"logps/real": -189.21792602539062, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.778305053710938, |
|
"rewards/margins": 19.09261131286621, |
|
"rewards/real": -1.6856931447982788, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.64651493598862e-07, |
|
"logits/generated": 0.02137361653149128, |
|
"logits/real": -0.4791427552700043, |
|
"logps/generated": -372.0145263671875, |
|
"logps/real": -201.42239379882812, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.45681381225586, |
|
"rewards/margins": 19.019607543945312, |
|
"rewards/real": -1.4372069835662842, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6109530583214793e-07, |
|
"logits/generated": 0.06219879537820816, |
|
"logits/real": -0.6077834367752075, |
|
"logps/generated": -361.52496337890625, |
|
"logps/real": -187.40945434570312, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.687042236328125, |
|
"rewards/margins": 19.435745239257812, |
|
"rewards/real": -1.2512991428375244, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5753911806543385e-07, |
|
"logits/generated": 0.02082439325749874, |
|
"logits/real": -0.6671017408370972, |
|
"logps/generated": -399.9078369140625, |
|
"logps/real": -197.09945678710938, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.697898864746094, |
|
"rewards/margins": 21.734453201293945, |
|
"rewards/real": -1.9634456634521484, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5398293029871974e-07, |
|
"logits/generated": 0.07103635370731354, |
|
"logits/real": -0.5315567851066589, |
|
"logps/generated": -390.80413818359375, |
|
"logps/real": -209.8661651611328, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.044010162353516, |
|
"rewards/margins": 20.588970184326172, |
|
"rewards/real": -2.4550397396087646, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.504267425320057e-07, |
|
"logits/generated": 0.05514199659228325, |
|
"logits/real": -0.5959967374801636, |
|
"logps/generated": -372.26470947265625, |
|
"logps/real": -187.91561889648438, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.262130737304688, |
|
"rewards/margins": 19.89595603942871, |
|
"rewards/real": -1.366172194480896, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4687055476529158e-07, |
|
"logits/generated": 0.03651849180459976, |
|
"logits/real": -0.5296968817710876, |
|
"logps/generated": -375.4384765625, |
|
"logps/real": -198.07846069335938, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.608938217163086, |
|
"rewards/margins": 19.235652923583984, |
|
"rewards/real": -1.373286247253418, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4331436699857753e-07, |
|
"logits/generated": -0.031901903450489044, |
|
"logits/real": -0.42245230078697205, |
|
"logps/generated": -385.30987548828125, |
|
"logps/real": -198.95516967773438, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.40415382385254, |
|
"rewards/margins": 19.745981216430664, |
|
"rewards/real": -1.6581722497940063, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3975817923186345e-07, |
|
"logits/generated": 0.07458638399839401, |
|
"logits/real": -0.4577251970767975, |
|
"logps/generated": -376.2591552734375, |
|
"logps/real": -211.70458984375, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.238889694213867, |
|
"rewards/margins": 19.84615135192871, |
|
"rewards/real": -1.3927379846572876, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3620199146514935e-07, |
|
"logits/generated": 0.06895387917757034, |
|
"logits/real": -0.4532155990600586, |
|
"logps/generated": -361.93939208984375, |
|
"logps/real": -179.66732788085938, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.205707550048828, |
|
"rewards/margins": 18.50905990600586, |
|
"rewards/real": -1.6966466903686523, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.326458036984353e-07, |
|
"logits/generated": 0.026944806799292564, |
|
"logits/real": -0.5042958855628967, |
|
"logps/generated": -378.512939453125, |
|
"logps/real": -193.09017944335938, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.360944747924805, |
|
"rewards/margins": 19.692447662353516, |
|
"rewards/real": -1.6684958934783936, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.290896159317212e-07, |
|
"logits/generated": 0.05960095673799515, |
|
"logits/real": -0.5585105419158936, |
|
"logps/generated": -394.94696044921875, |
|
"logps/real": -191.21163940429688, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.144710540771484, |
|
"rewards/margins": 21.34160041809082, |
|
"rewards/real": -1.8031113147735596, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.255334281650071e-07, |
|
"logits/generated": 0.04382283240556717, |
|
"logits/real": -0.4478573203086853, |
|
"logps/generated": -375.7854309082031, |
|
"logps/real": -205.15869140625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.472270965576172, |
|
"rewards/margins": 19.42158317565918, |
|
"rewards/real": -2.050690174102783, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2197724039829303e-07, |
|
"logits/generated": 0.07887273281812668, |
|
"logits/real": -0.6188726425170898, |
|
"logps/generated": -378.6775817871094, |
|
"logps/real": -190.72250366210938, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.472759246826172, |
|
"rewards/margins": 19.55636215209961, |
|
"rewards/real": -1.916398286819458, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1842105263157894e-07, |
|
"logits/generated": 0.0605277419090271, |
|
"logits/real": -0.6036852598190308, |
|
"logps/generated": -369.0652160644531, |
|
"logps/real": -183.35617065429688, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.453426361083984, |
|
"rewards/margins": 18.384937286376953, |
|
"rewards/real": -2.0684916973114014, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1486486486486487e-07, |
|
"logits/generated": 0.06586415320634842, |
|
"logits/real": -0.5749965906143188, |
|
"logps/generated": -372.5237121582031, |
|
"logps/real": -195.25076293945312, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.654064178466797, |
|
"rewards/margins": 19.203500747680664, |
|
"rewards/real": -1.450567364692688, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1130867709815078e-07, |
|
"logits/generated": 0.08540566265583038, |
|
"logits/real": -0.6170912981033325, |
|
"logps/generated": -374.17303466796875, |
|
"logps/real": -194.3673095703125, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.099042892456055, |
|
"rewards/margins": 19.65339469909668, |
|
"rewards/real": -1.4456470012664795, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.077524893314367e-07, |
|
"logits/generated": 0.06357467174530029, |
|
"logits/real": -0.687986433506012, |
|
"logps/generated": -392.5172424316406, |
|
"logps/real": -191.45401000976562, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.80368423461914, |
|
"rewards/margins": 20.335935592651367, |
|
"rewards/real": -2.467747688293457, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0419630156472262e-07, |
|
"logits/generated": 0.016886264085769653, |
|
"logits/real": -0.4550386965274811, |
|
"logps/generated": -390.584228515625, |
|
"logps/real": -218.6328887939453, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.16832733154297, |
|
"rewards/margins": 20.595144271850586, |
|
"rewards/real": -2.5731775760650635, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0064011379800854e-07, |
|
"logits/generated": -0.022652573883533478, |
|
"logits/real": -0.4286680817604065, |
|
"logps/generated": -379.67706298828125, |
|
"logps/real": -206.49093627929688, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.629016876220703, |
|
"rewards/margins": 19.87604522705078, |
|
"rewards/real": -1.7529706954956055, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.708392603129445e-08, |
|
"logits/generated": 0.056550562381744385, |
|
"logits/real": -0.4019811749458313, |
|
"logps/generated": -370.08251953125, |
|
"logps/real": -197.77896118164062, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.446491241455078, |
|
"rewards/margins": 19.191186904907227, |
|
"rewards/real": -2.255300521850586, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.352773826458037e-08, |
|
"logits/generated": 0.008305387571454048, |
|
"logits/real": -0.5809749364852905, |
|
"logps/generated": -400.37347412109375, |
|
"logps/real": -197.43136596679688, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.287565231323242, |
|
"rewards/margins": 21.73343276977539, |
|
"rewards/real": -1.5541306734085083, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.997155049786629e-08, |
|
"logits/generated": 0.046849604696035385, |
|
"logits/real": -0.6843993663787842, |
|
"logps/generated": -390.62030029296875, |
|
"logps/real": -181.0294647216797, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.432353973388672, |
|
"rewards/margins": 21.127017974853516, |
|
"rewards/real": -1.3053334951400757, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.64153627311522e-08, |
|
"logits/generated": 0.052729617804288864, |
|
"logits/real": -0.5115618109703064, |
|
"logps/generated": -375.7951354980469, |
|
"logps/real": -196.12313842773438, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.45965003967285, |
|
"rewards/margins": 19.496532440185547, |
|
"rewards/real": -1.9631179571151733, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.285917496443812e-08, |
|
"logits/generated": 0.035291388630867004, |
|
"logits/real": -0.5863553881645203, |
|
"logps/generated": -395.09197998046875, |
|
"logps/real": -202.11160278320312, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.766231536865234, |
|
"rewards/margins": 20.85399627685547, |
|
"rewards/real": -1.912235975265503, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.930298719772404e-08, |
|
"logits/generated": 0.07267922163009644, |
|
"logits/real": -0.49688243865966797, |
|
"logps/generated": -373.9420166015625, |
|
"logps/real": -205.9117889404297, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.421688079833984, |
|
"rewards/margins": 19.645835876464844, |
|
"rewards/real": -1.7758514881134033, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.574679943100994e-08, |
|
"logits/generated": 0.07065759599208832, |
|
"logits/real": -0.5725045204162598, |
|
"logps/generated": -378.9009704589844, |
|
"logps/real": -181.6143035888672, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.113445281982422, |
|
"rewards/margins": 20.17637062072754, |
|
"rewards/real": -1.937076210975647, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.219061166429587e-08, |
|
"logits/generated": 0.05919628217816353, |
|
"logits/real": -0.5659859776496887, |
|
"logps/generated": -379.78924560546875, |
|
"logps/real": -190.56930541992188, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.432254791259766, |
|
"rewards/margins": 20.832565307617188, |
|
"rewards/real": -1.5996865034103394, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.863442389758179e-08, |
|
"logits/generated": 0.044001154601573944, |
|
"logits/real": -0.5443329215049744, |
|
"logps/generated": -378.08587646484375, |
|
"logps/real": -204.49148559570312, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -21.367244720458984, |
|
"rewards/margins": 19.974727630615234, |
|
"rewards/real": -1.3925195932388306, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.507823613086771e-08, |
|
"logits/generated": 0.046902261674404144, |
|
"logits/real": -0.5738197565078735, |
|
"logps/generated": -380.58148193359375, |
|
"logps/real": -207.26724243164062, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.340457916259766, |
|
"rewards/margins": 19.454242706298828, |
|
"rewards/real": -1.886214017868042, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.152204836415363e-08, |
|
"logits/generated": 0.0785381942987442, |
|
"logits/real": -0.569561243057251, |
|
"logps/generated": -367.63519287109375, |
|
"logps/real": -203.30282592773438, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.134796142578125, |
|
"rewards/margins": 18.931886672973633, |
|
"rewards/real": -2.2029080390930176, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.796586059743954e-08, |
|
"logits/generated": -0.007580602075904608, |
|
"logits/real": -0.5636885762214661, |
|
"logps/generated": -381.52642822265625, |
|
"logps/real": -198.91770935058594, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.588598251342773, |
|
"rewards/margins": 19.854793548583984, |
|
"rewards/real": -1.7338052988052368, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.4409672830725456e-08, |
|
"logits/generated": 0.0493854358792305, |
|
"logits/real": -0.5976940989494324, |
|
"logps/generated": -399.3151550292969, |
|
"logps/real": -194.88201904296875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.92904281616211, |
|
"rewards/margins": 21.451683044433594, |
|
"rewards/real": -1.477359652519226, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.0853485064011376e-08, |
|
"logits/generated": 0.08968095481395721, |
|
"logits/real": -0.6684913039207458, |
|
"logps/generated": -380.35552978515625, |
|
"logps/real": -183.67074584960938, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.802627563476562, |
|
"rewards/margins": 20.21030616760254, |
|
"rewards/real": -1.5923227071762085, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logits/generated": 0.08673722296953201, |
|
"logits/real": -0.6326942443847656, |
|
"logps/generated": -379.32373046875, |
|
"logps/real": -193.1202392578125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -22.219440460205078, |
|
"rewards/margins": 20.84255599975586, |
|
"rewards/real": -1.3768887519836426, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.374110953058322e-08, |
|
"logits/generated": 0.031193479895591736, |
|
"logits/real": -0.5564336776733398, |
|
"logps/generated": -359.17181396484375, |
|
"logps/real": -193.7860107421875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.49233055114746, |
|
"rewards/margins": 18.665729522705078, |
|
"rewards/real": -1.8266017436981201, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.018492176386913e-08, |
|
"logits/generated": 0.09109187871217728, |
|
"logits/real": -0.5564282536506653, |
|
"logps/generated": -374.38702392578125, |
|
"logps/real": -194.70132446289062, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.670825958251953, |
|
"rewards/margins": 19.197032928466797, |
|
"rewards/real": -1.4737932682037354, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.6628733997155046e-08, |
|
"logits/generated": 0.08201099932193756, |
|
"logits/real": -0.6359506845474243, |
|
"logps/generated": -386.39190673828125, |
|
"logps/real": -176.69708251953125, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -22.43024444580078, |
|
"rewards/margins": 20.56548309326172, |
|
"rewards/real": -1.8647606372833252, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.3072546230440967e-08, |
|
"logits/generated": 0.08995040506124496, |
|
"logits/real": -0.5263134241104126, |
|
"logps/generated": -378.1429138183594, |
|
"logps/real": -191.90821838378906, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.608402252197266, |
|
"rewards/margins": 19.824888229370117, |
|
"rewards/real": -1.7835137844085693, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9516358463726884e-08, |
|
"logits/generated": 0.030845308676362038, |
|
"logits/real": -0.5490936040878296, |
|
"logps/generated": -369.69525146484375, |
|
"logps/real": -200.53517150878906, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.388917922973633, |
|
"rewards/margins": 19.097196578979492, |
|
"rewards/real": -1.2917201519012451, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.59601706970128e-08, |
|
"logits/generated": 0.0783102884888649, |
|
"logits/real": -0.5921775698661804, |
|
"logps/generated": -361.94158935546875, |
|
"logps/real": -206.4454803466797, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.625545501708984, |
|
"rewards/margins": 19.025144577026367, |
|
"rewards/real": -1.6004012823104858, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.240398293029872e-08, |
|
"logits/generated": 0.06931595504283905, |
|
"logits/real": -0.5738247632980347, |
|
"logps/generated": -371.99847412109375, |
|
"logps/real": -194.58804321289062, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.12423324584961, |
|
"rewards/margins": 19.16390037536621, |
|
"rewards/real": -1.9603347778320312, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/generated": 0.05270044878125191, |
|
"eval_logits/real": -0.5586805939674377, |
|
"eval_logps/generated": -369.19976806640625, |
|
"eval_logps/real": -194.6499481201172, |
|
"eval_loss": 0.0032336623407900333, |
|
"eval_rewards/accuracies": 0.9992038011550903, |
|
"eval_rewards/generated": -20.88124656677246, |
|
"eval_rewards/margins": 19.349092483520508, |
|
"eval_rewards/real": -1.5321544408798218, |
|
"eval_runtime": 319.8593, |
|
"eval_samples_per_second": 15.632, |
|
"eval_steps_per_second": 0.491, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.8847795163584636e-08, |
|
"logits/generated": 0.010430006310343742, |
|
"logits/real": -0.5986403226852417, |
|
"logps/generated": -369.27630615234375, |
|
"logps/real": -194.7587127685547, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.733909606933594, |
|
"rewards/margins": 19.032808303833008, |
|
"rewards/real": -1.7011024951934814, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5291607396870554e-08, |
|
"logits/generated": 0.0546521432697773, |
|
"logits/real": -0.5425094366073608, |
|
"logps/generated": -366.5072021484375, |
|
"logps/real": -192.2476348876953, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.005374908447266, |
|
"rewards/margins": 19.410686492919922, |
|
"rewards/real": -1.594689965248108, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1735419630156473e-08, |
|
"logits/generated": 0.07052431255578995, |
|
"logits/real": -0.591436505317688, |
|
"logps/generated": -371.2902526855469, |
|
"logps/real": -192.4247589111328, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.630722045898438, |
|
"rewards/margins": 20.07946014404297, |
|
"rewards/real": -1.5512605905532837, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.179231863442388e-09, |
|
"logits/generated": 0.043054401874542236, |
|
"logits/real": -0.547429084777832, |
|
"logps/generated": -377.97113037109375, |
|
"logps/real": -199.3291473388672, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.497386932373047, |
|
"rewards/margins": 19.968860626220703, |
|
"rewards/real": -1.5285276174545288, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.623044096728307e-09, |
|
"logits/generated": 0.04375555366277695, |
|
"logits/real": -0.5209413766860962, |
|
"logps/generated": -369.3624267578125, |
|
"logps/real": -201.68338012695312, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -21.231218338012695, |
|
"rewards/margins": 19.51310157775879, |
|
"rewards/real": -1.718117356300354, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0668563300142248e-09, |
|
"logits/generated": 0.011072209104895592, |
|
"logits/real": -0.5893142223358154, |
|
"logps/generated": -360.9320983886719, |
|
"logps/real": -193.64450073242188, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.670141220092773, |
|
"rewards/margins": 17.939659118652344, |
|
"rewards/real": -1.7304834127426147, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1563, |
|
"total_flos": 0.0, |
|
"train_loss": 0.03590757104014634, |
|
"train_runtime": 9394.7927, |
|
"train_samples_per_second": 5.322, |
|
"train_steps_per_second": 0.166 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1563, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|