|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1719, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9069767441860465e-09, |
|
"logits/generated": 0.22510835528373718, |
|
"logits/real": -0.4991265833377838, |
|
"logps/generated": -159.64022827148438, |
|
"logps/real": -219.83563232421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9069767441860464e-08, |
|
"logits/generated": 0.04235713928937912, |
|
"logits/real": -0.9326836466789246, |
|
"logps/generated": -236.92852783203125, |
|
"logps/real": -175.13916015625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/generated": -0.008745013736188412, |
|
"rewards/margins": 0.007466559763997793, |
|
"rewards/real": -0.001278453622944653, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.813953488372093e-08, |
|
"logits/generated": 0.10617052018642426, |
|
"logits/real": -0.8270567059516907, |
|
"logps/generated": -197.11135864257812, |
|
"logps/real": -185.62973022460938, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/generated": -0.011160850524902344, |
|
"rewards/margins": 0.02405616268515587, |
|
"rewards/real": 0.01289531122893095, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.720930232558139e-08, |
|
"logits/generated": 0.08242492377758026, |
|
"logits/real": -0.8459693789482117, |
|
"logps/generated": -197.53134155273438, |
|
"logps/real": -177.90036010742188, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.07798535376787186, |
|
"rewards/margins": 0.13355930149555206, |
|
"rewards/real": 0.0555739589035511, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1627906976744186e-07, |
|
"logits/generated": 0.08196545392274857, |
|
"logits/real": -0.7228237390518188, |
|
"logps/generated": -209.08792114257812, |
|
"logps/real": -204.48580932617188, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.20528952777385712, |
|
"rewards/margins": 0.33634811639785767, |
|
"rewards/real": 0.13105858862400055, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4534883720930232e-07, |
|
"logits/generated": 0.01023351214826107, |
|
"logits/real": -0.7781884074211121, |
|
"logps/generated": -196.1292266845703, |
|
"logps/real": -172.7035369873047, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.44744282960891724, |
|
"rewards/margins": 0.7074223756790161, |
|
"rewards/real": 0.2599795460700989, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7441860465116279e-07, |
|
"logits/generated": 0.029533693566918373, |
|
"logits/real": -0.9067603945732117, |
|
"logps/generated": -217.8592529296875, |
|
"logps/real": -179.58804321289062, |
|
"loss": 0.3137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.6678295135498047, |
|
"rewards/margins": 1.038863182067871, |
|
"rewards/real": 0.3710337281227112, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0348837209302325e-07, |
|
"logits/generated": -0.0033102657180279493, |
|
"logits/real": -0.8307914733886719, |
|
"logps/generated": -213.654541015625, |
|
"logps/real": -172.53134155273438, |
|
"loss": 0.1785, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.1765917539596558, |
|
"rewards/margins": 1.7910858392715454, |
|
"rewards/real": 0.6144940257072449, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3255813953488372e-07, |
|
"logits/generated": 0.08044329285621643, |
|
"logits/real": -0.7260358333587646, |
|
"logps/generated": -206.86099243164062, |
|
"logps/real": -164.09310913085938, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.155766010284424, |
|
"rewards/margins": 3.0567328929901123, |
|
"rewards/real": 0.9009668231010437, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.616279069767442e-07, |
|
"logits/generated": 0.04424278065562248, |
|
"logits/real": -0.724961519241333, |
|
"logps/generated": -227.4031524658203, |
|
"logps/real": -166.308837890625, |
|
"loss": 0.065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.788259983062744, |
|
"rewards/margins": 3.774054765701294, |
|
"rewards/real": 0.9857945442199707, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9069767441860464e-07, |
|
"logits/generated": 0.11271452903747559, |
|
"logits/real": -0.7200266718864441, |
|
"logps/generated": -214.77658081054688, |
|
"logps/real": -168.38412475585938, |
|
"loss": 0.0417, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.815293788909912, |
|
"rewards/margins": 4.901894569396973, |
|
"rewards/real": 1.0866007804870605, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1976744186046514e-07, |
|
"logits/generated": -0.007130931131541729, |
|
"logits/real": -0.5636553168296814, |
|
"logps/generated": -267.8116760253906, |
|
"logps/real": -182.135986328125, |
|
"loss": 0.0241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.643799781799316, |
|
"rewards/margins": 5.737751483917236, |
|
"rewards/real": 1.093951940536499, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4883720930232557e-07, |
|
"logits/generated": -0.11325208842754364, |
|
"logits/real": -0.5849964022636414, |
|
"logps/generated": -285.40081787109375, |
|
"logps/real": -179.18301391601562, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.410036087036133, |
|
"rewards/margins": 6.6054368019104, |
|
"rewards/real": 1.195401668548584, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.77906976744186e-07, |
|
"logits/generated": -0.051985882222652435, |
|
"logits/real": -0.658206045627594, |
|
"logps/generated": -290.97161865234375, |
|
"logps/real": -169.36712646484375, |
|
"loss": 0.0159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.11798620223999, |
|
"rewards/margins": 7.369329929351807, |
|
"rewards/real": 1.2513433694839478, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.069767441860465e-07, |
|
"logits/generated": 0.005048721097409725, |
|
"logits/real": -0.7477072477340698, |
|
"logps/generated": -270.76324462890625, |
|
"logps/real": -162.5026397705078, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.5477166175842285, |
|
"rewards/margins": 7.861792087554932, |
|
"rewards/real": 1.3140751123428345, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3604651162790694e-07, |
|
"logits/generated": -0.018860597163438797, |
|
"logits/real": -0.6284798383712769, |
|
"logps/generated": -263.63818359375, |
|
"logps/real": -170.4339599609375, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.5605058670043945, |
|
"rewards/margins": 8.930419921875, |
|
"rewards/real": 1.369913101196289, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6511627906976743e-07, |
|
"logits/generated": -0.0799163281917572, |
|
"logits/real": -0.6108471751213074, |
|
"logps/generated": -272.9430236816406, |
|
"logps/real": -172.77984619140625, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.780602931976318, |
|
"rewards/margins": 9.111438751220703, |
|
"rewards/real": 1.3308355808258057, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.941860465116279e-07, |
|
"logits/generated": -0.08004440367221832, |
|
"logits/real": -0.566792368888855, |
|
"logps/generated": -284.3381652832031, |
|
"logps/real": -167.94363403320312, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.041540145874023, |
|
"rewards/margins": 9.337651252746582, |
|
"rewards/real": 1.2961114645004272, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.974143503555268e-07, |
|
"logits/generated": -0.10057459771633148, |
|
"logits/real": -0.6175776720046997, |
|
"logps/generated": -275.0603942871094, |
|
"logps/real": -167.98239135742188, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.851411819458008, |
|
"rewards/margins": 10.230875015258789, |
|
"rewards/real": 1.3794633150100708, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.941822882999354e-07, |
|
"logits/generated": -0.03914680331945419, |
|
"logits/real": -0.6671714186668396, |
|
"logps/generated": -307.4989318847656, |
|
"logps/real": -148.45492553710938, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.499483108520508, |
|
"rewards/margins": 10.952972412109375, |
|
"rewards/real": 1.453489065170288, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.909502262443438e-07, |
|
"logits/generated": -0.1555934101343155, |
|
"logits/real": -0.6996999979019165, |
|
"logps/generated": -289.0143737792969, |
|
"logps/real": -154.1925811767578, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.878443717956543, |
|
"rewards/margins": 11.454755783081055, |
|
"rewards/real": 1.5763123035430908, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.877181641887524e-07, |
|
"logits/generated": -0.16817393898963928, |
|
"logits/real": -0.5885288119316101, |
|
"logps/generated": -313.0885009765625, |
|
"logps/real": -173.83486938476562, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.3558931350708, |
|
"rewards/margins": 11.955841064453125, |
|
"rewards/real": 1.5999480485916138, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.84486102133161e-07, |
|
"logits/generated": -0.18991371989250183, |
|
"logits/real": -0.6707152128219604, |
|
"logps/generated": -308.0616760253906, |
|
"logps/real": -150.18075561523438, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.431336402893066, |
|
"rewards/margins": 12.006966590881348, |
|
"rewards/real": 1.5756289958953857, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.812540400775695e-07, |
|
"logits/generated": -0.18832966685295105, |
|
"logits/real": -0.631781816482544, |
|
"logps/generated": -330.06683349609375, |
|
"logps/real": -149.5428924560547, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.349983215332031, |
|
"rewards/margins": 12.996706008911133, |
|
"rewards/real": 1.6467218399047852, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.78021978021978e-07, |
|
"logits/generated": -0.15396884083747864, |
|
"logits/real": -0.4513896107673645, |
|
"logps/generated": -326.8087158203125, |
|
"logps/real": -169.1533660888672, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.024203300476074, |
|
"rewards/margins": 12.732254028320312, |
|
"rewards/real": 1.7080507278442383, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.747899159663865e-07, |
|
"logits/generated": -0.16423162817955017, |
|
"logits/real": -0.5932506918907166, |
|
"logps/generated": -326.5214538574219, |
|
"logps/real": -167.0145263671875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.629524230957031, |
|
"rewards/margins": 14.42304515838623, |
|
"rewards/real": 1.7935196161270142, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7155785391079506e-07, |
|
"logits/generated": -0.13844823837280273, |
|
"logits/real": -0.5799323320388794, |
|
"logps/generated": -312.210693359375, |
|
"logps/real": -160.5195770263672, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.824631690979004, |
|
"rewards/margins": 13.548501968383789, |
|
"rewards/real": 1.723870873451233, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.683257918552036e-07, |
|
"logits/generated": -0.2020149677991867, |
|
"logits/real": -0.5803182721138, |
|
"logps/generated": -321.19403076171875, |
|
"logps/real": -154.0579376220703, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.354485511779785, |
|
"rewards/margins": 13.974222183227539, |
|
"rewards/real": 1.6197364330291748, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6509372979961214e-07, |
|
"logits/generated": -0.18715237081050873, |
|
"logits/real": -0.6276318430900574, |
|
"logps/generated": -345.169677734375, |
|
"logps/real": -160.62017822265625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.69969654083252, |
|
"rewards/margins": 14.637018203735352, |
|
"rewards/real": 1.9373207092285156, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.618616677440207e-07, |
|
"logits/generated": -0.17162290215492249, |
|
"logits/real": -0.5753442645072937, |
|
"logps/generated": -323.808837890625, |
|
"logps/real": -155.9623260498047, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.896127700805664, |
|
"rewards/margins": 14.664695739746094, |
|
"rewards/real": 1.7685680389404297, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.5862960568842917e-07, |
|
"logits/generated": -0.21047766506671906, |
|
"logits/real": -0.6026321649551392, |
|
"logps/generated": -362.8931579589844, |
|
"logps/real": -166.73097229003906, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.919809341430664, |
|
"rewards/margins": 14.736520767211914, |
|
"rewards/real": 1.8167121410369873, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5539754363283774e-07, |
|
"logits/generated": -0.16381976008415222, |
|
"logits/real": -0.5685454607009888, |
|
"logps/generated": -344.93719482421875, |
|
"logps/real": -169.48390197753906, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.71995735168457, |
|
"rewards/margins": 14.397488594055176, |
|
"rewards/real": 1.6775312423706055, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5216548157724625e-07, |
|
"logits/generated": -0.23839232325553894, |
|
"logits/real": -0.7003042101860046, |
|
"logps/generated": -347.67486572265625, |
|
"logps/real": -144.32846069335938, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.433148384094238, |
|
"rewards/margins": 15.338560104370117, |
|
"rewards/real": 1.905413031578064, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.489334195216548e-07, |
|
"logits/generated": -0.20531699061393738, |
|
"logits/real": -0.5904011726379395, |
|
"logps/generated": -358.9759216308594, |
|
"logps/real": -161.4681854248047, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.109331130981445, |
|
"rewards/margins": 14.828271865844727, |
|
"rewards/real": 1.7189394235610962, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4570135746606334e-07, |
|
"logits/generated": -0.22274942696094513, |
|
"logits/real": -0.47239452600479126, |
|
"logps/generated": -358.387451171875, |
|
"logps/real": -151.049072265625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.70154094696045, |
|
"rewards/margins": 15.348774909973145, |
|
"rewards/real": 1.6472349166870117, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.4246929541047185e-07, |
|
"logits/generated": -0.09264518320560455, |
|
"logits/real": -0.6958349347114563, |
|
"logps/generated": -324.8814392089844, |
|
"logps/real": -159.4300537109375, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.762893676757812, |
|
"rewards/margins": 16.524675369262695, |
|
"rewards/real": 1.761784315109253, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3923723335488036e-07, |
|
"logits/generated": -0.2574939429759979, |
|
"logits/real": -0.6005350351333618, |
|
"logps/generated": -355.54034423828125, |
|
"logps/real": -176.0032501220703, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.123771667480469, |
|
"rewards/margins": 15.87486743927002, |
|
"rewards/real": 1.7510942220687866, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3600517129928893e-07, |
|
"logits/generated": -0.1868589222431183, |
|
"logits/real": -0.6507897973060608, |
|
"logps/generated": -373.49420166015625, |
|
"logps/real": -159.02708435058594, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.44430923461914, |
|
"rewards/margins": 18.2752628326416, |
|
"rewards/real": 1.8309524059295654, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.327731092436975e-07, |
|
"logits/generated": -0.2316346913576126, |
|
"logits/real": -0.6136451959609985, |
|
"logps/generated": -369.7330017089844, |
|
"logps/real": -171.8548583984375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.07020378112793, |
|
"rewards/margins": 17.97062110900879, |
|
"rewards/real": 1.9004182815551758, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2954104718810596e-07, |
|
"logits/generated": -0.15154589712619781, |
|
"logits/real": -0.6708321571350098, |
|
"logps/generated": -363.1715393066406, |
|
"logps/real": -149.56558227539062, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.832409858703613, |
|
"rewards/margins": 17.56808090209961, |
|
"rewards/real": 1.7356704473495483, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2630898513251453e-07, |
|
"logits/generated": -0.18206624686717987, |
|
"logits/real": -0.5952498912811279, |
|
"logps/generated": -345.5340881347656, |
|
"logps/real": -169.5566864013672, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.744781494140625, |
|
"rewards/margins": 17.42547035217285, |
|
"rewards/real": 1.6806890964508057, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logits/generated": -0.21467003226280212, |
|
"logits/real": -0.6994205117225647, |
|
"logps/generated": -382.9744567871094, |
|
"logps/real": -161.76727294921875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.919597625732422, |
|
"rewards/margins": 18.602148056030273, |
|
"rewards/real": 1.6825525760650635, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.198448610213316e-07, |
|
"logits/generated": -0.16589388251304626, |
|
"logits/real": -0.6795812845230103, |
|
"logps/generated": -359.50897216796875, |
|
"logps/real": -158.7269744873047, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.896496772766113, |
|
"rewards/margins": 17.915653228759766, |
|
"rewards/real": 2.0191569328308105, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.166127989657401e-07, |
|
"logits/generated": -0.17213213443756104, |
|
"logits/real": -0.6090983748435974, |
|
"logps/generated": -352.26861572265625, |
|
"logps/real": -156.11839294433594, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.868573188781738, |
|
"rewards/margins": 16.57754898071289, |
|
"rewards/real": 1.7089776992797852, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1338073691014864e-07, |
|
"logits/generated": -0.2293224036693573, |
|
"logits/real": -0.6463335752487183, |
|
"logps/generated": -334.8617858886719, |
|
"logps/real": -168.5841064453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.44855785369873, |
|
"rewards/margins": 15.291763305664062, |
|
"rewards/real": 1.8432031869888306, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1014867485455715e-07, |
|
"logits/generated": -0.09427131712436676, |
|
"logits/real": -0.5305672883987427, |
|
"logps/generated": -359.8374328613281, |
|
"logps/real": -174.03419494628906, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.683209419250488, |
|
"rewards/margins": 15.647130012512207, |
|
"rewards/real": 1.963921308517456, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.069166127989657e-07, |
|
"logits/generated": -0.16022275388240814, |
|
"logits/real": -0.6646589040756226, |
|
"logps/generated": -327.9355773925781, |
|
"logps/real": -158.57614135742188, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.220995903015137, |
|
"rewards/margins": 16.33051872253418, |
|
"rewards/real": 2.1095223426818848, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.036845507433743e-07, |
|
"logits/generated": -0.12328042089939117, |
|
"logits/real": -0.48259586095809937, |
|
"logps/generated": -306.2872009277344, |
|
"logps/real": -181.9349365234375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.589244842529297, |
|
"rewards/margins": 14.392335891723633, |
|
"rewards/real": 1.8030906915664673, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.004524886877828e-07, |
|
"logits/generated": -0.16407117247581482, |
|
"logits/real": -0.5029650926589966, |
|
"logps/generated": -327.85662841796875, |
|
"logps/real": -179.72067260742188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.962748527526855, |
|
"rewards/margins": 15.690637588500977, |
|
"rewards/real": 1.7278881072998047, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.972204266321913e-07, |
|
"logits/generated": -0.10069570690393448, |
|
"logits/real": -0.48057037591934204, |
|
"logps/generated": -322.61041259765625, |
|
"logps/real": -183.6177215576172, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.659870147705078, |
|
"rewards/margins": 13.506695747375488, |
|
"rewards/real": 1.846824288368225, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9398836457659983e-07, |
|
"logits/generated": -0.11525435745716095, |
|
"logits/real": -0.5510282516479492, |
|
"logps/generated": -312.3033142089844, |
|
"logps/real": -161.03700256347656, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.423480987548828, |
|
"rewards/margins": 13.183720588684082, |
|
"rewards/real": 1.7602403163909912, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/generated": -0.06748126447200775, |
|
"eval_logits/real": -0.5572667121887207, |
|
"eval_logps/generated": -297.7837829589844, |
|
"eval_logps/real": -161.91171264648438, |
|
"eval_loss": 0.0023772784043103456, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -12.561324119567871, |
|
"eval_rewards/margins": 14.320941925048828, |
|
"eval_rewards/real": 1.759616732597351, |
|
"eval_runtime": 35.1534, |
|
"eval_samples_per_second": 14.223, |
|
"eval_steps_per_second": 0.455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.907563025210084e-07, |
|
"logits/generated": -0.13562723994255066, |
|
"logits/real": -0.5651453733444214, |
|
"logps/generated": -306.77203369140625, |
|
"logps/real": -164.38670349121094, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.604525566101074, |
|
"rewards/margins": 14.576957702636719, |
|
"rewards/real": 1.9724317789077759, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.875242404654169e-07, |
|
"logits/generated": -0.16329458355903625, |
|
"logits/real": -0.6475385427474976, |
|
"logps/generated": -322.47998046875, |
|
"logps/real": -155.1940460205078, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.303722381591797, |
|
"rewards/margins": 14.135655403137207, |
|
"rewards/real": 1.831933617591858, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.842921784098255e-07, |
|
"logits/generated": -0.16837731003761292, |
|
"logits/real": -0.6170174479484558, |
|
"logps/generated": -345.0, |
|
"logps/real": -150.05941772460938, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.038650512695312, |
|
"rewards/margins": 14.98759937286377, |
|
"rewards/real": 1.9489485025405884, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8106011635423394e-07, |
|
"logits/generated": -0.23766961693763733, |
|
"logits/real": -0.5920494794845581, |
|
"logps/generated": -323.0770263671875, |
|
"logps/real": -163.19320678710938, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.857145309448242, |
|
"rewards/margins": 14.674581527709961, |
|
"rewards/real": 1.8174375295639038, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.778280542986425e-07, |
|
"logits/generated": -0.1339137852191925, |
|
"logits/real": -0.5589958429336548, |
|
"logps/generated": -340.87005615234375, |
|
"logps/real": -173.7953338623047, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.979646682739258, |
|
"rewards/margins": 14.847864151000977, |
|
"rewards/real": 1.8682178258895874, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.745959922430511e-07, |
|
"logits/generated": -0.18612061440944672, |
|
"logits/real": -0.6156530976295471, |
|
"logps/generated": -347.98406982421875, |
|
"logps/real": -157.8029022216797, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.1200590133667, |
|
"rewards/margins": 16.077503204345703, |
|
"rewards/real": 1.9574439525604248, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.713639301874596e-07, |
|
"logits/generated": -0.15092961490154266, |
|
"logits/real": -0.5848164558410645, |
|
"logps/generated": -329.3385314941406, |
|
"logps/real": -167.6374053955078, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.033224105834961, |
|
"rewards/margins": 16.028520584106445, |
|
"rewards/real": 1.995296835899353, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6813186813186816e-07, |
|
"logits/generated": -0.17901554703712463, |
|
"logits/real": -0.3941217064857483, |
|
"logps/generated": -345.0260314941406, |
|
"logps/real": -172.32496643066406, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.950887680053711, |
|
"rewards/margins": 15.942358016967773, |
|
"rewards/real": 1.9914672374725342, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.648998060762766e-07, |
|
"logits/generated": -0.1686992645263672, |
|
"logits/real": -0.5567506551742554, |
|
"logps/generated": -373.4571838378906, |
|
"logps/real": -156.1488037109375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.744462966918945, |
|
"rewards/margins": 16.727497100830078, |
|
"rewards/real": 1.9830348491668701, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.616677440206852e-07, |
|
"logits/generated": -0.18675687909126282, |
|
"logits/real": -0.5821475982666016, |
|
"logps/generated": -355.96197509765625, |
|
"logps/real": -175.8487091064453, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.0609712600708, |
|
"rewards/margins": 17.050426483154297, |
|
"rewards/real": 1.9894546270370483, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.584356819650937e-07, |
|
"logits/generated": -0.15092836320400238, |
|
"logits/real": -0.5460507869720459, |
|
"logps/generated": -350.76031494140625, |
|
"logps/real": -157.88308715820312, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.409652709960938, |
|
"rewards/margins": 16.388113021850586, |
|
"rewards/real": 1.9784597158432007, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5520361990950227e-07, |
|
"logits/generated": -0.1378619521856308, |
|
"logits/real": -0.5395939946174622, |
|
"logps/generated": -343.2633361816406, |
|
"logps/real": -156.80564880371094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.973530769348145, |
|
"rewards/margins": 16.889755249023438, |
|
"rewards/real": 1.9162266254425049, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.519715578539108e-07, |
|
"logits/generated": -0.1499830186367035, |
|
"logits/real": -0.5538982152938843, |
|
"logps/generated": -350.88299560546875, |
|
"logps/real": -159.18971252441406, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.071060180664062, |
|
"rewards/margins": 17.079561233520508, |
|
"rewards/real": 2.008500814437866, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.487394957983193e-07, |
|
"logits/generated": -0.1028517633676529, |
|
"logits/real": -0.5367806553840637, |
|
"logps/generated": -347.6337890625, |
|
"logps/real": -157.7081298828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.268692016601562, |
|
"rewards/margins": 17.024709701538086, |
|
"rewards/real": 1.756016731262207, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.4550743374272786e-07, |
|
"logits/generated": -0.21606239676475525, |
|
"logits/real": -0.6731992959976196, |
|
"logps/generated": -363.62860107421875, |
|
"logps/real": -151.80657958984375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.409940719604492, |
|
"rewards/margins": 18.39179039001465, |
|
"rewards/real": 1.981850266456604, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.422753716871364e-07, |
|
"logits/generated": -0.2115277796983719, |
|
"logits/real": -0.6237621307373047, |
|
"logps/generated": -369.09539794921875, |
|
"logps/real": -167.1064910888672, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.647296905517578, |
|
"rewards/margins": 18.58269500732422, |
|
"rewards/real": 1.935397744178772, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3904330963154494e-07, |
|
"logits/generated": -0.2040260136127472, |
|
"logits/real": -0.5004506707191467, |
|
"logps/generated": -367.04595947265625, |
|
"logps/real": -156.01954650878906, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.034652709960938, |
|
"rewards/margins": 19.96006965637207, |
|
"rewards/real": 1.9254153966903687, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.358112475759534e-07, |
|
"logits/generated": -0.1658962070941925, |
|
"logits/real": -0.5005853772163391, |
|
"logps/generated": -346.9508056640625, |
|
"logps/real": -166.74932861328125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.732416152954102, |
|
"rewards/margins": 17.828378677368164, |
|
"rewards/real": 2.0959630012512207, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3257918552036197e-07, |
|
"logits/generated": -0.2046574056148529, |
|
"logits/real": -0.5572770833969116, |
|
"logps/generated": -342.04315185546875, |
|
"logps/real": -153.7974853515625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.750532150268555, |
|
"rewards/margins": 18.043107986450195, |
|
"rewards/real": 2.2925753593444824, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.293471234647705e-07, |
|
"logits/generated": -0.14821186661720276, |
|
"logits/real": -0.5680894255638123, |
|
"logps/generated": -374.68304443359375, |
|
"logps/real": -162.72024536132812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.691937446594238, |
|
"rewards/margins": 18.041702270507812, |
|
"rewards/real": 2.3497626781463623, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.2611506140917905e-07, |
|
"logits/generated": -0.13955196738243103, |
|
"logits/real": -0.5606727004051208, |
|
"logps/generated": -334.7650451660156, |
|
"logps/real": -155.86685180664062, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.3911075592041, |
|
"rewards/margins": 18.84897232055664, |
|
"rewards/real": 2.4578652381896973, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2288299935358757e-07, |
|
"logits/generated": -0.1431746780872345, |
|
"logits/real": -0.5892812013626099, |
|
"logps/generated": -349.84832763671875, |
|
"logps/real": -164.02850341796875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.084333419799805, |
|
"rewards/margins": 19.181116104125977, |
|
"rewards/real": 2.0967822074890137, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.196509372979961e-07, |
|
"logits/generated": -0.15400557219982147, |
|
"logits/real": -0.4679229259490967, |
|
"logps/generated": -357.32196044921875, |
|
"logps/real": -169.57557678222656, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.080036163330078, |
|
"rewards/margins": 19.405277252197266, |
|
"rewards/real": 2.3252413272857666, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1641887524240465e-07, |
|
"logits/generated": -0.28581511974334717, |
|
"logits/real": -0.434844970703125, |
|
"logps/generated": -387.32293701171875, |
|
"logps/real": -163.56687927246094, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.026063919067383, |
|
"rewards/margins": 19.424678802490234, |
|
"rewards/real": 2.3986144065856934, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1318681318681316e-07, |
|
"logits/generated": -0.2568507790565491, |
|
"logits/real": -0.5576425194740295, |
|
"logps/generated": -400.52569580078125, |
|
"logps/real": -154.03855895996094, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.303359985351562, |
|
"rewards/margins": 20.68377113342285, |
|
"rewards/real": 2.3804097175598145, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0995475113122173e-07, |
|
"logits/generated": -0.14214341342449188, |
|
"logits/real": -0.5439749360084534, |
|
"logps/generated": -367.5722351074219, |
|
"logps/real": -152.11997985839844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.070608139038086, |
|
"rewards/margins": 18.29524803161621, |
|
"rewards/real": 2.2246389389038086, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0672268907563024e-07, |
|
"logits/generated": -0.18583758175373077, |
|
"logits/real": -0.5424495935440063, |
|
"logps/generated": -374.7005310058594, |
|
"logps/real": -158.53256225585938, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -15.18799877166748, |
|
"rewards/margins": 16.96066665649414, |
|
"rewards/real": 1.7726675271987915, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0349062702003876e-07, |
|
"logits/generated": -0.07082077860832214, |
|
"logits/real": -0.49967464804649353, |
|
"logps/generated": -346.9859313964844, |
|
"logps/real": -167.21066284179688, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.303332328796387, |
|
"rewards/margins": 17.711301803588867, |
|
"rewards/real": 2.4079716205596924, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0025856496444727e-07, |
|
"logits/generated": -0.08859279006719589, |
|
"logits/real": -0.49546709656715393, |
|
"logps/generated": -345.524658203125, |
|
"logps/real": -165.77511596679688, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.066813468933105, |
|
"rewards/margins": 16.517831802368164, |
|
"rewards/real": 2.451017379760742, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9702650290885584e-07, |
|
"logits/generated": -0.10903888940811157, |
|
"logits/real": -0.6787563562393188, |
|
"logps/generated": -358.34600830078125, |
|
"logps/real": -145.3551025390625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.254704475402832, |
|
"rewards/margins": 17.734054565429688, |
|
"rewards/real": 2.4793505668640137, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9379444085326436e-07, |
|
"logits/generated": -0.12196536362171173, |
|
"logits/real": -0.6077748537063599, |
|
"logps/generated": -355.57763671875, |
|
"logps/real": -139.5839080810547, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.810674667358398, |
|
"rewards/margins": 18.17705535888672, |
|
"rewards/real": 2.3663809299468994, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.905623787976729e-07, |
|
"logits/generated": -0.08650495111942291, |
|
"logits/real": -0.5872990489006042, |
|
"logps/generated": -352.50579833984375, |
|
"logps/real": -144.9486846923828, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.788429260253906, |
|
"rewards/margins": 17.235305786132812, |
|
"rewards/real": 2.44687819480896, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8733031674208144e-07, |
|
"logits/generated": -0.10015587508678436, |
|
"logits/real": -0.5014637112617493, |
|
"logps/generated": -346.60638427734375, |
|
"logps/real": -160.1988983154297, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.358625411987305, |
|
"rewards/margins": 17.704397201538086, |
|
"rewards/real": 2.345770835876465, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8409825468648995e-07, |
|
"logits/generated": -0.11494553089141846, |
|
"logits/real": -0.5590375661849976, |
|
"logps/generated": -380.1128234863281, |
|
"logps/real": -141.56283569335938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.548856735229492, |
|
"rewards/margins": 17.986303329467773, |
|
"rewards/real": 2.4374477863311768, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.808661926308985e-07, |
|
"logits/generated": -0.11113546788692474, |
|
"logits/real": -0.598678469657898, |
|
"logps/generated": -345.5535583496094, |
|
"logps/real": -154.47348022460938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.850610733032227, |
|
"rewards/margins": 18.411144256591797, |
|
"rewards/real": 2.560535430908203, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7763413057530703e-07, |
|
"logits/generated": -0.11749716103076935, |
|
"logits/real": -0.4995584487915039, |
|
"logps/generated": -347.54632568359375, |
|
"logps/real": -172.43051147460938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.894627571105957, |
|
"rewards/margins": 18.432037353515625, |
|
"rewards/real": 2.5374093055725098, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.744020685197156e-07, |
|
"logits/generated": -0.13078978657722473, |
|
"logits/real": -0.5895199775695801, |
|
"logps/generated": -348.36199951171875, |
|
"logps/real": -164.4255828857422, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.548139572143555, |
|
"rewards/margins": 18.038969039916992, |
|
"rewards/real": 2.490828037261963, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7117000646412406e-07, |
|
"logits/generated": -0.16448038816452026, |
|
"logits/real": -0.47440147399902344, |
|
"logps/generated": -343.2660827636719, |
|
"logps/real": -161.40206909179688, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.443878173828125, |
|
"rewards/margins": 17.610179901123047, |
|
"rewards/real": 2.1663026809692383, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6793794440853263e-07, |
|
"logits/generated": -0.09738098084926605, |
|
"logits/real": -0.5735375881195068, |
|
"logps/generated": -348.8927917480469, |
|
"logps/real": -152.59396362304688, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.16368579864502, |
|
"rewards/margins": 17.542085647583008, |
|
"rewards/real": 2.37839937210083, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6470588235294114e-07, |
|
"logits/generated": -0.06919053941965103, |
|
"logits/real": -0.5278010368347168, |
|
"logps/generated": -350.89886474609375, |
|
"logps/real": -160.8590850830078, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.82422161102295, |
|
"rewards/margins": 18.034015655517578, |
|
"rewards/real": 2.209794282913208, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.614738202973497e-07, |
|
"logits/generated": -0.19934818148612976, |
|
"logits/real": -0.7232086658477783, |
|
"logps/generated": -361.8060607910156, |
|
"logps/real": -130.89114379882812, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.74767017364502, |
|
"rewards/margins": 17.264440536499023, |
|
"rewards/real": 2.516772508621216, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.582417582417583e-07, |
|
"logits/generated": -0.1751207411289215, |
|
"logits/real": -0.4808773398399353, |
|
"logps/generated": -357.970947265625, |
|
"logps/real": -154.8083038330078, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.84760570526123, |
|
"rewards/margins": 17.3386287689209, |
|
"rewards/real": 2.4910240173339844, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5500969618616674e-07, |
|
"logits/generated": -0.14850488305091858, |
|
"logits/real": -0.5464012026786804, |
|
"logps/generated": -342.80584716796875, |
|
"logps/real": -156.18069458007812, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.144935607910156, |
|
"rewards/margins": 17.409893035888672, |
|
"rewards/real": 2.2649571895599365, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.517776341305753e-07, |
|
"logits/generated": -0.20405209064483643, |
|
"logits/real": -0.608977198600769, |
|
"logps/generated": -363.1436767578125, |
|
"logps/real": -137.00033569335938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.706165313720703, |
|
"rewards/margins": 19.141508102416992, |
|
"rewards/real": 2.4353442192077637, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.485455720749838e-07, |
|
"logits/generated": -0.169557124376297, |
|
"logits/real": -0.5004895329475403, |
|
"logps/generated": -391.0760803222656, |
|
"logps/real": -164.77609252929688, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.571683883666992, |
|
"rewards/margins": 18.823991775512695, |
|
"rewards/real": 2.2523064613342285, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4531351001939233e-07, |
|
"logits/generated": -0.15461762249469757, |
|
"logits/real": -0.4168466627597809, |
|
"logps/generated": -348.8349609375, |
|
"logps/real": -175.52706909179688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.24575424194336, |
|
"rewards/margins": 18.61968231201172, |
|
"rewards/real": 2.373926877975464, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.420814479638009e-07, |
|
"logits/generated": -0.18714717030525208, |
|
"logits/real": -0.6084888577461243, |
|
"logps/generated": -379.20538330078125, |
|
"logps/real": -157.06103515625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.869117736816406, |
|
"rewards/margins": 19.20322608947754, |
|
"rewards/real": 2.334106922149658, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.388493859082094e-07, |
|
"logits/generated": -0.1750311255455017, |
|
"logits/real": -0.6716971397399902, |
|
"logps/generated": -368.5721740722656, |
|
"logps/real": -153.18988037109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.923906326293945, |
|
"rewards/margins": 19.389135360717773, |
|
"rewards/real": 2.465231418609619, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3561732385261796e-07, |
|
"logits/generated": -0.11852458864450455, |
|
"logits/real": -0.5619971752166748, |
|
"logps/generated": -359.63848876953125, |
|
"logps/real": -158.0484161376953, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.107145309448242, |
|
"rewards/margins": 18.463592529296875, |
|
"rewards/real": 2.35644793510437, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.323852617970265e-07, |
|
"logits/generated": -0.14745311439037323, |
|
"logits/real": -0.6188663244247437, |
|
"logps/generated": -362.33990478515625, |
|
"logps/real": -152.244140625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.988080978393555, |
|
"rewards/margins": 18.147762298583984, |
|
"rewards/real": 2.1596803665161133, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/generated": -0.08865788578987122, |
|
"eval_logits/real": -0.5338985919952393, |
|
"eval_logps/generated": -338.52691650390625, |
|
"eval_logps/real": -156.03204345703125, |
|
"eval_loss": 0.0012918972643092275, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -16.635643005371094, |
|
"eval_rewards/margins": 18.983226776123047, |
|
"eval_rewards/real": 2.347583770751953, |
|
"eval_runtime": 34.1439, |
|
"eval_samples_per_second": 14.644, |
|
"eval_steps_per_second": 0.469, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.29153199741435e-07, |
|
"logits/generated": -0.1425812840461731, |
|
"logits/real": -0.49166035652160645, |
|
"logps/generated": -338.87689208984375, |
|
"logps/real": -154.56332397460938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.0732364654541, |
|
"rewards/margins": 19.233314514160156, |
|
"rewards/real": 2.1600756645202637, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2592113768584355e-07, |
|
"logits/generated": -0.1277109533548355, |
|
"logits/real": -0.46646562218666077, |
|
"logps/generated": -343.853515625, |
|
"logps/real": -174.08665466308594, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.29716682434082, |
|
"rewards/margins": 17.77501678466797, |
|
"rewards/real": 2.477850914001465, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.226890756302521e-07, |
|
"logits/generated": -0.16665223240852356, |
|
"logits/real": -0.6230305433273315, |
|
"logps/generated": -354.49395751953125, |
|
"logps/real": -147.0028533935547, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.22447967529297, |
|
"rewards/margins": 18.776592254638672, |
|
"rewards/real": 2.5521116256713867, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1945701357466063e-07, |
|
"logits/generated": -0.1843683123588562, |
|
"logits/real": -0.6376962661743164, |
|
"logps/generated": -386.64337158203125, |
|
"logps/real": -135.3435821533203, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.382848739624023, |
|
"rewards/margins": 18.70191192626953, |
|
"rewards/real": 2.3190653324127197, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1622495151906917e-07, |
|
"logits/generated": -0.15593907237052917, |
|
"logits/real": -0.7408004403114319, |
|
"logps/generated": -361.1799011230469, |
|
"logps/real": -134.4879150390625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.901073455810547, |
|
"rewards/margins": 19.18222427368164, |
|
"rewards/real": 2.2811498641967773, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.129928894634777e-07, |
|
"logits/generated": -0.13433247804641724, |
|
"logits/real": -0.6540313959121704, |
|
"logps/generated": -386.4795227050781, |
|
"logps/real": -158.45773315429688, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.906547546386719, |
|
"rewards/margins": 18.234060287475586, |
|
"rewards/real": 2.3275156021118164, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.0976082740788623e-07, |
|
"logits/generated": -0.09167423099279404, |
|
"logits/real": -0.6178083419799805, |
|
"logps/generated": -361.160400390625, |
|
"logps/real": -149.57241821289062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.030376434326172, |
|
"rewards/margins": 19.508739471435547, |
|
"rewards/real": 2.478362560272217, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0652876535229474e-07, |
|
"logits/generated": -0.13464868068695068, |
|
"logits/real": -0.5777574777603149, |
|
"logps/generated": -356.6364440917969, |
|
"logps/real": -152.95327758789062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.351245880126953, |
|
"rewards/margins": 19.758331298828125, |
|
"rewards/real": 2.407087564468384, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0329670329670329e-07, |
|
"logits/generated": -0.1649603694677353, |
|
"logits/real": -0.6630862951278687, |
|
"logps/generated": -354.8274230957031, |
|
"logps/real": -149.8674774169922, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.621112823486328, |
|
"rewards/margins": 18.923389434814453, |
|
"rewards/real": 2.3022754192352295, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0006464124111183e-07, |
|
"logits/generated": -0.13935817778110504, |
|
"logits/real": -0.6070569753646851, |
|
"logps/generated": -362.0398864746094, |
|
"logps/real": -154.50875854492188, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.047237396240234, |
|
"rewards/margins": 18.52444839477539, |
|
"rewards/real": 2.4772121906280518, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9683257918552034e-07, |
|
"logits/generated": -0.1529316008090973, |
|
"logits/real": -0.621186375617981, |
|
"logps/generated": -344.2818908691406, |
|
"logps/real": -154.1363983154297, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.116077423095703, |
|
"rewards/margins": 19.72161293029785, |
|
"rewards/real": 2.6055357456207275, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9360051712992888e-07, |
|
"logits/generated": -0.10819858312606812, |
|
"logits/real": -0.5849012732505798, |
|
"logps/generated": -354.9508972167969, |
|
"logps/real": -152.56747436523438, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.38785171508789, |
|
"rewards/margins": 19.879560470581055, |
|
"rewards/real": 2.491711139678955, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.903684550743374e-07, |
|
"logits/generated": -0.08584876358509064, |
|
"logits/real": -0.5865710973739624, |
|
"logps/generated": -379.0561828613281, |
|
"logps/real": -155.8355255126953, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.967159271240234, |
|
"rewards/margins": 19.309185028076172, |
|
"rewards/real": 2.342027187347412, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8713639301874596e-07, |
|
"logits/generated": -0.15022264420986176, |
|
"logits/real": -0.4709080755710602, |
|
"logps/generated": -392.18841552734375, |
|
"logps/real": -168.2960968017578, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.170442581176758, |
|
"rewards/margins": 19.61642837524414, |
|
"rewards/real": 2.445988655090332, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.839043309631545e-07, |
|
"logits/generated": -0.17172668874263763, |
|
"logits/real": -0.49033862352371216, |
|
"logps/generated": -402.3332824707031, |
|
"logps/real": -164.15505981445312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.572994232177734, |
|
"rewards/margins": 19.646896362304688, |
|
"rewards/real": 2.0739011764526367, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8067226890756302e-07, |
|
"logits/generated": -0.16388961672782898, |
|
"logits/real": -0.5507704019546509, |
|
"logps/generated": -350.19708251953125, |
|
"logps/real": -167.8086395263672, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.57755470275879, |
|
"rewards/margins": 19.858524322509766, |
|
"rewards/real": 2.2809712886810303, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7744020685197156e-07, |
|
"logits/generated": -0.0807359591126442, |
|
"logits/real": -0.482065349817276, |
|
"logps/generated": -361.5498046875, |
|
"logps/real": -160.36280822753906, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.408950805664062, |
|
"rewards/margins": 18.69610595703125, |
|
"rewards/real": 2.287153482437134, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7420814479638007e-07, |
|
"logits/generated": -0.16364505887031555, |
|
"logits/real": -0.5634728074073792, |
|
"logps/generated": -380.2509460449219, |
|
"logps/real": -147.43365478515625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.384185791015625, |
|
"rewards/margins": 19.747817993164062, |
|
"rewards/real": 2.363633394241333, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7097608274078861e-07, |
|
"logits/generated": -0.15771499276161194, |
|
"logits/real": -0.5497723817825317, |
|
"logps/generated": -362.02044677734375, |
|
"logps/real": -157.92074584960938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.45067024230957, |
|
"rewards/margins": 19.83937644958496, |
|
"rewards/real": 2.388707399368286, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6774402068519713e-07, |
|
"logits/generated": -0.2227613627910614, |
|
"logits/real": -0.6034024953842163, |
|
"logps/generated": -380.91912841796875, |
|
"logps/real": -150.18222045898438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.618989944458008, |
|
"rewards/margins": 21.065921783447266, |
|
"rewards/real": 2.446932315826416, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6451195862960567e-07, |
|
"logits/generated": -0.19881358742713928, |
|
"logits/real": -0.5300999879837036, |
|
"logps/generated": -387.4521179199219, |
|
"logps/real": -159.89407348632812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.226730346679688, |
|
"rewards/margins": 19.690814971923828, |
|
"rewards/real": 2.464085817337036, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6127989657401424e-07, |
|
"logits/generated": -0.09233691543340683, |
|
"logits/real": -0.5764601230621338, |
|
"logps/generated": -375.98626708984375, |
|
"logps/real": -151.65374755859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.89620590209961, |
|
"rewards/margins": 19.12668800354004, |
|
"rewards/real": 2.2304842472076416, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5804783451842275e-07, |
|
"logits/generated": -0.12695951759815216, |
|
"logits/real": -0.6606336236000061, |
|
"logps/generated": -372.2718200683594, |
|
"logps/real": -160.5155792236328, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.518178939819336, |
|
"rewards/margins": 19.972362518310547, |
|
"rewards/real": 2.45418643951416, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.548157724628313e-07, |
|
"logits/generated": -0.10833124816417694, |
|
"logits/real": -0.4885649085044861, |
|
"logps/generated": -365.9476318359375, |
|
"logps/real": -183.0030059814453, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.58841323852539, |
|
"rewards/margins": 20.075151443481445, |
|
"rewards/real": 2.4867382049560547, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.515837104072398e-07, |
|
"logits/generated": -0.22033734619617462, |
|
"logits/real": -0.5959911942481995, |
|
"logps/generated": -391.39019775390625, |
|
"logps/real": -145.48614501953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.7489013671875, |
|
"rewards/margins": 21.08713150024414, |
|
"rewards/real": 2.338231325149536, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4835164835164835e-07, |
|
"logits/generated": -0.16730687022209167, |
|
"logits/real": -0.5786349177360535, |
|
"logps/generated": -397.57000732421875, |
|
"logps/real": -139.5902557373047, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.101282119750977, |
|
"rewards/margins": 19.492258071899414, |
|
"rewards/real": 2.3909752368927, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.451195862960569e-07, |
|
"logits/generated": -0.15924270451068878, |
|
"logits/real": -0.614446759223938, |
|
"logps/generated": -384.3814697265625, |
|
"logps/real": -148.5280303955078, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.19200325012207, |
|
"rewards/margins": 18.723474502563477, |
|
"rewards/real": 2.5314714908599854, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.418875242404654e-07, |
|
"logits/generated": -0.14972060918807983, |
|
"logits/real": -0.4534078538417816, |
|
"logps/generated": -378.16552734375, |
|
"logps/real": -158.2146453857422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.538646697998047, |
|
"rewards/margins": 19.77383041381836, |
|
"rewards/real": 2.235182762145996, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3865546218487394e-07, |
|
"logits/generated": -0.20875568687915802, |
|
"logits/real": -0.5104445815086365, |
|
"logps/generated": -381.76416015625, |
|
"logps/real": -165.17587280273438, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.26884651184082, |
|
"rewards/margins": 19.617124557495117, |
|
"rewards/real": 2.348278760910034, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3542340012928246e-07, |
|
"logits/generated": -0.17283296585083008, |
|
"logits/real": -0.633509635925293, |
|
"logps/generated": -394.7670593261719, |
|
"logps/real": -145.33033752441406, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.757518768310547, |
|
"rewards/margins": 20.08961296081543, |
|
"rewards/real": 2.332092046737671, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3219133807369102e-07, |
|
"logits/generated": -0.1465195119380951, |
|
"logits/real": -0.5287576913833618, |
|
"logps/generated": -356.48468017578125, |
|
"logps/real": -159.90274047851562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.607791900634766, |
|
"rewards/margins": 19.824115753173828, |
|
"rewards/real": 2.2163243293762207, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2895927601809956e-07, |
|
"logits/generated": -0.10496039688587189, |
|
"logits/real": -0.5634900331497192, |
|
"logps/generated": -381.1453552246094, |
|
"logps/real": -153.85971069335938, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.60517120361328, |
|
"rewards/margins": 20.00139045715332, |
|
"rewards/real": 2.3962197303771973, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2572721396250808e-07, |
|
"logits/generated": -0.17766788601875305, |
|
"logits/real": -0.560404896736145, |
|
"logps/generated": -371.6963195800781, |
|
"logps/real": -153.42520141601562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.938549041748047, |
|
"rewards/margins": 20.271284103393555, |
|
"rewards/real": 2.3327372074127197, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.224951519069166e-07, |
|
"logits/generated": -0.1533491164445877, |
|
"logits/real": -0.6101264953613281, |
|
"logps/generated": -387.2968444824219, |
|
"logps/real": -142.7995147705078, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.7419376373291, |
|
"rewards/margins": 21.141387939453125, |
|
"rewards/real": 2.3994479179382324, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1926308985132513e-07, |
|
"logits/generated": -0.1321093738079071, |
|
"logits/real": -0.5637373924255371, |
|
"logps/generated": -375.9904479980469, |
|
"logps/real": -150.62429809570312, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.543394088745117, |
|
"rewards/margins": 21.009830474853516, |
|
"rewards/real": 2.4664340019226074, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1603102779573367e-07, |
|
"logits/generated": -0.06719120591878891, |
|
"logits/real": -0.5216054916381836, |
|
"logps/generated": -359.5106201171875, |
|
"logps/real": -166.0596160888672, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.27791976928711, |
|
"rewards/margins": 19.75971794128418, |
|
"rewards/real": 2.481797695159912, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.127989657401422e-07, |
|
"logits/generated": -0.1628725826740265, |
|
"logits/real": -0.5706795454025269, |
|
"logps/generated": -384.9640197753906, |
|
"logps/real": -158.7132110595703, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.661848068237305, |
|
"rewards/margins": 20.897113800048828, |
|
"rewards/real": 2.235262870788574, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0956690368455074e-07, |
|
"logits/generated": -0.17886587977409363, |
|
"logits/real": -0.5294612646102905, |
|
"logps/generated": -395.3720703125, |
|
"logps/real": -170.44444274902344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.413227081298828, |
|
"rewards/margins": 20.763330459594727, |
|
"rewards/real": 2.350105047225952, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0633484162895927e-07, |
|
"logits/generated": -0.09874434769153595, |
|
"logits/real": -0.5987704992294312, |
|
"logps/generated": -380.1065368652344, |
|
"logps/real": -151.54112243652344, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.474868774414062, |
|
"rewards/margins": 19.799442291259766, |
|
"rewards/real": 2.3245718479156494, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.031027795733678e-07, |
|
"logits/generated": -0.10035858303308487, |
|
"logits/real": -0.5784216523170471, |
|
"logps/generated": -343.70623779296875, |
|
"logps/real": -151.8103790283203, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.4163875579834, |
|
"rewards/margins": 19.73638916015625, |
|
"rewards/real": 2.3200018405914307, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.987071751777634e-08, |
|
"logits/generated": -0.2470317780971527, |
|
"logits/real": -0.5798231959342957, |
|
"logps/generated": -383.2638854980469, |
|
"logps/real": -143.225341796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.954282760620117, |
|
"rewards/margins": 21.157846450805664, |
|
"rewards/real": 2.2035629749298096, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.663865546218488e-08, |
|
"logits/generated": -0.16073891520500183, |
|
"logits/real": -0.5076737999916077, |
|
"logps/generated": -382.41485595703125, |
|
"logps/real": -161.91615295410156, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.91403579711914, |
|
"rewards/margins": 21.18549156188965, |
|
"rewards/real": 2.271456003189087, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.340659340659341e-08, |
|
"logits/generated": -0.1498544067144394, |
|
"logits/real": -0.6157928705215454, |
|
"logps/generated": -393.04632568359375, |
|
"logps/real": -144.5144500732422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.77938461303711, |
|
"rewards/margins": 21.006399154663086, |
|
"rewards/real": 2.227015256881714, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.017453135100193e-08, |
|
"logits/generated": -0.14160631597042084, |
|
"logits/real": -0.45154842734336853, |
|
"logps/generated": -385.874755859375, |
|
"logps/real": -174.3894500732422, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.51175308227539, |
|
"rewards/margins": 20.70651626586914, |
|
"rewards/real": 2.194763660430908, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.694246929541046e-08, |
|
"logits/generated": -0.18230785429477692, |
|
"logits/real": -0.5540772676467896, |
|
"logps/generated": -386.5364990234375, |
|
"logps/real": -166.11813354492188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.474395751953125, |
|
"rewards/margins": 21.723533630371094, |
|
"rewards/real": 2.2491371631622314, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.371040723981899e-08, |
|
"logits/generated": -0.12085733562707901, |
|
"logits/real": -0.5206674337387085, |
|
"logps/generated": -386.822265625, |
|
"logps/real": -165.15667724609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.291152954101562, |
|
"rewards/margins": 20.772869110107422, |
|
"rewards/real": 2.4817166328430176, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.047834518422754e-08, |
|
"logits/generated": -0.10163680464029312, |
|
"logits/real": -0.5839998126029968, |
|
"logps/generated": -373.8666076660156, |
|
"logps/real": -149.49905395507812, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.110733032226562, |
|
"rewards/margins": 21.506431579589844, |
|
"rewards/real": 2.395698070526123, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.724628312863607e-08, |
|
"logits/generated": -0.16938480734825134, |
|
"logits/real": -0.5904119610786438, |
|
"logps/generated": -378.79241943359375, |
|
"logps/real": -160.4859619140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.471508026123047, |
|
"rewards/margins": 20.79552459716797, |
|
"rewards/real": 2.324016809463501, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.40142210730446e-08, |
|
"logits/generated": -0.16900010406970978, |
|
"logits/real": -0.5733057260513306, |
|
"logps/generated": -387.27557373046875, |
|
"logps/real": -161.57508850097656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.826797485351562, |
|
"rewards/margins": 21.24004364013672, |
|
"rewards/real": 2.413245677947998, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.078215901745313e-08, |
|
"logits/generated": -0.09169816225767136, |
|
"logits/real": -0.5817584991455078, |
|
"logps/generated": -384.75518798828125, |
|
"logps/real": -159.7227783203125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.648197174072266, |
|
"rewards/margins": 20.902324676513672, |
|
"rewards/real": 2.2541282176971436, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_logits/generated": -0.11049004644155502, |
|
"eval_logits/real": -0.5281875729560852, |
|
"eval_logps/generated": -369.5713806152344, |
|
"eval_logps/real": -156.97747802734375, |
|
"eval_loss": 0.0005908250459469855, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -19.740087509155273, |
|
"eval_rewards/margins": 21.993125915527344, |
|
"eval_rewards/real": 2.253039836883545, |
|
"eval_runtime": 34.0236, |
|
"eval_samples_per_second": 14.696, |
|
"eval_steps_per_second": 0.47, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.755009696186167e-08, |
|
"logits/generated": -0.22507600486278534, |
|
"logits/real": -0.6320706605911255, |
|
"logps/generated": -392.18670654296875, |
|
"logps/real": -158.72862243652344, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.797693252563477, |
|
"rewards/margins": 21.236988067626953, |
|
"rewards/real": 2.439295768737793, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.43180349062702e-08, |
|
"logits/generated": -0.1503431349992752, |
|
"logits/real": -0.6102325320243835, |
|
"logps/generated": -374.20367431640625, |
|
"logps/real": -149.55465698242188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.283313751220703, |
|
"rewards/margins": 21.53586196899414, |
|
"rewards/real": 2.252546787261963, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.108597285067872e-08, |
|
"logits/generated": -0.120747409760952, |
|
"logits/real": -0.6200373768806458, |
|
"logps/generated": -382.924560546875, |
|
"logps/real": -158.5184783935547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.336917877197266, |
|
"rewards/margins": 20.731319427490234, |
|
"rewards/real": 2.3944053649902344, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.785391079508726e-08, |
|
"logits/generated": -0.12152354419231415, |
|
"logits/real": -0.6277292966842651, |
|
"logps/generated": -383.5947570800781, |
|
"logps/real": -151.67758178710938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.26775550842285, |
|
"rewards/margins": 21.722991943359375, |
|
"rewards/real": 2.4552369117736816, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.46218487394958e-08, |
|
"logits/generated": -0.19445089995861053, |
|
"logits/real": -0.5363883376121521, |
|
"logps/generated": -421.3140563964844, |
|
"logps/real": -149.7243194580078, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.073074340820312, |
|
"rewards/margins": 22.369853973388672, |
|
"rewards/real": 2.296781063079834, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.1389786683904325e-08, |
|
"logits/generated": -0.14755277335643768, |
|
"logits/real": -0.48324212431907654, |
|
"logps/generated": -400.7479248046875, |
|
"logps/real": -169.32374572753906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.729032516479492, |
|
"rewards/margins": 21.765872955322266, |
|
"rewards/real": 2.0368378162384033, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.8157724628312865e-08, |
|
"logits/generated": -0.16394725441932678, |
|
"logits/real": -0.5636594295501709, |
|
"logps/generated": -383.39971923828125, |
|
"logps/real": -144.65826416015625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.265291213989258, |
|
"rewards/margins": 21.481277465820312, |
|
"rewards/real": 2.215987205505371, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.492566257272139e-08, |
|
"logits/generated": -0.19718605279922485, |
|
"logits/real": -0.5453578233718872, |
|
"logps/generated": -378.180419921875, |
|
"logps/real": -158.4501190185547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.664989471435547, |
|
"rewards/margins": 22.054704666137695, |
|
"rewards/real": 2.38971209526062, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.169360051712993e-08, |
|
"logits/generated": -0.14852051436901093, |
|
"logits/real": -0.5091778039932251, |
|
"logps/generated": -395.76031494140625, |
|
"logps/real": -159.28765869140625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.871753692626953, |
|
"rewards/margins": 21.131423950195312, |
|
"rewards/real": 2.259671211242676, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/generated": -0.17138710618019104, |
|
"logits/real": -0.5305647253990173, |
|
"logps/generated": -396.1712341308594, |
|
"logps/real": -158.55323791503906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.358753204345703, |
|
"rewards/margins": 21.559642791748047, |
|
"rewards/real": 2.2008888721466064, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5229476405946995e-08, |
|
"logits/generated": -0.09422336518764496, |
|
"logits/real": -0.526726484298706, |
|
"logps/generated": -392.82965087890625, |
|
"logps/real": -156.38845825195312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.553489685058594, |
|
"rewards/margins": 22.91410255432129, |
|
"rewards/real": 2.3606128692626953, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.199741435035552e-08, |
|
"logits/generated": -0.17992162704467773, |
|
"logits/real": -0.5685652494430542, |
|
"logps/generated": -388.3940124511719, |
|
"logps/real": -150.02366638183594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.91706085205078, |
|
"rewards/margins": 21.100475311279297, |
|
"rewards/real": 2.183415174484253, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.8765352294764057e-08, |
|
"logits/generated": -0.1457439512014389, |
|
"logits/real": -0.5713385343551636, |
|
"logps/generated": -404.9525451660156, |
|
"logps/real": -158.5803985595703, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.6348876953125, |
|
"rewards/margins": 22.953630447387695, |
|
"rewards/real": 2.3187408447265625, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.553329023917259e-08, |
|
"logits/generated": -0.1862095445394516, |
|
"logits/real": -0.5357497930526733, |
|
"logps/generated": -397.0615234375, |
|
"logps/real": -164.1053924560547, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.842931747436523, |
|
"rewards/margins": 22.151233673095703, |
|
"rewards/real": 2.308300495147705, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.2301228183581126e-08, |
|
"logits/generated": -0.12968115508556366, |
|
"logits/real": -0.5192117094993591, |
|
"logps/generated": -381.94891357421875, |
|
"logps/real": -165.88563537597656, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.871870040893555, |
|
"rewards/margins": 20.711994171142578, |
|
"rewards/real": 1.8401228189468384, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.906916612798966e-08, |
|
"logits/generated": -0.12426308542490005, |
|
"logits/real": -0.5116248726844788, |
|
"logps/generated": -395.79498291015625, |
|
"logps/real": -168.85110473632812, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.11214828491211, |
|
"rewards/margins": 21.347225189208984, |
|
"rewards/real": 2.2350780963897705, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5837104072398187e-08, |
|
"logits/generated": -0.20020797848701477, |
|
"logits/real": -0.5309361815452576, |
|
"logps/generated": -390.20611572265625, |
|
"logps/real": -152.30860900878906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.755496978759766, |
|
"rewards/margins": 23.04546356201172, |
|
"rewards/real": 2.2899651527404785, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2605042016806723e-08, |
|
"logits/generated": -0.2055603265762329, |
|
"logits/real": -0.46124568581581116, |
|
"logps/generated": -389.9735107421875, |
|
"logps/real": -167.1623077392578, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.435937881469727, |
|
"rewards/margins": 22.819133758544922, |
|
"rewards/real": 2.383200168609619, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.372979961215254e-09, |
|
"logits/generated": -0.18891258537769318, |
|
"logits/real": -0.621761679649353, |
|
"logps/generated": -401.1512756347656, |
|
"logps/real": -161.91128540039062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -19.869232177734375, |
|
"rewards/margins": 22.079214096069336, |
|
"rewards/real": 2.209984064102173, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.140917905623787e-09, |
|
"logits/generated": -0.17986151576042175, |
|
"logits/real": -0.5024611353874207, |
|
"logps/generated": -412.3807678222656, |
|
"logps/real": -164.343505859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -18.891101837158203, |
|
"rewards/margins": 21.108158111572266, |
|
"rewards/real": 2.2170538902282715, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9088558500323206e-09, |
|
"logits/generated": -0.17019499838352203, |
|
"logits/real": -0.56906658411026, |
|
"logps/generated": -404.4753112792969, |
|
"logps/real": -154.4612579345703, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -20.2159366607666, |
|
"rewards/margins": 22.49294662475586, |
|
"rewards/real": 2.277008295059204, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1719, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0229055878915893, |
|
"train_runtime": 9181.6734, |
|
"train_samples_per_second": 5.99, |
|
"train_steps_per_second": 0.187 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1719, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|