|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 4.689366619482009, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -1.0011488199234009, |
|
"logits/rejected": -0.9304202795028687, |
|
"logps/chosen": -135.81626892089844, |
|
"logps/rejected": -132.77923583984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 4.862910991687305, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.8224125504493713, |
|
"logits/rejected": -0.9372239708900452, |
|
"logps/chosen": -348.92108154296875, |
|
"logps/rejected": -284.24493408203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4166666567325592, |
|
"rewards/chosen": 0.00047310083755292, |
|
"rewards/margins": 0.0009964401833713055, |
|
"rewards/rejected": -0.000523339316714555, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 4.872842861483512, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.7908538579940796, |
|
"logits/rejected": -0.8495171666145325, |
|
"logps/chosen": -227.61257934570312, |
|
"logps/rejected": -197.7724609375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0003789737238548696, |
|
"rewards/margins": 0.00038116061477921903, |
|
"rewards/rejected": -2.1869550437259022e-06, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 4.595566615642153, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.9029897451400757, |
|
"logits/rejected": -0.9816384315490723, |
|
"logps/chosen": -238.19448852539062, |
|
"logps/rejected": -245.59716796875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00023587894975207746, |
|
"rewards/margins": 0.0008664665510877967, |
|
"rewards/rejected": -0.0006305875140242279, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 4.745750839391565, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.8999044299125671, |
|
"logits/rejected": -0.9681742787361145, |
|
"logps/chosen": -249.49063110351562, |
|
"logps/rejected": -239.22427368164062, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0009130829130299389, |
|
"rewards/margins": 0.00013075381866656244, |
|
"rewards/rejected": 0.0007823290070518851, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 5.057774890923517, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.8674479722976685, |
|
"logits/rejected": -0.8950203061103821, |
|
"logps/chosen": -258.783203125, |
|
"logps/rejected": -224.3574981689453, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00040593650192022324, |
|
"rewards/margins": -0.00040950890979729593, |
|
"rewards/rejected": 3.572390369299683e-06, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 5.092132113680646, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -0.9632419347763062, |
|
"logits/rejected": -0.9102532267570496, |
|
"logps/chosen": -264.07708740234375, |
|
"logps/rejected": -246.053955078125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.000676992698572576, |
|
"rewards/margins": -0.0012114204000681639, |
|
"rewards/rejected": 0.0005344276432879269, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 4.648224429804769, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.7308753132820129, |
|
"logits/rejected": -0.7530420422554016, |
|
"logps/chosen": -268.9087829589844, |
|
"logps/rejected": -253.7935791015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0011928745079785585, |
|
"rewards/margins": -0.001063015777617693, |
|
"rewards/rejected": -0.0001298588322242722, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 4.986612393009332, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -0.8931838870048523, |
|
"logits/rejected": -0.7523937821388245, |
|
"logps/chosen": -179.30514526367188, |
|
"logps/rejected": -231.7339324951172, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 5.182736640563235e-05, |
|
"rewards/margins": 0.0005375603213906288, |
|
"rewards/rejected": -0.0004857330641243607, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 4.638674368908913, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -0.9545013308525085, |
|
"logits/rejected": -1.0180871486663818, |
|
"logps/chosen": -318.1070251464844, |
|
"logps/rejected": -274.193603515625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -6.821169517934322e-05, |
|
"rewards/margins": -2.9464903491316363e-05, |
|
"rewards/rejected": -3.8746675272705033e-05, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 4.888424185382462, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.7748934030532837, |
|
"logits/rejected": -0.6958049535751343, |
|
"logps/chosen": -248.3775634765625, |
|
"logps/rejected": -264.9973449707031, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.001517611788585782, |
|
"rewards/margins": -0.0018173797288909554, |
|
"rewards/rejected": 0.00029976802761666477, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 4.238219176172328, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.9006500244140625, |
|
"logits/rejected": -0.9068830609321594, |
|
"logps/chosen": -214.5918731689453, |
|
"logps/rejected": -218.4353790283203, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0011649501975625753, |
|
"rewards/margins": 0.003199439961463213, |
|
"rewards/rejected": -0.002034489531069994, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 4.7854336032871485, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.8462833166122437, |
|
"logits/rejected": -0.9375897645950317, |
|
"logps/chosen": -271.18206787109375, |
|
"logps/rejected": -258.28997802734375, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0003926992940250784, |
|
"rewards/margins": 0.0025149863213300705, |
|
"rewards/rejected": -0.0021222869399935007, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 4.384651028319668, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -0.9110666513442993, |
|
"logits/rejected": -0.8872417211532593, |
|
"logps/chosen": -196.56625366210938, |
|
"logps/rejected": -294.2314453125, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.002021104097366333, |
|
"rewards/margins": 0.0021953617688268423, |
|
"rewards/rejected": -0.004216466099023819, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 4.619105718711405, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.7167148590087891, |
|
"logits/rejected": -0.7378164529800415, |
|
"logps/chosen": -274.03936767578125, |
|
"logps/rejected": -268.20806884765625, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0016457755118608475, |
|
"rewards/margins": 0.004239819012582302, |
|
"rewards/rejected": -0.00588559452444315, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 4.887855835358377, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.8701818585395813, |
|
"logits/rejected": -0.8630602955818176, |
|
"logps/chosen": -211.195556640625, |
|
"logps/rejected": -207.6147003173828, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.002448505023494363, |
|
"rewards/margins": 0.007906012237071991, |
|
"rewards/rejected": -0.01035451702773571, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 4.9642153171388745, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.7513445615768433, |
|
"logits/rejected": -0.800182044506073, |
|
"logps/chosen": -288.314208984375, |
|
"logps/rejected": -223.4950714111328, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0031119356863200665, |
|
"rewards/margins": 0.012469562701880932, |
|
"rewards/rejected": -0.015581500716507435, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 4.521207364726132, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.716513991355896, |
|
"logits/rejected": -0.7601069211959839, |
|
"logps/chosen": -318.1102294921875, |
|
"logps/rejected": -305.749267578125, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0036616616416722536, |
|
"rewards/margins": 0.00973360612988472, |
|
"rewards/rejected": -0.013395266607403755, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 5.023702329637655, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -0.9385740160942078, |
|
"logits/rejected": -0.943560779094696, |
|
"logps/chosen": -224.21755981445312, |
|
"logps/rejected": -217.0615234375, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.002689545974135399, |
|
"rewards/margins": 0.022182326763868332, |
|
"rewards/rejected": -0.02487187273800373, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 5.705487432813769, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.8128509521484375, |
|
"logits/rejected": -0.8663360476493835, |
|
"logps/chosen": -298.9399719238281, |
|
"logps/rejected": -244.75387573242188, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.004362248349934816, |
|
"rewards/margins": 0.006819064728915691, |
|
"rewards/rejected": -0.011181313544511795, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 4.513448783590523, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.8495550155639648, |
|
"logits/rejected": -0.7926044464111328, |
|
"logps/chosen": -300.69775390625, |
|
"logps/rejected": -287.484375, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.004859840031713247, |
|
"rewards/margins": 0.03267320245504379, |
|
"rewards/rejected": -0.03753304481506348, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 4.771408453293688, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.8157240152359009, |
|
"logits/rejected": -0.8260771632194519, |
|
"logps/chosen": -227.16909790039062, |
|
"logps/rejected": -251.068115234375, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.007311197463423014, |
|
"rewards/margins": 0.01891271583735943, |
|
"rewards/rejected": -0.026223912835121155, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 4.625214029931686, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.7995166182518005, |
|
"logits/rejected": -0.845539927482605, |
|
"logps/chosen": -296.12158203125, |
|
"logps/rejected": -306.5632019042969, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.013005027547478676, |
|
"rewards/margins": 0.015108886174857616, |
|
"rewards/rejected": -0.028113916516304016, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 5.010963725044197, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.7807406783103943, |
|
"logits/rejected": -0.6935420632362366, |
|
"logps/chosen": -223.29647827148438, |
|
"logps/rejected": -259.60162353515625, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0160575769841671, |
|
"rewards/margins": 0.02244911529123783, |
|
"rewards/rejected": -0.03850669413805008, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 5.3808625345693715, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.8535972833633423, |
|
"logits/rejected": -0.8847853541374207, |
|
"logps/chosen": -278.45166015625, |
|
"logps/rejected": -233.5392608642578, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.007952993735671043, |
|
"rewards/margins": 0.029812496155500412, |
|
"rewards/rejected": -0.037765491753816605, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 5.070039825169341, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.7644799947738647, |
|
"logits/rejected": -0.7972002029418945, |
|
"logps/chosen": -219.7716064453125, |
|
"logps/rejected": -215.15316772460938, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.017915142700076103, |
|
"rewards/margins": 0.026944369077682495, |
|
"rewards/rejected": -0.04485950991511345, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 4.754186632320252, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.9055066108703613, |
|
"logits/rejected": -0.8144194483757019, |
|
"logps/chosen": -254.07125854492188, |
|
"logps/rejected": -262.3128356933594, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.025571072474122047, |
|
"rewards/margins": 0.05727959796786308, |
|
"rewards/rejected": -0.08285067230463028, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 4.987049183479494, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.804566502571106, |
|
"logits/rejected": -0.9701822996139526, |
|
"logps/chosen": -268.6120910644531, |
|
"logps/rejected": -215.0527801513672, |
|
"loss": 0.671, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.025534141808748245, |
|
"rewards/margins": 0.01729689911007881, |
|
"rewards/rejected": -0.04283104091882706, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 4.996897467037669, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -0.9051429629325867, |
|
"logits/rejected": -0.8783286809921265, |
|
"logps/chosen": -281.0115966796875, |
|
"logps/rejected": -272.17315673828125, |
|
"loss": 0.6616, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02867250144481659, |
|
"rewards/margins": 0.056794364005327225, |
|
"rewards/rejected": -0.08546686172485352, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 4.661711977634519, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.7616173028945923, |
|
"logits/rejected": -0.6811286807060242, |
|
"logps/chosen": -276.2654113769531, |
|
"logps/rejected": -253.44387817382812, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.030496900901198387, |
|
"rewards/margins": 0.05069463700056076, |
|
"rewards/rejected": -0.0811915397644043, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 5.234931356849739, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.8670452237129211, |
|
"logits/rejected": -0.8828343152999878, |
|
"logps/chosen": -253.47982788085938, |
|
"logps/rejected": -279.6092224121094, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05864090472459793, |
|
"rewards/margins": 0.061179112643003464, |
|
"rewards/rejected": -0.1198200210928917, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 4.795640451841223, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.7577391266822815, |
|
"logits/rejected": -0.8055315017700195, |
|
"logps/chosen": -261.38665771484375, |
|
"logps/rejected": -237.2653350830078, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05181436613202095, |
|
"rewards/margins": 0.08690813928842545, |
|
"rewards/rejected": -0.1387225091457367, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 4.6531369927460835, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.8808850049972534, |
|
"logits/rejected": -0.9084761738777161, |
|
"logps/chosen": -259.6866760253906, |
|
"logps/rejected": -239.19082641601562, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06028270721435547, |
|
"rewards/margins": 0.0768328607082367, |
|
"rewards/rejected": -0.13711555302143097, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 4.7018147060937165, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.7389161586761475, |
|
"logits/rejected": -0.6371687650680542, |
|
"logps/chosen": -243.20669555664062, |
|
"logps/rejected": -291.19415283203125, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10904283821582794, |
|
"rewards/margins": 0.09945854544639587, |
|
"rewards/rejected": -0.20850138366222382, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 4.83587717123604, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.7003086805343628, |
|
"logits/rejected": -0.7263328433036804, |
|
"logps/chosen": -230.62255859375, |
|
"logps/rejected": -264.69952392578125, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0853889137506485, |
|
"rewards/margins": 0.13564836978912354, |
|
"rewards/rejected": -0.22103726863861084, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 5.295107224792075, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -0.8799580335617065, |
|
"logits/rejected": -0.8661099672317505, |
|
"logps/chosen": -296.1805725097656, |
|
"logps/rejected": -299.8250732421875, |
|
"loss": 0.6515, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.14768368005752563, |
|
"rewards/margins": 0.09014255553483963, |
|
"rewards/rejected": -0.23782619833946228, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 5.350703522133975, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.7589418888092041, |
|
"logits/rejected": -0.8757201433181763, |
|
"logps/chosen": -253.6654510498047, |
|
"logps/rejected": -221.54281616210938, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14394545555114746, |
|
"rewards/margins": 0.0798153355717659, |
|
"rewards/rejected": -0.22376079857349396, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 5.728112222958844, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.7830215096473694, |
|
"logits/rejected": -0.7601363062858582, |
|
"logps/chosen": -245.7024688720703, |
|
"logps/rejected": -276.0493469238281, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17416706681251526, |
|
"rewards/margins": 0.17155224084854126, |
|
"rewards/rejected": -0.3457193076610565, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 4.9514500106693, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -0.8882322311401367, |
|
"logits/rejected": -0.8203359842300415, |
|
"logps/chosen": -232.29898071289062, |
|
"logps/rejected": -257.9759826660156, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14858679473400116, |
|
"rewards/margins": 0.20146794617176056, |
|
"rewards/rejected": -0.3500547409057617, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 5.8336231290197045, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.7206496596336365, |
|
"logits/rejected": -0.7572492361068726, |
|
"logps/chosen": -277.67913818359375, |
|
"logps/rejected": -261.2207946777344, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2389966994524002, |
|
"rewards/margins": 0.1022852212190628, |
|
"rewards/rejected": -0.3412818908691406, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 5.45994847173407, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.749753475189209, |
|
"logits/rejected": -0.8122553825378418, |
|
"logps/chosen": -262.7334289550781, |
|
"logps/rejected": -268.3887634277344, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.2442113608121872, |
|
"rewards/margins": 0.20316508412361145, |
|
"rewards/rejected": -0.44737643003463745, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 5.702146266766378, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -0.8613287806510925, |
|
"logits/rejected": -0.8260523676872253, |
|
"logps/chosen": -268.2001037597656, |
|
"logps/rejected": -314.8636474609375, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2837222218513489, |
|
"rewards/margins": 0.21735891699790955, |
|
"rewards/rejected": -0.5010811686515808, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 5.941538001240442, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.7500723600387573, |
|
"logits/rejected": -0.7804423570632935, |
|
"logps/chosen": -338.72479248046875, |
|
"logps/rejected": -326.6620178222656, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3654174506664276, |
|
"rewards/margins": 0.21260254085063934, |
|
"rewards/rejected": -0.5780200362205505, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 6.009621410768706, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -0.9403728246688843, |
|
"logits/rejected": -0.8628015518188477, |
|
"logps/chosen": -264.67034912109375, |
|
"logps/rejected": -304.2147216796875, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.326229989528656, |
|
"rewards/margins": 0.4290277361869812, |
|
"rewards/rejected": -0.7552577257156372, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 5.690063285652722, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.8182615041732788, |
|
"logits/rejected": -0.8673620223999023, |
|
"logps/chosen": -304.6317443847656, |
|
"logps/rejected": -316.14019775390625, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.43472570180892944, |
|
"rewards/margins": 0.2013782262802124, |
|
"rewards/rejected": -0.6361039280891418, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 6.437625645010029, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -0.7834080457687378, |
|
"logits/rejected": -0.8002208471298218, |
|
"logps/chosen": -268.7138366699219, |
|
"logps/rejected": -268.2469787597656, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.46527180075645447, |
|
"rewards/margins": 0.16177940368652344, |
|
"rewards/rejected": -0.6270512342453003, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 6.346554276129153, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.8159794807434082, |
|
"logits/rejected": -0.8230727910995483, |
|
"logps/chosen": -291.6131591796875, |
|
"logps/rejected": -324.294189453125, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.402113676071167, |
|
"rewards/margins": 0.38154715299606323, |
|
"rewards/rejected": -0.7836608290672302, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 5.770852636176762, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -0.8236961364746094, |
|
"logits/rejected": -0.8538937568664551, |
|
"logps/chosen": -234.36978149414062, |
|
"logps/rejected": -280.78802490234375, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3996524512767792, |
|
"rewards/margins": 0.3925968408584595, |
|
"rewards/rejected": -0.7922492623329163, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 6.932352951399689, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -1.0455796718597412, |
|
"logits/rejected": -0.961613655090332, |
|
"logps/chosen": -268.81707763671875, |
|
"logps/rejected": -362.69891357421875, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5076214075088501, |
|
"rewards/margins": 0.4294431805610657, |
|
"rewards/rejected": -0.9370646476745605, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 7.088135104125632, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.8383450508117676, |
|
"logits/rejected": -0.7808793783187866, |
|
"logps/chosen": -248.08639526367188, |
|
"logps/rejected": -321.13519287109375, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.48594579100608826, |
|
"rewards/margins": 0.676557719707489, |
|
"rewards/rejected": -1.1625034809112549, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 7.550410687183492, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.8356281518936157, |
|
"logits/rejected": -0.8823004961013794, |
|
"logps/chosen": -340.29791259765625, |
|
"logps/rejected": -333.99981689453125, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7787412405014038, |
|
"rewards/margins": 0.24195539951324463, |
|
"rewards/rejected": -1.0206966400146484, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 8.043941220353142, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -0.8867794275283813, |
|
"logits/rejected": -0.822262167930603, |
|
"logps/chosen": -275.9137878417969, |
|
"logps/rejected": -352.0505065917969, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.46751028299331665, |
|
"rewards/margins": 0.7203239798545837, |
|
"rewards/rejected": -1.1878341436386108, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 7.943939511731397, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.790678858757019, |
|
"logits/rejected": -0.8552435636520386, |
|
"logps/chosen": -336.27557373046875, |
|
"logps/rejected": -372.5177001953125, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7617267370223999, |
|
"rewards/margins": 0.3834512233734131, |
|
"rewards/rejected": -1.145177960395813, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 7.835632823524396, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -0.9683562517166138, |
|
"logits/rejected": -0.9080503582954407, |
|
"logps/chosen": -253.3780975341797, |
|
"logps/rejected": -338.4334411621094, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5727646350860596, |
|
"rewards/margins": 0.47228097915649414, |
|
"rewards/rejected": -1.0450456142425537, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 6.170675144426816, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -0.8312419056892395, |
|
"logits/rejected": -0.8283737897872925, |
|
"logps/chosen": -292.1555480957031, |
|
"logps/rejected": -331.30499267578125, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5170794725418091, |
|
"rewards/margins": 0.4533451497554779, |
|
"rewards/rejected": -0.9704246520996094, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 5.877933895862304, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.7666348218917847, |
|
"logits/rejected": -0.7571445107460022, |
|
"logps/chosen": -358.6742858886719, |
|
"logps/rejected": -395.4786071777344, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7097586393356323, |
|
"rewards/margins": 0.32830795645713806, |
|
"rewards/rejected": -1.0380666255950928, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 6.79860706241038, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.9468134045600891, |
|
"logits/rejected": -1.0048458576202393, |
|
"logps/chosen": -250.75411987304688, |
|
"logps/rejected": -242.416259765625, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6376280784606934, |
|
"rewards/margins": 0.16986791789531708, |
|
"rewards/rejected": -0.8074960708618164, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 8.675977571454341, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.7279460430145264, |
|
"logits/rejected": -0.702147364616394, |
|
"logps/chosen": -355.9872131347656, |
|
"logps/rejected": -433.03436279296875, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7968459129333496, |
|
"rewards/margins": 0.5167227983474731, |
|
"rewards/rejected": -1.3135687112808228, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 10.049920254278586, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -0.9273891448974609, |
|
"logits/rejected": -0.9307486414909363, |
|
"logps/chosen": -308.32110595703125, |
|
"logps/rejected": -333.7012634277344, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6520169973373413, |
|
"rewards/margins": 0.3839523494243622, |
|
"rewards/rejected": -1.0359693765640259, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 8.308480938577231, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.7263349294662476, |
|
"logits/rejected": -0.7392244338989258, |
|
"logps/chosen": -283.90802001953125, |
|
"logps/rejected": -315.4358825683594, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7617335319519043, |
|
"rewards/margins": 0.3228367567062378, |
|
"rewards/rejected": -1.084570288658142, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 7.912284801575183, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -0.9102510213851929, |
|
"logits/rejected": -0.9313921928405762, |
|
"logps/chosen": -329.23565673828125, |
|
"logps/rejected": -352.90655517578125, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6352382898330688, |
|
"rewards/margins": 0.5430476665496826, |
|
"rewards/rejected": -1.178285837173462, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 7.991602124810873, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.8471067547798157, |
|
"logits/rejected": -0.8511239886283875, |
|
"logps/chosen": -330.8809814453125, |
|
"logps/rejected": -349.6996765136719, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6577950716018677, |
|
"rewards/margins": 0.4866298735141754, |
|
"rewards/rejected": -1.1444250345230103, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 7.858534413433715, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.7265298962593079, |
|
"logits/rejected": -0.7749903202056885, |
|
"logps/chosen": -423.787841796875, |
|
"logps/rejected": -374.99310302734375, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9267325401306152, |
|
"rewards/margins": 0.3885432481765747, |
|
"rewards/rejected": -1.31527578830719, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 7.914507028628532, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -1.0132184028625488, |
|
"logits/rejected": -1.020798921585083, |
|
"logps/chosen": -295.89031982421875, |
|
"logps/rejected": -348.19757080078125, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8688480257987976, |
|
"rewards/margins": 0.48463934659957886, |
|
"rewards/rejected": -1.353487253189087, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 8.168951986763473, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -0.90748131275177, |
|
"logits/rejected": -0.8296523094177246, |
|
"logps/chosen": -306.3932189941406, |
|
"logps/rejected": -355.0683898925781, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8589761853218079, |
|
"rewards/margins": 0.45014849305152893, |
|
"rewards/rejected": -1.3091247081756592, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 12.189636245889604, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.8050463795661926, |
|
"logits/rejected": -0.8074838519096375, |
|
"logps/chosen": -363.4576416015625, |
|
"logps/rejected": -372.71533203125, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9918814897537231, |
|
"rewards/margins": 0.31118518114089966, |
|
"rewards/rejected": -1.3030664920806885, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 8.801792548075712, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -0.9548357129096985, |
|
"logits/rejected": -0.944226086139679, |
|
"logps/chosen": -309.63763427734375, |
|
"logps/rejected": -365.5370178222656, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8808960914611816, |
|
"rewards/margins": 0.6289945244789124, |
|
"rewards/rejected": -1.5098905563354492, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 9.037871356385454, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -0.8803000450134277, |
|
"logits/rejected": -0.9197956323623657, |
|
"logps/chosen": -398.33673095703125, |
|
"logps/rejected": -418.3768005371094, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9903624653816223, |
|
"rewards/margins": 0.4092163145542145, |
|
"rewards/rejected": -1.3995788097381592, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 7.995793666629949, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.8957052230834961, |
|
"logits/rejected": -0.9519813656806946, |
|
"logps/chosen": -327.54058837890625, |
|
"logps/rejected": -375.4893798828125, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8798785209655762, |
|
"rewards/margins": 0.5467365384101868, |
|
"rewards/rejected": -1.4266151189804077, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 9.303423916451855, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.8354761004447937, |
|
"logits/rejected": -0.8360861539840698, |
|
"logps/chosen": -314.87786865234375, |
|
"logps/rejected": -362.77154541015625, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8782456517219543, |
|
"rewards/margins": 0.41027846932411194, |
|
"rewards/rejected": -1.2885241508483887, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 9.577548468423071, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -0.9991138577461243, |
|
"logits/rejected": -0.995546817779541, |
|
"logps/chosen": -284.5464782714844, |
|
"logps/rejected": -341.00433349609375, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8869355320930481, |
|
"rewards/margins": 0.7838705778121948, |
|
"rewards/rejected": -1.6708061695098877, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 10.701876294903373, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -0.841864287853241, |
|
"logits/rejected": -0.936977207660675, |
|
"logps/chosen": -371.80328369140625, |
|
"logps/rejected": -404.70672607421875, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8815019726753235, |
|
"rewards/margins": 0.852567195892334, |
|
"rewards/rejected": -1.7340691089630127, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 8.673550568480893, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -0.9126158952713013, |
|
"logits/rejected": -0.9728757739067078, |
|
"logps/chosen": -332.90386962890625, |
|
"logps/rejected": -394.7867736816406, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9386744499206543, |
|
"rewards/margins": 0.6791925430297852, |
|
"rewards/rejected": -1.61786687374115, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 10.4122473453474, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -1.0145832300186157, |
|
"logits/rejected": -1.0208103656768799, |
|
"logps/chosen": -288.4735412597656, |
|
"logps/rejected": -360.450927734375, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.78793865442276, |
|
"rewards/margins": 0.6522720456123352, |
|
"rewards/rejected": -1.4402107000350952, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 10.505660408208904, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.8563149571418762, |
|
"logits/rejected": -0.8991769552230835, |
|
"logps/chosen": -359.00299072265625, |
|
"logps/rejected": -381.2506408691406, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0156606435775757, |
|
"rewards/margins": 0.48986172676086426, |
|
"rewards/rejected": -1.50552237033844, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 10.897264447866968, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -0.9542545080184937, |
|
"logits/rejected": -0.9912341833114624, |
|
"logps/chosen": -351.9880065917969, |
|
"logps/rejected": -430.06072998046875, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0298378467559814, |
|
"rewards/margins": 0.8000310063362122, |
|
"rewards/rejected": -1.8298689126968384, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 8.170911605589284, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -0.9441335797309875, |
|
"logits/rejected": -0.9833663702011108, |
|
"logps/chosen": -413.30047607421875, |
|
"logps/rejected": -478.78204345703125, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0744590759277344, |
|
"rewards/margins": 0.6504513621330261, |
|
"rewards/rejected": -1.7249103784561157, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 10.341368234668948, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -1.0848381519317627, |
|
"logits/rejected": -1.078758955001831, |
|
"logps/chosen": -377.7035827636719, |
|
"logps/rejected": -493.32928466796875, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9114073514938354, |
|
"rewards/margins": 1.1706379652023315, |
|
"rewards/rejected": -2.082045555114746, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 11.04593711106465, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -0.9228931665420532, |
|
"logits/rejected": -0.8969389200210571, |
|
"logps/chosen": -337.7319030761719, |
|
"logps/rejected": -435.77752685546875, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0482293367385864, |
|
"rewards/margins": 0.77088862657547, |
|
"rewards/rejected": -1.8191179037094116, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 9.643533943369212, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -0.9793928265571594, |
|
"logits/rejected": -1.0138920545578003, |
|
"logps/chosen": -314.68878173828125, |
|
"logps/rejected": -389.3453063964844, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.099780559539795, |
|
"rewards/margins": 0.6440693140029907, |
|
"rewards/rejected": -1.7438499927520752, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 9.649660460622457, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -0.9505901336669922, |
|
"logits/rejected": -0.8831882476806641, |
|
"logps/chosen": -372.86322021484375, |
|
"logps/rejected": -490.08740234375, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1724433898925781, |
|
"rewards/margins": 1.1937037706375122, |
|
"rewards/rejected": -2.366147041320801, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 11.639238021932554, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -0.988836407661438, |
|
"logits/rejected": -1.0147147178649902, |
|
"logps/chosen": -381.7936706542969, |
|
"logps/rejected": -402.01043701171875, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4327753782272339, |
|
"rewards/margins": 0.4645722806453705, |
|
"rewards/rejected": -1.8973478078842163, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 11.366678972402584, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.949034571647644, |
|
"logits/rejected": -0.978123664855957, |
|
"logps/chosen": -379.8930358886719, |
|
"logps/rejected": -413.527099609375, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2424204349517822, |
|
"rewards/margins": 0.4267699122428894, |
|
"rewards/rejected": -1.6691904067993164, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 10.723571043699938, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -0.9376896023750305, |
|
"logits/rejected": -1.007285714149475, |
|
"logps/chosen": -336.7314453125, |
|
"logps/rejected": -412.2381896972656, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9813965559005737, |
|
"rewards/margins": 0.9238882064819336, |
|
"rewards/rejected": -1.9052846431732178, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 11.28027279286211, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -0.9019923210144043, |
|
"logits/rejected": -1.030314326286316, |
|
"logps/chosen": -322.65948486328125, |
|
"logps/rejected": -373.43670654296875, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9081098437309265, |
|
"rewards/margins": 0.7220152616500854, |
|
"rewards/rejected": -1.6301252841949463, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 10.272798699401713, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -1.0090559720993042, |
|
"logits/rejected": -0.9971574544906616, |
|
"logps/chosen": -351.38568115234375, |
|
"logps/rejected": -452.35888671875, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7977692484855652, |
|
"rewards/margins": 1.1507394313812256, |
|
"rewards/rejected": -1.948508858680725, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 9.299129898380613, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": -1.0525013208389282, |
|
"logits/rejected": -1.0134930610656738, |
|
"logps/chosen": -310.9789123535156, |
|
"logps/rejected": -408.0035400390625, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9323593378067017, |
|
"rewards/margins": 0.7073816061019897, |
|
"rewards/rejected": -1.6397409439086914, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 8.671420413674893, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": -1.0403704643249512, |
|
"logits/rejected": -1.0115526914596558, |
|
"logps/chosen": -338.336181640625, |
|
"logps/rejected": -430.98468017578125, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8510007858276367, |
|
"rewards/margins": 1.0371253490447998, |
|
"rewards/rejected": -1.888126015663147, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 9.900230839528136, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -0.9543834924697876, |
|
"logits/rejected": -0.9598924517631531, |
|
"logps/chosen": -324.9510803222656, |
|
"logps/rejected": -367.14776611328125, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8847082853317261, |
|
"rewards/margins": 0.5305272936820984, |
|
"rewards/rejected": -1.4152355194091797, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 9.272434926289451, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": -1.007602334022522, |
|
"logits/rejected": -0.9781683087348938, |
|
"logps/chosen": -325.66839599609375, |
|
"logps/rejected": -409.8434143066406, |
|
"loss": 0.5685, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9686576724052429, |
|
"rewards/margins": 0.7592862844467163, |
|
"rewards/rejected": -1.727943778038025, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 8.284902734735002, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -1.0210028886795044, |
|
"logits/rejected": -1.084486484527588, |
|
"logps/chosen": -330.61688232421875, |
|
"logps/rejected": -401.58001708984375, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0252277851104736, |
|
"rewards/margins": 0.5490860939025879, |
|
"rewards/rejected": -1.5743141174316406, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 7.769735531369734, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -1.0263112783432007, |
|
"logits/rejected": -0.9949830770492554, |
|
"logps/chosen": -287.7435302734375, |
|
"logps/rejected": -406.2470703125, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8495758771896362, |
|
"rewards/margins": 1.082848310470581, |
|
"rewards/rejected": -1.9324243068695068, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 8.170204511457829, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": -0.9709676504135132, |
|
"logits/rejected": -0.9841318130493164, |
|
"logps/chosen": -386.8170471191406, |
|
"logps/rejected": -453.66571044921875, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.131913185119629, |
|
"rewards/margins": 0.5731815695762634, |
|
"rewards/rejected": -1.7050946950912476, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 8.525976901789441, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": -0.916145920753479, |
|
"logits/rejected": -0.9438095092773438, |
|
"logps/chosen": -374.3315124511719, |
|
"logps/rejected": -448.864013671875, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0942604541778564, |
|
"rewards/margins": 0.7264348864555359, |
|
"rewards/rejected": -1.8206952810287476, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 12.196210253104345, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": -0.8837151527404785, |
|
"logits/rejected": -0.883104145526886, |
|
"logps/chosen": -365.3779296875, |
|
"logps/rejected": -482.9402770996094, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.106382131576538, |
|
"rewards/margins": 1.2564634084701538, |
|
"rewards/rejected": -2.3628456592559814, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 11.24723169528466, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": -0.9752639532089233, |
|
"logits/rejected": -1.0816590785980225, |
|
"logps/chosen": -385.86956787109375, |
|
"logps/rejected": -397.8951416015625, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1907027959823608, |
|
"rewards/margins": 0.7141844630241394, |
|
"rewards/rejected": -1.9048871994018555, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 10.85809850081635, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": -0.8752844929695129, |
|
"logits/rejected": -0.9301251173019409, |
|
"logps/chosen": -393.0145568847656, |
|
"logps/rejected": -391.2146301269531, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.897404670715332, |
|
"rewards/margins": 0.5560187101364136, |
|
"rewards/rejected": -1.453423261642456, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 10.402623732075206, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": -0.8860486745834351, |
|
"logits/rejected": -0.933478057384491, |
|
"logps/chosen": -427.3525390625, |
|
"logps/rejected": -484.7303771972656, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2039297819137573, |
|
"rewards/margins": 0.8201591372489929, |
|
"rewards/rejected": -2.0240888595581055, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 10.420724727543737, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": -1.0526244640350342, |
|
"logits/rejected": -1.0677237510681152, |
|
"logps/chosen": -407.3580017089844, |
|
"logps/rejected": -422.97711181640625, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4225349426269531, |
|
"rewards/margins": 0.27180853486061096, |
|
"rewards/rejected": -1.6943433284759521, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 10.619438612411185, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": -0.9206924438476562, |
|
"logits/rejected": -1.0142751932144165, |
|
"logps/chosen": -369.31671142578125, |
|
"logps/rejected": -393.07769775390625, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1511679887771606, |
|
"rewards/margins": 0.6668072938919067, |
|
"rewards/rejected": -1.8179752826690674, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 11.563760369851115, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": -1.0036451816558838, |
|
"logits/rejected": -1.0272811651229858, |
|
"logps/chosen": -328.3309020996094, |
|
"logps/rejected": -445.1051330566406, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9604378938674927, |
|
"rewards/margins": 1.0202256441116333, |
|
"rewards/rejected": -1.9806636571884155, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 12.237132564608, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": -1.0756165981292725, |
|
"logits/rejected": -1.0265034437179565, |
|
"logps/chosen": -367.80816650390625, |
|
"logps/rejected": -496.73583984375, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.24051034450531, |
|
"rewards/margins": 0.7898116111755371, |
|
"rewards/rejected": -2.0303218364715576, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 10.269516186946461, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": -1.0293898582458496, |
|
"logits/rejected": -0.9980789422988892, |
|
"logps/chosen": -382.0529479980469, |
|
"logps/rejected": -538.0097045898438, |
|
"loss": 0.5223, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3041552305221558, |
|
"rewards/margins": 1.118542194366455, |
|
"rewards/rejected": -2.4226975440979004, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 9.05638881736831, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": -1.1042773723602295, |
|
"logits/rejected": -1.17487633228302, |
|
"logps/chosen": -395.26531982421875, |
|
"logps/rejected": -437.3297424316406, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.207805871963501, |
|
"rewards/margins": 1.0340633392333984, |
|
"rewards/rejected": -2.2418694496154785, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 11.280624127584458, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": -1.0581328868865967, |
|
"logits/rejected": -1.0977164506912231, |
|
"logps/chosen": -372.9267883300781, |
|
"logps/rejected": -451.66009521484375, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2191641330718994, |
|
"rewards/margins": 0.8757356405258179, |
|
"rewards/rejected": -2.0948996543884277, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 9.42640563137626, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -0.9135216474533081, |
|
"logits/rejected": -1.0233265161514282, |
|
"logps/chosen": -347.3189392089844, |
|
"logps/rejected": -395.1881103515625, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0815908908843994, |
|
"rewards/margins": 0.9593822360038757, |
|
"rewards/rejected": -2.04097318649292, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 11.866401340802232, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": -0.9709247350692749, |
|
"logits/rejected": -0.9719343185424805, |
|
"logps/chosen": -392.99859619140625, |
|
"logps/rejected": -453.5699768066406, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2631556987762451, |
|
"rewards/margins": 0.8903251886367798, |
|
"rewards/rejected": -2.1534810066223145, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 11.302715265992923, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": -1.168346643447876, |
|
"logits/rejected": -1.1507086753845215, |
|
"logps/chosen": -359.5755920410156, |
|
"logps/rejected": -499.57611083984375, |
|
"loss": 0.4546, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1692172288894653, |
|
"rewards/margins": 1.161725640296936, |
|
"rewards/rejected": -2.3309428691864014, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 10.538848822959446, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": -0.9228366017341614, |
|
"logits/rejected": -0.9742697477340698, |
|
"logps/chosen": -393.55865478515625, |
|
"logps/rejected": -613.6375732421875, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4483288526535034, |
|
"rewards/margins": 1.6291669607162476, |
|
"rewards/rejected": -3.07749605178833, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 15.598964227058538, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": -1.0201536417007446, |
|
"logits/rejected": -0.9872088432312012, |
|
"logps/chosen": -388.32159423828125, |
|
"logps/rejected": -569.1511840820312, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3905266523361206, |
|
"rewards/margins": 1.4417566061019897, |
|
"rewards/rejected": -2.8322832584381104, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 10.412089533834534, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": -1.0111544132232666, |
|
"logits/rejected": -1.0401980876922607, |
|
"logps/chosen": -415.40728759765625, |
|
"logps/rejected": -510.05926513671875, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4814815521240234, |
|
"rewards/margins": 1.197854995727539, |
|
"rewards/rejected": -2.6793365478515625, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 10.947859358372769, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": -1.040560007095337, |
|
"logits/rejected": -1.0646026134490967, |
|
"logps/chosen": -393.5159606933594, |
|
"logps/rejected": -461.66864013671875, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0464909076690674, |
|
"rewards/margins": 0.9438858032226562, |
|
"rewards/rejected": -1.9903767108917236, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 10.271128985395928, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": -1.0148289203643799, |
|
"logits/rejected": -1.0559489727020264, |
|
"logps/chosen": -268.1494140625, |
|
"logps/rejected": -381.31707763671875, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.941615104675293, |
|
"rewards/margins": 0.9399920701980591, |
|
"rewards/rejected": -1.8816072940826416, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 14.425090791310733, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": -1.112364411354065, |
|
"logits/rejected": -1.1866824626922607, |
|
"logps/chosen": -286.95819091796875, |
|
"logps/rejected": -442.0973205566406, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.07503342628479, |
|
"rewards/margins": 1.2303565740585327, |
|
"rewards/rejected": -2.305389881134033, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 9.80302236807559, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": -1.1836650371551514, |
|
"logits/rejected": -1.1820929050445557, |
|
"logps/chosen": -316.4050598144531, |
|
"logps/rejected": -470.31878662109375, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1204586029052734, |
|
"rewards/margins": 1.4639848470687866, |
|
"rewards/rejected": -2.5844433307647705, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 10.46266914998414, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": -1.1017481088638306, |
|
"logits/rejected": -1.1844024658203125, |
|
"logps/chosen": -397.47625732421875, |
|
"logps/rejected": -495.28021240234375, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3188390731811523, |
|
"rewards/margins": 1.2287555932998657, |
|
"rewards/rejected": -2.5475945472717285, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 13.802897784185829, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": -1.1266191005706787, |
|
"logits/rejected": -1.1002269983291626, |
|
"logps/chosen": -320.9498596191406, |
|
"logps/rejected": -531.9197998046875, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4906550645828247, |
|
"rewards/margins": 1.6824109554290771, |
|
"rewards/rejected": -3.1730659008026123, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 18.106306987333475, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": -1.197227954864502, |
|
"logits/rejected": -1.2483270168304443, |
|
"logps/chosen": -382.64154052734375, |
|
"logps/rejected": -496.5115661621094, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5682321786880493, |
|
"rewards/margins": 1.1952569484710693, |
|
"rewards/rejected": -2.763489246368408, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 12.198433930244558, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": -1.14240562915802, |
|
"logits/rejected": -1.2160329818725586, |
|
"logps/chosen": -349.089599609375, |
|
"logps/rejected": -437.95263671875, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0620535612106323, |
|
"rewards/margins": 1.2815176248550415, |
|
"rewards/rejected": -2.343571186065674, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 11.921423223157628, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": -1.1435043811798096, |
|
"logits/rejected": -1.1478463411331177, |
|
"logps/chosen": -354.37200927734375, |
|
"logps/rejected": -491.3812561035156, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1012070178985596, |
|
"rewards/margins": 1.6031544208526611, |
|
"rewards/rejected": -2.7043614387512207, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 12.244819859614891, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": -1.2157360315322876, |
|
"logits/rejected": -1.2492748498916626, |
|
"logps/chosen": -377.10089111328125, |
|
"logps/rejected": -490.47576904296875, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4752638339996338, |
|
"rewards/margins": 1.339400291442871, |
|
"rewards/rejected": -2.814664363861084, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 13.831179008022444, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": -1.1191552877426147, |
|
"logits/rejected": -1.1627353429794312, |
|
"logps/chosen": -398.43060302734375, |
|
"logps/rejected": -515.5997314453125, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.595988154411316, |
|
"rewards/margins": 1.365837812423706, |
|
"rewards/rejected": -2.9618258476257324, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 12.594765793696023, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": -1.248393177986145, |
|
"logits/rejected": -1.2377097606658936, |
|
"logps/chosen": -360.39739990234375, |
|
"logps/rejected": -560.1267700195312, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3543295860290527, |
|
"rewards/margins": 1.653586745262146, |
|
"rewards/rejected": -3.007916212081909, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 14.658580352857703, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": -1.1721140146255493, |
|
"logits/rejected": -1.205334186553955, |
|
"logps/chosen": -372.86688232421875, |
|
"logps/rejected": -431.7432556152344, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.6799089908599854, |
|
"rewards/margins": 0.7123211622238159, |
|
"rewards/rejected": -2.392230272293091, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 11.30132796398374, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": -1.2736822366714478, |
|
"logits/rejected": -1.2566986083984375, |
|
"logps/chosen": -386.52886962890625, |
|
"logps/rejected": -536.6383056640625, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5427987575531006, |
|
"rewards/margins": 1.334408164024353, |
|
"rewards/rejected": -2.877207040786743, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 11.45540426694367, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": -1.1903879642486572, |
|
"logits/rejected": -1.2902122735977173, |
|
"logps/chosen": -395.0272521972656, |
|
"logps/rejected": -514.5638427734375, |
|
"loss": 0.4587, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2198941707611084, |
|
"rewards/margins": 1.4861936569213867, |
|
"rewards/rejected": -2.706087589263916, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 11.855125244864809, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": -1.0970547199249268, |
|
"logits/rejected": -1.2169404029846191, |
|
"logps/chosen": -414.16546630859375, |
|
"logps/rejected": -512.9844970703125, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4606043100357056, |
|
"rewards/margins": 1.193366289138794, |
|
"rewards/rejected": -2.653970241546631, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 12.674642065308364, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": -1.1634668111801147, |
|
"logits/rejected": -1.2584116458892822, |
|
"logps/chosen": -352.8936767578125, |
|
"logps/rejected": -543.56396484375, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2661120891571045, |
|
"rewards/margins": 1.9337279796600342, |
|
"rewards/rejected": -3.1998400688171387, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 11.264055060402159, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": -1.2410491704940796, |
|
"logits/rejected": -1.2714251279830933, |
|
"logps/chosen": -431.96771240234375, |
|
"logps/rejected": -563.4871826171875, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8797252178192139, |
|
"rewards/margins": 1.5396064519882202, |
|
"rewards/rejected": -3.4193317890167236, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 12.684189653289591, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": -1.1412585973739624, |
|
"logits/rejected": -1.2513244152069092, |
|
"logps/chosen": -471.1331481933594, |
|
"logps/rejected": -574.435546875, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9110310077667236, |
|
"rewards/margins": 1.6200447082519531, |
|
"rewards/rejected": -3.5310757160186768, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 12.047131092826515, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": -1.2705122232437134, |
|
"logits/rejected": -1.3864275217056274, |
|
"logps/chosen": -426.615478515625, |
|
"logps/rejected": -498.53863525390625, |
|
"loss": 0.4877, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9668481349945068, |
|
"rewards/margins": 1.2020810842514038, |
|
"rewards/rejected": -3.1689295768737793, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 12.410299379758362, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": -1.2316473722457886, |
|
"logits/rejected": -1.24112868309021, |
|
"logps/chosen": -399.12347412109375, |
|
"logps/rejected": -496.7018127441406, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.460193157196045, |
|
"rewards/margins": 1.3175086975097656, |
|
"rewards/rejected": -2.7777016162872314, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 15.436355881965927, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": -1.1702232360839844, |
|
"logits/rejected": -1.208388328552246, |
|
"logps/chosen": -382.8656311035156, |
|
"logps/rejected": -577.5721435546875, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2770411968231201, |
|
"rewards/margins": 1.7833073139190674, |
|
"rewards/rejected": -3.0603485107421875, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 12.142589026682131, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": -1.190229892730713, |
|
"logits/rejected": -1.2123523950576782, |
|
"logps/chosen": -417.66973876953125, |
|
"logps/rejected": -549.3724365234375, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5125898122787476, |
|
"rewards/margins": 1.5069903135299683, |
|
"rewards/rejected": -3.019580125808716, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 10.693755286537069, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": -1.2291617393493652, |
|
"logits/rejected": -1.2268054485321045, |
|
"logps/chosen": -412.4300231933594, |
|
"logps/rejected": -607.4716186523438, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5301696062088013, |
|
"rewards/margins": 2.023198366165161, |
|
"rewards/rejected": -3.553368091583252, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 11.965095051615716, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": -1.1990400552749634, |
|
"logits/rejected": -1.2139463424682617, |
|
"logps/chosen": -364.44830322265625, |
|
"logps/rejected": -493.9198303222656, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5109294652938843, |
|
"rewards/margins": 1.342005729675293, |
|
"rewards/rejected": -2.8529350757598877, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 14.030913432042324, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": -1.2126591205596924, |
|
"logits/rejected": -1.2595001459121704, |
|
"logps/chosen": -447.2684631347656, |
|
"logps/rejected": -650.9678344726562, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.054805278778076, |
|
"rewards/margins": 2.237144947052002, |
|
"rewards/rejected": -4.29194974899292, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 12.230893363228695, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": -1.3876649141311646, |
|
"logits/rejected": -1.345571517944336, |
|
"logps/chosen": -420.2569885253906, |
|
"logps/rejected": -553.880859375, |
|
"loss": 0.4671, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8658406734466553, |
|
"rewards/margins": 1.272249460220337, |
|
"rewards/rejected": -3.138089895248413, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 18.683125746223514, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": -1.2496898174285889, |
|
"logits/rejected": -1.1489694118499756, |
|
"logps/chosen": -374.80059814453125, |
|
"logps/rejected": -599.7036743164062, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.621886968612671, |
|
"rewards/margins": 1.9923725128173828, |
|
"rewards/rejected": -3.614259719848633, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 11.157118917089283, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": -1.2218196392059326, |
|
"logits/rejected": -1.2584130764007568, |
|
"logps/chosen": -449.47979736328125, |
|
"logps/rejected": -655.5496826171875, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.958253264427185, |
|
"rewards/margins": 2.2051448822021484, |
|
"rewards/rejected": -4.163398265838623, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 12.746683018380017, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": -1.3946723937988281, |
|
"logits/rejected": -1.3772931098937988, |
|
"logps/chosen": -442.54339599609375, |
|
"logps/rejected": -570.6209106445312, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7088006734848022, |
|
"rewards/margins": 1.4957315921783447, |
|
"rewards/rejected": -3.2045321464538574, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 12.044939890412799, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": -1.188822627067566, |
|
"logits/rejected": -1.219215989112854, |
|
"logps/chosen": -408.2652282714844, |
|
"logps/rejected": -595.224365234375, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3506208658218384, |
|
"rewards/margins": 2.057248115539551, |
|
"rewards/rejected": -3.4078686237335205, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 11.734403426656625, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": -1.1842548847198486, |
|
"logits/rejected": -1.1583616733551025, |
|
"logps/chosen": -385.1539306640625, |
|
"logps/rejected": -605.2198486328125, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6475471258163452, |
|
"rewards/margins": 1.9943052530288696, |
|
"rewards/rejected": -3.6418521404266357, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 9.504127526147306, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": -1.5127391815185547, |
|
"logits/rejected": -1.4623585939407349, |
|
"logps/chosen": -459.7693786621094, |
|
"logps/rejected": -617.2127685546875, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.342836380004883, |
|
"rewards/margins": 1.6365751028060913, |
|
"rewards/rejected": -3.9794116020202637, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 20.373026702217544, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.300527811050415, |
|
"logits/rejected": -1.2247835397720337, |
|
"logps/chosen": -456.451416015625, |
|
"logps/rejected": -692.865478515625, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3793184757232666, |
|
"rewards/margins": 1.9824209213256836, |
|
"rewards/rejected": -4.361739635467529, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 16.685897640348756, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": -1.1498229503631592, |
|
"logits/rejected": -1.1219522953033447, |
|
"logps/chosen": -400.486328125, |
|
"logps/rejected": -553.6771240234375, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.084050416946411, |
|
"rewards/margins": 1.0678091049194336, |
|
"rewards/rejected": -3.151859760284424, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 11.077851343051192, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": -1.3562729358673096, |
|
"logits/rejected": -1.3173191547393799, |
|
"logps/chosen": -371.6280517578125, |
|
"logps/rejected": -650.3944702148438, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.779268503189087, |
|
"rewards/margins": 2.3870112895965576, |
|
"rewards/rejected": -4.1662797927856445, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 14.118121128015067, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": -1.1345436573028564, |
|
"logits/rejected": -1.2407124042510986, |
|
"logps/chosen": -508.22467041015625, |
|
"logps/rejected": -647.5037841796875, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.88066828250885, |
|
"rewards/margins": 1.833953619003296, |
|
"rewards/rejected": -3.7146217823028564, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 15.18907635756053, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": -1.151426911354065, |
|
"logits/rejected": -1.1657240390777588, |
|
"logps/chosen": -432.26025390625, |
|
"logps/rejected": -617.5946044921875, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.788640022277832, |
|
"rewards/margins": 1.791176438331604, |
|
"rewards/rejected": -3.5798168182373047, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 14.317431275466847, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": -1.1826136112213135, |
|
"logits/rejected": -1.2314163446426392, |
|
"logps/chosen": -389.71746826171875, |
|
"logps/rejected": -564.3736572265625, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.384912371635437, |
|
"rewards/margins": 1.6980838775634766, |
|
"rewards/rejected": -3.082996368408203, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 15.887257243467523, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": -1.3013509511947632, |
|
"logits/rejected": -1.3612343072891235, |
|
"logps/chosen": -393.45379638671875, |
|
"logps/rejected": -499.9305114746094, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9321129322052002, |
|
"rewards/margins": 1.2076321840286255, |
|
"rewards/rejected": -3.139744520187378, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 15.741703489655176, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": -1.2282559871673584, |
|
"logits/rejected": -1.1802866458892822, |
|
"logps/chosen": -474.82177734375, |
|
"logps/rejected": -591.8897094726562, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9944359064102173, |
|
"rewards/margins": 1.0587834119796753, |
|
"rewards/rejected": -3.0532193183898926, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 17.204220874752522, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": -1.2478222846984863, |
|
"logits/rejected": -1.24253511428833, |
|
"logps/chosen": -451.41009521484375, |
|
"logps/rejected": -622.8987426757812, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8151447772979736, |
|
"rewards/margins": 1.9790477752685547, |
|
"rewards/rejected": -3.7941925525665283, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 14.543319199762806, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": -1.1997919082641602, |
|
"logits/rejected": -1.2181050777435303, |
|
"logps/chosen": -405.4238586425781, |
|
"logps/rejected": -524.9036254882812, |
|
"loss": 0.4355, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5551273822784424, |
|
"rewards/margins": 1.3034616708755493, |
|
"rewards/rejected": -2.858588695526123, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 11.536440138005817, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": -1.27651047706604, |
|
"logits/rejected": -1.3442704677581787, |
|
"logps/chosen": -513.730224609375, |
|
"logps/rejected": -606.420166015625, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.227952003479004, |
|
"rewards/margins": 1.299182653427124, |
|
"rewards/rejected": -3.527134656906128, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 13.953076272266463, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": -1.302734136581421, |
|
"logits/rejected": -1.257291555404663, |
|
"logps/chosen": -375.5953369140625, |
|
"logps/rejected": -528.0511474609375, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6560758352279663, |
|
"rewards/margins": 1.0600662231445312, |
|
"rewards/rejected": -2.716142177581787, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 11.662781366416757, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": -1.2684893608093262, |
|
"logits/rejected": -1.2016541957855225, |
|
"logps/chosen": -372.7796630859375, |
|
"logps/rejected": -552.9669799804688, |
|
"loss": 0.4854, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6109774112701416, |
|
"rewards/margins": 1.5703436136245728, |
|
"rewards/rejected": -3.181320905685425, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 11.657236582029201, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": -1.2256669998168945, |
|
"logits/rejected": -1.2542228698730469, |
|
"logps/chosen": -448.0088806152344, |
|
"logps/rejected": -790.6217041015625, |
|
"loss": 0.449, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.899916410446167, |
|
"rewards/margins": 3.3502612113952637, |
|
"rewards/rejected": -5.25017786026001, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 13.35177426898264, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": -1.3202365636825562, |
|
"logits/rejected": -1.3830177783966064, |
|
"logps/chosen": -455.22967529296875, |
|
"logps/rejected": -581.9063720703125, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1927895545959473, |
|
"rewards/margins": 1.4322152137756348, |
|
"rewards/rejected": -3.625004529953003, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 13.573747186048726, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": -1.3035602569580078, |
|
"logits/rejected": -1.245466947555542, |
|
"logps/chosen": -416.05084228515625, |
|
"logps/rejected": -740.8409423828125, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8670103549957275, |
|
"rewards/margins": 2.8193886280059814, |
|
"rewards/rejected": -4.686398983001709, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 13.749682710553607, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": -1.176382303237915, |
|
"logits/rejected": -1.177053689956665, |
|
"logps/chosen": -393.9292907714844, |
|
"logps/rejected": -558.60888671875, |
|
"loss": 0.4624, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7796415090560913, |
|
"rewards/margins": 1.491105318069458, |
|
"rewards/rejected": -3.2707467079162598, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 12.151112854676569, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": -1.4273042678833008, |
|
"logits/rejected": -1.4034093618392944, |
|
"logps/chosen": -442.8750915527344, |
|
"logps/rejected": -616.5904541015625, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7405941486358643, |
|
"rewards/margins": 2.146420955657959, |
|
"rewards/rejected": -3.8870151042938232, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 9.870668269338232, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": -1.358819842338562, |
|
"logits/rejected": -1.4594396352767944, |
|
"logps/chosen": -363.05926513671875, |
|
"logps/rejected": -469.5292053222656, |
|
"loss": 0.4181, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3789103031158447, |
|
"rewards/margins": 1.4029977321624756, |
|
"rewards/rejected": -2.781907796859741, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 12.06984534361086, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": -1.233335018157959, |
|
"logits/rejected": -1.2165110111236572, |
|
"logps/chosen": -442.47021484375, |
|
"logps/rejected": -587.38818359375, |
|
"loss": 0.4164, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.698992133140564, |
|
"rewards/margins": 1.7231460809707642, |
|
"rewards/rejected": -3.422138214111328, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 11.992529219223993, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": -1.2353224754333496, |
|
"logits/rejected": -1.3311399221420288, |
|
"logps/chosen": -461.4444274902344, |
|
"logps/rejected": -634.5919189453125, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.387753963470459, |
|
"rewards/margins": 1.8558781147003174, |
|
"rewards/rejected": -4.2436323165893555, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 13.334807101320292, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": -1.4029086828231812, |
|
"logits/rejected": -1.4273648262023926, |
|
"logps/chosen": -434.45782470703125, |
|
"logps/rejected": -619.6177978515625, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.4253311157226562, |
|
"rewards/margins": 1.7180039882659912, |
|
"rewards/rejected": -4.143334865570068, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 14.545492077815492, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": -1.30009126663208, |
|
"logits/rejected": -1.3962773084640503, |
|
"logps/chosen": -408.5030822753906, |
|
"logps/rejected": -618.9495239257812, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7208665609359741, |
|
"rewards/margins": 2.1056978702545166, |
|
"rewards/rejected": -3.826564311981201, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 14.42291716594792, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": -1.2345439195632935, |
|
"logits/rejected": -1.1613664627075195, |
|
"logps/chosen": -431.56683349609375, |
|
"logps/rejected": -756.0855102539062, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0580296516418457, |
|
"rewards/margins": 3.100363254547119, |
|
"rewards/rejected": -5.158392906188965, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 13.846182180264046, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": -1.3649914264678955, |
|
"logits/rejected": -1.3719325065612793, |
|
"logps/chosen": -399.53228759765625, |
|
"logps/rejected": -725.3392333984375, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.87088143825531, |
|
"rewards/margins": 3.0345349311828613, |
|
"rewards/rejected": -4.905416965484619, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 13.862779001584391, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": -1.1647714376449585, |
|
"logits/rejected": -1.3258562088012695, |
|
"logps/chosen": -452.04046630859375, |
|
"logps/rejected": -574.329833984375, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.89810049533844, |
|
"rewards/margins": 1.4911009073257446, |
|
"rewards/rejected": -3.3892014026641846, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 13.918601228648548, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": -1.1976958513259888, |
|
"logits/rejected": -1.2260172367095947, |
|
"logps/chosen": -395.0202941894531, |
|
"logps/rejected": -686.57177734375, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7495826482772827, |
|
"rewards/margins": 3.0733304023742676, |
|
"rewards/rejected": -4.82291316986084, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 11.597661521745744, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": -1.2923458814620972, |
|
"logits/rejected": -1.266068458557129, |
|
"logps/chosen": -408.5785827636719, |
|
"logps/rejected": -584.2232055664062, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6165058612823486, |
|
"rewards/margins": 1.335237741470337, |
|
"rewards/rejected": -2.9517433643341064, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 11.408780497698448, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": -1.2169060707092285, |
|
"logits/rejected": -1.2535167932510376, |
|
"logps/chosen": -421.34320068359375, |
|
"logps/rejected": -612.0996704101562, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.618795394897461, |
|
"rewards/margins": 1.626263976097107, |
|
"rewards/rejected": -3.2450594902038574, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 14.279826921613408, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": -1.255234718322754, |
|
"logits/rejected": -1.146271824836731, |
|
"logps/chosen": -421.241455078125, |
|
"logps/rejected": -664.4832763671875, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8466880321502686, |
|
"rewards/margins": 2.243635416030884, |
|
"rewards/rejected": -4.090323448181152, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 13.239703723404237, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": -1.5136260986328125, |
|
"logits/rejected": -1.4776732921600342, |
|
"logps/chosen": -430.2723693847656, |
|
"logps/rejected": -688.0913696289062, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2629590034484863, |
|
"rewards/margins": 2.4102649688720703, |
|
"rewards/rejected": -4.673223495483398, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 11.59386325114309, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": -1.2650483846664429, |
|
"logits/rejected": -1.2001924514770508, |
|
"logps/chosen": -438.1640625, |
|
"logps/rejected": -684.7293701171875, |
|
"loss": 0.4401, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.011852741241455, |
|
"rewards/margins": 2.2977705001831055, |
|
"rewards/rejected": -4.3096232414245605, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 13.837858721173825, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": -1.3295023441314697, |
|
"logits/rejected": -1.4063255786895752, |
|
"logps/chosen": -463.84027099609375, |
|
"logps/rejected": -608.602294921875, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9257501363754272, |
|
"rewards/margins": 1.8007062673568726, |
|
"rewards/rejected": -3.7264564037323, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 18.356726333155102, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": -1.272377848625183, |
|
"logits/rejected": -1.2996290922164917, |
|
"logps/chosen": -379.01922607421875, |
|
"logps/rejected": -670.1522827148438, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.478298306465149, |
|
"rewards/margins": 2.6530449390411377, |
|
"rewards/rejected": -4.131342887878418, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 11.835957489925283, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": -1.210574746131897, |
|
"logits/rejected": -1.208512306213379, |
|
"logps/chosen": -486.7884216308594, |
|
"logps/rejected": -726.0255126953125, |
|
"loss": 0.4257, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0521273612976074, |
|
"rewards/margins": 2.712066888809204, |
|
"rewards/rejected": -4.764194488525391, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 14.562344268328332, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": -1.318163275718689, |
|
"logits/rejected": -1.4022140502929688, |
|
"logps/chosen": -517.6325073242188, |
|
"logps/rejected": -609.1475830078125, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.6355819702148438, |
|
"rewards/margins": 1.281418800354004, |
|
"rewards/rejected": -3.9170005321502686, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 14.530291818469866, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": -1.1955803632736206, |
|
"logits/rejected": -1.2934623956680298, |
|
"logps/chosen": -425.0252380371094, |
|
"logps/rejected": -642.680419921875, |
|
"loss": 0.3978, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.185080051422119, |
|
"rewards/margins": 2.503284215927124, |
|
"rewards/rejected": -4.688364028930664, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 14.242954877580903, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": -1.2469186782836914, |
|
"logits/rejected": -1.3009865283966064, |
|
"logps/chosen": -522.1409301757812, |
|
"logps/rejected": -738.9695434570312, |
|
"loss": 0.3894, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4441685676574707, |
|
"rewards/margins": 2.662193775177002, |
|
"rewards/rejected": -5.106362342834473, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 9.558413013643854, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": -1.3089157342910767, |
|
"logits/rejected": -1.2989561557769775, |
|
"logps/chosen": -416.4744567871094, |
|
"logps/rejected": -597.2052612304688, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.026801347732544, |
|
"rewards/margins": 1.6668050289154053, |
|
"rewards/rejected": -3.6936068534851074, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 14.440126018329256, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": -1.1547075510025024, |
|
"logits/rejected": -1.141548752784729, |
|
"logps/chosen": -483.82611083984375, |
|
"logps/rejected": -792.4986572265625, |
|
"loss": 0.4601, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.251401424407959, |
|
"rewards/margins": 2.6689209938049316, |
|
"rewards/rejected": -4.920322895050049, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 14.248839559891321, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": -1.2738267183303833, |
|
"logits/rejected": -1.395904302597046, |
|
"logps/chosen": -409.0834045410156, |
|
"logps/rejected": -667.9530639648438, |
|
"loss": 0.442, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.819021463394165, |
|
"rewards/margins": 2.630481243133545, |
|
"rewards/rejected": -4.449502468109131, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 17.408491732936906, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": -1.340673804283142, |
|
"logits/rejected": -1.329695463180542, |
|
"logps/chosen": -464.5760803222656, |
|
"logps/rejected": -588.0704345703125, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9475841522216797, |
|
"rewards/margins": 1.5327271223068237, |
|
"rewards/rejected": -3.480311632156372, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 17.70224543826807, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": -1.230010986328125, |
|
"logits/rejected": -1.2035300731658936, |
|
"logps/chosen": -465.7073669433594, |
|
"logps/rejected": -621.9720458984375, |
|
"loss": 0.413, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0379059314727783, |
|
"rewards/margins": 1.874291181564331, |
|
"rewards/rejected": -3.9121971130371094, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 15.872391005642767, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": -1.2397372722625732, |
|
"logits/rejected": -1.1490371227264404, |
|
"logps/chosen": -457.95428466796875, |
|
"logps/rejected": -709.8195190429688, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8263241052627563, |
|
"rewards/margins": 2.2876338958740234, |
|
"rewards/rejected": -4.113957405090332, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 13.36488995176592, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": -1.2212001085281372, |
|
"logits/rejected": -1.2048442363739014, |
|
"logps/chosen": -419.3158264160156, |
|
"logps/rejected": -692.9566650390625, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9814516305923462, |
|
"rewards/margins": 2.5255990028381348, |
|
"rewards/rejected": -4.507050514221191, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 17.862568943622634, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": -1.2388769388198853, |
|
"logits/rejected": -1.2880442142486572, |
|
"logps/chosen": -386.8263244628906, |
|
"logps/rejected": -666.2364501953125, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.99746835231781, |
|
"rewards/margins": 2.632657766342163, |
|
"rewards/rejected": -4.630125999450684, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 15.985949773009649, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": -1.3479843139648438, |
|
"logits/rejected": -1.2633702754974365, |
|
"logps/chosen": -371.4305114746094, |
|
"logps/rejected": -675.2957763671875, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.869970679283142, |
|
"rewards/margins": 2.837160348892212, |
|
"rewards/rejected": -4.7071309089660645, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 12.92794837231206, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": -1.1383055448532104, |
|
"logits/rejected": -1.1345998048782349, |
|
"logps/chosen": -482.2320251464844, |
|
"logps/rejected": -699.35693359375, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.414862871170044, |
|
"rewards/margins": 1.9727367162704468, |
|
"rewards/rejected": -4.387599468231201, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 11.420466341932578, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": -1.1113722324371338, |
|
"logits/rejected": -1.2537376880645752, |
|
"logps/chosen": -475.3736877441406, |
|
"logps/rejected": -699.142822265625, |
|
"loss": 0.429, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2085394859313965, |
|
"rewards/margins": 2.2637460231781006, |
|
"rewards/rejected": -4.472285270690918, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 15.008753955364448, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": -1.177137017250061, |
|
"logits/rejected": -1.1614809036254883, |
|
"logps/chosen": -539.2333374023438, |
|
"logps/rejected": -795.7393188476562, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4702744483947754, |
|
"rewards/margins": 2.489729404449463, |
|
"rewards/rejected": -4.960003852844238, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 14.291733539194276, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": -1.1657174825668335, |
|
"logits/rejected": -1.1828467845916748, |
|
"logps/chosen": -456.3023376464844, |
|
"logps/rejected": -699.6104736328125, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1304402351379395, |
|
"rewards/margins": 2.3655619621276855, |
|
"rewards/rejected": -4.496002197265625, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 23.848910537253687, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": -1.0966782569885254, |
|
"logits/rejected": -1.1587860584259033, |
|
"logps/chosen": -455.427734375, |
|
"logps/rejected": -578.6062622070312, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9730167388916016, |
|
"rewards/margins": 1.2179396152496338, |
|
"rewards/rejected": -3.1909565925598145, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 11.44940099558639, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": -1.3351823091506958, |
|
"logits/rejected": -1.3538246154785156, |
|
"logps/chosen": -409.07293701171875, |
|
"logps/rejected": -723.4902954101562, |
|
"loss": 0.4044, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5190470218658447, |
|
"rewards/margins": 3.2088820934295654, |
|
"rewards/rejected": -4.72792911529541, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 17.234175648711105, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": -0.9567463994026184, |
|
"logits/rejected": -1.000705599784851, |
|
"logps/chosen": -531.3121948242188, |
|
"logps/rejected": -729.120849609375, |
|
"loss": 0.371, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2870779037475586, |
|
"rewards/margins": 1.6067097187042236, |
|
"rewards/rejected": -3.893787384033203, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 16.389722152541037, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": -1.0606454610824585, |
|
"logits/rejected": -1.1345421075820923, |
|
"logps/chosen": -389.0166320800781, |
|
"logps/rejected": -602.7018432617188, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7767404317855835, |
|
"rewards/margins": 1.9564529657363892, |
|
"rewards/rejected": -3.7331931591033936, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 14.662707269745031, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -1.0552016496658325, |
|
"logits/rejected": -1.088765263557434, |
|
"logps/chosen": -541.9423217773438, |
|
"logps/rejected": -711.3270263671875, |
|
"loss": 0.4593, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5332961082458496, |
|
"rewards/margins": 1.669792890548706, |
|
"rewards/rejected": -4.203089714050293, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 16.149947240810953, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": -1.2002663612365723, |
|
"logits/rejected": -1.3123780488967896, |
|
"logps/chosen": -463.75640869140625, |
|
"logps/rejected": -653.8046875, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.132018566131592, |
|
"rewards/margins": 2.276289224624634, |
|
"rewards/rejected": -4.408308029174805, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": -1.2865077257156372, |
|
"eval_logits/rejected": -1.286653995513916, |
|
"eval_logps/chosen": -464.0679626464844, |
|
"eval_logps/rejected": -732.3422241210938, |
|
"eval_loss": 0.4295724630355835, |
|
"eval_rewards/accuracies": 0.824999988079071, |
|
"eval_rewards/chosen": -2.255470037460327, |
|
"eval_rewards/margins": 2.6071014404296875, |
|
"eval_rewards/rejected": -4.862571716308594, |
|
"eval_runtime": 214.5851, |
|
"eval_samples_per_second": 20.789, |
|
"eval_steps_per_second": 0.326, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 11.158252638659, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": -1.225662112236023, |
|
"logits/rejected": -1.192147135734558, |
|
"logps/chosen": -505.77191162109375, |
|
"logps/rejected": -732.186767578125, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.518340587615967, |
|
"rewards/margins": 2.517788887023926, |
|
"rewards/rejected": -5.036129474639893, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 15.918645486110469, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": -1.2051775455474854, |
|
"logits/rejected": -1.2806353569030762, |
|
"logps/chosen": -469.9501953125, |
|
"logps/rejected": -781.4722290039062, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.963057518005371, |
|
"rewards/margins": 3.50740385055542, |
|
"rewards/rejected": -5.470460891723633, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 16.15646785556536, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": -1.1673038005828857, |
|
"logits/rejected": -1.1155571937561035, |
|
"logps/chosen": -405.2390441894531, |
|
"logps/rejected": -619.8092651367188, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.970237374305725, |
|
"rewards/margins": 2.0424869060516357, |
|
"rewards/rejected": -4.01272439956665, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 11.754069249870854, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": -1.3374531269073486, |
|
"logits/rejected": -1.317359447479248, |
|
"logps/chosen": -427.8550720214844, |
|
"logps/rejected": -775.02880859375, |
|
"loss": 0.3996, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.074645757675171, |
|
"rewards/margins": 3.4690213203430176, |
|
"rewards/rejected": -5.543666839599609, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 20.96567040506931, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": -1.0144097805023193, |
|
"logits/rejected": -0.9976661801338196, |
|
"logps/chosen": -516.1878662109375, |
|
"logps/rejected": -665.16259765625, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.640017509460449, |
|
"rewards/margins": 1.281677007675171, |
|
"rewards/rejected": -3.921694278717041, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 18.281834799077405, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": -1.3244729042053223, |
|
"logits/rejected": -1.2717360258102417, |
|
"logps/chosen": -431.5679626464844, |
|
"logps/rejected": -677.4711303710938, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0494117736816406, |
|
"rewards/margins": 2.2310073375701904, |
|
"rewards/rejected": -4.28041934967041, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 17.365428628060563, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": -1.2782858610153198, |
|
"logits/rejected": -1.2341490983963013, |
|
"logps/chosen": -478.1234436035156, |
|
"logps/rejected": -617.0537109375, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1914291381835938, |
|
"rewards/margins": 1.4232972860336304, |
|
"rewards/rejected": -3.6147263050079346, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 12.528209474957825, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": -1.1168311834335327, |
|
"logits/rejected": -1.1911078691482544, |
|
"logps/chosen": -565.50830078125, |
|
"logps/rejected": -848.2404174804688, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.775134563446045, |
|
"rewards/margins": 3.091364622116089, |
|
"rewards/rejected": -5.8664984703063965, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 18.44109381423814, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": -1.1824846267700195, |
|
"logits/rejected": -1.2502390146255493, |
|
"logps/chosen": -494.66259765625, |
|
"logps/rejected": -717.1361694335938, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2825825214385986, |
|
"rewards/margins": 2.0693299770355225, |
|
"rewards/rejected": -4.351912498474121, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 15.890171516115618, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": -1.087349772453308, |
|
"logits/rejected": -1.0854907035827637, |
|
"logps/chosen": -417.927490234375, |
|
"logps/rejected": -603.4684448242188, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8442258834838867, |
|
"rewards/margins": 1.7202110290527344, |
|
"rewards/rejected": -3.5644371509552, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 12.705040080557687, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": -1.2159066200256348, |
|
"logits/rejected": -1.1780139207839966, |
|
"logps/chosen": -435.548095703125, |
|
"logps/rejected": -650.815185546875, |
|
"loss": 0.3877, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9098494052886963, |
|
"rewards/margins": 2.0971081256866455, |
|
"rewards/rejected": -4.006957054138184, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 18.982869027144268, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": -1.1808390617370605, |
|
"logits/rejected": -1.1597092151641846, |
|
"logps/chosen": -484.2972717285156, |
|
"logps/rejected": -646.4186401367188, |
|
"loss": 0.4346, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4805798530578613, |
|
"rewards/margins": 1.6115633249282837, |
|
"rewards/rejected": -4.0921430587768555, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 12.789123364626542, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": -1.2177742719650269, |
|
"logits/rejected": -1.1412584781646729, |
|
"logps/chosen": -479.531494140625, |
|
"logps/rejected": -768.7166748046875, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5543904304504395, |
|
"rewards/margins": 2.6575286388397217, |
|
"rewards/rejected": -5.211918830871582, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 11.99254712258343, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": -1.2230430841445923, |
|
"logits/rejected": -1.2256507873535156, |
|
"logps/chosen": -452.6123046875, |
|
"logps/rejected": -675.5736083984375, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.214447259902954, |
|
"rewards/margins": 2.154179334640503, |
|
"rewards/rejected": -4.368626594543457, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 13.21777154530317, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": -1.2086105346679688, |
|
"logits/rejected": -1.2604271173477173, |
|
"logps/chosen": -446.7076721191406, |
|
"logps/rejected": -617.2671508789062, |
|
"loss": 0.398, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9999488592147827, |
|
"rewards/margins": 2.0054893493652344, |
|
"rewards/rejected": -4.005438327789307, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 13.837490902323914, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": -1.2421865463256836, |
|
"logits/rejected": -1.3203766345977783, |
|
"logps/chosen": -435.1234436035156, |
|
"logps/rejected": -740.7435302734375, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.179964542388916, |
|
"rewards/margins": 3.039562940597534, |
|
"rewards/rejected": -5.219527244567871, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 12.069623770694928, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": -1.1063625812530518, |
|
"logits/rejected": -1.072736144065857, |
|
"logps/chosen": -469.37939453125, |
|
"logps/rejected": -645.7225341796875, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.081900119781494, |
|
"rewards/margins": 1.5807701349258423, |
|
"rewards/rejected": -3.662670135498047, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 15.586578076435153, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": -1.2151401042938232, |
|
"logits/rejected": -1.244135856628418, |
|
"logps/chosen": -479.72796630859375, |
|
"logps/rejected": -837.6242065429688, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.5905280113220215, |
|
"rewards/margins": 3.3142483234405518, |
|
"rewards/rejected": -5.904776096343994, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 17.30867538238994, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": -1.2965537309646606, |
|
"logits/rejected": -1.2317306995391846, |
|
"logps/chosen": -434.38067626953125, |
|
"logps/rejected": -838.6102294921875, |
|
"loss": 0.4131, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.950038194656372, |
|
"rewards/margins": 4.041052341461182, |
|
"rewards/rejected": -5.991089820861816, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 11.72682555257016, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": -1.2447761297225952, |
|
"logits/rejected": -1.2085590362548828, |
|
"logps/chosen": -437.3580627441406, |
|
"logps/rejected": -767.2139892578125, |
|
"loss": 0.4264, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.104055404663086, |
|
"rewards/margins": 3.457770586013794, |
|
"rewards/rejected": -5.561825752258301, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 16.198064432954975, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -1.168273687362671, |
|
"logits/rejected": -1.2161977291107178, |
|
"logps/chosen": -497.7745666503906, |
|
"logps/rejected": -609.3350830078125, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3043274879455566, |
|
"rewards/margins": 1.402414083480835, |
|
"rewards/rejected": -3.7067413330078125, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 19.37490800094979, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": -1.182252049446106, |
|
"logits/rejected": -1.205956220626831, |
|
"logps/chosen": -459.35968017578125, |
|
"logps/rejected": -567.3809814453125, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.175320863723755, |
|
"rewards/margins": 0.9405016899108887, |
|
"rewards/rejected": -3.1158225536346436, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 19.34432528811352, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": -1.3205252885818481, |
|
"logits/rejected": -1.3020694255828857, |
|
"logps/chosen": -419.14727783203125, |
|
"logps/rejected": -813.0269775390625, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.068502426147461, |
|
"rewards/margins": 3.749777317047119, |
|
"rewards/rejected": -5.818280220031738, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 17.869651405457827, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": -1.2761646509170532, |
|
"logits/rejected": -1.2586033344268799, |
|
"logps/chosen": -440.08270263671875, |
|
"logps/rejected": -681.1771850585938, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9673511981964111, |
|
"rewards/margins": 2.405892848968506, |
|
"rewards/rejected": -4.373243808746338, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 16.14736853278147, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": -1.1019476652145386, |
|
"logits/rejected": -1.1302673816680908, |
|
"logps/chosen": -390.2100524902344, |
|
"logps/rejected": -552.5843505859375, |
|
"loss": 0.4161, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7728662490844727, |
|
"rewards/margins": 1.6656566858291626, |
|
"rewards/rejected": -3.438523054122925, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 16.14985626238556, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": -1.2380822896957397, |
|
"logits/rejected": -1.2011024951934814, |
|
"logps/chosen": -434.4576721191406, |
|
"logps/rejected": -744.7467041015625, |
|
"loss": 0.4166, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.052339553833008, |
|
"rewards/margins": 3.060929298400879, |
|
"rewards/rejected": -5.113268852233887, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 20.209606887329077, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": -1.2722258567810059, |
|
"logits/rejected": -1.2316162586212158, |
|
"logps/chosen": -433.7353515625, |
|
"logps/rejected": -920.55712890625, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1150050163269043, |
|
"rewards/margins": 4.714972019195557, |
|
"rewards/rejected": -6.829976558685303, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 13.645177424361062, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": -1.1917331218719482, |
|
"logits/rejected": -1.1137642860412598, |
|
"logps/chosen": -463.8427734375, |
|
"logps/rejected": -684.7598876953125, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.671189308166504, |
|
"rewards/margins": 2.050757884979248, |
|
"rewards/rejected": -4.72194766998291, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 19.346585502374158, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": -1.3105088472366333, |
|
"logits/rejected": -1.3653063774108887, |
|
"logps/chosen": -450.48150634765625, |
|
"logps/rejected": -608.4674682617188, |
|
"loss": 0.4074, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3599371910095215, |
|
"rewards/margins": 1.5191092491149902, |
|
"rewards/rejected": -3.8790462017059326, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 11.134867800418832, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": -1.2500735521316528, |
|
"logits/rejected": -1.1833133697509766, |
|
"logps/chosen": -434.44805908203125, |
|
"logps/rejected": -694.0464477539062, |
|
"loss": 0.4129, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.3137922286987305, |
|
"rewards/margins": 2.1529812812805176, |
|
"rewards/rejected": -4.46677303314209, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 13.388561282642513, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": -1.1017297506332397, |
|
"logits/rejected": -1.2655115127563477, |
|
"logps/chosen": -515.0410766601562, |
|
"logps/rejected": -691.630126953125, |
|
"loss": 0.4386, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2696266174316406, |
|
"rewards/margins": 2.1189069747924805, |
|
"rewards/rejected": -4.388534069061279, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 21.830085417242575, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": -1.2653098106384277, |
|
"logits/rejected": -1.2332645654678345, |
|
"logps/chosen": -436.19073486328125, |
|
"logps/rejected": -619.58203125, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1965491771698, |
|
"rewards/margins": 1.2768805027008057, |
|
"rewards/rejected": -3.4734294414520264, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 14.621416023558584, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": -1.167513370513916, |
|
"logits/rejected": -1.3203797340393066, |
|
"logps/chosen": -495.20306396484375, |
|
"logps/rejected": -584.0275268554688, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.520407199859619, |
|
"rewards/margins": 1.3572065830230713, |
|
"rewards/rejected": -3.8776137828826904, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 13.806279084269839, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": -1.2116050720214844, |
|
"logits/rejected": -1.2625479698181152, |
|
"logps/chosen": -463.60595703125, |
|
"logps/rejected": -533.0452270507812, |
|
"loss": 0.3951, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.520028591156006, |
|
"rewards/margins": 0.7540720701217651, |
|
"rewards/rejected": -3.2741007804870605, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 17.208724843562575, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": -1.265211582183838, |
|
"logits/rejected": -1.2508124113082886, |
|
"logps/chosen": -419.3246154785156, |
|
"logps/rejected": -666.0604858398438, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3264572620391846, |
|
"rewards/margins": 2.320263624191284, |
|
"rewards/rejected": -4.646720886230469, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 19.054319387788627, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": -1.2373709678649902, |
|
"logits/rejected": -1.1830167770385742, |
|
"logps/chosen": -515.0059814453125, |
|
"logps/rejected": -775.9603271484375, |
|
"loss": 0.4394, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3372507095336914, |
|
"rewards/margins": 2.948981523513794, |
|
"rewards/rejected": -5.286231994628906, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 18.046127707033907, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": -1.0661755800247192, |
|
"logits/rejected": -1.052562952041626, |
|
"logps/chosen": -483.67071533203125, |
|
"logps/rejected": -702.375, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1245293617248535, |
|
"rewards/margins": 2.080724000930786, |
|
"rewards/rejected": -4.2052531242370605, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 11.044379945267481, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": -1.1942309141159058, |
|
"logits/rejected": -1.371203899383545, |
|
"logps/chosen": -426.2565002441406, |
|
"logps/rejected": -538.9791870117188, |
|
"loss": 0.3953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0590853691101074, |
|
"rewards/margins": 1.724047064781189, |
|
"rewards/rejected": -3.783132553100586, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 14.319869332524695, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": -1.1835999488830566, |
|
"logits/rejected": -1.2751821279525757, |
|
"logps/chosen": -406.2999572753906, |
|
"logps/rejected": -733.0162353515625, |
|
"loss": 0.4318, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0234265327453613, |
|
"rewards/margins": 3.427159070968628, |
|
"rewards/rejected": -5.45058536529541, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 11.706540267968286, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": -1.2021677494049072, |
|
"logits/rejected": -1.1848233938217163, |
|
"logps/chosen": -410.93975830078125, |
|
"logps/rejected": -670.9909057617188, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.33550763130188, |
|
"rewards/margins": 2.6671481132507324, |
|
"rewards/rejected": -5.002655982971191, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 16.988346428741217, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": -1.0889265537261963, |
|
"logits/rejected": -0.9927159547805786, |
|
"logps/chosen": -427.29193115234375, |
|
"logps/rejected": -658.6199340820312, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9339721202850342, |
|
"rewards/margins": 2.0747880935668945, |
|
"rewards/rejected": -4.008760452270508, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 21.542226745337405, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": -1.1989023685455322, |
|
"logits/rejected": -1.1998827457427979, |
|
"logps/chosen": -413.19171142578125, |
|
"logps/rejected": -963.6064453125, |
|
"loss": 0.4078, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3345561027526855, |
|
"rewards/margins": 5.340123176574707, |
|
"rewards/rejected": -7.674679756164551, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 15.709987012395423, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": -1.2488889694213867, |
|
"logits/rejected": -1.2438524961471558, |
|
"logps/chosen": -370.1678161621094, |
|
"logps/rejected": -775.2645263671875, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8031924962997437, |
|
"rewards/margins": 3.7851650714874268, |
|
"rewards/rejected": -5.588356971740723, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 18.692702891715406, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": -1.1863176822662354, |
|
"logits/rejected": -1.1739771366119385, |
|
"logps/chosen": -411.5914001464844, |
|
"logps/rejected": -734.830078125, |
|
"loss": 0.4481, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.193366765975952, |
|
"rewards/margins": 3.1098275184631348, |
|
"rewards/rejected": -5.303194522857666, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 14.049015216558205, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": -1.1900994777679443, |
|
"logits/rejected": -1.1979767084121704, |
|
"logps/chosen": -491.08880615234375, |
|
"logps/rejected": -776.205078125, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1984424591064453, |
|
"rewards/margins": 3.0933632850646973, |
|
"rewards/rejected": -5.291806221008301, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 15.123102008910312, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": -1.1304634809494019, |
|
"logits/rejected": -1.0604979991912842, |
|
"logps/chosen": -444.66839599609375, |
|
"logps/rejected": -772.3875122070312, |
|
"loss": 0.4161, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.300208330154419, |
|
"rewards/margins": 2.822746992111206, |
|
"rewards/rejected": -5.122956275939941, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 16.476123838994056, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": -1.0877963304519653, |
|
"logits/rejected": -0.9808356165885925, |
|
"logps/chosen": -430.3238220214844, |
|
"logps/rejected": -756.1893920898438, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.168168306350708, |
|
"rewards/margins": 2.786461353302002, |
|
"rewards/rejected": -4.954629898071289, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 16.3511804347965, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": -1.095245122909546, |
|
"logits/rejected": -1.0183230638504028, |
|
"logps/chosen": -397.98480224609375, |
|
"logps/rejected": -673.2918701171875, |
|
"loss": 0.3975, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7769782543182373, |
|
"rewards/margins": 2.612269401550293, |
|
"rewards/rejected": -4.389247894287109, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 20.613201850030986, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": -1.2274065017700195, |
|
"logits/rejected": -1.2041352987289429, |
|
"logps/chosen": -488.35260009765625, |
|
"logps/rejected": -783.0089721679688, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3304946422576904, |
|
"rewards/margins": 2.9999914169311523, |
|
"rewards/rejected": -5.330485820770264, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 18.3928478904682, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": -1.1274101734161377, |
|
"logits/rejected": -1.246701955795288, |
|
"logps/chosen": -457.32415771484375, |
|
"logps/rejected": -619.3272094726562, |
|
"loss": 0.4264, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.35748553276062, |
|
"rewards/margins": 1.9687366485595703, |
|
"rewards/rejected": -4.3262224197387695, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 18.845165093912573, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": -1.0176677703857422, |
|
"logits/rejected": -1.1635395288467407, |
|
"logps/chosen": -465.5758361816406, |
|
"logps/rejected": -703.4918823242188, |
|
"loss": 0.3765, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2303969860076904, |
|
"rewards/margins": 2.811887502670288, |
|
"rewards/rejected": -5.042284965515137, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 16.99666155725327, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": -1.2282030582427979, |
|
"logits/rejected": -1.2217004299163818, |
|
"logps/chosen": -436.674072265625, |
|
"logps/rejected": -751.2864379882812, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3597781658172607, |
|
"rewards/margins": 3.0093655586242676, |
|
"rewards/rejected": -5.369143962860107, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 17.94857061645596, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": -1.125093698501587, |
|
"logits/rejected": -1.0239616632461548, |
|
"logps/chosen": -448.51837158203125, |
|
"logps/rejected": -756.28466796875, |
|
"loss": 0.3639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9389982223510742, |
|
"rewards/margins": 3.0756373405456543, |
|
"rewards/rejected": -5.014636039733887, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 14.03073566548983, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": -1.1564371585845947, |
|
"logits/rejected": -1.2444071769714355, |
|
"logps/chosen": -429.7975158691406, |
|
"logps/rejected": -546.2296142578125, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.235487222671509, |
|
"rewards/margins": 1.5971519947052002, |
|
"rewards/rejected": -3.832639217376709, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 17.056601069921566, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": -1.154388666152954, |
|
"logits/rejected": -1.233816146850586, |
|
"logps/chosen": -488.4398498535156, |
|
"logps/rejected": -727.38916015625, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.468658685684204, |
|
"rewards/margins": 2.3356359004974365, |
|
"rewards/rejected": -4.804293632507324, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 14.873549291466142, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": -1.1273863315582275, |
|
"logits/rejected": -1.127369999885559, |
|
"logps/chosen": -439.273681640625, |
|
"logps/rejected": -970.4906005859375, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.041533946990967, |
|
"rewards/margins": 5.191417694091797, |
|
"rewards/rejected": -7.2329511642456055, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 15.773654226605428, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": -1.212803602218628, |
|
"logits/rejected": -1.2397335767745972, |
|
"logps/chosen": -417.0751037597656, |
|
"logps/rejected": -742.2860107421875, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.125763416290283, |
|
"rewards/margins": 3.112739086151123, |
|
"rewards/rejected": -5.238502502441406, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 13.969918658633894, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": -1.1539736986160278, |
|
"logits/rejected": -1.1613657474517822, |
|
"logps/chosen": -448.21173095703125, |
|
"logps/rejected": -631.4861450195312, |
|
"loss": 0.4127, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.106086254119873, |
|
"rewards/margins": 1.963559865951538, |
|
"rewards/rejected": -4.069646835327148, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 15.025163221103547, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": -1.1364405155181885, |
|
"logits/rejected": -1.1150002479553223, |
|
"logps/chosen": -435.28558349609375, |
|
"logps/rejected": -850.2390747070312, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.216041326522827, |
|
"rewards/margins": 3.508605480194092, |
|
"rewards/rejected": -5.72464656829834, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 12.217302246897113, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": -1.1138544082641602, |
|
"logits/rejected": -1.085440993309021, |
|
"logps/chosen": -457.41107177734375, |
|
"logps/rejected": -764.52490234375, |
|
"loss": 0.4118, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9107215404510498, |
|
"rewards/margins": 3.08528995513916, |
|
"rewards/rejected": -4.996011257171631, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 18.46550627636187, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": -1.2148702144622803, |
|
"logits/rejected": -1.0856149196624756, |
|
"logps/chosen": -348.2018127441406, |
|
"logps/rejected": -761.6448974609375, |
|
"loss": 0.4097, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7905614376068115, |
|
"rewards/margins": 3.7506096363067627, |
|
"rewards/rejected": -5.541171073913574, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 19.299201781895523, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": -1.203184962272644, |
|
"logits/rejected": -1.1859591007232666, |
|
"logps/chosen": -446.0292053222656, |
|
"logps/rejected": -726.4371948242188, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.329465389251709, |
|
"rewards/margins": 2.8768537044525146, |
|
"rewards/rejected": -5.206319332122803, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 12.068424144263599, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": -1.2793502807617188, |
|
"logits/rejected": -1.252190351486206, |
|
"logps/chosen": -443.053955078125, |
|
"logps/rejected": -1062.28564453125, |
|
"loss": 0.3593, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4902422428131104, |
|
"rewards/margins": 5.98113489151001, |
|
"rewards/rejected": -8.471376419067383, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 12.134661285568798, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": -1.0787185430526733, |
|
"logits/rejected": -1.0653938055038452, |
|
"logps/chosen": -486.08880615234375, |
|
"logps/rejected": -640.8261108398438, |
|
"loss": 0.3725, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.273076295852661, |
|
"rewards/margins": 1.9071142673492432, |
|
"rewards/rejected": -4.180190086364746, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 17.885684332947207, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": -1.0950032472610474, |
|
"logits/rejected": -1.1858885288238525, |
|
"logps/chosen": -497.4866638183594, |
|
"logps/rejected": -718.3407592773438, |
|
"loss": 0.4108, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2074708938598633, |
|
"rewards/margins": 2.5658068656921387, |
|
"rewards/rejected": -4.77327823638916, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 14.528060337749242, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": -1.1224123239517212, |
|
"logits/rejected": -1.0580319166183472, |
|
"logps/chosen": -437.44549560546875, |
|
"logps/rejected": -904.7830200195312, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.128122091293335, |
|
"rewards/margins": 4.340218544006348, |
|
"rewards/rejected": -6.468340873718262, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 11.381160671599869, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": -0.9424558877944946, |
|
"logits/rejected": -1.0885859727859497, |
|
"logps/chosen": -470.26434326171875, |
|
"logps/rejected": -712.3435668945312, |
|
"loss": 0.3844, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.301588296890259, |
|
"rewards/margins": 2.9525039196014404, |
|
"rewards/rejected": -5.254092216491699, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 19.023529875568627, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": -1.064992070198059, |
|
"logits/rejected": -1.028592824935913, |
|
"logps/chosen": -398.50384521484375, |
|
"logps/rejected": -970.7301025390625, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0184497833251953, |
|
"rewards/margins": 5.186704635620117, |
|
"rewards/rejected": -7.2051544189453125, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 16.12712460489812, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": -1.1934797763824463, |
|
"logits/rejected": -1.231001615524292, |
|
"logps/chosen": -464.2530822753906, |
|
"logps/rejected": -632.6357421875, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3296985626220703, |
|
"rewards/margins": 1.9349523782730103, |
|
"rewards/rejected": -4.264651298522949, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 11.504903057526331, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": -1.0596282482147217, |
|
"logits/rejected": -0.9983747601509094, |
|
"logps/chosen": -477.0462341308594, |
|
"logps/rejected": -902.0972900390625, |
|
"loss": 0.3847, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2268457412719727, |
|
"rewards/margins": 3.9091105461120605, |
|
"rewards/rejected": -6.135955333709717, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 27.609604865313603, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": -1.1235686540603638, |
|
"logits/rejected": -1.0528533458709717, |
|
"logps/chosen": -478.143798828125, |
|
"logps/rejected": -667.5210571289062, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.356539726257324, |
|
"rewards/margins": 1.9865471124649048, |
|
"rewards/rejected": -4.343085765838623, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 16.415350337205087, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": -1.1847976446151733, |
|
"logits/rejected": -1.1770317554473877, |
|
"logps/chosen": -453.5834045410156, |
|
"logps/rejected": -752.9813232421875, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8817520141601562, |
|
"rewards/margins": 2.8628299236297607, |
|
"rewards/rejected": -4.744582176208496, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 17.669293063839785, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": -1.0959994792938232, |
|
"logits/rejected": -1.0774368047714233, |
|
"logps/chosen": -454.6305236816406, |
|
"logps/rejected": -751.3426513671875, |
|
"loss": 0.398, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.12265682220459, |
|
"rewards/margins": 2.978886604309082, |
|
"rewards/rejected": -5.101543426513672, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 16.006219619936903, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": -1.1957377195358276, |
|
"logits/rejected": -1.1904990673065186, |
|
"logps/chosen": -405.86431884765625, |
|
"logps/rejected": -793.5975952148438, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2277333736419678, |
|
"rewards/margins": 3.6207995414733887, |
|
"rewards/rejected": -5.8485331535339355, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 12.430236202376024, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": -1.0426452159881592, |
|
"logits/rejected": -1.0287232398986816, |
|
"logps/chosen": -418.88427734375, |
|
"logps/rejected": -726.9313354492188, |
|
"loss": 0.3936, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9855321645736694, |
|
"rewards/margins": 2.8939387798309326, |
|
"rewards/rejected": -4.8794708251953125, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 20.11094539300265, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": -1.1118113994598389, |
|
"logits/rejected": -1.1644222736358643, |
|
"logps/chosen": -506.046142578125, |
|
"logps/rejected": -755.5003662109375, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.162713050842285, |
|
"rewards/margins": 2.99284029006958, |
|
"rewards/rejected": -5.155553340911865, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 16.91737052802927, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": -1.131763219833374, |
|
"logits/rejected": -1.1238648891448975, |
|
"logps/chosen": -483.41912841796875, |
|
"logps/rejected": -633.3712768554688, |
|
"loss": 0.4135, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.371241569519043, |
|
"rewards/margins": 1.6103929281234741, |
|
"rewards/rejected": -3.9816346168518066, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 18.088895471463584, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": -1.1542402505874634, |
|
"logits/rejected": -1.2480642795562744, |
|
"logps/chosen": -461.5935974121094, |
|
"logps/rejected": -689.340087890625, |
|
"loss": 0.4061, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0994582176208496, |
|
"rewards/margins": 2.842416286468506, |
|
"rewards/rejected": -4.9418745040893555, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 17.217768649723784, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": -1.0978938341140747, |
|
"logits/rejected": -1.0699193477630615, |
|
"logps/chosen": -440.05169677734375, |
|
"logps/rejected": -651.9158325195312, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.789375901222229, |
|
"rewards/margins": 2.706338405609131, |
|
"rewards/rejected": -4.49571418762207, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 25.169875158147637, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": -0.994094967842102, |
|
"logits/rejected": -1.0959413051605225, |
|
"logps/chosen": -452.80810546875, |
|
"logps/rejected": -717.0819091796875, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.118849277496338, |
|
"rewards/margins": 2.8910040855407715, |
|
"rewards/rejected": -5.009852886199951, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 13.750415835285578, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": -0.9904336929321289, |
|
"logits/rejected": -1.0503981113433838, |
|
"logps/chosen": -449.04913330078125, |
|
"logps/rejected": -674.6497802734375, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8702030181884766, |
|
"rewards/margins": 2.3403210639953613, |
|
"rewards/rejected": -4.210524082183838, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 12.953044708545338, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": -1.0443706512451172, |
|
"logits/rejected": -1.1001513004302979, |
|
"logps/chosen": -534.1334838867188, |
|
"logps/rejected": -685.5401611328125, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.4353697299957275, |
|
"rewards/margins": 1.7159233093261719, |
|
"rewards/rejected": -4.15129280090332, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 11.770848466287676, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": -1.1065876483917236, |
|
"logits/rejected": -1.0538074970245361, |
|
"logps/chosen": -474.156982421875, |
|
"logps/rejected": -803.2342529296875, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.451140880584717, |
|
"rewards/margins": 3.259047031402588, |
|
"rewards/rejected": -5.710188865661621, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 13.446807703778937, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": -1.1683580875396729, |
|
"logits/rejected": -1.0467820167541504, |
|
"logps/chosen": -407.0369873046875, |
|
"logps/rejected": -731.3319091796875, |
|
"loss": 0.4413, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2081449031829834, |
|
"rewards/margins": 2.7958226203918457, |
|
"rewards/rejected": -5.003968238830566, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 12.945264354253307, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": -1.0711700916290283, |
|
"logits/rejected": -1.1465437412261963, |
|
"logps/chosen": -492.7830505371094, |
|
"logps/rejected": -738.3113403320312, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2333264350891113, |
|
"rewards/margins": 2.7537784576416016, |
|
"rewards/rejected": -4.987104892730713, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 16.03013282047824, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": -1.0226466655731201, |
|
"logits/rejected": -0.9650028347969055, |
|
"logps/chosen": -485.961181640625, |
|
"logps/rejected": -760.8209228515625, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.129098415374756, |
|
"rewards/margins": 2.591064691543579, |
|
"rewards/rejected": -4.720162391662598, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 13.460390874624267, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": -1.1847305297851562, |
|
"logits/rejected": -1.1330692768096924, |
|
"logps/chosen": -376.7669982910156, |
|
"logps/rejected": -649.6778564453125, |
|
"loss": 0.412, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9333322048187256, |
|
"rewards/margins": 2.54672908782959, |
|
"rewards/rejected": -4.4800615310668945, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 19.972945286063492, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": -1.0317165851593018, |
|
"logits/rejected": -1.1716341972351074, |
|
"logps/chosen": -525.8427734375, |
|
"logps/rejected": -662.1897583007812, |
|
"loss": 0.4068, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.6331121921539307, |
|
"rewards/margins": 1.6707206964492798, |
|
"rewards/rejected": -4.303833484649658, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 20.87380867505956, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": -1.16623854637146, |
|
"logits/rejected": -1.1874853372573853, |
|
"logps/chosen": -418.4476623535156, |
|
"logps/rejected": -645.4654541015625, |
|
"loss": 0.4138, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9430526494979858, |
|
"rewards/margins": 2.151594877243042, |
|
"rewards/rejected": -4.094647407531738, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 16.04665832154, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": -0.9877917170524597, |
|
"logits/rejected": -1.0230954885482788, |
|
"logps/chosen": -397.6720886230469, |
|
"logps/rejected": -668.94873046875, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.033013105392456, |
|
"rewards/margins": 2.66807222366333, |
|
"rewards/rejected": -4.701085090637207, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 18.3654519072182, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": -1.2031629085540771, |
|
"logits/rejected": -1.1549075841903687, |
|
"logps/chosen": -384.4791564941406, |
|
"logps/rejected": -596.3194580078125, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0126969814300537, |
|
"rewards/margins": 2.0921952724456787, |
|
"rewards/rejected": -4.104892253875732, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 12.871071702081897, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": -1.0249868631362915, |
|
"logits/rejected": -0.955339252948761, |
|
"logps/chosen": -356.92828369140625, |
|
"logps/rejected": -710.9237060546875, |
|
"loss": 0.3915, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9744545221328735, |
|
"rewards/margins": 3.1722934246063232, |
|
"rewards/rejected": -5.146748065948486, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 18.857927055260983, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": -1.1315641403198242, |
|
"logits/rejected": -1.175862431526184, |
|
"logps/chosen": -448.12420654296875, |
|
"logps/rejected": -643.8922119140625, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.075835943222046, |
|
"rewards/margins": 2.037856340408325, |
|
"rewards/rejected": -4.113692283630371, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 19.270697064040355, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": -1.055815577507019, |
|
"logits/rejected": -1.0176749229431152, |
|
"logps/chosen": -472.85418701171875, |
|
"logps/rejected": -900.6534423828125, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1895081996917725, |
|
"rewards/margins": 4.365129470825195, |
|
"rewards/rejected": -6.554637908935547, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 11.756470338560021, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": -1.021990180015564, |
|
"logits/rejected": -1.0114670991897583, |
|
"logps/chosen": -526.177978515625, |
|
"logps/rejected": -784.1201171875, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.455296516418457, |
|
"rewards/margins": 2.5135977268218994, |
|
"rewards/rejected": -4.968894004821777, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 13.074744119052957, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": -1.085823655128479, |
|
"logits/rejected": -1.0509856939315796, |
|
"logps/chosen": -483.33160400390625, |
|
"logps/rejected": -673.5759887695312, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2232718467712402, |
|
"rewards/margins": 1.7320282459259033, |
|
"rewards/rejected": -3.9553000926971436, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 13.781742295328323, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": -1.078175663948059, |
|
"logits/rejected": -1.0989038944244385, |
|
"logps/chosen": -510.52978515625, |
|
"logps/rejected": -873.8924560546875, |
|
"loss": 0.437, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.4151575565338135, |
|
"rewards/margins": 3.7648189067840576, |
|
"rewards/rejected": -6.179976463317871, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 12.441472662730447, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": -1.0066481828689575, |
|
"logits/rejected": -1.0207774639129639, |
|
"logps/chosen": -498.4524841308594, |
|
"logps/rejected": -774.4942626953125, |
|
"loss": 0.4093, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1083011627197266, |
|
"rewards/margins": 2.8360419273376465, |
|
"rewards/rejected": -4.944342613220215, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 19.827064803673473, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": -1.1261366605758667, |
|
"logits/rejected": -1.1484588384628296, |
|
"logps/chosen": -450.61541748046875, |
|
"logps/rejected": -729.1220703125, |
|
"loss": 0.3958, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2109882831573486, |
|
"rewards/margins": 2.9870235919952393, |
|
"rewards/rejected": -5.1980109214782715, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 11.684514143150514, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": -1.0930601358413696, |
|
"logits/rejected": -1.1050859689712524, |
|
"logps/chosen": -469.44677734375, |
|
"logps/rejected": -770.8958129882812, |
|
"loss": 0.4399, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3145837783813477, |
|
"rewards/margins": 2.7300620079040527, |
|
"rewards/rejected": -5.0446457862854, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 22.78691874055665, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": -0.9925867319107056, |
|
"logits/rejected": -1.0378546714782715, |
|
"logps/chosen": -467.55621337890625, |
|
"logps/rejected": -736.3528442382812, |
|
"loss": 0.3888, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0709593296051025, |
|
"rewards/margins": 2.980044364929199, |
|
"rewards/rejected": -5.051003456115723, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 19.8573401803232, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": -1.0133628845214844, |
|
"logits/rejected": -1.054368257522583, |
|
"logps/chosen": -474.64312744140625, |
|
"logps/rejected": -708.5240478515625, |
|
"loss": 0.3697, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2988035678863525, |
|
"rewards/margins": 2.4555907249450684, |
|
"rewards/rejected": -4.754393577575684, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 15.961064721992486, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": -1.0904114246368408, |
|
"logits/rejected": -1.1059800386428833, |
|
"logps/chosen": -459.8374938964844, |
|
"logps/rejected": -742.4410400390625, |
|
"loss": 0.4197, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.326892375946045, |
|
"rewards/margins": 2.9531028270721436, |
|
"rewards/rejected": -5.279995441436768, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 18.92614489476278, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": -1.0305520296096802, |
|
"logits/rejected": -1.0847365856170654, |
|
"logps/chosen": -404.38726806640625, |
|
"logps/rejected": -921.81689453125, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0585224628448486, |
|
"rewards/margins": 5.24090576171875, |
|
"rewards/rejected": -7.299429416656494, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 17.7646623717338, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": -1.0311057567596436, |
|
"logits/rejected": -1.0113275051116943, |
|
"logps/chosen": -472.11846923828125, |
|
"logps/rejected": -774.8313598632812, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2493302822113037, |
|
"rewards/margins": 2.9238831996917725, |
|
"rewards/rejected": -5.173213005065918, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 16.97123994535482, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": -1.055016279220581, |
|
"logits/rejected": -1.0831139087677002, |
|
"logps/chosen": -483.268310546875, |
|
"logps/rejected": -701.4729614257812, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4825241565704346, |
|
"rewards/margins": 2.385812282562256, |
|
"rewards/rejected": -4.868335247039795, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 13.977094418250983, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": -1.1702104806900024, |
|
"logits/rejected": -1.1845287084579468, |
|
"logps/chosen": -477.15740966796875, |
|
"logps/rejected": -644.910400390625, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2820022106170654, |
|
"rewards/margins": 2.027010679244995, |
|
"rewards/rejected": -4.3090128898620605, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 14.318408739762742, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": -1.2429280281066895, |
|
"logits/rejected": -1.275286316871643, |
|
"logps/chosen": -392.6990661621094, |
|
"logps/rejected": -679.7120971679688, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0060112476348877, |
|
"rewards/margins": 2.5065300464630127, |
|
"rewards/rejected": -4.5125412940979, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 17.01122593327606, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": -1.0446436405181885, |
|
"logits/rejected": -1.1268213987350464, |
|
"logps/chosen": -495.5243225097656, |
|
"logps/rejected": -660.196533203125, |
|
"loss": 0.4184, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.165391206741333, |
|
"rewards/margins": 1.9829740524291992, |
|
"rewards/rejected": -4.148365020751953, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 13.45512588577857, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": -1.1435279846191406, |
|
"logits/rejected": -1.1522104740142822, |
|
"logps/chosen": -485.2201232910156, |
|
"logps/rejected": -617.6146240234375, |
|
"loss": 0.4004, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.105456829071045, |
|
"rewards/margins": 1.6230169534683228, |
|
"rewards/rejected": -3.72847318649292, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 18.173194781052167, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": -1.0414386987686157, |
|
"logits/rejected": -1.0121897459030151, |
|
"logps/chosen": -395.79931640625, |
|
"logps/rejected": -666.1370849609375, |
|
"loss": 0.4577, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1656196117401123, |
|
"rewards/margins": 2.18383526802063, |
|
"rewards/rejected": -4.349454402923584, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 16.783716977152825, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": -0.9481878280639648, |
|
"logits/rejected": -0.9732887148857117, |
|
"logps/chosen": -541.752685546875, |
|
"logps/rejected": -781.5398559570312, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5279250144958496, |
|
"rewards/margins": 2.745492458343506, |
|
"rewards/rejected": -5.273416996002197, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 17.892434423988117, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": -1.1220483779907227, |
|
"logits/rejected": -1.1574240922927856, |
|
"logps/chosen": -368.3999328613281, |
|
"logps/rejected": -705.1334228515625, |
|
"loss": 0.401, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7526142597198486, |
|
"rewards/margins": 3.364561080932617, |
|
"rewards/rejected": -5.117176055908203, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 16.65117790496681, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": -0.945580005645752, |
|
"logits/rejected": -0.9971720576286316, |
|
"logps/chosen": -401.4361877441406, |
|
"logps/rejected": -752.6023559570312, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0107078552246094, |
|
"rewards/margins": 3.4325504302978516, |
|
"rewards/rejected": -5.443258762359619, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 11.383207349861097, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": -1.1988914012908936, |
|
"logits/rejected": -1.152146577835083, |
|
"logps/chosen": -380.92840576171875, |
|
"logps/rejected": -766.2640380859375, |
|
"loss": 0.3463, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6404314041137695, |
|
"rewards/margins": 3.8094921112060547, |
|
"rewards/rejected": -5.449923515319824, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 14.817235687069312, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": -1.0774648189544678, |
|
"logits/rejected": -1.1336710453033447, |
|
"logps/chosen": -428.7699279785156, |
|
"logps/rejected": -665.4924926757812, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9814361333847046, |
|
"rewards/margins": 2.189484119415283, |
|
"rewards/rejected": -4.170919895172119, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 12.002932740050225, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": -0.9945319890975952, |
|
"logits/rejected": -1.0899218320846558, |
|
"logps/chosen": -462.98907470703125, |
|
"logps/rejected": -786.7493896484375, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9386112689971924, |
|
"rewards/margins": 3.5603737831115723, |
|
"rewards/rejected": -5.4989848136901855, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 19.054219594890455, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": -1.0964199304580688, |
|
"logits/rejected": -1.1900081634521484, |
|
"logps/chosen": -488.88525390625, |
|
"logps/rejected": -617.3141479492188, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3542323112487793, |
|
"rewards/margins": 1.7926626205444336, |
|
"rewards/rejected": -4.146894454956055, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 10.957826763580568, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": -1.0295380353927612, |
|
"logits/rejected": -1.001744031906128, |
|
"logps/chosen": -416.1560974121094, |
|
"logps/rejected": -653.9662475585938, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0308852195739746, |
|
"rewards/margins": 2.194223165512085, |
|
"rewards/rejected": -4.2251081466674805, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 21.757679034021272, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -1.0701841115951538, |
|
"logits/rejected": -1.0753045082092285, |
|
"logps/chosen": -457.8702087402344, |
|
"logps/rejected": -725.718505859375, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3207480907440186, |
|
"rewards/margins": 2.469742774963379, |
|
"rewards/rejected": -4.790491104125977, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 20.167723441940918, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": -0.9837926030158997, |
|
"logits/rejected": -1.0835992097854614, |
|
"logps/chosen": -432.2687072753906, |
|
"logps/rejected": -644.8499755859375, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0870277881622314, |
|
"rewards/margins": 2.2406442165374756, |
|
"rewards/rejected": -4.327672004699707, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 18.669387474954554, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": -1.1607104539871216, |
|
"logits/rejected": -1.0950881242752075, |
|
"logps/chosen": -384.7247314453125, |
|
"logps/rejected": -849.2708129882812, |
|
"loss": 0.4627, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0593299865722656, |
|
"rewards/margins": 4.698477268218994, |
|
"rewards/rejected": -6.75780725479126, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 12.574697681707546, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": -1.1745719909667969, |
|
"logits/rejected": -1.1211249828338623, |
|
"logps/chosen": -492.7989196777344, |
|
"logps/rejected": -735.6827392578125, |
|
"loss": 0.354, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.242023468017578, |
|
"rewards/margins": 2.6166510581970215, |
|
"rewards/rejected": -4.8586745262146, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 23.069615047778715, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": -1.189727544784546, |
|
"logits/rejected": -1.192415475845337, |
|
"logps/chosen": -464.24102783203125, |
|
"logps/rejected": -783.3685302734375, |
|
"loss": 0.4231, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.2558867931365967, |
|
"rewards/margins": 2.903276205062866, |
|
"rewards/rejected": -5.159163475036621, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 13.52470077231319, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": -0.996944785118103, |
|
"logits/rejected": -1.0384135246276855, |
|
"logps/chosen": -465.0677795410156, |
|
"logps/rejected": -785.6456298828125, |
|
"loss": 0.3889, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.093614339828491, |
|
"rewards/margins": 3.286773204803467, |
|
"rewards/rejected": -5.380387306213379, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 17.442232282658164, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": -1.1311534643173218, |
|
"logits/rejected": -1.1876181364059448, |
|
"logps/chosen": -485.97259521484375, |
|
"logps/rejected": -1016.5963745117188, |
|
"loss": 0.4039, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.456922769546509, |
|
"rewards/margins": 5.4143595695495605, |
|
"rewards/rejected": -7.871281623840332, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 14.265276149673632, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": -1.149627447128296, |
|
"logits/rejected": -1.1739815473556519, |
|
"logps/chosen": -488.38433837890625, |
|
"logps/rejected": -799.1815185546875, |
|
"loss": 0.4014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.456972360610962, |
|
"rewards/margins": 3.4012649059295654, |
|
"rewards/rejected": -5.858237266540527, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 14.656217972804923, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": -1.1081793308258057, |
|
"logits/rejected": -1.1610908508300781, |
|
"logps/chosen": -469.09783935546875, |
|
"logps/rejected": -725.4248046875, |
|
"loss": 0.4108, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4499924182891846, |
|
"rewards/margins": 2.5296425819396973, |
|
"rewards/rejected": -4.979634761810303, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 17.848039169645286, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": -1.0300790071487427, |
|
"logits/rejected": -1.0361210107803345, |
|
"logps/chosen": -490.31488037109375, |
|
"logps/rejected": -749.389892578125, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4620461463928223, |
|
"rewards/margins": 2.546797513961792, |
|
"rewards/rejected": -5.008843421936035, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 13.85499152294089, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": -1.1166772842407227, |
|
"logits/rejected": -1.1094615459442139, |
|
"logps/chosen": -472.2708435058594, |
|
"logps/rejected": -616.1146240234375, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3808658123016357, |
|
"rewards/margins": 1.5156285762786865, |
|
"rewards/rejected": -3.8964946269989014, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 15.101274241746347, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": -1.1072207689285278, |
|
"logits/rejected": -1.2136280536651611, |
|
"logps/chosen": -457.7784729003906, |
|
"logps/rejected": -720.9715576171875, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8839908838272095, |
|
"rewards/margins": 3.2553863525390625, |
|
"rewards/rejected": -5.139378070831299, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 15.530326265159838, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": -1.0710835456848145, |
|
"logits/rejected": -1.1088721752166748, |
|
"logps/chosen": -511.77593994140625, |
|
"logps/rejected": -862.2023315429688, |
|
"loss": 0.3341, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.3845109939575195, |
|
"rewards/margins": 3.6880950927734375, |
|
"rewards/rejected": -6.072606086730957, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 17.77444268921122, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": -1.0058419704437256, |
|
"logits/rejected": -1.1048399209976196, |
|
"logps/chosen": -543.4456176757812, |
|
"logps/rejected": -763.7996826171875, |
|
"loss": 0.4104, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.327815055847168, |
|
"rewards/margins": 2.9269397258758545, |
|
"rewards/rejected": -5.254754543304443, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 17.8842799505974, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": -1.2106958627700806, |
|
"logits/rejected": -1.2864594459533691, |
|
"logps/chosen": -432.79486083984375, |
|
"logps/rejected": -644.1822509765625, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.3121211528778076, |
|
"rewards/margins": 2.3210206031799316, |
|
"rewards/rejected": -4.633141994476318, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 11.331732688823461, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": -1.2257134914398193, |
|
"logits/rejected": -1.1192344427108765, |
|
"logps/chosen": -437.44122314453125, |
|
"logps/rejected": -826.15185546875, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3724796772003174, |
|
"rewards/margins": 3.5549049377441406, |
|
"rewards/rejected": -5.927384853363037, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 11.40432022732026, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": -1.0627083778381348, |
|
"logits/rejected": -1.08839750289917, |
|
"logps/chosen": -462.755126953125, |
|
"logps/rejected": -749.5454711914062, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3041393756866455, |
|
"rewards/margins": 2.629760265350342, |
|
"rewards/rejected": -4.933899879455566, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 16.009435064579, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": -0.9565284848213196, |
|
"logits/rejected": -0.9821860194206238, |
|
"logps/chosen": -533.07080078125, |
|
"logps/rejected": -830.4237060546875, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5907983779907227, |
|
"rewards/margins": 2.66767954826355, |
|
"rewards/rejected": -5.25847864151001, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 14.563417443153222, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": -1.185606598854065, |
|
"logits/rejected": -1.2072519063949585, |
|
"logps/chosen": -449.8453063964844, |
|
"logps/rejected": -1068.011962890625, |
|
"loss": 0.3812, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.5254974365234375, |
|
"rewards/margins": 6.294084548950195, |
|
"rewards/rejected": -8.819581985473633, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 18.118325314041343, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": -1.1093202829360962, |
|
"logits/rejected": -1.1948482990264893, |
|
"logps/chosen": -440.5008850097656, |
|
"logps/rejected": -575.1368408203125, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.41264009475708, |
|
"rewards/margins": 1.3445088863372803, |
|
"rewards/rejected": -3.7571487426757812, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 13.552322900784995, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": -1.0201151371002197, |
|
"logits/rejected": -1.129849910736084, |
|
"logps/chosen": -577.0701293945312, |
|
"logps/rejected": -773.9287719726562, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3463945388793945, |
|
"rewards/margins": 2.6721103191375732, |
|
"rewards/rejected": -5.018505573272705, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 14.631891117796933, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": -1.0706162452697754, |
|
"logits/rejected": -1.0791727304458618, |
|
"logps/chosen": -484.5125427246094, |
|
"logps/rejected": -756.0465087890625, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4046783447265625, |
|
"rewards/margins": 2.9627346992492676, |
|
"rewards/rejected": -5.367413520812988, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 11.654610414390184, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": -1.0974230766296387, |
|
"logits/rejected": -1.0560171604156494, |
|
"logps/chosen": -479.367919921875, |
|
"logps/rejected": -745.7228393554688, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4068191051483154, |
|
"rewards/margins": 2.776381731033325, |
|
"rewards/rejected": -5.183200836181641, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 21.88271199976919, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": -0.9372541308403015, |
|
"logits/rejected": -0.9571690559387207, |
|
"logps/chosen": -509.55133056640625, |
|
"logps/rejected": -853.4140625, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.310447931289673, |
|
"rewards/margins": 3.700510025024414, |
|
"rewards/rejected": -6.010957717895508, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 18.034151428931484, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": -1.1065231561660767, |
|
"logits/rejected": -1.1098486185073853, |
|
"logps/chosen": -518.8222045898438, |
|
"logps/rejected": -746.7293090820312, |
|
"loss": 0.4183, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7079575061798096, |
|
"rewards/margins": 2.2379913330078125, |
|
"rewards/rejected": -4.945948600769043, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 17.996732359854178, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": -0.9841295480728149, |
|
"logits/rejected": -1.083620548248291, |
|
"logps/chosen": -518.1678466796875, |
|
"logps/rejected": -604.3717041015625, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5432040691375732, |
|
"rewards/margins": 1.3551008701324463, |
|
"rewards/rejected": -3.8983047008514404, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 21.03759918631638, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": -1.0917068719863892, |
|
"logits/rejected": -1.0973399877548218, |
|
"logps/chosen": -475.8905334472656, |
|
"logps/rejected": -735.8399658203125, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.32417893409729, |
|
"rewards/margins": 2.500762462615967, |
|
"rewards/rejected": -4.824941158294678, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 15.12770624008802, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": -1.122983455657959, |
|
"logits/rejected": -1.1037867069244385, |
|
"logps/chosen": -397.2513732910156, |
|
"logps/rejected": -841.8653564453125, |
|
"loss": 0.3949, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7809816598892212, |
|
"rewards/margins": 4.345281600952148, |
|
"rewards/rejected": -6.12626314163208, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 17.000426067634862, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": -0.9286991953849792, |
|
"logits/rejected": -1.021832823753357, |
|
"logps/chosen": -520.7754516601562, |
|
"logps/rejected": -835.9027099609375, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9686105251312256, |
|
"rewards/margins": 3.0616462230682373, |
|
"rewards/rejected": -6.030257225036621, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 13.480464564958877, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": -1.0932060480117798, |
|
"logits/rejected": -1.1599218845367432, |
|
"logps/chosen": -533.0609130859375, |
|
"logps/rejected": -862.5074462890625, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.498558759689331, |
|
"rewards/margins": 3.8457469940185547, |
|
"rewards/rejected": -6.344305515289307, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 18.263076008689207, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": -1.043247103691101, |
|
"logits/rejected": -1.033249020576477, |
|
"logps/chosen": -495.8736877441406, |
|
"logps/rejected": -736.5914306640625, |
|
"loss": 0.4318, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.257842540740967, |
|
"rewards/margins": 2.4572973251342773, |
|
"rewards/rejected": -4.715139389038086, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 10.736541271527615, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": -1.0726077556610107, |
|
"logits/rejected": -1.186214566230774, |
|
"logps/chosen": -482.1836853027344, |
|
"logps/rejected": -610.18994140625, |
|
"loss": 0.396, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.107025623321533, |
|
"rewards/margins": 1.534145712852478, |
|
"rewards/rejected": -3.6411712169647217, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 12.862620265499748, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": -1.1533534526824951, |
|
"logits/rejected": -1.1715278625488281, |
|
"logps/chosen": -382.75006103515625, |
|
"logps/rejected": -651.4183959960938, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7870197296142578, |
|
"rewards/margins": 2.5599758625030518, |
|
"rewards/rejected": -4.3469953536987305, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 11.784757005145922, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": -1.0784326791763306, |
|
"logits/rejected": -1.1201931238174438, |
|
"logps/chosen": -376.4716796875, |
|
"logps/rejected": -701.4554443359375, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.858490228652954, |
|
"rewards/margins": 2.8213589191436768, |
|
"rewards/rejected": -4.679849147796631, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 22.056135137773204, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": -1.0288774967193604, |
|
"logits/rejected": -1.073750376701355, |
|
"logps/chosen": -450.322265625, |
|
"logps/rejected": -869.6065673828125, |
|
"loss": 0.3731, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2526326179504395, |
|
"rewards/margins": 4.20005464553833, |
|
"rewards/rejected": -6.4526872634887695, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 19.737949374986822, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": -1.0363576412200928, |
|
"logits/rejected": -1.0900583267211914, |
|
"logps/chosen": -513.31298828125, |
|
"logps/rejected": -884.9781494140625, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4154794216156006, |
|
"rewards/margins": 3.753586530685425, |
|
"rewards/rejected": -6.169066429138184, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 17.054064949812986, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": -1.07740318775177, |
|
"logits/rejected": -1.048232078552246, |
|
"logps/chosen": -420.1988830566406, |
|
"logps/rejected": -608.5756225585938, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.087628126144409, |
|
"rewards/margins": 1.9552154541015625, |
|
"rewards/rejected": -4.042843818664551, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 17.863686096756574, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": -1.0473045110702515, |
|
"logits/rejected": -1.067647933959961, |
|
"logps/chosen": -426.6707458496094, |
|
"logps/rejected": -775.1964111328125, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1260108947753906, |
|
"rewards/margins": 3.496943950653076, |
|
"rewards/rejected": -5.622954845428467, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 12.554705710287928, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": -1.1423534154891968, |
|
"logits/rejected": -1.2145392894744873, |
|
"logps/chosen": -519.9121704101562, |
|
"logps/rejected": -651.9493408203125, |
|
"loss": 0.401, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.350532054901123, |
|
"rewards/margins": 1.7727361917495728, |
|
"rewards/rejected": -4.1232686042785645, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 17.33820204066364, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": -1.1288349628448486, |
|
"logits/rejected": -1.1007946729660034, |
|
"logps/chosen": -472.87713623046875, |
|
"logps/rejected": -697.078125, |
|
"loss": 0.4697, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.494798183441162, |
|
"rewards/margins": 1.797978162765503, |
|
"rewards/rejected": -4.292776584625244, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 19.735604660872788, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": -1.0792783498764038, |
|
"logits/rejected": -1.0856952667236328, |
|
"logps/chosen": -425.07177734375, |
|
"logps/rejected": -650.5399169921875, |
|
"loss": 0.3895, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2981741428375244, |
|
"rewards/margins": 2.2162742614746094, |
|
"rewards/rejected": -4.514448165893555, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 19.83106738953191, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": -1.0853979587554932, |
|
"logits/rejected": -1.0893454551696777, |
|
"logps/chosen": -422.78076171875, |
|
"logps/rejected": -977.7130126953125, |
|
"loss": 0.426, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.013327121734619, |
|
"rewards/margins": 5.603024959564209, |
|
"rewards/rejected": -7.616351127624512, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 24.90411371661287, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": -1.1065361499786377, |
|
"logits/rejected": -1.121259331703186, |
|
"logps/chosen": -385.88641357421875, |
|
"logps/rejected": -718.7703857421875, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.989143967628479, |
|
"rewards/margins": 3.0591297149658203, |
|
"rewards/rejected": -5.04827356338501, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 20.14060859194896, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": -1.0481445789337158, |
|
"logits/rejected": -1.1717673540115356, |
|
"logps/chosen": -347.03582763671875, |
|
"logps/rejected": -661.1770629882812, |
|
"loss": 0.3896, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.8835344314575195, |
|
"rewards/margins": 3.3319649696350098, |
|
"rewards/rejected": -5.215498924255371, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 19.14741858415139, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": -1.1484493017196655, |
|
"logits/rejected": -1.1738967895507812, |
|
"logps/chosen": -448.4156188964844, |
|
"logps/rejected": -831.2892456054688, |
|
"loss": 0.3792, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.096663475036621, |
|
"rewards/margins": 4.216563701629639, |
|
"rewards/rejected": -6.31322717666626, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 15.380617560449137, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": -1.0661578178405762, |
|
"logits/rejected": -1.0423481464385986, |
|
"logps/chosen": -451.716796875, |
|
"logps/rejected": -690.4423217773438, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.405106544494629, |
|
"rewards/margins": 1.979060411453247, |
|
"rewards/rejected": -4.384166717529297, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 17.884025211619726, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": -1.1487770080566406, |
|
"logits/rejected": -1.1188671588897705, |
|
"logps/chosen": -369.1369323730469, |
|
"logps/rejected": -784.3793334960938, |
|
"loss": 0.4026, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7735888957977295, |
|
"rewards/margins": 3.929485321044922, |
|
"rewards/rejected": -5.7030744552612305, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 16.690714182145946, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": -1.1557639837265015, |
|
"logits/rejected": -1.235864281654358, |
|
"logps/chosen": -479.1221618652344, |
|
"logps/rejected": -734.0775146484375, |
|
"loss": 0.4415, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.424396276473999, |
|
"rewards/margins": 2.9635913372039795, |
|
"rewards/rejected": -5.38798713684082, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 19.313778590115387, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": -0.9829009771347046, |
|
"logits/rejected": -1.0783188343048096, |
|
"logps/chosen": -424.56463623046875, |
|
"logps/rejected": -663.997802734375, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.211613178253174, |
|
"rewards/margins": 2.252444267272949, |
|
"rewards/rejected": -4.464056968688965, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 16.700042187142635, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": -1.1402798891067505, |
|
"logits/rejected": -1.111434817314148, |
|
"logps/chosen": -460.59893798828125, |
|
"logps/rejected": -831.3668212890625, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2458419799804688, |
|
"rewards/margins": 3.5192267894744873, |
|
"rewards/rejected": -5.765069007873535, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 18.002635499528342, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": -1.149012565612793, |
|
"logits/rejected": -1.1922236680984497, |
|
"logps/chosen": -389.1723327636719, |
|
"logps/rejected": -704.3775024414062, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.9992425441741943, |
|
"rewards/margins": 3.1408438682556152, |
|
"rewards/rejected": -5.140086650848389, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 14.779338531754624, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": -0.9585069417953491, |
|
"logits/rejected": -0.9837729334831238, |
|
"logps/chosen": -488.01055908203125, |
|
"logps/rejected": -722.9725341796875, |
|
"loss": 0.4102, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.372795581817627, |
|
"rewards/margins": 2.566098928451538, |
|
"rewards/rejected": -4.938894748687744, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 14.732524058414059, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": -0.8768994212150574, |
|
"logits/rejected": -0.8977943658828735, |
|
"logps/chosen": -459.11309814453125, |
|
"logps/rejected": -748.9290161132812, |
|
"loss": 0.3607, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.143648147583008, |
|
"rewards/margins": 3.162809371948242, |
|
"rewards/rejected": -5.30645751953125, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 17.982384045789107, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": -1.1603506803512573, |
|
"logits/rejected": -1.2332944869995117, |
|
"logps/chosen": -403.1298828125, |
|
"logps/rejected": -722.2110595703125, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1506400108337402, |
|
"rewards/margins": 3.0482773780822754, |
|
"rewards/rejected": -5.198916912078857, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 18.681352163564856, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": -0.9103895425796509, |
|
"logits/rejected": -0.9239616394042969, |
|
"logps/chosen": -530.7114868164062, |
|
"logps/rejected": -747.0721435546875, |
|
"loss": 0.4056, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3054797649383545, |
|
"rewards/margins": 2.478546619415283, |
|
"rewards/rejected": -4.784027099609375, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 14.698356072509888, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": -1.153602123260498, |
|
"logits/rejected": -1.12786066532135, |
|
"logps/chosen": -489.3046875, |
|
"logps/rejected": -890.0556640625, |
|
"loss": 0.3969, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.4070847034454346, |
|
"rewards/margins": 3.750683546066284, |
|
"rewards/rejected": -6.157768249511719, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 15.890482276272099, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": -1.0214381217956543, |
|
"logits/rejected": -1.004897117614746, |
|
"logps/chosen": -439.8526916503906, |
|
"logps/rejected": -769.1018676757812, |
|
"loss": 0.3697, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9336268901824951, |
|
"rewards/margins": 3.114084005355835, |
|
"rewards/rejected": -5.047710418701172, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 11.251412552986052, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": -1.1649322509765625, |
|
"logits/rejected": -1.1327035427093506, |
|
"logps/chosen": -441.202880859375, |
|
"logps/rejected": -933.3021240234375, |
|
"loss": 0.3523, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2993717193603516, |
|
"rewards/margins": 4.983204364776611, |
|
"rewards/rejected": -7.282576560974121, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 20.47517342425732, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": -1.0593699216842651, |
|
"logits/rejected": -1.1538712978363037, |
|
"logps/chosen": -489.8775939941406, |
|
"logps/rejected": -762.0972290039062, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2437944412231445, |
|
"rewards/margins": 3.1374268531799316, |
|
"rewards/rejected": -5.381220817565918, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 22.249007795619104, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": -0.9747680425643921, |
|
"logits/rejected": -1.0482494831085205, |
|
"logps/chosen": -508.7771911621094, |
|
"logps/rejected": -758.6298217773438, |
|
"loss": 0.3939, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0802645683288574, |
|
"rewards/margins": 2.5900025367736816, |
|
"rewards/rejected": -4.670267581939697, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 15.390160001717241, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": -1.118100881576538, |
|
"logits/rejected": -1.0623310804367065, |
|
"logps/chosen": -522.0684814453125, |
|
"logps/rejected": -999.7626953125, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.4960930347442627, |
|
"rewards/margins": 4.626833438873291, |
|
"rewards/rejected": -7.122926235198975, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 18.77137560752492, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": -1.1241267919540405, |
|
"logits/rejected": -1.2161864042282104, |
|
"logps/chosen": -470.51123046875, |
|
"logps/rejected": -613.9768676757812, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1745758056640625, |
|
"rewards/margins": 1.793039083480835, |
|
"rewards/rejected": -3.9676146507263184, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 16.685258955661418, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": -1.140171766281128, |
|
"logits/rejected": -1.1334731578826904, |
|
"logps/chosen": -439.6238708496094, |
|
"logps/rejected": -944.9068603515625, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9790058135986328, |
|
"rewards/margins": 5.0386128425598145, |
|
"rewards/rejected": -7.017618656158447, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 11.823950534472615, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": -1.1124851703643799, |
|
"logits/rejected": -1.0793733596801758, |
|
"logps/chosen": -450.46759033203125, |
|
"logps/rejected": -621.4553833007812, |
|
"loss": 0.3762, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.129909038543701, |
|
"rewards/margins": 1.6316797733306885, |
|
"rewards/rejected": -3.7615890502929688, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 16.192817201214602, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": -1.1737887859344482, |
|
"logits/rejected": -1.1507189273834229, |
|
"logps/chosen": -509.4762268066406, |
|
"logps/rejected": -871.7518310546875, |
|
"loss": 0.4002, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.6139490604400635, |
|
"rewards/margins": 3.223182201385498, |
|
"rewards/rejected": -5.837131023406982, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 12.058728640051894, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": -1.022780418395996, |
|
"logits/rejected": -0.9987403154373169, |
|
"logps/chosen": -478.4554138183594, |
|
"logps/rejected": -652.6536865234375, |
|
"loss": 0.4088, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.370469570159912, |
|
"rewards/margins": 1.6161177158355713, |
|
"rewards/rejected": -3.9865875244140625, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 13.938662102562736, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": -1.0831263065338135, |
|
"logits/rejected": -0.9888957738876343, |
|
"logps/chosen": -427.6864318847656, |
|
"logps/rejected": -906.8592529296875, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.329150676727295, |
|
"rewards/margins": 4.188321113586426, |
|
"rewards/rejected": -6.5174713134765625, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 17.278174451554634, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": -1.2020190954208374, |
|
"logits/rejected": -1.1284525394439697, |
|
"logps/chosen": -484.2650451660156, |
|
"logps/rejected": -632.048583984375, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3161118030548096, |
|
"rewards/margins": 1.0392600297927856, |
|
"rewards/rejected": -3.3553714752197266, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 15.498111728144039, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": -1.0772826671600342, |
|
"logits/rejected": -1.160631537437439, |
|
"logps/chosen": -455.0005798339844, |
|
"logps/rejected": -892.5280151367188, |
|
"loss": 0.3896, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.258082866668701, |
|
"rewards/margins": 4.663710117340088, |
|
"rewards/rejected": -6.921793460845947, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 15.18662366523353, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": -1.0644091367721558, |
|
"logits/rejected": -0.9969073534011841, |
|
"logps/chosen": -481.2547912597656, |
|
"logps/rejected": -849.2018432617188, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.187262773513794, |
|
"rewards/margins": 3.615166425704956, |
|
"rewards/rejected": -5.802428722381592, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 18.86870094684195, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": -1.1208722591400146, |
|
"logits/rejected": -1.0991759300231934, |
|
"logps/chosen": -380.5445251464844, |
|
"logps/rejected": -697.8775024414062, |
|
"loss": 0.412, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7180122137069702, |
|
"rewards/margins": 2.9493772983551025, |
|
"rewards/rejected": -4.667389869689941, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 13.841623997146602, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": -1.1407277584075928, |
|
"logits/rejected": -1.14281165599823, |
|
"logps/chosen": -466.45355224609375, |
|
"logps/rejected": -869.8348388671875, |
|
"loss": 0.3899, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.353449821472168, |
|
"rewards/margins": 4.2861199378967285, |
|
"rewards/rejected": -6.6395697593688965, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 14.912969619458565, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": -1.163573980331421, |
|
"logits/rejected": -1.1741966009140015, |
|
"logps/chosen": -495.27313232421875, |
|
"logps/rejected": -631.5628662109375, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.410792589187622, |
|
"rewards/margins": 1.440456748008728, |
|
"rewards/rejected": -3.8512492179870605, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 18.054972694480718, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": -1.1209748983383179, |
|
"logits/rejected": -1.1992738246917725, |
|
"logps/chosen": -533.5642700195312, |
|
"logps/rejected": -736.080810546875, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.637540817260742, |
|
"rewards/margins": 2.0387625694274902, |
|
"rewards/rejected": -4.676303386688232, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 14.468498354603758, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": -1.0455518960952759, |
|
"logits/rejected": -1.061714768409729, |
|
"logps/chosen": -488.90576171875, |
|
"logps/rejected": -747.364013671875, |
|
"loss": 0.4202, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0449106693267822, |
|
"rewards/margins": 2.6913721561431885, |
|
"rewards/rejected": -4.736282825469971, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 15.662743345785968, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": -1.0954151153564453, |
|
"logits/rejected": -1.108139991760254, |
|
"logps/chosen": -445.718994140625, |
|
"logps/rejected": -760.5974731445312, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.274768114089966, |
|
"rewards/margins": 3.2226016521453857, |
|
"rewards/rejected": -5.497369766235352, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 12.35347569811952, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": -1.0665959119796753, |
|
"logits/rejected": -0.9220373034477234, |
|
"logps/chosen": -430.4939880371094, |
|
"logps/rejected": -755.52587890625, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.2020294666290283, |
|
"rewards/margins": 2.7335734367370605, |
|
"rewards/rejected": -4.93560266494751, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 17.123505773306672, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": -1.2090275287628174, |
|
"logits/rejected": -1.2055715322494507, |
|
"logps/chosen": -387.20947265625, |
|
"logps/rejected": -725.6389770507812, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0009734630584717, |
|
"rewards/margins": 3.3572704792022705, |
|
"rewards/rejected": -5.3582444190979, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 26.53516553962381, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": -1.2251334190368652, |
|
"logits/rejected": -1.1628016233444214, |
|
"logps/chosen": -447.06671142578125, |
|
"logps/rejected": -905.41943359375, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.332759380340576, |
|
"rewards/margins": 4.409472465515137, |
|
"rewards/rejected": -6.742232322692871, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 15.849586481858255, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": -1.0247732400894165, |
|
"logits/rejected": -1.0921010971069336, |
|
"logps/chosen": -465.94317626953125, |
|
"logps/rejected": -652.0645751953125, |
|
"loss": 0.4045, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.15604829788208, |
|
"rewards/margins": 1.82071852684021, |
|
"rewards/rejected": -3.976767063140869, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 12.747060730063819, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": -1.0665229558944702, |
|
"logits/rejected": -1.1173847913742065, |
|
"logps/chosen": -532.3182983398438, |
|
"logps/rejected": -821.4293212890625, |
|
"loss": 0.4311, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.262028932571411, |
|
"rewards/margins": 2.8439760208129883, |
|
"rewards/rejected": -5.106005668640137, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": -1.2033898830413818, |
|
"eval_logits/rejected": -1.1906352043151855, |
|
"eval_logps/chosen": -463.1195373535156, |
|
"eval_logps/rejected": -796.0059204101562, |
|
"eval_loss": 0.40018871426582336, |
|
"eval_rewards/accuracies": 0.8535714149475098, |
|
"eval_rewards/chosen": -2.245985507965088, |
|
"eval_rewards/margins": 3.253223419189453, |
|
"eval_rewards/rejected": -5.499208927154541, |
|
"eval_runtime": 202.8468, |
|
"eval_samples_per_second": 21.992, |
|
"eval_steps_per_second": 0.345, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 21.87211176820157, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": -1.122045874595642, |
|
"logits/rejected": -1.1218305826187134, |
|
"logps/chosen": -418.22686767578125, |
|
"logps/rejected": -835.3477783203125, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2049636840820312, |
|
"rewards/margins": 4.108201503753662, |
|
"rewards/rejected": -6.313164710998535, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 20.178611681191477, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": -1.0017145872116089, |
|
"logits/rejected": -0.9959618449211121, |
|
"logps/chosen": -547.8878173828125, |
|
"logps/rejected": -944.1638793945312, |
|
"loss": 0.3753, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.129427433013916, |
|
"rewards/margins": 3.9424376487731934, |
|
"rewards/rejected": -6.071865081787109, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 14.197633100109993, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": -1.0468965768814087, |
|
"logits/rejected": -1.1212776899337769, |
|
"logps/chosen": -515.878173828125, |
|
"logps/rejected": -670.55224609375, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4107260704040527, |
|
"rewards/margins": 1.5576350688934326, |
|
"rewards/rejected": -3.9683613777160645, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 20.71404401442789, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": -1.0676016807556152, |
|
"logits/rejected": -1.0992966890335083, |
|
"logps/chosen": -489.1631774902344, |
|
"logps/rejected": -992.6973876953125, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4302258491516113, |
|
"rewards/margins": 4.995226860046387, |
|
"rewards/rejected": -7.425453186035156, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 18.762913271326, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": -1.0681426525115967, |
|
"logits/rejected": -1.047761082649231, |
|
"logps/chosen": -479.65350341796875, |
|
"logps/rejected": -744.2103881835938, |
|
"loss": 0.406, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2822158336639404, |
|
"rewards/margins": 2.5112454891204834, |
|
"rewards/rejected": -4.793460845947266, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 15.021979385637746, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": -1.1485240459442139, |
|
"logits/rejected": -1.0922056436538696, |
|
"logps/chosen": -389.207275390625, |
|
"logps/rejected": -744.5093383789062, |
|
"loss": 0.3763, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.987817406654358, |
|
"rewards/margins": 3.430870771408081, |
|
"rewards/rejected": -5.418687343597412, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 13.67162874026034, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": -1.1141016483306885, |
|
"logits/rejected": -1.0663163661956787, |
|
"logps/chosen": -481.4126892089844, |
|
"logps/rejected": -799.8731689453125, |
|
"loss": 0.4452, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6318254470825195, |
|
"rewards/margins": 3.2658958435058594, |
|
"rewards/rejected": -5.897720813751221, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 17.482908148705075, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": -1.200620412826538, |
|
"logits/rejected": -1.1152732372283936, |
|
"logps/chosen": -499.2351989746094, |
|
"logps/rejected": -834.9778442382812, |
|
"loss": 0.3757, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.555325984954834, |
|
"rewards/margins": 3.111412525177002, |
|
"rewards/rejected": -5.666738510131836, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 18.707983969311634, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": -1.1368467807769775, |
|
"logits/rejected": -1.1244173049926758, |
|
"logps/chosen": -390.2232971191406, |
|
"logps/rejected": -816.9163818359375, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.021688461303711, |
|
"rewards/margins": 3.9690163135528564, |
|
"rewards/rejected": -5.9907050132751465, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 21.443691190389707, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": -1.0214062929153442, |
|
"logits/rejected": -1.0243003368377686, |
|
"logps/chosen": -468.2923889160156, |
|
"logps/rejected": -744.1151123046875, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.444369077682495, |
|
"rewards/margins": 2.769819736480713, |
|
"rewards/rejected": -5.214189052581787, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 12.556013081787883, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": -0.9810346364974976, |
|
"logits/rejected": -1.0578103065490723, |
|
"logps/chosen": -514.701416015625, |
|
"logps/rejected": -736.8178100585938, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4852397441864014, |
|
"rewards/margins": 1.9187476634979248, |
|
"rewards/rejected": -4.403986930847168, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 14.55216329888536, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": -1.0134025812149048, |
|
"logits/rejected": -1.1030604839324951, |
|
"logps/chosen": -521.6679077148438, |
|
"logps/rejected": -632.6597900390625, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2684521675109863, |
|
"rewards/margins": 1.4823023080825806, |
|
"rewards/rejected": -3.7507545948028564, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 15.400882538943376, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": -1.0723247528076172, |
|
"logits/rejected": -1.0602704286575317, |
|
"logps/chosen": -508.36163330078125, |
|
"logps/rejected": -874.0475463867188, |
|
"loss": 0.3882, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1347973346710205, |
|
"rewards/margins": 3.8190529346466064, |
|
"rewards/rejected": -5.9538493156433105, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 21.162328364678483, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": -1.1026606559753418, |
|
"logits/rejected": -1.1409575939178467, |
|
"logps/chosen": -481.29962158203125, |
|
"logps/rejected": -960.5101318359375, |
|
"loss": 0.4091, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1151065826416016, |
|
"rewards/margins": 5.2843122482299805, |
|
"rewards/rejected": -7.399418830871582, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 17.96942114686989, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": -1.1128456592559814, |
|
"logits/rejected": -1.0932135581970215, |
|
"logps/chosen": -469.04327392578125, |
|
"logps/rejected": -757.17431640625, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0967726707458496, |
|
"rewards/margins": 2.7530088424682617, |
|
"rewards/rejected": -4.8497819900512695, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 18.413638935256603, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": -1.0999760627746582, |
|
"logits/rejected": -1.0788906812667847, |
|
"logps/chosen": -418.7471618652344, |
|
"logps/rejected": -718.1671142578125, |
|
"loss": 0.4369, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1133265495300293, |
|
"rewards/margins": 2.8272883892059326, |
|
"rewards/rejected": -4.940615177154541, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4717116741438516, |
|
"train_runtime": 20989.4384, |
|
"train_samples_per_second": 6.354, |
|
"train_steps_per_second": 0.199 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|