{ "best_metric": null, "best_model_checkpoint": null, "epoch": null, "global_step": 0, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.9997541914704325e-05, "loss": 0.0855, "reward": 0.814, "step": 49 }, { "epoch": 0.01, "learning_rate": 2.9989966819651966e-05, "loss": 0.0814, "reward": 0.9376, "step": 99 }, { "epoch": 0.02, "learning_rate": 2.9977276276102345e-05, "loss": 0.0734, "reward": 0.8684, "step": 149 }, { "epoch": 0.02, "learning_rate": 2.995947461480701e-05, "loss": 0.0716, "reward": 0.8537, "step": 199 }, { "epoch": 0.03, "learning_rate": 2.9936567910728124e-05, "loss": 0.0603, "reward": 0.6429, "step": 249 }, { "epoch": 0.04, "learning_rate": 2.9908563980965393e-05, "loss": 0.0624, "reward": 0.641, "step": 299 }, { "epoch": 0.04, "learning_rate": 2.9875472382088355e-05, "loss": 0.0631, "reward": 0.8062, "step": 349 }, { "epoch": 0.05, "learning_rate": 2.9837304406875167e-05, "loss": 0.0577, "reward": 0.5326, "step": 399 }, { "epoch": 0.05, "learning_rate": 2.9794073080458815e-05, "loss": 0.0556, "reward": 0.9085, "step": 449 }, { "epoch": 0.06, "learning_rate": 2.9745793155882214e-05, "loss": 0.055, "reward": 0.9616, "step": 499 }, { "epoch": 0.06, "learning_rate": 2.9692481109063605e-05, "loss": 0.0525, "reward": 0.899, "step": 549 }, { "epoch": 0.07, "learning_rate": 2.963415513317399e-05, "loss": 0.0518, "reward": 0.6793, "step": 599 }, { "epoch": 0.08, "learning_rate": 2.9570835132428594e-05, "loss": 0.0546, "reward": 0.8445, "step": 649 }, { "epoch": 0.08, "learning_rate": 2.9502542715294366e-05, "loss": 0.0532, "reward": 0.7952, "step": 699 }, { "epoch": 0.09, "learning_rate": 2.942930118711593e-05, "loss": 0.0515, "reward": 0.8009, "step": 749 }, { "epoch": 0.09, "learning_rate": 2.9351135542162432e-05, "loss": 0.0445, "reward": 1.1039, "step": 799 }, { "epoch": 0.1, "learning_rate": 2.9268072455098065e-05, "loss": 0.0447, "reward": 0.858, "step": 849 }, { "epoch": 0.11, "learning_rate": 2.918014027187909e-05, "loss": 0.0455, "reward": 0.9129, "step": 899 }, { "epoch": 0.11, "learning_rate": 2.9087369000080567e-05, "loss": 0.0459, "reward": 0.9215, "step": 949 }, { "epoch": 0.12, "learning_rate": 2.898979029865602e-05, "loss": 0.0424, "reward": 0.8282, "step": 999 }, { "epoch": 0.12, "learning_rate": 2.888743746713357e-05, "loss": 0.0437, "reward": 0.9707, "step": 1049 }, { "epoch": 0.13, "learning_rate": 2.8780345434252185e-05, "loss": 0.041, "reward": 0.796, "step": 1099 }, { "epoch": 0.14, "learning_rate": 2.8668550746041966e-05, "loss": 0.0468, "reward": 0.9538, "step": 1149 }, { "epoch": 0.14, "learning_rate": 2.8552091553352533e-05, "loss": 0.0409, "reward": 0.8948, "step": 1199 }, { "epoch": 0.15, "learning_rate": 2.8431007598833705e-05, "loss": 0.0408, "reward": 0.7338, "step": 1249 }, { "epoch": 0.15, "learning_rate": 2.830534020337303e-05, "loss": 0.0392, "reward": 0.8156, "step": 1299 }, { "epoch": 0.16, "learning_rate": 2.817513225199466e-05, "loss": 0.0375, "reward": 0.7625, "step": 1349 }, { "epoch": 0.16, "learning_rate": 2.8040428179224528e-05, "loss": 0.04, "reward": 0.9719, "step": 1399 }, { "epoch": 0.17, "learning_rate": 2.790127395392666e-05, "loss": 0.034, "reward": 0.8545, "step": 1449 }, { "epoch": 0.18, "learning_rate": 2.7757717063615962e-05, "loss": 0.0437, "reward": 0.8889, "step": 1499 }, { "epoch": 0.18, "learning_rate": 2.7609806498252692e-05, "loss": 0.0355, "reward": 1.2005, "step": 1549 }, { "epoch": 0.19, "learning_rate": 2.745759273352425e-05, "loss": 0.0368, "reward": 1.0282, "step": 1599 }, { "epoch": 0.19, "learning_rate": 2.7301127713619938e-05, "loss": 0.0346, "reward": 0.9873, "step": 1649 }, { "epoch": 0.2, "learning_rate": 2.7140464833504564e-05, "loss": 0.034, "reward": 0.7074, "step": 1699 }, { "epoch": 0.21, "learning_rate": 2.6975658920697006e-05, "loss": 0.0334, "reward": 0.9238, "step": 1749 }, { "epoch": 0.21, "learning_rate": 2.680676621655984e-05, "loss": 0.0317, "reward": 0.8791, "step": 1799 }, { "epoch": 0.22, "learning_rate": 2.663384435710654e-05, "loss": 0.0348, "reward": 1.1593, "step": 1849 }, { "epoch": 0.22, "learning_rate": 2.6456952353332712e-05, "loss": 0.0309, "reward": 1.0537, "step": 1899 }, { "epoch": 0.23, "learning_rate": 2.6276150571078108e-05, "loss": 0.0333, "reward": 1.0686, "step": 1949 }, { "epoch": 0.24, "learning_rate": 2.6091500710426278e-05, "loss": 0.0355, "reward": 1.0331, "step": 1999 }, { "epoch": 0.24, "learning_rate": 2.5903065784648947e-05, "loss": 0.0331, "reward": 0.8029, "step": 2049 }, { "epoch": 0.25, "learning_rate": 2.5710910098702187e-05, "loss": 0.0324, "reward": 0.9726, "step": 2099 }, { "epoch": 0.25, "learning_rate": 2.5515099227281836e-05, "loss": 0.0322, "reward": 0.9616, "step": 2149 }, { "epoch": 0.26, "learning_rate": 2.5315699992445617e-05, "loss": 0.0322, "reward": 0.9011, "step": 2199 }, { "epoch": 0.26, "learning_rate": 2.511278044080954e-05, "loss": 0.0314, "reward": 0.7529, "step": 2249 }, { "epoch": 0.27, "learning_rate": 2.4906409820326436e-05, "loss": 0.0286, "reward": 1.1313, "step": 2299 }, { "epoch": 0.28, "learning_rate": 2.4696658556654575e-05, "loss": 0.0285, "reward": 1.032, "step": 2349 }, { "epoch": 0.28, "learning_rate": 2.4483598229124274e-05, "loss": 0.0296, "reward": 1.0514, "step": 2399 }, { "epoch": 0.29, "learning_rate": 2.42673015463109e-05, "loss": 0.0308, "reward": 1.1721, "step": 2449 }, { "epoch": 0.29, "learning_rate": 2.404784232122248e-05, "loss": 0.0288, "reward": 1.2602, "step": 2499 }, { "epoch": 0.3, "learning_rate": 2.382529544611038e-05, "loss": 0.0274, "reward": 1.3181, "step": 2549 }, { "epoch": 0.31, "learning_rate": 2.3599736866911756e-05, "loss": 0.0302, "reward": 0.8566, "step": 2599 }, { "epoch": 0.31, "learning_rate": 2.3371243557332333e-05, "loss": 0.0324, "reward": 1.1646, "step": 2649 }, { "epoch": 0.32, "learning_rate": 2.313989349257855e-05, "loss": 0.028, "reward": 1.3095, "step": 2699 }, { "epoch": 0.32, "learning_rate": 2.2905765622747843e-05, "loss": 0.0294, "reward": 0.9202, "step": 2749 }, { "epoch": 0.33, "learning_rate": 2.266893984588631e-05, "loss": 0.0299, "reward": 0.9073, "step": 2799 }, { "epoch": 0.34, "learning_rate": 2.242949698072283e-05, "loss": 0.0264, "reward": 1.0046, "step": 2849 }, { "epoch": 0.34, "learning_rate": 2.2187518739089033e-05, "loss": 0.0317, "reward": 1.1227, "step": 2899 }, { "epoch": 0.35, "learning_rate": 2.194308769803444e-05, "loss": 0.0276, "reward": 1.0356, "step": 2949 }, { "epoch": 0.35, "learning_rate": 2.1696287271646406e-05, "loss": 0.0253, "reward": 1.1648, "step": 2999 }, { "epoch": 0.36, "learning_rate": 2.1447201682584356e-05, "loss": 0.026, "reward": 1.155, "step": 3049 }, { "epoch": 0.36, "learning_rate": 2.1195915933338133e-05, "loss": 0.0265, "reward": 0.9532, "step": 3099 }, { "epoch": 0.37, "learning_rate": 2.0942515777220186e-05, "loss": 0.0278, "reward": 1.1358, "step": 3149 }, { "epoch": 0.38, "learning_rate": 2.0687087689101562e-05, "loss": 0.0258, "reward": 1.0723, "step": 3199 }, { "epoch": 0.38, "learning_rate": 2.0429718835901672e-05, "loss": 0.029, "reward": 1.3277, "step": 3249 }, { "epoch": 0.39, "learning_rate": 2.0170497046841824e-05, "loss": 0.0281, "reward": 1.2176, "step": 3299 }, { "epoch": 0.39, "learning_rate": 1.9909510783472825e-05, "loss": 0.0258, "reward": 1.3399, "step": 3349 }, { "epoch": 0.4, "learning_rate": 1.964684910948672e-05, "loss": 0.0279, "reward": 1.1264, "step": 3399 }, { "epoch": 0.41, "learning_rate": 1.9382601660323124e-05, "loss": 0.0259, "reward": 1.0383, "step": 3449 }, { "epoch": 0.41, "learning_rate": 1.911685861258034e-05, "loss": 0.0244, "reward": 1.135, "step": 3499 }, { "epoch": 0.42, "learning_rate": 1.8849710653241923e-05, "loss": 0.0246, "reward": 1.0922, "step": 3549 }, { "epoch": 0.42, "learning_rate": 1.858124894872895e-05, "loss": 0.0243, "reward": 1.1385, "step": 3599 }, { "epoch": 0.43, "learning_rate": 1.8311565113788777e-05, "loss": 0.0255, "reward": 0.9836, "step": 3649 }, { "epoch": 0.44, "learning_rate": 1.804075118023072e-05, "loss": 0.0244, "reward": 1.0459, "step": 3699 }, { "epoch": 0.44, "learning_rate": 1.7768899565519493e-05, "loss": 0.0233, "reward": 1.1805, "step": 3749 }, { "epoch": 0.45, "learning_rate": 1.749610304123695e-05, "loss": 0.0264, "reward": 1.305, "step": 3799 }, { "epoch": 0.45, "learning_rate": 1.7222454701423068e-05, "loss": 0.0237, "reward": 1.0362, "step": 3849 }, { "epoch": 0.46, "learning_rate": 1.694804793080681e-05, "loss": 0.0236, "reward": 1.2275, "step": 3899 }, { "epoch": 0.46, "learning_rate": 1.6672976372937838e-05, "loss": 0.0238, "reward": 1.2652, "step": 3949 }, { "epoch": 0.47, "learning_rate": 1.63973338982299e-05, "loss": 0.0235, "reward": 1.1863, "step": 3999 }, { "epoch": 0.48, "learning_rate": 1.6121214571926765e-05, "loss": 0.0253, "reward": 1.4457, "step": 4049 }, { "epoch": 0.48, "learning_rate": 1.5844712622001708e-05, "loss": 0.0241, "reward": 1.3766, "step": 4099 }, { "epoch": 0.49, "learning_rate": 1.5567922407001432e-05, "loss": 0.0228, "reward": 1.2514, "step": 4149 }, { "epoch": 0.49, "learning_rate": 1.5290938383845442e-05, "loss": 0.0237, "reward": 1.2308, "step": 4199 }, { "epoch": 0.5, "learning_rate": 1.5013855075591872e-05, "loss": 0.0219, "reward": 1.3735, "step": 4249 }, { "epoch": 0.51, "learning_rate": 1.4736767039180697e-05, "loss": 0.0239, "reward": 1.1207, "step": 4299 }, { "epoch": 0.51, "learning_rate": 1.4459768833165414e-05, "loss": 0.0219, "reward": 1.1005, "step": 4349 }, { "epoch": 0.52, "learning_rate": 1.4182954985444172e-05, "loss": 0.023, "reward": 1.4297, "step": 4399 }, { "epoch": 0.52, "learning_rate": 1.3906419961001339e-05, "loss": 0.0238, "reward": 1.289, "step": 4449 }, { "epoch": 0.53, "learning_rate": 1.3630258129670565e-05, "loss": 0.0221, "reward": 1.3299, "step": 4499 }, { "epoch": 0.54, "learning_rate": 1.3354563733930315e-05, "loss": 0.0221, "reward": 1.5207, "step": 4549 }, { "epoch": 0.54, "learning_rate": 1.3079430856742829e-05, "loss": 0.0207, "reward": 1.3068, "step": 4599 }, { "epoch": 0.55, "learning_rate": 1.2804953389447579e-05, "loss": 0.0228, "reward": 1.2289, "step": 4649 }, { "epoch": 0.55, "learning_rate": 1.2531224999720032e-05, "loss": 0.0222, "reward": 1.1784, "step": 4699 }, { "epoch": 0.56, "learning_rate": 1.2258339099606862e-05, "loss": 0.0208, "reward": 1.1996, "step": 4749 }, { "epoch": 0.56, "learning_rate": 1.198638881364825e-05, "loss": 0.0213, "reward": 1.2377, "step": 4799 }, { "epoch": 0.57, "learning_rate": 1.1715466947098438e-05, "loss": 0.0217, "reward": 1.1597, "step": 4849 }, { "epoch": 0.58, "learning_rate": 1.1445665954255139e-05, "loss": 0.02, "reward": 1.2, "step": 4899 }, { "epoch": 0.58, "learning_rate": 1.1177077906908772e-05, "loss": 0.0233, "reward": 1.3867, "step": 4949 }, { "epoch": 0.59, "learning_rate": 1.0909794462922214e-05, "loss": 0.0213, "reward": 1.3454, "step": 4999 }, { "epoch": 0.59, "learning_rate": 1.064390683495178e-05, "loss": 0.0212, "reward": 1.2913, "step": 5049 }, { "epoch": 0.6, "learning_rate": 1.0379505759320209e-05, "loss": 0.0215, "reward": 1.1036, "step": 5099 }, { "epoch": 0.61, "learning_rate": 1.0116681465052087e-05, "loss": 0.0211, "reward": 1.5439, "step": 5149 }, { "epoch": 0.61, "learning_rate": 9.855523643082532e-06, "loss": 0.0204, "reward": 1.2613, "step": 5199 }, { "epoch": 0.62, "learning_rate": 9.596121415649359e-06, "loss": 0.0219, "reward": 1.4101, "step": 5249 }, { "epoch": 0.62, "learning_rate": 9.33856330587944e-06, "loss": 0.022, "reward": 1.4715, "step": 5299 }, { "epoch": 0.63, "learning_rate": 9.082937207579442e-06, "loss": 0.0227, "reward": 1.3201, "step": 5349 }, { "epoch": 0.64, "learning_rate": 8.82933035524135e-06, "loss": 0.0194, "reward": 1.2946, "step": 5399 }, { "epoch": 0.64, "learning_rate": 8.577829294272992e-06, "loss": 0.0206, "reward": 1.4951, "step": 5449 }, { "epoch": 0.65, "learning_rate": 8.328519851463702e-06, "loss": 0.0207, "reward": 1.5987, "step": 5499 }, { "epoch": 0.65, "learning_rate": 8.08148710569524e-06, "loss": 0.0202, "reward": 1.5224, "step": 5549 }, { "epoch": 0.66, "learning_rate": 7.836815358907908e-06, "loss": 0.0195, "reward": 1.5981, "step": 5599 }, { "epoch": 0.66, "learning_rate": 7.594588107331857e-06, "loss": 0.0198, "reward": 1.3337, "step": 5649 }, { "epoch": 0.67, "learning_rate": 7.354888012993293e-06, "loss": 0.0194, "reward": 1.503, "step": 5699 }, { "epoch": 0.68, "learning_rate": 7.117796875505393e-06, "loss": 0.0192, "reward": 1.3961, "step": 5749 }, { "epoch": 0.68, "learning_rate": 6.883395604153524e-06, "loss": 0.0204, "reward": 1.5568, "step": 5799 }, { "epoch": 0.69, "learning_rate": 6.651764190284266e-06, "loss": 0.0195, "reward": 1.4106, "step": 5849 }, { "epoch": 0.69, "learning_rate": 6.422981680007759e-06, "loss": 0.0191, "reward": 1.5962, "step": 5899 }, { "epoch": 0.7, "learning_rate": 6.197126147222517e-06, "loss": 0.0208, "reward": 1.5746, "step": 5949 }, { "epoch": 0.71, "learning_rate": 5.974274666972112e-06, "loss": 0.0201, "reward": 1.2134, "step": 5999 }, { "epoch": 0.71, "learning_rate": 5.754503289142692e-06, "loss": 0.0198, "reward": 1.6037, "step": 6049 }, { "epoch": 0.72, "learning_rate": 5.537887012510291e-06, "loss": 0.0199, "reward": 1.5427, "step": 6099 }, { "epoch": 0.72, "learning_rate": 5.324499759146934e-06, "loss": 0.0202, "reward": 1.5012, "step": 6149 }, { "epoch": 0.73, "learning_rate": 5.11441434919409e-06, "loss": 0.019, "reward": 1.6545, "step": 6199 }, { "epoch": 0.74, "learning_rate": 4.907702476012234e-06, "loss": 0.0187, "reward": 1.529, "step": 6249 }, { "epoch": 0.74, "learning_rate": 4.704434681714884e-06, "loss": 0.0195, "reward": 1.7154, "step": 6299 }, { "epoch": 0.75, "learning_rate": 4.504680333095542e-06, "loss": 0.0206, "reward": 1.549, "step": 6349 }, { "epoch": 0.75, "learning_rate": 4.308507597955685e-06, "loss": 0.0194, "reward": 1.7198, "step": 6399 }, { "epoch": 0.76, "learning_rate": 4.115983421841979e-06, "loss": 0.0194, "reward": 1.298, "step": 6449 }, { "epoch": 0.76, "learning_rate": 3.927173505200547e-06, "loss": 0.0196, "reward": 1.5776, "step": 6499 }, { "epoch": 0.77, "learning_rate": 3.742142280956153e-06, "loss": 0.0194, "reward": 1.4295, "step": 6549 }, { "epoch": 0.78, "learning_rate": 3.5609528925239476e-06, "loss": 0.0194, "reward": 1.4245, "step": 6599 }, { "epoch": 0.78, "learning_rate": 3.3836671722612646e-06, "loss": 0.0194, "reward": 1.5306, "step": 6649 }, { "epoch": 0.79, "learning_rate": 3.2103456203668223e-06, "loss": 0.0201, "reward": 1.4046, "step": 6699 }, { "epoch": 0.79, "learning_rate": 3.041047384234521e-06, "loss": 0.0178, "reward": 1.5582, "step": 6749 }, { "epoch": 0.8, "learning_rate": 2.875830238268942e-06, "loss": 0.0193, "reward": 1.4851, "step": 6799 }, { "epoch": 0.81, "learning_rate": 2.714750564169339e-06, "loss": 0.019, "reward": 1.5795, "step": 6849 }, { "epoch": 0.81, "learning_rate": 2.557863331688927e-06, "loss": 0.0193, "reward": 1.5771, "step": 6899 }, { "epoch": 0.82, "learning_rate": 2.405222079876017e-06, "loss": 0.0197, "reward": 1.4866, "step": 6949 }, { "epoch": 0.82, "learning_rate": 2.256878898803354e-06, "loss": 0.0198, "reward": 1.4232, "step": 6999 }, { "epoch": 0.83, "learning_rate": 2.112884411791984e-06, "loss": 0.0196, "reward": 1.5922, "step": 7049 }, { "epoch": 0.83, "learning_rate": 1.9732877581356075e-06, "loss": 0.0196, "reward": 1.5317, "step": 7099 }, { "epoch": 0.84, "learning_rate": 1.8381365763314151e-06, "loss": 0.019, "reward": 1.4884, "step": 7149 }, { "epoch": 0.85, "learning_rate": 1.7074769878230494e-06, "loss": 0.0181, "reward": 1.589, "step": 7199 }, { "epoch": 0.85, "learning_rate": 1.5813535812612856e-06, "loss": 0.019, "reward": 1.5272, "step": 7249 }, { "epoch": 0.86, "learning_rate": 1.4598093972878007e-06, "loss": 0.0204, "reward": 1.5452, "step": 7299 }, { "epoch": 0.86, "learning_rate": 1.3428859138471839e-06, "loss": 0.0188, "reward": 1.6511, "step": 7349 }, { "epoch": 0.87, "learning_rate": 1.2306230320322798e-06, "loss": 0.0192, "reward": 1.3072, "step": 7399 }, { "epoch": 0.88, "learning_rate": 1.1230590624675747e-06, "loss": 0.0194, "reward": 1.6043, "step": 7449 }, { "epoch": 0.88, "learning_rate": 1.0202307122354288e-06, "loss": 0.0185, "reward": 1.5806, "step": 7499 }, { "epoch": 0.89, "learning_rate": 9.221730723494504e-07, "loss": 0.0187, "reward": 1.3901, "step": 7549 }, { "epoch": 0.89, "learning_rate": 8.289196057794096e-07, "loss": 0.019, "reward": 1.6659, "step": 7599 }, { "epoch": 0.9, "learning_rate": 7.405021360317366e-07, "loss": 0.0192, "reward": 1.3981, "step": 7649 }, { "epoch": 0.91, "learning_rate": 6.569508362894783e-07, "loss": 0.0182, "reward": 1.6831, "step": 7699 }, { "epoch": 0.91, "learning_rate": 5.782942191154622e-07, "loss": 0.0188, "reward": 1.4812, "step": 7749 }, { "epoch": 0.92, "learning_rate": 5.045591267221461e-07, "loss": 0.0187, "reward": 1.7532, "step": 7799 }, { "epoch": 0.92, "learning_rate": 4.3577072181150035e-07, "loss": 0.0185, "reward": 1.6008, "step": 7849 }, { "epoch": 0.93, "learning_rate": 3.719524789880202e-07, "loss": 0.0192, "reward": 1.8267, "step": 7899 }, { "epoch": 0.93, "learning_rate": 3.1312617674783385e-07, "loss": 0.0188, "reward": 1.8393, "step": 7949 }, { "epoch": 0.94, "learning_rate": 2.5931189004661406e-07, "loss": 0.0182, "reward": 1.4103, "step": 7999 }, { "epoch": 0.95, "learning_rate": 2.1052798344882495e-07, "loss": 0.0187, "reward": 1.4023, "step": 8049 }, { "epoch": 0.95, "learning_rate": 1.667911048606785e-07, "loss": 0.0192, "reward": 1.5402, "step": 8099 }, { "epoch": 0.96, "learning_rate": 1.2811617984889056e-07, "loss": 0.0182, "reward": 1.5328, "step": 8149 }, { "epoch": 0.96, "learning_rate": 9.451640654721538e-08, "loss": 0.0183, "reward": 1.7633, "step": 8199 }, { "epoch": 0.97, "learning_rate": 6.600325115246831e-08, "loss": 0.0183, "reward": 1.5819, "step": 8249 }, { "epoch": 0.98, "learning_rate": 4.2586444011600835e-08, "loss": 0.0183, "reward": 1.6798, "step": 8299 }, { "epoch": 0.98, "learning_rate": 2.4273976301131818e-08, "loss": 0.0193, "reward": 1.742, "step": 8349 }, { "epoch": 0.99, "learning_rate": 1.1072097300102168e-08, "loss": 0.0189, "reward": 1.4572, "step": 8399 }, { "epoch": 0.99, "learning_rate": 2.98531225744747e-09, "loss": 0.0179, "reward": 1.4872, "step": 8449 }, { "epoch": 1.0, "learning_rate": 1.6380854554465253e-11, "loss": 0.0185, "reward": 1.9113, "step": 8499 } ], "max_steps": 8502, "num_train_epochs": 1.0, "total_flos": 0, "trial_name": null, "trial_params": null }