diff --git a/ppo_8_2/README.md b/ppo_8_2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/adapter_config.json b/ppo_8_2/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/adapter_model.bin b/ppo_8_2/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..579da99fba4feba910452049f8f79f6cc35612d3
--- /dev/null
+++ b/ppo_8_2/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c928225a000087e828682fd7dbb78c2e0967c4809838b2625c8172f6d7e5ea9
+size 26269517
diff --git a/ppo_8_2/checkpoint-1000/README.md b/ppo_8_2/checkpoint-1000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-1000/adapter_config.json b/ppo_8_2/checkpoint-1000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-1000/adapter_model.bin b/ppo_8_2/checkpoint-1000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ea1e3879354ee4abb7b3b8c60de1c9a77e6330dc
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3b38d60a11127e242f9a807b77448d47842e0f18af9424ae49afe5390c54a56
+size 26269517
diff --git a/ppo_8_2/checkpoint-1000/finetuning_args.json b/ppo_8_2/checkpoint-1000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-1000/reward/adapter_config.json b/ppo_8_2/checkpoint-1000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-1000/reward/adapter_model.bin b/ppo_8_2/checkpoint-1000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-1000/training_args.bin b/ppo_8_2/checkpoint-1000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-1000/value_head.bin b/ppo_8_2/checkpoint-1000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0780b373b36a36de6aace00560d74a951c42d5e9
--- /dev/null
+++ b/ppo_8_2/checkpoint-1000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee6bbe9deb8bed10ff62722d2119134fd8ec11e8e788cf2f711483bd7dc20395
+size 21491
diff --git a/ppo_8_2/checkpoint-1500/README.md b/ppo_8_2/checkpoint-1500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-1500/adapter_config.json b/ppo_8_2/checkpoint-1500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-1500/adapter_model.bin b/ppo_8_2/checkpoint-1500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cdcba43b3b73600758ef713be8253becef6ec33c
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b098c659862af7bed6745f049aab340876fc9b217030cdc029e3ce977b3ce4d6
+size 26269517
diff --git a/ppo_8_2/checkpoint-1500/finetuning_args.json b/ppo_8_2/checkpoint-1500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-1500/reward/adapter_config.json b/ppo_8_2/checkpoint-1500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-1500/reward/adapter_model.bin b/ppo_8_2/checkpoint-1500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-1500/training_args.bin b/ppo_8_2/checkpoint-1500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-1500/value_head.bin b/ppo_8_2/checkpoint-1500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5d70598d9878ac7acb9f799a748bbadf677a8f3c
--- /dev/null
+++ b/ppo_8_2/checkpoint-1500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d90d06cd75a2a587a6c8982bf0f843e42ecdd4626396bb9414094056407b0169
+size 21491
diff --git a/ppo_8_2/checkpoint-2000/README.md b/ppo_8_2/checkpoint-2000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-2000/adapter_config.json b/ppo_8_2/checkpoint-2000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-2000/adapter_model.bin b/ppo_8_2/checkpoint-2000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1895137e9225e14e566c451fda2c69c4483fe6f5
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:baf7b9f520e3c9685f93827f6fc58dfb518aa8e08b106d294e8b7636cc904b1a
+size 26269517
diff --git a/ppo_8_2/checkpoint-2000/finetuning_args.json b/ppo_8_2/checkpoint-2000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-2000/reward/adapter_config.json b/ppo_8_2/checkpoint-2000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-2000/reward/adapter_model.bin b/ppo_8_2/checkpoint-2000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-2000/training_args.bin b/ppo_8_2/checkpoint-2000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-2000/value_head.bin b/ppo_8_2/checkpoint-2000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eda5c1b01df7e42956ae6ec51f182a9af46665f3
--- /dev/null
+++ b/ppo_8_2/checkpoint-2000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:407f2258d250161d932305f5295f61b79a4e1abdf8cb52d7ac71febe14ed222d
+size 21491
diff --git a/ppo_8_2/checkpoint-2500/README.md b/ppo_8_2/checkpoint-2500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-2500/adapter_config.json b/ppo_8_2/checkpoint-2500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-2500/adapter_model.bin b/ppo_8_2/checkpoint-2500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..812a47dc3af515060d9c1ed7e7b119930771b6cb
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48c4a5692a05c7670f077797e6b91105f40fb45c93f02fdec53c1c8e5722b3e2
+size 26269517
diff --git a/ppo_8_2/checkpoint-2500/finetuning_args.json b/ppo_8_2/checkpoint-2500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-2500/reward/adapter_config.json b/ppo_8_2/checkpoint-2500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-2500/reward/adapter_model.bin b/ppo_8_2/checkpoint-2500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-2500/training_args.bin b/ppo_8_2/checkpoint-2500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-2500/value_head.bin b/ppo_8_2/checkpoint-2500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d491413fc82942e69e1e3e36de7f00b450801d89
--- /dev/null
+++ b/ppo_8_2/checkpoint-2500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:449a3bc0c64e508f458b2d25423d2b1b1cc4aefd2b77836d0d24438044d47764
+size 21491
diff --git a/ppo_8_2/checkpoint-3000/README.md b/ppo_8_2/checkpoint-3000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-3000/adapter_config.json b/ppo_8_2/checkpoint-3000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-3000/adapter_model.bin b/ppo_8_2/checkpoint-3000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c19dcee26dc1dc6660cf144d7c734754eef35fcc
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1cddedf88eb5949bf2f92a31823f7dae138a9b27e552274e7487121e55e1f8b
+size 26269517
diff --git a/ppo_8_2/checkpoint-3000/finetuning_args.json b/ppo_8_2/checkpoint-3000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-3000/reward/adapter_config.json b/ppo_8_2/checkpoint-3000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-3000/reward/adapter_model.bin b/ppo_8_2/checkpoint-3000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-3000/training_args.bin b/ppo_8_2/checkpoint-3000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-3000/value_head.bin b/ppo_8_2/checkpoint-3000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8fe515c2279724782c9c339ea8cac237c039ec7
--- /dev/null
+++ b/ppo_8_2/checkpoint-3000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff7d97a52142494b42184b5e5873fa764f12e053ace8543cca9d5ca06375419d
+size 21491
diff --git a/ppo_8_2/checkpoint-3500/README.md b/ppo_8_2/checkpoint-3500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-3500/adapter_config.json b/ppo_8_2/checkpoint-3500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-3500/adapter_model.bin b/ppo_8_2/checkpoint-3500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bdc4462534ed2e6ffd0566572c29e49cfb299f5d
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7386a10fda60048403b823666286102ca2a4970b45c718d62879e0ab3b302b5
+size 26269517
diff --git a/ppo_8_2/checkpoint-3500/finetuning_args.json b/ppo_8_2/checkpoint-3500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-3500/reward/adapter_config.json b/ppo_8_2/checkpoint-3500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-3500/reward/adapter_model.bin b/ppo_8_2/checkpoint-3500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-3500/training_args.bin b/ppo_8_2/checkpoint-3500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-3500/value_head.bin b/ppo_8_2/checkpoint-3500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6fa6c9afb8d62c08d3592ce78f26c0280526cffe
--- /dev/null
+++ b/ppo_8_2/checkpoint-3500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:819a0a2a22b8ce849cf2fba7f77bc7fc0064b3e562cafcb8d8afc26b7f55f1fd
+size 21491
diff --git a/ppo_8_2/checkpoint-4000/README.md b/ppo_8_2/checkpoint-4000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-4000/adapter_config.json b/ppo_8_2/checkpoint-4000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-4000/adapter_model.bin b/ppo_8_2/checkpoint-4000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..75dbb29379ef0feddee93ca915812648c8672caf
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:636d9c8c4301dab64e144c3b35ca38f1227d076daf12c87b0db7180dba406e43
+size 26269517
diff --git a/ppo_8_2/checkpoint-4000/finetuning_args.json b/ppo_8_2/checkpoint-4000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-4000/reward/adapter_config.json b/ppo_8_2/checkpoint-4000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-4000/reward/adapter_model.bin b/ppo_8_2/checkpoint-4000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-4000/training_args.bin b/ppo_8_2/checkpoint-4000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-4000/value_head.bin b/ppo_8_2/checkpoint-4000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a54dd67c681eff75ad418c4c4e30f578448c7179
--- /dev/null
+++ b/ppo_8_2/checkpoint-4000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0208d2c0bd5485502a8eb5dbc747fdbd734f372f20eee9ce893e06daf6423244
+size 21491
diff --git a/ppo_8_2/checkpoint-4500/README.md b/ppo_8_2/checkpoint-4500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-4500/adapter_config.json b/ppo_8_2/checkpoint-4500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-4500/adapter_model.bin b/ppo_8_2/checkpoint-4500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4e8a500164c89ff0ada3939056542cad024c7115
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d523f75bd5a882319eb104414b4751f3d72656d0d64ee4df7142b001e181b81e
+size 26269517
diff --git a/ppo_8_2/checkpoint-4500/finetuning_args.json b/ppo_8_2/checkpoint-4500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-4500/reward/adapter_config.json b/ppo_8_2/checkpoint-4500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-4500/reward/adapter_model.bin b/ppo_8_2/checkpoint-4500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-4500/training_args.bin b/ppo_8_2/checkpoint-4500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-4500/value_head.bin b/ppo_8_2/checkpoint-4500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..12cebb07726b39d541c8965d1da8a69e78edeef8
--- /dev/null
+++ b/ppo_8_2/checkpoint-4500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ee1548f2463b399fe9794edc05002090a5361bc0921c27846c55cccebd05909
+size 21491
diff --git a/ppo_8_2/checkpoint-500/README.md b/ppo_8_2/checkpoint-500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-500/adapter_config.json b/ppo_8_2/checkpoint-500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-500/adapter_model.bin b/ppo_8_2/checkpoint-500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f795cd9cdb5791bb4e69d294057bd15924a11483
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e69cd44521f0426a4d36061f27077e14fd31fa39e5e4e15142263c8d2e5bfe5
+size 26269517
diff --git a/ppo_8_2/checkpoint-500/finetuning_args.json b/ppo_8_2/checkpoint-500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-500/reward/adapter_config.json b/ppo_8_2/checkpoint-500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-500/reward/adapter_model.bin b/ppo_8_2/checkpoint-500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-500/training_args.bin b/ppo_8_2/checkpoint-500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-500/value_head.bin b/ppo_8_2/checkpoint-500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..40b2f78657a828d4851fe7ed26d69db95fa67a79
--- /dev/null
+++ b/ppo_8_2/checkpoint-500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e70c587a29c43d1c3a4116ce5e05e7ceafe219b2bb35a38dfd13ecbb2e3260dd
+size 21491
diff --git a/ppo_8_2/checkpoint-5000/README.md b/ppo_8_2/checkpoint-5000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-5000/adapter_config.json b/ppo_8_2/checkpoint-5000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-5000/adapter_model.bin b/ppo_8_2/checkpoint-5000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..648bd5bb72c64ae68a7608239b2e07f29a146f92
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e154af658dbb388dd3281a415ccd42d866d64650aedc154f048160a577fdb2ec
+size 26269517
diff --git a/ppo_8_2/checkpoint-5000/finetuning_args.json b/ppo_8_2/checkpoint-5000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-5000/reward/adapter_config.json b/ppo_8_2/checkpoint-5000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-5000/reward/adapter_model.bin b/ppo_8_2/checkpoint-5000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-5000/training_args.bin b/ppo_8_2/checkpoint-5000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-5000/value_head.bin b/ppo_8_2/checkpoint-5000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7f82be8d541925b6ad512d0191b3f03f715afb5a
--- /dev/null
+++ b/ppo_8_2/checkpoint-5000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2615ac70a46b192c94efcc383999223dd127f686c24c281c03a79963c3a39e7a
+size 21491
diff --git a/ppo_8_2/checkpoint-5500/README.md b/ppo_8_2/checkpoint-5500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-5500/adapter_config.json b/ppo_8_2/checkpoint-5500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-5500/adapter_model.bin b/ppo_8_2/checkpoint-5500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a9a746a6f54c478ee4b5542dd507d4cd1e790daa
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c59063a011bf62faf492d7c0656da9509b7b61447a2126263cadae0aad70ecc
+size 26269517
diff --git a/ppo_8_2/checkpoint-5500/finetuning_args.json b/ppo_8_2/checkpoint-5500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-5500/reward/adapter_config.json b/ppo_8_2/checkpoint-5500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-5500/reward/adapter_model.bin b/ppo_8_2/checkpoint-5500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-5500/training_args.bin b/ppo_8_2/checkpoint-5500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-5500/value_head.bin b/ppo_8_2/checkpoint-5500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fbe60e55371c37e60d1023429b5097ffc257917e
--- /dev/null
+++ b/ppo_8_2/checkpoint-5500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1c392565525874d2cac82d848d3304cf5ba1d0fad87835ef6bcc3efe6692cc1
+size 21491
diff --git a/ppo_8_2/checkpoint-6000/README.md b/ppo_8_2/checkpoint-6000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-6000/adapter_config.json b/ppo_8_2/checkpoint-6000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-6000/adapter_model.bin b/ppo_8_2/checkpoint-6000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ce121ad2553e40b5dbb34bf83c54b65566686dc
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e58043f6550524db22574c957dc61f398d2d0de4afa9c08aa7d4a01ba88b7b2
+size 26269517
diff --git a/ppo_8_2/checkpoint-6000/finetuning_args.json b/ppo_8_2/checkpoint-6000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-6000/reward/adapter_config.json b/ppo_8_2/checkpoint-6000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-6000/reward/adapter_model.bin b/ppo_8_2/checkpoint-6000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-6000/training_args.bin b/ppo_8_2/checkpoint-6000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-6000/value_head.bin b/ppo_8_2/checkpoint-6000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b7468e6d6219c27f88403c7fb0e34f5729667a56
--- /dev/null
+++ b/ppo_8_2/checkpoint-6000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9063dda452cba68334945cb0a8e6aa62ecb4bf394327aee0d7cbac951c16e83
+size 21491
diff --git a/ppo_8_2/checkpoint-6500/README.md b/ppo_8_2/checkpoint-6500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-6500/adapter_config.json b/ppo_8_2/checkpoint-6500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-6500/adapter_model.bin b/ppo_8_2/checkpoint-6500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8aaa19c9d9ccf112331f2c1047cca0fc92ce6311
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a7e9c575152374fad2fa519a4e6127c5899608e758ae633a0f41d119eeeed0c
+size 26269517
diff --git a/ppo_8_2/checkpoint-6500/finetuning_args.json b/ppo_8_2/checkpoint-6500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-6500/reward/adapter_config.json b/ppo_8_2/checkpoint-6500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-6500/reward/adapter_model.bin b/ppo_8_2/checkpoint-6500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-6500/training_args.bin b/ppo_8_2/checkpoint-6500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-6500/value_head.bin b/ppo_8_2/checkpoint-6500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6debe71240d0cd933f15a71f26cd3a9618bf46cb
--- /dev/null
+++ b/ppo_8_2/checkpoint-6500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66c3d5e550039288850cb336e245d0be411c82e70f25937d9007e5ad07bf8808
+size 21491
diff --git a/ppo_8_2/checkpoint-7000/README.md b/ppo_8_2/checkpoint-7000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-7000/adapter_config.json b/ppo_8_2/checkpoint-7000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-7000/adapter_model.bin b/ppo_8_2/checkpoint-7000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ac57109cfea662865cfcbfbc1e15279e1adb82af
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:261443bba172469172307b549bf0fbec792d7018676fc07c34f827a68a964846
+size 26269517
diff --git a/ppo_8_2/checkpoint-7000/finetuning_args.json b/ppo_8_2/checkpoint-7000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-7000/reward/adapter_config.json b/ppo_8_2/checkpoint-7000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-7000/reward/adapter_model.bin b/ppo_8_2/checkpoint-7000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-7000/training_args.bin b/ppo_8_2/checkpoint-7000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-7000/value_head.bin b/ppo_8_2/checkpoint-7000/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c081eb4af7d5e25e36d7e68820b053ad4fbdd1e7
--- /dev/null
+++ b/ppo_8_2/checkpoint-7000/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:872080161b9ad90f0fa00aa4221fa933365a78926bc5fa0ac736aa4eab3f088f
+size 21491
diff --git a/ppo_8_2/checkpoint-7500/README.md b/ppo_8_2/checkpoint-7500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-7500/adapter_config.json b/ppo_8_2/checkpoint-7500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-7500/adapter_model.bin b/ppo_8_2/checkpoint-7500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5f189ac584c9e74630da36f609b2ce7be49ccbea
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffe9634dec7ba8ee288ccc0fcff67674fd79b28c4264537d89bc68d485c04acc
+size 26269517
diff --git a/ppo_8_2/checkpoint-7500/finetuning_args.json b/ppo_8_2/checkpoint-7500/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-7500/reward/adapter_config.json b/ppo_8_2/checkpoint-7500/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-7500/reward/adapter_model.bin b/ppo_8_2/checkpoint-7500/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-7500/training_args.bin b/ppo_8_2/checkpoint-7500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-7500/value_head.bin b/ppo_8_2/checkpoint-7500/value_head.bin
new file mode 100644
index 0000000000000000000000000000000000000000..92b5644603a71efe386a8a7a1cc3b1072b5ff0d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-7500/value_head.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21581624b633c3162fc885dedd888209a8983dfde0a7b56ae97bdfd8da4a1f2e
+size 21491
diff --git a/ppo_8_2/checkpoint-8000/README.md b/ppo_8_2/checkpoint-8000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/README.md
@@ -0,0 +1,9 @@
+---
+library_name: peft
+---
+## Training procedure
+
+### Framework versions
+
+
+- PEFT 0.4.0
diff --git a/ppo_8_2/checkpoint-8000/adapter_config.json b/ppo_8_2/checkpoint-8000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-8000/adapter_model.bin b/ppo_8_2/checkpoint-8000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f4735c9bd726a07760e69f779cbd11f116928f05
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:268645a7e2b5ed6dd8ec9bed6ae70409ada123a1dfb30e0911ca343f2d55db32
+size 26269517
diff --git a/ppo_8_2/checkpoint-8000/finetuning_args.json b/ppo_8_2/checkpoint-8000/finetuning_args.json
new file mode 100644
index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/finetuning_args.json
@@ -0,0 +1,13 @@
+{
+  "finetuning_type": "lora",
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "lora_rank": 8,
+  "lora_target": [
+    "q_proj",
+    "v_proj"
+  ],
+  "name_module_trainable": "mlp",
+  "num_hidden_layers": 32,
+  "num_layer_trainable": 3
+}
diff --git a/ppo_8_2/checkpoint-8000/reward/adapter_config.json b/ppo_8_2/checkpoint-8000/reward/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/reward/adapter_config.json
@@ -0,0 +1,21 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ppo_8_2/checkpoint-8000/reward/adapter_model.bin b/ppo_8_2/checkpoint-8000/reward/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/reward/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+size 443
diff --git a/ppo_8_2/checkpoint-8000/training_args.bin b/ppo_8_2/checkpoint-8000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd
--- /dev/null
+++ b/ppo_8_2/checkpoint-8000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
+size 3359
diff --git a/ppo_8_2/checkpoint-8000/value_head.bin b/ppo_8_2/checkpoint-8000/value_head.bin
new file mode 100644 index 0000000000000000000000000000000000000000..b51d7961b54deac3fa0716708ae91db2cf94ffe3 --- /dev/null +++ b/ppo_8_2/checkpoint-8000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce317795d235d16c6edef4450f3f6c418f2380b2547189c5f370fe73f319343 +size 21491 diff --git a/ppo_8_2/checkpoint-8500/README.md b/ppo_8_2/checkpoint-8500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d4576fe074287232d3836bf69c21d3f2593290d9 --- /dev/null +++ b/ppo_8_2/checkpoint-8500/README.md @@ -0,0 +1,9 @@ +--- +library_name: peft +--- +## Training procedure + +### Framework versions + + +- PEFT 0.4.0 diff --git a/ppo_8_2/checkpoint-8500/adapter_config.json b/ppo_8_2/checkpoint-8500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd --- /dev/null +++ b/ppo_8_2/checkpoint-8500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ppo_8_2/checkpoint-8500/adapter_model.bin b/ppo_8_2/checkpoint-8500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e72ced34baf13333faf94fd1e07578fc652ee843 --- /dev/null +++ b/ppo_8_2/checkpoint-8500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed162f7feb5e09cf609b6eaab8358703abe0f673651a0594ec8bcbfc91af7a6c +size 26269517 diff --git a/ppo_8_2/checkpoint-8500/finetuning_args.json b/ppo_8_2/checkpoint-8500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec --- /dev/null +++ b/ppo_8_2/checkpoint-8500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/ppo_8_2/checkpoint-8500/reward/adapter_config.json b/ppo_8_2/checkpoint-8500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd --- /dev/null +++ b/ppo_8_2/checkpoint-8500/reward/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ppo_8_2/checkpoint-8500/reward/adapter_model.bin b/ppo_8_2/checkpoint-8500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/ppo_8_2/checkpoint-8500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/ppo_8_2/checkpoint-8500/training_args.bin b/ppo_8_2/checkpoint-8500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd --- /dev/null +++ b/ppo_8_2/checkpoint-8500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2 +size 3359 diff --git a/ppo_8_2/checkpoint-8500/value_head.bin b/ppo_8_2/checkpoint-8500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..d377b59abe6e0167469e6f42f18fd65ef9aa34a5 --- /dev/null +++ b/ppo_8_2/checkpoint-8500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2880a39851a55b327475a27c4ae651df874293872cc70982ef742d80d245002c +size 21491 diff --git a/ppo_8_2/finetuning_args.json b/ppo_8_2/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..36a921b3eb84159ad54e7697e9d7d3e2fde38fec --- /dev/null +++ b/ppo_8_2/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/ppo_8_2/reward/adapter_config.json b/ppo_8_2/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..68a712347e91c68b7c105a17272d3e1ff785e5fd --- /dev/null +++ b/ppo_8_2/reward/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ppo_8_2/reward/adapter_model.bin b/ppo_8_2/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/ppo_8_2/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/ppo_8_2/trainer_log.jsonl b/ppo_8_2/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4aed6691d928f557e8fb27e7392a9148dc157ada --- /dev/null +++ b/ppo_8_2/trainer_log.jsonl @@ -0,0 +1,170 @@ +{"current_steps": 49, "total_steps": 8502, "loss": 0.0855, "eval_loss": null, "predict_loss": null, "reward": 0.814, "learning_rate": 2.9997541914704325e-05, "epoch": 0.01, "percentage": 0.58, "elapsed_time": "0:10:46", "remaining_time": "1 day, 6:58:22"} +{"current_steps": 99, "total_steps": 8502, "loss": 0.0814, "eval_loss": null, "predict_loss": null, "reward": 0.9376, "learning_rate": 2.9989966819651966e-05, "epoch": 0.01, "percentage": 1.16, "elapsed_time": "0:21:04", "remaining_time": "1 day, 5:49:14"} +{"current_steps": 149, "total_steps": 8502, "loss": 0.0734, "eval_loss": null, "predict_loss": null, "reward": 0.8684, "learning_rate": 2.9977276276102345e-05, "epoch": 0.02, "percentage": 1.75, "elapsed_time": "0:31:33", "remaining_time": "1 day, 5:29:01"} 
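
The trainer_log.jsonl that begins above records one JSON object per logging interval (every 50 optimizer steps out of 8502) with `loss`, `reward`, `learning_rate`, `epoch`, and timing fields; the reward column drifts from roughly 0.8 at step 49 to about 1.9 by step 8499 while the PPO loss falls from ~0.086 to ~0.019. A minimal sketch for turning this log into reward/loss curves — the file location and the use of matplotlib are assumptions, not part of this repository:

```python
import json
from pathlib import Path

import matplotlib.pyplot as plt

# Assumed path: adjust to wherever this checkpoint directory was cloned.
log_path = Path("ppo_8_2/trainer_log.jsonl")

# Each line of the log is a self-contained JSON record, so parse line by line.
records = [json.loads(line) for line in log_path.read_text().splitlines() if line.strip()]

steps = [r["current_steps"] for r in records]
rewards = [r["reward"] for r in records]
losses = [r["loss"] for r in records]

fig, (ax_r, ax_l) = plt.subplots(2, 1, sharex=True)
ax_r.plot(steps, rewards)
ax_r.set_ylabel("mean reward")
ax_l.plot(steps, losses)
ax_l.set_ylabel("PPO loss")
ax_l.set_xlabel("optimizer step")
fig.savefig("ppo_8_2_curves.png", dpi=150)
```
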
+{"current_steps": 199, "total_steps": 8502, "loss": 0.0716, "eval_loss": null, "predict_loss": null, "reward": 0.8537, "learning_rate": 2.995947461480701e-05, "epoch": 0.02, "percentage": 2.34, "elapsed_time": "0:41:58", "remaining_time": "1 day, 5:11:10"} +{"current_steps": 249, "total_steps": 8502, "loss": 0.0603, "eval_loss": null, "predict_loss": null, "reward": 0.6429, "learning_rate": 2.9936567910728124e-05, "epoch": 0.03, "percentage": 2.93, "elapsed_time": "0:52:59", "remaining_time": "1 day, 5:16:15"} +{"current_steps": 299, "total_steps": 8502, "loss": 0.0624, "eval_loss": null, "predict_loss": null, "reward": 0.641, "learning_rate": 2.9908563980965393e-05, "epoch": 0.04, "percentage": 3.52, "elapsed_time": "1:03:42", "remaining_time": "1 day, 5:07:55"} +{"current_steps": 349, "total_steps": 8502, "loss": 0.0631, "eval_loss": null, "predict_loss": null, "reward": 0.8062, "learning_rate": 2.9875472382088355e-05, "epoch": 0.04, "percentage": 4.1, "elapsed_time": "1:14:57", "remaining_time": "1 day, 5:11:05"} +{"current_steps": 399, "total_steps": 8502, "loss": 0.0577, "eval_loss": null, "predict_loss": null, "reward": 0.5326, "learning_rate": 2.9837304406875167e-05, "epoch": 0.05, "percentage": 4.69, "elapsed_time": "1:25:13", "remaining_time": "1 day, 4:50:55"} +{"current_steps": 449, "total_steps": 8502, "loss": 0.0556, "eval_loss": null, "predict_loss": null, "reward": 0.9085, "learning_rate": 2.9794073080458815e-05, "epoch": 0.05, "percentage": 5.28, "elapsed_time": "1:36:21", "remaining_time": "1 day, 4:48:11"} +{"current_steps": 499, "total_steps": 8502, "loss": 0.055, "eval_loss": null, "predict_loss": null, "reward": 0.9616, "learning_rate": 2.9745793155882214e-05, "epoch": 0.06, "percentage": 5.87, "elapsed_time": "1:46:57", "remaining_time": "1 day, 4:35:28"} +{"current_steps": 549, "total_steps": 8502, "loss": 0.0525, "eval_loss": null, "predict_loss": null, "reward": 0.899, "learning_rate": 2.9692481109063605e-05, "epoch": 0.06, "percentage": 6.46, "elapsed_time": "1:56:48", "remaining_time": "1 day, 4:12:07"} +{"current_steps": 599, "total_steps": 8502, "loss": 0.0518, "eval_loss": null, "predict_loss": null, "reward": 0.6793, "learning_rate": 2.963415513317399e-05, "epoch": 0.07, "percentage": 7.05, "elapsed_time": "2:07:22", "remaining_time": "1 day, 4:00:27"} +{"current_steps": 649, "total_steps": 8502, "loss": 0.0546, "eval_loss": null, "predict_loss": null, "reward": 0.8445, "learning_rate": 2.9570835132428594e-05, "epoch": 0.08, "percentage": 7.63, "elapsed_time": "2:17:27", "remaining_time": "1 day, 3:43:14"} +{"current_steps": 699, "total_steps": 8502, "loss": 0.0532, "eval_loss": null, "predict_loss": null, "reward": 0.7952, "learning_rate": 2.9502542715294366e-05, "epoch": 0.08, "percentage": 8.22, "elapsed_time": "2:27:04", "remaining_time": "1 day, 3:21:51"} +{"current_steps": 749, "total_steps": 8502, "loss": 0.0515, "eval_loss": null, "predict_loss": null, "reward": 0.8009, "learning_rate": 2.942930118711593e-05, "epoch": 0.09, "percentage": 8.81, "elapsed_time": "2:37:41", "remaining_time": "1 day, 3:12:13"} +{"current_steps": 799, "total_steps": 8502, "loss": 0.0445, "eval_loss": null, "predict_loss": null, "reward": 1.1039, "learning_rate": 2.9351135542162432e-05, "epoch": 0.09, "percentage": 9.4, "elapsed_time": "2:48:32", "remaining_time": "1 day, 3:04:49"} +{"current_steps": 849, "total_steps": 8502, "loss": 0.0447, "eval_loss": null, "predict_loss": null, "reward": 0.858, "learning_rate": 2.9268072455098065e-05, "epoch": 0.1, "percentage": 9.99, 
"elapsed_time": "2:58:43", "remaining_time": "1 day, 2:51:00"} +{"current_steps": 899, "total_steps": 8502, "loss": 0.0455, "eval_loss": null, "predict_loss": null, "reward": 0.9129, "learning_rate": 2.918014027187909e-05, "epoch": 0.11, "percentage": 10.57, "elapsed_time": "3:08:42", "remaining_time": "1 day, 2:35:57"} +{"current_steps": 949, "total_steps": 8502, "loss": 0.0459, "eval_loss": null, "predict_loss": null, "reward": 0.9215, "learning_rate": 2.9087369000080567e-05, "epoch": 0.11, "percentage": 11.16, "elapsed_time": "3:19:25", "remaining_time": "1 day, 2:27:08"} +{"current_steps": 999, "total_steps": 8502, "loss": 0.0424, "eval_loss": null, "predict_loss": null, "reward": 0.8282, "learning_rate": 2.898979029865602e-05, "epoch": 0.12, "percentage": 11.75, "elapsed_time": "3:30:06", "remaining_time": "1 day, 2:18:02"} +{"current_steps": 1049, "total_steps": 8502, "loss": 0.0437, "eval_loss": null, "predict_loss": null, "reward": 0.9707, "learning_rate": 2.888743746713357e-05, "epoch": 0.12, "percentage": 12.34, "elapsed_time": "3:40:26", "remaining_time": "1 day, 2:06:14"} +{"current_steps": 1099, "total_steps": 8502, "loss": 0.041, "eval_loss": null, "predict_loss": null, "reward": 0.796, "learning_rate": 2.8780345434252185e-05, "epoch": 0.13, "percentage": 12.93, "elapsed_time": "3:51:10", "remaining_time": "1 day, 1:57:16"} +{"current_steps": 1149, "total_steps": 8502, "loss": 0.0468, "eval_loss": null, "predict_loss": null, "reward": 0.9538, "learning_rate": 2.8668550746041966e-05, "epoch": 0.14, "percentage": 13.51, "elapsed_time": "4:01:22", "remaining_time": "1 day, 1:44:43"} +{"current_steps": 1199, "total_steps": 8502, "loss": 0.0409, "eval_loss": null, "predict_loss": null, "reward": 0.8948, "learning_rate": 2.8552091553352533e-05, "epoch": 0.14, "percentage": 14.1, "elapsed_time": "4:11:36", "remaining_time": "1 day, 1:32:33"} +{"current_steps": 1249, "total_steps": 8502, "loss": 0.0408, "eval_loss": null, "predict_loss": null, "reward": 0.7338, "learning_rate": 2.8431007598833705e-05, "epoch": 0.15, "percentage": 14.69, "elapsed_time": "4:21:58", "remaining_time": "1 day, 1:21:19"} +{"current_steps": 1299, "total_steps": 8502, "loss": 0.0392, "eval_loss": null, "predict_loss": null, "reward": 0.8156, "learning_rate": 2.830534020337303e-05, "epoch": 0.15, "percentage": 15.28, "elapsed_time": "4:32:14", "remaining_time": "1 day, 1:09:35"} +{"current_steps": 1349, "total_steps": 8502, "loss": 0.0375, "eval_loss": null, "predict_loss": null, "reward": 0.7625, "learning_rate": 2.817513225199466e-05, "epoch": 0.16, "percentage": 15.87, "elapsed_time": "4:42:43", "remaining_time": "1 day, 0:59:06"} +{"current_steps": 1399, "total_steps": 8502, "loss": 0.04, "eval_loss": null, "predict_loss": null, "reward": 0.9719, "learning_rate": 2.8040428179224528e-05, "epoch": 0.16, "percentage": 16.45, "elapsed_time": "4:53:10", "remaining_time": "1 day, 0:48:28"} +{"current_steps": 1449, "total_steps": 8502, "loss": 0.034, "eval_loss": null, "predict_loss": null, "reward": 0.8545, "learning_rate": 2.790127395392666e-05, "epoch": 0.17, "percentage": 17.04, "elapsed_time": "5:03:31", "remaining_time": "1 day, 0:37:22"} +{"current_steps": 1499, "total_steps": 8502, "loss": 0.0437, "eval_loss": null, "predict_loss": null, "reward": 0.8889, "learning_rate": 2.7757717063615962e-05, "epoch": 0.18, "percentage": 17.63, "elapsed_time": "5:14:43", "remaining_time": "1 day, 0:30:19"} +{"current_steps": 1549, "total_steps": 8502, "loss": 0.0355, "eval_loss": null, "predict_loss": null, "reward": 
1.2005, "learning_rate": 2.7609806498252692e-05, "epoch": 0.18, "percentage": 18.22, "elapsed_time": "5:25:39", "remaining_time": "1 day, 0:21:47"} +{"current_steps": 1599, "total_steps": 8502, "loss": 0.0368, "eval_loss": null, "predict_loss": null, "reward": 1.0282, "learning_rate": 2.745759273352425e-05, "epoch": 0.19, "percentage": 18.81, "elapsed_time": "5:36:29", "remaining_time": "1 day, 0:12:40"} +{"current_steps": 1649, "total_steps": 8502, "loss": 0.0346, "eval_loss": null, "predict_loss": null, "reward": 0.9873, "learning_rate": 2.7301127713619938e-05, "epoch": 0.19, "percentage": 19.4, "elapsed_time": "5:47:44", "remaining_time": "1 day, 0:05:11"} +{"current_steps": 1699, "total_steps": 8502, "loss": 0.034, "eval_loss": null, "predict_loss": null, "reward": 0.7074, "learning_rate": 2.7140464833504564e-05, "epoch": 0.2, "percentage": 19.98, "elapsed_time": "5:58:27", "remaining_time": "23:55:16"} +{"current_steps": 1749, "total_steps": 8502, "loss": 0.0334, "eval_loss": null, "predict_loss": null, "reward": 0.9238, "learning_rate": 2.6975658920697006e-05, "epoch": 0.21, "percentage": 20.57, "elapsed_time": "6:09:13", "remaining_time": "23:45:35"} +{"current_steps": 1799, "total_steps": 8502, "loss": 0.0317, "eval_loss": null, "predict_loss": null, "reward": 0.8791, "learning_rate": 2.680676621655984e-05, "epoch": 0.21, "percentage": 21.16, "elapsed_time": "6:19:58", "remaining_time": "23:35:47"} +{"current_steps": 1849, "total_steps": 8502, "loss": 0.0348, "eval_loss": null, "predict_loss": null, "reward": 1.1593, "learning_rate": 2.663384435710654e-05, "epoch": 0.22, "percentage": 21.75, "elapsed_time": "6:31:32", "remaining_time": "23:28:50"} +{"current_steps": 1899, "total_steps": 8502, "loss": 0.0309, "eval_loss": null, "predict_loss": null, "reward": 1.0537, "learning_rate": 2.6456952353332712e-05, "epoch": 0.22, "percentage": 22.34, "elapsed_time": "6:42:44", "remaining_time": "23:20:22"} +{"current_steps": 1949, "total_steps": 8502, "loss": 0.0333, "eval_loss": null, "predict_loss": null, "reward": 1.0686, "learning_rate": 2.6276150571078108e-05, "epoch": 0.23, "percentage": 22.92, "elapsed_time": "6:53:21", "remaining_time": "23:09:47"} +{"current_steps": 1999, "total_steps": 8502, "loss": 0.0355, "eval_loss": null, "predict_loss": null, "reward": 1.0331, "learning_rate": 2.6091500710426278e-05, "epoch": 0.24, "percentage": 23.51, "elapsed_time": "7:04:11", "remaining_time": "22:59:55"} +{"current_steps": 2049, "total_steps": 8502, "loss": 0.0331, "eval_loss": null, "predict_loss": null, "reward": 0.8029, "learning_rate": 2.5903065784648947e-05, "epoch": 0.24, "percentage": 24.1, "elapsed_time": "7:14:51", "remaining_time": "22:49:29"} +{"current_steps": 2099, "total_steps": 8502, "loss": 0.0324, "eval_loss": null, "predict_loss": null, "reward": 0.9726, "learning_rate": 2.5710910098702187e-05, "epoch": 0.25, "percentage": 24.69, "elapsed_time": "7:25:23", "remaining_time": "22:38:38"} +{"current_steps": 2149, "total_steps": 8502, "loss": 0.0322, "eval_loss": null, "predict_loss": null, "reward": 0.9616, "learning_rate": 2.5515099227281836e-05, "epoch": 0.25, "percentage": 25.28, "elapsed_time": "7:36:33", "remaining_time": "22:29:41"} +{"current_steps": 2199, "total_steps": 8502, "loss": 0.0322, "eval_loss": null, "predict_loss": null, "reward": 0.9011, "learning_rate": 2.5315699992445617e-05, "epoch": 0.26, "percentage": 25.86, "elapsed_time": "7:47:25", "remaining_time": "22:19:46"} +{"current_steps": 2249, "total_steps": 8502, "loss": 0.0314, "eval_loss": null, 
"predict_loss": null, "reward": 0.7529, "learning_rate": 2.511278044080954e-05, "epoch": 0.26, "percentage": 26.45, "elapsed_time": "7:58:28", "remaining_time": "22:10:20"} +{"current_steps": 2299, "total_steps": 8502, "loss": 0.0286, "eval_loss": null, "predict_loss": null, "reward": 1.1313, "learning_rate": 2.4906409820326436e-05, "epoch": 0.27, "percentage": 27.04, "elapsed_time": "8:09:42", "remaining_time": "22:01:16"} +{"current_steps": 2349, "total_steps": 8502, "loss": 0.0285, "eval_loss": null, "predict_loss": null, "reward": 1.032, "learning_rate": 2.4696658556654575e-05, "epoch": 0.28, "percentage": 27.63, "elapsed_time": "8:21:38", "remaining_time": "21:54:01"} +{"current_steps": 2399, "total_steps": 8502, "loss": 0.0296, "eval_loss": null, "predict_loss": null, "reward": 1.0514, "learning_rate": 2.4483598229124274e-05, "epoch": 0.28, "percentage": 28.22, "elapsed_time": "8:33:20", "remaining_time": "21:45:54"} +{"current_steps": 2449, "total_steps": 8502, "loss": 0.0308, "eval_loss": null, "predict_loss": null, "reward": 1.1721, "learning_rate": 2.42673015463109e-05, "epoch": 0.29, "percentage": 28.8, "elapsed_time": "8:44:42", "remaining_time": "21:36:53"} +{"current_steps": 2499, "total_steps": 8502, "loss": 0.0288, "eval_loss": null, "predict_loss": null, "reward": 1.2602, "learning_rate": 2.404784232122248e-05, "epoch": 0.29, "percentage": 29.39, "elapsed_time": "8:55:49", "remaining_time": "21:27:07"} +{"current_steps": 2549, "total_steps": 8502, "loss": 0.0274, "eval_loss": null, "predict_loss": null, "reward": 1.3181, "learning_rate": 2.382529544611038e-05, "epoch": 0.3, "percentage": 29.98, "elapsed_time": "9:07:30", "remaining_time": "21:18:40"} +{"current_steps": 2599, "total_steps": 8502, "loss": 0.0302, "eval_loss": null, "predict_loss": null, "reward": 0.8566, "learning_rate": 2.3599736866911756e-05, "epoch": 0.31, "percentage": 30.57, "elapsed_time": "9:17:53", "remaining_time": "21:07:06"} +{"current_steps": 2649, "total_steps": 8502, "loss": 0.0324, "eval_loss": null, "predict_loss": null, "reward": 1.1646, "learning_rate": 2.3371243557332333e-05, "epoch": 0.31, "percentage": 31.16, "elapsed_time": "9:28:48", "remaining_time": "20:56:47"} +{"current_steps": 2699, "total_steps": 8502, "loss": 0.028, "eval_loss": null, "predict_loss": null, "reward": 1.3095, "learning_rate": 2.313989349257855e-05, "epoch": 0.32, "percentage": 31.75, "elapsed_time": "9:39:10", "remaining_time": "20:45:15"} +{"current_steps": 2749, "total_steps": 8502, "loss": 0.0294, "eval_loss": null, "predict_loss": null, "reward": 0.9202, "learning_rate": 2.2905765622747843e-05, "epoch": 0.32, "percentage": 32.33, "elapsed_time": "9:50:13", "remaining_time": "20:35:11"} +{"current_steps": 2799, "total_steps": 8502, "loss": 0.0299, "eval_loss": null, "predict_loss": null, "reward": 0.9073, "learning_rate": 2.266893984588631e-05, "epoch": 0.33, "percentage": 32.92, "elapsed_time": "10:01:00", "remaining_time": "20:24:33"} +{"current_steps": 2849, "total_steps": 8502, "loss": 0.0264, "eval_loss": null, "predict_loss": null, "reward": 1.0046, "learning_rate": 2.242949698072283e-05, "epoch": 0.34, "percentage": 33.51, "elapsed_time": "10:12:17", "remaining_time": "20:14:54"} +{"current_steps": 2899, "total_steps": 8502, "loss": 0.0317, "eval_loss": null, "predict_loss": null, "reward": 1.1227, "learning_rate": 2.2187518739089033e-05, "epoch": 0.34, "percentage": 34.1, "elapsed_time": "10:24:04", "remaining_time": "20:06:09"} +{"current_steps": 2949, "total_steps": 8502, "loss": 0.0276, "eval_loss": 
null, "predict_loss": null, "reward": 1.0356, "learning_rate": 2.194308769803444e-05, "epoch": 0.35, "percentage": 34.69, "elapsed_time": "10:35:53", "remaining_time": "19:57:24"} +{"current_steps": 2999, "total_steps": 8502, "loss": 0.0253, "eval_loss": null, "predict_loss": null, "reward": 1.1648, "learning_rate": 2.1696287271646406e-05, "epoch": 0.35, "percentage": 35.27, "elapsed_time": "10:47:34", "remaining_time": "19:48:15"} +{"current_steps": 3049, "total_steps": 8502, "loss": 0.026, "eval_loss": null, "predict_loss": null, "reward": 1.155, "learning_rate": 2.1447201682584356e-05, "epoch": 0.36, "percentage": 35.86, "elapsed_time": "10:59:28", "remaining_time": "19:39:25"} +{"current_steps": 3099, "total_steps": 8502, "loss": 0.0265, "eval_loss": null, "predict_loss": null, "reward": 0.9532, "learning_rate": 2.1195915933338133e-05, "epoch": 0.36, "percentage": 36.45, "elapsed_time": "11:10:37", "remaining_time": "19:29:13"} +{"current_steps": 3149, "total_steps": 8502, "loss": 0.0278, "eval_loss": null, "predict_loss": null, "reward": 1.1358, "learning_rate": 2.0942515777220186e-05, "epoch": 0.37, "percentage": 37.04, "elapsed_time": "11:21:48", "remaining_time": "19:19:00"} +{"current_steps": 3199, "total_steps": 8502, "loss": 0.0258, "eval_loss": null, "predict_loss": null, "reward": 1.0723, "learning_rate": 2.0687087689101562e-05, "epoch": 0.38, "percentage": 37.63, "elapsed_time": "11:33:17", "remaining_time": "19:09:16"} +{"current_steps": 3249, "total_steps": 8502, "loss": 0.029, "eval_loss": null, "predict_loss": null, "reward": 1.3277, "learning_rate": 2.0429718835901672e-05, "epoch": 0.38, "percentage": 38.21, "elapsed_time": "11:44:54", "remaining_time": "18:59:41"} +{"current_steps": 3299, "total_steps": 8502, "loss": 0.0281, "eval_loss": null, "predict_loss": null, "reward": 1.2176, "learning_rate": 2.0170497046841824e-05, "epoch": 0.39, "percentage": 38.8, "elapsed_time": "11:56:03", "remaining_time": "18:49:19"} +{"current_steps": 3349, "total_steps": 8502, "loss": 0.0258, "eval_loss": null, "predict_loss": null, "reward": 1.3399, "learning_rate": 1.9909510783472825e-05, "epoch": 0.39, "percentage": 39.39, "elapsed_time": "12:07:42", "remaining_time": "18:39:42"} +{"current_steps": 3399, "total_steps": 8502, "loss": 0.0279, "eval_loss": null, "predict_loss": null, "reward": 1.1264, "learning_rate": 1.964684910948672e-05, "epoch": 0.4, "percentage": 39.98, "elapsed_time": "12:19:08", "remaining_time": "18:29:41"} +{"current_steps": 3449, "total_steps": 8502, "loss": 0.0259, "eval_loss": null, "predict_loss": null, "reward": 1.0383, "learning_rate": 1.9382601660323124e-05, "epoch": 0.41, "percentage": 40.57, "elapsed_time": "12:30:33", "remaining_time": "18:19:36"} +{"current_steps": 3499, "total_steps": 8502, "loss": 0.0244, "eval_loss": null, "predict_loss": null, "reward": 1.135, "learning_rate": 1.911685861258034e-05, "epoch": 0.41, "percentage": 41.16, "elapsed_time": "12:42:04", "remaining_time": "18:09:37"} +{"current_steps": 3549, "total_steps": 8502, "loss": 0.0246, "eval_loss": null, "predict_loss": null, "reward": 1.0922, "learning_rate": 1.8849710653241923e-05, "epoch": 0.42, "percentage": 41.74, "elapsed_time": "12:52:59", "remaining_time": "17:58:48"} +{"current_steps": 3599, "total_steps": 8502, "loss": 0.0243, "eval_loss": null, "predict_loss": null, "reward": 1.1385, "learning_rate": 1.858124894872895e-05, "epoch": 0.42, "percentage": 42.33, "elapsed_time": "13:04:11", "remaining_time": "17:48:19"} +{"current_steps": 3649, "total_steps": 8502, "loss": 
0.0255, "eval_loss": null, "predict_loss": null, "reward": 0.9836, "learning_rate": 1.8311565113788777e-05, "epoch": 0.43, "percentage": 42.92, "elapsed_time": "13:15:12", "remaining_time": "17:37:34"} +{"current_steps": 3699, "total_steps": 8502, "loss": 0.0244, "eval_loss": null, "predict_loss": null, "reward": 1.0459, "learning_rate": 1.804075118023072e-05, "epoch": 0.44, "percentage": 43.51, "elapsed_time": "13:26:35", "remaining_time": "17:27:18"} +{"current_steps": 3749, "total_steps": 8502, "loss": 0.0233, "eval_loss": null, "predict_loss": null, "reward": 1.1805, "learning_rate": 1.7768899565519493e-05, "epoch": 0.44, "percentage": 44.1, "elapsed_time": "13:37:15", "remaining_time": "17:16:07"} +{"current_steps": 3799, "total_steps": 8502, "loss": 0.0264, "eval_loss": null, "predict_loss": null, "reward": 1.305, "learning_rate": 1.749610304123695e-05, "epoch": 0.45, "percentage": 44.68, "elapsed_time": "13:48:20", "remaining_time": "17:05:27"} +{"current_steps": 3849, "total_steps": 8502, "loss": 0.0237, "eval_loss": null, "predict_loss": null, "reward": 1.0362, "learning_rate": 1.7222454701423068e-05, "epoch": 0.45, "percentage": 45.27, "elapsed_time": "14:00:10", "remaining_time": "16:55:39"} +{"current_steps": 3899, "total_steps": 8502, "loss": 0.0236, "eval_loss": null, "predict_loss": null, "reward": 1.2275, "learning_rate": 1.694804793080681e-05, "epoch": 0.46, "percentage": 45.86, "elapsed_time": "14:11:10", "remaining_time": "16:44:51"} +{"current_steps": 3949, "total_steps": 8502, "loss": 0.0238, "eval_loss": null, "predict_loss": null, "reward": 1.2652, "learning_rate": 1.6672976372937838e-05, "epoch": 0.46, "percentage": 46.45, "elapsed_time": "14:23:10", "remaining_time": "16:35:12"} +{"current_steps": 3999, "total_steps": 8502, "loss": 0.0235, "eval_loss": null, "predict_loss": null, "reward": 1.1863, "learning_rate": 1.63973338982299e-05, "epoch": 0.47, "percentage": 47.04, "elapsed_time": "14:34:31", "remaining_time": "16:24:44"} +{"current_steps": 4049, "total_steps": 8502, "loss": 0.0253, "eval_loss": null, "predict_loss": null, "reward": 1.4457, "learning_rate": 1.6121214571926765e-05, "epoch": 0.48, "percentage": 47.62, "elapsed_time": "14:46:47", "remaining_time": "16:15:16"} +{"current_steps": 4099, "total_steps": 8502, "loss": 0.0241, "eval_loss": null, "predict_loss": null, "reward": 1.3766, "learning_rate": 1.5844712622001708e-05, "epoch": 0.48, "percentage": 48.21, "elapsed_time": "14:58:41", "remaining_time": "16:05:20"} +{"current_steps": 4149, "total_steps": 8502, "loss": 0.0228, "eval_loss": null, "predict_loss": null, "reward": 1.2514, "learning_rate": 1.5567922407001432e-05, "epoch": 0.49, "percentage": 48.8, "elapsed_time": "15:11:01", "remaining_time": "15:55:49"} +{"current_steps": 4199, "total_steps": 8502, "loss": 0.0237, "eval_loss": null, "predict_loss": null, "reward": 1.2308, "learning_rate": 1.5290938383845442e-05, "epoch": 0.49, "percentage": 49.39, "elapsed_time": "15:22:41", "remaining_time": "15:45:32"} +{"current_steps": 4249, "total_steps": 8502, "loss": 0.0219, "eval_loss": null, "predict_loss": null, "reward": 1.3735, "learning_rate": 1.5013855075591872e-05, "epoch": 0.5, "percentage": 49.98, "elapsed_time": "15:34:38", "remaining_time": "15:35:31"} +{"current_steps": 4299, "total_steps": 8502, "loss": 0.0239, "eval_loss": null, "predict_loss": null, "reward": 1.1207, "learning_rate": 1.4736767039180697e-05, "epoch": 0.51, "percentage": 50.56, "elapsed_time": "15:46:03", "remaining_time": "15:24:55"} +{"current_steps": 4349, 
"total_steps": 8502, "loss": 0.0219, "eval_loss": null, "predict_loss": null, "reward": 1.1005, "learning_rate": 1.4459768833165414e-05, "epoch": 0.51, "percentage": 51.15, "elapsed_time": "15:58:06", "remaining_time": "15:14:55"} +{"current_steps": 4399, "total_steps": 8502, "loss": 0.023, "eval_loss": null, "predict_loss": null, "reward": 1.4297, "learning_rate": 1.4182954985444172e-05, "epoch": 0.52, "percentage": 51.74, "elapsed_time": "16:10:21", "remaining_time": "15:05:04"} +{"current_steps": 4449, "total_steps": 8502, "loss": 0.0238, "eval_loss": null, "predict_loss": null, "reward": 1.289, "learning_rate": 1.3906419961001339e-05, "epoch": 0.52, "percentage": 52.33, "elapsed_time": "16:22:49", "remaining_time": "14:55:21"} +{"current_steps": 4499, "total_steps": 8502, "loss": 0.0221, "eval_loss": null, "predict_loss": null, "reward": 1.3299, "learning_rate": 1.3630258129670565e-05, "epoch": 0.53, "percentage": 52.92, "elapsed_time": "16:34:52", "remaining_time": "14:45:12"} +{"current_steps": 4549, "total_steps": 8502, "loss": 0.0221, "eval_loss": null, "predict_loss": null, "reward": 1.5207, "learning_rate": 1.3354563733930315e-05, "epoch": 0.54, "percentage": 53.51, "elapsed_time": "16:46:57", "remaining_time": "14:35:01"} +{"current_steps": 4599, "total_steps": 8502, "loss": 0.0207, "eval_loss": null, "predict_loss": null, "reward": 1.3068, "learning_rate": 1.3079430856742829e-05, "epoch": 0.54, "percentage": 54.09, "elapsed_time": "16:59:38", "remaining_time": "14:25:20"} +{"current_steps": 4649, "total_steps": 8502, "loss": 0.0228, "eval_loss": null, "predict_loss": null, "reward": 1.2289, "learning_rate": 1.2804953389447579e-05, "epoch": 0.55, "percentage": 54.68, "elapsed_time": "17:11:44", "remaining_time": "14:15:05"} +{"current_steps": 4699, "total_steps": 8502, "loss": 0.0222, "eval_loss": null, "predict_loss": null, "reward": 1.1784, "learning_rate": 1.2531224999720032e-05, "epoch": 0.55, "percentage": 55.27, "elapsed_time": "17:23:37", "remaining_time": "14:04:37"} +{"current_steps": 4749, "total_steps": 8502, "loss": 0.0208, "eval_loss": null, "predict_loss": null, "reward": 1.1996, "learning_rate": 1.2258339099606862e-05, "epoch": 0.56, "percentage": 55.86, "elapsed_time": "17:35:09", "remaining_time": "13:53:51"} +{"current_steps": 4799, "total_steps": 8502, "loss": 0.0213, "eval_loss": null, "predict_loss": null, "reward": 1.2377, "learning_rate": 1.198638881364825e-05, "epoch": 0.56, "percentage": 56.45, "elapsed_time": "17:47:07", "remaining_time": "13:43:24"} +{"current_steps": 4849, "total_steps": 8502, "loss": 0.0217, "eval_loss": null, "predict_loss": null, "reward": 1.1597, "learning_rate": 1.1715466947098438e-05, "epoch": 0.57, "percentage": 57.03, "elapsed_time": "17:58:27", "remaining_time": "13:32:27"} +{"current_steps": 4899, "total_steps": 8502, "loss": 0.02, "eval_loss": null, "predict_loss": null, "reward": 1.2, "learning_rate": 1.1445665954255139e-05, "epoch": 0.58, "percentage": 57.62, "elapsed_time": "18:10:27", "remaining_time": "13:21:59"} +{"current_steps": 4949, "total_steps": 8502, "loss": 0.0233, "eval_loss": null, "predict_loss": null, "reward": 1.3867, "learning_rate": 1.1177077906908772e-05, "epoch": 0.58, "percentage": 58.21, "elapsed_time": "18:22:41", "remaining_time": "13:11:38"} +{"current_steps": 4999, "total_steps": 8502, "loss": 0.0213, "eval_loss": null, "predict_loss": null, "reward": 1.3454, "learning_rate": 1.0909794462922214e-05, "epoch": 0.59, "percentage": 58.8, "elapsed_time": "18:34:36", "remaining_time": "13:01:02"} 
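
Stepping back from the log for a moment: every checkpoint directory above pairs an adapter_config.json with a ~26 MB adapter_model.bin (stored here as a git-LFS pointer) holding the rank-8 LoRA deltas on `q_proj`/`v_proj`. PEFT can attach these to the base model named in `base_model_name_or_path`. A minimal loading sketch, with two caveats: the `/hy-tmp/Ziya-LLaMA-13B-v1` path recorded in the config is the original training machine's local path (not a Hub ID), so both paths below are assumptions you must point at your own copies, and `git lfs pull` must have replaced the pointer files with real weights first:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed paths: substitute your local base-model copy and adapter directory.
base_path = "/hy-tmp/Ziya-LLaMA-13B-v1"  # from adapter_config.json
adapter_path = "ppo_8_2"                 # top-level adapter, or any checkpoint-* dir

tokenizer = AutoTokenizer.from_pretrained(base_path)
base = AutoModelForCausalLM.from_pretrained(
    base_path, torch_dtype=torch.float16, device_map="auto"
)

# Attach the rank-8 LoRA weights; the frozen base parameters are left untouched.
model = PeftModel.from_pretrained(base, adapter_path)
model.eval()

inputs = tokenizer("你好", return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

The 21 kB value_head.bin files are not consumed by this loader: their size is consistent with a single `Linear(5120, 1)` critic head over LLaMA-13B hidden states (presumably the PPO value head), which matters for resuming training but not for generation.
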
+{"current_steps": 5049, "total_steps": 8502, "loss": 0.0212, "eval_loss": null, "predict_loss": null, "reward": 1.2913, "learning_rate": 1.064390683495178e-05, "epoch": 0.59, "percentage": 59.39, "elapsed_time": "18:46:42", "remaining_time": "12:50:33"} +{"current_steps": 5099, "total_steps": 8502, "loss": 0.0215, "eval_loss": null, "predict_loss": null, "reward": 1.1036, "learning_rate": 1.0379505759320209e-05, "epoch": 0.6, "percentage": 59.97, "elapsed_time": "18:58:37", "remaining_time": "12:39:53"} +{"current_steps": 5149, "total_steps": 8502, "loss": 0.0211, "eval_loss": null, "predict_loss": null, "reward": 1.5439, "learning_rate": 1.0116681465052087e-05, "epoch": 0.61, "percentage": 60.56, "elapsed_time": "19:10:45", "remaining_time": "12:29:22"} +{"current_steps": 5199, "total_steps": 8502, "loss": 0.0204, "eval_loss": null, "predict_loss": null, "reward": 1.2613, "learning_rate": 9.855523643082532e-06, "epoch": 0.61, "percentage": 61.15, "elapsed_time": "19:22:59", "remaining_time": "12:18:52"} +{"current_steps": 5249, "total_steps": 8502, "loss": 0.0219, "eval_loss": null, "predict_loss": null, "reward": 1.4101, "learning_rate": 9.596121415649359e-06, "epoch": 0.62, "percentage": 61.74, "elapsed_time": "19:34:46", "remaining_time": "12:08:02"} +{"current_steps": 5299, "total_steps": 8502, "loss": 0.022, "eval_loss": null, "predict_loss": null, "reward": 1.4715, "learning_rate": 9.33856330587944e-06, "epoch": 0.62, "percentage": 62.33, "elapsed_time": "19:46:48", "remaining_time": "11:57:22"} +{"current_steps": 5349, "total_steps": 8502, "loss": 0.0227, "eval_loss": null, "predict_loss": null, "reward": 1.3201, "learning_rate": 9.082937207579442e-06, "epoch": 0.63, "percentage": 62.91, "elapsed_time": "19:59:20", "remaining_time": "11:46:57"} +{"current_steps": 5399, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.2946, "learning_rate": 8.82933035524135e-06, "epoch": 0.64, "percentage": 63.5, "elapsed_time": "20:12:12", "remaining_time": "11:36:41"} +{"current_steps": 5449, "total_steps": 8502, "loss": 0.0206, "eval_loss": null, "predict_loss": null, "reward": 1.4951, "learning_rate": 8.577829294272992e-06, "epoch": 0.64, "percentage": 64.09, "elapsed_time": "20:24:18", "remaining_time": "11:25:57"} +{"current_steps": 5499, "total_steps": 8502, "loss": 0.0207, "eval_loss": null, "predict_loss": null, "reward": 1.5987, "learning_rate": 8.328519851463702e-06, "epoch": 0.65, "percentage": 64.68, "elapsed_time": "20:36:58", "remaining_time": "11:15:30"} +{"current_steps": 5549, "total_steps": 8502, "loss": 0.0202, "eval_loss": null, "predict_loss": null, "reward": 1.5224, "learning_rate": 8.08148710569524e-06, "epoch": 0.65, "percentage": 65.27, "elapsed_time": "20:48:52", "remaining_time": "11:04:36"} +{"current_steps": 5599, "total_steps": 8502, "loss": 0.0195, "eval_loss": null, "predict_loss": null, "reward": 1.5981, "learning_rate": 7.836815358907908e-06, "epoch": 0.66, "percentage": 65.86, "elapsed_time": "21:01:21", "remaining_time": "10:53:59"} +{"current_steps": 5649, "total_steps": 8502, "loss": 0.0198, "eval_loss": null, "predict_loss": null, "reward": 1.3337, "learning_rate": 7.594588107331857e-06, "epoch": 0.66, "percentage": 66.44, "elapsed_time": "21:13:25", "remaining_time": "10:43:08"} +{"current_steps": 5699, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.503, "learning_rate": 7.354888012993293e-06, "epoch": 0.67, "percentage": 67.03, "elapsed_time": "21:25:41", "remaining_time": 
"10:32:21"} +{"current_steps": 5749, "total_steps": 8502, "loss": 0.0192, "eval_loss": null, "predict_loss": null, "reward": 1.3961, "learning_rate": 7.117796875505393e-06, "epoch": 0.68, "percentage": 67.62, "elapsed_time": "21:38:22", "remaining_time": "10:21:45"} +{"current_steps": 5799, "total_steps": 8502, "loss": 0.0204, "eval_loss": null, "predict_loss": null, "reward": 1.5568, "learning_rate": 6.883395604153524e-06, "epoch": 0.68, "percentage": 68.21, "elapsed_time": "21:51:01", "remaining_time": "10:11:05"} +{"current_steps": 5849, "total_steps": 8502, "loss": 0.0195, "eval_loss": null, "predict_loss": null, "reward": 1.4106, "learning_rate": 6.651764190284266e-06, "epoch": 0.69, "percentage": 68.8, "elapsed_time": "22:02:55", "remaining_time": "10:00:03"} +{"current_steps": 5899, "total_steps": 8502, "loss": 0.0191, "eval_loss": null, "predict_loss": null, "reward": 1.5962, "learning_rate": 6.422981680007759e-06, "epoch": 0.69, "percentage": 69.38, "elapsed_time": "22:15:11", "remaining_time": "9:49:10"} +{"current_steps": 5949, "total_steps": 8502, "loss": 0.0208, "eval_loss": null, "predict_loss": null, "reward": 1.5746, "learning_rate": 6.197126147222517e-06, "epoch": 0.7, "percentage": 69.97, "elapsed_time": "22:27:21", "remaining_time": "9:38:12"} +{"current_steps": 5999, "total_steps": 8502, "loss": 0.0201, "eval_loss": null, "predict_loss": null, "reward": 1.2134, "learning_rate": 5.974274666972112e-06, "epoch": 0.71, "percentage": 70.56, "elapsed_time": "22:38:50", "remaining_time": "9:26:57"} +{"current_steps": 6049, "total_steps": 8502, "loss": 0.0198, "eval_loss": null, "predict_loss": null, "reward": 1.6037, "learning_rate": 5.754503289142692e-06, "epoch": 0.71, "percentage": 71.15, "elapsed_time": "22:50:55", "remaining_time": "9:15:56"} +{"current_steps": 6099, "total_steps": 8502, "loss": 0.0199, "eval_loss": null, "predict_loss": null, "reward": 1.5427, "learning_rate": 5.537887012510291e-06, "epoch": 0.72, "percentage": 71.74, "elapsed_time": "23:03:06", "remaining_time": "9:04:56"} +{"current_steps": 6149, "total_steps": 8502, "loss": 0.0202, "eval_loss": null, "predict_loss": null, "reward": 1.5012, "learning_rate": 5.324499759146934e-06, "epoch": 0.72, "percentage": 72.32, "elapsed_time": "23:16:17", "remaining_time": "8:54:18"} +{"current_steps": 6199, "total_steps": 8502, "loss": 0.019, "eval_loss": null, "predict_loss": null, "reward": 1.6545, "learning_rate": 5.11441434919409e-06, "epoch": 0.73, "percentage": 72.91, "elapsed_time": "23:29:16", "remaining_time": "8:43:33"} +{"current_steps": 6249, "total_steps": 8502, "loss": 0.0187, "eval_loss": null, "predict_loss": null, "reward": 1.529, "learning_rate": 4.907702476012234e-06, "epoch": 0.74, "percentage": 73.5, "elapsed_time": "23:42:09", "remaining_time": "8:32:44"} +{"current_steps": 6299, "total_steps": 8502, "loss": 0.0195, "eval_loss": null, "predict_loss": null, "reward": 1.7154, "learning_rate": 4.704434681714884e-06, "epoch": 0.74, "percentage": 74.09, "elapsed_time": "23:55:34", "remaining_time": "8:22:04"} +{"current_steps": 6349, "total_steps": 8502, "loss": 0.0206, "eval_loss": null, "predict_loss": null, "reward": 1.549, "learning_rate": 4.504680333095542e-06, "epoch": 0.75, "percentage": 74.68, "elapsed_time": "1 day, 0:08:35", "remaining_time": "8:11:13"} +{"current_steps": 6399, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.7198, "learning_rate": 4.308507597955685e-06, "epoch": 0.75, "percentage": 75.26, "elapsed_time": "1 day, 0:21:41", 
"remaining_time": "8:00:22"} +{"current_steps": 6449, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.298, "learning_rate": 4.115983421841979e-06, "epoch": 0.76, "percentage": 75.85, "elapsed_time": "1 day, 0:34:11", "remaining_time": "7:49:18"} +{"current_steps": 6499, "total_steps": 8502, "loss": 0.0196, "eval_loss": null, "predict_loss": null, "reward": 1.5776, "learning_rate": 3.927173505200547e-06, "epoch": 0.76, "percentage": 76.44, "elapsed_time": "1 day, 0:46:28", "remaining_time": "7:38:07"} +{"current_steps": 6549, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.4295, "learning_rate": 3.742142280956153e-06, "epoch": 0.77, "percentage": 77.03, "elapsed_time": "1 day, 0:58:50", "remaining_time": "7:26:58"} +{"current_steps": 6599, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.4245, "learning_rate": 3.5609528925239476e-06, "epoch": 0.78, "percentage": 77.62, "elapsed_time": "1 day, 1:11:29", "remaining_time": "7:15:52"} +{"current_steps": 6649, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.5306, "learning_rate": 3.3836671722612646e-06, "epoch": 0.78, "percentage": 78.21, "elapsed_time": "1 day, 1:24:15", "remaining_time": "7:04:47"} +{"current_steps": 6699, "total_steps": 8502, "loss": 0.0201, "eval_loss": null, "predict_loss": null, "reward": 1.4046, "learning_rate": 3.2103456203668223e-06, "epoch": 0.79, "percentage": 78.79, "elapsed_time": "1 day, 1:36:48", "remaining_time": "6:53:37"} +{"current_steps": 6749, "total_steps": 8502, "loss": 0.0178, "eval_loss": null, "predict_loss": null, "reward": 1.5582, "learning_rate": 3.041047384234521e-06, "epoch": 0.79, "percentage": 79.38, "elapsed_time": "1 day, 1:49:48", "remaining_time": "6:42:33"} +{"current_steps": 6799, "total_steps": 8502, "loss": 0.0193, "eval_loss": null, "predict_loss": null, "reward": 1.4851, "learning_rate": 2.875830238268942e-06, "epoch": 0.8, "percentage": 79.97, "elapsed_time": "1 day, 2:02:15", "remaining_time": "6:31:18"} +{"current_steps": 6849, "total_steps": 8502, "loss": 0.019, "eval_loss": null, "predict_loss": null, "reward": 1.5795, "learning_rate": 2.714750564169339e-06, "epoch": 0.81, "percentage": 80.56, "elapsed_time": "1 day, 2:14:38", "remaining_time": "6:20:02"} +{"current_steps": 6899, "total_steps": 8502, "loss": 0.0193, "eval_loss": null, "predict_loss": null, "reward": 1.5771, "learning_rate": 2.557863331688927e-06, "epoch": 0.81, "percentage": 81.15, "elapsed_time": "1 day, 2:27:44", "remaining_time": "6:08:54"} +{"current_steps": 6949, "total_steps": 8502, "loss": 0.0197, "eval_loss": null, "predict_loss": null, "reward": 1.4866, "learning_rate": 2.405222079876017e-06, "epoch": 0.82, "percentage": 81.73, "elapsed_time": "1 day, 2:40:49", "remaining_time": "5:57:45"} +{"current_steps": 6999, "total_steps": 8502, "loss": 0.0198, "eval_loss": null, "predict_loss": null, "reward": 1.4232, "learning_rate": 2.256878898803354e-06, "epoch": 0.82, "percentage": 82.32, "elapsed_time": "1 day, 2:53:01", "remaining_time": "5:46:23"} +{"current_steps": 7049, "total_steps": 8502, "loss": 0.0196, "eval_loss": null, "predict_loss": null, "reward": 1.5922, "learning_rate": 2.112884411791984e-06, "epoch": 0.83, "percentage": 82.91, "elapsed_time": "1 day, 3:05:03", "remaining_time": "5:34:58"} +{"current_steps": 7099, "total_steps": 8502, "loss": 0.0196, "eval_loss": null, "predict_loss": null, "reward": 1.5317, "learning_rate": 
1.9732877581356075e-06, "epoch": 0.83, "percentage": 83.5, "elapsed_time": "1 day, 3:17:24", "remaining_time": "5:23:36"} +{"current_steps": 7149, "total_steps": 8502, "loss": 0.019, "eval_loss": null, "predict_loss": null, "reward": 1.4884, "learning_rate": 1.8381365763314151e-06, "epoch": 0.84, "percentage": 84.09, "elapsed_time": "1 day, 3:30:13", "remaining_time": "5:12:19"} +{"current_steps": 7199, "total_steps": 8502, "loss": 0.0181, "eval_loss": null, "predict_loss": null, "reward": 1.589, "learning_rate": 1.7074769878230494e-06, "epoch": 0.85, "percentage": 84.67, "elapsed_time": "1 day, 3:43:02", "remaining_time": "5:01:00"} +{"current_steps": 7249, "total_steps": 8502, "loss": 0.019, "eval_loss": null, "predict_loss": null, "reward": 1.5272, "learning_rate": 1.5813535812612856e-06, "epoch": 0.85, "percentage": 85.26, "elapsed_time": "1 day, 3:55:31", "remaining_time": "4:49:37"} +{"current_steps": 7299, "total_steps": 8502, "loss": 0.0204, "eval_loss": null, "predict_loss": null, "reward": 1.5452, "learning_rate": 1.4598093972878007e-06, "epoch": 0.86, "percentage": 85.85, "elapsed_time": "1 day, 4:08:03", "remaining_time": "4:38:13"} +{"current_steps": 7349, "total_steps": 8502, "loss": 0.0188, "eval_loss": null, "predict_loss": null, "reward": 1.6511, "learning_rate": 1.3428859138471839e-06, "epoch": 0.86, "percentage": 86.44, "elapsed_time": "1 day, 4:20:38", "remaining_time": "4:26:49"} +{"current_steps": 7399, "total_steps": 8502, "loss": 0.0192, "eval_loss": null, "predict_loss": null, "reward": 1.3072, "learning_rate": 1.2306230320322798e-06, "epoch": 0.87, "percentage": 87.03, "elapsed_time": "1 day, 4:33:22", "remaining_time": "4:15:25"} +{"current_steps": 7449, "total_steps": 8502, "loss": 0.0194, "eval_loss": null, "predict_loss": null, "reward": 1.6043, "learning_rate": 1.1230590624675747e-06, "epoch": 0.88, "percentage": 87.61, "elapsed_time": "1 day, 4:46:03", "remaining_time": "4:03:59"} +{"current_steps": 7499, "total_steps": 8502, "loss": 0.0185, "eval_loss": null, "predict_loss": null, "reward": 1.5806, "learning_rate": 1.0202307122354288e-06, "epoch": 0.88, "percentage": 88.2, "elapsed_time": "1 day, 4:58:42", "remaining_time": "3:52:33"} +{"current_steps": 7549, "total_steps": 8502, "loss": 0.0187, "eval_loss": null, "predict_loss": null, "reward": 1.3901, "learning_rate": 9.221730723494504e-07, "epoch": 0.89, "percentage": 88.79, "elapsed_time": "1 day, 5:11:13", "remaining_time": "3:41:04"} +{"current_steps": 7599, "total_steps": 8502, "loss": 0.019, "eval_loss": null, "predict_loss": null, "reward": 1.6659, "learning_rate": 8.289196057794096e-07, "epoch": 0.89, "percentage": 89.38, "elapsed_time": "1 day, 5:23:56", "remaining_time": "3:29:36"} +{"current_steps": 7649, "total_steps": 8502, "loss": 0.0192, "eval_loss": null, "predict_loss": null, "reward": 1.3981, "learning_rate": 7.405021360317366e-07, "epoch": 0.9, "percentage": 89.97, "elapsed_time": "1 day, 5:36:13", "remaining_time": "3:18:04"} +{"current_steps": 7699, "total_steps": 8502, "loss": 0.0182, "eval_loss": null, "predict_loss": null, "reward": 1.6831, "learning_rate": 6.569508362894783e-07, "epoch": 0.91, "percentage": 90.56, "elapsed_time": "1 day, 5:50:02", "remaining_time": "3:06:42"} +{"current_steps": 7749, "total_steps": 8502, "loss": 0.0188, "eval_loss": null, "predict_loss": null, "reward": 1.4812, "learning_rate": 5.782942191154622e-07, "epoch": 0.91, "percentage": 91.14, "elapsed_time": "1 day, 6:02:33", "remaining_time": "2:55:09"} +{"current_steps": 7799, "total_steps": 8502, 
"loss": 0.0187, "eval_loss": null, "predict_loss": null, "reward": 1.7532, "learning_rate": 5.045591267221461e-07, "epoch": 0.92, "percentage": 91.73, "elapsed_time": "1 day, 6:15:28", "remaining_time": "2:43:38"} +{"current_steps": 7849, "total_steps": 8502, "loss": 0.0185, "eval_loss": null, "predict_loss": null, "reward": 1.6008, "learning_rate": 4.3577072181150035e-07, "epoch": 0.92, "percentage": 92.32, "elapsed_time": "1 day, 6:27:57", "remaining_time": "2:32:04"} +{"current_steps": 7899, "total_steps": 8502, "loss": 0.0192, "eval_loss": null, "predict_loss": null, "reward": 1.8267, "learning_rate": 3.719524789880202e-07, "epoch": 0.93, "percentage": 92.91, "elapsed_time": "1 day, 6:41:23", "remaining_time": "2:20:34"} +{"current_steps": 7949, "total_steps": 8502, "loss": 0.0188, "eval_loss": null, "predict_loss": null, "reward": 1.8393, "learning_rate": 3.1312617674783385e-07, "epoch": 0.93, "percentage": 93.5, "elapsed_time": "1 day, 6:54:25", "remaining_time": "2:09:00"} +{"current_steps": 7999, "total_steps": 8502, "loss": 0.0182, "eval_loss": null, "predict_loss": null, "reward": 1.4103, "learning_rate": 2.5931189004661406e-07, "epoch": 0.94, "percentage": 94.08, "elapsed_time": "1 day, 7:06:59", "remaining_time": "1:57:24"} +{"current_steps": 8049, "total_steps": 8502, "loss": 0.0187, "eval_loss": null, "predict_loss": null, "reward": 1.4023, "learning_rate": 2.1052798344882495e-07, "epoch": 0.95, "percentage": 94.67, "elapsed_time": "1 day, 7:20:12", "remaining_time": "1:45:49"} +{"current_steps": 8099, "total_steps": 8502, "loss": 0.0192, "eval_loss": null, "predict_loss": null, "reward": 1.5402, "learning_rate": 1.667911048606785e-07, "epoch": 0.95, "percentage": 95.26, "elapsed_time": "1 day, 7:32:52", "remaining_time": "1:34:11"} +{"current_steps": 8149, "total_steps": 8502, "loss": 0.0182, "eval_loss": null, "predict_loss": null, "reward": 1.5328, "learning_rate": 1.2811617984889056e-07, "epoch": 0.96, "percentage": 95.85, "elapsed_time": "1 day, 7:45:59", "remaining_time": "1:22:33"} +{"current_steps": 8199, "total_steps": 8502, "loss": 0.0183, "eval_loss": null, "predict_loss": null, "reward": 1.7633, "learning_rate": 9.451640654721538e-08, "epoch": 0.96, "percentage": 96.44, "elapsed_time": "1 day, 7:58:59", "remaining_time": "1:10:55"} +{"current_steps": 8249, "total_steps": 8502, "loss": 0.0183, "eval_loss": null, "predict_loss": null, "reward": 1.5819, "learning_rate": 6.600325115246831e-08, "epoch": 0.97, "percentage": 97.02, "elapsed_time": "1 day, 8:12:38", "remaining_time": "0:59:16"} +{"current_steps": 8299, "total_steps": 8502, "loss": 0.0183, "eval_loss": null, "predict_loss": null, "reward": 1.6798, "learning_rate": 4.2586444011600835e-08, "epoch": 0.98, "percentage": 97.61, "elapsed_time": "1 day, 8:25:46", "remaining_time": "0:47:35"} +{"current_steps": 8349, "total_steps": 8502, "loss": 0.0193, "eval_loss": null, "predict_loss": null, "reward": 1.742, "learning_rate": 2.4273976301131818e-08, "epoch": 0.98, "percentage": 98.2, "elapsed_time": "1 day, 8:38:47", "remaining_time": "0:35:53"} +{"current_steps": 8399, "total_steps": 8502, "loss": 0.0189, "eval_loss": null, "predict_loss": null, "reward": 1.4572, "learning_rate": 1.1072097300102168e-08, "epoch": 0.99, "percentage": 98.79, "elapsed_time": "1 day, 8:51:26", "remaining_time": "0:24:10"} +{"current_steps": 8449, "total_steps": 8502, "loss": 0.0179, "eval_loss": null, "predict_loss": null, "reward": 1.4872, "learning_rate": 2.98531225744747e-09, "epoch": 0.99, "percentage": 99.38, "elapsed_time": "1 
day, 9:03:28", "remaining_time": "0:12:26"} +{"current_steps": 8499, "total_steps": 8502, "loss": 0.0185, "eval_loss": null, "predict_loss": null, "reward": 1.9113, "learning_rate": 1.6380854554465253e-11, "epoch": 1.0, "percentage": 99.96, "elapsed_time": "1 day, 9:16:35", "remaining_time": "0:00:42"} diff --git a/ppo_8_2/trainer_state.json b/ppo_8_2/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1aad630af49426d4f964fd3d73848403bccfcd5f --- /dev/null +++ b/ppo_8_2/trainer_state.json @@ -0,0 +1,1206 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": null, + "global_step": 0, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 2.9997541914704325e-05, + "loss": 0.0855, + "reward": 0.814, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 2.9989966819651966e-05, + "loss": 0.0814, + "reward": 0.9376, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 2.9977276276102345e-05, + "loss": 0.0734, + "reward": 0.8684, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 2.995947461480701e-05, + "loss": 0.0716, + "reward": 0.8537, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 2.9936567910728124e-05, + "loss": 0.0603, + "reward": 0.6429, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 2.9908563980965393e-05, + "loss": 0.0624, + "reward": 0.641, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 2.9875472382088355e-05, + "loss": 0.0631, + "reward": 0.8062, + "step": 349 + }, + { + "epoch": 0.05, + "learning_rate": 2.9837304406875167e-05, + "loss": 0.0577, + "reward": 0.5326, + "step": 399 + }, + { + "epoch": 0.05, + "learning_rate": 2.9794073080458815e-05, + "loss": 0.0556, + "reward": 0.9085, + "step": 449 + }, + { + "epoch": 0.06, + "learning_rate": 2.9745793155882214e-05, + "loss": 0.055, + "reward": 0.9616, + "step": 499 + }, + { + "epoch": 0.06, + "learning_rate": 2.9692481109063605e-05, + "loss": 0.0525, + "reward": 0.899, + "step": 549 + }, + { + "epoch": 0.07, + "learning_rate": 2.963415513317399e-05, + "loss": 0.0518, + "reward": 0.6793, + "step": 599 + }, + { + "epoch": 0.08, + "learning_rate": 2.9570835132428594e-05, + "loss": 0.0546, + "reward": 0.8445, + "step": 649 + }, + { + "epoch": 0.08, + "learning_rate": 2.9502542715294366e-05, + "loss": 0.0532, + "reward": 0.7952, + "step": 699 + }, + { + "epoch": 0.09, + "learning_rate": 2.942930118711593e-05, + "loss": 0.0515, + "reward": 0.8009, + "step": 749 + }, + { + "epoch": 0.09, + "learning_rate": 2.9351135542162432e-05, + "loss": 0.0445, + "reward": 1.1039, + "step": 799 + }, + { + "epoch": 0.1, + "learning_rate": 2.9268072455098065e-05, + "loss": 0.0447, + "reward": 0.858, + "step": 849 + }, + { + "epoch": 0.11, + "learning_rate": 2.918014027187909e-05, + "loss": 0.0455, + "reward": 0.9129, + "step": 899 + }, + { + "epoch": 0.11, + "learning_rate": 2.9087369000080567e-05, + "loss": 0.0459, + "reward": 0.9215, + "step": 949 + }, + { + "epoch": 0.12, + "learning_rate": 2.898979029865602e-05, + "loss": 0.0424, + "reward": 0.8282, + "step": 999 + }, + { + "epoch": 0.12, + "learning_rate": 2.888743746713357e-05, + "loss": 0.0437, + "reward": 0.9707, + "step": 1049 + }, + { + "epoch": 0.13, + "learning_rate": 2.8780345434252185e-05, + "loss": 0.041, + "reward": 0.796, + "step": 1099 + }, + { + "epoch": 0.14, + "learning_rate": 2.8668550746041966e-05, + "loss": 0.0468, + "reward": 0.9538, + "step": 1149 + }, + { + "epoch": 0.14, 
+ "learning_rate": 2.8552091553352533e-05, + "loss": 0.0409, + "reward": 0.8948, + "step": 1199 + }, + { + "epoch": 0.15, + "learning_rate": 2.8431007598833705e-05, + "loss": 0.0408, + "reward": 0.7338, + "step": 1249 + }, + { + "epoch": 0.15, + "learning_rate": 2.830534020337303e-05, + "loss": 0.0392, + "reward": 0.8156, + "step": 1299 + }, + { + "epoch": 0.16, + "learning_rate": 2.817513225199466e-05, + "loss": 0.0375, + "reward": 0.7625, + "step": 1349 + }, + { + "epoch": 0.16, + "learning_rate": 2.8040428179224528e-05, + "loss": 0.04, + "reward": 0.9719, + "step": 1399 + }, + { + "epoch": 0.17, + "learning_rate": 2.790127395392666e-05, + "loss": 0.034, + "reward": 0.8545, + "step": 1449 + }, + { + "epoch": 0.18, + "learning_rate": 2.7757717063615962e-05, + "loss": 0.0437, + "reward": 0.8889, + "step": 1499 + }, + { + "epoch": 0.18, + "learning_rate": 2.7609806498252692e-05, + "loss": 0.0355, + "reward": 1.2005, + "step": 1549 + }, + { + "epoch": 0.19, + "learning_rate": 2.745759273352425e-05, + "loss": 0.0368, + "reward": 1.0282, + "step": 1599 + }, + { + "epoch": 0.19, + "learning_rate": 2.7301127713619938e-05, + "loss": 0.0346, + "reward": 0.9873, + "step": 1649 + }, + { + "epoch": 0.2, + "learning_rate": 2.7140464833504564e-05, + "loss": 0.034, + "reward": 0.7074, + "step": 1699 + }, + { + "epoch": 0.21, + "learning_rate": 2.6975658920697006e-05, + "loss": 0.0334, + "reward": 0.9238, + "step": 1749 + }, + { + "epoch": 0.21, + "learning_rate": 2.680676621655984e-05, + "loss": 0.0317, + "reward": 0.8791, + "step": 1799 + }, + { + "epoch": 0.22, + "learning_rate": 2.663384435710654e-05, + "loss": 0.0348, + "reward": 1.1593, + "step": 1849 + }, + { + "epoch": 0.22, + "learning_rate": 2.6456952353332712e-05, + "loss": 0.0309, + "reward": 1.0537, + "step": 1899 + }, + { + "epoch": 0.23, + "learning_rate": 2.6276150571078108e-05, + "loss": 0.0333, + "reward": 1.0686, + "step": 1949 + }, + { + "epoch": 0.24, + "learning_rate": 2.6091500710426278e-05, + "loss": 0.0355, + "reward": 1.0331, + "step": 1999 + }, + { + "epoch": 0.24, + "learning_rate": 2.5903065784648947e-05, + "loss": 0.0331, + "reward": 0.8029, + "step": 2049 + }, + { + "epoch": 0.25, + "learning_rate": 2.5710910098702187e-05, + "loss": 0.0324, + "reward": 0.9726, + "step": 2099 + }, + { + "epoch": 0.25, + "learning_rate": 2.5515099227281836e-05, + "loss": 0.0322, + "reward": 0.9616, + "step": 2149 + }, + { + "epoch": 0.26, + "learning_rate": 2.5315699992445617e-05, + "loss": 0.0322, + "reward": 0.9011, + "step": 2199 + }, + { + "epoch": 0.26, + "learning_rate": 2.511278044080954e-05, + "loss": 0.0314, + "reward": 0.7529, + "step": 2249 + }, + { + "epoch": 0.27, + "learning_rate": 2.4906409820326436e-05, + "loss": 0.0286, + "reward": 1.1313, + "step": 2299 + }, + { + "epoch": 0.28, + "learning_rate": 2.4696658556654575e-05, + "loss": 0.0285, + "reward": 1.032, + "step": 2349 + }, + { + "epoch": 0.28, + "learning_rate": 2.4483598229124274e-05, + "loss": 0.0296, + "reward": 1.0514, + "step": 2399 + }, + { + "epoch": 0.29, + "learning_rate": 2.42673015463109e-05, + "loss": 0.0308, + "reward": 1.1721, + "step": 2449 + }, + { + "epoch": 0.29, + "learning_rate": 2.404784232122248e-05, + "loss": 0.0288, + "reward": 1.2602, + "step": 2499 + }, + { + "epoch": 0.3, + "learning_rate": 2.382529544611038e-05, + "loss": 0.0274, + "reward": 1.3181, + "step": 2549 + }, + { + "epoch": 0.31, + "learning_rate": 2.3599736866911756e-05, + "loss": 0.0302, + "reward": 0.8566, + "step": 2599 + }, + { + "epoch": 0.31, + "learning_rate": 
2.3371243557332333e-05, + "loss": 0.0324, + "reward": 1.1646, + "step": 2649 + }, + { + "epoch": 0.32, + "learning_rate": 2.313989349257855e-05, + "loss": 0.028, + "reward": 1.3095, + "step": 2699 + }, + { + "epoch": 0.32, + "learning_rate": 2.2905765622747843e-05, + "loss": 0.0294, + "reward": 0.9202, + "step": 2749 + }, + { + "epoch": 0.33, + "learning_rate": 2.266893984588631e-05, + "loss": 0.0299, + "reward": 0.9073, + "step": 2799 + }, + { + "epoch": 0.34, + "learning_rate": 2.242949698072283e-05, + "loss": 0.0264, + "reward": 1.0046, + "step": 2849 + }, + { + "epoch": 0.34, + "learning_rate": 2.2187518739089033e-05, + "loss": 0.0317, + "reward": 1.1227, + "step": 2899 + }, + { + "epoch": 0.35, + "learning_rate": 2.194308769803444e-05, + "loss": 0.0276, + "reward": 1.0356, + "step": 2949 + }, + { + "epoch": 0.35, + "learning_rate": 2.1696287271646406e-05, + "loss": 0.0253, + "reward": 1.1648, + "step": 2999 + }, + { + "epoch": 0.36, + "learning_rate": 2.1447201682584356e-05, + "loss": 0.026, + "reward": 1.155, + "step": 3049 + }, + { + "epoch": 0.36, + "learning_rate": 2.1195915933338133e-05, + "loss": 0.0265, + "reward": 0.9532, + "step": 3099 + }, + { + "epoch": 0.37, + "learning_rate": 2.0942515777220186e-05, + "loss": 0.0278, + "reward": 1.1358, + "step": 3149 + }, + { + "epoch": 0.38, + "learning_rate": 2.0687087689101562e-05, + "loss": 0.0258, + "reward": 1.0723, + "step": 3199 + }, + { + "epoch": 0.38, + "learning_rate": 2.0429718835901672e-05, + "loss": 0.029, + "reward": 1.3277, + "step": 3249 + }, + { + "epoch": 0.39, + "learning_rate": 2.0170497046841824e-05, + "loss": 0.0281, + "reward": 1.2176, + "step": 3299 + }, + { + "epoch": 0.39, + "learning_rate": 1.9909510783472825e-05, + "loss": 0.0258, + "reward": 1.3399, + "step": 3349 + }, + { + "epoch": 0.4, + "learning_rate": 1.964684910948672e-05, + "loss": 0.0279, + "reward": 1.1264, + "step": 3399 + }, + { + "epoch": 0.41, + "learning_rate": 1.9382601660323124e-05, + "loss": 0.0259, + "reward": 1.0383, + "step": 3449 + }, + { + "epoch": 0.41, + "learning_rate": 1.911685861258034e-05, + "loss": 0.0244, + "reward": 1.135, + "step": 3499 + }, + { + "epoch": 0.42, + "learning_rate": 1.8849710653241923e-05, + "loss": 0.0246, + "reward": 1.0922, + "step": 3549 + }, + { + "epoch": 0.42, + "learning_rate": 1.858124894872895e-05, + "loss": 0.0243, + "reward": 1.1385, + "step": 3599 + }, + { + "epoch": 0.43, + "learning_rate": 1.8311565113788777e-05, + "loss": 0.0255, + "reward": 0.9836, + "step": 3649 + }, + { + "epoch": 0.44, + "learning_rate": 1.804075118023072e-05, + "loss": 0.0244, + "reward": 1.0459, + "step": 3699 + }, + { + "epoch": 0.44, + "learning_rate": 1.7768899565519493e-05, + "loss": 0.0233, + "reward": 1.1805, + "step": 3749 + }, + { + "epoch": 0.45, + "learning_rate": 1.749610304123695e-05, + "loss": 0.0264, + "reward": 1.305, + "step": 3799 + }, + { + "epoch": 0.45, + "learning_rate": 1.7222454701423068e-05, + "loss": 0.0237, + "reward": 1.0362, + "step": 3849 + }, + { + "epoch": 0.46, + "learning_rate": 1.694804793080681e-05, + "loss": 0.0236, + "reward": 1.2275, + "step": 3899 + }, + { + "epoch": 0.46, + "learning_rate": 1.6672976372937838e-05, + "loss": 0.0238, + "reward": 1.2652, + "step": 3949 + }, + { + "epoch": 0.47, + "learning_rate": 1.63973338982299e-05, + "loss": 0.0235, + "reward": 1.1863, + "step": 3999 + }, + { + "epoch": 0.48, + "learning_rate": 1.6121214571926765e-05, + "loss": 0.0253, + "reward": 1.4457, + "step": 4049 + }, + { + "epoch": 0.48, + "learning_rate": 1.5844712622001708e-05, + "loss": 
0.0241, + "reward": 1.3766, + "step": 4099 + }, + { + "epoch": 0.49, + "learning_rate": 1.5567922407001432e-05, + "loss": 0.0228, + "reward": 1.2514, + "step": 4149 + }, + { + "epoch": 0.49, + "learning_rate": 1.5290938383845442e-05, + "loss": 0.0237, + "reward": 1.2308, + "step": 4199 + }, + { + "epoch": 0.5, + "learning_rate": 1.5013855075591872e-05, + "loss": 0.0219, + "reward": 1.3735, + "step": 4249 + }, + { + "epoch": 0.51, + "learning_rate": 1.4736767039180697e-05, + "loss": 0.0239, + "reward": 1.1207, + "step": 4299 + }, + { + "epoch": 0.51, + "learning_rate": 1.4459768833165414e-05, + "loss": 0.0219, + "reward": 1.1005, + "step": 4349 + }, + { + "epoch": 0.52, + "learning_rate": 1.4182954985444172e-05, + "loss": 0.023, + "reward": 1.4297, + "step": 4399 + }, + { + "epoch": 0.52, + "learning_rate": 1.3906419961001339e-05, + "loss": 0.0238, + "reward": 1.289, + "step": 4449 + }, + { + "epoch": 0.53, + "learning_rate": 1.3630258129670565e-05, + "loss": 0.0221, + "reward": 1.3299, + "step": 4499 + }, + { + "epoch": 0.54, + "learning_rate": 1.3354563733930315e-05, + "loss": 0.0221, + "reward": 1.5207, + "step": 4549 + }, + { + "epoch": 0.54, + "learning_rate": 1.3079430856742829e-05, + "loss": 0.0207, + "reward": 1.3068, + "step": 4599 + }, + { + "epoch": 0.55, + "learning_rate": 1.2804953389447579e-05, + "loss": 0.0228, + "reward": 1.2289, + "step": 4649 + }, + { + "epoch": 0.55, + "learning_rate": 1.2531224999720032e-05, + "loss": 0.0222, + "reward": 1.1784, + "step": 4699 + }, + { + "epoch": 0.56, + "learning_rate": 1.2258339099606862e-05, + "loss": 0.0208, + "reward": 1.1996, + "step": 4749 + }, + { + "epoch": 0.56, + "learning_rate": 1.198638881364825e-05, + "loss": 0.0213, + "reward": 1.2377, + "step": 4799 + }, + { + "epoch": 0.57, + "learning_rate": 1.1715466947098438e-05, + "loss": 0.0217, + "reward": 1.1597, + "step": 4849 + }, + { + "epoch": 0.58, + "learning_rate": 1.1445665954255139e-05, + "loss": 0.02, + "reward": 1.2, + "step": 4899 + }, + { + "epoch": 0.58, + "learning_rate": 1.1177077906908772e-05, + "loss": 0.0233, + "reward": 1.3867, + "step": 4949 + }, + { + "epoch": 0.59, + "learning_rate": 1.0909794462922214e-05, + "loss": 0.0213, + "reward": 1.3454, + "step": 4999 + }, + { + "epoch": 0.59, + "learning_rate": 1.064390683495178e-05, + "loss": 0.0212, + "reward": 1.2913, + "step": 5049 + }, + { + "epoch": 0.6, + "learning_rate": 1.0379505759320209e-05, + "loss": 0.0215, + "reward": 1.1036, + "step": 5099 + }, + { + "epoch": 0.61, + "learning_rate": 1.0116681465052087e-05, + "loss": 0.0211, + "reward": 1.5439, + "step": 5149 + }, + { + "epoch": 0.61, + "learning_rate": 9.855523643082532e-06, + "loss": 0.0204, + "reward": 1.2613, + "step": 5199 + }, + { + "epoch": 0.62, + "learning_rate": 9.596121415649359e-06, + "loss": 0.0219, + "reward": 1.4101, + "step": 5249 + }, + { + "epoch": 0.62, + "learning_rate": 9.33856330587944e-06, + "loss": 0.022, + "reward": 1.4715, + "step": 5299 + }, + { + "epoch": 0.63, + "learning_rate": 9.082937207579442e-06, + "loss": 0.0227, + "reward": 1.3201, + "step": 5349 + }, + { + "epoch": 0.64, + "learning_rate": 8.82933035524135e-06, + "loss": 0.0194, + "reward": 1.2946, + "step": 5399 + }, + { + "epoch": 0.64, + "learning_rate": 8.577829294272992e-06, + "loss": 0.0206, + "reward": 1.4951, + "step": 5449 + }, + { + "epoch": 0.65, + "learning_rate": 8.328519851463702e-06, + "loss": 0.0207, + "reward": 1.5987, + "step": 5499 + }, + { + "epoch": 0.65, + "learning_rate": 8.08148710569524e-06, + "loss": 0.0202, + "reward": 1.5224, + "step": 
5549 + }, + { + "epoch": 0.66, + "learning_rate": 7.836815358907908e-06, + "loss": 0.0195, + "reward": 1.5981, + "step": 5599 + }, + { + "epoch": 0.66, + "learning_rate": 7.594588107331857e-06, + "loss": 0.0198, + "reward": 1.3337, + "step": 5649 + }, + { + "epoch": 0.67, + "learning_rate": 7.354888012993293e-06, + "loss": 0.0194, + "reward": 1.503, + "step": 5699 + }, + { + "epoch": 0.68, + "learning_rate": 7.117796875505393e-06, + "loss": 0.0192, + "reward": 1.3961, + "step": 5749 + }, + { + "epoch": 0.68, + "learning_rate": 6.883395604153524e-06, + "loss": 0.0204, + "reward": 1.5568, + "step": 5799 + }, + { + "epoch": 0.69, + "learning_rate": 6.651764190284266e-06, + "loss": 0.0195, + "reward": 1.4106, + "step": 5849 + }, + { + "epoch": 0.69, + "learning_rate": 6.422981680007759e-06, + "loss": 0.0191, + "reward": 1.5962, + "step": 5899 + }, + { + "epoch": 0.7, + "learning_rate": 6.197126147222517e-06, + "loss": 0.0208, + "reward": 1.5746, + "step": 5949 + }, + { + "epoch": 0.71, + "learning_rate": 5.974274666972112e-06, + "loss": 0.0201, + "reward": 1.2134, + "step": 5999 + }, + { + "epoch": 0.71, + "learning_rate": 5.754503289142692e-06, + "loss": 0.0198, + "reward": 1.6037, + "step": 6049 + }, + { + "epoch": 0.72, + "learning_rate": 5.537887012510291e-06, + "loss": 0.0199, + "reward": 1.5427, + "step": 6099 + }, + { + "epoch": 0.72, + "learning_rate": 5.324499759146934e-06, + "loss": 0.0202, + "reward": 1.5012, + "step": 6149 + }, + { + "epoch": 0.73, + "learning_rate": 5.11441434919409e-06, + "loss": 0.019, + "reward": 1.6545, + "step": 6199 + }, + { + "epoch": 0.74, + "learning_rate": 4.907702476012234e-06, + "loss": 0.0187, + "reward": 1.529, + "step": 6249 + }, + { + "epoch": 0.74, + "learning_rate": 4.704434681714884e-06, + "loss": 0.0195, + "reward": 1.7154, + "step": 6299 + }, + { + "epoch": 0.75, + "learning_rate": 4.504680333095542e-06, + "loss": 0.0206, + "reward": 1.549, + "step": 6349 + }, + { + "epoch": 0.75, + "learning_rate": 4.308507597955685e-06, + "loss": 0.0194, + "reward": 1.7198, + "step": 6399 + }, + { + "epoch": 0.76, + "learning_rate": 4.115983421841979e-06, + "loss": 0.0194, + "reward": 1.298, + "step": 6449 + }, + { + "epoch": 0.76, + "learning_rate": 3.927173505200547e-06, + "loss": 0.0196, + "reward": 1.5776, + "step": 6499 + }, + { + "epoch": 0.77, + "learning_rate": 3.742142280956153e-06, + "loss": 0.0194, + "reward": 1.4295, + "step": 6549 + }, + { + "epoch": 0.78, + "learning_rate": 3.5609528925239476e-06, + "loss": 0.0194, + "reward": 1.4245, + "step": 6599 + }, + { + "epoch": 0.78, + "learning_rate": 3.3836671722612646e-06, + "loss": 0.0194, + "reward": 1.5306, + "step": 6649 + }, + { + "epoch": 0.79, + "learning_rate": 3.2103456203668223e-06, + "loss": 0.0201, + "reward": 1.4046, + "step": 6699 + }, + { + "epoch": 0.79, + "learning_rate": 3.041047384234521e-06, + "loss": 0.0178, + "reward": 1.5582, + "step": 6749 + }, + { + "epoch": 0.8, + "learning_rate": 2.875830238268942e-06, + "loss": 0.0193, + "reward": 1.4851, + "step": 6799 + }, + { + "epoch": 0.81, + "learning_rate": 2.714750564169339e-06, + "loss": 0.019, + "reward": 1.5795, + "step": 6849 + }, + { + "epoch": 0.81, + "learning_rate": 2.557863331688927e-06, + "loss": 0.0193, + "reward": 1.5771, + "step": 6899 + }, + { + "epoch": 0.82, + "learning_rate": 2.405222079876017e-06, + "loss": 0.0197, + "reward": 1.4866, + "step": 6949 + }, + { + "epoch": 0.82, + "learning_rate": 2.256878898803354e-06, + "loss": 0.0198, + "reward": 1.4232, + "step": 6999 + }, + { + "epoch": 0.83, + "learning_rate": 
2.112884411791984e-06, + "loss": 0.0196, + "reward": 1.5922, + "step": 7049 + }, + { + "epoch": 0.83, + "learning_rate": 1.9732877581356075e-06, + "loss": 0.0196, + "reward": 1.5317, + "step": 7099 + }, + { + "epoch": 0.84, + "learning_rate": 1.8381365763314151e-06, + "loss": 0.019, + "reward": 1.4884, + "step": 7149 + }, + { + "epoch": 0.85, + "learning_rate": 1.7074769878230494e-06, + "loss": 0.0181, + "reward": 1.589, + "step": 7199 + }, + { + "epoch": 0.85, + "learning_rate": 1.5813535812612856e-06, + "loss": 0.019, + "reward": 1.5272, + "step": 7249 + }, + { + "epoch": 0.86, + "learning_rate": 1.4598093972878007e-06, + "loss": 0.0204, + "reward": 1.5452, + "step": 7299 + }, + { + "epoch": 0.86, + "learning_rate": 1.3428859138471839e-06, + "loss": 0.0188, + "reward": 1.6511, + "step": 7349 + }, + { + "epoch": 0.87, + "learning_rate": 1.2306230320322798e-06, + "loss": 0.0192, + "reward": 1.3072, + "step": 7399 + }, + { + "epoch": 0.88, + "learning_rate": 1.1230590624675747e-06, + "loss": 0.0194, + "reward": 1.6043, + "step": 7449 + }, + { + "epoch": 0.88, + "learning_rate": 1.0202307122354288e-06, + "loss": 0.0185, + "reward": 1.5806, + "step": 7499 + }, + { + "epoch": 0.89, + "learning_rate": 9.221730723494504e-07, + "loss": 0.0187, + "reward": 1.3901, + "step": 7549 + }, + { + "epoch": 0.89, + "learning_rate": 8.289196057794096e-07, + "loss": 0.019, + "reward": 1.6659, + "step": 7599 + }, + { + "epoch": 0.9, + "learning_rate": 7.405021360317366e-07, + "loss": 0.0192, + "reward": 1.3981, + "step": 7649 + }, + { + "epoch": 0.91, + "learning_rate": 6.569508362894783e-07, + "loss": 0.0182, + "reward": 1.6831, + "step": 7699 + }, + { + "epoch": 0.91, + "learning_rate": 5.782942191154622e-07, + "loss": 0.0188, + "reward": 1.4812, + "step": 7749 + }, + { + "epoch": 0.92, + "learning_rate": 5.045591267221461e-07, + "loss": 0.0187, + "reward": 1.7532, + "step": 7799 + }, + { + "epoch": 0.92, + "learning_rate": 4.3577072181150035e-07, + "loss": 0.0185, + "reward": 1.6008, + "step": 7849 + }, + { + "epoch": 0.93, + "learning_rate": 3.719524789880202e-07, + "loss": 0.0192, + "reward": 1.8267, + "step": 7899 + }, + { + "epoch": 0.93, + "learning_rate": 3.1312617674783385e-07, + "loss": 0.0188, + "reward": 1.8393, + "step": 7949 + }, + { + "epoch": 0.94, + "learning_rate": 2.5931189004661406e-07, + "loss": 0.0182, + "reward": 1.4103, + "step": 7999 + }, + { + "epoch": 0.95, + "learning_rate": 2.1052798344882495e-07, + "loss": 0.0187, + "reward": 1.4023, + "step": 8049 + }, + { + "epoch": 0.95, + "learning_rate": 1.667911048606785e-07, + "loss": 0.0192, + "reward": 1.5402, + "step": 8099 + }, + { + "epoch": 0.96, + "learning_rate": 1.2811617984889056e-07, + "loss": 0.0182, + "reward": 1.5328, + "step": 8149 + }, + { + "epoch": 0.96, + "learning_rate": 9.451640654721538e-08, + "loss": 0.0183, + "reward": 1.7633, + "step": 8199 + }, + { + "epoch": 0.97, + "learning_rate": 6.600325115246831e-08, + "loss": 0.0183, + "reward": 1.5819, + "step": 8249 + }, + { + "epoch": 0.98, + "learning_rate": 4.2586444011600835e-08, + "loss": 0.0183, + "reward": 1.6798, + "step": 8299 + }, + { + "epoch": 0.98, + "learning_rate": 2.4273976301131818e-08, + "loss": 0.0193, + "reward": 1.742, + "step": 8349 + }, + { + "epoch": 0.99, + "learning_rate": 1.1072097300102168e-08, + "loss": 0.0189, + "reward": 1.4572, + "step": 8399 + }, + { + "epoch": 0.99, + "learning_rate": 2.98531225744747e-09, + "loss": 0.0179, + "reward": 1.4872, + "step": 8449 + }, + { + "epoch": 1.0, + "learning_rate": 1.6380854554465253e-11, + "loss": 
0.0185, + "reward": 1.9113, + "step": 8499 + } + ], + "max_steps": 8502, + "num_train_epochs": 1.0, + "total_flos": 0, + "trial_name": null, + "trial_params": null +} diff --git a/ppo_8_2/training_args.bin b/ppo_8_2/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..06e810d261f7498fcd78b1cb31be8ef4888d81fd --- /dev/null +++ b/ppo_8_2/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2 +size 3359 diff --git a/ppo_8_2/training_loss.png b/ppo_8_2/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..8b5453a63f5f3f69c6f0f574dfc22ac5f2b33b20 Binary files /dev/null and b/ppo_8_2/training_loss.png differ diff --git a/ppo_8_2/training_reward.png b/ppo_8_2/training_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..693f1f0373ac375663293a304e468e8ab5b6a28c Binary files /dev/null and b/ppo_8_2/training_reward.png differ diff --git a/ppo_8_2/value_head.bin b/ppo_8_2/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb35d31d845b8f6ca6811a1fea615043afaac11b --- /dev/null +++ b/ppo_8_2/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b774bcacdfb0cbbbbb56a9095a7bcdb2f85f4d6813cd616b32ab48f22941c6e +size 21491