Suprit committed on
Commit
eb3bdc6
1 Parent(s): ef00c6f

Upload 148 files

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. ppo_8_2/README.md +9 -0
  2. ppo_8_2/adapter_config.json +21 -0
  3. ppo_8_2/adapter_model.bin +3 -0
  4. ppo_8_2/checkpoint-1000/README.md +9 -0
  5. ppo_8_2/checkpoint-1000/adapter_config.json +21 -0
  6. ppo_8_2/checkpoint-1000/adapter_model.bin +3 -0
  7. ppo_8_2/checkpoint-1000/finetuning_args.json +13 -0
  8. ppo_8_2/checkpoint-1000/reward/adapter_config.json +21 -0
  9. ppo_8_2/checkpoint-1000/reward/adapter_model.bin +3 -0
  10. ppo_8_2/checkpoint-1000/training_args.bin +3 -0
  11. ppo_8_2/checkpoint-1000/value_head.bin +3 -0
  12. ppo_8_2/checkpoint-1500/README.md +9 -0
  13. ppo_8_2/checkpoint-1500/adapter_config.json +21 -0
  14. ppo_8_2/checkpoint-1500/adapter_model.bin +3 -0
  15. ppo_8_2/checkpoint-1500/finetuning_args.json +13 -0
  16. ppo_8_2/checkpoint-1500/reward/adapter_config.json +21 -0
  17. ppo_8_2/checkpoint-1500/reward/adapter_model.bin +3 -0
  18. ppo_8_2/checkpoint-1500/training_args.bin +3 -0
  19. ppo_8_2/checkpoint-1500/value_head.bin +3 -0
  20. ppo_8_2/checkpoint-2000/README.md +9 -0
  21. ppo_8_2/checkpoint-2000/adapter_config.json +21 -0
  22. ppo_8_2/checkpoint-2000/adapter_model.bin +3 -0
  23. ppo_8_2/checkpoint-2000/finetuning_args.json +13 -0
  24. ppo_8_2/checkpoint-2000/reward/adapter_config.json +21 -0
  25. ppo_8_2/checkpoint-2000/reward/adapter_model.bin +3 -0
  26. ppo_8_2/checkpoint-2000/training_args.bin +3 -0
  27. ppo_8_2/checkpoint-2000/value_head.bin +3 -0
  28. ppo_8_2/checkpoint-2500/README.md +9 -0
  29. ppo_8_2/checkpoint-2500/adapter_config.json +21 -0
  30. ppo_8_2/checkpoint-2500/adapter_model.bin +3 -0
  31. ppo_8_2/checkpoint-2500/finetuning_args.json +13 -0
  32. ppo_8_2/checkpoint-2500/reward/adapter_config.json +21 -0
  33. ppo_8_2/checkpoint-2500/reward/adapter_model.bin +3 -0
  34. ppo_8_2/checkpoint-2500/training_args.bin +3 -0
  35. ppo_8_2/checkpoint-2500/value_head.bin +3 -0
  36. ppo_8_2/checkpoint-3000/README.md +9 -0
  37. ppo_8_2/checkpoint-3000/adapter_config.json +21 -0
  38. ppo_8_2/checkpoint-3000/adapter_model.bin +3 -0
  39. ppo_8_2/checkpoint-3000/finetuning_args.json +13 -0
  40. ppo_8_2/checkpoint-3000/reward/adapter_config.json +21 -0
  41. ppo_8_2/checkpoint-3000/reward/adapter_model.bin +3 -0
  42. ppo_8_2/checkpoint-3000/training_args.bin +3 -0
  43. ppo_8_2/checkpoint-3000/value_head.bin +3 -0
  44. ppo_8_2/checkpoint-3500/README.md +9 -0
  45. ppo_8_2/checkpoint-3500/adapter_config.json +21 -0
  46. ppo_8_2/checkpoint-3500/adapter_model.bin +3 -0
  47. ppo_8_2/checkpoint-3500/finetuning_args.json +13 -0
  48. ppo_8_2/checkpoint-3500/reward/adapter_config.json +21 -0
  49. ppo_8_2/checkpoint-3500/reward/adapter_model.bin +3 -0
  50. ppo_8_2/checkpoint-3500/training_args.bin +3 -0
ppo_8_2/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c928225a000087e828682fd7dbb78c2e0967c4809838b2625c8172f6d7e5ea9
3
+ size 26269517
ppo_8_2/checkpoint-1000/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-1000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3b38d60a11127e242f9a807b77448d47842e0f18af9424ae49afe5390c54a56
3
+ size 26269517
ppo_8_2/checkpoint-1000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
ppo_8_2/checkpoint-1000/reward/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-1000/reward/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
ppo_8_2/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
3
+ size 3359
ppo_8_2/checkpoint-1000/value_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee6bbe9deb8bed10ff62722d2119134fd8ec11e8e788cf2f711483bd7dc20395
3
+ size 21491
ppo_8_2/checkpoint-1500/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/checkpoint-1500/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-1500/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b098c659862af7bed6745f049aab340876fc9b217030cdc029e3ce977b3ce4d6
3
+ size 26269517
ppo_8_2/checkpoint-1500/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
ppo_8_2/checkpoint-1500/reward/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-1500/reward/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
ppo_8_2/checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
3
+ size 3359
ppo_8_2/checkpoint-1500/value_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90d06cd75a2a587a6c8982bf0f843e42ecdd4626396bb9414094056407b0169
3
+ size 21491
ppo_8_2/checkpoint-2000/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/checkpoint-2000/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-2000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf7b9f520e3c9685f93827f6fc58dfb518aa8e08b106d294e8b7636cc904b1a
3
+ size 26269517
ppo_8_2/checkpoint-2000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
ppo_8_2/checkpoint-2000/reward/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-2000/reward/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
ppo_8_2/checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
3
+ size 3359
ppo_8_2/checkpoint-2000/value_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407f2258d250161d932305f5295f61b79a4e1abdf8cb52d7ac71febe14ed222d
3
+ size 21491
ppo_8_2/checkpoint-2500/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/checkpoint-2500/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-2500/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c4a5692a05c7670f077797e6b91105f40fb45c93f02fdec53c1c8e5722b3e2
3
+ size 26269517
ppo_8_2/checkpoint-2500/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
ppo_8_2/checkpoint-2500/reward/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-2500/reward/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
ppo_8_2/checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
3
+ size 3359
ppo_8_2/checkpoint-2500/value_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449a3bc0c64e508f458b2d25423d2b1b1cc4aefd2b77836d0d24438044d47764
3
+ size 21491
ppo_8_2/checkpoint-3000/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/checkpoint-3000/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-3000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1cddedf88eb5949bf2f92a31823f7dae138a9b27e552274e7487121e55e1f8b
3
+ size 26269517
ppo_8_2/checkpoint-3000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
ppo_8_2/checkpoint-3000/reward/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-3000/reward/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
ppo_8_2/checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
3
+ size 3359
ppo_8_2/checkpoint-3000/value_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff7d97a52142494b42184b5e5873fa764f12e053ace8543cca9d5ca06375419d
3
+ size 21491
ppo_8_2/checkpoint-3500/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.4.0
ppo_8_2/checkpoint-3500/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-3500/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7386a10fda60048403b823666286102ca2a4970b45c718d62879e0ab3b302b5
3
+ size 26269517
ppo_8_2/checkpoint-3500/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "q_proj",
8
+ "v_proj"
9
+ ],
10
+ "name_module_trainable": "mlp",
11
+ "num_hidden_layers": 32,
12
+ "num_layer_trainable": 3
13
+ }
ppo_8_2/checkpoint-3500/reward/adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/hy-tmp/Ziya-LLaMA-13B-v1",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
ppo_8_2/checkpoint-3500/reward/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
3
+ size 443
ppo_8_2/checkpoint-3500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62ae8853df9b7076a445889032c6f5e0e6027dd349ea6633ce27d8f17c6c3f2
3
+ size 3359