flytech committed on
Commit
94c64f2
1 Parent(s): 68a52f0

Training in progress, step 4, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -4,30 +4,6 @@ library_name: peft
4
  ## Training procedure
5
 
6
 
7
- The following `bitsandbytes` quantization config was used during training:
8
- - quant_method: bitsandbytes
9
- - load_in_8bit: False
10
- - load_in_4bit: True
11
- - llm_int8_threshold: 6.0
12
- - llm_int8_skip_modules: None
13
- - llm_int8_enable_fp32_cpu_offload: False
14
- - llm_int8_has_fp16_weight: False
15
- - bnb_4bit_quant_type: nf4
16
- - bnb_4bit_use_double_quant: True
17
- - bnb_4bit_compute_dtype: float16
18
-
19
- The following `bitsandbytes` quantization config was used during training:
20
- - quant_method: bitsandbytes
21
- - load_in_8bit: False
22
- - load_in_4bit: True
23
- - llm_int8_threshold: 6.0
24
- - llm_int8_skip_modules: None
25
- - llm_int8_enable_fp32_cpu_offload: False
26
- - llm_int8_has_fp16_weight: False
27
- - bnb_4bit_quant_type: nf4
28
- - bnb_4bit_use_double_quant: True
29
- - bnb_4bit_compute_dtype: float16
30
-
31
  The following `bitsandbytes` quantization config was used during training:
32
  - quant_method: bitsandbytes
33
  - load_in_8bit: False
@@ -53,8 +29,6 @@ The following `bitsandbytes` quantization config was used during training:
53
  - bnb_4bit_compute_dtype: float16
54
  ### Framework versions
55
 
56
- - PEFT 0.5.0
57
- - PEFT 0.5.0
58
  - PEFT 0.5.0
59
 
60
  - PEFT 0.5.0
 
4
  ## Training procedure
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  The following `bitsandbytes` quantization config was used during training:
8
  - quant_method: bitsandbytes
9
  - load_in_8bit: False
 
29
  - bnb_4bit_compute_dtype: float16
30
  ### Framework versions
31
 
 
 
32
  - PEFT 0.5.0
33
 
34
  - PEFT 0.5.0
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:894902fc83ce981ce902d325f92c66c93f068f601ec88ba81ca2818c4f4ea82c
3
  size 40137613
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2afaa2d90767d6e6b3122609a0b28522b7c540e55fffa6cece521823863a13
3
  size 40137613
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f41804539ce4dbc951729526a56d2696d7d2042884d136c4e448563b20767c9c
3
  size 40036040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef41f8521e50bf504d99781460bd3460e64607713ee863c3a8562b76421fac45
3
  size 40036040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5f1842a91acf8bc3388db088cf49775e50c5c216645f7d35ef69df393c50c91
3
  size 20523679
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbc28aff6c1f0519ffb9cd7a501dc311b1aa599b81b95e0587dc2546c3ac6680
3
  size 20523679
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18aff74b667dba3ffb21fc4e30c6282bcfa51148f5f176894821bf027611df3f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90f803b28408be4400865e7afc43e8d783a7dbb70d4236447fec88b54e3956a2
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359ea6d423af08e8e9047ba958e9726b4e403615b5c587f71a606d0689d2621a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24d86c276b7647b8353d4643979580f8916c0427c3136aa6cb63bd500555f215
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.025,
5
  "eval_steps": 2,
6
- "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -20,13 +20,26 @@
20
  "eval_samples_per_second": 1.759,
21
  "eval_steps_per_second": 0.227,
22
  "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  }
24
  ],
25
  "logging_steps": 2,
26
  "max_steps": 240,
27
  "num_train_epochs": 3,
28
  "save_steps": 2,
29
- "total_flos": 332202164355072.0,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05,
5
  "eval_steps": 2,
6
+ "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
20
  "eval_samples_per_second": 1.759,
21
  "eval_steps_per_second": 0.227,
22
  "step": 2
23
+ },
24
+ {
25
+ "epoch": 0.05,
26
+ "learning_rate": 0.001,
27
+ "loss": 1.5289,
28
+ "step": 4
29
+ },
30
+ {
31
+ "epoch": 0.05,
32
+ "eval_runtime": 128.0886,
33
+ "eval_samples_per_second": 1.757,
34
+ "eval_steps_per_second": 0.226,
35
+ "step": 4
36
  }
37
  ],
38
  "logging_steps": 2,
39
  "max_steps": 240,
40
  "num_train_epochs": 3,
41
  "save_steps": 2,
42
+ "total_flos": 664404328710144.0,
43
  "trial_name": null,
44
  "trial_params": null
45
  }