winglian committed
Commit c2a0792
Parent: 5c3f5db

Swap batch_size for gradient_accumulation_steps to decouple the effective batch size from the number of GPUs

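For context on the change: in configs like these, the samples consumed per optimizer step generally work out to micro_batch_size × gradient_accumulation_steps × number of GPUs. The old batch_size key expressed that product directly, so the accumulation steps had to be back-calculated from the GPU count and the same config could train differently on different hardware; pinning gradient_accumulation_steps removes that dependence. The sketch below illustrates the arithmetic only — the helper names are hypothetical and are not code from this repo.

```python
# Illustrative sketch only (hypothetical helpers, not code from this repo):
# how gradient_accumulation_steps relates to the old batch_size key.

def effective_batch_size(micro_batch_size: int,
                         gradient_accumulation_steps: int,
                         num_gpus: int) -> int:
    """Samples consumed per optimizer step across all GPUs."""
    return micro_batch_size * gradient_accumulation_steps * num_gpus

def grad_accum_from_batch_size(batch_size: int,
                               micro_batch_size: int,
                               num_gpus: int) -> int:
    """Old-style derivation: accumulation steps fell out of the GPU count."""
    return max(batch_size // (micro_batch_size * num_gpus), 1)

if __name__ == "__main__":
    # Old README values: batch_size: 8, micro_batch_size: 2
    print(grad_accum_from_batch_size(8, 2, num_gpus=1))  # 4 accumulation steps on 1 GPU
    print(grad_accum_from_batch_size(8, 2, num_gpus=4))  # 1 accumulation step on 4 GPUs
    # New style: gradient_accumulation_steps is pinned in the config, so the
    # schedule no longer changes silently with the number of GPUs.
    print(effective_batch_size(2, gradient_accumulation_steps=1, num_gpus=4))  # 8
```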
README.md CHANGED
@@ -265,7 +265,7 @@ wandb_log_model: # 'checkpoint'
 output_dir: ./completed-model
 
 # training hyperparameters
-batch_size: 8
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 eval_batch_size: 2
 num_epochs: 3
configs/cerebras_1_3B_alpaca.yml CHANGED
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-alpaca
-batch_size: 32
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.0003
configs/galactica_1_3B.yml CHANGED
@@ -23,7 +23,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-llama-alpaca
-batch_size: 32
+gradient_accumulation_steps: 1
 micro_batch_size: 16
 num_epochs: 3
 learning_rate: 0.00003
configs/gpt_neox_20b.yml CHANGED
@@ -25,7 +25,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./gpt4all-neox-20b
-batch_size: 48
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.00003
configs/llama_13B_alpaca.yml CHANGED
@@ -23,7 +23,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./llama-13b-sharegpt
-batch_size: 64
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 warmup_steps: 1000
 save_steps:
configs/llama_65B_alpaca.yml CHANGED
@@ -29,7 +29,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-llama-alpaca
-batch_size: 128
+gradient_accumulation_steps: 1
 micro_batch_size: 16
 warmup_steps: 1000
 save_steps:
configs/llama_7B_4bit.yml CHANGED
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-test
-batch_size: 8
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 3
 warmup_steps: 100
configs/llama_7B_alpaca.yml CHANGED
@@ -28,7 +28,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-llama-alpaca
-batch_size: 128
+gradient_accumulation_steps: 1
 micro_batch_size: 16
 num_epochs: 5
 learning_rate: 0.00003
configs/llama_7B_jeopardy.yml CHANGED
@@ -24,7 +24,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./jeopardy-bot-7b
-batch_size: 4
+gradient_accumulation_steps: 2
 micro_batch_size: 1
 num_epochs: 2
 optimizer: adamw_bnb_8bit
configs/pythia_1_2B_alpaca.yml CHANGED
@@ -28,7 +28,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-alpaca
-batch_size: 48
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.00001
configs/quickstart.yml CHANGED
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-test
-batch_size: 4
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 warmup_steps: 100
configs/sample.yml CHANGED
@@ -53,7 +53,8 @@ wandb_log_model:
 # where to save the finsihed model to
 output_dir: ./completed-model
 # training hyperparameters
-batch_size: 8
+gradient_accumulation_steps: 1
+batch_size:
 micro_batch_size: 2
 num_epochs: 3
 warmup_steps: 100
configs/stability_3b.yml CHANGED
@@ -22,7 +22,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./stable-alpaca-3b
-batch_size: 2
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
 optimizer: adamw_bnb_8bit
configs/vicuna_13B_4bit_reflect.yml CHANGED
@@ -30,7 +30,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-reflect
-batch_size: 8
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 3
 learning_rate: 0.00003
examples/gptq-lora-7b/config.yml CHANGED
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./llama-7b-lora-int4
-batch_size: 1
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit
examples/mpt-7b/config.yml CHANGED
@@ -24,7 +24,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./mpt-alpaca-7b
-batch_size: 1
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit