utensil committed
Commit 72bf8aa (parent: 8afb0fb)

Create config-7b-qlora.yml

Files changed (1)
  1. examples/falcon/config-7b-qlora.yml +68 -0
examples/falcon/config-7b-qlora.yml ADDED
@@ -0,0 +1,68 @@
+ base_model: tiiuae/falcon-7b
+ base_model_config: tiiuae/falcon-7b
+ trust_remote_code: true
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+ load_in_8bit: false
+ load_in_4bit: true
+ gptq: false
+ strict: false
+ push_dataset_to_hub:
+ datasets:
+   - path: QingyiSi/Alpaca-CoT
+     data_files:
+       - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
+     type: "alpaca:chat"
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ adapter: qlora
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len: 2048
+ lora_r: 64
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ wandb_project: falcon-qlora
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./qlora-out
+ batch_size: 8
+ micro_batch_size: 4
+ num_epochs: 3
+ optimizer: paged_adamw_32bit
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: true
+ gradient_checkpointing: true
+ # stop training after this many evaluation losses have increased in a row
+ # https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
+ early_stopping_patience: 3
+ resume_from_checkpoint:
+ auto_resume_from_checkpoints: true
+ local_rank:
+ logging_steps: 1
+ xformers_attention: false
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 10
+ eval_steps: 5
+ save_steps: 10
+ debug:
+ deepspeed:
+ weight_decay: 0.000001
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   pad_token: "<|endoftext|>"
+   bos_token: ">>ABSTRACT<<"
+   eos_token: "<|endoftext|>"
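
For context, here is a minimal sketch of what the quantization and LoRA fields above (load_in_4bit, bf16, lora_r, lora_alpha, lora_dropout, lora_target_linear) roughly correspond to when wired up directly with transformers and peft. This is not axolotl's own implementation; in particular, the explicit target_modules list for Falcon's linear layers is an assumption here, since lora_target_linear: true tells axolotl to resolve those module names automatically.

# Sketch only: approximates the 4-bit + LoRA setup described by the config above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

base_model = "tiiuae/falcon-7b"

# load_in_4bit: true, with bf16 compute as in the config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    trust_remote_code=True,  # Falcon shipped custom modelling code at the time
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)

# lora_r / lora_alpha / lora_dropout from the config; target_modules is an
# assumption standing in for lora_target_linear: true (all linear layers).
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"],
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

With axolotl itself, the YAML is instead passed straight to the training entry point, e.g. accelerate launch scripts/finetune.py examples/falcon/config-7b-qlora.yml (entry-point path assumed from the repository layout of that period).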