winglian committed
Commit d060c80
1 Parent(s): d33a975

add llama 7b config and fix lora_fan_in_fan_out for llama (copy-paste bug)

configs/llama_65B_alpaca.yml CHANGED
@@ -22,7 +22,7 @@ lora_dropout: 0.05
 lora_target_modules:
   - q_proj
   - w_proj
-lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific
+lora_fan_in_fan_out: false
 wandb_project: llama-65b-lora
 wandb_watch:
 wandb_run_id:
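
The one-line fix matters because peft's LoraConfig.fan_in_fan_out flag tells the adapter whether the wrapped layer stores its weight transposed, i.e. as (fan_in, fan_out) the way GPT-2-style Conv1D modules do. LLaMA's attention projections are ordinary nn.Linear layers, stored (fan_out, fan_in), so the value copied over from the GPTNeoX-oriented config has to be flipped to false. A minimal sketch of the two layouts (layer sizes are arbitrary and not taken from this repo):

import torch.nn as nn
from transformers.pytorch_utils import Conv1D

# LLaMA's q_proj/k_proj/v_proj/o_proj are plain nn.Linear:
# the weight is stored as (out_features, in_features) -> fan_in_fan_out: false
linear = nn.Linear(4096, 1024)
print(linear.weight.shape)   # torch.Size([1024, 4096])

# GPT-2-style Conv1D stores the weight transposed, (in_features, out_features);
# that layout is the case fan_in_fan_out: true exists for.
conv1d = Conv1D(nf=1024, nx=4096)
print(conv1d.weight.shape)   # torch.Size([4096, 1024])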
configs/llama_7B_alpaca.yml ADDED
@@ -0,0 +1,41 @@
+base_model: huggyllama/llama-7b
+model_type: LlamaForCausalLM
+tokenizer_type: LlamaTokenizer
+load_in_8bit: true
+datasets:
+  - path: data/alpaca_data_gpt4.jsonl
+    type: alpaca
+  - path: data/vicuna_cleaned.jsonl
+    type: sharegpt
+  - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl
+    type: gpteacher
+  - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl
+    type: gpteacher
+dataset_prepared_path: data/last_run
+val_set_size: 0.04
+adapter: lora
+lora_model_dir:
+sequence_len: 2048
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - w_proj
+lora_fan_in_fan_out: false
+wandb_project: llama-7b-lora
+wandb_watch:
+wandb_run_id:
+wandb_log_model: checkpoint
+output_dir: ./lora-llama-alpaca
+batch_size: 128
+micro_batch_size: 16
+num_epochs: 5
+learning_rate: 0.00003
+train_on_inputs: false
+group_by_length: false
+bf16: true
+tf32: true
+resume_from_checkpoint:
+local_rank:
+deepspeed:
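
To show how the fields of the new 7B config fit together, here is a hedged sketch, assuming a stock transformers + peft setup rather than this repo's actual trainer: it loads the YAML, builds the 8-bit base model, and maps the lora_* fields onto a LoraConfig. The config path and the huggyllama/llama-7b checkpoint come from the diff above; everything else (the dtype handling, device_map choice, and the gradient-accumulation arithmetic) is illustrative.

import yaml
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import LoraConfig, TaskType, get_peft_model

with open("configs/llama_7B_alpaca.yml") as f:
    cfg = yaml.safe_load(f)

tokenizer = LlamaTokenizer.from_pretrained(cfg["base_model"])
model = LlamaForCausalLM.from_pretrained(
    cfg["base_model"],
    load_in_8bit=cfg["load_in_8bit"],          # 8-bit base weights (bitsandbytes)
    torch_dtype=torch.bfloat16 if cfg["bf16"] else torch.float16,
    device_map="auto",
)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    # note: LLaMA has no "w_proj" module, so only q_proj layers actually
    # receive adapters from this list; peft skips names that match nothing.
    target_modules=cfg["lora_target_modules"],
    fan_in_fan_out=cfg["lora_fan_in_fan_out"],  # false for LLaMA, per the fix above
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# batch_size is the effective batch and micro_batch_size what fits per step,
# so a trainer would presumably accumulate gradients: 128 // 16 = 8.
grad_accum_steps = cfg["batch_size"] // cfg["micro_batch_size"]

The last lines are only there to make the batch_size / micro_batch_size relationship explicit; the YAML itself carries no gradient_accumulation_steps field.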