winglian committed
Commit cbd7499
1 Parent(s): 5045696

add axolotl config

Files changed (1)
  1. configs/manticore-13b-v2.yml +114 -0
configs/manticore-13b-v2.yml ADDED
@@ -0,0 +1,114 @@
+ base_model: huggyllama/llama-13b
+ base_model_config: huggyllama/llama-13b
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: false
+ strict: false
+ push_dataset_to_hub: winglian
+ dataset_shard_num: 4
+ dataset_shard_idx: 0
+ datasets:
+   - path: redacted
+     data_files:
+       - v12_no_ai.shard_0.jsonl
+     type: pygmalion
+   - path: winglian/evals
+     data_files:
+       - hf/ARC-Challenge.jsonl
+       - hf/ARC-Easy.jsonl
+       - hf/riddle_sense.jsonl
+     type: explainchoice:chat
+   - path: winglian/evals
+     data_files:
+       - openai/tldr.jsonl
+     type: summarizetldr:chat
+   - path: winglian/evals
+     data_files:
+       - hf/gsm8k.jsonl
+     type: alpacachat.load_qa
+   - path: winglian/evals
+     data_files:
+       - hellaswag/hellaswag.jsonl
+     type: explainchoice:chat
+   - path: metaeval/ScienceQA_text_only
+     type: concisechoice:chat
+   - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
+     type: alpaca:chat
+   - path: ehartford/wizard_vicuna_70k_unfiltered
+     type: sharegpt:chat
+   - path: winglian/chatlogs-en-cleaned
+     data_files:
+       - sharegpt_cleaned.jsonl
+     type: sharegpt:chat
+   - path: teknium/GPT4-LLM-Cleaned
+     type: alpaca:chat
+   - path: teknium/GPTeacher-General-Instruct
+     data_files: gpt4-instruct-similarity-0.6-dataset.json
+     type: gpteacher:chat
+   - path: ewof/code-alpaca-instruct-unfiltered
+     type: alpaca:chat
+   - path: QingyiSi/Alpaca-CoT
+     data_files:
+       - Chain-of-Thought/formatted_cot_data/aqua_train.json
+       - Chain-of-Thought/formatted_cot_data/creak_train.json
+       - Chain-of-Thought/formatted_cot_data/ecqa_train.json
+       - Chain-of-Thought/formatted_cot_data/esnli_train.json
+       - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
+       - Chain-of-Thought/formatted_cot_data/qasc_train.json
+       - Chain-of-Thought/formatted_cot_data/qed_train.json
+       - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
+       - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
+       - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
+     type: alpaca:chat
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.02
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len: 2048
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_modules:
+ lora_fan_in_fan_out:
+ wandb_project: manticore-13b-v2
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./manticore-13b-v2
+ batch_size: 512
+ micro_batch_size: 8
+ num_epochs: 4
+ optimizer:
+ torchdistx_path:
+ lr_scheduler:
+ learning_rate: 0.00004
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 20
+ eval_steps: 10
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+
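For reference, a minimal sketch (not part of this commit) of how the finished file can be loaded and sanity-checked with PyYAML before launching a run. The gradient-accumulation arithmetic below (batch_size divided by micro_batch_size) is an assumption about how axolotl interprets these two fields, not a call into its API:

import yaml

# Load the config exactly as axolotl would read it from disk.
with open("configs/manticore-13b-v2.yml") as f:
    cfg = yaml.safe_load(f)

# Every dataset entry needs at least a source path and a prompt-format type.
for ds in cfg["datasets"]:
    assert "path" in ds and "type" in ds, f"incomplete dataset entry: {ds}"

# batch_size is the effective global batch; micro_batch_size is per forward
# pass, so their ratio approximates the gradient-accumulation steps
# (an assumption here; axolotl also factors in the number of devices).
accum = cfg["batch_size"] // cfg["micro_batch_size"]
print(f"{len(cfg['datasets'])} datasets, seq_len={cfg['sequence_len']}, "
      f"~{accum} gradient-accumulation steps per optimizer update")

Once the config validates, axolotl of this era was typically launched with something along the lines of accelerate launch scripts/finetune.py configs/manticore-13b-v2.yml; the exact entry point depends on the axolotl checkout in use.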