winglian committed
Commit: 2d8def6
Parent: 44c9d01

simplify by removing duplicate base_model_config (#772)

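With this change, the example configs and test fixtures only set base_model; normalize_config now backfills base_model_config from base_model whenever that key is unset (a minimal sketch of the new behavior follows the src/axolotl/utils/config.py hunk below).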
examples/cerebras/btlm-ft.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: cerebras/btlm-3b-8k-base
-base_model_config: cerebras/btlm-3b-8k-base
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
 trust_remote_code: true
examples/cerebras/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-base_model_config: cerebras/Cerebras-GPT-1.3B
 load_in_8bit: false
 load_in_4bit: true
 strict: false
examples/code-llama/13b/lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-13b-hf
-base_model_config: codellama/CodeLlama-13b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/13b/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-13b-hf
-base_model_config: codellama/CodeLlama-13b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/34b/lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-34b-hf
-base_model_config: codellama/CodeLlama-34b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/34b/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-34b-hf
-base_model_config: codellama/CodeLlama-34b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/7b/lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-7b-hf
-base_model_config: codellama/CodeLlama-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/code-llama/7b/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: codellama/CodeLlama-7b-hf
-base_model_config: codellama/CodeLlama-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 is_llama_derived_model: true
examples/falcon/config-7b-lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
examples/falcon/config-7b-qlora.yml CHANGED
@@ -1,7 +1,6 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
 model_type: AutoModelForCausalLM
examples/falcon/config-7b.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: tiiuae/falcon-7b
-base_model_config: tiiuae/falcon-7b
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
examples/gptj/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
-base_model_config: EleutherAI/gpt-j-6b
 load_in_8bit: false
 load_in_4bit: true
 strict: false
examples/jeopardy-bot/config.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: huggyllama/llama-7b
-base_model_config: huggyllama/llama-7b
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/llama-2/fft_optimized.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/gptq-lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-base_model_config: TheBloke/Llama-2-7B-GPTQ
 is_llama_derived_model: false
 gptq: true
 gptq_disable_exllama: true
examples/llama-2/lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/relora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: NousResearch/Llama-2-7b-hf
-base_model_config: NousResearch/Llama-2-7b-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 is_llama_derived_model: true
examples/llama-2/tiny-llama.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: PY007/TinyLlama-1.1B-step-50K-105b
-base_model_config: PY007/TinyLlama-1.1B-step-50K-105b

 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
examples/mistral/config.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: mistralai/Mistral-7B-v0.1
-base_model_config: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 is_mistral_derived_model: true
examples/mistral/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: mistralai/Mistral-7B-v0.1
-base_model_config: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 is_mistral_derived_model: true
examples/mpt-7b/config.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: mosaicml/mpt-7b
-base_model_config: mosaicml/mpt-7b
 tokenizer_type: AutoTokenizer
 trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
examples/openllama-3b/config.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/openllama-3b/lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: true
examples/openllama-3b/qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: openlm-research/open_llama_3b_v2
-base_model_config: openlm-research/open_llama_3b_v2
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 load_in_8bit: false
examples/phi/phi-ft.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: microsoft/phi-1_5
-base_model_config: microsoft/phi-1_5
 model_type: MixFormerSequentialForCausalLM
 tokenizer_type: AutoTokenizer
 is_llama_derived_model: false
examples/phi/phi-qlora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: microsoft/phi-1_5
-base_model_config: microsoft/phi-1_5
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 is_llama_derived_model: false
examples/pythia-12b/config.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: EleutherAI/pythia-12b-deduped
-base_model_config: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch* # prefer safetensors
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
examples/pythia/lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-base_model_config: EleutherAI/pythia-1.4b-deduped
 load_in_8bit: true
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
examples/redpajama/config-3b.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
-base_model_config: togethercomputer/RedPajama-INCITE-Chat-3B-v1
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
 trust_remote_code:
examples/replit-3b/config-lora.yml CHANGED
@@ -1,5 +1,4 @@
 base_model: replit/replit-code-v1-3b
-base_model_config: replit/replit-code-v1-3b
 trust_remote_code: true
 load_in_8bit: false
 datasets:
examples/xgen-7b/xgen-7b-8k-qlora.yml CHANGED
@@ -1,7 +1,6 @@
 # An example finetuning Saleforce's XGen-7b model with 8k context using qlora
 # on Tim Dettmer's Guanaco dataset.
 base_model: Salesforce/xgen-7b-8k-base
-base_model_config: Salesforce/xgen-7b-8k-base
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
src/axolotl/utils/config.py CHANGED
@@ -79,6 +79,9 @@ def normalize_config(cfg):

     cfg.dataset_processes = cfg.dataset_processes or os.cpu_count()

+    if not cfg.base_model_config:
+        cfg.base_model_config = cfg.base_model
+
     model_config = load_model_config(cfg)
     cfg.model_config_type = model_config.model_type

tests/e2e/test_fused_llama.py CHANGED
@@ -31,7 +31,6 @@ class TestFusedLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "flash_attention": True,
                 "flash_attn_fuse_qkv": True,
                 "flash_attn_fuse_mlp": True,
tests/e2e/test_lora_llama.py CHANGED
@@ -29,7 +29,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -72,7 +71,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "JackFram/llama-68m",
-                "base_model_config": "JackFram/llama-68m",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
                 "sample_packing": True,
@@ -117,7 +115,6 @@ class TestLoraLlama(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
-                "base_model_config": "TheBlokeAI/jackfram_llama-68m-GPTQ",
                 "model_type": "AutoModelForCausalLM",
                 "tokenizer_type": "LlamaTokenizer",
                 "sequence_len": 1024,
tests/e2e/test_mistral.py CHANGED
@@ -31,7 +31,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "load_in_8bit": True,
@@ -77,7 +76,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "val_set_size": 0.1,
tests/e2e/test_mistral_samplepack.py CHANGED
@@ -31,7 +31,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 1024,
@@ -78,7 +77,6 @@ class TestMistral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "openaccess-ai-collective/tiny-mistral",
-                "base_model_config": "openaccess-ai-collective/tiny-mistral",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 1024,
tests/e2e/test_phi.py CHANGED
@@ -27,7 +27,6 @@ class TestPhi(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
-                "base_model_config": "microsoft/phi-1_5",
                 "trust_remote_code": True,
                 "model_type": "MixFormerSequentialForCausalLM",
                 "tokenizer_type": "AutoTokenizer",
@@ -71,7 +70,6 @@ class TestPhi(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "microsoft/phi-1_5",
-                "base_model_config": "microsoft/phi-1_5",
                 "trust_remote_code": True,
                 "model_type": "MixFormerSequentialForCausalLM",
                 "tokenizer_type": "AutoTokenizer",
tests/test_normalize_config.py CHANGED
@@ -37,3 +37,10 @@ class NormalizeConfigTestCase(unittest.TestCase):
         normalize_config(cfg)

         assert cfg.learning_rate == 0.00005
+
+    def test_base_model_config_set_when_empty(self):
+        cfg = self._get_base_cfg()
+        del cfg.base_model_config
+        normalize_config(cfg)
+
+        assert cfg.base_model_config == cfg.base_model
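The new test exercises the fallback end to end: after deleting the key from the DictDefault fixture, the missing lookup reads as falsy (axolotl's DictDefault returns None for absent keys), so normalize_config takes the new branch and copies base_model into base_model_config.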