winglian committed
Commit 88e17ff
1 parent: 6f84980

add float16 docs and tweak typehints

Files changed (2):
  1. README.md (+8 −0)
  2. src/axolotl/utils/models.py (+5 −3)
README.md CHANGED

@@ -264,6 +264,8 @@ See sample configs in [configs](configs) folder or [examples](examples) for quic
  bf16: true # require >=ampere
  fp16: true
  tf32: true # require >=ampere
+ bfloat16: true # require >=ampere, use instead of bf16 when you don't want AMP
+ float16: true # use instead of fp16 when you don't want AMP
  ```
  Note: Repo does not do 4-bit quantization.

@@ -522,6 +524,12 @@ Add below flag to train command above
  --merge_lora --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
  ```

+ If you run out of CUDA memory, you can try to merge in system RAM with
+
+ ```bash
+ CUDA_VISIBLE_DEVICES="" python3 scripts/finetune.py ...
+ ```
+
  ## Common Errors 🧰

  > Cuda out of memory
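For context on the two new flags: `bf16`/`fp16` enable mixed-precision (AMP) training, while `bfloat16`/`float16` load the model weights directly in the reduced dtype with no autocast. A minimal sketch of that distinction (the flag handling below is an illustration under that assumption, not the repo's exact code):

```python
import torch

# Sketch (assumed semantics): bf16/fp16 turn on AMP autocast around
# fp32 weights, while bfloat16/float16 cast the weights themselves
# and skip AMP entirely.
def pick_load_dtype(cfg: dict) -> torch.dtype:
    if cfg.get("bfloat16"):  # full bf16 weights, no AMP (>=Ampere)
        return torch.bfloat16
    if cfg.get("float16"):  # full fp16 weights, no AMP
        return torch.float16
    return torch.float32  # bf16/fp16 are applied later via AMP autocast


print(pick_load_dtype({"float16": True}))  # torch.float16
```

With AMP, master weights and optimizer state stay in fp32 and only the forward/backward pass is autocast; with the full-cast flags, everything runs in the reduced dtype, which saves memory but can be less numerically stable for fp16.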
src/axolotl/utils/models.py CHANGED

@@ -11,13 +11,14 @@ import bitsandbytes as bnb
  import torch
  import transformers
  from optimum.bettertransformer import BetterTransformer
- from transformers import PreTrainedModel  # noqa: F401
- from transformers import (
+ from transformers import (  # noqa: F401
      AutoConfig,
      AutoModelForCausalLM,
      AutoTokenizer,
      BitsAndBytesConfig,
      LlamaConfig,
+     PreTrainedModel,
+     PreTrainedTokenizerBase,
  )

  from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN

@@ -71,7 +72,7 @@ def load_tokenizer(
  def load_model(
      base_model, base_model_config, model_type, tokenizer, cfg, adapter="lora"
  ):
-     # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+     # type: (str, str, str, PreTrainedTokenizerBase, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
      """
      Load a model from a base model and a model type.
      """

@@ -284,6 +285,7 @@ def load_model(
  model = AutoModelForCausalLM.from_pretrained(
      base_model,
      load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
+     load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
      torch_dtype=torch_dtype,
      device_map=cfg.device_map,
      trust_remote_code=cfg.trust_remote_code or False,
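For reference, the updated type comment reads as the following inline annotations (a sketch: `DictDefault` and `PeftConfig` are kept as string forward references since their imports are outside this hunk):

```python
from typing import Optional, Tuple

from transformers import PreTrainedModel, PreTrainedTokenizerBase


def load_model(
    base_model: str,
    base_model_config: str,
    model_type: str,
    tokenizer: PreTrainedTokenizerBase,
    cfg: "DictDefault",  # axolotl's dict wrapper, forward-referenced
    adapter: Optional[str] = "lora",
) -> Tuple[PreTrainedModel, Optional["PeftConfig"]]:
    ...
```

`PreTrainedTokenizerBase` is the more accurate hint here: `AutoTokenizer` is a factory whose `from_pretrained` returns a tokenizer instance, but the class itself is not a tokenizer type.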
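The new `load_in_4bit` argument mirrors the 8-bit guard: both are enabled only when an adapter is configured. In more recent transformers releases the same request is usually expressed through a quantization config; a hedged sketch under that assumption (the model id is hypothetical):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch: 4-bit loading via BitsAndBytesConfig rather than the bare kwarg.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype for 4-bit layers
)
model = AutoModelForCausalLM.from_pretrained(
    "huggyllama/llama-7b",  # hypothetical base model for illustration
    quantization_config=bnb_config,
    device_map="auto",
)
```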