winglian committed on
Commit
1066751
1 Parent(s): 1b63bf1

don't resize embeddings to multiples of 32x by default

Browse files
Files changed (2) hide show
  1. README.md +3 -0
  2. src/axolotl/utils/models.py +5 -1
README.md CHANGED
@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
322
  trust_remote_code:
323
  # use_fast option for tokenizer loading from_pretrained, default to True
324
  tokenizer_use_fast:
 
 
 
325
 
326
  # whether you are training a 4-bit GPTQ quantized model
327
  gptq: true
 
322
  trust_remote_code:
323
  # use_fast option for tokenizer loading from_pretrained, default to True
324
  tokenizer_use_fast:
325
+ # resize the model embeddings to multiples of 32 when new tokens are added
326
+ # this is reported to improve training speed on some models
327
+ resize_token_embeddings_to_32x:
328
 
329
  # whether you are training a 4-bit GPTQ quantized model
330
  gptq: true
src/axolotl/utils/models.py CHANGED
@@ -301,7 +301,11 @@ def load_model(
301
  **model_kwargs,
302
  )
303
 
304
- embeddings_len = math.ceil(len(tokenizer) / 32) * 32
 
 
 
 
305
  model.resize_token_embeddings(embeddings_len)
306
 
307
  if (
 
301
  **model_kwargs,
302
  )
303
 
304
+ embeddings_len = (
305
+ math.ceil(len(tokenizer) / 32) * 32
306
+ if cfg.resize_token_embeddings_to_32x
307
+ else len(tokenizer)
308
+ )
309
  model.resize_token_embeddings(embeddings_len)
310
 
311
  if (