tmm1 committed on
Commit
fd55bc8
1 Parent(s): 8e197f6

use math.ceil instead of round /cc #498

Browse files
Files changed (1) hide show
  1. src/axolotl/utils/trainer.py +3 -1
src/axolotl/utils/trainer.py CHANGED
@@ -588,7 +588,9 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
588
  "padding": True, # True/"longest" is the default
589
  }
590
  if cfg.pad_to_sequence_len:
591
- data_collator_kwargs["pad_to_multiple_of"] = 64 * round(cfg.sequence_len / 64)
 
 
592
  else:
593
  # A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
594
  # https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html
 
588
  "padding": True, # True/"longest" is the default
589
  }
590
  if cfg.pad_to_sequence_len:
591
+ data_collator_kwargs["pad_to_multiple_of"] = 64 * math.ceil(
592
+ cfg.sequence_len / 64
593
+ )
594
  else:
595
  # A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
596
  # https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html