winglian committed on
Commit
c56b450
1 Parent(s): 1e07c16

drop empty tokenized rows too (#509)

Browse files
Files changed (1) hide show
  1. src/axolotl/utils/trainer.py +1 -1
src/axolotl/utils/trainer.py CHANGED
@@ -361,7 +361,7 @@ def add_position_ids(sample):
361
 
362
 
363
  def drop_long_seq(sample, sequence_len=2048):
364
- return len(sample["input_ids"]) <= sequence_len
365
 
366
 
367
  @contextmanager
 
361
 
362
 
363
  def drop_long_seq(sample, sequence_len=2048):
364
+ return len(sample["input_ids"]) <= sequence_len and len(sample["input_ids"]) > 0
365
 
366
 
367
  @contextmanager