winglian committed on
Commit
e7aa7b1
1 Parent(s): e5bb22a

gracefully handle length feature used for group by (#565)

Browse files
Files changed (1) hide show
  1. src/axolotl/utils/dataloader.py +2 -0
src/axolotl/utils/dataloader.py CHANGED
@@ -223,6 +223,8 @@ class MultipackDistributedDataloader:
223
  concatenated = {}
224
  batched_data = [self.dataset[batch_idx] for batch_idx in batch]
225
  for feature in features:
 
 
226
  if feature == "attention_mask":
227
  arrays = [
228
  (attn_mask_cum_idx + idx + 1) * np.array(item[feature])
 
223
  concatenated = {}
224
  batched_data = [self.dataset[batch_idx] for batch_idx in batch]
225
  for feature in features:
226
+ if feature == "length":
227
+ continue
228
  if feature == "attention_mask":
229
  arrays = [
230
  (attn_mask_cum_idx + idx + 1) * np.array(item[feature])