winglian committed on
Commit
90036eb
1 Parent(s): 9032e61

optimize calculation of cu_seqlens from position_ids (#1084) [skip ci]

Browse files
Files changed (1) hide show
  1. src/axolotl/monkeypatch/utils.py +2 -1
src/axolotl/monkeypatch/utils.py CHANGED
@@ -55,6 +55,7 @@ def get_cu_seqlens(attn_mask):
55
  return torch.stack(results).to(dtype=torch.int32), torch.stack(max_seq_lens)
56
 
57
 
 
58
  def get_cu_seqlens_from_pos_ids(position_ids):
59
  """generate a cumulative sequence length mask for flash attention using pos ids"""
60
  if len(position_ids.shape) == 1:
@@ -81,7 +82,7 @@ def get_cu_seqlens_from_pos_ids(position_ids):
81
  # Get the indices where the sequence starts
82
  start_indices = torch.cat(
83
  [
84
- (seq_starts).nonzero(as_tuple=True)[0],
85
  torch.tensor([len(adjusted_row)], dtype=torch.int32, device=device),
86
  ]
87
  )
 
55
  return torch.stack(results).to(dtype=torch.int32), torch.stack(max_seq_lens)
56
 
57
 
58
+ @torch.jit.script
59
  def get_cu_seqlens_from_pos_ids(position_ids):
60
  """generate a cumulative sequence length mask for flash attention using pos ids"""
61
  if len(position_ids.shape) == 1:
 
82
  # Get the indices where the sequence starts
83
  start_indices = torch.cat(
84
  [
85
+ torch.nonzero(seq_starts).unbind(dim=1)[0],
86
  torch.tensor([len(adjusted_row)], dtype=torch.int32, device=device),
87
  ]
88
  )