tmm1 committed on
Commit
a213d99
1 Parent(s): fbf49a4
src/axolotl/monkeypatch/llama_attn_hijack_flash.py CHANGED
@@ -155,12 +155,10 @@ def flashattn_forward(
155
  # during training q,k,v always have same seqlen
156
  assert key_states.shape == query_states.shape
157
  is_causal = True
158
- elif past_key_value is None:
159
- is_causal = True
160
  else:
161
  # turn off FA causal mask after first inference autoregressive iteration
162
  # only on first autoregressive step q,k,v have same seqlen
163
- is_causal = past_key_value is not None
164
 
165
  if cu_seqlens is not None and max_seqlen is not None:
166
  # special handling using sample packing
 
155
  # during training q,k,v always have same seqlen
156
  assert key_states.shape == query_states.shape
157
  is_causal = True
 
 
158
  else:
159
  # turn off FA causal mask after first inference autoregressive iteration
160
  # only on first autoregressive step q,k,v have same seqlen
161
+ is_causal = key_states.shape == query_states.shape
162
 
163
  if cu_seqlens is not None and max_seqlen is not None:
164
  # special handling using sample packing