winglian committed on
Commit
f1f60cb
1 Parent(s): 450e04d

Flash attn hotfix (#951)

Browse files

* use previous arg

* use eager to use legacy attention that can be patched

Files changed (1) hide show
  1. src/axolotl/utils/models.py +4 -0
src/axolotl/utils/models.py CHANGED
@@ -324,6 +324,10 @@ def load_model(
324
  model_config._attn_implementation = ( # pylint: disable=protected-access
325
  "flash_attention_2"
326
  )
 
 
 
 
327
 
328
  try:
329
  if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
 
324
  model_config._attn_implementation = ( # pylint: disable=protected-access
325
  "flash_attention_2"
326
  )
327
+ else:
328
+ model_config._attn_implementation = ( # pylint: disable=protected-access
329
+ "eager"
330
+ )
331
 
332
  try:
333
  if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq: