The Objective Dad committed
Commit 6d57f2f
1 Parent(s): 20ed4c1

ergonomic update to optimizer config doc (#548)

Files changed (1)
  1. README.md +24 -0
README.md CHANGED
@@ -560,6 +560,30 @@ log_sweep_min_lr:
 log_sweep_max_lr:

 # specify optimizer
+# Valid values are driven by the Transformers OptimizerNames class, see:
+# https://github.com/huggingface/transformers/blob/95b374952dc27d8511541d6f5a4e22c9ec11fb24/src/transformers/training_args.py#L134
+#
+# Note that not all optimizers may be available in your environment, e.g. 'adamw_anyprecision' is part of
+# torchdistx and 'adamw_bnb_8bit' is provided by bitsandbytes (bnb.optim.Adam8bit). When in doubt, start with
+# the optimizer used in the examples/ for your model and fine-tuning use case.
+#
+# Valid values for 'optimizer' include:
+# - adamw_hf
+# - adamw_torch
+# - adamw_torch_fused
+# - adamw_torch_xla
+# - adamw_apex_fused
+# - adafactor
+# - adamw_anyprecision
+# - sgd
+# - adagrad
+# - adamw_bnb_8bit
+# - lion_8bit
+# - lion_32bit
+# - paged_adamw_32bit
+# - paged_adamw_8bit
+# - paged_lion_32bit
+# - paged_lion_8bit
 optimizer:
 # specify weight decay
 weight_decay:
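
For illustration, a minimal sketch of how this block might look once filled in; `adamw_torch` and `0.1` are placeholder values chosen for the example, not recommendations from this commit, and the right choice depends on your model, environment, and fine-tuning recipe:

```yaml
# specify optimizer
optimizer: adamw_torch   # any identifier from the OptimizerNames list above
# specify weight decay
weight_decay: 0.1        # placeholder value; tune for your use case
```

If an identifier is rejected at runtime, the linked OptimizerNames enum can be inspected directly (e.g. `from transformers.training_args import OptimizerNames`) to see which values the installed Transformers version actually accepts.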