Nanobit committed on
Commit
231031a
2 Parent(s): 5daf7d5 5491278

Merge pull request #275 from NanoCode012/feat/safetensors

Browse files
Files changed (2) hide show
  1. README.md +3 -0
  2. src/axolotl/utils/trainer.py +3 -0
README.md CHANGED
@@ -413,6 +413,9 @@ logging_steps:
413
  save_steps:
414
  eval_steps:
415
 
 
 
 
416
  # whether to mask out or include the human's prompt from the training labels
417
  train_on_inputs: false
418
  # don't use this, leads to wonky training (according to someone on the internet)
 
413
  save_steps:
414
  eval_steps:
415
 
416
+ # save model as safetensors (require safetensors package)
417
+ save_safetensors:
418
+
419
  # whether to mask out or include the human's prompt from the training labels
420
  train_on_inputs: false
421
  # don't use this, leads to wonky training (according to someone on the internet)
src/axolotl/utils/trainer.py CHANGED
@@ -182,6 +182,9 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
182
  training_arguments_kwargs["hub_model_id"] = cfg.hub_model_id
183
  training_arguments_kwargs["push_to_hub"] = True
184
 
 
 
 
185
  training_args = AxolotlTrainingArguments(
186
  per_device_train_batch_size=cfg.micro_batch_size,
187
  per_device_eval_batch_size=cfg.eval_batch_size
 
182
  training_arguments_kwargs["hub_model_id"] = cfg.hub_model_id
183
  training_arguments_kwargs["push_to_hub"] = True
184
 
185
+ if cfg.save_safetensors:
186
+ training_arguments_kwargs["save_safetensors"] = cfg.save_safetensors
187
+
188
  training_args = AxolotlTrainingArguments(
189
  per_device_train_batch_size=cfg.micro_batch_size,
190
  per_device_eval_batch_size=cfg.eval_batch_size