winglian committed
Commit
4f2584f
1 Parent(s): 8d43785

shuffle and split dataset after save/load

FAQS.md CHANGED
@@ -1,4 +1,4 @@
 # FAQs
 
 - Can you train StableLM with this? Yes, but only with a single GPU atm. Multi GPU support is coming soon! Just waiting on this [PR](https://github.com/huggingface/transformers/pull/22874)
--
+- Will this work with Deepspeed? That's still a WIP, but setting `export ACCELERATE_USE_DEEPSPEED=true` should work in some cases
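
A minimal sketch of what the new FAQ entry describes: setting the flag before launching. Only `ACCELERATE_USE_DEEPSPEED` comes from this commit; the entrypoint and config path below are hypothetical placeholders.

```python
# Hypothetical launch wrapper for the DeepSpeed FAQ entry above.
# ACCELERATE_USE_DEEPSPEED is the flag named in the FAQ; the script
# and config names are placeholders, not part of this commit.
import os
import subprocess

env = dict(os.environ, ACCELERATE_USE_DEEPSPEED="true")
subprocess.run(
    ["accelerate", "launch", "scripts/finetune.py", "config.yml"],  # placeholder command
    env=env,
    check=True,
)
```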
ds_config.json CHANGED
@@ -11,11 +11,10 @@
     "min_loss_scale": 1
   },
   "scheduler": {
-    "type": "WarmupLR",
+    "type": "OneCycle",
     "params": {
-      "warmup_min_lr": "auto",
-      "warmup_max_lr": "auto",
-      "warmup_num_steps": "auto"
+      "cycle_min_lr": 1e-7,
+      "cycle_max_lr": 1e-4
     }
   },
   "zero_optimization": {
@@ -25,7 +24,8 @@
     "allgather_bucket_size": 5e8,
     "contiguous_gradients": true,
     "reduce_bucket_size": "auto",
-    "reduce_scatter": true
+    "reduce_scatter": true,
+    "stage3_gather_16bit_weights_on_model_save": true
   },
   "gradient_accumulation_steps": "auto",
   "gradient_clipping": "auto",
src/axolotl/utils/data.py CHANGED
@@ -119,16 +119,15 @@ def load_prepare_datasets(tokenizer, cfg, default_dataset_prepared_path):
             seq_length=max_packed_sequence_len,
         )
         logging.info("merging, packing, shuffling, and splitting master dataset")
-        # TODO don't split dataset here, shuffle and save first, then split, that way we can
-        # re-split when loading again
-        dataset = Dataset.from_list([_ for _ in constant_len_dataset]).train_test_split(
-            test_size=cfg.val_set_size, shuffle=True, seed=42
-        )
+        dataset = Dataset.from_list([_ for _ in constant_len_dataset]).shuffle(seed=42)
 
         if cfg.local_rank == 0:
             logging.info(f"Saving prepared dataset to disk... {prepared_ds_path}")
             dataset.save_to_disk(prepared_ds_path)
 
+        dataset = dataset.train_test_split(
+            test_size=cfg.val_set_size, shuffle=False
+        )
         train_dataset = dataset["train"]
         eval_dataset = dataset["test"]
 
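This hunk is the substance of the commit: the dataset is shuffled once with a fixed seed before being written to disk, and the train/test split then runs with `shuffle=False`, so a dataset reloaded from the prepared path yields the same split as the run that created it. A self-contained sketch of that flow, with hypothetical paths and sizes; `shuffle`, `save_to_disk`, `load_from_disk`, and `train_test_split` are standard `datasets` APIs:

```python
# Self-contained sketch of the new prepare/reload flow. Paths and sizes are
# hypothetical; the API calls are standard `datasets` methods.
from datasets import Dataset, load_from_disk

rows = [{"input_ids": [i]} for i in range(100)]  # stand-in for constant_len_dataset

# prepare once: shuffle with a fixed seed, then persist the shuffled order
dataset = Dataset.from_list(rows).shuffle(seed=42)
dataset.save_to_disk("prepared_ds")  # hypothetical path

# later runs: reload and split without reshuffling; the on-disk order is
# already randomized, so the same rows land in train/test every time
reloaded = load_from_disk("prepared_ds")
split = reloaded.train_test_split(test_size=0.05, shuffle=False)
train_dataset, eval_dataset = split["train"], split["test"]
```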
src/axolotl/utils/models.py CHANGED
@@ -75,7 +75,7 @@ def load_model(
     snapshot_download_kwargs = {}
     if cfg.base_model_ignore_patterns:
         snapshot_download_kwargs["ignore_patterns"] = cfg.base_model_ignore_patterns
-    cache_model_path = Path(snapshot_download(base_model, ** snapshot_download_kwargs))
+    cache_model_path = Path(snapshot_download(base_model, **snapshot_download_kwargs))
     files = (
         list(cache_model_path.glob("*.pt"))
         + list(cache_model_path.glob("*.safetensors"))
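
The models.py hunk is purely cosmetic: `** snapshot_download_kwargs` (with a space) is legal Python but unidiomatic. For reference, a sketch of the surrounding pattern; the repo id and ignore pattern below are hypothetical, while `snapshot_download` and its `ignore_patterns` argument are real `huggingface_hub` APIs.

```python
# Sketch of the snapshot_download pattern the fixed line uses; the repo id
# and ignore pattern are hypothetical examples.
from pathlib import Path
from huggingface_hub import snapshot_download

snapshot_download_kwargs = {"ignore_patterns": ["*.md"]}  # hypothetical filter
cache_model_path = Path(
    snapshot_download("openlm-research/open_llama_3b", **snapshot_download_kwargs)  # hypothetical repo
)
files = list(cache_model_path.glob("*.pt")) + list(cache_model_path.glob("*.safetensors"))
print(files)
```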