Nanobit committed on
Commit
cb18856
2 Parent(s): 87dffbc 8626b54

Merge pull request #85 from NanoCode012/fix/add-dataset-shard-readme

Browse files
Files changed (1) hide show
  1. README.md +4 -0
README.md CHANGED
@@ -209,6 +209,10 @@ dataset_prepared_path: data/last_run_prepared
209
  push_dataset_to_hub: # repo path
210
  # How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc
211
  val_set_size: 0.04
 
 
 
 
212
 
213
  # the maximum length of an input to train with, this should typically be less than 2048
214
  # as most models have a token/context limit of 2048
 
209
  push_dataset_to_hub: # repo path
210
  # How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc
211
  val_set_size: 0.04
212
+ # Number of shards to split the whole dataset into
213
+ dataset_shard_num:
214
+ # Index of the shard to use (out of dataset_shard_num shards)
215
+ dataset_shard_idx:
216
 
217
  # the maximum length of an input to train with, this should typically be less than 2048
218
  # as most models have a token/context limit of 2048