Clean readme update script

#2
opened by Wauplin (HF staff)
Files changed (1)
  1. src/readme_update.py +16 -22
src/readme_update.py CHANGED
@@ -1,14 +1,10 @@
1
  import os
2
  from datetime import datetime
3
- from pathlib import Path
4
- from shutil import rmtree
5
 
6
  import pytz
7
- from huggingface_hub import HfApi, Repository
8
 
9
  GENERATED_BELOW_MARKER = "--- Generated Part of README Below ---"
10
- hf_token = os.environ["HUGGINGFACE_AUTH_TOKEN"]
11
- local_repo_path = "./readme_repo"
12
 
13
 
14
  def update_dataset_readme(dataset_name: str, subreddit: str, new_rows: int) -> None:
@@ -19,31 +15,29 @@ def update_dataset_readme(dataset_name: str, subreddit: str, new_rows: int) -> N
19
  dataset_name (str): Name of the dataset repository.
20
  subreddit (str): Name of the subreddit being used for dataset creation.
21
  new_rows (int): Number of new rows added in the latest update.
22
- hf_token (str): Hugging Face authentication token.
23
- local_repo_path (str): Local path to clone the repository.
24
  """
25
  # Initialize HfApi
26
  api = HfApi()
 
 
 
27
 
28
- if Path(local_repo_path).exists():
29
- rmtree(local_repo_path)
30
-
31
- # Clone the repository locally
32
- repo = Repository(local_repo_path, clone_from=dataset_name, repo_type='dataset', use_auth_token=hf_token)
33
-
34
- # Read the README file
35
- with open(f"{local_repo_path}/README.md", "r") as file:
36
  old_readme = file.read()
37
 
38
- # Modify the README
39
  new_readme = append_to_readme(subreddit=subreddit, new_rows=new_rows, old_readme=old_readme)
40
 
41
- # Write the updated README back to the repository
42
- with open(f"{local_repo_path}/README.md", "w") as file:
43
- file.write(new_readme)
44
-
45
- # Push the changes
46
- repo.push_to_hub(blocking=True, commit_message=f'Pushing {new_rows} new rows')
 
 
 
47
 
48
 
49
  def append_to_readme(subreddit: str, new_rows: int, old_readme: str) -> str:
 
1
  import os
2
  from datetime import datetime
 
 
3
 
4
  import pytz
5
+ from huggingface_hub import HfApi
6
 
7
  GENERATED_BELOW_MARKER = "--- Generated Part of README Below ---"
 
 
8
 
9
 
10
  def update_dataset_readme(dataset_name: str, subreddit: str, new_rows: int) -> None:
 
15
  dataset_name (str): Name of the dataset repository.
16
  subreddit (str): Name of the subreddit being used for dataset creation.
17
  new_rows (int): Number of new rows added in the latest update.
 
 
18
  """
19
  # Initialize HfApi
20
  api = HfApi()
21
+
22
+ # Download README file
23
+ readme_path = api.hf_hub_download(repo_id=dataset_name, repo_type="dataset", filename="README.md", token=hf_token)
24
 
25
+ # Read it
26
+ with open(readme_path, "r") as file:
 
 
 
 
 
 
27
  old_readme = file.read()
28
 
29
+ # Modify it
30
  new_readme = append_to_readme(subreddit=subreddit, new_rows=new_rows, old_readme=old_readme)
31
 
32
+ # Commit modifications
33
+ api.upload_file(
34
+ path_or_fileobj=new_readme.encode(),
35
+ path_in_repo="README.md",
36
+ repo_id=dataset_name,
37
+ repo_type="dataset",
38
+ token=hf_token,
39
+ commit_message=f'Pushing {new_rows} new rows'
40
+ )
41
 
42
 
43
  def append_to_readme(subreddit: str, new_rows: int, old_readme: str) -> str: