Skip to content

Commit 8f4f079

Browse files
committed
Add the parser argument --force for regenerating processed files
1 parent 241634a commit 8f4f079

File tree

3 files changed

+27
-11
lines changed

3 files changed

+27
-11
lines changed

scripts/2-process/gcs_process.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,12 @@ def parse_arguments():
5959
parser.add_argument(
6060
"--enable-git",
6161
action="store_true",
62-
help="Enable git actions such as fetch, merge, add, commit, and push"
63-
" (default: False)",
62+
help="Enable git actions such as fetch, merge, add, commit, and push",
63+
)
64+
parser.add_argument(
65+
"--force",
66+
action="store_true",
67+
help="Regenerate data even if processed files already exist",
6468
)
6569
args = parser.parse_args()
6670
if not args.enable_save and args.enable_git:
@@ -73,9 +77,9 @@ def parse_arguments():
7377
return args
7478

7579

76-
def check_for_data_files(file_paths):
80+
def check_for_data_files(args, file_paths):
7781
for path in file_paths:
78-
if os.path.exists(path):
82+
if os.path.exists(path) and not args.force:
7983
raise shared.QuantifyingException(
8084
f"Processed data already exists for {QUARTER}", 0
8185
)
@@ -327,7 +331,7 @@ def main():
327331
args = parse_arguments()
328332
shared.paths_log(LOGGER, PATHS)
329333
shared.git_fetch_and_merge(args, PATHS["repo"])
330-
check_for_data_files(FILE_PATHS)
334+
check_for_data_files(args, FILE_PATHS)
331335

332336
# Count data
333337
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ def parse_arguments():
5252
help="Enable git actions such as fetch, merge, add, commit, and push"
5353
" (default: False)",
5454
)
55+
parser.add_argument(
56+
"--force",
57+
action="store_true",
58+
help="Regenerate data even if processed files already exist",
59+
)
60+
5561
args = parser.parse_args()
5662
if not args.enable_save and args.enable_git:
5763
parser.error("--enable-git requires --enable-save")
@@ -63,9 +69,9 @@ def parse_arguments():
6369
return args
6470

6571

66-
def check_for_data_files(file_paths):
72+
def check_for_data_files(args, file_paths):
6773
for path in file_paths:
68-
if os.path.exists(path):
74+
if os.path.exists(path) and not args.force:
6975
raise shared.QuantifyingException(
7076
f"Processed data already exists for {QUARTER}", 0
7177
)
@@ -144,7 +150,7 @@ def main():
144150
args = parse_arguments()
145151
shared.paths_log(LOGGER, PATHS)
146152
shared.git_fetch_and_merge(args, PATHS["repo"])
147-
check_for_data_files(FILE_PATHS)
153+
check_for_data_files(args, FILE_PATHS)
148154
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
149155
count_data = shared.open_data_file(
150156
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]

scripts/2-process/wikipedia_process.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ def parse_arguments():
6363
help="Enable git actions such as fetch, merge, add, commit, and push"
6464
" (default: False)",
6565
)
66+
parser.add_argument(
67+
"--force",
68+
action="store_true",
69+
help="Regenerate data even if processed files already exist",
70+
)
71+
6672
args = parser.parse_args()
6773
if not args.enable_save and args.enable_git:
6874
parser.error("--enable-git requires --enable-save")
@@ -74,9 +80,9 @@ def parse_arguments():
7480
return args
7581

7682

77-
def check_for_data_files(file_paths):
83+
def check_for_data_files(args, file_paths):
7884
for path in file_paths:
79-
if os.path.exists(path):
85+
if os.path.exists(path) and not args.force:
8086
raise shared.QuantifyingException(
8187
f"Processed data already exists for {QUARTER}", 0
8288
)
@@ -166,7 +172,7 @@ def main():
166172
args = parse_arguments()
167173
shared.paths_log(LOGGER, PATHS)
168174
shared.git_fetch_and_merge(args, PATHS["repo"])
169-
check_for_data_files(FILE_PATHS)
175+
check_for_data_files(args, FILE_PATHS)
170176
file_count = shared.path_join(
171177
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
172178
)

0 commit comments

Comments (0)