Skip to content

Commit 8b1441f

Browse files
committed
Consolidated checks function
1 parent d2a80ee commit 8b1441f

File tree

7 files changed

+21
-63
lines changed

7 files changed

+21
-63
lines changed

scripts/2-process/gcs_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ def main():
317317
args = parse_arguments()
318318
shared.paths_log(LOGGER, PATHS)
319319
shared.git_fetch_and_merge(args, PATHS["repo"])
320-
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
320+
shared.check_completion_file_exists(args, FILE_PATHS)
321321

322322
# Count data
323323
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def main():
136136
args = parse_arguments()
137137
shared.paths_log(LOGGER, PATHS)
138138
shared.git_fetch_and_merge(args, PATHS["repo"])
139-
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
139+
shared.check_completion_file_exists(args, FILE_PATHS)
140140
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
141141
count_data = shared.open_data_file(
142142
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]

scripts/2-process/wikipedia_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def main():
158158
args = parse_arguments()
159159
shared.paths_log(LOGGER, PATHS)
160160
shared.git_fetch_and_merge(args, PATHS["repo"])
161-
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
161+
shared.check_completion_file_exists(args, FILE_PATHS)
162162
file_count = shared.path_join(
163163
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
164164
)

scripts/3-report/gcs_report.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
QUARTER = os.path.basename(PATHS["data_quarter"])
3131
SECTION_FILE = Path(__file__).name
3232
SECTION_TITLE = "Google Custom Search (GCS)"
33+
LAST_ENTRY = shared.path_join(PATHS["data_phase"], "gcs_free_culture.png")
3334

3435

3536
def parse_arguments():
@@ -75,23 +76,6 @@ def parse_arguments():
7576
return args
7677

7778

78-
def check_report_completion(args):
79-
""" "
80-
The function checks for the last plot and image
81-
caption created in this script. This helps to
82-
immediately know if all plots in the script have
83-
been created and should not be regenerated.
84-
85-
"""
86-
if args.force:
87-
return
88-
last_entry = shared.path_join(PATHS["data_phase"], "gcs_free_culture.png")
89-
if os.path.exists(last_entry):
90-
raise shared.QuantifyingException(
91-
f"{last_entry} already exists. Report script completed", 0
92-
)
93-
94-
9579
def gcs_intro(args):
9680
"""
9781
Write Google Custom Search (GCS) introduction.
@@ -513,7 +497,7 @@ def main():
513497
args = parse_arguments()
514498
shared.paths_log(LOGGER, PATHS)
515499
shared.git_fetch_and_merge(args, PATHS["repo"])
516-
check_report_completion(args)
500+
shared.check_completion_file_exists(args, LAST_ENTRY)
517501
gcs_intro(args)
518502
plot_products(args)
519503
plot_tool_status(args)

scripts/3-report/github_report.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
QUARTER = os.path.basename(PATHS["data_quarter"])
2929
SECTION_FILE = Path(__file__).name
3030
SECTION_TITLE = "Github"
31+
LAST_ENTRY = shared.path_join(PATHS["data_phase"], "github_restriction.png")
3132

3233

3334
def parse_arguments():
@@ -78,25 +79,6 @@ def parse_arguments():
7879
return args
7980

8081

81-
def check_report_completion(args):
82-
""" "
83-
The function checks for the last plot and image
84-
caption created in this script. This helps to
85-
immediately know if all plots in the script have
86-
been created and should not be regenerated.
87-
88-
"""
89-
if args.force:
90-
return
91-
last_entry = shared.path_join(
92-
PATHS["data_phase"], "github_restriction.png"
93-
)
94-
if os.path.exists(last_entry):
95-
raise shared.QuantifyingException(
96-
f"{last_entry} already exists. Report script completed", 0
97-
)
98-
99-
10082
def load_data(args):
10183
"""
10284
Load the collected data from the CSV file.
@@ -267,7 +249,7 @@ def main():
267249
args = parse_arguments()
268250
shared.paths_log(LOGGER, PATHS)
269251
shared.git_fetch_and_merge(args, PATHS["repo"])
270-
check_report_completion(args)
252+
shared.check_completion_file_exists(args, LAST_ENTRY)
271253
github_intro(args)
272254
plot_totals_by_license_type(args)
273255
plot_totals_by_restriction(args)

scripts/3-report/wikipedia_report.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
QUARTER = os.path.basename(PATHS["data_quarter"])
2929
SECTION_FILE = Path(__file__).name
3030
SECTION_TITLE = "Wikipedia"
31+
LAST_ENTRY = shared.path_join(
32+
PATHS["data_phase"], "wikipedia_least_language_usage.png"
33+
)
3134

3235

3336
def parse_arguments():
@@ -73,25 +76,6 @@ def parse_arguments():
7376
return args
7477

7578

76-
def check_report_completion(args):
77-
""" "
78-
The function checks for the last plot and image
79-
caption created in this script. This helps to
80-
immediately know if all plots in the script have
81-
been created and should not be regenerated.
82-
83-
"""
84-
if args.force:
85-
return
86-
last_entry = shared.path_join(
87-
PATHS["data_phase"], "wikipedia_least_language_usage.png"
88-
)
89-
if os.path.exists(last_entry):
90-
raise shared.QuantifyingException(
91-
f"{last_entry} already exists. Report script completed", 0
92-
)
93-
94-
9579
def wikipedia_intro(args):
9680
"""
9781
Write Wikipedia introduction.
@@ -285,7 +269,7 @@ def main():
285269
args = parse_arguments()
286270
shared.paths_log(LOGGER, PATHS)
287271
shared.git_fetch_and_merge(args, PATHS["repo"])
288-
check_report_completion(args)
272+
shared.check_completion_file_exists(args, LAST_ENTRY)
289273
wikipedia_intro(args)
290274
plot_language_representation(args)
291275
plot_highest_language_usage(args)

scripts/shared.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,21 @@ def data_to_csv(args, data, file_path):
4747
)
4848

4949

50-
def check_for_data_files(args, file_paths, QUARTER):
50+
def check_completion_file_exists(args, file_paths):
51+
""" "
52+
This function checks if expected output files
53+
exist. If any exist and --force is not provided,
54+
the script exits early by raising a QuantifyingException.
55+
In the case of a report script, we check whether its last output file exists.
56+
"""
5157
if args.force:
5258
return
59+
if isinstance(file_paths, str):
60+
file_paths = [file_paths]
5361
for path in file_paths:
5462
if os.path.exists(path):
5563
raise QuantifyingException(
56-
f"Processed data already exists for {QUARTER}", 0
64+
f"Output files already exists for {args.quarter}", 0
5765
)
5866

5967

0 commit comments

Comments
 (0)