Skip to content

Commit b5c80e0

Browse files
committed
added check function for process script
1 parent afdbbea commit b5c80e0

File tree

3 files changed

+20
-1
lines changed

3 files changed

+20
-1
lines changed

scripts/1-fetch/wikipedia_fetch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def check_for_completion():
6767
try:
6868
with open(FILE_LANGUAGES, "r", newline="") as file_obj:
6969
reader = csv.DictReader(file_obj, dialect="unix")
70-
if len(list(reader)) > 0:
70+
if len(list(reader)) > 300:
7171
raise shared.QuantifyingException(
7272
f"Data fetch completed for {QUARTER}", 0
7373
)

scripts/2-process/github_process.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,13 @@ def parse_arguments():
5959
return args
6060

6161

62+
def check_for_data_file(file_path):
    """
    Halt processing when the output file at file_path is already present.

    Returns None when nothing exists at file_path. Otherwise raises
    shared.QuantifyingException with a message naming QUARTER and 0 as its
    second argument.
    """
    # Guard clause: nothing on disk yet, so the caller may proceed.
    if not os.path.exists(file_path):
        return
    message = f"Processed data already exists for {QUARTER}"
    raise shared.QuantifyingException(message, 0)
67+
68+
6269
def data_to_csv(args, data, file_path):
6370
if not args.enable_save:
6471
return
@@ -91,6 +98,7 @@ def process_totals_by_license(args, count_data):
9198
file_path = shared.path_join(
9299
PATHS["data_phase"], "github_totals_by_license.csv"
93100
)
101+
check_for_data_file(file_path)
94102
data_to_csv(args, data, file_path)
95103

96104

@@ -125,6 +133,7 @@ def process_totals_by_restriction(args, count_data):
125133
file_path = shared.path_join(
126134
PATHS["data_phase"], "github_totals_by_restriction.csv"
127135
)
136+
check_for_data_file(file_path)
128137
data_to_csv(args, data, file_path)
129138

130139

scripts/2-process/wikipedia_process.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ def parse_arguments():
6363
return args
6464

6565

66+
def check_for_data_file(file_path):
    """
    Refuse to continue if file_path already exists on disk.

    When os.path.exists(file_path) is true, raise
    shared.QuantifyingException carrying a message that names QUARTER and 0
    as its second argument; otherwise return None.
    """
    already_present = os.path.exists(file_path)
    if already_present:
        message = f"Processed data already exists for {QUARTER}"
        raise shared.QuantifyingException(message, 0)
71+
72+
6673
def data_to_csv(args, data, file_path):
6774
if not args.enable_save:
6875
return
@@ -91,6 +98,7 @@ def process_highest_language_usage(args, count_data):
9198
file_path = shared.path_join(
9299
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
93100
)
101+
check_for_data_file(file_path)
94102
data_to_csv(args, top_10, file_path)
95103

96104

@@ -114,6 +122,7 @@ def process_least_language_usage(args, count_data):
114122
file_path = shared.path_join(
115123
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
116124
)
125+
check_for_data_file(file_path)
117126
data_to_csv(args, bottom_10, file_path)
118127

119128

@@ -140,6 +149,7 @@ def process_language_representation(args, count_data):
140149
file_path = shared.path_join(
141150
PATHS["data_phase"], "wikipedia_language_representation.csv"
142151
)
152+
check_for_data_file(file_path)
143153
data_to_csv(args, language_counts, file_path)
144154

145155

0 commit comments

Comments
 (0)