Skip to content

Commit 7eba804

Browse files
committed
Made review changes
1 parent 80b6475 commit 7eba804

File tree

4 files changed

+39
-21
lines changed

4 files changed

+39
-21
lines changed

scripts/2-process/gcs_process.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@
2727

2828
# Constants
2929
QUARTER = os.path.basename(PATHS["data_quarter"])
30+
FILE_PATHS = [
31+
shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv"),
32+
shared.path_join(PATHS["data_phase"], "gcs_status_combined_totals.csv"),
33+
shared.path_join(PATHS["data_phase"], "gcs_status_latest_totals.csv"),
34+
shared.path_join(PATHS["data_phase"], "gcs_status_prior_totals.csv"),
35+
shared.path_join(PATHS["data_phase"], "gcs_status_retired_totals.csv"),
36+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_country.csv"),
37+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"),
38+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_language.csv"),
39+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_restrictions.csv"),
40+
]
3041

3142

3243
def parse_arguments():
@@ -62,7 +73,7 @@ def parse_arguments():
6273
return args
6374

6475

65-
def check_for_data_file(file_path):
76+
def check_for_data_files(file_path):
6677
if os.path.exists(file_path):
6778
raise shared.QuantifyingException(
6879
f"Processed data already exists for {QUARTER}", 0
@@ -118,7 +129,6 @@ def process_product_totals(args, count_data):
118129
data.items(), columns=["CC legal tool product", "Count"]
119130
)
120131
file_path = shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv")
121-
check_for_data_file(file_path)
122132
data_to_csv(args, data, file_path)
123133

124134

@@ -200,8 +210,7 @@ def process_latest_prior_retired_totals(args, count_data):
200210
file_path = shared.path_join(
201211
PATHS["data_phase"], f"gcs_status_{key}_totals.csv"
202212
)
203-
check_for_data_file(file_path)
204-
data_to_csv(args, dataframe, file_path)
213+
data_to_csv(args, dataframe, file_path)
205214

206215

207216
def process_totals_by_free_cultural(args, count_data):
@@ -234,7 +243,6 @@ def process_totals_by_free_cultural(args, count_data):
234243
file_path = shared.path_join(
235244
PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"
236245
)
237-
check_for_data_file(file_path)
238246
data_to_csv(args, data, file_path)
239247

240248

@@ -269,7 +277,6 @@ def process_totals_by_restrictions(args, count_data):
269277
file_path = shared.path_join(
270278
PATHS["data_phase"], "gcs_totals_by_restrictions.csv"
271279
)
272-
check_for_data_file(file_path)
273280
data_to_csv(args, data, file_path)
274281

275282

@@ -291,7 +298,6 @@ def process_totals_by_language(args, data):
291298
file_path = shared.path_join(
292299
PATHS["data_phase"], "gcs_totals_by_language.csv"
293300
)
294-
check_for_data_file(file_path)
295301
data_to_csv(args, data, file_path)
296302

297303

@@ -313,14 +319,14 @@ def process_totals_by_country(args, data):
313319
file_path = shared.path_join(
314320
PATHS["data_phase"], "gcs_totals_by_country.csv"
315321
)
316-
check_for_data_file(file_path)
317322
data_to_csv(args, data, file_path)
318323

319324

320325
def main():
321326
args = parse_arguments()
322327
shared.paths_log(LOGGER, PATHS)
323328
shared.git_fetch_and_merge(args, PATHS["repo"])
329+
check_for_data_files(FILE_PATHS)
324330

325331
# Count data
326332
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424

2525
# Constants
2626
QUARTER = os.path.basename(PATHS["data_quarter"])
27+
FILE_PATHS = [
28+
shared.path_join(PATHS["data_phase"], "github_totals_by_license.csv"),
29+
shared.path_join(PATHS["data_phase"], "github_totals_by_restriction.csv"),
30+
]
2731

2832

2933
def parse_arguments():
@@ -59,7 +63,7 @@ def parse_arguments():
5963
return args
6064

6165

62-
def check_for_data_file(file_path):
66+
def check_for_data_files(file_path):
6367
if os.path.exists(file_path):
6468
raise shared.QuantifyingException(
6569
f"Processed data already exists for {QUARTER}", 0
@@ -98,7 +102,6 @@ def process_totals_by_license(args, count_data):
98102
file_path = shared.path_join(
99103
PATHS["data_phase"], "github_totals_by_license.csv"
100104
)
101-
check_for_data_file(file_path)
102105
data_to_csv(args, data, file_path)
103106

104107

@@ -133,15 +136,14 @@ def process_totals_by_restriction(args, count_data):
133136
file_path = shared.path_join(
134137
PATHS["data_phase"], "github_totals_by_restriction.csv"
135138
)
136-
check_for_data_file(file_path)
137139
data_to_csv(args, data, file_path)
138140

139141

140142
def main():
141143
args = parse_arguments()
142144
shared.paths_log(LOGGER, PATHS)
143145
shared.git_fetch_and_merge(args, PATHS["repo"])
144-
146+
check_for_data_files(FILE_PATHS)
145147
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
146148
count_data = shared.open_data_file(
147149
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]

scripts/2-process/wikipedia_process.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,17 @@
2828

2929
# Constants
3030
QUARTER = os.path.basename(PATHS["data_quarter"])
31+
FILE_PATHS = [
32+
shared.path_join(
33+
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
34+
),
35+
shared.path_join(
36+
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
37+
),
38+
shared.path_join(
39+
PATHS["data_phase"], "wikipedia_language_representation.csv"
40+
),
41+
]
3142

3243

3344
def parse_arguments():
@@ -63,7 +74,7 @@ def parse_arguments():
6374
return args
6475

6576

66-
def check_for_data_file(file_path):
77+
def check_for_data_files(file_path):
6778
if os.path.exists(file_path):
6879
raise shared.QuantifyingException(
6980
f"Processed data already exists for {QUARTER}", 0
@@ -98,7 +109,6 @@ def process_highest_language_usage(args, count_data):
98109
file_path = shared.path_join(
99110
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
100111
)
101-
check_for_data_file(file_path)
102112
data_to_csv(args, top_10, file_path)
103113

104114

@@ -122,7 +132,6 @@ def process_least_language_usage(args, count_data):
122132
file_path = shared.path_join(
123133
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
124134
)
125-
check_for_data_file(file_path)
126135
data_to_csv(args, bottom_10, file_path)
127136

128137

@@ -149,14 +158,14 @@ def process_language_representation(args, count_data):
149158
file_path = shared.path_join(
150159
PATHS["data_phase"], "wikipedia_language_representation.csv"
151160
)
152-
check_for_data_file(file_path)
153161
data_to_csv(args, language_counts, file_path)
154162

155163

156164
def main():
157165
args = parse_arguments()
158166
shared.paths_log(LOGGER, PATHS)
159167
shared.git_fetch_and_merge(args, PATHS["repo"])
168+
check_for_data_files(FILE_PATHS)
160169
file_count = shared.path_join(
161170
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
162171
)

scripts/shared.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -283,14 +283,15 @@ def update_readme(
283283
image_caption,
284284
entry_text=None,
285285
):
286+
"""
287+
Update the README.md file with the generated images and descriptions.
288+
"""
286289
logger = args.logger
287290
paths = args.paths
288291
ordered_sections = section_order()
289292
logger.info("ordered_sections:", ordered_sections)
290293
logger.info("section_title:", repr(section_title))
291-
"""
292-
Update the README.md file with the generated images and descriptions.
293-
"""
294+
294295
if not args.enable_save:
295296
return
296297
if image_path and not image_caption:
@@ -336,8 +337,8 @@ def update_readme(
336337
# Sections that should come before this section
337338
sections_before = ordered_sections[:current_postion]
338339
# we find the last existing section that comes before this section
339-
for prev_section in reversed(sections_before):
340-
prev_end_line = f"<!-- section end {prev_section} -->\n"
340+
for prev_section_title in reversed(sections_before):
341+
prev_end_line = f"<!-- section end {prev_section_title} -->\n"
341342
if prev_end_line in lines:
342343
insert_index = lines.index(prev_end_line) + 1
343344
break

0 commit comments

Comments (0)