Skip to content

Commit 8a470e4

Browse files
committed
Made review changes
1 parent 8f4f079 commit 8a470e4

File tree

7 files changed

+81
-55
lines changed

7 files changed

+81
-55
lines changed

scripts/2-process/gcs_process.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,6 @@ def parse_arguments():
7777
return args
7878

7979

80-
def check_for_data_files(args, file_paths):
81-
for path in file_paths:
82-
if os.path.exists(path) and not args.force:
83-
raise shared.QuantifyingException(
84-
f"Processed data already exists for {QUARTER}", 0
85-
)
86-
87-
8880
def data_to_csv(args, data, file_path):
8981
if not args.enable_save:
9082
return
@@ -331,7 +323,7 @@ def main():
331323
args = parse_arguments()
332324
shared.paths_log(LOGGER, PATHS)
333325
shared.git_fetch_and_merge(args, PATHS["repo"])
334-
check_for_data_files(args, FILE_PATHS)
326+
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
335327

336328
# Count data
337329
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,6 @@ def parse_arguments():
6969
return args
7070

7171

72-
def check_for_data_files(args, file_paths):
73-
for path in file_paths:
74-
if os.path.exists(path) and not args.force:
75-
raise shared.QuantifyingException(
76-
f"Processed data already exists for {QUARTER}", 0
77-
)
78-
79-
8072
def data_to_csv(args, data, file_path):
8173
if not args.enable_save:
8274
return
@@ -150,7 +142,7 @@ def main():
150142
args = parse_arguments()
151143
shared.paths_log(LOGGER, PATHS)
152144
shared.git_fetch_and_merge(args, PATHS["repo"])
153-
check_for_data_files(args, FILE_PATHS)
145+
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
154146
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
155147
count_data = shared.open_data_file(
156148
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]
@@ -176,7 +168,7 @@ def main():
176168
LOGGER.info(e.message)
177169
else:
178170
LOGGER.error(e.message)
179-
sys.exit(e.code)
171+
sys.exit(e.exit_code)
180172
except SystemExit as e:
181173
LOGGER.error(f"System exit with code: {e.code}")
182174
sys.exit(e.code)

scripts/2-process/wikipedia_process.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,6 @@ def parse_arguments():
8080
return args
8181

8282

83-
def check_for_data_files(args, file_paths):
84-
for path in file_paths:
85-
if os.path.exists(path) and not args.force:
86-
raise shared.QuantifyingException(
87-
f"Processed data already exists for {QUARTER}", 0
88-
)
89-
90-
9183
def data_to_csv(args, data, file_path):
9284
if not args.enable_save:
9385
return
@@ -172,7 +164,7 @@ def main():
172164
args = parse_arguments()
173165
shared.paths_log(LOGGER, PATHS)
174166
shared.git_fetch_and_merge(args, PATHS["repo"])
175-
check_for_data_files(args, FILE_PATHS)
167+
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
176168
file_count = shared.path_join(
177169
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
178170
)

scripts/3-report/gcs_report.py

100644100755
Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828

2929
# Constants
3030
QUARTER = os.path.basename(PATHS["data_quarter"])
31-
SECTION = Path(__file__).name
31+
SECTION_FILE = Path(__file__).name
32+
SECTION_TITLE = "Google Custom Search (GCS)"
3233

3334

3435
def parse_arguments():
@@ -84,7 +85,8 @@ def gcs_intro(args):
8485
total_count = f"{data['Count'].sum():,d}"
8586
shared.update_readme(
8687
args,
87-
SECTION,
88+
SECTION_FILE,
89+
SECTION_TITLE,
8890
"Overview",
8991
None,
9092
None,
@@ -138,7 +140,8 @@ def plot_products(args):
138140

139141
shared.update_readme(
140142
args,
141-
SECTION,
143+
SECTION_FILE,
144+
SECTION_TITLE,
142145
title,
143146
image_path,
144147
"Plots showing Creative Commons (CC) legal tool product totals and"
@@ -181,7 +184,8 @@ def plot_tool_status(args):
181184

182185
shared.update_readme(
183186
args,
184-
SECTION,
187+
SECTION_FILE,
188+
SECTION_TITLE,
185189
title,
186190
image_path,
187191
"Plots showing Creative Commons (CC) legal tool status totals and"
@@ -224,7 +228,8 @@ def plot_latest_tools(args):
224228

225229
shared.update_readme(
226230
args,
227-
SECTION,
231+
SECTION_FILE,
232+
SECTION_TITLE,
228233
title,
229234
image_path,
230235
"Plots showing latest Creative Commons (CC) legal tool totals and"
@@ -266,7 +271,8 @@ def plot_prior_tools(args):
266271

267272
shared.update_readme(
268273
args,
269-
SECTION,
274+
SECTION_FILE,
275+
SECTION_TITLE,
270276
title,
271277
image_path,
272278
"Plots showing prior Creative Commons (CC) legal tool totals and"
@@ -312,7 +318,8 @@ def plot_retired_tools(args):
312318

313319
shared.update_readme(
314320
args,
315-
SECTION,
321+
SECTION_FILE,
322+
SECTION_TITLE,
316323
title,
317324
image_path,
318325
"Plots showing retired Creative Commons (CC) legal tools total and"
@@ -361,7 +368,8 @@ def plot_countries_highest_usage(args):
361368

362369
shared.update_readme(
363370
args,
364-
SECTION,
371+
SECTION_FILE,
372+
SECTION_TITLE,
365373
title,
366374
image_path,
367375
"Plots showing countries with the highest useage of the latest"
@@ -414,7 +422,8 @@ def plot_languages_highest_usage(args):
414422

415423
shared.update_readme(
416424
args,
417-
SECTION,
425+
SECTION_FILE,
426+
SECTION_TITLE,
418427
title,
419428
image_path,
420429
"Plots showing languages with the highest useage of the latest"
@@ -461,7 +470,8 @@ def plot_free_culture(args):
461470

462471
shared.update_readme(
463472
args,
464-
SECTION,
473+
SECTION_FILE,
474+
SECTION_TITLE,
465475
title,
466476
image_path,
467477
"Plots showing Approved for Free Cultural Works legal tool usage.",

scripts/3-report/github_report.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,13 @@
2626
# Setup
2727
LOGGER, PATHS = shared.setup(__file__)
2828
QUARTER = os.path.basename(PATHS["data_quarter"])
29-
SECTION = Path(__file__).name
29+
SECTION_FILE = Path(__file__).name
30+
SECTION_TITLE = "Github"
31+
32+
IMAGE_PATHS = [
33+
shared.path_join(PATHS["data_phase"], "github_totals_by_license_type.png"),
34+
shared.path_join(PATHS["data_phase"], "github_restriction.png"),
35+
]
3036

3137

3238
def parse_arguments():
@@ -56,6 +62,11 @@ def parse_arguments():
5662
help="Enable git actions such as fetch, merge, add, commit, and push"
5763
" (default: False)",
5864
)
65+
parser.add_argument(
66+
"--force",
67+
action="store_true",
68+
help="Regenerate data even if images files already exist",
69+
)
5970
args = parser.parse_args()
6071
if not args.enable_save and args.enable_git:
6172
parser.error("--enable-git requires --enable-save")
@@ -67,6 +78,14 @@ def parse_arguments():
6778
return args
6879

6980

81+
def check_image_files(args, image_paths):
    """
    Stop the report run if any plot image has already been generated.

    Follows the same shape as shared.check_for_data_files: passing --force
    (args.force) disables the check entirely, otherwise the first path that
    already exists on disk aborts the run.

    Args:
        args: Parsed command-line arguments; only ``args.force`` is read.
        image_paths: Iterable of image file paths to look for.

    Raises:
        shared.QuantifyingException: With exit code 0 when an image file is
            already present and --force was not given.
    """
    # Guard clause: with --force there is nothing to verify, so skip the
    # filesystem lookups altogether.
    if args.force:
        return
    for path in image_paths:
        if os.path.exists(path):
            raise shared.QuantifyingException(
                f"image file already exists for {path}", 0
            )
87+
88+
7089
def load_data(args):
7190
"""
7291
Load the collected data from the CSV file.
@@ -100,7 +119,8 @@ def github_intro(args):
100119
cc_percentage = f"{(cc_total / total_repositories) * 100:.2f}%"
101120
shared.update_readme(
102121
args,
103-
SECTION,
122+
SECTION_FILE,
123+
SECTION_TITLE,
104124
"Overview",
105125
None,
106126
None,
@@ -111,7 +131,7 @@ def github_intro(args):
111131
f"** of the {total_repositories} total public repositories"
112132
" on GitHub that use a CC legal tool. Additionally,"
113133
" many more use a non-CC use a Public domain"
114-
" equivalent legal tools.**\n"
134+
" equivalent legal tools.\n"
115135
"\n"
116136
" The Github data showcases the different level of"
117137
" rights reserved on repositories We have Public"
@@ -121,7 +141,7 @@ def github_intro(args):
121141
" without restriction."
122142
" See more at"
123143
" [Public-domain-equivalent license]"
124-
"(https://en.wikipedia.org/wiki/Public-domain-equivalent_license)"
144+
"(https://en.wikipedia.org/wiki/Public-domain-equivalent_license).\n"
125145
" While a Permissive category of license contains works"
126146
" under MIT-0 and CC BY 4.0 allows users to"
127147
" reuse the code with some conditions and attribution"
@@ -130,7 +150,7 @@ def github_intro(args):
130150
" and Copyleft contains works under CC BY-SA 4.0."
131151
" which requires any derivative works to be licensed"
132152
" under the same terms."
133-
" [Copyleft](https://en.wikipedia.org/wiki/Copyleft)"
153+
" [Copyleft](https://en.wikipedia.org/wiki/Copyleft).\n"
134154
"\n"
135155
"Thank you GitHub for providing public API"
136156
" access to repository metadata!",
@@ -172,7 +192,8 @@ def plot_totals_by_license_type(args):
172192

173193
shared.update_readme(
174194
args,
175-
SECTION,
195+
SECTION_FILE,
196+
SECTION_TITLE,
176197
title,
177198
image_path,
178199
"Plots showing totals by license type."
@@ -220,7 +241,8 @@ def plot_totals_by_restriction(args):
220241

221242
shared.update_readme(
222243
args,
223-
SECTION,
244+
SECTION_FILE,
245+
SECTION_TITLE,
224246
title,
225247
image_path,
226248
"Plots showing totals by different levels of restrictions."
@@ -234,6 +256,7 @@ def main():
234256
args = parse_arguments()
235257
shared.paths_log(LOGGER, PATHS)
236258
shared.git_fetch_and_merge(args, PATHS["repo"])
259+
check_image_files(args, IMAGE_PATHS)
237260
github_intro(args)
238261
plot_totals_by_license_type(args)
239262
plot_totals_by_restriction(args)

scripts/3-report/wikipedia_report.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
# Setup
2727
LOGGER, PATHS = shared.setup(__file__)
2828
QUARTER = os.path.basename(PATHS["data_quarter"])
29-
SECTION = Path(__file__).name
29+
SECTION_FILE = Path(__file__).name
30+
SECTION_TITLE = "Wikipedia"
3031

3132

3233
def parse_arguments():
@@ -98,7 +99,8 @@ def wikipedia_intro(args):
9899
language_count = len(data)
99100
shared.update_readme(
100101
args,
101-
SECTION,
102+
SECTION_FILE,
103+
SECTION_TITLE,
102104
"Overview",
103105
None,
104106
None,
@@ -156,7 +158,8 @@ def plot_language_representation(args):
156158

157159
shared.update_readme(
158160
args,
159-
SECTION,
161+
SECTION_FILE,
162+
SECTION_TITLE,
160163
title,
161164
image_path,
162165
"Plots showing the language representation across different language"
@@ -201,7 +204,8 @@ def plot_highest_language_usage(args):
201204

202205
shared.update_readme(
203206
args,
204-
SECTION,
207+
SECTION_FILE,
208+
SECTION_TITLE,
205209
title,
206210
image_path,
207211
"Plots showing the most represented languages across the different"
@@ -244,7 +248,8 @@ def plot_least_language_usage(args):
244248

245249
shared.update_readme(
246250
args,
247-
SECTION,
251+
SECTION_FILE,
252+
SECTION_TITLE,
248253
title,
249254
image_path,
250255
"Plots showing the least represented languages across the different"

scripts/shared.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,16 @@ def __init__(self, message, exit_code=None):
3636
super().__init__(self.message)
3737

3838

39+
def check_for_data_files(args, file_paths, QUARTER):
    """
    Refuse to continue when processed data is already on disk.

    Args:
        args: Parsed command-line arguments; only ``args.force`` is read.
        file_paths: Iterable of processed-data file paths to look for.
        QUARTER: Quarter label interpolated into the error message.

    Raises:
        QuantifyingException: With exit code 0 when --force was not given
            and at least one of the paths exists.
    """
    # `not args.force` is evaluated first, so with --force no path is
    # inspected; any() stops at the first existing path.
    already_processed = not args.force and any(
        os.path.exists(candidate) for candidate in file_paths
    )
    if already_processed:
        raise QuantifyingException(
            f"Processed data already exists for {QUARTER}", 0
        )
47+
48+
3949
def get_session(accept_header=None, session=None):
4050
"""
4151
Create or configure a reusable HTTPS session with retry logic and
@@ -272,11 +282,13 @@ def setup(current_file):
272282
def section_order():
273283
report_dir = os.path.join(os.path.dirname(__file__), "3-report")
274284
report_files = os.listdir(report_dir)
285+
report_files.sort()
275286
return report_files
276287

277288

278289
def update_readme(
279290
args,
291+
section_file,
280292
section_title,
281293
entry_title,
282294
image_path,
@@ -289,8 +301,8 @@ def update_readme(
289301
logger = args.logger
290302
paths = args.paths
291303
ordered_sections = section_order()
292-
logger.info("ordered_sections:", ordered_sections)
293-
logger.info("section_title:", repr(section_title))
304+
logger.info(f"ordered_sections:, {ordered_sections}")
305+
logger.info(f"section_title:, {section_title}")
294306

295307
if not args.enable_save:
296308
return
@@ -308,8 +320,8 @@ def update_readme(
308320
readme_path = path_join(paths["data"], args.quarter, "README.md")
309321

310322
# Define section markers for each data source
311-
section_start_line = f"<!-- section start {section_title} -->\n"
312-
section_end_line = f"<!-- section end {section_title} -->\n"
323+
section_start_line = f"<!-- section start {section_file} -->\n"
324+
section_end_line = f"<!-- section end {section_file} -->\n"
313325

314326
# Define entry markers for each plot (optional) and description
315327
entry_start_line = f"<!-- entry start {entry_title} -->\n"
@@ -333,7 +345,7 @@ def update_readme(
333345
else:
334346
insert_index = None
335347
# If not present, we find the position to insert the section
336-
current_postion = ordered_sections.index(section_title)
348+
current_postion = ordered_sections.index(section_file)
337349
# Sections that should come before this section
338350
sections_before = ordered_sections[:current_postion]
339351
# we find the last existing section that comes before this section

0 commit comments

Comments
 (0)