Skip to content

Commit 8d198e1

Browse files
committed
Keep only modified report and shared scripts
1 parent 2c57018 commit 8d198e1

File tree

3 files changed

+18
-28
lines changed

3 files changed

+18
-28
lines changed

scripts/3-report/gcs_report.py

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,7 @@ def gcs_intro(args):
7979
)
8080
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
8181
name_label = "CC legal tool product"
82-
data = shared.open_data_file(LOGGER, file_path)
83-
data.set_index(name_label, inplace=True)
82+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
8483
total_count = f"{data['Count'].sum():,d}"
8584
shared.update_readme(
8685
args,
@@ -111,8 +110,7 @@ def plot_products(args):
111110
)
112111
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
113112
name_label = "CC legal tool product"
114-
data = shared.open_data_file(LOGGER, file_path)
115-
data.set_index(name_label, inplace=True)
113+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
116114

117115
data = data[::-1] # reverse order
118116

@@ -158,8 +156,7 @@ def plot_tool_status(args):
158156
)
159157
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
160158
name_label = "CC legal tool"
161-
data = shared.open_data_file(LOGGER, file_path)
162-
data.set_index(name_label, inplace=True)
159+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
163160
data.sort_values(name_label, ascending=False, inplace=True)
164161

165162
title = "CC legal tools status"
@@ -202,8 +199,7 @@ def plot_latest_tools(args):
202199
)
203200
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
204201
name_label = "CC legal tool"
205-
data = shared.open_data_file(LOGGER, file_path)
206-
data.set_index(name_label, inplace=True)
202+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
207203
data.sort_values(name_label, ascending=False, inplace=True)
208204

209205
title = "Latest CC legal tools"
@@ -245,8 +241,7 @@ def plot_prior_tools(args):
245241
)
246242
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
247243
name_label = "CC legal tool"
248-
data = shared.open_data_file(LOGGER, file_path)
249-
data.set_index(name_label, inplace=True)
244+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
250245
data.sort_values(name_label, ascending=False, inplace=True)
251246

252247
title = "Prior CC legal tools"
@@ -291,8 +286,7 @@ def plot_retired_tools(args):
291286
)
292287
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
293288
name_label = "CC legal tool"
294-
data = shared.open_data_file(LOGGER, file_path)
295-
data.set_index(name_label, inplace=True)
289+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
296290
data.sort_values(name_label, ascending=False, inplace=True)
297291

298292
title = "Retired CC legal tools"
@@ -338,8 +332,7 @@ def plot_countries_highest_usage(args):
338332
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
339333
name_label = "Country"
340334
data_label = "Count"
341-
data = shared.open_data_file(LOGGER, file_path)
342-
data.set_index(name_label, inplace=True)
335+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
343336
total_count = f"{data['Count'].sum():,d}"
344337
data.sort_values(data_label, ascending=False, inplace=True)
345338
data = data[:10] # limit to highest 10
@@ -392,8 +385,7 @@ def plot_languages_highest_usage(args):
392385
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
393386
name_label = "Language"
394387
data_label = "Count"
395-
data = shared.open_data_file(LOGGER, file_path)
396-
data.set_index(name_label, inplace=True)
388+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
397389
total_count = f"{data['Count'].sum():,d}"
398390
data.sort_values(data_label, ascending=False, inplace=True)
399391
data = data[:10] # limit to highest 10

scripts/3-report/github_report.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -93,8 +93,7 @@ def github_intro(args):
9393
)
9494
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
9595
name_label = "TOOL_IDENTIFIER"
96-
data = shared.open_data_file(LOGGER, file_path)
97-
data.set_index(name_label, inplace=True)
96+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
9897
total_repositories = data.loc["Total public repositories", "COUNT"]
9998
cc_total = data[data.index.str.startswith("CC")]["COUNT"].sum()
10099
cc_percentage = f"{(cc_total / total_repositories) * 100:.2f}%"
@@ -149,8 +148,7 @@ def plot_totals_by_license_type(args):
149148
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
150149
name_label = "License"
151150
data_label = "Count"
152-
data = shared.open_data_file(LOGGER, file_path)
153-
data.set_index(name_label, inplace=True)
151+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
154152
data.sort_values(data_label, ascending=True, inplace=True)
155153
title = "Totals by license type"
156154
plt = plot.combined_plot(
@@ -199,8 +197,7 @@ def plot_totals_by_restriction(args):
199197
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
200198
name_label = "Category"
201199
data_label = "Count"
202-
data = shared.open_data_file(LOGGER, file_path)
203-
data.set_index(name_label, inplace=True)
200+
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
204201
data.sort_values(name_label, ascending=False, inplace=True)
205202
title = "Totals by restriction"
206203
plt = plot.combined_plot(

scripts/shared.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,12 @@ def get_session(accept_header=None, session=None):
6767
return session
6868

6969

70-
def open_data_file(logger, file_path, usecols=None):
70+
def open_data_file(
71+
logger,
72+
file_path,
73+
usecols=None,
74+
index_col=None,
75+
):
7176
"""
7277
Open a CSV data file safely and convert
7378
expected errors into QuantifyingException.
@@ -76,27 +81,23 @@ def open_data_file(logger, file_path, usecols=None):
7681
"""
7782
try:
7883
# Reading the file
79-
return pd.read_csv(file_path, usecols=usecols)
80-
84+
return pd.read_csv(file_path, usecols=usecols, index_col=index_col)
8185
# File does not exist
8286
except FileNotFoundError:
8387
raise QuantifyingException(
8488
message=f"Data file not found: {file_path}", exit_code=1
8589
)
86-
8790
# Empty or invalid CSV file
8891
except pd.errors.EmptyDataError:
8992
raise QuantifyingException(
9093
message=f"CSV file is empty or invalid: {file_path}", exit_code=1
9194
)
92-
9395
# Permission denied
9496
except PermissionError:
9597
raise QuantifyingException(
9698
message=f"Permission denied when accessing data file: {file_path}",
9799
exit_code=1,
98100
)
99-
100101
# Any other unexpected issue
101102
except Exception as e:
102103
raise QuantifyingException(

0 commit comments

Comments
 (0)