Skip to content

Commit 958d291

Browse files
committed
Moved data_to_csv to shared.py
1 parent 08075ba commit 958d291

File tree

4 files changed

+22
-44
lines changed

4 files changed

+22
-44
lines changed

scripts/2-process/gcs_process.py

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"""
55
# Standard library
66
import argparse
7-
import csv
87
import os
98
import sys
109
import textwrap
@@ -62,16 +61,6 @@ def parse_arguments():
6261
return args
6362

6463

65-
def data_to_csv(args, data, file_path):
66-
if not args.enable_save:
67-
return
68-
os.makedirs(PATHS["data_phase"], exist_ok=True)
69-
# emulate csv.unix_dialect
70-
data.to_csv(
71-
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
72-
)
73-
74-
7564
def process_product_totals(args, count_data):
7665
"""
7766
Processing count data: totals by product
@@ -111,7 +100,7 @@ def process_product_totals(args, count_data):
111100
data.items(), columns=["CC legal tool product", "Count"]
112101
)
113102
file_path = shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv")
114-
data_to_csv(args, data, file_path)
103+
shared.data_to_csv(args, data, file_path, PATHS)
115104

116105

117106
def process_latest_prior_retired_totals(args, count_data):
@@ -192,7 +181,7 @@ def process_latest_prior_retired_totals(args, count_data):
192181
file_path = shared.path_join(
193182
PATHS["data_phase"], f"gcs_status_{key}_totals.csv"
194183
)
195-
data_to_csv(args, dataframe, file_path)
184+
shared.data_to_csv(args, dataframe, file_path, PATHS)
196185

197186

198187
def process_totals_by_free_cultural(args, count_data):
@@ -225,7 +214,7 @@ def process_totals_by_free_cultural(args, count_data):
225214
file_path = shared.path_join(
226215
PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"
227216
)
228-
data_to_csv(args, data, file_path)
217+
shared.data_to_csv(args, data, file_path, PATHS)
229218

230219

231220
def process_totals_by_restrictions(args, count_data):
@@ -259,7 +248,7 @@ def process_totals_by_restrictions(args, count_data):
259248
file_path = shared.path_join(
260249
PATHS["data_phase"], "gcs_totals_by_restrictions.csv"
261250
)
262-
data_to_csv(args, data, file_path)
251+
shared.data_to_csv(args, data, file_path, PATHS)
263252

264253

265254
def process_totals_by_language(args, data):
@@ -280,7 +269,7 @@ def process_totals_by_language(args, data):
280269
file_path = shared.path_join(
281270
PATHS["data_phase"], "gcs_totals_by_language.csv"
282271
)
283-
data_to_csv(args, data, file_path)
272+
shared.data_to_csv(args, data, file_path, PATHS)
284273

285274

286275
def process_totals_by_country(args, data):
@@ -301,7 +290,7 @@ def process_totals_by_country(args, data):
301290
file_path = shared.path_join(
302291
PATHS["data_phase"], "gcs_totals_by_country.csv"
303292
)
304-
data_to_csv(args, data, file_path)
293+
shared.data_to_csv(args, data, file_path, PATHS)
305294

306295

307296
def main():

scripts/2-process/github_process.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
"""
66
# Standard library
77
import argparse
8-
import csv
98
import os
109
import sys
1110
import traceback
@@ -66,16 +65,6 @@ def check_for_data_file(file_path):
6665
)
6766

6867

69-
def data_to_csv(args, data, file_path):
70-
if not args.enable_save:
71-
return
72-
os.makedirs(PATHS["data_phase"], exist_ok=True)
73-
# emulate csv.unix_dialect
74-
data.to_csv(
75-
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
76-
)
77-
78-
7968
def process_totals_by_license(args, count_data):
8069
"""
8170
Processing count data: totals by License
@@ -99,7 +88,7 @@ def process_totals_by_license(args, count_data):
9988
PATHS["data_phase"], "github_totals_by_license.csv"
10089
)
10190
check_for_data_file(file_path)
102-
data_to_csv(args, data, file_path)
91+
shared.data_to_csv(args, data, file_path, PATHS)
10392

10493

10594
def process_totals_by_restriction(args, count_data):
@@ -134,7 +123,7 @@ def process_totals_by_restriction(args, count_data):
134123
PATHS["data_phase"], "github_totals_by_restriction.csv"
135124
)
136125
check_for_data_file(file_path)
137-
data_to_csv(args, data, file_path)
126+
shared.data_to_csv(args, data, file_path, PATHS)
138127

139128

140129
def main():

scripts/2-process/wikipedia_process.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
"""
66
# Standard library
77
import argparse
8-
import csv
98
import os
109
import sys
1110
import textwrap
@@ -70,16 +69,6 @@ def check_for_data_file(file_path):
7069
)
7170

7271

73-
def data_to_csv(args, data, file_path):
74-
if not args.enable_save:
75-
return
76-
os.makedirs(PATHS["data_phase"], exist_ok=True)
77-
# emulate csv.unix_dialect
78-
data.to_csv(
79-
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
80-
)
81-
82-
8372
def process_highest_language_usage(args, count_data):
8473
"""
8574
Processing count data: Most represented languages
@@ -99,7 +88,7 @@ def process_highest_language_usage(args, count_data):
9988
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
10089
)
10190
check_for_data_file(file_path)
102-
data_to_csv(args, top_10, file_path)
91+
shared.data_to_csv(args, top_10, file_path, PATHS)
10392

10493

10594
def process_least_language_usage(args, count_data):
@@ -123,7 +112,7 @@ def process_least_language_usage(args, count_data):
123112
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
124113
)
125114
check_for_data_file(file_path)
126-
data_to_csv(args, bottom_10, file_path)
115+
shared.data_to_csv(args, bottom_10, file_path, PATHS)
127116

128117

129118
def process_language_representation(args, count_data):
@@ -150,7 +139,7 @@ def process_language_representation(args, count_data):
150139
PATHS["data_phase"], "wikipedia_language_representation.csv"
151140
)
152141
check_for_data_file(file_path)
153-
data_to_csv(args, language_counts, file_path)
142+
shared.data_to_csv(args, language_counts, file_path, PATHS)
154143

155144

156145
def main():

scripts/shared.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Standard library
2+
import csv
23
import logging
34
import os
45
import sys
@@ -36,6 +37,16 @@ def __init__(self, message, exit_code=None):
3637
super().__init__(self.message)
3738

3839

40+
def data_to_csv(args, data, file_path, PATHS):
41+
if not args.enable_save:
42+
return
43+
os.makedirs(PATHS["data_phase"], exist_ok=True)
44+
# emulate csv.unix_dialect
45+
data.to_csv(
46+
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
47+
)
48+
49+
3950
def get_session(accept_header=None, session=None):
4051
"""
4152
Create or configure a reusable HTTPS session with retry logic and

0 commit comments

Comments
 (0)