Skip to content

Commit 7eba804

Browse files
committed
Made review changes
1 parent 80b6475 commit 7eba804

File tree

4 files changed

+39
-21
lines changed

4 files changed

+39
-21
lines changed

scripts/2-process/gcs_process.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@
2727

2828
# Constants
2929
QUARTER = os.path.basename(PATHS["data_quarter"])
30+
FILE_PATHS = [
31+
shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv"),
32+
shared.path_join(PATHS["data_phase"], "gcs_status_combined_totals.csv"),
33+
shared.path_join(PATHS["data_phase"], "gcs_status_latest_totals.csv"),
34+
shared.path_join(PATHS["data_phase"], "gcs_status_prior_totals.csv"),
35+
shared.path_join(PATHS["data_phase"], "gcs_status_retired_totals.csv"),
36+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_country.csv"),
37+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"),
38+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_language.csv"),
39+
shared.path_join(PATHS["data_phase"], "gcs_totals_by_restrictions.csv"),
40+
]
3041

3142

3243
def parse_arguments():
@@ -62,7 +73,7 @@ def parse_arguments():
6273
return args
6374

6475

65-
def check_for_data_file(file_path):
76+
def check_for_data_files(file_path):
6677
if os.path.exists(file_path):
6778
raise shared.QuantifyingException(
6879
f"Processed data already exists for {QUARTER}", 0
@@ -118,7 +129,6 @@ def process_product_totals(args, count_data):
118129
data.items(), columns=["CC legal tool product", "Count"]
119130
)
120131
file_path = shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv")
121-
check_for_data_file(file_path)
122132
data_to_csv(args, data, file_path)
123133

124134

@@ -200,8 +210,7 @@ def process_latest_prior_retired_totals(args, count_data):
200210
file_path = shared.path_join(
201211
PATHS["data_phase"], f"gcs_status_{key}_totals.csv"
202212
)
203-
check_for_data_file(file_path)
204-
data_to_csv(args, dataframe, file_path)
213+
data_to_csv(args, dataframe, file_path)
205214

206215

207216
def process_totals_by_free_cultural(args, count_data):
@@ -234,7 +243,6 @@ def process_totals_by_free_cultural(args, count_data):
234243
file_path = shared.path_join(
235244
PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"
236245
)
237-
check_for_data_file(file_path)
238246
data_to_csv(args, data, file_path)
239247

240248

@@ -269,7 +277,6 @@ def process_totals_by_restrictions(args, count_data):
269277
file_path = shared.path_join(
270278
PATHS["data_phase"], "gcs_totals_by_restrictions.csv"
271279
)
272-
check_for_data_file(file_path)
273280
data_to_csv(args, data, file_path)
274281

275282

@@ -291,7 +298,6 @@ def process_totals_by_language(args, data):
291298
file_path = shared.path_join(
292299
PATHS["data_phase"], "gcs_totals_by_language.csv"
293300
)
294-
check_for_data_file(file_path)
295301
data_to_csv(args, data, file_path)
296302

297303

@@ -313,14 +319,14 @@ def process_totals_by_country(args, data):
313319
file_path = shared.path_join(
314320
PATHS["data_phase"], "gcs_totals_by_country.csv"
315321
)
316-
check_for_data_file(file_path)
317322
data_to_csv(args, data, file_path)
318323

319324

320325
def main():
321326
args = parse_arguments()
322327
shared.paths_log(LOGGER, PATHS)
323328
shared.git_fetch_and_merge(args, PATHS["repo"])
329+
check_for_data_files(FILE_PATHS)
324330

325331
# Count data
326332
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")

scripts/2-process/github_process.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424

2525
# Constants
2626
QUARTER = os.path.basename(PATHS["data_quarter"])
27+
FILE_PATHS = [
28+
shared.path_join(PATHS["data_phase"], "github_totals_by_license.csv"),
29+
shared.path_join(PATHS["data_phase"], "github_totals_by_restriction.csv"),
30+
]
2731

2832

2933
def parse_arguments():
@@ -59,7 +63,7 @@ def parse_arguments():
5963
return args
6064

6165

62-
def check_for_data_file(file_path):
66+
def check_for_data_files(file_path):
6367
if os.path.exists(file_path):
6468
raise shared.QuantifyingException(
6569
f"Processed data already exists for {QUARTER}", 0
@@ -98,7 +102,6 @@ def process_totals_by_license(args, count_data):
98102
file_path = shared.path_join(
99103
PATHS["data_phase"], "github_totals_by_license.csv"
100104
)
101-
check_for_data_file(file_path)
102105
data_to_csv(args, data, file_path)
103106

104107

@@ -133,15 +136,14 @@ def process_totals_by_restriction(args, count_data):
133136
file_path = shared.path_join(
134137
PATHS["data_phase"], "github_totals_by_restriction.csv"
135138
)
136-
check_for_data_file(file_path)
137139
data_to_csv(args, data, file_path)
138140

139141

140142
def main():
141143
args = parse_arguments()
142144
shared.paths_log(LOGGER, PATHS)
143145
shared.git_fetch_and_merge(args, PATHS["repo"])
144-
146+
check_for_data_files(FILE_PATHS)
145147
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
146148
count_data = shared.open_data_file(
147149
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]

scripts/2-process/wikipedia_process.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,17 @@
2828

2929
# Constants
3030
QUARTER = os.path.basename(PATHS["data_quarter"])
31+
FILE_PATHS = [
32+
shared.path_join(
33+
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
34+
),
35+
shared.path_join(
36+
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
37+
),
38+
shared.path_join(
39+
PATHS["data_phase"], "wikipedia_language_representation.csv"
40+
),
41+
]
3142

3243

3344
def parse_arguments():
@@ -63,7 +74,7 @@ def parse_arguments():
6374
return args
6475

6576

66-
def check_for_data_file(file_path):
77+
def check_for_data_files(file_path):
6778
if os.path.exists(file_path):
6879
raise shared.QuantifyingException(
6980
f"Processed data already exists for {QUARTER}", 0
@@ -98,7 +109,6 @@ def process_highest_language_usage(args, count_data):
98109
file_path = shared.path_join(
99110
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
100111
)
101-
check_for_data_file(file_path)
102112
data_to_csv(args, top_10, file_path)
103113

104114

@@ -122,7 +132,6 @@ def process_least_language_usage(args, count_data):
122132
file_path = shared.path_join(
123133
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
124134
)
125-
check_for_data_file(file_path)
126135
data_to_csv(args, bottom_10, file_path)
127136

128137

@@ -149,14 +158,14 @@ def process_language_representation(args, count_data):
149158
file_path = shared.path_join(
150159
PATHS["data_phase"], "wikipedia_language_representation.csv"
151160
)
152-
check_for_data_file(file_path)
153161
data_to_csv(args, language_counts, file_path)
154162

155163

156164
def main():
157165
args = parse_arguments()
158166
shared.paths_log(LOGGER, PATHS)
159167
shared.git_fetch_and_merge(args, PATHS["repo"])
168+
check_for_data_files(FILE_PATHS)
160169
file_count = shared.path_join(
161170
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
162171
)

scripts/shared.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -283,14 +283,15 @@ def update_readme(
283283
image_caption,
284284
entry_text=None,
285285
):
286+
"""
287+
Update the README.md file with the generated images and descriptions.
288+
"""
286289
logger = args.logger
287290
paths = args.paths
288291
ordered_sections = section_order()
289292
logger.info("ordered_sections:", ordered_sections)
290293
logger.info("section_title:", repr(section_title))
291-
"""
292-
Update the README.md file with the generated images and descriptions.
293-
"""
294+
294295
if not args.enable_save:
295296
return
296297
if image_path and not image_caption:
@@ -336,8 +337,8 @@ def update_readme(
336337
# Sections that should come before this section
337338
sections_before = ordered_sections[:current_postion]
338339
# we find the last existing section that comes before this section
339-
for prev_section in reversed(sections_before):
340-
prev_end_line = f"<!-- section end {prev_section} -->\n"
340+
for prev_section_title in reversed(sections_before):
341+
prev_end_line = f"<!-- section end {prev_section_title} -->\n"
341342
if prev_end_line in lines:
342343
insert_index = lines.index(prev_end_line) + 1
343344
break

0 commit comments

Comments (0)