Skip to content

Commit 7defab5

Browse files
committed
Add logging and fix silent exception handling in arxiv_category_converter
1 parent 58a9f99 commit 7defab5

1 file changed

Lines changed: 49 additions & 32 deletions

File tree

dev/arxiv_category_converter.py

Lines changed: 49 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -4,23 +4,32 @@
44
"""
55
# Standard library
66
import csv
7+
import logging
78
import os
9+
import sys
810

911
# Third-party
1012
import yaml
1113

14+
# Add scripts directory to path to import shared module
15+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'scripts'))
16+
17+
logger = logging.getLogger(__name__)
18+
1219

1320
def load_category_mapping(data_dir):
1421
"""Load category code to label mapping from YAML file."""
1522
mapping_file = os.path.join(data_dir, "arxiv_category_map.yaml")
1623

1724
if not os.path.exists(mapping_file):
25+
logger.warning(f"Category mapping file not found: {mapping_file}")
1826
return {}
1927

2028
try:
2129
with open(mapping_file, "r") as f:
2230
return yaml.safe_load(f) or {}
23-
except Exception:
31+
except (yaml.YAMLError, IOError) as e:
32+
logger.error(f"Failed to load category mapping from {mapping_file}: {e}")
2433
return {}
2534

2635

@@ -34,39 +43,47 @@ def convert_categories_to_friendly_names(input_file, output_file, data_dir):
3443
data_dir: Directory containing arxiv_category_map.yaml
3544
"""
3645
if not os.path.exists(input_file):
46+
logger.error(f"Input file not found: {input_file}")
3747
return
3848

3949
# Load category mapping
4050
category_mapping = load_category_mapping(data_dir)
51+
logger.info(f"Loaded {len(category_mapping)} category mappings")
52+
53+
try:
54+
with (
55+
open(input_file, "r") as infile,
56+
open(output_file, "w", newline="") as outfile,
57+
):
58+
reader = csv.DictReader(infile)
59+
60+
# Create new fieldnames with both code and label
61+
fieldnames = []
62+
for field in reader.fieldnames:
63+
fieldnames.append(field)
64+
if field == "CATEGORY":
65+
fieldnames.append("CATEGORY_LABEL")
66+
67+
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect="unix")
68+
writer.writeheader()
69+
70+
for row in reader:
71+
if "CATEGORY" in row:
72+
category_code = row["CATEGORY"]
73+
# Convert code to label, fallback to uppercase first part
74+
category_label = category_mapping.get(
75+
category_code,
76+
(
77+
category_code.split(".")[0].upper()
78+
if category_code and "." in category_code
79+
else category_code
80+
),
81+
)
82+
row["CATEGORY_LABEL"] = category_label
4183

42-
with (
43-
open(input_file, "r") as infile,
44-
open(output_file, "w", newline="") as outfile,
45-
):
46-
reader = csv.DictReader(infile)
47-
48-
# Create new fieldnames with both code and label
49-
fieldnames = []
50-
for field in reader.fieldnames:
51-
fieldnames.append(field)
52-
if field == "CATEGORY":
53-
fieldnames.append("CATEGORY_LABEL")
54-
55-
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect="unix")
56-
writer.writeheader()
57-
58-
for row in reader:
59-
if "CATEGORY" in row:
60-
category_code = row["CATEGORY"]
61-
# Convert code to label, fallback to uppercase first part
62-
category_label = category_mapping.get(
63-
category_code,
64-
(
65-
category_code.split(".")[0].upper()
66-
if category_code and "." in category_code
67-
else category_code
68-
),
69-
)
70-
row["CATEGORY_LABEL"] = category_label
71-
72-
writer.writerow(row)
84+
writer.writerow(row)
85+
86+
logger.info(f"Successfully converted categories: {input_file} -> {output_file}")
87+
88+
except (IOError, csv.Error) as e:
89+
logger.error(f"Failed to process CSV files: {e}")

0 commit comments

Comments
 (0)