Skip to content

Commit 9183088

Browse files
committed
refactor: use shared module for comprehensive ArXiv categories
1 parent 076b95a commit 9183088

1 file changed

Lines changed: 78 additions & 0 deletions

File tree

dev/create_arxiv_category_map.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Create arxiv_category_map.yaml with essential ArXiv category mappings
4+
based on common categories found in ArXiv CC licensed papers.
5+
"""
6+
# Standard library
7+
import os
8+
import sys
9+
10+
# Third-party
11+
import yaml
12+
13+
# Add the sibling ../scripts directory to sys.path so `shared` can be imported
14+
sys.path.append(os.path.join(os.path.dirname(__file__), "..", "scripts"))
15+
16+
# First-party/Local
17+
import shared # noqa: E402
18+
19+
20+
def create_arxiv_category_mapping():
    """Return a mapping of ArXiv category codes to display names.

    The result of ``shared.get_arxiv_categories()`` is preferred whenever
    that call succeeds and yields a truthy value. If the call raises, the
    error is printed and a small built-in table of common categories is
    returned instead; the same table is used when the call yields an
    empty/falsy result.

    Returns:
        The value from ``shared.get_arxiv_categories()`` when available,
        otherwise a ``dict`` of essential category code -> name pairs.
    """
    # Minimal table of frequently seen categories, used only as a fallback.
    essential = {
        "cs.AI": "Computer Science - Artificial Intelligence",
        "cs.CL": "Computer Science - Computation and Language",
        "cs.CV": "Computer Science - Computer Vision and Pattern Recognition",
        "cs.LG": "Computer Science - Machine Learning",
        "physics.comp-ph": "Physics - Computational Physics",
        "math.CO": "Mathematics - Combinatorics",
        "stat.AP": "Statistics - Applications",
    }

    try:
        fetched = shared.get_arxiv_categories()
        # Truthiness is evaluated inside the try so that any failure while
        # inspecting the result also lands in the fallback path.
        if not fetched:
            return essential
        return fetched
    except Exception as err:
        # Best effort: report the problem and carry on with the fallback.
        print(f"Failed to get categories: {err}")

    return essential
42+
43+
44+
def main():
    """Generate ``arxiv_category_map.yaml`` in the project's data directory.

    Obtains a logger and a paths mapping from ``shared.setup(__file__)``,
    builds the category mapping via ``create_arxiv_category_mapping()``,
    and writes it to ``paths["data"]/arxiv_category_map.yaml`` as
    block-style YAML with sorted keys, preceded by three comment lines.

    Any exception raised while writing or logging the result is logged as
    an error and the process exits with status 1.
    """
    logger, paths = shared.setup(__file__)

    mapping = create_arxiv_category_mapping()
    target = os.path.join(paths["data"], "arxiv_category_map.yaml")

    # The trailing empty element makes the joined text end with a newline,
    # so the YAML document starts on its own line after the comments.
    banner = "\n".join(
        [
            "# ArXiv category code to name mappings",
            "# Generated by dev/create_arxiv_category_map.py",
            "# Comprehensive ArXiv category taxonomy",
            "",
        ]
    )

    try:
        with open(target, "w", encoding="utf-8") as out:
            out.write(banner)
            yaml.dump(mapping, out, default_flow_style=False, sort_keys=True)

        logger.info(
            f"Created ArXiv category mapping: {target} "
            f"with {len(mapping)} categories"
        )
    except Exception as err:
        logger.error(f"Failed to create category mapping: {err}")
        sys.exit(1)
75+
76+
77+
if __name__ == "__main__":
78+
main()

0 commit comments

Comments
 (0)