|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Create arxiv_category_map.yaml with essential ArXiv category mappings |
| 4 | +based on common categories found in ArXiv CC licensed papers. |
| 5 | +""" |
| 6 | +# Standard library |
| 7 | +import os |
| 8 | +import sys |
| 9 | + |
| 10 | +# Third-party |
| 11 | +import yaml |
| 12 | + |
| 13 | +# Add parent directory for shared imports |
| 14 | +sys.path.append(os.path.join(os.path.dirname(__file__), "..", "scripts")) |
| 15 | + |
| 16 | +# First-party/Local |
| 17 | +import shared # noqa: E402 |
| 18 | + |
| 19 | + |
def create_arxiv_category_mapping():
    """Return a mapping of ArXiv category codes to display names.

    The mapping is requested from ``shared.get_arxiv_categories()``. If
    that call raises, or hands back a falsy value, a small built-in table
    of common categories is returned instead.
    """
    # Built-in table used whenever the shared lookup is unavailable or
    # yields nothing.
    essential_categories = {
        "cs.AI": "Computer Science - Artificial Intelligence",
        "cs.CL": "Computer Science - Computation and Language",
        "cs.CV": "Computer Science - Computer Vision and Pattern Recognition",
        "cs.LG": "Computer Science - Machine Learning",
        "physics.comp-ph": "Physics - Computational Physics",
        "math.CO": "Mathematics - Combinatorics",
        "stat.AP": "Statistics - Applications",
    }

    try:
        fetched = shared.get_arxiv_categories()
        # Keep the truthiness check inside the try so that any error it
        # raises is reported and handled like a failed lookup.
        mapping = fetched if fetched else essential_categories
    except Exception as err:
        print(f"Failed to get categories: {err}")
        mapping = essential_categories

    return mapping
| 42 | + |
| 43 | + |
def main():
    """Write the ArXiv category mapping to ``arxiv_category_map.yaml``.

    The file is placed in the directory given by ``paths["data"]`` as
    returned from ``shared.setup``. It starts with a short comment header
    followed by the mapping dumped as block-style YAML with sorted keys.
    Any error while writing is logged and the process exits with status 1.
    """
    logger, paths = shared.setup(__file__)

    mapping = create_arxiv_category_mapping()
    target = os.path.join(paths["data"], "arxiv_category_map.yaml")

    # Comment lines placed at the top of the generated file; each one is
    # terminated with a newline when written.
    preamble_lines = (
        "# ArXiv category code to name mappings",
        "# Generated by dev/create_arxiv_category_map.py",
        "# Comprehensive ArXiv category taxonomy",
    )

    try:
        with open(target, "w", encoding="utf-8") as out:
            out.write("".join(f"{line}\n" for line in preamble_lines))
            yaml.dump(mapping, out, default_flow_style=False, sort_keys=True)

        logger.info(
            f"Created ArXiv category mapping: {target} "
            f"with {len(mapping)} categories"
        )
    except Exception as exc:
        logger.error(f"Failed to create category mapping: {exc}")
        sys.exit(1)
| 75 | + |
| 76 | + |
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments