|
4 | 4 | from datetime import datetime, timezone |
5 | 5 |
|
6 | 6 | # Third-party |
7 | | -import yaml |
8 | 7 | from git import InvalidGitRepositoryError, NoSuchPathError, Repo |
9 | 8 | from pandas import PeriodIndex |
10 | 9 |
|
@@ -162,223 +161,6 @@ def setup(current_file): |
162 | 161 | return logger, paths |
163 | 162 |
|
164 | 163 |
|
def get_arxiv_categories():
    """Return the built-in ArXiv category taxonomy.

    The mapping is keyed by ArXiv category code (for example ``"cs.AI"``)
    and maps each code to its human-readable name.  A new dictionary is
    built on every call, so callers may mutate the result freely.

    Returns:
        dict[str, str]: Category code -> display name.
    """
    # ArXiv's category mapping, grouped by archive.
    categories = {
        # Computer Science
        "cs.AI": "Artificial Intelligence",
        "cs.AR": "Hardware Architecture",
        "cs.CC": "Computational Complexity",
        "cs.CE": "Computational Engineering, Finance, and Science",
        "cs.CG": "Computational Geometry",
        "cs.CL": "Computation and Language",
        "cs.CR": "Cryptography and Security",
        "cs.CV": "Computer Vision and Pattern Recognition",
        "cs.CY": "Computers and Society",
        "cs.DB": "Databases",
        "cs.DC": "Distributed, Parallel, and Cluster Computing",
        "cs.DL": "Digital Libraries",
        "cs.DM": "Discrete Mathematics",
        "cs.DS": "Data Structures and Algorithms",
        "cs.ET": "Emerging Technologies",
        "cs.FL": "Formal Languages and Automata Theory",
        "cs.GL": "General Literature",
        "cs.GR": "Graphics",
        "cs.GT": "Computer Science and Game Theory",
        "cs.HC": "Human-Computer Interaction",
        "cs.IR": "Information Retrieval",
        "cs.IT": "Information Theory",
        "cs.LG": "Machine Learning",
        "cs.LO": "Logic in Computer Science",
        "cs.MA": "Multiagent Systems",
        "cs.MM": "Multimedia",
        "cs.MS": "Mathematical Software",
        "cs.NA": "Numerical Analysis",
        "cs.NE": "Neural and Evolutionary Computing",
        "cs.NI": "Networking and Internet Architecture",
        "cs.OH": "Other Computer Science",
        "cs.OS": "Operating Systems",
        "cs.PF": "Performance",
        "cs.PL": "Programming Languages",
        "cs.RO": "Robotics",
        "cs.SC": "Symbolic Computation",
        "cs.SD": "Sound",
        "cs.SE": "Software Engineering",
        "cs.SI": "Social and Information Networks",
        "cs.SY": "Systems and Control",
        # Mathematics
        "math.AC": "Commutative Algebra",
        "math.AG": "Algebraic Geometry",
        "math.AP": "Analysis of PDEs",
        "math.AT": "Algebraic Topology",
        "math.CA": "Classical Analysis and ODEs",
        "math.CO": "Combinatorics",
        "math.CT": "Category Theory",
        "math.CV": "Complex Variables",
        "math.DG": "Differential Geometry",
        "math.DS": "Dynamical Systems",
        "math.FA": "Functional Analysis",
        "math.GM": "General Mathematics",
        "math.GN": "General Topology",
        "math.GR": "Group Theory",
        "math.GT": "Geometric Topology",
        "math.HO": "History and Overview",
        "math.IT": "Information Theory",
        "math.KT": "K-Theory and Homology",
        "math.LO": "Logic",
        "math.MG": "Metric Geometry",
        "math.MP": "Mathematical Physics",
        "math.NA": "Numerical Analysis",
        "math.NT": "Number Theory",
        "math.OA": "Operator Algebras",
        "math.OC": "Optimization and Control",
        "math.PR": "Probability",
        "math.QA": "Quantum Algebra",
        "math.RA": "Rings and Algebras",
        "math.RT": "Representation Theory",
        "math.SG": "Symplectic Geometry",
        "math.SP": "Spectral Theory",
        "math.ST": "Statistics Theory",
        # Physics
        "physics.acc-ph": "Accelerator Physics",
        "physics.ao-ph": "Atmospheric and Oceanic Physics",
        "physics.app-ph": "Applied Physics",
        "physics.atm-clus": "Atomic and Molecular Clusters",
        "physics.atom-ph": "Atomic Physics",
        "physics.bio-ph": "Biological Physics",
        "physics.chem-ph": "Chemical Physics",
        "physics.class-ph": "Classical Physics",
        "physics.comp-ph": "Computational Physics",
        "physics.data-an": "Data Analysis, Statistics and Probability",
        "physics.ed-ph": "Physics Education",
        "physics.flu-dyn": "Fluid Dynamics",
        "physics.gen-ph": "General Physics",
        "physics.geo-ph": "Geophysics",
        "physics.hist-ph": "History and Philosophy of Physics",
        "physics.ins-det": "Instrumentation and Detectors",
        "physics.med-ph": "Medical Physics",
        "physics.optics": "Optics",
        "physics.plasm-ph": "Plasma Physics",
        "physics.pop-ph": "Popular Physics",
        "physics.soc-ph": "Physics and Society",
        "physics.space-ph": "Space Physics",
        # Statistics
        "stat.AP": "Applications",
        "stat.CO": "Computation",
        "stat.ME": "Methodology",
        "stat.ML": "Machine Learning",
        "stat.OT": "Other Statistics",
        "stat.TH": "Statistics Theory",
        # Quantitative Biology
        "q-bio.BM": "Biomolecules",
        "q-bio.CB": "Cell Behavior",
        "q-bio.GN": "Genomics",
        "q-bio.MN": "Molecular Networks",
        "q-bio.NC": "Neurons and Cognition",
        "q-bio.OT": "Other Quantitative Biology",
        "q-bio.PE": "Populations and Evolution",
        "q-bio.QM": "Quantitative Methods",
        "q-bio.SC": "Subcellular Processes",
        "q-bio.TO": "Tissues and Organs",
        # Quantitative Finance
        "q-fin.CP": "Computational Finance",
        "q-fin.EC": "Economics",
        "q-fin.GN": "General Finance",
        "q-fin.MF": "Mathematical Finance",
        "q-fin.PM": "Portfolio Management",
        "q-fin.PR": "Pricing of Securities",
        "q-fin.RM": "Risk Management",
        "q-fin.ST": "Statistical Finance",
        "q-fin.TR": "Trading and Market Microstructure",
        # Economics
        "econ.EM": "Econometrics",
        "econ.GN": "General Economics",
        "econ.TH": "Theoretical Economics",
        # Electrical Engineering
        "eess.AS": "Audio and Speech Processing",
        "eess.IV": "Image and Video Processing",
        "eess.SP": "Signal Processing",
        "eess.SY": "Systems and Control",
        # High Energy Physics
        "hep-ex": "High Energy Physics - Experiment",
        "hep-lat": "High Energy Physics - Lattice",
        "hep-ph": "High Energy Physics - Phenomenology",
        "hep-th": "High Energy Physics - Theory",
        # Other Physics
        "astro-ph": "Astrophysics",
        "astro-ph.CO": "Cosmology and Nongalactic Astrophysics",
        "astro-ph.EP": "Earth and Planetary Astrophysics",
        "astro-ph.GA": "Astrophysics of Galaxies",
        "astro-ph.HE": "High Energy Astrophysical Phenomena",
        "astro-ph.IM": "Instrumentation and Methods for Astrophysics",
        "astro-ph.SR": "Solar and Stellar Astrophysics",
        "cond-mat.dis-nn": "Disordered Systems and Neural Networks",
        "cond-mat.mes-hall": "Mesoscale and Nanoscale Physics",
        "cond-mat.mtrl-sci": "Materials Science",
        "cond-mat.other": "Other Condensed Matter",
        "cond-mat.quant-gas": "Quantum Gases",
        "cond-mat.soft": "Soft Condensed Matter",
        "cond-mat.stat-mech": "Statistical Mechanics",
        "cond-mat.str-el": "Strongly Correlated Electrons",
        "cond-mat.supr-con": "Superconductivity",
        "gr-qc": "General Relativity and Quantum Cosmology",
        # "math-ph" is the code ArXiv lists as canonical; "math.MP" above is
        # its alias, so both spellings resolve to the same label.
        "math-ph": "Mathematical Physics",
        "nlin.AO": "Adaptation and Self-Organizing Systems",
        "nlin.CD": "Chaotic Dynamics",
        "nlin.CG": "Cellular Automata and Lattice Gases",
        "nlin.PS": "Pattern Formation and Solitons",
        "nlin.SI": "Exactly Solvable and Integrable Systems",
        "nucl-ex": "Nuclear Experiment",
        "nucl-th": "Nuclear Theory",
        "quant-ph": "Quantum Physics",
    }

    return categories
327 | | - |
328 | | - |
def load_arxiv_categories(data_dir=None):
    """Load ArXiv category mappings, falling back to the built-in table.

    When ``data_dir`` is given, ``<data_dir>/arxiv_category_map.yaml`` is
    tried first.  If that file is missing, unreadable, empty, or does not
    contain a YAML mapping, the table from ``get_arxiv_categories()`` is
    used instead and written back to the same path so later runs can load
    it.  Both the load and the save are best-effort: failures are logged as
    warnings and never raised.

    Args:
        data_dir: Directory holding (or receiving) the YAML cache file.
            ``None`` or an empty value skips all file access.

    Returns:
        dict: Category code -> human-readable label.
    """
    categories = {}
    yaml_path = (
        os.path.join(data_dir, "arxiv_category_map.yaml") if data_dir else None
    )

    # Try loading from YAML file first
    if yaml_path and os.path.exists(yaml_path):
        try:
            with open(yaml_path, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f) or {}
        except (yaml.YAMLError, UnicodeDecodeError, IOError, OSError) as e:
            # UnicodeDecodeError is a ValueError, not an OSError, so it must
            # be listed explicitly for a corrupt file to stay non-fatal.
            logging.warning(f"Failed to load category YAML: {e}")
        else:
            if isinstance(loaded, dict):
                categories = loaded
                logging.info(
                    f"Loaded {len(categories)} categories from {yaml_path}"
                )
            else:
                # A list or scalar document would break code -> label
                # lookups downstream; ignore it and use the fallback.
                logging.warning(
                    f"Ignoring {yaml_path}: expected a mapping, "
                    f"got {type(loaded).__name__}"
                )

    # Fallback to comprehensive mapping if no local categories
    if not categories:
        categories = get_arxiv_categories()
        if categories and yaml_path:
            # Save the fallback categories for future use
            try:
                os.makedirs(data_dir, exist_ok=True)
                with open(yaml_path, "w", encoding="utf-8") as f:
                    yaml.dump(
                        categories, f, default_flow_style=False, sort_keys=True
                    )
                logging.info(
                    f"Saved {len(categories)} categories to {yaml_path}"
                )
            except (yaml.YAMLError, IOError, OSError) as e:
                logging.warning(f"Failed to save categories: {e}")

    return categories
365 | | - |
366 | | - |
def normalize_arxiv_category(code, categories=None):
    """Translate an ArXiv category code into a display label.

    Args:
        code: Category code such as ``"cs.AI"``.  Falsy values and the
            literal ``"Unknown"`` are returned unchanged.
        categories: Optional mapping of category code -> label.

    Returns:
        The mapped label when ``code`` is a key of ``categories``;
        otherwise the upper-cased text before the first ``"."`` for dotted
        codes, or ``code`` itself when it contains no dot.
    """
    # Placeholders and empty values pass straight through.
    if not code or code == "Unknown":
        return code

    has_label = bool(categories) and code in categories
    if has_label:
        return categories[code]

    # No label available: reduce a dotted code to its upper-cased archive
    # prefix, and leave undotted codes as they are.
    return code.split(".")[0].upper() if "." in code else code
380 | | - |
381 | | - |
382 | 164 | def update_readme( |
383 | 165 | args, |
384 | 166 | section_title, |
|
0 commit comments