3838BASE_URL = "http://export.arxiv.org/api/query?"
3939DEFAULT_FETCH_LIMIT = 800 # Default total papers to fetch
4040
41-
41+ # CSV Headers
42+ HEADER_AUTHOR_BUCKET = ["TOOL_IDENTIFIER" , "AUTHOR_BUCKET" , "COUNT" ]
43+ HEADER_CATEGORY_REPORT = [
44+ "TOOL_IDENTIFIER" ,
45+ "CATEGORY_CODE" ,
46+ "CATEGORY_LABEL" ,
47+ "COUNT" ,
48+ ]
49+ HEADER_COUNT = ["TOOL_IDENTIFIER" , "COUNT" ]
50+ HEADER_YEAR = ["TOOL_IDENTIFIER" , "YEAR" , "COUNT" ]
4251
4352# Search Queries
4453SEARCH_QUERIES = [
6069 'all:"CC-0"' ,
6170]
6271
63- # CSV Headers
64- HEADER_COUNT = ["TOOL_IDENTIFIER" , "COUNT" ]
65- HEADER_CATEGORY_REPORT = [
66- "TOOL_IDENTIFIER" ,
67- "CATEGORY_CODE" ,
68- "CATEGORY_LABEL" ,
69- "COUNT" ,
70- ]
71- HEADER_YEAR = ["TOOL_IDENTIFIER" , "YEAR" , "COUNT" ]
72- HEADER_AUTHOR_BUCKET = ["TOOL_IDENTIFIER" , "AUTHOR_BUCKET" , "COUNT" ]
73-
7472# Compiled regex patterns for CC license detection
7573CC_PATTERNS = [
7674 (re .compile (r"\bCC[-\s]?0\b" , re .IGNORECASE ), "CC0" ),
264262 PATHS ["data_1-fetch" ], "arxiv_4_count_by_author_bucket.csv"
265263)
266264# records metadata for each run for audit, reproducibility, and provenance
267- FILE_PROVENANCE = shared .path_join (PATHS ["data_1-fetch" ], "arxiv_provenance.yaml" )
265+ FILE_PROVENANCE = shared .path_join (
266+ PATHS ["data_1-fetch" ], "arxiv_provenance.yaml"
267+ )
268268
269269# Runtime variables
270270QUARTER = os .path .basename (PATHS ["data_quarter" ])
273273# parsing arguments function
274274def parse_arguments ():
275275 """Parse command-line options, returns parsed argument namespace.
276-
276+
277277 Note: The --limit parameter sets the total number of papers to fetch
278278 across all search queries, not per query. ArXiv API recommends
279279 maximum of 30000 results per session for optimal performance.
@@ -287,8 +287,10 @@ def parse_arguments():
287287 help = (
288288 f"Total limit of papers to fetch across all search queries "
289289 f"(default: { DEFAULT_FETCH_LIMIT } ). Maximum recommended: 30000. "
290- f"Note: Individual queries limited to 500 results (implementation choice). "
291- f"See ArXiv API documentation: https://info.arxiv.org/help/api/user-manual.html"
290+ f"Note: Individual queries limited to 500 results "
291+ f"(implementation choice). "
292+ f"See ArXiv API documentation: "
293+ f"https://info.arxiv.org/help/api/user-manual.html"
292294 ),
293295 )
294296 parser .add_argument (
@@ -602,7 +604,10 @@ def query_arxiv(args):
602604 if papers_found_in_batch == 0 :
603605 break
604606
605- LOGGER .info (f"Query '{ search_query } ' completed: { papers_found_for_query } papers found" )
607+ LOGGER .info (
608+ f"Query '{ search_query } ' completed: "
609+ f"{ papers_found_for_query } papers found"
610+ )
606611
607612 # Save results
608613 if args .enable_save :
0 commit comments