3636# Constants
3737# API Configuration
3838BASE_URL = "http://export.arxiv.org/api/query?"
39- API_DELAY_SECONDS = 3 # ArXiv recommended delay between API calls
40- RESULTS_PER_REQUEST = 50 # Number of results per API request
41- MAX_RESULTS_PER_QUERY = 500 # Maximum results to fetch per search query
4239DEFAULT_FETCH_LIMIT = 800 # Default total papers to fetch
4340
44- # HTTP Retry Configuration
45- RETRY_TOTAL = 5
46- RETRY_BACKOFF_FACTOR = 1
41+
4742
4843# Search Queries
4944SEARCH_QUERIES = [
@@ -337,8 +332,8 @@ def initialize_all_data_files(args):
337332def get_requests_session ():
338333 """Create request session with retry logic"""
339334 retry_strategy = Retry (
340- total = RETRY_TOTAL ,
341- backoff_factor = RETRY_BACKOFF_FACTOR ,
335+ total = 5 ,
336+ backoff_factor = 1 ,
342337 status_forcelist = shared .STATUS_FORCELIST ,
343338 )
344339 session = requests .Session ()
@@ -485,7 +480,6 @@ def save_count_data(
485480 )
486481
487482 # Save aggregated category report (top N per license, rest -> Other)
488- TOP_N = 10
489483 with open (
490484 FILE_ARXIV_CATEGORY_REPORT_AGGREGATE , "w" , newline = "" , encoding = "utf-8"
491485 ) as fh :
@@ -506,8 +500,8 @@ def save_count_data(
506500 sorted_cats = sorted (
507501 cats .items (), key = lambda x : x [1 ], reverse = True
508502 )
509- top = sorted_cats [:TOP_N ]
510- others = sorted_cats [TOP_N :]
503+ top = sorted_cats [:10 ]
504+ others = sorted_cats [10 :]
511505 other_count = sum (c for _ , c in others )
512506 for code , c in top :
513507 label = CATEGORIES .get (code , code )
@@ -572,7 +566,7 @@ def query_arxiv(args):
572566 LOGGER .info ("Beginning to fetch results from ArXiv API" )
573567 session = get_requests_session ()
574568
575- results_per_iteration = RESULTS_PER_REQUEST
569+ results_per_iteration = 50
576570
577571 search_queries = SEARCH_QUERIES
578572
@@ -593,7 +587,7 @@ def query_arxiv(args):
593587
594588 for start in range (
595589 0 ,
596- min (args .limit - total_fetched , MAX_RESULTS_PER_QUERY ),
590+ min (args .limit - total_fetched , 500 ),
597591 results_per_iteration ,
598592 ):
599593 encoded_query = urllib .parse .quote_plus (search_query )
@@ -647,7 +641,7 @@ def query_arxiv(args):
647641
648642 # arXiv recommends a 3-second delay between consecutive
649643 # API calls for efficiency
650- time .sleep (API_DELAY_SECONDS )
644+ time .sleep (3 )
651645 except requests .RequestException as e :
652646 LOGGER .error (f"Request failed: { e } " )
653647 break
0 commit comments