@@ -191,7 +191,7 @@ def parse_arguments():
191191 "--limit" ,
192192 type = int ,
193193 default = DEFAULT_FETCH_LIMIT ,
194- help = f"Limit number of papers to fetch (default: { DEFAULT_FETCH_LIMIT } )" ,
194+ help = f"Limit papers to fetch (default: { DEFAULT_FETCH_LIMIT } )" ,
195195 )
196196 parser .add_argument (
197197 "--enable-save" ,
@@ -212,7 +212,7 @@ def parse_arguments():
212212def initialize_data_file (file_path , headers ):
213213 """Initialize CSV file with headers if it doesn't exist."""
214214 if not os .path .isfile (file_path ):
215- with open (file_path , "w" , newline = "" ) as file_obj :
215+ with open (file_path , "w" , newline = "" , encoding = "utf-8" ) as file_obj :
216216 writer = csv .DictWriter (
217217 file_obj , fieldnames = headers , dialect = "unix"
218218 )
@@ -295,7 +295,9 @@ def extract_year_from_entry(entry):
295295 try :
296296 return entry .published [:4 ] # Extract year from date string
297297 except (AttributeError , IndexError ) as e :
298- LOGGER .debug (f"Failed to extract year from entry.published '{ entry .published } ': { e } " )
298+ LOGGER .debug (
299+ f"Failed to extract year from '{ entry .published } ': { e } "
300+ )
299301 return "Unknown"
300302
301303
@@ -334,14 +336,14 @@ def save_count_data(
334336 # author_counts: {license: {author_count(int|None): count}}
335337
336338 # Save license counts
337- with open (FILE_ARXIV_COUNT , "w" , newline = "" ) as fh :
339+ with open (FILE_ARXIV_COUNT , "w" , newline = "" , encoding = "utf-8" ) as fh :
338340 writer = csv .DictWriter (fh , fieldnames = HEADER_COUNT , dialect = "unix" )
339341 writer .writeheader ()
340342 for lic , c in license_counts .items ():
341343 writer .writerow ({"TOOL_IDENTIFIER" : lic , "COUNT" : c })
342344
343345 # Save detailed category counts (code)
344- with open (FILE_ARXIV_CATEGORY , "w" , newline = "" ) as fh :
346+ with open (FILE_ARXIV_CATEGORY , "w" , newline = "" , encoding = "utf-8" ) as fh :
345347 writer = csv .DictWriter (fh , fieldnames = HEADER_CATEGORY , dialect = "unix" )
346348 writer .writeheader ()
347349 for lic , cats in category_counts .items ():
@@ -351,7 +353,9 @@ def save_count_data(
351353 )
352354
353355 # Save category report with labels and percent
354- with open (FILE_ARXIV_CATEGORY_REPORT , "w" , newline = "" ) as fh :
356+ with open (
357+ FILE_ARXIV_CATEGORY_REPORT , "w" , newline = "" , encoding = "utf-8"
358+ ) as fh :
355359 writer = csv .DictWriter (
356360 fh , fieldnames = HEADER_CATEGORY_REPORT , dialect = "unix"
357361 )
@@ -380,7 +384,9 @@ def save_count_data(
380384
381385 # Save aggregated category report (top N per license, rest -> Other)
382386 TOP_N = 10
383- with open (FILE_ARXIV_CATEGORY_REPORT_AGGREGATE , "w" , newline = "" ) as fh :
387+ with open (
388+ FILE_ARXIV_CATEGORY_REPORT_AGGREGATE , "w" , newline = "" , encoding = "utf-8"
389+ ) as fh :
384390 writer = csv .DictWriter (
385391 fh ,
386392 fieldnames = [
@@ -430,7 +436,7 @@ def save_count_data(
430436 )
431437
432438 # Save year counts
433- with open (FILE_ARXIV_YEAR , "w" , newline = "" ) as fh :
439+ with open (FILE_ARXIV_YEAR , "w" , newline = "" , encoding = "utf-8" ) as fh :
434440 writer = csv .DictWriter (fh , fieldnames = HEADER_YEAR , dialect = "unix" )
435441 writer .writeheader ()
436442 for lic , years in year_counts .items ():
@@ -440,7 +446,7 @@ def save_count_data(
440446 )
441447
442448 # Save detailed author counts (AUTHOR_COUNT as integer or Unknown)
443- with open (FILE_ARXIV_AUTHOR , "w" , newline = "" ) as fh :
449+ with open (FILE_ARXIV_AUTHOR , "w" , newline = "" , encoding = "utf-8" ) as fh :
444450 writer = csv .DictWriter (fh , fieldnames = HEADER_AUTHOR , dialect = "unix" )
445451 writer .writeheader ()
446452 for lic , acs in author_counts .items ():
@@ -454,7 +460,9 @@ def save_count_data(
454460 )
455461
456462 # Save author buckets summary
457- with open (FILE_ARXIV_AUTHOR_BUCKET , "w" , newline = "" ) as fh :
463+ with open (
464+ FILE_ARXIV_AUTHOR_BUCKET , "w" , newline = "" , encoding = "utf-8"
465+ ) as fh :
458466 writer = csv .DictWriter (
459467 fh , fieldnames = HEADER_AUTHOR_BUCKET , dialect = "unix"
460468 )
@@ -503,7 +511,9 @@ def query_arxiv(args):
503511 consecutive_empty_calls = 0
504512
505513 for start in range (
506- 0 , min (args .limit - total_fetched , MAX_RESULTS_PER_QUERY ), results_per_iteration
514+ 0 ,
515+ min (args .limit - total_fetched , MAX_RESULTS_PER_QUERY ),
516+ results_per_iteration ,
507517 ):
508518 encoded_query = urllib .parse .quote_plus (search_query )
509519 query = (
0 commit comments