Skip to content

Commit 5317b77

Browse files
committed
chore: Fix encoding and newlines in arxiv_fetch.py per issue #217
1 parent 32a8c60 commit 5317b77

1 file changed

Lines changed: 6 additions & 6 deletions

File tree

scripts/1-fetch/arxiv_fetch.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -312,7 +312,7 @@ def parse_arguments():
312312
def initialize_data_file(file_path, headers):
313313
"""Initialize CSV file with headers if it doesn't exist."""
314314
if not os.path.isfile(file_path):
315-
with open(file_path, "w", newline="", encoding="utf-8") as file_obj:
315+
with open(file_path, "w", encoding="utf-8", newline="\n") as file_obj:
316316
writer = csv.DictWriter(
317317
file_obj, fieldnames=headers, dialect="unix"
318318
)
@@ -455,15 +455,15 @@ def save_count_data(
455455
# author_counts: {license: {author_count(int|None): count}}
456456

457457
# Save license counts
458-
with open(FILE_ARXIV_COUNT, "w", newline="", encoding="utf-8") as fh:
458+
with open(FILE_ARXIV_COUNT, "w", encoding="utf-8", newline="\n") as fh:
459459
writer = csv.DictWriter(fh, fieldnames=HEADER_COUNT, dialect="unix")
460460
writer.writeheader()
461461
for lic, c in license_counts.items():
462462
writer.writerow({"TOOL_IDENTIFIER": lic, "COUNT": c})
463463

464464
# Save category report with labels
465465
with open(
466-
FILE_ARXIV_CATEGORY_REPORT, "w", newline="", encoding="utf-8"
466+
FILE_ARXIV_CATEGORY_REPORT, "w", encoding="utf-8", newline="\n"
467467
) as fh:
468468
writer = csv.DictWriter(
469469
fh, fieldnames=HEADER_CATEGORY_REPORT, dialect="unix"
@@ -482,7 +482,7 @@ def save_count_data(
482482
)
483483

484484
# Save year counts
485-
with open(FILE_ARXIV_YEAR, "w", newline="", encoding="utf-8") as fh:
485+
with open(FILE_ARXIV_YEAR, "w", encoding="utf-8", newline="\n") as fh:
486486
writer = csv.DictWriter(fh, fieldnames=HEADER_YEAR, dialect="unix")
487487
writer.writeheader()
488488
for lic, years in year_counts.items():
@@ -493,7 +493,7 @@ def save_count_data(
493493

494494
# Save author buckets summary
495495
with open(
496-
FILE_ARXIV_AUTHOR_BUCKET, "w", newline="", encoding="utf-8"
496+
FILE_ARXIV_AUTHOR_BUCKET, "w", encoding="utf-8", newline="\n"
497497
) as fh:
498498
writer = csv.DictWriter(
499499
fh, fieldnames=HEADER_AUTHOR_BUCKET, dialect="unix"
@@ -624,7 +624,7 @@ def query_arxiv(args):
624624

625625
# write provenance YAML for auditing
626626
try:
627-
with open(FILE_PROVENANCE, "w", encoding="utf-8") as fh:
627+
with open(FILE_PROVENANCE, "w", encoding="utf-8", newline="\n") as fh:
628628
yaml.dump(provenance_data, fh, default_flow_style=False, indent=2)
629629
except Exception as e:
630630
LOGGER.warning("Failed to write provenance file: %s", e)

0 commit comments

Comments (0)