Skip to content

Commit 03b7c69

Browse files
committed
Convert provenance output from JSON to YAML and store in /data directory
1 parent 7dbf3c0 commit 03b7c69

1 file changed

Lines changed: 3 additions & 6 deletions

File tree

scripts/1-fetch/arxiv_fetch.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
# Standard library
66
import argparse
77
import csv
8-
import json
98
import os
109
import re
1110
import sys
@@ -61,9 +60,7 @@
6160
PATHS["data_1-fetch"], "arxiv_4_count_by_author_bucket.csv"
6261
)
6362
# records metadata for each run for audit, reproducibility, and provenance
64-
FILE_PROVENANCE = shared.path_join(
65-
PATHS["data_1-fetch"], "arxiv_provenance.json"
66-
)
63+
FILE_PROVENANCE = shared.path_join(PATHS["data"], "arxiv_provenance.yaml")
6764

6865
HEADER_COUNT = ["TOOL_IDENTIFIER", "COUNT"]
6966
HEADER_CATEGORY = ["TOOL_IDENTIFIER", "CATEGORY", "COUNT"]
@@ -590,11 +587,11 @@ def query_arxiv(args):
590587
"script": os.path.basename(__file__),
591588
}
592589

593-
# write provenance JSON for auditing
590+
# write provenance YAML for auditing
594591
try:
595592
os.makedirs(os.path.dirname(FILE_PROVENANCE), exist_ok=True)
596593
with open(FILE_PROVENANCE, "w", encoding="utf-8") as fh:
597-
json.dump(provenance_data, fh, indent=2)
594+
yaml.dump(provenance_data, fh, default_flow_style=False, indent=2)
598595
except Exception as e:
599596
LOGGER.warning("Failed to write provenance file: %s", e)
600597

0 commit comments

Comments
 (0)