Skip to content

Commit b8fa72d

Browse files
committed
clarify rate limit and reformat via Black
1 parent 04a74a0 commit b8fa72d

1 file changed

Lines changed: 7 additions & 6 deletions

File tree

scripts/1-fetch/arxiv_fetch.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@
6969
"http://creativecommons.org/licenses/by-nd/4.0/": "CC BY-ND 4.0",
7070
"http://creativecommons.org/licenses/by-sa/3.0/": "CC BY-SA 3.0",
7171
"http://creativecommons.org/licenses/by-sa/4.0/": "CC BY-SA 4.0",
72-
"http://creativecommons.org/licenses/publicdomain": "CC CERTIFICATION 1.0 US",
72+
"http://creativecommons.org/licenses/publicdomain": "CC CERTIFICATION 1.0"
73+
" US",
7374
"http://creativecommons.org/publicdomain/zero/1.0/": "CC0 1.0",
7475
"http://creativecommons.org/share-your-work/public-domain/cc0/": "CC0",
7576
}
@@ -598,10 +599,9 @@ def query_arxiv(args):
598599
metadata = extract_metadata_from_xml(record_xml)
599600

600601
# Only process CC-licensed papers
601-
if (
602-
metadata["license"] != "Unknown"
603-
and metadata["license"].startswith("CC")
604-
):
602+
if metadata["license"] != "Unknown" and metadata[
603+
"license"
604+
].startswith("CC"):
605605
license_info = metadata["license"]
606606
category = metadata["category"]
607607
year = metadata["year"]
@@ -637,7 +637,8 @@ def query_arxiv(args):
637637
LOGGER.info("No more records available")
638638
break
639639

640-
# OAI-PMH recommends delays between requests
640+
# arXiv's API Terms of Use require at least 3 seconds between OAI-PMH requests
641+
# https://info.arxiv.org/help/api/tou.html#rate-limits
641642
time.sleep(3)
642643

643644
except requests.HTTPError as e:

0 commit comments

Comments
 (0)