|
69 | 69 | "http://creativecommons.org/licenses/by-nd/4.0/": "CC BY-ND 4.0", |
70 | 70 | "http://creativecommons.org/licenses/by-sa/3.0/": "CC BY-SA 3.0", |
71 | 71 | "http://creativecommons.org/licenses/by-sa/4.0/": "CC BY-SA 4.0", |
72 | | - "http://creativecommons.org/licenses/publicdomain": "CC CERTIFICATION 1.0 US", |
| 72 | + "http://creativecommons.org/licenses/publicdomain": "CC CERTIFICATION 1.0" |
| 73 | + " US", |
73 | 74 | "http://creativecommons.org/publicdomain/zero/1.0/": "CC0 1.0", |
74 | 75 | "http://creativecommons.org/share-your-work/public-domain/cc0/": "CC0", |
75 | 76 | } |
@@ -598,10 +599,9 @@ def query_arxiv(args): |
598 | 599 | metadata = extract_metadata_from_xml(record_xml) |
599 | 600 |
|
600 | 601 | # Only process CC-licensed papers |
601 | | - if ( |
602 | | - metadata["license"] != "Unknown" |
603 | | - and metadata["license"].startswith("CC") |
604 | | - ): |
| 602 | + if metadata["license"] != "Unknown" and metadata[ |
| 603 | + "license" |
| 604 | + ].startswith("CC"): |
605 | 605 | license_info = metadata["license"] |
606 | 606 | category = metadata["category"] |
607 | 607 | year = metadata["year"] |
@@ -637,7 +637,8 @@ def query_arxiv(args): |
637 | 637 | LOGGER.info("No more records available") |
638 | 638 | break |
639 | 639 |
|
640 | | - # OAI-PMH recommends delays between requests |
| 640 | + # arXiv's terms of use allow at most one request every 3 seconds |
| 641 | + # https://info.arxiv.org/help/api/tou.html#rate-limits |
641 | 642 | time.sleep(3) |
642 | 643 |
|
643 | 644 | except requests.HTTPError as e: |
|
0 commit comments