Skip to content

Commit 04a74a0

Browse files
committed
Fix license filtering to use startswith instead of substring matching
1 parent cd24b78 commit 04a74a0

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

scripts/1-fetch/arxiv_fetch.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ def query_arxiv(args):
600600
# Only process CC-licensed papers
601601
if (
602602
metadata["license"] != "Unknown"
603-
and "CC" in metadata["license"]
603+
and metadata["license"].startswith("CC")
604604
):
605605
license_info = metadata["license"]
606606
category = metadata["category"]
@@ -684,7 +684,7 @@ def query_arxiv(args):
684684
f"Provenance file write failed: {e}", 1
685685
)
686686

687-
LOGGER.info(f"Total CC licensed papers fetched: {total_fetched}")
687+
LOGGER.info(f"Total papers with CC licenses fetched: {total_fetched}")
688688
LOGGER.info(f"License distribution: {dict(license_counts)}")
689689

690690

0 commit comments

Comments
 (0)