Skip to content

Commit 391ddc5

Browse files
committed
Merge branch 'arxiv-fetch' into arxiv-2026Q1-fetch
2 parents 390a815 + 0b96b09 commit 391ddc5

File tree

9 files changed

+35
-0
lines changed

9 files changed

+35
-0
lines changed

scripts/1-fetch/arxiv_fetch.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Fetch arXiv articles that use a CC legal tool using the OAI-PMH API.
44
OAI-PMH: Open Archives Initiative Protocol for Metadata Harvesting.
55
"""
6+
67
# Standard library
78
import argparse
89
import csv
@@ -61,6 +62,26 @@
6162
HEADER_COUNT = ["TOOL_IDENTIFIER", "COUNT"]
6263
HEADER_YEAR = ["TOOL_IDENTIFIER", "YEAR", "COUNT"]
6364
QUARTER = os.path.basename(PATHS["data_quarter"])
65+
SUBSUMED_CATEGORIES = {
66+
# https://arxiv.org/archive/alg-geom
67+
# "The alg-geom archive has been subsumed into Algebraic Geometry
68+
# (math.AG)."
69+
"alg-geom": "math.AG",
70+
# https://arxiv.org/archive/chao-dyn
71+
# "The chao-dyn archive has been subsumed into Chaotic Dynamics (nlin.CD)."
72+
"chao-dyn": "nlin.CD",
73+
# https://arxiv.org/archive/dg-ga
74+
# "The dg-ga archive has been subsumed into Differential Geometry
75+
# (math.DG)."
76+
"dg-ga": "math.DG",
77+
# https://arxiv.org/archive/solv-int
78+
# "The solv-int archive has been subsumed into Exactly Solvable and
79+
# Integrable Systems (nlin.SI)."
80+
"solv-int": "nlin.SI",
81+
# https://arxiv.org/archive/q-alg
82+
# "The q-alg archive has been subsumed into Quantum Algebra (math.QA)."
83+
"q-alg": "math.QA",
84+
}
6485

6586

6687
# parsing arguments function
@@ -247,6 +268,10 @@ def extract_record_metadata(args, record):
247268
categories_elem = record.find(".//{http://arxiv.org/OAI/arXiv/}categories")
248269
if categories_elem is not None and categories_elem.text:
249270
metadata["categories"] = categories_elem.text.strip().split()
271+
for index, code in enumerate(metadata["categories"]):
272+
metadata["categories"][index] = SUBSUMED_CATEGORIES.get(code, code)
273+
metadata["categories"] = list(set(metadata["categories"]))
274+
metadata["categories"].sort()
250275
else:
251276
metadata["categories"] = False
252277

@@ -409,6 +434,8 @@ def query_arxiv(args, session):
409434
cc_articles_found += 1
410435

411436
if args.show_added and cc_articles_added:
437+
cc_articles_added = list(set(cc_articles_added))
438+
cc_articles_added.sort()
412439
LOGGER.info(f" CC articles added: {', '.join(cc_articles_added)}")
413440

414441
LOGGER.info(

scripts/1-fetch/gcs_fetch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""
33
Fetch CC Legal Tool usage data from Google Custom Search (GCS) API.
44
"""
5+
56
# Standard library
67
import argparse
78
import csv

scripts/2-process/gcs_process.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""
33
Process Google Custom Search (GCS) data.
44
"""
5+
56
# Standard library
67
import argparse
78
import os

scripts/2-process/github_process.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
This file is dedicated to processing GitHub data
44
for analysis and comparison between quarters.
55
"""
6+
67
# Standard library
78
import argparse
89
import os

scripts/2-process/wikipedia_process.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
This file is dedicated to processing Wikipedia data
44
for analysis and comparison between quarters.
55
"""
6+
67
# Standard library
78
import argparse
89
import os

scripts/3-report/gcs_report.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
This file is dedicated to visualizing and analyzing the data collected
44
from Google Custom Search (GCS).
55
"""
6+
67
# Standard library
78
import argparse
89
import os

scripts/3-report/github_report.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
This file is dedicated to visualizing and analyzing the data collected
44
from GitHub.
55
"""
6+
67
# Standard library
78
import argparse
89
import os

scripts/3-report/wikipedia_report.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
This file is dedicated to visualizing and analyzing the data collected
44
from Wikipedia.
55
"""
6+
67
# Standard library
78
import argparse
89
import os

scripts/3-report/zzz-notes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""
33
Add project references.
44
"""
5+
56
# Standard library
67
import argparse
78
import os

0 commit comments

Comments
 (0)