@@ -259,8 +259,8 @@ def parse_arguments():
259259 default = DEFAULT_YEARS_BACK ,
260260 help = (
261261 "Number of years back from current year to fetch (default:"
262- f" { DEFAULT_YEARS_BACK } ). Use a value of -1 to specify"
263- " <earliestDatestamp> ."
262+ f" { DEFAULT_YEARS_BACK } ). Use a value of -1 to specify 2008-02-05 "
263+ " (first date a CC licensed article was added) ."
264264 ),
265265 )
266266
@@ -270,9 +270,9 @@ def parse_arguments():
270270 # Restrict args.years_back to earliest datetime and initialize
271271 # args.from_date
272272 #
273- # Earliest is hard coded here. Occasionally, it should be verified against
274- # <earliestDatestamp> in: https://oaipmh.arxiv.org/oai?verb=Identify
275- earliest_date = datetime (2005 , 9 , 16 , tzinfo = timezone .utc )
273+ # Survey of records indicated the first CC licensed article was added on
274+ # 2008-02-05
275+ earliest_date = datetime (2008 , 2 , 5 , tzinfo = timezone .utc )
276276 this_year = datetime .now (timezone .utc ).year
277277 if args .years_back == - 1 :
278278 arg_date = earliest_date
@@ -371,12 +371,12 @@ def extract_record_metadata(record):
371371 if not license_info .startswith ("CC" ):
372372 return {}
373373
374- # Extract added on
375- added_on_elem = record .find (
376- ".//{http://www.openarchives.org/OAI/2.0/}datestamp"
377- )
378- if added_on_elem is not None and added_on_elem .text :
379- added_on = added_on_elem .text .strip ()
374+ # # Extract added on
375+ # added_on_elem = record.find(
376+ # ".//{http://www.openarchives.org/OAI/2.0/}datestamp"
377+ # )
378+ # if added_on_elem is not None and added_on_elem.text:
379+ # added_on = added_on_elem.text.strip()
380380
381381 # Extract author count
382382 authors = record .findall (".//{http://arxiv.org/OAI/arXiv/}author" )
@@ -414,7 +414,7 @@ def extract_record_metadata(record):
414414 year = "Unknown"
415415
416416 metadata = {
417- "added_on" : added_on ,
417+ # "added_on": added_on,
418418 "author_count" : author_count ,
419419 "category" : category ,
420420 "license" : license_info ,
@@ -450,7 +450,7 @@ def query_arxiv(args, session):
450450 batch = 1
451451 total_fetched = 0
452452 cc_articles_found = 0
453- min_added_on = False
453+ # min_added_on = False
454454 resumption_token = None
455455
456456 # Proceed is set to False when limit reached or end of records (missing
@@ -513,9 +513,10 @@ def query_arxiv(args, session):
513513 metadata = extract_record_metadata (record )
514514 if not metadata : # Only true for CC licensed articles
515515 continue
516- added_on = metadata ["added_on" ]
517- if not min_added_on or added_on < min_added_on :
518- min_added_on = added_on
516+
517+ # added_on = metadata["added_on"]
518+ # if not min_added_on or added_on < min_added_on:
519+ # min_added_on = added_on
519520
520521 license_info = metadata ["license" ]
521522
@@ -537,8 +538,9 @@ def query_arxiv(args, session):
537538 batch_cc_count += 1
538539 cc_articles_found += 1
539540
540- if min_added_on :
541- LOGGER .info (f" Earliest CC article addition: { min_added_on } " )
541+ # if min_added_on:
542+ # LOGGER.info(f" Earliest CC article addition: {min_added_on}")
543+
542544 LOGGER .info (
543545 f" Batch CC licensed articles: { batch_cc_count } , Total"
544546 f" CC-licensed articles: { cc_articles_found } "
0 commit comments