1+ import time
12from typing import List , Set
23
34from pymongo import MongoClient
45from ted_sws .core .model .notice import Notice , NoticeStatus
56from ted_sws .core .service .batch_processing import chunks
67from ted_sws .data_manager .adapters .notice_repository import NoticeRepository
78from ted_sws .data_manager .adapters .sparql_endpoint import SPARQLTripleStoreEndpoint
9+ from ted_sws .notice_validator .resources import NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH , \
10+ NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH
811
912WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql"
10- CELLAR_NOTICE_AVAILABILITY_QUERY = "ASK {{ VALUES ?instance {{<{notice_uri}>}} ?instance ?predicate [] . }}"
11- CELLAR_NOTICES_AVAILABILITY_QUERY = "select distinct ?s {{VALUES ?s {{$notice_uries}} ?s ?p ?o . }}"
1213WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json"
1314INVALID_NOTICE_URI = 'https://www.w3.org/1999/02/22-rdf-syntax-ns#type-invalid'
14- DEFAULT_NOTICES_BATCH_SIZE = 1000
15+ DEFAULT_NOTICES_BATCH_SIZE = 5000
16+ DEFAULT_CELLAR_REQUEST_DELAY = 3
1517
1618
1719def check_availability_of_notice_in_cellar (notice_uri : str , endpoint_url : str = WEBAPI_SPARQL_URL ) -> bool :
@@ -21,7 +23,8 @@ def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str =
2123 :param endpoint_url:
2224 :return:
2325 """
24- query = CELLAR_NOTICE_AVAILABILITY_QUERY .format (notice_uri = notice_uri )
26+ query_template = NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH .read_text (encoding = "utf-8" )
27+ query = query_template .format (notice_uri = notice_uri )
2528 result = SPARQLTripleStoreEndpoint (endpoint_url = endpoint_url ).with_query (sparql_query = query ).fetch_tree ()
2629 return result ['boolean' ]
2730
@@ -33,9 +36,11 @@ def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_ur
3336 :param endpoint_url:
3437 :return:
3538 """
39+ query_template = NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH .read_text (encoding = "utf-8" )
3640 notice_uries = " " .join ([f"<{ notice_uri } >" for notice_uri in notice_uries ])
37- query = CELLAR_NOTICE_AVAILABILITY_QUERY .format (notice_uri = notice_uries )
38- result = SPARQLTripleStoreEndpoint (endpoint_url = endpoint_url ).with_query (sparql_query = query ).fetch_tabular ()
41+ query = query_template .format (notice_uries = notice_uries )
42+ result = SPARQLTripleStoreEndpoint (endpoint_url = endpoint_url ,
43+ use_post_method = True ).with_query (sparql_query = query ).fetch_tabular ()
3944 return set (result ['s' ].to_list ())
4045
4146
@@ -66,11 +71,13 @@ def validate_notice_availability_in_cellar(notice: Notice, notice_uri: str = Non
6671 return notice
6772
6873
69- def validate_notices_availability_in_cellar (notice_statuses : List [NoticeStatus ], mongodb_client : MongoClient ):
74+ def validate_notices_availability_in_cellar (notice_statuses : List [NoticeStatus ], mongodb_client : MongoClient ,
75+ cellar_request_delay_in_seconds : int = DEFAULT_CELLAR_REQUEST_DELAY ):
7076 """
7177 This function validates the availability in Cellar of each notice whose notice_status is in notice_statuses.
7278 :param notice_statuses:
7379 :param mongodb_client:
80+ :param cellar_request_delay_in_seconds:
7481 :return:
7582 """
7683 notice_repository = NoticeRepository (mongodb_client = mongodb_client )
@@ -90,3 +97,4 @@ def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus],
9097 else :
9198 notice .update_status_to (new_status = NoticeStatus .PUBLICLY_UNAVAILABLE )
9299 notice_repository .update (notice = notice )
100+ time .sleep (cellar_request_delay_in_seconds )
0 commit comments