1- from typing import List
1+ from typing import List , Set
22
33from pymongo import MongoClient
44from ted_sws .core .model .notice import Notice , NoticeStatus
5+ from ted_sws .core .service .batch_processing import chunks
56from ted_sws .data_manager .adapters .notice_repository import NoticeRepository
67from ted_sws .data_manager .adapters .sparql_endpoint import SPARQLTripleStoreEndpoint
78
# SPARQL endpoint used as the default `endpoint_url` by the availability checks in this module.
WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql"
# ASK query for a single notice, written as a str.format template: `{notice_uri}` is the
# only replacement field and the literal SPARQL braces are escaped by doubling them.
CELLAR_NOTICE_AVAILABILITY_QUERY = "ASK {{ VALUES ?instance {{<{notice_uri}>}} ?instance ?predicate [] . }}"
# SELECT query for a batch of notices: returns every ?s from the VALUES list that occurs
# as the subject of at least one triple.
# NOTE(review): the placeholder is `$notice_uries` (string.Template style) while the braces
# are doubled (str.format style escaping); neither str.format nor Template.substitute alone
# yields valid SPARQL from this text - confirm the intended substitution mechanism.
CELLAR_NOTICES_AVAILABILITY_QUERY = "select distinct ?s {{VALUES ?s {{$notice_uries}} ?s ?p ?o . }}"
WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json"
INVALID_NOTICE_URI = 'https://www.w3.org/1999/02/22-rdf-syntax-ns#type-invalid'
# Chunk size used when notices are validated against Cellar in batches.
DEFAULT_NOTICES_BATCH_SIZE = 1000
1215
1316
1417def check_availability_of_notice_in_cellar (notice_uri : str , endpoint_url : str = WEBAPI_SPARQL_URL ) -> bool :
@@ -23,6 +26,19 @@ def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str =
2326 return result ['boolean' ]
2427
2528
def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_url: str = WEBAPI_SPARQL_URL) -> Set[str]:
    """
        Check which of the given notices are available in Cellar, using a single batch query.
    :param notice_uries: Cellar URIs of the notices to look up (without angle brackets)
    :param endpoint_url: SPARQL endpoint to query
    :return: the values bound to ?s by the batch availability query, i.e. the URIs from
        notice_uries that occur as the subject of at least one triple; empty set for empty input
    """
    from string import Template

    # Nothing to look up: skip the remote call instead of sending an empty VALUES clause.
    if not notice_uries:
        return set()

    values_clause = " ".join(f"<{notice_uri}>" for notice_uri in notice_uries)
    # The batch template escapes its SPARQL braces by doubling them (str.format style) but marks
    # the URI list with a `$notice_uries` placeholder (string.Template style). An argument-less
    # format() collapses the doubled braces, then Template fills in the placeholder.
    query_template = Template(CELLAR_NOTICES_AVAILABILITY_QUERY.format())
    query = query_template.substitute(notice_uries=values_clause)
    result = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url).with_query(sparql_query=query).fetch_tabular()
    return set(result['s'].to_list())
40+
41+
2642def generate_notice_uri_from_notice_id (notice_id : str ) -> str :
2743 """
2844 This service generates Cellar URI for a notice, determined by notice_id
@@ -60,6 +76,17 @@ def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus],
6076 notice_repository = NoticeRepository (mongodb_client = mongodb_client )
6177 for notice_status in notice_statuses :
6278 selected_notices = notice_repository .get_notices_by_status (notice_status = notice_status )
63- for selected_notice in selected_notices :
64- validate_notice_availability_in_cellar (notice = selected_notice )
65- notice_repository .update (notice = selected_notice )
79+ for selected_notices_chunk in chunks (selected_notices , chunk_size = DEFAULT_NOTICES_BATCH_SIZE ):
80+ selected_notices_map = {
81+ generate_notice_uri_from_notice_id (notice_id = notice .ted_id ): notice
82+ for notice in selected_notices_chunk
83+ }
84+ selected_notices_uries = list (selected_notices_map .keys ())
85+ available_notice_uries_in_cellar = check_availability_of_notices_in_cellar (
86+ notice_uries = selected_notices_uries )
87+ for notice_uri , notice in selected_notices_map .items ():
88+ if notice_uri in available_notice_uries_in_cellar :
89+ notice .update_status_to (new_status = NoticeStatus .PUBLICLY_AVAILABLE )
90+ else :
91+ notice .update_status_to (new_status = NoticeStatus .PUBLICLY_UNAVAILABLE )
92+ notice_repository .update (notice = notice )
0 commit comments