Skip to content

Commit c3250dc

Browse files
Merge pull request #424 from OP-TED/feature/TED-1138
add Cellar request delay
2 parents d6a12c8 + 69e6c59 commit c3250dc

7 files changed

Lines changed: 49 additions & 23 deletions

File tree

ted_sws/data_manager/adapters/sparql_endpoint.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import pandas as pd
1616
import rdflib
17-
from SPARQLWrapper import SPARQLWrapper, CSV, JSON, RDF
17+
from SPARQLWrapper import SPARQLWrapper, CSV, JSON, RDF, POST
1818

1919
from ted_sws import config
2020

@@ -35,13 +35,15 @@ class SPARQLClientPool(object):
3535
connection_pool = {}
3636

3737
@staticmethod
38-
def create_or_reuse_connection(endpoint_url: str, user: str, password: str):
38+
def create_or_reuse_connection(endpoint_url: str, user: str, password: str, use_post_method: bool = False):
3939
if endpoint_url not in SPARQLClientPool.connection_pool:
4040
sparql_wrapper = SPARQLWrapper(endpoint_url)
4141
sparql_wrapper.setCredentials(
4242
user=user,
4343
passwd=password
4444
)
45+
if use_post_method:
46+
sparql_wrapper.setMethod(method=POST)
4547
SPARQLClientPool.connection_pool[endpoint_url] = sparql_wrapper
4648
return SPARQLClientPool.connection_pool[endpoint_url]
4749

@@ -123,10 +125,11 @@ def add_data_to_repository(self, file_content, repository_name, mime_type):
123125

124126
class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC):
125127

126-
def __init__(self, endpoint_url: str, user: str = None, password: str = None):
128+
def __init__(self, endpoint_url: str, user: str = None, password: str = None, use_post_method: bool = False):
127129
user = user if user else config.AGRAPH_SUPER_USER
128130
password = password if password else config.AGRAPH_SUPER_PASSWORD
129-
self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url, user, password)
131+
self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url, user, password,
132+
use_post_method=use_post_method)
130133

131134
def _set_sparql_query(self, sparql_query: str):
132135
"""
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pathlib
22

33
MASTER_DATA_REGISTRY_RESOURCES_PATH = pathlib.Path(__file__).parent.resolve()
4-
5-
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates/get_by_cet_uri.rq"
6-
PROCEDURE_SUBJECTS_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates/get_procedure_uris.rq"
7-
RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates/get_2_dependency_levels_for_a_uri_as_root.rq"
4+
SPARQL_QUERY_TEMPLATES_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates"
5+
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "get_by_cet_uri.rq"
6+
PROCEDURE_SUBJECTS_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "get_procedure_uris.rq"
7+
RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "get_2_dependency_levels_for_a_uri_as_root.rq"
Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
#!/usr/bin/python3
1+
import pathlib
22

3-
# __init__.py
4-
# Date: 11/02/2022
5-
# Author: Eugeniu Costetchi
6-
# Email: costezki.eugen@gmail.com
7-
8-
""" """
3+
NOTICE_VALIDATOR_RESOURCES_PATH = pathlib.Path(__file__).parent.resolve()
4+
SPARQL_QUERY_TEMPLATES_PATH = NOTICE_VALIDATOR_RESOURCES_PATH / "sparql_query_templates"
5+
NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notice_availability.rq"
6+
NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH = SPARQL_QUERY_TEMPLATES_PATH / "check_notices_availability.rq"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ASK {{
2+
VALUES ?instance {{<{notice_uri}>}}
3+
?instance ?predicate [] .
4+
}}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
select distinct ?s
2+
{{
3+
VALUES ?s {{{notice_uries}}}
4+
?s ?p ?o .
5+
}}

ted_sws/notice_validator/services/check_availability_of_notice_in_cellar.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
1+
import time
12
from typing import List, Set
23

34
from pymongo import MongoClient
45
from ted_sws.core.model.notice import Notice, NoticeStatus
56
from ted_sws.core.service.batch_processing import chunks
67
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
78
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint
9+
from ted_sws.notice_validator.resources import NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH, \
10+
NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH
811

912
WEBAPI_SPARQL_URL = "https://publications.europa.eu/webapi/rdf/sparql"
10-
CELLAR_NOTICE_AVAILABILITY_QUERY = "ASK {{ VALUES ?instance {{<{notice_uri}>}} ?instance ?predicate [] . }}"
11-
CELLAR_NOTICES_AVAILABILITY_QUERY = "select distinct ?s {{VALUES ?s {{$notice_uries}} ?s ?p ?o . }}"
1213
WEBAPI_SPARQL_RUN_FORMAT = "application/sparql-results+json"
1314
INVALID_NOTICE_URI = 'https://www.w3.org/1999/02/22-rdf-syntax-ns#type-invalid'
14-
DEFAULT_NOTICES_BATCH_SIZE = 1000
15+
DEFAULT_NOTICES_BATCH_SIZE = 5000
16+
DEFAULT_CELLAR_REQUEST_DELAY = 3
1517

1618

1719
def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str = WEBAPI_SPARQL_URL) -> bool:
@@ -21,7 +23,8 @@ def check_availability_of_notice_in_cellar(notice_uri: str, endpoint_url: str =
2123
:param endpoint_url:
2224
:return:
2325
"""
24-
query = CELLAR_NOTICE_AVAILABILITY_QUERY.format(notice_uri=notice_uri)
26+
query_template = NOTICE_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
27+
query = query_template.format(notice_uri=notice_uri)
2528
result = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url).with_query(sparql_query=query).fetch_tree()
2629
return result['boolean']
2730

@@ -33,9 +36,11 @@ def check_availability_of_notices_in_cellar(notice_uries: List[str], endpoint_ur
3336
:param endpoint_url:
3437
:return:
3538
"""
39+
query_template = NOTICES_AVAILABILITY_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
3640
notice_uries = " ".join([f"<{notice_uri}>" for notice_uri in notice_uries])
37-
query = CELLAR_NOTICE_AVAILABILITY_QUERY.format(notice_uri=notice_uries)
38-
result = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url).with_query(sparql_query=query).fetch_tabular()
41+
query = query_template.format(notice_uries=notice_uries)
42+
result = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url,
43+
use_post_method=True).with_query(sparql_query=query).fetch_tabular()
3944
return set(result['s'].to_list())
4045

4146

@@ -66,11 +71,13 @@ def validate_notice_availability_in_cellar(notice: Notice, notice_uri: str = Non
6671
return notice
6772

6873

69-
def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus], mongodb_client: MongoClient):
74+
def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus], mongodb_client: MongoClient,
75+
cellar_request_delay_in_seconds: int = DEFAULT_CELLAR_REQUEST_DELAY):
7076
"""
7177
This function validate availability in cellar foreach notice from notices with a notice_status in notice_statuses.
7278
:param notice_statuses:
7379
:param mongodb_client:
80+
:param cellar_request_delay_in_seconds:
7481
:return:
7582
"""
7683
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
@@ -90,3 +97,4 @@ def validate_notices_availability_in_cellar(notice_statuses: List[NoticeStatus],
9097
else:
9198
notice.update_status_to(new_status=NoticeStatus.PUBLICLY_UNAVAILABLE)
9299
notice_repository.update(notice=notice)
100+
time.sleep(cellar_request_delay_in_seconds)

tests/e2e/notice_validator/test_check_availability_of_notice_in_cellar.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from ted_sws.core.model.notice import NoticeStatus
22
from ted_sws.notice_validator.services.check_availability_of_notice_in_cellar import \
3-
check_availability_of_notice_in_cellar, validate_notice_availability_in_cellar
3+
check_availability_of_notice_in_cellar, validate_notice_availability_in_cellar, \
4+
check_availability_of_notices_in_cellar, DEFAULT_NOTICES_BATCH_SIZE
45

56

67
def test_check_availability_of_notice_in_cellar(valid_cellar_uri, invalid_cellar_uri):
@@ -19,3 +20,10 @@ def test_validate_notice_availability_in_cellar(fake_notice_F03, valid_cellar_ur
1920
fake_notice_F03._status = NoticeStatus.PUBLISHED
2021
validate_notice_availability_in_cellar(notice=fake_notice_F03, notice_uri=invalid_cellar_uri)
2122
assert fake_notice_F03.status == NoticeStatus.PUBLICLY_UNAVAILABLE
23+
24+
25+
def test_validate_notices_availability_in_cellar(valid_cellar_uri, invalid_cellar_uri):
26+
notice_uries = [valid_cellar_uri] * DEFAULT_NOTICES_BATCH_SIZE + [invalid_cellar_uri]
27+
available_uries = check_availability_of_notices_in_cellar(notice_uries=notice_uries)
28+
assert valid_cellar_uri in available_uries
29+
assert invalid_cellar_uri not in available_uries

0 commit comments

Comments
 (0)