Skip to content

Commit 0780a49

Browse files
Merge pull request #208 from OP-TED/feature/TED-586
Feature/ted 586
2 parents 587c515 + 72f63a8 commit 0780a49

9 files changed

Lines changed: 241 additions & 22 deletions

File tree

ted_sws/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,21 @@ class TedAPIConfig:
155155
def TED_API_URL(self) -> str:
156156
return EnvConfigResolver().config_resolve()
157157

158+
class FusekiConfig:
159+
@property
160+
def FUSEKI_ADMIN_USER(self) -> str:
161+
return EnvConfigResolver().config_resolve()
162+
163+
@property
164+
def FUSEKI_ADMIN_PASSWORD(self) -> str:
165+
return EnvConfigResolver().config_resolve()
166+
167+
@property
168+
def FUSEKI_ADMIN_HOST(self) -> str:
169+
return EnvConfigResolver().config_resolve()
158170

159171
class TedConfigResolver(MongoDBConfig, RMLMapperConfig, XMLProcessorConfig, ELKConfig, LoggingConfig,
160-
GitHubArtefacts, API, AllegroConfig, TedAPIConfig):
172+
GitHubArtefacts, API, AllegroConfig, TedAPIConfig, FusekiConfig):
161173
"""
162174
This class resolve the secrets of the ted-sws project.
163175
"""

ted_sws/data_manager/adapters/sparql_endpoint.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ def fetch_rdf(self) -> rdflib.Graph:
104104
:return:
105105
"""
106106

107+
def add_data_to_repository(self, file_content, repository_name, mime_type):
108+
pass
109+
107110

108111
class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC):
109112

ted_sws/data_manager/adapters/triple_store.py

Lines changed: 155 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,50 @@
11
import abc
2+
from json import loads
3+
from pathlib import Path
4+
from typing import List, Union
5+
from urllib.parse import urljoin
26

7+
import rdflib.util
8+
import requests
39
from franz.openrdf.repository import Repository
410
from franz.openrdf.sail import AllegroGraphServer
11+
from requests.auth import HTTPBasicAuth
512

13+
from ted_sws import config
614
from ted_sws.data_manager.adapters.sparql_endpoint import TripleStoreEndpointABC, SPARQLTripleStoreEndpoint
715

816

917
class TripleStoreABC:
1018
@abc.abstractmethod
11-
def add_data_to_repository(self, file_content: str, repository_name: str):
19+
def create_repository(self, repository_name: str):
20+
"""
21+
Method to create a repository
22+
:param repository_name:
23+
:return:
24+
"""
25+
26+
@abc.abstractmethod
27+
def delete_repository(self, repository_name: str):
28+
"""
29+
Method to delete a repository
30+
:param repository_name:
31+
:return:
32+
"""
33+
34+
@abc.abstractmethod
35+
def list_repositories(self) -> List[str]:
36+
"""
37+
Method to list all repositories
38+
:return:
39+
"""
40+
41+
@abc.abstractmethod
42+
def add_data_to_repository(self, file_content: Union[str, bytes, bytearray], mime_type: str, repository_name: str):
1243
"""
1344
Method to add triples from a string
1445
:param file_content:
1546
:param repository_name:
47+
:param mime_type:
1648
:return:
1749
"""
1850

@@ -31,7 +63,7 @@ def get_sparql_triple_store_endpoint(self, repository_name: str = None) -> Tripl
3163
"""
3264

3365

34-
class AllegroGraphTripleStore(abc.ABC):
66+
class AllegroGraphTripleStore(TripleStoreABC):
3567
"""
3668
This class is handling interactions with Allegro Graph triple store
3769
Note: If catalog name is not set, every operation will be executed at root level in the triple store
@@ -84,13 +116,15 @@ def list_repositories(self):
84116
"""
85117
return self.allegro.openCatalog(name=self.catalog_name).listRepositories()
86118

87-
def add_data_to_repository(self, file_content: str, repository_name: str = None):
119+
def add_data_to_repository(self, file_content: Union[str, bytes, bytearray], mime_type: str, repository_name: str = None):
88120
"""
89121
Method to add triples from a string
90122
:param file_content:
91123
:param repository_name:
124+
:param mime_type:
92125
:return:
93126
"""
127+
94128
repository = self._get_repository(repository_name=repository_name)
95129
repository.getConnection().addData(data=file_content)
96130

@@ -111,3 +145,121 @@ def get_sparql_triple_store_endpoint(self, repository_name: str = None) -> Tripl
111145
endpoint_url = f"{self.host}/repositories/{repository_name}"
112146
sparql_endpoint = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url)
113147
return sparql_endpoint
148+
149+
150+
class FusekiException(Exception):
151+
"""
152+
An exception when Fuseki server interaction has failed.
153+
"""
154+
155+
156+
RDF_MIME_TYPES = {
157+
"turtle": "text/turtle",
158+
"xml": "application/rdf+xml",
159+
"json-ld": "application/ld+json",
160+
"n3": "text/n3",
161+
"nt": "application/n-triples",
162+
"nquads": "application/n-quads",
163+
"trig": "application/trig",
164+
}
165+
166+
167+
class FusekiAdapter(TripleStoreABC):
168+
169+
def __init__(self, host: str = config.FUSEKI_ADMIN_HOST,
170+
user: str = config.FUSEKI_ADMIN_USER,
171+
password: str = config.FUSEKI_ADMIN_PASSWORD):
172+
173+
self.host = host
174+
self.user = user
175+
self.password = password
176+
177+
def create_repository(self, repository_name: str):
178+
"""
179+
Create the dataset for the Fuseki store
180+
:param repository_name: The repository identifier. This should be short alphanumeric string uniquely
181+
identifying the repository
182+
:return: true if repository was created
183+
"""
184+
if not repository_name:
185+
raise ValueError('Repository name cannot be empty.')
186+
187+
data = {
188+
'dbType': 'tdb', # assuming that all repository are created persistent across restart
189+
'dbName': repository_name
190+
}
191+
192+
response = requests.post(urljoin(self.host, f"/$/datasets"),
193+
auth=HTTPBasicAuth(self.user,
194+
self.password),
195+
data=data)
196+
197+
if response.status_code == 409:
198+
raise FusekiException('A repository with this name already exists.')
199+
200+
def add_data_to_repository(self, file_content: Union[str, bytes, bytearray], mime_type: str, repository_name: str):
201+
url = urljoin(self.host, f"{repository_name}/data")
202+
headers = {
203+
"Content-Type": mime_type,
204+
}
205+
response = requests.post(url,
206+
auth=HTTPBasicAuth(self.user, self.password),
207+
data=file_content, headers=headers)
208+
if response.status_code != 200:
209+
raise FusekiException(f'Server refused to load the content with code {response.status_code}.')
210+
211+
def add_file_to_repository(self, file_path: Path, repository_name: str):
212+
file_content = file_path.open('rb').read()
213+
file_format = rdflib.util.guess_format(str(file_path))
214+
mime_type = RDF_MIME_TYPES[file_format] if file_format in RDF_MIME_TYPES else "text/n3"
215+
self.add_data_to_repository(file_content=file_content, mime_type=mime_type, repository_name=repository_name)
216+
217+
def delete_repository(self, repository_name: str):
218+
"""
219+
Delete the repository from the Fuseki store
220+
:param repository_name: The repository identifier. This should be short alphanumeric string uniquely
221+
identifying the repository
222+
:return: true if repository was deleted
223+
"""
224+
response = requests.delete(urljoin(self.host, f"/$/datasets/{repository_name}"),
225+
auth=HTTPBasicAuth(self.user,
226+
self.password))
227+
228+
if response.status_code == 404:
229+
raise FusekiException('The repository doesn\'t exist.')
230+
231+
def list_repositories(self) -> list:
232+
"""
233+
Get the list of the repository names from the Fuseki store.
234+
:return: the list of the repository names
235+
:rtype: list
236+
"""
237+
response = requests.get(urljoin(self.host, "/$/datasets"),
238+
auth=HTTPBasicAuth(self.user,
239+
self.password))
240+
241+
if response.status_code != 200:
242+
raise FusekiException(f"Fuseki server request ({response.url}) returned response {response.status_code}")
243+
244+
return self._select_repository_names_from_fuseki_response(response_text=response.text)
245+
246+
@staticmethod
247+
def _select_repository_names_from_fuseki_response(response_text) -> list:
248+
"""
249+
Helper method for digging for the list of repository.
250+
:param response_text:
251+
:return: list of the repository names
252+
"""
253+
result = loads(response_text)
254+
return [d_item['ds.name'][1:] for d_item in result['datasets']]
255+
256+
def get_sparql_triple_store_endpoint(self, repository_name: str = None) -> TripleStoreEndpointABC:
257+
"""
258+
Helper method to create the url for querying.
259+
:param repository_name: The dataset identifier. This should be short alphanumeric string uniquely
260+
identifying the repository
261+
:return: the query url
262+
"""
263+
endpoint_url = urljoin(self.host, repository_name + "/sparql")
264+
sparql_endpoint = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url)
265+
return sparql_endpoint

ted_sws/notice_publisher_triple_store/services/load_transformed_notice_into_triple_store.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
This module implements functionality to load a given notice into a triple store.
33
"""
44
from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC
5-
from ted_sws.data_manager.adapters.triple_store import TripleStoreEndpointABC
5+
from ted_sws.data_manager.adapters.triple_store import TripleStoreEndpointABC, RDF_MIME_TYPES
66

77
DEFAULT_NOTICE_REPOSITORY_NAME = "notices"
88

@@ -16,6 +16,6 @@ def load_notice_into_triple_store(notice_id: str, notice_repository: NoticeRepos
1616
notice = notice_repository.get(reference=notice_id)
1717
if notice is None:
1818
raise ValueError('Notice, with "%s" notice_id, was not found' % notice_id)
19-
19+
mime_type = RDF_MIME_TYPES
2020
rdf_manifestation_string = notice.rdf_manifestation.object_data
21-
triple_store_repository.add_data_to_repository(file_content=rdf_manifestation_string, repository_name=repository_name)
21+
triple_store_repository.add_data_to_repository(file_content=rdf_manifestation_string, repository_name=repository_name, mime_type=mime_type)

tests/e2e/conftest.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from pymongo import MongoClient
33

44
from ted_sws import config
5-
from ted_sws.data_manager.adapters.triple_store import AllegroGraphTripleStore
5+
from ted_sws.data_manager.adapters.triple_store import AllegroGraphTripleStore, FusekiAdapter
66
from tests import TEST_DATA_PATH
77

88

@@ -34,3 +34,8 @@ def path_ttl_file():
3434
@pytest.fixture
3535
def fake_mapping_suite_id() -> str:
3636
return "test_package"
37+
38+
39+
@pytest.fixture
40+
def fuseki_triple_store():
41+
return FusekiAdapter(host=config.FUSEKI_ADMIN_HOST, user=config.FUSEKI_ADMIN_USER, password=config.FUSEKI_ADMIN_PASSWORD)
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
from ted_sws import config
5+
from ted_sws.data_manager.adapters.triple_store import FusekiAdapter, FusekiException
6+
from tests import TEST_DATA_PATH
7+
from tests.fakes.fake_repository import FakeNoticeRepository
8+
9+
REPOSITORY_NAME = "unknown_repository_123456677"
10+
SPARQL_QUERY_TRIPLES = "select * {?s ?p ?o} limit 10"
11+
12+
13+
def test_fuseki_triple_store_connection():
14+
triple_store = FusekiAdapter()
15+
if REPOSITORY_NAME in triple_store.list_repositories():
16+
triple_store.delete_repository(repository_name=REPOSITORY_NAME)
17+
18+
triple_store.create_repository(repository_name=REPOSITORY_NAME)
19+
20+
assert REPOSITORY_NAME in triple_store.list_repositories()
21+
22+
with pytest.raises(FusekiException):
23+
triple_store.create_repository(repository_name=REPOSITORY_NAME)
24+
25+
26+
def test_fuseki_triple_store_add_file_to_repository():
27+
triple_store = FusekiAdapter()
28+
rdf_file_path = TEST_DATA_PATH / "example.ttl"
29+
assert rdf_file_path.exists()
30+
triple_store.add_file_to_repository(rdf_file_path,
31+
repository_name="unknown_repository_123456677")
32+
33+
34+
def test_fuseki_triple_store_get_sparql_endpoint(fuseki_triple_store):
35+
sparql_endpoint = fuseki_triple_store.get_sparql_triple_store_endpoint(repository_name="unknown_repository_123456677")
36+
assert sparql_endpoint is not None
37+
df_query_result = sparql_endpoint.with_query(sparql_query=SPARQL_QUERY_TRIPLES).fetch_tabular()
38+
assert df_query_result is not None
39+
assert len(df_query_result) > 0
40+
41+
triple_store = FusekiAdapter()
42+
triple_store.delete_repository(repository_name=REPOSITORY_NAME)
43+
44+
with pytest.raises(FusekiException):
45+
triple_store.delete_repository(repository_name=REPOSITORY_NAME)
46+
47+
assert REPOSITORY_NAME not in triple_store.list_repositories()
48+

tests/e2e/mapping_suite_processor/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
import pymongo
33
import pytest
44

5+
from ted_sws.data_manager.adapters.triple_store import RDF_MIME_TYPES
56
from tests import TEST_DATA_PATH
67

8+
79
@pytest.fixture
810
@mongomock.patch(servers=(('server.example.com', 27017),))
911
def fake_mongodb_client():
@@ -14,6 +16,7 @@ def fake_mongodb_client():
1416
def file_system_repository_path():
1517
return TEST_DATA_PATH / "notice_transformer" / "mapping_suite_processor_repository"
1618

19+
1720
@pytest.fixture
1821
def yarrrml_file_content():
1922
return """prefixes:
@@ -29,6 +32,7 @@ def yarrrml_file_content():
2932
- [ex:name, $(firstname)]
3033
"""
3134

35+
3236
@pytest.fixture
3337
def rml_file_result():
3438
return """@prefix rr: <http://www.w3.org/ns/r2rml#>.
@@ -84,3 +88,6 @@ def package_folder_path():
8488
return TEST_DATA_PATH / "notice_validator" / "test_repository" / "test_package"
8589

8690

91+
@pytest.fixture
92+
def mime_type():
93+
return RDF_MIME_TYPES

tests/e2e/mapping_suite_processor/test_allegro_triple_store.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11

22

3-
def test_allegro_allegro_triple_store(ttl_file, path_ttl_file, allegro_triple_store):
3+
def test_allegro_triple_store(ttl_file, path_ttl_file, allegro_triple_store, mime_type):
44
allegro_triple_store.create_repository(repository_name="testing")
55
assert isinstance(allegro_triple_store.list_repositories(), list)
66
assert "testing" in allegro_triple_store.list_repositories()
7-
allegro_triple_store.add_data_to_repository(file_content=ttl_file, repository_name="testing")
7+
allegro_triple_store.add_data_to_repository(file_content=ttl_file,mime_type=mime_type,repository_name="testing")
88
allegro_triple_store.add_file_to_repository(file_path=path_ttl_file, repository_name="testing")
99
assert allegro_triple_store._get_repository(repository_name="testing").getConnection().size() == 484
1010
allegro_triple_store.delete_repository(repository_name="testing")

tests/e2e/notice_publisher_triple_store/test_load_transformed_notice_into_triple_store.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
load_notice_into_triple_store, DEFAULT_NOTICE_REPOSITORY_NAME
33
from tests.fakes.fake_repository import FakeNoticeRepository
44

5-
65
SPARQL_QUERY_TRIPLES = "select * {?s ?p ?o}"
76
SPARQL_QUERY_GRAPH = "SELECT ?g { GRAPH ?g { ?s ?p ?o } }"
87
SPARQL_QUERY_FIXED_URI = "select * { <http://data.europa.eu/a4g/resource/ReviewerOrganisationIdentifier/2018-S-175-396207/de2507f9-ae25-37c8-809c-0109efe10669> ?p ?o .} "
@@ -19,19 +18,12 @@ def test_load_notice_into_triple_store(transformed_complete_notice, allegro_trip
1918

2019
df_query_result = sparql_endpoint.with_query(sparql_query=SPARQL_QUERY_TRIPLES).fetch_tabular()
2120
assert df_query_result is not None
22-
# assert that the graph in the triple store has more than 1 triple inside
23-
if len(df_query_result) > 0:
24-
assert True
21+
assert len(df_query_result) > 0
2522

26-
# assert that at least one graph exists in the triple store
27-
df_query_result = sparql_endpoint.with_query(sparql_query=SPARQL_QUERY_GRAPH).fetch_tabular()
23+
df_query_result = sparql_endpoint.with_query(sparql_query=SPARQL_QUERY_GRAPH).fetch_tree()
2824
assert df_query_result is not None
29-
if len(df_query_result) > 0:
30-
assert True
25+
assert len(df_query_result) > 0
3126

32-
# assert that the there is an epo:SPARQL_QUERY_FIXED_URI object that has the value equal to the notice ID
3327
df_query_result = sparql_endpoint.with_query(sparql_query=SPARQL_QUERY_FIXED_URI).fetch_tabular()
3428
assert df_query_result is not None
35-
print(df_query_result)
36-
if len(df_query_result) > 0:
37-
assert True
29+
assert len(df_query_result) > 0

0 commit comments

Comments
 (0)