Skip to content

Commit bab5223

Browse files
committed
refactoring of the triple store classes
1 parent 891b7e9 commit bab5223

21 files changed

Lines changed: 155 additions & 82 deletions

File tree

ted_sws/core/adapters/sparql_triple_store.py renamed to ted_sws/data_manager/adapters/sparql_endpoint.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from string import Template
1616

1717
DEFAULT_ENCODING = 'utf-8'
18-
DEFAULT_ENDPOINT_URL = "https://publications.europa.eu/webapi/rdf/sparql"
1918

2019

2120
class SubstitutionTemplate(Template):
@@ -37,14 +36,14 @@ def create_or_reuse_connection(endpoint_url: str):
3736
return SPARQLClientPool.connection_pool[endpoint_url]
3837

3938

40-
class TripleStoreABC(ABC):
39+
class TripleStoreEndpointABC(ABC):
4140
"""
4241
This class provides an abstraction for a TripleStore.
4342
"""
4443

4544
@abstractmethod
4645
def with_query(self, sparql_query: str, substitution_variables: dict = None,
47-
sparql_prefixes: str = "") -> 'TripleStoreABC':
46+
sparql_prefixes: str = "") -> 'TripleStoreEndpointABC':
4847
"""
4948
This method will take a query in a string format
5049
:param sparql_query:
@@ -55,7 +54,7 @@ def with_query(self, sparql_query: str, substitution_variables: dict = None,
5554

5655
@abstractmethod
5756
def with_query_from_file(self, sparql_query_file_path: str, substitution_variables: dict = None,
58-
prefixes: str = "") -> 'TripleStoreABC':
57+
prefixes: str = "") -> 'TripleStoreEndpointABC':
5958
"""
6059
This method will read a query from a file
6160
:param sparql_query_file_path:
@@ -79,13 +78,13 @@ def fetch_tree(self) -> dict:
7978
"""
8079

8180

82-
class SPARQLTripleStore(TripleStoreABC):
81+
class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC):
8382

84-
def __init__(self, endpoint_url: str = DEFAULT_ENDPOINT_URL):
83+
def __init__(self, endpoint_url: str):
8584
self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url)
8685

8786
def with_query(self, sparql_query: str, substitution_variables: dict = None,
88-
sparql_prefixes: str = "") -> TripleStoreABC:
87+
sparql_prefixes: str = "") -> TripleStoreEndpointABC:
8988
"""
9089
Set the query text and return the reference to self for chaining.
9190
:return:
@@ -100,7 +99,7 @@ def with_query(self, sparql_query: str, substitution_variables: dict = None,
10099
return self
101100

102101
def with_query_from_file(self, sparql_query_file_path: str, substitution_variables: dict = None,
103-
prefixes: str = "") -> TripleStoreABC:
102+
prefixes: str = "") -> TripleStoreEndpointABC:
104103
"""
105104
Set the query text and return the reference to self for chaining.
106105
:return:

ted_sws/mapping_suite_processor/adapters/allegro_triple_store.py renamed to ted_sws/data_manager/adapters/triple_store.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
from franz.openrdf.repository import Repository
44
from franz.openrdf.sail import AllegroGraphServer
55

6+
from ted_sws.data_manager.adapters.sparql_endpoint import TripleStoreEndpointABC, SPARQLTripleStoreEndpoint
67

7-
class VersatileTripleStoreABC:
8+
9+
class TripleStoreABC:
810
@abc.abstractmethod
911
def add_data_to_repository(self, file_content: str, repository_name: str):
1012
"""
@@ -23,20 +25,30 @@ def add_file_to_repository(self, file_path, repository_name):
2325
:return:
2426
"""
2527

28+
def get_sparql_triple_store_endpoint(self, repository_name: str = None) -> TripleStoreEndpointABC:
29+
"""
30+
Return a triple store SPARQL endpoint connection
31+
"""
2632

27-
class AllegroGraphVersatileTripleStore(abc.ABC):
33+
34+
class AllegroGraphTripleStore(abc.ABC):
2835
"""
2936
This class is handling interactions with Allegro Graph triple store
3037
Note: If catalog name is not set, every operation will be executed at root level in the triple store
3138
"""
3239

33-
def __init__(self, host: str, user: str, password: str, catalog_name=None):
40+
def __init__(self, host: str, user: str, password: str, default_repository="default", catalog_name=None):
3441
self.host = host
3542
self.user = user
3643
self.password = password
3744
self.catalog_name = catalog_name
38-
self.allegro = AllegroGraphServer(host=self.host, port=443,
39-
user=self.user, password=self.password)
45+
self.default_repository = default_repository
46+
self.allegro = AllegroGraphServer(host=self.host,
47+
port=443,
48+
user=self.user,
49+
password=self.password,
50+
verifyhost=0,
51+
verifypeer=0)
4052

4153
def create_repository(self, repository_name: str):
4254
"""
@@ -62,7 +74,8 @@ def _get_repository(self, repository_name: str) -> Repository:
6274
:param repository_name:
6375
:return:
6476
"""
65-
return self.allegro.openCatalog().getRepository(name=repository_name, access_verb=Repository.ACCESS)
77+
name = repository_name if repository_name else self.default_repository
78+
return self.allegro.openCatalog().getRepository(name=name, access_verb=Repository.ACCESS)
6679

6780
def list_repositories(self):
6881
"""
@@ -71,7 +84,7 @@ def list_repositories(self):
7184
"""
7285
return self.allegro.openCatalog(name=self.catalog_name).listRepositories()
7386

74-
def add_data_to_repository(self, file_content: str, repository_name: str):
87+
def add_data_to_repository(self, file_content: str, repository_name: str = None):
7588
"""
7689
Method to add triples from a string
7790
:param file_content:
@@ -81,7 +94,7 @@ def add_data_to_repository(self, file_content: str, repository_name: str):
8194
repository = self._get_repository(repository_name=repository_name)
8295
repository.getConnection().addData(data=file_content)
8396

84-
def add_file_to_repository(self, file_path, repository_name):
97+
def add_file_to_repository(self, file_path, repository_name: str = None):
8598
"""
8699
Method to add triples from a file
87100
:param file_path:
@@ -90,3 +103,11 @@ def add_file_to_repository(self, file_path, repository_name):
90103
"""
91104
repository = self._get_repository(repository_name=repository_name)
92105
repository.getConnection().addFile(filePath=file_path)
106+
107+
def get_sparql_triple_store_endpoint(self, repository_name: str = None) -> TripleStoreEndpointABC:
108+
"""
109+
Return a triple store SPARQL endpoint connection
110+
"""
111+
endpoint_url = f"{self.host}/repositories/{repository_name}"
112+
sparql_endpoint = SPARQLTripleStoreEndpoint(endpoint_url=endpoint_url)
113+
return sparql_endpoint

ted_sws/data_manager/entrypoints/cli/cmd_generate_mapping_resources.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import click
66

77
from ted_sws.core.adapters.cmd_runner import CmdRunner as BaseCmdRunner, DEFAULT_MAPPINGS_PATH
8-
from ted_sws.core.adapters.sparql_triple_store import SPARQLTripleStore, TripleStoreABC
8+
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint, TripleStoreEndpointABC
99
from ted_sws.resources import QUERIES_PATH, MAPPING_FILES_PATH
1010
from ted_sws.event_manager.adapters.logger import LOG_INFO_TEXT
1111
from ted_sws.data_manager.adapters.mapping_suite_repository import TRANSFORM_PACKAGE_NAME, RESOURCES_PACKAGE_NAME
@@ -29,7 +29,7 @@ def __init__(
2929
self,
3030
queries_folder,
3131
output_folder,
32-
triple_store: TripleStoreABC = SPARQLTripleStore()
32+
triple_store: TripleStoreEndpointABC = SPARQLTripleStoreEndpoint()
3333
):
3434
super().__init__(name=CMD_NAME)
3535
self.queries_folder_path = Path(os.path.realpath(queries_folder))
@@ -59,7 +59,7 @@ def run(mapping_suite_id=None,
5959
opt_queries_folder: str = str(QUERIES_PATH),
6060
opt_output_folder: str = str(MAPPING_FILES_PATH),
6161
opt_mappings_folder: str = DEFAULT_MAPPINGS_PATH,
62-
triple_store: TripleStoreABC = SPARQLTripleStore()):
62+
triple_store: TripleStoreEndpointABC = SPARQLTripleStoreEndpoint()):
6363
"""
6464
This method will generate a json file for each ran SPARQL query in the resources folder
6565
:param mapping_suite_id:
@@ -94,7 +94,7 @@ def run(mapping_suite_id=None,
9494
help="Use to overwrite default OUTPUT")
9595
@click.option('-m', '--opt-mappings-folder', default=DEFAULT_MAPPINGS_PATH)
9696
def main(mapping_suite_id, opt_queries_folder, opt_output_folder, opt_mappings_folder):
97-
run(mapping_suite_id, opt_queries_folder, opt_output_folder, opt_mappings_folder, SPARQLTripleStore())
97+
run(mapping_suite_id, opt_queries_folder, opt_output_folder, opt_mappings_folder, SPARQLTripleStoreEndpoint())
9898

9999

100100
if __name__ == '__main__':

ted_sws/mapping_suite_processor/services/load_mapping_suite_output_into_triple_store.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import pathlib
22

33
from ted_sws import config
4-
from ted_sws.mapping_suite_processor.adapters.allegro_triple_store import AllegroGraphVersatileTripleStore
4+
from ted_sws.data_manager.adapters.triple_store import AllegroGraphTripleStore
55

66

7-
def repository_exists(triple_store: AllegroGraphVersatileTripleStore, repository_name) -> bool:
7+
def repository_exists(triple_store: AllegroGraphTripleStore, repository_name) -> bool:
88
"""
99
Method to check if the repository is in the triple store
1010
:param triple_store:
@@ -35,8 +35,8 @@ def load_mapping_suite_output_into_triple_store(package_folder_path, allegro_hos
3535

3636
ttl_files_paths = [str(path) for path in package_folder_path.glob("output/**/*.ttl")]
3737

38-
triple_store = AllegroGraphVersatileTripleStore(host=allegro_host, password=allegro_password,
39-
user=allegro_user, catalog_name=allegro_catalog_name)
38+
triple_store = AllegroGraphTripleStore(host=allegro_host, password=allegro_password,
39+
user=allegro_user, catalog_name=allegro_catalog_name)
4040

4141
if repository_exists(triple_store=triple_store, repository_name=package_name):
4242
triple_store.delete_repository(repository_name=package_name)

ted_sws/notice_publisher_triple_store/services/load_transformed_notice_into_triple_store.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
This module implements functionality to load a given notice into a triple store.
33
"""
44
from ted_sws.data_manager.adapters.repository_abc import NoticeRepositoryABC
5-
from ted_sws.mapping_suite_processor.adapters.allegro_triple_store import VersatileTripleStoreABC
5+
from ted_sws.data_manager.adapters.triple_store import TripleStoreEndpointABC
66

77
DEFAULT_NOTICE_REPOSITORY_NAME = "notices"
88

99

1010
def load_notice_into_triple_store(notice_id: str, notice_repository: NoticeRepositoryABC,
11-
triple_store: VersatileTripleStoreABC,
11+
triple_store_repository: TripleStoreEndpointABC,
1212
repository_name: str = DEFAULT_NOTICE_REPOSITORY_NAME):
1313
"""
1414
@@ -18,4 +18,4 @@ def load_notice_into_triple_store(notice_id: str, notice_repository: NoticeRepos
1818
raise ValueError('Notice, with "%s" notice_id, was not found' % notice_id)
1919

2020
rdf_manifestation_string = notice.rdf_manifestation.object_data
21-
triple_store.add_data_to_repository(file_content=rdf_manifestation_string, repository_name=repository_name)
21+
triple_store_repository.add_data_to_repository(file_content=rdf_manifestation_string, repository_name=repository_name)

tests/e2e/conftest.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,30 @@
22
from pymongo import MongoClient
33

44
from ted_sws import config
5+
from ted_sws.data_manager.adapters.triple_store import AllegroGraphTripleStore
6+
from tests import TEST_DATA_PATH
57

68

79
@pytest.fixture
810
def mongodb_client():
911
uri = config.MONGO_DB_AUTH_URL
1012
mongodb_client = MongoClient(uri)
1113
return mongodb_client
14+
15+
16+
@pytest.fixture
17+
def allegro_triple_store():
18+
return AllegroGraphTripleStore(host=config.ALLEGRO_HOST, user=config.AGRAPH_SUPER_USER,
19+
password=config.AGRAPH_SUPER_PASSWORD)
20+
21+
22+
@pytest.fixture
23+
def ttl_file():
24+
path = TEST_DATA_PATH / "notice_transformer" / "test_repository" / "test_package" / "transformation" / "mappings" / "award_of_contract.rml.ttl"
25+
return path.read_text()
26+
27+
28+
@pytest.fixture
29+
def path_ttl_file():
30+
path = TEST_DATA_PATH / "notice_transformer" / "test_repository" / "test_package" / "transformation" / "mappings" / "complementary_information.rml.ttl"
31+
return str(path)

tests/e2e/data_manager/conftest.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
SHACLTestSuite, TransformationTestData, MappingSuite
77
from tests import TEST_DATA_PATH
88

9-
109
@pytest.fixture
1110
def query_content():
1211
return """# title: Official name
@@ -239,3 +238,8 @@ def fake_mapping_suite_id() -> str:
239238
@pytest.fixture
240239
def invalid_mapping_suite_id() -> str:
241240
return "test_invalid_package"
241+
242+
243+
@pytest.fixture
244+
def cellar_sparql_endpoint():
245+
return "https://publications.europa.eu/webapi/rdf/sparql"

tests/e2e/metadata_normaliser/test_sparql_triple_store.py renamed to tests/e2e/data_manager/test_sparql_triple_store_endpoint.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import pandas as pd
22
import pytest
33

4-
from ted_sws.core.adapters.sparql_triple_store import SPARQLTripleStore
4+
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLTripleStoreEndpoint
55
from tests import TEST_DATA_PATH
66

77

8-
def test_sparql_triple_store_with_query():
8+
def test_sparql_triple_store_with_query(cellar_sparql_endpoint):
99
query = """prefix cdm: <http://publications.europa.eu/ontology/cdm#>
1010
PREFIX dct: <http://purl.org/dc/terms/>
1111
PREFIX dc: <http://purl.org/dc/elements/1.1/>
@@ -24,7 +24,7 @@ def test_sparql_triple_store_with_query():
2424
# dc: identifier
2525

2626
substitution_variables = {"value": 10}
27-
execute_query = SPARQLTripleStore().with_query(
27+
execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query(
2828
sparql_query=query, substitution_variables=substitution_variables)
2929

3030
tabular_results = execute_query.fetch_tabular()
@@ -36,16 +36,16 @@ def test_sparql_triple_store_with_query():
3636
"value"] == "http://publications.europa.eu/resource/authority/buyer-legal-type/OP_DATPRO"
3737

3838
with pytest.raises(Exception):
39-
SPARQLTripleStore().with_query(sparql_query="").fetch_tree()
39+
SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query(sparql_query="").fetch_tree()
4040

4141
with pytest.raises(Exception):
42-
SPARQLTripleStore().with_query(sparql_query="").fetch_tabular()
42+
SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query(sparql_query="").fetch_tabular()
4343

4444

45-
def test_sparql_triple_store_with_query_from_file():
45+
def test_sparql_triple_store_with_query_from_file(cellar_sparql_endpoint):
4646
query_path = TEST_DATA_PATH / "sparql_queries" / "buyer_legal_type.rq"
4747
substitution_variables = {"value": 10}
48-
execute_query = SPARQLTripleStore().with_query_from_file(sparql_query_file_path=query_path,substitution_variables=substitution_variables)
48+
execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query_from_file(sparql_query_file_path=query_path, substitution_variables=substitution_variables)
4949

5050
tabular_results = execute_query.fetch_tabular()
5151
tree_results = execute_query.fetch_tree()
@@ -56,6 +56,3 @@ def test_sparql_triple_store_with_query_from_file():
5656
"value"] == "http://publications.europa.eu/resource/authority/buyer-legal-type/OP_DATPRO"
5757

5858
assert "europa.eu" in execute_query.__str__()
59-
60-
61-
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/python3
2+
3+
# minio_feature_store.py
4+
# Date: 21.07.2022
5+
# Author: Eugeniu Costetchi
6+
# Email: eugen@meaningfy.ws
7+
8+
"""
9+
10+
"""
11+
12+
REPOSITORY_NAME = "this_is_a_test_repository"
13+
SPARQL_QUERY = "select * {?s ?p ?o} limit 10"
14+
15+
16+
def test_repository_creation(allegro_triple_store):
17+
if REPOSITORY_NAME in allegro_triple_store.list_repositories():
18+
allegro_triple_store.delete_repository(repository_name=REPOSITORY_NAME)
19+
20+
assert REPOSITORY_NAME not in allegro_triple_store.list_repositories()
21+
22+
allegro_triple_store.create_repository(repository_name=REPOSITORY_NAME)
23+
24+
assert REPOSITORY_NAME in allegro_triple_store.list_repositories()
25+
26+
allegro_triple_store.delete_repository(repository_name=REPOSITORY_NAME)
27+
28+
29+
def test_loading_data(allegro_triple_store, path_ttl_file):
30+
if REPOSITORY_NAME not in allegro_triple_store.list_repositories():
31+
allegro_triple_store.create_repository(repository_name=REPOSITORY_NAME)
32+
33+
allegro_triple_store.add_file_to_repository(file_path=path_ttl_file, repository_name=REPOSITORY_NAME)
34+
35+
sparql_endpoint = allegro_triple_store.get_sparql_triple_store_endpoint(repository_name=REPOSITORY_NAME)
36+
37+
assert sparql_endpoint
38+
39+
query_result = sparql_endpoint.with_query(sparql_query=SPARQL_QUERY).fetch_tree()
40+
41+
assert query_result
42+
assert "head" in query_result
43+
assert "results" in query_result
44+
assert len(query_result["results"]["bindings"]) == 10

0 commit comments

Comments
 (0)