Skip to content

Commit dc81c1b

Browse files
Merge pull request #204 from meaningfy-ws/feature/TED-46
Feature/ted 46
2 parents bc6167d + 40a4109 commit dc81c1b

19 files changed

Lines changed: 341 additions & 40 deletions

File tree

ted_sws/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ def AGRAPH_SUPER_PASSWORD(self) -> str:
8484
def ALLEGRO_HOST(self) -> str:
8585
return EnvConfigResolver().config_resolve()
8686

87+
@property
def TRIPLE_STORE_ENDPOINT_URL(self)->str:
    """
        Configuration value for the triple store endpoint URL.
        The value is whatever EnvConfigResolver().config_resolve() returns.
        NOTE(review): the resolver is called without arguments, so it presumably
        derives the configuration key from the calling property -- confirm before
        renaming or wrapping this property.
    :return: the resolved configuration value
    """
    return EnvConfigResolver().config_resolve()
90+
8791

8892
class ELKConfig:
8993

ted_sws/data_manager/adapters/sparql_endpoint.py

Lines changed: 103 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,18 @@
66
# Email: stefan.stratulat1997@gmail.com
77

88
import io
9+
import json
10+
import pathlib
911
from abc import ABC, abstractmethod
1012
from pathlib import Path
1113
from string import Template
1214

1315
import pandas as pd
14-
from SPARQLWrapper import SPARQLWrapper, CSV, JSON
16+
import rdflib
17+
from SPARQLWrapper import SPARQLWrapper, CSV, JSON, RDF
1518

1619
DEFAULT_ENCODING = 'utf-8'
20+
DEFAULT_RDF_FILE_FORMAT = "n3"
1721

1822

1923
class SubstitutionTemplate(Template):
@@ -40,7 +44,6 @@ class TripleStoreEndpointABC(ABC):
4044
This class provides an abstraction for a TripleStore.
4145
"""
4246

43-
@abstractmethod
4447
def with_query(self, sparql_query: str, substitution_variables: dict = None,
4548
sparql_prefixes: str = "") -> 'TripleStoreEndpointABC':
4649
"""
@@ -50,15 +53,32 @@ def with_query(self, sparql_query: str, substitution_variables: dict = None,
5053
:param sparql_prefixes:
5154
:return:
5255
"""
56+
if substitution_variables:
57+
template_query = SubstitutionTemplate(sparql_query)
58+
sparql_query = template_query.safe_substitute(substitution_variables)
59+
60+
sparql_query = (sparql_prefixes + " " + sparql_query).strip()
61+
self._set_sparql_query(sparql_query=sparql_query)
62+
return self
5363

54-
@abstractmethod
5564
def with_query_from_file(self, sparql_query_file_path: str, substitution_variables: dict = None,
56-
prefixes: str = "") -> 'TripleStoreEndpointABC':
65+
sparql_prefixes: str = "") -> 'TripleStoreEndpointABC':
5766
"""
5867
This method will read a query from a file
5968
:param sparql_query_file_path:
6069
:param substitution_variables:
61-
:param prefixes:
70+
:param sparql_prefixes:
71+
:return:
72+
"""
73+
sparql_query = Path(sparql_query_file_path).resolve().read_text(encoding="utf-8")
74+
return self.with_query(sparql_query=sparql_query, substitution_variables=substitution_variables,
75+
sparql_prefixes=sparql_prefixes)
76+
77+
@abstractmethod
78+
def _set_sparql_query(self, sparql_query: str):
79+
"""
80+
This method is used to set sparql query for future query operation.
81+
:param sparql_query:
6282
:return:
6383
"""
6484

@@ -76,45 +96,27 @@ def fetch_tree(self) -> dict:
7696
:return:
7797
"""
7898

79-
80-
class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC):
81-
82-
def __init__(self, endpoint_url: str):
83-
self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url)
84-
85-
def with_query(self, sparql_query: str, substitution_variables: dict = None,
86-
sparql_prefixes: str = "") -> TripleStoreEndpointABC:
99+
@abstractmethod
100+
def fetch_rdf(self) -> rdflib.Graph:
87101
"""
88-
Set the query text and return the reference to self for chaining.
102+
This method will return the result of the SPARQL query in a RDF format,
103+
use this method only for SPARQL queries of type CONSTRUCT or DESCRIBE.
89104
:return:
90105
"""
91-
if substitution_variables:
92-
template_query = SubstitutionTemplate(sparql_query)
93-
sparql_query = template_query.safe_substitute(substitution_variables)
94106

95-
new_query = (sparql_prefixes + " " + sparql_query).strip()
96107

97-
self.endpoint.setQuery(new_query)
98-
return self
108+
class SPARQLTripleStoreEndpoint(TripleStoreEndpointABC):
99109

100-
def with_query_from_file(self, sparql_query_file_path: str, substitution_variables: dict = None,
101-
prefixes: str = "") -> TripleStoreEndpointABC:
110+
def __init__(self, endpoint_url: str):
111+
self.endpoint = SPARQLClientPool.create_or_reuse_connection(endpoint_url)
112+
113+
def _set_sparql_query(self, sparql_query: str):
102114
"""
103-
Set the query text and return the reference to self for chaining.
115+
This method is used to set sparql query for future query operation.
116+
:param sparql_query:
104117
:return:
105118
"""
106-
107-
with open(Path(sparql_query_file_path).resolve(), 'r') as file:
108-
query_from_file = file.read()
109-
110-
if substitution_variables:
111-
template_query = SubstitutionTemplate(query_from_file)
112-
query_from_file = template_query.safe_substitute(substitution_variables)
113-
114-
new_query = (prefixes + " " + query_from_file).strip()
115-
116-
self.endpoint.setQuery(new_query)
117-
return self
119+
self.endpoint.setQuery(sparql_query)
118120

119121
def fetch_tabular(self) -> pd.DataFrame:
120122
"""
@@ -139,5 +141,71 @@ def fetch_tree(self):
139141
self.endpoint.setReturnFormat(JSON)
140142
return self.endpoint.queryAndConvert()
141143

144+
def fetch_rdf(self) -> rdflib.Graph:
    """
        Run the query currently set on the endpoint and return the converted
        result, asking the endpoint for the RDF return format.
        Use this method only for SPARQL queries of type CONSTRUCT or DESCRIBE.
    :return: the converted query result (annotated as an rdflib graph)
    """
    sparql_client = self.endpoint
    # The return format has to be selected before the query is executed.
    sparql_client.setReturnFormat(RDF)
    return sparql_client.queryAndConvert()
152+
142153
def __str__(self):
143154
return f"from <...{str(self.endpoint.endpoint)[-30:]}> {str(self.endpoint.queryString)[:60]} ..."
155+
156+
157+
class SPARQLStringEndpoint(TripleStoreEndpointABC):
    """
        This class is specialized to query an RDF string content using SPARQL queries.
        The content is parsed once, at construction time, into an in-memory rdflib graph;
        every fetch_* method executes the most recently set query against that graph.
    """

    def __init__(self, rdf_content: str, rdf_content_format: str = DEFAULT_RDF_FILE_FORMAT):
        """
            Parse the RDF content into a graph; no query is set yet.
        :param rdf_content: RDF document serialised as a string
        :param rdf_content_format: rdflib parser format name of rdf_content
        """
        self.graph = rdflib.Graph()
        self.graph.parse(data=rdf_content, format=rdf_content_format)
        self.sparql_query = None

    def _set_sparql_query(self, sparql_query: str):
        """
            This method is used to set sparql query for future query operation.
        :param sparql_query:
        :return:
        """
        self.sparql_query = sparql_query

    def _execute_query(self):
        """
            Execute the stored SPARQL query against the in-memory graph.
        :return: the rdflib query result
        :raises ValueError: when no query was set before fetching
        """
        # Fail early with an explicit message instead of handing None to rdflib.
        if self.sparql_query is None:
            raise ValueError("No SPARQL query is set, use with_query or with_query_from_file before fetching!")
        return self.graph.query(query_object=self.sparql_query)

    def fetch_tabular(self) -> pd.DataFrame:
        """
            Get query results in a tabular format.
            The column names are taken from the variables of the query result,
            so the stored query is expected to be a SELECT query.
        :return:
        """
        query_result = self._execute_query()
        return pd.DataFrame(data=query_result, columns=[str(var) for var in query_result.vars])

    def fetch_tree(self) -> dict:
        """
            Get query results in a dict format, built from the JSON
            serialisation of the query result.
        :return:
        """
        query_result = self._execute_query()
        return json.loads(query_result.serialize(format="json"))

    def fetch_rdf(self) -> rdflib.Graph:
        """
            This method will return the result of the SPARQL query in a RDF format,
            use this method only for SPARQL queries of type CONSTRUCT or DESCRIBE.
        :return: the graph carried by the query result
        :raises ValueError: when the stored query is not a CONSTRUCT or DESCRIBE query
        """
        query_result = self._execute_query()
        if query_result.type not in ("CONSTRUCT", "DESCRIBE"):
            # ValueError is still an Exception, so existing broad handlers keep working.
            raise ValueError("Fetch RDF method work only with CONSTRUCT and DESCRIBE sparql queries!")
        return query_result.graph
202+
203+
204+
class SPARQLFileEndpoint(SPARQLStringEndpoint):
    """
        This class is specialized to query an RDF file using SPARQL queries.
    """

    def __init__(self, rdf_file_path: pathlib.Path, rdf_content_format: str = DEFAULT_RDF_FILE_FORMAT):
        """
            Read the RDF file as UTF-8 text and load it into the in-memory graph.
        :param rdf_file_path: location of the RDF file (a pathlib.Path or a path string)
        :param rdf_content_format: rdflib parser format name of the file content
        """
        # Wrapping in pathlib.Path lets plain string paths work as well.
        rdf_content = pathlib.Path(rdf_file_path).read_text(encoding="utf-8")
        super().__init__(rdf_content=rdf_content, rdf_content_format=rdf_content_format)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/python3
2+
3+
# __init__.py
4+
# Date: 29.07.2022
5+
# Author: Stratulat Ștefan
6+
# Email: stefan.stratulat1997@gmail.com
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import pathlib
2+
3+
# Absolute path of the directory that holds this module; resources are addressed relative to it.
MASTER_DATA_REGISTRY_RESOURCES_PATH = pathlib.Path(__file__).parent.resolve()

# SPARQL query template files shipped in the "sparql_query_templates" sub-directory.
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH.joinpath(
    "sparql_query_templates", "get_by_cet_uri.rq")
RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH.joinpath(
    "sparql_query_templates", "get_2_dependency_levels_for_a_uri_as_root.rq")
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Builds the RDF fragment rooted at the substituted uri value: the outgoing
# triples of the root together with the outgoing triples of each object reached.
# NOTE(review): both patterns are joined, so a first-level triple is returned
# only when its object has outgoing triples of its own (literal-valued
# properties of the root are left out) - confirm this is intended.
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX epo: <http://data.europa.eu/a4g/ontology#>

CONSTRUCT {
    ?s ?p ?o .
    ?o ?op ?oo .
}
WHERE {
    VALUES ?s { <$uri> }
    ?s ?p ?o .
    ?o ?op ?oo .
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Lists every distinct subject declared as an instance (rdf:type) of the
# substituted uri value.
PREFIX epo: <http://data.europa.eu/a4g/ontology#>

SELECT DISTINCT ?s
WHERE {
    ?s a <$uri> .
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/usr/bin/python3
2+
3+
# __init__.py
4+
# Date: 29.07.2022
5+
# Author: Stratulat Ștefan
6+
# Email: stefan.stratulat1997@gmail.com
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/python3
2+
3+
# rdf_fragment_processor.py
4+
# Date: 29.07.2022
5+
# Author: Stratulat Ștefan
6+
# Email: stefan.stratulat1997@gmail.com
7+
8+
"""
9+
10+
"""
11+
import pathlib
12+
from string import Template
13+
from typing import List
14+
15+
import rdflib
16+
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLStringEndpoint
17+
from ted_sws.master_data_registry.resources import RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH, \
18+
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH
19+
20+
DEFAULT_RDF_FILE_FORMAT = "n3"
21+
22+
23+
def get_rdf_fragment_by_cet_uri_from_string(rdf_content: str, cet_uri: str,
                                            rdf_content_format: str = DEFAULT_RDF_FILE_FORMAT) -> List[rdflib.Graph]:
    """
        This function extracts from an RDF file content a list of RDFFragments dependent on a CET URI.
        First the subjects are selected with the "by CET URI" query template, then one
        fragment is built per selected subject with the "RDF fragment by URI" query template.
    :param rdf_content: RDF document serialised as a string
    :param cet_uri: URI substituted into the subject-selection query template
    :param rdf_content_format: rdflib parser format name of rdf_content
    :return: one graph per selected subject, in the order the subjects were returned
    """
    endpoint = SPARQLStringEndpoint(rdf_content=rdf_content, rdf_content_format=rdf_content_format)

    # Step 1: find the root subjects for the given CET URI.
    roots_query_text = TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
    roots_query = Template(roots_query_text).substitute(uri=cet_uri)
    roots_table = endpoint.with_query(sparql_query=roots_query).fetch_tabular()

    # Step 2: build one RDF fragment per root subject, reusing the same endpoint.
    fragment_query_template = Template(RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8"))
    root_uris = roots_table["s"].to_list()
    return [
        endpoint.with_query(sparql_query=fragment_query_template.substitute(uri=root_uri)).fetch_rdf()
        for root_uri in root_uris
    ]
45+
46+
47+
def get_rdf_fragments_by_cet_uri_from_file(rdf_file_path: pathlib.Path, cet_uri: str,
                                           rdf_file_content_format: str = DEFAULT_RDF_FILE_FORMAT
                                           ) -> List[rdflib.Graph]:
    """
        This function extracts from an RDF file a list of RDFFragments dependent on a CET URI.
        The file is read as UTF-8 text and the extraction is delegated to
        get_rdf_fragment_by_cet_uri_from_string.
    :param rdf_file_path: location of the RDF file
    :param cet_uri: URI used to select the fragment roots
    :param rdf_file_content_format: rdflib parser format name of the file content
    :return: the list of extracted RDF fragments
    """
    file_content = rdf_file_path.read_text(encoding="utf-8")
    return get_rdf_fragment_by_cet_uri_from_string(rdf_content=file_content,
                                                   cet_uri=cet_uri,
                                                   rdf_content_format=rdf_file_content_format)

tests/e2e/data_manager/test_sparql_triple_store_endpoint.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ def test_sparql_triple_store_with_query(cellar_sparql_endpoint):
4545
def test_sparql_triple_store_with_query_from_file(cellar_sparql_endpoint):
4646
query_path = TEST_DATA_PATH / "sparql_queries" / "buyer_legal_type.rq"
4747
substitution_variables = {"value": 10}
48-
execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query_from_file(sparql_query_file_path=query_path, substitution_variables=substitution_variables)
48+
execute_query = SPARQLTripleStoreEndpoint(endpoint_url=cellar_sparql_endpoint).with_query_from_file(
49+
sparql_query_file_path=query_path, substitution_variables=substitution_variables)
4950

5051
tabular_results = execute_query.fetch_tabular()
5152
tree_results = execute_query.fetch_tree()

tests/e2e/master_data_registry/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)