Skip to content

Commit f23de72

Browse files
Merge branch 'main' into feature/TED-1066
2 parents b3c16fa + 62f5d0a commit f23de72

10 files changed

Lines changed: 183 additions & 53 deletions

File tree

ted_sws/core/model/transform.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class ConceptualMappingXPATH(MappingSuiteComponent):
7272

7373
class ConceptualMappingDiff(MappingSuiteComponent):
7474
""""""
75+
created_at: str = datetime.now().isoformat()
7576
metadata: Optional[dict]
7677
data: Optional[dict]
7778

ted_sws/mapping_suite_processor/entrypoints/cli/cmd_conceptual_mapping_differ.py

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,14 @@ def __init__(
4444
output_folder
4545
):
4646
super().__init__(name=CMD_NAME)
47-
self.mapping_suite_id = self._init_list_input_opts_split(mapping_suite_id)
48-
self.file = self._init_list_input_opts_split(file)
49-
self.branch = self._init_list_input_opts_split(branch)
47+
self.mapping_suite_ids = self._init_list_input_opts_split(mapping_suite_id)
48+
self.files = self._init_list_input_opts_split(file)
49+
self.branches = self._init_list_input_opts_split(branch)
5050
self.mappings_path = mappings_path
5151
self.output_folder = output_folder
5252

53-
def _report(self, data, files: list):
53+
def _report(self, diff, files: list):
54+
data = diff['data']
5455
report_file_file_name_json = Path(self.output_folder) / (DEFAULT_REPORT_FILE_NAME + ".json")
5556
with open(report_file_file_name_json, 'w+') as report_file:
5657
report_file.write(json.dumps(data, indent=2))
@@ -60,9 +61,11 @@ def _report(self, data, files: list):
6061
generate_conceptual_mappings_diff_html_report(
6162
ConceptualMappingDiff(
6263
metadata={
63-
"branches": self.branch,
64-
"mapping_suite_ids": self.mapping_suite_id,
65-
"files": files
64+
"branches": self.branches,
65+
"mapping_suite_ids": self.mapping_suite_ids,
66+
"files": files,
67+
"defaults": diff['metadata']['defaults'],
68+
"metadata": diff['metadata']['metadata']
6669
},
6770
data=data
6871
))
@@ -81,12 +84,12 @@ def _mappings_path(self) -> Path:
8184
return mappings_path
8285

8386
def _display_input(self):
84-
if self.branch:
85-
self.log(LOG_WARN_TEXT.format("GIT Branches: ") + str(self.branch))
86-
if self.mapping_suite_id:
87-
self.log(LOG_WARN_TEXT.format("MappingSuites: ") + str(self.mapping_suite_id))
88-
if self.file:
89-
self.log(LOG_WARN_TEXT.format("Files: ") + str(self.file))
87+
if self.branches:
88+
self.log(LOG_WARN_TEXT.format("GIT Branches: ") + str(self.branches))
89+
if self.mapping_suite_ids:
90+
self.log(LOG_WARN_TEXT.format("MappingSuites: ") + str(self.mapping_suite_ids))
91+
if self.files:
92+
self.log(LOG_WARN_TEXT.format("Files: ") + str(self.files))
9093

9194
def run_cmd(self):
9295
self._display_input()
@@ -95,49 +98,51 @@ def run_cmd(self):
9598
filepath1 = None
9699
filepath2 = None
97100

98-
file_len = len(self.file)
99-
mapping_suite_id_len = len(self.mapping_suite_id)
100-
branch_len = len(self.branch)
101+
file_len = len(self.files)
102+
mapping_suite_id_len = len(self.mapping_suite_ids)
103+
branch_len = len(self.branches)
101104

102-
if not self.branch:
103-
if self.file and file_len == 2:
104-
filepath1 = self.file[0]
105+
if not self.branches:
106+
if self.files and file_len == 2:
107+
filepath1 = self.files[0]
105108
assert Path(filepath1).is_file()
106-
filepath2 = self.file[1]
109+
filepath2 = self.files[1]
107110
assert Path(filepath2).is_file()
108-
elif self.mapping_suite_id:
111+
elif self.mapping_suite_ids:
109112
mappings_path = self._mappings_path()
110113
if mapping_suite_id_len == 2:
111-
filepath1 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_id[0])
114+
filepath1 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_ids[0])
112115
assert Path(filepath1).is_file()
113-
filepath2 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_id[1])
116+
filepath2 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_ids[1])
114117
assert Path(filepath2).is_file()
115118
elif mapping_suite_id_len == 1 and file_len == 1:
116-
filepath1 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_id[0])
119+
filepath1 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_ids[0])
117120
assert Path(filepath1).is_file()
118-
filepath2 = self.file[0]
121+
filepath2 = str(Path(self.files[0]).resolve())
119122
assert Path(filepath2).is_file()
120123

121124
error = None
122125
if filepath1 and filepath2:
123126
diff = mapping_suite_diff_files_conceptual_mappings([Path(filepath1), Path(filepath2)])
124-
elif self.branch:
127+
elif self.branches:
125128
assert mapping_suite_id_len > 0
126-
if branch_len == 1 and mapping_suite_id_len == 1 and not self.file:
129+
if branch_len == 1 and mapping_suite_id_len == 1 and not self.files:
127130
mappings_path = self._mappings_path()
128-
filepath2 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_id[0])
131+
filepath2 = self._conceptual_mappings_file_path(mappings_path, self.mapping_suite_ids[0])
129132
else:
130-
filepath2 = (self.file[0] if file_len == 1 else None)
133+
filepath2 = (self.files[0] if file_len == 1 else None)
131134

132135
diff = mapping_suite_diff_repo_conceptual_mappings(
133-
branch_or_tag_name=self.branch,
134-
mapping_suite_id=self.mapping_suite_id,
136+
branch_or_tag_name=self.branches,
137+
mapping_suite_id=self.mapping_suite_ids,
135138
filepath=Path(filepath2) if filepath2 else None
136139
)
137140
else:
138141
error = Exception("Cannot do a diff with provided input!")
139142

140-
self._report(data=diff, files=[filepath1, filepath2])
143+
if not error:
144+
self._report(diff=diff, files=[filepath1, filepath2])
145+
141146
self.run_cmd_result(error)
142147

143148

ted_sws/mapping_suite_processor/resources/templates/conceptual_mappings_diff_report.jinja2

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,85 @@
2020
table td {
2121
width: auto
2222
}
23+
24+
table.heading td,
25+
table.heading th {
26+
border: 1px solid #ddd;
27+
}
28+
table.heading th {
29+
background-color: #eee;
30+
text-align: left;
31+
}
32+
table.heading thead td {
33+
text-align: left;
34+
font-weight: bold;
35+
width: 50%;
36+
}
37+
table.heading tbody th {
38+
white-space: nowrap;
39+
}
2340
</style>
2441
</head>
2542
<body>
2643
<h1>Conceptual Mappings Diff HTML report </h1>
2744
<hr>
28-
<h3>
29-
{{ metadata | replace('\n', '<br>') | replace(' ', '&nbsp;') }}
30-
</h3>
45+
<ul>
46+
<li>Created at: {{ created_at }}</li>
47+
</ul>
48+
<hr>
49+
<h2>
50+
Conceptual Mappings
51+
</h2>
52+
<table class="heading">
53+
<thead>
54+
<tr>
55+
<th></th>
56+
<td>Conceptual Mapping 1</td>
57+
<td>Conceptual Mapping 2</td>
58+
</tr>
59+
</thead>
60+
<tbody>
61+
{% set branch1 = metadata['branches'][0] or metadata['defaults']['branch'] or "local" %}
62+
{% set branch2 = metadata['branches'][1] or metadata['defaults']['branch'] or "local" %}
63+
<tr>
64+
<th>Branch</th>
65+
<td>{{ branch1 }}</td>
66+
<td>{{ branch2 }}</td>
67+
</tr>
68+
{% set mapping_suite1 = metadata['mapping_suite_ids'][0] or metadata['mapping_suite_ids'][1] %}
69+
{% set mapping_suite2 = metadata['mapping_suite_ids'][1] or metadata['mapping_suite_ids'][0] %}
70+
<tr>
71+
<th>Mapping Suite</th>
72+
<td>{{ mapping_suite1 }}</td>
73+
<td>{{ mapping_suite2 }}</td>
74+
</tr>
75+
{% set file1 = metadata['files'][0] or metadata['defaults']['conceptual_mapping'] %}
76+
{% set file2 = metadata['files'][1] or metadata['defaults']['conceptual_mapping'] %}
77+
<tr>
78+
<th>Conceptual Mapping File</th>
79+
<td>{{ file1 }}</td>
80+
<td>{{ file2 }}</td>
81+
</tr>
82+
<tr>
83+
<th>Mapping Version</th>
84+
<td>{{ metadata['metadata'][0]['mapping_version'] }}</td>
85+
<td>{{ metadata['metadata'][1]['mapping_version'] }}</td>
86+
</tr>
87+
</tbody>
88+
</table>
3189
<hr>
3290
<h2>Report details: </h2>
33-
34-
{{ data }}
91+
{% if data %}
92+
{{ data }}
93+
{% else %}
94+
<h3>NO DIFF</h3>
95+
{% endif %}
3596
<script>
36-
document.querySelector('body>table').classList.add('dataTable');
97+
const tables = document.querySelectorAll('body>table');
98+
console.log(tables);
99+
for (const table of tables) {
100+
table.classList.add('dataTable');
101+
}
37102
</script>
38103
</body>
39104
</html>

ted_sws/mapping_suite_processor/services/conceptual_mapping_differ.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,19 @@ def mapping_suite_diff_conceptual_mappings(mappings: List[ConceptualMapping]) ->
2828
:return:
2929
"""
3030
assert mappings and len(mappings) == 2
31-
return DeepDiff(mappings[0].dict(), mappings[1].dict(), ignore_order=True)
31+
diff: ConceptualMappingDiff = ConceptualMappingDiff()
32+
diff.metadata = {
33+
"defaults": {
34+
"branch": "local",
35+
"conceptual_mapping": MS_TRANSFORM_FOLDER_NAME + "/" + MS_CONCEPTUAL_MAPPING_FILE_NAME
36+
},
37+
"metadata": [
38+
mappings[0].metadata.dict(),
39+
mappings[1].metadata.dict()
40+
]
41+
}
42+
diff.data = DeepDiff(mappings[0].dict(), mappings[1].dict(), ignore_order=True)
43+
return diff.dict()
3244

3345

3446
def mapping_suite_diff_files_conceptual_mappings(filepaths: List[Path]) -> dict:
@@ -103,7 +115,8 @@ def mapping_suite_diff_repo_conceptual_mappings(branch_or_tag_name: List[str], m
103115

104116
def generate_conceptual_mappings_diff_html_report(diff: ConceptualMappingDiff):
105117
html_report = TEMPLATES.get_template(CONCEPTUAL_MAPPINGS_DIFF_HTML_REPORT_TEMPLATE).render({
106-
"metadata": json.dumps(diff.metadata, indent=2),
118+
"metadata": diff.metadata,
119+
"created_at": diff.created_at,
107120
"data": json2html.convert(
108121
json=diff.data,
109122
table_attributes='class="display" border="1"',

ted_sws/master_data_registry/resources/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
MASTER_DATA_REGISTRY_RESOURCES_PATH = pathlib.Path(__file__).parent.resolve()
44

55
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates/get_by_cet_uri.rq"
6+
PROCEDURE_SUBJECTS_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates/get_procedure_uris.rq"
67
RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH = MASTER_DATA_REGISTRY_RESOURCES_PATH / "sparql_query_templates/get_2_dependency_levels_for_a_uri_as_root.rq"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
prefix epo:<http://data.europa.eu/a4g/ontology#>
2+
3+
SELECT DISTINCT ?s
4+
WHERE {
5+
?s a epo:Procedure.
6+
?o ?po ?s.
7+
filter not exists {?o epo:refersToPreviousProcedure ?s}
8+
}

ted_sws/master_data_registry/services/entity_deduplication.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from ted_sws.event_manager.services.log import log_error, log_notice_error
1818
from ted_sws.master_data_registry.services.rdf_fragment_processor import get_rdf_fragments_by_cet_uri_from_notices, \
1919
merge_rdf_fragments_into_graph, write_rdf_fragments_in_triple_store, RDF_FRAGMENT_FROM_NOTICE_PROPERTY, \
20-
get_subjects_by_cet_uri, get_rdf_fragment_by_cet_uri_from_notice
20+
get_procedure_subjects, get_rdf_fragment_by_root_uri_from_notice
2121

2222
MDR_TEMPORARY_FUSEKI_DATASET_NAME = "tmp_mdr_dataset"
2323
MDR_FUSEKI_DATASET_NAME = "mdr_dataset"
@@ -258,7 +258,7 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
258258
if parent_notice and parent_notice.rdf_manifestation and parent_notice.rdf_manifestation.object_data:
259259
rdf_content = parent_notice.rdf_manifestation.object_data
260260
sparql_endpoint = SPARQLStringEndpoint(rdf_content=rdf_content)
261-
result_uris = get_subjects_by_cet_uri(sparql_endpoint=sparql_endpoint, cet_uri=procedure_cet_uri)
261+
result_uris = get_procedure_subjects(sparql_endpoint=sparql_endpoint)
262262
result_uris_len = len(result_uris)
263263
if result_uris_len != 1:
264264
notice_normalised_metadata = parent_notice.normalised_metadata
@@ -269,11 +269,12 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
269269
notice_status=parent_notice.status,
270270
notice_eforms_subtype=notice_normalised_metadata.eforms_subtype if notice_normalised_metadata else None)
271271
else:
272+
result_uri = result_uris[0]
272273
parent_procedure_uri = rdflib.URIRef(result_uris[0])
273274
parent_uries[parent_notice_id] = parent_procedure_uri
274-
parent_procedure_rdf_fragments = get_rdf_fragment_by_cet_uri_from_notice(notice=parent_notice,
275-
cet_uri=procedure_cet_uri)
276-
parent_new_cet = {parent_procedure_uri: parent_procedure_rdf_fragments[0]}
275+
parent_procedure_rdf_fragment = get_rdf_fragment_by_root_uri_from_notice(notice=parent_notice,
276+
root_uri=result_uri)
277+
parent_new_cet = {parent_procedure_uri: parent_procedure_rdf_fragment}
277278
register_new_cets_in_mdr(new_canonical_entities=parent_new_cet, triple_store=triple_store,
278279
mdr_dataset_name=mdr_dataset_name)
279280

@@ -282,7 +283,7 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
282283
for child_notice in notice_families[parent_uri_key]:
283284
rdf_content = child_notice.rdf_manifestation.object_data
284285
sparql_endpoint = SPARQLStringEndpoint(rdf_content=rdf_content)
285-
result_uris = get_subjects_by_cet_uri(sparql_endpoint=sparql_endpoint, cet_uri=procedure_cet_uri)
286+
result_uris = get_procedure_subjects(sparql_endpoint=sparql_endpoint)
286287
result_uris_len = len(result_uris)
287288
if result_uris_len != 1:
288289
notice_normalised_metadata = child_notice.normalised_metadata

ted_sws/master_data_registry/services/rdf_fragment_processor.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,15 @@
1010
"""
1111
import pathlib
1212
from string import Template
13-
from typing import List, Tuple
13+
from typing import List, Tuple, Optional
1414

1515
import rdflib
1616

1717
from ted_sws.core.model.notice import Notice
1818
from ted_sws.data_manager.adapters.sparql_endpoint import SPARQLStringEndpoint
1919
from ted_sws.data_manager.adapters.triple_store import TripleStoreABC
2020
from ted_sws.master_data_registry.resources import RDF_FRAGMENT_BY_URI_SPARQL_QUERY_TEMPLATE_PATH, \
21-
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH
21+
TRIPLES_BY_CET_URI_SPARQL_QUERY_TEMPLATE_PATH, PROCEDURE_SUBJECTS_SPARQL_QUERY_TEMPLATE_PATH
2222

2323
RDFTriple = Tuple[rdflib.term.Node, rdflib.term.Node, rdflib.term.Node]
2424

@@ -39,6 +39,17 @@ def get_subjects_by_cet_uri(sparql_endpoint: SPARQLStringEndpoint, cet_uri: str)
3939
return query_table_result["s"].to_list()
4040

4141

42+
def get_procedure_subjects(sparql_endpoint: SPARQLStringEndpoint) -> List[str]:
43+
"""
44+
This function returns a list of procedure subjects.
45+
:param sparql_endpoint:
46+
:return:
47+
"""
48+
sparql_query = PROCEDURE_SUBJECTS_SPARQL_QUERY_TEMPLATE_PATH.read_text(encoding="utf-8")
49+
query_table_result = sparql_endpoint.with_query(sparql_query=sparql_query).fetch_tabular()
50+
return query_table_result["s"].to_list()
51+
52+
4253
def get_rdf_fragment_by_root_uri(sparql_endpoint: SPARQLStringEndpoint, root_uri: str,
4354
inject_triples: List[RDFTriple] = None) -> rdflib.Graph:
4455
"""
@@ -92,6 +103,25 @@ def get_rdf_fragments_by_cet_uri_from_file(rdf_file_path: pathlib.Path, cet_uri:
92103
rdf_content_format=rdf_file_content_format)
93104

94105

106+
def get_rdf_fragment_by_root_uri_from_notice(notice: Notice, root_uri: str) -> Optional[rdflib.Graph]:
107+
"""
108+
This function extracts, from the RDF content of a Notice, an RDF fragment dependent on a root URI.
109+
:param notice:
110+
:param root_uri:
111+
:return:
112+
"""
113+
sparql_endpoint = SPARQLStringEndpoint(rdf_content=notice.rdf_manifestation.object_data,
114+
rdf_content_format=DEFAULT_RDF_FILE_FORMAT)
115+
rdf_fragment = get_rdf_fragment_by_root_uri(sparql_endpoint=sparql_endpoint, root_uri=root_uri,
116+
inject_triples=[(rdflib.URIRef(root_uri),
117+
RDF_FRAGMENT_FROM_NOTICE_PROPERTY,
118+
rdflib.Literal(notice.ted_id))
119+
]
120+
)
121+
return rdf_fragment
122+
123+
124+
95125
def get_rdf_fragment_by_cet_uri_from_notice(notice: Notice, cet_uri: str) -> List[rdflib.Graph]:
96126
"""
97127
This function extracts from a Notice RDF content a list of RDFFragments dependent on a CET URI.

tests/e2e/mapping_suite_processor/conceptual_mapping_differ/test_cmd_conceptual_mapping_differ.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def test_cmd_conceptual_mapping_differ(caplog, cli_runner, fake_test_mapping_sui
4848
response = cli_runner.invoke(cli_main,
4949
["--opt-mappings-folder", temp_mapping_suite_path,
5050
"--opt-output-folder", temp_folder])
51+
5152
assert "FAILED" in response.output
5253
assert "Cannot do a diff" in response.output
5354

0 commit comments

Comments
 (0)