Skip to content

Commit da88262

Browse files
Merge pull request #393 from OP-TED/feature/TED-990
Procedures CET fragments shall also be loaded into the MDR
2 parents c72ffd2 + 9524a60 commit da88262

1 file changed

Lines changed: 17 additions & 2 deletions

File tree

ted_sws/master_data_registry/services/entity_deduplication.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from ted_sws.event_manager.services.log import log_error, log_notice_error
1818
from ted_sws.master_data_registry.services.rdf_fragment_processor import get_rdf_fragments_by_cet_uri_from_notices, \
1919
merge_rdf_fragments_into_graph, write_rdf_fragments_in_triple_store, RDF_FRAGMENT_FROM_NOTICE_PROPERTY, \
20-
get_subjects_by_cet_uri
20+
get_subjects_by_cet_uri, get_rdf_fragment_by_cet_uri_from_notice
2121

2222
MDR_TEMPORARY_FUSEKI_DATASET_NAME = "tmp_mdr_dataset"
2323
MDR_FUSEKI_DATASET_NAME = "mdr_dataset"
@@ -226,12 +226,14 @@ def deduplicate_entities_by_cet_uri(notices: List[Notice], cet_uri: str,
226226
alignment_graph=cet_alignment_links, inject_reflexive_links=True)
227227

228228

229-
def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str, mongodb_client: MongoClient):
229+
def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str, mongodb_client: MongoClient,
230+
mdr_dataset_name: str = MDR_FUSEKI_DATASET_NAME):
230231
"""
231232
This function deduplicate procedure entities for each notice from batch of notices.
232233
:param notices:
233234
:param procedure_cet_uri:
234235
:param mongodb_client:
236+
:param mdr_dataset_name:
235237
:return:
236238
"""
237239
notice_families = defaultdict(list)
@@ -243,6 +245,14 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
243245

244246
parent_uries = {}
245247
notice_repository = NoticeRepository(mongodb_client=mongodb_client)
248+
triple_store = FusekiAdapter()
249+
if mdr_dataset_name not in triple_store.list_repositories():
250+
try:
251+
triple_store.create_repository(repository_name=mdr_dataset_name)
252+
except Exception as exception:
253+
if str(exception) != FUSEKI_REPOSITORY_ALREADY_EXIST_ERROR_MSG:
254+
log_error(message=str(exception))
255+
246256
for parent_notice_id in notice_families.keys():
247257
parent_notice = notice_repository.get(reference=parent_notice_id)
248258
if parent_notice and parent_notice.rdf_manifestation and parent_notice.rdf_manifestation.object_data:
@@ -261,6 +271,11 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
261271
else:
262272
parent_procedure_uri = rdflib.URIRef(result_uris[0])
263273
parent_uries[parent_notice_id] = parent_procedure_uri
274+
parent_procedure_rdf_fragments = get_rdf_fragment_by_cet_uri_from_notice(notice=parent_notice,
275+
cet_uri=procedure_cet_uri)
276+
parent_new_cet = {parent_procedure_uri: parent_procedure_rdf_fragments[0]}
277+
register_new_cets_in_mdr(new_canonical_entities=parent_new_cet, triple_store=triple_store,
278+
mdr_dataset_name=mdr_dataset_name)
264279

265280
for parent_uri_key in parent_uries.keys():
266281
parent_uri = parent_uries[parent_uri_key]

0 commit comments

Comments
 (0)