1717from ted_sws .event_manager .services .log import log_error , log_notice_error
1818from ted_sws .master_data_registry .services .rdf_fragment_processor import get_rdf_fragments_by_cet_uri_from_notices , \
1919 merge_rdf_fragments_into_graph , write_rdf_fragments_in_triple_store , RDF_FRAGMENT_FROM_NOTICE_PROPERTY , \
20- get_subjects_by_cet_uri
20+ get_subjects_by_cet_uri , get_rdf_fragment_by_cet_uri_from_notice
2121
2222MDR_TEMPORARY_FUSEKI_DATASET_NAME = "tmp_mdr_dataset"
2323MDR_FUSEKI_DATASET_NAME = "mdr_dataset"
@@ -226,12 +226,14 @@ def deduplicate_entities_by_cet_uri(notices: List[Notice], cet_uri: str,
226226 alignment_graph = cet_alignment_links , inject_reflexive_links = True )
227227
228228
229- def deduplicate_procedure_entities (notices : List [Notice ], procedure_cet_uri : str , mongodb_client : MongoClient ):
229+ def deduplicate_procedure_entities (notices : List [Notice ], procedure_cet_uri : str , mongodb_client : MongoClient ,
230+ mdr_dataset_name : str = MDR_FUSEKI_DATASET_NAME ):
230231 """
231232 This function deduplicates procedure entities for each notice in a batch of notices.
232233 :param notices:
233234 :param procedure_cet_uri:
234235 :param mongodb_client:
236+ :param mdr_dataset_name:
235237 :return:
236238 """
237239 notice_families = defaultdict (list )
@@ -243,6 +245,14 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
243245
244246 parent_uries = {}
245247 notice_repository = NoticeRepository (mongodb_client = mongodb_client )
248+ triple_store = FusekiAdapter ()
249+ if mdr_dataset_name not in triple_store .list_repositories ():
250+ try :
251+ triple_store .create_repository (repository_name = mdr_dataset_name )
252+ except Exception as exception :
253+ if str (exception ) != FUSEKI_REPOSITORY_ALREADY_EXIST_ERROR_MSG :
254+ log_error (message = str (exception ))
255+
246256 for parent_notice_id in notice_families .keys ():
247257 parent_notice = notice_repository .get (reference = parent_notice_id )
248258 if parent_notice and parent_notice .rdf_manifestation and parent_notice .rdf_manifestation .object_data :
@@ -261,6 +271,11 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str
261271 else :
262272 parent_procedure_uri = rdflib .URIRef (result_uris [0 ])
263273 parent_uries [parent_notice_id ] = parent_procedure_uri
274+ parent_procedure_rdf_fragments = get_rdf_fragment_by_cet_uri_from_notice (notice = parent_notice ,
275+ cet_uri = procedure_cet_uri )
276+ parent_new_cet = {parent_procedure_uri : parent_procedure_rdf_fragments [0 ]}
277+ register_new_cets_in_mdr (new_canonical_entities = parent_new_cet , triple_store = triple_store ,
278+ mdr_dataset_name = mdr_dataset_name )
264279
265280 for parent_uri_key in parent_uries .keys ():
266281 parent_uri = parent_uries [parent_uri_key ]
0 commit comments