11import abc
2+ import html
3+ import re
24from datetime import datetime
35from typing import Dict , Tuple , List
4- import re
6+
57import pandas as pd
6- import html
8+ from pymongo import MongoClient
79
810from src .ted_sws .core .model .metadata import NormalisedMetadata , LanguageTaggedString , NoticeSource
11+ from src .ted_sws .core .model .notice import Notice
12+ from src .ted_sws .event_manager .services .log import log_notice_info
913from src .ted_sws .notice_metadata_processor .model .metadata import ExtractedMetadata
1014from src .ted_sws .notice_metadata_processor .services .metadata_constraints import filter_df_by_variables
1115from src .ted_sws .resources .mapping_files_registry import MappingFilesRegistry
4044EFORM_SDK_VERSION_KEY = "eform_sdk_version"
4145NOTICE_SOURCE_KEY = "notice_source"
4246ENGLISH_LANGUAGE_TAG = "EN"
43- mapping_registry = MappingFilesRegistry ()
4447
4548
4649def get_html_compatible_string (input_string : LanguageTaggedString ) -> LanguageTaggedString :
@@ -86,6 +89,8 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
8689
8790
8891class DefaultNoticeMetadataNormaliser (NoticeMetadataNormaliserABC ):
# Constructor: builds a MappingFilesRegistry for the given notice (forwarding the
# optional MongoDB client) and stores it on `self.mapping_registry`, which the
# normaliser reads in place of the former module-level `mapping_registry`.
# `mongodb_client` defaults to None; how MappingFilesRegistry treats a None
# client is not visible here - NOTE(review): confirm against MappingFilesRegistry.
92+ def __init__ (self , notice : Notice , mongodb_client : MongoClient = None ):
93+ self .mapping_registry = MappingFilesRegistry (notice = notice , mongodb_client = mongodb_client )
8994
9095 @classmethod
9196 def normalise_legal_basis_value (cls , value : str ) -> str :
@@ -207,15 +212,15 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
207212 Generate the normalised metadata
208213 :return:
209214 """
210- countries_map = mapping_registry .countries
211- form_type_map = mapping_registry .form_type
212- languages_map = mapping_registry .languages
213- legal_basis_map = mapping_registry .legal_basis
214- notice_type_map = mapping_registry .notice_type
215- nuts_map = mapping_registry .nuts
216- standard_forms_map = mapping_registry .sf_notice_df
217- eforms_map = mapping_registry .ef_notice_df
218- filter_map = mapping_registry .filter_map_df
215+ countries_map = self . mapping_registry .countries
216+ form_type_map = self . mapping_registry .form_type
217+ languages_map = self . mapping_registry .languages
218+ legal_basis_map = self . mapping_registry .legal_basis
219+ notice_type_map = self . mapping_registry .notice_type
220+ nuts_map = self . mapping_registry .nuts
221+ standard_forms_map = self . mapping_registry .sf_notice_df
222+ eforms_map = self . mapping_registry .ef_notice_df
223+ filter_map = self . mapping_registry .filter_map_df
219224 form_type , notice_type , legal_basis , eforms_subtype = self .get_form_type_and_notice_type (
220225 sf_map = standard_forms_map , ef_map = eforms_map , filter_map = filter_map ,
221226 extracted_notice_type = extracted_metadata .extracted_notice_type ,
@@ -273,6 +278,8 @@ class EformsNoticeMetadataNormaliser(NoticeMetadataNormaliserABC):
273278 """
274279 Metadata normaliser for eForms
275280 """
# Constructor: creates a per-instance MappingFilesRegistry from `notice` and the
# optional `mongodb_client`, kept on `self.mapping_registry` for the eForms
# lookups performed by this normaliser's methods.
# NOTE(review): the None default for `mongodb_client` is passed straight through;
# the registry's fallback behaviour is assumed, not shown in this view.
281+ def __init__ (self , notice : Notice , mongodb_client : MongoClient = None ):
282+ self .mapping_registry = MappingFilesRegistry (notice = notice , mongodb_client = mongodb_client )
276283
277284 @classmethod
278285 def iso_date_format (cls , _date : str , with_none = False ):
@@ -283,17 +290,17 @@ def iso_date_format(cls, _date: str, with_none=False):
283290 return datetime .fromisoformat (_date ).isoformat ()
284291 return None
285292
286- @classmethod
287- def get_form_type_notice_type_and_legal_basis (cls , extracted_notice_subtype : str ) -> Tuple :
293+ def get_form_type_notice_type_and_legal_basis (self , extracted_notice_subtype : str ) -> Tuple :
288294 """
289295 Get the values for form type, notice type and legal basis from the eForm mapping files
290296 """
291- ef_map : pd .DataFrame = mapping_registry .ef_notice_df
297+ ef_map : pd .DataFrame = self . mapping_registry .ef_notice_df
292298 try :
293299 filtered_df = ef_map .query (f"{ E_FORMS_SUBTYPE_KEY } =='{ extracted_notice_subtype } '" ).to_dict (orient = 'records' )[0 ]
294300 except :
295301 raise Exception (
296302 f'No mapping available for { extracted_notice_subtype } notice subtype. Please check that the field exists in the XML content if the notice subtype is not specified in this message' )
303+
297304 try :
298305 form_type = filtered_df [FORM_TYPE_KEY ]
299306 notice_type = filtered_df [E_FORM_NOTICE_TYPE_COLUMN ]
@@ -312,11 +319,11 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
312319 :return:
313320 """
314321 extracted_metadata = extracted_metadata
315- form_type_map = mapping_registry .form_type
316- languages_map = mapping_registry .languages
317- legal_basis_map = mapping_registry .legal_basis
318- notice_type_map = mapping_registry .notice_type
319- nuts_map = mapping_registry .nuts
322+ form_type_map = self . mapping_registry .form_type
323+ languages_map = self . mapping_registry .languages
324+ legal_basis_map = self . mapping_registry .legal_basis
325+ notice_type_map = self . mapping_registry .notice_type
326+ nuts_map = self . mapping_registry .nuts
320327 form_type , notice_type , legal_basis = self .get_form_type_notice_type_and_legal_basis (
321328 extracted_notice_subtype = extracted_metadata .extracted_notice_subtype )
322329 metadata = {
@@ -354,3 +361,4 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
354361 }
355362
356363 return NormalisedMetadata (** metadata )
364+
0 commit comments