@@ -126,245 +126,4 @@ def create_normalised_metadata_view(normalised_metadata: NormalisedMetadata) ->
126126 ** normalised_metadata_dict )
127127 return None
128128
129- # class MetadataNormaliserABC(abc.ABC):
130- # """
131- # Abstract class for notice metadata normalising process
132- # """
133- #
134- # @abc.abstractmethod
135- # def normalise_metadata(self) -> NormalisedMetadata:
136- # """
137- # Method to normalise metadata
138- # """
139- #
140- #
141- # class MetadataNormaliser(MetadataNormaliserABC):
142- # """
143- # Metadata normaliser
144- # """
145- #
146- # def __init__(self, notice: Notice):
147- # self.notice = notice
148- #
149- # def normalise_metadata(self):
150- # """
151- # Method that is normalising the metadata
152- # :return:
153- # """
154- # extracted_metadata = XMLManifestationMetadataExtractor(
155- # xml_manifestation=self.notice.xml_manifestation).to_metadata()
156- # normalised_metadata = ExtractedMetadataNormaliser(extracted_metadata).to_metadata()
157- # self.notice.set_normalised_metadata(normalised_metadata)
158129
159-
160- # class ExtractedMetadataNormaliser:
161- #
162- # def __init__(self, extracted_metadata: ExtractedMetadata):
163- # self.extracted_metadata = extracted_metadata
164- #
165- # @classmethod
166- # def get_map_value(cls, mapping: Dict, value: str) -> str:
167- # """
168- # Returns mapped URI for value
169- # :param mapping:
170- # :param value:
171- # :return:
172- # """
173- # entry_list = [element for element in mapping['results']['bindings'] if
174- # element['code']['value'] == value.strip()]
175- # entry = None
176- # if entry_list:
177- # entry = entry_list[0]
178- #
179- # return entry['conceptURI']['value'] if entry else None
180- #
181- # @classmethod
182- # def normalise_legal_basis_value(cls, value: str) -> str:
183- # """
184- # Transforms and returns Legal Basis value
185- # :param value:
186- # :return:
187- # """
188- # pattern = "3{year}L{number}"
189- # normalised_value = value
190- # parts = value.split("/") if value and ("/" in value) else [value]
191- # if len(parts) > 1:
192- # normalised_value = pattern.format(year=parts[0], number=parts[1].rjust(4, "0"))
193- #
194- # return normalised_value
195- #
196- # @classmethod
197- # def normalise_form_number(cls, value: str) -> str:
198- # """
199- # Normalise form number to be F{number} format.
200- # ##Decided to keep normalisation of the input data
201- # Rules:
202- # * The form number should start with a letter ("F", "T")
203- # * The form number isn't always a number (CEI,EEIG)
204- # * If the number is between 1 - 9 then it must have 0 as prefix (F02 not F2)
205- # :param value:
206- # :return:
207- # """
208- # if value:
209- # form_number_parts = re.split(r"(?=\d)", value, 1)
210- # if len(form_number_parts) == 2:
211- # text_part: str = form_number_parts[0] if form_number_parts[0] else "F"
212- # number_part: str = form_number_parts[1]
213- # if text_part.isalpha() and number_part.isdecimal():
214- # number_part = "0" + number_part if number_part and len(number_part) < 2 else number_part
215- # return text_part + number_part
216- # return value
217- #
218- # @classmethod
219- # def get_filter_variables_values(cls, form_number: str, extracted_notice_type: str, legal_basis: str,
220- # document_type_code: str, filter_map: pd.DataFrame) -> dict:
221- # """
222- # Get necessary values to filter mapping dataframe
223- # :param form_number:
224- # :param extracted_notice_type:
225- # :param legal_basis:
226- # :param document_type_code:
227- # :param filter_map:
228- # :return:
229- # """
230- # variables = {
231- # FORM_NUMBER_KEY: form_number,
232- # SF_NOTICE_TYPE_KEY: extracted_notice_type,
233- # DOCUMENT_CODE_KEY: document_type_code,
234- # LEGAL_BASIS_KEY: legal_basis
235- # }
236- # try:
237- # filter_variables = \
238- # filter_map.query(f"{FORM_NUMBER_KEY}=='{variables[FORM_NUMBER_KEY]}'").to_dict(orient='records')[0]
239- # except:
240- # raise Exception(
241- # f"This notice doesn't have a form number or the extracted form number is not in the mapping. "
242- # f"Form number found is {form_number}, document code is {document_type_code} and legal basis is {legal_basis}")
243- #
244- # for key, value in filter_variables.items():
245- # if value == 0:
246- # filter_variables[key] = None
247- # if value == 1:
248- # filter_variables[key] = variables[key]
249- #
250- # return filter_variables
251- #
252- # @classmethod
253- # def get_form_type_and_notice_type(cls, filter_map: pd.DataFrame, ef_map: pd.DataFrame, sf_map: pd.DataFrame,
254- # form_number: str,
255- # extracted_notice_type: str, legal_basis: str, document_type_code: str) -> Tuple:
256- # """
257- # Returns notice_type and form_type
258- # :param ef_map:
259- # :param filter_map:
260- # :param sf_map:
261- # :param form_number:
262- # :param extracted_notice_type:
263- # :param legal_basis:
264- # :param document_type_code:
265- # :return:
266- # """
267- # mapping_df = pd.merge(sf_map, ef_map, on=E_FORMS_SUBTYPE_KEY, how="left")
268- # filter_variables = cls.get_filter_variables_values(form_number=form_number, filter_map=filter_map,
269- # extracted_notice_type=extracted_notice_type,
270- # legal_basis=legal_basis,
271- # document_type_code=document_type_code)
272- # filtered_df = filter_df_by_variables(df=mapping_df, form_number=filter_variables[FORM_NUMBER_KEY],
273- # sf_notice_type=filter_variables[SF_NOTICE_TYPE_KEY],
274- # legal_basis=filter_variables[LEGAL_BASIS_KEY],
275- # document_code=filter_variables[DOCUMENT_CODE_KEY])
276- # try:
277- # form_type = filtered_df[FORM_TYPE_KEY].values[0]
278- # notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN].values[0]
279- # legal_basis = filtered_df[E_FORM_LEGAL_BASIS_COLUMN].values[0]
280- # eforms_subtype = filtered_df[E_FORMS_SUBTYPE_KEY].values[0]
281- # except:
282- # raise Exception(
283- # f"This notice can't be mapped with the current mapping files (standard forms mapping and eforms mapping)."
284- # f"Searched values: form number={form_number}, extracted_notice_type {extracted_notice_type},"
285- # f" legal_basis {legal_basis}, document_code {document_type_code}. "
286- # f"Therefore form_type, notice_type, legal_basis and eforms_subtype fields can't be normalised")
287- #
288- # return form_type, notice_type, legal_basis, eforms_subtype
289- #
290- # def get_map_list_value_by_code(self, mapping: Dict, listing: List):
291- # result = []
292- # for element in listing:
293- # if element:
294- # map_value = self.get_map_value(mapping=mapping, value=element.code)
295- # if map_value:
296- # result.append(map_value)
297- # return result
298- #
299- # @classmethod
300- # def iso_date_format(cls, _date: str, with_none=False):
301- # if _date or not with_none:
302- # return datetime.strptime(_date, '%Y%m%d').isoformat()
303- # return None
304- #
305- # def to_metadata(self) -> NormalisedMetadata:
306- # """
307- # Generate the normalised metadata
308- # :return:
309- # """
310- #
311- # mapping_registry = MappingFilesRegistry()
312- # countries_map = mapping_registry.countries
313- # form_type_map = mapping_registry.form_type
314- # languages_map = mapping_registry.languages
315- # legal_basis_map = mapping_registry.legal_basis
316- # notice_type_map = mapping_registry.notice_type
317- # nuts_map = mapping_registry.nuts
318- # standard_forms_map = mapping_registry.sf_notice_df
319- # eforms_map = mapping_registry.ef_notice_df
320- # filter_map = mapping_registry.filter_map_df
321- # form_type, notice_type, legal_basis, eforms_subtype = self.get_form_type_and_notice_type(
322- # sf_map=standard_forms_map, ef_map=eforms_map, filter_map=filter_map,
323- # extracted_notice_type=self.extracted_metadata.extracted_notice_type,
324- # form_number=self.normalise_form_number(
325- # self.extracted_metadata.extracted_form_number),
326- # legal_basis=self.normalise_legal_basis_value(
327- # self.extracted_metadata.legal_basis_directive),
328- # document_type_code=self.extracted_metadata.extracted_document_type.code
329- # )
330- # extracted_metadata = self.extracted_metadata
331- #
332- # metadata = {
333- # TITLE_KEY: [title.title for title in extracted_metadata.title],
334- # LONG_TITLE_KEY: [
335- # LanguageTaggedString(text=JOIN_SEP.join(
336- # [
337- # title.title_country.text,
338- # title.title_city.text,
339- # title.title.text
340- # ]),
341- # language=title.title.language) for title in extracted_metadata.title
342- # ],
343- # NOTICE_NUMBER_KEY: extracted_metadata.notice_publication_number,
344- # PUBLICATION_DATE_KEY: self.iso_date_format(extracted_metadata.publication_date),
345- # OJS_NUMBER_KEY: extracted_metadata.ojs_issue_number,
346- # OJS_TYPE_KEY: extracted_metadata.ojs_type if extracted_metadata.ojs_type else "S",
347- # BUYER_CITY_KEY: [city_of_buyer for city_of_buyer in extracted_metadata.city_of_buyer],
348- # BUYER_NAME_KEY: [name_of_buyer for name_of_buyer in extracted_metadata.name_of_buyer],
349- # LANGUAGE_KEY: self.get_map_value(mapping=languages_map, value=extracted_metadata.original_language),
350- # BUYER_COUNTRY_KEY: self.get_map_value(mapping=countries_map, value=extracted_metadata.country_of_buyer),
351- # EU_INSTITUTION_KEY: False if extracted_metadata.eu_institution == '-' else True,
352- # SENT_DATE_KEY: self.iso_date_format(extracted_metadata.document_sent_date, True),
353- # DEADLINE_DATE_KEY: self.iso_date_format(extracted_metadata.deadline_for_submission, True),
354- # NOTICE_TYPE_KEY: self.get_map_value(mapping=notice_type_map, value=notice_type),
355- # FORM_TYPE_KEY: self.get_map_value(mapping=form_type_map, value=form_type),
356- # PLACE_OF_PERFORMANCE_KEY: self.get_map_list_value_by_code(
357- # mapping=nuts_map,
358- # listing=extracted_metadata.place_of_performance
359- # ),
360- # EXTRACTED_LEGAL_BASIS_KEY: self.get_map_value(mapping=legal_basis_map,
361- # value=self.normalise_legal_basis_value(
362- # extracted_metadata.legal_basis_directive
363- # )) if extracted_metadata.legal_basis_directive else None,
364- # FORM_NUMBER_KEY: self.normalise_form_number(value=extracted_metadata.extracted_form_number),
365- # LEGAL_BASIS_DIRECTIVE_KEY: self.get_map_value(mapping=legal_basis_map, value=legal_basis),
366- # E_FORMS_SUBTYPE_KEY: str(eforms_subtype),
367- # XSD_VERSION_KEY: extracted_metadata.xml_schema_version
368- # }
369- #
370- # return NormalisedMetadata(**metadata)
0 commit comments