Skip to content

Commit 8c85cf3

Browse files
committed
added extra flag in metadata
1 parent 16e384d commit 8c85cf3

3 files changed

Lines changed: 6 additions & 243 deletions

File tree

ted_sws/core/model/metadata.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class NormalisedMetadata(Metadata):
8181
eforms_subtype: str
8282
xsd_version: str
8383
published_in_cellar_counter: int = Field(default=0)
84+
is_eForm: Optional[bool]
8485

8586

8687
class NormalisedMetadataView(Metadata):

ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
DEADLINE_DATE_KEY = "deadline_for_submission"
3737
NOTICE_TYPE_KEY = "notice_type"
3838
XSD_VERSION_KEY = "xsd_version"
39+
IS_EFORM_KEY = "is_eForm"
3940
ENGLISH_LANGUAGE_TAG = "EN"
4041
mapping_registry = MappingFilesRegistry()
4142

@@ -255,7 +256,8 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
255256
FORM_NUMBER_KEY: self.normalise_form_number(value=extracted_metadata.extracted_form_number),
256257
LEGAL_BASIS_DIRECTIVE_KEY: get_map_value(mapping=legal_basis_map, value=legal_basis),
257258
E_FORMS_SUBTYPE_KEY: str(eforms_subtype),
258-
XSD_VERSION_KEY: extracted_metadata.xml_schema_version
259+
XSD_VERSION_KEY: extracted_metadata.xml_schema_version,
260+
IS_EFORM_KEY: False
259261
}
260262

261263
return NormalisedMetadata(**metadata)
@@ -337,7 +339,8 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise
337339
LEGAL_BASIS_DIRECTIVE_KEY: get_map_value(mapping=legal_basis_map,
338340
value=legal_basis),
339341
E_FORMS_SUBTYPE_KEY: extracted_metadata.extracted_notice_subtype,
340-
XSD_VERSION_KEY: extracted_metadata.xml_schema_version
342+
XSD_VERSION_KEY: extracted_metadata.xml_schema_version,
343+
IS_EFORM_KEY: True
341344
}
342345

343346
return NormalisedMetadata(**metadata)

ted_sws/notice_metadata_processor/services/metadata_normalizer.py

Lines changed: 0 additions & 241 deletions
Original file line numberDiff line numberDiff line change
@@ -126,245 +126,4 @@ def create_normalised_metadata_view(normalised_metadata: NormalisedMetadata) ->
126126
**normalised_metadata_dict)
127127
return None
128128

129-
# class MetadataNormaliserABC(abc.ABC):
130-
# """
131-
# Abstract class for notice metadata normalising process
132-
# """
133-
#
134-
# @abc.abstractmethod
135-
# def normalise_metadata(self) -> NormalisedMetadata:
136-
# """
137-
# Method to normalise metadata
138-
# """
139-
#
140-
#
141-
# class MetadataNormaliser(MetadataNormaliserABC):
142-
# """
143-
# Metadata normaliser
144-
# """
145-
#
146-
# def __init__(self, notice: Notice):
147-
# self.notice = notice
148-
#
149-
# def normalise_metadata(self):
150-
# """
151-
# Method that is normalising the metadata
152-
# :return:
153-
# """
154-
# extracted_metadata = XMLManifestationMetadataExtractor(
155-
# xml_manifestation=self.notice.xml_manifestation).to_metadata()
156-
# normalised_metadata = ExtractedMetadataNormaliser(extracted_metadata).to_metadata()
157-
# self.notice.set_normalised_metadata(normalised_metadata)
158129

159-
160-
# class ExtractedMetadataNormaliser:
161-
#
162-
# def __init__(self, extracted_metadata: ExtractedMetadata):
163-
# self.extracted_metadata = extracted_metadata
164-
#
165-
# @classmethod
166-
# def get_map_value(cls, mapping: Dict, value: str) -> str:
167-
# """
168-
# Returns mapped URI for value
169-
# :param mapping:
170-
# :param value:
171-
# :return:
172-
# """
173-
# entry_list = [element for element in mapping['results']['bindings'] if
174-
# element['code']['value'] == value.strip()]
175-
# entry = None
176-
# if entry_list:
177-
# entry = entry_list[0]
178-
#
179-
# return entry['conceptURI']['value'] if entry else None
180-
#
181-
# @classmethod
182-
# def normalise_legal_basis_value(cls, value: str) -> str:
183-
# """
184-
# Transforms and returns Legal Basis value
185-
# :param value:
186-
# :return:
187-
# """
188-
# pattern = "3{year}L{number}"
189-
# normalised_value = value
190-
# parts = value.split("/") if value and ("/" in value) else [value]
191-
# if len(parts) > 1:
192-
# normalised_value = pattern.format(year=parts[0], number=parts[1].rjust(4, "0"))
193-
#
194-
# return normalised_value
195-
#
196-
# @classmethod
197-
# def normalise_form_number(cls, value: str) -> str:
198-
# """
199-
# Normalise form number to be F{number} format.
200-
# ##Decided to keep normalisation of the input data
201-
# Rules:
202-
# * The form number should start with a letter ("F", "T")
203-
# * The form number isn't always a number (CEI,EEIG)
204-
# * If the number is between 1 - 9 then it must have 0 as prefix (F02 not F2)
205-
# :param value:
206-
# :return:
207-
# """
208-
# if value:
209-
# form_number_parts = re.split(r"(?=\d)", value, 1)
210-
# if len(form_number_parts) == 2:
211-
# text_part: str = form_number_parts[0] if form_number_parts[0] else "F"
212-
# number_part: str = form_number_parts[1]
213-
# if text_part.isalpha() and number_part.isdecimal():
214-
# number_part = "0" + number_part if number_part and len(number_part) < 2 else number_part
215-
# return text_part + number_part
216-
# return value
217-
#
218-
# @classmethod
219-
# def get_filter_variables_values(cls, form_number: str, extracted_notice_type: str, legal_basis: str,
220-
# document_type_code: str, filter_map: pd.DataFrame) -> dict:
221-
# """
222-
# Get necessary values to filter mapping dataframe
223-
# :param form_number:
224-
# :param extracted_notice_type:
225-
# :param legal_basis:
226-
# :param document_type_code:
227-
# :param filter_map:
228-
# :return:
229-
# """
230-
# variables = {
231-
# FORM_NUMBER_KEY: form_number,
232-
# SF_NOTICE_TYPE_KEY: extracted_notice_type,
233-
# DOCUMENT_CODE_KEY: document_type_code,
234-
# LEGAL_BASIS_KEY: legal_basis
235-
# }
236-
# try:
237-
# filter_variables = \
238-
# filter_map.query(f"{FORM_NUMBER_KEY}=='{variables[FORM_NUMBER_KEY]}'").to_dict(orient='records')[0]
239-
# except:
240-
# raise Exception(
241-
# f"This notice doesn't have a form number or the extracted form number is not in the mapping. "
242-
# f"Form number found is {form_number}, document code is {document_type_code} and legal basis is {legal_basis}")
243-
#
244-
# for key, value in filter_variables.items():
245-
# if value == 0:
246-
# filter_variables[key] = None
247-
# if value == 1:
248-
# filter_variables[key] = variables[key]
249-
#
250-
# return filter_variables
251-
#
252-
# @classmethod
253-
# def get_form_type_and_notice_type(cls, filter_map: pd.DataFrame, ef_map: pd.DataFrame, sf_map: pd.DataFrame,
254-
# form_number: str,
255-
# extracted_notice_type: str, legal_basis: str, document_type_code: str) -> Tuple:
256-
# """
257-
# Returns notice_type and form_type
258-
# :param ef_map:
259-
# :param filter_map:
260-
# :param sf_map:
261-
# :param form_number:
262-
# :param extracted_notice_type:
263-
# :param legal_basis:
264-
# :param document_type_code:
265-
# :return:
266-
# """
267-
# mapping_df = pd.merge(sf_map, ef_map, on=E_FORMS_SUBTYPE_KEY, how="left")
268-
# filter_variables = cls.get_filter_variables_values(form_number=form_number, filter_map=filter_map,
269-
# extracted_notice_type=extracted_notice_type,
270-
# legal_basis=legal_basis,
271-
# document_type_code=document_type_code)
272-
# filtered_df = filter_df_by_variables(df=mapping_df, form_number=filter_variables[FORM_NUMBER_KEY],
273-
# sf_notice_type=filter_variables[SF_NOTICE_TYPE_KEY],
274-
# legal_basis=filter_variables[LEGAL_BASIS_KEY],
275-
# document_code=filter_variables[DOCUMENT_CODE_KEY])
276-
# try:
277-
# form_type = filtered_df[FORM_TYPE_KEY].values[0]
278-
# notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN].values[0]
279-
# legal_basis = filtered_df[E_FORM_LEGAL_BASIS_COLUMN].values[0]
280-
# eforms_subtype = filtered_df[E_FORMS_SUBTYPE_KEY].values[0]
281-
# except:
282-
# raise Exception(
283-
# f"This notice can't be mapped with the current mapping files (standard forms mapping and eforms mapping)."
284-
# f"Searched values: form number={form_number}, extracted_notice_type {extracted_notice_type},"
285-
# f" legal_basis {legal_basis}, document_code {document_type_code}. "
286-
# f"Therefore form_type, notice_type, legal_basis and eforms_subtype fields can't be normalised")
287-
#
288-
# return form_type, notice_type, legal_basis, eforms_subtype
289-
#
290-
# def get_map_list_value_by_code(self, mapping: Dict, listing: List):
291-
# result = []
292-
# for element in listing:
293-
# if element:
294-
# map_value = self.get_map_value(mapping=mapping, value=element.code)
295-
# if map_value:
296-
# result.append(map_value)
297-
# return result
298-
#
299-
# @classmethod
300-
# def iso_date_format(cls, _date: str, with_none=False):
301-
# if _date or not with_none:
302-
# return datetime.strptime(_date, '%Y%m%d').isoformat()
303-
# return None
304-
#
305-
# def to_metadata(self) -> NormalisedMetadata:
306-
# """
307-
# Generate the normalised metadata
308-
# :return:
309-
# """
310-
#
311-
# mapping_registry = MappingFilesRegistry()
312-
# countries_map = mapping_registry.countries
313-
# form_type_map = mapping_registry.form_type
314-
# languages_map = mapping_registry.languages
315-
# legal_basis_map = mapping_registry.legal_basis
316-
# notice_type_map = mapping_registry.notice_type
317-
# nuts_map = mapping_registry.nuts
318-
# standard_forms_map = mapping_registry.sf_notice_df
319-
# eforms_map = mapping_registry.ef_notice_df
320-
# filter_map = mapping_registry.filter_map_df
321-
# form_type, notice_type, legal_basis, eforms_subtype = self.get_form_type_and_notice_type(
322-
# sf_map=standard_forms_map, ef_map=eforms_map, filter_map=filter_map,
323-
# extracted_notice_type=self.extracted_metadata.extracted_notice_type,
324-
# form_number=self.normalise_form_number(
325-
# self.extracted_metadata.extracted_form_number),
326-
# legal_basis=self.normalise_legal_basis_value(
327-
# self.extracted_metadata.legal_basis_directive),
328-
# document_type_code=self.extracted_metadata.extracted_document_type.code
329-
# )
330-
# extracted_metadata = self.extracted_metadata
331-
#
332-
# metadata = {
333-
# TITLE_KEY: [title.title for title in extracted_metadata.title],
334-
# LONG_TITLE_KEY: [
335-
# LanguageTaggedString(text=JOIN_SEP.join(
336-
# [
337-
# title.title_country.text,
338-
# title.title_city.text,
339-
# title.title.text
340-
# ]),
341-
# language=title.title.language) for title in extracted_metadata.title
342-
# ],
343-
# NOTICE_NUMBER_KEY: extracted_metadata.notice_publication_number,
344-
# PUBLICATION_DATE_KEY: self.iso_date_format(extracted_metadata.publication_date),
345-
# OJS_NUMBER_KEY: extracted_metadata.ojs_issue_number,
346-
# OJS_TYPE_KEY: extracted_metadata.ojs_type if extracted_metadata.ojs_type else "S",
347-
# BUYER_CITY_KEY: [city_of_buyer for city_of_buyer in extracted_metadata.city_of_buyer],
348-
# BUYER_NAME_KEY: [name_of_buyer for name_of_buyer in extracted_metadata.name_of_buyer],
349-
# LANGUAGE_KEY: self.get_map_value(mapping=languages_map, value=extracted_metadata.original_language),
350-
# BUYER_COUNTRY_KEY: self.get_map_value(mapping=countries_map, value=extracted_metadata.country_of_buyer),
351-
# EU_INSTITUTION_KEY: False if extracted_metadata.eu_institution == '-' else True,
352-
# SENT_DATE_KEY: self.iso_date_format(extracted_metadata.document_sent_date, True),
353-
# DEADLINE_DATE_KEY: self.iso_date_format(extracted_metadata.deadline_for_submission, True),
354-
# NOTICE_TYPE_KEY: self.get_map_value(mapping=notice_type_map, value=notice_type),
355-
# FORM_TYPE_KEY: self.get_map_value(mapping=form_type_map, value=form_type),
356-
# PLACE_OF_PERFORMANCE_KEY: self.get_map_list_value_by_code(
357-
# mapping=nuts_map,
358-
# listing=extracted_metadata.place_of_performance
359-
# ),
360-
# EXTRACTED_LEGAL_BASIS_KEY: self.get_map_value(mapping=legal_basis_map,
361-
# value=self.normalise_legal_basis_value(
362-
# extracted_metadata.legal_basis_directive
363-
# )) if extracted_metadata.legal_basis_directive else None,
364-
# FORM_NUMBER_KEY: self.normalise_form_number(value=extracted_metadata.extracted_form_number),
365-
# LEGAL_BASIS_DIRECTIVE_KEY: self.get_map_value(mapping=legal_basis_map, value=legal_basis),
366-
# E_FORMS_SUBTYPE_KEY: str(eforms_subtype),
367-
# XSD_VERSION_KEY: extracted_metadata.xml_schema_version
368-
# }
369-
#
370-
# return NormalisedMetadata(**metadata)

0 commit comments

Comments
 (0)