Skip to content

Commit e148160

Browse files
committed
Fixed index-out-of-range error in the normalisation process
1 parent 10ec84b commit e148160

7 files changed

Lines changed: 708 additions & 5 deletions

File tree

ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,10 @@ def legal_basis_directive(self):
363363
@property
364364
def extracted_notice_subtype(self):
365365
return extract_text_from_element(
366-
element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype, namespaces=self.namespaces))
367-
366+
element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_first,
367+
namespaces=self.namespaces)) or extract_text_from_element(
368+
element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_second,
369+
namespaces=self.namespaces))
368370
@property
369371
def extracted_eform_type(self):
370372
return extract_attribute_from_element(

ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,11 @@ def get_form_type_notice_type_and_legal_basis(cls, extracted_notice_subtype: str
284284
Get the values for form type, notice type and legal basis from the eForm mapping files
285285
"""
286286
ef_map: pd.DataFrame = mapping_registry.ef_notice_df
287-
filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0]
287+
try:
288+
filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0]
289+
except:
290+
raise Exception(
291+
'No eform subtype was extracted from the notice content. Please check that the field exists in the XML content')
288292
try:
289293
form_type = filtered_df[FORM_TYPE_KEY]
290294
notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN]

ted_sws/notice_metadata_processor/adapters/xpath_registry.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,13 @@ def xpath_legal_basis_directive(self):
203203
return ".//cbc:RegulatoryDomain"
204204

205205
@property
206-
def xpath_notice_subtype(self):
206+
def xpath_notice_subtype_first(self):
207207
return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"
208208

209+
@property
210+
def xpath_notice_subtype_second(self):
211+
return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode"
212+
209213
@property
210214
def xpath_form_type(self):
211215
return ".//cbc:NoticeTypeCode"

tests/test_data/notice_normalisation/2023-OJS153-00486429.xml

Lines changed: 335 additions & 0 deletions
Large diffs are not rendered by default.

tests/test_data/notice_normalisation/no_eform_subtype_notice.xml

Lines changed: 335 additions & 0 deletions
Large diffs are not rendered by default.

tests/unit/notice_metadata_processor/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ def file_system_repository_path():
1616
return TEST_DATA_PATH / "notice_transformer" / "mapping_suite_processor_repository"
1717

1818

19+
@pytest.fixture
20+
def notice_normalisation_test_data_path():
21+
return TEST_DATA_PATH / "notice_normalisation"
22+
23+
1924
@pytest.fixture
2025
def eforms_xml_notice_paths() -> List[pathlib.Path]:
2126
eforms_xml_notices_path = TEST_DATA_PATH / "eforms_samples"

tests/unit/notice_metadata_processor/test_metadata_normaliser.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
22

3+
from ted_sws.core.model.manifestation import XMLManifestation
34
from ted_sws.core.model.metadata import NormalisedMetadata
45
from ted_sws.core.model.notice import NoticeStatus
56
from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \
@@ -11,7 +12,8 @@
1112
from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables
1213
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \
1314
check_if_xml_manifestation_is_eform, find_metadata_extractor_based_on_xml_manifestation, \
14-
find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata
15+
find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata, \
16+
extract_and_normalise_notice_metadata
1517
from ted_sws.resources.mapping_files_registry import MappingFilesRegistry
1618

1719

@@ -217,3 +219,19 @@ def test_get_form_type_notice_type_and_legal_basis():
217219
assert form_type == 'competition'
218220
assert notice_type == 'cn-social'
219221
assert legal_basis == '32014L0024'
222+
223+
224+
def test_normalising_notice_out_of_index(notice_normalisation_test_data_path):
225+
notice_xml_path = notice_normalisation_test_data_path / "2023-OJS153-00486429.xml"
226+
notice_content = notice_xml_path.read_text(encoding="utf-8")
227+
normalised_notice_metadata = extract_and_normalise_notice_metadata(
228+
xml_manifestation=XMLManifestation(object_data=notice_content))
229+
assert normalised_notice_metadata.eforms_subtype == "16"
230+
assert normalised_notice_metadata.notice_publication_number == "00486429-2023"
231+
232+
broken_notice_xml_path = notice_normalisation_test_data_path / "no_eform_subtype_notice.xml"
233+
broke_notice_content = broken_notice_xml_path.read_text(encoding="utf-8")
234+
235+
with pytest.raises(Exception):
236+
extract_and_normalise_notice_metadata(
237+
xml_manifestation=XMLManifestation(object_data=broke_notice_content))

0 commit comments

Comments
 (0)